├── .github ├── CODEOWNERS ├── dependabot.yml └── workflows │ ├── build.yml │ ├── docs.yaml │ ├── lint.yml │ ├── on-pr-to-main.yml │ ├── on-push-any-branch.yml │ ├── on-push-main-branch.yml │ ├── publish.yml │ ├── release-please.yml │ └── snyk.yml ├── .gitignore ├── .gitmodules ├── .pre-commit-config.yaml ├── .pyup.yml ├── .snyk ├── CHANGELOG.md ├── CONTRIBUTING.md ├── LICENSE ├── MANIFEST.in ├── README.md ├── SECURITY.md ├── azure-pipelines.yml ├── docs └── manual.md ├── documentation ├── .gitignore ├── .prettierrc.js ├── README.md ├── babel.config.js ├── docs │ ├── about │ │ ├── introduction.md │ │ └── usage │ │ │ ├── _category_.json │ │ │ ├── basic-usage.md │ │ │ ├── caching.md │ │ │ ├── data-source.md │ │ │ ├── fetching-metadata.md │ │ │ └── time-zone.md │ └── contribute │ │ ├── _category_.yaml │ │ ├── development-guide │ │ ├── _category_.json │ │ ├── publishing.md │ │ ├── setup.md │ │ ├── testing.md │ │ └── upgrading.md │ │ ├── documentation.md │ │ ├── ground-rules.md │ │ ├── how-to-start-contributing.md │ │ └── overview.md ├── docusaurus.config.js ├── package.json ├── sidebars.js ├── src │ ├── css │ │ └── custom.css │ └── pages │ │ └── index.tsx ├── static │ ├── .nojekyll │ └── img │ │ ├── favicon.png │ │ └── logo.svg ├── tsconfig.json └── yarn.lock ├── examples └── quickstart.ipynb ├── mypy.ini ├── poetry.lock ├── pyproject.toml ├── pytest.ini ├── tagreader ├── __init__.py ├── __version__.py ├── cache.py ├── clients.py ├── logger.py ├── utils.py └── web_handlers.py └── tests ├── conftest.py ├── test_AspenHandlerREST.py ├── test_AspenHandlerREST_connect.py ├── test_PIHandlerREST.py ├── test_PIHandlerREST_connect.py ├── test_bucketcache.py ├── test_cache.py ├── test_clients.py └── test_utils.py /.github/CODEOWNERS: -------------------------------------------------------------------------------- 1 | # Ref. https://docs.github.com/en/repositories/managing-your-repositorys-settings-and-features/customizing-your-repository/about-code-owners 2 | * @Asgmel03 @lawoEq 3 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | # To get started with Dependabot version updates, you'll need to specify which 2 | # package ecosystems to update and where the package manifests are located. 
3 | # Please see the documentation for all configuration options: 4 | # https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates 5 | 6 | version: 2 7 | updates: 8 | - package-ecosystem: "pip" 9 | directory: "/" 10 | schedule: 11 | interval: "weekly" 12 | commit-message: 13 | prefix: "chore(deps): " 14 | - package-ecosystem: 'github-actions' 15 | directory: '/' 16 | schedule: 17 | interval: "weekly" 18 | commit-message: 19 | prefix: "chore(deps): " 20 | -------------------------------------------------------------------------------- /.github/workflows/build.yml: -------------------------------------------------------------------------------- 1 | name: "🧪 Build & test code" 2 | 3 | on: 4 | workflow_dispatch: 5 | workflow_call: 6 | 7 | jobs: 8 | build: 9 | strategy: 10 | fail-fast: false 11 | matrix: 12 | python-version: ["3.9", "3.10", "3.11"] 13 | os: [windows-latest, ubuntu-latest, macos-latest] 14 | runs-on: ${{ matrix.os }} 15 | steps: 16 | - name: Checkout code 17 | uses: actions/checkout@v4 18 | 19 | - name: Install required Linux library for pykerberos 20 | if: matrix.os == 'ubuntu-latest' 21 | run: | 22 | sudo apt-get update && sudo apt-get install libkrb5-dev 23 | 24 | - name: Install Poetry 25 | run: | 26 | pipx install poetry 27 | 28 | - name: Setup Python 29 | uses: actions/setup-python@v4 30 | with: 31 | python-version: ${{matrix.python-version}} 32 | cache: "poetry" 33 | 34 | - name: Check pyproject.toml validity 35 | run: poetry check --no-interaction 36 | 37 | - name: Install deps 38 | run: poetry install --no-interaction 39 | 40 | - name: Run tests 41 | run: poetry run pytest 42 | -------------------------------------------------------------------------------- /.github/workflows/docs.yaml: -------------------------------------------------------------------------------- 1 | name: "📚 Publish Docs" 2 | 3 | on: 4 | # Workflow dispatch is used for manual triggers 5 | workflow_dispatch: 6 | # Workflow call is used for called from another workflow 7 | workflow_call: 8 | 9 | env: 10 | GITHUB_PAGES_BRANCH: gh-pages 11 | 12 | jobs: 13 | publish-docs: 14 | runs-on: ubuntu-latest 15 | 16 | steps: 17 | - name: Checkout code 18 | uses: actions/checkout@v4 19 | 20 | - name: Setup node 21 | uses: actions/setup-node@v4 22 | with: 23 | node-version: 20 24 | cache: yarn 25 | cache-dependency-path: documentation/yarn.lock 26 | 27 | - name: Install dependencies and build website 28 | run: | 29 | cd documentation 30 | yarn install --frozen-lockfile 31 | yarn build 32 | 33 | - name: Push static files to Github Pages branch 34 | run: | 35 | cd documentation/build 36 | CREATED_FROM_REF=$(git rev-parse --short HEAD) 37 | git init 38 | git config user.name "GitHub Actions Bot" 39 | git config user.email "<>" 40 | git checkout -b $GITHUB_PAGES_BRANCH 41 | git remote add $GITHUB_PAGES_BRANCH https://x-access-token:${{ secrets.GITHUB_TOKEN }}@github.com/equinor/tagreader-python 42 | git add . 
43 | git commit -m "Built from commit '$CREATED_FROM_REF'" 44 | git push -f --set-upstream gh-pages gh-pages 45 | -------------------------------------------------------------------------------- /.github/workflows/lint.yml: -------------------------------------------------------------------------------- 1 | name: '💎 Code quality' 2 | 3 | on: 4 | workflow_dispatch: 5 | workflow_call: 6 | 7 | jobs: 8 | lint: 9 | name: Lint 10 | runs-on: ubuntu-latest 11 | 12 | steps: 13 | - name: Checkout code 14 | uses: actions/checkout@v4 15 | 16 | - name: Set up python 17 | uses: actions/setup-python@v4 18 | with: 19 | python-version: '3.11' 20 | 21 | - name: Install pre-commit 22 | run: pip install pre-commit 23 | 24 | - name: Run pre-commit 25 | run: pre-commit run --all-files 26 | -------------------------------------------------------------------------------- /.github/workflows/on-pr-to-main.yml: -------------------------------------------------------------------------------- 1 | name: '➕ Pull Request' 2 | 3 | on: 4 | pull_request: 5 | types: 6 | - opened 7 | - edited 8 | - reopened 9 | 10 | env: 11 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 12 | PR_LABEL: 'invalid_PR_title' 13 | jobs: 14 | lint-pr: 15 | name: Lint pull request title 16 | if: ${{ github.actor != 'dependabot[bot]' }} 17 | runs-on: ubuntu-latest 18 | steps: 19 | - uses: actions/checkout@v4 20 | - name: Checkout current PR 21 | run: gh pr checkout $GITHUB_HEAD_REF 22 | 23 | - name: Create Label 24 | continue-on-error: true 25 | run: gh label create ${{ env.PR_LABEL }} 26 | 27 | - name: Lint pull request title 28 | uses: jef/conventional-commits-pr-action@v1 29 | with: 30 | token: ${{ secrets.GITHUB_TOKEN }} 31 | 32 | - name: Remove label 33 | run: gh pr edit --remove-label ${{ env.PR_LABEL }} 34 | 35 | - name: Add label 36 | if: ${{ failure() }} 37 | run: gh pr edit --add-label ${{ env.PR_LABEL }} 38 | -------------------------------------------------------------------------------- /.github/workflows/on-push-any-branch.yml: -------------------------------------------------------------------------------- 1 | name: '✨ On push to any branch' 2 | 3 | on: 4 | push: 5 | branches: 6 | - '**' 7 | tags-ignore: 8 | - '**' 9 | 10 | 11 | jobs: 12 | lint: 13 | name: '💎 Code Quality' 14 | uses: ./.github/workflows/lint.yml 15 | 16 | snyk: 17 | name: '️‍🕵️‍♀️ Snyk vulnerability scan' 18 | uses: ./.github/workflows/snyk.yml 19 | secrets: inherit 20 | 21 | test: 22 | name: '🧪 Build & test Code' 23 | uses: ./.github/workflows/build.yml 24 | secrets: inherit 25 | -------------------------------------------------------------------------------- /.github/workflows/on-push-main-branch.yml: -------------------------------------------------------------------------------- 1 | name: "✨ On push to main branch" 2 | on: 3 | workflow_dispatch: 4 | push: 5 | branches: 6 | - main 7 | 8 | jobs: 9 | publish-docs: 10 | name: '️‍📚️ Publish Docs' 11 | uses: ./.github/workflows/docs.yaml -------------------------------------------------------------------------------- /.github/workflows/publish.yml: -------------------------------------------------------------------------------- 1 | name: 🚀 Upload Python Package 2 | 3 | on: 4 | workflow_dispatch: 5 | workflow_call: 6 | 7 | jobs: 8 | deploy: 9 | runs-on: windows-latest 10 | steps: 11 | - uses: actions/checkout@v4 12 | - name: Set up Python 13 | uses: actions/setup-python@v4 14 | with: 15 | python-version: '3.11' 16 | 17 | - name: Install Poetry 18 | run: | 19 | pipx install poetry 20 | 21 | - name: Setup Python 22 | uses: 
actions/setup-python@v4 23 | with: 24 | python-version: ${{matrix.python-version}} 25 | cache: 'poetry' 26 | 27 | - name: Check pyproject.toml validity 28 | run: poetry check --no-interaction 29 | 30 | - name: Install deps 31 | run: poetry install --no-interaction 32 | 33 | - name: Publish to PyPI 34 | env: 35 | POETRY_PYPI_TOKEN_PYPI: ${{ secrets.PYPI_TOKEN }} 36 | run: | 37 | poetry publish --build 38 | -------------------------------------------------------------------------------- /.github/workflows/release-please.yml: -------------------------------------------------------------------------------- 1 | name: 🎉 Release Please 2 | on: 3 | push: 4 | branches: 5 | - main 6 | 7 | jobs: 8 | release_please: 9 | name: 🔖 Release Please 10 | runs-on: ubuntu-latest 11 | steps: 12 | - name: PyPI Release 13 | id: release 14 | uses: google-github-actions/release-please-action@v3 15 | with: 16 | release-type: python 17 | bump-minor-pre-major: true 18 | changelog-types: > 19 | [{"type":"build", "section":"📦 Build system","hidden":false}, 20 | {"type":"chore", "section":"🧹 Chores","hidden":false}, 21 | {"type":"ci", "section":"👷 CI/CD","hidden":false}, 22 | {"type":"docs","section":"📚 Documentation","hidden":false}, 23 | {"type":"feat","section":"✨ Features","hidden":false}, 24 | {"type":"fix","section":"🐛 Bug Fixes","hidden":false}, 25 | {"type":"perf", "section":"🏎️ Performance","hidden":false}, 26 | {"type":"refactor", "section":"🔨 Refactor","hidden":false}, 27 | {"type":"revert", "section":"⏪️ Revert","hidden":false}, 28 | {"type":"style","section":"💎 Style","hidden":false}, 29 | {"type":"test", "section":"🧪 Tests","hidden":false}] 30 | outputs: 31 | release_created: ${{ steps.release.outputs.release_created }} 32 | tag_name: ${{ steps.release.outputs.tag_name }} 33 | 34 | build: 35 | name: '🛠️ Build' 36 | if: ${{ needs.release_please.outputs.release_created }} 37 | needs: [release_please] 38 | uses: ./.github/workflows/build.yml 39 | 40 | deploy: 41 | name: '🚀 Publish' 42 | if: ${{ needs.release_please.outputs.release_created }} 43 | needs: [build, release_please] 44 | uses: ./.github/workflows/publish.yml 45 | secrets: inherit 46 | -------------------------------------------------------------------------------- /.github/workflows/snyk.yml: -------------------------------------------------------------------------------- 1 | name: '🕵️‍♀️ Snyk vulnerability scan' 2 | on: 3 | # Workflow dispatch is used for manual triggers 4 | workflow_dispatch: 5 | # Workflow call is used for called from another workflow 6 | workflow_call: 7 | 8 | 9 | jobs: 10 | snyk: 11 | name: Snyk vulnerability scan 12 | runs-on: ubuntu-latest 13 | steps: 14 | - name: Checkout code 15 | uses: actions/checkout@v4 16 | 17 | - name: Setup Snyk 18 | uses: snyk/actions/setup@master 19 | 20 | - name: Scan third-party dependencies 21 | env: 22 | SNYK_TOKEN: ${{ secrets.SNYK_TOKEN }} 23 | run: snyk test --file=poetry.lock --package-manager=poetry --policy-path=.snyk --severity-threshold=medium 24 | 25 | - name: Scan code for vulnerabilities 26 | continue-on-error: true 27 | env: 28 | SNYK_TOKEN: ${{ secrets.SNYK_TOKEN }} 29 | run: snyk code test 30 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | 
downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | .cache* 48 | *cache* 49 | nosetests.xml 50 | coverage.xml 51 | *.cover 52 | *.py,cover 53 | .hypothesis/ 54 | .pytest_cache/ 55 | 56 | # Translations 57 | *.mo 58 | *.pot 59 | 60 | # Django stuff: 61 | *.log 62 | local_settings.py 63 | db.sqlite3 64 | db.sqlite3-journal 65 | 66 | # Flask stuff: 67 | instance/ 68 | .webassets-cache 69 | 70 | # Scrapy stuff: 71 | .scrapy 72 | 73 | # Sphinx documentation 74 | docs/_build/ 75 | 76 | # PyBuilder 77 | target/ 78 | 79 | # Jupyter Notebook 80 | .ipynb_checkpoints 81 | 82 | # IPython 83 | profile_default/ 84 | ipython_config.py 85 | 86 | # pyenv 87 | .python-version 88 | 89 | # pipenv 90 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 91 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 92 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 93 | # install all needed dependencies. 94 | #Pipfile.lock 95 | 96 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 97 | __pypackages__/ 98 | 99 | # Celery stuff 100 | celerybeat-schedule 101 | celerybeat.pid 102 | 103 | # SageMath parsed files 104 | *.sage.py 105 | 106 | # Environments 107 | .env 108 | .venv 109 | env/ 110 | venv*/ 111 | ENV/ 112 | env.bak/ 113 | venv.bak/ 114 | 115 | # Spyder project settings 116 | .spyderproject 117 | .spyproject 118 | 119 | # Rope project settings 120 | .ropeproject 121 | 122 | # mkdocs documentation 123 | /site 124 | 125 | # mypy 126 | .mypy_cache/ 127 | .dmypy.json 128 | dmypy.json 129 | 130 | # Pyre type checker 131 | .pyre/ 132 | 133 | # pycharm 134 | .idea/ 135 | 136 | # version handled by setuptools_scm 137 | **/version.py 138 | 139 | # Sometimes symlink, so treat as both dir and "file" 140 | # extratests 141 | # extratests/* 142 | 143 | # tagreader cache file 144 | *.h5 145 | 146 | # vscode 147 | .vscode/ 148 | 149 | test_scripts/* 150 | 151 | # Certificate files 152 | *.pem 153 | *.cer 154 | 155 | adhoc/ 156 | 157 | tests/test_adhoc.py 158 | 159 | # SNYK 160 | .dccache 161 | 162 | # Apple 163 | .DS_Store 164 | 165 | # Other 166 | .tmp/ -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "extratests"] 2 | path = tests/extratests 3 | url = https://github.com/equinor/tagreader-python-extra-tests.git 4 | [submodules "extratests"] 5 | branch = master 6 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/pre-commit/pre-commit-hooks 3 | rev: v4.5.0 4 | hooks: 5 | - id: check-yaml 6 | - id: check-json 7 | - id: trailing-whitespace 8 | - id: check-merge-conflict 9 | - repo: https://github.com/psf/black 
10 | rev: 24.2.0 11 | hooks: 12 | - id: black 13 | language_version: python3 14 | 15 | - repo: https://github.com/PyCQA/flake8 16 | rev: '7.0.0' 17 | hooks: 18 | - id: flake8 19 | args: ['--ignore=E501,W503,E231,E503,E203,F401,E702,E704'] 20 | 21 | - repo: https://github.com/timothycrosley/isort 22 | rev: '5.13.2' 23 | hooks: 24 | - id: isort 25 | args: 26 | [ 27 | '--line-length=88', 28 | '--use-parentheses', 29 | '--trailing-comma', 30 | '--multi-line=3', 31 | ] 32 | 33 | - repo: https://github.com/pre-commit/mirrors-mypy 34 | rev: v1.9.0 35 | hooks: 36 | - id: mypy 37 | args: [--strict, --ignore-missing-imports] 38 | additional_dependencies: 39 | - types-requests 40 | - pandas-stubs 41 | -------------------------------------------------------------------------------- /.pyup.yml: -------------------------------------------------------------------------------- 1 | # configure updates globally 2 | # default: all 3 | # allowed: all, insecure, False 4 | update: insecure 5 | 6 | # configure dependency pinning globally 7 | # default: True 8 | # allowed: True, False 9 | pin: True 10 | 11 | # set the default branch 12 | # default: empty, the default branch on GitHub 13 | #branch: dev 14 | 15 | # update schedule 16 | # default: empty 17 | # allowed: "every day", "every week", .. 18 | schedule: "every week" 19 | 20 | # search for requirement files 21 | # default: True 22 | # allowed: True, False 23 | search: True 24 | -------------------------------------------------------------------------------- /.snyk: -------------------------------------------------------------------------------- 1 | # Snyk (https://snyk.io) policy file, patches or ignores known vulnerabilities. 2 | # ignores vulnerabilities until expiry date; change duration by modifying expiry date 3 | ignore: 4 | SNYK-JS-INFLIGHT-6095116: 5 | - '*': 6 | reason: 'No patch available' 7 | expires: 2025-04-01T00:00:00.000Z 8 | created: 2023-12-04T09:05:00.000Z 9 | SNYK-JS-KATEX-8647963: 10 | - '*': 11 | reason: 'No patch available' 12 | expires: 2025-04-01T00:00:00.000Z 13 | created: 2025-02-04T15:29:00.000Z 14 | SNYK-PYTHON-PANDAS-8549481: 15 | - '*': 16 | reason: 'No patch available' 17 | expires: 2025-04-01T00:00:00.000Z 18 | created: 2025-01-02T10:30:00.000Z 19 | SNYK-PYTHON-JINJA2-8548181: 20 | - '*': 21 | reason: 'Will patch later' 22 | expires: 2025-04-01T00:00:00.000Z 23 | created: 2025-02-04T15:28:00.000Z 24 | SNYK-PYTHON-JINJA2-8548987: 25 | - '*': 26 | reason: 'Will patch later' 27 | expires: 2025-04-01T00:00:00.000Z 28 | created: 2025-02-04T15:28:00.000Z 29 | SNYK-PYTHON-TORNADO-8400708: 30 | - '*': 31 | reason: '' 32 | expires: 2025-04-01T00:00:00.000Z 33 | created: 2025-02-04T15:28:00.000Z 34 | patch: {} 35 | version: v1.25.0 -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contribute 2 | As Tagreader is an open source project, all contributions are welcome. This includes code, bug reports, issues, 3 | feature requests, and documentation. The preferred way of submitting a contribution is to either create an issue on 4 | GitHub or to fork the project and make a pull request. 
5 | 6 | To starting contributing, please see [Tagreader Docs - Contribute](https://equinor.github.io/tagreader-python/docs/contribute/overview) 7 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Equinor 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | exclude .* 2 | exclude requirements.txt test-requirements.txt environment.yml 3 | exclude azure-pipelines.yml 4 | recursive-exclude tests * 5 | recursive-exclude docs * 6 | recursive-exclude .github * 7 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # tagreader-python 2 | 3 | ![GitHub Build Status](https://github.com/equinor/tagreader-python/workflows/Test/badge.svg) 4 | ![PyPI - Python Version](https://img.shields.io/pypi/pyversions/tagreader) 5 | ![PyPI](https://img.shields.io/pypi/v/tagreader) 6 | [![Downloads](https://pepy.tech/badge/tagreader)](https://pepy.tech/project/tagreader) 7 | 8 | Tagreader is a Python package for reading timeseries data from the OSIsoft PI and Aspen Infoplus.21 9 | Information Management Systems (IMS). It is intended to be easy to use, and present as similar interfaces 10 | as possible to the backend plant historians. 11 | 12 | ## Installation 13 | You can install tagreader directly into your project from pypi by using pip 14 | or another package manager. Supports Python version 3.9.2 and above. 15 | 16 | ```shell 17 | pip install tagreader 18 | ``` 19 | 20 | ## Usage 21 | Tagreader is easy to use for both Equinor internal IMS services, and non-internal usage. For non-internal usage 22 | you simply need to provide the corresponding IMS service URLs and IMSType. 23 | See [data source](https://equinor.github.io/tagreader-python/docs/about/usage/data-source) for details. 
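As a sketch of the non-internal case, creating a client against a self-hosted endpoint might look like the following. The URL below is a placeholder, not a real server, and `piwebapi` can be swapped for `aspenone` depending on the backend:

```python
import tagreader

# Placeholder URL - substitute the Web API root of your own PI or IP.21 server
c = tagreader.IMSClient(
    datasource="myplant",
    imstype="piwebapi",
    url="https://piwebapi.mycompany.com/piwebapi",
)
c.connect()
```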
24 | 25 | ### Usage example 26 | ```python 27 | import tagreader 28 | c = tagreader.IMSClient("mysource", "aspenone") 29 | print(c.search("tag*")) 30 | df = c.read_tags(["tag1", "tag2"], "18.06.2020 08:00:00", "18.06.2020 09:00:00", 60) 31 | ``` 32 | 33 | Note, you can add a timeout argument to the search method in order to avoid long-running search queries. 34 | 35 | ### Jupyter Notebook Quickstart 36 | Jupyter Notebook examples can be found in /examples. In order to run these examples, you need to install the 37 | optional dependencies. 38 | 39 | ```shell 40 | pip install tagreader[notebooks] 41 | ``` 42 | 43 | The quickstart Jupyter Notebook can be found [here](https://github.com/equinor/tagreader-python/blob/main/examples/quickstart.ipynb) 44 | 45 | For more details, see the [Tagreader Docs](https://equinor.github.io/tagreader-python/). 46 | 47 | ## Documentation 48 | The full documentation can be found in [Tagreader Docs](https://equinor.github.io/tagreader-python/) 49 | 50 | ## Contribute 51 | To starting contributing, please see [Tagreader Docs - Contribute](https://equinor.github.io/tagreader-python/docs/contribute/overview) 52 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | # Security 2 | 3 | If you discover a security vulnerability in this project, please follow these steps to responsibly disclose it: 4 | 5 | 1. **Do not** create a public GitHub issue for the vulnerability. 6 | 2. Follow our guideline for Responsible Disclosure Policy at [https://www.equinor.com/about-us/csirt](https://www.equinor.com/about-us/csirt) to report the issue 7 | 8 | The following information will help us triage your report more quickly: 9 | 10 | - Type of issue (e.g. buffer overflow, SQL injection, cross-site scripting, etc.) 11 | - Full paths of source file(s) related to the manifestation of the issue 12 | - The location of the affected source code (tag/branch/commit or direct URL) 13 | - Any special configuration required to reproduce the issue 14 | - Step-by-step instructions to reproduce the issue 15 | - Proof-of-concept or exploit code (if possible) 16 | - Impact of the issue, including how an attacker might exploit the issue 17 | 18 | We prefer all communications to be in English. 
19 | -------------------------------------------------------------------------------- /azure-pipelines.yml: -------------------------------------------------------------------------------- 1 | trigger: 2 | - master 3 | 4 | pool: 5 | name: default 6 | 7 | strategy: 8 | matrix: 9 | Python38: 10 | python.version: '3.8' 11 | Python39: 12 | python.version: '3.9' 13 | Python310: 14 | python.version: '3.10' 15 | Python311: 16 | python.version: '3.11' 17 | 18 | steps: 19 | - task: UsePythonVersion@0 20 | inputs: 21 | versionSpec: '$(python.version)' 22 | displayName: 'Use Python $(python.version)' 23 | 24 | - script: | 25 | python -m pip install --upgrade pip pipx 26 | pipx install poetry 27 | poetry export -f requirements.txt --output requirements.txt 28 | pip install -r requirements.txt 29 | pip install pytest pytest-azurepipelines pytest-cov 30 | displayName: 'Install dependencies' 31 | env: 32 | HTTPS_PROXY: $(var_http_proxy) 33 | 34 | - script: | 35 | pytest --junitxml=junit/test-results.xml --cov=tagreader --cov-report=xml 36 | displayName: 'Run tests' 37 | 38 | - task: PublishTestResults@2 39 | condition: succeededOrFailed() 40 | inputs: 41 | testResultsFiles: '**/test-*.xml' 42 | testRunTitle: 'Publish test results for Python $(python.version)' 43 | 44 | - task: PublishCodeCoverageResults@1 45 | inputs: 46 | codeCoverageTool: Cobertura 47 | summaryFileLocation: '$(System.DefaultWorkingDirectory)/**/coverage.xml' 48 | -------------------------------------------------------------------------------- /docs/manual.md: -------------------------------------------------------------------------------- 1 | # Tagreader-python 2 | 3 | Tagreader is a Python package for reading trend data from the OSIsoft PI and AspenTech InfoPlus.21 IMS systems. It can communicate with PI Web API, and with IP.21 using Process Data REST Web API. 4 | 5 | Tagreader is intended to be easy to use, and present the same interface to the user regardless of IMS system and connection method. 6 | 7 | # Index 8 | 9 | - [Requirements](#requirements) 10 | - [Before getting started](#before-getting-started) 11 | - [Installation](#installation) 12 | - [Adding host certificates](#adding-host-certificates) 13 | - [For Equinor users](#for-equinor-users) 14 | - [For non-Equinor users](#for-non-equinor-users) 15 | - [Importing the module](#importing-the-module) 16 | - [IMS types](#ims-types) 17 | - [Listing available data sources](#listing-available-data-sources) 18 | - [The Client](#the-client) 19 | - [Creating a client](#creating-a-client) 20 | - [Connecting to data source](#connecting-to-data-source) 21 | - [Searching for tags](#searching-for-tags) 22 | - [Reading data](#reading-data) 23 | - [Selecting what to read](#selecting-what-to-read) 24 | - [Status information](#status-information) 25 | - [Caching results](#caching-results) 26 | - [Time zones](#time-zones) 27 | - [Fetching metadata](#fetching-metadata) 28 | - [get_units()](#getunits) 29 | - [get_description()](#getdescription) 30 | - [Performing raw queries](#performing-raw-queries) 31 | 32 | # Requirements 33 | 34 | Python >= 3.8 with the following packages: 35 | 36 | + pandas >= 1.0.0 37 | + diskcache 38 | + requests 39 | + requests_kerberos 40 | 41 | # Before getting started 42 | 43 | It is highly recommended to go through the [quickstart](../examples/quickstart.ipynb) example. It contains references to relevant sections in this manual. 
44 | 45 | # Installation 46 | 47 | To install and/or upgrade: 48 | 49 | ``` 50 | pip install --upgrade tagreader 51 | ``` 52 | 53 | ## Adding host certificates 54 | 55 | ### For Equinor users 56 | 57 | ***Note**: Since v2.7.0 the procedure described below will be automatically performed on Equinor hosts when importing the tagreader module. It should therefore no longer be necessary to perform this step manually.* 58 | 59 | The Web APIs are queried with the `requests` package. `requests` does not utilize the system certificate store, but instead relies on the `certifi` bundle. In order to avoid SSL verification errors, we need to either turn off SSL verification (optional input argument `verifySSL=False` for relevant function calls) or, preferably, add the certificate to the `certifi` bundle. To do this, simply activate the virtual environment where you installed `tagreader`, and run the following snippet: 60 | 61 | ``` python 62 | from tagreader.utils import add_equinor_root_certificate 63 | add_equinor_root_certificate() 64 | ``` 65 | 66 | The output should inform you that the certificate was successfully added. This needs to be repeated whenever certifi is upgraded in your python virtual environment. It is safe to run more than once: If the function detects that the certificate has already been added to your current certifi installation, the certificate will not be duplicated. 67 | 68 | ### For non-Equinor users 69 | 70 | If you run info SSL verification errors and prefer to not set `verifySSL=False` , you can try the procedure outlined [here](https://incognitjoe.github.io/adding-certs-to-requests.html). 71 | 72 | # Importing the module 73 | 74 | The module is imported with 75 | 76 | ``` python 77 | import tagreader 78 | ``` 79 | 80 | # IMS types 81 | 82 | Tagreader supports connecting to PI and IP.21 servers using Web API interfaces. When calling certain methods, the user will need to tell tagreader which system and which connection method to use. This input argument is called `imstype` , and can be one of the following case-insensitive strings: 83 | 84 | * `piwebapi` : For connecting to OSISoft PI Web API 85 | * `aspenone` : For connecting to AspenTech Process Data REST Web API 86 | 87 | # Listing available data sources 88 | 89 | The method `tagreader.list_sources()` can query for available PI and IP.21 servers available through Web API. Input arguments: 90 | 91 | * `imstype` (optional) : The name of the [IMS type](#ims-types) to query. Valid values: `piwebapi` and `aspenone`. 92 | 93 | The following input arguments are only relevant when calling `list_sources()` with a Web API `imstype` ( `piwebapi` or `aspenone` ): 94 | 95 | * `url` (optional): Path to server root, e.g. _"https://aspenone/ProcessData/AtProcessDataREST.dll"_ or _"https://piwebapi/piwebapi"_. **Default**: Path to Equinor server corresponding to selected `imstype` if `imstype` is `piwebapi` or `aspenone` . 96 | * `verifySSL` (optional): Whether to verify SSL certificate sent from server. **Default**: `True`. 97 | * `auth` (optional): Auth object to pass to the server for authentication. **Default**: Kerberos-based auth objects that work with Equinor servers. If not connecting to an Equinor server, you may have to create your own auth. 
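For a non-Equinor server, such a call might look like the sketch below. The server root and credentials are placeholders, and the NTLM auth object is only one example of an auth that can be passed in:

``` python
from requests_ntlm import HttpNtlmAuth
from tagreader import list_sources

# Placeholder domain, user and password - replace with your own credentials
auth = HttpNtlmAuth("mydomain\\myuser", "mypassword")
# Placeholder server root - replace with the Web API root of your own server
sources = list_sources(
    imstype="piwebapi",
    url="https://piwebapi.mycompany.com/piwebapi",
    auth=auth,
    verifySSL=True,
)
print(sources)
```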
98 | 99 | **Example:** 100 | 101 | ``` python 102 | from tagreader import list_sources 103 | list_sources("aspenone") 104 | list_sources("piwebapi") 105 | ``` 106 | 107 | When called with `imstype` set to `piwebapi` or `aspenone`, `list_sources()` will connect to the web server URL and query for the available list of data sources. This list is normally the complete set of data sources available on the server, and does not indicate whether the user is authorized to query the source or not. 108 | 109 | When querying Equinor Web API for data sources, `list_sources()` should require no input argument except `imstype="piwebapi"` or `imstype="aspenone"`. For non-Equinor servers, `url` will need to be specified, as may `auth` and `verifySSL` . 110 | 111 | # The Client 112 | 113 | The client presents the interface for communicating with the data source to the user. The interface shall be as unified as possible, regardless of the IMS type that is used. A handler object specifically designed for each IMS type is attached to the client when the client is created. The handler is responsible for handling the communication and data interpretation between the server and the client object. 114 | 115 | ## Creating a client 116 | 117 | A connection to a data source is prepared by creating an instance of `tagreader.IMSClient` with the following input arguments: 118 | 119 | * `datasource` : Name of data source 120 | * `imstype` (optional): The name of the [IMS type](#ims-types) to query. Indicates the type of data source that is requested, and therefore determines which handler type to use. Valid values are `piwebapi` and `aspenone`. If not provided it will search the available sources and find the type. 121 | * `tz` (optional): Time zone naive time stamps will be interpreted as belonging to this time zone. Similarly, the returned data points will be localized to this time zone. **Default**: _"Europe/Oslo"_. 122 | 123 | The following input arguments can be used when connecting to either `piwebapi` or to `aspenone`. None of these should be necessary to supply when connecting to Equinor servers. 124 | 125 | * `url` (optional): Path to server root, e.g. _"https://aspenone/ProcessData/AtProcessDataREST.dll"_ or _"https://piwebapi/piwebapi"_. **Default**: Path to Equinor server corresponding to selected `imstype` . 126 | * `verifySSL` (optional): Whether to verify SSL certificate sent from server. **Default**: `True`. 127 | * `auth` (optional): Auth object to pass to the server for authentication. **Default**: Kerberos-based auth object that works with Equinor servers. 128 | 129 | ## Connecting to data source 130 | 131 | After creating the client as described above, connect to the server with the `connect()` method. 
132 | 133 | **Example** 134 | 135 | Connecting to the PINO PI data source using PI webapi: 136 | 137 | ``` python 138 | c = tagreader.IMSClient("PINO") 139 | ``` 140 | 141 | Connecting to the Peregrino IP.21 data source using AspenTech Process Data REST Web API, specifying that all naive time stamps as well as the returned data shall use Rio local time, and using the local endpoint in Brazil: 142 | 143 | ``` python 144 | c = tagreader.IMSClient(datasource="PER", 145 | imstype="aspenone", 146 | tz="Brazil/East", 147 | url="https://aspenone-per.equinor.com/ProcessExplorer/ProcessData/AtProcessDataREST.dll") 148 | c.connect() 149 | ``` 150 | 151 | Connecting to some other AspenTech Web API URL using NTLM authentication instead of default Kerberos and ignoring the server's host certificate: 152 | 153 | ``` python 154 | import getpass 155 | from requests_ntlm import HttpNtlmAuth 156 | user = "mydomain\\" + getpass.getuser() 157 | pwd = getpass.getpass() 158 | auth = HttpNtlmAuth(user, pwd) 159 | c = tagreader.IMSClient(datasource="myplant", 160 | url="https://api.mycompany.com/aspenone", 161 | imstype="aspenone", 162 | auth=auth, 163 | verifySSL=False) 164 | c.connect() 165 | ``` 166 | 167 | # Searching for tags 168 | 169 | The client method `search()` can be used to search for tags using either tag name, tag description or both. 170 | 171 | Supply at least one of the following arguments: 172 | 173 | * `tag` : Name of tag 174 | * `desc` : Description of tag 175 | 176 | If both arguments are provided, the both must match. 177 | 178 | `*` can be used as wildcard. 179 | 180 | **Examples** 181 | 182 | ``` python 183 | c = tagreader.IMSClient("PINO") 184 | c.connect() 185 | c.search("cd*158") 186 | c.search(desc="*reactor*") 187 | c.search(tag="BA:*", desc="*Temperature*") 188 | ``` 189 | 190 | # Reading data 191 | 192 | Data is read by calling the client method `read()` with the following input arguments: 193 | 194 | * `tags` : List of tagnames. Wildcards are not allowed. 195 | 196 | Tags with maps (relevant for some InfoPlus.21 servers) can be on the form `'tag;map'` , e.g. `'109-HIC005;CS A_AUTO'` . 197 | 198 | * `start_time` : Start of time period. 199 | * `end_time` : End of time period. 200 | 201 | Both `start_time` and `end_time` can be either datetime object or string. Strings are interpreted by the [Timestamp](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.Timestamp.html) method from Pandas. Both timestamps can be left out when `read_type = ReaderType.SNAPSHOT` . However, when using either of the Web APIs, `end_time` provides the time at which the snapshot is taken. 202 | 203 | * `ts` : The interval between samples when querying interpolated or aggregated data. Ignored and can be left out when `read_type = ReaderType.SNAPSHOT` . **Default** 60 seconds. 204 | * `read_type` (optional): What kind of data to read. More info immediately below. **Default** Interpolated. 205 | * `get_status` (optonal): When set to `True` will fetch status information in addition to values. **Default** `False`. 206 | 207 | ## Selecting what to read 208 | 209 | By specifying the optional parameter `read_type` to `read()` , it is possible to specify what kind of data should be returned. The default query method is interpolated. All valid values for `read_type` are defined in the `utils.ReaderType` class (mirrored for convenience as `tagreader.ReaderType` ), although not all are currently implemented. Below is the list of implemented read types. 
210 | 211 | * `INT` : The raw data points are interpolated so that one new data point is generated at each step of length `ts` starting at `start_time` and ending at or less than `ts` seconds before `end_time` . 212 | * The following aggregated read types perform a weighted calculation of the raw data within each interval, using time-weighted calculations where applicable. Returned timestamps are anchored at the beginning of each interval. For example, for a 60-second interval from 08:11:00 to 08:12:00, the timestamp will be 08:11:00. 213 | + `MIN` : The minimum value. 214 | + `MAX` : The maximum value. 215 | + `AVG` : The average value. 216 | + `VAR` : The variance. 217 | + `STD` : The standard deviation. 218 | + `RNG` : The range (max-min). 219 | * `RAW` : Returns actual data points stored in the database. 220 | * `SNAPSHOT` : Returns the last recorded value. Only one tag can be read at a time. When using either of the Web API based handlers, providing `end_time` is possible in which case a snapshot at the specific time is returned. 221 | 222 | **Examples** 223 | 224 | Read interpolated data for the provided tag with 3-minute intervals between the two time stamps: 225 | 226 | ``` python 227 | import tagreader 228 | c = tagreader.IMSClient("PINO") 229 | c.connect() 230 | df = c.read(['BA:ACTIVE.1'], '05-Jan-2020 08:00:00', '05/01/20 11:30am', 180) 231 | 232 | ``` 233 | 234 | Read the average value for the two provided tags within each 3-minute interval between the two time stamps: 235 | 236 | ``` python 237 | df = c.read(['BA:CONC.1'], '05-Jan-2020 08:00:00', '05/01/20 11:30am', 180, read_type=tagreader.ReaderType.AVG) 238 | ``` 239 | 240 | ## Status information 241 | 242 | The optional parameter `get_status` was added to `IMSClient.read()` in release 2.6.0. If set to `True`, the resulting dataframe will be expanded with one additional column per tag. The column contains integer numbers that indicate the status, or quality, of the returned values. 243 | 244 | In an effort to unify the status value for all IMS types, the following schema based on AspenTech was selected: 245 | 246 | 0: Good 247 | 1: Suspect 248 | 2: Bad 249 | 4: Good/Modified 250 | 5: Suspect/Modified 251 | 6: Bad/Modified 252 | 253 | The status value is obtained differently for the four IMS types: 254 | * Aspen Web API: Read directly from the `l` ("Level") field in the json output. 255 | * Aspen ODBC: Read directly from the `status` field in the table. 256 | * PI Web API: Calculated as `Questionable` + 2 * (1 - `Good`) + 4 * `Substituted`. 257 | * PI ODBC: Calculated as `questionable` + 2 * (`status` != 0) + 4 * `substituted`. `status` is 0 for good, positive or negative for various reasons for being bad. 258 | 259 | For the two PI IMS types, it is assumed that `Questionable` is never `True` if `Good` is `False` or `status != 0`. This may be an incorrect assumption with resulting erroneous status value. 260 | 261 | In summary, here is the resulting status value from tagreader for different combinations of status field values from the IMS types: 262 |
263 | | tagreader | Aspen Web API | Aspen ODBC | PI Web API | PI ODBC |
264 | | :-------: | :-----------: | :--------: | ---------- | ---------- |
265 | | 0 | l = 0 | status = 0 | Good = True<br>Questionable = False<br>Substituted = False | status = 0<br>questionable = False<br>substituted = False |
266 | | 1 | l = 1 | status = 1 | Good = True<br>Questionable = True<br>Substituted = False | status = 0<br>questionable = True<br>substituted = False |
267 | | 2 | l = 2 | status = 2 | Good = False<br>Questionable = False<br>Substituted = False | status != 0<br>questionable = False<br>substituted = False |
268 | | 4 | l = 4 | status = 4 | Good = True<br>Questionable = False<br>Substituted = True | status = 0<br>questionable = False<br>substituted = True |
269 | | 5 | l = 5 | status = 5 | Good = True<br>Questionable = True<br>Substituted = True | status = 0<br>questionable = True<br>substituted = True |
270 | | 6 | l = 6 | status = 6 | Good = False<br>Questionable = False<br>Substituted = True | status != 0<br>questionable = False<br>substituted = True |
271 | 272 | Please keep in mind when using `get_status`: 273 | * This is an experimental feature. It may work as intended, or it may result in erroneous status values in some cases. If that happens, please create an issue. 274 | * Both how fetching status is activated and how it is returned may be changed at a later time. 275 | 276 | ## Caching results 277 | 278 | By default, a cache-file using the SQLite file format will be attached to the client upon client creation. Whenever `IMSClient.read()` is called, the cache is queried for existing data. Any data that is not already in the cache will be queried from the data source. The cache can significantly speed up queries, and it is therefore recommended to always keep it enabled. The cache file will be created on use. 279 | 280 | Data in the cache never expires. If the data for some reason becomes invalid, then the cache and data source will no longer produce the same data set. An existing cache file can safely be deleted at any time, at least as long as there is no ongoing query. 281 | 282 | If, for any reason, you want to disable the cache, simply set it to `None` . This can be done at any time, but is normally done before connecting to the server, like this: 283 | 284 | ``` python 285 | c = tagreader.IMSClient("PINO") 286 | c.cache = None 287 | c.connect() 288 | ``` 289 | 290 | Snapshots ( `read_type = ReaderType.SNAPSHOT` ) are of course never cached. 291 | 292 | **Note**: Raw `read_type = ReaderType.RAW` data values are currently not cached pending a rewrite of the caching mechanisms. 293 | **Note**: Cache will be default off from version 5. 294 | 295 | ## Time zones 296 | 297 | It is important to understand how Tagreader uses and interprets time zones. Queries to the backend servers are always performed in UTC time, and return data is also always in UTC. However, it is usually not convenient to ensure all time stamps are in UTC time. The client and handlers therefore have functionality for converting between UTC and user-specified time zones. 298 | 299 | There are two levels of determining which time zone input arguments should be interpreted as, and which time zone return data should be converted to: 300 | 301 | 1. Time zone aware input arguments will use their corresponding time zone. 302 | 2. Time zone naive input arguments are assumed to have time zone as provided by the client. 303 | 304 | The client-provided time zone can be specified with the optional `tz` argument (string, e.g. "*US/Central*") during client creation. If it is not specified, then the default value *Europe/Oslo* is used. Note that for the most common use case where Equinor employees want to fetch data from Norwegian assets and display them with Norwegian time stamps, nothing needs to be done. 305 | 306 | *Note:* It is a good idea to update the `pytz` package rather frequently (at least twice per year) to ensure that time zone information is up to date. `pip install --upgrade pytz` . 307 | 308 | **Example (advanced usage)** 309 | 310 | An employee in Houston is contacted by her colleague in Brazil about an event that she needs to investigate. The colleague identified the time of the event at July 20th 2020 at 15:05:00 Rio time. The Houston employee wishes to extract interpolated data with 60-second intervals and display the data in her local time zone. She also wishes to send the data to her Norwegian colleague with datestamps in Norwegian time.
One way of doing this is : 311 | 312 | ``` python 313 | import tagreader 314 | from datetime import datetime, timedelta 315 | from dateutil import tz 316 | c = tagreader.IMSClient("PINO", "pi", tz="US/Central") # Force output data to Houston time 317 | c.connect() 318 | tzinfo = tz.gettz("Brazil/East") # Generate timezone object for Rio local time 319 | event_time = datetime(2020, 7, 20, 15, 5, 0, tzinfo=tzinfo) 320 | start_time = event_time - timedelta(minutes=30) 321 | end_time = event_time + timedelta(minutes=10) 322 | df = c.read(["BA:CONC.1"], start_time, end_time, ts=60) 323 | df_to_Norway = df.tz_convert("Europe/Oslo") # Create a copy of the dataframe with Norwegian time stamps 324 | ``` 325 | 326 | # Fetching metadata 327 | 328 | Two client methods have been created to fetch basic metadata for one or more tags. 329 | 330 | ## get_units() 331 | 332 | Fetches the engineering unit(s) for the tag(s) provided. The argument `tags` can be either a single tagname as string, or a list of tagnames. 333 | 334 | ## get_description() 335 | 336 | Fetches the description(s) for the tag(s) provided. The argument `tags` can be either a single tagname as string, or a list of tagnames. 337 | 338 | **Example**: 339 | 340 | ``` python 341 | tags = ["BA:ACTIVE.1", "BA:LEVEL.1", "BA:CONC.1"] 342 | units = c.get_units(tags) 343 | desc = c.get_descriptions(tags) 344 | tag = "BA:CONC.1" 345 | df[tag].plot(grid=True, title=desc[tag]).set_ylabel(units[tag]) 346 | ``` 347 | -------------------------------------------------------------------------------- /documentation/.gitignore: -------------------------------------------------------------------------------- 1 | # Dependencies 2 | /node_modules 3 | 4 | # Production 5 | /build 6 | 7 | # Generated files 8 | .docusaurus 9 | .cache-loader 10 | 11 | # Misc 12 | .DS_Store 13 | .env.local 14 | .env.development.local 15 | .env.test.local 16 | .env.production.local 17 | 18 | npm-debug.log* 19 | yarn-debug.log* 20 | yarn-error.log* 21 | -------------------------------------------------------------------------------- /documentation/.prettierrc.js: -------------------------------------------------------------------------------- 1 | // default config 2 | module.exports = { 3 | trailingComma: 'es5', 4 | tabWidth: 2, 5 | semi: false, 6 | singleQuote: true, 7 | } 8 | -------------------------------------------------------------------------------- /documentation/README.md: -------------------------------------------------------------------------------- 1 | # Website 2 | 3 | This website is built using [Docusaurus 2](https://docusaurus.io/), a modern static website generator. 4 | 5 | ### Installation 6 | 7 | ``` 8 | $ yarn 9 | ``` 10 | 11 | ### Local Development 12 | 13 | ``` 14 | $ yarn start 15 | ``` 16 | 17 | This command starts a local development server and opens up a browser window. Most changes are reflected live without having to restart the server. 18 | 19 | ### Build 20 | 21 | ``` 22 | $ yarn build 23 | ``` 24 | 25 | This command generates static content into the `build` directory and can be served using any static contents hosting service. 26 | 27 | ### Deployment 28 | 29 | Using SSH: 30 | 31 | ``` 32 | $ USE_SSH=true yarn deploy 33 | ``` 34 | 35 | Not using SSH: 36 | 37 | ``` 38 | $ GIT_USER= yarn deploy 39 | ``` 40 | 41 | If you are using GitHub pages for hosting, this command is a convenient way to build the website and push to the `gh-pages` branch. 
42 | -------------------------------------------------------------------------------- /documentation/babel.config.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | presets: [require.resolve('@docusaurus/core/lib/babel/preset')], 3 | }; 4 | -------------------------------------------------------------------------------- /documentation/docs/about/introduction.md: -------------------------------------------------------------------------------- 1 | --- 2 | sidebar_position: 1 3 | --- 4 | # Introduction 5 | 6 | Tagreader is a Python package for reading timeseries data from the OSIsoft PI and Aspen Infoplus.21 7 | Information Management Systems (IMS). It is intended to be easy to use, and present as similar interfaces 8 | as possible to the backend historians. 9 | 10 | ## System requirements 11 | The only requirements are Python >= 3.8, with Windows, Linux or macOS. 12 | 13 | ## Installation 14 | You can install tagreader directly into your project from pypi by using pip 15 | or another package manager. 16 | 17 | ```shell 18 | pip install tagreader 19 | ``` 20 | 21 | The following are required and will be installed: 22 | 23 | * pandas 24 | * requests 25 | * requests-kerberos 26 | * certifi 27 | * diskcache 28 | 29 | ## Usage 30 | Tagreader is easy to use for both Equinor internal IMS services, and non-internal usage. For non-internal usage 31 | you simply need to provide the corresponding IMS service URLs and IMSType. See [data source](usage/data-source.md) for details. 32 | 33 | ### Usage example 34 | ```python 35 | import tagreader 36 | c = tagreader.IMSClient("mysource", "aspenone") 37 | print(c.search("tag*")) 38 | df = c.read_tags(["tag1", "tag2"], "18.06.2020 08:00:00", "18.06.2020 09:00:00", 60) 39 | ``` 40 | 41 | ### Jupyter Notebook Quickstart 42 | Jupyter Notebook examples can be found in /examples. In order to run these examples, you need to install the 43 | optional dependencies. 44 | 45 | ```shell 46 | pip install tagreader[notebooks] 47 | ``` 48 | 49 | The quickstart Jupyter Notebook can be found [here](https://github.com/equinor/tagreader-python/blob/main/examples/quickstart.ipynb) 50 | 51 | For more details, see the [Usage section](/docs/about/usage/basic-usage). 52 | 53 | ## Contribute 54 | As Tagreader is an open source project, all contributions are welcome. This includes code, bug reports, issues, 55 | feature requests, and documentation. The preferred way of submitting a contribution is to either create an issue on 56 | GitHub or to fork the project and make a pull request. 57 | 58 | To start contributing, see the [contribute section](../contribute/how-to-start-contributing.md) 59 | 60 | -------------------------------------------------------------------------------- /documentation/docs/about/usage/_category_.json: -------------------------------------------------------------------------------- 1 | { 2 | "label": "Usage", 3 | "position": 4, 4 | "collapsed": false, 5 | "link": { 6 | "type": "generated-index", 7 | "description": "Examples of tagreader usage."
8 | } 9 | } 10 | -------------------------------------------------------------------------------- /documentation/docs/about/usage/basic-usage.md: -------------------------------------------------------------------------------- 1 | --- 2 | sidebar_position: 1 3 | --- 4 | # Basic usage 5 | 6 | The module is imported with 7 | 8 | ``` python 9 | import tagreader 10 | ``` 11 | 12 | ## The Client 13 | 14 | The client presents the interface for communicating with the data source to the user. The interface shall be as unified 15 | as possible, regardless of the IMS type that is used. A handler object specifically designed for each IMS type is 16 | attached to the client when the client is created. The handler is responsible for handling the communication and data 17 | interpretation between the server and the client object. 18 | 19 | :::info SSL verification 20 | 21 | Equinor root certificates are automatically added when using an Equinor Managed computer, which allow SSL verification. 22 | 23 | For non-Equinor users: If you run info SSL verification errors and prefer to not set `verifySSL=False` , 24 | you can try the procedure outlined [here](https://incognitjoe.github.io/adding-certs-to-requests.html). 25 | ::: 26 | 27 | :::info ODBC support 28 | Tagreader as of version 5 does no longer support ODBC clients, which has been deprecated in favor of REST services. 29 | To use ODBC, please refer to [Tagreader v4 on PyPI](https://pypi.org/project/tagreader/#history). 30 | Versioned documentation is available in the source code on [GitHub Releases](https://github.com/equinor/tagreader-python/releases). 31 | 32 | Use at your own discretion. 33 | ::: 34 | 35 | ## Creating a client 36 | 37 | A connection to a data source is prepared by creating an instance of `tagreader.IMSClient` with the following input 38 | arguments: 39 | 40 | * `datasource` : Name of data source 41 | * `imstype` : The name of the [IMS type](/docs/about/usage/data-source) to query. Indicates the type of data source 42 | that is requested, and therefore determines which handler type to use. Valid values are 43 | `piwebapi` and `aspenone`. 44 | 45 | * `tz` (optional): Time zone naive time stamps will be interpreted as belonging to this time zone. Similarly, 46 | the returned data points will be localized to this time zone. **Default**: _"Europe/Oslo"_. 47 | 48 | The following input arguments can be used when connecting to either `piwebapi` or to `aspenone`. None of these 49 | should be necessary to supply when connecting to Equinor servers. 50 | 51 | * `url` (optional): Path to server root, e.g. _"https://aspenone/ProcessData/AtProcessDataREST.dll"_ 52 | or _"https://piwebapi/piwebapi"_. **Default**: Path to Equinor server corresponding to selected `imstype`. 53 | * `verifySSL` (optional): Whether to verify SSL certificate sent from server. **Default**: `True`. 54 | * `auth` (optional): Auth object to pass to the server for authentication. **Default**: Kerberos-based auth object 55 | that works with Equinor servers. 56 | * `cache` (optional): [Cache](caching.md) data locally in order to avoid re-reading the same data multiple times. 57 | 58 | ## Connecting to data source 59 | 60 | After creating the client as described above, connect to the server with the `connect()` method. 
61 | 62 | **Example** 63 | 64 | Connecting to the PINO PI data source using REST Web API: 65 | 66 | ``` python 67 | c = tagreader.IMSClient("PINO", "piwebapi") 68 | c.connect() 69 | ``` 70 | 71 | Connecting to the Peregrino IP.21 data source using AspenTech Process Data REST Web API, specifying that all naive time 72 | stamps as well as the returned data shall use Rio local time, and using the local endpoint in Brazil: 73 | 74 | ``` python 75 | c = tagreader.IMSClient(datasource="PER", 76 | imstype="aspenone", 77 | tz="Brazil/East", 78 | url="https://aspenone-per.equinor.com/ProcessExplorer/ProcessData/AtProcessDataREST.dll") 79 | c.connect() 80 | ``` 81 | 82 | Connecting to some other AspenTech Web API URL using NTLM authentication instead of default Kerberos and ignoring the 83 | server's host certificate: 84 | 85 | ``` python 86 | import getpass 87 | from requests_ntlm import HttpNtlmAuth 88 | user = "mydomain\\" + getpass.getuser() 89 | pwd = getpass.getpass() 90 | auth = HttpNtlmAuth(user, pwd) 91 | c = tagreader.IMSClient(datasource="myplant", 92 | url="https://api.mycompany.com/aspenone", 93 | imstype="aspenone", 94 | auth=auth, 95 | verifySSL=False) 96 | c.connect() 97 | ``` 98 | 99 | ## Searching for tags 100 | 101 | The client method `search()` can be used to search for tags using either tag name, tag description or both. 102 | 103 | Supply at least one of the following arguments: 104 | 105 | * `tag` : Name of tag 106 | * `desc` : Description of tag 107 | 108 | If both arguments are provided, the both must match. 109 | 110 | `*` can be used as wildcard. 111 | 112 | **Examples** 113 | 114 | ``` python 115 | c = tagreader.IMSClient("PINO", "piwebapi") 116 | c.connect() 117 | c.search("cd*158") 118 | c.search(desc="*reactor*") 119 | c.search(tag="BA:*", desc="*Temperature*") 120 | ``` 121 | 122 | ## Reading data 123 | 124 | Data is read by calling the client method `read()` with the following input arguments: 125 | 126 | * `tags` : List of tagnames. Wildcards are not allowed. 127 | 128 | Tags with maps (relevant for some InfoPlus.21 servers) can be on the form `'tag;map'` , e.g. `'109-HIC005;CS A_AUTO'`. 129 | 130 | * `start_time` : Start of time period. 131 | * `end_time` : End of time period. 132 | 133 | Both `start_time` and `end_time` can be either datetime object or string. Strings are interpreted by the 134 | * [Timestamp](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.Timestamp.html) method from Pandas. 135 | Both timestamps can be left out when `read_type = ReaderType.SNAPSHOT` . However, when using either of the Web APIs, `end_time` provides the time at which the snapshot is taken. 136 | 137 | * `ts` : The interval between samples when querying interpolated or aggregated data. Ignored and can be left out when 138 | `read_type = ReaderType.SNAPSHOT` . **Default** 60 seconds. 139 | * `read_type` (optional): What kind of data to read. More info immediately below. **Default** Interpolated. 140 | * `get_status` (optonal): When set to `True` will fetch status information in addition to values. **Default** `False`. 141 | 142 | ## Selecting what to read 143 | 144 | By specifying the optional parameter `read_type` to `read()` , it is possible to specify what kind of data should be 145 | returned. The default query method is interpolated. All valid values for `read_type` are defined in the 146 | `utils.ReaderType` class (mirrored for convenience as `tagreader.ReaderType` ), although not all are currently 147 | implemented. 
142 | ## Selecting what to read
143 |
144 | By specifying the optional parameter `read_type` to `read()` , it is possible to specify what kind of data should be
145 | returned. The default query method is interpolated. All valid values for `read_type` are defined in the
146 | `utils.ReaderType` class (mirrored for convenience as `tagreader.ReaderType` ), although not all are currently
147 | implemented. Below is the list of implemented read types.
148 |
149 | * `INT` : The raw data points are interpolated so that one new data point is generated at each step of length `ts`
150 |   starting at `start_time` and ending at or less than `ts` seconds before `end_time` .
151 | * The following aggregated read types perform a weighted calculation of the raw data within each interval.
152 |   Where relevant, time-weighted calculations are used. Returned time stamps are anchored at the beginning of each
153 |   interval. So for the 60 seconds long interval between 08:11:00 and 08:12:00, the time stamp will be 08:11:00.
154 |   + `MIN` : The minimum value.
155 |   + `MAX` : The maximum value.
156 |   + `AVG` : The average value.
157 |   + `VAR` : The variance.
158 |   + `STD` : The standard deviation.
159 |   + `RNG` : The range (max-min).
160 | * `RAW` : Returns actual data points stored in the database.
161 | * `SNAPSHOT` : Returns the last recorded value. Only one tag can be read at a time. When using either of the Web API
162 |   based handlers, providing `end_time` is possible in which case a snapshot at the specific time is returned.
163 |
164 | **Examples**
165 |
166 | Read interpolated data for the provided tag with 3-minute intervals between the two time stamps:
167 |
168 | ``` python
169 | c = tagreader.IMSClient("PINO", "piwebapi")
170 | c.connect()
171 | df = c.read(['BA:ACTIVE.1'], '05-Jan-2020 08:00:00', '05/01/20 11:30am', 180)
172 |
173 | ```
174 |
175 | Read the average value for the provided tag within each 3-minute interval between the two time stamps:
176 |
177 | ``` python
178 | df = c.read(['BA:CONC.1'], '05-Jan-2020 08:00:00', '05/01/20 11:30am', 180, read_type=tagreader.ReaderType.AVG)
179 | ```
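A `SNAPSHOT` read differs from the examples above in that the time arguments are optional. A minimal sketch:

``` python
# Latest recorded value for a single tag. With the Web API based handlers, an
# end_time may be supplied to get the snapshot at that point in time instead.
df = c.read(['BA:ACTIVE.1'], read_type=tagreader.ReaderType.SNAPSHOT)
```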
180 |
181 | ## Status information
182 |
183 | The optional parameter `get_status` was added to `IMSClient.read()` in release 2.6.0. If set to `True`, the resulting
184 | dataframe will be expanded with one additional column per tag. The column contains integer numbers that indicate the
185 | status, or quality, of the returned values.
186 |
187 | In an effort to unify the status value for all IMS types, the following schema based on AspenTech was selected:
188 |
189 | 0: Good
190 | 1: Suspect
191 | 2: Bad
192 | 4: Good/Modified
193 | 5: Suspect/Modified
194 | 6: Bad/Modified
195 |
196 | The status value is obtained differently for the two IMS types:
197 | * Aspen Web API: Read directly from the `l` ("Level") field in the json output.
198 | * PI Web API: Calculated as `Questionable` + 2 * (1 - `Good`) + 4 * `Substituted`.
199 |
200 |
201 | For PI Web API, it is assumed that `Questionable` is never `True` if `Good` is `False` or `status != 0`.
202 | This may be an incorrect assumption with resulting erroneous status values.
203 |
204 | In summary, here is the resulting status value from tagreader for different combinations of status field values from
205 | the IMS types:
206 |
207 | | tagreader | Aspen Web API | PI Web API |
208 | |:---------:|:-------------:|:--------------------------------------------------------------------|
209 | | 0 | l = 0 | Good = True<br />Questionable = False<br />Substituted = False |
210 | | 1 | l = 1 | Good = True<br />Questionable = True<br />Substituted = False |
211 | | 2 | l = 2 | Good = False<br />Questionable = False<br />Substituted = False |
212 | | 4 | l = 4 | Good = True<br />Questionable = False<br />Substituted = True |
213 | | 5 | l = 5 | Good = True<br />Questionable = True<br />Substituted = True |
214 | | 6 | l = 6 | Good = False<br />Questionable = False<br />Substituted = True |
215 |
216 | Please keep in mind when using `get_status`:
217 | * This is an experimental feature. It may work as intended, or it may result in erroneous status values in some cases.
218 |   If that happens, please create an issue.
219 | * Both how fetching status is activated and how it is returned may be changed at a later time.
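For illustration, a minimal sketch of requesting status information (inspect the returned dataframe for the exact column naming, which may change):

``` python
df = c.read(['BA:CONC.1'], '05-Jan-2020 08:00:00', '05/01/20 11:30am', 180, get_status=True)
print(df.columns)  # one value column plus one additional status column for the tag
```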
--------------------------------------------------------------------------------
/documentation/docs/about/usage/caching.md:
--------------------------------------------------------------------------------
1 | ---
2 | sidebar_position: 4
3 | ---
4 |
5 | # Caching results
6 |
7 | It is possible to cache data locally using SQLite files that will be attached to the client upon client creation. Whenever
8 | `IMSClient.read()` is called, the cache is queried for existing data. Any data that is not already in the cache will be
9 | queried from the data source. The cache can significantly speed up queries, and it is therefore recommended to always
10 | have it enabled. The cache file will be created on use.
11 |
12 | Data in the cache never expires. If the data for some reason becomes invalid, then the cache and data source will no
13 | longer produce the same data set. An existing cache file can safely be deleted at any time, at least as long as there
14 | is no ongoing query.
15 |
16 | If, for any reason, you want to disable the cache, simply set it to the default value `None`.
17 |
18 | ``` python
19 | c = tagreader.IMSClient("PINO", "piwebapi", cache=None)
20 | c.connect()
21 | ```
22 |
23 | If you want to cache data, we recommend using the provided SmartCache like this:
24 |
25 | ``` python
26 | from pathlib import Path
27 | from tagreader.cache import SmartCache
28 |
29 | c = tagreader.IMSClient("PINO", "piwebapi", cache=SmartCache(directory=Path(".cache")))
30 | c.connect()
31 | ```
32 |
33 | Snapshots ( `read_type = ReaderType.SNAPSHOT` ) are of course never cached.
34 |
35 | **Note**: Raw `read_type = ReaderType.RAW` data values are currently not cached pending a rewrite of the caching
36 | mechanisms.
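`tagreader.cache` also provides a `BucketCache`, which keeps track of the exact time intervals it has already stored so that only missing intervals are fetched from the server. A minimal sketch, assuming the same `directory` argument as `SmartCache`:

``` python
from pathlib import Path
from tagreader.cache import BucketCache

c = tagreader.IMSClient("PINO", "piwebapi", cache=BucketCache(directory=Path(".cache")))
c.connect()
```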
--------------------------------------------------------------------------------
/documentation/docs/about/usage/data-source.md:
--------------------------------------------------------------------------------
1 | ---
2 | sidebar_position: 2
3 | ---
4 |
5 | # Data sources
6 |
7 | Tagreader supports connecting to PI and IP.21 servers using Web API interfaces. When calling certain
8 | methods, the user will need to tell tagreader which system and which connection method to use. This input argument is
9 | called `imstype` , and can be one of the following case-insensitive strings:
10 |
11 | * `piwebapi` : For connecting to OSISoft PI Web API
12 | * `aspenone` : For connecting to AspenTech Process Data REST Web API
13 |
14 | ## Listing available data sources
15 |
16 | The method `tagreader.list_sources()` can be used to query for available PI and IP.21 servers. Input arguments:
17 |
18 | * `imstype` : The name of the IMS type to query. Valid values: `piwebapi` and `aspenone`.
19 |
20 | The following input arguments are only relevant when calling `list_sources()` with a Web API `imstype` ( `piwebapi` or
21 | `aspenone` ):
22 |
23 | * `url` (optional): Path to server root, e.g. _"https://aspenone/ProcessData/AtProcessDataREST.dll"_ or
24 |   _"https://piwebapi/piwebapi"_. **Default**: Path to Equinor server corresponding to selected `imstype` if
25 |   `imstype` is `piwebapi` or `aspenone` .
26 | * `verifySSL` (optional): Whether to verify SSL certificate sent from server. **Default**: `True`.
27 | * `auth` (optional): Auth object to pass to the server for authentication. **Default**: Kerberos-based auth objects
28 |   that work with Equinor servers. If not connecting to an Equinor server, you may have to create your own auth.
29 |
30 | In tagreader version 4 and earlier, which also supported the now-removed ODBC `imstype` values, calling `list_sources()`
31 | with `imstype` set to `ip21` would search the registry at *HKEY_CURRENT_USER\Software\AspenTech\ADSA\Caches\AspenADSA\{username}*
32 | for available IP.21 servers. Similarly, calling it with `imstype` set to `pi` would search
33 | *HKEY_LOCAL_MACHINE\SOFTWARE\Wow6432Node\PISystem\PI-SDK* for available PI servers. Servers found through the registry
34 | are normally servers to which the user is authorized, and do not necessarily include all available data sources in the organization.
35 |
36 | **Example:**
37 |
38 | ``` python
39 | from tagreader import list_sources
40 | list_sources("aspenone")
41 | list_sources("piwebapi")
42 | ```
43 |
44 | When called with `imstype` set to `piwebapi` or `aspenone` , `list_sources()` will connect to the web server URL and
45 | query for the available list of data sources. This list is normally the complete set of data sources available on the
46 | server, and does not indicate whether the user is authorized to query the source or not.
47 |
48 | When querying Equinor Web API for data sources, `list_sources()` should require no input argument except
49 | `imstype="piwebapi"` or `imstype="aspenone"`. For non-Equinor servers, `url` will need to be specified, as may `auth`
50 | and `verifySSL` .
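As a sketch of the non-Equinor case (the URL below is a placeholder for your own server root; pass an `auth` object if the default Kerberos authentication does not apply):

``` python
from tagreader import list_sources

sources = list_sources("piwebapi", url="https://piwebapi.example.com/piwebapi", verifySSL=True)
```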
--------------------------------------------------------------------------------
/documentation/docs/about/usage/fetching-metadata.md:
--------------------------------------------------------------------------------
1 | ---
2 | sidebar_position: 3
3 | ---
4 |
5 | # Fetching metadata
6 |
7 | Two client methods have been created to fetch basic metadata for one or more tags.
8 |
9 | ### get_units()
10 |
11 | Fetches the engineering unit(s) for the tag(s) provided. The argument `tags` can be either a single tagname as string,
12 | or a list of tagnames.
13 |
14 | ### get_descriptions()
15 |
16 | Fetches the description(s) for the tag(s) provided. The argument `tags` can be either a single tagname as string,
17 | or a list of tagnames.
18 |
19 | **Example**:
20 |
21 | ``` python
22 | tags = ["BA:ACTIVE.1", "BA:LEVEL.1", "BA:CONC.1"]
23 | units = c.get_units(tags)
24 | desc = c.get_descriptions(tags)
25 | tag = "BA:CONC.1"
26 | df[tag].plot(grid=True, title=desc[tag]).set_ylabel(units[tag])
27 | ```
28 |
--------------------------------------------------------------------------------
/documentation/docs/about/usage/time-zone.md:
--------------------------------------------------------------------------------
1 | ---
2 | sidebar_position: 6
3 | ---
4 |
5 | # Time zones
6 |
7 | It is important to understand how Tagreader uses and interprets time zones. Queries to the backend servers are always
8 | performed in UTC time, and returned data is also always in UTC. However, it is usually not convenient to ensure all time
9 | stamps are in UTC time. The client and handlers therefore have functionality for converting between UTC and
10 | user-specified time zones.
11 |
12 | There are two levels of determining which time zone input arguments should be interpreted as, and which time zone the
13 | returned data should be converted to:
14 |
15 | 1. Time zone aware input arguments will use their corresponding time zone.
16 | 2. Time zone naive input arguments are assumed to have time zone as provided by the client.
17 |
18 | The client-provided time zone can be specified with the optional `tz` argument (string, e.g. "*US/Central*") during
19 | client creation. If it is not specified, then the default value *Europe/Oslo* is used. Note that for the most common
20 | use case where Equinor employees want to fetch data from Norwegian assets and display them with Norwegian time stamps,
21 | nothing needs to be done.
22 |
23 | *Note:* It is a good idea to update the `pytz` package rather frequently (at least twice per year) to ensure that time
24 | zone information is up-to-date. `pip install --upgrade pytz` .
25 |
26 | **Example (advanced usage)**
27 |
28 | An employee in Houston is contacted by her colleague in Brazil about an event that she needs to investigate.
29 | The colleague identified the time of the event as July 20th 2020 at 15:05:00 Rio time. The Houston employee wishes to
30 | extract interpolated data with 60-second intervals and display the data in her local time zone. She also wishes to send
31 | the data to her Norwegian colleague with time stamps in Norwegian time. One way of doing this is:
32 |
33 | ``` python
34 | import tagreader
35 | from datetime import datetime, timedelta
36 | from dateutil import tz
37 | c = tagreader.IMSClient("PINO", "piwebapi", tz="US/Central")  # Force output data to Houston time
38 | c.connect()
39 | tzinfo = tz.gettz("Brazil/East")  # Generate timezone object for Rio local time
40 | event_time = datetime(2020, 7, 20, 15, 5, 0, tzinfo=tzinfo)
41 | start_time = event_time - timedelta(minutes=30)
42 | end_time = event_time + timedelta(minutes=10)
43 | df = c.read(["BA:CONC.1"], start_time, end_time, ts=60)
44 | df_to_Norway = df.tz_convert("Europe/Oslo")  # Create a copy of the dataframe with Norwegian time stamps
45 | ```
--------------------------------------------------------------------------------
/documentation/docs/contribute/_category_.yaml:
--------------------------------------------------------------------------------
1 | position: 5
2 | collapsed: true
3 |
--------------------------------------------------------------------------------
/documentation/docs/contribute/development-guide/_category_.json:
--------------------------------------------------------------------------------
1 | {
2 |   "label": "Developer guide",
3 |   "position": 4,
4 |   "link": {
5 |     "type": "generated-index",
6 |     "description": "This section of the documentation lists instructions and guidelines on how to start developing"
7 |   }
8 | }
9 |
--------------------------------------------------------------------------------
/documentation/docs/contribute/development-guide/publishing.md:
--------------------------------------------------------------------------------
1 | # Publishing
2 | TBD
--------------------------------------------------------------------------------
/documentation/docs/contribute/development-guide/setup.md:
--------------------------------------------------------------------------------
1 | # Setup
2 |
3 | ```mdx-code-block
4 | import TabItem from '@theme/TabItem';
5 | import Tabs from '@theme/Tabs';
6 | ```
7 |
8 | ## Prerequisites
9 |
10 | To work with the tagreader code you'll need to install:
11 |
12 | Python >=3.8 with the following packages:
13 |
14 | * pandas >= 1.0.0
15 | * requests
16 | * requests-kerberos
17 | * certifi >= 2023.5.7
18 | * diskcache
19 | * pyodbc (If using ODBC connection)
20 |
21 |
:::info ODBC Connection 22 | If using ODBC connections, you must also install proprietary drivers for PI ODBC and/or Aspen IP.21 SQLPlus. These 23 | drivers are only available for Microsoft Windows. Pyodbc will therefore not be installed for non-Windows systems. 24 | ::: 25 | 26 | ## Pre-commit 27 | 28 | When contributing to this project, pre-commits are necessary, as they run certain tests, sanitisers, and formatters. 29 | 30 | The project provides a `.pre-commit-config.yaml` file that is used to set up git _pre-commit hooks_. 31 | 32 | On commit locally, code is automatically formatted and checked for security vulnerabilities using pre-commit git hooks. 33 | 34 | ### Installation 35 | 36 | To initialize pre-commit in your local repository, run 37 | 38 | ```shell 39 | pre-commit install 40 | ``` 41 | 42 | This tells pre-commit to run for this repository on every commit. 43 | 44 | ### Usage 45 | 46 | Pre-commit will run on every commit, but can also be run manually on all files: 47 | 48 | ```shell 49 | pre-commit run --all-files 50 | ``` 51 | 52 | Pre-commit tests can be skipped on commits with `git commit --no-verify`. 53 | 54 | :::caution 55 | If you have to skip the pre-commit tests, you're probably doing something you're not supposed to, and whoever commits after you might have to fix your "mistakes". Consider updating the pre-commit hooks if your code is non-compliant. 56 | ::: 57 | 58 | ### Install Poetry 59 | 60 | Poetry is used to manage Python package dependencies. 61 | 62 | ```shell 63 | $ pip install poetry 64 | ``` 65 | 66 | The installation instructions can be found [here](https://python-poetry.org/docs/#installation). 67 | 68 | ### Install packages 69 | 70 | ```shell 71 | $ poetry install 72 | ``` 73 | -------------------------------------------------------------------------------- /documentation/docs/contribute/development-guide/testing.md: -------------------------------------------------------------------------------- 1 | # Testing 2 | 3 | TBD -------------------------------------------------------------------------------- /documentation/docs/contribute/development-guide/upgrading.md: -------------------------------------------------------------------------------- 1 | # Upgrading 2 | 3 | TBD -------------------------------------------------------------------------------- /documentation/docs/contribute/documentation.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Documentation 3 | sidebar_position: 5 4 | --- 5 | # Documentation 6 | 7 | This site was generated from the contents of your `documentation` folder using [Docusaurus,](https://docusaurus.io/) and we host it on GitHub Pages. 8 | 9 | ## How it works 10 | 11 | From Docusaurus own documentation: 12 | > Docusaurus is a static-site generator. It builds a single-page application with fast client-side navigation, leveraging the full power of React to make your site interactive. It provides out-of-the-box documentation features but can be used to create any kind of site (personal website, product, blog, marketing landing pages, etc). 13 | 14 | While Docusaurus is rich on features, we use it mostly to host markdown pages. The main bulk of the documentation is located in `documentation/docs`. 15 | 16 | ## Publishing 17 | 18 | We are using the GitHub Action [`publish-docs.yaml`](https://github.com/equinor/awt/blob/main/.github/workflows/publish-docs.yaml) to build and publish the documentation website. This action is run every time someone pushes to the `main` branch. 
19 | 20 | This will check out the code, download the changelog from the `generate-changelog.yaml` action, and build the documentation. Then it will deploy the documentation (placed in the documentation/build/ folder) to GitHub Pages. 21 | 22 | ## Initial settings 23 | 24 | When deployed to GitHub Pages, you do need to configure your site under the settings. Pick the gh-pages branch and select either a private url or a public one. It will show you the site’s url, which should now contain your generated documentation site. 25 | 26 | ## Assets 27 | 28 | All assets files are places under `documentation/static` 29 | -------------------------------------------------------------------------------- /documentation/docs/contribute/ground-rules.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Ground rules 3 | sidebar_position: 3 4 | --- 5 | # Ground rules 6 | 7 | * Always make sure that the main branch is ready to deploy. If something break, we fix it as soon as possible. 8 | 9 | ## Pull Requests 10 | 11 | Please try to make your pull request easy to review for us. 12 | 13 | * Make small pull requests. The smaller, the faster to review and the more likely it will be merged soon. 14 | 15 | :::tip GitHub Tips 16 | When creating a Pull Request on GitHub, you can add Closes #<Issue number>. When you merge the PR, the issue 17 | will automatically be closed. 18 | ::: 19 | ## Git commit format 20 | 21 | We use [conventional commits](https://www.conventionalcommits.org/en/v1.0.0/#summary) 22 | -------------------------------------------------------------------------------- /documentation/docs/contribute/how-to-start-contributing.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Get started 3 | sidebar_position: 2 4 | --- 5 | 6 | # How to start contributing 7 | 8 | Welcome! We are glad that you want to contribute to our project! 💖 9 | 10 | This project accepts contributions via Github pull requests. 11 | 12 | This document outlines the process to help get your contribution accepted. 13 | 14 | There are many ways to contribute: 15 | 16 | * Suggest [Features](https://github.com/equinor/awt/issues/new?assignees=&labels=type%3A+%3Abulb%3A+feature+request&template=feature-request.md&title=) 17 | * Suggest [Changes](https://github.com/equinor/awt/issues/new?assignees=&labels=type%3A+%3Awrench%3A+maintenance&template=code-maintenance.md&title=) 18 | * Report [Bugs](https://github.com/equinor/awt/issues/new?assignees=&labels=type%3A+%3Abug+bug&template=bug-report.md&title=) 19 | 20 | You can start by looking through the current [Issues](https://github.com/equinor/awt/issues). 21 | 22 | ## Workflow 23 | 24 | 1. Create a new branch 25 | 2. Do work 26 | 3. Create a Pull Request 27 | 4. Review Pull Request until accepted 28 | 5. Rebase and merge on main 29 | 30 | ## Code review process 31 | 32 | The core team looks at pull requests on a regular basis. After feedback has been given we expect responses within three weeks. After three weeks we may close the pull request if it isn't showing any activity. 
-------------------------------------------------------------------------------- /documentation/docs/contribute/overview.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Overview 3 | sidebar_position: 1 4 | --- 5 | 6 | # Overview 7 | 8 | ## Getting started 9 | 10 | For setting up a development environment, see the [development guide](development-guide/setup.md) 11 | 12 | For starting contributing, see the [contribute section](how-to-start-contributing.md). 13 | 14 | ## Project structure 15 | 16 | Here’s how the app is organized. 17 | 18 | ``` 19 | ├── .github/ - GitHub Actions and more 20 | ├── tagreader/ - The source code 21 | ├── tests/ - Tests 22 | │── documentation/ - Documentation 23 | └── ... 24 | ``` -------------------------------------------------------------------------------- /documentation/docusaurus.config.js: -------------------------------------------------------------------------------- 1 | // @ts-check 2 | // Note: type annotations allow type checking and IDEs autocompletion 3 | 4 | import { themes } from 'prism-react-renderer'; 5 | import simplePlantUML from '@akebifiky/remark-simple-plantuml' 6 | import math from 'remark-math'; 7 | import katex from 'rehype-katex'; 8 | 9 | export default { 10 | title: 'Tagreader', 11 | tagline: 'Tagreader.', 12 | url: 'https://awt.app.radix.equinor.com/', 13 | baseUrl: '/tagreader-python/', 14 | onBrokenLinks: 'throw', 15 | onBrokenMarkdownLinks: 'warn', 16 | favicon: 'img/favicon.png', 17 | markdown: { 18 | mermaid: true 19 | }, 20 | themes: [ 21 | '@docusaurus/theme-mermaid' 22 | ], 23 | 24 | // GitHub pages deployment config. 25 | // If you aren't using GitHub pages, you don't need these. 26 | organizationName: 'equinor', // Usually your GitHub org/username. 27 | projectName: 'tagreader', // Usually your repo name. 28 | deploymentBranch: 'gh-pages', 29 | 30 | // Even if you don't use internalization, you can use this field to set useful 31 | // metadata like html lang. For example, if your site is Chinese, you may want 32 | // to replace "en" with "zh-Hans". 
33 | i18n: { 34 | defaultLocale: 'en', 35 | locales: ['en'], 36 | }, 37 | 38 | plugins: [ 39 | 40 | ], 41 | 42 | presets: [ 43 | [ 44 | 'classic', 45 | /** @type {import('@docusaurus/preset-classic').Options} */ 46 | ({ 47 | docs: { 48 | sidebarPath: require.resolve('./sidebars.js'), 49 | editUrl: 50 | 'https://github.com/equinor/tagreader-python/tree/main/documentation/', 51 | remarkPlugins: [simplePlantUML, math], 52 | rehypePlugins: [katex], 53 | }, 54 | blog: false, 55 | theme: { 56 | customCss: require.resolve('./src/css/custom.css'), 57 | }, 58 | }), 59 | ], 60 | ], 61 | 62 | themeConfig: 63 | /** @type {import('@docusaurus/preset-classic').ThemeConfig} */ 64 | ({ 65 | navbar: { 66 | title: 'Tagreader', 67 | logo: { 68 | alt: 'Equinor Logo', 69 | src: 'img/logo.svg', 70 | }, 71 | items: [ 72 | { 73 | type: 'docSidebar', 74 | sidebarId: 'about', 75 | position: 'left', 76 | label: 'Docs', 77 | }, 78 | { 79 | type: 'docSidebar', 80 | sidebarId: 'contribute', 81 | position: 'left', 82 | label: 'Contribute', 83 | }, 84 | { 85 | href: 'https://github.com/equinor/tagreader-python', 86 | label: 'GitHub', 87 | position: 'right', 88 | }, 89 | ], 90 | }, 91 | footer: { 92 | style: 'dark', 93 | links: [ 94 | { 95 | title: 'Docs', 96 | items: [ 97 | { 98 | label: 'Docs', 99 | to: '/docs/about/introduction', 100 | }, 101 | { 102 | label: 'Contribute', 103 | to: '/docs/contribute/how-to-start-contributing', 104 | }, 105 | ], 106 | }, 107 | { 108 | title: 'More', 109 | items: [ 110 | { 111 | label: 'GitHub', 112 | href: 'https://github.com/equinor/tagreader', 113 | }, 114 | { 115 | label: 'PyPi', 116 | href: 'https://pypi.org/project/tagreader/', 117 | }, 118 | ], 119 | }, 120 | ], 121 | copyright: `Built with Docusaurus.`, 122 | }, 123 | prism: { 124 | theme: themes.github, 125 | darkTheme: themes.dracula, 126 | }, 127 | colorMode: { 128 | defaultMode: 'dark', 129 | disableSwitch: false, 130 | respectPrefersColorScheme: true, 131 | } 132 | }), 133 | stylesheets: [ 134 | { 135 | href: 'https://cdn.jsdelivr.net/npm/katex@0.13.24/dist/katex.min.css', 136 | type: 'text/css', 137 | integrity: 138 | 'sha384-odtC+0UGzzFL/6PNoE8rX/SPcQDXBJ+uRepguP4QkPCm2LBxH3FA3y+fKSiJ+AmM', 139 | crossorigin: 'anonymous', 140 | }, 141 | ], 142 | } 143 | -------------------------------------------------------------------------------- /documentation/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "documentation", 3 | "version": "0.1.0", 4 | "private": true, 5 | "scripts": { 6 | "docusaurus": "docusaurus", 7 | "start": "docusaurus start", 8 | "build": "docusaurus build", 9 | "swizzle": "docusaurus swizzle", 10 | "deploy": "docusaurus deploy", 11 | "clear": "docusaurus clear", 12 | "serve": "docusaurus serve", 13 | "write-translations": "docusaurus write-translations", 14 | "write-heading-ids": "docusaurus write-heading-ids", 15 | "typecheck": "tsc" 16 | }, 17 | "dependencies": { 18 | "@akebifiky/remark-simple-plantuml": "^1.0.2", 19 | "@docusaurus/core": "^3.6.3", 20 | "@docusaurus/preset-classic": "^3.6.3", 21 | "@docusaurus/theme-mermaid": "^3.6.3", 22 | "@mdx-js/react": "^3.1.0", 23 | "prism-react-renderer": "^2.4.1", 24 | "react": "^18.3.1", 25 | "react-dom": "^18.3.1", 26 | "rehype-katex": "7.0.1", 27 | "remark-math": "6.0.0" 28 | }, 29 | "devDependencies": { 30 | "@tsconfig/docusaurus": "^2.0.3", 31 | "typescript": "^5.6.2" 32 | }, 33 | "resolutions": { 34 | "cookie": "0.7.0", 35 | "path-to-regexp": "0.1.12", 36 | "cross-spawn": "7.0.5", 37 | "nanoid": 
"3.3.8", 38 | "body-parser": "1.20.3", 39 | "express": "4.20.0", 40 | "micromatch": "4.0.8" 41 | }, 42 | 43 | "browserslist": { 44 | "production": [ 45 | ">0.5%", 46 | "not dead", 47 | "not op_mini all" 48 | ], 49 | "development": [ 50 | "last 1 chrome version", 51 | "last 1 firefox version", 52 | "last 1 safari version" 53 | ] 54 | }, 55 | "engines": { 56 | "node": ">=20.0" 57 | }, 58 | "packageManager": "yarn@1.22.19+sha1.4ba7fc5c6e704fce2066ecbfb0b0d8976fe62447" 59 | } 60 | -------------------------------------------------------------------------------- /documentation/sidebars.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Creating a sidebar enables you to: 3 | - create an ordered group of docs 4 | - render a sidebar for each doc of that group 5 | - provide next/previous navigation 6 | 7 | The sidebars can be generated from the filesystem, or explicitly defined here. 8 | 9 | Create as many sidebars as you want. 10 | */ 11 | 12 | // @ts-check 13 | 14 | /** @type {import('@docusaurus/plugin-content-docs').SidebarsConfig} */ 15 | const sidebars = { 16 | // By default, Docusaurus generates a sidebar from the docs folder structure 17 | about: [{type: 'autogenerated', dirName: 'about'}], 18 | contribute: [{type: 'autogenerated', dirName: 'contribute'}], 19 | 20 | // But you can create a sidebar manually 21 | /* 22 | tutorialSidebar: [ 23 | 'intro', 24 | 'hello', 25 | { 26 | type: 'category', 27 | label: 'Tutorial', 28 | items: ['tutorial-basics/create-a-document'], 29 | }, 30 | ], 31 | */ 32 | }; 33 | 34 | module.exports = sidebars; 35 | -------------------------------------------------------------------------------- /documentation/src/css/custom.css: -------------------------------------------------------------------------------- 1 | /** 2 | * Any CSS included here will be global. The classic template 3 | * bundles Infima by default. Infima is a CSS framework designed to 4 | * work well for content-centric websites. 5 | */ 6 | 7 | /* You can override the default Infima variables here. */ 8 | :root { 9 | --ifm-color-primary: #2e8555; 10 | --ifm-color-primary-dark: #29784c; 11 | --ifm-color-primary-darker: #277148; 12 | --ifm-color-primary-darkest: #205d3b; 13 | --ifm-color-primary-light: #33925d; 14 | --ifm-color-primary-lighter: #359962; 15 | --ifm-color-primary-lightest: #3cad6e; 16 | --ifm-code-font-size: 95%; 17 | --docusaurus-highlighted-code-line-bg: rgba(0, 0, 0, 0.1); 18 | } 19 | 20 | /* For readability concerns, you should choose a lighter palette in dark mode. 
*/ 21 | [data-theme='dark'] { 22 | --ifm-color-primary: #25c2a0; 23 | --ifm-color-primary-dark: #21af90; 24 | --ifm-color-primary-darker: #1fa588; 25 | --ifm-color-primary-darkest: #1a8870; 26 | --ifm-color-primary-light: #29d5b0; 27 | --ifm-color-primary-lighter: #32d8b4; 28 | --ifm-color-primary-lightest: #4fddbf; 29 | --docusaurus-highlighted-code-line-bg: rgba(0, 0, 0, 0.3); 30 | } 31 | -------------------------------------------------------------------------------- /documentation/src/pages/index.tsx: -------------------------------------------------------------------------------- 1 | import React from 'react' 2 | import { Redirect } from 'react-router-dom' 3 | 4 | export default function Home(): JSX.Element { 5 | return 6 | } 7 | -------------------------------------------------------------------------------- /documentation/static/.nojekyll: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/equinor/tagreader-python/9a735c05559657206abfe44993dcc9f70f12a8ff/documentation/static/.nojekyll -------------------------------------------------------------------------------- /documentation/static/img/favicon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/equinor/tagreader-python/9a735c05559657206abfe44993dcc9f70f12a8ff/documentation/static/img/favicon.png -------------------------------------------------------------------------------- /documentation/static/img/logo.svg: -------------------------------------------------------------------------------- 1 | 2 | 7 | -------------------------------------------------------------------------------- /documentation/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "extends": "@tsconfig/docusaurus/tsconfig.json", 3 | "compilerOptions": { 4 | "baseUrl": "." 5 | } 6 | } 7 | -------------------------------------------------------------------------------- /examples/quickstart.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Quickstart\n", 8 | "This document provides a quick demonstration of the basic usage of tagreader. It will show you the steps from importing the package to fetching data and making a plot. Some cells contain links to more details that can be found in the [manual](../docs/manual.md).\n", 9 | "\n", 10 | "### Prerequisite\n", 11 | "In order to run this notebook, you need to install tagreader using \"pip install tagreader[notebooks]\". Please refer to the [README](../README.md) or the [docs](../docs/manual.md) for more details." 
12 | ] 13 | }, 14 | { 15 | "cell_type": "markdown", 16 | "metadata": {}, 17 | "source": [ 18 | "Start by importing the package:" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": null, 24 | "metadata": {}, 25 | "outputs": [], 26 | "source": [ 27 | "import tagreader" 28 | ] 29 | }, 30 | { 31 | "cell_type": "markdown", 32 | "metadata": {}, 33 | "source": [ 34 | "If we don't know the name of the data source, we can check which PI and IP.21 servers we have access to via Web API ([more details](https://equinor.github.io/tagreader-python/docs/about/usage/data-source)):" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": null, 40 | "metadata": { 41 | "is_executing": true 42 | }, 43 | "outputs": [], 44 | "source": [ 45 | "tagreader.list_sources(\"piwebapi\")" 46 | ] 47 | }, 48 | { 49 | "cell_type": "markdown", 50 | "metadata": {}, 51 | "source": [ 52 | "By default, a cache-file using SQLite will be created to store previously read data values [more details](https://equinor.github.io/tagreader-python/docs/about/usage/caching). The cache can significantly speed up rereading of data, and it is recommended to always keep it enabled. If, for any reason, you want to disable the cache, set it to `None` when you are making the connection object." 53 | ] 54 | }, 55 | { 56 | "cell_type": "markdown", 57 | "metadata": {}, 58 | "source": [ 59 | "Let's establish a web API connection to PINO. We need to specify that PINO is a PI server." 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": null, 65 | "metadata": {}, 66 | "outputs": [], 67 | "source": [ 68 | "from tagreader.cache import SmartCache" 69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": null, 74 | "metadata": {}, 75 | "outputs": [], 76 | "source": [ 77 | "c = tagreader.IMSClient(datasource=\"PINO\", imstype=\"piwebapi\", cache=SmartCache())" 78 | ] 79 | }, 80 | { 81 | "cell_type": "markdown", 82 | "metadata": {}, 83 | "source": [ 84 | "We can now establish a connection to the server:" 85 | ] 86 | }, 87 | { 88 | "cell_type": "code", 89 | "execution_count": null, 90 | "metadata": {}, 91 | "outputs": [], 92 | "source": [ 93 | "c.connect()" 94 | ] 95 | }, 96 | { 97 | "cell_type": "markdown", 98 | "metadata": {}, 99 | "source": [ 100 | "After connecting, we can search for a tag ([more details](../docs/manual.md#searching-for-tags)):" 101 | ] 102 | }, 103 | { 104 | "cell_type": "code", 105 | "execution_count": null, 106 | "metadata": {}, 107 | "outputs": [], 108 | "source": [ 109 | "c.search('BA:*')" 110 | ] 111 | }, 112 | { 113 | "cell_type": "markdown", 114 | "metadata": {}, 115 | "source": [ 116 | "Selecting three of the tags found above, we can read values for a duration of 3.5 hours starting January 5th at 8 in the morning with 3-minute (180-seconds) intervals. The default query method is interpolated, but several other methods are available by providing the `read_type` argument. Timestamps are parsed using [pandas.Timestamp](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.Timestamp.html), and can therefore be provided in many different string formats. 
[More details](../docs/manual.md#reading_data)" 117 | ] 118 | }, 119 | { 120 | "cell_type": "code", 121 | "execution_count": null, 122 | "metadata": {}, 123 | "outputs": [], 124 | "source": [ 125 | "df = c.read(['BA:ACTIVE.1', 'BA:CONC.1', 'BA:LEVEL.1'], '05-Jan-2020 08:00:00', '05/01/20 11:30am', 180)" 126 | ] 127 | }, 128 | { 129 | "cell_type": "markdown", 130 | "metadata": {}, 131 | "source": [ 132 | "*Note*: Tags with maps (relevant for some InfoPlus.21 servers) can be specified on the form `'tag;map'`, e.g. `'17B-HIC192;CS A_AUTO'`.\n", 133 | "\n", 134 | "The result from the last call is a Pandas dataframe, and can be manipulated as such:" 135 | ] 136 | }, 137 | { 138 | "cell_type": "code", 139 | "execution_count": null, 140 | "metadata": {}, 141 | "outputs": [], 142 | "source": [ 143 | "df.tail()" 144 | ] 145 | }, 146 | { 147 | "cell_type": "code", 148 | "execution_count": null, 149 | "metadata": {}, 150 | "outputs": [], 151 | "source": [ 152 | "df['BA:LEVEL.1'].size" 153 | ] 154 | }, 155 | { 156 | "cell_type": "code", 157 | "execution_count": null, 158 | "metadata": {}, 159 | "outputs": [], 160 | "source": [ 161 | "df['BA:CONC.1'].loc['2020-01-05 11:24:00']" 162 | ] 163 | }, 164 | { 165 | "cell_type": "code", 166 | "execution_count": null, 167 | "metadata": {}, 168 | "outputs": [], 169 | "source": [ 170 | "max(df['BA:LEVEL.1'])" 171 | ] 172 | }, 173 | { 174 | "cell_type": "markdown", 175 | "metadata": {}, 176 | "source": [ 177 | "etc...\n", 178 | "\n", 179 | "Sometimes it can be handy to obtain the unit and description for the three tags:" 180 | ] 181 | }, 182 | { 183 | "cell_type": "code", 184 | "execution_count": null, 185 | "metadata": {}, 186 | "outputs": [], 187 | "source": [ 188 | "tags = ['BA:ACTIVE.1', 'BA:LEVEL.1', 'BA:CONC.1']\n", 189 | "units = c.get_units(tags)\n", 190 | "desc = c.get_descriptions(tags)\n", 191 | "print(units)\n", 192 | "print(desc)" 193 | ] 194 | }, 195 | { 196 | "cell_type": "code", 197 | "execution_count": null, 198 | "metadata": { 199 | "scrolled": true 200 | }, 201 | "outputs": [], 202 | "source": [ 203 | "tag = 'BA:CONC.1'\n", 204 | "df[tag].plot(grid=True, title=desc[tag]).set_ylabel(units[tag])" 205 | ] 206 | } 207 | ], 208 | "metadata": { 209 | "kernelspec": { 210 | "display_name": "Python 3", 211 | "language": "python", 212 | "name": "python3" 213 | }, 214 | "language_info": { 215 | "codemirror_mode": { 216 | "name": "ipython", 217 | "version": 3 218 | }, 219 | "file_extension": ".py", 220 | "mimetype": "text/x-python", 221 | "name": "python", 222 | "nbconvert_exporter": "python", 223 | "pygments_lexer": "ipython3", 224 | "version": "3.6.7" 225 | } 226 | }, 227 | "nbformat": 4, 228 | "nbformat_minor": 4 229 | } 230 | -------------------------------------------------------------------------------- /mypy.ini: -------------------------------------------------------------------------------- 1 | [mypy] 2 | files = tagreader 3 | check_untyped_defs = True 4 | disallow_any_generics = True 5 | # disallow_untyped_calls = True 6 | disallow_untyped_defs = True 7 | ignore_missing_imports = True 8 | # Temporary fix for https://github.com/python/mypy/issues/10709: 9 | ignore_missing_imports_per_module = True 10 | implicit_reexport = False 11 | local_partial_types = True 12 | # no_implicit_optional = True 13 | strict_equality = True 14 | warn_unused_ignores = True 15 | warn_redundant_casts = True 16 | warn_return_any = True 17 | warn_unreachable = True 18 | warn_unused_configs = True 19 | 20 | [mypy-tagreader.clients] 21 | ignore_errors = True 22 | 23 | 
[mypy-tagreader.odbc_handlers] 24 | ignore_errors = True 25 | 26 | [mypy-tagreader.utils] 27 | ignore_errors = True 28 | 29 | [mypy-tagreader.web_handlers] 30 | ignore_errors = True -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "tagreader" 3 | version = "6.0.4" 4 | description = "Tagreader is a Python package for reading trend data from the OSIsoft PI and Aspen Infoplus.21 IMS systems." 5 | authors = ["Einar S. Idsø ", "Morten Dæhli Aslesen str: 14 | tagname = tagname.replace(".", "_") 15 | tagname = "".join(c for c in tagname if c.isalnum() or c == "_").strip() 16 | if tagname[0].isnumeric(): 17 | tagname = "_" + tagname # Conform to NaturalName 18 | return tagname 19 | 20 | 21 | def timestamp_to_epoch(timestamp: datetime) -> int: 22 | origin = datetime(1970, 1, 1) 23 | if timestamp.tzinfo is not None: 24 | timestamp = timestamp.astimezone(pytz.utc).replace(tzinfo=None) 25 | return (timestamp - origin) // timedelta(seconds=1) 26 | 27 | 28 | def _infer_pandas_index_freq(df: pd.DataFrame) -> pd.DataFrame: 29 | try: 30 | if pd.infer_freq(df.index): # type: ignore[arg-type] 31 | df = df.asfreq(pd.infer_freq(df.index)) # type: ignore[arg-type] 32 | except (TypeError, ValueError) as e: 33 | logger.warning(f"Could not infer frequency of timeseries in Cache. {e}") 34 | return df 35 | 36 | 37 | def _drop_duplicates_and_sort_index(df: pd.DataFrame) -> pd.DataFrame: 38 | return df[~df.index.duplicated(keep="first")].sort_index() 39 | 40 | 41 | def clean_dataframe(df: pd.DataFrame) -> pd.DataFrame: 42 | return _infer_pandas_index_freq(_drop_duplicates_and_sort_index(df)) 43 | 44 | 45 | class BaseCache(Cache): # type: ignore[misc] 46 | """ 47 | Cache works as a Python dictionary with persistence. It is simple to use, and only requires a directory for 48 | the cache. 
The default directory is /.cache/ 49 | """ 50 | 51 | def __init__( # type: ignore[no-untyped-def] 52 | self, 53 | directory: Path = Path(".") / ".cache", 54 | enable_stats: bool = False, 55 | *args, 56 | **kwargs, 57 | ) -> None: 58 | super().__init__(directory=directory.as_posix(), *args, **kwargs) 59 | 60 | if enable_stats: 61 | self.enable_cache_statistics() 62 | 63 | def enable_cache_statistics(self) -> None: 64 | self.stats(enable=True) 65 | 66 | def put(self, key: str, value: pd.DataFrame, expire: Optional[int] = None) -> None: 67 | self.add(key=key, value=value, expire=expire) 68 | 69 | def get_metadata( 70 | self, key: str, properties: Optional[Union[str, List[str]]] 71 | ) -> Optional[Dict[str, Union[str, int, float]]]: 72 | if isinstance(properties, str): 73 | properties = [properties] 74 | _key = f"$metadata${key}" 75 | metadata = cast(Optional[Dict[str, Union[str, int, float]]], self.get(_key)) 76 | if metadata: 77 | if properties: 78 | return {k: v for (k, v) in metadata.items() if k in properties} 79 | return metadata 80 | else: 81 | return None 82 | 83 | def put_metadata( 84 | self, 85 | key: str, 86 | value: Dict[str, Union[str, int, float]], 87 | expire: Optional[int] = None, 88 | ) -> Dict[str, Union[str, int, float]]: 89 | _key = f"$metadata${key}" 90 | combined_value = value 91 | if _key in self: 92 | existing = self.get(_key) 93 | if existing: 94 | existing.update(value) 95 | combined_value = existing 96 | else: 97 | combined_value = value 98 | self.delete(_key) 99 | 100 | self.add(_key, combined_value, expire=expire) 101 | return combined_value 102 | 103 | def delete_metadata(self, key: str) -> None: 104 | _key = f"$metadata${key}" 105 | self.delete(_key) 106 | 107 | 108 | class BucketCache(BaseCache): 109 | @staticmethod 110 | def _key_path( 111 | tagname: str, 112 | read_type: ReaderType, 113 | ts: timedelta, 114 | stepped: bool, 115 | get_status: bool, 116 | start: Optional[datetime], 117 | end: Optional[datetime], 118 | ) -> str: 119 | """Return a string on the form 120 | $tagname$read_type[$sample_time][$stepped][$get_status]$_start_end 121 | tagname: safe tagname 122 | sample_time: integer value. Empty for RAW. 123 | stepped: "stepped" if value was read as stepped. Empty if not. 124 | get_status: "status" if value contains status. Empty if not. 125 | start: The start of the query that created the bucket. 126 | end: The end of the query that created the bucket. 
127 | """ 128 | tagname = safe_tagname(tagname) 129 | timespan = "" 130 | if start is not None: 131 | start_epoch = timestamp_to_epoch(start) 132 | end_epoch = timestamp_to_epoch(end) if end else end 133 | timespan = f"$_{start_epoch}_{end_epoch}" 134 | 135 | keyval = ( 136 | f"${tagname}" 137 | f"${read_type.name}" 138 | f"{(read_type != ReaderType.RAW) * f'$s{str(int(ts.total_seconds()))}'}" 139 | f"{stepped * '$stepped'}" 140 | f"{get_status * '$status'}" 141 | f"{timespan}" 142 | ) 143 | return keyval 144 | 145 | def store( 146 | self, 147 | *, 148 | df: pd.DataFrame, 149 | tagname: str, 150 | read_type: ReaderType, 151 | ts: timedelta, 152 | stepped: bool, 153 | get_status: bool, 154 | start: datetime, 155 | end: datetime, 156 | ) -> None: 157 | if df.empty: 158 | return 159 | 160 | intersecting = self.get_intersecting_datasets( 161 | tagname=tagname, 162 | read_type=read_type, 163 | ts=ts, 164 | stepped=stepped, 165 | get_status=get_status, 166 | start=start, 167 | end=end, 168 | ) 169 | if len(intersecting) > 0: 170 | for dataset in intersecting: 171 | this_start, this_end = self._get_intervals_from_dataset_name(dataset) 172 | start = min(start, this_start if this_start else start) 173 | end = max(end, this_end if this_end else end) 174 | df2 = self.get(dataset) 175 | if df2 is not None: 176 | df = pd.concat([df, df2], axis=0) 177 | self.delete(dataset) 178 | key = self._key_path( 179 | tagname=tagname, 180 | read_type=read_type, 181 | ts=ts, 182 | stepped=stepped, 183 | get_status=get_status, 184 | start=start, 185 | end=end, 186 | ) 187 | self.put(key=key, value=clean_dataframe(df)) 188 | 189 | @staticmethod 190 | def _get_intervals_from_dataset_name( 191 | name: str, 192 | ) -> Tuple[datetime, datetime]: 193 | name_with_times = name.split("$")[-1] 194 | if not name_with_times.count("_") == 2: 195 | return None, None # type: ignore[return-value] 196 | _, start_epoch, end_epoch = name_with_times.split("_") 197 | start = pd.to_datetime(int(start_epoch), unit="s").tz_localize("UTC") 198 | end = pd.to_datetime(int(end_epoch), unit="s").tz_localize("UTC") 199 | return start, end 200 | 201 | def get_intersecting_datasets( 202 | self, 203 | tagname: str, 204 | read_type: ReaderType, 205 | ts: timedelta, 206 | stepped: bool, 207 | get_status: bool, 208 | start: datetime, 209 | end: datetime, 210 | ) -> List[str]: 211 | if not len(self) > 0: 212 | return [] 213 | intersecting_datasets = [] 214 | for dataset in self.iterkeys(): 215 | target_key = self._key_path( 216 | tagname=tagname, 217 | read_type=read_type, 218 | start=None, 219 | end=None, 220 | ts=ts, 221 | stepped=stepped, 222 | get_status=get_status, 223 | ) 224 | if target_key in dataset: 225 | start_ds, end_ds = self._get_intervals_from_dataset_name(dataset) 226 | if end_ds >= start and end >= start_ds: 227 | intersecting_datasets.append(dataset) 228 | return intersecting_datasets 229 | 230 | def get_missing_intervals( 231 | self, 232 | tagname: str, 233 | read_type: ReaderType, 234 | ts: timedelta, 235 | stepped: bool, 236 | get_status: bool, 237 | start: datetime, 238 | end: datetime, 239 | ) -> List[Tuple[datetime, datetime]]: 240 | datasets = self.get_intersecting_datasets( 241 | tagname=tagname, 242 | read_type=read_type, 243 | ts=ts, 244 | stepped=stepped, 245 | get_status=get_status, 246 | start=start, 247 | end=end, 248 | ) 249 | missing_intervals = [(start, end)] 250 | for dataset in datasets: 251 | b = self._get_intervals_from_dataset_name(dataset) 252 | for _ in range(0, len(missing_intervals)): 253 | r = 
missing_intervals.pop(0) 254 | if b[1] < r[0] or b[0] > r[1]: 255 | # No overlap 256 | missing_intervals.append(r) 257 | elif b[0] <= r[0] and b[1] >= r[1]: 258 | # The bucket covers the entire interval 259 | continue 260 | elif b[0] > r[0] and b[1] < r[1]: 261 | # The bucket splits the interval in two 262 | missing_intervals.append((r[0], b[0])) 263 | missing_intervals.append((b[1], r[1])) 264 | elif b[0] <= r[0] and r[0] <= b[1] < r[1]: 265 | # The bucket chomps the start of the interval 266 | missing_intervals.append((b[1], r[1])) 267 | elif r[0] < b[0] <= r[1] and b[1] >= r[1]: 268 | # The bucket chomps the end of the interval 269 | missing_intervals.append((r[0], b[0])) 270 | return missing_intervals 271 | 272 | def fetch( 273 | self, 274 | *, 275 | tagname: str, 276 | read_type: ReaderType, 277 | ts: timedelta, 278 | stepped: bool, 279 | get_status: bool, 280 | start: datetime, 281 | end: datetime, 282 | ) -> pd.DataFrame: 283 | df = pd.DataFrame() 284 | if not len(self) > 0: 285 | return df 286 | 287 | datasets = self.get_intersecting_datasets( 288 | tagname=tagname, 289 | read_type=read_type, 290 | ts=ts, 291 | stepped=stepped, 292 | get_status=get_status, 293 | start=start, 294 | end=end, 295 | ) 296 | 297 | for dataset in datasets: 298 | df2 = self.get(dataset) 299 | if df2 is not None: 300 | df = pd.concat([df, df2.loc[start:end]], axis=0) # type: ignore[call-overload, misc] 301 | 302 | return clean_dataframe(df) 303 | 304 | 305 | class SmartCache(BaseCache): 306 | @staticmethod 307 | def _key_path( 308 | *, 309 | tagname: str, 310 | read_type: ReaderType, 311 | ts: timedelta, 312 | get_status: bool, 313 | ) -> str: 314 | name = safe_tagname(tagname) 315 | status = get_status * "$status" 316 | if read_type != ReaderType.RAW: 317 | interval = int(ts.total_seconds()) 318 | return f"{read_type.name}$s{interval}${name}{status}" 319 | else: 320 | return f"{read_type.name}${name}{status}" 321 | 322 | def store( 323 | self, 324 | *, 325 | df: pd.DataFrame, 326 | tagname: str, 327 | read_type: ReaderType, 328 | ts: timedelta, 329 | get_status: bool, 330 | ) -> None: 331 | key = self._key_path( 332 | tagname=tagname, read_type=read_type, ts=ts, get_status=get_status 333 | ) 334 | if df.empty: 335 | return # Weirdness ensues when using empty df in select statement below 336 | if key in self: 337 | df2 = self.get(key) 338 | if df2 is not None: 339 | df = pd.concat([df, df2], axis=0) 340 | self.delete(key=key) 341 | self.put( 342 | key=key, 343 | value=clean_dataframe(df), 344 | ) 345 | else: 346 | self.put(key, df) 347 | 348 | def fetch( 349 | self, 350 | *, 351 | tagname: str, 352 | read_type: ReaderType, 353 | ts: timedelta, 354 | start: Optional[datetime], 355 | end: Optional[datetime], 356 | get_status: bool, 357 | ) -> pd.DataFrame: 358 | key = self._key_path( 359 | tagname=tagname, read_type=read_type, ts=ts, get_status=get_status 360 | ) 361 | df = cast(Optional[pd.DataFrame], self.get(key=key)) 362 | if df is None: 363 | return pd.DataFrame() 364 | if start is not None: 365 | df = df.loc[df.index >= start] 366 | if end is not None: 367 | df = df.loc[df.index <= end] 368 | return df 369 | -------------------------------------------------------------------------------- /tagreader/clients.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime, timedelta, timezone, tzinfo 2 | from itertools import groupby 3 | from operator import itemgetter 4 | from typing import Any, Dict, List, Optional, Tuple, Union 5 | from 
urllib.error import HTTPError 6 | 7 | import numpy as np 8 | import pandas as pd 9 | import pytz 10 | 11 | from tagreader.cache import BucketCache, SmartCache 12 | from tagreader.logger import logger 13 | from tagreader.utils import ( 14 | IMSType, 15 | ReaderType, 16 | convert_to_pydatetime, 17 | ensure_datetime_with_tz, 18 | ) 19 | from tagreader.web_handlers import ( 20 | AspenHandlerWeb, 21 | PIHandlerWeb, 22 | get_auth_aspen, 23 | get_auth_pi, 24 | list_aspenone_sources, 25 | list_piwebapi_sources, 26 | ) 27 | 28 | NONE_START_TIME = datetime(1970, 1, 1, tzinfo=pytz.UTC) 29 | 30 | 31 | def list_sources( 32 | imstype: Union[IMSType, str], 33 | url: Optional[str] = None, 34 | auth: Optional[Any] = None, 35 | verifySSL: bool = True, 36 | ) -> List[str]: 37 | if isinstance(imstype, str): 38 | try: 39 | imstype = getattr(IMSType, imstype.upper()) 40 | except AttributeError: 41 | raise ValueError( 42 | f"imstype needs to be one of {', '.join([v for v in IMSType.__members__.values() if v not in [IMSType.PI, IMSType.ASPEN, IMSType.IP21]])}." # noqa 43 | f" We suggest to use the tagreader.IMSType enumerator when initiating a client." 44 | ) 45 | accepted_values = [IMSType.PIWEBAPI, IMSType.ASPENONE] 46 | 47 | if imstype == IMSType.PIWEBAPI: 48 | if auth is None: 49 | auth = get_auth_pi() 50 | return list_piwebapi_sources(url=url, auth=auth, verify_ssl=verifySSL) 51 | elif imstype == IMSType.ASPENONE: 52 | if auth is None: 53 | auth = get_auth_aspen() 54 | return list_aspenone_sources(url=url, auth=auth, verify_ssl=verifySSL) 55 | elif imstype in [IMSType.PI, IMSType.ASPEN, IMSType.IP21]: 56 | raise ValueError( 57 | f"ODBC clients are no longer supported. Given ims client type: {imstype}." 58 | " Please use tagreader version <= 4 for deprecated ODBC clients." 59 | ) 60 | else: 61 | raise NotImplementedError( 62 | f"imstype: {imstype} has not been implemented. Accepted values are: {accepted_values}" 63 | ) 64 | 65 | 66 | def get_missing_intervals( 67 | df: pd.DataFrame, 68 | start: datetime, 69 | end: datetime, 70 | ts: Optional[timedelta], 71 | read_type: ReaderType, 72 | ): 73 | if ( 74 | read_type == ReaderType.RAW 75 | ): # Fixme: How to check for completeness for RAW data? 76 | return [[start, end]] 77 | seconds = int(ts.total_seconds()) 78 | tvec = pd.date_range(start=start, end=end, freq=f"{seconds}s") 79 | if len(df) == len(tvec): # Short-circuit if dataset is complete 80 | return [] 81 | values_in_df = tvec.isin(df.index) 82 | missing_intervals = [] 83 | for k, g in groupby(enumerate(values_in_df), lambda ix: ix[1]): 84 | if not k: 85 | seq = list(map(itemgetter(0), g)) 86 | missing_intervals.append( 87 | ( 88 | pd.Timestamp(tvec[seq[0]]).to_pydatetime(), 89 | pd.Timestamp(tvec[seq[-1]]).to_pydatetime(), 90 | ) 91 | ) 92 | # Should be unnecessary to fetch overlapping points since get_next_timeslice 93 | # ensures start <= t <= end 94 | # missingintervals.append((pd.Timestamp(tvec[seq[0]]), 95 | # pd.Timestamp(tvec[min(seq[-1]+1, len(tvec)-1)]))) 96 | return missing_intervals 97 | 98 | 99 | def get_next_timeslice( 100 | start: datetime, 101 | end: datetime, 102 | ts: Optional[timedelta], 103 | max_steps: Optional[int], 104 | ) -> Tuple[datetime, datetime]: 105 | if max_steps is None: 106 | calc_end = end 107 | else: 108 | calc_end = start + ts * max_steps 109 | calc_end = min(end, calc_end) 110 | # Ensure we include the last data point. 111 | # Discrepancies between Aspen and Pi for +ts 112 | # Discrepancies between IMS and cache for e.g. ts. 
113 | # if calc_end == end: 114 | # calc_end += ts / 2 115 | return start, calc_end 116 | 117 | 118 | def get_handler( 119 | imstype: Optional[IMSType], 120 | datasource: str, 121 | url: Optional[str], 122 | options: Dict[str, Union[int, float, str]], 123 | verifySSL: Optional[bool], 124 | auth: Optional[Any], 125 | cache: Optional[Union[SmartCache, BucketCache]] = None, 126 | ): 127 | if imstype is None: 128 | try: 129 | if datasource in list_aspenone_sources( 130 | url=None, auth=None, verify_ssl=verifySSL 131 | ): 132 | imstype = IMSType.ASPENONE 133 | except HTTPError as e: 134 | logger.debug(f"Could not list Aspenone sources: {e}") 135 | if imstype is None: 136 | try: 137 | if datasource in list_piwebapi_sources( 138 | url=None, auth=None, verify_ssl=verifySSL 139 | ): 140 | imstype = IMSType.PIWEBAPI 141 | except HTTPError as e: 142 | logger.debug(f"Could not list PI sources: {e}") 143 | 144 | if imstype == IMSType.PIWEBAPI: 145 | return PIHandlerWeb( 146 | url=url, 147 | datasource=datasource, 148 | options=options, 149 | verify_ssl=verifySSL, 150 | auth=auth, 151 | cache=cache, 152 | ) 153 | 154 | if imstype == IMSType.ASPENONE: 155 | return AspenHandlerWeb( 156 | datasource=datasource, 157 | url=url, 158 | options=options, 159 | verify_ssl=verifySSL, 160 | auth=auth, 161 | ) 162 | elif imstype in [IMSType.PI, IMSType.ASPEN, IMSType.IP21]: 163 | raise ValueError( 164 | f"ODBC clients are no longer supported. Given ims client type: {imstype}." 165 | " Please use tagreader version <= 4 for deprecated ODBC clients." 166 | ) 167 | raise ValueError( 168 | f"Could not infer IMSType for datasource: {datasource}. " 169 | f"Please specify correct datasource, imstype or host, or refer to the user docs." 170 | ) 171 | 172 | 173 | class IMSClient: 174 | def __init__( 175 | self, 176 | datasource: str, 177 | imstype: Optional[Union[str, IMSType]] = None, 178 | tz: Union[tzinfo, str] = pytz.timezone("Europe/Oslo"), 179 | url: Optional[str] = None, 180 | handler_options: Dict[str, Union[int, float, str]] = {}, # noqa: 181 | verifySSL: bool = True, 182 | auth: Optional[Any] = None, 183 | cache: Optional[Union[SmartCache, BucketCache]] = None, 184 | ): 185 | if isinstance(imstype, str): 186 | try: 187 | imstype = getattr(IMSType, imstype.upper()) 188 | except AttributeError: 189 | raise ValueError( 190 | f"imstype needs to be one of {', '.join([v for v in IMSType.__members__.values()])}." 191 | f" We suggest to use the tagreader.IMSType enumerator when initiating a client." 192 | ) 193 | 194 | if isinstance(tz, str): 195 | if tz in pytz.all_timezones: 196 | self.tz = pytz.timezone(tz) 197 | else: 198 | raise ValueError(f"Invalid timezone string Given type was {type(tz)}") 199 | elif isinstance(tz, tzinfo): 200 | self.tz = tz 201 | else: 202 | raise ValueError( 203 | f"timezone argument 'tz' needs to be either a valid timezone string or a tzinfo-object. Given type was {type(tz)}" 204 | ) 205 | 206 | self.cache = cache 207 | self.handler = get_handler( 208 | imstype=imstype, 209 | datasource=datasource, 210 | url=url, 211 | options=handler_options, 212 | verifySSL=verifySSL, 213 | auth=auth, 214 | cache=self.cache, 215 | ) 216 | 217 | def connect(self) -> None: 218 | self.handler.connect() 219 | 220 | def search_tag( 221 | self, 222 | tag: Optional[str] = None, 223 | desc: Optional[str] = None, 224 | timeout: Optional[int] = None, 225 | ) -> Union[List[Tuple[str, str]], List[str]]: 226 | logger.warning("This function is deprecated. 
Please call 'search()' instead") 227 | return self.search(tag=tag, desc=desc, timeout=timeout) 228 | 229 | def search( 230 | self, 231 | tag: Optional[str] = None, 232 | desc: Optional[str] = None, 233 | timeout: Optional[int] = None, 234 | return_desc: bool = True, 235 | ) -> Union[List[Tuple[str, str]], List[str]]: 236 | return self.handler.search( 237 | tag=tag, desc=desc, timeout=timeout, return_desc=return_desc 238 | ) 239 | 240 | def _get_metadata(self, tag: str): 241 | return self.handler._get_tag_metadata( 242 | tag 243 | ) # noqa: Should probably expose this as a public method if needed. 244 | 245 | def _read_single_tag( 246 | self, 247 | tag: str, 248 | start: Optional[datetime], 249 | end: Optional[datetime], 250 | ts: timedelta, 251 | read_type: ReaderType, 252 | get_status: bool, 253 | cache: Optional[Union[BucketCache, SmartCache]], 254 | ): 255 | if read_type == ReaderType.SNAPSHOT: 256 | metadata = self._get_metadata(tag) 257 | df = self.handler.read_tag( 258 | tag=tag, 259 | start=start, 260 | end=end, 261 | sample_time=ts, 262 | read_type=read_type, 263 | metadata=metadata, 264 | get_status=get_status, 265 | ) 266 | else: 267 | stepped = False 268 | missing_intervals = [(start, end)] 269 | df = pd.DataFrame() 270 | 271 | if isinstance(cache, SmartCache): 272 | time_slice = get_next_timeslice( 273 | start=start, end=end, ts=ts, max_steps=None 274 | ) 275 | df = cache.fetch( 276 | tagname=tag, 277 | read_type=read_type, 278 | ts=ts, 279 | start=time_slice[0], 280 | end=time_slice[1], 281 | get_status=get_status, 282 | ) 283 | missing_intervals = get_missing_intervals( 284 | df=df, 285 | start=start, 286 | end=end, 287 | ts=ts, 288 | read_type=read_type, 289 | ) 290 | if not missing_intervals: 291 | return df.tz_convert(self.tz).sort_index() 292 | elif isinstance(cache, BucketCache): 293 | df = cache.fetch( 294 | tagname=tag, 295 | read_type=read_type, 296 | ts=ts, 297 | stepped=stepped, 298 | get_status=get_status, 299 | start=start, 300 | end=end, 301 | ) 302 | missing_intervals = cache.get_missing_intervals( 303 | tagname=tag, 304 | read_type=read_type, 305 | ts=ts, 306 | stepped=stepped, 307 | get_status=get_status, 308 | start=start, 309 | end=end, 310 | ) 311 | if not missing_intervals: 312 | return df.tz_convert(self.tz).sort_index() 313 | 314 | metadata = self._get_metadata(tag) 315 | frames = [df] 316 | for start, end in missing_intervals: 317 | while True: 318 | df = self.handler.read_tag( 319 | tag=tag, 320 | start=start, 321 | end=end, 322 | sample_time=ts, 323 | read_type=read_type, 324 | metadata=metadata, 325 | get_status=get_status, 326 | ) 327 | if not df.empty and read_type != ReaderType.RAW: 328 | if isinstance(cache, SmartCache): 329 | cache.store( 330 | df=df, 331 | tagname=tag, 332 | read_type=read_type, 333 | ts=ts, 334 | get_status=get_status, 335 | ) 336 | if isinstance(cache, BucketCache): 337 | cache.store( 338 | df=df, 339 | tagname=tag, 340 | read_type=read_type, 341 | ts=ts, 342 | stepped=stepped, 343 | get_status=get_status, 344 | start=start, 345 | end=end, 346 | ) 347 | frames.append(df) 348 | if len(df) < self.handler._max_rows: 349 | break 350 | start = df.index[-1] 351 | 352 | df = pd.concat(frames) 353 | # read_type INT leads to overlapping values after concatenating 354 | # due to both start time and end time included. 355 | # Aggregate read_types (should) align perfectly and don't 356 | # (shouldn't) need deduplication. 
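        # Added illustration: two consecutive INT reads covering 12:00-12:10
        # and 12:10-12:20 each include the 12:10 boundary sample, so the
        # concatenated frame holds that timestamp twice; keep="first" below
        # discards the second occurrence.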
357 | df = df[~df.index.duplicated(keep="first")] # Deduplicate on index 358 | df = df.tz_convert(self.tz).sort_index() 359 | df = df.rename(columns={"value": tag}) 360 | return df 361 | 362 | def get_units(self, tags: Union[str, List[str]]): 363 | if isinstance(tags, str): 364 | tags = [tags] 365 | units = {} 366 | for tag in tags: 367 | try: 368 | if self.cache is not None: 369 | r = self.cache.get_metadata(key=tag, properties="unit") 370 | if r is not None and "unit" in r: 371 | units[tag] = r["unit"] 372 | if tag not in units: 373 | unit = self.handler._get_tag_unit(tag) 374 | if self.cache is not None and unit is not None: 375 | self.cache.put_metadata(key=tag, value={"unit": unit}) 376 | units[tag] = unit 377 | except Exception: 378 | if self.search(tag) == []: # check for nonexisting string 379 | logger.warning(f"Tag not found: {tag}") 380 | continue 381 | return units 382 | 383 | def get_descriptions(self, tags: Union[str, List[str]]) -> Dict[str, str]: 384 | if isinstance(tags, str): 385 | tags = [tags] 386 | descriptions = {} 387 | for tag in tags: 388 | try: 389 | if self.cache is not None: 390 | r = self.cache.get_metadata(key=tag, properties="description") 391 | if r is not None and "description" in r: 392 | descriptions[tag] = r["description"] 393 | if tag not in descriptions: 394 | desc = self.handler._get_tag_description(tag) 395 | if self.cache is not None and desc is not None: 396 | self.cache.put_metadata(key=tag, value={"description": desc}) 397 | descriptions[tag] = desc 398 | except Exception: 399 | if self.search(tag) == []: # check for nonexisting string 400 | logger.warning(f"Tag not found: {tag}") 401 | continue 402 | return descriptions 403 | 404 | def read_tags( 405 | self, 406 | tags: Union[str, List[str]], 407 | start_time: Optional[Union[datetime, pd.Timestamp, str]] = None, 408 | stop_time: Optional[Union[datetime, pd.Timestamp, str]] = None, 409 | ts: Optional[Union[timedelta, pd.Timedelta]] = timedelta(seconds=60), 410 | read_type: ReaderType = ReaderType.INT, 411 | get_status: bool = False, 412 | ): 413 | start = start_time 414 | end = stop_time 415 | logger.warn( 416 | ( 417 | "This function has been renamed to read() and is deprecated. " 418 | "Please call 'read()' instead" 419 | ) 420 | ) 421 | return self.read( 422 | tags=tags, 423 | start_time=start, 424 | end_time=end, 425 | ts=ts, 426 | read_type=read_type, 427 | get_status=get_status, 428 | ) 429 | 430 | def read( 431 | self, 432 | tags: Union[str, List[str]], 433 | start_time: Optional[Union[datetime, pd.Timestamp, str]] = None, 434 | end_time: Optional[Union[datetime, pd.Timestamp, str]] = None, 435 | ts: Optional[Union[timedelta, pd.Timedelta, int]] = timedelta(seconds=60), 436 | read_type: ReaderType = ReaderType.INT, 437 | get_status: bool = False, 438 | ) -> pd.DataFrame: 439 | """Reads values for the specified [tags] from the IMS server for the 440 | time interval from [start_time] to [stop_time] in intervals [ts]. 441 | 442 | The interval [ts] can be specified as pd.Timedelta or as an integer, 443 | in which case it will be interpreted as seconds. 444 | 445 | Default value for [read_type] is ReaderType.INT, which interpolates 446 | the raw data. 
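
        A minimal usage sketch (the tag name and times below are purely
        illustrative):

            df = client.read(
                tags=["TI-101"],
                start_time="2023-05-01 10:00:00",
                end_time="2023-05-01 11:00:00",
                ts=60,
            )
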
447 | All possible values for read_type are defined in the ReaderType class, 448 | which can be imported as follows: 449 | from utils import ReaderType 450 | 451 | Values for ReaderType.* that should work for all handlers are: 452 | INT, RAW, MIN, MAX, RNG, AVG, VAR, STD and SNAPSHOT 453 | """ 454 | start = start_time 455 | end = end_time 456 | if isinstance(tags, str): 457 | tags = [tags] 458 | if isinstance(read_type, str): 459 | try: 460 | read_type = getattr(ReaderType, read_type) 461 | except AttributeError: 462 | ValueError( 463 | "read_type needs to be of type ReaderType.* or a legal value. Please refer to the docstring." 464 | ) 465 | if read_type in [ReaderType.RAW, ReaderType.SNAPSHOT] and len(tags) > 1: 466 | raise RuntimeError( 467 | "Unable to read raw/sampled data for multiple tags since they don't " 468 | "share time vector. Read one at a time." 469 | ) 470 | 471 | if isinstance(tags, str): 472 | tags = [tags] 473 | 474 | if start is None: 475 | start = NONE_START_TIME 476 | elif isinstance(start, (str, pd.Timestamp)): 477 | try: 478 | start = convert_to_pydatetime(start) 479 | except ValueError: 480 | start = convert_to_pydatetime(start) 481 | if end is None: 482 | end = datetime.now(timezone.utc) 483 | elif isinstance(end, (str, pd.Timestamp)): 484 | end = convert_to_pydatetime(end) 485 | 486 | if isinstance(ts, pd.Timedelta): 487 | ts = ts.to_pytimedelta() 488 | elif isinstance( 489 | ts, 490 | ( 491 | int, 492 | float, 493 | np.int32, 494 | np.int64, 495 | np.float32, 496 | np.float64, 497 | np.number, 498 | np.integer, 499 | ), 500 | ): 501 | ts = timedelta(seconds=int(ts)) 502 | elif not ts and read_type not in [ReaderType.SNAPSHOT, ReaderType.RAW]: 503 | raise ValueError( 504 | "ts needs to be a timedelta or an integer (number of seconds)" 505 | " unless you are reading raw or snapshot data." 506 | f" Given type: {type(ts)}" 507 | ) 508 | elif not isinstance(ts, timedelta): 509 | raise ValueError( 510 | "ts needs to be either a None, timedelta or and integer (number of seconds)." 511 | f" Given type: {type(ts)}" 512 | ) 513 | 514 | if read_type != ReaderType.SNAPSHOT: 515 | start = ensure_datetime_with_tz(start, tz=self.tz) 516 | if end: 517 | end = ensure_datetime_with_tz(end, tz=self.tz) 518 | 519 | old_tags = tags 520 | tags = list(dict.fromkeys(tags)) 521 | if len(old_tags) > len(tags): 522 | duplicates = set([x for n, x in enumerate(old_tags) if x in old_tags[:n]]) 523 | logger.warning( 524 | f"Duplicate tags found, removed duplicates: {', '.join(duplicates)}" 525 | ) 526 | 527 | results = [] 528 | for i, tag in enumerate(tags): 529 | results.append( 530 | self._read_single_tag( 531 | tag=tag, 532 | start=start, 533 | end=end, 534 | ts=ts, 535 | read_type=read_type, 536 | get_status=get_status, 537 | cache=self.cache, 538 | ) 539 | ) 540 | 541 | return pd.concat(results, axis=1) 542 | 543 | def query_sql(self, query: str, parse: bool = True): 544 | """[summary] 545 | Args: 546 | query (str): [description] 547 | parse (bool, optional): Whether to attempt to parse query return 548 | value as table. Defaults to True. 
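
        Example (illustrative; the query mirrors one used in the test suite):
            res = client.query_sql(
                "Select name, ip_description from ip_analogdef where name = 'atc'",
                parse=True,
            )
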
549 | Returns: 550 | Union[pd.DataFrame, pyodbc.Cursor, str]: Return value 551 | """ 552 | df_or_cursor = self.handler.query_sql(query=query, parse=parse) 553 | return df_or_cursor 554 | -------------------------------------------------------------------------------- /tagreader/logger.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | logging.basicConfig( 4 | format="%(asctime)s %(levelname)-8s %(name)-15s %(message)s", 5 | datefmt="%Y-%m-%d %H:%M:%S", 6 | level=logging.INFO, 7 | ) 8 | 9 | 10 | logger = logging.getLogger("awt") 11 | -------------------------------------------------------------------------------- /tagreader/utils.py: -------------------------------------------------------------------------------- 1 | import enum 2 | import hashlib 3 | import logging 4 | import platform 5 | import ssl 6 | from datetime import datetime, tzinfo 7 | from enum import Enum 8 | from pathlib import Path 9 | from typing import Union 10 | 11 | import certifi 12 | import pandas as pd 13 | import pytz 14 | import requests 15 | from platformdirs import user_data_dir 16 | 17 | from tagreader.logger import logger 18 | 19 | 20 | def is_windows() -> bool: 21 | return platform.system() == "Windows" 22 | 23 | 24 | def is_mac() -> bool: 25 | return platform.system() == "Darwin" 26 | 27 | 28 | def is_linux() -> bool: 29 | return platform.system() == "Linux" 30 | 31 | 32 | if is_windows(): 33 | import winreg 34 | 35 | if is_mac(): 36 | import socket 37 | import subprocess 38 | 39 | 40 | def convert_to_pydatetime(date_stamp: Union[datetime, str, pd.Timestamp]) -> datetime: 41 | if isinstance(date_stamp, datetime): 42 | return date_stamp 43 | elif isinstance(date_stamp, pd.Timestamp): 44 | return date_stamp.to_pydatetime() 45 | else: 46 | try: 47 | return pd.to_datetime(str(date_stamp), format="ISO8601").to_pydatetime() 48 | except ValueError: 49 | return pd.to_datetime(str(date_stamp), dayfirst=True).to_pydatetime() 50 | 51 | 52 | def ensure_datetime_with_tz( 53 | date_stamp: Union[datetime, str, pd.Timestamp], 54 | tz: tzinfo = pytz.timezone("Europe/Oslo"), 55 | ) -> datetime: 56 | date_stamp = convert_to_pydatetime(date_stamp) 57 | 58 | if not date_stamp.tzinfo: 59 | date_stamp = tz.localize(date_stamp) 60 | 61 | return date_stamp 62 | 63 | 64 | def urljoin(*args) -> str: 65 | """ 66 | Joins components of URL. Ensures slashes are inserted or removed where 67 | needed, and does not strip trailing slash of last element. 68 | 69 | Returns: 70 | str -- Generated URL 71 | """ 72 | trailing_slash = "/" if args[-1].endswith("/") else "" 73 | return "/".join(map(lambda x: str(x).strip("/"), args)) + trailing_slash 74 | 75 | 76 | class ReaderType(enum.IntEnum): 77 | """Enumerates available types of data to read. 78 | 79 | For members with more than one name per value, the first member (the 80 | original) needs to be untouched since it may be used as back-reference 81 | (specifically for cache hierarchies). 
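
    For example, INT, INTERPOLATE and INTERPOLATED below all name the same
    member; INT is the original and therefore the name that must stay
    untouched.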
82 | """ 83 | 84 | RAW = SAMPLED = ACTUAL = enum.auto() # Raw sampled data 85 | SHAPEPRESERVING = BESTFIT = enum.auto() # Minimum data points for preserving shape 86 | INT = INTERPOLATE = INTERPOLATED = enum.auto() # Interpolated data 87 | MIN = MINIMUM = enum.auto() # Min value 88 | MAX = MAXIMUM = enum.auto() # Max value 89 | AVG = AVERAGE = AVERAGED = enum.auto() # Averaged data 90 | VAR = VARIANCE = enum.auto() # Variance of data 91 | STD = STDDEV = enum.auto() # Standard deviation of data 92 | RNG = RANGE = enum.auto() # Range of data 93 | COUNT = enum.auto() # Number of data points 94 | GOOD = enum.auto() # Number of good data points 95 | BAD = NOTGOOD = enum.auto() # Number of not good data points 96 | TOTAL = enum.auto() # Number of total data 97 | SUM = enum.auto() # Sum of data 98 | SNAPSHOT = FINAL = LAST = enum.auto() # Last sampled value 99 | 100 | 101 | def add_equinor_root_certificate() -> bool: 102 | """ 103 | This is a utility function for Equinor employees on Equinor managed machines. 104 | 105 | The function searches for the Equinor Root certificate in the 106 | cert store and imports it to the cacert bundle. Does nothing if not 107 | running on Equinor host. 108 | 109 | NB! This needs to be repeated after updating the cacert module. 110 | 111 | Returns: 112 | bool: True if function completes successfully 113 | """ 114 | certificate = find_local_equinor_root_certificate() 115 | 116 | # If certificate is not found locally, we download it from the Equinor server 117 | if certificate == "": 118 | logger.debug( 119 | "Unable to locate Equinor Root CA certificate on this host. Downloading from Equinor server." 120 | ) 121 | response = requests.get("http://pki.equinor.com/aia/ecpr.crt") 122 | 123 | if response.status_code != 200: 124 | logger.error( 125 | "Unable to find Equinor Root CA certificate locally and on Equinor server." 
126 | ) 127 | return False 128 | 129 | certificate = response.text.replace("\r", "") 130 | 131 | # Write result to user data so we can read the cert from there next time 132 | filepath = Path(user_data_dir("tagreader")) / "equinor_root_ca.crt" 133 | try: 134 | filepath.parent.mkdir(parents=True, exist_ok=True) 135 | filepath.write_text(certificate) 136 | logger.debug("Equinor Root CA certificate written to cache") 137 | except Exception as e: 138 | logger.debug(f"Failed to write Equinor Root CA certificate to cache: {e}") 139 | 140 | if certificate in certifi.contents(): 141 | logger.debug("Equinor Root Certificate already exists in certifi store") 142 | return True 143 | 144 | ca_file = certifi.where() 145 | with open(ca_file, "ab") as f: 146 | f.write(bytes(certificate, "ascii")) 147 | logger.debug("Equinor Root Certificate added to certifi store") 148 | 149 | 150 | def find_local_equinor_root_certificate() -> str: 151 | equinor_root_pem_hash = "5A206332CE73CED1D44C8A99C4C43B7CEE03DF5F" 152 | ca_search = "Equinor Root CA" 153 | 154 | if is_windows(): 155 | logger.debug("Checking for Equinor Root CA in Windows certificate store") 156 | for cert in ssl.enum_certificates("CA"): 157 | found_cert = cert[0] 158 | # deepcode ignore InsecureHash: 159 | if hashlib.sha1(found_cert).hexdigest().upper() == equinor_root_pem_hash: 160 | return ssl.DER_cert_to_PEM_cert(found_cert) 161 | 162 | elif is_mac(): 163 | logger.debug("Checking for Equinor Root CA in MacOS certificate store") 164 | macos_ca_certs = subprocess.run( 165 | ["security", "find-certificate", "-a", "-c", ca_search, "-Z"], 166 | stdout=subprocess.PIPE, 167 | ).stdout 168 | 169 | if equinor_root_pem_hash in str(macos_ca_certs).upper(): 170 | c = get_macos_equinor_certificates() 171 | for cert in c: 172 | # deepcode ignore InsecureHash: 173 | if hashlib.sha1(cert).hexdigest().upper() == equinor_root_pem_hash: 174 | return ssl.DER_cert_to_PEM_cert(cert) 175 | 176 | # If the certificate is not found in the local cert store, look in the tagreader cache 177 | filepath = Path(user_data_dir("tagreader")) / "equinor_root_ca.crt" 178 | 179 | try: 180 | if filepath.exists(): 181 | return filepath.read_text() 182 | except Exception as e: 183 | logger.debug(f"Failed to read Equinor Root CA certificate from cache: {e}") 184 | 185 | return "" 186 | 187 | 188 | def get_macos_equinor_certificates(): 189 | import ssl 190 | import tempfile 191 | 192 | ca_search = "Equinor Root CA" 193 | 194 | ctx = ssl.create_default_context() 195 | macos_ca_certs = subprocess.run( 196 | ["security", "find-certificate", "-a", "-c", ca_search, "-p"], 197 | stdout=subprocess.PIPE, 198 | ).stdout 199 | with tempfile.NamedTemporaryFile("w+b", delete=False) as tmp_file: 200 | tmp_file.write(macos_ca_certs) 201 | 202 | ctx.load_verify_locations(tmp_file.name) 203 | 204 | return ctx.get_ca_certs(binary_form=True) 205 | 206 | 207 | def is_equinor() -> bool: 208 | """Determines whether code is running on an Equinor host 209 | 210 | If Windows host: 211 | Finds host's domain in Windows Registry at 212 | HKLM\\SYSTEM\\ControlSet001\\Services\\Tcpip\\Parameters\\Domain 213 | If mac os host: 214 | Finds statoil.net as AD hostname in certificates 215 | If Linux host: 216 | Checks whether statoil.no is search domain 217 | 218 | Returns: 219 | bool: True if Equinor 220 | """ 221 | if is_windows(): 222 | with winreg.OpenKey( 223 | winreg.HKEY_LOCAL_MACHINE, r"SYSTEM\ControlSet001\Services\Tcpip\Parameters" 224 | ) as key: 225 | domain = winreg.QueryValueEx(key, "Domain") 226 | if 
"statoil" in domain[0]: 227 | return True 228 | elif is_mac(): 229 | s = subprocess.run( 230 | ["security", "find-certificate", "-a", "-c" "client.statoil.net"], 231 | stdout=subprocess.PIPE, 232 | ).stdout 233 | 234 | host = socket.gethostname() 235 | 236 | # deepcode ignore IdenticalBranches: Not an error. First test is just more precise. 237 | if host + ".client.statoil.net" in str(s): 238 | return True 239 | elif "client.statoil.net" in host and host in str(s): 240 | return True 241 | elif is_linux(): 242 | with open("/etc/resolv.conf", "r") as f: 243 | if "statoil.no" in f.read(): 244 | return True 245 | else: 246 | raise OSError( 247 | f"Unsupported system: {platform.system()}. Please report this as an issue." 248 | ) 249 | return False 250 | 251 | 252 | class IMSType(str, Enum): 253 | PIWEBAPI = "piwebapi" 254 | ASPENONE = "aspenone" 255 | PI = "pi" 256 | ASPEN = "aspen" 257 | IP21 = "ip21" 258 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | from typing import Generator 3 | 4 | import pytest 5 | 6 | from tagreader.cache import SmartCache 7 | 8 | 9 | @pytest.fixture # type: ignore[misc] 10 | def cache(tmp_path: Path) -> Generator[SmartCache, None, None]: 11 | cache = SmartCache(directory=tmp_path, size_limit=int(4e9)) 12 | yield cache 13 | -------------------------------------------------------------------------------- /tests/test_AspenHandlerREST.py: -------------------------------------------------------------------------------- 1 | from datetime import timedelta 2 | 3 | import pytest 4 | 5 | from tagreader import utils 6 | from tagreader.utils import ReaderType 7 | from tagreader.web_handlers import AspenHandlerWeb 8 | 9 | SAMPLE_TIME = timedelta(seconds=60) 10 | 11 | 12 | @pytest.fixture # type: ignore[misc] 13 | def aspen_handler() -> AspenHandlerWeb: # type: ignore[misc] 14 | h = AspenHandlerWeb( 15 | datasource="source_name", auth=None, options={}, url=None, verify_ssl=None 16 | ) 17 | yield h 18 | 19 | 20 | def test_generate_search_query() -> None: 21 | with pytest.raises(ValueError): 22 | AspenHandlerWeb.generate_search_query(tag="ATCAI", desc=None, datasource=None) 23 | assert AspenHandlerWeb.generate_search_query( 24 | tag="ATCAI", datasource="source_name", desc=None, max=100 25 | ) == { 26 | "datasource": "source_name", 27 | "tag": "ATCAI", 28 | "max": 100, 29 | "getTrendable": 0, 30 | } 31 | assert AspenHandlerWeb.generate_search_query( 32 | tag="ATC*", datasource="source_name", desc=None, max=100 33 | ) == { 34 | "datasource": "source_name", 35 | "tag": "ATC*", 36 | "max": 100, 37 | "getTrendable": 0, 38 | } 39 | assert AspenHandlerWeb.generate_search_query( 40 | tag="ATCAI", datasource="source_name", desc=None 41 | ) == {"datasource": "source_name", "tag": "ATCAI", "max": 100000, "getTrendable": 0} 42 | 43 | 44 | def test_split_tagmap() -> None: 45 | assert AspenHandlerWeb.split_tagmap("ATCAI") == ("ATCAI", None) 46 | assert AspenHandlerWeb.split_tagmap("ATCAI;IP_ANALOGMAP") == ( 47 | "ATCAI", 48 | "IP_ANALOGMAP", 49 | ) 50 | 51 | 52 | def test_generate_description_query(aspen_handler: AspenHandlerWeb) -> None: 53 | assert aspen_handler.generate_get_description_query("ATCAI") == ( 54 | '0' 55 | "DSCR" 56 | "0" 57 | ) 58 | 59 | 60 | def test_generate_unit_query(aspen_handler: AspenHandlerWeb) -> None: 61 | assert aspen_handler.generate_get_unit_query("ATCAI") == ( 62 | '0' 63 | "Units" 64 | "MAP_Units0" 
65 | ) 66 | 67 | 68 | def test_generate_map_query(aspen_handler: AspenHandlerWeb) -> None: 69 | assert aspen_handler.generate_get_map_query("ATCAI") == ( 70 | '0' 71 | "" 72 | ) 73 | 74 | 75 | @pytest.mark.parametrize( # type: ignore[misc] 76 | "read_type", 77 | [ 78 | "RAW", 79 | "SHAPEPRESERVING", 80 | "INT", 81 | "MIN", 82 | "MAX", 83 | "RNG", 84 | "AVG", 85 | "VAR", 86 | "STD", 87 | # pytest.param("COUNT", 0, marks=pytest.mark.skip), 88 | # pytest.param("GOOD", 0, marks=pytest.mark.skip), 89 | # pytest.param("BAD", 0, marks=pytest.mark.skip), 90 | # pytest.param("TOTAL", 0, marks=pytest.mark.skip), 91 | # pytest.param("SUM", 0, marks=pytest.mark.skip), 92 | "SNAPSHOT", 93 | ], 94 | ) 95 | def test_generate_tag_read_query( 96 | aspen_handler: AspenHandlerWeb, read_type: str 97 | ) -> None: 98 | start = utils.ensure_datetime_with_tz("2020-06-24 17:00:00") 99 | end = utils.ensure_datetime_with_tz("2020-06-24 18:00:00") 100 | ts = SAMPLE_TIME 101 | res = aspen_handler.generate_read_query( 102 | tagname="ATCAI", 103 | mapname=None, 104 | start=start, 105 | end=end, 106 | sample_time=ts, 107 | read_type=getattr(ReaderType, read_type), 108 | metadata={}, 109 | ) 110 | expected = { 111 | "RAW": ( 112 | '' 113 | "" 114 | "015930108000001593014400000" 115 | "01000000" 116 | ), 117 | "SHAPEPRESERVING": ( 118 | '' 119 | "" 120 | "015930108000001593014400000" 121 | "210000000" 122 | ), 123 | "INT": ( 124 | '' 125 | "" 126 | "015930108000001593014400000" 127 | "10

60

3
" 128 | ), 129 | "MIN": ( 130 | '' 131 | "" 132 | "015930108000001593014400000" 133 | "1400

60

30" 134 | "000
" 135 | ), 136 | "MAX": ( 137 | '' 138 | "" 139 | "015930108000001593014400000" 140 | "1300

60

30" 141 | "000
" 142 | ), 143 | "RNG": ( 144 | '' 145 | "" 146 | "015930108000001593014400000" 147 | "1500

60

30" 148 | "000
" 149 | ), 150 | "AVG": ( 151 | '' 152 | "" 153 | "015930108000001593014400000" 154 | "1200

60

30" 155 | "000
" 156 | ), 157 | "VAR": ( 158 | '' 159 | "" 160 | "015930108000001593014400000" 161 | "1800

60

30" 162 | "000
" 163 | ), 164 | "STD": ( 165 | '' 166 | "" 167 | "015930108000001593014400000" 168 | "1700

60

30" 169 | "000
" 170 | ), 171 | "COUNT": "whatever", 172 | "GOOD": "whatever", 173 | "BAD": "whatever", 174 | "TOTAL": "whatever", 175 | "SUM": "whatever", 176 | "SNAPSHOT": ( 177 | '' 178 | "" 179 | "" 180 | "10" 181 | ), 182 | } 183 | assert expected[read_type] == res 184 | 185 | 186 | def test_generate_read_query_long_sample_time(aspen_handler: AspenHandlerWeb) -> None: 187 | start = utils.ensure_datetime_with_tz("2020-06-24 17:00:00") 188 | end = utils.ensure_datetime_with_tz("2020-06-24 18:00:00") 189 | ts = timedelta(seconds=86401) 190 | 191 | res = aspen_handler.generate_read_query( 192 | tagname="ATCAI", 193 | mapname=None, 194 | start=start, 195 | end=end, 196 | sample_time=ts, 197 | read_type=ReaderType.INT, 198 | metadata={}, 199 | ) 200 | expected = ( 201 | '' 202 | "" 203 | "015930108000001593014400000" 204 | "10

86401

3
" 205 | ) 206 | 207 | assert expected == res 208 | 209 | 210 | def test_generate_sql_query(aspen_handler: AspenHandlerWeb) -> None: 211 | res = aspen_handler.generate_sql_query( 212 | datasource=None, 213 | connection_string="my_connection_stringing", 214 | query="myquery", 215 | max_rows=9999, 216 | ) 217 | expected = ( 218 | '' 219 | "" 220 | ) 221 | assert res == expected 222 | res = aspen_handler.generate_sql_query( 223 | datasource="mydatasource", 224 | query="myquery", 225 | max_rows=9999, 226 | connection_string=None, 227 | ) 228 | expected = ( 229 | '' 232 | "" 233 | ) 234 | assert res == expected 235 | -------------------------------------------------------------------------------- /tests/test_AspenHandlerREST_connect.py: -------------------------------------------------------------------------------- 1 | import os 2 | from datetime import datetime, timedelta 3 | from typing import Generator 4 | 5 | import pandas as pd 6 | import pytest 7 | 8 | from tagreader.clients import IMSClient, list_sources 9 | from tagreader.utils import IMSType 10 | from tagreader.web_handlers import ( 11 | AspenHandlerWeb, 12 | get_verify_ssl, 13 | list_aspenone_sources, 14 | ) 15 | 16 | is_GITHUB_ACTIONS = "GITHUB_ACTION" in os.environ 17 | is_AZURE_PIPELINE = "TF_BUILD" in os.environ 18 | 19 | if is_GITHUB_ACTIONS: 20 | pytest.skip( 21 | "All tests in module require connection to Aspen server", 22 | allow_module_level=True, 23 | ) 24 | 25 | VERIFY_SSL = False if is_AZURE_PIPELINE else get_verify_ssl() 26 | 27 | SOURCE = "TRB" 28 | TAG = "xxx" 29 | FAKE_TAG = "so_random_it_cant_exist" 30 | START_TIME = datetime(2023, 5, 1, 10, 0, 0) 31 | STOP_TIME = datetime(2023, 5, 1, 11, 0, 0) 32 | SAMPLE_TIME = timedelta(seconds=60) 33 | 34 | 35 | @pytest.fixture # type: ignore[misc] 36 | def client() -> Generator[IMSClient, None, None]: 37 | c = IMSClient( 38 | datasource=SOURCE, 39 | imstype="aspenone", 40 | verifySSL=bool(VERIFY_SSL), 41 | ) 42 | c.cache = None 43 | c.connect() 44 | yield c 45 | if os.path.exists(SOURCE + ".h5"): 46 | os.remove(SOURCE + ".h5") 47 | 48 | 49 | @pytest.fixture # type: ignore[misc] 50 | def aspen_handler() -> Generator[AspenHandlerWeb, None, None]: 51 | h = AspenHandlerWeb( 52 | datasource=SOURCE, verify_ssl=bool(VERIFY_SSL), auth=None, url=None, options={} 53 | ) 54 | yield h 55 | 56 | 57 | def test_list_all_aspen_one_sources() -> None: 58 | res = list_aspenone_sources(verify_ssl=bool(VERIFY_SSL), auth=None, url=None) 59 | assert isinstance(res, list) 60 | assert len(res) >= 1 61 | assert isinstance(res[0], str) 62 | for r in res: 63 | assert 3 <= len(r) <= 20 64 | 65 | 66 | def test_list_sources_aspen_one() -> None: 67 | res = list_sources(imstype=IMSType.ASPENONE, verifySSL=bool(VERIFY_SSL)) 68 | assert isinstance(res, list) 69 | assert len(res) >= 1 70 | assert isinstance(res[0], str) 71 | for r in res: 72 | assert 3 <= len(r) <= 20 73 | 74 | 75 | def test_verify_connection(aspen_handler: AspenHandlerWeb) -> None: 76 | assert aspen_handler.verify_connection(SOURCE) is True 77 | assert aspen_handler.verify_connection("some_random_stuff_here") is False 78 | 79 | 80 | def test_search_tag(client: IMSClient) -> None: 81 | res = client.search(tag=FAKE_TAG, desc=None) 82 | assert 0 == len(res) 83 | 84 | res = client.search(tag="AverageCPUTimeVals", desc=None) 85 | assert res == [("AverageCPUTimeVals", "Average CPU Time")] 86 | 87 | res = client.search(tag="Aspen*", desc=None, return_desc=False) 88 | assert len(res) < 5 89 | assert isinstance(res, list) 90 | assert isinstance(res[0], 
str) 91 | 92 | res = client.search(tag="Aspen*", desc=None) 93 | assert len(res) < 5 94 | assert isinstance(res, list) 95 | assert isinstance(res[0], tuple) 96 | 97 | res = client.search("AspenCalcTrigger1") 98 | assert res == [("AspenCalcTrigger1", "")] 99 | res = client.search("AspenCalcTrigger1", desc=None) 100 | assert res == [("AspenCalcTrigger1", "")] 101 | 102 | res = client.search("AverageCPUTimeVals", "*CPU*") 103 | assert res == [("AverageCPUTimeVals", "Average CPU Time")] 104 | with pytest.raises(ValueError): 105 | _ = client.search(desc="Sine Input") # noqa 106 | 107 | with pytest.raises(ValueError): 108 | res = client.search("") 109 | 110 | with pytest.raises(ValueError): 111 | _ = client.search( 112 | desc="Sine Input" 113 | ) # noqa res = client.search(tag="ATCM*", return_desc=False)´ 114 | 115 | 116 | def test_read_unknown_tag(client: IMSClient) -> None: 117 | df = client.read(tags=[FAKE_TAG], start_time=START_TIME, end_time=STOP_TIME) 118 | assert len(df.index) == 0 119 | df = client.read(tags=[TAG, FAKE_TAG], start_time=START_TIME, end_time=STOP_TIME) 120 | assert len(df.index) > 0 121 | assert len(df.columns == 1) 122 | 123 | 124 | def test_get_units(client: IMSClient) -> None: 125 | d = client.get_units(FAKE_TAG) 126 | assert isinstance(d, dict) 127 | assert len(d.items()) == 0 128 | 129 | 130 | def test_get_desc(client: IMSClient) -> None: 131 | d = client.get_descriptions(FAKE_TAG) 132 | assert isinstance(d, dict) 133 | assert len(d.items()) == 0 134 | 135 | 136 | def test_query_sql(client: IMSClient) -> None: 137 | # The % causes WC_E_SYNTAX error in result. Tried "everything" but no go. 138 | # Leaving it for now. 139 | # query = "SELECT name, ip_description FROM ip_analogdef WHERE name LIKE 'ATC%'" 140 | query = "Select name, ip_description from ip_analogdef where name = 'atc'" 141 | res = client.query_sql(query=query, parse=False) 142 | # print(res) 143 | assert isinstance(res, str) 144 | 145 | res = client.query_sql(query=query, parse=True) 146 | assert isinstance(res, pd.DataFrame) 147 | assert res.empty 148 | 149 | query = "Select name, ip_description from ip_analogdef where name = 'AverageCPUTimeVals'" 150 | res = client.query_sql(query=query, parse=True) 151 | assert isinstance(res, pd.DataFrame) 152 | assert len(res.index.values) == 1 153 | assert res.index.values[0] == 0 154 | -------------------------------------------------------------------------------- /tests/test_PIHandlerREST.py: -------------------------------------------------------------------------------- 1 | from datetime import timedelta 2 | from typing import Generator, cast 3 | 4 | import pytest 5 | 6 | from tagreader.cache import SmartCache 7 | from tagreader.utils import ReaderType, ensure_datetime_with_tz 8 | from tagreader.web_handlers import PIHandlerWeb 9 | 10 | START_TIME = "2020-04-01 11:05:00" 11 | STOP_TIME = "2020-04-01 12:05:00" 12 | SAMPLE_TIME = 60 13 | 14 | 15 | @pytest.fixture # type: ignore[misc] 16 | def pi_handler(cache: SmartCache) -> Generator[PIHandlerWeb, None, None]: 17 | h = PIHandlerWeb( 18 | datasource="sourcename", 19 | auth=None, 20 | options={}, 21 | url=None, 22 | verify_ssl=True, 23 | cache=cache, 24 | ) 25 | if not isinstance(h.web_id_cache, SmartCache): 26 | raise ValueError("Expected SmartCache in the web client.") 27 | h.web_id_cache.add(key="alreadyknowntag", value="knownwebid") 28 | yield h 29 | 30 | 31 | def test_escape_chars() -> None: 32 | assert ( 33 | PIHandlerWeb.escape('+-&|(){}[]^"~*:\\') == r"\+\-\&\|\(\)\{\}\[\]\^\"\~*\:\\" 34 | ) 35 | 36 | 37 | 
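# Added note (illustrative): escaping matters because characters such as ':'
# are reserved in the PI Web API query syntax; e.g. escape("BA:CONC.1") gives
# r"BA\:CONC.1", consistent with the escaped query strings asserted below.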
def test_generate_search_query() -> None: 38 | assert PIHandlerWeb.generate_search_params( 39 | tag="SINUSOID", desc=None, datasource=None 40 | ) == {"query": "name:SINUSOID"} 41 | assert PIHandlerWeb.generate_search_params( 42 | tag=r"BA:*.1", desc=None, datasource=None 43 | ) == { 44 | "query": r"name:BA\:*.1", 45 | } 46 | assert PIHandlerWeb.generate_search_params( 47 | tag="BA:*.1", datasource=None, desc=None 48 | ) == { 49 | "query": r"name:BA\:*.1", 50 | } 51 | assert PIHandlerWeb.generate_search_params( 52 | desc="Concentration Reactor 1", datasource=None, tag=None 53 | ) == { 54 | "query": r"description:Concentration\ Reactor\ 1", 55 | } 56 | assert PIHandlerWeb.generate_search_params( 57 | tag="BA:*.1", desc="Concentration Reactor 1", datasource=None 58 | ) == {"query": r"name:BA\:*.1 AND description:Concentration\ Reactor\ 1"} 59 | 60 | 61 | def test_is_summary(pi_handler: PIHandlerWeb) -> None: 62 | assert pi_handler._is_summary(ReaderType.AVG) 63 | assert pi_handler._is_summary(ReaderType.MIN) 64 | assert pi_handler._is_summary(ReaderType.MAX) 65 | assert pi_handler._is_summary(ReaderType.RNG) 66 | assert pi_handler._is_summary(ReaderType.STD) 67 | assert pi_handler._is_summary(ReaderType.VAR) 68 | assert not pi_handler._is_summary(ReaderType.RAW) 69 | assert not pi_handler._is_summary(ReaderType.SHAPEPRESERVING) 70 | assert not pi_handler._is_summary(ReaderType.INT) 71 | assert not pi_handler._is_summary(ReaderType.GOOD) 72 | assert not pi_handler._is_summary(ReaderType.BAD) 73 | assert not pi_handler._is_summary(ReaderType.SNAPSHOT) 74 | 75 | 76 | @pytest.mark.parametrize( # type: ignore[misc] 77 | "read_type", 78 | [ 79 | "RAW", 80 | # pytest.param( 81 | # "SHAPEPRESERVING", marks=pytest.mark.skip(reason="Not implemented") 82 | # ), 83 | "INT", 84 | "MIN", 85 | "MAX", 86 | "RNG", 87 | "AVG", 88 | "STD", 89 | "VAR", 90 | # pytest.param("COUNT", marks=pytest.mark.skip(reason="Not implemented")), 91 | # pytest.param("GOOD", marks=pytest.mark.skip(reason="Not implemented")), 92 | # pytest.param("BAD", marks=pytest.mark.skip(reason="Not implemented")), 93 | # pytest.param("TOTAL", marks=pytest.mark.skip(reason="Not implemented")), 94 | # pytest.param("SUM", marks=pytest.mark.skip(reason="Not implemented")), 95 | "SNAPSHOT", 96 | ], 97 | ) 98 | def test_generate_read_query(pi_handler: PIHandlerWeb, read_type: str) -> None: 99 | if not isinstance(pi_handler.web_id_cache, SmartCache): 100 | raise ValueError("Expected SmartCache in the fixture.") 101 | start = ensure_datetime_with_tz(START_TIME) 102 | stop = ensure_datetime_with_tz(STOP_TIME) 103 | ts = timedelta(seconds=SAMPLE_TIME) 104 | 105 | (url, params) = pi_handler.generate_read_query( 106 | tag=pi_handler.tag_to_web_id(tag="alreadyknowntag"), # type: ignore[arg-type] 107 | start=start, 108 | end=stop, 109 | sample_time=ts, 110 | read_type=getattr(ReaderType, read_type), 111 | metadata=None, 112 | ) 113 | if read_type != "SNAPSHOT": 114 | assert params["startTime"] == "01-Apr-20 09:05:00" 115 | assert params["endTime"] == "01-Apr-20 10:05:00" 116 | assert params["timeZone"] == "UTC" 117 | 118 | if read_type == "INT": 119 | assert ( 120 | url == f"streams/{pi_handler.web_id_cache['alreadyknowntag']}/interpolated" 121 | ) 122 | assert params["selectedFields"] == "Links;Items.Timestamp;Items.Value" 123 | assert params["interval"] == f"{SAMPLE_TIME}s" 124 | elif read_type in ["AVG", "MIN", "MAX", "RNG", "STD", "VAR"]: 125 | assert url == f"streams/{pi_handler.web_id_cache['alreadyknowntag']}/summary" 126 | assert ( 127 | 
params["selectedFields"] == "Links;Items.Value.Timestamp;Items.Value.Value" 128 | ) 129 | assert { 130 | "AVG": "Average", 131 | "MIN": "Minimum", 132 | "MAX": "Maximum", 133 | "RNG": "Range", 134 | "STD": "StdDev", 135 | "VAR": "StdDev", 136 | }.get(read_type) == params["summaryType"] 137 | assert params["summaryDuration"] == f"{SAMPLE_TIME}s" 138 | elif read_type == "SNAPSHOT": 139 | assert url == f"streams/{pi_handler.web_id_cache['alreadyknowntag']}/value" 140 | assert params["selectedFields"] == "Timestamp;Value" 141 | assert len(params) == 3 142 | elif read_type == "RAW": 143 | assert url == f"streams/{pi_handler.web_id_cache['alreadyknowntag']}/recorded" 144 | assert params["selectedFields"] == "Links;Items.Timestamp;Items.Value" 145 | assert params["maxCount"] == 10000 # type: ignore[comparison-overlap] 146 | 147 | 148 | @pytest.mark.parametrize( # type: ignore[misc] 149 | "read_type", 150 | [ 151 | "RAW", 152 | # pytest.param( 153 | # "SHAPEPRESERVING", marks=pytest.mark.skip(reason="Not implemented") 154 | # ), 155 | "INT", 156 | "MIN", 157 | "MAX", 158 | "RNG", 159 | "AVG", 160 | "STD", 161 | "VAR", 162 | # pytest.param("COUNT", marks=pytest.mark.skip(reason="Not implemented")), 163 | # pytest.param("GOOD", marks=pytest.mark.skip(reason="Not implemented")), 164 | # pytest.param("BAD", marks=pytest.mark.skip(reason="Not implemented")), 165 | # pytest.param("TOTAL", marks=pytest.mark.skip(reason="Not implemented")), 166 | # pytest.param("SUM", marks=pytest.mark.skip(reason="Not implemented")), 167 | "SNAPSHOT", 168 | ], 169 | ) 170 | def test_generate_read_query_with_status( 171 | pi_handler: PIHandlerWeb, read_type: str 172 | ) -> None: 173 | if not isinstance(pi_handler.web_id_cache, SmartCache): 174 | raise ValueError("Expected SmartCache in the fixture.") 175 | start = ensure_datetime_with_tz(START_TIME) 176 | stop = ensure_datetime_with_tz(STOP_TIME) 177 | ts = timedelta(seconds=SAMPLE_TIME) 178 | 179 | (url, params) = pi_handler.generate_read_query( 180 | tag=pi_handler.tag_to_web_id("alreadyknowntag"), # type: ignore[arg-type] 181 | start=start, 182 | end=stop, 183 | sample_time=ts, 184 | read_type=getattr(ReaderType, read_type), 185 | get_status=True, 186 | metadata=None, 187 | ) 188 | if read_type != "SNAPSHOT": 189 | assert params["startTime"] == "01-Apr-20 09:05:00" 190 | assert params["endTime"] == "01-Apr-20 10:05:00" 191 | assert params["timeZone"] == "UTC" 192 | 193 | if read_type == "INT": 194 | assert ( 195 | url == f"streams/{pi_handler.web_id_cache['alreadyknowntag']}/interpolated" 196 | ) 197 | assert params["selectedFields"] == ( 198 | "Links;Items.Timestamp;Items.Value;" 199 | "Items.Good;Items.Questionable;Items.Substituted" 200 | ) 201 | assert params["interval"] == f"{SAMPLE_TIME}s" 202 | elif read_type in ["AVG", "MIN", "MAX", "RNG", "STD", "VAR"]: 203 | assert url == f"streams/{pi_handler.web_id_cache['alreadyknowntag']}/summary" 204 | assert params["selectedFields"] == ( 205 | "Links;Items.Value.Timestamp;Items.Value.Value;" 206 | "Items.Value.Good;Items.Value.Questionable;Items.Value.Substituted" 207 | ) 208 | assert { 209 | "AVG": "Average", 210 | "MIN": "Minimum", 211 | "MAX": "Maximum", 212 | "RNG": "Range", 213 | "STD": "StdDev", 214 | "VAR": "StdDev", 215 | }.get(read_type) == params["summaryType"] 216 | assert params["summaryDuration"] == f"{SAMPLE_TIME}s" 217 | elif read_type == "SNAPSHOT": 218 | assert url == f"streams/{pi_handler.web_id_cache['alreadyknowntag']}/value" 219 | assert ( 220 | params["selectedFields"] == 
"Timestamp;Value;Good;Questionable;Substituted" 221 | ) 222 | assert len(params) == 3 223 | elif read_type == "RAW": 224 | assert url == f"streams/{pi_handler.web_id_cache['alreadyknowntag']}/recorded" 225 | assert params["selectedFields"] == ( 226 | "Links;Items.Timestamp;Items.Value;" 227 | "Items.Good;Items.Questionable;Items.Substituted" 228 | ) 229 | assert params["maxCount"] == 10000 # type: ignore[comparison-overlap] 230 | 231 | 232 | def test_generate_read_query_long_sample_time(pi_handler: PIHandlerWeb) -> None: 233 | start = ensure_datetime_with_tz(START_TIME) 234 | stop = ensure_datetime_with_tz(STOP_TIME) 235 | ts = timedelta(seconds=86410) 236 | 237 | (url, params) = pi_handler.generate_read_query( 238 | tag=pi_handler.tag_to_web_id("alreadyknowntag"), # type: ignore[arg-type] 239 | start=start, 240 | end=stop, 241 | sample_time=ts, 242 | read_type=ReaderType.INT, 243 | metadata=None, 244 | ) 245 | assert params["interval"] == f"{86410}s" 246 | -------------------------------------------------------------------------------- /tests/test_PIHandlerREST_connect.py: -------------------------------------------------------------------------------- 1 | import os 2 | from datetime import timedelta 3 | from typing import Generator 4 | 5 | import pytest 6 | 7 | from tagreader.cache import SmartCache 8 | from tagreader.clients import IMSClient, list_sources 9 | from tagreader.utils import ReaderType, ensure_datetime_with_tz 10 | from tagreader.web_handlers import PIHandlerWeb, get_verify_ssl, list_piwebapi_sources 11 | 12 | is_GITHUBACTION = "GITHUB_ACTION" in os.environ 13 | is_AZUREPIPELINE = "TF_BUILD" in os.environ 14 | 15 | if is_GITHUBACTION: 16 | pytest.skip( 17 | "All tests in module require connection to PI server", allow_module_level=True 18 | ) 19 | 20 | verifySSL = False if is_AZUREPIPELINE else get_verify_ssl() 21 | 22 | SOURCE = "PIMAM" 23 | TAGS = { 24 | "Float32": "CDT158", # BA:CONC.1 25 | "Digital": "CDM158", # BA:ACTIVE.1 26 | "Int32": "CDEP158", 27 | } 28 | 29 | START_TIME = "2020-04-01 11:05:00" 30 | STOP_TIME = "2020-04-01 12:05:00" 31 | SAMPLE_TIME = 60 32 | 33 | 34 | @pytest.fixture # type: ignore[misc] 35 | def client() -> Generator[IMSClient, None, None]: 36 | c = IMSClient( 37 | datasource=SOURCE, 38 | imstype="piwebapi", 39 | verifySSL=bool(verifySSL), 40 | ) 41 | c.cache = None 42 | c.connect() 43 | c.handler._max_rows = 1000 # For the long raw test 44 | yield c 45 | if os.path.exists(SOURCE + ".h5"): 46 | os.remove(SOURCE + ".h5") 47 | 48 | 49 | @pytest.fixture # type: ignore[misc] 50 | def pi_handler(cache: SmartCache) -> Generator[PIHandlerWeb, None, None]: 51 | h = PIHandlerWeb( 52 | datasource=SOURCE, 53 | verify_ssl=bool(verifySSL), 54 | auth=None, 55 | options={}, 56 | url=None, 57 | cache=cache, 58 | ) 59 | if not isinstance(h.web_id_cache, SmartCache): 60 | raise ValueError("Expected SmartCache in the web client.") 61 | h.web_id_cache["alreadyknowntag"] = "knownwebid" 62 | yield h 63 | 64 | 65 | def test_list_all_piwebapi_sources() -> None: 66 | res = list_piwebapi_sources(verify_ssl=bool(verifySSL), auth=None, url=None) 67 | assert isinstance(res, list) 68 | assert len(res) >= 1 69 | for r in res: 70 | assert isinstance(r, str) 71 | assert 3 <= len(r) 72 | 73 | 74 | def test_list_sources_piwebapi() -> None: 75 | res = list_sources(imstype="piwebapi", verifySSL=bool(verifySSL)) 76 | assert isinstance(res, list) 77 | assert len(res) >= 1 78 | for r in res: 79 | assert isinstance(r, str) 80 | assert 3 <= len(r) 81 | 82 | 83 | def 
test_verify_connection(pi_handler: IMSClient) -> None: 84 | assert pi_handler.verify_connection("PIMAM") is True # type: ignore[attr-defined] 85 | assert pi_handler.verify_connection("somerandomstuffhere") is False # type: ignore[attr-defined] 86 | 87 | 88 | def test_search_tag(client: IMSClient) -> None: 89 | res = client.search("SINUSOID") 90 | assert 1 == len(res) 91 | res = client.search("SIN*") 92 | assert isinstance(res, list) 93 | assert 3 <= len(res) 94 | assert isinstance(res[0], tuple) 95 | [taglist, desclist] = zip(*res) 96 | assert "SINUSOIDU" in taglist 97 | assert desclist[taglist.index("SINUSOID")] == "12 Hour Sine Wave" 98 | res = client.search("SIN*", return_desc=False) 99 | assert 3 <= len(res) 100 | assert isinstance(res, list) 101 | assert isinstance(res[0], str) 102 | res = client.search(desc="12 Hour Sine Wave") 103 | assert 1 <= len(res) 104 | res = client.search(tag="SINUSOID", desc="*Sine*") 105 | assert 1 <= len(res) 106 | 107 | 108 | def test_tag_to_web_id(pi_handler: PIHandlerWeb) -> None: 109 | res = pi_handler.tag_to_web_id("SINUSOID") 110 | assert isinstance(res, str) 111 | assert len(res) >= 20 112 | with pytest.raises(AssertionError): 113 | _ = pi_handler.tag_to_web_id("SINUSOID*") 114 | res = pi_handler.tag_to_web_id("somerandomgarbage") 115 | assert not res 116 | 117 | 118 | @pytest.mark.parametrize( # type: ignore[misc] 119 | ("read_type", "size"), 120 | [ 121 | ("RAW", 10), 122 | # pytest.param( 123 | # "SHAPEPRESERVING", 0, marks=pytest.mark.skip(reason="Not implemented") 124 | # ), 125 | ("INT", 61), 126 | ("MIN", 60), 127 | ("MAX", 60), 128 | ("RNG", 60), 129 | ("AVG", 60), 130 | ("VAR", 60), 131 | ("STD", 60), 132 | # pytest.param("COUNT", 0, marks=pytest.mark.skip(reason="Not implemented")), 133 | # pytest.param("GOOD", 0, marks=pytest.mark.skip(reason="Not implemented")), 134 | # pytest.param("BAD", 0, marks=pytest.mark.skip(reason="Not implemented")), 135 | # pytest.param("TOTAL", 0, marks=pytest.mark.skip(reason="Not implemented")), 136 | # pytest.param("SUM", 0, marks=pytest.mark.skip(reason="Not implemented")), 137 | ("SNAPSHOT", 1), 138 | ], 139 | ) 140 | def test_read(client: IMSClient, read_type: str, size: int) -> None: 141 | if read_type == "SNAPSHOT": 142 | df = client.read( 143 | tags=TAGS["Float32"], 144 | read_type=getattr(ReaderType, read_type), 145 | start_time=None, 146 | end_time=None, 147 | ) 148 | else: 149 | df = client.read( 150 | tags=TAGS["Float32"], 151 | start_time=START_TIME, 152 | end_time=STOP_TIME, 153 | ts=SAMPLE_TIME, 154 | read_type=getattr(ReaderType, read_type), 155 | ) 156 | 157 | if read_type not in ["SNAPSHOT", "RAW"]: 158 | assert df.shape == (size, 1) 159 | assert df.index[0] == ensure_datetime_with_tz(START_TIME) 160 | assert df.index[-1] == df.index[0] + (size - 1) * timedelta(seconds=SAMPLE_TIME) 161 | elif read_type in "RAW": 162 | # Weirdness for test-tag which can have two different results, 163 | # apparently depending on the day of the week, mood, lunar cycle... 
164 | assert df.shape == (size, 1) or df.shape == (size - 1, 1) 165 | assert df.index[0] >= ensure_datetime_with_tz(START_TIME) 166 | assert df.index[-1] <= ensure_datetime_with_tz(STOP_TIME) 167 | 168 | 169 | def test_read_with_status(client: IMSClient) -> None: 170 | df = client.read( 171 | tags=TAGS["Float32"], 172 | start_time=START_TIME, 173 | end_time=STOP_TIME, 174 | ts=SAMPLE_TIME, 175 | read_type=ReaderType.RAW, 176 | get_status=True, 177 | ) 178 | assert df.shape == (10, 2) 179 | assert df[TAGS["Float32"] + "::status"].iloc[0] == 0 180 | 181 | 182 | def test_read_raw_long(client: IMSClient) -> None: 183 | df = client.read( 184 | tags=TAGS["Float32"], 185 | start_time=START_TIME, 186 | end_time="2020-04-11 20:00:00", 187 | read_type=ReaderType.RAW, 188 | ) 189 | assert len(df) > 1000 190 | 191 | 192 | def test_read_only_invalid_data_yields_nan_for_invalid(client: IMSClient) -> None: 193 | tag = TAGS["Float32"] 194 | df = client.read( 195 | tags=tag, 196 | start_time="2012-10-09 10:30:00", 197 | end_time="2012-10-09 11:00:00", 198 | ts=600, 199 | ) 200 | assert df.shape == (4, 1) 201 | assert df[tag].isna().all() 202 | 203 | 204 | def test_read_invalid_data_mixed_with_valid_yields_nan_for_invalid( 205 | client: IMSClient, 206 | ) -> None: 207 | # Hint, found first valid datapoint for tag 208 | tag = TAGS["Float32"] 209 | df = client.read( 210 | tags=tag, 211 | start_time="2018-04-23 15:20:00", 212 | end_time="2018-04-23 15:50:00", 213 | ts=600, 214 | ) 215 | assert df.shape == (4, 1) 216 | assert df[tag].iloc[[0, 1]].isna().all() # type: ignore[call-overload] 217 | assert df[tag].iloc[[2, 3]].notnull().all() # type: ignore[call-overload] 218 | 219 | 220 | def test_digitalread_yields_integers(client: IMSClient) -> None: 221 | tag = TAGS["Digital"] 222 | df = client.read( 223 | tags=tag, 224 | start_time=START_TIME, 225 | end_time=STOP_TIME, 226 | ts=600, 227 | read_type=ReaderType.INT, 228 | ) 229 | assert all(x.is_integer() for x in df[tag]) 230 | 231 | 232 | def test_get_unit(client: IMSClient) -> None: 233 | res = client.get_units(list(TAGS.values())) 234 | assert res[TAGS["Float32"]] == "DEG. 
C" 235 | assert res[TAGS["Digital"]] == "STATE" 236 | assert res[TAGS["Int32"]] == "" 237 | 238 | 239 | def test_get_description(client: IMSClient) -> None: 240 | res = client.get_descriptions(list(TAGS.values())) 241 | assert res[TAGS["Float32"]] == "Atmospheric Tower OH Vapor" 242 | assert res[TAGS["Digital"]] == "Light Naphtha End Point Control" 243 | assert res[TAGS["Int32"]] == "Light Naphtha End Point" 244 | 245 | 246 | def test_from_dst_folds_time(client: IMSClient) -> None: 247 | if os.path.exists(SOURCE + ".h5"): 248 | os.remove(SOURCE + ".h5") 249 | tag = TAGS["Float32"] 250 | interval = ["2017-10-29 00:30:00", "2017-10-29 04:30:00"] 251 | df = client.read(tags=[tag], start_time=interval[0], end_time=interval[1], ts=600) 252 | assert len(df) == (4 + 1) * 6 + 1 253 | # Time exists inside fold: 254 | assert ( 255 | df[tag].loc["2017-10-29 01:10:00+02:00":"2017-10-29 01:50:00+02:00"].size == 5 # type: ignore[misc] 256 | ) 257 | # Time inside fold is always included: 258 | assert ( 259 | df.loc["2017-10-29 01:50:00":"2017-10-29 03:10:00"].size == 2 + (1 + 1) * 6 + 1 # type: ignore[misc] 260 | ) 261 | 262 | 263 | def test_to_dst_skips_time(client: IMSClient) -> None: 264 | if os.path.exists(SOURCE + ".h5"): 265 | os.remove(SOURCE + ".h5") 266 | tag = TAGS["Float32"] 267 | interval = ["2018-03-25 00:30:00", "2018-03-25 03:30:00"] 268 | df = client.read(tags=[tag], start_time=interval[0], end_time=interval[1], ts=600) 269 | # Lose one hour: 270 | assert ( 271 | df.loc["2018-03-25 01:50:00":"2018-03-25 03:10:00"].size == (2 + 1 * 6 + 1) - 6 # type: ignore[misc] 272 | ) 273 | 274 | 275 | def test_tags_with_no_data_included_in_results(client: IMSClient) -> None: 276 | df = client.read( 277 | tags=[TAGS["Float32"]], 278 | start_time="2099-01-01 00:00:00", 279 | end_time="2099-01-02 00:00:00", 280 | ts=timedelta(seconds=60), 281 | ) 282 | assert len(df.columns) == 1 283 | 284 | 285 | def test_tags_raw_with_no_data_included_in_results(client: IMSClient) -> None: 286 | df = client.read( 287 | tags=[TAGS["Float32"]], 288 | start_time="2099-01-01 00:00:00", 289 | end_time="2099-01-02 00:00:00", 290 | read_type=ReaderType.RAW, 291 | ts=timedelta(seconds=60), 292 | ) 293 | assert df.empty 294 | -------------------------------------------------------------------------------- /tests/test_bucketcache.py: -------------------------------------------------------------------------------- 1 | from datetime import timedelta 2 | from pathlib import Path 3 | from typing import Generator 4 | 5 | import pandas as pd 6 | import pytest 7 | 8 | from tagreader.cache import BucketCache, safe_tagname, timestamp_to_epoch 9 | from tagreader.utils import ReaderType 10 | 11 | TAGNAME = "tag1" 12 | READE_TYPE = ReaderType.INT 13 | 14 | TZ = "UTC" 15 | TS = timedelta(seconds=300) 16 | MINUTE = timedelta(seconds=60) 17 | FREQ = f"{int(TS.total_seconds())}s" 18 | 19 | START_TIME_1 = pd.to_datetime("2020-01-01 12:00:00", utc=True) 20 | END_TIME_1 = pd.to_datetime("2020-01-01 13:00:00", utc=True) 21 | index = pd.date_range(start=START_TIME_1, end=END_TIME_1, freq=FREQ, name="time") 22 | DF1 = pd.DataFrame({TAGNAME: range(0, len(index))}, index=index) 23 | 24 | START_TIME_1_EPOCH = ( 25 | START_TIME_1 - pd.to_datetime("1970-01-01", utc=True) 26 | ) // pd.Timedelta( 27 | "1s" 28 | ) # 1577880000 29 | END_TIME_1_EPOCH = ( 30 | END_TIME_1 - pd.to_datetime("1970-01-01", utc=True) 31 | ) // pd.Timedelta( 32 | "1s" 33 | ) # 1577883600 34 | 35 | START_TIME_2 = pd.to_datetime("2020-01-01 13:30:00", utc=True) 36 | END_TIME_2 = 
pd.to_datetime("2020-01-01 14:00:00", utc=True) 37 | index = pd.date_range(start=START_TIME_2, end=END_TIME_2, freq=FREQ, name="time") 38 | DF2 = pd.DataFrame({TAGNAME: range(0, len(index))}, index=index) 39 | 40 | END_TIME_2_EPOCH = ( 41 | END_TIME_2 - pd.to_datetime("1970-01-01", utc=True) 42 | ) // pd.Timedelta( 43 | "1s" 44 | ) # 1577887200 45 | 46 | 47 | START_TIME_3 = pd.to_datetime("2020-01-01 12:40:00", utc=True) 48 | END_TIME_3 = pd.to_datetime("2020-01-01 13:40:00", utc=True) 49 | index = pd.date_range(start=START_TIME_3, end=END_TIME_3, freq=FREQ, name="time") 50 | DF3 = pd.DataFrame({TAGNAME: range(0, len(index))}, index=index) 51 | 52 | 53 | @pytest.fixture(autouse=True) # type: ignore[misc] 54 | def cache(tmp_path: Path) -> Generator[BucketCache, None, None]: 55 | cache = BucketCache(directory=tmp_path) 56 | yield cache 57 | 58 | 59 | def test_timestamp_to_epoch() -> None: 60 | # Any timezone or naïve should work 61 | timestamp = pd.to_datetime("1970-01-01 01:00:00", utc=True) 62 | assert timestamp_to_epoch(timestamp) == 3600 63 | timestamp = pd.to_datetime("1970-01-01 01:00:00", utc=False) 64 | assert timestamp_to_epoch(timestamp) == 3600 65 | timestamp = pd.to_datetime("1970-01-01 01:00:00", utc=True) 66 | timestamp = timestamp.tz_convert("Europe/Oslo") 67 | assert timestamp_to_epoch(timestamp) == 3600 68 | 69 | 70 | def test_safe_tagname() -> None: 71 | assert safe_tagname("ASGB.tt-___56_ _%_/_") == "ASGB_tt___56____" 72 | 73 | 74 | def test_get_intervals_from_dataset_name(cache: BucketCache) -> None: 75 | bad_tag = f"/tag1/INT/{START_TIME_1_EPOCH}_{END_TIME_1_EPOCH}" 76 | good_tag = f"/tag1/INT/_{START_TIME_1_EPOCH}_{END_TIME_1_EPOCH}" 77 | start, end = cache._get_intervals_from_dataset_name(bad_tag) 78 | assert start is None 79 | assert end is None # type: ignore[unreachable] 80 | start, end = cache._get_intervals_from_dataset_name(good_tag) 81 | assert start == START_TIME_1 82 | assert end == END_TIME_1 83 | 84 | 85 | def test_key_path_with_time(cache: BucketCache) -> None: 86 | assert ( 87 | cache._key_path( 88 | tagname=TAGNAME, 89 | read_type=READE_TYPE, 90 | ts=MINUTE, 91 | stepped=False, 92 | get_status=False, 93 | start=START_TIME_1, 94 | end=END_TIME_1, 95 | ) 96 | == f"$tag1$INT$s60$_{START_TIME_1_EPOCH}_{END_TIME_1_EPOCH}" 97 | ) 98 | 99 | 100 | def test_key_path_stepped(cache: BucketCache) -> None: 101 | assert ( 102 | cache._key_path( 103 | tagname=TAGNAME, 104 | read_type=READE_TYPE, 105 | ts=MINUTE, 106 | stepped=True, 107 | get_status=False, 108 | start=START_TIME_1, 109 | end=END_TIME_1, 110 | ) 111 | == f"$tag1$INT$s60$stepped$_{START_TIME_1_EPOCH}_{END_TIME_1_EPOCH}" 112 | ) 113 | 114 | 115 | def test_key_path_with_status(cache: BucketCache) -> None: 116 | assert ( 117 | cache._key_path( 118 | tagname=TAGNAME, 119 | read_type=READE_TYPE, 120 | ts=MINUTE, 121 | stepped=False, 122 | get_status=True, 123 | start=None, 124 | end=None, 125 | ) 126 | == "$tag1$INT$s60$status" 127 | ) 128 | 129 | 130 | def test_key_path_raw(cache: BucketCache) -> None: 131 | assert ( 132 | cache._key_path( 133 | tagname=TAGNAME, 134 | read_type=ReaderType.RAW, 135 | ts=MINUTE, 136 | stepped=False, 137 | get_status=False, 138 | start=None, 139 | end=None, 140 | ) 141 | == "$tag1$RAW" 142 | ) 143 | 144 | 145 | def test_get_missing_intervals(cache: BucketCache) -> None: 146 | cache.store( 147 | df=DF1, 148 | tagname=TAGNAME, 149 | read_type=READE_TYPE, 150 | ts=TS, 151 | stepped=False, 152 | get_status=False, 153 | start=START_TIME_1, 154 | end=END_TIME_1, 155 | ) 156 | 157 | 
cache.store( 158 | df=DF2, 159 | tagname=TAGNAME, 160 | read_type=READE_TYPE, 161 | ts=TS, 162 | stepped=False, 163 | get_status=False, 164 | start=START_TIME_2, 165 | end=END_TIME_2, 166 | ) 167 | 168 | # Perfect coverage, no missing intervals 169 | missing_intervals = cache.get_missing_intervals( 170 | tagname=TAGNAME, 171 | read_type=READE_TYPE, 172 | ts=TS, 173 | stepped=False, 174 | get_status=False, 175 | start=START_TIME_1, 176 | end=END_TIME_1, 177 | ) 178 | 179 | assert len(missing_intervals) == 0 180 | 181 | # Request subsection, no missing intervals 182 | missing_intervals = cache.get_missing_intervals( 183 | tagname=TAGNAME, 184 | read_type=READE_TYPE, 185 | ts=TS, 186 | stepped=False, 187 | get_status=False, 188 | start=START_TIME_1 + pd.Timedelta("5m"), 189 | end=END_TIME_1 - pd.Timedelta("5m"), 190 | ) 191 | 192 | assert len(missing_intervals) == 0 193 | 194 | # Request data from before to after, two missing intervals 195 | missing_intervals = cache.get_missing_intervals( 196 | tagname=TAGNAME, 197 | read_type=READE_TYPE, 198 | ts=TS, 199 | stepped=False, 200 | get_status=False, 201 | start=START_TIME_1 - pd.Timedelta("15m"), 202 | end=END_TIME_1 + pd.Timedelta("15m"), 203 | ) 204 | 205 | assert len(missing_intervals) == 2 206 | assert missing_intervals[0] == (START_TIME_1 - pd.Timedelta("15m"), START_TIME_1) 207 | assert missing_intervals[1] == (END_TIME_1, END_TIME_1 + pd.Timedelta("15m")) 208 | 209 | # Request data stretching from before first bucket, including 210 | # space between buckets, to after second bucket. Three missing intervals. 211 | missing_intervals = cache.get_missing_intervals( 212 | tagname=TAGNAME, 213 | read_type=READE_TYPE, 214 | ts=TS, 215 | stepped=False, 216 | get_status=False, 217 | start=START_TIME_1 - pd.Timedelta("15m"), 218 | end=END_TIME_2 + pd.Timedelta("15m"), 219 | ) 220 | 221 | assert len(missing_intervals) == 3 222 | assert missing_intervals[0] == (START_TIME_1 - pd.Timedelta("15m"), START_TIME_1) 223 | assert missing_intervals[1] == (END_TIME_1, START_TIME_2) 224 | assert missing_intervals[2] == (END_TIME_2, END_TIME_2 + pd.Timedelta("15m")) 225 | 226 | 227 | def test_get_intersecting_datasets(cache: BucketCache) -> None: 228 | cache.store( 229 | df=DF1, 230 | tagname=TAGNAME, 231 | read_type=READE_TYPE, 232 | ts=TS, 233 | stepped=False, 234 | get_status=False, 235 | start=START_TIME_1, 236 | end=END_TIME_1, 237 | ) 238 | 239 | cache.store( 240 | df=DF2, 241 | tagname=TAGNAME, 242 | read_type=READE_TYPE, 243 | ts=TS, 244 | stepped=False, 245 | get_status=False, 246 | start=START_TIME_2, 247 | end=END_TIME_2, 248 | ) 249 | 250 | # Perfect coverage 251 | intersecting_datasets = cache.get_intersecting_datasets( 252 | tagname=TAGNAME, 253 | read_type=READE_TYPE, 254 | ts=TS, 255 | stepped=False, 256 | get_status=False, 257 | start=START_TIME_1, 258 | end=END_TIME_1, 259 | ) 260 | 261 | assert len(intersecting_datasets) == 1 262 | 263 | # Request subsection 264 | intersecting_datasets = cache.get_intersecting_datasets( 265 | tagname=TAGNAME, 266 | read_type=READE_TYPE, 267 | ts=TS, 268 | stepped=False, 269 | get_status=False, 270 | start=START_TIME_1 + pd.Timedelta("5m"), 271 | end=END_TIME_1 - pd.Timedelta("5m"), 272 | ) 273 | 274 | assert len(intersecting_datasets) == 1 275 | 276 | # Request data from before to after 277 | intersecting_datasets = cache.get_intersecting_datasets( 278 | tagname=TAGNAME, 279 | read_type=READE_TYPE, 280 | ts=TS, 281 | stepped=False, 282 | get_status=False, 283 | start=START_TIME_1 - pd.Timedelta("15m"), 284 | 
end=END_TIME_1 + pd.Timedelta("15m"), 285 | ) 286 | 287 | assert len(intersecting_datasets) == 1 288 | 289 | # Request data stretching from before first bucket, including 290 | # space between buckets, to after second bucket. 291 | intersecting_datasets = cache.get_intersecting_datasets( 292 | tagname=TAGNAME, 293 | read_type=READE_TYPE, 294 | ts=TS, 295 | stepped=False, 296 | get_status=False, 297 | start=START_TIME_1 - pd.Timedelta("15m"), 298 | end=END_TIME_2 + pd.Timedelta("15m"), 299 | ) 300 | 301 | assert len(intersecting_datasets) == 2 302 | 303 | # Request data stretching from before first bucket, to 304 | # inside second bucket. 305 | intersecting_datasets = cache.get_intersecting_datasets( 306 | tagname=TAGNAME, 307 | read_type=READE_TYPE, 308 | ts=TS, 309 | stepped=False, 310 | get_status=False, 311 | start=START_TIME_1 - pd.Timedelta("15m"), 312 | end=END_TIME_2 - pd.Timedelta("15m"), 313 | ) 314 | 315 | assert len(intersecting_datasets) == 2 316 | 317 | # Request data stretching from inside first bucket, to 318 | # inside second bucket. 319 | intersecting_datasets = cache.get_intersecting_datasets( 320 | tagname=TAGNAME, 321 | read_type=READE_TYPE, 322 | ts=TS, 323 | stepped=False, 324 | get_status=False, 325 | start=START_TIME_1 + pd.Timedelta("15m"), 326 | end=END_TIME_2 - pd.Timedelta("15m"), 327 | ) 328 | 329 | assert len(intersecting_datasets) == 2 330 | 331 | 332 | def test_store_metadata(cache: BucketCache) -> None: 333 | cache.put_metadata(key=TAGNAME, value={"unit": "%", "desc": "Some description"}) 334 | cache.put_metadata(key=TAGNAME, value={"max": 60}) 335 | r = cache.get_metadata(TAGNAME, "unit") 336 | assert isinstance(r, dict) 337 | assert "%" == r["unit"] 338 | r = cache.get_metadata(TAGNAME, ["unit", "max", "noworky"]) 339 | assert isinstance(r, dict) 340 | assert "%" == r["unit"] 341 | assert 60 == r["max"] 342 | assert "noworky" not in r 343 | 344 | 345 | def test_store_empty_df(cache: BucketCache) -> None: 346 | # Empty dataframes should not be stored (note: df full of NaN is not empty!) 
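    # Illustrative sketch of that distinction in plain pandas (the guard inside
    # BucketCache.store is assumed to check something equivalent to `df.empty`):
    #
    #     pd.DataFrame({TAGNAME: []}).empty              # True  -> not stored
    #     pd.DataFrame({TAGNAME: [float("nan")]}).empty  # False -> stored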
347 | df = pd.DataFrame({TAGNAME: []}) 348 | cache.store( 349 | df=df, 350 | tagname=TAGNAME, 351 | read_type=READE_TYPE, 352 | ts=TS, 353 | stepped=False, 354 | get_status=False, 355 | start=START_TIME_1, 356 | end=END_TIME_1, 357 | ) # Specify ts to ensure correct key /if/ stored 358 | df_read = cache.fetch( 359 | tagname=TAGNAME, 360 | read_type=READE_TYPE, 361 | ts=TS, 362 | stepped=False, 363 | get_status=False, 364 | start=START_TIME_1, 365 | end=END_TIME_1, 366 | ) 367 | pd.testing.assert_frame_equal(df_read, pd.DataFrame()) 368 | 369 | cache.store( 370 | df=DF1, 371 | tagname=TAGNAME, 372 | read_type=READE_TYPE, 373 | ts=TS, 374 | stepped=False, 375 | get_status=False, 376 | start=START_TIME_1, 377 | end=END_TIME_1, 378 | ) 379 | df_read = cache.fetch( 380 | tagname=TAGNAME, 381 | read_type=READE_TYPE, 382 | ts=TS, 383 | stepped=False, 384 | get_status=False, 385 | start=START_TIME_1, 386 | end=END_TIME_1, 387 | ) 388 | pd.testing.assert_frame_equal(DF1, df_read, check_freq=False) 389 | 390 | cache.store( 391 | df=df, 392 | tagname=TAGNAME, 393 | read_type=READE_TYPE, 394 | ts=TS, 395 | stepped=False, 396 | get_status=False, 397 | start=START_TIME_1, 398 | end=END_TIME_1, 399 | ) # Specify ts to ensure correct key /if/ stored 400 | df_read = cache.fetch( 401 | tagname=TAGNAME, 402 | read_type=READE_TYPE, 403 | ts=TS, 404 | stepped=False, 405 | get_status=False, 406 | start=START_TIME_1, 407 | end=END_TIME_1, 408 | ) 409 | pd.testing.assert_frame_equal(DF1, df_read, check_freq=False) 410 | 411 | 412 | def test_store_single_df(cache: BucketCache) -> None: 413 | cache.store( 414 | df=DF1, 415 | tagname=TAGNAME, 416 | read_type=READE_TYPE, 417 | ts=TS, 418 | stepped=False, 419 | get_status=False, 420 | start=START_TIME_1, 421 | end=END_TIME_1, 422 | ) 423 | df_read = cache.fetch( 424 | tagname=TAGNAME, 425 | read_type=READE_TYPE, 426 | ts=TS, 427 | stepped=False, 428 | get_status=False, 429 | start=START_TIME_1, 430 | end=END_TIME_1, 431 | ) 432 | pd.testing.assert_frame_equal(DF1, df_read, check_freq=False) 433 | 434 | 435 | def test_fetch(cache: BucketCache) -> None: 436 | cache.store( 437 | df=DF1, 438 | tagname=TAGNAME, 439 | read_type=READE_TYPE, 440 | ts=TS, 441 | stepped=False, 442 | get_status=False, 443 | start=START_TIME_1, 444 | end=END_TIME_1, 445 | ) 446 | cache.store( 447 | df=DF2, 448 | tagname=TAGNAME, 449 | read_type=READE_TYPE, 450 | ts=TS, 451 | stepped=False, 452 | get_status=False, 453 | start=START_TIME_2, 454 | end=END_TIME_2, 455 | ) 456 | 457 | df_read = cache.fetch( 458 | tagname=TAGNAME, 459 | read_type=READE_TYPE, 460 | ts=TS, 461 | stepped=False, 462 | get_status=False, 463 | start=START_TIME_1, 464 | end=END_TIME_1 - pd.Timedelta("15m"), 465 | ) 466 | pd.testing.assert_frame_equal( 467 | DF1.loc[START_TIME_1 : END_TIME_1 - pd.Timedelta("15m")], 468 | df_read, 469 | check_freq=False, 470 | ) 471 | 472 | df_read = cache.fetch( 473 | tagname=TAGNAME, 474 | read_type=READE_TYPE, 475 | ts=TS, 476 | stepped=False, 477 | get_status=False, 478 | start=START_TIME_1 - pd.Timedelta("15m"), 479 | end=END_TIME_1 + pd.Timedelta("15m"), 480 | ) 481 | pd.testing.assert_frame_equal(DF1, df_read, check_freq=False) 482 | 483 | df_read = cache.fetch( 484 | tagname=TAGNAME, 485 | read_type=READE_TYPE, 486 | ts=TS, 487 | stepped=False, 488 | get_status=False, 489 | start=START_TIME_1 - pd.Timedelta("15m"), 490 | end=END_TIME_2 + pd.Timedelta("15m"), 491 | ) 492 | pd.testing.assert_frame_equal(pd.concat([DF1, DF2]), df_read, check_freq=False) 493 | 494 | 495 | def 
test_store_overlapping_df(cache: BucketCache) -> None: 496 | cache.store( 497 | df=DF1, 498 | tagname=TAGNAME, 499 | read_type=READE_TYPE, 500 | ts=TS, 501 | stepped=False, 502 | get_status=False, 503 | start=START_TIME_1, 504 | end=END_TIME_1, 505 | ) 506 | cache.store( 507 | df=DF2, 508 | tagname=TAGNAME, 509 | read_type=READE_TYPE, 510 | ts=TS, 511 | stepped=False, 512 | get_status=False, 513 | start=START_TIME_2, 514 | end=END_TIME_2, 515 | ) 516 | cache.store( 517 | df=DF3, 518 | tagname=TAGNAME, 519 | read_type=READE_TYPE, 520 | ts=TS, 521 | stepped=False, 522 | get_status=False, 523 | start=START_TIME_3, 524 | end=END_TIME_3, 525 | ) 526 | leaves = None 527 | for key in cache.iterkeys(): 528 | if len(key) > 0: 529 | leaves = key 530 | _, start, end = leaves.split("_") # type: ignore[union-attr] 531 | assert int(start) == START_TIME_1_EPOCH 532 | assert int(end) == END_TIME_2_EPOCH 533 | df_read = cache.fetch( 534 | tagname=TAGNAME, 535 | read_type=READE_TYPE, 536 | ts=TS, 537 | stepped=False, 538 | get_status=False, 539 | start=START_TIME_1, 540 | end=END_TIME_2, 541 | ) 542 | df_expected = pd.concat( 543 | [ 544 | DF1[START_TIME_1 : START_TIME_3 - pd.Timedelta(TS, unit="s")], 545 | DF3[START_TIME_3:END_TIME_3], 546 | DF2[END_TIME_3 + pd.Timedelta(TS, unit="s") : END_TIME_2], 547 | ] 548 | ) 549 | 550 | pd.testing.assert_frame_equal( 551 | df_read, 552 | df_expected, 553 | check_freq=False, 554 | ) 555 | -------------------------------------------------------------------------------- /tests/test_cache.py: -------------------------------------------------------------------------------- 1 | import os 2 | from datetime import timedelta 3 | from pathlib import Path 4 | from typing import Generator 5 | 6 | import pandas as pd 7 | import pytest 8 | 9 | from tagreader.cache import BaseCache, SmartCache, safe_tagname 10 | from tagreader.utils import ReaderType 11 | 12 | os.environ["NUMEXPR_MAX_THREADS"] = "8" 13 | MINUTE = timedelta(seconds=60) 14 | 15 | 16 | @pytest.fixture # type: ignore[misc] 17 | def data() -> Generator[pd.DataFrame, None, None]: 18 | length = 10 19 | df_total = pd.DataFrame( 20 | {"tag1": range(0, length)}, 21 | index=pd.date_range( 22 | start="2018-01-18 05:00:00", freq="60s", periods=length, name="time" 23 | ), 24 | ) 25 | yield df_total 26 | 27 | 28 | def test_base_cache(tmp_path: Path) -> None: 29 | webidcache = BaseCache(directory=tmp_path) 30 | 31 | webid = "F1DPwgwnpmLxqECAJV2HpxdobgmQIAAAUElMQUIuRVFVSU5PUi5DT01cMTMyMC9BSU0sMTctVFQtNzE5Ng" 32 | tag = "example_tag_name" 33 | webidcache[tag] = webid 34 | 35 | del webidcache 36 | 37 | webidcache = BaseCache(directory=tmp_path) 38 | 39 | assert "example_tag_name" in webidcache 40 | assert webidcache["example_tag_name"] == webid 41 | 42 | 43 | def test_safe_tagname() -> None: 44 | assert safe_tagname("ASGB.tt-___56_ _%_/_") == "ASGB_tt___56____" 45 | 46 | 47 | def test_key_path(cache: SmartCache) -> None: 48 | assert cache.size_limit == int(4e9) 49 | 50 | 51 | def test_cache_single_store_and_fetch( 52 | cache: SmartCache, data: pd.DataFrame, get_status: bool = False 53 | ) -> None: 54 | cache.store( 55 | df=data, 56 | read_type=ReaderType.INT, 57 | get_status=get_status, 58 | tagname="tag1", 59 | ts=MINUTE, 60 | ) 61 | df_read = cache.fetch( 62 | tagname="tag1", 63 | read_type=ReaderType.INT, 64 | ts=MINUTE, 65 | get_status=get_status, 66 | start=None, 67 | end=None, 68 | ) 69 | pd.testing.assert_frame_equal(data, df_read) 70 | 71 | 72 | def test_cache_multiple_store_single_fetch( 73 | cache: SmartCache, data: 
pd.DataFrame, get_status: bool = False 74 | ) -> None: 75 | df1 = data[0:3] 76 | df2 = data[2:10] 77 | cache.store( 78 | df=df1, read_type=ReaderType.INT, tagname="tag1", ts=MINUTE, get_status=False 79 | ) 80 | cache.store( 81 | df=df2, read_type=ReaderType.INT, tagname="tag1", ts=MINUTE, get_status=False 82 | ) 83 | df_read = cache.fetch( 84 | tagname="tag1", 85 | read_type=ReaderType.INT, 86 | ts=MINUTE, 87 | get_status=False, 88 | start=None, 89 | end=None, 90 | ) 91 | pd.testing.assert_frame_equal(df_read, data) 92 | 93 | 94 | def test_interval_reads( 95 | cache: SmartCache, data: pd.DataFrame, get_status: bool = False 96 | ) -> None: 97 | cache.store( 98 | df=data, 99 | read_type=ReaderType.INT, 100 | get_status=get_status, 101 | tagname="tag1", 102 | ts=MINUTE, 103 | ) 104 | start_oob = pd.to_datetime("2018-01-18 04:55:00") 105 | start = pd.to_datetime("2018-01-18 05:05:00") 106 | end = pd.to_datetime("2018-01-18 05:08:00") 107 | end_oob = pd.to_datetime("2018-01-18 06:00:00") 108 | 109 | df_read = cache.fetch( 110 | tagname="tag1", 111 | read_type=ReaderType.INT, 112 | ts=MINUTE, 113 | start=start, 114 | end=None, 115 | get_status=get_status, 116 | ) 117 | pd.testing.assert_frame_equal(data[start:], df_read) 118 | df_read = cache.fetch( 119 | tagname="tag1", 120 | read_type=ReaderType.INT, 121 | ts=MINUTE, 122 | start=None, 123 | end=end, 124 | get_status=get_status, 125 | ) 126 | pd.testing.assert_frame_equal(data[:end], df_read) 127 | df_read = cache.fetch( 128 | tagname="tag1", 129 | read_type=ReaderType.INT, 130 | ts=MINUTE, 131 | start=start_oob, 132 | end=None, 133 | get_status=get_status, 134 | ) 135 | pd.testing.assert_frame_equal(data, df_read) 136 | df_read = cache.fetch( 137 | tagname="tag1", 138 | read_type=ReaderType.INT, 139 | ts=MINUTE, 140 | start=None, 141 | end=end_oob, 142 | get_status=get_status, 143 | ) 144 | pd.testing.assert_frame_equal(data, df_read) 145 | df_read = cache.fetch( 146 | tagname="tag1", 147 | read_type=ReaderType.INT, 148 | ts=MINUTE, 149 | start=start, 150 | end=end, 151 | get_status=get_status, 152 | ) 153 | pd.testing.assert_frame_equal(data[start:end], df_read) 154 | 155 | 156 | def test_store_empty_df( 157 | cache: SmartCache, data: pd.DataFrame, get_status: bool = False 158 | ) -> None: 159 | # Empty dataframes should not be stored (note: df full of NaN is not empty!) 
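    # Flow below: store real data first, then attempt to store an empty frame
    # under the same key, and verify that the previously cached data survives.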
160 | cache.store( 161 | df=data, 162 | read_type=ReaderType.INT, 163 | get_status=get_status, 164 | tagname="tag1", 165 | ts=MINUTE, 166 | ) 167 | df = pd.DataFrame({"tag1": []}) 168 | cache.store( 169 | df=df, read_type=ReaderType.INT, ts=MINUTE, tagname="tag1", get_status=False 170 | ) # Specify ts to ensure correct key /if/ stored 171 | df_read = cache.fetch( 172 | tagname="tag1", 173 | read_type=ReaderType.INT, 174 | get_status=get_status, 175 | ts=MINUTE, 176 | start=None, 177 | end=None, 178 | ) 179 | pd.testing.assert_frame_equal(data, df_read) 180 | 181 | 182 | def test_store_metadata(cache: SmartCache) -> None: 183 | cache.put_metadata("tag1", {"unit": "%", "desc": "Some description"}) 184 | cache.put_metadata("tag1", {"max": 60}) 185 | r = cache.get_metadata("tag1", "unit") 186 | assert isinstance(r, dict) 187 | assert "%" == r["unit"] 188 | r = cache.get_metadata("tag1", ["unit", "max", "noworky"]) 189 | assert isinstance(r, dict) 190 | assert "%" == r["unit"] 191 | assert 60 == r["max"] 192 | assert "noworky" not in r 193 | 194 | 195 | def test_to_dst_skips_time(cache: SmartCache, get_status: bool = False) -> None: 196 | index = pd.date_range( 197 | start="2018-03-25 01:50:00", 198 | end="2018-03-25 03:30:00", 199 | tz="Europe/Oslo", 200 | freq="600s", 201 | name="time", 202 | ) 203 | index.freq = None # type: ignore[misc] 204 | df = pd.DataFrame({"tag1": range(0, len(index))}, index=index) 205 | assert ( 206 | df.loc["2018-03-25 01:50:00":"2018-03-25 03:10:00"].size == (2 + 1 * 6 + 1) - 6 # type: ignore[misc] 207 | ) 208 | cache.store( 209 | df=df, 210 | read_type=ReaderType.INT, 211 | get_status=get_status, 212 | tagname="tag1", 213 | ts=MINUTE, 214 | ) 215 | df_read = cache.fetch( 216 | tagname="tag1", 217 | read_type=ReaderType.INT, 218 | ts=MINUTE, 219 | get_status=get_status, 220 | start=None, 221 | end=None, 222 | ) 223 | pd.testing.assert_frame_equal(df_read, df) 224 | 225 | 226 | def test_from_dst_folds_time(cache: SmartCache, get_status: bool = False) -> None: 227 | index = pd.date_range( 228 | start="2017-10-29 00:30:00", 229 | end="2017-10-29 04:30:00", 230 | tz="Europe/Oslo", 231 | freq="600s", 232 | name="time", 233 | ) 234 | index.freq = None # type: ignore[misc] 235 | df = pd.DataFrame({"tag1": range(0, len(index))}, index=index) 236 | assert len(df) == (4 + 1) * 6 + 1 237 | # Time exists inside fold: 238 | assert ( 239 | df["tag1"].loc["2017-10-29 01:10:00+02:00":"2017-10-29 01:50:00+02:00"].size # type: ignore[misc] 240 | == 5 241 | ) 242 | # Time inside fold is always included: 243 | assert ( 244 | df.loc["2017-10-29 01:50:00":"2017-10-29 03:10:00"].size == 2 + (1 + 1) * 6 + 1 # type: ignore[misc] 245 | ) 246 | cache.store( 247 | df=df, 248 | read_type=ReaderType.INT, 249 | get_status=get_status, 250 | tagname="tag1", 251 | ts=MINUTE, 252 | ) 253 | df_read = cache.fetch( 254 | tagname="tag1", 255 | read_type=ReaderType.INT, 256 | ts=MINUTE, 257 | get_status=get_status, 258 | start=None, 259 | end=None, 260 | ) 261 | pd.testing.assert_frame_equal(df_read, df) 262 | -------------------------------------------------------------------------------- /tests/test_clients.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime, timedelta 2 | 3 | import pandas as pd 4 | import pytest 5 | import pytz 6 | 7 | from tagreader.clients import IMSClient, get_missing_intervals, get_next_timeslice 8 | from tagreader.utils import IMSType, ReaderType 9 | 10 | 11 | def test_init_client_without_cache() -> None: 12 | 
client = IMSClient(datasource="mock", imstype=IMSType.PIWEBAPI, cache=None) 13 | assert not client.cache 14 | 15 | 16 | def test_init_client_with_tzinfo() -> None: 17 | """ 18 | Currently testing valid timezone 19 | """ 20 | client = IMSClient( 21 | datasource="mock", imstype=IMSType.PIWEBAPI, cache=None, tz="US/Eastern" 22 | ) 23 | print(client.tz) 24 | assert client.tz == pytz.timezone("US/Eastern") 25 | 26 | client = IMSClient( 27 | datasource="mock", 28 | imstype=IMSType.PIWEBAPI, 29 | cache=None, 30 | tz=pytz.timezone("US/Eastern"), 31 | ) 32 | print(client.tz) 33 | assert client.tz == pytz.timezone("US/Eastern") 34 | 35 | client = IMSClient( 36 | datasource="mock", imstype=IMSType.PIWEBAPI, cache=None, tz="Europe/Oslo" 37 | ) 38 | print(client.tz) 39 | assert client.tz == pytz.timezone("Europe/Oslo") 40 | 41 | client = IMSClient( 42 | datasource="mock", imstype=IMSType.PIWEBAPI, cache=None, tz="US/Central" 43 | ) 44 | print(client.tz) 45 | assert client.tz == pytz.timezone("US/Central") 46 | 47 | client = IMSClient(datasource="mock", imstype=IMSType.PIWEBAPI, cache=None) 48 | print(client.tz) 49 | assert client.tz == pytz.timezone("Europe/Oslo") 50 | 51 | with pytest.raises(ValueError): 52 | _ = IMSClient( 53 | datasource="mock", imstype=IMSType.PIWEBAPI, cache=None, tz="WRONGVALUE" 54 | ) 55 | 56 | 57 | def test_init_client_with_datasource() -> None: 58 | """ 59 | Currently we initialize SmartCache by default, and the user is not able to specify no-cache when creating the 60 | client. This will change to no cache by default in version 5. 61 | """ 62 | client = IMSClient( 63 | datasource="mock", imstype=IMSType.PIWEBAPI, cache=None, tz="US/Eastern" 64 | ) 65 | print(client.tz) 66 | assert client.tz == pytz.timezone("US/Eastern") 67 | client = IMSClient( 68 | datasource="mock", imstype=IMSType.PIWEBAPI, cache=None, tz="US/Central" 69 | ) 70 | print(client.tz) 71 | assert client.tz == pytz.timezone("US/Central") 72 | client = IMSClient(datasource="mock", imstype=IMSType.PIWEBAPI, cache=None) 73 | print(client.tz) 74 | assert client.tz == pytz.timezone("Europe/Oslo") 75 | with pytest.raises(ValueError): 76 | _ = IMSClient( 77 | datasource="mock", imstype=IMSType.PIWEBAPI, cache=None, tz="WRONGVALUE" 78 | ) 79 | 80 | 81 | def test_get_next_timeslice() -> None: 82 | start = pd.to_datetime("2018-01-02 14:00:00") 83 | end = pd.to_datetime("2018-01-02 14:15:00") 84 | # taglist = ['tag1', 'tag2', 'tag3'] 85 | ts = timedelta(seconds=60) 86 | res = get_next_timeslice(start=start, end=end, ts=ts, max_steps=20) 87 | assert start, start + timedelta(seconds=6) == res 88 | res = get_next_timeslice(start=start, end=end, ts=ts, max_steps=100000) 89 | assert start, end == res 90 | 91 | 92 | def test_get_missing_intervals() -> None: 93 | length = 10 94 | ts = 60 95 | data = {"tag1": range(0, length)} 96 | idx = pd.date_range( 97 | start="2018-01-18 05:00:00", freq=f"{ts}s", periods=length, name="time" 98 | ) 99 | df_total = pd.DataFrame(data, index=idx) 100 | df = pd.concat([df_total.iloc[0:2], df_total.iloc[3:4], df_total.iloc[8:]]) 101 | missing = get_missing_intervals( 102 | df=df, 103 | start=datetime(2018, 1, 18, 5, 0, 0), 104 | end=datetime(2018, 1, 18, 6, 0, 0), 105 | ts=timedelta(seconds=ts), 106 | read_type=ReaderType.INT, 107 | ) 108 | assert missing[0] == (idx[2], idx[2]) 109 | assert missing[1] == (idx[4], idx[7]) 110 | assert missing[2] == ( 111 | datetime(2018, 1, 18, 5, 10, 0), 112 | datetime(2018, 1, 18, 6, 0, 0), 113 | ) 114 | 
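# Illustrative timeline for test_get_missing_intervals above, derived from its
# asserts (get_missing_intervals is assumed to return gaps as (start, end) tuples):
#
#   time:     05:00 05:01 05:02 05:03 05:04 .. 05:07 05:08 05:09   ->   06:00
#   present:    x     x     .     x     .   ..   .     x     x
#   gaps:     (idx[2], idx[2]), (idx[4], idx[7]), (05:10:00, 06:00:00)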
-------------------------------------------------------------------------------- /tests/test_utils.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import os 3 | 4 | import pandas as pd 5 | import pytz 6 | from pytz import timezone 7 | 8 | from tagreader.utils import ensure_datetime_with_tz, is_equinor, urljoin 9 | 10 | is_GITHUBACTION = "GITHUB_ACTION" in os.environ 11 | is_AZUREPIPELINE = "TF_BUILD" in os.environ 12 | 13 | 14 | def test_ensure_is_datetime_string() -> None: 15 | assert ensure_datetime_with_tz("10. jan. 2018 13:45:15") == timezone( 16 | "Europe/Oslo" 17 | ).localize(datetime.datetime(2018, 1, 10, 13, 45, 15)) 18 | assert ensure_datetime_with_tz("01.02.03 00:00:00") == timezone( 19 | "Europe/Oslo" 20 | ).localize(datetime.datetime(2003, 2, 1, 0, 0, 0)) 21 | assert ensure_datetime_with_tz("02.01.03 00:00:00") == ensure_datetime_with_tz( 22 | "2003-02-01 0:00:00am" 23 | ) 24 | assert ensure_datetime_with_tz( 25 | "02.01.03 00:00:00", pytz.timezone("America/Sao_Paulo") 26 | ) == timezone("America/Sao_Paulo").localize(datetime.datetime(2003, 1, 2, 0, 0, 0)) 27 | assert ensure_datetime_with_tz( 28 | "02.01.03 00:00:00", pytz.timezone("Brazil/East") 29 | ) == timezone("Brazil/East").localize(datetime.datetime(2003, 1, 2, 0, 0, 0)) 30 | assert ensure_datetime_with_tz( 31 | timezone("Brazil/East").localize(datetime.datetime(2003, 1, 2, 0, 0, 0)), 32 | pytz.timezone("Brazil/East"), 33 | ) == timezone("Brazil/East").localize(datetime.datetime(2003, 1, 2, 0, 0, 0)) 34 | 35 | 36 | def test_ensure_is_datetime_pd_timestamp() -> None: 37 | ts = datetime.datetime(2018, 1, 10, 13, 45, 15) 38 | ts_with_tz = timezone("Europe/Oslo").localize(ts) 39 | assert ensure_datetime_with_tz(ts_with_tz) == ts_with_tz 40 | assert ensure_datetime_with_tz(ts) == ts_with_tz 41 | 42 | 43 | def test_ensure_is_datetime_datetime() -> None: 44 | dt = datetime.datetime(2018, 1, 10, 13, 45, 15) 45 | dt_with_tz = timezone("Europe/Oslo").localize(dt) 46 | 47 | assert ensure_datetime_with_tz(dt_with_tz) == dt_with_tz 48 | assert ensure_datetime_with_tz(dt) == dt_with_tz 49 | 50 | 51 | def test_urljoin() -> None: 52 | assert urljoin("https://some.where/to", "go") == "https://some.where/to/go" 53 | assert urljoin("https://some.where/to/", "go") == "https://some.where/to/go" 54 | assert urljoin("https://some.where/to", "/go") == "https://some.where/to/go" 55 | assert urljoin("https://some.where/to/", "/go") == "https://some.where/to/go" 56 | assert urljoin("https://some.where/to", "go/") == "https://some.where/to/go/" 57 | 58 | 59 | def test_equinor() -> None: 60 | if is_GITHUBACTION: 61 | assert is_equinor() is False 62 | else: 63 | assert is_equinor() is True 64 | --------------------------------------------------------------------------------