├── .github ├── ISSUE_TEMPLATE │ ├── bug_report.md │ └── feature_request.md └── workflows │ ├── build.yml │ └── publish-to-test-pypi.yml ├── .gitignore ├── .readthedocs.yml ├── LICENSE ├── MANIFEST.in ├── README.rst ├── artwork ├── tinyflux-dark.png ├── tinyflux-light.png ├── tinyfluxdb-dark.png └── tinyfluxdb-light.png ├── docs ├── Makefile ├── make.bat └── source │ ├── _static │ └── style.css │ ├── changelog.rst │ ├── conf.py │ ├── contributing-guidelines.rst │ ├── contributing-philosophy.rst │ ├── contributing-tooling.rst │ ├── data-elements.rst │ ├── design-principles.rst │ ├── exploring-data.rst │ ├── getting-started.rst │ ├── index.rst │ ├── installing-tinyflux.rst │ ├── internals.rst │ ├── intro.rst │ ├── measurements.rst │ ├── preparing-data.rst │ ├── querying-data.rst │ ├── removing-data.rst │ ├── time.rst │ ├── tinyflux.rst │ ├── tips.rst │ ├── updating-data.rst │ └── writing-data.rst ├── examples ├── 1_initializing_and_loading_new_db.ipynb ├── 2_analytics_workflow.ipynb ├── 3_iot_datastore_with_mqtt.py ├── 4_backing_up_tinyflux_at_the_edge.py ├── README.rst ├── example_data │ ├── ca_aqi_2019-2020.tinyflux │ ├── cbsa_ca_2019.geojson │ └── daily_aqi_by_cbsa_ca_2019-2020.csv └── requirements.txt ├── mypy.ini ├── pyproject.toml ├── requirements.txt ├── setup.cfg ├── tests ├── __init__.py ├── conftest.py ├── test_index.py ├── test_measurement.py ├── test_point.py ├── test_queries.py ├── test_storages.py ├── test_tinyflux.py └── test_utils.py └── tinyflux ├── __init__.py ├── database.py ├── index.py ├── measurement.py ├── point.py ├── py.typed ├── queries.py ├── storages.py ├── utils.py └── version.py /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | A clear and concise description of what the bug is. 12 | 13 | **To Reproduce** 14 | Steps to reproduce the behavior: 15 | 1. Go to '...' 16 | 2. Click on '....' 17 | 3. Scroll down to '....' 18 | 4. See error 19 | 20 | **Expected behavior** 21 | A clear and concise description of what you expected to happen. 22 | 23 | **Screenshots** 24 | If applicable, add screenshots to help explain your problem. 25 | 26 | **Additional context** 27 | Add any other context about the problem here. 28 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? Please describe.** 11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 12 | 13 | **Describe the solution you'd like** 14 | A clear and concise description of what you want to happen. 15 | 16 | **Describe alternatives you've considered** 17 | A clear and concise description of any alternative solutions or features you've considered. 18 | 19 | **Additional context** 20 | Add any other context or screenshots about the feature request here. 
21 |
--------------------------------------------------------------------------------
/.github/workflows/build.yml:
--------------------------------------------------------------------------------
1 | name: CI
2 |
3 | on: [push, pull_request]
4 | jobs:
5 |   build:
6 |     runs-on: ${{ matrix.platform }}
7 |     strategy:
8 |       matrix:
9 |         platform: [ubuntu-latest, windows-latest]
10 |         python-version:
11 |           ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12", "pypy-3.9"]
12 |     steps:
13 |       - uses: actions/checkout@v3
14 |       - name: Set up Python ${{ matrix.python-version }} on platform ${{ matrix.platform }}
15 |         uses: actions/setup-python@v4
16 |         with:
17 |           python-version: ${{ matrix.python-version }}
18 |       - name: Assert no dependencies for TinyFlux
19 |         run: python -c "import tinyflux"
20 |       - name: Install pip requirements
21 |         run: |
22 |           pip install --upgrade pip
23 |           pip install -r requirements.txt
24 |       - name: Check README
25 |         run: rstcheck README.rst
26 |       - name: Check code formatting
27 |         run: black --check tinyflux/ tests/ examples/
28 |       - name: Check code style
29 |         run: flake8 tinyflux/ tests/ examples/
30 |       - name: Check static typing
31 |         run: mypy tinyflux/ tests/ examples/
32 |       - name: Run tests
33 |         run: coverage run --source tinyflux/ -m pytest && coverage report -m
34 |       - name: Upload Coverage to Codecov
35 |         uses: codecov/codecov-action@v3
36 |         with:
37 |           token: ${{ secrets.CODECOV_TOKEN }}
38 |
--------------------------------------------------------------------------------
/.github/workflows/publish-to-test-pypi.yml:
--------------------------------------------------------------------------------
1 | name: Publish to PyPI and TestPyPI
2 |
3 | on:
4 |   push:
5 |     tags:
6 |       - v*.*.*
7 |
8 | jobs:
9 |   build-and-publish:
10 |     name: Build and publish to PyPI and TestPyPI
11 |     runs-on: ubuntu-latest
12 |     steps:
13 |       - uses: actions/checkout@v3
14 |       - name: Set up Python 3.11
15 |         uses: actions/setup-python@v4
16 |         with:
17 |           python-version: "3.11"
18 |       - name: Install build
19 |         run: |
20 |           pip install --upgrade pip
21 |           pip install build
22 |       - name: Build a binary wheel and a source tarball
23 |         run: |
24 |           echo $GITHUB_REF_NAME >> version.txt
25 |           python -m build
26 |       - name: Publish distribution to PyPI
27 |         uses: pypa/gh-action-pypi-publish@release/v1
28 |         with:
29 |           password: ${{ secrets.PYPI_API_TOKEN }}
30 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte code
2 | *.pyc
3 |
4 | # Jupyter Notebooks
5 | /**.ipynb
6 | **checkpoint.ipynb
7 |
8 | # VSCode
9 | .vscode*
10 |
11 | # Pyenv
12 | .python-version
13 |
14 | # Coverage.py
15 | .coverage
16 |
17 | # Mac OS
18 | **.DS_Store
19 | **.icloud
20 |
21 | # CSVs
22 | /**.csv
23 |
24 | # Sphinx
25 | docs/build/**
26 |
27 | # Setuptools
28 | dist/**
29 | build/**
30 | **.egg-info
31 | **.whl
32 | **.tar.gz
33 |
34 | # Other
35 | performance_tests/**
--------------------------------------------------------------------------------
/.readthedocs.yml:
--------------------------------------------------------------------------------
1 | version: 2
2 |
3 | python:
4 |   version: 3.8
5 |   install:
6 |     - requirements: requirements.txt
7 |     - method: pip
8 |       path: .
9 | extra_requirements: 10 | - docs 11 | 12 | formats: all 13 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Justin Fung 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include LICENSE 2 | recursive-include tests *.py -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | .. image:: https://github.com/citrusvanilla/tinyflux/blob/master/artwork/tinyfluxdb-light.png?raw=true#gh-dark-mode-only 2 | :width: 500px 3 | 4 | .. image:: https://github.com/citrusvanilla/tinyflux/blob/master/artwork/tinyfluxdb-dark.png?raw=true#gh-light-mode-only 5 | :width: 500px 6 | 7 | TinyFlux is the tiny time series database optimized for your happiness 😎 8 | 9 | TinyFlux is the time series version of `TinyDB `__ that is written in Python and has no external dependencies. It's a great companion for small analytics workflows and apps, as well as at-home IOT data stores. TinyFlux has 100% test coverage, over 75,000 downloads, and no open issues. 10 | 11 | |Docs| |Version| |Downloads| |Coverage| |Build Status| 12 | 13 | 14 | Quick Links 15 | *********** 16 | 17 | - `Example Code Snippets`_ 18 | - `Full Example Notebooks and Scripts `__ 19 | - `Documentation `__ 20 | - `Changelog `__ 21 | - `Contributing`_ 22 | 23 | 24 | Installation 25 | ************ 26 | 27 | TinyFlux is hosted at `PyPI `__ and is easily downloadable with ``pip``. TinyFlux has been tested with Python 3.7 - 3.12 and PyPy-3.9 on Linux and Windows platforms. 28 | 29 | .. code-block:: bash 30 | 31 | $ pip install tinyflux 32 | 33 | 34 | Introduction 35 | ************ 36 | 37 | TinyFlux is: 38 | 39 | - **optimized for your happiness:** TinyFlux is designed to be simple and fun to use by providing a clean API that can be learned in about 90 seconds. 40 | 41 | - **time-centric:** Python datetime objects are first-class citizens, and both the storage and queries are optimized for time above all else. 42 | 43 | - **human-friendly:** The primary datastore is a CSV, making your database human-readable from the very first write. 
No need to use SQL to investigate your data, just open the DB file in any tabular-friendly application.
44 |
45 | - **pure Python:** TinyFlux needs neither an external server nor any dependencies.
46 |
47 | - **tiny:** TinyFlux is about 150kb, unzipped. The current source code has 4,000 lines of code (with about 50% documentation) and 4,000 lines of tests.
48 |
49 | - **developed for modern Python:** TinyFlux works on all modern versions of Python (3.7 - 3.12) and PyPy (3.9).
50 |
51 | - **100% covered by tests:** No explanation needed.
52 |
53 | To get started, head over to the `TinyFlux docs `__. Examples can be found in the `examples directory `__. You can also discuss topics related to TinyFlux, including general development and extensions, or showcase your TinyFlux-based projects on the `GitHub discussion forum `__.
54 |
55 |
56 | Example Code Snippets
57 | *********************
58 |
59 | Writing to TinyFlux
60 | ===================
61 |
62 | .. code-block:: python
63 |
64 |     >>> from datetime import datetime, timezone
65 |     >>> from tinyflux import TinyFlux, Point
66 |
67 |     >>> db = TinyFlux('/path/to/db.csv')
68 |
69 |     >>> p = Point(
70 |     ...     time=datetime(2022, 5, 1, 16, 0, tzinfo=timezone.utc),
71 |     ...     tags={"room": "bedroom"},
72 |     ...     fields={"temp": 72.0}
73 |     ... )
74 |     >>> db.insert(p, compact_key_prefixes=True)
75 |
76 |
77 | Querying TinyFlux
78 | =================
79 |
80 | .. code-block:: python
81 |
82 |     >>> from tinyflux import FieldQuery, TagQuery, TimeQuery
83 |
84 |     >>> # Search for a tag value.
85 |     >>> Tag = TagQuery()
86 |     >>> db.search(Tag.room == 'bedroom')
87 |     [Point(time=2022-05-01T16:00:00+00:00, measurement=_default, tags=room:bedroom, fields=temp:72.0)]
88 |
89 |     >>> # Search for a field value.
90 |     >>> Field = FieldQuery()
91 |     >>> db.select("tag.room", Field.temp > 60.0)
92 |     ["bedroom"]
93 |
94 |     >>> # Search for a time value.
95 |     >>> Time = TimeQuery()
96 |     >>> time_start = Time >= datetime(2019, 1, 1, tzinfo=timezone.utc)
97 |     >>> time_end = Time < datetime(2023, 1, 1, tzinfo=timezone.utc)
98 |     >>> db.count(time_start & time_end)
99 |     1
100 |
101 |
102 | Full Example Notebooks and Workflows
103 | ************************************
104 |
105 | The `examples `__ directory of this repository contains four common use cases for TinyFlux and the associated boilerplate to get you started:
106 |
107 | 1. `Loading a TinyFlux DB from a CSV `__
108 | 2. `Local Analytics Workflow with a TinyFlux Database `__
109 | 3. `TinyFlux as a MQTT Datastore for IoT Devices `__
110 | 4. `TinyFlux at the Edge (with Backup Strategy) `__
111 |
112 | Tips
113 | ****
114 |
115 | Check out some tips for working with TinyFlux `here `__.
116 |
117 |
118 | TinyFlux Across the Internet
119 | ****************************
120 |
121 | Articles, tutorials, and other instances of TinyFlux in the wild:
122 |
123 | - `"Introducing TinyFlux: The Tiny Time Series Database for Python-based IoT & Analytics Applications" `__: A Medium.com article announcing the release of TinyFlux
124 | - `"Storing Time Series Data in Python Using TinyFluxDB" `__: A tutorial from `Steve's Internet Guide `__, a portal for learning MQTT and IoT development for Python
125 | - `"KaiCode 2024 Shortlist" `__: TinyFlux came in 10th place out of 412 entrants in the 7th edition of this open-source festival, which is dedicated to recognizing projects with high-quality open-source principles.
126 |
127 | Contributing
128 | ************
129 |
130 | New ideas, developer tools, improvements, and bugfixes are always welcome.
Follow these guidelines before getting started: 131 | 132 | 1. Make sure to read `Getting Started `__ and the `Contributing Tooling and Conventions `__ section of the documentation. 133 | 2. Check GitHub for `existing open issues `__, `open a new issue `__ or `start a new discussion `__. 134 | 3. To get started on a pull request, fork the repository on GitHub, create a new branch, and make updates. 135 | 4. Write unit tests, ensure the code is 100% covered, update documentation where necessary, and format and style the code correctly. 136 | 5. Send a pull request. 137 | 138 | .. |Docs| image:: https://img.shields.io/readthedocs/docs 139 | :target: https://tinyflux.readthedocs.io/en/latest/ 140 | .. |Build Status| image:: https://github.com/citrusvanilla/tinyflux/actions/workflows/build.yml/badge.svg 141 | :target: https://github.com/citrusvanilla/tinyflux/actions 142 | .. |Coverage| image:: https://codecov.io/gh/citrusvanilla/tinyflux/branch/master/graph/badge.svg?token=IEGQ4E57VA 143 | :target: https://app.codecov.io/gh/citrusvanilla/tinyflux 144 | .. |Version| image:: http://img.shields.io/pypi/v/tinyflux.svg 145 | :target: https://pypi.python.org/pypi/tinyflux/ 146 | .. |Downloads| image:: https://img.shields.io/pepy/dt/tinyflux 147 | :target: https://pypi.python.org/pypi/tinyflux/ 148 | -------------------------------------------------------------------------------- /artwork/tinyflux-dark.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/citrusvanilla/tinyflux/42c221441ba0f586deb77eceb1718e2a231d4097/artwork/tinyflux-dark.png -------------------------------------------------------------------------------- /artwork/tinyflux-light.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/citrusvanilla/tinyflux/42c221441ba0f586deb77eceb1718e2a231d4097/artwork/tinyflux-light.png -------------------------------------------------------------------------------- /artwork/tinyfluxdb-dark.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/citrusvanilla/tinyflux/42c221441ba0f586deb77eceb1718e2a231d4097/artwork/tinyfluxdb-dark.png -------------------------------------------------------------------------------- /artwork/tinyfluxdb-light.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/citrusvanilla/tinyflux/42c221441ba0f586deb77eceb1718e2a231d4097/artwork/tinyfluxdb-light.png -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = source 9 | BUILDDIR = build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=source 11 | set BUILDDIR=build 12 | 13 | %SPHINXBUILD% >NUL 2>NUL 14 | if errorlevel 9009 ( 15 | echo. 16 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 17 | echo.installed, then set the SPHINXBUILD environment variable to point 18 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 19 | echo.may add the Sphinx directory to PATH. 20 | echo. 21 | echo.If you don't have Sphinx installed, grab it from 22 | echo.https://www.sphinx-doc.org/ 23 | exit /b 1 24 | ) 25 | 26 | if "%1" == "" goto help 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /docs/source/_static/style.css: -------------------------------------------------------------------------------- 1 | body { 2 | font-family: "Arial", Helvetica, sans-serif; 3 | } 4 | 5 | 6 | .rst-content .toctree-wrapper>p.caption, 7 | h1, 8 | h2, 9 | h3, 10 | h4, 11 | h5, 12 | h6, 13 | legend { 14 | margin-top: 0; 15 | font-weight: 700; 16 | font-family: "Arial", Helvetica, sans-serif !important 17 | } 18 | 19 | .blue { 20 | color: #005e9b; 21 | font-size: 4.5rem; 22 | } 23 | 24 | .wy-breadcrumbs { 25 | display: none !important; 26 | } -------------------------------------------------------------------------------- /docs/source/changelog.rst: -------------------------------------------------------------------------------- 1 | Changelog 2 | ========= 3 | 4 | v1.0.0 - April 13, 2024 5 | ^^^^^^^^^^^^^^^^^^^^^^^ 6 | 7 | * Official Release 🎉. TinyFlux has been stable for over 20 months. 8 | 9 | 10 | v0.4.1 - September 25, 2023 11 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^ 12 | 13 | * Spelling bug fix in support of issue #44. 14 | 15 | 16 | v0.4.0 - March 27, 2023 17 | ^^^^^^^^^^^^^^^^^^^^^^^ 18 | 19 | * Tags and Fields can be removed from individual points. See `the documentation `__ for more (resolves issue #27). 20 | 21 | 22 | v0.3.1 (2023-3-27) 23 | ^^^^^^^^^^^^^^^^^^ 24 | 25 | * Fixed bug that allowed user to delete key/field tags with `.update()` and `.update_all()`. (resolves issue #36). 26 | 27 | 28 | v0.3.0 (2023-3-21) 29 | ^^^^^^^^^^^^^^^^^^ 30 | 31 | * Tag and field keys can be compacted when using CSVStorage, saving potentially many bytes per Point (resolves issue #32). 32 | * Fixed bug that causes tag values of '' to be serialized as "_none" (resolves issue #33). 33 | 34 | 35 | v0.2.6 (2023-3-9) 36 | ^^^^^^^^^^^^^^^^^ 37 | 38 | * TinyFlux is now PEP 561 compliant (resolves issue #31). 39 | 40 | v0.2.4 (2023-2-15) 41 | ^^^^^^^^^^^^^^^^^^ 42 | 43 | * Fix bug that prevents updating Points when using a CSVStorage instance. 44 | 45 | 46 | v0.2.1 (2022-11-22) 47 | ^^^^^^^^^^^^^^^^^^^ 48 | 49 | * Fix bug that caused values of 0.0 to be serialized as None/null rather than "0.0". 
50 | 51 | 52 | v0.2.0 (2022-11-09) 53 | ^^^^^^^^^^^^^^^^^^^ 54 | 55 | * Test and verification on Python 3.11 and Windows platforms 56 | * Disable universal newlines translation on CSV Storage instances 57 | 58 | 59 | v0.1.0 (2022-05-16) 60 | ^^^^^^^^^^^^^^^^^^^ 61 | 62 | * Initial release -------------------------------------------------------------------------------- /docs/source/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # This file only contains a selection of the most common options. For a full 4 | # list see the documentation: 5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 6 | 7 | # -- Path setup -------------------------------------------------------------- 8 | 9 | # If extensions (or modules to document with autodoc) are in another directory, 10 | # add these directories to sys.path here. If the directory is relative to the 11 | # documentation root, use os.path.abspath to make it absolute, like shown here. 12 | # 13 | import os 14 | import sys 15 | 16 | sys.path.insert(0, os.path.abspath("../..")) 17 | 18 | 19 | # -- Project information ----------------------------------------------------- 20 | 21 | project = "TinyFlux" 22 | copyright = "2023, Justin Fung" 23 | author = "Justin Fung" 24 | 25 | # The full version, including alpha/beta/rc tags 26 | release = "0.1" 27 | 28 | 29 | # -- General configuration --------------------------------------------------- 30 | 31 | # Add any Sphinx extension module names here, as strings. They can be 32 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 33 | # ones. 34 | extensions = [ 35 | "sphinx.ext.autodoc", 36 | "sphinx.ext.autosectionlabel", 37 | # "sphinx.ext.viewcode", 38 | # "sphinx.ext.napoleon", 39 | "sphinx_autodoc_typehints", 40 | ] 41 | 42 | # Add any paths that contain templates here, relative to this directory. 43 | templates_path = ["_templates"] 44 | 45 | html_css_files = [ 46 | "style.css", 47 | ] 48 | 49 | # List of patterns, relative to source directory, that match files and 50 | # directories to ignore when looking for source files. 51 | # This pattern also affects html_static_path and html_extra_path. 52 | exclude_patterns = [] 53 | 54 | 55 | # -- Options for HTML output ------------------------------------------------- 56 | 57 | # The theme to use for HTML and HTML Help pages. See the documentation for 58 | # a list of builtin themes. 59 | # 60 | html_theme = "sphinx_rtd_theme" 61 | 62 | # Add any paths that contain custom static files (such as style sheets) here, 63 | # relative to this directory. They are copied after the builtin static files, 64 | # so a file named "default.css" will overwrite the builtin "default.css". 65 | html_static_path = ["_static"] 66 | 67 | html_show_copyright = False 68 | html_show_sphinx = False 69 | html_show_sourcelink = False 70 | 71 | 72 | rst_prolog = """ 73 | .. include:: 74 | 75 | """ 76 | -------------------------------------------------------------------------------- /docs/source/contributing-guidelines.rst: -------------------------------------------------------------------------------- 1 | Guidelines 2 | ========== 3 | 4 | New ideas, improvements, bugfixes, and new developer tools are always welcome. Follow these guidelines before getting started: 5 | 6 | 1. Make sure to read :doc:`getting-started` and :doc:`contributing-tooling`. 7 | 2. Check GitHub_ for existing open issues, or open a new issue to begin a discussion. 8 | 3. 
To get started on a pull request, fork the repository on GitHub, create a new branch, and make updates. 9 | 4. Write unit tests, ensure the code is 100% covered, update documentation where necessary, and format and style the code correctly. 10 | 5. Send a pull request. 11 | 12 | 13 | 14 | .. _GitHub: https://github.com/citrusvanilla/tinyflux -------------------------------------------------------------------------------- /docs/source/contributing-philosophy.rst: -------------------------------------------------------------------------------- 1 | Philosophy 2 | ========== 3 | 4 | Like TinyDB, TinyFlux aims to be simple and fun to use. 5 | 6 | Like InfluxDB, TinyFlux places time before all else. 7 | 8 | Simplicity, enjoyment, and time- these are the three guiding principles of TinyFlux, both in its usage and in its development. 9 | 10 | Finally, when in doubt, over-document your code. -------------------------------------------------------------------------------- /docs/source/contributing-tooling.rst: -------------------------------------------------------------------------------- 1 | Tooling and Conventions 2 | ======================= 3 | 4 | TinyFlux should be developed locally with the latest stable version of Python on any platform (3.10 as of this writing). 5 | 6 | 7 | Versioning 8 | ---------- 9 | 10 | TinyFlux follows `semantic versioning`_ guidelines for releases. 11 | 12 | 13 | Workflow 14 | -------- 15 | 16 | TinyFlux development follows the branch-based workflow known as "`GitHub flow`_". 17 | 18 | 19 | Continuous Integration and Deployment 20 | ------------------------------------- 21 | 22 | TinyFlux uses `GitHub Actions`_ for its CI/CD workflow. 23 | 24 | 25 | Coding Conventions 26 | ------------------ 27 | 28 | TinyFlux conforms to `PEP 8`_ for style, and `Google Python Style Guide`_ for docstrings. TinyFlux uses common developer tools to check and enforce this. These checks should be performed locally before pushing to GitHub, as they will eventually be enforced with GitHub Actions (see ``.github/workflows`` in the TinyFlux GitHub repository for details). 29 | 30 | 31 | Formatting 32 | ^^^^^^^^^^ 33 | 34 | TinyFlux uses standard configuration black_ for code formatting, with an enforced line-length of 80 characters. 35 | 36 | After installing the project requirements: 37 | 38 | .. code-block:: bash 39 | 40 | /tinyflux $ black . 41 | 42 | 43 | Style 44 | ^^^^^ 45 | 46 | TinyFlux uses standard configuration flake8_ for style enforcement, with an enforced line-length of 80 characters. 47 | 48 | After installing the project requirements: 49 | 50 | .. code-block:: bash 51 | 52 | /tinyflux $ flake8 . 53 | 54 | Typing 55 | ^^^^^^ 56 | 57 | TinyFlux uses standard configuration mypy_ for static type checking. 58 | 59 | After installing the project requirements: 60 | 61 | .. code-block:: bash 62 | 63 | /tinyflux $ mypy . 64 | 65 | Documentation 66 | ^^^^^^^^^^^^^ 67 | 68 | TinyFlux hosts documentation on `Read The Docs`_. 69 | 70 | TinyFlux uses Sphinx_ for documentation generation, with a customized `Read the Docs Sphinx Theme`_, enabled for "Google-style" docstrings. 71 | 72 | After installing the project requirements: 73 | 74 | .. code-block:: bash 75 | 76 | /tinyflux $ cd docs 77 | /docs $ make html 78 | /docs $ open build/html/index.html 79 | 80 | Documentation is deployed to ReadTheDocs through third-party integration with GitHub. Commits to the ``master`` branch trigger builds and deployment with RTD. 
81 |
82 | Testing
83 | -------
84 |
85 | TinyFlux aims for 100% code coverage through unit testing.
86 |
87 |
88 | Test Framework
89 | ^^^^^^^^^^^^^^
90 |
91 | TinyFlux uses pytest_ as its testing framework.
92 |
93 | After installing the project requirements:
94 |
95 | .. code-block:: bash
96 |
97 |     /tinyflux $ pytest
98 |
99 | Coverage
100 | ^^^^^^^^
101 |
102 | TinyFlux uses Coverage.py_ for measuring code coverage.
103 |
104 | .. code-block:: bash
105 |
106 |     /tinyflux $ coverage run -m pytest
107 |     /tinyflux $ coverage report -m
108 |
109 |
110 |
111 | .. _PEP 8: https://peps.python.org/pep-0008/
112 | .. _Google Python Style Guide: https://google.github.io/styleguide/pyguide.html
113 | .. _black: https://black.readthedocs.io/en/stable/
114 | .. _flake8: https://flake8.pycqa.org/en/latest/
115 | .. _mypy: https://mypy.readthedocs.io/en/stable/
116 | .. _Sphinx: https://www.sphinx-doc.org/en/master/
117 | .. _Read the Docs Sphinx Theme: https://sphinx-rtd-theme.readthedocs.io/en/stable/
118 | .. _pytest: https://docs.pytest.org/en/7.1.x/
119 | .. _Coverage.py: https://coverage.readthedocs.io/en/6.3.3/
120 | .. _GitHub Actions: https://docs.github.com/en/actions
121 | .. _Read the Docs: https://readthedocs.org/
122 | .. _semantic versioning: https://semver.org/
123 | .. _GitHub flow: https://docs.github.com/en/get-started/quickstart/github-flow
--------------------------------------------------------------------------------
/docs/source/data-elements.rst:
--------------------------------------------------------------------------------
1 | Elements of Data in TinyFlux
2 | ----------------------------
3 |
4 | Data elements and terms in TinyFlux mostly mirror those of InfluxDB. The following is a list of TinyFlux terms and concepts. Click on a term, or read on below.
5 |
6 | * :ref:`point`
7 | * :ref:`timestamp`
8 | * :ref:`measurement`
9 | * :ref:`tag set`
10 | * :ref:`tag key`
11 | * :ref:`tag value`
12 | * :ref:`field set`
13 | * :ref:`field key`
14 | * :ref:`field value`
15 |
16 |
17 | Point
18 | ^^^^^
19 |
20 | The atomic data unit of TinyFlux. Consists of a :ref:`measurement`, :ref:`timestamp`, :ref:`tag set`, and a :ref:`field set`. In the primary disk CSV storage, all attributes are serialized to unicode using the system default encoding.
21 |
22 |
23 | In Python:
24 |
25 | >>> from tinyflux import Point
26 | >>> from datetime import datetime, timezone
27 | >>> p = Point(
28 | ...     time=datetime.now(timezone.utc),
29 | ...     measurement="thermostat home",
30 | ...     tags={
31 | ...         "location": "bedroom",
32 | ...         "scale": "fahrenheit",
33 | ...     },
34 | ...     fields={
35 | ...         "temp": 70.0,
36 | ...     }
37 | ... )
38 |
39 | On disk:
40 |
41 | .. code-block:: bash
42 |
43 |     2022-05-13T23:19:46.573233,thermostat home,_tag_location,bedroom,_tag_scale,fahrenheit,_field_temp,70.0
44 |
45 |
46 | Timestamp
47 | ^^^^^^^^^
48 |
49 | The time associated with a :ref:`point`. As an attribute of a :ref:`point`, it is a Python `datetime`_ object. Regardless of its state, when it is inserted into a TinyFlux database, it will become a timezone-aware object cast to the UTC timezone.
50 |
51 | On disk, it is serialized as an `ISO 8601`_ formatted string and occupies the first column of the default CSV storage class.
52 |
53 | In Python:
54 |
55 | >>> Point()
56 |
57 | On disk:
58 |
59 | .. code-block:: bash
60 |
61 |     2022-05-13T23:19:46.573233,_default
62 |
63 |
64 | For details on time's relationship with TinyFlux, see :doc:`time`.
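As an additional illustration (a sketch following the pattern above), a Point created with a fixed ``-07:00`` UTC offset is written to disk as the same instant cast to UTC:

In Python:

>>> from datetime import datetime, timedelta, timezone
>>> Point(time=datetime(2022, 5, 13, 16, 19, 46, tzinfo=timezone(timedelta(hours=-7))))

On disk:

.. code-block:: bash

    2022-05-13T23:19:46,_default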
65 |
66 |
67 | Measurement
68 | ^^^^^^^^^^^
69 |
70 | A measurement is a collection of :ref:`Points`, much like a table in a relational database. It is a string in memory and on disk. TinyFlux provides a convenient way of interacting with the :ref:`Points` of a measurement through the ``db.measurement(...)`` method.
71 |
72 | In Python:
73 |
74 | >>> Point(measurement="cities")
75 |
76 | On disk:
77 |
78 | .. code-block:: bash
79 |
80 |     2022-05-13T23:19:46.573233,cities
81 |
82 |
83 | See :doc:`measurements` for more details.
84 |
85 |
86 | Tag Set
87 | ^^^^^^^
88 |
89 | A tag set (or "tags") is the collection of :ref:`tag keys` and :ref:`tag values` belonging to a :ref:`point`. TinyFlux is schemaless, so any Point can contain zero, one, or more tag keys and associated tag values. Tag keys and tag values are both strings. Tag keys and their values map to Points with a hashmap in a TinyFlux index, providing for efficient retrieval. In a well-designed TinyFlux database, distinct tag values should not be as numerous as the :ref:`field values`. On disk, tag sets occupy side-by-side columns- one for the tag key and one for the tag value.
90 |
91 | In Python:
92 |
93 | >>> Point(
94 | ...     tags={
95 | ...         "city": "LA",
96 | ...         "neighborhood": "Chinatown",
97 | ...         "food": "good",
98 | ...     }
99 | ... )
100 |
101 | On disk:
102 |
103 | .. code-block:: bash
104 |
105 |     2022-05-13T23:19:46.573233,_default,_tag_city,LA,_tag_neighborhood,Chinatown,_tag_food,good
106 |
107 |
108 | Tag Key
109 | ^^^^^^^
110 |
111 | A tag key is the identifier for a :ref:`tag value` in a :ref:`tag set`. On disk, a tag key is prefixed with ``_tag_`` (default) or ``t_`` (compact).
112 |
113 | In the following, the tag key is ``city``.
114 |
115 | >>> tags = {"city": "Los Angeles"}
116 |
117 |
118 | Tag Value
119 | ^^^^^^^^^
120 |
121 | A tag value is the associated value for a tag key in a :ref:`tag set`. On disk, it occupies the column next to that of its tag key.
122 |
123 | In the following, the tag value is ``Los Angeles``.
124 |
125 | >>> tags = {"city": "Los Angeles"}
126 |
127 |
128 | Field Set
129 | ^^^^^^^^^
130 |
131 | A field set (or "fields") is the collection of :ref:`field keys` and :ref:`field values` belonging to a :ref:`point`. TinyFlux is schemaless, so any Point can contain zero, one, or more field keys and associated field values. Field keys are strings while field values are numeric (in Python, ``float`` or ``int``). Field keys and their values **do not** map to Points in a TinyFlux index, as it is assumed that their distinct values are too numerous to index. On disk, field sets occupy side-by-side columns- one for the field key and one for the field value.
132 |
133 | In Python:
134 |
135 | >>> Point(
136 | ...     fields={
137 | ...         "num_restaurants": 12,
138 | ...         "num_boba_shops": 3,
139 | ...     }
140 | ... )
141 |
142 | On disk:
143 |
144 | .. code-block:: bash
145 |
146 |     2022-05-13T23:19:46.573233,_default,_field_num_restaurants,12,_field_num_boba_shops,3
147 |
148 |
149 | Field Key
150 | ^^^^^^^^^
151 |
152 | A field key is the identifier for a :ref:`field value` in a :ref:`field set`. On disk, a field key is prefixed with ``_field_`` (default) or ``f_`` (compact).
153 |
154 | In the following, the field key is ``num_restaurants``.
155 |
156 | >>> fields = {"num_restaurants": 12}
157 |
158 |
159 | Field Value
160 | ^^^^^^^^^^^
161 |
162 | A field value is the associated value for a :ref:`field key` in a :ref:`field set`. On disk, it occupies the column next to that of its field key.
163 |
164 | In the following, the field value is ``12``.
165 |
166 | >>> fields = {"num_restaurants": 12}
167 |
168 |
169 | .. _datetime: https://docs.python.org/3/library/datetime.html
170 | .. _ISO 8601: https://en.wikipedia.org/wiki/ISO_8601
171 |
--------------------------------------------------------------------------------
/docs/source/design-principles.rst:
--------------------------------------------------------------------------------
1 | TinyFlux Design Principles
2 | ==========================
3 |
4 | Like InfluxDB, TinyFlux implements design principles that are optimized for time series data. Some of these design principles have associated tradeoffs in performance. They are discussed below.
5 |
6 | - :ref:`Prioritize High-Speed Writes`
7 | - :ref:`Minimize Memory Footprint`
8 | - :ref:`Prioritize Searches for Time`
9 | - :ref:`Schemaless design`
10 | - :ref:`IDs and Duplicates`
11 |
12 |
13 | Prioritize High-Speed Writes
14 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
15 | Time series data is often write-heavy, and in cases when a time series database is used as a real-time data store, the frequency of writes can be quite high. TinyFlux has been designed to write to disk from a single thread as quickly as possible, with minimal disruption. To accomplish this, TinyFlux utilizes a default CSV store which supports nearly instantaneous appends, regardless of underlying file size. TinyFlux will also invalidate its index if, upon any insert, the timestamp for a Point precedes that of the most recent insert. TinyFlux will not attempt to rebuild its index upon invalidation during a write op.
16 |
17 |
18 | Minimize Memory Footprint
19 | ^^^^^^^^^^^^^^^^^^^^^^^^^
20 | While it would be great if databases could live in memory, this is not a reasonable design choice for everyday users. TinyFlux has been designed to never read the entire contents of its storage into memory unless explicitly asked to do so. To balance the
21 | need for fast querying with a small memory footprint, TinyFlux builds an internal index. This index is generally about 80% smaller than the memory required to hold the entire dataset in memory, and still allows for query performance to equal or surpass that of keeping the database in memory. For removals and updates, TinyFlux still visits all items in storage, but evaluates each item one at a time and writes to temporary storage before finally replacing the original storage with the updated one. TinyFlux also does not rewrite data in time-ascending order, as is the case with InfluxDB, as this would require either the entire dataset to be read into memory, or a computationally expensive external merge sort to be executed on disk.
22 |
23 |
24 | Prioritize Searches for Time
25 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
26 | TinyFlux builds an index on time by keeping a sorted container data structure of timestamps in memory, and searches over the index by parsing queries and invoking optimized search algorithms for sorted containers to retrieve candidate Points quickly. This reduces potentially slow and exhaustive evaluations significantly.
27 |
28 |
29 | Schemaless design
30 | ^^^^^^^^^^^^^^^^^
31 | Even though row-based data stores like CSV are not thought of as "schemaless", TinyFlux nonetheless allows for datasets to have flexible schemas so that signals that change over time, or multiple signals from multiple sources, can all occupy space in the same datastore. This allows the user to focus less on database design and more on capturing and analyzing data.
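For instance, two Points with entirely different tag and field sets can be written to the same store with no schema declaration or migration (a minimal sketch, assuming ``db`` is any TinyFlux instance):

>>> db.insert(Point(tags={"sensor": "thermostat"}, fields={"temp_f": 71.5}))
>>> db.insert(Point(tags={"city": "LA", "parameter": "PM2.5"}, fields={"aqi": 112}))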
32 | 33 | 34 | IDs and Duplicates 35 | ^^^^^^^^^^^^^^^^^^ 36 | TinyFlux does not keep IDs as it is assumed data points are unique by their combination of timestamp and tag set. To this end, TinyFlux also does not currently have a mechanism for checking for duplicates. Searches matching duplicate Points will return duplicates. -------------------------------------------------------------------------------- /docs/source/exploring-data.rst: -------------------------------------------------------------------------------- 1 | Exploring Data 2 | ============== 3 | 4 | An understanding of how queries in TinyFlux work can be applied to several database operations. 5 | 6 | Query-based Exploration 7 | ----------------------- 8 | 9 | The primary method for query usage is through the ``.search(query)``. Other useful search methods are below: 10 | 11 | **.contains(query) <--> Check if the database contains any Points matching a Query** 12 | 13 | This returns a simple boolean value and is the fastest search op. 14 | 15 | >>> # Check if db contains any Points for Los Angeles after the start of 2022. 16 | >>> from datetime import datetime 17 | >>> from zoneinfo import ZoneInfo 18 | >>> q1 = TagQuery().city == "Los Angeles" 19 | >>> q2 = TimeQuery() >= datetime(2022, 1, 1, tzinfo = ZoneInfo("US/Pacific")) 20 | >>> db.contains(q1 & q2) 21 | 22 | 23 | **.count(query) <--> Count the number of Points matching a Query** 24 | 25 | This returns an integer. 26 | 27 | >>> # Count the number of Points for Los Angeles w/ a temp over 100 degrees. 28 | >>> q1 = TagQuery().city == "Los Angeles" 29 | >>> q2 = FieldQuery().temperature_f > 100.0 30 | >>> db.count(q1 & q2) 31 | 32 | 33 | **.get(query) <--> Get the first Point in the database matching a Query** 34 | 35 | This returns a Point instance, or ``None`` if no Points were found. 36 | 37 | >>> # Return the first Point in the db for LA w/ more than 1 inch of precipitation. 38 | >>> q1 = TagQuery().city == "Los Angeles" 39 | >>> q3 = FieldQuery().precipitation > 1.0 40 | >>> db.get(q1 & q3) 41 | 42 | 43 | **.search(query) <--> Get all the Points in the database matching a Query** 44 | 45 | This is the primary method for querying the database, and returns a list of Point instances, sorted by timestamp. 46 | 47 | >>> # Get all Points in the DB for Los Angeles in 2022 in which the AQI was "hazardous". 48 | >>> from datetime import datetime 49 | >>> from zoneinfo import ZoneInfo 50 | >>> q1 = TagQuery().city == "Los Angeles" 51 | >>> q2 = TimeQuery() >= datetime(2022, 1, 1, tzinfo = ZoneInfo("US/Pacific")) 52 | >>> q3 = TimeQuery() < datetime(2023, 1, 1, tzinfo = ZoneInfo("US/Pacific")) 53 | >>> q4 = FieldQuery().air_quality_index > 100 # hazardous is over 100 54 | >>> db.search(q1 & q2 & q3 & q4) 55 | 56 | **.select(attributes, query) <--> Get attributes from Points in the database matching a Query** 57 | 58 | This returns a list of attributes from Points matching the Query. Similar to SQL "select". 59 | 60 | >>> # Get the time, city, and air-quality index ("AQI") for all Points with an AQI over 100. 61 | >>> q = FieldQuery().aqi > 100 62 | >>> db.select("fields.aqi", q) 63 | [132] 64 | >>> db.select(("time", "city", "fields.aqi"), q) 65 | [(datetime.datetime(2020, 9, 15, 8, 0, tzinfo=datetime.timezone.utc), "Los Angeles", 132)] 66 | 67 | 68 | Attribute-based Exploration 69 | --------------------------- 70 | 71 | The database can also be explored based on attributes, as opposed to queries. 
72 |
73 |
74 | **.get_measurements() <--> Get all the measurements in the database**
75 |
76 | This returns an alphabetically-sorted list of measurements in the database.
77 |
78 | >>> db.insert(Point(measurement="cities"))
79 | >>> db.insert(Point(measurement="stock prices"))
80 | >>> db.get_measurements()
81 | ["cities", "stock prices"]
82 |
83 |
84 | **.get_field_keys() <--> Get all the field keys in the database**
85 |
86 | This returns an alphabetically-sorted list of field keys in the database.
87 |
88 | >>> db.insert(Point(fields={"temp_f": 50.2}))
89 | >>> db.insert(Point(fields={"price": 2107.44}))
90 | >>> db.get_field_keys()
91 | ["price", "temp_f"]
92 |
93 |
94 | **.get_field_values(field_key) <--> Get all the field values in the database**
95 |
96 | This returns all the values for a specified ``field_key``, in insertion order. This might be useful for determining the range of values a field could take.
97 |
98 | >>> db.insert(Point(fields={"temp_f": 50.2}))
99 | >>> db.insert(Point(fields={"price": 2107.44}))
100 | >>> db.get_field_values("temp_f")
101 | [50.2]
102 |
103 |
104 | **.get_tag_keys() <--> Get all the tag keys in the database**
105 |
106 | This returns an alphabetically-sorted list of tag keys in the database.
107 |
108 | >>> db.insert(Point(tags={"city": "LA"}))
109 | >>> db.insert(Point(tags={"company": "Amazon.com, Inc."}))
110 | >>> db.get_tag_keys()
111 | ["city", "company"]
112 |
113 |
114 | **.get_tag_values([tag_key]) <--> Get all the tag values in the database**
115 |
116 | This returns all the values for an optional list of specified tag keys.
117 |
118 | >>> db.insert(Point(tags={"city": "LA"}))
119 | >>> db.insert(Point(tags={"company": "Amazon.com, Inc."}))
120 | >>> db.get_tag_values()
121 | {"city": ["LA"], "company": ["Amazon.com, Inc."]}
122 |
123 |
124 | **.get_timestamps() <--> Get all the timestamps in the database**
125 |
126 | This returns all the timestamps in the database by insertion order.
127 |
128 | >>> from datetime import datetime
129 | >>> from zoneinfo import ZoneInfo
130 | >>> time_2022 = datetime(2022, 1, 1, tzinfo=ZoneInfo("US/Pacific"))
131 | >>> time_1900 = datetime(1900, 1, 1, tzinfo=ZoneInfo("US/Pacific"))
132 | >>> db.insert(Point(time=time_2022))
133 | >>> db.insert(Point(time=time_1900))
134 | >>> db.get_timestamps()
135 | [datetime.datetime(2022, 1, 1, 8, 0, tzinfo=datetime.timezone.utc), datetime.datetime(1900, 1, 1, 8, 0, tzinfo=datetime.timezone.utc)]
136 |
137 |
138 | Full Dataset Exploration
139 | ------------------------
140 |
141 | Sometimes access to all the data is needed. There are two methods for doing so- one that brings all the database items into memory, and one that provides a generator that iterates over items one at a time.
142 |
143 | **.all() <--> Get all of the points in the database**
144 |
145 | This returns all the points in the database in timestamp order. To retrieve by insertion order, pass the ``sorted=False`` argument. This will bring all of the data into memory at once.
146 |
147 | >>> db.all()  # Points returned sorted by timestamp.
148 |
149 | or
150 |
151 | >>> db.all(sorted=False)  # Points returned by insertion order.
152 |
153 | **iter(db) <--> Iterate over all the points in the database**
154 |
155 | This returns a generator over which point-by-point logic can be applied. This does not pull everything into memory.
156 |
157 | >>> iter(db)
158 |
159 | >>> for point in db:
160 | ... 
print(point) 161 | Point(time=2022-01-01T08:00:00+00:00, measurement=_default) 162 | Point(time=1900-01-01T08:00:00+00:00, measurement=_default) 163 | 164 | The list of all the data exploration methods covered above: 165 | 166 | +------------------------------------+------------------------------------------------------------------+ 167 | | **Query-based Exploration** | 168 | +------------------------------------+------------------------------------------------------------------+ 169 | | ``db.contains(query)`` | Whether or not the database contains any points matching a query | 170 | +------------------------------------+------------------------------------------------------------------+ 171 | | ``db.count(query)`` | Count the number of points matching a query | 172 | +------------------------------------+------------------------------------------------------------------+ 173 | | ``db.get(query)`` | Get one point from the database matching a query | 174 | +------------------------------------+------------------------------------------------------------------+ 175 | | ``db.search(query)`` | Get all points from the database matching a query | 176 | +------------------------------------+------------------------------------------------------------------+ 177 | | ``db.select(attributes, query)`` | Get attributes from points matching a query | 178 | +------------------------------------+------------------------------------------------------------------+ 179 | | **Attribute-based Exploration** | 180 | +------------------------------------+------------------------------------------------------------------+ 181 | | ``db.get_measurements()`` | Get the names of all measurements in the database | 182 | +------------------------------------+------------------------------------------------------------------+ 183 | | ``db.get_timestamps()`` | Get all the timestamps from the database, by insertion order | 184 | +------------------------------------+------------------------------------------------------------------+ 185 | | ``db.get_tag_keys()`` | Get all tag keys from the database | 186 | +------------------------------------+------------------------------------------------------------------+ 187 | | ``db.get_tag_values()`` | Get all tag values from the database | 188 | +------------------------------------+------------------------------------------------------------------+ 189 | | ``db.get_field_keys()`` | Get all field keys from the database | 190 | +------------------------------------+------------------------------------------------------------------+ 191 | | ``db.get_field_values()`` | Get all field values from the database | 192 | +------------------------------------+------------------------------------------------------------------+ 193 | | **Full Dataset Exploration** | 194 | +------------------------------------+------------------------------------------------------------------+ 195 | | ``db.all()`` | Get all points in the database | 196 | +------------------------------------+------------------------------------------------------------------+ 197 | | ``iter(db)`` | Return a generator for all points in the database | 198 | +------------------------------------+------------------------------------------------------------------+ 199 | -------------------------------------------------------------------------------- /docs/source/getting-started.rst: -------------------------------------------------------------------------------- 1 | Getting Started 2 | =============== 3 | 4 | Initialize a new 
TinyFlux database (or connect to an existing file store) with the following:
5 |
6 | >>> from tinyflux import TinyFlux
7 | >>> db = TinyFlux('db.csv')
8 |
9 | ``db`` is now a reference to the TinyFlux database that stores its data in a file called ``db.csv``.
10 |
11 | An individual instance of data in a TinyFlux database is known as a "Point". In a traditional relational database, this would be called a "row", and in a document-oriented database it is called a "document". A TinyFlux ``Point`` is a convenient object for storing its four main attributes:
12 |
13 | +-----------------+----------------------------------------------------------+------------------------------------------------------+
14 | | **Attribute** | **Python Type** | **Example** |
15 | +-----------------+----------------------------------------------------------+------------------------------------------------------+
16 | | ``measurement`` | ``str`` | ``"california air quality"`` |
17 | +-----------------+----------------------------------------------------------+------------------------------------------------------+
18 | | ``time`` | ``datetime`` | ``datetime.now(timezone.utc)`` |
19 | +-----------------+----------------------------------------------------------+------------------------------------------------------+
20 | | ``tags`` | ``Dict`` of ``str`` keys and ``str`` values | ``{"city": "Los Angeles", "parameter": "PM2.5"}`` |
21 | +-----------------+----------------------------------------------------------+------------------------------------------------------+
22 | | ``fields`` | ``Dict`` of ``str`` keys and ``float`` or ``int`` values | ``{"aqi": 112.0}`` |
23 | +-----------------+----------------------------------------------------------+------------------------------------------------------+
24 |
25 | In keeping with the analogy of a traditional RDBMS, a ``measurement`` is like a table.
26 |
27 | ``time`` is a field with the requirement that it is a ``datetime`` type, ``tags`` is a collection of string attributes, and ``fields`` is a collection of numeric attributes. TinyFlux is "schemaless", so tags and fields can be added to or removed from any Point.
28 |
29 | To make a Point, import the Point definition and annotate the Point with the desired attributes. If ``measurement`` is not defined, it takes the default table name of ``_default``.
30 |
31 | >>> from tinyflux import Point
32 | >>> p1 = Point(
33 | ...     time=datetime.fromisoformat("2020-08-28T00:00:00-07:00"),
34 | ...     tags={"city": "LA"},
35 | ...     fields={"aqi": 112}
36 | ... )
37 | >>> p2 = Point(
38 | ...     time=datetime.fromisoformat("2020-12-05T00:00:00-08:00"),
39 | ...     tags={"city": "SF"},
40 | ...     fields={"aqi": 128}
41 | ... )
42 |
43 | To write to TinyFlux, simply:
44 |
45 | >>> db.insert(p1)
46 | >>> db.insert(p2)
47 |
48 | All points can be retrieved from the database with the following:
49 |
50 | >>> db.all()
51 | [Point(time=2020-08-28T07:00:00+00:00, measurement=_default, tags=city:LA, fields=aqi:112), Point(time=2020-12-05T08:00:00+00:00, measurement=_default, tags=city:SF, fields=aqi:128)]
52 |
53 | .. note:: TinyFlux will convert all time to UTC. Read more about it here: :doc:`time`.
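For example, reading back the first point inserted above shows a timezone-aware time attribute cast to UTC (a minimal sketch; ``p1`` was created with a ``-07:00`` offset):

>>> db.all()[0].time
datetime.datetime(2020, 8, 28, 7, 0, tzinfo=datetime.timezone.utc)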
54 |
55 | TinyFlux also allows iteration over stored Points:
56 |
57 | >>> for point in db:
58 | ...     print(point)
59 | Point(time=2020-08-28T07:00:00+00:00, measurement=_default, tags=city:LA, fields=aqi:112)
60 | Point(time=2020-12-05T08:00:00+00:00, measurement=_default, tags=city:SF, fields=aqi:128)
61 |
62 | To query for Points, there are four query types- one for each of a Point's four attributes.
63 |
64 | >>> from tinyflux import FieldQuery, MeasurementQuery, TagQuery, TimeQuery
65 | >>> Time = TimeQuery()
66 | >>> db.search(Time < datetime.fromisoformat("2020-11-01T00:00:00-08:00"))
67 | [Point(time=2020-08-28T07:00:00+00:00, measurement=_default, tags=city:LA, fields=aqi:112)]
68 | >>> Field = FieldQuery()
69 | >>> db.search(Field.aqi > 120)
70 | [Point(time=2020-12-05T08:00:00+00:00, measurement=_default, tags=city:SF, fields=aqi:128)]
71 | >>> Tag = TagQuery()
72 | >>> db.search(Tag.city == "LA")
73 | [Point(time=2020-08-28T07:00:00+00:00, measurement=_default, tags=city:LA, fields=aqi:112)]
74 | >>> Measurement = MeasurementQuery()
75 | >>> db.count(Measurement == "_default")
76 | 2
77 |
78 | Points can also be updated:
79 |
80 | >>> # Update the ``aqi`` field of the Los Angeles point.
81 | >>> db.update(Tag.city == "LA", fields={"aqi": 118})
82 | >>> for point in db:
83 | ...     print(point)
84 | Point(time=2020-08-28T07:00:00+00:00, measurement=_default, tags=city:LA, fields=aqi:118)
85 | Point(time=2020-12-05T08:00:00+00:00, measurement=_default, tags=city:SF, fields=aqi:128)
86 |
87 | Points can also be removed:
88 |
89 | >>> db.remove(Tag.city == "SF")
90 | 1
91 | >>> db.all()
92 | [Point(time=2020-08-28T07:00:00+00:00, measurement=_default, tags=city:LA, fields=aqi:118)]
93 |
94 | Here is the basic syntax covered in this section:
95 |
96 | +-------------------------------+---------------------------------------------------------------+
97 | | **Initialize a new TinyFlux Database** |
98 | +-------------------------------+---------------------------------------------------------------+
99 | | ``db = TinyFlux("my_db.csv")``| Initialize or connect to an existing database with ``TinyFlux()`` |
100 | +-------------------------------+---------------------------------------------------------------+
101 | | **Creating New Points** |
102 | +-------------------------------+---------------------------------------------------------------+
103 | | ``Point(...)`` | Initialize a new point. |
104 | +-------------------------------+---------------------------------------------------------------+
105 | | **Inserting Points Into the Database** |
106 | +-------------------------------+---------------------------------------------------------------+
107 | | ``db.insert()`` | Insert a point.
| 108 | +-------------------------------+---------------------------------------------------------------+ 109 | | **Retrieving Points** | 110 | +-------------------------------+---------------------------------------------------------------+ 111 | | ``db.all()`` | Get all points | 112 | +-------------------------------+---------------------------------------------------------------+ 113 | | ``iter(db)`` | Iterate over all points | 114 | +-------------------------------+---------------------------------------------------------------+ 115 | | ``db.search(query)`` | Get a list of points matching the query | 116 | +-------------------------------+---------------------------------------------------------------+ 117 | | ``db.count(query)`` | Count the number of points matching the query | 118 | +-------------------------------+---------------------------------------------------------------+ 119 | | **Updating Points** | 120 | +-------------------------------+---------------------------------------------------------------+ 121 | | ``db.update(query, ...)`` | Update all points matching the query | 122 | +-------------------------------+---------------------------------------------------------------+ 123 | | **Removing Points** | 124 | +-------------------------------+---------------------------------------------------------------+ 125 | | ``db.remove(query)`` | Remove all points matching the query | 126 | +-------------------------------+---------------------------------------------------------------+ 127 | | ``db.remove_all()`` | Remove all points | 128 | +-------------------------------+---------------------------------------------------------------+ 129 | | **Querying TinyFlux** | 130 | +-------------------------------+---------------------------------------------------------------+ 131 | | ``TimeQuery()`` | Create a new time query object | 132 | +-------------------------------+---------------------------------------------------------------+ 133 | | ``FieldQuery().f_key == 2`` | Match any point that has a field ``f_key`` with value | 134 | | | ``== 2`` (also possible: ``!=``, ``>``, ``>=``, ``<``, ``<=``)| 135 | +-------------------------------+---------------------------------------------------------------+ 136 | 137 | To continue with the introduction to TinyFlux, proceed to the next section, :doc:`preparing-data`. 138 | 139 | -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- 1 | Getting started with 2 | ==================== 3 | 4 | .. image:: https://github.com/citrusvanilla/tinyflux/blob/master/artwork/tinyfluxdb-dark.png?raw=true#gh-light-mode-only 5 | :width: 500px 6 | 7 | 8 | The tiny time series database, optimized for your happiness. 9 | 10 | 11 | .. toctree:: 12 | :caption: Basics 13 | :maxdepth: 1 14 | :hidden: 15 | 16 | intro 17 | installing-tinyflux 18 | getting-started 19 | preparing-data 20 | writing-data 21 | querying-data 22 | exploring-data 23 | updating-data 24 | removing-data 25 | measurements 26 | time 27 | tips 28 | 29 | 30 | .. toctree:: 31 | :caption: Reference 32 | :maxdepth: 1 33 | :hidden: 34 | 35 | data-elements 36 | design-principles 37 | internals 38 | tinyflux 39 | 40 | 41 | .. toctree:: 42 | :caption: Contributing 43 | :maxdepth: 1 44 | :hidden: 45 | 46 | contributing-philosophy 47 | contributing-guidelines 48 | contributing-tooling 49 | changelog 50 | 51 | 52 | .. 
toctree:: 53 | :caption: Links 54 | :maxdepth: 1 55 | :hidden: 56 | 57 | GitHub 58 | Examples 59 | 60 | 61 | .. |br| raw:: html 62 | 63 |
--------------------------------------------------------------------------------
/docs/source/installing-tinyflux.rst:
--------------------------------------------------------------------------------
1 | Installing TinyFlux
2 | ===================
3 | 
4 | To install TinyFlux from PyPI, run:
5 | 
6 | .. code-block:: bash
7 | 
8 |     $ pip install tinyflux
9 | 
10 | The latest development version is hosted on GitHub_. After downloading, install using:
11 | 
12 | .. code-block:: bash
13 | 
14 |     $ pip install .
15 | 
16 | 
17 | 
18 | .. References
19 | .. _GitHub: https://github.com/citrusvanilla/tinyflux
20 | 
--------------------------------------------------------------------------------
/docs/source/internals.rst:
--------------------------------------------------------------------------------
1 | TinyFlux Internals
2 | ==================
3 | 
4 | Storage
5 | -------
6 | 
7 | TinyFlux ships with two types of storage:
8 | 
9 | 1. A CSV store which is persisted to disk, and
10 | 2. A memory store which lasts only as long as the process in which it was declared.
11 | 
12 | To use the CSV store, pass a filepath during TinyFlux initialization.
13 | 
14 | >>> my_database = "db.csv"
15 | >>> db = TinyFlux(my_database)
16 | 
17 | To use the memory store:
18 | 
19 | >>> from tinyflux.storages import MemoryStorage
20 | >>> db = TinyFlux(storage=MemoryStorage)
21 | 
22 | In nearly all cases, users should opt for the former, as it persists the data on disk.
23 | 
24 | The CSV format is familiar to most, but at its heart it's just a row-based datastore that supports sequential iteration and append-only writes. Contrast this with JSON, which, while fast once loaded into memory, must be loaded entirely into memory and does not support appending.
25 | 
26 | The usage of CSV offers TinyFlux two distinct advantages for typical time-series workflows:
27 | 
28 | 1. Appends do not require reading of data, and occur in a constant amount of time regardless of the size of the underlying database.
29 | 2. Sequential iteration allows for a full read of the data without having to simultaneously keep the entirety of the data store in memory all at once. Logic can be performed on an individual row, and results kept or discarded as desired.
30 | 
31 | TinyFlux storage is also designed to be extensible.
32 | 
33 | In case direct access to the storage instance is desired, use the ``storage`` property of the TinyFlux instance.
34 | 
35 | >>> from tinyflux.storages import MemoryStorage
36 | >>> db = TinyFlux(storage=MemoryStorage)
37 | >>> my_data = db.storage.read()
38 | 
39 | For more discussion on storage, see :doc:`design-principles`.
40 | 
41 | 
42 | Indexing in TinyFlux
43 | --------------------
44 | 
45 | By default, TinyFlux will build an internal index when the database is initialized, and again at any point when a read operation is performed after the index becomes invalid. As TinyFlux's primary storage format is a CSV that is read from disk sequentially, the index allows for efficient retrieval operations that greatly reduce function calls, query evaluations, and the need to deserialize and reserialize data.
46 | 
47 | .. note::
48 | 
49 |     An index becomes invalid when points are inserted out of time order. When the ``auto_index`` parameter of ``TinyFlux`` is set to ``True``, the next read operation will rebuild the index.
50 | 
51 | Building an index is a non-trivial routine that occurs in the same process that TinyFlux is running in. For smaller amounts of data in a typical analytics workflow, building an index may not even be noticeable. As the database grows, the time needed to build or rebuild the index grows linearly. Automatic rebuilding of the index can be turned off by setting ``auto_index`` to ``False`` in the TinyFlux constructor:
52 | 
53 | >>> db = TinyFlux("my_database.csv", auto_index=False)
54 | 
55 | Setting this value to ``False`` will remove any index-building, but queries will slow down considerably.
56 | 
57 | A reindex can be manually triggered should the need arise:
58 | 
59 | >>> db.reindex()
60 | 
61 | .. warning::
62 |     There is usually only one reason to turn off auto-indexing, and that is when you are initializing the database instance and need to **immediately** start inserting points, as might be the case in IoT data-capture applications. In all other cases, particularly when reads will make up the majority of your workflow, you should leave ``auto_index`` set to ``True``.
63 | 
64 | At some level of data, the building of the index will noticeably slow down a workflow. For tips on how to address growing data, see :doc:`tips`.
--------------------------------------------------------------------------------
/docs/source/intro.rst:
--------------------------------------------------------------------------------
1 | Introduction
2 | ============
3 | 
4 | TinyFlux combines the simplicity of the document-oriented TinyDB_ with the concepts and design of the fully-fledged time series database known as InfluxDB_.
5 | 
6 | TinyFlux is a pure Python module that supports database-like operations on an in-memory or file datastore. It is optimized for time series data and as such, is considered a "time series database" (or "tsdb" for short). It is not, however, a database server that supports traditional RDBMS features like the management of concurrent connections, management of indexes in background processes, or the provisioning of access control. Before using TinyFlux, you should be sure that TinyFlux is right for your intended use-case.
7 | 
8 | 
9 | Why Should I Use TinyFlux?
10 | --------------------------
11 | 
12 | **In TinyFlux, time comes first.**
13 | 
14 | - Time in TinyFlux is a first-class citizen. TinyFlux expects and handles Python datetime objects with ease. Queries are optimized for time, above all else.
15 | 
16 | **TinyFlux is a real time series database.**
17 | 
18 | - Concepts around TinyFlux are based on InfluxDB. If you are looking for a gradual introduction into the world of time series databases, this is a great starting point. If your workflow outgrows the offerings of TinyFlux, you can jump to InfluxDB with very little introduction needed.
19 | 
20 | **TinyFlux is written in pure, standard library Python.**
21 | 
22 | - TinyFlux needs neither an external server nor any dependencies and works on all modern versions of Python.
23 | 
24 | **TinyFlux is optimized for your happiness.**
25 | 
26 | - Like TinyDB_, TinyFlux is designed to be simple and easy to use by providing a straightforward and clean API.
27 | 
28 | **TinyFlux is tiny.**
29 | 
30 | - The current source code has 2000 lines of code (with about 50% documentation) and 2000 lines of tests.
31 | 
32 | **TinyFlux has 100% test coverage.**
33 | 
34 | - No explanation needed.
35 | 
36 | 
37 | If you have a moderate amount of time series data without the need or desire to provision and manage a full-fledged server and its configuration, and you want to interface easily with the greater Python ecosystem, TinyFlux might be the right choice for you. 
38 | 
39 | 
40 | When To Look at Other Options
41 | -----------------------------
42 | 
43 | You should not use TinyFlux if you need advanced database features like:
44 | 
45 | - access from multiple processes or threads
46 | - an HTTP server
47 | - management of relationships between tables
48 | - access-control and users
49 | - `ACID guarantees `_
50 | - high performance as the size of your dataset grows
51 | 
52 | If you have a large amount of data or you need advanced features and high performance, consider using databases like SQLite_ or InfluxDB_.
53 | 
54 | 
55 | What's the difference between TinyFlux and TinyDB?
56 | --------------------------------------------------
57 | 
58 | At its core, TinyFlux is a *time series database* while TinyDB is a *document-oriented database*.
59 | 
60 | Let's break this down:
61 | 
62 | **In TinyFlux, time is a "first-class citizen".**
63 | 
64 | - In TinyDB, there is no special handling of time.
65 | 
66 | **A TinyFlux database expects Python datetime objects to be passed with each and every data point.**
67 | 
68 | - TinyDB does not accept datetime objects directly. In TinyDB, attributes representing time must be serialized and deserialized by the user, or an extension must be added to TinyDB to handle datetime objects.
69 | 
70 | **In TinyFlux, queries are optimized for time.**
71 | 
72 | - TinyFlux builds a small index in memory which includes an index on timestamps. This provides for ultra-fast search and retrieval of data when queries are time-based. TinyDB has no special mechanism for querying attributes of different types.
73 | 
74 | **Data in TinyFlux is written to disk in "append-only" fashion.**
75 | 
76 | - Irrespective of the current size of the database, inserting is always a constant-time operation on the order of nanoseconds. TinyFlux is optimized for time series datasets which are often write-heavy, as opposed to document-stores which are traditionally read-heavy. This allows high-frequency signals to be easily handled by TinyFlux. TinyDB does not expect high-frequency writes, and since it reads all data into memory before adding new data, its insert time increases linearly with the size of the database.
77 | 
78 | **TinyFlux and TinyDB are both "schemaless".**
79 | 
80 | - This means that attributes, and whether they are present at all, may differ between items without any exceptions being raised. TinyDB, as a document store, supports the storage of complex types including containers like arrays/lists and objects/dictionaries. TinyFlux, however, provides for just three types of attributes: numeric, string, and, of course, datetime.
81 | 
82 | 
83 | Got it, so should I use TinyFlux or TinyDB?
84 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
85 | 
86 | **You should use TinyFlux if:**
87 | - Your data is naturally time series in nature. That is, you have many observations of some phenomenon over time with varying measurements. Examples include stock prices, daily temperatures, or the accelerometer readings on a running watch.
88 | - You will be writing to the database at a regular, high frequency.
89 | 
90 | **You should use TinyDB if:**
91 | - Your data has no time dimension. Examples include a database acting as a phonebook for Chicago, the catalogue of Beatles music, or configuration values for your dashboard app.
92 | - You will be writing to the database infrequently.
93 | 
94 | 
95 | .. References
96 | .. _InfluxDB: https://influxdata.com/
97 | .. _SQLite: https://www.sqlite.org/
98 | .. _TinyDB: https://github.com/msiemens/tinydb
--------------------------------------------------------------------------------
/docs/source/measurements.rst:
--------------------------------------------------------------------------------
1 | Working with Measurements
2 | -------------------------
3 | 
4 | TinyFlux supports working with multiple measurements. A measurement is analogous to a "table" in a traditional RDBMS. By accessing TinyFlux through a measurement, the same database API is utilized, but with a filter for the passed measurement.
5 | 
6 | To access TinyFlux through a measurement, use ``db.measurement(name)``:
7 | 
8 | >>> db = TinyFlux("my_db.csv")
9 | >>> m = db.measurement("my_measurement")
10 | >>> m.insert(Point(time=datetime(2022, 1, 1, tzinfo=timezone.utc), tags={"my_tag_key": "my_tag_value"}))
11 | >>> m.all()
12 | [Point(time=2022-01-01T00:00:00+00:00, measurement=my_measurement, tags=my_tag_key:my_tag_value)]
13 | >>> for point in m:
14 | >>>     print(point)
15 | Point(time=2022-01-01T00:00:00+00:00, measurement=my_measurement, tags=my_tag_key:my_tag_value)
16 | 
17 | .. note::
18 | 
19 |     TinyFlux uses a measurement named ``_default`` as the default measurement.
20 | 
21 | To remove a measurement and all its points from a database, use:
22 | 
23 | >>> db.drop_measurement('my_measurement')
24 | 
25 | or
26 | 
27 | >>> m.remove_all()
28 | 
29 | To get a list with the names of all measurements in the database:
30 | 
31 | >>> db.get_measurements()
32 | ["my_measurement"]
33 | 
--------------------------------------------------------------------------------
/docs/source/preparing-data.rst:
--------------------------------------------------------------------------------
1 | Preparing Data
2 | ==============
3 | 
4 | Before inserting data into TinyFlux, data must be cast into specific types of objects known as "Points". Here's an example:
5 | 
6 | >>> from tinyflux import Point
7 | >>> from datetime import datetime, timezone
8 | >>> p = Point(
9 | ...     measurement="city temperatures",
10 | ...     time=datetime(2022, 1, 1, tzinfo=timezone.utc),
11 | ...     tags={"city": "Greenwich", "country": "England"},
12 | ...     fields={"high": 52.0, "low": 41.0}
13 | ... )
14 | 
15 | This term "Point" comes from InfluxDB. A well-formed Point consists of four attributes:
16 | 
17 | - ``measurement``: Known as a "table" in relational databases, its value type is ``str``.
18 | - ``time``: The timestamp of the observation; its value is a Python ``datetime`` object that should be timezone-aware.
19 | - ``tags``: Text attributes of the observation as a Python ``dict`` of ``str|str`` key/value pairs.
20 | - ``fields``: Numeric attributes of the observation as a Python ``dict`` of ``str|int`` or ``str|float`` key/value pairs.
21 | 
22 | None of the four attributes is required during initialization; an empty Point can be initialized like the following:
23 | 
24 | >>> from tinyflux import Point
25 | >>> Point()
26 | Point(time=None, measurement=_default)
27 | 
28 | Notice that the ``time`` attribute is ``None``, and the ``measurement`` attribute has taken the value of ``_default``. The point also has no tags or fields. Tags and fields are not required, but from a user's perspective, such a data point has little meaning.
29 | 
30 | .. note::
31 | 
32 |     Points that do not have ``time`` values take on timestamps *when they are inserted into TinyFlux, not when they are created*. If you want ``time`` to reflect the time of creation, set time like: ``time=datetime.now(timezone.utc)``.
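For example, here is a minimal sketch of stamping a Point with its creation time explicitly; the tag and field names are purely illustrative:

>>> from datetime import datetime, timezone
>>> from tinyflux import Point
>>> p = Point(
...     time=datetime.now(timezone.utc),
...     tags={"room": "kitchen"},
...     fields={"temp_c": 21.5}
... )
>>> p.time is not None
True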
33 | 34 | A default ``measurement`` is assigned to Points that are initialized without one. 35 | 36 | Tags are string/string key value pairs. The reason for having separate attributes for ``tags`` and ``fields`` in TinyFlux (and in InfluxDB) is twofold: It enforces consistency of types and data on the user's side, and it allows the database to efficiently index on tags, which are attributes with low cardinality (compared to fields, which tend to have much higher variation across values). 37 | 38 | .. note:: 39 | 40 | While both TinyDB and TinyFlux are "schemaless", TinyFlux does not support complex types as values. If you want to store documents, which are often collections rather than primitive types, take a look at TinyDB. 41 | 42 | .. hint:: 43 | 44 | TinyFlux will raise a ``ValueError`` if you try to initialize a ``Point`` with incorrect types, so you can be sure you are not inserting malformed data into the database. 45 | 46 | -------------------------------------------------------------------------------- /docs/source/querying-data.rst: -------------------------------------------------------------------------------- 1 | Querying Data 2 | ============= 3 | 4 | TinyFlux's query syntax will be familiar to users of popular ORM tools. It is similar to that of TinyDB, but TinyFlux contains four different query types, one for each of a point's four attributes. 5 | 6 | The query types are: 7 | 8 | - ``TimeQuery`` for querying points by ``time``. 9 | - ``MeasurementQuery`` for querying points by ``measurement``. 10 | - ``TagQuery`` for querying points by ``tags``. 11 | - ``FieldQuery`` for querying points by ``fields``. 12 | 13 | For the remainder of this section, query examples will be illustrated with the ``.search()`` method of a TinyFlux database. This is the most common way to query TinyFlux, and the method accepts a query and returns a ``list`` of ``Point`` objects matching the query. In addition, there are a handful of other database methods that take queries as argument and perform some sort of search. See the :doc:`exploring-data` section for details. 14 | 15 | .. note:: 16 | 17 | ``.search()`` will return Points in sorted time order by default. To return points in insertion order, pass the ``sorted=False`` argument, like: ``db.search(query, sorted=False)``. 18 | 19 | 20 | Simple Queries 21 | .............. 22 | 23 | Examples of the four basic query types are below: 24 | 25 | Measurement Queries 26 | ^^^^^^^^^^^^^^^^^^^ 27 | 28 | To query for a specific measurement, the right-hand side of the ``MeasurementQuery`` should be a Python ``str``: 29 | 30 | >>> from tinyflux import MeasurementQuery 31 | >>> Measurement = MeasurementQuery() 32 | >>> db.search(Measurement == "city temperatures") 33 | 34 | Tag Queries 35 | ^^^^^^^^^^^ 36 | 37 | To query for tags, the *tag key* of interest takes the form of a query attribute (following the ``.``), while the *tag value* forms the right-hand side. An example to illustrate: 38 | 39 | >>> from tinyflux import TagQuery 40 | >>> Tags = TagQuery() 41 | >>> db.search(Tags.city == "Greenwich") 42 | 43 | This will query the database for all points with the tag key of ``city`` mapping to the tag value of ``Greenwich``. 
44 | 
45 | Field Queries
46 | ^^^^^^^^^^^^^
47 | 
48 | Similar to tags, to query for fields, the field key takes the form of a query attribute, while the field value forms the right-hand side:
49 | 
50 | >>> from tinyflux import FieldQuery
51 | >>> Fields = FieldQuery()
52 | >>> db.search(Fields.high > 50.0)
53 | 
54 | This will query the database for all points with the field key of ``high`` exceeding the value of 50.0.
55 | 
56 | Some tag keys and field keys are not valid Python identifiers (for example, if the key contains whitespace); these can alternatively be queried by subscripting the query object with the key as a string:
57 | 
58 | >>> from tinyflux import TagQuery
59 | >>> Tags = TagQuery()
60 | >>> db.search(Tags["country name"] == "United States of America")
61 | 
62 | Time Queries
63 | ^^^^^^^^^^^^
64 | 
65 | To query based on time, the "right-hand side" of the ``TimeQuery`` should be a timezone-aware ``datetime`` object:
66 | 
67 | >>> from tinyflux import TimeQuery
68 | >>> from datetime import datetime, timezone
69 | >>> Time = TimeQuery()
70 | >>> db.search(Time > datetime(2000, 1, 1, tzinfo=timezone.utc))
71 | 
72 | To query for a range of timestamps, it is most performant to combine two ``TimeQuery`` instances with the ``&`` operator (for more details on compound queries, see :ref:`Compound Queries and Query Modifiers` below):
73 | 
74 | >>> q1 = Time > datetime(1990, 1, 1, tzinfo=timezone.utc)
75 | >>> q2 = Time < datetime(2020, 1, 1, tzinfo=timezone.utc)
76 | >>> db.search(q1 & q2)
77 | 
78 | .. note::
79 | 
80 |     Queries can be optimized for faster results. See :doc:`tips` for details on optimizing queries.
81 | 
82 | 
83 | Advanced Simple Queries
84 | .......................
85 | 
86 | Some queries require transformations or comparisons that go beyond the basic operators like ``==``, ``<``, or ``>``. To this end, TinyFlux supports the following queries:
87 | 
88 | 
89 | **.map(...) <--> Arbitrary Transform Functions for All Query Types**
90 | 
91 | The ``map()`` method will transform the tag/field value, which will be compared against the right-hand side value from the query.
92 | 
93 | >>> # Get all points with an even value for 'number_of_pedals'.
94 | >>> def mod2(value):
95 | ...     return value % 2
96 | >>> Field = FieldQuery()
97 | >>> db.search(Field.number_of_pedals.map(mod2) == 0)
98 | 
99 | or:
100 | 
101 | >>> # Get all points with a measurement starting with the letter "a".
102 | >>> def get_first_letter(value):
103 | ...     return value[0]
104 | >>> Measurement = MeasurementQuery()
105 | >>> db.search(Measurement.map(get_first_letter) == "a")
106 | 
107 | .. warning::
108 | 
109 |     Resist the urge to build your own time range query using the ``.map()`` query method. This will result in slow queries. Instead, use two ``TimeQuery`` instances combined with the ``&`` or ``|`` operator.
110 | 
111 | 
112 | **.test(...) <--> Arbitrary Test Functions for All Query Types**
113 | 
114 | The ``test()`` method will transform and test the tag/field value for truthiness, with no right-hand side value necessary.
115 | 
116 | >>> # Get all points with an even value for 'number_of_pedals'.
117 | >>> def is_even(value):
118 | ...     return value % 2 == 0
119 | >>> Field = FieldQuery()
120 | >>> db.search(Field.number_of_pedals.test(is_even))
121 | 
122 | or:
123 | 
124 | >>> # Get all points with a measurement starting with the letter "a".
125 | >>> def starts_with_a(value):
126 | ...     return value.startswith("a")
127 | >>> Measurement = MeasurementQuery()
128 | >>> db.search(Measurement.test(starts_with_a))
129 | 
130 | 
131 | **.exists() <--> Existence of Tag Key or Field Key**
132 | 
133 | This applies to ``TagQuery`` and ``FieldQuery`` only.
134 | 
135 | >>> Field, Tag = FieldQuery(), TagQuery()
136 | >>> db.search(Tag.user_name.exists())
137 | >>> db.search(Field.age.exists())
138 | 
139 | 
140 | **.matches(...) and .search(...) <--> Regular Expression Queries for Measurements and Tag Values**
141 | 
142 | RegEx queries that apply to ``MeasurementQuery`` and ``TagQuery`` only.
143 | 
144 | >>> # Get all points with a user name containing "john", case-insensitive.
145 | >>> Tag = TagQuery()
146 | >>> db.search(Tag.user_name.matches('.*john.*', flags=re.IGNORECASE))
147 | 
148 | 
149 | Compound Queries and Query Modifiers
150 | ....................................
151 | 
152 | TinyFlux also supports compound queries through the use of logical operators. This is particularly useful for time queries when a time range is needed.
153 | 
154 | >>> from tinyflux import TimeQuery
155 | >>> from datetime import datetime, timezone
156 | >>> Time = TimeQuery()
157 | >>> q1 = Time > datetime(1990, 1, 1, tzinfo=timezone.utc)
158 | >>> q2 = Time < datetime(2020, 1, 1, tzinfo=timezone.utc)
159 | >>> db.search(q1 & q2)
160 | 
161 | The three supported logical operators are **logical-and**, **logical-or**, and **logical-not**.
162 | 
163 | Logical AND ("&")
164 | ^^^^^^^^^^^^^^^^^
165 | 
166 | >>> # Logical AND:
167 | >>> Time = TimeQuery()
168 | >>> t1 = datetime(2010, 1, 1, tzinfo=timezone.utc)
169 | >>> t2 = datetime(2020, 1, 1, tzinfo=timezone.utc)
170 | >>> db.search((Time >= t1) & (Time < t2)) # Get all points in the 2010s.
171 | 
172 | Logical OR ("|")
173 | ^^^^^^^^^^^^^^^^
174 | 
175 | >>> # Logical OR:
176 | >>> db.search((Time < t1) | (Time > t2)) # Get all points outside the 2010s.
177 | 
178 | Logical NOT ("~")
179 | ^^^^^^^^^^^^^^^^^
180 | 
181 | >>> # Negate a query:
182 | >>> Tag = TagQuery()
183 | >>> db.search(~(Tag.city == 'LA')) # Get all points whose city is not "LA".
184 | 
185 | .. hint::
186 | 
187 |     When using ``&`` or ``|``, make sure you wrap your queries on both sides with parentheses, or Python will not parse the expression the way you intend.
188 | 
189 |     Also, when using negation (``~``) you'll have to wrap the query you want to negate in parentheses.
190 | 
191 |     While not aesthetically pleasing to the eye, the reason for these parentheses is that Python's binary operators (``&``, ``|``, and ``~``) have a higher operator precedence than comparison operators (``==``, ``>``, etc.). For this reason, syntax like ``~User.name == 'John'`` is parsed by Python as ``(~User.name) == 'John'`` which will throw an exception. See the Python `docs on operator precedence
192 |     `_ for details.
193 | 
194 | .. note::
195 | 
196 |     You **cannot** use ``and`` as a substitute for ``&``, ``or`` as a substitute for ``|``, or ``not`` as a substitute for ``~``. The ``and``, ``or``, and ``not`` keywords are reserved in Python and cannot be overridden, as the ``&``, ``|``, and ``~`` operators have been for TinyFlux queries.
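As one further illustration before the recap, compound queries may freely mix the four query types. The sketch below assumes a database containing ``city`` tags and ``aqi`` fields, as in the examples from :doc:`getting-started`:

>>> from datetime import datetime, timezone
>>> from tinyflux import FieldQuery, TagQuery, TimeQuery
>>> Time, Tag, Field = TimeQuery(), TagQuery(), FieldQuery()
>>> start = datetime(2020, 1, 1, tzinfo=timezone.utc)
>>> end = datetime(2021, 1, 1, tzinfo=timezone.utc)
>>> # All points from 2020 tagged with the city "LA" and an AQI above 100:
>>> db.search((Time >= start) & (Time < end) & (Tag.city == "LA") & (Field.aqi > 100))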
197 | 198 | 199 | The query and search operations covered above: 200 | 201 | +-------------------------------------------------+------------------------------------------------------------------+ 202 | | **Simple Queries** | 203 | +-------------------------------------------------+------------------------------------------------------------------+ 204 | | ``MeasurementQuery() == my_measurement`` | Match any Point with the measurement ``my_measurement`` | 205 | +-------------------------------------------------+------------------------------------------------------------------+ 206 | | ``TimeQuery() < my_time_value`` | Match any Point with a timestamp prior to ``my_time_value`` | 207 | +-------------------------------------------------+------------------------------------------------------------------+ 208 | | ``TagQuery().my_tag_key == my_tag_value`` | Matches any Point with a tag key of ``my_tag_key`` mapping to | 209 | | | a tag value of ``my_tag_value`` | 210 | +-------------------------------------------------+------------------------------------------------------------------+ 211 | | ``FieldQuery().my_field_key == my_field_value`` | Matches any Point with a field key of ``my_field_key`` mapping | 212 | | | to a field value of ``my_field_value`` | 213 | +-------------------------------------------------+------------------------------------------------------------------+ 214 | | **Advanced Simple Queries** | 215 | +-------------------------------------------------+------------------------------------------------------------------+ 216 | | ``FieldQuery().my_field.exists()`` | Match any Point where a field called ``my_field`` exists | 217 | +-------------------------------------------------+------------------------------------------------------------------+ 218 | | ``FieldQuery().my_field.map()`` | Transform and tag or field value for comparison to a | 219 | | | right-hand side value. 
| 220 | +-------------------------------------------------+------------------------------------------------------------------+ 221 | | ``FieldQuery().my_field.test(func, *args)`` | Matches any Point for which the function returns | 222 | | | ``True`` | 223 | +-------------------------------------------------+------------------------------------------------------------------+ 224 | | ``FieldQuery().my_field.matches(regex)`` | Match any Point with the whole field matching the | 225 | | | regular expression | 226 | +-------------------------------------------------+------------------------------------------------------------------+ 227 | | ``FieldQuery().my_field.search(regex)`` | Match any Point with a substring of the field matching | 228 | | | the regular expression | 229 | +-------------------------------------------------+------------------------------------------------------------------+ 230 | | **Compound Queries and Query Modifiers** | 231 | +-------------------------------------------------+------------------------------------------------------------------+ 232 | | ``~(query)`` | Match Points that don't match the query | 233 | +-------------------------------------------------+------------------------------------------------------------------+ 234 | | ``(query1) & (query2)`` | Match Points that match both queries | 235 | +-------------------------------------------------+------------------------------------------------------------------+ 236 | | ``(query1) | (query2)`` | Match Points that match at least one of the queries | 237 | +-------------------------------------------------+------------------------------------------------------------------+ 238 | -------------------------------------------------------------------------------- /docs/source/removing-data.rst: -------------------------------------------------------------------------------- 1 | Removing Points 2 | =============== 3 | 4 | TinyFlux supports the removal of points with two methods. To remove by query, the ``remove()`` method is provided, and to remove all, use the ``remove_all()`` method. See below for examples. 5 | 6 | .. note:: 7 | 8 | If you are a developer, or are otherwise interested in how TinyFlux performs deletes behind the scenes, see the :doc:`design-principles` page. 9 | 10 | The following will remove all points with the measurement value of "US Metros": 11 | 12 | >>> Measurement = MeasurementQuery() 13 | >>> db.remove(Measurement == "US Metros") 14 | 15 | The following is an example of a manual time-based eviction. 16 | 17 | >>> from datetime import datetime, timedelta, timezone 18 | >>> Time = TimeQuery() 19 | >>> t = datetime.now(timezone.utc) - timedelta(days=7) 20 | >>> # Remove all points older that seven days. 21 | >>> db.remove(Time < t) 22 | 23 | To remove everything in the database , invoke ``remove_all()``: 24 | 25 | >>> db.remove_all() 26 | 27 | .. warning:: 28 | 29 | Like all other operations in TinyFlux, you cannot roll back the actions of ``remove()`` or ``remove_all()``. There is no confirmation step, no access-control mechanism that prevents non-admins from performing this action, nor are there automatic snapshots stored anywhere. If you need these kinds of features, TinyFlux is not for you. 30 | 31 | 32 | To recap, these are the two methods supporting the removal of data. 
33 | 
34 | +------------------------+-----------------------------------------------+
35 | | **Methods**                                                            |
36 | +------------------------+-----------------------------------------------+
37 | | ``db.remove(query)``   | Remove any point matching the input query.    |
38 | +------------------------+-----------------------------------------------+
39 | | ``db.remove_all()``    | Remove all points.                            |
40 | +------------------------+-----------------------------------------------+
41 | 
42 | Removing Tags and Fields
43 | ========================
44 | 
45 | TinyFlux supports the removal of individual tag and field key/values through the ``unset_tags`` and ``unset_fields`` arguments to ``.update()`` and ``.update_all()``. The values can be either individual strings or lists of strings. See below for examples.
46 | 
47 | The following will remove all tags with the key of "city" from the database:
48 | 
49 | >>> db.update_all(unset_tags="city")
50 | 
51 | The following will remove all tags with the keys of "state" and "country" from the database:
52 | 
53 | >>> db.update_all(unset_tags=["state", "country"])
54 | 
55 | The following will remove all tags with the key of "temperature" from all Points in the "bedroom" measurement:
56 | 
57 | >>> db.update(MeasurementQuery() == "bedroom", unset_tags=["temperature"])
58 | 
59 | .. warning::
60 | 
61 |     Like all other operations in TinyFlux, you cannot roll back the actions of ``update()`` or ``update_all()``. There is no confirmation step, no access-control mechanism that prevents non-admins from performing this action, nor are there automatic snapshots stored anywhere. If you need these kinds of features, TinyFlux is not for you.
62 | 
63 | 
64 | To recap, these are the two methods supporting the removal of individual tags and fields from points.
65 | 
66 | +------------------------------------------------------------+------------------------------------------------------------+
67 | | **Methods**                                                                                                             |
68 | +------------------------------------------------------------+------------------------------------------------------------+
69 | | ``db.update(query, unset_tags=..., unset_fields=...)``     | Remove the tags and fields from points matching the query. |
70 | +------------------------------------------------------------+------------------------------------------------------------+
71 | | ``db.update_all(unset_tags=..., unset_fields=...)``        | Remove specified tags and fields from all points.          |
72 | +------------------------------------------------------------+------------------------------------------------------------+
--------------------------------------------------------------------------------
/docs/source/time.rst:
--------------------------------------------------------------------------------
1 | Timezones in TinyFlux
2 | ---------------------
3 | 
4 | Timestamps going in and out of TinyFlux are of the Python ``datetime`` type. At the file storage layer, TinyFlux stores these timestamps as ISO formatted strings in UTC. For seasoned Python users, this will be a familiar practice, as they will already be using timezone-aware datetime objects in all cases and will be used to converting to and from UTC.
5 | 
6 | .. hint::
7 | 
8 |     If you aren't already using timezone-aware datetime objects, there is no better time to start than now.
9 | 
10 | .. hint::
11 | 
12 |     TLDR: All timestamps should be input as timezone-aware datetime objects in the UTC timezone. If you need to keep information about the local timezone of the observation, store it as a tag. Skip to example 5 below for proper initialization.
13 | 
14 | To illustrate the way time is handled in TinyFlux, below are the five ways time could potentially be initialized by a user. The fifth and final example is "best practice":
15 | 
16 | 1. ``time`` is not set by the user when the Point is initialized, so its default value is ``None``. AFTER it is inserted into TinyFlux, it is assigned a UTC timestamp corresponding to the time of insertion.
17 | 
18 | >>> from tinyflux import Point, TinyFlux
19 | >>> db = TinyFlux("my_db.csv") # an empty db
20 | >>> p = Point()
21 | >>> p.time is None
22 | True
23 | >>> db.insert(p)
24 | >>> p.time
25 | datetime.datetime(2021, 10, 30, 13, 53, 5, 552872, tzinfo=datetime.timezone.utc)
26 | 
27 | 2. ``time`` is set with a value, but it is not a ``datetime`` object. TinyFlux raises an exception.
28 | 
29 | >>> Point(time="2022-01-01")
30 | ValueError: Time must be datetime object.
31 | 
32 | 3. ``time`` is set with a ``datetime`` object that is "timezone-naive". TinyFlux considers this time to be local to the timezone of the computer that is running TinyFlux and will convert this time to UTC using the ``astimezone`` attribute of the ``datetime`` module upon insertion. This will lead to confusion down the road if TinyFlux is running on a remote computer, or the user was annotating data for points corresponding to places in other timezones.
33 | 
34 | >>> from datetime import datetime
35 | >>> # Example: Our computer is in California, but we are working with a dataset of
36 | >>> # air quality measurements for Beijing, China.
37 | >>> # Here, AQI was measured at 1pm local time in Beijing on Aug 28, 2021.
38 | >>> p = Point(
39 | ...     time=datetime(2021, 8, 28, 13, 0), # 1pm, timezone-naive
40 | ...     tags={"city": "beijing"},
41 | ...     fields={"aqi": 118}
42 | ... )
43 | >>> p.time
44 | datetime.datetime(2021, 8, 28, 13, 0)
45 | >>> # Insert the point into the database.
46 | >>> db.insert(p)
47 | >>> # The point is cast to UTC, assuming the time was local to California, not Beijing.
48 | >>> p.time
49 | datetime.datetime(2021, 8, 28, 20, 0, tzinfo=datetime.timezone.utc)
50 | 
51 | 
52 | 4. ``time`` is set with a ``datetime`` object that is timezone-aware, but the timezone is not UTC. TinyFlux casts the time to UTC for internal storage and retrieval, and the original timezone is lost (it is up to the user to cast the timezone again after retrieval).
53 | 
54 | >>> from tinyflux import Point, TinyFlux
55 | >>> from datetime import datetime
56 | >>> from zoneinfo import ZoneInfo
57 | >>> db = TinyFlux("my_db.csv") # an empty db
58 | >>> la_point = Point(
59 | ...     time=datetime(2000, 1, 1, tzinfo=ZoneInfo("US/Pacific")),
60 | ...     tags={"city": "Los Angeles"},
61 | ...     fields={"temp_f": 54.0}
62 | ... )
63 | >>> ny_point = Point(
64 | ...     time=datetime(2000, 1, 1, tzinfo=ZoneInfo("US/Eastern")),
65 | ...     tags={"city": "New York City"},
66 | ...     fields={"temp_f": 15.0}
67 | ... )
68 | >>> db.insert_multiple([la_point, ny_point])
69 | >>> # Notice the time attributes no longer carry the timezone information:
70 | >>> la_point.time
71 | datetime.datetime(2000, 1, 1, 8, 0, tzinfo=datetime.timezone.utc)
72 | >>> ny_point.time
73 | datetime.datetime(2000, 1, 1, 5, 0, tzinfo=datetime.timezone.utc)
74 | 
75 | .. hint::
76 | 
77 |     If you need to keep the original, non-UTC timezone along with the dataset, consider adding a ``tag`` to your point indicating the timezone, for easier conversion after retrieval. TinyFlux will not assume nor attempt to store the timezone of your data for you.
78 | 79 | 5. ``time`` is set with a ``datetime`` object that is timezone-aware and the timezone is UTC. This is the easiest way to handle time. If needed, information about the timezone is stored in a tag. 80 | 81 | >>> from datetime import datetime, timezone 82 | >>> from tinyflux import TinyFlux, Point 83 | >>> from zoneinfo import ZoneInfo 84 | >>> # Time now is 10am in Los Angeles, which is 6pm UTC: 85 | >>> t = datetime.now(timezone.utc) 86 | >>> t 87 | datetime.datetime(2022, 11, 9, 18, 0, 0, tzinfo=datetime.timezone.utc) 88 | >>> # Store the time in UTC, but keep the timezone as a tag for later use. 89 | >>> p = Point( 90 | ... time=t, 91 | ... tags={"room": "bedroom", "timezone": "America/Los_Angeles"}, 92 | ... fields={"temp": 72.0} 93 | ... ) 94 | >>> # Time is still UTC: 95 | >>> p.time 96 | datetime.datetime(2022, 11, 9, 18, 0, 0, tzinfo=datetime.timezone.utc) 97 | >>> # To cast back to local time in Los Angeles: 98 | >>> la_timezone = ZoneInfo(p.tags["timezone"]) 99 | >>> p.time.astimezone(la_timezone) 100 | datetime.datetime(2022, 11, 9, 10, 0, tzinfo=zoneinfo.ZoneInfo(key='America/Los_Angeles')) -------------------------------------------------------------------------------- /docs/source/tinyflux.rst: -------------------------------------------------------------------------------- 1 | TinyFlux API 2 | ============ 3 | 4 | See :doc:`getting-started` to get TinyFlux up and running with writing and querying data. 5 | 6 | Jump to an API section: 7 | 8 | * :ref:`TinyFlux Database API` 9 | * :ref:`Point API` 10 | * :ref:`Queries API` 11 | * :ref:`Measurement API` 12 | * :ref:`Index API` 13 | * :ref:`Storages API` 14 | * :ref:`Utils API` 15 | 16 | |hr| 17 | 18 | TinyFlux Database API 19 | --------------------- 20 | 21 | .. automodule:: tinyflux.database 22 | :members: 23 | :undoc-members: 24 | :show-inheritance: 25 | 26 | |hr| 27 | 28 | Point API 29 | --------- 30 | 31 | .. automodule:: tinyflux.point 32 | :members: 33 | :undoc-members: 34 | :show-inheritance: 35 | 36 | |hr| 37 | 38 | Queries API 39 | ----------- 40 | 41 | .. automodule:: tinyflux.queries 42 | :members: 43 | :undoc-members: 44 | :show-inheritance: 45 | 46 | |hr| 47 | 48 | Measurement API 49 | --------------- 50 | 51 | .. automodule:: tinyflux.measurement 52 | :members: 53 | :undoc-members: 54 | :show-inheritance: 55 | 56 | |hr| 57 | 58 | Index API 59 | --------- 60 | 61 | .. automodule:: tinyflux.index 62 | :members: 63 | :undoc-members: 64 | :show-inheritance: 65 | 66 | |hr| 67 | 68 | Storages API 69 | ------------ 70 | 71 | .. automodule:: tinyflux.storages 72 | :members: 73 | :undoc-members: 74 | :show-inheritance: 75 | 76 | |hr| 77 | 78 | Utils API 79 | --------- 80 | 81 | .. automodule:: tinyflux.utils 82 | :members: 83 | :undoc-members: 84 | :show-inheritance: 85 | 86 | 87 | .. |hr| raw:: html 88 | 89 |
-------------------------------------------------------------------------------- /docs/source/tips.rst: -------------------------------------------------------------------------------- 1 | Tips for TinyFlux 2 | ================= 3 | 4 | Below are some tips to get the most out of TinyFlux. 5 | 6 | 7 | Saving Space 8 | ^^^^^^^^^^^^ 9 | 10 | If you are using a text-based storage layer (such as the default ``CSVStorage``) keep in mind that every character requires usually one (but up to four) bytes of memory for storage in a UTF-8 encoding. To save space, here are a few tips: 11 | 12 | • Keep measurement names, tag keys, and field keys short and concise. 13 | • Precision matters! Even more so with text-backed storage. ``1.0000`` requires twice as much space to store compared to ``1.0``, and 5x more space than ``1``. 14 | • When inserting points into TinyFlux, make sure to set the ``compact_key_prefixes`` option to ``True`` (e.g. ``db.insert(my_point, compact_key_prefixes=True)``). This saves three bytes per tag key/value pair and five bytes per field key/value pair. 15 | 16 | If your dataset is approaching 1 GB in size, keep reading. 17 | 18 | 19 | Dealing with Growing Datasets 20 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 21 | 22 | As concurrency is not a feature of TinyFlux, a growing database will incur increases in query and index-building times. When queries start to slow down a workflow, it might be time to "shard" or de-normalize the data, or simply upgrade to a database server like InfluxDB. 23 | 24 | For example, if a TinyFlux database currently holds Points for two separate measurements, consider making two separate databases, one for each measurement: 25 | 26 | >>> from tinyflux import TinyFlux, Point, MeasurementQuery 27 | >>> from datetime import datetime, timedelta, timezone 28 | >>> db = TinyFlux("my_big_db.csv") # a growing db with two measurements 29 | >>> db.count(MeasurementQuery() == "measurement_1") 30 | 70000 31 | >>> db.count(MeasurementQuery() == "measurement_2") 32 | 85000 33 | >>> new_db = TinyFlux("my_new_single_measurement_db.csv") # a new empty db 34 | >>> for point in db: 35 | >>> if point.measurement == "measurement_2": 36 | >>> new_db.insert(point) 37 | >>> db.remove(MeasurementQuery() == "measurement_2") 38 | 85000 39 | >>> len(db) 40 | 70000 41 | >>> len(new_db) 42 | 85000 43 | 44 | .. hint:: 45 | 46 | When queries and indexes slow down a workflow, consider creating separate databases. Or, just migrate to InfluxDB. 47 | 48 | 49 | Optimizing Queries 50 | ^^^^^^^^^^^^^^^^^^ 51 | 52 | Unlike TinyDB, TinyFlux never pulls in the entirety of its data into memory (unless the ``.all()`` method is called). This has the benefit of reducing the memory footprint of the database, but means that database operations are usually I/O bound. By using an index, TinyFlux is able to construct a matching set of items from the storage layer without actually reading any of those items. For database operations that return Points, TinyFlux iterates over the storage, collects the items that belong in the set, deserializes them, and finally returns them to the caller. 53 | 54 | This ultimately means that the smaller the set of matches, the less I/O TinyFlux must perform. 55 | 56 | .. hint:: 57 | 58 | Queries that return smaller sets of matches perform best. 59 | 60 | .. warning:: 61 | 62 | Resist the urge to build your own time range query using the ``.map()`` query method. This will result in slow queries. Instead, use two ``TimeQuery`` instances combined with the ``&`` or ``|`` operator. 
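To make the warning above concrete, compare the two patterns below; the filepath, dates, and lambda are illustrative only. The first builds the range check into a Python function via ``.map()`` (the pattern the warning discourages), while the second uses the recommended pair of ``TimeQuery`` instances:

>>> from datetime import datetime, timezone
>>> from tinyflux import TinyFlux, TimeQuery
>>> db = TinyFlux("my_db.csv")
>>> Time = TimeQuery()
>>> start = datetime(2020, 1, 1, tzinfo=timezone.utc)
>>> end = datetime(2021, 1, 1, tzinfo=timezone.utc)
>>> # Discouraged: a hand-rolled time range check.
>>> db.search(Time.map(lambda t: start <= t < end) == True)
>>> # Preferred: two TimeQuery instances combined with "&".
>>> db.search((Time >= start) & (Time < end))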
63 | 64 | 65 | Keeping The Index Intact 66 | ^^^^^^^^^^^^^^^^^^^^^^^^ 67 | 68 | TinyFlux must build an index when it is initialized as it currently does not save the index upon closing. If the workflow for the session is read-only, then the index state will never be modified. If, however, a TinyFlux session consists of a mix of writes and reads, then the index will become invalid if at any time, a Point is inserted out of time order. 69 | 70 | >>> from tinyflux import TinyFlux, Point 71 | >>> from datetime import datetime, timedelta, timezone 72 | >>> db = TinyFlux("my_db.csv") 73 | >>> t = datetime.now(timezone.utc) # current time 74 | >>> db.insert(Point(time=t)) 75 | >>> db.index.valid 76 | True 77 | >>> db.insert(Point(time=t - timedelta(hours=1))) # a Point out of time order 78 | >>> db.index.valid 79 | False 80 | 81 | If ``auto-index`` is set to ``True`` (the default setting), then the next read will rebuild the index, which may just seem like a very slow query. For smaller datasets, re-indexing is usually not noticeable. 82 | 83 | .. hint:: 84 | 85 | If possible, Points should be inserted into TinyFlux in time-order. 86 | -------------------------------------------------------------------------------- /docs/source/updating-data.rst: -------------------------------------------------------------------------------- 1 | Updating Points 2 | =============== 3 | 4 | Though updating time series data tends to occur much less frequently than with other types of data, TinyFlux nonetheless supports the updating of data with two methods: 1. Update by query with the ``update()`` method, and 2. Update all points with the ``update_all()`` method. ``measurement``, ``time``, ``tags``, and/or ``fields`` are updated on an individual basis through the associated keyword arguments to these two methods. The values for these arguments are either static values (like a string, float, integer, or boolean), or a ``Callable`` returning static values. See below for examples. 5 | 6 | .. note:: 7 | 8 | If you are a developer, or are otherwise interested in how TinyFlux performs updates behind the scenes, see the :doc:`design-principles` page. 9 | 10 | To update individual points in TinyFlux, first provide a query to the ``update()`` method, followed by one or more attributes to update and their values as keyword arguments. For example, to update the measurement names in the database for all points whose measurement value is "cities" to "US Metros", use a static value to the ``measurement`` keyword argument: 11 | 12 | >>> Measurement = MeasurementQuery() 13 | >>> db.update(Measurement == "cities", measurement="US Metros") 14 | 15 | To update all timestamps for the measurement "US Metros" to be shifted backwards in time by one year, use a callable as the ``time`` keyword argument instead of a static value: 16 | 17 | >>> from datetime import timedelta 18 | >>> Measurement = MeasurementQuery() 19 | >>> db.update(Measurement == "US Metros", time=lambda x: x - timedelta(days=365)) 20 | 21 | To change all instances of "CA" to "California" in a point's tag set for the "US Metros" measurement: 22 | 23 | >>> Measurement = MeasurementQuery() 24 | >>> def california_updater(tags): 25 | ... if "state" in tags and tags["state"] == "CA": 26 | ... return {**tags, "state": "California"} 27 | ... else: 28 | ... return tags 29 | >>> db.update(Measurement == "US Metros", tags=california_updater) 30 | 31 | Field updates occur much the same way as tags. To update all items in the database, use ``update_all()``. 
For example, to convert all temperatures from Fahrenheit to Celsius if the field ``temp`` exists: 32 | 33 | >>> def fahrenheit_to_celsius(fields): 34 | ... if "temp" in fields: 35 | ... temp_f = fields["temp"] 36 | ... temp_c = (temp_f - 32.0) * (5/9) 37 | ... return {**fields, "temp": temp_c} 38 | ... else: 39 | ... return fields 40 | >>> db.update_all(fields=fahrenheit_to_celsius) 41 | 42 | .. note:: 43 | 44 | Updating data with `.update()` or `.update_all()` through the `tags` or `fields` arguments will not remove tags or fields, even if they are not returned when using a Callable as the updater. This is consistent with the Python `dict API `_, in which keys can be overwritten, but not deleted. To remove tags and fields completely, see :ref:`Removing Tags and Fields with Update` below. 45 | 46 | .. warning:: 47 | 48 | Like all other operations in TinyFlux, you cannot roll back the actions of ``update()`` or ``update_all()``. There is no confirmation step, no access-control mechanism that prevents non-admins from performing this action, nor are there automatic snapshots stored anywhere. If you need these kinds of features, TinyFlux is not for you. 49 | 50 | to recap, these are the two methods supporting the updating of data. 51 | 52 | +------------------------------------------+-----------------------------------------------------+ 53 | | **Methods** | 54 | +------------------------------------------+-----------------------------------------------------+ 55 | | ``db.update(query, ...)`` | Update any point matching the input query. | 56 | +------------------------------------------+-----------------------------------------------------+ 57 | | ``db.update_all(...)`` | Update all points. | 58 | +------------------------------------------+-----------------------------------------------------+ 59 | 60 | Removing Tags and Fields with Update 61 | ------------------------------------ 62 | 63 | TinyFlux supports the removal of individual tag and field key/values through the `unset_tags` and `unset_fields` arguments to `.update()` and `.update_all()`. The values can be either individual strings, or lists of strings. See below for examples. 64 | 65 | The following will remove all tags with the key of "city" from the database: 66 | 67 | >>> db.update_all(unset_tags="city") 68 | 69 | The following will remove all tags with the keys of "state" and "country" from the database: 70 | 71 | >>> db.update_all(unset_tags=["state", "country"]) 72 | 73 | The following will remove all tags with the key of "temperature" from all Points in the "bedroom" measurement: 74 | 75 | >>> db.update(MeasurementQuery() == "bedroom", unset_tags=["temperature"]) 76 | 77 | .. warning:: 78 | 79 | Like all other operations in TinyFlux, you cannot roll back the actions of ``update()`` or ``update_all()``. There is no confirmation step, no access-control mechanism that prevents non-admins from performing this action, nor are there automatic snapshots stored anywhere. If you need these kinds of features, TinyFlux is not for you. 80 | 81 | 82 | To recap, these are the two methods supporting the removal of individual tags and fields from points. 
83 | 84 | +------------------------------------------------------------+------------------------------------------------------------+ 85 | | **Methods** | 86 | +------------------------------------------------------------+------------------------------------------------------------+ 87 | | ``db.update(query, unset_tags=..., unset_fields=...)`` | Remove the tags and fields from points matching the query. | 88 | +------------------------------------------------------------+------------------------------------------------------------+ 89 | | ``db.update_all(query, unset_tags=..., unset_fields=...)`` | Remove specified tags and fields from all points. | 90 | +------------------------------------------------------------+------------------------------------------------------------+ -------------------------------------------------------------------------------- /docs/source/writing-data.rst: -------------------------------------------------------------------------------- 1 | Writing Data 2 | ============ 3 | 4 | The standard method for inserting a new data point is through the ``db.insert(...)`` method. To insert more than one Point at the same time, use the ``db.insert_multiple([...])`` method, which accepts a ``list`` of points. This might be useful when creating a TinyFlux database from a CSV of existing observations. 5 | 6 | .. hint:: 7 | 8 | To save space in text-based storage instances (including ``CSVStorage``), set the ``compact_key_prefixes`` argument to ``true`` in the ``.insert()`` and ``.insert_multiple()`` methods. This will result in the tag and field keys having a shorter ``t_`` and ``f_`` prefix in front of them in the storage layer rather than the default ``__tag__`` and ``__field__`` prefixes. Regardless of your choice, TinyFlux will handle Points with either prefix in the database. 9 | 10 | .. note:: 11 | 12 | **TinyFlux vs. TinyDB Alert!** 13 | 14 | In TinyDB there is a serious performance reason to use ``db.insert_multiple([...])`` over ``db.insert(...)`` as every write in TinyDB is preceded by a full read of the data. TinyFlux inserts are *append-only* and are **not** preceded by a read. Therefore, there is no significant *performance* reason to use ``db.insert_multiple([...])`` instead of ``db.insert(...)``. If you are using TinyFlux to capture real-time data, you should insert points into TinyFlux as you see them, with ``db.insert(...)``. 15 | 16 | Example: 17 | 18 | >>> from tinyflux import Point 19 | >>> p = Point( 20 | ... measurement="air quality", 21 | ... time=datetime.fromisoformat("2020-08-28T00:00:00-07:00"), 22 | ... tags={"city": "LA"}, 23 | ... fields={"aqi": 112} 24 | ... ) 25 | >>> db.insert(p) 26 | 27 | To recap, these are the two methods supporting the insertion of data. 28 | 29 | +------------------------------------------------------------------+-----------------------------------------------------+ 30 | | **Methods** | 31 | +------------------------------------------------------------------+-----------------------------------------------------+ 32 | | ``db.insert(point, compact_key_prefixes=False)`` | Insert one Point into the database. | 33 | +------------------------------------------------------------------+-----------------------------------------------------+ 34 | | ``db.insert_multiple([point, ...], compact_key_prefixes=False)`` | Insert multiple Points into the database. 
| 35 | +------------------------------------------------------------------+-----------------------------------------------------+ 36 | -------------------------------------------------------------------------------- /examples/1_initializing_and_loading_new_db.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "c3c62190", 6 | "metadata": {}, 7 | "source": [ 8 | "# Loading Data into TinyFlux\n", 9 | "\n", 10 | "This notebook demonstrates how to build a TinyFlux database from a CSV and a JSON datasource.\n", 11 | "\n", 12 | "AQI data comes from the [EPA website](https://aqs.epa.gov/aqsweb/airdata/download_files.html\n", 13 | ") and CBSA location data comes from the [US Census Bureau](https://catalog.data.gov/dataset/tiger-line-shapefile-2019-nation-u-s-current-metropolitan-statistical-area-micropolitan-statist)." 14 | ] 15 | }, 16 | { 17 | "cell_type": "markdown", 18 | "id": "2d2a2159", 19 | "metadata": {}, 20 | "source": [ 21 | "### Import TinyFlux" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": 1, 27 | "id": "3102f5df", 28 | "metadata": {}, 29 | "outputs": [], 30 | "source": [ 31 | "import csv\n", 32 | "from datetime import datetime\n", 33 | "import json\n", 34 | "from zoneinfo import ZoneInfo\n", 35 | "\n", 36 | "from tinyflux import TinyFlux, Point" 37 | ] 38 | }, 39 | { 40 | "cell_type": "code", 41 | "execution_count": 2, 42 | "id": "e2dac875", 43 | "metadata": {}, 44 | "outputs": [], 45 | "source": [ 46 | "# Files.\n", 47 | "INPUT_FILE = \"example_data/daily_aqi_by_cbsa_ca_2019-2020.csv\"\n", 48 | "DB_FILE = \"example_data/ca_aqi_2019-2020.tinyflux\"" 49 | ] 50 | }, 51 | { 52 | "cell_type": "markdown", 53 | "id": "b3d97fa9", 54 | "metadata": {}, 55 | "source": [ 56 | "### Read CSV into Memory" 57 | ] 58 | }, 59 | { 60 | "cell_type": "code", 61 | "execution_count": 3, 62 | "id": "b1251667", 63 | "metadata": {}, 64 | "outputs": [ 65 | { 66 | "name": "stdout", 67 | "output_type": "stream", 68 | "text": [ 69 | "Csv loaded, 24608 records.\n", 70 | "\n", 71 | "Column names: CBSA, CBSA Code, Date, AQI, Category, Defining Parameter, Defining Site, Number of Sites Reporting\n", 72 | "\n" 73 | ] 74 | } 75 | ], 76 | "source": [ 77 | "col_names = []\n", 78 | "data = []\n", 79 | "\n", 80 | "with open(INPUT_FILE) as f:\n", 81 | " r = csv.reader(f)\n", 82 | " for i, row in enumerate(r):\n", 83 | " if i == 0:\n", 84 | " col_names = row\n", 85 | " else:\n", 86 | " data.append(row)\n", 87 | "\n", 88 | "print(f\"Csv loaded, {len(data)} records.\\n\")\n", 89 | "print(f'Column names: {\", \".join(col_names)}\\n')" 90 | ] 91 | }, 92 | { 93 | "cell_type": "markdown", 94 | "id": "bb86a521", 95 | "metadata": {}, 96 | "source": [ 97 | "### Initialize TinyFlux DB." 
98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "execution_count": 4, 103 | "id": "d187ed07", 104 | "metadata": {}, 105 | "outputs": [], 106 | "source": [ 107 | "db = TinyFlux(DB_FILE)" 108 | ] 109 | }, 110 | { 111 | "cell_type": "markdown", 112 | "id": "85c08fbb", 113 | "metadata": {}, 114 | "source": [ 115 | "### Initialize TinyFlux Points" 116 | ] 117 | }, 118 | { 119 | "cell_type": "code", 120 | "execution_count": 5, 121 | "id": "160207a0", 122 | "metadata": {}, 123 | "outputs": [], 124 | "source": [ 125 | "points = []\n", 126 | "\n", 127 | "for row in data:\n", 128 | " # Measurement name, a string.\n", 129 | " measurement = \"aqi\"\n", 130 | " \n", 131 | " # Datetime object that is \"timezone-aware\".\n", 132 | " ts_naive = datetime.strptime(row[2], \"%Y-%m-%d\")\n", 133 | " ts_aware = ts_naive.replace(tzinfo=ZoneInfo(\"US/Pacific\"))\n", 134 | "\n", 135 | " # Tags as a dict of string/string key values.\n", 136 | " tags = {\n", 137 | " \"cbsa\": str(row[0]),\n", 138 | " \"cbsa_code\": str(row[1]),\n", 139 | " \"category\": str(row[4]),\n", 140 | " \"defining_parameter\": str(row[5]),\n", 141 | " \"defining_site\": str(row[6]),\n", 142 | " }\n", 143 | " \n", 144 | " # Fields as a dict of string/numeric key values.\n", 145 | " fields = {\n", 146 | " \"aqi\": int(row[3]),\n", 147 | " \"number_of_sites_reporting\": int(row[7]),\n", 148 | " }\n", 149 | "\n", 150 | " # Initialize the Point with the above attributes.\n", 151 | " p = Point(\n", 152 | " measurement=measurement,\n", 153 | " time=ts_aware,\n", 154 | " tags=tags,\n", 155 | " fields=fields,\n", 156 | " )\n", 157 | " \n", 158 | " points.append(p)" 159 | ] 160 | }, 161 | { 162 | "cell_type": "markdown", 163 | "id": "1b22537e", 164 | "metadata": {}, 165 | "source": [ 166 | "### Insert Points into TinyFlux" 167 | ] 168 | }, 169 | { 170 | "cell_type": "code", 171 | "execution_count": 6, 172 | "id": "b6b0363c", 173 | "metadata": {}, 174 | "outputs": [ 175 | { 176 | "data": { 177 | "text/plain": [ 178 | "24608" 179 | ] 180 | }, 181 | "execution_count": 6, 182 | "metadata": {}, 183 | "output_type": "execute_result" 184 | } 185 | ], 186 | "source": [ 187 | "db.insert_multiple(points)" 188 | ] 189 | }, 190 | { 191 | "cell_type": "code", 192 | "execution_count": 7, 193 | "id": "4c8de8c5", 194 | "metadata": {}, 195 | "outputs": [ 196 | { 197 | "name": "stdout", 198 | "output_type": "stream", 199 | "text": [ 200 | "TinyFlux database ca_aqi_2019-2020.tinyflux has 24608 records.\n" 201 | ] 202 | } 203 | ], 204 | "source": [ 205 | "print(f\"TinyFlux database {DB_FILE} has {len(db)} records.\")" 206 | ] 207 | }, 208 | { 209 | "cell_type": "markdown", 210 | "id": "1ba0c734", 211 | "metadata": {}, 212 | "source": [ 213 | "## Read Geospatial Data" 214 | ] 215 | }, 216 | { 217 | "cell_type": "code", 218 | "execution_count": null, 219 | "id": "77d09adb", 220 | "metadata": {}, 221 | "outputs": [], 222 | "source": [ 223 | "data = json.load(open(\"example_data/cbsa_ca_2019.geojson\"))" 224 | ] 225 | }, 226 | { 227 | "cell_type": "markdown", 228 | "id": "f26f360c", 229 | "metadata": {}, 230 | "source": [ 231 | "### Insert geospatial data" 232 | ] 233 | }, 234 | { 235 | "cell_type": "code", 236 | "execution_count": 8, 237 | "id": "8ad2b456", 238 | "metadata": {}, 239 | "outputs": [], 240 | "source": [ 241 | "for feature in data['features']:\n", 242 | " # Measurement name, a string.\n", 243 | " measurement = \"locations\"\n", 244 | "\n", 245 | " # Datetime object that is \"timezone-aware\".\n", 246 | " ts = datetime(2019, 1, 1, 
tzinfo=ZoneInfo(\"US/Pacific\"))\n", 247 |     "\n", 248 |     "    # Tags as a dict of string/string key values.\n", 249 |     "    tags = {\n", 250 |     "        \"cbsa\": feature['properties']['NAME'],\n", 251 |     "        \"cbsa_code\": feature['properties']['CBSAFP'],\n", 252 |     "    }\n", 253 |     "    \n", 254 |     "    # Fields as a dict of string/numeric key values.\n", 255 |     "    fields = {\n", 256 |     "        \"latitude\": feature['geometry']['coordinates'][1],\n", 257 |     "        \"longitude\": feature['geometry']['coordinates'][0],\n", 258 |     "    }\n", 259 |     "\n", 260 |     "    # Initialize the Point with the above attributes.\n", 261 |     "    p = Point(\n", 262 |     "        measurement=measurement,\n", 263 |     "        time=ts,\n", 264 |     "        tags=tags,\n", 265 |     "        fields=fields,\n", 266 |     "    )\n", 267 |     "    \n", 268 |     "    db.insert(p)" 269 |    ] 270 |   }, 271 |   { 272 |    "cell_type": "code", 273 |    "execution_count": 9, 274 |    "id": "70267fab", 275 |    "metadata": {}, 276 |    "outputs": [ 277 |     { 278 |      "name": "stdout", 279 |      "output_type": "stream", 280 |      "text": [ 281 |       "TinyFlux database ca_aqi_2019-2020.tinyflux has 24642 records.\n" 282 |      ] 283 |     } 284 |    ], 285 |    "source": [ 286 |     "print(f\"TinyFlux database {DB_FILE} has {len(db)} records.\")" 287 |    ] 288 |   } 289 |  ], 290 |  "metadata": { 291 |   "kernelspec": { 292 |    "display_name": "tinyflux-3.10.4", 293 |    "language": "python", 294 |    "name": "tinyflux-3.10.4" 295 |   }, 296 |   "language_info": { 297 |    "codemirror_mode": { 298 |     "name": "ipython", 299 |     "version": 3 300 |    }, 301 |    "file_extension": ".py", 302 |    "mimetype": "text/x-python", 303 |    "name": "python", 304 |    "nbconvert_exporter": "python", 305 |    "pygments_lexer": "ipython3", 306 |    "version": "3.10.4" 307 |   } 308 |  }, 309 |  "nbformat": 4, 310 |  "nbformat_minor": 5 311 | } 312 | -------------------------------------------------------------------------------- /examples/3_iot_datastore_with_mqtt.py: -------------------------------------------------------------------------------- 1 | r"""An example of using TinyFlux as an IOT datastore for MQTT messages. 2 | 3 | To test this script, you must be able to publish to the test Mosquitto MQTT 4 | broker, which is a free broker running at test.mosquitto.org. 5 | 6 | Port 1883 is unencrypted and unauthenticated, so it should only be used for 7 | test purposes. 8 | 9 | This script listens for messages being published to one topic on this broker. 10 | 11 | To download an MQTT command line client to publish messages, use brew: 12 | 13 | $ brew install mosquitto 14 | 15 | In one terminal window/process, start this script: 16 | 17 | $ python 3_iot_datastore_with_mqtt.py 18 | 19 | You should see "Connecting to test.mosquitto.org... Connection success.". 20 | 21 | In a second terminal window/process, copy and paste the following, which 22 | publishes a sample JSON encoded message to the test MQTT broker: 23 | 24 | $ mosquitto_pub \ 25 | -h test.mosquitto.org \ 26 | -t tinyflux_test_topic \ 27 | -m "{\"device\":\"thermostat\",\"temperature\":70.0,\"humidity\":0.25}" 28 | 29 | This multi-threaded approach to logging MQTT messages comes from Steve Cope's 30 | "Logging MQTT Sensor Data to SQLite DataBase With Python", available at 31 | http://www.steves-internet-guide.com.
32 | 33 | Author: 34 | Justin Fung (justincaseyfung@gmail.com) 35 | """ 36 | 37 | from datetime import datetime, timezone 38 | from queue import Queue 39 | import json 40 | import threading 41 | 42 | import paho.mqtt.client as mqtt 43 | 44 | from tinyflux import TinyFlux, Point 45 | 46 | 47 | MQTT_HOST = "test.mosquitto.org" 48 | MQTT_PORT = 1883 49 | MQTT_KEEPALIVE = 60 50 | MQTT_TOPIC = "tinyflux_test_topic" 51 | 52 | TINYFLUX_DB = "my_tinyflux_mqtt_database.db" 53 | 54 | # TinyFlux DB. 55 | db = TinyFlux(TINYFLUX_DB) 56 | 57 | # Inter-thread queue. 58 | q: Queue = Queue() 59 | 60 | # Init but do not set a threading exit event for graceful exit. 61 | exit_event = threading.Event() 62 | 63 | 64 | def on_connect(client, *args): 65 | """Define the on_connect callback. 66 | 67 | Subscribes to the default topic after connection has been made. 68 | 69 | Args: 70 | client: A Paho MQTT client instance. 71 | """ 72 | # Log. 73 | print("Connection success.\n") 74 | 75 | # Subscribe to the topic of interest. 76 | client.subscribe(MQTT_TOPIC) 77 | 78 | # Log. 79 | print(f"Subscribed to '{MQTT_TOPIC}' and waiting for messages.\n") 80 | 81 | return 82 | 83 | 84 | def on_disconnect(_, __, rc): 85 | """Define the on_disconnect callback. 86 | 87 | See http://docs.oasis-open.org/mqtt/mqtt/v3.1.1/os/ 88 | mqtt-v3.1.1-os.html#_Toc398718035 89 | for return codes descriptions. 90 | 91 | Args: 92 | rc: the disconnection result 93 | """ 94 | # Log. 95 | if rc == 0: 96 | print("Disconnected .\n") 97 | else: 98 | print(f"Unexpected disconnection, return code {rc}.\n") 99 | 100 | return 101 | 102 | 103 | def on_message(_, __, msg): 104 | """Define callback for new message event. 105 | 106 | Un-marshalls the message and writes new data to the inter-thread queue. 107 | 108 | Args: 109 | msg: A Paho MQTT message object. 110 | """ 111 | # Un-marshall the message. 112 | topic = msg.topic 113 | payload = json.loads(msg.payload.decode("utf-8")) 114 | 115 | # Log. 116 | print(f'• Message received for topic "{topic}"... ', flush=True, end="") 117 | 118 | # Put the message data on the queue. 119 | q.put({"topic": topic, "payload": payload}) 120 | 121 | return 122 | 123 | 124 | def initialize_mqtt_client(): 125 | """Initialize and return the MQTT client. 126 | 127 | Returns: 128 | A Paho MQTT Client object. 129 | """ 130 | # Initialize the client. 131 | client = mqtt.Client() 132 | 133 | # Register callbacks. 134 | client.on_connect = on_connect 135 | client.on_message = on_message 136 | client.on_disconnect = on_disconnect 137 | 138 | return client 139 | 140 | 141 | def run_tinyflux_worker(): 142 | """Define the TinyFlux worker thread. 143 | 144 | Loops until the exit event is set. Pops from the inter-thread queue 145 | and writes to TinyFlux. 146 | """ 147 | # Loop until exit_event is set by main thread. 148 | while True: 149 | # Check the queue for new packets. 150 | if not q.empty(): 151 | # Unpack MQTT packet. 152 | data = q.get() 153 | topic = data["topic"] 154 | payload = data["payload"] 155 | 156 | try: 157 | device = payload["device"] 158 | temperature = payload["temperature"] 159 | humidity = payload["humidity"] 160 | 161 | # Initialize a TinyFlux Point. 162 | p = Point( 163 | time=datetime.now(timezone.utc), 164 | measurement=topic, 165 | tags={"device": device}, 166 | fields={"temperature": temperature, "humidity": humidity}, 167 | ) 168 | 169 | # Insert the Point into the DB. 
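                    # Each insert is appended straight to the database file.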
170 | db.insert(p) 171 | 172 | print(f'write to "{TINYFLUX_DB}" successful!') 173 | except Exception as e: 174 | print(f"\n **Problem attempting to write: {e}") 175 | 176 | # Check for exit condition. 177 | if exit_event.is_set(): 178 | break 179 | 180 | return 181 | 182 | 183 | def main(): 184 | """Define main.""" 185 | # Log. 186 | print(f"Connecting to {MQTT_HOST}... ", flush=True, end="") 187 | 188 | # Initialize TinyFlux worker thread. 189 | t = threading.Thread(target=run_tinyflux_worker) 190 | 191 | # Start the worker thread. 192 | t.start() 193 | 194 | # Initialise MQTT CLIENT. 195 | client = initialize_mqtt_client() 196 | 197 | # Start MQTT network loop in a threaded interface to unblock main thread. 198 | client.loop_start() 199 | 200 | # Connect to the broker. 201 | client.connect(MQTT_HOST, MQTT_PORT, MQTT_KEEPALIVE) 202 | 203 | # Keep this process running until SIGINT received. 204 | try: 205 | while True: 206 | pass 207 | except KeyboardInterrupt: 208 | print("\nExiting gracefully... ", flush=True, end="") 209 | 210 | # SIGINT received: set the exit event so the worker thread knows to exit. 211 | exit_event.set() 212 | 213 | # Await spawned thread. 214 | t.join() 215 | 216 | # Stop network loop. 217 | client.loop_stop() 218 | 219 | # Close db. 220 | db.close() 221 | 222 | # Log. 223 | print("done.") 224 | 225 | 226 | if __name__ == "__main__": 227 | main() 228 | -------------------------------------------------------------------------------- /examples/4_backing_up_tinyflux_at_the_edge.py: -------------------------------------------------------------------------------- 1 | """Example of using TinyFlux at the edge along with a backup job. 2 | 3 | This is a simple way to capture sensor data and backup in the same 4 | process. 5 | 6 | This backs up to influx, but the same principle applies to any 7 | datastore of your choosing. 8 | 9 | Requires TinyFlux, influxdb-client, and schedule, downloadable from pip. 10 | 11 | See https://github.com/dbader/schedule for documentation. 12 | """ 13 | 14 | from datetime import datetime 15 | import time 16 | 17 | from influxdb_client import InfluxDBClient, Point as InfluxPoint 18 | from influxdb_client.client.write_api import SYNCHRONOUS 19 | 20 | import schedule 21 | 22 | from tinyflux import TinyFlux, Point as TinyFluxPoint, TimeQuery 23 | 24 | # This is an arbitrary module for a device. 25 | from myThermometer import ThermSensor1 # type: ignore 26 | 27 | # Your device. 28 | sensor = ThermSensor1() 29 | 30 | # TinyFlux db. 31 | db = TinyFlux("my_db.tinyflux") 32 | 33 | # Remote influx instance and associated write API. 34 | client = InfluxDBClient( 35 | url="http://localhost:8086", 36 | token="my-token", 37 | org="my-org", 38 | ) 39 | write_api = client.write_api(write_options=SYNCHRONOUS) 40 | 41 | # Keep track of the last backup (initialize to Jan. 1, 1 AD). 42 | LAST_BACKUP_TIME = datetime(1, 1, 1) 43 | 44 | 45 | # The backup job. 46 | def backup_db(): 47 | """Backup TinyFlux db to remote Influx database.""" 48 | global LAST_BACKUP_TIME 49 | 50 | # Get all points since last backup. 51 | points_needing_backup = db.search(TimeQuery() > LAST_BACKUP_TIME) 52 | 53 | # Turn TinyFlux records into Influx records. 
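    # A TinyFlux Point exposes .time, .tags, and .fields; map them onto the
    # influxdb-client Point builder with .time(), .tag(), and .field().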
54 |     influx_records = [] 55 | 56 |     for i in points_needing_backup: 57 |         p = InfluxPoint("my_measurement") 58 | 59 |         for tag_key, tag_val in i.tags.items(): 60 |             p = p.tag(tag_key, tag_val) 61 | 62 |         for field_key, field_val in i.fields.items(): 63 |             p = p.field(field_key, field_val) 64 |         p = p.time(i.time)  # Preserve the point's original timestamp. 65 |         influx_records.append(p) 66 | 67 |     # Write to Influx. 68 |     try: 69 |         write_api.write(bucket="my-bucket", record=influx_records) 70 | 71 |         # Update backup time. 72 |         LAST_BACKUP_TIME = datetime.now() 73 |     except Exception as e: 74 |         print(f"Error backing up to Influx: {e}") 75 | 76 |     return 77 | 78 | 79 | # The frequency with which you execute the backup job. 80 | schedule.every(4).hours.do(backup_db) 81 | 82 | 83 | def main(): 84 |     """Define main.""" 85 |     # Your control loop. 86 |     while True: 87 |         # Get the temperature from your device. 88 |         temperature = sensor.get_temperature() 89 | 90 |         # Make a Point and insert the reading into TinyFlux. 91 |         p = TinyFluxPoint( 92 |             measurement="my_measurement", 93 |             tags={"room": "bedroom"}, 94 |             fields={"temperature": temperature}, 95 |         ) 96 | 97 |         try: 98 |             db.insert(p) 99 |         except Exception as e: 100 |             print(f"Error writing to DB: {e}") 101 | 102 |         # Run the job that is scheduled above (if due). 103 |         schedule.run_pending() 104 | 105 |         # Sleep for your predefined sampling interval. 106 |         time.sleep(1) 107 | 108 | 109 | if __name__ == "__main__": 110 |     main() 111 | -------------------------------------------------------------------------------- /examples/README.rst: -------------------------------------------------------------------------------- 1 | TinyFlux Examples 2 | =================== 3 | 4 | This directory contains various examples of TinyFlux in action. While TinyFlux does not have any Python dependencies itself, most of the workflows that it is used in are accompanied by other Python libraries. The other libraries needed to run the examples contained in this directory are found in this directory's `requirements.txt` file. You may install them by ``cd``-ing into this directory and running: 5 | 6 | .. code-block:: bash 7 | 8 |     $ pip install -r requirements.txt 9 | 10 | 11 | Example 1: Loading a TinyFlux DB from a CSV 12 | ------------------------------------------- 13 | 14 | ``examples/1_initializing_and_loading_new_db.ipynb`` 15 | 16 | This example demonstrates a common workflow using TinyFlux, which is to create and load a new TinyFlux database with data from an existing CSV. It demonstrates creating new Point objects with associated timezone-aware datetime objects and inserting them into the database. 17 | 18 | To run the example locally, you'll need to install `Jupyter Notebook `_ and start an IPython kernel. It's a simple process; follow along with the link. 19 | 20 | 21 | Example 2: Local Analytics Workflow with a TinyFlux Database 22 | ------------------------------------------------------------ 23 | 24 | ``examples/2_analytics_workflow.ipynb`` 25 | 26 | This example demonstrates how the TinyFlux database created in the previous example serves as the source-of-truth for a simple exploratory analysis, using the example of California Air Quality Index (AQI) measurements for the years 2019 and 2020. As this example is a comparative analysis of data across years, TinyFlux and other time-based data stores are natural candidates for querying and storing the data. 27 | 28 | This example uses the beautiful `Plotly `_ library for charts and graphics, in addition to Jupyter Notebook. To install Plotly: 29 | 30 | ..
code-block:: bash 31 | 32 |     $ pip install plotly 33 | 34 | 35 | Example 3: TinyFlux as an MQTT Datastore for IOT Devices 36 | --------------------------------------------------------- 37 | 38 | ``examples/3_iot_datastore_with_mqtt.py`` 39 | 40 | This example demonstrates how TinyFlux can serve as the primary datastore for IOT devices sending data through the `MQTT `_ protocol. The script initializes an MQTT client that subscribes to a sample topic from a test MQTT broker running in the cloud. The client listens for messages and places them into a queue, where a simple worker in a background thread picks up the messages and writes them to TinyFlux. 41 | 42 | To run this example locally, you'll need the `Python MQTT client `_ from Eclipse to serve as a bridge. You may use the same client to publish messages, though the command line `Mosquitto client `_--also from Eclipse--is the preferred method. To install Paho: 43 | 44 | .. code-block:: bash 45 | 46 |     $ pip install paho-mqtt 47 | 48 | To install Mosquitto using ``brew``: 49 | 50 | .. code-block:: bash 51 | 52 |     $ brew install mosquitto 53 | 54 | 55 | Example 4: TinyFlux at the Edge (with a backup schedule) 56 | -------------------------------------------------------- 57 | 58 | ``examples/4_backing_up_tinyflux_at_the_edge.py`` 59 | 60 | This example demonstrates how TinyFlux can be backed up to a remote datastore when using TinyFlux as a datastore at the edge. The benefit of this method is using a Python-based scheduler in the same process as the capture/store control loop. To run this example, you will need to have an InfluxDB instance running, along with the `schedule` and `influxdb-client` pip libraries. 61 | 62 | .. code-block:: bash 63 | 64 |     $ pip install schedule 65 |     $ pip install 'influxdb-client[ciso]' 66 | 67 | 68 | Have Other Use-Cases for TinyFlux? 69 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 70 | 71 | We'd love to see them. `Share in a GitHub discussion here `_.
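

A Minimal Write/Query Sketch
----------------------------

All of the examples above build on the same core write-and-query loop. The snippet below is a minimal sketch of that loop rather than part of any one example: the file name, tag, and field names are arbitrary placeholders, and any writable local path will do.

.. code-block:: python

    from datetime import datetime, timezone

    from tinyflux import TinyFlux, Point, FieldQuery

    # Open (or create) a database file.
    db = TinyFlux("scratch.tinyflux")

    # Write one point with a UTC timestamp, a tag, and a numeric field.
    db.insert(
        Point(
            time=datetime.now(timezone.utc),
            tags={"room": "kitchen"},
            fields={"temperature": 21.5},
        )
    )

    # Query it back by field value.
    print(db.search(FieldQuery().temperature > 20.0))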
-------------------------------------------------------------------------------- /examples/requirements.txt: -------------------------------------------------------------------------------- 1 | notebook 2 | paho-mqtt 3 | plotly 4 | tinyflux -------------------------------------------------------------------------------- /mypy.ini: -------------------------------------------------------------------------------- 1 | [mypy] 2 | exclude = docs/ -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.black] 2 | line-length = 80 3 | exclude = '(docs|\.ipynb$)' 4 | 5 | [build-system] 6 | requires = ["setuptools>=42"] 7 | build-backend = "setuptools.build_meta" -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | black 2 | flake8 3 | flake8-docstrings 4 | influxdb_client 5 | mypy 6 | pytest 7 | pytest-cov 8 | rstcheck 9 | schedule 10 | sphinx 11 | sphinx_autodoc_typehints 12 | sphinx_rtd_theme 13 | types-influxdb-client 14 | types-paho-mqtt -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [flake8] 2 | max-line-length = 80 3 | exclude = 4 | .git, 5 | __pycache__, 6 | docs/** 7 | extend-ignore = E203 8 | 9 | [metadata] 10 | name = tinyflux 11 | version = attr: tinyflux.version.__version__ 12 | author = Justin Fung 13 | author_email = justincaseyfung@gmail.com 14 | description = The Tiny Time-Series Database Optimized for Your Happiness 15 | long_description = file: README.rst 16 | long_description_content_type = text/x-rst 17 | url = https://github.com/citrusvanilla/tinyflux 18 | project_urls = 19 | Documentation = https://tinyflux.readthedocs.io/en/latest/ 20 | classifiers = 21 | Programming Language :: Python :: 3 22 | License :: OSI Approved :: MIT License 23 | Operating System :: OS Independent 24 | Development Status :: 5 - Production/Stable 25 | Intended Audience :: Developers 26 | Intended Audience :: System Administrators 27 | License :: OSI Approved :: MIT License 28 | Topic :: Database 29 | Topic :: Database :: Database Engines/Servers 30 | Topic :: Utilities 31 | Programming Language :: Python :: 3 32 | Programming Language :: Python :: 3.7 33 | Programming Language :: Python :: 3.8 34 | Programming Language :: Python :: 3.9 35 | Programming Language :: Python :: 3.10 36 | Programming Language :: Python :: 3.11 37 | Programming Language :: Python :: 3.12 38 | Programming Language :: Python :: Implementation :: CPython 39 | Programming Language :: Python :: Implementation :: PyPy 40 | Operating System :: OS Independent 41 | Typing :: Typed 42 | 43 | [options] 44 | packages = tinyflux 45 | python_requires = >=3.7 46 | 47 | [options.package_data] 48 | tinyflux = py.typed 49 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | """Tests for TinyFlux. 2 | 3 | TinyFlux uses pyTest. 4 | 5 | Test fixtures and configuration are found in tests/conftest.py. 
6 | """ 7 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | """PyTest configuration and test fixtures.""" 2 | 3 | import pytest 4 | 5 | from tinyflux.storages import CSVStorage, MemoryStorage 6 | 7 | 8 | class CSVStorageWithCounters(CSVStorage): # pragma: no cover 9 | """CSVStorage with some counters for read/write/append ops.""" 10 | 11 | def __init__(self, *args, **kwargs): 12 | """Init this class.""" 13 | super().__init__(*args, **kwargs) 14 | self.reindex_count = 0 15 | self.write_count = 0 16 | self.append_count = 0 17 | 18 | def append(self, points): 19 | """Append with counter.""" 20 | self.append_count += 1 21 | return super().append(points) 22 | 23 | def _write(self, points): 24 | """Write with counter.""" 25 | self.write_count += 1 26 | return super()._write(points) 27 | 28 | 29 | class MemoryStorageWithCounters(MemoryStorage): # pragma: no cover 30 | """MemoryStorage with some counters for read/write/append ops.""" 31 | 32 | def __init__(self): 33 | """Init a MemoryStorage instance.""" 34 | super().__init__() 35 | self.append_count = 0 36 | self.reindex_count = 0 37 | self.write_count = 0 38 | 39 | def append(self, points): 40 | """Append with counter.""" 41 | self.append_count += 1 42 | return super().append(points) 43 | 44 | def _write(self, points): 45 | """Write with counter.""" 46 | self.write_count += 1 47 | return super()._write(points) 48 | 49 | 50 | @pytest.fixture 51 | def mem_storage_with_counters(): 52 | """Return a MemoryStorage class with counters for read/write/append.""" 53 | return MemoryStorageWithCounters 54 | 55 | 56 | @pytest.fixture 57 | def csv_storage_with_counters(): 58 | """Return a CSVStorage class with counters for read/write/append.""" 59 | return CSVStorageWithCounters 60 | -------------------------------------------------------------------------------- /tests/test_index.py: -------------------------------------------------------------------------------- 1 | """Tests for tinyflux.index module.""" 2 | 3 | from datetime import datetime, timezone, timedelta 4 | import pytest 5 | 6 | from tinyflux import Point, FieldQuery, TagQuery, MeasurementQuery, TimeQuery 7 | from tinyflux.index import Index 8 | 9 | 10 | def test_repr(): 11 | """Test __repr__ of Index.""" 12 | t = datetime.now(timezone.utc) 13 | index = Index() 14 | 15 | assert repr(index) == "" 16 | 17 | index = Index() 18 | index.build([Point(tags={"tk": "tv"}), Point(time=t), Point(time=t)]) 19 | 20 | assert repr(index) == "" 21 | 22 | 23 | def test_initialize_empty_index(): 24 | """Test initializing an empty Index.""" 25 | index = Index() 26 | 27 | assert isinstance(index._num_items, int) 28 | assert not index._num_items 29 | 30 | assert isinstance(index._tags, dict) 31 | assert not index._tags 32 | 33 | assert isinstance(index._fields, dict) 34 | assert not index._fields 35 | 36 | assert isinstance(index._measurements, dict) 37 | assert not index._measurements 38 | 39 | assert isinstance(index._timestamps, list) 40 | assert not index._timestamps 41 | 42 | 43 | def test_build(): 44 | """Test building an Index.""" 45 | t1 = datetime.now(timezone.utc) 46 | t2 = datetime.now(timezone.utc) + timedelta(seconds=1) 47 | t3 = datetime.now(timezone.utc) + timedelta(seconds=2) 48 | 49 | p1 = Point(time=t1, tags={"city": "la"}) 50 | p2 = Point( 51 | time=t2, measurement="cities", tags={"city": "sf"}, fields={"temp": 70} 52 | ) 53 | p3 = Point( 54 | time=t3, 55 | 
measurement="states", 56 | tags={"state": "ca"}, 57 | fields={"pop": 30000000}, 58 | ) 59 | 60 | index = Index() 61 | assert index.empty 62 | 63 | index.build([]) 64 | assert index.empty 65 | 66 | index.build([Point(time=t1), Point(time=t1)]) 67 | assert index._num_items == 2 68 | 69 | index.build([Point(time=t1)]) 70 | assert index._num_items == 1 71 | 72 | index = Index() 73 | index.build([p1, p2, p3]) 74 | 75 | assert index._num_items == 3 76 | 77 | assert index._tags == { 78 | "city": {"la": [0], "sf": [1]}, 79 | "state": {"ca": [2]}, 80 | } 81 | 82 | assert index._fields == {"temp": [(1, 70)], "pop": [(2, 30000000)]} 83 | 84 | assert index._measurements == { 85 | "_default": [0], 86 | "cities": [1], 87 | "states": [2], 88 | } 89 | 90 | assert index._timestamps == [i.timestamp() for i in [t1, t2, t3]] 91 | 92 | 93 | def test_empty_property(): 94 | """Test is_empty property of Index.""" 95 | index = Index() 96 | t = datetime.now(timezone.utc) 97 | assert index.empty 98 | 99 | index.build([Point(time=t) for _ in range(10)]) 100 | assert not index.empty 101 | 102 | index._reset() 103 | assert index.empty 104 | 105 | index.insert([Point(time=t) for _ in range(10)]) 106 | assert not index.empty 107 | 108 | 109 | def test_insert_time_method(): 110 | """Test _insert_time helper of Index.""" 111 | index = Index() 112 | 113 | t1 = datetime.now(timezone.utc) 114 | index._insert_time(t1) 115 | assert index._timestamps == [t1.timestamp()] 116 | 117 | t2 = datetime.now(timezone.utc) 118 | index._insert_time(t2) 119 | assert index._timestamps == [t1.timestamp(), t2.timestamp()] 120 | 121 | 122 | def test_index_measurements_method(): 123 | """Test _insert_measurements helper of Index.""" 124 | index = Index() 125 | 126 | index._insert_measurements(0, "_default") 127 | assert index._measurements == {"_default": [0]} 128 | 129 | index._insert_measurements(1, "cities") 130 | assert index._measurements == {"_default": [0], "cities": [1]} 131 | 132 | 133 | def test_insert_tags_method(): 134 | """Test _insert_tags helper of Index.""" 135 | index = Index() 136 | 137 | index._insert_tags(0, {"city": "la"}) 138 | assert index._tags == {"city": {"la": [0]}} 139 | 140 | index._insert_tags(1, {"state": "ca"}) 141 | assert index._tags == {"city": {"la": [0]}, "state": {"ca": [1]}} 142 | 143 | index._insert_tags(2, {"city": "la"}) 144 | assert index._tags == {"city": {"la": [0, 2]}, "state": {"ca": [1]}} 145 | 146 | 147 | def test_insert_fields_method(): 148 | """Test _insert_fields helper of Index.""" 149 | index = Index() 150 | 151 | index._insert_fields(0, {"temp": 70.0}) 152 | assert index._fields == {"temp": [(0, 70.0)]} 153 | 154 | index._insert_fields(1, {"temp": 71.0}) 155 | assert index._fields == {"temp": [(0, 70.0), (1, 71.0)]} 156 | 157 | index._insert_fields(2, {"pop": 5000}) 158 | assert index._fields == { 159 | "temp": [(0, 70.0), (1, 71.0)], 160 | "pop": [(2, 5000)], 161 | } 162 | 163 | 164 | def test_reset_method(): 165 | """Test reset of Index.""" 166 | index = Index() 167 | index.insert([Point(time=datetime.now(timezone.utc))]) 168 | assert not index.empty 169 | 170 | index._reset() 171 | assert index.empty 172 | 173 | 174 | def test_search_helper_exception(): 175 | """Test that the search helper of the index raises exceptions.""" 176 | index = Index() 177 | 178 | with pytest.raises( 179 | TypeError, match="Query must be SimpleQuery or CompoundQuery." 
180 | ): 181 | index._search_helper(TimeQuery()) 182 | 183 | with pytest.raises( 184 | TypeError, match="Query must be SimpleQuery or CompoundQuery." 185 | ): 186 | index._search_helper(TagQuery().a) 187 | 188 | with pytest.raises( 189 | TypeError, match="Query must be SimpleQuery or CompoundQuery." 190 | ): 191 | index._search_helper(FieldQuery().a) 192 | 193 | 194 | def test_search_measurement_query(): 195 | """Test search_query of Index on MeasurementQuery.""" 196 | index = Index() 197 | q = MeasurementQuery() == "_default" 198 | 199 | index._insert_measurements(0, "_default") 200 | index._insert_measurements(1, "cities") 201 | index._insert_measurements(2, "_default") 202 | assert index._measurements == {"_default": [0, 2], "cities": [1]} 203 | 204 | rst = index.search(q) 205 | assert rst.items == {0, 2} 206 | 207 | 208 | def test_search_time_query(): 209 | """Test search_query of Index on TimeQuery.""" 210 | index = Index() 211 | t_now = datetime.now(timezone.utc) 212 | 213 | t0 = t_now - timedelta(days=3) 214 | t1 = t_now - timedelta(days=2) 215 | t2 = t_now - timedelta(days=1) 216 | t3 = t_now 217 | t4 = t_now 218 | t5 = t_now + timedelta(days=1) 219 | t6 = t_now + timedelta(days=2) 220 | 221 | index._insert_time(t1) 222 | index._insert_time(t2) 223 | index._insert_time(t3) 224 | index._insert_time(t4) 225 | index._insert_time(t5) 226 | assert index._timestamps == [i.timestamp() for i in [t1, t2, t3, t4, t5]] 227 | 228 | # Less than or equal. 229 | q = TimeQuery() <= t0 230 | assert index.search(q).items == set({}) 231 | q = TimeQuery() <= t1 232 | assert index.search(q).items == {0} 233 | q = TimeQuery() <= t4 234 | assert index.search(q).items == {0, 1, 2, 3} 235 | 236 | # Less than. 237 | q = TimeQuery() < t1 238 | assert index.search(q).items == set({}) 239 | q = TimeQuery() < t3 240 | assert index.search(q).items == {0, 1} 241 | 242 | # Greater than or equal. 243 | q = TimeQuery() >= t1 244 | assert index.search(q).items == {0, 1, 2, 3, 4} 245 | q = TimeQuery() >= t3 246 | assert index.search(q).items == {2, 3, 4} 247 | q = TimeQuery() >= t6 248 | assert index.search(q).items == set({}) 249 | 250 | # Greater than. 251 | q = TimeQuery() > t2 252 | assert index.search(q).items == {2, 3, 4} 253 | q = TimeQuery() > t5 254 | assert index.search(q).items == set({}) 255 | 256 | # Equal to. 257 | q = TimeQuery() == t1 258 | assert index.search(q).items == {0} 259 | q = TimeQuery() == t3 260 | assert index.search(q).items == {2, 3} 261 | q = TimeQuery() == t6 262 | assert index.search(q).items == set({}) 263 | 264 | # Not equal to. 265 | q = TimeQuery() != t2 266 | assert index.search(q).items == {0, 2, 3, 4} 267 | q = TimeQuery() != t3 268 | assert index.search(q).items == {0, 1, 4} 269 | q = TimeQuery() != t6 270 | assert index.search(q).items == {0, 1, 2, 3, 4} 271 | 272 | # Other type of test. 
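    # .test() wraps an arbitrary predicate; for TimeQuery the index can still
    # evaluate it against the stored timestamps, as the results below show.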
273 | q = TimeQuery().test(lambda x: x != t2) 274 | assert index.search(q).items == {0, 2, 3, 4} 275 | q = TimeQuery().test(lambda x: x != t3) 276 | assert index.search(q).items == {0, 1, 4} 277 | q = TimeQuery().test(lambda x: x != t6) 278 | assert index.search(q).items == {0, 1, 2, 3, 4} 279 | 280 | 281 | def test_search_tags_query(): 282 | """Test search_query of Index on TagQuery.""" 283 | index = Index() 284 | 285 | index._insert_tags(0, {"city": "la", "state": "ca"}) 286 | index._insert_tags(1, {"city": "sf", "state": "ca"}) 287 | index._insert_tags(2, {"city": "sf"}) 288 | index._insert_tags(3, {"neighborhood": "dtla"}) 289 | assert index._tags == { 290 | "city": {"la": [0], "sf": [1, 2]}, 291 | "state": {"ca": [0, 1]}, 292 | "neighborhood": {"dtla": [3]}, 293 | } 294 | 295 | rst = index.search(TagQuery().city == "la") 296 | assert rst.items == {0} 297 | 298 | rst = index.search(TagQuery().city != "la") 299 | assert rst.items == {1, 2} 300 | 301 | rst = index.search(TagQuery().city == "sf") 302 | assert rst.items == {1, 2} 303 | 304 | rst = index.search(TagQuery().city != "sf") 305 | assert rst.items == {0} 306 | 307 | rst = index.search(TagQuery().state == "ca") 308 | assert rst.items == {0, 1} 309 | 310 | rst = index.search(TagQuery().state != "ca") 311 | assert rst.items == set({}) 312 | 313 | rst = index.search(TagQuery().neighborhood == "dtla") 314 | assert rst.items == {3} 315 | 316 | rst = index.search(TagQuery().neighborhood != "dtla") 317 | assert rst.items == set({}) 318 | 319 | 320 | def test_search_field_query(): 321 | """Test search_query of Index on FieldQuery.""" 322 | # An index. 323 | index = Index() 324 | 325 | index._insert_fields(0, {"temp": 78.3}) 326 | index._insert_fields(1, {"temp": 59.1}) 327 | index._insert_fields(2, {"pop": 30000000}) 328 | assert index._fields == { 329 | "temp": [(0, 78.3), (1, 59.1)], 330 | "pop": [(2, 30000000)], 331 | } 332 | 333 | # Queries. 334 | rst = index.search(FieldQuery().temp == 70.0) 335 | assert rst.items == set({}) 336 | 337 | rst = index.search(FieldQuery().temp != 70.0) 338 | assert rst.items == {0, 1} 339 | 340 | rst = index.search(FieldQuery().pop >= 10000000) 341 | assert rst.items == set({2}) 342 | 343 | rst = index.search(FieldQuery().pop > 40000000) 344 | assert rst.items == set({}) 345 | 346 | rst = index.search(FieldQuery().pop < 1000) 347 | assert rst.items == set({}) 348 | 349 | rst = index.search(FieldQuery().pop <= 1000) 350 | assert rst.items == set({}) 351 | 352 | 353 | def test_search_compound_query_not(): 354 | """Test search_query of Index on compound 'not' queries.""" 355 | # Some timestamps. 356 | t_now = datetime.now(timezone.utc) 357 | 358 | # Some points. 359 | p1 = Point( 360 | time=t_now - timedelta(days=1), 361 | tags={"city": "la"}, 362 | fields={"temp": 70.0}, 363 | ) 364 | p2 = Point(time=t_now, tags={"state": "ca"}, fields={"pop": 30000000}) 365 | 366 | # An index. 367 | index = Index() 368 | index.build([p1, p2]) 369 | 370 | # Query types. 371 | meas_q = MeasurementQuery() == "cities" 372 | fiel_q = FieldQuery().temp == 70.0 373 | time_q = TimeQuery() == t_now 374 | tags_q = TagQuery().city == "la" 375 | 376 | # Measurement query. 377 | rst = index.search(~meas_q) 378 | assert rst.items == {0, 1} 379 | 380 | # Field query. Note for Field Queries, a NOT operator means we have to 381 | # check every single item in the storage layer. 382 | rst = index.search(~fiel_q) 383 | assert rst.items == {0, 1} 384 | 385 | # Compound NOT FieldQuery. 
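    # ANDing the non-indexable NOT FieldQuery with an indexable TagQuery
    # lets the index narrow the candidates back down to a single item.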
386 | rst = index.search(~fiel_q & tags_q) 387 | assert rst.items == {0} 388 | 389 | # Time query. 390 | rst = index.search(~time_q) 391 | assert rst.items == {0} 392 | 393 | # Tag query. 394 | rst = index.search(~tags_q) 395 | assert rst.items == {1} 396 | 397 | 398 | def test_search_compound_query_and(): 399 | """Test search_query of Index on compound 'and' queries.""" 400 | # Some timestamps. 401 | t_now = datetime.now(timezone.utc) 402 | 403 | # Some points. 404 | p1 = Point( 405 | time=t_now - timedelta(days=2), 406 | measurement="cities", 407 | tags={"city": "la"}, 408 | fields={"temp": 70.0}, 409 | ) 410 | p2 = Point( 411 | time=t_now - timedelta(days=1), 412 | measurement="states", 413 | tags={"state": "ca"}, 414 | fields={"pop": 30000000}, 415 | ) 416 | p3 = Point( 417 | time=t_now, 418 | measurement="cities", 419 | tags={"city": "la"}, 420 | fields={"temp": 82.8}, 421 | ) 422 | 423 | # An index. 424 | index = Index() 425 | index.build([p1, p2, p3]) 426 | 427 | # Query types. 428 | meas_q = MeasurementQuery() == "cities" 429 | fiel_q = FieldQuery().temp == 70.0 430 | time_q = TimeQuery() == t_now 431 | tags_q = TagQuery().city == "la" 432 | 433 | # Measurement and Field. 434 | rst = index.search(meas_q & fiel_q) 435 | assert rst.items == {0} 436 | 437 | # Measurement and Time. 438 | rst = index.search(meas_q & time_q) 439 | assert rst.items == {2} 440 | 441 | # Measurement and Tags. 442 | rst = index.search(meas_q & tags_q) 443 | assert rst.items == {0, 2} 444 | 445 | # Field and Time. 446 | rst = index.search(fiel_q & time_q) 447 | assert rst.items == set({}) 448 | 449 | # Field and Tags. 450 | rst = index.search(fiel_q & tags_q) 451 | assert rst.items == {0} 452 | 453 | # Time and Tags. 454 | rst = index.search(time_q & tags_q) 455 | assert rst.items == {2} 456 | 457 | 458 | def test_search_compound_query_or(): 459 | """Test search_query of Index on compound 'or' queries.""" 460 | # Some timestamps. 461 | t_now = datetime.now(timezone.utc) 462 | 463 | # Some points. 464 | p1 = Point( 465 | time=t_now - timedelta(days=2), 466 | measurement="cities", 467 | tags={"city": "la"}, 468 | fields={"temp": 70.0}, 469 | ) 470 | p2 = Point( 471 | time=t_now - timedelta(days=1), 472 | measurement="states", 473 | tags={"state": "ca"}, 474 | fields={"pop": 30000000}, 475 | ) 476 | p3 = Point( 477 | time=t_now, 478 | measurement="cities", 479 | tags={"city": "la"}, 480 | fields={"temp": 82.8}, 481 | ) 482 | 483 | # An index. 484 | index = Index() 485 | index.build([p1, p2, p3]) 486 | 487 | # Query types. 488 | meas_q = MeasurementQuery() == "cities" 489 | fiel_q = FieldQuery().temp == 70.0 490 | time_q = TimeQuery() == t_now 491 | tags_q = TagQuery().city == "la" 492 | 493 | # Measurement or Field. 494 | rst = index.search(meas_q | fiel_q) 495 | assert rst.items == {0, 2} 496 | 497 | # Measurement or Time. 498 | rst = index.search(meas_q | time_q) 499 | assert rst.items == {0, 2} 500 | 501 | # Measurement or Tags. 502 | rst = index.search(meas_q | tags_q) 503 | assert rst.items == {0, 2} 504 | 505 | # Field or Time. 506 | rst = index.search(fiel_q | time_q) 507 | assert rst.items == {0, 2} 508 | 509 | # Field or Tags. 510 | rst = index.search(fiel_q | tags_q) 511 | assert rst.items == {0, 2} 512 | 513 | # Time or Tags. 
514 | rst = index.search(time_q | tags_q) 515 | assert rst.items == {0, 2} 516 | 517 | 518 | def test_update(): 519 | """Test update method of Index.""" 520 | index = Index() 521 | t = datetime.now(timezone.utc) 522 | 523 | index.insert([Point(time=t), Point(time=t)]) 524 | assert index._num_items == 2 525 | 526 | index.insert([Point(time=t)]) 527 | assert index._num_items == 3 528 | -------------------------------------------------------------------------------- /tests/test_point.py: -------------------------------------------------------------------------------- 1 | """Tests for the tinyflux.point module.""" 2 | 3 | from datetime import datetime, timezone, timedelta 4 | import pytest 5 | from tinyflux.point import Point, validate_tags, validate_fields 6 | 7 | 8 | def test_repr(): 9 | """Test the repr method of Point class.""" 10 | t = datetime.now(timezone.utc) 11 | t_str = t.isoformat() 12 | 13 | p = Point( 14 | time=t, 15 | measurement="m", 16 | tags={"a": "b", "c": "d"}, 17 | fields={"my_field": 3.0}, 18 | ) 19 | s = ( 20 | f"Point(time={t_str}, " 21 | f"measurement=m, " 22 | f"tags=a:b; c:d, " 23 | f"fields=my_field:3.0)" 24 | ) 25 | 26 | assert repr(p) == s 27 | 28 | 29 | def test_args_and_kwargs(): 30 | """Test validation of args and kwargs for Point class.""" 31 | # Point with no args should be valid. 32 | Point() 33 | 34 | # Point with unnamed args should be invalid. 35 | with pytest.raises(TypeError): 36 | Point(1) 37 | 38 | with pytest.raises(TypeError): 39 | Point({"tk": "tv"}) 40 | 41 | with pytest.raises(TypeError): 42 | Point({"tk": 1}) 43 | 44 | with pytest.raises(TypeError): 45 | Point("my_measurement") 46 | 47 | # Point with bad kwargs should be invalid. 48 | with pytest.raises(TypeError): 49 | Point(a=1) 50 | 51 | with pytest.raises(TypeError): 52 | Point( 53 | time=datetime.now(timezone.utc), 54 | tags={}, 55 | fields={}, 56 | measurement="", 57 | other_field={}, 58 | ) 59 | 60 | # Point with bad time type. 61 | with pytest.raises(ValueError): 62 | Point(time=1) 63 | 64 | # Point with bad measurement type. 
65 | with pytest.raises(ValueError): 66 | Point(measurement=1) 67 | 68 | 69 | def test_validate_tags(): 70 | """Test validate_tags function.""" 71 | with pytest.raises(ValueError): 72 | validate_tags(3) 73 | 74 | with pytest.raises(ValueError): 75 | validate_tags({1: "A"}) 76 | 77 | with pytest.raises(ValueError): 78 | validate_tags({"a": 1}) 79 | 80 | validate_tags({"a": "b"}) 81 | 82 | 83 | def test_validate_fields(): 84 | """Test validate_fields function.""" 85 | with pytest.raises(ValueError): 86 | validate_fields(3) 87 | 88 | with pytest.raises(ValueError): 89 | validate_fields({1: "A"}) 90 | 91 | with pytest.raises(ValueError): 92 | validate_fields({"a": "A"}) 93 | 94 | validate_fields({"a": 1}) 95 | 96 | 97 | def test_time(): 98 | """Test Point time attribute.""" 99 | p = Point() 100 | t = datetime.now(timezone.utc) - timedelta(days=1) 101 | 102 | assert p.time == p._time 103 | p.time = t 104 | assert p.time == p._time == t 105 | 106 | valid_values = [ 107 | datetime.now(timezone.utc), 108 | datetime.strptime("01-01-2000 00:00:00", "%m-%d-%Y %H:%M:%S"), 109 | datetime.strptime("01-01-3000 00:00:00", "%m-%d-%Y %H:%M:%S"), 110 | ] 111 | 112 | points = [ 113 | Point( 114 | time=i, 115 | tags={"city": "nyc"}, 116 | fields={"temp_f": 30.1}, 117 | ) 118 | for i in valid_values 119 | ] 120 | 121 | for point, time in zip(points, valid_values): 122 | assert point.time == time 123 | 124 | 125 | def test_measurement(): 126 | """Test Point measurement attribute.""" 127 | p = Point() 128 | m = "m" 129 | 130 | assert p.measurement == p._measurement 131 | p.measurement = m 132 | assert p.measurement == p._measurement == m 133 | 134 | valid_values = ["", "_", "some_measurement"] 135 | 136 | points = [ 137 | Point( 138 | measurement=i, 139 | fields={"num_restaurants": 60}, 140 | ) 141 | for i in valid_values 142 | ] 143 | 144 | for point, measurement in zip(points, valid_values): 145 | assert point.measurement == measurement 146 | 147 | 148 | def test_tags(): 149 | """Test Point tags attribute.""" 150 | # Test invalid tags. 151 | invalid_values = [ 152 | 123.22, 153 | True, 154 | datetime.now(timezone.utc), 155 | {123: True}, 156 | {True: True}, 157 | {datetime.now(timezone.utc): "all good"}, 158 | {tuple((1, 2)): "ok"}, 159 | {"key": {"a": "b"}}, 160 | {"a": True}, 161 | {"a": 123}, 162 | ] 163 | 164 | for i in invalid_values: 165 | with pytest.raises((ValueError, TypeError)): 166 | Point( 167 | time=datetime.now(timezone.utc), 168 | tags=i, 169 | fields={"num_restaurants": 10}, 170 | ) 171 | 172 | # Test valid tags. 173 | p = Point() 174 | tags = {"a": "b"} 175 | 176 | assert p.tags == p._tags == {} 177 | p.tags = tags 178 | assert p.tags == p._tags == tags 179 | 180 | valid_values = [ 181 | { 182 | "key1": "value1", 183 | }, 184 | { 185 | "key1": "", 186 | }, 187 | { 188 | "key1": None, 189 | }, 190 | {"key2": "value2", "key3": "value3"}, 191 | ] 192 | 193 | points = [ 194 | Point( 195 | time=datetime.now(timezone.utc), 196 | tags=i, 197 | fields={"num_restaurants": 10}, 198 | ) 199 | for i in valid_values 200 | ] 201 | 202 | for point, tags in zip(points, valid_values): 203 | assert point.tags == tags 204 | 205 | 206 | def test_fields(): 207 | """Test Point fields attribute.""" 208 | # Invalid fields. 
209 | invalid_values = [ 210 | 123.22, 211 | True, 212 | datetime.now(timezone.utc), 213 | {123: True}, 214 | {True: True}, 215 | {datetime.now(timezone.utc): "all good"}, 216 | {tuple((1, 2)): "ok"}, 217 | {"key": {"a": "b"}}, 218 | {"a": True}, 219 | {"a": 123}, 220 | ] 221 | 222 | for i in invalid_values: 223 | with pytest.raises(ValueError): 224 | Point( 225 | time=datetime.now(timezone.utc), 226 | tags={"key1", "value1"}, 227 | fields=i, 228 | ) 229 | 230 | p = Point() 231 | fields = {"a": 1.0} 232 | 233 | assert p.fields == p._fields 234 | p.fields = fields 235 | assert p.fields == p._fields == fields 236 | 237 | valid_values = [ 238 | {"key1": None}, 239 | {"key2": 3}, 240 | {"key3": 33333.3}, 241 | {"key4": 33333.3, "key5": 3}, 242 | ] 243 | 244 | points = [ 245 | Point( 246 | time=datetime.now(timezone.utc), 247 | tags={"tag1": "value1"}, 248 | fields=i, 249 | ) 250 | for i in valid_values 251 | ] 252 | 253 | for point, fields in zip(points, valid_values): 254 | assert point.fields == fields 255 | 256 | 257 | def test_points_are_equal(): 258 | """Test the __eq__ method of Point class.""" 259 | time_now = datetime.now(timezone.utc) 260 | 261 | p1 = Point( 262 | time=time_now, 263 | tags={"city": "nyc"}, 264 | fields={"temp_f": 30.1}, 265 | ) 266 | 267 | p2 = Point( 268 | time=time_now, 269 | tags={"city": "nyc"}, 270 | fields={"temp_f": 30.1}, 271 | ) 272 | 273 | p3 = Point( 274 | time=time_now, 275 | tags={"city": "los angeles"}, 276 | fields={"temp_f": 70.1}, 277 | ) 278 | 279 | assert p1 == p2 280 | assert p1 != p3 281 | assert p2 != p3 282 | assert p1 != {} 283 | assert p2 != {} 284 | assert p3 != {} 285 | 286 | 287 | def test_serialize_point(): 288 | """Test serialization of a Point object.""" 289 | time_now = datetime.now(timezone.utc) 290 | time_now_str = time_now.replace(tzinfo=None).isoformat() 291 | 292 | p1 = Point( 293 | time=time_now, 294 | tags={"city": "nyc"}, 295 | fields={"temp_f": 30.1}, 296 | ) 297 | 298 | p2 = Point( 299 | time=time_now, 300 | measurement="cities", 301 | tags={"city": "la"}, 302 | fields={"temp_f": 75.1, "population": 15000000}, 303 | ) 304 | 305 | p_tuple1 = p1._serialize_to_list() 306 | p_tuple2 = p2._serialize_to_list() 307 | 308 | p_tuple_expected1 = ( 309 | time_now_str, 310 | "_default", 311 | "_tag_city", 312 | "nyc", 313 | "_field_temp_f", 314 | "30.1", 315 | ) 316 | 317 | p_tuple_expected2 = ( 318 | time_now_str, 319 | "cities", 320 | "_tag_city", 321 | "la", 322 | "_field_temp_f", 323 | "75.1", 324 | "_field_population", 325 | "15000000.0", 326 | ) 327 | 328 | assert p_tuple1 == p_tuple_expected1 329 | assert p_tuple2 == p_tuple_expected2 330 | 331 | 332 | def test_deserialize_valid_point(): 333 | """Test deserialization of a Point object.""" 334 | time_now = datetime.now(timezone.utc) 335 | time_now_str = time_now.isoformat() 336 | 337 | p_tuple = ( 338 | time_now_str, 339 | "_default", 340 | "_tag_city", 341 | "nyc", 342 | "_field_temp_f", 343 | "30.1", 344 | ) 345 | 346 | p1_expected = Point( 347 | time=time_now, tags={"city": "nyc"}, fields={"temp_f": 30.1} 348 | ) 349 | 350 | p1 = Point()._deserialize_from_list(p_tuple) 351 | 352 | assert p1 == p1_expected 353 | 354 | p_tuple = ( 355 | time_now_str, 356 | "cities", 357 | "_tag_city", 358 | "la", 359 | "_field_temp_f", 360 | "75.1", 361 | "_field_population", 362 | "15000000", 363 | ) 364 | 365 | p2_expected = Point( 366 | time=time_now, 367 | measurement="cities", 368 | tags={"city": "la"}, 369 | fields={"temp_f": 75.1, "population": 15000000}, 370 | ) 371 | 372 | p2 = 
Point()._deserialize_from_list(p_tuple) 373 | 374 | assert p2 == p2_expected 375 | 376 | p_tuple = (time_now_str, "m", "_field_a", "_none") 377 | 378 | p3_expected = Point( 379 | time=time_now, 380 | measurement="m", 381 | fields={"a": None}, 382 | ) 383 | 384 | p3 = Point()._deserialize_from_list(p_tuple) 385 | 386 | assert p3 == p3_expected 387 | 388 | 389 | def test_deserialize_invalid_point(): 390 | """Test deserialization of an invalid Point.""" 391 | # Bad time value. 392 | bad_time = "ASDF" 393 | p_list = [ 394 | str(bad_time), 395 | "_default", 396 | "_field_temp_f", 397 | "asdf", 398 | ] 399 | 400 | with pytest.raises( 401 | ValueError, 402 | match="Invalid isoformat string: 'ASDF'", 403 | ): 404 | Point()._deserialize_from_list(p_list) 405 | 406 | 407 | def test_serialize_zero_values(): 408 | """Test (de)serialization of zero values. 409 | 410 | Resolves issue 23. 411 | """ 412 | p = Point(fields={"a": 0, "b": 0.0, "c": None}) 413 | s = p._serialize_to_list() 414 | 415 | assert s[3] == "0.0" and s[5] == "0.0" and s[7] == p._none_str 416 | 417 | new_p = Point()._deserialize_from_list(s) 418 | 419 | assert ( 420 | new_p.fields["a"] == 0 421 | and new_p.fields["b"] == 0.0 422 | and new_p.fields["c"] is None 423 | ) 424 | 425 | 426 | def test_serialize_none_values(): 427 | """Test serializing/deserializing None values.""" 428 | p = Point(fields={"a": None}, tags={"a": None}) 429 | s = p._serialize_to_list() 430 | assert s[3] == p._none_str and s[5] == p._none_str 431 | 432 | new_p = Point()._deserialize_from_list(s) 433 | 434 | assert p == new_p 435 | 436 | assert new_p.fields["a"] is None and new_p.tags["a"] is None 437 | 438 | 439 | def test_serialize_empty_strings(): 440 | """Test serializing/deserializing empty string tag values.""" 441 | p = Point(tags={"a": ""}) 442 | s = p._serialize_to_list() 443 | assert s[3] == "" 444 | 445 | new_p = Point()._deserialize_from_list(s) 446 | 447 | assert p == new_p 448 | 449 | assert new_p.tags["a"] == "" 450 | 451 | 452 | def test_compact_tag_keys(): 453 | """Test compact tag keys in CSV Storage.""" 454 | p = Point(fields={"a": 0, "b": 0.0, "c": None}) 455 | s = p._serialize_to_list(compact_key_prefixes=True) 456 | s1 = p._serialize_to_list(compact_key_prefixes=False) 457 | 458 | assert all(s[i].startswith(p._compact_field_key_prefix) for i in (2, 4, 6)) 459 | 460 | new_p = Point()._deserialize_from_list(s) 461 | new_p1 = Point()._deserialize_from_list(s1) 462 | 463 | assert p == new_p == new_p1 464 | 465 | assert all(i in new_p.fields for i in ("a", "b", "c")) 466 | 467 | assert ( 468 | new_p.fields["a"] == 0 469 | and new_p.fields["b"] == 0.0 470 | and new_p.fields["c"] is None 471 | ) 472 | 473 | 474 | def test_compact_field_keys(): 475 | """Test compact tag keys in CSV Storage.""" 476 | p = Point(tags={"a": "aa", "b": "bb", "c": None}) 477 | s = p._serialize_to_list(compact_key_prefixes=True) 478 | s1 = p._serialize_to_list(compact_key_prefixes=False) 479 | 480 | assert all(s[i].startswith(p._compact_tag_key_prefix) for i in (2, 4, 6)) 481 | 482 | new_p = Point()._deserialize_from_list(s) 483 | new_p1 = Point()._deserialize_from_list(s1) 484 | 485 | assert p == new_p == new_p1 486 | 487 | assert all(i in new_p.tags for i in ("a", "b", "c")) 488 | 489 | assert ( 490 | new_p.tags["a"] == "aa" 491 | and new_p.tags["b"] == "bb" 492 | and new_p.tags["c"] is None 493 | ) 494 | -------------------------------------------------------------------------------- /tests/test_utils.py: 
-------------------------------------------------------------------------------- 1 | """Tests for tinyflux.utils module.""" 2 | 3 | import pytest 4 | 5 | from tinyflux.utils import ( 6 | freeze, 7 | FrozenDict, 8 | find_eq, 9 | find_ge, 10 | find_gt, 11 | find_le, 12 | find_lt, 13 | ) 14 | 15 | 16 | def test_freeze(): 17 | """Test the freeze utility.""" 18 | frozen = freeze([0, 1, 2, {"a": [1, 2, 3]}, {1, 2}]) 19 | 20 | assert isinstance(frozen, tuple) 21 | assert isinstance(frozen[3], FrozenDict) 22 | assert isinstance(frozen[3]["a"], tuple) 23 | assert isinstance(frozen[4], frozenset) 24 | 25 | with pytest.raises(TypeError): 26 | frozen[0] = 10 27 | 28 | with pytest.raises(TypeError): 29 | frozen[3]["a"] = 10 30 | 31 | with pytest.raises(TypeError): 32 | frozen[3].pop("a") 33 | 34 | with pytest.raises(TypeError): 35 | frozen[3].update({"a": 9}) 36 | 37 | 38 | def test_frozen_dict_hash(): 39 | """Test the hash function on FrozenDict class.""" 40 | my_frozen_set1 = FrozenDict({"city": "la", "state": "ca"}) 41 | my_frozen_set2 = FrozenDict({"state": "ca", "city": "la"}) 42 | my_frozen_set3 = FrozenDict({"temp": 70}) 43 | 44 | assert hash(my_frozen_set1) == hash(my_frozen_set2) 45 | assert hash(my_frozen_set1) != hash(my_frozen_set3) 46 | assert hash(my_frozen_set2) != hash(my_frozen_set3) 47 | 48 | 49 | def test_find_eq(): 50 | """Test the find_eq function.""" 51 | present_numbers = range(3) 52 | absent_numbers = range(3, 6) 53 | 54 | # Normal sorted list. 55 | my_list = [i for i in present_numbers] 56 | 57 | for i, n in enumerate(present_numbers): 58 | assert find_eq(my_list, n) == i 59 | 60 | for i, n in enumerate(absent_numbers): 61 | assert find_eq(my_list, n) is None 62 | 63 | # Empty list. 64 | my_list = [] 65 | 66 | for n in present_numbers: 67 | assert find_eq(my_list, n) is None 68 | 69 | for i, n in enumerate(absent_numbers): 70 | assert find_eq(my_list, n) is None 71 | 72 | 73 | def test_find_lt(): 74 | """Test the find_lt function.""" 75 | present_numbers = range(3, 6) 76 | absent_numbers1 = range(3) 77 | absent_numbers2 = range(6, 9) 78 | 79 | # Normal sorted list. 80 | my_list = [i for i in present_numbers] 81 | 82 | assert find_lt(my_list, 3) is None 83 | assert find_lt(my_list, 4) == 0 84 | assert find_lt(my_list, 5) == 1 85 | 86 | for n in absent_numbers1: 87 | assert find_lt(my_list, n) is None 88 | 89 | for n in absent_numbers2: 90 | assert find_lt(my_list, n) == 2 91 | 92 | # Empty list. 93 | my_list = [] 94 | 95 | for n in present_numbers: 96 | assert find_lt(my_list, n) is None 97 | 98 | for n in absent_numbers1: 99 | assert find_lt(my_list, n) is None 100 | 101 | for n in absent_numbers2: 102 | assert find_lt(my_list, n) is None 103 | 104 | 105 | def test_find_le(): 106 | """Test the find_le function.""" 107 | present_numbers = range(3, 6) 108 | absent_numbers1 = range(3) 109 | absent_numbers2 = range(6, 9) 110 | 111 | # Normal sorted list. 112 | my_list = [i for i in present_numbers] 113 | 114 | for i, n in enumerate(present_numbers): 115 | assert find_le(my_list, n) == i 116 | 117 | for n in absent_numbers1: 118 | assert find_le(my_list, n) is None 119 | 120 | for n in absent_numbers2: 121 | assert find_le(my_list, n) == 2 122 | 123 | # Empty list. 
124 | my_list = [] 125 | 126 | for n in present_numbers: 127 | assert find_le(my_list, n) is None 128 | 129 | for n in absent_numbers1: 130 | assert find_le(my_list, n) is None 131 | 132 | for n in absent_numbers2: 133 | assert find_le(my_list, n) is None 134 | 135 | 136 | def test_find_gt(): 137 | """Test the find_gt function.""" 138 | present_numbers = range(3, 6) 139 | absent_numbers1 = range(3) 140 | absent_numbers2 = range(6, 9) 141 | 142 | # Normal sorted list. 143 | my_list = [i for i in present_numbers] 144 | 145 | assert find_gt(my_list, 3) == 1 146 | assert find_gt(my_list, 4) == 2 147 | assert find_gt(my_list, 5) is None 148 | 149 | for n in absent_numbers1: 150 | assert find_gt(my_list, n) == 0 151 | 152 | for n in absent_numbers2: 153 | assert find_gt(my_list, n) is None 154 | 155 | # Empty list. 156 | my_list = [] 157 | 158 | for n in present_numbers: 159 | assert find_gt(my_list, n) is None 160 | 161 | for n in absent_numbers1: 162 | assert find_gt(my_list, n) is None 163 | 164 | for n in absent_numbers2: 165 | assert find_gt(my_list, n) is None 166 | 167 | 168 | def test_find_ge(): 169 | """Test the find_ge function.""" 170 | present_numbers = range(3, 6) 171 | absent_numbers1 = range(3) 172 | absent_numbers2 = range(6, 9) 173 | 174 | # Normal sorted list. 175 | my_list = [i for i in present_numbers] 176 | 177 | for i, n in enumerate(present_numbers): 178 | assert find_ge(my_list, n) == i 179 | 180 | for n in absent_numbers1: 181 | assert find_ge(my_list, n) == 0 182 | 183 | for n in absent_numbers2: 184 | assert find_ge(my_list, n) is None 185 | 186 | # Empty list. 187 | my_list = [] 188 | 189 | for n in present_numbers: 190 | assert find_ge(my_list, n) is None 191 | 192 | for n in absent_numbers1: 193 | assert find_ge(my_list, n) is None 194 | 195 | for n in absent_numbers2: 196 | assert find_ge(my_list, n) is None 197 | -------------------------------------------------------------------------------- /tinyflux/__init__.py: -------------------------------------------------------------------------------- 1 | """TinyFlux is a tiny, time-series database optimized for your happiness. 2 | 3 | TinyDB stores time-series data as Points using a configurable storage 4 | mechanism. It comes with a syntax for querying data and storing data in 5 | multiple measurements. 6 | 7 | TinyFlux was built on a fork of TinyDB, authored by Markus Siemens (email: 8 | markus_m-siemens.de). 9 | 10 | Author: 11 | Justin Fung (@citrusvanilla, citrusvanilla@gmail.com) 12 | 13 | Usage: 14 | >>> from tinyflux import TinyFlux, Point, FieldQuery, TimeQuery 15 | >>> db = TinyFlux("my_tinyflux_db.csv") 16 | >>> p = Point( 17 | ... measurement="california air quality", 18 | ... time=datetime.fromisoformat("2020-01-01T00:00:00-08:00"), 19 | ... tags={ 20 | ... "city": "Los Angeles", 21 | ... "parameter": "PM2.5", 22 | ... }, 23 | ... fields={"aqi": 112} 24 | ... 
) 25 | >>> db.insert(p) 26 | >>> q1 = TimeQuery() >= datetime.fromisoformat("2020-01-01T00:00:00-00:00") 27 | >>> q2 = FieldQuery().aqi > 100 28 | >>> hazardous_days_in_LA_2020 = db.search(q1 & q2) 29 | """ 30 | 31 | from .database import TinyFlux 32 | from .point import Point 33 | from .queries import TagQuery, FieldQuery, MeasurementQuery, TimeQuery 34 | 35 | __all__ = [ 36 | "TinyFlux", 37 | "Point", 38 | "TagQuery", 39 | "FieldQuery", 40 | "MeasurementQuery", 41 | "TimeQuery", 42 | ] 43 | -------------------------------------------------------------------------------- /tinyflux/measurement.py: -------------------------------------------------------------------------------- 1 | """Definition of TinyFlux measurement class. 2 | 3 | The measurement class provides a convenient interface into a subset of 4 | data points with a common measurement name. A measurement is analogous to a 5 | table in a traditional RDBMS. 6 | 7 | Usage: 8 | >>> db = TinyFlux(storage=MemoryStorage) 9 | >>> m = db.measurement("my_measurement") 10 | """ 11 | 12 | from __future__ import annotations 13 | 14 | from datetime import datetime 15 | from typing import ( 16 | Callable, 17 | Dict, 18 | Iterable, 19 | Iterator, 20 | List, 21 | Optional, 22 | Tuple, 23 | Union, 24 | ) 25 | 26 | from .point import FieldSet, FieldValue, Point, TagSet 27 | from .queries import MeasurementQuery, Query, SimpleQuery 28 | from .index import Index 29 | from .storages import Storage 30 | 31 | from typing import TYPE_CHECKING 32 | 33 | if TYPE_CHECKING: 34 | from .database import TinyFlux # pragma: no cover 35 | 36 | 37 | class Measurement: 38 | """Define the Measurement class. 39 | 40 | Measurement objects are created at runtime when the TinyFlux 'measurement' 41 | method is invoked. 42 | 43 | Attributes: 44 | name: Name of the measurement. 45 | storage: Storage object for the measurement's parent TinyFlux db. 46 | index: Index object for the measurement's parent TinyFlux db. 47 | """ 48 | 49 | def __init__( 50 | self, 51 | name: str, 52 | db: TinyFlux, 53 | ) -> None: 54 | """Initialize a measurement instance. 55 | 56 | Args: 57 | name: The name of the measurement. 58 | db: A reference to the database this measurement belongs to. 59 | """ 60 | self._name = name 61 | self._db = db 62 | 63 | @property 64 | def index(self) -> Index: 65 | """Get the measurement storage instance.""" 66 | return self._db._index 67 | 68 | @property 69 | def name(self) -> str: 70 | """Get the measurement name.""" 71 | return self._name 72 | 73 | @property 74 | def storage(self) -> Storage: 75 | """Get the measurement storage instance.""" 76 | return self._db._storage 77 | 78 | def __iter__(self) -> Iterator[Point]: 79 | """Define the iterator for this class.""" 80 | for item in self._db._storage: 81 | _measurement = self._db._storage._deserialize_measurement(item) 82 | if _measurement == self._name: 83 | yield self._db._storage._deserialize_storage_item(item) 84 | 85 | def __len__(self) -> int: 86 | """Get total number of points in this measurement.""" 87 | # Check the index first. 88 | if self._db._auto_index and self._db._index.valid: 89 | if self.name in self._db._index._measurements: 90 | return len(self._db._index._measurements[self.name]) 91 | else: 92 | return 0 93 | 94 | # Otherwise, iterate over storage and increment a counter. 
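        # Reached when auto-indexing is disabled or the index is not valid.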
95 | count = 0 96 | 97 | for item in self._db._storage: 98 | if self._db._storage._deserialize_measurement(item) == self._name: 99 | count += 1 100 | 101 | return count 102 | 103 | def __repr__(self) -> str: 104 | """Get a printable representation of this measurement.""" 105 | if self._db._auto_index and self._db._index.valid: 106 | if self._name in self._db._index._measurements: 107 | count = len(self._db._index._measurements[self._name]) 108 | else: 109 | count = 0 110 | 111 | args = [ 112 | f"name={self.name}", 113 | f"total={count}", 114 | f"storage={self._db._storage}", 115 | ] 116 | else: 117 | args = [ 118 | f"name={self.name}", 119 | f"storage={self._db._storage}", 120 | ] 121 | 122 | return f'<{type(self).__name__} {", ".join(args)}>' 123 | 124 | def all(self, sorted: bool = True) -> List[Point]: 125 | """Get all points in this measurement. 126 | 127 | Args: 128 | sorted: Whether or not to return points in sorted time order. 129 | 130 | Returns: 131 | A list of points. 132 | """ 133 | points = list(iter(self)) 134 | 135 | if sorted: 136 | points.sort(key=lambda x: (x is None, x.time)) 137 | 138 | return points 139 | 140 | def contains(self, query: SimpleQuery) -> bool: 141 | """Check whether the measurement contains a point matching a query. 142 | 143 | Args: 144 | query: A SimpleQuery. 145 | 146 | Returns: 147 | True if point found, else False. 148 | """ 149 | return self._db.contains(query, self._name) 150 | 151 | def count(self, query: SimpleQuery) -> int: 152 | """Count the points matching a query in this measurement. 153 | 154 | Args: 155 | query: a SimpleQuery. 156 | 157 | Returns: 158 | A count of matching points in the measurement. 159 | """ 160 | return self._db.count(query, self._name) 161 | 162 | def get(self, query: SimpleQuery) -> Optional[Point]: 163 | """Get exactly one point specified by a query from this measurement. 164 | 165 | Returns None if the point doesn't exist. 166 | 167 | Args: 168 | query: A SimpleQuery. 169 | 170 | Returns: 171 | First found Point or None. 172 | """ 173 | return self._db.get(query, self._name) 174 | 175 | def get_field_keys(self) -> List[str]: 176 | """Get all field keys for this measurement. 177 | 178 | Returns: 179 | List of field keys, sorted. 180 | """ 181 | return self._db.get_field_keys(self._name) 182 | 183 | def get_field_values(self, field_key: str) -> List[FieldValue]: 184 | """Get field values from this measurement for the specified key. 185 | 186 | Args: 187 | field_key: The field key to get field values for. 188 | 189 | Returns: 190 | List of field keys, sorted. 191 | """ 192 | return self._db.get_field_values(field_key, self._name) 193 | 194 | def get_tag_keys(self) -> List[str]: 195 | """Get all tag keys for this measurement. 196 | 197 | Returns: 198 | List of tag keys, sorted. 199 | """ 200 | return self._db.get_tag_keys(self._name) 201 | 202 | def get_tag_values(self, tag_keys: List[str] = []) -> Dict[str, List[str]]: 203 | """Get all tag values in the database. 204 | 205 | Args: 206 | tag_keys: Optional list of tag keys to get associated values for. 207 | 208 | Returns: 209 | Mapping of tag_keys to associated tag values as a sorted list. 210 | """ 211 | return self._db.get_tag_values(tag_keys, self._name) 212 | 213 | def get_timestamps(self) -> List[datetime]: 214 | """Get all timestamps in the database. 215 | 216 | Returns timestamps in order of insertion in the database, as time-aware 217 | datetime objects with UTC timezone. 218 | 219 | Args: 220 | measurement: Optional measurement to filter by. 
221 | 222 | Returns: 223 | List of timestamps by insertion order. 224 | """ 225 | return self._db.get_timestamps(self._name) 226 | 227 | def insert(self, point: Point) -> int: 228 | """Insert a Point into a measurement. 229 | 230 | If the passed Point has a different measurement value, 'insert' will 231 | update the measurement value with that of this measurement. 232 | 233 | Args: 234 | point: A Point object. 235 | 236 | Returns: 237 | 1 if success. 238 | 239 | Raises: 240 | TypeError if point is not a Point instance. 241 | """ 242 | return self._db.insert(point, self._name) 243 | 244 | def insert_multiple(self, points: Iterable[Point]) -> int: 245 | """Insert Points into this measurement. 246 | 247 | If the passed Point has a different measurement value, 'insert' will 248 | update the measurement value with that of this measurement. 249 | 250 | Args: 251 | points: An iterable of Point objects. 252 | 253 | Returns: 254 | The count of inserted points. 255 | 256 | Raises: 257 | TypeError if point is not a Point instance. 258 | """ 259 | return self._db.insert_multiple(points, self._name) 260 | 261 | def remove(self, query: SimpleQuery) -> int: 262 | """Remove Points from this measurement by query. 263 | 264 | This is irreversible. 265 | 266 | Returns: 267 | The count of removed points. 268 | """ 269 | return self._db.remove(query, self._name) 270 | 271 | def remove_all(self) -> int: 272 | """Remove all Points from this measurement. 273 | 274 | This is irreversible. 275 | 276 | Returns: 277 | The count of removed points. 278 | """ 279 | return self._db.drop_measurement(self._name) 280 | 281 | def search(self, query: SimpleQuery, sorted: bool = True) -> List[Point]: 282 | """Get all points specified by a query from this measurement. 283 | 284 | Order is not guaranteed. Returns empty list if no points are found. 285 | 286 | Args: 287 | query: A SimpleQuery. 288 | sorted: Whether or not to return points sorted by timestamp. 289 | 290 | Returns: 291 | A list of found Points. 292 | """ 293 | return self._db.search(query, self._name, sorted=sorted) 294 | 295 | def select( 296 | self, 297 | keys: Union[str, Iterable[str]], 298 | query: Query, 299 | ) -> List[Tuple[Union[datetime, str, int, float, None]]]: 300 | """Get specified attributes from Points specified by a query. 301 | 302 | 'keys' should be an iterable of attributes including 'time', 303 | 'measurement', and tag keys and tag values. Passing 'tags' or 'fields' 304 | in the 'keys' iterable will not retrieve all tag and/or field values. 305 | Tag and field keys must be specified individually. 306 | 307 | Args: 308 | keys: An iterable of Point attributes. 309 | query: A Query. 310 | 311 | Returns: 312 | A list of tuples of Point attribute values. 313 | """ 314 | return self._db.select(keys, query, self._name) 315 | 316 | def update( 317 | self, 318 | query: Query, 319 | time: Union[datetime, Callable[[datetime], datetime], None] = None, 320 | measurement: Union[str, Callable[[str], str], None] = None, 321 | tags: Union[TagSet, Callable[[TagSet], TagSet], None] = None, 322 | fields: Union[FieldSet, Callable[[FieldSet], FieldSet], None] = None, 323 | unset_fields: Union[str, Iterable[str], None] = None, 324 | unset_tags: Union[str, Iterable[str], None] = None, 325 | ) -> int: 326 | """Update all matching Points in this measurement with new attributes. 327 | 328 | Args: 329 | query: A query. 330 | time: A datetime object or Callable returning one. 331 | measurement: A string or Callable returning one. 
332 | tags: A mapping or Callable returning one. 333 | fields: A mapping or Callable returning one. 334 | unset_fields: Field keys to remove upon update. 335 | unset_tags: Tag keys to remove upon update. 336 | 337 | Returns: 338 | A count of updated points. 339 | """ 340 | return self._db.update( 341 | query, 342 | time, 343 | measurement, 344 | tags, 345 | fields, 346 | unset_fields, 347 | unset_tags, 348 | self._name, 349 | ) 350 | 351 | def update_all( 352 | self, 353 | time: Union[datetime, Callable[[datetime], datetime], None] = None, 354 | measurement: Union[str, Callable[[str], str], None] = None, 355 | tags: Union[TagSet, Callable[[TagSet], TagSet], None] = None, 356 | fields: Union[FieldSet, Callable[[FieldSet], FieldSet], None] = None, 357 | unset_fields: Union[str, Iterable[str], None] = None, 358 | unset_tags: Union[str, Iterable[str], None] = None, 359 | ) -> int: 360 | """Update all matching Points in this measurement with new attributes. 361 | 362 | Args: 363 | query: A query. 364 | time: A datetime object or Callable returning one. 365 | measurement: A string or Callable returning one. 366 | tags: A mapping or Callable returning one. 367 | fields: A mapping or Callable returning one. 368 | unset_fields: Field keys to remove upon update. 369 | unset_tags: Tag keys to remove upon update. 370 | 371 | Returns: 372 | A count of updated points. 373 | """ 374 | return self._db.update( 375 | MeasurementQuery().noop(), 376 | time, 377 | measurement, 378 | tags, 379 | fields, 380 | unset_fields, 381 | unset_tags, 382 | self._name, 383 | ) 384 | -------------------------------------------------------------------------------- /tinyflux/point.py: -------------------------------------------------------------------------------- 1 | """Definition of the TinyFlux Point class. 2 | 3 | A Point is the data type upon which TinyFlux manages. It contains the time 4 | data and metadata for an individual observation. Points are serialized and 5 | deserialized from Storage. SimpleQuery act upon individual Points. 6 | 7 | A Point is comprised of a timestamp, a measurement, fields, and tags. 8 | 9 | Fields contains string/numeric key-values, while tags contain 10 | string/string key-values. This is enforced upon Point instantiation. 11 | 12 | Usage: 13 | 14 | >>> from tinyflux import Point 15 | >>> p = Point( 16 | time=datetime.now(timezone.utc), 17 | measurement="my measurement", 18 | fields={"my field": 123.45}, 19 | tags={"my tag key": "my tag value"} 20 | ) 21 | 22 | """ 23 | 24 | from datetime import datetime, timezone 25 | from typing import Any, Dict, Mapping, Optional, Sequence, Union 26 | 27 | TagSet = Dict[str, Optional[str]] 28 | FieldValue = Union[int, float, None] 29 | FieldSet = Dict[str, FieldValue] 30 | 31 | 32 | def validate_tags(tags: Any) -> None: 33 | """Validate tags. 34 | 35 | Args: 36 | tags: The object to validate. 37 | 38 | Raises: 39 | ValueError: Exception if tags cannot be validated. 40 | """ 41 | if not isinstance(tags, Mapping): 42 | raise ValueError("Tag set must be a mapping.") 43 | 44 | # Check keys. 45 | if not all(isinstance(i, str) for i in tags.keys()): 46 | raise ValueError("Tag set must contain only string keys.") 47 | 48 | # Check values. 49 | if not all(i is None or isinstance(i, str) for i in tags.values()): 50 | raise ValueError("Tag set must contain only string values or None.") 51 | 52 | return 53 | 54 | 55 | def validate_fields(fields: Any) -> None: 56 | """Validate fields. 57 | 58 | Args: 59 | fields: The object to validate. 
60 | 61 | Raises: 62 | ValueError: Exception if fields cannot be validated. 63 | """ 64 | if not isinstance(fields, Mapping): 65 | raise ValueError("Field set must be a mapping.") 66 | 67 | # Check keys. 68 | if not all(isinstance(i, str) for i in fields): 69 | raise ValueError("Field set must contain only string keys.") 70 | 71 | # Check values. 72 | for i in fields.values(): 73 | if i is None: 74 | continue 75 | 76 | if isinstance(i, bool) or not isinstance(i, (int, float)): 77 | raise ValueError( 78 | "Field set must contain only numeric values or None." 79 | ) 80 | 81 | return 82 | 83 | 84 | class Point: 85 | """Define the Point class. 86 | 87 | This is the only data type that TinyFlux handles directly. It is composed 88 | of a timestamp, measurement, tag-set, and field-set. 89 | 90 | Usage: 91 | >>> p = Point( 92 | time=datetime.now(timezone.utc), 93 | measurement="my measurement", 94 | fields={"my field": 123.45}, 95 | tags={"my tag key": "my tag value"} 96 | ) 97 | """ 98 | 99 | _none_str = "_none" 100 | default_measurement_name = "_default" 101 | _valid_kwargs = set(["time", "measurement", "tags", "fields"]) 102 | __slots__ = ("_time", "_measurement", "_tags", "_fields") 103 | 104 | _default_tag_key_prefix = "_tag_" 105 | _default_field_key_prefix = "_field_" 106 | _compact_tag_key_prefix = "t_" 107 | _compact_field_key_prefix = "f_" 108 | 109 | _time: Optional[datetime] 110 | _measurement: str 111 | _tags: TagSet 112 | _fields: FieldSet 113 | 114 | def __init__(self, *args: Any, **kwargs: Any) -> None: 115 | """Init a Point. 116 | 117 | Attributes: 118 | time: Timestamp. Defaults to time at instantiation. 119 | measurement: Measurement. Defaults to "_default". 120 | tags: Tag set. Defaults to empty set. 121 | fields: Field set. Defaults to empty set. 122 | """ 123 | # Test for args. 124 | if args: 125 | raise TypeError( 126 | "Point may contain keyword args for time, " 127 | "measurement, tags, and fields only." 
128 | ) 129 | 130 | if kwargs: 131 | self._validate_kwargs(kwargs) 132 | 133 | self._time = kwargs.get("time", datetime.now(timezone.utc)) 134 | self._measurement = kwargs.get( 135 | "measurement", self.default_measurement_name 136 | ) 137 | self._tags = kwargs.get("tags", {}) 138 | self._fields = kwargs.get("fields", {}) 139 | else: 140 | self._time = None 141 | self._measurement = self.default_measurement_name 142 | self._tags = {} 143 | self._fields = {} 144 | 145 | @property 146 | def time(self) -> Optional[datetime]: 147 | """Get time.""" 148 | return self._time 149 | 150 | @time.setter 151 | def time(self, value: Any) -> None: 152 | """Set time.""" 153 | if not isinstance(value, datetime): 154 | raise ValueError("Time must be datetime object.") 155 | self._time = value 156 | 157 | @property 158 | def measurement(self) -> str: 159 | """Get measurement.""" 160 | return self._measurement 161 | 162 | @measurement.setter 163 | def measurement(self, value: Any) -> None: 164 | """Set measurement.""" 165 | if not isinstance(value, str): 166 | raise ValueError("Measurement must be a string.") 167 | self._measurement = value 168 | 169 | @property 170 | def tags(self) -> TagSet: 171 | """Get tags.""" 172 | return self._tags 173 | 174 | @tags.setter 175 | def tags(self, value: Any) -> None: 176 | """Set tags.""" 177 | validate_tags(value) 178 | self._tags = value 179 | 180 | @property 181 | def fields(self) -> FieldSet: 182 | """Get fields.""" 183 | return self._fields 184 | 185 | @fields.setter 186 | def fields(self, value: Any) -> None: 187 | """Get fields.""" 188 | validate_fields(value) 189 | self._fields = value 190 | 191 | def __eq__(self, other: Any) -> bool: 192 | """Define __eq__. 193 | 194 | Args: 195 | other: Another Point instance. 196 | 197 | Returns: 198 | All point attributes are equivalent. 199 | """ 200 | if isinstance(other, self.__class__): 201 | return ( 202 | self._time == other._time 203 | and self._measurement == other._measurement 204 | and self._tags == other._tags 205 | and self._fields == other._fields 206 | ) 207 | 208 | return False 209 | 210 | def __repr__(self) -> str: 211 | """Return printable representation of Point.""" 212 | repr_str = "Point(" 213 | 214 | # Add time. 215 | repr_str += f"time={self._time.isoformat() if self._time else 'None'}, " 216 | 217 | # Add measurement. 218 | repr_str += f"measurement={self._measurement}" 219 | 220 | # Add tags. 221 | if self._tags: 222 | tags_str = "; ".join(f"{k}:{str(v)}" for k, v in self._tags.items()) 223 | repr_str += f", tags={tags_str}" 224 | 225 | # Add fields. 226 | if self._fields: 227 | tags_str = "; ".join( 228 | f"{k}:{str(v)}" for k, v in self._fields.items() 229 | ) 230 | repr_str += f", fields={tags_str}" 231 | 232 | # Add the end. 233 | repr_str += ")" 234 | 235 | return repr_str 236 | 237 | def _deserialize_from_list(self, row: Sequence[str]) -> "Point": 238 | """Deserialize a python list of utf-8 strings to a Point. 239 | 240 | Args: 241 | row: A well-formed row of strings, representing a Point. 242 | 243 | Returns: 244 | A Point object. 245 | """ 246 | p_time = datetime.fromisoformat(row[0]).replace(tzinfo=timezone.utc) 247 | p_measurement = row[1] 248 | 249 | p_tags: TagSet = {} 250 | p_fields: FieldSet = {} 251 | 252 | row_len = len(row) 253 | i = 2 254 | 255 | # Check for tag key/values. 256 | while i < row_len: 257 | # Default tag key prefix is "_tag_" (most-used case). 258 | if row[i][1] == "t": 259 | t_key = row[i][len(self._default_tag_key_prefix) :] 260 | # Compact tag key prefix is "t_". 
261 | elif row[i][0] == "t": 262 | t_key = row[i][len(self._compact_tag_key_prefix) :] 263 | # Otherwise, its a field -> continue. 264 | else: 265 | break 266 | 267 | t_value = None if row[i + 1] == self._none_str else str(row[i + 1]) 268 | p_tags[t_key] = t_value 269 | i += 2 270 | 271 | # Check for field key/values. 272 | while i < row_len: 273 | # Default field key prefix is "_field_" (most-used case). 274 | if row[i][1] == "f": 275 | f_key = row[i][len(self._default_field_key_prefix) :] 276 | # Compact field key prefix is "f_". 277 | else: 278 | f_key = row[i][len(self._compact_field_key_prefix) :] 279 | 280 | f_value = row[i + 1] 281 | 282 | # Value is an integer. 283 | if f_value.isdigit() or ( 284 | f_value[0] == "-" and f_value[1:].isdigit() 285 | ): 286 | p_fields[f_key] = int(f_value) 287 | i += 2 288 | continue 289 | 290 | # Value is a float. 291 | try: 292 | p_fields[f_key] = float(f_value) 293 | 294 | # Value is None. 295 | except Exception: 296 | p_fields[f_key] = None 297 | 298 | i += 2 299 | 300 | self._time = p_time 301 | self._measurement = p_measurement 302 | self._tags = p_tags 303 | self._fields = p_fields 304 | 305 | return self 306 | 307 | def _serialize_to_list( 308 | self, compact_key_prefixes: bool = False 309 | ) -> Sequence[str]: 310 | """Serialize a Point to a tuple of strings. 311 | 312 | Args: 313 | compact_key_prefixes: Use compact key prefixes. 314 | 315 | Returns: 316 | A well-formed tuple of strings, representing a Point. 317 | 318 | Usage: 319 | >>> sp = Point()._serialize_to_list() 320 | """ 321 | # Time. 322 | t = ( 323 | self._time.replace(tzinfo=None).isoformat() 324 | if self._time 325 | else self._none_str 326 | ) 327 | 328 | # Measurement. 329 | m = str(self._measurement or self._none_str) 330 | 331 | # Tags. 332 | tag_key_prefix = ( 333 | self._compact_tag_key_prefix 334 | if compact_key_prefixes 335 | else self._default_tag_key_prefix 336 | ) 337 | tags = ( 338 | ( 339 | f"{tag_key_prefix}{k}", 340 | self._none_str if v is None else str(v), 341 | ) 342 | for k, v in self._tags.items() 343 | ) 344 | 345 | # Fields. 346 | field_key_prefix = ( 347 | self._compact_field_key_prefix 348 | if compact_key_prefixes 349 | else self._default_field_key_prefix 350 | ) 351 | fields = ( 352 | ( 353 | f"{field_key_prefix}{k}", 354 | self._none_str if v is None else str(float(v)), 355 | ) 356 | for k, v in self._fields.items() 357 | ) 358 | 359 | # Flatten. 360 | row = ( 361 | t, 362 | m, 363 | *(i for p in tags for i in p), 364 | *(i for p in fields for i in p), 365 | ) 366 | 367 | return row 368 | 369 | def _validate_kwargs(self, kwargs: Any) -> None: 370 | """Validate args and kwargs. 371 | 372 | Helper function validates types of 'time' and 'measurement' arguments. 373 | 374 | Args: 375 | args: Reference to Point constructor args. 376 | kwargs: Reference to Point constructor kwargs. 377 | 378 | Raises: 379 | TypeError: Bad argument keyword. 380 | ValueError: Unexpected type encountered. 381 | """ 382 | # Test for bad kwargs. 383 | unexpected_kwargs = set(kwargs.keys()) - self._valid_kwargs 384 | 385 | if unexpected_kwargs: 386 | raise TypeError( 387 | f"Unexpected kwargs " 388 | f"{', '.join(sorted(list(unexpected_kwargs)))}" 389 | ) 390 | 391 | # Check time. 392 | if "time" in kwargs and not isinstance(kwargs["time"], datetime): 393 | raise ValueError("Time must be datetime object.") 394 | 395 | # Check measurement. 
396 | if "measurement" in kwargs and not isinstance( 397 | kwargs["measurement"], str 398 | ): 399 | raise ValueError("Measurement must be a string.") 400 | 401 | # check 402 | if "tags" in kwargs: 403 | validate_tags(kwargs["tags"]) 404 | 405 | # Check fields. 406 | if "fields" in kwargs: 407 | validate_fields(kwargs["fields"]) 408 | 409 | return 410 | -------------------------------------------------------------------------------- /tinyflux/py.typed: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/citrusvanilla/tinyflux/42c221441ba0f586deb77eceb1718e2a231d4097/tinyflux/py.typed -------------------------------------------------------------------------------- /tinyflux/storages.py: -------------------------------------------------------------------------------- 1 | """Definition of TinyFlux storages classes. 2 | 3 | Storage defines an abstract base case using the built-in ABC of python. This 4 | class defines the requires abstract methods of read, write, and append, as well 5 | as getters and setters for attributes required to reindex the data. 6 | 7 | A storage object will manage data with a file handle, or in memory. 8 | 9 | A storage class is provided to the TinyFlux facade as an initial argument. The 10 | TinyFlux instance will manage the lifecycle of the storage instance. 11 | 12 | Usage: 13 | >>> my_mem_db = TinyFlux(storage=MemoryStorage) 14 | >>> my_csv_db = TinyFlux('path/to/my.csv', storage=CSVStorage) 15 | """ 16 | 17 | from abc import ABC, abstractmethod 18 | import csv 19 | from datetime import datetime 20 | import os 21 | from pathlib import Path 22 | import shutil 23 | from tempfile import NamedTemporaryFile 24 | 25 | from typing import Any, Iterator, List, Optional, Sequence, Union 26 | import _csv 27 | 28 | from .point import Point 29 | 30 | MemStorageItem = Point 31 | CSVStorageItem = Sequence[str] 32 | 33 | 34 | def create_file(path: Union[str, Path], create_dirs: bool) -> None: 35 | """Create a file if it doesn't exist yet. 36 | 37 | Args: 38 | path: The file to create. 39 | create_dirs: Whether to create all missing parent directories. 40 | """ 41 | if create_dirs: 42 | base_dir = os.path.dirname(path) 43 | 44 | # Check if we need to create missing parent directories 45 | if not os.path.exists(base_dir): 46 | os.makedirs(base_dir) 47 | 48 | # Create the file by opening it in 'a' mode which creates the file if it 49 | # does not exist yet but does not modify its contents 50 | with open(path, "a"): 51 | pass 52 | 53 | return 54 | 55 | 56 | class Storage(ABC): # pragma: no cover 57 | """The abstract base class for all storage types for TinyFlux. 58 | 59 | Defines an extensible, static interface with required read/write ops and 60 | index-related getter/setters. 61 | 62 | Custom storage classes should inherit like so: 63 | >>> from tinyflux import Storage 64 | >>> class MyStorageClass(Storage): 65 | ... 66 | """ 67 | 68 | _initially_empty: bool 69 | 70 | @property 71 | def can_append(self) -> bool: 72 | """Can append to DB.""" 73 | return True 74 | 75 | @property 76 | def can_read(self) -> bool: 77 | """Can read the DB.""" 78 | return True 79 | 80 | @property 81 | def can_write(self) -> bool: 82 | """Can write to DB.""" 83 | return True 84 | 85 | @abstractmethod 86 | def __iter__(self) -> Iterator[Union[MemStorageItem, CSVStorageItem]]: 87 | """Return a generator for items in storage.""" 88 | ... 89 | 90 | @abstractmethod 91 | def __len__(self) -> int: 92 | """Return the number of items.""" 93 | ... 
94 | 95 | @abstractmethod 96 | def append(self, points: List[Any], temporary: bool = False) -> None: 97 | """Append points to the store. 98 | 99 | Args: 100 | points: A list of Point objects. 101 | temporary: Whether or not to append to temporary storage. 102 | """ 103 | ... 104 | 105 | def close(self) -> None: 106 | """Perform clean up ops.""" 107 | ... 108 | 109 | @abstractmethod 110 | def read(self) -> List[Point]: 111 | """Read from the store. 112 | 113 | Re-ordering the data after a read provides TinyFlux with the ability to 114 | build an index. 115 | 116 | Args: 117 | reindex_on_read: Reorder the store after data is read. 118 | 119 | Returns: 120 | A list of Points. 121 | """ 122 | return list(self._deserialize_storage_item(i) for i in iter(self)) 123 | 124 | @abstractmethod 125 | def reset(self) -> None: 126 | """Reset the storage instance. 127 | 128 | Removes all data. 129 | """ 130 | ... 131 | 132 | @abstractmethod 133 | def _deserialize_measurement(self, item: Any) -> str: 134 | """Deserialize an item from storage to a measurement.""" 135 | ... 136 | 137 | @abstractmethod 138 | def _deserialize_timestamp(self, item: Any) -> datetime: 139 | """Deserialize an item from storage to a timestamp.""" 140 | ... 141 | 142 | @abstractmethod 143 | def _deserialize_storage_item(self, item: Any) -> Point: 144 | """Deserialize an item from storage to a Point.""" 145 | ... 146 | 147 | @abstractmethod 148 | def _serialize_point(self, point: Point, *args: Any, **kwargs: Any) -> Any: 149 | """Serialize a point to an item for storage.""" 150 | ... 151 | 152 | @abstractmethod 153 | def _swap_temp_with_primary(self) -> None: 154 | """Swap primary data store with temporary data store.""" 155 | ... 156 | 157 | @abstractmethod 158 | def _write(self, items: List[Any]) -> None: 159 | """Write to the store. 160 | 161 | This function should overwrite the entire file. 162 | 163 | Args: 164 | points: A list of Point objects. 165 | temporary: Whether or not to write to temporary storage. 166 | """ 167 | ... 168 | 169 | 170 | class CSVStorage(Storage): 171 | """Define the default storage instance for TinyFlux, a CSV store. 172 | 173 | CSV provides append-only writes, which is efficient for high-frequency 174 | writes, common to time-series datasets. 175 | 176 | Usage: 177 | >>> from tinyflux import CSVStorage 178 | >>> db = TinyFlux("my_csv_store.csv", storage=CSVStorage) 179 | """ 180 | 181 | _timestamp_idx = 0 182 | _measurement_idx = 1 183 | 184 | def __init__( 185 | self, 186 | path: Union[str, Path], 187 | create_dirs: bool = False, 188 | encoding: Optional[str] = None, 189 | access_mode: str = "r+", 190 | flush_on_insert: bool = True, 191 | newline: Optional[str] = "", 192 | **kwargs: Any, 193 | ) -> None: 194 | """Init a CSVStorage instance. 195 | 196 | This will init a file object to the specified filepath. No reads are 197 | performed by default, so we don't know if data is present and 198 | therefore, the _initially_empty attribute is set to False. 199 | 200 | Args: 201 | path: Path to file. 202 | create_dirs: Create parent subdirectories. 203 | encoding: File encoding. 204 | access_mode: File access mode. 205 | flush_on_insert: Whether or not to flush IO buffer immediately. 
206 | newline: Determines how to parse newline characters from the stream 207 | """ 208 | super().__init__() 209 | self._encoding = encoding 210 | self._mode = access_mode 211 | self.kwargs = kwargs 212 | self._latest_time = None 213 | self._initially_empty = False 214 | self._path = path 215 | self._flush_on_insert = flush_on_insert 216 | self._newline = newline 217 | 218 | # Create the file if it doesn't exist and creating is allowed. 219 | if any(i in self._mode for i in ("+", "w", "a")): 220 | create_file(path, create_dirs=create_dirs) 221 | 222 | # Open the file for reading/writing 223 | self._handle = open( 224 | path, mode=self._mode, encoding=encoding, newline=self._newline 225 | ) 226 | 227 | # Open a tempfile. 228 | self._temp_handle: Optional[Any] = None 229 | 230 | # Check if there is already data in the file. 231 | self._check_for_existing_data() 232 | 233 | @property 234 | def can_append(self) -> bool: 235 | """Return whether or not appends can occur.""" 236 | if self._mode not in ("r+", "w", "w+", "a", "a+"): 237 | raise IOError( 238 | f'Cannot update the database. Access mode is "{self._mode}"' 239 | ) 240 | 241 | return True 242 | 243 | @property 244 | def can_read(self) -> bool: 245 | """Return whether or not reads can occur.""" 246 | if self._mode not in ("r+", "r", "w+", "a+"): 247 | raise IOError( 248 | f'Cannot update the database. Access mode is "{self._mode}"' 249 | ) 250 | 251 | return True 252 | 253 | @property 254 | def can_write(self) -> bool: 255 | """Return whether or not writes can occur.""" 256 | if self._mode not in ("r+", "w", "w+"): 257 | raise IOError( 258 | f'Cannot update the database. Access mode is "{self._mode}"' 259 | ) 260 | 261 | return True 262 | 263 | def __iter__(self) -> _csv.reader: # type: ignore 264 | """Return a CSV reader object that can be iterated over.""" 265 | self._handle.seek(0) 266 | 267 | return csv.reader(self._handle, **self.kwargs) 268 | 269 | def __len__(self) -> int: 270 | """Return the number of items.""" 271 | self._handle.seek(0) 272 | 273 | return sum(1 for _ in self._handle) 274 | 275 | def append( 276 | self, items: List[CSVStorageItem], temporary: bool = False 277 | ) -> None: 278 | """Append points to the CSV store. 279 | 280 | Args: 281 | items: A list of objects. 282 | temporary: Whether or not to append to temporary storage. 283 | """ 284 | # Switch on temporary arg. 285 | if temporary: 286 | if not self._temp_handle: 287 | raise IOError 288 | else: 289 | handle = self._temp_handle 290 | else: 291 | handle = self._handle 292 | 293 | handle.seek(0, os.SEEK_END) 294 | 295 | csv_writer = csv.writer(handle, **self.kwargs) 296 | 297 | # Iterate over the points. 298 | for item in items: 299 | # Write the row. 300 | csv_writer.writerow(item) 301 | 302 | if self._flush_on_insert: 303 | # Ensure the file has been written. 304 | handle.flush() 305 | os.fsync(handle.fileno()) 306 | 307 | # Remove data that is behind the new cursor. 308 | handle.truncate() 309 | 310 | return 311 | 312 | def close(self) -> None: 313 | """Clean up data store. 314 | 315 | Closes the file object. 316 | """ 317 | self._handle.close() 318 | 319 | return 320 | 321 | def read(self) -> List[Point]: 322 | """Read all items from the storage into memory. 323 | 324 | Returns: 325 | A list of Point objects. 326 | """ 327 | return super().read() 328 | 329 | def reset(self) -> None: 330 | """Reset the storage instance. 331 | 332 | Removes all data. 
333 | """ 334 | self._write([]) 335 | 336 | return 337 | 338 | def _check_for_existing_data(self) -> None: 339 | """Check the file for existing data, w/o reading data into memory.""" 340 | self._handle.seek(0, os.SEEK_END) 341 | size = self._handle.tell() 342 | 343 | # If the file is empty, flip index_intact to True. 344 | if not size: 345 | self._initially_empty = True 346 | 347 | return 348 | 349 | def _cleanup_temp_storage(self) -> None: 350 | """Clean up temporary storage.""" 351 | if self._temp_handle is not None: 352 | self._temp_handle.close() 353 | self._temp_handle = None 354 | 355 | return 356 | 357 | def _deserialize_measurement(self, row: CSVStorageItem) -> str: 358 | """Deserialize measurement from a row.""" 359 | return row[self._measurement_idx] 360 | 361 | def _deserialize_storage_item(self, row: CSVStorageItem) -> Point: 362 | """Deserialize a row from storage to a Point.""" 363 | return Point()._deserialize_from_list(row) 364 | 365 | def _deserialize_timestamp(self, row: CSVStorageItem) -> datetime: 366 | """Deserialize timestamp from a row.""" 367 | return datetime.fromisoformat(row[self._timestamp_idx]) 368 | 369 | def _init_temp_storage(self) -> None: 370 | """Initialize temporary storage.""" 371 | self._temp_handle = NamedTemporaryFile("w+t", newline="", delete=False) 372 | 373 | return 374 | 375 | def _serialize_point( 376 | self, point: Point, *args: Any, **kwargs: Any 377 | ) -> Sequence[Union[str, float, int]]: 378 | """Serialize a point to an item for storage.""" 379 | return point._serialize_to_list( 380 | compact_key_prefixes=kwargs.pop("compact_key_prefixes", False) 381 | ) 382 | 383 | def _swap_temp_with_primary(self) -> None: 384 | """Swap primary data store with temporary data store.""" 385 | if self._temp_handle is not None: 386 | # Close the primary storage file object. 387 | self._handle.close() 388 | 389 | # Copy auxiliary storage to primary location. 390 | shutil.copy(self._temp_handle.name, self._path) 391 | 392 | # Init a new file object with the initial handle reference. 393 | self._handle = open( 394 | self._path, 395 | mode=self._mode, 396 | encoding=self._encoding, 397 | newline=self._newline, 398 | ) 399 | 400 | return 401 | 402 | def _write(self, items: List[CSVStorageItem]) -> None: 403 | """Write Points to the CSV file. 404 | 405 | Checks each point to see if the index is intact. 406 | 407 | Write overwrites all content in the CSV. For appending, see the 408 | 'append' method. 409 | 410 | Args: 411 | items: A list of items to write. 412 | temporary: Whether or not to write to temporary storage. 413 | """ 414 | handle = self._handle 415 | 416 | # Dump the existing contents. 417 | handle.seek(0) 418 | handle.truncate() 419 | 420 | if items: 421 | # Write the serialized data to the file 422 | w = csv.writer(handle, **self.kwargs) 423 | w.writerows(items) 424 | 425 | # Ensure the file has been written. 426 | handle.flush() 427 | os.fsync(handle.fileno()) 428 | 429 | # Remove data that is behind the new cursor in case the file has 430 | # gotten shorter 431 | handle.truncate() 432 | 433 | return 434 | 435 | 436 | class MemoryStorage(Storage): 437 | """Define the in-memory storage instance for TinyFlux. 438 | 439 | Memory is cleaned up along with the parent process. 440 | 441 | Attributes: 442 | _initially_empty: No data in the storage instance. 443 | _memory: List of Points. 444 | _temp_memory: List of Points. 
445 | 446 | Usage: 447 | >>> from tinyflux import MemoryStorage 448 | >>> db = TinyFlux(storage=MemoryStorage) 449 | """ 450 | 451 | _initially_empty: bool 452 | _memory: List[MemStorageItem] 453 | _temp_memory: List[MemStorageItem] 454 | 455 | def __init__(self) -> None: 456 | """Init a MemoryStorage instance.""" 457 | super().__init__() 458 | self._initially_empty = True 459 | self._memory = [] 460 | self._temp_memory: List[MemStorageItem] = [] 461 | 462 | def __iter__(self) -> Iterator[Point]: 463 | """Return a generator to memory that can be iterated over.""" 464 | for point in self._memory: 465 | yield point 466 | 467 | def __len__(self) -> int: 468 | """Return the number of items.""" 469 | return len(self._memory) 470 | 471 | def append( 472 | self, items: List[MemStorageItem], temporary: bool = False 473 | ) -> None: 474 | """Append points to the memory. 475 | 476 | Args: 477 | points: A list of Point objects. 478 | temporary: Whether or not to append to temporary storage. 479 | """ 480 | for item in items: 481 | if temporary: 482 | self._temp_memory.append(item) 483 | else: 484 | self._memory.append(item) 485 | 486 | return 487 | 488 | def read(self) -> List[Point]: 489 | """Read data from the store. 490 | 491 | Returns: 492 | A list of Point objects. 493 | """ 494 | return super().read() 495 | 496 | def reset(self) -> None: 497 | """Reset the storage instance. 498 | 499 | Removes all data. 500 | """ 501 | self._write([]) 502 | 503 | return 504 | 505 | def _cleanup_temp_storage(self) -> None: 506 | """Clean up temporary storage.""" 507 | del self._temp_memory 508 | self._temp_memory = [] 509 | 510 | return 511 | 512 | def _deserialize_measurement(self, item: MemStorageItem) -> str: 513 | """Deserialize measurement from a point.""" 514 | return item.measurement 515 | 516 | def _deserialize_storage_item(self, item: MemStorageItem) -> Point: 517 | """Deserialize a row from memory to a Point.""" 518 | return item 519 | 520 | def _deserialize_timestamp(self, item: MemStorageItem) -> datetime: 521 | """Deserialize timestamp from a point.""" 522 | if not item.time: # pragma: no cover 523 | raise ValueError 524 | 525 | return item.time 526 | 527 | def _init_temp_storage(self) -> None: 528 | """Initialize temporary storage.""" 529 | self._temp_memory = [] 530 | 531 | def _serialize_point( 532 | self, point: Point, *args: Any, **kwargs: Any 533 | ) -> MemStorageItem: 534 | """Serialize a point to an item for storage.""" 535 | return point 536 | 537 | def _swap_temp_with_primary(self) -> None: 538 | """Swap primary data store with temporary data store.""" 539 | self._memory = self._temp_memory 540 | 541 | return 542 | 543 | def _write(self, items: List[MemStorageItem]) -> None: 544 | """Write Points to memory. 545 | 546 | Checks each point to see if the index is intact. 547 | 548 | Write overwrites all content in memory. For appending, see the 549 | 'append' method. 550 | 551 | Args: 552 | items: A list of Point objects to serialize and write. 553 | temporary: Whether or not to write to temporary storage. 554 | """ 555 | del self._memory 556 | self._memory = items 557 | 558 | return 559 | -------------------------------------------------------------------------------- /tinyflux/utils.py: -------------------------------------------------------------------------------- 1 | """Definition of TinyFlux utils.""" 2 | 3 | import bisect 4 | from typing import Any, List, Optional 5 | 6 | 7 | class FrozenDict(dict): 8 | """ 9 | An immutable dictionary. 
10 | 11 | This is used to generate stable hashes for queries that contain dicts. 12 | Usually, Python dicts are not hashable because they are mutable. This 13 | class removes the mutability and implements the ``__hash__`` method. 14 | 15 | From TinyDB. 16 | """ 17 | 18 | def __hash__(self) -> int: # type: ignore 19 | """Hash the value of a FrozenDict instance.""" 20 | # Calculate the has by hashing a tuple of all dict items 21 | return hash(tuple(sorted(self.items()))) 22 | 23 | def _immutable(self, *args: Any, **kwargs: Any) -> None: 24 | """Raise a TypeError for a given dict method.""" 25 | raise TypeError("object is immutable") 26 | 27 | # Disable write access to the dict 28 | __setitem__ = _immutable 29 | __delitem__ = _immutable 30 | clear = _immutable 31 | popitem = _immutable # type: ignore 32 | 33 | def update(self, *args: Any, **kwargs: Any) -> None: 34 | """Raise TypeError for update.""" 35 | raise TypeError("object is immutable") 36 | 37 | def pop(self, k: Any, d: Optional[Any] = None) -> None: 38 | """Raise TypeError for pop.""" 39 | raise TypeError("object is immutable") 40 | 41 | 42 | def freeze(obj: object) -> object: 43 | """Freeze an object by making it immutable and thus hashable. 44 | 45 | Args: 46 | obj: Any python object. 47 | 48 | Returns: 49 | The object in a hashable form. 50 | """ 51 | if isinstance(obj, dict): 52 | return FrozenDict((k, freeze(v)) for k, v in obj.items()) 53 | elif isinstance(obj, list): 54 | return tuple(freeze(i) for i in obj) 55 | elif isinstance(obj, set): 56 | return frozenset(obj) 57 | else: 58 | return obj 59 | 60 | 61 | def find_eq(sorted_list: List[Any], x: Any) -> Optional[int]: 62 | """Locate the leftmost value exactly equal to x. 63 | 64 | Args: 65 | sorted_list: The list to search. 66 | x: The element to search. 67 | 68 | Returns: 69 | The index of the found element or None. 70 | """ 71 | i = bisect.bisect_left(sorted_list, x) 72 | 73 | if i != len(sorted_list) and sorted_list[i] == x: 74 | return i 75 | 76 | return None 77 | 78 | 79 | def find_lt(sorted_list: List[Any], x: Any) -> Optional[int]: 80 | """Find rightmost value less than x. 81 | 82 | Args: 83 | sorted_list: The list to search. 84 | x: The element to search. 85 | 86 | Returns: 87 | The index of the found element or None. 88 | """ 89 | i = bisect.bisect_left(sorted_list, x) 90 | 91 | if i: 92 | return i - 1 93 | 94 | return None 95 | 96 | 97 | def find_le(sorted_list: List[Any], x: Any) -> Optional[int]: 98 | """Find rightmost value less than or equal to x. 99 | 100 | Args: 101 | sorted_list: The list to search. 102 | x: The element to search. 103 | 104 | Returns: 105 | The index of the found element or None. 106 | """ 107 | i = bisect.bisect_right(sorted_list, x) 108 | 109 | if i: 110 | return i - 1 111 | 112 | return None 113 | 114 | 115 | def find_gt(sorted_list: List[Any], x: Any) -> Optional[int]: 116 | """Find leftmost value greater than x. 117 | 118 | Args: 119 | sorted_list: The list to search. 120 | x: The element to search. 121 | 122 | Returns: 123 | The index of the found element or None. 124 | """ 125 | i = bisect.bisect_right(sorted_list, x) 126 | 127 | if i != len(sorted_list): 128 | return i 129 | 130 | return None 131 | 132 | 133 | def find_ge(sorted_list: List[Any], x: Any) -> Optional[int]: 134 | """Find leftmost item greater than or equal to x. 135 | 136 | Args: 137 | sorted_list: The list to search. 138 | x: The element to search. 139 | 140 | Returns: 141 | The index of the found element or None. 
142 | """ 143 | i = bisect.bisect_left(sorted_list, x) 144 | 145 | if i != len(sorted_list): 146 | return i 147 | 148 | return None 149 | -------------------------------------------------------------------------------- /tinyflux/version.py: -------------------------------------------------------------------------------- 1 | """Version.""" 2 | 3 | __version__ = "1.0.0" # pragma: no cover 4 | --------------------------------------------------------------------------------