├── .coveragerc ├── .flake8 ├── .github ├── ISSUE_TEMPLATE │ └── bug_report.md └── workflows │ ├── actions.yml │ └── test.yml ├── .gitignore ├── CHANGELOG.md ├── CONTRIBUTING.rst ├── LICENSE ├── MANIFEST.in ├── Makefile ├── README.rst ├── docs ├── Makefile ├── _static │ ├── css │ │ └── custom.css │ └── mymarilyn-icon.png ├── _templates │ └── layout.html ├── api.rst ├── changelog.rst ├── conf.py ├── contents.rst.inc ├── contributing.rst ├── dbapi.rst ├── development.rst ├── features.rst ├── index.rst ├── installation.rst ├── license.rst ├── misc.rst ├── performance.rst ├── quickstart.rst ├── types.rst └── unsupportedserverversions.rst ├── example ├── bytewax │ ├── .dockerignore │ ├── Dockerfile │ ├── README.md │ ├── compose.yaml │ ├── grafana_provisioning │ │ ├── dashboards │ │ │ ├── grafana_dashboard.json │ │ │ └── main.yaml │ │ └── datasources │ │ │ └── automatic.yml │ ├── hackernews.py │ ├── init_sql │ │ └── init.sql │ ├── proton.py │ └── requirements.txt ├── descriptive_pipeline │ ├── Dockerfile │ ├── Makefile │ ├── README.md │ ├── config.yaml │ ├── requirements.txt │ ├── server │ │ ├── main.py │ │ └── utils │ │ │ └── logging.py │ └── test │ │ ├── script.js │ │ └── script_ws.js ├── idempotent │ └── idempotent.py ├── pandas │ └── dataframe.py └── streaming_query │ ├── car.py │ └── server monitor.py ├── proton_driver ├── __init__.py ├── block.py ├── blockstreamprofileinfo.py ├── bufferedreader.c ├── bufferedreader.pyx ├── bufferedwriter.c ├── bufferedwriter.pyx ├── client.py ├── clientinfo.py ├── columns │ ├── __init__.py │ ├── arraycolumn.py │ ├── base.py │ ├── boolcolumn.py │ ├── datecolumn.py │ ├── datetimecolumn.py │ ├── decimalcolumn.py │ ├── enumcolumn.py │ ├── exceptions.py │ ├── floatcolumn.py │ ├── intcolumn.py │ ├── intervalcolumn.py │ ├── ipcolumn.py │ ├── jsoncolumn.py │ ├── largeint.c │ ├── largeint.pyx │ ├── lowcardinalitycolumn.py │ ├── mapcolumn.py │ ├── nestedcolumn.py │ ├── nothingcolumn.py │ ├── nullablecolumn.py │ ├── nullcolumn.py │ ├── numpy │ │ ├── __init__.py │ │ ├── base.py │ │ ├── datecolumn.py │ │ ├── datetimecolumn.py │ │ ├── floatcolumn.py │ │ ├── intcolumn.py │ │ ├── lowcardinalitycolumn.py │ │ ├── service.py │ │ └── stringcolumn.py │ ├── service.py │ ├── simpleaggregatefunctioncolumn.py │ ├── stringcolumn.py │ ├── tuplecolumn.py │ ├── util.py │ └── uuidcolumn.py ├── compression │ ├── __init__.py │ ├── base.py │ ├── lz4.py │ ├── lz4hc.py │ └── zstd.py ├── connection.py ├── context.py ├── dbapi │ ├── __init__.py │ ├── connection.py │ ├── cursor.py │ ├── errors.py │ └── extras.py ├── defines.py ├── errors.py ├── log.py ├── numpy │ ├── __init__.py │ ├── block.py │ ├── helpers.py │ └── result.py ├── opentelemetry.py ├── progress.py ├── protocol.py ├── queryprocessingstage.py ├── reader.py ├── readhelpers.py ├── result.py ├── settings │ ├── __init__.py │ ├── available.py │ ├── types.py │ └── writer.py ├── streams │ ├── __init__.py │ ├── compressed.py │ └── native.py ├── util │ ├── __init__.py │ ├── compat.py │ ├── escape.py │ └── helpers.py ├── varint.c ├── varint.pyx └── writer.py ├── pyproject.toml ├── setup.cfg ├── setup.py ├── tests ├── __init__.py ├── columns │ ├── __init__.py │ ├── test_array.py │ ├── test_bool.py │ ├── test_common.py │ ├── test_date.py │ ├── test_datetime.py │ ├── test_decimal.py │ ├── test_enum.py │ ├── test_fixedstring.py │ ├── test_float.py │ ├── test_int.py │ ├── test_interval.py │ ├── test_ip.py │ ├── test_json.py │ ├── test_low_cardinality.py │ ├── test_map.py │ ├── test_nested.py │ ├── test_null.py │ ├── test_nullable.py │ ├── 
test_simpleaggregatefunction.py │ ├── test_string.py │ ├── test_tuple.py │ ├── test_unknown.py │ └── test_uuid.py ├── conftest.py ├── docker-compose.yml ├── log.py ├── numpy │ ├── __init__.py │ ├── columns │ │ ├── __init__.py │ │ ├── test_datetime.py │ │ ├── test_float.py │ │ ├── test_int.py │ │ ├── test_low_cardinality.py │ │ ├── test_nullable.py │ │ ├── test_other.py │ │ └── test_string.py │ ├── test_external_tables.py │ ├── test_generic.py │ ├── testcase.py │ └── util.py ├── test_blocks.py ├── test_buffered_reader.py ├── test_client.py ├── test_compression.py ├── test_connect.py ├── test_dbapi.py ├── test_errors.py ├── test_external_tables.py ├── test_insert.py ├── test_opentelemetry.py ├── test_query_info.py ├── test_settings.py ├── test_substitution.py ├── test_varint.py ├── testcase.py └── util.py ├── testsrequire.py └── valgrind.supp /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | plugins = Cython.Coverage 3 | source = proton_driver 4 | -------------------------------------------------------------------------------- /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | filename = *.py, *.pyx 3 | per-file-ignores = 4 | proton_driver/columns/largeint.pyx: E225, E226, E227, E999 5 | proton_driver/bufferedreader.pyx: E225, E226, E227, E999 6 | proton_driver/bufferedwriter.pyx: E225, E226, E227, E999 7 | proton_driver/varint.pyx: E225, E226, E227, E999 8 | # ignore example print warning. 9 | example/*: T201, T001 10 | exclude = venv,.conda,build 11 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | A clear and concise description of what the bug is. 12 | 13 | **To Reproduce** 14 | Minimal piece of Python code that reproduces the problem. 15 | 16 | **Expected behavior** 17 | A clear and concise description of what you expected to happen. 18 | 19 | **Versions** 20 | 21 | - Version of package with the problem. 22 | - Proton server version. Version can be obtained by running `SELECT version()` query. 23 | - Python version. 
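If helpful, a short snippet can collect the details above (a sketch, assuming a local Proton instance on the default native port 8463):

```python
import sys

import proton_driver
from proton_driver import client

c = client.Client(host='127.0.0.1', port=8463)
print('driver:', proton_driver.__version__)
print('server:', c.execute('SELECT version()'))
print('python:', sys.version)
```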
24 | -------------------------------------------------------------------------------- /.github/workflows/actions.yml: -------------------------------------------------------------------------------- 1 | on: 2 | - workflow_dispatch 3 | name: release 4 | jobs: 5 | build_wheels: 6 | name: Build wheels on ${{ matrix.os }} 7 | runs-on: ${{ matrix.os }} 8 | strategy: 9 | matrix: 10 | os: [ ubuntu-20.04 , windows-2019, macos-12 ] 11 | steps: 12 | - uses: actions/checkout@v4 13 | - name: Get proton-python-driver tag 14 | id: get_tag_name 15 | if: ${{ !startsWith( matrix.os, 'windows' ) }} 16 | run: | 17 | VERSION=`grep '^VERSION' proton_driver/__init__.py \ 18 | | sed 's/^VERSION = (//g' \ 19 | | sed 's/).*//g' \ 20 | | sed 's/, /./g'` 21 | VERSION=v$VERSION 22 | echo $VERSION 23 | echo "tag_name=$VERSION" >> $GITHUB_OUTPUT 24 | - name: Get proton-python-driver tag(windows) 25 | if: ${{ startsWith( matrix.os, 'windows' ) }} 26 | id: get_tag_name_win 27 | shell: pwsh 28 | run: | 29 | $VERSION=((Get-Content proton_driver/__init__.py | Select-String -Pattern '^VERSION') -replace "^VERSION = \((\d+), (\d+), (\d+)\)","v`$1.`$2.`$3") 30 | Write-Output $VERSION 31 | Write-Output "tag_name=$VERSION" >> $env:GITHUB_OUTPUT 32 | - name: Set up QEMU 33 | uses: docker/setup-qemu-action@v3 34 | if: ${{ startsWith( matrix.os, 'ubuntu' ) }} 35 | with: 36 | image: tonistiigi/binfmt:latest 37 | platforms: all 38 | - name: Build wheels 39 | uses: pypa/cibuildwheel@v2.21.3 40 | with: 41 | package-dir: . 42 | output-dir: wheelhouse 43 | config-file: pyproject.toml 44 | - name: Release wheels 45 | uses: softprops/action-gh-release@v2 46 | with: 47 | files: wheelhouse/*.whl 48 | generate_release_notes: true 49 | tag_name: ${{ join(steps.*.outputs.tag_name, '') }} 50 | env: 51 | GITHUB_TOKEN: ${{ secrets.GH_ACCESS_TOKEN }} 52 | - name: Store the distribution packages 53 | uses: actions/upload-artifact@v3 54 | with: 55 | name: python-package-distributions 56 | path: wheelhouse/*.whl 57 | 58 | publish-to-pypi: 59 | name: Publish Python distribution to PyPI 60 | needs: 61 | - build_wheels 62 | runs-on: ubuntu-latest 63 | environment: 64 | name: pypi 65 | url: https://pypi.org/p/proton-driver 66 | permissions: 67 | id-token: write # IMPORTANT: mandatory for trusted publishing 68 | steps: 69 | - name: Download all the dists 70 | uses: actions/download-artifact@v3 71 | with: 72 | name: python-package-distributions 73 | path: dist/ 74 | - name: Publish distribution to PyPI 75 | uses: pypa/gh-action-pypi-publish@release/v1 76 | with: 77 | username: __token__ 78 | password: ${{ secrets.PYPI_TOKEN }} 79 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .venv 2 | 3 | # Byte-compiled / optimized / DLL files 4 | __pycache__/ 5 | *.py[cod] 6 | *$py.class 7 | 8 | # C extensions 9 | *.so 10 | 11 | # Distribution / packaging 12 | .Python 13 | env/ 14 | build/ 15 | develop-eggs/ 16 | dist/ 17 | downloads/ 18 | eggs/ 19 | .eggs/ 20 | lib/ 21 | lib64/ 22 | parts/ 23 | sdist/ 24 | var/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .coverage 43 | .coverage.* 44 | .cache 45 | nosetests.xml 46 | coverage.xml 47 | *,cover 48 | .hypothesis/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | 58 | # Flask stuff: 59 | instance/ 60 | .webassets-cache 61 | 62 | # Scrapy stuff: 63 | .scrapy 64 | 65 | # Sphinx documentation 66 | docs/_build/ 67 | 68 | # PyBuilder 69 | target/ 70 | 71 | # IPython Notebook 72 | .ipynb_checkpoints 73 | 74 | # pyenv 75 | .python-version 76 | 77 | # celery beat schedule file 78 | celerybeat-schedule 79 | 80 | # dotenv 81 | .env 82 | 83 | # virtualenv 84 | venv/ 85 | ENV/ 86 | 87 | # Spyder project settings 88 | .spyderproject 89 | 90 | # Rope project settings 91 | .ropeproject 92 | 93 | # PyCharm project settings 94 | .idea/ 95 | .pypirc 96 | .vscode/ 97 | .DS_Store 98 | wheelhouse/ 99 | -------------------------------------------------------------------------------- /CONTRIBUTING.rst: -------------------------------------------------------------------------------- 1 | How to Contribute 2 | ================= 3 | 4 | #. Check for open issues or open a fresh issue to start a discussion around a feature idea or a bug. 5 | #. Fork `the repository `_ on GitHub to start making your changes to the **master** branch (or branch off of it). 6 | #. Write a test which shows that the bug was fixed or that the feature works as expected. 7 | #. Send a pull request and bug the maintainer until it gets merged and published. 8 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | This is the MIT license: http://www.opensource.org/licenses/mit-license.php 2 | 3 | Copyright (c) 2017 by Konstantin Lebedev. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | recursive-include proton_driver *.pyx 2 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | 2 | build: 3 | python3 -m pip install --upgrade build 4 | python3 -m build 5 | 6 | test: 7 | pip install ./dist/timeplus-proton-driver-0.2.7.tar.gz -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | SOURCEDIR = . 8 | BUILDDIR = _build 9 | 10 | # Put it first so that "make" without argument is like "make help". 11 | help: 12 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 13 | 14 | .PHONY: help Makefile 15 | 16 | # Catch-all target: route all unknown targets to Sphinx using the new 17 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 18 | %: Makefile 19 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) -------------------------------------------------------------------------------- /docs/_static/css/custom.css: -------------------------------------------------------------------------------- 1 | 2 | table.table-small-text { 3 | font-size: small; 4 | } 5 | 6 | 7 | table.table-center-header thead tr th { 8 | text-align: center; 9 | } 10 | 11 | 12 | table.table-right-text-align-results tbody tr td { 13 | text-align: right; 14 | } 15 | 16 | table.table-right-text-align-results tbody tr td:first-child { 17 | text-align: inherit; 18 | } 19 | -------------------------------------------------------------------------------- /docs/_static/mymarilyn-icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/timeplus-io/proton-python-driver/70a1c6313cd7af8f6fc12b4740fc6eb01027d4b9/docs/_static/mymarilyn-icon.png -------------------------------------------------------------------------------- /docs/_templates/layout.html: -------------------------------------------------------------------------------- 1 | {% extends "!layout.html" %} 2 | {% block extrahead %} 3 | 4 | {% endblock %} 5 | -------------------------------------------------------------------------------- /docs/api.rst: -------------------------------------------------------------------------------- 1 | .. _api: 2 | 3 | API 4 | === 5 | 6 | .. module:: clickhouse_driver 7 | 8 | This part of the documentation covers basic classes of the driver: Client, Connection and others. 9 | 10 | 11 | .. _api-client: 12 | 13 | Client 14 | ------ 15 | 16 | .. autoclass:: Client 17 | :members: 18 | :inherited-members: 19 | 20 | 21 | Connection 22 | ---------- 23 | 24 | .. autoclass:: clickhouse_driver.connection.Connection 25 | :members: 26 | :inherited-members: 27 | 28 | 29 | .. _query-result: 30 | 31 | QueryResult 32 | ----------- 33 | 34 | .. autoclass:: clickhouse_driver.result.QueryResult 35 | :members: 36 | :inherited-members: 37 | 38 | 39 | .. _progress-query-result: 40 | 41 | ProgressQueryResult 42 | ------------------- 43 | 44 | .. 
autoclass:: clickhouse_driver.result.ProgressQueryResult 45 | :members: 46 | :inherited-members: 47 | 48 | 49 | .. _iter-query-result: 50 | 51 | IterQueryResult 52 | --------------- 53 | 54 | .. autoclass:: clickhouse_driver.result.IterQueryResult 55 | :members: 56 | :inherited-members: 57 | -------------------------------------------------------------------------------- /docs/changelog.rst: -------------------------------------------------------------------------------- 1 | Changelog 2 | ========= 3 | 4 | Changelog is available in `github repo `_. 5 | -------------------------------------------------------------------------------- /docs/contents.rst.inc: -------------------------------------------------------------------------------- 1 | User's Guide 2 | ------------ 3 | 4 | This part of the documentation focuses on step-by-step instructions for development with clickhouse-driver. 5 | 6 | Clickhouse-driver is designed to communicate with ClickHouse server from Python over the native protocol. 7 | 8 | ClickHouse server provides two protocols for communication: 9 | 10 | * HTTP protocol (port 8123 by default); 11 | * Native (TCP) protocol (port 9000 by default). 12 | 13 | Each protocol has its own advantages and disadvantages. Here we focus on the advantages of the native protocol: 14 | 15 | * The native protocol is more configurable via various settings. 16 | * Binary data transfer is more compact than text data. 17 | * Building Python types from binary data is more efficient than from text data. 18 | * LZ4 compression is `faster than gzip `_. 19 | Gzip compression is used in the HTTP protocol. 20 | * Query profile info is available over the native protocol. We can read the rows-before-limit metric, for example. 21 | 22 | Once again: clickhouse-driver uses the native protocol (port 9000). 23 | 24 | There is an asynchronous wrapper for clickhouse-driver: aioch. It's available `here `_. 25 | 26 | .. toctree:: 27 | :maxdepth: 2 28 | 29 | installation 30 | quickstart 31 | features 32 | types 33 | performance 34 | misc 35 | unsupportedserverversions 36 | 37 | API Reference 38 | ------------- 39 | 40 | If you are looking for information on a specific function, class or 41 | method, this part of the documentation is for you. 42 | 43 | .. toctree:: 44 | :maxdepth: 2 45 | 46 | api 47 | dbapi 48 | 49 | Additional Notes 50 | ---------------- 51 | 52 | Legal information, changelog and contributing are here for the interested. 53 | 54 | .. toctree:: 55 | :maxdepth: 2 56 | 57 | development 58 | changelog 59 | license 60 | contributing 61 | -------------------------------------------------------------------------------- /docs/contributing.rst: -------------------------------------------------------------------------------- 1 | .. include:: ../CONTRIBUTING.rst 2 | -------------------------------------------------------------------------------- /docs/dbapi.rst: -------------------------------------------------------------------------------- 1 | .. _dbapi: 2 | 3 | DB API 2.0 4 | ========== 5 | 6 | This part of the documentation covers the driver's DB API. 7 | 8 | .. automodule:: clickhouse_driver.dbapi 9 | :members: 10 | :inherited-members: 11 | 12 | .. _dbapi-connection: 13 | 14 | Connection 15 | ---------- 16 | 17 | .. autoclass:: clickhouse_driver.dbapi.connection.Connection 18 | :members: 19 | :inherited-members: 20 | 21 | 22 | .. _dbapi-cursor: 23 | 24 | Cursor 25 | ------ 26 | 27 | .. autoclass:: clickhouse_driver.dbapi.cursor.Cursor 28 | :members: 29 | :inherited-members: 30 |
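A minimal usage sketch of the DB API (host and query are illustrative):

.. code-block:: python

    >>> from clickhouse_driver.dbapi import connect
    >>>
    >>> conn = connect(host='localhost')
    >>> cursor = conn.cursor()
    >>> cursor.execute('SELECT 1')
    >>> cursor.fetchall()
    [(1,)]
    >>> conn.close()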
31 | Extras 32 | ------ 33 | 34 | .. _dbapi-extras: 35 | 36 | .. autoclass:: clickhouse_driver.dbapi.extras.DictCursor 37 | 38 | .. autoclass:: clickhouse_driver.dbapi.extras.NamedTupleCursor 39 | -------------------------------------------------------------------------------- /docs/development.rst: -------------------------------------------------------------------------------- 1 | .. _development: 2 | 3 | Development 4 | =========== 5 | 6 | Test configuration 7 | ------------------ 8 | 9 | In ``setup.cfg`` you can find the ClickHouse server port, credentials, logging 10 | level and other options that can be tuned during local testing. 11 | 12 | Running tests locally 13 | --------------------- 14 | 15 | Install the desired Python version with your system package manager, pyenv or another manager. 16 | 17 | Install test requirements and build the package: 18 | 19 | .. code-block:: bash 20 | 21 | python testsrequire.py && python setup.py develop 22 | 23 | You should install Cython if you want to change ``*.pyx`` files: 24 | 25 | .. code-block:: bash 26 | 27 | pip install cython 28 | 29 | ClickHouse on host machine 30 | ^^^^^^^^^^^^^^^^^^^^^^^^^^ 31 | 32 | Install the desired versions of ``clickhouse-server`` and ``clickhouse-client`` on 33 | your machine. 34 | 35 | Run tests: 36 | 37 | .. code-block:: bash 38 | 39 | py.test -v 40 | 41 | ClickHouse in docker 42 | ^^^^^^^^^^^^^^^^^^^^ 43 | 44 | Create a container with the desired version of ``clickhouse-server``: 45 | 46 | .. code-block:: bash 47 | 48 | docker run --rm -e "TZ=Europe/Moscow" -p 127.0.0.1:9000:9000 --name test-clickhouse-server yandex/clickhouse-server:$VERSION 49 | 50 | Create a container with the same version of ``clickhouse-client``: 51 | 52 | .. code-block:: bash 53 | 54 | docker run --rm --entrypoint "/bin/sh" --name test-clickhouse-client --link test-clickhouse-server:clickhouse-server yandex/clickhouse-client:$VERSION -c 'while :; do sleep 1; done' 55 | 56 | Create a ``clickhouse-client`` script on your host machine: 57 | 58 | .. code-block:: bash 59 | 60 | echo -e '#!/bin/bash\n\ndocker exec -e "`env | grep ^TZ=`" test-clickhouse-client clickhouse-client "$@"' | sudo tee /usr/local/bin/clickhouse-client > /dev/null 61 | sudo chmod +x /usr/local/bin/clickhouse-client 62 | 63 | After that, the container ``test-clickhouse-client`` will communicate with 64 | ``test-clickhouse-server`` transparently from the host machine. 65 | 66 | Set ``host=clickhouse-server`` in ``setup.cfg``. 67 | 68 | Add an entry to the hosts file: 69 | 70 | .. code-block:: bash 71 | 72 | echo '127.0.0.1 clickhouse-server' | sudo tee -a /etc/hosts > /dev/null 73 | 74 | Set ``TZ=UTC`` and run tests: 75 | 76 | .. code-block:: bash 77 | 78 | export TZ=UTC 79 | py.test -v 80 | 81 | GitHub Actions in forked repository 82 | ----------------------------------- 83 | 84 | Workflows in forked repositories can be used for running tests. 85 | 86 | Workflows don't run in forked repositories by default. 87 | You must enable GitHub Actions in the **Actions** tab of the forked repository. 88 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | Welcome to clickhouse-driver 2 | ============================ 3 | 4 | Release |release|. 5 | 6 | Welcome to clickhouse-driver's documentation. Get started with :ref:`installation` 7 | and then get an overview with the :ref:`quickstart` where common queries are described. 8 | 9 | 10 | ..
include:: contents.rst.inc 11 | -------------------------------------------------------------------------------- /docs/installation.rst: -------------------------------------------------------------------------------- 1 | .. _installation: 2 | 3 | Installation 4 | ============ 5 | 6 | Python Version 7 | -------------- 8 | 9 | Clickhouse-driver supports Python 3.4 and newer and PyPy. 10 | 11 | Build Dependencies 12 | ------------------ 13 | 14 | Starting from version *0.1.0*, building from source requires `gcc`, Python and Linux headers. 15 | 16 | Example for the `python:alpine` docker image: 17 | 18 | .. code-block:: bash 19 | 20 | apk add gcc musl-dev 21 | 22 | By default there are wheels for Linux, Mac OS X and Windows. 23 | 24 | Packages for Linux and Mac OS X are available for Python 3.6 -- 3.10. 25 | 26 | Packages for Windows are available for Python 3.6 -- 3.10. 27 | 28 | Starting from version *0.2.3* there are wheels for musl-based Linux distributions. 29 | 30 | Dependencies 31 | ------------ 32 | 33 | These distributions will be installed automatically when installing clickhouse-driver. 34 | 35 | * `pytz`_ library for timezone calculations. 36 | * `enum34`_ backported Python 3.4 Enum. 37 | 38 | .. _pytz: http://pytz.sourceforge.net/ 39 | .. _enum34: https://pypi.org/project/enum34/ 40 | 41 | Optional dependencies 42 | ~~~~~~~~~~~~~~~~~~~~~ 43 | 44 | These distributions will not be installed automatically. Clickhouse-driver will detect and 45 | use them if you install them. 46 | 47 | * `clickhouse-cityhash`_ provides the CityHash algorithm of a specific version, see :ref:`compression-cityhash-notes`. 48 | * `lz4`_ enables `LZ4/LZ4HC compression `_ support. 49 | * `zstd`_ enables `ZSTD compression `_ support. 50 | 51 | .. _clickhouse-cityhash: https://pypi.org/project/clickhouse-cityhash/ 52 | .. _lz4: https://python-lz4.readthedocs.io/ 53 | .. _zstd: https://pypi.org/project/zstd/ 54 | 55 | 56 | .. _installation-pypi: 57 | 58 | Installation from PyPI 59 | ---------------------- 60 | 61 | The package can be installed using ``pip``: 62 | 63 | .. code-block:: bash 64 | 65 | pip install clickhouse-driver 66 | 67 | You can install extras packages if you need compression support. Example of 68 | LZ4 compression requirements installation: 69 | 70 | .. code-block:: bash 71 | 72 | pip install clickhouse-driver[lz4] 73 | 74 | You can also specify multiple extras by separating them with commas. 75 | Install LZ4 and ZSTD requirements: 76 | 77 | .. code-block:: bash 78 | 79 | pip install clickhouse-driver[lz4,zstd] 80 | 81 | 82 | .. _installation-numpy-support: 83 | 84 | NumPy support 85 | ------------- 86 | 87 | You can install additional packages (NumPy and Pandas) if you need NumPy support: 88 | 89 | .. code-block:: bash 90 | 91 | pip install clickhouse-driver[numpy] 92 | 93 | Supported NumPy versions are limited by the ``numpy`` package's Python support. 94 | 95 | 96 | Installation from GitHub 97 | ------------------------ 98 | 99 | The development version can be installed directly from GitHub: 100 | 101 | .. code-block:: bash 102 | 103 | pip install git+https://github.com/mymarilyn/clickhouse-driver@master#egg=clickhouse-driver 104 |
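A quick way to verify the installation (and optional compression support) — the host and compression choice are illustrative:

.. code-block:: python

    >>> from clickhouse_driver import Client
    >>>
    >>> client = Client('localhost', compression='lz4')  # requires the [lz4] extra
    >>> client.execute('SELECT 1')
    [(1,)]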
-------------------------------------------------------------------------------- /docs/license.rst: -------------------------------------------------------------------------------- 1 | License 2 | ======= 3 | 4 | ClickHouse Python Driver is distributed under the `MIT license 5 | `_. 6 | 7 | -------------------------------------------------------------------------------- /docs/misc.rst: -------------------------------------------------------------------------------- 1 | 2 | Miscellaneous 3 | ============= 4 | 5 | Client configuring from URL 6 | --------------------------- 7 | 8 | *New in version 0.1.1.* 9 | 10 | The client can be configured from a given URL: 11 | 12 | .. code-block:: python 13 | 14 | >>> from clickhouse_driver import Client 15 | >>> client = Client.from_url( 16 | ... 'clickhouse://login:password@host:port/database' 17 | ... ) 18 | 19 | Port 9000 is the default for the ``clickhouse`` schema; port 9440 is the default for the ``clickhouses`` schema. 20 | 21 | Connection to the default database: 22 | 23 | .. code-block:: python 24 | 25 | >>> client = Client.from_url('clickhouse://localhost') 26 | 27 | 28 | Querystring arguments will be passed along to the :meth:`~clickhouse_driver.connection.Connection` class's initializer: 29 | 30 | .. code-block:: python 31 | 32 | >>> client = Client.from_url( 33 | ... 'clickhouse://localhost/database?send_logs_level=trace&' 34 | ... 'client_name=myclient&' 35 | ... 'compression=lz4' 36 | ... ) 37 | 38 | Parameters that don't match the Connection init signature are treated as settings parameters. 39 | 40 | .. _insert-from-csv-file: 41 | 42 | Inserting data from CSV file 43 | ---------------------------- 44 | 45 | Let's assume you have the following data in a CSV file. 46 | 47 | .. code-block:: shell 48 | 49 | $ cat /tmp/data.csv 50 | time,order,qty 51 | 2019-08-01 15:23:14,New order1,5 52 | 2019-08-05 09:14:45,New order2,3 53 | 2019-08-13 12:20:32,New order3,7 54 | 55 | Data can be inserted into ClickHouse in the following way: 56 | 57 | 58 | .. code-block:: python 59 | 60 | >>> from csv import DictReader 61 | >>> from datetime import datetime 62 | >>> 63 | >>> from clickhouse_driver import Client 64 | >>> 65 | >>> 66 | >>> def iter_csv(filename): 67 | ... converters = { 68 | ... 'qty': int, 69 | ... 'time': lambda x: datetime.strptime(x, '%Y-%m-%d %H:%M:%S') 70 | ... } 71 | ... 72 | ... with open(filename, 'r') as f: 73 | ... reader = DictReader(f) 74 | ... for line in reader: 75 | ... yield {k: (converters[k](v) if k in converters else v) for k, v in line.items()} 76 | ... 77 | >>> client = Client('localhost') 78 | >>> 79 | >>> client.execute( 80 | ... 'CREATE TABLE IF NOT EXISTS data_csv ' 81 | ... '(' 82 | ... 'time DateTime, ' 83 | ... 'order String, ' 84 | ... 'qty Int32' 85 | ... ') Engine = Memory' 86 | ... ) 87 | [] 88 | >>> client.execute('INSERT INTO data_csv VALUES', iter_csv('/tmp/data.csv')) 89 | 3 90 | 91 | 92 | 93 | A table can be populated from a JSON file in a similar way. 94 | 95 |
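For example, a minimal sketch for newline-delimited JSON, reusing ``client`` and ``datetime`` from the CSV example above (the file name and layout are illustrative):

.. code-block:: python

    >>> import json
    >>>
    >>> def iter_json(filename):
    ...     with open(filename, 'r') as f:
    ...         for line in f:
    ...             row = json.loads(line)  # one JSON object per line
    ...             row['time'] = datetime.strptime(row['time'], '%Y-%m-%d %H:%M:%S')
    ...             yield row
    ...
    >>> client.execute('INSERT INTO data_csv VALUES', iter_json('/tmp/data.jsonl'))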
96 | Adding missed settings 97 | ---------------------- 98 | 99 | It's hard to keep the package's settings consistent with the ClickHouse 100 | server's. Some settings can be missing if your server is old. But if a setting 101 | is *supported by your server* and missing from the package, it can be added by 102 | simple monkey patching. Just look into the ClickHouse server source and pick the 103 | corresponding setting type from the package, or write your own type. 104 | 105 | .. code-block:: python 106 | 107 | >>> from clickhouse_driver.settings.available import settings as available_settings, SettingBool 108 | >>> from clickhouse_driver import Client 109 | >>> 110 | >>> available_settings['allow_suspicious_low_cardinality_types'] = SettingBool 111 | >>> 112 | >>> client = Client('localhost', settings={'allow_suspicious_low_cardinality_types': True}) 113 | >>> client.execute('CREATE TABLE test (x LowCardinality(Int32)) Engine = Null') 114 | [] 115 | 116 | 117 | *New in version 0.1.5.* 118 | 119 | Modern ClickHouse servers (20.*+) use text serialization for settings instead of 120 | binary serialization. You don't have to add missing settings manually into the 121 | available settings. Just specify new settings and they will work. 122 | 123 | .. code-block:: python 124 | 125 | >>> client = Client('localhost', settings={'brand_new_setting': 42}) 126 | >>> client.execute('SELECT 1') 127 | -------------------------------------------------------------------------------- /docs/unsupportedserverversions.rst: -------------------------------------------------------------------------------- 1 | 2 | Unsupported server versions 3 | =========================== 4 | 5 | The following versions are not supported by this package: 6 | 7 | - 20.1.*, due to alias type names being kept in metadata. 8 | 9 | However, you can use these versions at your own risk. 10 | -------------------------------------------------------------------------------- /example/bytewax/.dockerignore: -------------------------------------------------------------------------------- 1 | # Include any files or directories that you don't want to be copied to your 2 | # container here (e.g., local build artifacts, temporary files, etc.). 3 | # 4 | # For more help, visit the .dockerignore file reference guide at 5 | # https://docs.docker.com/engine/reference/builder/#dockerignore-file 6 | 7 | **/.DS_Store 8 | **/__pycache__ 9 | **/.venv 10 | **/.classpath 11 | **/.dockerignore 12 | **/.env 13 | **/.git 14 | **/.gitignore 15 | **/.project 16 | **/.settings 17 | **/.toolstarget 18 | **/.vs 19 | **/.vscode 20 | **/*.*proj.user 21 | **/*.dbmdl 22 | **/*.jfm 23 | **/bin 24 | **/charts 25 | **/docker-compose* 26 | **/compose* 27 | **/Dockerfile* 28 | **/node_modules 29 | **/npm-debug.log 30 | **/obj 31 | **/secrets.dev.yaml 32 | **/values.dev.yaml 33 | LICENSE 34 | README.md 35 | -------------------------------------------------------------------------------- /example/bytewax/Dockerfile: -------------------------------------------------------------------------------- 1 | # syntax=docker/dockerfile:1 2 | 3 | FROM python:3.10 as base 4 | 5 | # Prevents Python from writing pyc files. 6 | ENV PYTHONDONTWRITEBYTECODE=1 7 | 8 | # Keeps Python from buffering stdout and stderr to avoid situations where 9 | # the application crashes without emitting any logs due to buffering. 10 | ENV PYTHONUNBUFFERED=1 11 | 12 | #WORKDIR /app 13 | 14 | # Create a non-privileged user that the app will run under. 15 | # See https://docs.docker.com/develop/develop-images/dockerfile_best-practices/#user 16 | ARG UID=10001 17 | RUN adduser \ 18 | --disabled-password \ 19 | --gecos "" \ 20 | --home "/nonexistent" \ 21 | --shell "/sbin/nologin" \ 22 | --no-create-home \ 23 | --uid "${UID}" \ 24 | appuser 25 | 26 | # Download dependencies as a separate step to take advantage of Docker's caching. 27 | # Leverage a cache mount to /root/.cache/pip to speed up subsequent builds. 28 | # Leverage a bind mount to requirements.txt to avoid having to copy it 29 | # into this layer.
30 | RUN --mount=type=cache,target=/root/.cache/pip \ 31 | --mount=type=bind,source=requirements.txt,target=requirements.txt \ 32 | python -m pip install -r requirements.txt 33 | 34 | # Switch to the non-privileged user to run the application. 35 | USER appuser 36 | 37 | # Copy the source code into the container. 38 | COPY . . 39 | 40 | # Run bytewax with 5 workers to chew through the network requests on startup. 41 | CMD python -m bytewax.run hackernews.py -w 5 42 | -------------------------------------------------------------------------------- /example/bytewax/README.md: -------------------------------------------------------------------------------- 1 | # Example to Integrate Bytewax and Proton together 2 | [proton.py](https://github.com/timeplus-io/proton-python-driver/blob/develop/example/bytewax/proton.py) is a Bytewax sink for the [Timeplus Proton](https://github.com/timeplus-io/proton) streaming SQL engine. 3 | 4 | Inspired by https://bytewax.io/blog/polling-hacker-news, you can call the Hacker News HTTP API with Bytewax and send the latest news to Proton for SQL-based analysis, such as 5 | 6 | ```sql 7 | select * from story 8 | ``` 9 | 10 | ## Run with Docker Compose (Highly Recommended) 11 | 12 | Simply run `docker compose up` in this folder and it will start: 13 | 14 | 1. A Proton instance with pre-configured streams, materialized views and views. 15 | 2. A container that leverages Bytewax to call the Hacker News API and send data to Proton. 16 | 3. A pre-configured Grafana instance to visualize the live data. 17 | 18 | ## Run without Docker 19 | 20 | ```shell 21 | python3.10 -m venv py310-env 22 | source py310-env/bin/activate 23 | #git clone and cd to this proton-python-driver/example/bytewax folder 24 | pip install -r requirements.txt 25 | 26 | python -m bytewax.run hackernews.py -w 5 27 | ``` 28 | 29 | This starts Bytewax with 5 workers, which load new items every 15 seconds and send the data to Proton. 30 | 31 | ## How it works 32 | 33 | When the Proton server is started, we create 2 streams to receive the raw JSON data pushed from Bytewax. 34 | 35 | ```sql 36 | CREATE STREAM hn_stories_raw(raw string); 37 | CREATE STREAM hn_comments_raw(raw string); 38 | ``` 39 | 40 | Then we create 2 materialized views to extract the key information from the JSON and put it into more meaningful columns: 41 | 42 | ```sql 43 | CREATE MATERIALIZED VIEW hn_stories AS 44 | SELECT to_time(raw:time) AS _tp_time,raw:id::int AS id,raw:title AS title,raw:by AS by, raw FROM hn_stories_raw; 45 | CREATE MATERIALIZED VIEW hn_comments AS 46 | SELECT to_time(raw:time) AS _tp_time,raw:id::int AS id,raw:root_id::int AS root_id,raw:by AS by, raw FROM hn_comments_raw; 47 | ``` 48 | 49 | Finally we create 2 views to load both incoming data and existing data: 50 | 51 | ```sql 52 | CREATE VIEW IF NOT EXISTS story AS SELECT * FROM hn_stories WHERE _tp_time>earliest_ts(); 53 | CREATE VIEW IF NOT EXISTS comment AS SELECT * FROM hn_comments WHERE _tp_time>earliest_ts() 54 | ``` 55 |
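If you prefer to consume the `story` view from Python instead of a SQL client, here is a minimal sketch using the driver's streaming iterator (host and port assume the setup above):

```python
from proton_driver import client

c = client.Client(host='127.0.0.1', port=8463)
# execute_iter streams rows as they arrive from the unbounded query
for row in c.execute_iter('SELECT _tp_time, id, title, by FROM story'):
    print(row)
```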
56 | With all those streams and views, you can query the data in whatever way you like, e.g. 57 | 58 | ```sql 59 | select * from comment; 60 | 61 | select 62 | story._tp_time as story_time,comment._tp_time as comment_time, 63 | story.id as story_id, comment.id as comment_id, 64 | substring(story.title,1,20) as title,substring(comment.raw:text,1,20) as comment 65 | from story join comment on story.id=comment.root_id; 66 | ``` 67 | 68 | The key code in hackernews.py: 69 | 70 | ```python 71 | op.output("stories-out", story_stream, ProtonSink("hn_stories", os.environ.get("PROTON_HOST","127.0.0.1"))) 72 | ``` 73 | 74 | `hn_stories` is the stream name. The `ProtonSink` will create the stream if it doesn't exist. 75 | 76 | ```python 77 | class _ProtonSinkPartition(StatelessSinkPartition): 78 | def __init__(self, stream: str, host: str): 79 | self.client=client.Client(host=host, port=8463) 80 | self.stream=stream 81 | sql=f"CREATE STREAM IF NOT EXISTS `{stream}` (raw string)" 82 | logger.debug(sql) 83 | self.client.execute(sql) 84 | ``` 85 | 86 | and batch-inserts the data: 87 | 88 | ```python 89 | def write_batch(self, items): 90 | rows=[] 91 | for item in items: 92 | rows.append([item]) # single column in each row 93 | sql = f"INSERT INTO `{self.stream}` (raw) VALUES" 94 | # logger.debug(f"inserting data {sql}") 95 | self.client.execute(sql,rows) 96 | ``` 97 | 98 | ```python 99 | class ProtonSink(DynamicSink): 100 | def __init__(self, stream: str, host: str): 101 | self.stream = stream 102 | self.host = host if host is not None and host != "" else "127.0.0.1" 103 | 104 | 105 | def build(self, worker_index, worker_count): 106 | """See ABC docstring.""" 107 | return _ProtonSinkPartition(self.stream, self.host) 108 | ``` 109 | 110 | ### Querying and visualizing with Grafana 111 | 112 | Please try the docker-compose file. The Grafana instance is set up to install the [Proton Grafana Data Source Plugin](https://github.com/timeplus-io/proton-grafana-source), create the data source, and preconfigure a dashboard. Open the Grafana UI at http://localhost:3000 in your browser and choose the `Hackernews Live Dashboard`. 113 | -------------------------------------------------------------------------------- /example/bytewax/compose.yaml: -------------------------------------------------------------------------------- 1 | services: 2 | 3 | proton: 4 | image: ghcr.io/timeplus-io/proton:latest 5 | pull_policy: always 6 | ports: 7 | - 8463:8463 8 | - 3218:3218 9 | volumes: 10 | - ./init_sql:/docker-entrypoint-initdb.d 11 | healthcheck: 12 | test: wget --no-verbose --tries=1 --spider http://localhost:8123/?query=show+create+comment || exit 1 13 | interval: 5s 14 | timeout: 10s 15 | retries: 3 16 | start_period: 10s 17 | 18 | hn_stream: 19 | build: 20 | context: . 21 | image: timeplus/hackernews_bytewax:latest 22 | environment: 23 | - PROTON_HOST=proton 24 | depends_on: 25 | proton: 26 | condition: service_healthy 27 | 28 | grafana: 29 | image: grafana/grafana:latest 30 | pull_policy: always 31 | ports: 32 | - 3000:3000 33 | environment: 34 | GF_AUTH_ANONYMOUS_ENABLED: 1 35 | GF_AUTH_ANONYMOUS_ORG_ROLE: Admin 36 | GF_INSTALL_PLUGINS: timeplus-proton-datasource 37 | GF_LOG_LEVEL: error 38 | volumes: 39 | - ./grafana_provisioning:/etc/grafana/provisioning 40 | depends_on: 41 | - hn_stream 42 | -------------------------------------------------------------------------------- /example/bytewax/grafana_provisioning/dashboards/main.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: 1 2 | 3 | providers: 4 | # a unique provider name.
Required 5 | - name: 'a unique provider name' 6 | # Org id. Default to 1 7 | orgId: 1 8 | # name of the dashboard folder. 9 | folder: '' 10 | # folder UID. will be automatically generated if not specified 11 | folderUid: '' 12 | # provider type. Default to 'file' 13 | type: file 14 | # disable dashboard deletion 15 | disableDeletion: false 16 | # how often Grafana will scan for changed dashboards 17 | updateIntervalSeconds: 10 18 | # allow updating provisioned dashboards from the UI 19 | allowUiUpdates: true 20 | options: 21 | # path to dashboard files on disk. Required when using the 'file' type 22 | path: /etc/grafana/provisioning/dashboards 23 | # use folder names from filesystem to create folders in Grafana 24 | foldersFromFilesStructure: true -------------------------------------------------------------------------------- /example/bytewax/grafana_provisioning/datasources/automatic.yml: -------------------------------------------------------------------------------- 1 | apiVersion: 1 2 | 3 | datasources: 4 | - name: Proton 5 | type: timeplus-proton-datasource 6 | uid: c24e0faf-1490-4321-a373-7b2b07ca2e38 7 | typeName: Proton 8 | access: proxy 9 | url: 10 | password: 11 | user: 12 | database: 13 | isDefault: true 14 | basicAuth: false 15 | basicAuthUser: 16 | basicAuthPassword: 17 | withCredentials: 18 | jsonData: 19 | host: proton 20 | readOnly: false 21 | secureJsonData: 22 | tlsCACert: "" 23 | tlsClientCert: "" 24 | tlsClientKey: "" 25 | version: 1 26 | editable: true -------------------------------------------------------------------------------- /example/bytewax/hackernews.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from datetime import timedelta 3 | from typing import Optional, Tuple 4 | import os 5 | import json 6 | 7 | import requests 8 | from bytewax import operators as op 9 | from bytewax.dataflow import Dataflow 10 | from bytewax.inputs import SimplePollingSource 11 | 12 | from proton import ProtonSink 13 | 14 | logging.basicConfig(level=logging.INFO) 15 | logger = logging.getLogger(__name__) 16 | 17 | 18 | class HNSource(SimplePollingSource): 19 | def next_item(self): 20 | return ( 21 | "GLOBAL_ID", 22 | requests.get( 23 | "https://hacker-news.firebaseio.com/v0/maxitem.json" 24 | ).json(), 25 | ) 26 | 27 | 28 | def get_id_stream(old_max_id, new_max_id) -> Tuple[str, list]: 29 | if old_max_id is None: 30 | # Get the last 150 items on the first run. 
31 | old_max_id = new_max_id - 150 32 | return (new_max_id, range(old_max_id, new_max_id)) 33 | 34 | 35 | def download_metadata(hn_id) -> Optional[Tuple[str, dict]]: 36 | # Given a Hacker News id returned from the API, fetch its metadata. 37 | # If the API returns nothing for this id, skip the item. 38 | data = requests.get( 39 | f"https://hacker-news.firebaseio.com/v0/item/{hn_id}.json" # noqa 40 | ).json() 41 | 42 | if data is None: 43 | logger.warning(f"Couldn't fetch item {hn_id}, skipping") 44 | return None 45 | return (str(hn_id), data) 46 | 47 | 48 | def recurse_tree(metadata, og_metadata=None) -> Tuple[int, dict]: 49 | if not og_metadata: 50 | og_metadata = metadata 51 | try: 52 | parent_id = metadata["parent"] 53 | parent_metadata = download_metadata(parent_id) 54 | return recurse_tree(parent_metadata[1], og_metadata) 55 | except KeyError: 56 | return (metadata["id"], {**og_metadata, "root_id": metadata["id"]}) 57 | 58 | 59 | def key_on_parent(key__metadata) -> tuple: 60 | key, metadata = recurse_tree(key__metadata[1]) 61 | return (str(key), metadata) 62 | 63 | 64 | def format(id__metadata): 65 | id, metadata = id__metadata 66 | return json.dumps(metadata) 67 | 68 | 69 | flow = Dataflow("hn_scraper") 70 | max_id = op.input("in", flow, HNSource(timedelta(seconds=15))) 71 | id_stream = \ 72 | op.stateful_map("range", max_id, lambda: None, get_id_stream) \ 73 | .then(op.flat_map, "strip_key_flatten", lambda key_ids: key_ids[1]) \ 74 | .then(op.redistribute, "redist") 75 | 76 | id_stream = op.filter_map("meta_download", id_stream, download_metadata) 77 | split_stream = op.branch( 78 | "split_comments", id_stream, lambda item: item[1]["type"] == "story" 79 | ) 80 | story_stream = split_stream.trues 81 | story_stream = op.map("format_stories", story_stream, format) 82 | comment_stream = split_stream.falses 83 | comment_stream = op.map("key_on_parent", comment_stream, key_on_parent) 84 | comment_stream = op.map("format_comments", comment_stream, format) 85 | op.inspect("stories", story_stream) 86 | op.inspect("comments", comment_stream) 87 | op.output( 88 | "stories-out", 89 | story_stream, 90 | ProtonSink("hn_stories_raw", os.environ.get("PROTON_HOST", "127.0.0.1")), 91 | ) 92 | op.output( 93 | "comments-out", 94 | comment_stream, 95 | ProtonSink("hn_comments_raw", os.environ.get("PROTON_HOST", "127.0.0.1")), 96 | ) 97 | -------------------------------------------------------------------------------- /example/bytewax/init_sql/init.sql: -------------------------------------------------------------------------------- 1 | CREATE STREAM IF NOT EXISTS hn_stories_raw(raw string); 2 | 3 | CREATE STREAM IF NOT EXISTS hn_comments_raw(raw string); 4 | 5 | CREATE MATERIALIZED VIEW IF NOT EXISTS hn_stories AS 6 | SELECT to_time(raw:time) AS _tp_time,raw:id::int AS id,raw:title AS title,raw:by AS by, raw FROM hn_stories_raw; 7 | 8 | CREATE MATERIALIZED VIEW IF NOT EXISTS hn_comments AS 9 | SELECT to_time(raw:time) AS _tp_time,raw:id::int AS id,raw:root_id::int AS root_id,raw:by AS by, raw FROM hn_comments_raw; 10 | 11 | CREATE VIEW IF NOT EXISTS story AS SELECT * FROM hn_stories WHERE _tp_time>earliest_ts(); 12 | 13 | CREATE VIEW IF NOT EXISTS comment AS SELECT * FROM hn_comments WHERE _tp_time>earliest_ts(); -------------------------------------------------------------------------------- /example/bytewax/proton.py: -------------------------------------------------------------------------------- 1 | """Output to Timeplus Proton.""" 2 | from bytewax.outputs import DynamicSink, StatelessSinkPartition 3 | from proton_driver
import client 4 | import logging 5 | 6 | __all__ = [ 7 | "ProtonSink", 8 | ] 9 | logger = logging.getLogger(__name__) 10 | logger.setLevel(logging.INFO) 11 | 12 | 13 | class _ProtonSinkPartition(StatelessSinkPartition): 14 | def __init__(self, stream: str, host: str): 15 | self.client = client.Client(host=host, port=8463) 16 | self.stream = stream 17 | sql = f"CREATE STREAM IF NOT EXISTS `{stream}` (raw string)" # noqa 18 | logger.debug(sql) 19 | self.client.execute(sql) 20 | 21 | def write_batch(self, items): 22 | logger.debug(f"inserting data {items}") 23 | rows = [] 24 | for item in items: 25 | rows.append([item]) # single column in each row 26 | sql = f"INSERT INTO `{self.stream}` (raw) VALUES" 27 | logger.debug(f"inserting data {sql}") 28 | self.client.execute(sql, rows) 29 | 30 | 31 | class ProtonSink(DynamicSink): 32 | """ 33 | Write each output item to Proton on that worker. 34 | 35 | Items consumed from the dataflow must look like a string. Use a 36 | preceding map step to do custom formatting. 37 | 38 | Workers are the unit of parallelism. 39 | 40 | Can support at-least-once processing. Messages from the resume 41 | epoch will be duplicated right after resume. 42 | """ 43 | 44 | def __init__(self, stream: str, host: str): 45 | self.stream = stream 46 | self.host = host if host is not None and host != "" else "127.0.0.1" 47 | 48 | def build(self, worker_index, worker_count): 49 | """See ABC docstring.""" 50 | return _ProtonSinkPartition(self.stream, self.host)
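# Note: because this sink is at-least-once, rows from the resume epoch may be
# duplicated right after a restart. A possible mitigation (a sketch, not part
# of the original example) is to pass the driver's idempotent_id setting per
# batch in write_batch:
#
#     self.client.execute(sql, rows, settings={'idempotent_id': batch_id})
#
# where batch_id would be a stable identifier derived from the batch contents.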
-------------------------------------------------------------------------------- /example/bytewax/requirements.txt: -------------------------------------------------------------------------------- 1 | bytewax==0.18 2 | requests 3 | proton-driver -------------------------------------------------------------------------------- /example/descriptive_pipeline/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.9.10 2 | 3 | ARG VERSION 4 | 5 | WORKDIR /timeplus 6 | ADD ./requirements.txt /timeplus 7 | RUN pip3 install -r requirements.txt 8 | ADD ./app /timeplus/app/ 9 | ADD ./server /timeplus/server/ 10 | 11 | EXPOSE 5001 12 | 13 | ENTRYPOINT ["uvicorn", "server.main:app", "--host", "0.0.0.0", "--port", "5001", "--http", "h11"] -------------------------------------------------------------------------------- /example/descriptive_pipeline/Makefile: -------------------------------------------------------------------------------- 1 | VERSION = $(shell git rev-parse --short HEAD) 2 | BIN_NAME = proton-pipeline-service 3 | IMAGE_NAME = $(BIN_NAME):$(VERSION) 4 | DOCKER_ID_USER = timeplus 5 | FULLNAME=$(DOCKER_ID_USER)/${IMAGE_NAME} 6 | 7 | .PHONY: service 8 | 9 | service: 10 | uvicorn server.main:app --port 5001 --host 0.0.0.0 --reload 11 | 12 | proton: 13 | docker run -d -p 8463:8463 --pull always --name proton ghcr.io/timeplus-io/proton:latest 14 | 15 | docker: Dockerfile 16 | docker build -t $(IMAGE_NAME) . 17 | 18 | docker_run: 19 | docker run -p 5001:5001 $(IMAGE_NAME) 20 | 21 | push: 22 | docker tag $(IMAGE_NAME) $(FULLNAME) 23 | docker push $(FULLNAME) -------------------------------------------------------------------------------- /example/descriptive_pipeline/config.yaml: -------------------------------------------------------------------------------- 1 | # host: localhost 2 | # port: 8463 3 | # db: default 4 | # user: default 5 | # password: "" 6 | 7 | pipelines: 8 | - name: pipeline1 9 | sqls: 10 | - | 11 | DROP STREAM IF EXISTS devices 12 | - | 13 | CREATE RANDOM STREAM IF NOT EXISTS devices( 14 | device string default 'device'||to_string(rand()%4), 15 | temperature float default rand()%1000/10 16 | ) SETTINGS eps=1000000 17 | - | 18 | SELECT * FROM devices 19 | - name: pipeline2 20 | sqls: 21 | - | 22 | DROP STREAM IF EXISTS devices 23 | - | 24 | CREATE RANDOM STREAM IF NOT EXISTS devices( 25 | device string default 'device'||to_string(rand()%4), 26 | temperature float default rand()%1000/10 27 | ) SETTINGS eps=100000 28 | - | 29 | SELECT 30 | window_start, 31 | count(*) as count, min(temperature) as min, max(temperature) as max, p99(temperature) as p99 32 | FROM 33 | tumble(devices, 1s) 34 | GROUP BY 35 | window_start 36 | - name: pipeline3 37 | sqls: 38 | - | 39 | SELECT 1 -------------------------------------------------------------------------------- /example/descriptive_pipeline/requirements.txt: -------------------------------------------------------------------------------- 1 | fastapi==0.75.0 2 | loguru==0.6.0 3 | uvicorn[standard]==0.17.6 4 | retry==0.9.2 5 | proton-driver==0.2.10 -------------------------------------------------------------------------------- /example/descriptive_pipeline/server/utils/logging.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | from loguru import logger 5 | 6 | log_level = os.environ.get("APP_LOG_LEVEL", "INFO") 7 | 8 | logger.remove() 9 | 10 | logger.add( 11 | sys.stdout, 12 | colorize=True, 13 | format="{time} - {level} - {message}", 14 | level=log_level, 15 | ) 16 | logger.add("app.log", rotation="500 MB", level=log_level) 17 | 18 | 19 | def getLogger(): 20 | return logger 21 | -------------------------------------------------------------------------------- /example/descriptive_pipeline/test/script.js: -------------------------------------------------------------------------------- 1 | import http from 'k6/http'; 2 | import { sleep } from 'k6'; 3 | 4 | export const options = { 5 | vus: 1, 6 | duration: '30s', 7 | }; 8 | 9 | export default function() { 10 | http.get('http://localhost:5001/queries/pipeline1',{ timeout: '30s' }); 11 | sleep(1); 12 | } 13 | -------------------------------------------------------------------------------- /example/descriptive_pipeline/test/script_ws.js: -------------------------------------------------------------------------------- 1 | import ws from 'k6/ws'; 2 | import { check } from 'k6'; 3 | 4 | export const options = { 5 | vus: 1, 6 | duration: '30s', 7 | }; 8 | 9 | export default function () { 10 | const url = 'ws://localhost:5001/queries/pipeline1'; 11 | 12 | const res = ws.connect(url, {}, function (socket) { 13 | socket.on('open', () => console.log('connected')); 14 | socket.on('close', () => console.log('disconnected')); 15 | 16 | socket.setTimeout(function () { 17 | console.log('30 seconds passed, closing the socket'); 18 | socket.close(); 19 | }, 1000 * 30); 20 | }); 21 | 22 | check(res, { 'status is 101': (r) => r && r.status ===
101 }); 23 | } -------------------------------------------------------------------------------- /example/idempotent/idempotent.py: -------------------------------------------------------------------------------- 1 | from proton_driver import connect, Client 2 | from datetime import date 3 | from time import sleep 4 | 5 | 6 | # Create a test stream 7 | def create_test_stream(operator, table_name, table_columns): 8 | operator.execute(f'DROP STREAM IF EXISTS {table_name};') 9 | operator.execute(f'CREATE STREAM {table_name} ({table_columns})') 10 | 11 | 12 | # Use dbapi to implement idempotent insertion 13 | def use_dbapi(): 14 | with connect('proton://localhost') as conn: 15 | with conn.cursor() as cur: 16 | create_test_stream( 17 | cur, 18 | 'test_user', 19 | 'id int32, name string, birthday date' 20 | ) 21 | # Set idempotent_id. 22 | cur.set_settings(dict(idempotent_id='batch1')) 23 | # Insert data into test_user multiple times with the same idempotent_id. # noqa 24 | # The query result should contain only the first inserted data. 25 | data = [ 26 | (123456, 'timeplus', date(2024, 10, 24)), 27 | (789012, 'stream ', date(2023, 10, 24)), 28 | (135790, 'proton ', date(2024, 10, 24)), 29 | (246801, 'database', date(2024, 10, 24)), 30 | ] 31 | # Execute multiple insert operations. 32 | for _ in range(10): 33 | cur.execute( 34 | 'INSERT INTO test_user (id, name, birthday) VALUES', 35 | data 36 | ) 37 | cur.fetchall() 38 | # Wait for 3 sec to make sure data is available in the historical store. 39 | sleep(3) 40 | cur.execute('SELECT count() FROM table(test_user)') 41 | res = cur.fetchall() 42 | # Data is inserted only once, so res == (4,). 43 | print(res) 44 | 45 | 46 | # Use Client to implement idempotent insertion 47 | def use_client(): 48 | cli = Client('localhost', 8463) 49 | create_test_stream(cli, 'test_stream', '`i` int, `v` string') 50 | setting = { 51 | 'idempotent_id': 'batch1' 52 | } 53 | data = [ 54 | (1, 'a'), (2, 'b'), (3, 'c'), (4, 'd'), 55 | (5, 'e'), (6, 'f'), (7, 'g'), (8, 'h') 56 | ] 57 | # Execute multiple insert operations. 58 | for _ in range(10): 59 | cli.execute( 60 | 'INSERT INTO test_stream (i, v) VALUES', 61 | data, 62 | settings=setting 63 | ) 64 | # Wait for 3 sec to make sure data is available in the historical store. 65 | sleep(3) 66 | res = cli.execute('SELECT count() FROM table(test_stream)') 67 | # Data is inserted only once, so res == (8,).
68 | print(res) 69 | 70 | 71 | if __name__ == "__main__": 72 | use_dbapi() # (4,) 73 | use_client() # (8,) 74 | -------------------------------------------------------------------------------- /example/pandas/dataframe.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import time 3 | 4 | from proton_driver import client, connect 5 | 6 | if __name__ == "__main__": 7 | c = client.Client(host='127.0.0.1', port=8463) 8 | 9 | # Set up the test stream 10 | c.execute("drop stream if exists test") 11 | c.execute( 12 | """create stream test ( 13 | year int16, 14 | first_name string 15 | )""" 16 | ) 17 | # Add some data 18 | df = pd.DataFrame.from_records( 19 | [ 20 | {'year': 1994, 'first_name': 'Vova'}, 21 | {'year': 1995, 'first_name': 'Anja'}, 22 | {'year': 1996, 'first_name': 'Vasja'}, 23 | {'year': 1997, 'first_name': 'Petja'}, 24 | ] 25 | ) 26 | c.insert_dataframe( 27 | 'INSERT INTO "test" (year, first_name) VALUES', 28 | df, 29 | settings=dict(use_numpy=True), 30 | ) 31 | # or c.execute( 32 | # "INSERT INTO test(year, first_name) VALUES", df.to_dict('records') 33 | # ) 34 | # Wait for 3 sec to make sure data is available in the historical store 35 | time.sleep(3) 36 | 37 | df = c.query_dataframe('SELECT * FROM table(test)') 38 | print(df) 39 | print(df.describe()) 40 | 41 | # You can also use Proton settings in the DataFrame API, just as with the `execute` function. # noqa 42 | # Here's an example with idempotent id. 43 | 44 | # Reset stream 45 | c.execute('drop stream if exists test') 46 | c.execute( 47 | """create stream test ( 48 | year int16, 49 | first_name string 50 | )""" 51 | ) 52 | settings = dict(use_numpy=True, idempotent_id='batch') 53 | 54 | # Execute multiple insert operations. 55 | for _ in range(5): 56 | c.insert_dataframe( 57 | 'INSERT INTO "test" (year, first_name) VALUES', 58 | df, 59 | settings=settings, 60 | ) 61 | time.sleep(3) 62 | 63 | rv = c.execute('SELECT COUNT(*) FROM table(test)') 64 | # Only the first insert is written to the historical storage. 65 | print(rv) # (4,) 66 | 67 | # Converting query results to a variety of formats with the DB API 68 | with connect('proton://localhost') as conn: 69 | with conn.cursor() as cur: 70 | cur.execute('SELECT * FROM table(test)') 71 | print('--------------Pandas DataFrame--------------') 72 | print(cur.df()) 73 | 74 | cur.execute('SELECT * FROM table(test)') 75 | print('----------------Numpy Arrays----------------') 76 | print(cur.fetchnumpy()) 77 | 78 | cur.execute('SELECT * FROM table(test)') 79 | print('--------------Polars DataFrame--------------') 80 | print(cur.pl()) 81 | 82 | cur.execute('SELECT * FROM table(test)') 83 | print('-----------------Arrow Table----------------') 84 | print(cur.arrow()) 85 | -------------------------------------------------------------------------------- /example/streaming_query/car.py: -------------------------------------------------------------------------------- 1 | """ 2 | This example uses the driver's DB API. 3 | In this example, one thread writes a large amount of car speed data into the 4 | database, while another thread reads from the database to figure out which 5 | car is speeding.
6 | """ 7 | 8 | import datetime 9 | import random 10 | import threading 11 | import time 12 | 13 | from proton_driver import connect 14 | 15 | account = 'default:' 16 | 17 | 18 | def create_stream(): 19 | with connect(f"proton://{account}@localhost:8463/default") as conn: # noqa 20 | with conn.cursor() as cursor: 21 | cursor.execute("drop stream if exists cars") 22 | cursor.execute( 23 | "create stream if not exists car(id int64, speed float64)" 24 | ) 25 | 26 | 27 | def write_data(car_num: int): 28 | car_begin_date = datetime.datetime(2022, 1, 1, 1, 0, 0) 29 | for day in range(100): 30 | car_begin_date += datetime.timedelta(days=1) 31 | data = [ 32 | ( 33 | random.randint(0, car_num - 1), 34 | random.random() * 20 + 50, 35 | car_begin_date + datetime.timedelta(milliseconds=i * 100), 36 | ) 37 | for i in range(300000) 38 | ] 39 | with connect(f"proton://{account}@localhost:8463/default") as conn: 40 | with conn.cursor() as cursor: 41 | cursor.executemany( 42 | "insert into car (id, speed, _tp_time) values", data 43 | ) 44 | print(f"row count: {cursor.rowcount}") 45 | time.sleep(10) 46 | 47 | 48 | def query(conn): 49 | query_sql = """select id, avg(speed), window_start, window_end 50 | from session(car, 1h, [speed >= 60, speed < 60)) 51 | group by id, window_start, window_end""" 52 | cursor = conn.cursor() 53 | cursor.set_stream_results(stream_results=True, max_row_buffer=100) 54 | cursor.execute(query_sql) 55 | return cursor 56 | 57 | 58 | def fetch_result(cursor): 59 | while True: 60 | print(cursor.fetchone()) 61 | 62 | 63 | def main(): 64 | create_stream() 65 | conn = connect(f"proton://{account}@localhost:8463/default") 66 | cursor = query(conn) 67 | threading.Thread(target=write_data, args=[7]).start() 68 | fetch_result(cursor) 69 | cursor.close() 70 | conn.close() 71 | 72 | 73 | if __name__ == "__main__": 74 | main() 75 | -------------------------------------------------------------------------------- /example/streaming_query/server monitor.py: -------------------------------------------------------------------------------- 1 | """ 2 | This example uses basic classes of the driver: Client 3 | In this example, a few servers upload their logs of statue (include cpu, 4 | memory and disk usage, generate randomly) detected every 100ms to the database 5 | every 10 logs generated. The main thread will warn if any usage exceeds 95%. 
6 | """ 7 | 8 | import random 9 | import threading 10 | import time 11 | from datetime import datetime 12 | 13 | from proton_driver import client 14 | 15 | 16 | class Server(threading.Thread): 17 | def __init__(self, name: str, buffer_max_size: int = 10): 18 | threading.Thread.__init__(self) 19 | self.name = name 20 | self.buffer = [] 21 | self.buffer_max_size = buffer_max_size 22 | self.client = None 23 | self.killed = False 24 | 25 | def __get_state(self) -> dict: 26 | return { 27 | "cpu": random.randint(0, 100), 28 | "memory": random.randint(0, 100), 29 | "disk": random.randint(0, 100), 30 | "server_name": self.name, 31 | "timestamp": datetime.now(), 32 | } 33 | 34 | def __send_data(self): 35 | self.client.execute( 36 | "insert into server_monitor (" 37 | "cpu, memory, disk, server_name, timestamp" 38 | ") values", 39 | self.buffer, 40 | ) 41 | 42 | def run(self) -> None: 43 | self.client = client.Client(host='127.0.0.1', port=8463) 44 | while not self.killed: 45 | self.buffer.append(self.__get_state()) 46 | if len(self.buffer) >= self.buffer_max_size: 47 | self.__send_data() 48 | self.buffer = [] 49 | time.sleep(0.1) 50 | self.client.disconnect() 51 | self.client = None 52 | 53 | 54 | def initial_stream(): 55 | c = client.Client(host='127.0.0.1', port=8463) 56 | c.execute("drop stream if exists server_monitor") 57 | c.execute( 58 | """create stream server_monitor ( 59 | cpu float, 60 | memory float, 61 | disk float, 62 | server_name string, 63 | timestamp datetime64(3) default now64(3) 64 | )""" 65 | ) 66 | 67 | 68 | def show(): 69 | c = client.Client(host='127.0.0.1', port=8463) 70 | limit = 95 71 | rows = c.execute_iter( 72 | "select cpu, memory, disk, server_name, timestamp from server_monitor " 73 | "where cpu > %(limit)f or memory > %(limit)f or disk > %(limit)f", 74 | {"limit": limit}, 75 | ) 76 | for row in rows: 77 | msg = ( 78 | f"{row[4].strftime('%d-%m-%Y %H:%M:%S')} WARNING server[{row[3]}]:" # noqa 79 | ) 80 | col_names = ["cpu", "memory", "disk"] 81 | for col_name, usage in zip(col_names, row[:3]): 82 | if usage > limit: 83 | msg += " %s[%.2f%%]" % (col_name, usage) 84 | print(msg) 85 | 86 | 87 | if __name__ == "__main__": 88 | initial_stream() 89 | servers = [Server(f"server_{i}") for i in range(7)] 90 | for server in servers: 91 | server.start() 92 | show() 93 | -------------------------------------------------------------------------------- /proton_driver/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from .client import Client 3 | from .dbapi import connect 4 | 5 | 6 | VERSION = (0, 2, 13) 7 | __version__ = '.'.join(str(x) for x in VERSION) 8 | 9 | __all__ = ['Client', 'connect'] 10 | -------------------------------------------------------------------------------- /proton_driver/blockstreamprofileinfo.py: -------------------------------------------------------------------------------- 1 | from .reader import read_binary_uint8 2 | from .varint import read_varint 3 | 4 | 5 | class BlockStreamProfileInfo(object): 6 | def __init__(self): 7 | self.rows = 0 8 | self.blocks = 0 9 | self.bytes = 0 10 | self.applied_limit = False # bool 11 | self.rows_before_limit = 0 12 | self.calculated_rows_before_limit = 0 # bool 13 | 14 | super(BlockStreamProfileInfo, self).__init__() 15 | 16 | def read(self, fin): 17 | self.rows = read_varint(fin) 18 | self.blocks = read_varint(fin) 19 | self.bytes = read_varint(fin) 20 | self.applied_limit = bool(read_binary_uint8(fin)) 21 | self.rows_before_limit = read_varint(fin) 22 | 
self.calculated_rows_before_limit = bool(read_binary_uint8(fin)) 23 | -------------------------------------------------------------------------------- /proton_driver/columns/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/timeplus-io/proton-python-driver/70a1c6313cd7af8f6fc12b4740fc6eb01027d4b9/proton_driver/columns/__init__.py -------------------------------------------------------------------------------- /proton_driver/columns/boolcolumn.py: -------------------------------------------------------------------------------- 1 | from .base import FormatColumn 2 | 3 | 4 | class BoolColumn(FormatColumn): 5 | ch_type = 'bool' 6 | py_types = (bool, ) 7 | format = '?' 8 | -------------------------------------------------------------------------------- /proton_driver/columns/datecolumn.py: -------------------------------------------------------------------------------- 1 | from datetime import date, timedelta 2 | 3 | from .base import FormatColumn 4 | 5 | 6 | epoch_start = date(1970, 1, 1) 7 | epoch_end = date(2149, 6, 6) 8 | 9 | epoch_start_date32 = date(1925, 1, 1) 10 | epoch_end_date32 = date(2283, 11, 11) 11 | 12 | 13 | class DateColumn(FormatColumn): 14 | ch_type = 'date' 15 | py_types = (date, ) 16 | format = 'H' 17 | 18 | min_value = epoch_start 19 | max_value = epoch_end 20 | 21 | date_lut_days = (epoch_end - epoch_start).days + 1 22 | date_lut = {x: epoch_start + timedelta(x) for x in range(date_lut_days)} 23 | date_lut_reverse = {value: key for key, value in date_lut.items()} 24 | 25 | def before_write_items(self, items, nulls_map=None): 26 | null_value = self.null_value 27 | 28 | date_lut_reverse = self.date_lut_reverse 29 | min_value = self.min_value 30 | max_value = self.max_value 31 | 32 | for i, item in enumerate(items): 33 | if nulls_map and nulls_map[i]: 34 | items[i] = null_value 35 | continue 36 | 37 | if type(item) is not date: 38 | item = date(item.year, item.month, item.day) 39 | 40 | if min_value <= item <= max_value: 41 | items[i] = date_lut_reverse[item] 42 | else: 43 | items[i] = 0 44 | 45 | def after_read_items(self, items, nulls_map=None): 46 | date_lut = self.date_lut 47 | 48 | if nulls_map is None: 49 | return tuple(date_lut[item] for item in items) 50 | else: 51 | return tuple( 52 | (None if is_null else date_lut[items[i]]) 53 | for i, is_null in enumerate(nulls_map) 54 | ) 55 | 56 | 57 | class Date32Column(DateColumn): 58 | ch_type = 'date32' 59 | format = 'i' 60 | 61 | min_value = epoch_start_date32 62 | max_value = epoch_end_date32 63 | 64 | date_lut_days = (epoch_end_date32 - epoch_start).days + 1 65 | date_lut = { 66 | x: epoch_start + timedelta(x) 67 | for x in range((epoch_start_date32 - epoch_start).days, date_lut_days) 68 | } 69 | date_lut_reverse = {value: key for key, value in date_lut.items()} 70 | -------------------------------------------------------------------------------- /proton_driver/columns/decimalcolumn.py: -------------------------------------------------------------------------------- 1 | from decimal import Decimal, localcontext 2 | 3 | from .base import FormatColumn 4 | from .exceptions import ColumnTypeMismatchException 5 | from .intcolumn import Int128Column, Int256Column 6 | 7 | 8 | class DecimalColumn(FormatColumn): 9 | py_types = (Decimal, float, int) 10 | max_precision = None 11 | int_size = None 12 | 13 | def __init__(self, precision, scale, types_check=False, **kwargs): 14 | self.precision = precision 15 | self.scale = scale 16 | super(DecimalColumn, 
self).__init__(**kwargs) 17 | 18 | if types_check: 19 | max_signed_int = (1 << (8 * self.int_size - 1)) - 1 20 | 21 | def check_item(value): 22 | if value < -max_signed_int or value > max_signed_int: 23 | raise ColumnTypeMismatchException(value) 24 | 25 | self.check_item = check_item 26 | 27 | def after_read_items(self, items, nulls_map=None): 28 | if self.scale >= 1: 29 | scale = 10 ** self.scale 30 | 31 | if nulls_map is None: 32 | return tuple(Decimal(item) / scale for item in items) 33 | else: 34 | return tuple( 35 | (None if is_null else Decimal(items[i]) / scale) 36 | for i, is_null in enumerate(nulls_map) 37 | ) 38 | else: 39 | if nulls_map is None: 40 | return tuple(Decimal(item) for item in items) 41 | else: 42 | return tuple( 43 | (None if is_null else Decimal(items[i])) 44 | for i, is_null in enumerate(nulls_map) 45 | ) 46 | 47 | def before_write_items(self, items, nulls_map=None): 48 | null_value = self.null_value 49 | 50 | if self.scale >= 1: 51 | scale = 10 ** self.scale 52 | 53 | for i, item in enumerate(items): 54 | if nulls_map and nulls_map[i]: 55 | items[i] = null_value 56 | else: 57 | items[i] = int(Decimal(str(item)) * scale) 58 | 59 | else: 60 | for i, item in enumerate(items): 61 | if nulls_map and nulls_map[i]: 62 | items[i] = null_value 63 | else: 64 | items[i] = int(Decimal(str(item))) 65 | 66 | # Override default precision to the maximum supported by underlying type. 67 | def _write_data(self, items, buf): 68 | with localcontext() as ctx: 69 | ctx.prec = self.max_precision 70 | super(DecimalColumn, self)._write_data(items, buf) 71 | 72 | def _read_data(self, n_items, buf, nulls_map=None): 73 | with localcontext() as ctx: 74 | ctx.prec = self.max_precision 75 | return super(DecimalColumn, self)._read_data( 76 | n_items, buf, nulls_map=nulls_map 77 | ) 78 | 79 | 80 | class Decimal32Column(DecimalColumn): 81 | format = 'i' 82 | max_precision = 9 83 | int_size = 4 84 | 85 | 86 | class Decimal64Column(DecimalColumn): 87 | format = 'q' 88 | max_precision = 18 89 | int_size = 8 90 | 91 | 92 | class Decimal128Column(DecimalColumn, Int128Column): 93 | max_precision = 38 94 | 95 | 96 | class Decimal256Column(DecimalColumn, Int256Column): 97 | max_precision = 76 98 | 99 | 100 | def create_decimal_column(spec, column_options): 101 | precision, scale = spec[8:-1].split(',') 102 | precision, scale = int(precision), int(scale) 103 | 104 | # Maximum precisions for underlying types are: 105 | # Int32 10**9 106 | # Int64 10**18 107 | # Int128 10**38 108 | # Int256 10**76 109 | if precision <= 9: 110 | cls = Decimal32Column 111 | elif precision <= 18: 112 | cls = Decimal64Column 113 | elif precision <= 38: 114 | cls = Decimal128Column 115 | else: 116 | cls = Decimal256Column 117 | 118 | return cls(precision, scale, **column_options) 119 | -------------------------------------------------------------------------------- /proton_driver/columns/enumcolumn.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | 3 | from .. 
import errors 4 | from .intcolumn import IntColumn 5 | 6 | 7 | class EnumColumn(IntColumn): 8 | py_types = (Enum, int, str) 9 | 10 | def __init__(self, enum_cls, **kwargs): 11 | self.enum_cls = enum_cls 12 | super(EnumColumn, self).__init__(**kwargs) 13 | 14 | def before_write_items(self, items, nulls_map=None): 15 | null_value = self.null_value 16 | 17 | enum_cls = self.enum_cls 18 | 19 | for i, item in enumerate(items): 20 | if nulls_map and nulls_map[i]: 21 | items[i] = null_value 22 | continue 23 | 24 | source_value = item.name if isinstance(item, Enum) else item 25 | 26 | # Check real enum value 27 | try: 28 | if isinstance(source_value, str): 29 | items[i] = enum_cls[source_value].value 30 | else: 31 | items[i] = enum_cls(source_value).value 32 | except (ValueError, KeyError): 33 | choices = ', '.join( 34 | "'{}' = {}".format(x.name.replace("'", r"\'"), x.value) 35 | for x in enum_cls 36 | ) 37 | enum_str = '{}({})'.format(enum_cls.__name__, choices) 38 | 39 | raise errors.LogicalError( 40 | "Unknown element '{}' for type {}" 41 | .format(source_value, enum_str) 42 | ) 43 | 44 | def after_read_items(self, items, nulls_map=None): 45 | enum_cls = self.enum_cls 46 | 47 | if nulls_map is None: 48 | return tuple(enum_cls(item).name for item in items) 49 | else: 50 | return tuple( 51 | (None if is_null else enum_cls(items[i]).name) 52 | for i, is_null in enumerate(nulls_map) 53 | ) 54 | 55 | 56 | class Enum8Column(EnumColumn): 57 | ch_type = 'enum8' 58 | format = 'b' 59 | int_size = 1 60 | 61 | 62 | class Enum16Column(EnumColumn): 63 | ch_type = 'enum16' 64 | format = 'h' 65 | int_size = 2 66 | 67 | 68 | def create_enum_column(spec, column_options): 69 | if spec.startswith('enum8'): 70 | params = spec[6:-1] 71 | cls = Enum8Column 72 | else: 73 | params = spec[7:-1] 74 | cls = Enum16Column 75 | 76 | return cls(Enum(cls.ch_type, _parse_options(params)), **column_options) 77 | 78 | 79 | def _parse_options(option_string): 80 | options = dict() 81 | after_name = False 82 | escaped = False 83 | quote_character = None 84 | name = '' 85 | value = '' 86 | 87 | for ch in option_string: 88 | if escaped: 89 | name += ch 90 | escaped = False # accepting escaped character 91 | 92 | elif after_name: 93 | if ch in (' ', '='): 94 | pass 95 | elif ch == ',': 96 | options[name] = int(value) 97 | after_name = False 98 | name = '' 99 | value = '' # reset before collecting new option 100 | else: 101 | value += ch 102 | 103 | elif quote_character: 104 | if ch == '\\': 105 | escaped = True 106 | elif ch == quote_character: 107 | quote_character = None 108 | after_name = True # start collecting option value 109 | else: 110 | name += ch 111 | 112 | else: 113 | if ch == "'": 114 | quote_character = ch 115 | 116 | if after_name: 117 | options.setdefault(name, int(value)) # append word after last comma 118 | 119 | return options 120 | -------------------------------------------------------------------------------- /proton_driver/columns/exceptions.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | class ColumnException(Exception): 4 | pass 5 | 6 | 7 | class ColumnTypeMismatchException(ColumnException): 8 | pass 9 | 10 | 11 | class StructPackException(ColumnException): 12 | pass 13 | -------------------------------------------------------------------------------- /proton_driver/columns/floatcolumn.py: -------------------------------------------------------------------------------- 1 | from ctypes import c_float 2 | 3 | from .base import FormatColumn 4 | 5 | 6 | class 
FloatColumn(FormatColumn): 7 | py_types = (float, int) 8 | 9 | 10 | class Float32Column(FloatColumn): 11 | ch_type = 'float32' 12 | format = 'f' 13 | 14 | def __init__(self, types_check=False, **kwargs): 15 | super(Float32Column, self).__init__(types_check=types_check, **kwargs) 16 | 17 | if types_check: 18 | # Chop only bytes that fit current type. 19 | # Cast to -nan or nan if overflows. 20 | def before_write_items(items, nulls_map=None): 21 | null_value = self.null_value 22 | 23 | for i, item in enumerate(items): 24 | if nulls_map and nulls_map[i]: 25 | items[i] = null_value 26 | else: 27 | items[i] = c_float(item).value 28 | 29 | self.before_write_items = before_write_items 30 | 31 | 32 | class Float64Column(FloatColumn): 33 | ch_type = 'float64' 34 | format = 'd' 35 | -------------------------------------------------------------------------------- /proton_driver/columns/intervalcolumn.py: -------------------------------------------------------------------------------- 1 | from .intcolumn import Int64Column 2 | 3 | 4 | class IntervalColumn(Int64Column): 5 | pass 6 | 7 | 8 | class IntervalDayColumn(IntervalColumn): 9 | ch_type = 'interval_day' 10 | 11 | 12 | class IntervalWeekColumn(IntervalColumn): 13 | ch_type = 'interval_week' 14 | 15 | 16 | class IntervalMonthColumn(IntervalColumn): 17 | ch_type = 'interval_month' 18 | 19 | 20 | class IntervalYearColumn(IntervalColumn): 21 | ch_type = 'interval_year' 22 | 23 | 24 | class IntervalHourColumn(IntervalColumn): 25 | ch_type = 'interval_hour' 26 | 27 | 28 | class IntervalMinuteColumn(IntervalColumn): 29 | ch_type = 'interval_minute' 30 | 31 | 32 | class IntervalSecondColumn(IntervalColumn): 33 | ch_type = 'interval_second' 34 | -------------------------------------------------------------------------------- /proton_driver/columns/jsoncolumn.py: -------------------------------------------------------------------------------- 1 | from .base import Column 2 | from .stringcolumn import String 3 | from ..reader import read_binary_uint8, read_binary_str 4 | from ..util.compat import json 5 | from ..writer import write_binary_uint8 6 | 7 | 8 | class JsonColumn(Column): 9 | py_types = (dict, ) 10 | 11 | # No NULL value actually 12 | null_value = {} 13 | 14 | def __init__(self, column_by_spec_getter, **kwargs): 15 | self.column_by_spec_getter = column_by_spec_getter 16 | self.string_column = String(**kwargs) 17 | super(JsonColumn, self).__init__(**kwargs) 18 | 19 | def write_state_prefix(self, buf): 20 | # Read in binary format. 21 | # Write in text format. 
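# The uint8 below is assumed to be a serialization-format flag: 1 selects the text form, which matches write_items() sending every dict as a JSON string via json.dumps().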
22 | write_binary_uint8(1, buf) 23 | 24 | def read_items(self, n_items, buf): 25 | read_binary_uint8(buf) 26 | spec = read_binary_str(buf) 27 | col = self.column_by_spec_getter( 28 | spec, dict(namedtuple_as_json=True) 29 | ) 30 | col.read_state_prefix(buf) 31 | return col.read_data(n_items, buf) 32 | 33 | def write_items(self, items, buf): 34 | items = [x if isinstance(x, str) else json.dumps(x) for x in items] 35 | self.string_column.write_items(items, buf) 36 | 37 | 38 | def create_json_column(spec, column_by_spec_getter, column_options): 39 | return JsonColumn(column_by_spec_getter, **column_options) 40 | -------------------------------------------------------------------------------- /proton_driver/columns/lowcardinalitycolumn.py: -------------------------------------------------------------------------------- 1 | from math import log 2 | 3 | from ..reader import read_binary_uint64 4 | from ..writer import write_binary_int64 5 | from .base import Column 6 | from .intcolumn import UInt8Column, UInt16Column, UInt32Column, UInt64Column 7 | 8 | 9 | def create_low_cardinality_column(spec, column_by_spec_getter): 10 | inner = spec[16:-1] 11 | nested = column_by_spec_getter(inner) 12 | return LowCardinalityColumn(nested) 13 | 14 | 15 | class LowCardinalityColumn(Column): 16 | """ 17 | Stores column as index (unique elements) and keys. 18 | Good for de-duplication of large values with low cardinality. 19 | """ 20 | int_types = { 21 | 0: UInt8Column, 22 | 1: UInt16Column, 23 | 2: UInt32Column, 24 | 3: UInt64Column 25 | } 26 | 27 | # Need to read additional keys. 28 | # Additional keys are stored before indexes as value N and N keys 29 | # after them. 30 | has_additional_keys_bit = 1 << 9 31 | # Need to update dictionary. 32 | # It means that previous granule has different dictionary. 33 | need_update_dictionary = 1 << 10 34 | 35 | serialization_type = has_additional_keys_bit | need_update_dictionary 36 | 37 | def __init__(self, nested_column, **kwargs): 38 | self.nested_column = nested_column 39 | super(LowCardinalityColumn, self).__init__(**kwargs) 40 | 41 | def read_state_prefix(self, buf): 42 | return read_binary_uint64(buf) 43 | 44 | def write_state_prefix(self, buf): 45 | # KeysSerializationVersion. See Proton docs. 46 | write_binary_int64(1, buf) 47 | 48 | def _write_data(self, items, buf): 49 | index, keys = [], [] 50 | key_by_index_element = {} 51 | 52 | if self.nested_column.nullable: 53 | # First element represents NULL if column is nullable. 54 | index.append(self.nested_column.null_value) 55 | # Prevent null map writing. Reset nested column nullable flag. 56 | self.nested_column.nullable = False 57 | 58 | for x in items: 59 | if x is None: 60 | # Zero element for null. 61 | keys.append(0) 62 | 63 | else: 64 | key = key_by_index_element.get(x) 65 | # Get key from index or add it to index. 66 | if key is None: 67 | key = len(key_by_index_element) 68 | key_by_index_element[x] = key 69 | index.append(x) 70 | 71 | keys.append(key + 1) 72 | else: 73 | for x in items: 74 | key = key_by_index_element.get(x) 75 | 76 | # Get key from index or add it to index. 77 | if key is None: 78 | key = len(key_by_index_element) 79 | key_by_index_element[x] = len(key_by_index_element) 80 | index.append(x) 81 | 82 | keys.append(key) 83 | 84 | # Do not write anything for empty column. 85 | # May happen while writing empty arrays. 
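# Encoding sketch: items ['a', 'b', 'a'] with a non-nullable nested column produce index = ['a', 'b'] and keys = [0, 1, 0]; with a nullable nested column the index gains a leading NULL and the keys shift to [1, 2, 1].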
86 | if not len(index): 87 | return 88 | 89 | int_type = int(log(len(index), 2) / 8) 90 | int_column = self.int_types[int_type]() 91 | 92 | serialization_type = self.serialization_type | int_type 93 | 94 | write_binary_int64(serialization_type, buf) 95 | write_binary_int64(len(index), buf) 96 | 97 | self.nested_column.write_data(index, buf) 98 | write_binary_int64(len(items), buf) 99 | int_column.write_items(keys, buf) 100 | 101 | def _read_data(self, n_items, buf, nulls_map=None): 102 | if not n_items: 103 | return tuple() 104 | 105 | serialization_type = read_binary_uint64(buf) 106 | 107 | # Lowest byte contains info about key type. 108 | key_type = serialization_type & 0xf 109 | keys_column = self.int_types[key_type]() 110 | 111 | nullable = self.nested_column.nullable 112 | # Prevent null map reading. Reset nested column nullable flag. 113 | self.nested_column.nullable = False 114 | 115 | index_size = read_binary_uint64(buf) 116 | index = self.nested_column.read_data(index_size, buf) 117 | if nullable: 118 | index = (None, ) + index[1:] 119 | 120 | read_binary_uint64(buf) # number of keys 121 | keys = keys_column.read_data(n_items, buf) 122 | 123 | return tuple(index[x] for x in keys) 124 | -------------------------------------------------------------------------------- /proton_driver/columns/mapcolumn.py: -------------------------------------------------------------------------------- 1 | from .base import Column 2 | from .intcolumn import UInt64Column 3 | from ..util.helpers import pairwise 4 | from .util import get_inner_columns 5 | 6 | 7 | class MapColumn(Column): 8 | py_types = (dict, ) 9 | 10 | def __init__(self, key_column, value_column, **kwargs): 11 | self.offset_column = UInt64Column() 12 | self.key_column = key_column 13 | self.value_column = value_column 14 | super(MapColumn, self).__init__(**kwargs) 15 | 16 | def read_state_prefix(self, buf): 17 | self.key_column.read_state_prefix(buf) 18 | self.value_column.read_state_prefix(buf) 19 | 20 | def write_state_prefix(self, buf): 21 | self.key_column.write_state_prefix(buf) 22 | self.value_column.write_state_prefix(buf) 23 | 24 | def read_items(self, n_items, buf): 25 | offsets = list(self.offset_column.read_items(n_items, buf)) 26 | last_offset = offsets[-1] 27 | keys = self.key_column.read_data(last_offset, buf) 28 | values = self.value_column.read_data(last_offset, buf) 29 | 30 | offsets.insert(0, 0) 31 | 32 | return [ 33 | dict(zip(keys[begin:end], values[begin:end])) 34 | for begin, end in pairwise(offsets) 35 | ] 36 | 37 | def write_items(self, items, buf): 38 | offsets = [] 39 | keys = [] 40 | values = [] 41 | 42 | total = 0 43 | for x in items: 44 | total += len(x) 45 | offsets.append(total) 46 | keys.extend(x.keys()) 47 | values.extend(x.values()) 48 | 49 | self.offset_column.write_items(offsets, buf) 50 | self.key_column.write_data(keys, buf) 51 | self.value_column.write_data(values, buf) 52 | 53 | 54 | def create_map_column(spec, column_by_spec_getter): 55 | key, value = get_inner_columns('map', spec) 56 | key_column = column_by_spec_getter(key.strip()) 57 | value_column = column_by_spec_getter(value.strip()) 58 | 59 | return MapColumn(key_column, value_column) 60 | -------------------------------------------------------------------------------- /proton_driver/columns/nestedcolumn.py: -------------------------------------------------------------------------------- 1 | 2 | from .arraycolumn import create_array_column 3 | from .util import get_inner_spec 4 | 5 | 6 | def create_nested_column(spec, column_by_spec_getter, 
column_options): 7 | return create_array_column( 8 | 'array(tuple({}))'.format(get_inner_spec('nested', spec)), 9 | column_by_spec_getter, column_options 10 | ) 11 | -------------------------------------------------------------------------------- /proton_driver/columns/nothingcolumn.py: -------------------------------------------------------------------------------- 1 | from .intcolumn import FormatColumn 2 | 3 | 4 | class NothingColumn(FormatColumn): 5 | ch_type = 'nothing' 6 | format = 'B' 7 | 8 | @property 9 | def size(self): 10 | return 1 11 | 12 | def after_read_items(self, items, nulls_map=None): 13 | return (None, ) * len(items) 14 | -------------------------------------------------------------------------------- /proton_driver/columns/nullablecolumn.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | def create_nullable_column(spec, column_by_spec_getter): 4 | inner = spec[9:-1] 5 | nested = column_by_spec_getter(inner) 6 | nested.nullable = True 7 | return nested 8 | -------------------------------------------------------------------------------- /proton_driver/columns/nullcolumn.py: -------------------------------------------------------------------------------- 1 | from .intcolumn import FormatColumn 2 | 3 | 4 | # TODO: Drop Null column support in future. 5 | # Compatibility with old servers. 6 | class NullColumn(FormatColumn): 7 | ch_type = 'NULL' 8 | format = 'B' 9 | 10 | @property 11 | def size(self): 12 | return 1 13 | 14 | def after_read_items(self, items, nulls_map=None): 15 | return (None, ) * len(items) 16 | -------------------------------------------------------------------------------- /proton_driver/columns/numpy/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/timeplus-io/proton-python-driver/70a1c6313cd7af8f6fc12b4740fc6eb01027d4b9/proton_driver/columns/numpy/__init__.py -------------------------------------------------------------------------------- /proton_driver/columns/numpy/base.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | 4 | from ..base import Column 5 | 6 | 7 | class NumpyColumn(Column): 8 | dtype = None 9 | 10 | normalize_null_value = True 11 | 12 | def read_items(self, n_items, buf): 13 | data = buf.read(n_items * self.dtype.itemsize) 14 | return np.frombuffer(data, self.dtype.newbyteorder('<'), n_items) 15 | 16 | def write_items(self, items, buf): 17 | buf.write(items.astype(self.dtype.newbyteorder('<')).tobytes()) 18 | 19 | def _write_nulls_map(self, items, buf): 20 | s = self.make_null_struct(len(items)) 21 | nulls_map = self._get_nulls_map(items) 22 | buf.write(s.pack(*nulls_map)) 23 | 24 | def _get_nulls_map(self, items): 25 | return [bool(x) for x in pd.isnull(items)] 26 | 27 | def _read_data(self, n_items, buf, nulls_map=None): 28 | items = self.read_items(n_items, buf) 29 | 30 | if self.after_read_items: 31 | return self.after_read_items(items, nulls_map) 32 | elif nulls_map is not None: 33 | items = np.array(items, dtype=object) 34 | np.place(items, nulls_map, None) 35 | 36 | return items 37 | 38 | def prepare_items(self, items): 39 | nulls_map = pd.isnull(items) 40 | 41 | # Always replace null values with null_value for proper inserts into 42 | # non-nullable columns.
43 | if isinstance(items, np.ndarray) and self.normalize_null_value: 44 | items = np.array(items) 45 | np.place(items, nulls_map, self.null_value) 46 | 47 | return items 48 | -------------------------------------------------------------------------------- /proton_driver/columns/numpy/datecolumn.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from .base import NumpyColumn 4 | 5 | 6 | class NumpyDateColumn(NumpyColumn): 7 | dtype = np.dtype(np.uint16) 8 | ch_type = 'date' 9 | 10 | null_value = np.datetime64(0, 'Y') 11 | 12 | def read_items(self, n_items, buf): 13 | data = super(NumpyDateColumn, self).read_items(n_items, buf) 14 | return data.astype('datetime64[D]') 15 | 16 | def write_items(self, items, buf): 17 | super(NumpyDateColumn, self).write_items( 18 | items.astype('datetime64[D]'), buf 19 | ) 20 | -------------------------------------------------------------------------------- /proton_driver/columns/numpy/floatcolumn.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from .base import NumpyColumn 4 | 5 | # normalize_null_value = False due to float('nan') 6 | # With normalization pandas.isnull will treat float('nan') as NULL value. 7 | 8 | 9 | class NumpyFloat32Column(NumpyColumn): 10 | dtype = np.dtype(np.float32) 11 | ch_type = 'float32' 12 | normalize_null_value = False 13 | 14 | def _get_nulls_map(self, items): 15 | return [x is None for x in items] 16 | 17 | 18 | class NumpyFloat64Column(NumpyColumn): 19 | dtype = np.dtype(np.float64) 20 | ch_type = 'float64' 21 | normalize_null_value = False 22 | 23 | def _get_nulls_map(self, items): 24 | return [x is None for x in items] 25 | -------------------------------------------------------------------------------- /proton_driver/columns/numpy/intcolumn.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from .base import NumpyColumn 4 | 5 | 6 | class NumpyInt8Column(NumpyColumn): 7 | dtype = np.dtype(np.int8) 8 | ch_type = 'int8' 9 | 10 | 11 | class NumpyUInt8Column(NumpyColumn): 12 | dtype = np.dtype(np.uint8) 13 | ch_type = 'uint8' 14 | 15 | 16 | class NumpyInt16Column(NumpyColumn): 17 | dtype = np.dtype(np.int16) 18 | ch_type = 'int16' 19 | 20 | 21 | class NumpyUInt16Column(NumpyColumn): 22 | dtype = np.dtype(np.uint16) 23 | ch_type = 'uint16' 24 | 25 | 26 | class NumpyInt32Column(NumpyColumn): 27 | dtype = np.dtype(np.int32) 28 | ch_type = 'int32' 29 | 30 | 31 | class NumpyUInt32Column(NumpyColumn): 32 | dtype = np.dtype(np.uint32) 33 | ch_type = 'uint32' 34 | 35 | 36 | class NumpyInt64Column(NumpyColumn): 37 | dtype = np.dtype(np.int64) 38 | ch_type = 'int64' 39 | 40 | 41 | class NumpyUInt64Column(NumpyColumn): 42 | dtype = np.dtype(np.uint64) 43 | ch_type = 'uint64' 44 | -------------------------------------------------------------------------------- /proton_driver/columns/numpy/lowcardinalitycolumn.py: -------------------------------------------------------------------------------- 1 | from math import log 2 | 3 | import numpy as np 4 | import pandas as pd 5 | 6 | from ..lowcardinalitycolumn import LowCardinalityColumn 7 | from ...reader import read_binary_uint64 8 | from ...writer import write_binary_int64 9 | from .intcolumn import ( 10 | NumpyUInt8Column, NumpyUInt16Column, NumpyUInt32Column, NumpyUInt64Column 11 | ) 12 | 13 | 14 | class NumpyLowCardinalityColumn(LowCardinalityColumn): 15 | int_types = { 16 | 0: NumpyUInt8Column,
17 | 1: NumpyUInt16Column, 18 | 2: NumpyUInt32Column, 19 | 3: NumpyUInt64Column 20 | } 21 | 22 | def __init__(self, nested_column, **kwargs): 23 | super(NumpyLowCardinalityColumn, self).__init__(nested_column, 24 | **kwargs) 25 | 26 | def _write_data(self, items, buf): 27 | # Do not write anything for empty column. 28 | # May happen while writing empty arrays. 29 | if not len(items): 30 | return 31 | 32 | # Replace nans with defaults if not nullable. 33 | if isinstance(items, np.ndarray) and not self.nested_column.nullable: 34 | nulls = pd.isnull(items) 35 | items = np.where(nulls, self.nested_column.null_value, items) 36 | 37 | c = pd.Categorical(items) 38 | 39 | int_type = int(log(len(c.codes), 2) / 8) 40 | int_column = self.int_types[int_type]() 41 | 42 | serialization_type = self.serialization_type | int_type 43 | 44 | index = c.categories 45 | keys = c.codes 46 | 47 | if self.nested_column.nullable: 48 | # First element represents NULL if column is nullable. 49 | index = index.insert(0, self.nested_column.null_value) 50 | keys = keys + 1 51 | # Prevent null map writing. Reset nested column nullable flag. 52 | self.nested_column.nullable = False 53 | 54 | write_binary_int64(serialization_type, buf) 55 | write_binary_int64(len(index), buf) 56 | 57 | self.nested_column.write_data(index.to_numpy(items.dtype), buf) 58 | write_binary_int64(len(items), buf) 59 | int_column.write_items(keys, buf) 60 | 61 | def _read_data(self, n_items, buf, nulls_map=None): 62 | if not n_items: 63 | return tuple() 64 | 65 | serialization_type = read_binary_uint64(buf) 66 | 67 | # Lowest byte contains info about key type. 68 | key_type = serialization_type & 0xf 69 | keys_column = self.int_types[key_type]() 70 | 71 | nullable = self.nested_column.nullable 72 | # Prevent null map reading. Reset nested column nullable flag. 73 | self.nested_column.nullable = False 74 | 75 | index_size = read_binary_uint64(buf) 76 | index = self.nested_column.read_data(index_size, buf) 77 | 78 | read_binary_uint64(buf) # number of keys 79 | keys = keys_column.read_data(n_items, buf) 80 | 81 | if nullable: 82 | # Shift all codes by one ("No value" code is -1 for pandas 83 | # categorical) and drop corresponding first index; 84 | # this is the analog of the original operation: 85 | # index = (None, ) + index[1:] 86 | keys = np.array(keys, dtype='int64') # deal with possible overflow 87 | keys = keys - 1 88 | index = index[1:] 89 | return pd.Categorical.from_codes(keys, index) 90 | 91 | 92 | def create_numpy_low_cardinality_column(spec, column_by_spec_getter): 93 | inner = spec[16:-1] 94 | nested = column_by_spec_getter(inner) 95 | return NumpyLowCardinalityColumn(nested) 96 | -------------------------------------------------------------------------------- /proton_driver/columns/numpy/service.py: -------------------------------------------------------------------------------- 1 | from ...
import errors 2 | from .datecolumn import NumpyDateColumn 3 | from .datetimecolumn import create_numpy_datetime_column 4 | from .floatcolumn import NumpyFloat32Column, NumpyFloat64Column 5 | from .intcolumn import ( 6 | NumpyInt8Column, NumpyInt16Column, NumpyInt32Column, NumpyInt64Column, 7 | NumpyUInt8Column, NumpyUInt16Column, NumpyUInt32Column, NumpyUInt64Column 8 | ) 9 | from .lowcardinalitycolumn import create_numpy_low_cardinality_column 10 | from .stringcolumn import create_string_column 11 | from ..nullablecolumn import create_nullable_column 12 | 13 | column_by_type = {c.ch_type: c for c in [ 14 | NumpyDateColumn, 15 | NumpyFloat32Column, NumpyFloat64Column, 16 | NumpyInt8Column, NumpyInt16Column, NumpyInt32Column, NumpyInt64Column, 17 | NumpyUInt8Column, NumpyUInt16Column, NumpyUInt32Column, NumpyUInt64Column 18 | ]} 19 | 20 | 21 | def get_numpy_column_by_spec(spec, column_options): 22 | def create_column_with_options(x): 23 | return get_numpy_column_by_spec(x, column_options) 24 | 25 | if spec == 'string' or spec.startswith('fixed_string'): 26 | return create_string_column(spec, column_options) 27 | 28 | elif spec.startswith('datetime'): 29 | return create_numpy_datetime_column(spec, column_options) 30 | 31 | elif spec.startswith('nullable'): 32 | return create_nullable_column(spec, create_column_with_options) 33 | 34 | elif spec.startswith('low_cardinality'): 35 | return create_numpy_low_cardinality_column(spec, 36 | create_column_with_options) 37 | else: 38 | if spec in column_by_type: 39 | cls = column_by_type[spec] 40 | return cls(**column_options) 41 | 42 | raise errors.UnknownTypeError('Unknown type {}'.format(spec)) 43 | -------------------------------------------------------------------------------- /proton_driver/columns/numpy/stringcolumn.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from ... 
import defines 4 | from .base import NumpyColumn 5 | 6 | 7 | class NumpyStringColumn(NumpyColumn): 8 | null_value = '' 9 | 10 | default_encoding = defines.STRINGS_ENCODING 11 | 12 | def __init__(self, encoding=default_encoding, **kwargs): 13 | self.encoding = encoding 14 | super(NumpyStringColumn, self).__init__(**kwargs) 15 | 16 | def read_items(self, n_items, buf): 17 | return np.array( 18 | buf.read_strings(n_items, encoding=self.encoding), dtype=self.dtype 19 | ) 20 | 21 | def write_items(self, items, buf): 22 | return buf.write_strings(items.tolist(), encoding=self.encoding) 23 | 24 | 25 | class NumpyByteStringColumn(NumpyColumn): 26 | null_value = b'' 27 | 28 | def read_items(self, n_items, buf): 29 | return np.array(buf.read_strings(n_items), dtype=self.dtype) 30 | 31 | def write_items(self, items, buf): 32 | return buf.write_strings(items.tolist()) 33 | 34 | 35 | class NumpyFixedString(NumpyStringColumn): 36 | def __init__(self, length, **kwargs): 37 | self.length = length 38 | super(NumpyFixedString, self).__init__(**kwargs) 39 | 40 | def read_items(self, n_items, buf): 41 | return np.array(buf.read_fixed_strings( 42 | n_items, self.length, encoding=self.encoding 43 | ), dtype=self.dtype) 44 | 45 | def write_items(self, items, buf): 46 | return buf.write_fixed_strings( 47 | items.tolist(), self.length, encoding=self.encoding 48 | ) 49 | 50 | 51 | class NumpyByteFixedString(NumpyByteStringColumn): 52 | def __init__(self, length, **kwargs): 53 | self.length = length 54 | super(NumpyByteFixedString, self).__init__(**kwargs) 55 | 56 | def read_items(self, n_items, buf): 57 | return np.array( 58 | buf.read_fixed_strings(n_items, self.length), dtype=self.dtype 59 | ) 60 | 61 | def write_items(self, items, buf): 62 | return buf.write_fixed_strings(items.tolist(), self.length) 63 | 64 | 65 | def create_string_column(spec, column_options): 66 | client_settings = column_options['context'].client_settings 67 | strings_as_bytes = client_settings['strings_as_bytes'] 68 | encoding = client_settings.get( 69 | 'strings_encoding', NumpyStringColumn.default_encoding 70 | ) 71 | 72 | if spec == 'string': 73 | cls = NumpyByteStringColumn if strings_as_bytes else NumpyStringColumn 74 | return cls(encoding=encoding, **column_options) 75 | else: 76 | length = int(spec[13:-1]) 77 | cls = NumpyByteFixedString if strings_as_bytes else NumpyFixedString 78 | return cls(length, encoding=encoding, **column_options) 79 | -------------------------------------------------------------------------------- /proton_driver/columns/simpleaggregatefunctioncolumn.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | def create_simple_aggregate_function_column(spec, column_by_spec_getter): 4 | # SimpleAggregateFunction(Func, Type) -> Type 5 | inner = spec[24:-1].split(',', 1)[1].strip() 6 | nested = column_by_spec_getter(inner) 7 | return nested 8 | -------------------------------------------------------------------------------- /proton_driver/columns/stringcolumn.py: -------------------------------------------------------------------------------- 1 | 2 | from .. 
import defines 3 | from .base import Column 4 | 5 | 6 | class String(Column): 7 | ch_type = 'string' 8 | py_types = (str, ) 9 | null_value = '' 10 | 11 | default_encoding = defines.STRINGS_ENCODING 12 | 13 | def __init__(self, encoding=default_encoding, **kwargs): 14 | self.encoding = encoding 15 | super(String, self).__init__(**kwargs) 16 | 17 | def write_items(self, items, buf): 18 | buf.write_strings(items, encoding=self.encoding) 19 | 20 | def read_items(self, n_items, buf): 21 | return buf.read_strings(n_items, encoding=self.encoding) 22 | 23 | 24 | class ByteString(String): 25 | py_types = (bytes, ) 26 | null_value = b'' 27 | 28 | def write_items(self, items, buf): 29 | buf.write_strings(items) 30 | 31 | def read_items(self, n_items, buf): 32 | return buf.read_strings(n_items) 33 | 34 | 35 | class FixedString(String): 36 | ch_type = 'fixed_string' 37 | 38 | def __init__(self, length, **kwargs): 39 | self.length = length 40 | super(FixedString, self).__init__(**kwargs) 41 | 42 | def read_items(self, n_items, buf): 43 | return buf.read_fixed_strings( 44 | n_items, self.length, encoding=self.encoding 45 | ) 46 | 47 | def write_items(self, items, buf): 48 | buf.write_fixed_strings(items, self.length, encoding=self.encoding) 49 | 50 | 51 | class ByteFixedString(FixedString): 52 | py_types = (bytearray, bytes) 53 | null_value = b'' 54 | 55 | def read_items(self, n_items, buf): 56 | return buf.read_fixed_strings(n_items, self.length) 57 | 58 | def write_items(self, items, buf): 59 | buf.write_fixed_strings(items, self.length) 60 | 61 | 62 | def create_string_column(spec, column_options): 63 | client_settings = column_options['context'].client_settings 64 | strings_as_bytes = client_settings['strings_as_bytes'] 65 | encoding = client_settings.get('strings_encoding', String.default_encoding) 66 | 67 | if spec == 'string': 68 | cls = ByteString if strings_as_bytes else String 69 | return cls(encoding=encoding, **column_options) 70 | else: 71 | length_str = spec[12:-1] 72 | if "(" in length_str: 73 | length_str = length_str.replace("(", "") 74 | length = int(length_str) 75 | cls = ByteFixedString if strings_as_bytes else FixedString 76 | return cls(length, encoding=encoding, **column_options) 77 | -------------------------------------------------------------------------------- /proton_driver/columns/tuplecolumn.py: -------------------------------------------------------------------------------- 1 | 2 | from .base import Column 3 | from .util import get_inner_columns_with_types 4 | 5 | 6 | class TupleColumn(Column): 7 | py_types = (list, tuple) 8 | 9 | def __init__(self, names, nested_columns, **kwargs): 10 | self.names = names 11 | self.nested_columns = nested_columns 12 | client_settings = kwargs['context'].client_settings 13 | self.namedtuple_as_json = client_settings.get( 14 | 'namedtuple_as_json', False 15 | ) 16 | 17 | super(TupleColumn, self).__init__(**kwargs) 18 | self.null_value = tuple(x.null_value for x in nested_columns) 19 | 20 | def write_data(self, items, buf): 21 | items = self.prepare_items(items) 22 | items = list(zip(*items)) 23 | 24 | for i, x in enumerate(self.nested_columns): 25 | x.write_data(list(items[i]), buf) 26 | 27 | def write_items(self, items, buf): 28 | return self.write_data(items, buf) 29 | 30 | def read_data(self, n_items, buf): 31 | rv = [x.read_data(n_items, buf) for x in self.nested_columns] 32 | rv = list(zip(*rv)) 33 | 34 | if self.names[0] and self.namedtuple_as_json: 35 | return [dict(zip(self.names, x)) for x in rv] 36 | else: 37 | return rv 38 | 39 | 
def read_items(self, n_items, buf): 40 | return self.read_data(n_items, buf) 41 | 42 | def read_state_prefix(self, buf): 43 | super(TupleColumn, self).read_state_prefix(buf) 44 | 45 | for x in self.nested_columns: 46 | x.read_state_prefix(buf) 47 | 48 | def write_state_prefix(self, buf): 49 | super(TupleColumn, self).write_state_prefix(buf) 50 | 51 | for x in self.nested_columns: 52 | x.write_state_prefix(buf) 53 | 54 | 55 | def create_tuple_column(spec, column_by_spec_getter, column_options): 56 | columns_with_types = get_inner_columns_with_types('tuple', spec) 57 | names, types = zip(*columns_with_types) 58 | 59 | return TupleColumn(names, [column_by_spec_getter(x) for x in types], 60 | **column_options) 61 | -------------------------------------------------------------------------------- /proton_driver/columns/util.py: -------------------------------------------------------------------------------- 1 | 2 | def get_inner_spec(column_name, spec): 3 | brackets = 0 4 | offset = len(column_name) 5 | 6 | for i, ch in enumerate(spec[offset:], offset): 7 | if ch == '(': 8 | brackets += 1 9 | 10 | elif ch == ')': 11 | brackets -= 1 12 | 13 | if brackets == 0: 14 | break 15 | 16 | return spec[offset + 1:i] 17 | 18 | 19 | def get_inner_columns(column_name, spec): 20 | inner_spec = get_inner_spec(column_name, spec) 21 | brackets = 0 22 | column_begin = 0 23 | 24 | columns = [] 25 | for i, x in enumerate(inner_spec + ','): 26 | if x == ',': 27 | if brackets == 0: 28 | columns.append(inner_spec[column_begin:i]) 29 | column_begin = i + 1 30 | elif x == '(': 31 | brackets += 1 32 | elif x == ')': 33 | brackets -= 1 34 | elif x == ' ': 35 | if brackets == 0: 36 | column_begin = i + 1 37 | return columns 38 | 39 | 40 | def get_inner_columns_with_types(column_name, spec): 41 | inner_spec = get_inner_spec(column_name, spec) 42 | inner_spec = inner_spec.strip() 43 | brackets = 0 44 | prev_comma = 0 45 | prev_space = 0 46 | 47 | columns = [] 48 | for i, x in enumerate(inner_spec.strip() + ','): 49 | if x == ',': 50 | if brackets == 0: 51 | columns.append(( 52 | inner_spec[prev_comma:prev_space].strip(), 53 | inner_spec[prev_space:i] 54 | )) 55 | prev_comma = i + 1 56 | elif x == '(': 57 | brackets += 1 58 | elif x == ')': 59 | brackets -= 1 60 | elif x == ' ': 61 | if brackets == 0: 62 | prev_space = i + 1 63 | return columns 64 | -------------------------------------------------------------------------------- /proton_driver/columns/uuidcolumn.py: -------------------------------------------------------------------------------- 1 | from uuid import UUID 2 | 3 | from .base import FormatColumn 4 | from .. import errors 5 | from ..writer import MAX_UINT64 6 | 7 | 8 | class UUIDColumn(FormatColumn): 9 | ch_type = 'uuid' 10 | py_types = (str, UUID) 11 | format = 'Q' 12 | 13 | # UUID is stored by two uint64 numbers. 
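# e.g. UUID('00000000-0000-0001-0000-000000000002') is written as the pair (1, 2): high 64 bits first, then low 64 bits.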
14 | def write_items(self, items, buf): 15 | n_items = len(items) 16 | 17 | uint_64_pairs = [None] * 2 * n_items 18 | for i, x in enumerate(items): 19 | i2 = 2 * i 20 | uint_64_pairs[i2] = (x >> 64) & MAX_UINT64 21 | uint_64_pairs[i2 + 1] = x & MAX_UINT64 22 | 23 | s = self.make_struct(2 * n_items) 24 | buf.write(s.pack(*uint_64_pairs)) 25 | 26 | def read_items(self, n_items, buf): 27 | # TODO: cythonize 28 | s = self.make_struct(2 * n_items) 29 | items = s.unpack(buf.read(s.size)) 30 | 31 | uint_128_items = [None] * n_items 32 | for i in range(n_items): 33 | i2 = 2 * i 34 | uint_128_items[i] = (items[i2] << 64) + items[i2 + 1] 35 | 36 | return tuple(uint_128_items) 37 | 38 | def after_read_items(self, items, nulls_map=None): 39 | if nulls_map is None: 40 | return tuple(UUID(int=item) for item in items) 41 | else: 42 | return tuple( 43 | (None if is_null else UUID(int=items[i])) 44 | for i, is_null in enumerate(nulls_map) 45 | ) 46 | 47 | def before_write_items(self, items, nulls_map=None): 48 | null_value = self.null_value 49 | 50 | for i, item in enumerate(items): 51 | if nulls_map and nulls_map[i]: 52 | items[i] = null_value 53 | continue 54 | 55 | try: 56 | if not isinstance(item, UUID): 57 | item = UUID(item) 58 | 59 | except ValueError: 60 | raise errors.CannotParseUuidError( 61 | "Cannot parse uuid '{}'".format(item) 62 | ) 63 | 64 | items[i] = item.int 65 | -------------------------------------------------------------------------------- /proton_driver/compression/__init__.py: -------------------------------------------------------------------------------- 1 | import importlib 2 | 3 | from .. import errors 4 | from ..protocol import CompressionMethodByte 5 | 6 | 7 | def get_compressor_cls(alg): 8 | try: 9 | module = importlib.import_module('.' + alg, __name__) 10 | return module.Compressor 11 | 12 | except ImportError: 13 | raise errors.UnknownCompressionMethod( 14 | "Unknown compression method: '{}'".format(alg) 15 | ) 16 | 17 | 18 | def get_decompressor_cls(method_type): 19 | if method_type == CompressionMethodByte.LZ4: 20 | module = importlib.import_module('.lz4', __name__) 21 | 22 | elif method_type == CompressionMethodByte.ZSTD: 23 | module = importlib.import_module('.zstd', __name__) 24 | 25 | else: 26 | raise errors.UnknownCompressionMethod() 27 | 28 | return module.Decompressor 29 | -------------------------------------------------------------------------------- /proton_driver/compression/base.py: -------------------------------------------------------------------------------- 1 | from io import BytesIO 2 | 3 | from ..reader import read_binary_uint32 4 | from ..writer import write_binary_uint8, write_binary_uint32 5 | from .. import errors 6 | 7 | try: 8 | from clickhouse_cityhash.cityhash import CityHash128 9 | except ImportError: 10 | raise RuntimeError( 11 | 'Package clickhouse-cityhash is required to use compression' 12 | ) 13 | 14 | 15 | class BaseCompressor(object): 16 | """ 17 | Partial file-like object with write method. 
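Bytes passed to write() are only buffered; get_compressed_data() then emits one block: a uint32 total block size (header included), a uint32 uncompressed size and the compressed payload. The method byte and checksum are assumed to be written by the calling stream.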
18 | """ 19 | method = None 20 | method_byte = None 21 | 22 | def __init__(self): 23 | self.data = BytesIO() 24 | 25 | super(BaseCompressor, self).__init__() 26 | 27 | def get_value(self): 28 | value = self.data.getvalue() 29 | self.data.seek(0) 30 | self.data.truncate() 31 | return value 32 | 33 | def write(self, p_str): 34 | self.data.write(p_str) 35 | 36 | def compress_data(self, data): 37 | raise NotImplementedError 38 | 39 | def get_compressed_data(self, extra_header_size): 40 | rv = BytesIO() 41 | 42 | data = self.get_value() 43 | compressed = self.compress_data(data) 44 | 45 | header_size = extra_header_size + 4 + 4 # sizes 46 | 47 | write_binary_uint32(header_size + len(compressed), rv) 48 | write_binary_uint32(len(data), rv) 49 | rv.write(compressed) 50 | 51 | return rv.getvalue() 52 | 53 | 54 | class BaseDecompressor(object): 55 | method = None 56 | method_byte = None 57 | 58 | def __init__(self, real_stream): 59 | self.stream = real_stream 60 | super(BaseDecompressor, self).__init__() 61 | 62 | def decompress_data(self, data, uncompressed_size): 63 | raise NotImplementedError 64 | 65 | def check_hash(self, compressed_data, compressed_hash): 66 | if CityHash128(compressed_data) != compressed_hash: 67 | raise errors.ChecksumDoesntMatchError() 68 | 69 | def get_decompressed_data(self, method_byte, compressed_hash, 70 | extra_header_size): 71 | size_with_header = read_binary_uint32(self.stream) 72 | compressed_size = size_with_header - extra_header_size - 4 73 | 74 | compressed = BytesIO(self.stream.read(compressed_size)) 75 | 76 | block_check = BytesIO() 77 | write_binary_uint8(method_byte, block_check) 78 | write_binary_uint32(size_with_header, block_check) 79 | block_check.write(compressed.getvalue()) 80 | 81 | self.check_hash(block_check.getvalue(), compressed_hash) 82 | 83 | uncompressed_size = read_binary_uint32(compressed) 84 | 85 | compressed = compressed.read(compressed_size - 4) 86 | 87 | return self.decompress_data(compressed, uncompressed_size) 88 | -------------------------------------------------------------------------------- /proton_driver/compression/lz4.py: -------------------------------------------------------------------------------- 1 | from lz4 import block 2 | 3 | from .base import BaseCompressor, BaseDecompressor 4 | from ..protocol import CompressionMethod, CompressionMethodByte 5 | 6 | 7 | class Compressor(BaseCompressor): 8 | method = CompressionMethod.LZ4 9 | method_byte = CompressionMethodByte.LZ4 10 | mode = 'default' 11 | 12 | def compress_data(self, data): 13 | return block.compress(data, store_size=False, mode=self.mode) 14 | 15 | 16 | class Decompressor(BaseDecompressor): 17 | method = CompressionMethod.LZ4 18 | method_byte = CompressionMethodByte.LZ4 19 | 20 | def decompress_data(self, data, uncompressed_size): 21 | return block.decompress(data, uncompressed_size=uncompressed_size) 22 | -------------------------------------------------------------------------------- /proton_driver/compression/lz4hc.py: -------------------------------------------------------------------------------- 1 | from .lz4 import Compressor as BaseCompressor, Decompressor as BaseDecompressor 2 | 3 | 4 | class Compressor(BaseCompressor): 5 | mode = 'high_compression' 6 | 7 | 8 | class Decompressor(BaseDecompressor): 9 | pass 10 | -------------------------------------------------------------------------------- /proton_driver/compression/zstd.py: -------------------------------------------------------------------------------- 1 | import zstd 2 | 3 | from .base import 
BaseCompressor, BaseDecompressor 4 | from ..protocol import CompressionMethod, CompressionMethodByte 5 | 6 | 7 | class Compressor(BaseCompressor): 8 | method = CompressionMethod.ZSTD 9 | method_byte = CompressionMethodByte.ZSTD 10 | 11 | def compress_data(self, data): 12 | return zstd.compress(data) 13 | 14 | 15 | class Decompressor(BaseDecompressor): 16 | method = CompressionMethod.ZSTD 17 | method_byte = CompressionMethodByte.ZSTD 18 | 19 | def decompress_data(self, data, uncompressed_size): 20 | return zstd.decompress(data) 21 | -------------------------------------------------------------------------------- /proton_driver/context.py: -------------------------------------------------------------------------------- 1 | 2 | class Context(object): 3 | def __init__(self): 4 | self._server_info = None 5 | self._settings = None 6 | self._client_settings = None 7 | super(Context, self).__init__() 8 | 9 | @property 10 | def server_info(self): 11 | return self._server_info 12 | 13 | @server_info.setter 14 | def server_info(self, value): 15 | self._server_info = value 16 | 17 | @property 18 | def settings(self): 19 | return self._settings.copy() 20 | 21 | @settings.setter 22 | def settings(self, value): 23 | self._settings = value.copy() 24 | 25 | @property 26 | def client_settings(self): 27 | return self._client_settings.copy() 28 | 29 | @client_settings.setter 30 | def client_settings(self, value): 31 | self._client_settings = value.copy() 32 | 33 | def __repr__(self): 34 | return '<Context(server_info=%s, client_settings=%s, settings=%s)>' % ( 35 | self._server_info, self._client_settings, self._settings 36 | ) 37 | -------------------------------------------------------------------------------- /proton_driver/dbapi/__init__.py: -------------------------------------------------------------------------------- 1 | from .connection import Connection 2 | from .errors import ( 3 | Warning, Error, DataError, DatabaseError, ProgrammingError, IntegrityError, 4 | InterfaceError, InternalError, NotSupportedError, OperationalError 5 | ) 6 | from .. import defines 7 | 8 | apilevel = '2.0' 9 | 10 | threadsafety = 2 11 | 12 | paramstyle = 'pyformat' 13 | 14 | 15 | def connect(dsn=None, host=None, 16 | user=defines.DEFAULT_USER, password=defines.DEFAULT_PASSWORD, 17 | port=defines.DEFAULT_PORT, database=defines.DEFAULT_DATABASE, 18 | **kwargs): 19 | """ 20 | Create a new database connection. 21 | 22 | The connection can be specified via DSN: 23 | 24 | ``conn = connect("proton://localhost/test?param1=value1&...")`` 25 | 26 | or using database and credentials arguments: 27 | 28 | ``conn = connect(database="test", user="default", password="default", 29 | host="localhost", **kwargs)`` 30 | 31 | The basic connection parameters are: 32 | 33 | - *host*: host with a running Proton server. 34 | - *port*: port the Proton server is bound to. 35 | - *database*: database to connect to. 36 | - *user*: database user. 37 | - *password*: user's password. 38 | 39 | See defaults in :data:`~proton_driver.connection.Connection` 40 | constructor. 41 | 42 | DSN or host is required. 43 | 44 | Any other keyword parameter will be passed to the underlying Connection 45 | class. 46 | 47 | :return: a new connection.
48 | """ 49 | 50 | if dsn is None and host is None: 51 | raise ValueError('host or dsn is required') 52 | 53 | return Connection(dsn=dsn, user=user, password=password, host=host, 54 | port=port, database=database, **kwargs) 55 | 56 | 57 | __all__ = [ 58 | 'connect', 59 | 'Warning', 'Error', 'DataError', 'DatabaseError', 'ProgrammingError', 60 | 'IntegrityError', 'InterfaceError', 'InternalError', 'NotSupportedError', 61 | 'OperationalError' 62 | ] 63 | -------------------------------------------------------------------------------- /proton_driver/dbapi/connection.py: -------------------------------------------------------------------------------- 1 | from ..client import Client 2 | from .. import defines 3 | from .cursor import Cursor 4 | from .errors import InterfaceError 5 | 6 | 7 | class Connection(object): 8 | """ 9 | Creates new Connection for accessing Proton database. 10 | 11 | Connection is just wrapper for handling multiple cursors (clients) and 12 | do not initiate actual connections to the Proton server. 13 | 14 | See parameters description in 15 | :data:`~proton_driver.connection.Connection`. 16 | """ 17 | def __init__(self, dsn=None, host=None, 18 | user=defines.DEFAULT_USER, password=defines.DEFAULT_PASSWORD, 19 | port=defines.DEFAULT_PORT, database=defines.DEFAULT_DATABASE, 20 | **kwargs): 21 | self.cursors = [] 22 | 23 | self.dsn = dsn 24 | self.user = user 25 | self.password = password 26 | self.host = host 27 | self.port = port 28 | self.database = database 29 | self.connection_kwargs = kwargs 30 | self.is_closed = False 31 | self._hosts = None 32 | super(Connection, self).__init__() 33 | 34 | def __repr__(self): 35 | return ''.format( 36 | id(self), self.is_closed 37 | ) 38 | 39 | # Context manager integrations. 40 | def __enter__(self): 41 | return self 42 | 43 | def __exit__(self, exc_type, exc_val, exc_tb): 44 | self.close() 45 | 46 | def _make_client(self): 47 | """ 48 | :return: a new Client instance. 49 | """ 50 | if self.dsn is not None: 51 | return Client.from_url(self.dsn) 52 | 53 | return Client(self.host, port=self.port, 54 | user=self.user, password=self.password, 55 | database=self.database, **self.connection_kwargs) 56 | 57 | def close(self): 58 | """ 59 | Close the connection now. The connection will be unusable from this 60 | point forward; an :data:`~proton_driver.dbapi.Error` (or subclass) 61 | exception will be raised if any operation is attempted with the 62 | connection. The same applies to all cursor objects trying to use the 63 | connection. 64 | """ 65 | for cursor in self.cursors: 66 | cursor.close() 67 | 68 | self.is_closed = True 69 | 70 | def commit(self): 71 | """ 72 | Do nothing since Proton has no transactions. 73 | """ 74 | pass 75 | 76 | def rollback(self): 77 | """ 78 | Do nothing since Proton has no transactions. 79 | """ 80 | pass 81 | 82 | def cursor(self, cursor_factory=None): 83 | """ 84 | :param cursor_factory: Argument can be used to create non-standard 85 | cursors. 86 | :return: a new cursor object using the connection. 
87 | """ 88 | if self.is_closed: 89 | raise InterfaceError('connection already closed') 90 | 91 | client = self._make_client() 92 | if self._hosts is None: 93 | self._hosts = client.connection.hosts 94 | else: 95 | client.connection.hosts = self._hosts 96 | cursor_factory = cursor_factory or Cursor 97 | cursor = cursor_factory(client, self) 98 | self.cursors.append(cursor) 99 | return cursor 100 | -------------------------------------------------------------------------------- /proton_driver/dbapi/errors.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | class Warning(Exception): 4 | pass 5 | 6 | 7 | class Error(Exception): 8 | pass 9 | 10 | 11 | class InterfaceError(Error): 12 | pass 13 | 14 | 15 | class DatabaseError(Error): 16 | pass 17 | 18 | 19 | class InternalError(DatabaseError): 20 | pass 21 | 22 | 23 | class OperationalError(DatabaseError): 24 | pass 25 | 26 | 27 | class ProgrammingError(DatabaseError): 28 | pass 29 | 30 | 31 | class IntegrityError(DatabaseError): 32 | pass 33 | 34 | 35 | class DataError(DatabaseError): 36 | pass 37 | 38 | 39 | class NotSupportedError(DatabaseError): 40 | pass 41 | -------------------------------------------------------------------------------- /proton_driver/dbapi/extras.py: -------------------------------------------------------------------------------- 1 | import re 2 | from collections import namedtuple 3 | from functools import lru_cache 4 | 5 | from .cursor import Cursor 6 | 7 | 8 | class DictCursor(Cursor): 9 | """ 10 | A cursor that generates results as :class:`dict`. 11 | 12 | ``fetch*()`` methods will return dicts instead of tuples. 13 | """ 14 | 15 | def fetchone(self): 16 | rv = super(DictCursor, self).fetchone() 17 | if rv is not None: 18 | rv = dict(zip(self._columns, rv)) 19 | return rv 20 | 21 | def fetchmany(self, size=None): 22 | rv = super(DictCursor, self).fetchmany(size=size) 23 | return [dict(zip(self._columns, x)) for x in rv] 24 | 25 | def fetchall(self): 26 | rv = super(DictCursor, self).fetchall() 27 | return [dict(zip(self._columns, x)) for x in rv] 28 | 29 | 30 | class NamedTupleCursor(Cursor): 31 | """ 32 | A cursor that generates results as named tuples created by 33 | :func:`~collections.namedtuple`. 34 | 35 | ``fetch*()`` methods will return named tuples instead of regular tuples, so 36 | their elements can be accessed both as regular numeric items as well as 37 | attributes. 38 | """ 39 | 40 | # ascii except alnum and underscore 41 | _re_clean = re.compile( 42 | '[' + re.escape(' !"#$%&\'()*+,-./:;<=>?@[\\]^`{|}~') + ']') 43 | 44 | @classmethod 45 | @lru_cache(512) 46 | def _make_nt(self, key): 47 | fields = [] 48 | for s in key: 49 | s = self._re_clean.sub('_', s) 50 | # Python identifier cannot start with numbers, namedtuple fields 51 | # cannot start with underscore. 
52 | if s[0] == '_' or '0' <= s[0] <= '9': 53 | s = 'f' + s 54 | fields.append(s) 55 | 56 | return namedtuple('Record', fields) 57 | 58 | def fetchone(self): 59 | rv = super(NamedTupleCursor, self).fetchone() 60 | if rv is not None: 61 | nt = self._make_nt(self._columns) 62 | rv = nt(*rv) 63 | return rv 64 | 65 | def fetchmany(self, size=None): 66 | rv = super(NamedTupleCursor, self).fetchmany(size=size) 67 | nt = self._make_nt(self._columns) 68 | return [nt(*x) for x in rv] 69 | 70 | def fetchall(self): 71 | rv = super(NamedTupleCursor, self).fetchall() 72 | nt = self._make_nt(self._columns) 73 | return [nt(*x) for x in rv] 74 | -------------------------------------------------------------------------------- /proton_driver/defines.py: -------------------------------------------------------------------------------- 1 | 2 | DEFAULT_DATABASE = 'default' 3 | DEFAULT_USER = 'default' 4 | DEFAULT_PASSWORD = '' 5 | 6 | DEFAULT_PORT = 8463 7 | DEFAULT_SECURE_PORT = 9440 8 | 9 | DBMS_MIN_REVISION_WITH_TEMPORARY_TABLES = 50264 10 | DBMS_MIN_REVISION_WITH_TOTAL_ROWS_IN_PROGRESS = 51554 11 | DBMS_MIN_REVISION_WITH_BLOCK_INFO = 51903 12 | # Legacy above. 13 | DBMS_MIN_REVISION_WITH_CLIENT_INFO = 54032 14 | DBMS_MIN_REVISION_WITH_SERVER_TIMEZONE = 54058 15 | DBMS_MIN_REVISION_WITH_QUOTA_KEY_IN_CLIENT_INFO = 54060 16 | DBMS_MIN_REVISION_WITH_SERVER_DISPLAY_NAME = 54372 17 | DBMS_MIN_REVISION_WITH_VERSION_PATCH = 54401 18 | DBMS_MIN_REVISION_WITH_SERVER_LOGS = 54406 19 | DBMS_MIN_REVISION_WITH_COLUMN_DEFAULTS_METADATA = 54410 20 | DBMS_MIN_REVISION_WITH_CLIENT_WRITE_INFO = 54420 21 | DBMS_MIN_REVISION_WITH_SETTINGS_SERIALIZED_AS_STRINGS = 54429 22 | DBMS_MIN_REVISION_WITH_INTERSERVER_SECRET = 54441 23 | DBMS_MIN_REVISION_WITH_OPENTELEMETRY = 54442 24 | DBMS_MIN_PROTOCOL_VERSION_WITH_DISTRIBUTED_DEPTH = 54448 25 | DBMS_MIN_PROTOCOL_VERSION_WITH_INITIAL_QUERY_START_TIME = 54449 26 | DBMS_MIN_PROTOCOL_VERSION_WITH_INCREMENTAL_PROFILE_EVENTS = 54451 27 | DBMS_MIN_REVISION_WITH_PARALLEL_REPLICAS = 54453 28 | 29 | # Timeouts 30 | DBMS_DEFAULT_CONNECT_TIMEOUT_SEC = 10 31 | DBMS_DEFAULT_TIMEOUT_SEC = 300 32 | 33 | DBMS_DEFAULT_SYNC_REQUEST_TIMEOUT_SEC = 5 34 | 35 | DEFAULT_COMPRESS_BLOCK_SIZE = 1048576 36 | DEFAULT_INSERT_BLOCK_SIZE = 1048576 37 | 38 | DBMS_NAME = 'Proton' 39 | CLIENT_NAME = 'python-driver' 40 | CLIENT_VERSION_MAJOR = 20 41 | CLIENT_VERSION_MINOR = 10 42 | CLIENT_VERSION_PATCH = 2 43 | CLIENT_REVISION = 54453 44 | 45 | BUFFER_SIZE = 1048576 46 | 47 | STRINGS_ENCODING = 'utf-8' 48 | -------------------------------------------------------------------------------- /proton_driver/log.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | logger = logging.getLogger(__name__) 4 | 5 | 6 | log_priorities = ( 7 | 'Unknown', 8 | 'Fatal', 9 | 'Critical', 10 | 'Error', 11 | 'Warning', 12 | 'Notice', 13 | 'Information', 14 | 'Debug', 15 | 'Trace' 16 | ) 17 | 18 | 19 | def log_block(block): 20 | if block is None: 21 | return 22 | 23 | column_names = [x[0] for x in block.columns_with_types] 24 | 25 | for row in block.get_rows(): 26 | row = dict(zip(column_names, row)) 27 | 28 | if 1 <= row['priority'] <= 8: 29 | priority = log_priorities[row['priority']] 30 | else: 31 | priority = row['priority'] 32 | 33 | # thread_number in servers prior to 20.x 34 | thread_id = row.get('thread_id') or row['thread_number'] 35 | 36 | logger.info( 37 | '[ %s ] [ %s ] {%s} <%s> %s: %s', 38 | row['host_name'], 39 | thread_id, 40 | row['query_id'], 41 | priority, 42 | row['source'], 43
| row['text'] 44 | ) 45 | -------------------------------------------------------------------------------- /proton_driver/numpy/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/timeplus-io/proton-python-driver/70a1c6313cd7af8f6fc12b4740fc6eb01027d4b9/proton_driver/numpy/__init__.py -------------------------------------------------------------------------------- /proton_driver/numpy/block.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from ..block import ColumnOrientedBlock 4 | 5 | 6 | class NumpyColumnOrientedBlock(ColumnOrientedBlock): 7 | def transposed(self): 8 | return np.transpose(self.data) 9 | -------------------------------------------------------------------------------- /proton_driver/numpy/helpers.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | 4 | 5 | def column_chunks(columns, n): 6 | for column in columns: 7 | if not isinstance(column, (np.ndarray, pd.DatetimeIndex)): 8 | raise TypeError( 9 | 'Unsupported column type: {}. ' 10 | 'ndarray/DatetimeIndex is expected.' 11 | .format(type(column)) 12 | ) 13 | 14 | # create chunk generator for every column 15 | chunked = [ 16 | iter(np.array_split(c, len(c) // n) if len(c) > n else [c]) 17 | for c in columns 18 | ] 19 | 20 | while True: 21 | # get next chunk for every column 22 | item = [next(column, []) for column in chunked] 23 | if not any(len(x) for x in item): 24 | break 25 | yield item 26 | -------------------------------------------------------------------------------- /proton_driver/numpy/result.py: -------------------------------------------------------------------------------- 1 | from itertools import chain 2 | 3 | import numpy as np 4 | import pandas as pd 5 | from pandas.api.types import union_categoricals 6 | 7 | from ..progress import Progress 8 | from ..result import QueryResult 9 | 10 | 11 | class NumpyQueryResult(QueryResult): 12 | """ 13 | Stores query result from multiple blocks as numpy arrays. 14 | """ 15 | 16 | def store(self, packet): 17 | block = getattr(packet, 'block', None) 18 | if block is None: 19 | return 20 | 21 | # Header block contains no rows. Pick columns from it. 22 | if block.num_rows: 23 | if self.columnar: 24 | self.data.append(block.get_columns()) 25 | else: 26 | self.data.extend(block.get_rows()) 27 | 28 | elif not self.columns_with_types: 29 | self.columns_with_types = block.columns_with_types 30 | 31 | def get_result(self): 32 | """ 33 | :return: stored query result. 34 | """ 35 | 36 | for packet in self.packet_generator: 37 | self.store(packet) 38 | 39 | if self.columnar: 40 | data = [] 41 | # Transpose to a list of columns, each column is list of chunks 42 | for column_chunks in zip(*self.data): 43 | # Concatenate chunks for each column 44 | if isinstance(column_chunks[0], np.ndarray): 45 | column = np.concatenate(column_chunks) 46 | elif isinstance(column_chunks[0], pd.Categorical): 47 | column = union_categoricals(column_chunks) 48 | else: 49 | column = tuple(chain.from_iterable(column_chunks)) 50 | data.append(column) 51 | else: 52 | data = self.data 53 | 54 | if self.with_column_types: 55 | return data, self.columns_with_types 56 | else: 57 | return data 58 | 59 | 60 | class NumpyProgressQueryResult(NumpyQueryResult): 61 | """ 62 | Stores query result and progress information from multiple blocks. 63 | Provides iteration over query progress. 
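Usage sketch (an illustrative addition, inferred from ``__next__`` and ``get_result`` below): iterating the result yields ``(rows_read, total_rows)`` pairs as progress packets arrive; a final ``get_result()`` call drains the remaining packets and returns the stored data.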
64 | """ 65 | 66 | def __init__(self, *args, **kwargs): 67 | self.progress_totals = Progress() 68 | 69 | super(NumpyProgressQueryResult, self).__init__(*args, **kwargs) 70 | 71 | def __iter__(self): 72 | return self 73 | 74 | def __next__(self): 75 | while True: 76 | packet = next(self.packet_generator) 77 | progress_packet = getattr(packet, 'progress', None) 78 | if progress_packet: 79 | self.progress_totals.increment(progress_packet) 80 | return ( 81 | self.progress_totals.rows, self.progress_totals.total_rows 82 | ) 83 | else: 84 | self.store(packet) 85 | 86 | def get_result(self): 87 | # Read all progress packets. 88 | for _ in self: 89 | pass 90 | 91 | return super(NumpyProgressQueryResult, self).get_result() 92 | 93 | 94 | class NumpyIterQueryResult(object): 95 | """ 96 | Provides iteration over returned data by chunks (streaming by chunks). 97 | """ 98 | 99 | def __init__( 100 | self, packet_generator, 101 | with_column_types=False): 102 | self.packet_generator = packet_generator 103 | self.with_column_types = with_column_types 104 | 105 | self.first_block = True 106 | super(NumpyIterQueryResult, self).__init__() 107 | 108 | def __iter__(self): 109 | return self 110 | 111 | def __next__(self): 112 | packet = next(self.packet_generator) 113 | block = getattr(packet, 'block', None) 114 | if block is None: 115 | return [] 116 | 117 | if self.first_block and self.with_column_types: 118 | self.first_block = False 119 | rv = [block.columns_with_types] 120 | rv.extend(block.get_rows()) 121 | return rv 122 | else: 123 | return block.get_rows() 124 | -------------------------------------------------------------------------------- /proton_driver/opentelemetry.py: -------------------------------------------------------------------------------- 1 | 2 | class OpenTelemetryTraceContext(object): 3 | traceparent_tpl = 'xx-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx-xxxxxxxxxxxxxxxx-xx' 4 | translation = str.maketrans('1234567890abcdef', 'xxxxxxxxxxxxxxxx') 5 | 6 | def __init__(self, traceparent, tracestate): 7 | # xx-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx-xxxxxxxxxxxxxxxx-xx 8 | # ^ ^ ^ ^ 9 | # version trace_id span_id flags 10 | 11 | self.trace_id = None # UUID 12 | self.span_id = None # UInt64 13 | self.tracestate = tracestate # String 14 | self.trace_flags = None # UInt8 15 | 16 | if traceparent is not None: 17 | self.parse_traceparent(traceparent) 18 | 19 | super(OpenTelemetryTraceContext, self).__init__() 20 | 21 | def parse_traceparent(self, traceparent): 22 | traceparent = traceparent.lower() 23 | 24 | if len(traceparent) != len(self.traceparent_tpl): 25 | raise ValueError('unexpected length {}, expected {}'.format( 26 | len(traceparent), len(self.traceparent_tpl) 27 | )) 28 | 29 | if traceparent.translate(self.translation) != self.traceparent_tpl: 30 | raise ValueError( 31 | 'Malformed traceparant header: {}'.format(traceparent) 32 | ) 33 | 34 | parts = traceparent.split('-') 35 | version = int(parts[0], 16) 36 | if version != 0: 37 | raise ValueError( 38 | 'unexpected version {}, expected 00'.format(parts[0]) 39 | ) 40 | 41 | self.trace_id = (int(parts[1][16:], 16) << 64) + int(parts[1][:16], 16) 42 | self.span_id = int(parts[2], 16) 43 | self.trace_flags = int(parts[3], 16) 44 | -------------------------------------------------------------------------------- /proton_driver/progress.py: -------------------------------------------------------------------------------- 1 | from . 
import defines 2 | from .varint import read_varint 3 | 4 | 5 | class Progress(object): 6 | def __init__(self): 7 | self.rows = 0 8 | self.bytes = 0 9 | self.total_rows = 0 10 | self.written_rows = 0 11 | self.written_bytes = 0 12 | 13 | super(Progress, self).__init__() 14 | 15 | def read(self, server_revision, fin): 16 | self.rows = read_varint(fin) 17 | self.bytes = read_varint(fin) 18 | 19 | revision = server_revision 20 | if revision >= defines.DBMS_MIN_REVISION_WITH_TOTAL_ROWS_IN_PROGRESS: 21 | self.total_rows = read_varint(fin) 22 | 23 | if revision >= defines.DBMS_MIN_REVISION_WITH_CLIENT_WRITE_INFO: 24 | self.written_rows = read_varint(fin) 25 | self.written_bytes = read_varint(fin) 26 | 27 | def increment(self, another_progress): 28 | self.rows += another_progress.rows 29 | self.bytes += another_progress.bytes 30 | self.total_rows += another_progress.total_rows 31 | self.written_rows += another_progress.written_rows 32 | self.written_bytes += another_progress.written_bytes 33 | -------------------------------------------------------------------------------- /proton_driver/protocol.py: -------------------------------------------------------------------------------- 1 | 2 | class ClientPacketTypes(object): 3 | """ 4 | Packet types that client transmits 5 | """ 6 | # Name, version, revision, default DB 7 | HELLO = 0 8 | 9 | # Query id, query settings, stage up to which the query must be executed, 10 | # whether the compression must be used, query text 11 | # (without data for INSERTs). 12 | QUERY = 1 13 | 14 | # A block of data (compressed or not). 15 | DATA = 2 16 | 17 | # Cancel the query execution. 18 | CANCEL = 3 19 | 20 | # Check that connection to the server is alive. 21 | PING = 4 22 | 23 | # Check status of tables on the server. 24 | TABLES_STATUS_REQUEST = 5 25 | 26 | _types_str = [ 27 | 'Hello', 'Query', 'Data', 'Cancel', 'Ping', 'TablesStatusRequest' 28 | ] 29 | 30 | @classmethod 31 | def to_str(cls, packet): 32 | return 'Unknown packet' if packet > 5 else cls._types_str[packet] 33 | 34 | 35 | class ServerPacketTypes(object): 36 | """ 37 | Packet types that server transmits. 38 | """ 39 | # Name, version, revision. 40 | HELLO = 0 41 | 42 | # A block of data (compressed or not). 43 | DATA = 1 44 | 45 | # The exception during query execution. 46 | EXCEPTION = 2 47 | 48 | # Query execution progress: rows read, bytes read. 49 | PROGRESS = 3 50 | 51 | # Ping response 52 | PONG = 4 53 | 54 | # All packets were transmitted 55 | END_OF_STREAM = 5 56 | 57 | # Packet with profiling info. 58 | PROFILE_INFO = 6 59 | 60 | # A block with totals (compressed or not). 61 | TOTALS = 7 62 | 63 | # A block with minimums and maximums (compressed or not). 64 | EXTREMES = 8 65 | 66 | # A response to TablesStatus request. 67 | TABLES_STATUS_RESPONSE = 9 68 | 69 | # System logs of the query execution 70 | LOG = 10 71 | 72 | # Columns' description for default values calculation 73 | TABLE_COLUMNS = 11 74 | 75 | # List of unique parts ids. 76 | PART_UUIDS = 12 77 | 78 | # String (UUID) describes a request for which next task is needed 79 | READ_TASK_REQUEST = 13 80 | 81 | # Packet with profile events from server. 
82 | PROFILE_EVENTS = 14 83 | 84 | _types_str = [ 85 | 'Hello', 'Data', 'Exception', 'Progress', 'Pong', 'EndOfStream', 86 | 'ProfileInfo', 'Totals', 'Extremes', 'TablesStatusResponse', 'Log', 87 | 'TableColumns', 'PartUUIDs', 'ReadTaskRequest', 'ProfileEvents' 88 | ] 89 | 90 | @classmethod 91 | def to_str(cls, packet): 92 | return 'Unknown packet' if packet > 14 else cls._types_str[packet] 93 | 94 | @classmethod 95 | def strings_in_message(cls, packet): 96 | if packet == cls.TABLE_COLUMNS: 97 | return 2 98 | return 0 99 | 100 | 101 | class Compression(object): 102 | DISABLED = 0 103 | ENABLED = 1 104 | 105 | 106 | class CompressionMethod(object): 107 | LZ4 = 1 108 | LZ4HC = 2 109 | ZSTD = 3 110 | 111 | 112 | class CompressionMethodByte(object): 113 | LZ4 = 0x82 114 | ZSTD = 0x90 115 | -------------------------------------------------------------------------------- /proton_driver/queryprocessingstage.py: -------------------------------------------------------------------------------- 1 | 2 | class QueryProcessingStage(object): 3 | """ 4 | Determines up to which stage a SELECT query should be executed. 5 | """ 6 | FETCH_COLUMNS = 0 7 | WITH_MERGEABLE_STATE = 1 8 | COMPLETE = 2 9 | -------------------------------------------------------------------------------- /proton_driver/reader.py: -------------------------------------------------------------------------------- 1 | from struct import Struct 2 | 3 | from .varint import read_varint 4 | 5 | 6 | def read_binary_str(buf): 7 | length = read_varint(buf) 8 | return read_binary_str_fixed_len(buf, length) 9 | 10 | 11 | def read_binary_bytes(buf): 12 | length = read_varint(buf) 13 | return read_binary_bytes_fixed_len(buf, length) 14 | 15 | 16 | def read_binary_str_fixed_len(buf, length): 17 | return read_binary_bytes_fixed_len(buf, length).decode('utf-8') 18 | 19 | 20 | def read_binary_bytes_fixed_len(buf, length): 21 | return buf.read(length) 22 | 23 | 24 | def read_binary_int(buf, fmt): 25 | """ 26 | Reads int from buffer with provided format. 27 | """ 28 | # Little endian.
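# Illustrative examples (added): fmt='B' unpacks one unsigned byte, fmt='q' a signed 64-bit integer; the '<' prefix forces little-endian order regardless of platform.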
29 | s = Struct('<' + fmt) 30 | return s.unpack(buf.read(s.size))[0] 31 | 32 | 33 | def read_binary_int8(buf): 34 | return read_binary_int(buf, 'b') 35 | 36 | 37 | def read_binary_int16(buf): 38 | return read_binary_int(buf, 'h') 39 | 40 | 41 | def read_binary_int32(buf): 42 | return read_binary_int(buf, 'i') 43 | 44 | 45 | def read_binary_int64(buf): 46 | return read_binary_int(buf, 'q') 47 | 48 | 49 | def read_binary_uint8(buf): 50 | return read_binary_int(buf, 'B') 51 | 52 | 53 | def read_binary_uint16(buf): 54 | return read_binary_int(buf, 'H') 55 | 56 | 57 | def read_binary_uint32(buf): 58 | return read_binary_int(buf, 'I') 59 | 60 | 61 | def read_binary_uint64(buf): 62 | return read_binary_int(buf, 'Q') 63 | 64 | 65 | def read_binary_uint128(buf): 66 | hi = read_binary_int(buf, 'Q') 67 | lo = read_binary_int(buf, 'Q') 68 | 69 | return (hi << 64) + lo 70 | -------------------------------------------------------------------------------- /proton_driver/readhelpers.py: -------------------------------------------------------------------------------- 1 | from .errors import ServerException 2 | from .reader import read_binary_str, read_binary_uint8, read_binary_int32 3 | 4 | 5 | def read_exception(buf, additional_message=None): 6 | code = read_binary_int32(buf) 7 | name = read_binary_str(buf) 8 | message = read_binary_str(buf) 9 | stack_trace = read_binary_str(buf) 10 | has_nested = bool(read_binary_uint8(buf)) 11 | 12 | new_message = '' 13 | 14 | if additional_message: 15 | new_message += additional_message + '. ' 16 | 17 | if name != 'DB::Exception': 18 | new_message += name + ". " 19 | 20 | new_message += message + ". Stack trace:\n\n" + stack_trace 21 | 22 | nested = None 23 | if has_nested: 24 | nested = read_exception(buf) 25 | 26 | return ServerException(new_message, code, nested=nested) 27 | -------------------------------------------------------------------------------- /proton_driver/settings/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/timeplus-io/proton-python-driver/70a1c6313cd7af8f6fc12b4740fc6eb01027d4b9/proton_driver/settings/__init__.py -------------------------------------------------------------------------------- /proton_driver/settings/types.py: -------------------------------------------------------------------------------- 1 | from ..util.helpers import asbool 2 | from ..varint import write_varint 3 | from ..writer import write_binary_str 4 | 5 | 6 | class SettingType(object): 7 | @classmethod 8 | def write(cls, value, buf): 9 | raise NotImplementedError 10 | 11 | 12 | class SettingUInt64(SettingType): 13 | @classmethod 14 | def write(cls, value, buf): 15 | write_varint(int(value), buf) 16 | 17 | 18 | class SettingBool(SettingType): 19 | @classmethod 20 | def write(cls, value, buf): 21 | write_varint(asbool(value), buf) 22 | 23 | 24 | class SettingString(SettingType): 25 | @classmethod 26 | def write(cls, value, buf): 27 | write_binary_str(value, buf) 28 | 29 | 30 | class SettingChar(SettingType): 31 | @classmethod 32 | def write(cls, value, buf): 33 | write_binary_str(value[0], buf) 34 | 35 | 36 | class SettingFloat(SettingType): 37 | @classmethod 38 | def write(cls, value, buf): 39 | """ 40 | Float is written in string representation. 
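For example (illustrative): the value ``0.5`` is serialized as the length-prefixed string ``'0.5'`` by the ``write_binary_str`` call below.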
41 | """ 42 | write_binary_str(str(value), buf) 43 | 44 | 45 | class SettingMaxThreads(SettingUInt64): 46 | @classmethod 47 | def write(cls, value, buf): 48 | if value == 'auto': 49 | value = 0 50 | super(SettingMaxThreads, cls).write(value, buf) 51 | -------------------------------------------------------------------------------- /proton_driver/settings/writer.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from ..writer import write_binary_str, write_binary_uint8 4 | from .available import settings as available_settings 5 | 6 | 7 | logger = logging.getLogger(__name__) 8 | 9 | 10 | def write_settings(settings, buf, settings_as_strings, is_important=False): 11 | for setting, value in (settings or {}).items(): 12 | # If the server support settings as string we do not need to know 13 | # anything about them, so we can write any setting. 14 | if settings_as_strings: 15 | write_binary_str(setting, buf) 16 | write_binary_uint8(int(is_important), buf) 17 | write_binary_str(str(value), buf) 18 | 19 | else: 20 | # If the server requires string in binary, 21 | # then they cannot be written without type. 22 | setting_writer = available_settings.get(setting) 23 | if not setting_writer: 24 | logger.warning('Unknown setting %s. Skipping', setting) 25 | continue 26 | write_binary_str(setting, buf) 27 | setting_writer.write(value, buf) 28 | 29 | write_binary_str('', buf) # end of settings 30 | -------------------------------------------------------------------------------- /proton_driver/streams/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/timeplus-io/proton-python-driver/70a1c6313cd7af8f6fc12b4740fc6eb01027d4b9/proton_driver/streams/__init__.py -------------------------------------------------------------------------------- /proton_driver/streams/compressed.py: -------------------------------------------------------------------------------- 1 | from io import BytesIO 2 | 3 | try: 4 | from clickhouse_cityhash.cityhash import CityHash128 5 | except ImportError: 6 | raise RuntimeError( 7 | 'Package clickhouse-cityhash is required to use compression' 8 | ) 9 | 10 | from .native import BlockOutputStream, BlockInputStream 11 | from ..bufferedreader import CompressedBufferedReader 12 | from ..bufferedwriter import CompressedBufferedWriter 13 | from ..compression import get_decompressor_cls 14 | from ..defines import BUFFER_SIZE 15 | from ..reader import read_binary_uint8, read_binary_uint128 16 | from ..writer import write_binary_uint8, write_binary_uint128 17 | 18 | 19 | class CompressedBlockOutputStream(BlockOutputStream): 20 | def __init__(self, compressor_cls, compress_block_size, fout, context): 21 | self.compressor_cls = compressor_cls 22 | self.compress_block_size = compress_block_size 23 | self.raw_fout = fout 24 | 25 | self.compressor = self.compressor_cls() 26 | self.fout = CompressedBufferedWriter(self.compressor, BUFFER_SIZE) 27 | super(CompressedBlockOutputStream, self).__init__(self.fout, context) 28 | 29 | def get_compressed_hash(self, data): 30 | return CityHash128(data) 31 | 32 | def finalize(self): 33 | self.fout.flush() 34 | 35 | compressed = self.get_compressed() 36 | compressed_size = len(compressed) 37 | 38 | compressed_hash = self.get_compressed_hash(compressed) 39 | write_binary_uint128(compressed_hash, self.raw_fout) 40 | 41 | block_size = self.compress_block_size 42 | 43 | i = 0 44 | while i < compressed_size: 45 | 
self.raw_fout.write(compressed[i:i + block_size]) 46 | i += block_size 47 | 48 | self.raw_fout.flush() 49 | 50 | def get_compressed(self): 51 | compressed = BytesIO() 52 | 53 | if self.compressor.method_byte is not None: 54 | write_binary_uint8(self.compressor.method_byte, compressed) 55 | extra_header_size = 1 # method 56 | else: 57 | extra_header_size = 0 58 | 59 | data = self.compressor.get_compressed_data(extra_header_size) 60 | compressed.write(data) 61 | 62 | return compressed.getvalue() 63 | 64 | 65 | class CompressedBlockInputStream(BlockInputStream): 66 | def __init__(self, fin, context): 67 | self.raw_fin = fin 68 | fin = CompressedBufferedReader(self.read_block, BUFFER_SIZE) 69 | super(CompressedBlockInputStream, self).__init__(fin, context) 70 | 71 | def get_compressed_hash(self, data): 72 | return CityHash128(data) 73 | 74 | def read_block(self): 75 | compressed_hash = read_binary_uint128(self.raw_fin) 76 | method_byte = read_binary_uint8(self.raw_fin) 77 | 78 | decompressor_cls = get_decompressor_cls(method_byte) 79 | decompressor = decompressor_cls(self.raw_fin) 80 | 81 | if decompressor.method_byte is not None: 82 | extra_header_size = 1 # method 83 | else: 84 | extra_header_size = 0 85 | 86 | return decompressor.get_decompressed_data( 87 | method_byte, compressed_hash, extra_header_size 88 | ) 89 | -------------------------------------------------------------------------------- /proton_driver/streams/native.py: -------------------------------------------------------------------------------- 1 | from ..block import ColumnOrientedBlock, BlockInfo 2 | from ..columns.service import read_column, write_column 3 | from ..reader import read_binary_str 4 | from ..varint import write_varint, read_varint 5 | from ..writer import write_binary_str 6 | from .. import defines 7 | 8 | 9 | class BlockOutputStream(object): 10 | def __init__(self, fout, context): 11 | self.fout = fout 12 | self.context = context 13 | 14 | super(BlockOutputStream, self).__init__() 15 | 16 | def write(self, block): 17 | revision = self.context.server_info.revision 18 | if revision >= defines.DBMS_MIN_REVISION_WITH_BLOCK_INFO: 19 | block.info.write(self.fout) 20 | 21 | # We write transposed data. 
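# Wire layout sketch (inferred from the code below): varint column count, varint row count, then per column a length-prefixed name and type string followed by that column's serialized values.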
22 | n_columns = block.num_columns 23 | n_rows = block.num_rows 24 | 25 | write_varint(n_columns, self.fout) 26 | write_varint(n_rows, self.fout) 27 | 28 | for i, (col_name, col_type) in enumerate(block.columns_with_types): 29 | write_binary_str(col_name, self.fout) 30 | write_binary_str(col_type, self.fout) 31 | 32 | if n_columns: 33 | try: 34 | items = block.get_column_by_index(i) 35 | except IndexError: 36 | raise ValueError('Different rows length') 37 | 38 | write_column(self.context, col_name, col_type, items, 39 | self.fout, types_check=block.types_check) 40 | 41 | self.finalize() 42 | 43 | def finalize(self): 44 | self.fout.flush() 45 | 46 | 47 | class BlockInputStream(object): 48 | def __init__(self, fin, context): 49 | self.fin = fin 50 | self.context = context 51 | 52 | super(BlockInputStream, self).__init__() 53 | 54 | def read(self): 55 | info = BlockInfo() 56 | 57 | revision = self.context.server_info.revision 58 | if revision >= defines.DBMS_MIN_REVISION_WITH_BLOCK_INFO: 59 | info.read(self.fin) 60 | 61 | n_columns = read_varint(self.fin) 62 | n_rows = read_varint(self.fin) 63 | 64 | data, names, types = [], [], [] 65 | 66 | for i in range(n_columns): 67 | column_name = read_binary_str(self.fin) 68 | column_type = read_binary_str(self.fin) 69 | 70 | names.append(column_name) 71 | types.append(column_type) 72 | 73 | if n_rows: 74 | column = read_column(self.context, column_type, n_rows, 75 | self.fin) 76 | data.append(column) 77 | 78 | if self.context.client_settings['use_numpy']: 79 | from ..numpy.block import NumpyColumnOrientedBlock 80 | block_cls = NumpyColumnOrientedBlock 81 | else: 82 | block_cls = ColumnOrientedBlock 83 | 84 | block = block_cls( 85 | columns_with_types=list(zip(names, types)), 86 | data=data, 87 | info=info, 88 | ) 89 | 90 | return block 91 | -------------------------------------------------------------------------------- /proton_driver/util/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/timeplus-io/proton-python-driver/70a1c6313cd7af8f6fc12b4740fc6eb01027d4b9/proton_driver/util/__init__.py -------------------------------------------------------------------------------- /proton_driver/util/compat.py: -------------------------------------------------------------------------------- 1 | 2 | # Drop this when minimum supported version will be 3.7. 
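# Added note: dummy_threading was removed from the stdlib in Python 3.9; this fallback only matters on interpreters built without thread support.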
3 | try: 4 | import threading 5 | except ImportError: 6 | import dummy_threading as threading # noqa: F401 7 | 8 | import json # noqa: F401 9 | 10 | try: 11 | # since tzlocal 4.0+ 12 | # this will avoid warning for get_localzone().key 13 | from tzlocal import get_localzone_name 14 | 15 | def get_localzone_name_compat(): 16 | try: 17 | return get_localzone_name() 18 | except Exception: 19 | return None 20 | except ImportError: 21 | from tzlocal import get_localzone 22 | 23 | def get_localzone_name_compat(): 24 | try: 25 | return get_localzone().key 26 | except AttributeError: 27 | return get_localzone().zone 28 | except Exception: 29 | return None 30 | -------------------------------------------------------------------------------- /proton_driver/util/escape.py: -------------------------------------------------------------------------------- 1 | from datetime import date, datetime 2 | from enum import Enum 3 | from uuid import UUID 4 | 5 | from pytz import timezone 6 | 7 | 8 | escape_chars_map = { 9 | "\b": "\\b", 10 | "\f": "\\f", 11 | "\r": "\\r", 12 | "\n": "\\n", 13 | "\t": "\\t", 14 | "\0": "\\0", 15 | "\a": "\\a", 16 | "\v": "\\v", 17 | "\\": "\\\\", 18 | "'": "\\'" 19 | } 20 | 21 | 22 | def escape_datetime(item, context): 23 | server_tz = timezone(context.server_info.timezone) 24 | 25 | if item.tzinfo is not None: 26 | item = item.astimezone(server_tz) 27 | 28 | return "'%s'" % item.strftime('%Y-%m-%d %H:%M:%S') 29 | 30 | 31 | def escape_param(item, context): 32 | if item is None: 33 | return 'NULL' 34 | 35 | elif isinstance(item, datetime): 36 | return escape_datetime(item, context) 37 | 38 | elif isinstance(item, date): 39 | return "'%s'" % item.strftime('%Y-%m-%d') 40 | 41 | elif isinstance(item, str): 42 | return "'%s'" % ''.join(escape_chars_map.get(c, c) for c in item) 43 | 44 | elif isinstance(item, list): 45 | return "[%s]" % ', '.join(str(escape_param(x, context)) for x in item) 46 | 47 | elif isinstance(item, tuple): 48 | return "(%s)" % ', '.join(str(escape_param(x, context)) for x in item) 49 | 50 | elif isinstance(item, Enum): 51 | return escape_param(item.value, context) 52 | 53 | elif isinstance(item, UUID): 54 | return "'%s'" % str(item) 55 | 56 | else: 57 | return item 58 | 59 | 60 | def escape_params(params, context): 61 | escaped = {} 62 | 63 | for key, value in params.items(): 64 | escaped[key] = escape_param(value, context) 65 | 66 | return escaped 67 | -------------------------------------------------------------------------------- /proton_driver/util/helpers.py: -------------------------------------------------------------------------------- 1 | from itertools import islice, tee 2 | 3 | 4 | def chunks(seq, n): 5 | # islice is MUCH slower than slice for lists and tuples. 6 | if isinstance(seq, (list, tuple)): 7 | i = 0 8 | item = seq[i:i+n] 9 | while item: 10 | yield list(item) 11 | i += n 12 | item = seq[i:i+n] 13 | 14 | else: 15 | it = iter(seq) 16 | item = list(islice(it, n)) 17 | while item: 18 | yield item 19 | item = list(islice(it, n)) 20 | 21 | 22 | def pairwise(iterable): 23 | a, b = tee(iterable) 24 | next(b, None) 25 | return zip(a, b) 26 | 27 | 28 | def column_chunks(columns, n): 29 | for column in columns: 30 | if not isinstance(column, (list, tuple)): 31 | raise TypeError( 32 | 'Unsupported column type: {}. list or tuple is expected.' 
33 | .format(type(column)) 34 | ) 35 | 36 | # create chunk generator for every column 37 | g = [chunks(column, n) for column in columns] 38 | 39 | while True: 40 | # get next chunk for every column 41 | item = [next(column, []) for column in g] 42 | if not any(item): 43 | break 44 | yield item 45 | 46 | 47 | # from paste.deploy.converters 48 | def asbool(obj): 49 | if isinstance(obj, str): 50 | obj = obj.strip().lower() 51 | if obj in ['true', 'yes', 'on', 'y', 't', '1']: 52 | return True 53 | elif obj in ['false', 'no', 'off', 'n', 'f', '0']: 54 | return False 55 | else: 56 | raise ValueError('String is not true/false: %r' % obj) 57 | return bool(obj) 58 | -------------------------------------------------------------------------------- /proton_driver/varint.pyx: -------------------------------------------------------------------------------- 1 | from cpython cimport PyBytes_FromStringAndSize 2 | 3 | 4 | def make_varint(unsigned long long number): 5 | """ 6 | Writes integer of variable length using LEB128. 7 | """ 8 | cdef unsigned char to_write, i = 0 9 | # unsigned PY_LONG_LONG checks integer on function call and 10 | # raises OverflowError if integer overflows unsigned PY_LONG_LONG. 11 | # Long enough for handling unsigned PY_LONG_LONG. 12 | cdef unsigned char num_buf[32] 13 | 14 | while True: 15 | to_write = number & 0x7f 16 | number >>= 7 17 | if number: 18 | num_buf[i] = to_write | 0x80 19 | i += 1 20 | else: 21 | num_buf[i] = to_write 22 | i += 1 23 | break 24 | 25 | return PyBytes_FromStringAndSize(num_buf, i) 26 | 27 | 28 | def write_varint(unsigned long long number, buf): 29 | """ 30 | Writes integer of variable length using LEB128. 31 | """ 32 | cdef unsigned char to_write, i = 0 33 | # unsigned PY_LONG_LONG checks integer on function call and 34 | # raises OverflowError if integer overflows unsigned PY_LONG_LONG. 35 | # Long enough for handling unsigned PY_LONG_LONG. 36 | cdef unsigned char num_buf[32] 37 | 38 | while True: 39 | to_write = number & 0x7f 40 | number >>= 7 41 | if number: 42 | num_buf[i] = to_write | 0x80 43 | i += 1 44 | else: 45 | num_buf[i] = to_write 46 | i += 1 47 | break 48 | 49 | buf.write(PyBytes_FromStringAndSize(num_buf, i)) 50 | 51 | 52 | def read_varint(f): 53 | """ 54 | Reads integer of variable length using LEB128. 55 | """ 56 | cdef unsigned char shift = 0 57 | cdef unsigned long long i, result = 0 58 | 59 | read_one = f.read_one 60 | 61 | while True: 62 | i = read_one() 63 | result |= (i & 0x7f) << shift 64 | shift += 7 65 | if i < 0x80: 66 | break 67 | 68 | return result 69 | -------------------------------------------------------------------------------- /proton_driver/writer.py: -------------------------------------------------------------------------------- 1 | import struct 2 | 3 | from .varint import write_varint 4 | 5 | 6 | MAX_UINT64 = (1 << 64) - 1 7 | MAX_INT64 = (1 << 63) - 1 8 | 9 | 10 | def _byte(b): 11 | return bytes((b, )) 12 | 13 | 14 | def write_binary_str(text, buf): 15 | text = text.encode('utf-8') 16 | write_binary_bytes(text, buf) 17 | 18 | 19 | def write_binary_bytes(text, buf): 20 | write_varint(len(text), buf) 21 | buf.write(text) 22 | 23 | 24 | def write_binary_int(number, buf, fmt): 25 | """ 26 | Writes int to buffer with provided format.
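For example (illustrative), ``write_binary_int(255, buf, 'B')`` packs a single unsigned byte; the ``'<'`` prefix below selects little-endian order for the wider helpers.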
27 | """ 28 | fmt = '<' + fmt 29 | buf.write(struct.pack(fmt, number)) 30 | 31 | 32 | def write_binary_int8(number, buf): 33 | write_binary_int(number, buf, 'b') 34 | 35 | 36 | def write_binary_int16(number, buf): 37 | write_binary_int(number, buf, 'h') 38 | 39 | 40 | def write_binary_int32(number, buf): 41 | write_binary_int(number, buf, 'i') 42 | 43 | 44 | def write_binary_int64(number, buf): 45 | write_binary_int(number, buf, 'q') 46 | 47 | 48 | def write_binary_uint8(number, buf): 49 | write_binary_int(number, buf, 'B') 50 | 51 | 52 | def write_binary_uint16(number, buf): 53 | write_binary_int(number, buf, 'H') 54 | 55 | 56 | def write_binary_uint32(number, buf): 57 | write_binary_int(number, buf, 'I') 58 | 59 | 60 | def write_binary_uint64(number, buf): 61 | write_binary_int(number, buf, 'Q') 62 | 63 | 64 | def write_binary_uint128(number, buf): 65 | fmt = '> 64) & MAX_UINT64, number & MAX_UINT64) 67 | buf.write(packed) 68 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.black] 2 | # Flake8 default compliance with specification PEP8 3 | line-length = 79 4 | exclude = '\.git|\.hg|\.mypy_cache|\.tox|\.venv|venv|_build|buck-out|build|dist' 5 | skip-string-normalization = true 6 | 7 | [tool.cibuildwheel] 8 | build = "*" 9 | skip = "" 10 | test-skip = "" 11 | 12 | archs = ["auto64"] 13 | build-frontend = "default" 14 | config-settings = {} 15 | dependency-versions = "pinned" 16 | environment = {} 17 | environment-pass = [] 18 | build-verbosity = 0 19 | 20 | before-all = "" 21 | before-build = "" 22 | repair-wheel-command = "" 23 | 24 | test-command = "" 25 | before-test = "" 26 | test-requires = [] 27 | test-extras = [] 28 | 29 | container-engine = "docker" 30 | 31 | manylinux-x86_64-image = "manylinux2014" 32 | manylinux-i686-image = "manylinux2014" 33 | manylinux-aarch64-image = "manylinux2014" 34 | manylinux-ppc64le-image = "manylinux2014" 35 | manylinux-s390x-image = "manylinux2014" 36 | manylinux-pypy_x86_64-image = "manylinux2014" 37 | manylinux-pypy_i686-image = "manylinux2014" 38 | manylinux-pypy_aarch64-image = "manylinux2014" 39 | 40 | musllinux-x86_64-image = "musllinux_1_1" 41 | musllinux-i686-image = "musllinux_1_1" 42 | musllinux-aarch64-image = "musllinux_1_1" 43 | musllinux-ppc64le-image = "musllinux_1_1" 44 | musllinux-s390x-image = "musllinux_1_1" 45 | 46 | 47 | [tool.cibuildwheel.linux] 48 | repair-wheel-command = "auditwheel repair -w {dest_dir} {wheel}" 49 | archs = [ "aarch64", "x86_64" ] 50 | 51 | [tool.cibuildwheel.macos] 52 | repair-wheel-command = "delocate-wheel --require-archs {delocate_archs} -w {dest_dir} -v {wheel}" 53 | archs = [ "arm64", "x86_64" ] 54 | 55 | [tool.cibuildwheel.windows] 56 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [db] 2 | host=localhost 3 | port=8463 4 | database=default 5 | user=default 6 | password= 7 | compression=lz4,lz4hc,zstd 8 | client=proton-client 9 | 10 | [log] 11 | level=ERROR 12 | 13 | [bdist_wheel] 14 | universal = 0 15 | 16 | [metadata] 17 | license_file = LICENSE 18 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | import re 3 | from codecs import open 4 | 5 | from setuptools import setup, find_packages 6 | 
from distutils.extension import Extension 7 | 8 | try: 9 | from Cython.Build import cythonize 10 | except ImportError: 11 | USE_CYTHON = False 12 | else: 13 | USE_CYTHON = True 14 | 15 | CYTHON_TRACE = bool(os.getenv('CYTHON_TRACE', False)) 16 | 17 | here = os.path.abspath(os.path.dirname(__file__)) 18 | 19 | 20 | def read_version(): 21 | regexp = re.compile(r'^VERSION\W*=\W*\(([^\(\)]*)\)') 22 | init_py = os.path.join(here, 'proton_driver', '__init__.py') 23 | with open(init_py, encoding='utf-8') as f: 24 | for line in f: 25 | match = regexp.match(line) 26 | if match is not None: 27 | return match.group(1).replace(', ', '.') 28 | else: 29 | raise RuntimeError( 30 | 'Cannot find version in proton_driver/__init__.py' 31 | ) 32 | 33 | 34 | with open(os.path.join(here, 'README.rst'), encoding='utf-8') as f: 35 | long_description = f.read() 36 | 37 | # Prepare extensions. 38 | ext = '.pyx' if USE_CYTHON else '.c' 39 | extensions = [ 40 | Extension( 41 | 'proton_driver.bufferedreader', 42 | ['proton_driver/bufferedreader' + ext] 43 | ), 44 | Extension( 45 | 'proton_driver.bufferedwriter', 46 | ['proton_driver/bufferedwriter' + ext] 47 | ), 48 | Extension( 49 | 'proton_driver.columns.largeint', 50 | ['proton_driver/columns/largeint' + ext] 51 | ), 52 | Extension( 53 | 'proton_driver.varint', 54 | ['proton_driver/varint' + ext] 55 | ) 56 | ] 57 | 58 | if USE_CYTHON: 59 | compiler_directives = {'language_level': '3'} 60 | if CYTHON_TRACE: 61 | compiler_directives['linetrace'] = True 62 | 63 | extensions = cythonize(extensions, compiler_directives=compiler_directives) 64 | 65 | setup( 66 | name='proton-driver', 67 | version=read_version(), 68 | 69 | description='Python driver with native interface for Proton', 70 | long_description=long_description, 71 | 72 | url='https://github.com/timeplus-io/proton-python-driver', 73 | 74 | author='Gang Tao', 75 | author_email='gang@timeplus.com', 76 | 77 | license='MIT', 78 | 79 | classifiers=[ 80 | 'Development Status :: 4 - Beta', 81 | 82 | 83 | 'Environment :: Console', 84 | 85 | 86 | 'Intended Audience :: Developers', 87 | 'Intended Audience :: Information Technology', 88 | 89 | 90 | 'License :: OSI Approved :: MIT License', 91 | 92 | 93 | 'Operating System :: OS Independent', 94 | 95 | 96 | 'Programming Language :: SQL', 97 | 'Programming Language :: Python :: 3', 98 | 'Programming Language :: Python :: 3.8', 99 | 'Programming Language :: Python :: 3.9', 100 | 'Programming Language :: Python :: 3.10', 101 | 'Programming Language :: Python :: 3.11', 102 | 'Programming Language :: Python :: 3.12', 103 | 'Programming Language :: Python :: 3.13', 104 | 'Programming Language :: Python :: Implementation :: PyPy', 105 | 106 | 'Topic :: Database', 107 | 'Topic :: Software Development', 108 | 'Topic :: Software Development :: Libraries', 109 | 'Topic :: Software Development :: Libraries :: Application Frameworks', 110 | 'Topic :: Software Development :: Libraries :: Python Modules', 111 | 'Topic :: Scientific/Engineering :: Information Analysis' 112 | ], 113 | 114 | keywords='Proton db database cloud analytics', 115 | 116 | packages=find_packages('.', exclude=['tests*']), 117 | python_requires='>=3.8, <4', 118 | install_requires=[ 119 | 'pytz', 120 | 'tzlocal', 121 | 'tzlocal<2.1; python_version=="3.5"' 122 | ], 123 | ext_modules=extensions, 124 | extras_require={ 125 | 'lz4': [ 126 | 'lz4<=3.0.1; implementation_name=="pypy"', 127 | 'lz4; implementation_name!="pypy"', 128 | 'clickhouse-cityhash>=1.0.2.1' 129 | ], 130 | 'zstd': ['zstd', 
'clickhouse-cityhash>=1.0.2.1'], 131 | 'numpy': ['numpy>=1.12.0', 'pandas>=0.24.0'] 132 | }, 133 | test_suite='pytest' 134 | ) 135 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/timeplus-io/proton-python-driver/70a1c6313cd7af8f6fc12b4740fc6eb01027d4b9/tests/__init__.py -------------------------------------------------------------------------------- /tests/columns/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/timeplus-io/proton-python-driver/70a1c6313cd7af8f6fc12b4740fc6eb01027d4b9/tests/columns/__init__.py -------------------------------------------------------------------------------- /tests/columns/test_bool.py: -------------------------------------------------------------------------------- 1 | from tests.testcase import BaseTestCase 2 | from proton_driver import errors 3 | 4 | 5 | class BoolTestCase(BaseTestCase): 6 | # required_server_version = (21, 12) 7 | 8 | def test_simple(self): 9 | columns = ("a bool") 10 | 11 | data = [(1,), (0,), (True,), (False,), (None,), ("False",), ("",)] 12 | with self.create_stream(columns): 13 | self.client.execute('INSERT INTO test (a) VALUES', data) 14 | 15 | query = 'SELECT * FROM test' 16 | inserted = self.emit_cli(query) 17 | self.assertEqual( 18 | inserted, ( 19 | 'true\n' 20 | 'false\n' 21 | 'true\n' 22 | 'false\n' 23 | 'false\n' 24 | 'true\n' 25 | 'false\n' 26 | ) 27 | ) 28 | 29 | inserted = self.client.execute(query) 30 | self.assertEqual( 31 | inserted, [ 32 | (True, ), 33 | (False, ), 34 | (True, ), 35 | (False, ), 36 | (False, ), 37 | (True, ), 38 | (False, ), 39 | ] 40 | ) 41 | 42 | def test_errors(self): 43 | columns = "a bool" 44 | with self.create_stream(columns): 45 | with self.assertRaises(errors.TypeMismatchError): 46 | self.client.execute( 47 | 'INSERT INTO test (a) VALUES', [(1, )], 48 | types_check=True 49 | ) 50 | 51 | def test_nullable(self): 52 | columns = "a nullable(bool)" 53 | 54 | data = [(None, ), (True, ), (False, )] 55 | with self.create_stream(columns): 56 | self.client.execute('INSERT INTO test (a) VALUES', data) 57 | 58 | query = 'SELECT * FROM test' 59 | inserted = self.emit_cli(query) 60 | self.assertEqual( 61 | inserted, ( 62 | '\\N\ntrue\nfalse\n' 63 | ) 64 | ) 65 | 66 | inserted = self.client.execute(query) 67 | self.assertEqual( 68 | inserted, [ 69 | (None, ), (True, ), (False, ), 70 | ] 71 | ) 72 | -------------------------------------------------------------------------------- /tests/columns/test_common.py: -------------------------------------------------------------------------------- 1 | from tests.testcase import BaseTestCase 2 | 3 | 4 | class CommonTestCase(BaseTestCase): 5 | client_kwargs = {'settings': {'insert_block_size': 1}} 6 | 7 | def setUp(self): 8 | super(CommonTestCase, self).setUp() 9 | 10 | self.send_data_count = 0 11 | old_send_data = self.client.connection.send_data 12 | 13 | def send_data(*args, **kwargs): 14 | self.send_data_count += 1 15 | return old_send_data(*args, **kwargs) 16 | 17 | self.client.connection.send_data = send_data 18 | 19 | def test_insert_block_size(self): 20 | with self.create_stream('a uint8'): 21 | data = [(x, ) for x in range(4)] 22 | self.client.execute( 23 | 'INSERT INTO test (a) VALUES', data 24 | ) 25 | # Two empty blocks: for end of sending external tables 26 | # and data. 
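# Added note: insert_block_size=1 (set in client_kwargs above) splits the four rows into four single-row data blocks, hence 4 + 2 below.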
27 | self.assertEqual(self.send_data_count, 4 + 2) 28 | 29 | query = 'SELECT * FROM test' 30 | inserted = self.emit_cli(query) 31 | self.assertEqual(inserted, '0\n1\n2\n3\n') 32 | inserted = self.client.execute(query) 33 | self.assertEqual(inserted, data) 34 | 35 | def test_columnar_insert_block_size(self): 36 | with self.create_stream('a uint8'): 37 | data = [(0, 1, 2, 3)] 38 | self.client.execute( 39 | 'INSERT INTO test (a) VALUES', data, columnar=True 40 | ) 41 | # Two empty blocks: for end of sending external tables 42 | # and data. 43 | self.assertEqual(self.send_data_count, 4 + 2) 44 | 45 | query = 'SELECT * FROM test' 46 | inserted = self.emit_cli(query) 47 | self.assertEqual(inserted, '0\n1\n2\n3\n') 48 | inserted = self.client.execute(query) 49 | expected = [(0, ), (1, ), (2, ), (3, )] 50 | self.assertEqual(inserted, expected) 51 | -------------------------------------------------------------------------------- /tests/columns/test_date.py: -------------------------------------------------------------------------------- 1 | import os 2 | from datetime import date, datetime 3 | from unittest.mock import patch 4 | 5 | from freezegun import freeze_time 6 | 7 | from tests.testcase import BaseTestCase 8 | 9 | 10 | class DateTestCase(BaseTestCase): 11 | @freeze_time('2017-03-05 03:00:00') 12 | def test_do_not_use_timezone(self): 13 | with self.create_stream('a Date'): 14 | data = [(date(1970, 1, 2), )] 15 | self.client.execute( 16 | 'INSERT INTO test (a) VALUES', data 17 | ) 18 | 19 | query = 'SELECT * FROM test' 20 | inserted = self.emit_cli(query) 21 | self.assertEqual(inserted, '1970-01-02\n') 22 | 23 | with patch.dict(os.environ, {'TZ': 'US/Hawaii'}): 24 | inserted = self.client.execute(query) 25 | self.assertEqual(inserted, data) 26 | 27 | def test_insert_datetime_to_date(self): 28 | with self.create_stream('a Date'): 29 | testTime = datetime(2015, 6, 6, 12, 30, 54) 30 | self.client.execute( 31 | 'INSERT INTO test (a) VALUES', [(testTime, )] 32 | ) 33 | query = 'SELECT * FROM test' 34 | inserted = self.emit_cli(query) 35 | self.assertEqual(inserted, '2015-06-06\n') 36 | 37 | def test_wrong_date_insert(self): 38 | with self.create_stream('a Date'): 39 | data = [ 40 | (date(5555, 1, 1), ), 41 | (date(1, 1, 1), ), 42 | (date(2149, 6, 7), ) 43 | ] 44 | self.client.execute('INSERT INTO test (a) VALUES', data) 45 | query = 'SELECT * FROM test' 46 | inserted = self.emit_cli(query) 47 | expected = (3 * '1970-01-01\n') 48 | self.assertEqual(inserted, expected) 49 | 50 | def test_boundaries(self): 51 | 52 | with self.create_stream('a Date'): 53 | data = [ 54 | (date(1970, 1, 1), ), 55 | ((date(2149, 6, 6), )) 56 | ] 57 | self.client.execute('INSERT INTO test (a) VALUES', data) 58 | 59 | query = 'SELECT * FROM test' 60 | inserted = self.emit_cli(query) 61 | expected = '1970-01-01\n2149-06-06\n' 62 | self.assertEqual(inserted, expected) 63 | 64 | inserted = self.client.execute(query) 65 | self.assertEqual(inserted, data) 66 | 67 | 68 | class Date32TestCase(BaseTestCase): 69 | # required_server_version = (21, 9) 70 | 71 | def test_wrong_date_insert(self): 72 | with self.create_stream('a Date32'): 73 | data = [ 74 | (date(5555, 1, 1), ), 75 | (date(1, 1, 1), ), 76 | (date(2284, 1, 1), ) 77 | ] 78 | self.client.execute('INSERT INTO test (a) VALUES', data) 79 | query = 'SELECT * FROM test' 80 | inserted = self.emit_cli(query) 81 | self.assertEqual(inserted, '1970-01-01\n1970-01-01\n1970-01-01\n') 82 | 83 | def test_boundaries(self): 84 | with self.create_stream('a Date32'): 85 | data = [(date(1925, 
1, 1), ), (date(2283, 11, 11), )] 86 | self.client.execute('INSERT INTO test (a) VALUES', data) 87 | 88 | query = 'SELECT * FROM test' 89 | inserted = self.emit_cli(query) 90 | self.assertEqual(inserted, '1925-01-01\n2283-11-11\n') 91 | 92 | inserted = self.client.execute(query) 93 | self.assertEqual(inserted, data) 94 | -------------------------------------------------------------------------------- /tests/columns/test_float.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | from tests.testcase import BaseTestCase 4 | from proton_driver import errors 5 | 6 | 7 | class FloatTestCase(BaseTestCase): 8 | def test_chop_to_type(self): 9 | with self.create_stream('a float32, b float64'): 10 | data = [ 11 | (3.4028235e38, 3.4028235e38), 12 | (3.4028235e39, 3.4028235e39), 13 | (-3.4028235e39, 3.4028235e39), 14 | (1, 2) 15 | ] 16 | 17 | with self.assertRaises(errors.TypeMismatchError) as e: 18 | self.client.execute( 19 | 'INSERT INTO test (a, b) VALUES', data 20 | ) 21 | 22 | self.assertIn('Column a', str(e.exception)) 23 | 24 | def test_simple(self): 25 | with self.create_stream('a float32, b float64'): 26 | data = [ 27 | (3.4028235e38, 3.4028235e38), 28 | (3.4028235e39, 3.4028235e39), 29 | (-3.4028235e39, 3.4028235e39), 30 | (1, 2) 31 | ] 32 | self.client.execute( 33 | 'INSERT INTO test (a, b) VALUES', data, types_check=True 34 | ) 35 | 36 | query = 'SELECT * FROM test' 37 | inserted = self.emit_cli(query) 38 | self.assertEqual( 39 | inserted, ( 40 | '3.4028235e38\t3.4028235e38\n' 41 | 'inf\t3.4028235e39\n' 42 | '-inf\t3.4028235e39\n' 43 | '1\t2\n' 44 | ) 45 | ) 46 | 47 | inserted = self.client.execute(query) 48 | self.assertEqual(inserted, [ 49 | (3.4028234663852886e+38, 3.4028235e38), 50 | (float('inf'), 3.4028235e39), 51 | (-float('inf'), 3.4028235e39), 52 | (1, 2) 53 | ]) 54 | 55 | def test_nullable(self): 56 | with self.create_stream('a nullable(float32)'): 57 | data = [(None, ), (0.5, ), (None, ), (1.5, )] 58 | self.client.execute( 59 | 'INSERT INTO test (a) VALUES', data 60 | ) 61 | 62 | query = 'SELECT * FROM test' 63 | inserted = self.emit_cli(query) 64 | self.assertEqual(inserted, '\\N\n0.5\n\\N\n1.5\n') 65 | 66 | inserted = self.client.execute(query) 67 | self.assertEqual(inserted, data) 68 | 69 | def test_nan(self): 70 | with self.create_stream('a float32'): 71 | data = [(float('nan'), ), (0.5, )] 72 | self.client.execute( 73 | 'INSERT INTO test (a) VALUES', data 74 | ) 75 | 76 | query = 'SELECT * FROM test' 77 | inserted = self.emit_cli(query) 78 | self.assertEqual(inserted, 'nan\n0.5\n') 79 | 80 | inserted = self.client.execute(query) 81 | self.assertEqual(len(inserted), 2) 82 | self.assertTrue(math.isnan(inserted[0][0])) 83 | self.assertEqual(inserted[1][0], 0.5) 84 | -------------------------------------------------------------------------------- /tests/columns/test_interval.py: -------------------------------------------------------------------------------- 1 | from tests.testcase import BaseTestCase 2 | 3 | 4 | class IntervalTestCase(BaseTestCase): 5 | required_server_version = (1, 1, 54310) 6 | 7 | def test_all(self): 8 | interval = [ 9 | ('YEAR', 1), 10 | ('MONTH', 2), 11 | ('WEEK', 3), 12 | ('DAY', 4), 13 | ('HOUR', 5), 14 | ('MINUTE', 6), 15 | ('SECOND', 7) 16 | ] 17 | columns = ', '.join(['INTERVAL {} {}'.format(v, k) 18 | for k, v in interval]) 19 | query = 'SELECT {}'.format(columns) 20 | 21 | cli_result = self.emit_cli(query) 22 | self.assertEqual(cli_result, '1\t2\t3\t4\t5\t6\t7\n') 23 | 24 | client_result = 
self.client.execute(query) 25 | self.assertEqual(client_result, [(1, 2, 3, 4, 5, 6, 7)]) 26 | -------------------------------------------------------------------------------- /tests/columns/test_json.py: -------------------------------------------------------------------------------- 1 | import json 2 | from time import sleep 3 | from tests.testcase import BaseTestCase 4 | 5 | 6 | class JSONTestCase(BaseTestCase): 7 | def test_simple(self): 8 | rv = self.client.execute("SELECT '{\"bb\": {\"cc\": [255, 1]}}'::json") 9 | self.assertEqual(rv, [({'bb': {'cc': [255, 1]}},)]) 10 | 11 | def test_from_table(self): 12 | self.emit_cli('CREATE STREAM test (a json)') 13 | data = [ 14 | ({},), 15 | ({'key1': 1}, ), 16 | ({'key1': 2.1, 'key2': {'nested': 'key'}}, ), 17 | ({'key1': 3, 'key3': ['test'], 'key4': [10, 20]}, ) 18 | ] 19 | self.client.execute('INSERT INTO test (a) VALUES', data) 20 | sleep(3) 21 | query = 'SELECT a FROM table(test)' 22 | inserted = self.client.execute(query) 23 | self.assertEqual( 24 | inserted, 25 | [ 26 | ((0.0, ('',), [], []),), 27 | ((1.0, ('',), [], []),), 28 | ((2.1, ('key',), [], []),), 29 | ((3.0, ('',), ['test'], [10, 20]),) 30 | ] 31 | ) 32 | inserted = self.client.execute( 33 | query, settings=dict(namedtuple_as_json=True) 34 | ) 35 | data_with_all_keys = [ 36 | ({'key1': 0, 'key2': {'nested': ''}, 'key3': [], 'key4': []},), 37 | ({'key1': 1, 'key2': {'nested': ''}, 'key3': [], 'key4': []},), 38 | ({'key1': 2.1, 'key2': {'nested': 'key'}, 'key3': [], 39 | 'key4': []},), 40 | ({'key1': 3, 'key2': {'nested': ''}, 'key3': ['test'], 41 | 'key4': [10, 20]},) 42 | ] 43 | self.assertEqual(inserted, data_with_all_keys) 44 | self.emit_cli('DROP STREAM test') 45 | 46 | def test_insert_json_strings(self): 47 | self.emit_cli('CREATE STREAM test (a json)') 48 | data = [ 49 | (json.dumps({'i-am': 'dumped json'}),), 50 | ] 51 | self.client.execute('INSERT INTO test (a) VALUES', data) 52 | sleep(3) 53 | query = 'SELECT a FROM table(test)' 54 | inserted = self.client.execute(query) 55 | self.assertEqual( 56 | inserted, 57 | [(('dumped json',),)] 58 | ) 59 | inserted = self.client.execute( 60 | query, settings=dict(namedtuple_as_json=True) 61 | ) 62 | data_with_all_keys = [ 63 | ({'`i-am`': 'dumped json'},) 64 | ] 65 | self.assertEqual(inserted, data_with_all_keys) 66 | self.emit_cli('DROP STREAM test') 67 | 68 | def test_json_as_named_tuple(self): 69 | settings = {'namedtuple_as_json': True} 70 | query = 'SELECT a FROM table(test)' 71 | 72 | self.emit_cli('CREATE STREAM test (a json)') 73 | data = [ 74 | ({'key': 'value'}, ), 75 | ] 76 | self.client.execute('INSERT INTO test (a) VALUES', data) 77 | sleep(3) 78 | inserted = self.client.execute(query) 79 | self.assertEqual(inserted, [(('value',),)]) 80 | 81 | with self.created_client(settings=settings) as client: 82 | inserted = client.execute(query) 83 | self.assertEqual(inserted, data) 84 | self.emit_cli('DROP STREAM test') 85 | -------------------------------------------------------------------------------- /tests/columns/test_nested.py: -------------------------------------------------------------------------------- 1 | from tests.testcase import BaseTestCase 2 | from proton_driver.columns.util import ( 3 | get_inner_spec, 4 | get_inner_columns, 5 | get_inner_columns_with_types 6 | ) 7 | 8 | 9 | class NestedTestCase(BaseTestCase): 10 | def entuple(self, lst): 11 | return tuple( 12 | self.entuple(x) if isinstance(x, list) else x for x in lst 13 | ) 14 | 15 | def test_simple(self): 16 | columns = 'n nested(i int32, s string)' 17 | 
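# Added note: with flatten_nested=0 (passed to create_stream below), each row carries the whole nested value as one list of (i, s) tuples.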
18 | # INSERT INTO test VALUES ([(0, 'a'), (1, 'b')]); 19 | data = [([(0, 'a'), (1, 'b')],)] 20 | 21 | with self.create_stream(columns, flatten_nested=0): 22 | self.client.execute( 23 | 'INSERT INTO test (n) VALUES', data 24 | ) 25 | 26 | query = 'SELECT * FROM test' 27 | inserted = self.emit_cli(query) 28 | self.assertEqual(inserted, "[(0,'a'),(1,'b')]\n") 29 | 30 | inserted = self.client.execute(query) 31 | self.assertEqual(inserted, data) 32 | 33 | projected_i = self.client.execute('SELECT n.i FROM test') 34 | self.assertEqual( 35 | projected_i, 36 | [([0, 1],)] 37 | ) 38 | 39 | projected_s = self.client.execute('SELECT n.s FROM test') 40 | self.assertEqual( 41 | projected_s, 42 | [(['a', 'b'],)] 43 | ) 44 | 45 | def test_multiple_rows(self): 46 | columns = 'n nested(i int32, s string)' 47 | 48 | data = [([(0, 'a'), (1, 'b')],), ([(3, 'd'), (4, 'e')],)] 49 | 50 | with self.create_stream(columns, flatten_nested=0): 51 | self.client.execute( 52 | 'INSERT INTO test (n) VALUES', data 53 | ) 54 | 55 | query = 'SELECT * FROM test' 56 | inserted = self.emit_cli(query) 57 | self.assertEqual( 58 | inserted, 59 | "[(0,'a'),(1,'b')]\n[(3,'d'),(4,'e')]\n" 60 | ) 61 | 62 | inserted = self.client.execute(query) 63 | self.assertEqual(inserted, data) 64 | 65 | def test_dict(self): 66 | columns = 'n nested(i int32, s string)' 67 | 68 | data = [ 69 | {'n': [{'i': 0, 's': 'a'}, {'i': 1, 's': 'b'}]}, 70 | {'n': [{'i': 3, 's': 'd'}, {'i': 4, 's': 'e'}]}, 71 | ] 72 | 73 | with self.create_stream(columns, flatten_nested=0): 74 | self.client.execute( 75 | 'INSERT INTO test (n) VALUES', data 76 | ) 77 | 78 | query = 'SELECT * FROM test' 79 | inserted = self.emit_cli(query) 80 | self.assertEqual( 81 | inserted, 82 | "[(0,'a'),(1,'b')]\n[(3,'d'),(4,'e')]\n" 83 | ) 84 | 85 | inserted = self.client.execute(query) 86 | self.assertEqual( 87 | inserted, 88 | [([(0, 'a'), (1, 'b')],), ([(3, 'd'), (4, 'e')],)] 89 | ) 90 | 91 | def test_get_nested_columns(self): 92 | spec = 'nested(a tuple(array(int8)),\n b nullable(string))' 93 | columns = get_inner_columns('nested', spec) 94 | self.assertEqual( 95 | columns, 96 | ['tuple(array(int8))', 'nullable(string)'] 97 | ) 98 | 99 | def test_get_columns_with_types(self): 100 | spec = 'nested(a tuple(array(int8)),\n b nullable(string))' 101 | columns = get_inner_columns_with_types('nested', spec) 102 | self.assertEqual( 103 | columns, 104 | [('a', 'tuple(array(int8))'), ('b', 'nullable(string)')] 105 | ) 106 | 107 | def test_get_inner_spec(self): 108 | inner = 'a tuple(array(int8), array(int64)), b nullable(string)' 109 | self.assertEqual( 110 | get_inner_spec('nested', 'nested({}) dummy '.format(inner)), 111 | inner 112 | ) 113 | -------------------------------------------------------------------------------- /tests/columns/test_null.py: -------------------------------------------------------------------------------- 1 | from tests.testcase import BaseTestCase 2 | 3 | 4 | class NullTestCase(BaseTestCase): 5 | def test_select_null(self): 6 | rv = self.client.execute('SELECT NULL') 7 | self.assertEqual(rv, [(None, )]) 8 | -------------------------------------------------------------------------------- /tests/columns/test_nullable.py: -------------------------------------------------------------------------------- 1 | from tests.testcase import BaseTestCase 2 | from proton_driver import errors 3 | 4 | ErrorCodes = errors.ErrorCodes 5 | 6 | 7 | class nullableTestCase(BaseTestCase): 8 | def test_simple(self): 9 | columns = 'a nullable(int32)' 10 | 11 | data = [(3, ), (None, ),
(2, )] 12 | with self.create_stream(columns): 13 | self.client.execute( 14 | 'INSERT INTO test (a) VALUES', data 15 | ) 16 | 17 | query = 'SELECT * FROM test' 18 | inserted = self.emit_cli(query) 19 | self.assertEqual( 20 | inserted, '3\n\\N\n2\n' 21 | ) 22 | 23 | inserted = self.client.execute(query) 24 | self.assertEqual(inserted, data) 25 | 26 | def test_nullable_inside_nullable(self): 27 | columns = 'a nullable(nullable(int32))' 28 | 29 | with self.assertRaises(errors.ServerException) as e: 30 | self.client.execute( 31 | 'CREATE STREAM test ({}) ''ENGINE = Memory'.format(columns) 32 | ) 33 | 34 | self.assertEqual(e.exception.code, ErrorCodes.ILLEGAL_TYPE_OF_ARGUMENT) 35 | 36 | def test_nullable_array(self): 37 | columns = 'a nullable(array(nullable(array(nullable(int32)))))' 38 | 39 | with self.assertRaises(errors.ServerException) as e: 40 | self.client.execute( 41 | 'CREATE STREAM test ({}) ''ENGINE = Memory'.format(columns) 42 | ) 43 | 44 | self.assertEqual(e.exception.code, ErrorCodes.ILLEGAL_TYPE_OF_ARGUMENT) 45 | -------------------------------------------------------------------------------- /tests/columns/test_simpleaggregatefunction.py: -------------------------------------------------------------------------------- 1 | from enum import IntEnum 2 | 3 | from tests.testcase import BaseTestCase 4 | 5 | 6 | class SimpleAggregateFunctionTestCase(BaseTestCase): 7 | # required_server_version = (19, 8, 3) 8 | 9 | def test_simple(self): 10 | columns = 'a simple_aggregate_function(any, int32)' 11 | 12 | data = [(3, ), (2, )] 13 | with self.create_stream(columns): 14 | self.client.execute( 15 | 'INSERT INTO test (a) VALUES', data 16 | ) 17 | 18 | query = 'SELECT * FROM test' 19 | inserted = self.emit_cli(query) 20 | self.assertEqual( 21 | inserted, '3\n2\n' 22 | ) 23 | 24 | inserted = self.client.execute(query) 25 | self.assertEqual(inserted, data) 26 | 27 | def test_nullable(self): 28 | columns = 'a simple_aggregate_function(any, nullable(int32))' 29 | 30 | data = [(3, ), (None, ), (2, )] 31 | with self.create_stream(columns): 32 | self.client.execute( 33 | 'INSERT INTO test (a) VALUES', data 34 | ) 35 | 36 | query = 'SELECT * FROM test' 37 | inserted = self.emit_cli(query) 38 | self.assertEqual( 39 | inserted, '3\n\\N\n2\n' 40 | ) 41 | 42 | inserted = self.client.execute(query) 43 | self.assertEqual(inserted, data) 44 | 45 | def test_simple_agg_function(self): 46 | class A(IntEnum): 47 | hello = -1 48 | world = 2 49 | 50 | columns = "a simple_aggregate_function(any_last, " \ 51 | "enum8('hello' = -1, 'world' = 2))" 52 | 53 | data = [(A.hello,), (A.world,), (-1,), (2,)] 54 | with self.create_stream(columns): 55 | self.client.execute( 56 | 'INSERT INTO test (a) VALUES', data 57 | ) 58 | 59 | query = 'SELECT * FROM test' 60 | inserted = self.emit_cli(query) 61 | self.assertEqual( 62 | inserted, ( 63 | 'hello\n' 64 | 'world\n' 65 | 'hello\n' 66 | 'world\n' 67 | ) 68 | ) 69 | 70 | inserted = self.client.execute(query) 71 | self.assertEqual( 72 | inserted, [ 73 | ('hello',), ('world',), 74 | ('hello',), ('world',) 75 | ] 76 | ) 77 | 78 | def test_simple_agg_function_nullable(self): 79 | class A(IntEnum): 80 | hello = -1 81 | world = 2 82 | 83 | columns = "a simple_aggregate_function(any_last, " \ 84 | "nullable(enum8('hello' = -1, 'world' = 2)))" 85 | 86 | data = [(A.hello,), (A.world,), (None,), (-1,), (2,)] 87 | with self.create_stream(columns): 88 | self.client.execute( 89 | 'INSERT INTO test (a) VALUES', data 90 | ) 91 | 92 | query = 'SELECT * FROM test' 93 | inserted = 
self.emit_cli(query) 94 | self.assertEqual( 95 | inserted, ( 96 | 'hello\n' 97 | 'world\n' 98 | '\\N\n' 99 | 'hello\n' 100 | 'world\n' 101 | ) 102 | ) 103 | 104 | inserted = self.client.execute(query) 105 | self.assertEqual( 106 | inserted, [ 107 | ('hello',), ('world',), 108 | (None, ), 109 | ('hello',), ('world',) 110 | ] 111 | ) 112 | -------------------------------------------------------------------------------- /tests/columns/test_unknown.py: -------------------------------------------------------------------------------- 1 | 2 | from unittest import TestCase 3 | 4 | from proton_driver import errors 5 | from proton_driver.columns.service import get_column_by_spec 6 | 7 | 8 | class UnknownColumnTestCase(TestCase): 9 | def test_get_unknown_column(self): 10 | with self.assertRaises(errors.UnknownTypeError) as e: 11 | get_column_by_spec('Unicorn', {'context': {}}) 12 | 13 | self.assertIn('Unicorn', str(e.exception)) 14 | -------------------------------------------------------------------------------- /tests/columns/test_uuid.py: -------------------------------------------------------------------------------- 1 | from uuid import UUID 2 | from tests.testcase import BaseTestCase 3 | from proton_driver import errors 4 | 5 | 6 | class UUIDTestCase(BaseTestCase): 7 | def test_simple(self): 8 | with self.create_stream('a uuid'): 9 | data = [ 10 | (UUID('c0fcbba9-0752-44ed-a5d6-4dfb4342b89d'), ), 11 | ('2efcead4-ff55-4db5-bdb4-6b36a308d8e0', ) 12 | ] 13 | self.client.execute( 14 | 'INSERT INTO test (a) VALUES', data 15 | ) 16 | 17 | query = 'SELECT * FROM test' 18 | inserted = self.emit_cli(query) 19 | self.assertEqual(inserted, ( 20 | 'c0fcbba9-0752-44ed-a5d6-4dfb4342b89d\n' 21 | '2efcead4-ff55-4db5-bdb4-6b36a308d8e0\n' 22 | )) 23 | inserted = self.client.execute(query) 24 | self.assertEqual(inserted, [ 25 | (UUID('c0fcbba9-0752-44ed-a5d6-4dfb4342b89d'), ), 26 | (UUID('2efcead4-ff55-4db5-bdb4-6b36a308d8e0'), ) 27 | ]) 28 | 29 | def test_type_mismatch(self): 30 | data = [(62457709573696417404743346296141175008, )] 31 | with self.create_stream('a uuid'): 32 | with self.assertRaises(errors.TypeMismatchError): 33 | self.client.execute( 34 | 'INSERT INTO test (a) VALUES', data, types_check=True 35 | ) 36 | with self.assertRaises(AttributeError): 37 | self.client.execute( 38 | 'INSERT INTO test (a) VALUES', data 39 | ) 40 | 41 | def test_bad_uuid(self): 42 | data = [('a', )] 43 | with self.create_stream('a uuid'): 44 | with self.assertRaises(errors.CannotParseUuidError): 45 | self.client.execute( 46 | 'INSERT INTO test (a) VALUES', data 47 | ) 48 | 49 | def test_nullable(self): 50 | with self.create_stream('a nullable(uuid)'): 51 | data = [(UUID('2efcead4-ff55-4db5-bdb4-6b36a308d8e0'), ), (None, )] 52 | self.client.execute( 53 | 'INSERT INTO test (a) VALUES', data 54 | ) 55 | 56 | query = 'SELECT * FROM test' 57 | inserted = self.emit_cli(query) 58 | self.assertEqual(inserted, 59 | '2efcead4-ff55-4db5-bdb4-6b36a308d8e0\n\\N\n') 60 | 61 | inserted = self.client.execute(query) 62 | self.assertEqual(inserted, data) 63 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | 4 | @pytest.fixture(autouse=True) 5 | def assert_empty_output(capfd): 6 | yield 7 | 8 | captured = capfd.readouterr() 9 | 10 | assert captured.out == '' 11 | assert captured.err == '' 12 | -------------------------------------------------------------------------------- 
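A minimal sketch of the JSON round trip the tests above exercise, assuming a local timeplusd server reachable on the native port 8463 (the port published by tests/docker-compose.yml below) and a hypothetical stream name demo; the namedtuple_as_json setting is the same one test_json.py passes per query:

from time import sleep
from proton_driver.client import Client

client = Client('localhost', port=8463)

client.execute('CREATE STREAM demo (a json)')
client.execute('INSERT INTO demo (a) VALUES', [({'key': 'value'},)])
sleep(3)  # as in the tests: give the stream time to be queryable via table()

# Default shape: JSON values come back as plain tuples, e.g. [(('value',),)]
print(client.execute('SELECT a FROM table(demo)'))

# With namedtuple_as_json=True they come back as dicts, e.g. [({'key': 'value'},)]
print(client.execute(
    'SELECT a FROM table(demo)', settings=dict(namedtuple_as_json=True)
))

client.execute('DROP STREAM demo')

--------------------------------------------------------------------------------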
/tests/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3' 2 | 3 | services: 4 | proton-server: 5 | image: "timeplus/timeplusd:latest" 6 | container_name: test-proton-server 7 | environment: 8 | - TZ=Asia/Shanghai 9 | ports: 10 | - "127.0.0.1:8463:8463" 11 | command: > 12 | /bin/bash -c "echo sleeping; sleep 2; /entrypoint.sh" 13 | volumes: 14 | - /mnt/timeplusd:/var/lib/timeplusd 15 | 16 | proton-client: 17 | image: "timeplus/timeplusd:latest" 18 | container_name: test-proton-client 19 | entrypoint: /bin/sh 20 | command: [-c, 'while :; do sleep 1; done'] 21 | -------------------------------------------------------------------------------- /tests/log.py: -------------------------------------------------------------------------------- 1 | from logging.config import dictConfig 2 | 3 | 4 | def configure(level): 5 | dictConfig({ 6 | 'version': 1, 7 | 'disable_existing_loggers': False, 8 | 'formatters': { 9 | 'standard': { 10 | 'format': '%(asctime)s %(levelname)-8s %(name)s: %(message)s' 11 | }, 12 | }, 13 | 'handlers': { 14 | 'default': { 15 | 'level': level, 16 | 'formatter': 'standard', 17 | 'class': 'logging.StreamHandler', 18 | }, 19 | }, 20 | 'loggers': { 21 | '': { 22 | 'handlers': ['default'], 23 | 'level': level, 24 | 'propagate': True 25 | }, 26 | } 27 | }) 28 | -------------------------------------------------------------------------------- /tests/numpy/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/timeplus-io/proton-python-driver/70a1c6313cd7af8f6fc12b4740fc6eb01027d4b9/tests/numpy/__init__.py -------------------------------------------------------------------------------- /tests/numpy/columns/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/timeplus-io/proton-python-driver/70a1c6313cd7af8f6fc12b4740fc6eb01027d4b9/tests/numpy/columns/__init__.py -------------------------------------------------------------------------------- /tests/numpy/columns/test_float.py: -------------------------------------------------------------------------------- 1 | from parameterized import parameterized 2 | 3 | try: 4 | import numpy as np 5 | except ImportError: 6 | np = None 7 | 8 | from tests.numpy.testcase import NumpyBaseTestCase 9 | 10 | 11 | class FloatTestCase(NumpyBaseTestCase): 12 | n = 10 13 | 14 | def check_result(self, rv, col_type): 15 | self.assertarraysEqual(rv[0], np.array(range(self.n))) 16 | self.assertEqual(rv[0].dtype, col_type) 17 | 18 | def get_query(self, ch_type): 19 | with self.create_stream('a {}'.format(ch_type)): 20 | data = [np.array(range(self.n))] 21 | self.client.execute( 22 | 'INSERT INTO test (a) VALUES', data, columnar=True 23 | ) 24 | 25 | query = 'SELECT * FROM test' 26 | inserted = self.emit_cli(query) 27 | self.assertEqual( 28 | inserted, '\n'.join(str(x) for x in data[0]) + '\n' 29 | ) 30 | return self.client.execute(query, columnar=True) 31 | 32 | def test_float32(self): 33 | rv = self.get_query('float32') 34 | self.check_result(rv, np.float32) 35 | 36 | def test_float64(self): 37 | rv = self.get_query('float64') 38 | self.check_result(rv, np.float64) 39 | 40 | def test_fractional_round_trip(self): 41 | with self.create_stream('a float32'): 42 | data = [np.array([0.5, 1.5], dtype=np.float32)] 43 | self.client.execute( 44 | 'INSERT INTO test (a) VALUES', data, columnar=True 45 | ) 46 | 47 | query = 'SELECT * FROM test' 48 | inserted = 
self.emit_cli(query) 49 | self.assertEqual(inserted, '0.5\n1.5\n') 50 | 51 | inserted = self.client.execute(query, columnar=True) 52 | self.assertarraysEqual(inserted[0], data[0]) 53 | 54 | @parameterized.expand(['float32', 'float64']) 55 | def test_nullable(self, float_type): 56 | with self.create_stream('a nullable({})'.format(float_type)): 57 | data = [np.array([np.nan, 0.5, None, 1.5], dtype=object)] 58 | self.client.execute( 59 | 'INSERT INTO test (a) VALUES', data, columnar=True 60 | ) 61 | 62 | query = 'SELECT * FROM test' 63 | inserted = self.emit_cli(query) 64 | self.assertEqual(inserted, 'nan\n0.5\n\\N\n1.5\n') 65 | 66 | inserted = self.client.execute(query, columnar=True) 67 | self.assertarraysEqual( 68 | inserted[0].astype(str), data[0].astype(str) 69 | ) 70 | self.assertEqual(inserted[0].dtype, object) 71 | 72 | def test_nan(self): 73 | with self.create_stream('a float32'): 74 | data = [np.array([float('nan'), 0.5], dtype=np.float32)] 75 | self.client.execute( 76 | 'INSERT INTO test (a) VALUES', data, columnar=True 77 | ) 78 | 79 | query = 'SELECT * FROM test' 80 | inserted = self.emit_cli(query) 81 | self.assertEqual(inserted, 'nan\n0.5\n') 82 | 83 | inserted = self.client.execute(query, columnar=True) 84 | self.assertarraysEqual( 85 | inserted[0].astype(str), data[0].astype(str) 86 | ) 87 | self.assertEqual(inserted[0].dtype, np.float32) 88 | -------------------------------------------------------------------------------- /tests/numpy/columns/test_int.py: -------------------------------------------------------------------------------- 1 | try: 2 | import numpy as np 3 | except ImportError: 4 | np = None 5 | 6 | from tests.numpy.testcase import NumpyBaseTestCase 7 | 8 | 9 | class IntTestCase(NumpyBaseTestCase): 10 | n = 10 11 | 12 | def check_result(self, rv, col_type): 13 | self.assertarraysEqual(rv[0], np.array(range(self.n))) 14 | self.assertEqual(rv[0].dtype, col_type) 15 | 16 | def get_query(self, ch_type): 17 | with self.create_stream('a {}'.format(ch_type)): 18 | data = [np.array(range(self.n))] 19 | self.client.execute( 20 | 'INSERT INTO test (a) VALUES', data, columnar=True 21 | ) 22 | 23 | query = 'SELECT * FROM test' 24 | inserted = self.emit_cli(query) 25 | self.assertEqual( 26 | inserted, '\n'.join(str(x) for x in data[0]) + '\n' 27 | ) 28 | return self.client.execute(query, columnar=True) 29 | 30 | def test_int8(self): 31 | rv = self.get_query('int8') 32 | self.check_result(rv, np.int8) 33 | 34 | def test_int16(self): 35 | rv = self.get_query('int16') 36 | self.check_result(rv, np.int16) 37 | 38 | def test_int32(self): 39 | rv = self.get_query('int32') 40 | self.check_result(rv, np.int32) 41 | 42 | def test_int64(self): 43 | rv = self.get_query('int64') 44 | self.check_result(rv, np.int64) 45 | 46 | def test_uint8(self): 47 | rv = self.get_query('uint8') 48 | self.check_result(rv, np.uint8) 49 | 50 | def test_uint16(self): 51 | rv = self.get_query('uint16') 52 | self.check_result(rv, np.uint16) 53 | 54 | def test_uint32(self): 55 | rv = self.get_query('uint32') 56 | self.check_result(rv, np.uint32) 57 | 58 | def test_uint64(self): 59 | rv = self.get_query('uint64') 60 | self.check_result(rv, np.uint64) 61 | 62 | def test_insert_nan_into_non_nullable(self): 63 | with self.create_stream('a int32'): 64 | data = [ 65 | np.array([123, np.nan], dtype=object) 66 | ] 67 | self.client.execute( 68 | 'INSERT INTO test (a) VALUES', data, columnar=True 69 | ) 70 | 71 | query = 'SELECT * FROM test' 72 | inserted = self.emit_cli(query) 73 | self.assertEqual( 74 | inserted, 75 
| '123\n0\n' 76 | ) 77 | 78 | inserted = self.client.execute(query, columnar=True) 79 | self.assertarraysEqual(inserted[0], np.array([123, 0])) 80 | self.assertEqual(inserted[0].dtype, np.int32) 81 | 82 | def test_nullable(self): 83 | with self.create_stream('a nullable(int32)'): 84 | data = [np.array([2, None, 4, None, 8])] 85 | self.client.execute( 86 | 'INSERT INTO test (a) VALUES', data, columnar=True 87 | ) 88 | 89 | query = 'SELECT * FROM test' 90 | inserted = self.emit_cli(query) 91 | self.assertEqual(inserted, '2\n\\N\n4\n\\N\n8\n') 92 | 93 | inserted = self.client.execute(query, columnar=True) 94 | self.assertarraysEqual(inserted[0], data[0]) 95 | self.assertEqual(inserted[0].dtype, object) 96 | -------------------------------------------------------------------------------- /tests/numpy/columns/test_nullable.py: -------------------------------------------------------------------------------- 1 | try: 2 | import numpy as np 3 | except ImportError: 4 | np = None 5 | 6 | try: 7 | import pandas as pd 8 | except ImportError: 9 | pd = None 10 | 11 | from tests.numpy.testcase import NumpyBaseTestCase 12 | from proton_driver import errors 13 | 14 | ErrorCodes = errors.ErrorCodes 15 | 16 | 17 | class nullableTestCase(NumpyBaseTestCase): 18 | def test_simple(self): 19 | columns = 'a nullable(int32)' 20 | 21 | data = [np.array([3, None, 2], dtype=object)] 22 | with self.create_stream(columns): 23 | self.client.execute( 24 | 'INSERT INTO test (a) VALUES', data, columnar=True 25 | ) 26 | 27 | query = 'SELECT * FROM test' 28 | inserted = self.emit_cli(query) 29 | self.assertEqual( 30 | inserted, '3\n\\N\n2\n' 31 | ) 32 | 33 | inserted = self.client.execute(query, columnar=True) 34 | self.assertarraysEqual(inserted[0], data[0]) 35 | self.assertEqual(inserted[0].dtype, object) 36 | 37 | def test_simple_dataframe(self): 38 | columns = ( 39 | 'a int64, ' 40 | 'b nullable(float64), ' 41 | 'c nullable(string), ' 42 | 'd nullable(int64)' 43 | ) 44 | 45 | df = pd.DataFrame({ 46 | 'a': [1, 2, 3], 47 | 'b': [1.0, None, np.nan], 48 | 'c': ['a', None, np.nan], 49 | 'd': [1, None, None], 50 | }, dtype=object) 51 | expected = pd.DataFrame({ 52 | 'a': np.array([1, 2, 3], dtype=np.int64), 53 | 'b': np.array([1.0, None, np.nan], dtype=object), 54 | 'c': np.array(['a', None, None], dtype=object), 55 | 'd': np.array([1, None, None], dtype=object), 56 | }) 57 | 58 | with self.create_stream(columns): 59 | rv = self.client.insert_dataframe('INSERT INTO test VALUES', df) 60 | self.assertEqual(rv, 3) 61 | df2 = self.client.query_dataframe('SELECT * FROM test ORDER BY a') 62 | self.assertTrue(expected.equals(df2)) 63 | -------------------------------------------------------------------------------- /tests/numpy/columns/test_other.py: -------------------------------------------------------------------------------- 1 | from parameterized import parameterized 2 | 3 | from proton_driver import errors 4 | from proton_driver.columns.service import get_column_by_spec 5 | from proton_driver.context import Context 6 | 7 | from tests.numpy.testcase import NumpyBaseTestCase 8 | 9 | 10 | class OtherColumnsTestCase(NumpyBaseTestCase): 11 | def get_column(self, spec): 12 | ctx = Context() 13 | ctx.client_settings = {'strings_as_bytes': False, 'use_numpy': True} 14 | return get_column_by_spec(spec, {'context': ctx}) 15 | 16 | @parameterized.expand([ 17 | ("enum8('hello' = 1, 'world' = 2)", ), 18 | ('decimal(8, 4)', ), 19 | ('array(string)', ), 20 | ('tuple(string)', ), 21 | ('simple_aggregate_function(any, int32)', ), 22 | 
('map(string, string)', ), 23 | ('array(low_cardinality(string))', ) 24 | ]) 25 | def test_generic_type(self, spec): 26 | col = self.get_column(spec) 27 | self.assertIsNotNone(col) 28 | 29 | def test_get_unknown_column(self): 30 | with self.assertRaises(errors.UnknownTypeError) as e: 31 | self.get_column('Unicorn') 32 | 33 | self.assertIn('Unicorn', str(e.exception)) 34 | -------------------------------------------------------------------------------- /tests/numpy/test_external_tables.py: -------------------------------------------------------------------------------- 1 | try: 2 | import numpy as np 3 | import pandas as pd 4 | except ImportError: 5 | np = None 6 | pd = None 7 | 8 | from tests.numpy.testcase import NumpyBaseTestCase 9 | 10 | 11 | class ExternalTablesTestCase(NumpyBaseTestCase): 12 | def test_select(self): 13 | tables = [{ 14 | 'name': 'test', 15 | 'structure': [('x', 'int32'), ('y', 'string')], 16 | 'data': pd.DataFrame({ 17 | 'x': [100, 500], 18 | 'y': ['abc', 'def'] 19 | }) 20 | }] 21 | rv = self.client.execute( 22 | 'SELECT * FROM test', external_tables=tables, columnar=True 23 | ) 24 | self.assertarraysListEqual( 25 | rv, [np.array([100, 500]), np.array(['abc', 'def'])] 26 | ) 27 | 28 | def test_send_empty_table(self): 29 | tables = [{ 30 | 'name': 'test', 31 | 'structure': [('x', 'int32')], 32 | 'data': pd.DataFrame({'x': []}) 33 | }] 34 | rv = self.client.execute( 35 | 'SELECT * FROM test', external_tables=tables, columnar=True 36 | ) 37 | self.assertarraysListEqual(rv, []) 38 | 39 | def test_send_empty_table_structure(self): 40 | tables = [{ 41 | 'name': 'test', 42 | 'structure': [], 43 | 'data': pd.DataFrame() 44 | }] 45 | with self.assertRaises(ValueError) as e: 46 | self.client.execute( 47 | 'SELECT * FROM test', external_tables=tables, columnar=True 48 | ) 49 | 50 | self.assertIn('Empty table "test" structure', str(e.exception)) 51 | -------------------------------------------------------------------------------- /tests/numpy/testcase.py: -------------------------------------------------------------------------------- 1 | try: 2 | import numpy as np 3 | except ImportError: 4 | np = None 5 | 6 | from tests.numpy.util import check_numpy 7 | from tests.testcase import BaseTestCase 8 | 9 | 10 | class NumpyBaseTestCase(BaseTestCase): 11 | client_kwargs = {'settings': {'use_numpy': True}} 12 | 13 | @check_numpy 14 | def setUp(self): 15 | super(NumpyBaseTestCase, self).setUp() 16 | 17 | def assertarraysEqual(self, first, second): 18 | return self.assertTrue((first == second).all()) 19 | 20 | def assertarraysListEqual(self, first, second): 21 | self.assertEqual(len(first), len(second)) 22 | for x, y in zip(first, second): 23 | self.assertTrue((x == y).all()) 24 | -------------------------------------------------------------------------------- /tests/numpy/util.py: -------------------------------------------------------------------------------- 1 | from functools import wraps 2 | from unittest import SkipTest 3 | 4 | 5 | def check_numpy(f): 6 | @wraps(f) 7 | def wrapper(*args, **kwargs): 8 | try: 9 | return f(*args, **kwargs) 10 | except RuntimeError as e: 11 | if 'NumPy' in str(e): 12 | raise SkipTest('Numpy package is not installed') 13 | 14 | return wrapper 15 | -------------------------------------------------------------------------------- /tests/test_buffered_reader.py: -------------------------------------------------------------------------------- 1 | import socket 2 | from unittest import TestCase, mock 3 | 4 | from proton_driver.bufferedreader import 
BufferedSocketReader 5 | 6 | 7 | class BufferedReaderTestCase(TestCase): 8 | def test_overflow_signed_int_string_size(self): 9 | data = b'\xFF\xFE\xFC\xFE\xFE\xFE\xFE\xFE\x29\x80\x40\x00\x00\x01' 10 | 11 | def recv_into(buf): 12 | size = len(data) 13 | buf[0:size] = data 14 | return size 15 | 16 | with mock.patch('socket.socket') as mock_socket: 17 | mock_socket.return_value.recv_into.side_effect = recv_into 18 | reader = BufferedSocketReader(socket.socket(), 1024) 19 | 20 | # Trying to allocate huge amount of memory. 21 | with self.assertRaises(MemoryError): 22 | reader.read_strings(5, encoding='utf-8') 23 | -------------------------------------------------------------------------------- /tests/test_compression.py: -------------------------------------------------------------------------------- 1 | from datetime import date, datetime 2 | from unittest import TestCase 3 | 4 | from proton_driver import errors 5 | from proton_driver.client import Client 6 | from proton_driver.compression import get_compressor_cls 7 | from proton_driver.compression.lz4 import Compressor 8 | from .testcase import BaseTestCase, file_config 9 | 10 | 11 | class BaseCompressionTestCase(BaseTestCase): 12 | compression = False 13 | supported_compressions = file_config.get('db', 'compression').split(',') 14 | 15 | def _create_client(self): 16 | settings = None 17 | if self.compression: 18 | # Set server compression method explicitly 19 | # By default server sends blocks compressed by LZ4. 20 | method = self.compression 21 | if self.server_version > (19, ): 22 | method = method.upper() 23 | settings = {'network_compression_method': method} 24 | 25 | return Client( 26 | self.host, self.port, self.database, self.user, self.password, 27 | compression=self.compression, settings=settings 28 | ) 29 | 30 | def setUp(self): 31 | super(BaseCompressionTestCase, self).setUp() 32 | supported = ( 33 | self.compression is False or 34 | self.compression in self.supported_compressions 35 | ) 36 | 37 | if not supported: 38 | self.skipTest( 39 | 'Compression {} is not supported'.format(self.compression) 40 | ) 41 | 42 | def run_simple(self): 43 | with self.create_stream('a Date, b DateTime'): 44 | data = [(date(2012, 10, 25), datetime(2012, 10, 25, 14, 7, 19))] 45 | self.client.execute( 46 | 'INSERT INTO test (a, b) VALUES', data 47 | ) 48 | 49 | query = 'SELECT * FROM test' 50 | inserted = self.emit_cli(query) 51 | self.assertEqual(inserted, '2012-10-25\t2012-10-25 14:07:19\n') 52 | 53 | inserted = self.client.execute(query) 54 | self.assertEqual(inserted, data) 55 | 56 | def test(self): 57 | if self.compression is False: 58 | return 59 | 60 | self.run_simple() 61 | 62 | 63 | class LZ4ReadWriteTestCase(BaseCompressionTestCase): 64 | compression = 'lz4' 65 | 66 | 67 | class LZ4HCReadWriteTestCase(BaseCompressionTestCase): 68 | compression = 'lz4hc' 69 | 70 | 71 | class ZSTDReadWriteTestCase(BaseCompressionTestCase): 72 | compression = 'zstd' 73 | 74 | 75 | class MiscCompressionTestCase(TestCase): 76 | def test_default_compression(self): 77 | client = Client('localhost', compression=True) 78 | self.assertEqual(client.connection.compressor_cls, Compressor) 79 | 80 | def test_unknown_compressor(self): 81 | with self.assertRaises(errors.UnknownCompressionMethod) as e: 82 | get_compressor_cls('hello') 83 | 84 | self.assertEqual( 85 | e.exception.code, errors.ErrorCodes.UNKNOWN_COMPRESSION_METHOD 86 | ) 87 | 88 | 89 | class ReadByBlocksTestCase(BaseCompressionTestCase): 90 | compression = 'lz4' 91 | 92 | def test(self): 93 | with 
self.create_stream('a int32'): 94 | data = [(x % 200, ) for x in range(1000000)] 95 | 96 | self.client.execute( 97 | 'INSERT INTO test (a) VALUES', data 98 | ) 99 | 100 | query = 'SELECT * FROM test' 101 | 102 | inserted = self.client.execute(query) 103 | self.assertEqual(inserted, data) 104 | -------------------------------------------------------------------------------- /tests/test_errors.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | 3 | import proton_driver.errors as err 4 | 5 | 6 | def picklable(o): 7 | picked = pickle.loads(pickle.dumps(o)) 8 | assert repr(o) == repr(picked) 9 | assert str(o) == str(picked) 10 | 11 | 12 | def test_exception_picklable(): 13 | picklable(err.Error('foo')) 14 | picklable(err.Error(message='foo')) 15 | 16 | picklable(err.ServerException('foo', 0, Exception())) 17 | picklable(err.ServerException(message='foo', code=0, nested=Exception())) 18 | -------------------------------------------------------------------------------- /tests/test_external_tables.py: -------------------------------------------------------------------------------- 1 | 2 | from tests.testcase import BaseTestCase 3 | 4 | 5 | class ExternalTablesTestCase(BaseTestCase): 6 | def test_select(self): 7 | tables = [{ 8 | 'name': 'test', 9 | 'structure': [('x', 'int32'), ('y', 'array(int32)')], 10 | 'data': [ 11 | {'x': 100, 'y': [2, 4, 6, 8]}, 12 | {'x': 500, 'y': [1, 3, 5, 7]}, 13 | ] 14 | }] 15 | rv = self.client.execute('SELECT * FROM test', external_tables=tables) 16 | self.assertEqual(rv, [(100, [2, 4, 6, 8]), (500, [1, 3, 5, 7])]) 17 | 18 | def test_send_empty_table(self): 19 | tables = [{ 20 | 'name': 'test', 21 | 'structure': [('x', 'int32')], 22 | 'data': [] 23 | }] 24 | rv = self.client.execute('SELECT * FROM test', external_tables=tables) 25 | self.assertEqual(rv, []) 26 | 27 | def test_send_empty_table_structure(self): 28 | tables = [{ 29 | 'name': 'test', 30 | 'structure': [], 31 | 'data': [] 32 | }] 33 | with self.assertRaises(ValueError) as e: 34 | self.client.execute('SELECT * FROM test', external_tables=tables) 35 | 36 | self.assertIn('Empty table "test" structure', str(e.exception)) 37 | -------------------------------------------------------------------------------- /tests/test_opentelemetry.py: -------------------------------------------------------------------------------- 1 | from tests.testcase import BaseTestCase 2 | from tests.util import capture_logging 3 | 4 | 5 | class OpenTelemetryTestCase(BaseTestCase): 6 | required_server_version = (20, 11, 2) 7 | 8 | def test_server_logs(self): 9 | tracestate = 'tracestate' 10 | traceparent = '00-1af7651916cd43dd8448eb211c80319c-b7ad6b7169203331-01' 11 | 12 | settings = { 13 | 'opentelemetry_tracestate': tracestate, 14 | 'opentelemetry_traceparent': traceparent 15 | 16 | } 17 | with self.created_client(settings=settings) as client: 18 | with capture_logging('proton_driver.log', 'INFO') as buffer: 19 | settings = {'send_logs_level': 'trace'} 20 | query = 'SELECT 1' 21 | client.execute(query, settings=settings) 22 | value = buffer.getvalue() 23 | self.assertIn('OpenTelemetry', value) 24 | 25 | # ClickHouse 22.2+ use big-endian: 26 | # https://github.com/ClickHouse/ClickHouse/pull/33723 27 | if self.server_version >= (22, 2): 28 | tp = '8448eb211c80319c1af7651916cd43dd' 29 | else: 30 | tp = '1af7651916cd43dd8448eb211c80319c' 31 | self.assertIn(tp, value) 32 | 33 | def test_no_tracestate(self): 34 | traceparent = '00-1af7651916cd43dd8448eb211c80319c-b7ad6b7169203331-01' 35 | 
36 | settings = { 37 | 'opentelemetry_traceparent': traceparent 38 | 39 | } 40 | with self.created_client(settings=settings) as client: 41 | with capture_logging('proton_driver.log', 'INFO') as buffer: 42 | settings = {'send_logs_level': 'trace'} 43 | query = 'SELECT 1' 44 | client.execute(query, settings=settings) 45 | value = buffer.getvalue() 46 | self.assertIn('OpenTelemetry', value) 47 | # ClickHouse 22.2+ use big-endian: 48 | # https://github.com/ClickHouse/ClickHouse/pull/33723 49 | if self.server_version >= (22, 2): 50 | tp = '8448eb211c80319c1af7651916cd43dd' 51 | else: 52 | tp = '1af7651916cd43dd8448eb211c80319c' 53 | self.assertIn(tp, value) 54 | 55 | def test_bad_traceparent(self): 56 | settings = {'opentelemetry_traceparent': 'bad'} 57 | with self.created_client(settings=settings) as client: 58 | with self.assertRaises(ValueError) as e: 59 | client.execute('SELECT 1') 60 | 61 | self.assertEqual( 62 | str(e.exception), 63 | 'unexpected length 3, expected 55' 64 | ) 65 | 66 | traceparent = '00-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx-yyyyyyyyyyyyyyyy-01' 67 | settings = {'opentelemetry_traceparent': traceparent} 68 | with self.created_client(settings=settings) as client: 69 | with self.assertRaises(ValueError) as e: 70 | client.execute('SELECT 1') 71 | 72 | self.assertEqual( 73 | str(e.exception), 74 | 'Malformed traceparant header: {}'.format(traceparent) 75 | ) 76 | 77 | def test_bad_traceparent_version(self): 78 | settings = { 79 | 'opentelemetry_traceparent': 80 | '01-aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa-bbbbbbbbbbbbbbbb-01' 81 | } 82 | with self.created_client(settings=settings) as client: 83 | with self.assertRaises(ValueError) as e: 84 | client.execute('SELECT 1') 85 | 86 | self.assertEqual( 87 | str(e.exception), 88 | 'unexpected version 01, expected 00' 89 | ) 90 | -------------------------------------------------------------------------------- /tests/test_varint.py: -------------------------------------------------------------------------------- 1 | from io import BytesIO 2 | from unittest import TestCase 3 | 4 | from proton_driver.varint import read_varint, write_varint 5 | 6 | 7 | class VarIntTestCase(TestCase): 8 | def test_check_not_negative(self): 9 | n = 0x9FFFFFFF 10 | 11 | buf = BytesIO() 12 | write_varint(n, buf) 13 | val = buf.getvalue() 14 | self.assertEqual(b'\xFF\xFF\xFF\xFF\t', val) 15 | 16 | buf = BytesIO(val) 17 | buf.read_one = lambda: ord(buf.read(1)) 18 | m = read_varint(buf) 19 | self.assertEqual(m, n) 20 | -------------------------------------------------------------------------------- /tests/testcase.py: -------------------------------------------------------------------------------- 1 | import configparser 2 | from contextlib import contextmanager 3 | import subprocess 4 | from unittest import TestCase 5 | 6 | from proton_driver.client import Client 7 | from tests import log 8 | from tests.util import skip_by_server_version 9 | 10 | 11 | file_config = configparser.ConfigParser() 12 | file_config.read(['setup.cfg']) 13 | 14 | 15 | log.configure(file_config.get('log', 'level')) 16 | 17 | 18 | class BaseTestCase(TestCase): 19 | required_server_version = None 20 | server_version = None 21 | 22 | proton_client_binary = file_config.get('db', 'client') 23 | host = file_config.get('db', 'host') 24 | port = file_config.getint('db', 'port') 25 | database = file_config.get('db', 'database') 26 | user = file_config.get('db', 'user') 27 | password = file_config.get('db', 'password') 28 | 29 | client = None 30 | client_kwargs = None 31 | cli_client_kwargs = None 32 | 33 | 
@classmethod 34 | def emit_cli(cls, statement, database=None, encoding='utf-8', **kwargs): 35 | if database is None: 36 | database = cls.database 37 | 38 | args = [ 39 | cls.proton_client_binary, 40 | '--database', database, 41 | '--host', cls.host, 42 | '--port', str(cls.port), 43 | '--query', str(statement) 44 | ] 45 | 46 | for key, value in kwargs.items(): 47 | args.extend(['--' + key, str(value)]) 48 | 49 | process = subprocess.Popen( 50 | args, stdout=subprocess.PIPE, stderr=subprocess.PIPE 51 | ) 52 | output = process.communicate() 53 | out, err = output 54 | 55 | if err: 56 | raise RuntimeError( 57 | 'Error during communication. {}'.format(err) 58 | ) 59 | 60 | return out.decode(encoding) 61 | 62 | def _create_client(self, **kwargs): 63 | client_kwargs = { 64 | 'port': self.port, 65 | 'database': self.database, 66 | 'user': self.user, 67 | 'password': self.password 68 | } 69 | client_kwargs.update(kwargs) 70 | return Client(self.host, **client_kwargs) 71 | 72 | def created_client(self, **kwargs): 73 | return self._create_client(**kwargs) 74 | 75 | @classmethod 76 | def setUpClass(cls): 77 | version_str = cls.emit_cli('SELECT version()').strip() 78 | cls.server_version = tuple(int(x) for x in version_str.split('.')) 79 | 80 | super(BaseTestCase, cls).setUpClass() 81 | 82 | def setUp(self): 83 | super(BaseTestCase, self).setUp() 84 | 85 | required = self.required_server_version 86 | 87 | if required and required > self.server_version: 88 | skip_by_server_version(self, self.required_server_version) 89 | 90 | if callable(self.client_kwargs): 91 | client_kwargs = self.client_kwargs(self.server_version) 92 | else: 93 | client_kwargs = self.client_kwargs 94 | client_kwargs = client_kwargs or {} 95 | self.client = self._create_client(**client_kwargs) 96 | 97 | def tearDown(self): 98 | self.client.disconnect() 99 | super(BaseTestCase, self).tearDown() 100 | 101 | @contextmanager 102 | def create_stream(self, columns, **kwargs): 103 | if self.cli_client_kwargs: 104 | if callable(self.cli_client_kwargs): 105 | cli_client_kwargs = self.cli_client_kwargs() 106 | if cli_client_kwargs: 107 | kwargs.update(cli_client_kwargs) 108 | else: 109 | kwargs.update(self.cli_client_kwargs) 110 | 111 | self.emit_cli( 112 | 'CREATE STREAM test ({}) ''ENGINE = Memory'.format(columns), 113 | **kwargs 114 | ) 115 | try: 116 | yield 117 | except Exception: 118 | raise 119 | finally: 120 | self.emit_cli('DROP STREAM test') 121 | -------------------------------------------------------------------------------- /tests/util.py: -------------------------------------------------------------------------------- 1 | import os 2 | from contextlib import contextmanager 3 | from functools import wraps 4 | import logging 5 | from io import StringIO 6 | from time import tzset 7 | from unittest.mock import patch 8 | 9 | import tzlocal 10 | 11 | 12 | def skip_by_server_version(testcase, version_required): 13 | testcase.skipTest( 14 | 'Minimum revision required: {}'.format( 15 | '.'.join(str(x) for x in version_required) 16 | ) 17 | ) 18 | 19 | 20 | def require_server_version(*version_required): 21 | def check(f): 22 | @wraps(f) 23 | def wrapper(*args, **kwargs): 24 | self = args[0] 25 | self.client.connection.connect() 26 | 27 | current = self.client.connection.server_info.version_tuple() 28 | 29 | if version_required <= current: 30 | return f(*args, **kwargs) 31 | else: 32 | skip_by_server_version(self, version_required) 33 | 34 | return wrapper 35 | return check 36 | 37 | 38 | class LoggingCapturer(object): 39 | def
__init__(self, logger_name, level): 40 | self.old_stdout_handlers = [] 41 | self.logger = logging.getLogger(logger_name) 42 | self.level = level 43 | super(LoggingCapturer, self).__init__() 44 | 45 | def __enter__(self): 46 | buffer = StringIO() 47 | 48 | self.new_handler = logging.StreamHandler(buffer) 49 | self.logger.addHandler(self.new_handler) 50 | self.old_logger_level = self.logger.level 51 | self.logger.setLevel(self.level) 52 | 53 | return buffer 54 | 55 | def __exit__(self, *exc_info): 56 | self.logger.setLevel(self.old_logger_level) 57 | self.logger.removeHandler(self.new_handler) 58 | 59 | 60 | capture_logging = LoggingCapturer 61 | 62 | 63 | def bust_tzlocal_cache(): 64 | try: 65 | tzlocal.unix._cache_tz = None 66 | tzlocal.unix._cache_tz_name = None 67 | except AttributeError: 68 | pass 69 | 70 | try: 71 | tzlocal.win32._cache_tz = None 72 | tzlocal.win32._cache_tz_name = None 73 | except AttributeError: 74 | pass 75 | 76 | 77 | @contextmanager 78 | def patch_env_tz(tz_name): 79 | bust_tzlocal_cache() 80 | 81 | # Although in many cases, changing the TZ environment variable may 82 | # affect the output of functions like localtime() without calling 83 | # tzset(), this behavior should not be relied on. 84 | # https://docs.python.org/3/library/time.html#time.tzset 85 | with patch.dict(os.environ, {'TZ': tz_name}): 86 | tzset() 87 | yield 88 | 89 | tzset() 90 | -------------------------------------------------------------------------------- /testsrequire.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | USE_NUMPY = bool(int(os.getenv('USE_NUMPY', '0'))) 5 | 6 | tests_require = [ 7 | 'pytest', 8 | 'parameterized', 9 | 'freezegun', 10 | 'zstd', 11 | 'clickhouse-cityhash>=1.0.2.1' 12 | ] 13 | 14 | if sys.implementation.name == 'pypy': 15 | tests_require.append('lz4<=3.0.1') 16 | else: 17 | tests_require.append('lz4') 18 | 19 | if USE_NUMPY: 20 | tests_require.extend(['numpy', 'pandas']) 21 | 22 | try: 23 | from pip import main as pipmain 24 | except ImportError: 25 | from pip._internal import main as pipmain 26 | 27 | pipmain(['install'] + tests_require) 28 | -------------------------------------------------------------------------------- /valgrind.supp: -------------------------------------------------------------------------------- 1 | { 2 | 3 | # See https://bugs.python.org/issue42176 4 | Memcheck:Cond 5 | fun:PyUnicode_Decode 6 | } 7 | --------------------------------------------------------------------------------
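A minimal sketch of the varint round trip that tests/test_varint.py exercises, with BytesIO standing in for the socket buffer; read_varint expects a read_one method returning one byte at a time, which the shim below supplies:

from io import BytesIO

from proton_driver.varint import read_varint, write_varint

buf = BytesIO()
write_varint(0x9FFFFFFF, buf)
assert buf.getvalue() == b'\xFF\xFF\xFF\xFF\t'  # LEB128 bytes of 0x9FFFFFFF

buf = BytesIO(buf.getvalue())
buf.read_one = lambda: ord(buf.read(1))  # read_varint consumes single bytes
assert read_varint(buf) == 0x9FFFFFFF

--------------------------------------------------------------------------------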