├── .coveragerc ├── .flake8 ├── .github ├── ISSUE_TEMPLATE │ └── bug_report.md └── workflows │ ├── actions.yml │ └── test.yml ├── .gitignore ├── CHANGELOG.md ├── CONTRIBUTING.rst ├── LICENSE ├── MANIFEST.in ├── Makefile ├── README.rst ├── docs ├── Makefile ├── _static │ ├── css │ │ └── custom.css │ └── mymarilyn-icon.png ├── _templates │ └── layout.html ├── api.rst ├── changelog.rst ├── conf.py ├── contents.rst.inc ├── contributing.rst ├── dbapi.rst ├── development.rst ├── features.rst ├── index.rst ├── installation.rst ├── license.rst ├── misc.rst ├── performance.rst ├── quickstart.rst ├── types.rst └── unsupportedserverversions.rst ├── example ├── bytewax │ ├── .dockerignore │ ├── Dockerfile │ ├── README.md │ ├── compose.yaml │ ├── grafana_provisioning │ │ ├── dashboards │ │ │ ├── grafana_dashboard.json │ │ │ └── main.yaml │ │ └── datasources │ │ │ └── automatic.yml │ ├── hackernews.py │ ├── init_sql │ │ └── init.sql │ ├── proton.py │ └── requirements.txt ├── descriptive_pipeline │ ├── Dockerfile │ ├── Makefile │ ├── README.md │ ├── config.yaml │ ├── requirements.txt │ ├── server │ │ ├── main.py │ │ └── utils │ │ │ └── logging.py │ └── test │ │ ├── script.js │ │ └── script_ws.js ├── idempotent │ └── idempotent.py ├── pandas │ └── dataframe.py └── streaming_query │ ├── car.py │ └── server monitor.py ├── proton_driver ├── __init__.py ├── block.py ├── blockstreamprofileinfo.py ├── bufferedreader.c ├── bufferedreader.pyx ├── bufferedwriter.c ├── bufferedwriter.pyx ├── client.py ├── clientinfo.py ├── columns │ ├── __init__.py │ ├── arraycolumn.py │ ├── base.py │ ├── boolcolumn.py │ ├── datecolumn.py │ ├── datetimecolumn.py │ ├── decimalcolumn.py │ ├── enumcolumn.py │ ├── exceptions.py │ ├── floatcolumn.py │ ├── intcolumn.py │ ├── intervalcolumn.py │ ├── ipcolumn.py │ ├── jsoncolumn.py │ ├── largeint.c │ ├── largeint.pyx │ ├── lowcardinalitycolumn.py │ ├── mapcolumn.py │ ├── nestedcolumn.py │ ├── nothingcolumn.py │ ├── nullablecolumn.py │ ├── nullcolumn.py │ ├── numpy │ │ ├── __init__.py │ │ ├── base.py │ │ ├── datecolumn.py │ │ ├── datetimecolumn.py │ │ ├── floatcolumn.py │ │ ├── intcolumn.py │ │ ├── lowcardinalitycolumn.py │ │ ├── service.py │ │ └── stringcolumn.py │ ├── service.py │ ├── simpleaggregatefunctioncolumn.py │ ├── stringcolumn.py │ ├── tuplecolumn.py │ ├── util.py │ └── uuidcolumn.py ├── compression │ ├── __init__.py │ ├── base.py │ ├── lz4.py │ ├── lz4hc.py │ └── zstd.py ├── connection.py ├── context.py ├── dbapi │ ├── __init__.py │ ├── connection.py │ ├── cursor.py │ ├── errors.py │ └── extras.py ├── defines.py ├── errors.py ├── log.py ├── numpy │ ├── __init__.py │ ├── block.py │ ├── helpers.py │ └── result.py ├── opentelemetry.py ├── progress.py ├── protocol.py ├── queryprocessingstage.py ├── reader.py ├── readhelpers.py ├── result.py ├── settings │ ├── __init__.py │ ├── available.py │ ├── types.py │ └── writer.py ├── streams │ ├── __init__.py │ ├── compressed.py │ └── native.py ├── util │ ├── __init__.py │ ├── compat.py │ ├── escape.py │ └── helpers.py ├── varint.c ├── varint.pyx └── writer.py ├── pyproject.toml ├── setup.cfg ├── setup.py ├── tests ├── __init__.py ├── columns │ ├── __init__.py │ ├── test_array.py │ ├── test_bool.py │ ├── test_common.py │ ├── test_date.py │ ├── test_datetime.py │ ├── test_decimal.py │ ├── test_enum.py │ ├── test_fixedstring.py │ ├── test_float.py │ ├── test_int.py │ ├── test_interval.py │ ├── test_ip.py │ ├── test_json.py │ ├── test_low_cardinality.py │ ├── test_map.py │ ├── test_nested.py │ ├── test_null.py │ ├── test_nullable.py │ ├── 
test_simpleaggregatefunction.py │ ├── test_string.py │ ├── test_tuple.py │ ├── test_unknown.py │ └── test_uuid.py ├── conftest.py ├── docker-compose.yml ├── log.py ├── numpy │ ├── __init__.py │ ├── columns │ │ ├── __init__.py │ │ ├── test_datetime.py │ │ ├── test_float.py │ │ ├── test_int.py │ │ ├── test_low_cardinality.py │ │ ├── test_nullable.py │ │ ├── test_other.py │ │ └── test_string.py │ ├── test_external_tables.py │ ├── test_generic.py │ ├── testcase.py │ └── util.py ├── test_blocks.py ├── test_buffered_reader.py ├── test_client.py ├── test_compression.py ├── test_connect.py ├── test_dbapi.py ├── test_errors.py ├── test_external_tables.py ├── test_insert.py ├── test_opentelemetry.py ├── test_query_info.py ├── test_settings.py ├── test_substitution.py ├── test_varint.py ├── testcase.py └── util.py ├── testsrequire.py └── valgrind.supp /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | plugins = Cython.Coverage 3 | source = proton_driver 4 | -------------------------------------------------------------------------------- /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | filename = *.py, *.pyx 3 | per-file-ignores = 4 | proton_driver/columns/largeint.pyx: E225, E226, E227, E999 5 | proton_driver/bufferedreader.pyx: E225, E226, E227, E999 6 | proton_driver/bufferedwriter.pyx: E225, E226, E227, E999 7 | proton_driver/varint.pyx: E225, E226, E227, E999 8 | # ignore example print warning. 9 | example/*: T201, T001 10 | exclude = venv,.conda,build 11 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | A clear and concise description of what the bug is. 12 | 13 | **To Reproduce** 14 | Minimal piece of Python code that reproduces the problem. 15 | 16 | **Expected behavior** 17 | A clear and concise description of what you expected to happen. 18 | 19 | **Versions** 20 | 21 | - Version of package with the problem. 22 | - Proton server version. Version can be obtained by running `SELECT version()` query. 23 | - Python version. 
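If helpful, a short snippet can collect the details above (a sketch, assuming a local Proton instance on the default native port 8463):

```python
import sys

import proton_driver
from proton_driver import client

c = client.Client(host='127.0.0.1', port=8463)
print('driver:', proton_driver.__version__)
print('server:', c.execute('SELECT version()'))
print('python:', sys.version)
```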
24 | -------------------------------------------------------------------------------- /.github/workflows/actions.yml: -------------------------------------------------------------------------------- 1 | on: 2 | - workflow_dispatch 3 | name: release 4 | jobs: 5 | build_wheels: 6 | name: Build wheels on ${{ matrix.os }} 7 | runs-on: ${{ matrix.os }} 8 | strategy: 9 | matrix: 10 | os: [ ubuntu-20.04 , windows-2019, macos-12 ] 11 | steps: 12 | - uses: actions/checkout@v4 13 | - name: Get proton-python-driver tag 14 | id: get_tag_name 15 | if: ${{ !startsWith( matrix.os, 'windows' ) }} 16 | run: | 17 | VERSION=`grep '^VERSION' proton_driver/__init__.py \ 18 | | sed 's/^VERSION = (//g' \ 19 | | sed 's/).*//g' \ 20 | | sed 's/, /./g'` 21 | VERSION=v$VERSION 22 | echo $VERSION 23 | echo "tag_name=$VERSION" >> $GITHUB_OUTPUT 24 | - name: Get proton-python-driver tag(windows) 25 | if: ${{ startsWith( matrix.os, 'windows' ) }} 26 | id: get_tag_name_win 27 | shell: pwsh 28 | run: | 29 | $VERSION=((Get-Content proton_driver/__init__.py | Select-String -Pattern '^VERSION') -replace "^VERSION = \((\d+), (\d+), (\d+)\)","v`$1.`$2.`$3") 30 | Write-Output $VERSION 31 | Write-Output "tag_name=$VERSION" >> $env:GITHUB_OUTPUT 32 | - name: Set up QEMU 33 | uses: docker/setup-qemu-action@v3 34 | if: ${{ startsWith( matrix.os, 'ubuntu' ) }} 35 | with: 36 | image: tonistiigi/binfmt:latest 37 | platforms: all 38 | - name: Build wheels 39 | uses: pypa/cibuildwheel@v2.21.3 40 | with: 41 | package-dir: . 42 | output-dir: wheelhouse 43 | config-file: pyproject.toml 44 | - name: Release wheels 45 | uses: softprops/action-gh-release@v2 46 | with: 47 | files: wheelhouse/*.whl 48 | generate_release_notes: true 49 | tag_name: ${{ join(steps.*.outputs.tag_name, '') }} 50 | env: 51 | GITHUB_TOKEN: ${{ secrets.GH_ACCESS_TOKEN }} 52 | - name: Store the distribution packages 53 | uses: actions/upload-artifact@v3 54 | with: 55 | name: python-package-distributions 56 | path: wheelhouse/*.whl 57 | 58 | publish-to-pypi: 59 | name: Publish Python distribution to PyPI 60 | needs: 61 | - build_wheels 62 | runs-on: ubuntu-latest 63 | environment: 64 | name: pypi 65 | url: https://pypi.org/p/proton-driver 66 | permissions: 67 | id-token: write # IMPORTANT: mandatory for trusted publishing 68 | steps: 69 | - name: Download all the dists 70 | uses: actions/download-artifact@v3 71 | with: 72 | name: python-package-distributions 73 | path: dist/ 74 | - name: Publish distribution to PyPI 75 | uses: pypa/gh-action-pypi-publish@release/v1 76 | with: 77 | username: __token__ 78 | password: ${{ secrets.PYPI_TOKEN }} 79 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .venv 2 | 3 | # Byte-compiled / optimized / DLL files 4 | __pycache__/ 5 | *.py[cod] 6 | *$py.class 7 | 8 | # C extensions 9 | *.so 10 | 11 | # Distribution / packaging 12 | .Python 13 | env/ 14 | build/ 15 | develop-eggs/ 16 | dist/ 17 | downloads/ 18 | eggs/ 19 | .eggs/ 20 | lib/ 21 | lib64/ 22 | parts/ 23 | sdist/ 24 | var/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .coverage 43 | .coverage.* 44 | .cache 45 | nosetests.xml 46 | coverage.xml 47 | *,cover 48 | .hypothesis/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | 58 | # Flask stuff: 59 | instance/ 60 | .webassets-cache 61 | 62 | # Scrapy stuff: 63 | .scrapy 64 | 65 | # Sphinx documentation 66 | docs/_build/ 67 | 68 | # PyBuilder 69 | target/ 70 | 71 | # IPython Notebook 72 | .ipynb_checkpoints 73 | 74 | # pyenv 75 | .python-version 76 | 77 | # celery beat schedule file 78 | celerybeat-schedule 79 | 80 | # dotenv 81 | .env 82 | 83 | # virtualenv 84 | venv/ 85 | ENV/ 86 | 87 | # Spyder project settings 88 | .spyderproject 89 | 90 | # Rope project settings 91 | .ropeproject 92 | 93 | # PyCharm project settings 94 | .idea/ 95 | .pypirc 96 | .vscode/ 97 | .DS_Store 98 | wheelhouse/ 99 | -------------------------------------------------------------------------------- /CONTRIBUTING.rst: -------------------------------------------------------------------------------- 1 | How to Contribute 2 | ================= 3 | 4 | #. Check for open issues or open a fresh issue to start a discussion around a feature idea or a bug. 5 | #. Fork `the repository `_ on GitHub to start making your changes to the **master** branch (or branch off of it). 6 | #. Write a test which shows that the bug was fixed or that the feature works as expected. 7 | #. Send a pull request and bug the maintainer until it gets merged and published. 8 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | This is the MIT license: http://www.opensource.org/licenses/mit-license.php 2 | 3 | Copyright (c) 2017 by Konstantin Lebedev. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | recursive-include proton_driver *.pyx 2 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | 2 | build: 3 | python3 -m pip install --upgrade build 4 | python3 -m build 5 | 6 | test: 7 | pip install ./dist/timeplus-proton-driver-0.2.7.tar.gz -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | SOURCEDIR = . 8 | BUILDDIR = _build 9 | 10 | # Put it first so that "make" without argument is like "make help". 11 | help: 12 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 13 | 14 | .PHONY: help Makefile 15 | 16 | # Catch-all target: route all unknown targets to Sphinx using the new 17 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 18 | %: Makefile 19 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) -------------------------------------------------------------------------------- /docs/_static/css/custom.css: -------------------------------------------------------------------------------- 1 | 2 | table.table-small-text { 3 | font-size: small; 4 | } 5 | 6 | 7 | table.table-center-header thead tr th { 8 | text-align: center; 9 | } 10 | 11 | 12 | table.table-right-text-align-results tbody tr td { 13 | text-align: right; 14 | } 15 | 16 | table.table-right-text-align-results tbody tr td:first-child { 17 | text-align: inherit; 18 | } 19 | -------------------------------------------------------------------------------- /docs/_static/mymarilyn-icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/timeplus-io/proton-python-driver/70a1c6313cd7af8f6fc12b4740fc6eb01027d4b9/docs/_static/mymarilyn-icon.png -------------------------------------------------------------------------------- /docs/_templates/layout.html: -------------------------------------------------------------------------------- 1 | {% extends "!layout.html" %} 2 | {% block extrahead %} 3 | 4 | {% endblock %} 5 | -------------------------------------------------------------------------------- /docs/api.rst: -------------------------------------------------------------------------------- 1 | .. _api: 2 | 3 | API 4 | === 5 | 6 | .. module:: clickhouse_driver 7 | 8 | This part of the documentation covers basic classes of the driver: Client, Connection and others. 9 | 10 | 11 | .. _api-client: 12 | 13 | Client 14 | ------ 15 | 16 | .. autoclass:: Client 17 | :members: 18 | :inherited-members: 19 | 20 | 21 | Connection 22 | ---------- 23 | 24 | .. autoclass:: clickhouse_driver.connection.Connection 25 | :members: 26 | :inherited-members: 27 | 28 | 29 | .. _query-result: 30 | 31 | QueryResult 32 | ----------- 33 | 34 | .. autoclass:: clickhouse_driver.result.QueryResult 35 | :members: 36 | :inherited-members: 37 | 38 | 39 | .. _progress-query-result: 40 | 41 | ProgressQueryResult 42 | ------------------- 43 | 44 | .. 
autoclass:: clickhouse_driver.result.ProgressQueryResult 45 | :members: 46 | :inherited-members: 47 | 48 | 49 | .. _iter-query-result: 50 | 51 | IterQueryResult 52 | --------------- 53 | 54 | .. autoclass:: clickhouse_driver.result.IterQueryResult 55 | :members: 56 | :inherited-members: 57 | -------------------------------------------------------------------------------- /docs/changelog.rst: -------------------------------------------------------------------------------- 1 | Changelog 2 | ========= 3 | 4 | Changelog is available in `github repo `_. 5 | -------------------------------------------------------------------------------- /docs/contents.rst.inc: -------------------------------------------------------------------------------- 1 | User's Guide 2 | ------------ 3 | 4 | This part of the documentation focuses on step-by-step instructions for development with clickhouse-driver. 5 | 6 | Clickhouse-driver is designed to communicate with ClickHouse server from Python over the native protocol. 7 | 8 | ClickHouse server provides two protocols for communication: 9 | 10 | * HTTP protocol (port 8123 by default); 11 | * Native (TCP) protocol (port 9000 by default). 12 | 13 | Each protocol has its own advantages and disadvantages. Here we focus on the advantages of the native protocol: 14 | 15 | * The native protocol is more configurable via various settings. 16 | * Binary data transfer is more compact than text data. 17 | * Building Python types from binary data is more efficient than from text data. 18 | * LZ4 compression is `faster than gzip `_. 19 | Gzip compression is used in the HTTP protocol. 20 | * Query profile info is available over the native protocol. We can read the rows-before-limit metric, for example. 21 | 22 | Once again: clickhouse-driver uses the native protocol (port 9000). 23 | 24 | There is an asynchronous wrapper for clickhouse-driver: aioch. It's available `here `_. 25 | 26 | .. toctree:: 27 | :maxdepth: 2 28 | 29 | installation 30 | quickstart 31 | features 32 | types 33 | performance 34 | misc 35 | unsupportedserverversions 36 | 37 | API Reference 38 | ------------- 39 | 40 | If you are looking for information on a specific function, class or 41 | method, this part of the documentation is for you. 42 | 43 | .. toctree:: 44 | :maxdepth: 2 45 | 46 | api 47 | dbapi 48 | 49 | Additional Notes 50 | ---------------- 51 | 52 | Legal information, changelog and contributing are here for the interested. 53 | 54 | .. toctree:: 55 | :maxdepth: 2 56 | 57 | development 58 | changelog 59 | license 60 | contributing 61 | -------------------------------------------------------------------------------- /docs/contributing.rst: -------------------------------------------------------------------------------- 1 | .. include:: ../CONTRIBUTING.rst 2 | -------------------------------------------------------------------------------- /docs/dbapi.rst: -------------------------------------------------------------------------------- 1 | .. _dbapi: 2 | 3 | DB API 2.0 4 | ========== 5 | 6 | This part of the documentation covers the driver's DB API. 7 | 8 | .. automodule:: clickhouse_driver.dbapi 9 | :members: 10 | :inherited-members: 11 | 12 | .. _dbapi-connection: 13 | 14 | Connection 15 | ---------- 16 | 17 | .. autoclass:: clickhouse_driver.dbapi.connection.Connection 18 | :members: 19 | :inherited-members: 20 | 21 | 22 | .. _dbapi-cursor: 23 | 24 | Cursor 25 | ------ 26 | 27 | .. autoclass:: clickhouse_driver.dbapi.cursor.Cursor 28 | :members: 29 | :inherited-members: 30 |
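A minimal usage sketch of the DB API (host and query are illustrative):

.. code-block:: python

    >>> from clickhouse_driver.dbapi import connect
    >>>
    >>> conn = connect(host='localhost')
    >>> cursor = conn.cursor()
    >>> cursor.execute('SELECT 1')
    >>> cursor.fetchall()
    [(1,)]
    >>> conn.close()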
31 | Extras 32 | ------ 33 | 34 | .. _dbapi-extras: 35 | 36 | .. autoclass:: clickhouse_driver.dbapi.extras.DictCursor 37 | 38 | .. autoclass:: clickhouse_driver.dbapi.extras.NamedTupleCursor 39 | -------------------------------------------------------------------------------- /docs/development.rst: -------------------------------------------------------------------------------- 1 | .. _development: 2 | 3 | Development 4 | =========== 5 | 6 | Test configuration 7 | ------------------ 8 | 9 | In ``setup.cfg`` you can find the ClickHouse server port, credentials, logging 10 | level and other options that can be tuned during local testing. 11 | 12 | Running tests locally 13 | --------------------- 14 | 15 | Install the desired Python version with your system package manager, pyenv or another manager. 16 | 17 | Install test requirements and build the package: 18 | 19 | .. code-block:: bash 20 | 21 | python testsrequire.py && python setup.py develop 22 | 23 | You should install Cython if you want to change ``*.pyx`` files: 24 | 25 | .. code-block:: bash 26 | 27 | pip install cython 28 | 29 | ClickHouse on host machine 30 | ^^^^^^^^^^^^^^^^^^^^^^^^^^ 31 | 32 | Install the desired versions of ``clickhouse-server`` and ``clickhouse-client`` on 33 | your machine. 34 | 35 | Run tests: 36 | 37 | .. code-block:: bash 38 | 39 | py.test -v 40 | 41 | ClickHouse in docker 42 | ^^^^^^^^^^^^^^^^^^^^ 43 | 44 | Create a container with the desired version of ``clickhouse-server``: 45 | 46 | .. code-block:: bash 47 | 48 | docker run --rm -e "TZ=Europe/Moscow" -p 127.0.0.1:9000:9000 --name test-clickhouse-server yandex/clickhouse-server:$VERSION 49 | 50 | Create a container with the same version of ``clickhouse-client``: 51 | 52 | .. code-block:: bash 53 | 54 | docker run --rm --entrypoint "/bin/sh" --name test-clickhouse-client --link test-clickhouse-server:clickhouse-server yandex/clickhouse-client:$VERSION -c 'while :; do sleep 1; done' 55 | 56 | Create a ``clickhouse-client`` script on your host machine: 57 | 58 | .. code-block:: bash 59 | 60 | echo -e '#!/bin/bash\n\ndocker exec -e "`env | grep ^TZ=`" test-clickhouse-client clickhouse-client "$@"' | sudo tee /usr/local/bin/clickhouse-client > /dev/null 61 | sudo chmod +x /usr/local/bin/clickhouse-client 62 | 63 | After that, the container ``test-clickhouse-client`` will communicate with 64 | ``test-clickhouse-server`` transparently from the host machine. 65 | 66 | Set ``host=clickhouse-server`` in ``setup.cfg``. 67 | 68 | Add an entry to the hosts file: 69 | 70 | .. code-block:: bash 71 | 72 | echo '127.0.0.1 clickhouse-server' | sudo tee -a /etc/hosts > /dev/null 73 | 74 | Set ``TZ=UTC`` and run tests: 75 | 76 | .. code-block:: bash 77 | 78 | export TZ=UTC 79 | py.test -v 80 | 81 | GitHub Actions in forked repository 82 | ----------------------------------- 83 | 84 | Workflows in forked repositories can be used for running tests. 85 | 86 | Workflows don't run in forked repositories by default. 87 | You must enable GitHub Actions in the **Actions** tab of the forked repository. 88 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | Welcome to clickhouse-driver 2 | ============================ 3 | 4 | Release |release|. 5 | 6 | Welcome to clickhouse-driver's documentation. Get started with :ref:`installation` 7 | and then get an overview with the :ref:`quickstart` where common queries are described. 8 | 9 | 10 | ..
include:: contents.rst.inc 11 | -------------------------------------------------------------------------------- /docs/installation.rst: -------------------------------------------------------------------------------- 1 | .. _installation: 2 | 3 | Installation 4 | ============ 5 | 6 | Python Version 7 | -------------- 8 | 9 | Clickhouse-driver supports Python 3.4 and newer and PyPy. 10 | 11 | Build Dependencies 12 | ------------------ 13 | 14 | Starting from version *0.1.0*, building from source requires `gcc`, Python and Linux headers. 15 | 16 | Example for the `python:alpine` docker image: 17 | 18 | .. code-block:: bash 19 | 20 | apk add gcc musl-dev 21 | 22 | By default there are wheels for Linux, Mac OS X and Windows. 23 | 24 | Packages for Linux and Mac OS X are available for Python 3.6 -- 3.10. 25 | 26 | Packages for Windows are available for Python 3.6 -- 3.10. 27 | 28 | Starting from version *0.2.3* there are wheels for musl-based Linux distributions. 29 | 30 | Dependencies 31 | ------------ 32 | 33 | These distributions will be installed automatically when installing clickhouse-driver. 34 | 35 | * `pytz`_ library for timezone calculations. 36 | * `enum34`_ backported Python 3.4 Enum. 37 | 38 | .. _pytz: http://pytz.sourceforge.net/ 39 | .. _enum34: https://pypi.org/project/enum34/ 40 | 41 | Optional dependencies 42 | ~~~~~~~~~~~~~~~~~~~~~ 43 | 44 | These distributions will not be installed automatically. Clickhouse-driver will detect and 45 | use them if you install them. 46 | 47 | * `clickhouse-cityhash`_ provides the CityHash algorithm of a specific version, see :ref:`compression-cityhash-notes`. 48 | * `lz4`_ enables `LZ4/LZ4HC compression `_ support. 49 | * `zstd`_ enables `ZSTD compression `_ support. 50 | 51 | .. _clickhouse-cityhash: https://pypi.org/project/clickhouse-cityhash/ 52 | .. _lz4: https://python-lz4.readthedocs.io/ 53 | .. _zstd: https://pypi.org/project/zstd/ 54 | 55 | 56 | .. _installation-pypi: 57 | 58 | Installation from PyPI 59 | ---------------------- 60 | 61 | The package can be installed using ``pip``: 62 | 63 | .. code-block:: bash 64 | 65 | pip install clickhouse-driver 66 | 67 | You can install extras packages if you need compression support. Example of 68 | LZ4 compression requirements installation: 69 | 70 | .. code-block:: bash 71 | 72 | pip install clickhouse-driver[lz4] 73 | 74 | You can also specify multiple extras by separating them with commas. 75 | Install LZ4 and ZSTD requirements: 76 | 77 | .. code-block:: bash 78 | 79 | pip install clickhouse-driver[lz4,zstd] 80 | 81 | 82 | .. _installation-numpy-support: 83 | 84 | NumPy support 85 | ------------- 86 | 87 | You can install additional packages (NumPy and Pandas) if you need NumPy support: 88 | 89 | .. code-block:: bash 90 | 91 | pip install clickhouse-driver[numpy] 92 | 93 | Supported NumPy versions are limited by the ``numpy`` package's Python support. 94 | 95 | 96 | Installation from GitHub 97 | ------------------------ 98 | 99 | The development version can be installed directly from GitHub: 100 | 101 | .. code-block:: bash 102 | 103 | pip install git+https://github.com/mymarilyn/clickhouse-driver@master#egg=clickhouse-driver 104 |
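A quick way to verify the installation (and optional compression support) — the host and compression choice are illustrative:

.. code-block:: python

    >>> from clickhouse_driver import Client
    >>>
    >>> client = Client('localhost', compression='lz4')  # requires the [lz4] extra
    >>> client.execute('SELECT 1')
    [(1,)]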
-------------------------------------------------------------------------------- /docs/license.rst: -------------------------------------------------------------------------------- 1 | License 2 | ======= 3 | 4 | ClickHouse Python Driver is distributed under the `MIT license 5 | `_. 6 | 7 | -------------------------------------------------------------------------------- /docs/misc.rst: -------------------------------------------------------------------------------- 1 | 2 | Miscellaneous 3 | ============= 4 | 5 | Client configuring from URL 6 | --------------------------- 7 | 8 | *New in version 0.1.1.* 9 | 10 | The client can be configured from a given URL: 11 | 12 | .. code-block:: python 13 | 14 | >>> from clickhouse_driver import Client 15 | >>> client = Client.from_url( 16 | ... 'clickhouse://login:password@host:port/database' 17 | ... ) 18 | 19 | Port 9000 is the default for the ``clickhouse`` schema; port 9440 is the default for the ``clickhouses`` schema. 20 | 21 | Connection to the default database: 22 | 23 | .. code-block:: python 24 | 25 | >>> client = Client.from_url('clickhouse://localhost') 26 | 27 | 28 | Querystring arguments will be passed along to the :meth:`~clickhouse_driver.connection.Connection` class's initializer: 29 | 30 | .. code-block:: python 31 | 32 | >>> client = Client.from_url( 33 | ... 'clickhouse://localhost/database?send_logs_level=trace&' 34 | ... 'client_name=myclient&' 35 | ... 'compression=lz4' 36 | ... ) 37 | 38 | Parameters that don't match the Connection init signature are treated as settings parameters. 39 | 40 | .. _insert-from-csv-file: 41 | 42 | Inserting data from CSV file 43 | ---------------------------- 44 | 45 | Let's assume you have the following data in a CSV file. 46 | 47 | .. code-block:: shell 48 | 49 | $ cat /tmp/data.csv 50 | time,order,qty 51 | 2019-08-01 15:23:14,New order1,5 52 | 2019-08-05 09:14:45,New order2,3 53 | 2019-08-13 12:20:32,New order3,7 54 | 55 | Data can be inserted into ClickHouse in the following way: 56 | 57 | 58 | .. code-block:: python 59 | 60 | >>> from csv import DictReader 61 | >>> from datetime import datetime 62 | >>> 63 | >>> from clickhouse_driver import Client 64 | >>> 65 | >>> 66 | >>> def iter_csv(filename): 67 | ... converters = { 68 | ... 'qty': int, 69 | ... 'time': lambda x: datetime.strptime(x, '%Y-%m-%d %H:%M:%S') 70 | ... } 71 | ... 72 | ... with open(filename, 'r') as f: 73 | ... reader = DictReader(f) 74 | ... for line in reader: 75 | ... yield {k: (converters[k](v) if k in converters else v) for k, v in line.items()} 76 | ... 77 | >>> client = Client('localhost') 78 | >>> 79 | >>> client.execute( 80 | ... 'CREATE TABLE IF NOT EXISTS data_csv ' 81 | ... '(' 82 | ... 'time DateTime, ' 83 | ... 'order String, ' 84 | ... 'qty Int32' 85 | ... ') Engine = Memory' 86 | ... ) 87 | [] 88 | >>> client.execute('INSERT INTO data_csv VALUES', iter_csv('/tmp/data.csv')) 89 | 3 90 | 91 | 92 | 93 | A table can be populated from a JSON file in a similar way. 94 | 95 |
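For example, a minimal sketch for newline-delimited JSON, reusing ``client`` and ``datetime`` from the CSV example above (the file name and layout are illustrative):

.. code-block:: python

    >>> import json
    >>>
    >>> def iter_json(filename):
    ...     with open(filename, 'r') as f:
    ...         for line in f:
    ...             row = json.loads(line)  # one JSON object per line
    ...             row['time'] = datetime.strptime(row['time'], '%Y-%m-%d %H:%M:%S')
    ...             yield row
    ...
    >>> client.execute('INSERT INTO data_csv VALUES', iter_json('/tmp/data.jsonl'))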
96 | Adding missed settings 97 | ---------------------- 98 | 99 | It's hard to keep the package's settings consistent with the ClickHouse 100 | server's. Some settings can be missing if your server is old. But if a setting 101 | is *supported by your server* and missing from the package, it can be added by 102 | simple monkey patching. Just look into the ClickHouse server source and pick the 103 | corresponding setting type from the package, or write your own type. 104 | 105 | .. code-block:: python 106 | 107 | >>> from clickhouse_driver.settings.available import settings as available_settings, SettingBool 108 | >>> from clickhouse_driver import Client 109 | >>> 110 | >>> available_settings['allow_suspicious_low_cardinality_types'] = SettingBool 111 | >>> 112 | >>> client = Client('localhost', settings={'allow_suspicious_low_cardinality_types': True}) 113 | >>> client.execute('CREATE TABLE test (x LowCardinality(Int32)) Engine = Null') 114 | [] 115 | 116 | 117 | *New in version 0.1.5.* 118 | 119 | Modern ClickHouse servers (20.*+) use text serialization for settings instead of 120 | binary serialization. You don't have to add missing settings manually into the 121 | available settings. Just specify new settings and they will work. 122 | 123 | .. code-block:: python 124 | 125 | >>> client = Client('localhost', settings={'brand_new_setting': 42}) 126 | >>> client.execute('SELECT 1') 127 | -------------------------------------------------------------------------------- /docs/unsupportedserverversions.rst: -------------------------------------------------------------------------------- 1 | 2 | Unsupported server versions 3 | =========================== 4 | 5 | The following versions are not supported by this package: 6 | 7 | - 20.1.*, due to alias type names being kept in metadata. 8 | 9 | However, you can use these versions at your own risk. 10 | -------------------------------------------------------------------------------- /example/bytewax/.dockerignore: -------------------------------------------------------------------------------- 1 | # Include any files or directories that you don't want to be copied to your 2 | # container here (e.g., local build artifacts, temporary files, etc.). 3 | # 4 | # For more help, visit the .dockerignore file reference guide at 5 | # https://docs.docker.com/engine/reference/builder/#dockerignore-file 6 | 7 | **/.DS_Store 8 | **/__pycache__ 9 | **/.venv 10 | **/.classpath 11 | **/.dockerignore 12 | **/.env 13 | **/.git 14 | **/.gitignore 15 | **/.project 16 | **/.settings 17 | **/.toolstarget 18 | **/.vs 19 | **/.vscode 20 | **/*.*proj.user 21 | **/*.dbmdl 22 | **/*.jfm 23 | **/bin 24 | **/charts 25 | **/docker-compose* 26 | **/compose* 27 | **/Dockerfile* 28 | **/node_modules 29 | **/npm-debug.log 30 | **/obj 31 | **/secrets.dev.yaml 32 | **/values.dev.yaml 33 | LICENSE 34 | README.md 35 | -------------------------------------------------------------------------------- /example/bytewax/Dockerfile: -------------------------------------------------------------------------------- 1 | # syntax=docker/dockerfile:1 2 | 3 | FROM python:3.10 as base 4 | 5 | # Prevents Python from writing pyc files. 6 | ENV PYTHONDONTWRITEBYTECODE=1 7 | 8 | # Keeps Python from buffering stdout and stderr to avoid situations where 9 | # the application crashes without emitting any logs due to buffering. 10 | ENV PYTHONUNBUFFERED=1 11 | 12 | #WORKDIR /app 13 | 14 | # Create a non-privileged user that the app will run under. 15 | # See https://docs.docker.com/develop/develop-images/dockerfile_best-practices/#user 16 | ARG UID=10001 17 | RUN adduser \ 18 | --disabled-password \ 19 | --gecos "" \ 20 | --home "/nonexistent" \ 21 | --shell "/sbin/nologin" \ 22 | --no-create-home \ 23 | --uid "${UID}" \ 24 | appuser 25 | 26 | # Download dependencies as a separate step to take advantage of Docker's caching. 27 | # Leverage a cache mount to /root/.cache/pip to speed up subsequent builds. 28 | # Leverage a bind mount to requirements.txt to avoid having to copy it 29 | # into this layer.
30 | RUN --mount=type=cache,target=/root/.cache/pip \ 31 | --mount=type=bind,source=requirements.txt,target=requirements.txt \ 32 | python -m pip install -r requirements.txt 33 | 34 | # Switch to the non-privileged user to run the application. 35 | USER appuser 36 | 37 | # Copy the source code into the container. 38 | COPY . . 39 | 40 | # Run bytewax with 5 workers to chew through the network requests on startup. 41 | CMD python -m bytewax.run hackernews.py -w 5 42 | -------------------------------------------------------------------------------- /example/bytewax/README.md: -------------------------------------------------------------------------------- 1 | # Example to Integrate Bytewax and Proton together 2 | [proton.py](https://github.com/timeplus-io/proton-python-driver/blob/develop/example/bytewax/proton.py) is a Bytewax sink for the [Timeplus Proton](https://github.com/timeplus-io/proton) streaming SQL engine. 3 | 4 | Inspired by https://bytewax.io/blog/polling-hacker-news, you can call the Hacker News HTTP API with Bytewax and send the latest news to Proton for SQL-based analysis, such as 5 | 6 | ```sql 7 | select * from story 8 | ``` 9 | 10 | ## Run with Docker Compose (Highly Recommended) 11 | 12 | Simply run `docker compose up` in this folder and it will start: 13 | 14 | 1. A Proton instance with pre-configured streams, materialized views and views. 15 | 2. A container that leverages Bytewax to call the Hacker News API and send data to Proton. 16 | 3. A pre-configured Grafana instance to visualize the live data. 17 | 18 | ## Run without Docker 19 | 20 | ```shell 21 | python3.10 -m venv py310-env 22 | source py310-env/bin/activate 23 | #git clone and cd to this proton-python-driver/example/bytewax folder 24 | pip install -r requirements.txt 25 | 26 | python -m bytewax.run hackernews.py -w 5 27 | ``` 28 | 29 | This starts Bytewax with 5 workers, which load new items every 15 seconds and send the data to Proton. 30 | 31 | ## How it works 32 | 33 | When the Proton server is started, we create 2 streams to receive the raw JSON data pushed from Bytewax. 34 | 35 | ```sql 36 | CREATE STREAM hn_stories_raw(raw string); 37 | CREATE STREAM hn_comments_raw(raw string); 38 | ``` 39 | 40 | Then we create 2 materialized views to extract the key information from the JSON and put it into more meaningful columns: 41 | 42 | ```sql 43 | CREATE MATERIALIZED VIEW hn_stories AS 44 | SELECT to_time(raw:time) AS _tp_time,raw:id::int AS id,raw:title AS title,raw:by AS by, raw FROM hn_stories_raw; 45 | CREATE MATERIALIZED VIEW hn_comments AS 46 | SELECT to_time(raw:time) AS _tp_time,raw:id::int AS id,raw:root_id::int AS root_id,raw:by AS by, raw FROM hn_comments_raw; 47 | ``` 48 | 49 | Finally we create 2 views to load both incoming data and existing data: 50 | 51 | ```sql 52 | CREATE VIEW IF NOT EXISTS story AS SELECT * FROM hn_stories WHERE _tp_time>earliest_ts(); 53 | CREATE VIEW IF NOT EXISTS comment AS SELECT * FROM hn_comments WHERE _tp_time>earliest_ts() 54 | ``` 55 |
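If you prefer to consume the `story` view from Python instead of a SQL client, here is a minimal sketch using the driver's streaming iterator (host and port assume the setup above):

```python
from proton_driver import client

c = client.Client(host='127.0.0.1', port=8463)
# execute_iter streams rows as they arrive from the unbounded query
for row in c.execute_iter('SELECT _tp_time, id, title, by FROM story'):
    print(row)
```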
56 | With all those streams and views, you can query the data in whatever way you like, e.g. 57 | 58 | ```sql 59 | select * from comment; 60 | 61 | select 62 | story._tp_time as story_time,comment._tp_time as comment_time, 63 | story.id as story_id, comment.id as comment_id, 64 | substring(story.title,1,20) as title,substring(comment.raw:text,1,20) as comment 65 | from story join comment on story.id=comment.root_id; 66 | ``` 67 | 68 | The key code in hackernews.py: 69 | 70 | ```python 71 | op.output("stories-out", story_stream, ProtonSink("hn_stories", os.environ.get("PROTON_HOST","127.0.0.1"))) 72 | ``` 73 | 74 | `hn_stories` is the stream name. The `ProtonSink` will create the stream if it doesn't exist. 75 | 76 | ```python 77 | class _ProtonSinkPartition(StatelessSinkPartition): 78 | def __init__(self, stream: str, host: str): 79 | self.client=client.Client(host=host, port=8463) 80 | self.stream=stream 81 | sql=f"CREATE STREAM IF NOT EXISTS `{stream}` (raw string)" 82 | logger.debug(sql) 83 | self.client.execute(sql) 84 | ``` 85 | 86 | and batch-inserts the data: 87 | 88 | ```python 89 | def write_batch(self, items): 90 | rows=[] 91 | for item in items: 92 | rows.append([item]) # single column in each row 93 | sql = f"INSERT INTO `{self.stream}` (raw) VALUES" 94 | # logger.debug(f"inserting data {sql}") 95 | self.client.execute(sql,rows) 96 | ``` 97 | 98 | ```python 99 | class ProtonSink(DynamicSink): 100 | def __init__(self, stream: str, host: str): 101 | self.stream = stream 102 | self.host = host if host is not None and host != "" else "127.0.0.1" 103 | 104 | 105 | def build(self, worker_index, worker_count): 106 | """See ABC docstring.""" 107 | return _ProtonSinkPartition(self.stream, self.host) 108 | ``` 109 | 110 | ### Querying and visualizing with Grafana 111 | 112 | Please try the docker-compose file. The Grafana instance is set up to install the [Proton Grafana Data Source Plugin](https://github.com/timeplus-io/proton-grafana-source), create the data source, and preconfigure a dashboard. Open the Grafana UI at http://localhost:3000 in your browser and choose the `Hackernews Live Dashboard`. 113 | -------------------------------------------------------------------------------- /example/bytewax/compose.yaml: -------------------------------------------------------------------------------- 1 | services: 2 | 3 | proton: 4 | image: ghcr.io/timeplus-io/proton:latest 5 | pull_policy: always 6 | ports: 7 | - 8463:8463 8 | - 3218:3218 9 | volumes: 10 | - ./init_sql:/docker-entrypoint-initdb.d 11 | healthcheck: 12 | test: wget --no-verbose --tries=1 --spider http://localhost:8123/?query=show+create+comment || exit 1 13 | interval: 5s 14 | timeout: 10s 15 | retries: 3 16 | start_period: 10s 17 | 18 | hn_stream: 19 | build: 20 | context: . 21 | image: timeplus/hackernews_bytewax:latest 22 | environment: 23 | - PROTON_HOST=proton 24 | depends_on: 25 | proton: 26 | condition: service_healthy 27 | 28 | grafana: 29 | image: grafana/grafana:latest 30 | pull_policy: always 31 | ports: 32 | - 3000:3000 33 | environment: 34 | GF_AUTH_ANONYMOUS_ENABLED: 1 35 | GF_AUTH_ANONYMOUS_ORG_ROLE: Admin 36 | GF_INSTALL_PLUGINS: timeplus-proton-datasource 37 | GF_LOG_LEVEL: error 38 | volumes: 39 | - ./grafana_provisioning:/etc/grafana/provisioning 40 | depends_on: 41 | - hn_stream 42 | -------------------------------------------------------------------------------- /example/bytewax/grafana_provisioning/dashboards/main.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: 1 2 | 3 | providers: 4 | # a unique provider name.
Required 5 | - name: 'a unique provider name' 6 | # Org id. Default to 1 7 | orgId: 1 8 | # name of the dashboard folder. 9 | folder: '' 10 | # folder UID. will be automatically generated if not specified 11 | folderUid: '' 12 | # provider type. Default to 'file' 13 | type: file 14 | # disable dashboard deletion 15 | disableDeletion: false 16 | # how often Grafana will scan for changed dashboards 17 | updateIntervalSeconds: 10 18 | # allow updating provisioned dashboards from the UI 19 | allowUiUpdates: true 20 | options: 21 | # path to dashboard files on disk. Required when using the 'file' type 22 | path: /etc/grafana/provisioning/dashboards 23 | # use folder names from filesystem to create folders in Grafana 24 | foldersFromFilesStructure: true -------------------------------------------------------------------------------- /example/bytewax/grafana_provisioning/datasources/automatic.yml: -------------------------------------------------------------------------------- 1 | apiVersion: 1 2 | 3 | datasources: 4 | - name: Proton 5 | type: timeplus-proton-datasource 6 | uid: c24e0faf-1490-4321-a373-7b2b07ca2e38 7 | typeName: Proton 8 | access: proxy 9 | url: 10 | password: 11 | user: 12 | database: 13 | isDefault: true 14 | basicAuth: false 15 | basicAuthUser: 16 | basicAuthPassword: 17 | withCredentials: 18 | jsonData: 19 | host: proton 20 | readOnly: false 21 | secureJsonData: 22 | tlsCACert: "" 23 | tlsClientCert: "" 24 | tlsClientKey: "" 25 | version: 1 26 | editable: true -------------------------------------------------------------------------------- /example/bytewax/hackernews.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from datetime import timedelta 3 | from typing import Optional, Tuple 4 | import os 5 | import json 6 | 7 | import requests 8 | from bytewax import operators as op 9 | from bytewax.dataflow import Dataflow 10 | from bytewax.inputs import SimplePollingSource 11 | 12 | from proton import ProtonSink 13 | 14 | logging.basicConfig(level=logging.INFO) 15 | logger = logging.getLogger(__name__) 16 | 17 | 18 | class HNSource(SimplePollingSource): 19 | def next_item(self): 20 | return ( 21 | "GLOBAL_ID", 22 | requests.get( 23 | "https://hacker-news.firebaseio.com/v0/maxitem.json" 24 | ).json(), 25 | ) 26 | 27 | 28 | def get_id_stream(old_max_id, new_max_id) -> Tuple[str, list]: 29 | if old_max_id is None: 30 | # Get the last 150 items on the first run. 
31 | old_max_id = new_max_id - 150 32 | return (new_max_id, range(old_max_id, new_max_id)) 33 | 34 | 35 | def download_metadata(hn_id) -> Optional[Tuple[str, dict]]: 36 | # Given a Hacker News id returned from the API, fetch its metadata. 37 | # If the API returns nothing for this id, skip the item. 38 | data = requests.get( 39 | f"https://hacker-news.firebaseio.com/v0/item/{hn_id}.json" # noqa 40 | ).json() 41 | 42 | if data is None: 43 | logger.warning(f"Couldn't fetch item {hn_id}, skipping") 44 | return None 45 | return (str(hn_id), data) 46 | 47 | 48 | def recurse_tree(metadata, og_metadata=None) -> Tuple[int, dict]: 49 | if not og_metadata: 50 | og_metadata = metadata 51 | try: 52 | parent_id = metadata["parent"] 53 | parent_metadata = download_metadata(parent_id) 54 | return recurse_tree(parent_metadata[1], og_metadata) 55 | except KeyError: 56 | return (metadata["id"], {**og_metadata, "root_id": metadata["id"]}) 57 | 58 | 59 | def key_on_parent(key__metadata) -> tuple: 60 | key, metadata = recurse_tree(key__metadata[1]) 61 | return (str(key), metadata) 62 | 63 | 64 | def format(id__metadata): 65 | id, metadata = id__metadata 66 | return json.dumps(metadata) 67 | 68 | 69 | flow = Dataflow("hn_scraper") 70 | max_id = op.input("in", flow, HNSource(timedelta(seconds=15))) 71 | id_stream = \ 72 | op.stateful_map("range", max_id, lambda: None, get_id_stream) \ 73 | .then(op.flat_map, "strip_key_flatten", lambda key_ids: key_ids[1]) \ 74 | .then(op.redistribute, "redist") 75 | 76 | id_stream = op.filter_map("meta_download", id_stream, download_metadata) 77 | split_stream = op.branch( 78 | "split_comments", id_stream, lambda item: item[1]["type"] == "story" 79 | ) 80 | story_stream = split_stream.trues 81 | story_stream = op.map("format_stories", story_stream, format) 82 | comment_stream = split_stream.falses 83 | comment_stream = op.map("key_on_parent", comment_stream, key_on_parent) 84 | comment_stream = op.map("format_comments", comment_stream, format) 85 | op.inspect("stories", story_stream) 86 | op.inspect("comments", comment_stream) 87 | op.output( 88 | "stories-out", 89 | story_stream, 90 | ProtonSink("hn_stories_raw", os.environ.get("PROTON_HOST", "127.0.0.1")), 91 | ) 92 | op.output( 93 | "comments-out", 94 | comment_stream, 95 | ProtonSink("hn_comments_raw", os.environ.get("PROTON_HOST", "127.0.0.1")), 96 | ) 97 | -------------------------------------------------------------------------------- /example/bytewax/init_sql/init.sql: -------------------------------------------------------------------------------- 1 | CREATE STREAM IF NOT EXISTS hn_stories_raw(raw string); 2 | 3 | CREATE STREAM IF NOT EXISTS hn_comments_raw(raw string); 4 | 5 | CREATE MATERIALIZED VIEW IF NOT EXISTS hn_stories AS 6 | SELECT to_time(raw:time) AS _tp_time,raw:id::int AS id,raw:title AS title,raw:by AS by, raw FROM hn_stories_raw; 7 | 8 | CREATE MATERIALIZED VIEW IF NOT EXISTS hn_comments AS 9 | SELECT to_time(raw:time) AS _tp_time,raw:id::int AS id,raw:root_id::int AS root_id,raw:by AS by, raw FROM hn_comments_raw; 10 | 11 | CREATE VIEW IF NOT EXISTS story AS SELECT * FROM hn_stories WHERE _tp_time>earliest_ts(); 12 | 13 | CREATE VIEW IF NOT EXISTS comment AS SELECT * FROM hn_comments WHERE _tp_time>earliest_ts(); -------------------------------------------------------------------------------- /example/bytewax/proton.py: -------------------------------------------------------------------------------- 1 | """Output to Timeplus Proton.""" 2 | from bytewax.outputs import DynamicSink, StatelessSinkPartition 3 | from proton_driver
import client 4 | import logging 5 | 6 | __all__ = [ 7 | "ProtonSink", 8 | ] 9 | logger = logging.getLogger(__name__) 10 | logger.setLevel(logging.INFO) 11 | 12 | 13 | class _ProtonSinkPartition(StatelessSinkPartition): 14 | def __init__(self, stream: str, host: str): 15 | self.client = client.Client(host=host, port=8463) 16 | self.stream = stream 17 | sql = f"CREATE STREAM IF NOT EXISTS `{stream}` (raw string)" # noqa 18 | logger.debug(sql) 19 | self.client.execute(sql) 20 | 21 | def write_batch(self, items): 22 | logger.debug(f"inserting data {items}") 23 | rows = [] 24 | for item in items: 25 | rows.append([item]) # single column in each row 26 | sql = f"INSERT INTO `{self.stream}` (raw) VALUES" 27 | logger.debug(f"inserting data {sql}") 28 | self.client.execute(sql, rows) 29 | 30 | 31 | class ProtonSink(DynamicSink): 32 | """ 33 | Write each output item to Proton on that worker. 34 | 35 | Items consumed from the dataflow must look like a string. Use a 36 | preceding map step to do custom formatting. 37 | 38 | Workers are the unit of parallelism. 39 | 40 | Can support at-least-once processing. Messages from the resume 41 | epoch will be duplicated right after resume. 42 | """ 43 | 44 | def __init__(self, stream: str, host: str): 45 | self.stream = stream 46 | self.host = host if host is not None and host != "" else "127.0.0.1" 47 | 48 | def build(self, worker_index, worker_count): 49 | """See ABC docstring.""" 50 | return _ProtonSinkPartition(self.stream, self.host)
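# Note: because this sink is at-least-once, rows from the resume epoch may be
# duplicated right after a restart. A possible mitigation (a sketch, not part
# of the original example) is to pass the driver's idempotent_id setting per
# batch in write_batch:
#
#     self.client.execute(sql, rows, settings={'idempotent_id': batch_id})
#
# where batch_id would be a stable identifier derived from the batch contents.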
-------------------------------------------------------------------------------- /example/bytewax/requirements.txt: -------------------------------------------------------------------------------- 1 | bytewax==0.18 2 | requests 3 | proton-driver -------------------------------------------------------------------------------- /example/descriptive_pipeline/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.9.10 2 | 3 | ARG VERSION 4 | 5 | WORKDIR /timeplus 6 | ADD ./requirements.txt /timeplus 7 | RUN pip3 install -r requirements.txt 8 | ADD ./app /timeplus/app/ 9 | ADD ./server /timeplus/server/ 10 | 11 | EXPOSE 5001 12 | 13 | ENTRYPOINT ["uvicorn", "server.main:app", "--host", "0.0.0.0", "--port", "5001", "--http", "h11"] -------------------------------------------------------------------------------- /example/descriptive_pipeline/Makefile: -------------------------------------------------------------------------------- 1 | VERSION = $(shell git rev-parse --short HEAD) 2 | BIN_NAME = proton-pipeline-service 3 | IMAGE_NAME = $(BIN_NAME):$(VERSION) 4 | DOCKER_ID_USER = timeplus 5 | FULLNAME=$(DOCKER_ID_USER)/${IMAGE_NAME} 6 | 7 | .PHONY: service 8 | 9 | service: 10 | uvicorn server.main:app --port 5001 --host 0.0.0.0 --reload 11 | 12 | proton: 13 | docker run -d -p 8463:8463 --pull always --name proton ghcr.io/timeplus-io/proton:latest 14 | 15 | docker: Dockerfile 16 | docker build -t $(IMAGE_NAME) . 17 | 18 | docker_run: 19 | docker run -p 5001:5001 $(IMAGE_NAME) 20 | 21 | push: 22 | docker tag $(IMAGE_NAME) $(FULLNAME) 23 | docker push $(FULLNAME) -------------------------------------------------------------------------------- /example/descriptive_pipeline/config.yaml: -------------------------------------------------------------------------------- 1 | # host: localhost 2 | # port: 8463 3 | # db: default 4 | # user: default 5 | # password: "" 6 | 7 | pipelines: 8 | - name: pipeline1 9 | sqls: 10 | - | 11 | DROP STREAM IF EXISTS devices 12 | - | 13 | CREATE RANDOM STREAM IF NOT EXISTS devices( 14 | device string default 'device'||to_string(rand()%4), 15 | temperature float default rand()%1000/10 16 | ) SETTINGS eps=1000000 17 | - | 18 | SELECT * FROM devices 19 | - name: pipeline2 20 | sqls: 21 | - | 22 | DROP STREAM IF EXISTS devices 23 | - | 24 | CREATE RANDOM STREAM IF NOT EXISTS devices( 25 | device string default 'device'||to_string(rand()%4), 26 | temperature float default rand()%1000/10 27 | ) SETTINGS eps=100000 28 | - | 29 | SELECT 30 | window_start, 31 | count(*) as count, min(temperature) as min, max(temperature) as max, p99(temperature) as p99 32 | FROM 33 | tumble(devices, 1s) 34 | GROUP BY 35 | window_start 36 | - name: pipeline3 37 | sqls: 38 | - | 39 | SELECT 1 -------------------------------------------------------------------------------- /example/descriptive_pipeline/requirements.txt: -------------------------------------------------------------------------------- 1 | fastapi==0.75.0 2 | loguru==0.6.0 3 | uvicorn[standard]==0.17.6 4 | retry==0.9.2 5 | proton-driver==0.2.10 -------------------------------------------------------------------------------- /example/descriptive_pipeline/server/utils/logging.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | from loguru import logger 5 | 6 | log_level = os.environ.get("APP_LOG_LEVEL", "INFO") 7 | 8 | logger.remove() 9 | 10 | logger.add( 11 | sys.stdout, 12 | colorize=True, 13 | format="{time} - {level} - {message}", 14 | level=log_level, 15 | ) 16 | logger.add("app.log", rotation="500 MB", level=log_level) 17 | 18 | 19 | def getLogger(): 20 | return logger 21 | -------------------------------------------------------------------------------- /example/descriptive_pipeline/test/script.js: -------------------------------------------------------------------------------- 1 | import http from 'k6/http'; 2 | import { sleep } from 'k6'; 3 | 4 | export const options = { 5 | vus: 1, 6 | duration: '30s', 7 | }; 8 | 9 | export default function() { 10 | http.get('http://localhost:5001/queries/pipeline1',{ timeout: '30s' }); 11 | sleep(1); 12 | } 13 | -------------------------------------------------------------------------------- /example/descriptive_pipeline/test/script_ws.js: -------------------------------------------------------------------------------- 1 | import ws from 'k6/ws'; 2 | import { check } from 'k6'; 3 | 4 | export const options = { 5 | vus: 1, 6 | duration: '30s', 7 | }; 8 | 9 | export default function () { 10 | const url = 'ws://localhost:5001/queries/pipeline1'; 11 | 12 | const res = ws.connect(url, {}, function (socket) { 13 | socket.on('open', () => console.log('connected')); 14 | socket.on('close', () => console.log('disconnected')); 15 | 16 | socket.setTimeout(function () { 17 | console.log('30 seconds passed, closing the socket'); 18 | socket.close(); 19 | }, 1000 * 30); 20 | }); 21 | 22 | check(res, { 'status is 101': (r) => r && r.status ===
101 }); 23 | } -------------------------------------------------------------------------------- /example/idempotent/idempotent.py: -------------------------------------------------------------------------------- 1 | from proton_driver import connect, Client 2 | from datetime import date 3 | from time import sleep 4 | 5 | 6 | # Create a test stream 7 | def create_test_stream(operator, table_name, table_columns): 8 | operator.execute(f'DROP STREAM IF EXISTS {table_name};') 9 | operator.execute(f'CREATE STREAM {table_name} ({table_columns})') 10 | 11 | 12 | # Use dbapi to implement idempotent insertion 13 | def use_dbapi(): 14 | with connect('proton://localhost') as conn: 15 | with conn.cursor() as cur: 16 | create_test_stream( 17 | cur, 18 | 'test_user', 19 | 'id int32, name string, birthday date' 20 | ) 21 | # Set idempotent_id. 22 | cur.set_settings(dict(idempotent_id='batch1')) 23 | # Insert data into test_user multiple times with the same idempotent_id. # noqa 24 | # The query result should contain only the first inserted data. 25 | data = [ 26 | (123456, 'timeplus', date(2024, 10, 24)), 27 | (789012, 'stream ', date(2023, 10, 24)), 28 | (135790, 'proton ', date(2024, 10, 24)), 29 | (246801, 'database', date(2024, 10, 24)), 30 | ] 31 | # Execute multiple insert operations. 32 | for _ in range(10): 33 | cur.execute( 34 | 'INSERT INTO test_user (id, name, birthday) VALUES', 35 | data 36 | ) 37 | cur.fetchall() 38 | # Wait for 3 sec to make sure data is available in the historical store. 39 | sleep(3) 40 | cur.execute('SELECT count() FROM table(test_user)') 41 | res = cur.fetchall() 42 | # Data is inserted only once, so res == (4,). 43 | print(res) 44 | 45 | 46 | # Use Client to implement idempotent insertion 47 | def use_client(): 48 | cli = Client('localhost', 8463) 49 | create_test_stream(cli, 'test_stream', '`i` int, `v` string') 50 | setting = { 51 | 'idempotent_id': 'batch1' 52 | } 53 | data = [ 54 | (1, 'a'), (2, 'b'), (3, 'c'), (4, 'd'), 55 | (5, 'e'), (6, 'f'), (7, 'g'), (8, 'h') 56 | ] 57 | # Execute multiple insert operations. 58 | for _ in range(10): 59 | cli.execute( 60 | 'INSERT INTO test_stream (i, v) VALUES', 61 | data, 62 | settings=setting 63 | ) 64 | # Wait for 3 sec to make sure data is available in the historical store. 65 | sleep(3) 66 | res = cli.execute('SELECT count() FROM table(test_stream)') 67 | # Data is inserted only once, so res == (8,).
68 | print(res) 69 | 70 | 71 | if __name__ == "__main__": 72 | use_dbapi() # (4,) 73 | use_client() # (8,) 74 | -------------------------------------------------------------------------------- /example/pandas/dataframe.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import time 3 | 4 | from proton_driver import client, connect 5 | 6 | if __name__ == "__main__": 7 | c = client.Client(host='127.0.0.1', port=8463) 8 | 9 | # Set up the test stream 10 | c.execute("drop stream if exists test") 11 | c.execute( 12 | """create stream test ( 13 | year int16, 14 | first_name string 15 | )""" 16 | ) 17 | # Add some data 18 | df = pd.DataFrame.from_records( 19 | [ 20 | {'year': 1994, 'first_name': 'Vova'}, 21 | {'year': 1995, 'first_name': 'Anja'}, 22 | {'year': 1996, 'first_name': 'Vasja'}, 23 | {'year': 1997, 'first_name': 'Petja'}, 24 | ] 25 | ) 26 | c.insert_dataframe( 27 | 'INSERT INTO "test" (year, first_name) VALUES', 28 | df, 29 | settings=dict(use_numpy=True), 30 | ) 31 | # or c.execute( 32 | # "INSERT INTO test(year, first_name) VALUES", df.to_dict('records') 33 | # ) 34 | # Wait for 3 sec to make sure data is available in the historical store 35 | time.sleep(3) 36 | 37 | df = c.query_dataframe('SELECT * FROM table(test)') 38 | print(df) 39 | print(df.describe()) 40 | 41 | # You can also use Proton settings in the DataFrame API, just as with the `execute` function. # noqa 42 | # Here's an example with idempotent id. 43 | 44 | # Reset stream 45 | c.execute('drop stream if exists test') 46 | c.execute( 47 | """create stream test ( 48 | year int16, 49 | first_name string 50 | )""" 51 | ) 52 | settings = dict(use_numpy=True, idempotent_id='batch') 53 | 54 | # Execute multiple insert operations. 55 | for _ in range(5): 56 | c.insert_dataframe( 57 | 'INSERT INTO "test" (year, first_name) VALUES', 58 | df, 59 | settings=settings, 60 | ) 61 | time.sleep(3) 62 | 63 | rv = c.execute('SELECT COUNT(*) FROM table(test)') 64 | # Only the first insert is written to the historical storage. 65 | print(rv) # (4,) 66 | 67 | # Converting query results to a variety of formats with the DB API 68 | with connect('proton://localhost') as conn: 69 | with conn.cursor() as cur: 70 | cur.execute('SELECT * FROM table(test)') 71 | print('--------------Pandas DataFrame--------------') 72 | print(cur.df()) 73 | 74 | cur.execute('SELECT * FROM table(test)') 75 | print('----------------Numpy Arrays----------------') 76 | print(cur.fetchnumpy()) 77 | 78 | cur.execute('SELECT * FROM table(test)') 79 | print('--------------Polars DataFrame--------------') 80 | print(cur.pl()) 81 | 82 | cur.execute('SELECT * FROM table(test)') 83 | print('-----------------Arrow Table----------------') 84 | print(cur.arrow()) 85 | -------------------------------------------------------------------------------- /example/streaming_query/car.py: -------------------------------------------------------------------------------- 1 | """ 2 | This example uses the driver's DB API. 3 | In this example, one thread writes a large amount of car speed data into the 4 | database, while another thread reads from the database to figure out which 5 | car is speeding.
6 | """ 7 | 8 | import datetime 9 | import random 10 | import threading 11 | import time 12 | 13 | from proton_driver import connect 14 | 15 | account = 'default:' 16 | 17 | 18 | def create_stream(): 19 | with connect(f"proton://{account}@localhost:8463/default") as conn: # noqa 20 | with conn.cursor() as cursor: 21 | cursor.execute("drop stream if exists cars") 22 | cursor.execute( 23 | "create stream if not exists car(id int64, speed float64)" 24 | ) 25 | 26 | 27 | def write_data(car_num: int): 28 | car_begin_date = datetime.datetime(2022, 1, 1, 1, 0, 0) 29 | for day in range(100): 30 | car_begin_date += datetime.timedelta(days=1) 31 | data = [ 32 | ( 33 | random.randint(0, car_num - 1), 34 | random.random() * 20 + 50, 35 | car_begin_date + datetime.timedelta(milliseconds=i * 100), 36 | ) 37 | for i in range(300000) 38 | ] 39 | with connect(f"proton://{account}@localhost:8463/default") as conn: 40 | with conn.cursor() as cursor: 41 | cursor.executemany( 42 | "insert into car (id, speed, _tp_time) values", data 43 | ) 44 | print(f"row count: {cursor.rowcount}") 45 | time.sleep(10) 46 | 47 | 48 | def query(conn): 49 | query_sql = """select id, avg(speed), window_start, window_end 50 | from session(car, 1h, [speed >= 60, speed < 60)) 51 | group by id, window_start, window_end""" 52 | cursor = conn.cursor() 53 | cursor.set_stream_results(stream_results=True, max_row_buffer=100) 54 | cursor.execute(query_sql) 55 | return cursor 56 | 57 | 58 | def fetch_result(cursor): 59 | while True: 60 | print(cursor.fetchone()) 61 | 62 | 63 | def main(): 64 | create_stream() 65 | conn = connect(f"proton://{account}@localhost:8463/default") 66 | cursor = query(conn) 67 | threading.Thread(target=write_data, args=[7]).start() 68 | fetch_result(cursor) 69 | cursor.close() 70 | conn.close() 71 | 72 | 73 | if __name__ == "__main__": 74 | main() 75 | -------------------------------------------------------------------------------- /example/streaming_query/server monitor.py: -------------------------------------------------------------------------------- 1 | """ 2 | This example uses basic classes of the driver: Client 3 | In this example, a few servers upload their logs of statue (include cpu, 4 | memory and disk usage, generate randomly) detected every 100ms to the database 5 | every 10 logs generated. The main thread will warn if any usage exceeds 95%. 
6 | """ 7 | 8 | import random 9 | import threading 10 | import time 11 | from datetime import datetime 12 | 13 | from proton_driver import client 14 | 15 | 16 | class Server(threading.Thread): 17 | def __init__(self, name: str, buffer_max_size: int = 10): 18 | threading.Thread.__init__(self) 19 | self.name = name 20 | self.buffer = [] 21 | self.buffer_max_size = buffer_max_size 22 | self.client = None 23 | self.killed = False 24 | 25 | def __get_state(self) -> dict: 26 | return { 27 | "cpu": random.randint(0, 100), 28 | "memory": random.randint(0, 100), 29 | "disk": random.randint(0, 100), 30 | "server_name": self.name, 31 | "timestamp": datetime.now(), 32 | } 33 | 34 | def __send_data(self): 35 | self.client.execute( 36 | "insert into server_monitor (" 37 | "cpu, memory, disk, server_name, timestamp" 38 | ") values", 39 | self.buffer, 40 | ) 41 | 42 | def run(self) -> None: 43 | self.client = client.Client(host='127.0.0.1', port=8463) 44 | while not self.killed: 45 | self.buffer.append(self.__get_state()) 46 | if len(self.buffer) >= self.buffer_max_size: 47 | self.__send_data() 48 | self.buffer = [] 49 | time.sleep(0.1) 50 | self.client.disconnect() 51 | self.client = None 52 | 53 | 54 | def initial_stream(): 55 | c = client.Client(host='127.0.0.1', port=8463) 56 | c.execute("drop stream if exists server_monitor") 57 | c.execute( 58 | """create stream server_monitor ( 59 | cpu float, 60 | memory float, 61 | disk float, 62 | server_name string, 63 | timestamp datetime64(3) default now64(3) 64 | )""" 65 | ) 66 | 67 | 68 | def show(): 69 | c = client.Client(host='127.0.0.1', port=8463) 70 | limit = 95 71 | rows = c.execute_iter( 72 | "select cpu, memory, disk, server_name, timestamp from server_monitor " 73 | "where cpu > %(limit)f or memory > %(limit)f or disk > %(limit)f", 74 | {"limit": limit}, 75 | ) 76 | for row in rows: 77 | msg = ( 78 | f"{row[4].strftime('%d-%m-%Y %H:%M:%S')} WARNING server[{row[3]}]:" # noqa 79 | ) 80 | col_names = ["cpu", "memory", "disk"] 81 | for col_name, usage in zip(col_names, row[:3]): 82 | if usage > limit: 83 | msg += " %s[%.2f%%]" % (col_name, usage) 84 | print(msg) 85 | 86 | 87 | if __name__ == "__main__": 88 | initial_stream() 89 | servers = [Server(f"server_{i}") for i in range(7)] 90 | for server in servers: 91 | server.start() 92 | show() 93 | -------------------------------------------------------------------------------- /proton_driver/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from .client import Client 3 | from .dbapi import connect 4 | 5 | 6 | VERSION = (0, 2, 13) 7 | __version__ = '.'.join(str(x) for x in VERSION) 8 | 9 | __all__ = ['Client', 'connect'] 10 | -------------------------------------------------------------------------------- /proton_driver/blockstreamprofileinfo.py: -------------------------------------------------------------------------------- 1 | from .reader import read_binary_uint8 2 | from .varint import read_varint 3 | 4 | 5 | class BlockStreamProfileInfo(object): 6 | def __init__(self): 7 | self.rows = 0 8 | self.blocks = 0 9 | self.bytes = 0 10 | self.applied_limit = False # bool 11 | self.rows_before_limit = 0 12 | self.calculated_rows_before_limit = 0 # bool 13 | 14 | super(BlockStreamProfileInfo, self).__init__() 15 | 16 | def read(self, fin): 17 | self.rows = read_varint(fin) 18 | self.blocks = read_varint(fin) 19 | self.bytes = read_varint(fin) 20 | self.applied_limit = bool(read_binary_uint8(fin)) 21 | self.rows_before_limit = read_varint(fin) 22 | 
self.calculated_rows_before_limit = bool(read_binary_uint8(fin)) 23 | -------------------------------------------------------------------------------- /proton_driver/columns/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/timeplus-io/proton-python-driver/70a1c6313cd7af8f6fc12b4740fc6eb01027d4b9/proton_driver/columns/__init__.py -------------------------------------------------------------------------------- /proton_driver/columns/boolcolumn.py: -------------------------------------------------------------------------------- 1 | from .base import FormatColumn 2 | 3 | 4 | class BoolColumn(FormatColumn): 5 | ch_type = 'bool' 6 | py_types = (bool, ) 7 | format = '?' 8 | -------------------------------------------------------------------------------- /proton_driver/columns/datecolumn.py: -------------------------------------------------------------------------------- 1 | from datetime import date, timedelta 2 | 3 | from .base import FormatColumn 4 | 5 | 6 | epoch_start = date(1970, 1, 1) 7 | epoch_end = date(2149, 6, 6) 8 | 9 | epoch_start_date32 = date(1925, 1, 1) 10 | epoch_end_date32 = date(2283, 11, 11) 11 | 12 | 13 | class DateColumn(FormatColumn): 14 | ch_type = 'date' 15 | py_types = (date, ) 16 | format = 'H' 17 | 18 | min_value = epoch_start 19 | max_value = epoch_end 20 | 21 | date_lut_days = (epoch_end - epoch_start).days + 1 22 | date_lut = {x: epoch_start + timedelta(x) for x in range(date_lut_days)} 23 | date_lut_reverse = {value: key for key, value in date_lut.items()} 24 | 25 | def before_write_items(self, items, nulls_map=None): 26 | null_value = self.null_value 27 | 28 | date_lut_reverse = self.date_lut_reverse 29 | min_value = self.min_value 30 | max_value = self.max_value 31 | 32 | for i, item in enumerate(items): 33 | if nulls_map and nulls_map[i]: 34 | items[i] = null_value 35 | continue 36 | 37 | if type(item) is not date: 38 | item = date(item.year, item.month, item.day) 39 | 40 | if min_value <= item <= max_value: 41 | items[i] = date_lut_reverse[item] 42 | else: 43 | items[i] = 0 44 | 45 | def after_read_items(self, items, nulls_map=None): 46 | date_lut = self.date_lut 47 | 48 | if nulls_map is None: 49 | return tuple(date_lut[item] for item in items) 50 | else: 51 | return tuple( 52 | (None if is_null else date_lut[items[i]]) 53 | for i, is_null in enumerate(nulls_map) 54 | ) 55 | 56 | 57 | class Date32Column(DateColumn): 58 | ch_type = 'date32' 59 | format = 'i' 60 | 61 | min_value = epoch_start_date32 62 | max_value = epoch_end_date32 63 | 64 | date_lut_days = (epoch_end_date32 - epoch_start).days + 1 65 | date_lut = { 66 | x: epoch_start + timedelta(x) 67 | for x in range((epoch_start_date32 - epoch_start).days, date_lut_days) 68 | } 69 | date_lut_reverse = {value: key for key, value in date_lut.items()} 70 | -------------------------------------------------------------------------------- /proton_driver/columns/decimalcolumn.py: -------------------------------------------------------------------------------- 1 | from decimal import Decimal, localcontext 2 | 3 | from .base import FormatColumn 4 | from .exceptions import ColumnTypeMismatchException 5 | from .intcolumn import Int128Column, Int256Column 6 | 7 | 8 | class DecimalColumn(FormatColumn): 9 | py_types = (Decimal, float, int) 10 | max_precision = None 11 | int_size = None 12 | 13 | def __init__(self, precision, scale, types_check=False, **kwargs): 14 | self.precision = precision 15 | self.scale = scale 16 | super(DecimalColumn, 
self).__init__(**kwargs) 17 | 18 | if types_check: 19 | max_signed_int = (1 << (8 * self.int_size - 1)) - 1 20 | 21 | def check_item(value): 22 | if value < -max_signed_int or value > max_signed_int: 23 | raise ColumnTypeMismatchException(value) 24 | 25 | self.check_item = check_item 26 | 27 | def after_read_items(self, items, nulls_map=None): 28 | if self.scale >= 1: 29 | scale = 10 ** self.scale 30 | 31 | if nulls_map is None: 32 | return tuple(Decimal(item) / scale for item in items) 33 | else: 34 | return tuple( 35 | (None if is_null else Decimal(items[i]) / scale) 36 | for i, is_null in enumerate(nulls_map) 37 | ) 38 | else: 39 | if nulls_map is None: 40 | return tuple(Decimal(item) for item in items) 41 | else: 42 | return tuple( 43 | (None if is_null else Decimal(items[i])) 44 | for i, is_null in enumerate(nulls_map) 45 | ) 46 | 47 | def before_write_items(self, items, nulls_map=None): 48 | null_value = self.null_value 49 | 50 | if self.scale >= 1: 51 | scale = 10 ** self.scale 52 | 53 | for i, item in enumerate(items): 54 | if nulls_map and nulls_map[i]: 55 | items[i] = null_value 56 | else: 57 | items[i] = int(Decimal(str(item)) * scale) 58 | 59 | else: 60 | for i, item in enumerate(items): 61 | if nulls_map and nulls_map[i]: 62 | items[i] = null_value 63 | else: 64 | items[i] = int(Decimal(str(item))) 65 | 66 | # Override default precision to the maximum supported by underlying type. 67 | def _write_data(self, items, buf): 68 | with localcontext() as ctx: 69 | ctx.prec = self.max_precision 70 | super(DecimalColumn, self)._write_data(items, buf) 71 | 72 | def _read_data(self, n_items, buf, nulls_map=None): 73 | with localcontext() as ctx: 74 | ctx.prec = self.max_precision 75 | return super(DecimalColumn, self)._read_data( 76 | n_items, buf, nulls_map=nulls_map 77 | ) 78 | 79 | 80 | class Decimal32Column(DecimalColumn): 81 | format = 'i' 82 | max_precision = 9 83 | int_size = 4 84 | 85 | 86 | class Decimal64Column(DecimalColumn): 87 | format = 'q' 88 | max_precision = 18 89 | int_size = 8 90 | 91 | 92 | class Decimal128Column(DecimalColumn, Int128Column): 93 | max_precision = 38 94 | 95 | 96 | class Decimal256Column(DecimalColumn, Int256Column): 97 | max_precision = 76 98 | 99 | 100 | def create_decimal_column(spec, column_options): 101 | precision, scale = spec[8:-1].split(',') 102 | precision, scale = int(precision), int(scale) 103 | 104 | # Maximum precisions for underlying types are: 105 | # Int32 10**9 106 | # Int64 10**18 107 | # Int128 10**38 108 | # Int256 10**76 109 | if precision <= 9: 110 | cls = Decimal32Column 111 | elif precision <= 18: 112 | cls = Decimal64Column 113 | elif precision <= 38: 114 | cls = Decimal128Column 115 | else: 116 | cls = Decimal256Column 117 | 118 | return cls(precision, scale, **column_options) 119 | -------------------------------------------------------------------------------- /proton_driver/columns/enumcolumn.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | 3 | from .. 
import errors 4 | from .intcolumn import IntColumn 5 | 6 | 7 | class EnumColumn(IntColumn): 8 | py_types = (Enum, int, str) 9 | 10 | def __init__(self, enum_cls, **kwargs): 11 | self.enum_cls = enum_cls 12 | super(EnumColumn, self).__init__(**kwargs) 13 | 14 | def before_write_items(self, items, nulls_map=None): 15 | null_value = self.null_value 16 | 17 | enum_cls = self.enum_cls 18 | 19 | for i, item in enumerate(items): 20 | if nulls_map and nulls_map[i]: 21 | items[i] = null_value 22 | continue 23 | 24 | source_value = item.name if isinstance(item, Enum) else item 25 | 26 | # Check real enum value 27 | try: 28 | if isinstance(source_value, str): 29 | items[i] = enum_cls[source_value].value 30 | else: 31 | items[i] = enum_cls(source_value).value 32 | except (ValueError, KeyError): 33 | choices = ', '.join( 34 | "'{}' = {}".format(x.name.replace("'", r"\'"), x.value) 35 | for x in enum_cls 36 | ) 37 | enum_str = '{}({})'.format(enum_cls.__name__, choices) 38 | 39 | raise errors.LogicalError( 40 | "Unknown element '{}' for type {}" 41 | .format(source_value, enum_str) 42 | ) 43 | 44 | def after_read_items(self, items, nulls_map=None): 45 | enum_cls = self.enum_cls 46 | 47 | if nulls_map is None: 48 | return tuple(enum_cls(item).name for item in items) 49 | else: 50 | return tuple( 51 | (None if is_null else enum_cls(items[i]).name) 52 | for i, is_null in enumerate(nulls_map) 53 | ) 54 | 55 | 56 | class Enum8Column(EnumColumn): 57 | ch_type = 'enum8' 58 | format = 'b' 59 | int_size = 1 60 | 61 | 62 | class Enum16Column(EnumColumn): 63 | ch_type = 'enum16' 64 | format = 'h' 65 | int_size = 2 66 | 67 | 68 | def create_enum_column(spec, column_options): 69 | if spec.startswith('enum8'): 70 | params = spec[6:-1] 71 | cls = Enum8Column 72 | else: 73 | params = spec[7:-1] 74 | cls = Enum16Column 75 | 76 | return cls(Enum(cls.ch_type, _parse_options(params)), **column_options) 77 | 78 | 79 | def _parse_options(option_string): 80 | options = dict() 81 | after_name = False 82 | escaped = False 83 | quote_character = None 84 | name = '' 85 | value = '' 86 | 87 | for ch in option_string: 88 | if escaped: 89 | name += ch 90 | escaped = False # accepting escaped character 91 | 92 | elif after_name: 93 | if ch in (' ', '='): 94 | pass 95 | elif ch == ',': 96 | options[name] = int(value) 97 | after_name = False 98 | name = '' 99 | value = '' # reset before collecting new option 100 | else: 101 | value += ch 102 | 103 | elif quote_character: 104 | if ch == '\\': 105 | escaped = True 106 | elif ch == quote_character: 107 | quote_character = None 108 | after_name = True # start collecting option value 109 | else: 110 | name += ch 111 | 112 | else: 113 | if ch == "'": 114 | quote_character = ch 115 | 116 | if after_name: 117 | options.setdefault(name, int(value)) # append word after last comma 118 | 119 | return options 120 | -------------------------------------------------------------------------------- /proton_driver/columns/exceptions.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | class ColumnException(Exception): 4 | pass 5 | 6 | 7 | class ColumnTypeMismatchException(ColumnException): 8 | pass 9 | 10 | 11 | class StructPackException(ColumnException): 12 | pass 13 | -------------------------------------------------------------------------------- /proton_driver/columns/floatcolumn.py: -------------------------------------------------------------------------------- 1 | from ctypes import c_float 2 | 3 | from .base import FormatColumn 4 | 5 | 6 | class 
FloatColumn(FormatColumn): 7 | py_types = (float, int) 8 | 9 | 10 | class Float32Column(FloatColumn): 11 | ch_type = 'float32' 12 | format = 'f' 13 | 14 | def __init__(self, types_check=False, **kwargs): 15 | super(Float32Column, self).__init__(types_check=types_check, **kwargs) 16 | 17 | if types_check: 18 | # Chop only bytes that fit current type. 19 | # Cast to -nan or nan if overflows. 20 | def before_write_items(items, nulls_map=None): 21 | null_value = self.null_value 22 | 23 | for i, item in enumerate(items): 24 | if nulls_map and nulls_map[i]: 25 | items[i] = null_value 26 | else: 27 | items[i] = c_float(item).value 28 | 29 | self.before_write_items = before_write_items 30 | 31 | 32 | class Float64Column(FloatColumn): 33 | ch_type = 'float64' 34 | format = 'd' 35 | -------------------------------------------------------------------------------- /proton_driver/columns/intervalcolumn.py: -------------------------------------------------------------------------------- 1 | from .intcolumn import Int64Column 2 | 3 | 4 | class IntervalColumn(Int64Column): 5 | pass 6 | 7 | 8 | class IntervalDayColumn(IntervalColumn): 9 | ch_type = 'interval_day' 10 | 11 | 12 | class IntervalWeekColumn(IntervalColumn): 13 | ch_type = 'interval_week' 14 | 15 | 16 | class IntervalMonthColumn(IntervalColumn): 17 | ch_type = 'interval_month' 18 | 19 | 20 | class IntervalYearColumn(IntervalColumn): 21 | ch_type = 'interval_year' 22 | 23 | 24 | class IntervalHourColumn(IntervalColumn): 25 | ch_type = 'interval_hour' 26 | 27 | 28 | class IntervalMinuteColumn(IntervalColumn): 29 | ch_type = 'interval_minute' 30 | 31 | 32 | class IntervalSecondColumn(IntervalColumn): 33 | ch_type = 'interval_second' 34 | -------------------------------------------------------------------------------- /proton_driver/columns/jsoncolumn.py: -------------------------------------------------------------------------------- 1 | from .base import Column 2 | from .stringcolumn import String 3 | from ..reader import read_binary_uint8, read_binary_str 4 | from ..util.compat import json 5 | from ..writer import write_binary_uint8 6 | 7 | 8 | class JsonColumn(Column): 9 | py_types = (dict, ) 10 | 11 | # No NULL value actually 12 | null_value = {} 13 | 14 | def __init__(self, column_by_spec_getter, **kwargs): 15 | self.column_by_spec_getter = column_by_spec_getter 16 | self.string_column = String(**kwargs) 17 | super(JsonColumn, self).__init__(**kwargs) 18 | 19 | def write_state_prefix(self, buf): 20 | # Read in binary format. 21 | # Write in text format. 
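# The uint8 below is assumed to be a serialization-format flag: 1 selects the text form, which matches write_items() sending every dict as a JSON string via json.dumps().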
22 | write_binary_uint8(1, buf) 23 | 24 | def read_items(self, n_items, buf): 25 | read_binary_uint8(buf) 26 | spec = read_binary_str(buf) 27 | col = self.column_by_spec_getter( 28 | spec, dict(namedtuple_as_json=True) 29 | ) 30 | col.read_state_prefix(buf) 31 | return col.read_data(n_items, buf) 32 | 33 | def write_items(self, items, buf): 34 | items = [x if isinstance(x, str) else json.dumps(x) for x in items] 35 | self.string_column.write_items(items, buf) 36 | 37 | 38 | def create_json_column(spec, column_by_spec_getter, column_options): 39 | return JsonColumn(column_by_spec_getter, **column_options) 40 | -------------------------------------------------------------------------------- /proton_driver/columns/lowcardinalitycolumn.py: -------------------------------------------------------------------------------- 1 | from math import log 2 | 3 | from ..reader import read_binary_uint64 4 | from ..writer import write_binary_int64 5 | from .base import Column 6 | from .intcolumn import UInt8Column, UInt16Column, UInt32Column, UInt64Column 7 | 8 | 9 | def create_low_cardinality_column(spec, column_by_spec_getter): 10 | inner = spec[16:-1] 11 | nested = column_by_spec_getter(inner) 12 | return LowCardinalityColumn(nested) 13 | 14 | 15 | class LowCardinalityColumn(Column): 16 | """ 17 | Stores column as index (unique elements) and keys. 18 | Good for de-duplication of large values with low cardinality. 19 | """ 20 | int_types = { 21 | 0: UInt8Column, 22 | 1: UInt16Column, 23 | 2: UInt32Column, 24 | 3: UInt64Column 25 | } 26 | 27 | # Need to read additional keys. 28 | # Additional keys are stored before indexes as value N and N keys 29 | # after them. 30 | has_additional_keys_bit = 1 << 9 31 | # Need to update dictionary. 32 | # It means that previous granule has different dictionary. 33 | need_update_dictionary = 1 << 10 34 | 35 | serialization_type = has_additional_keys_bit | need_update_dictionary 36 | 37 | def __init__(self, nested_column, **kwargs): 38 | self.nested_column = nested_column 39 | super(LowCardinalityColumn, self).__init__(**kwargs) 40 | 41 | def read_state_prefix(self, buf): 42 | return read_binary_uint64(buf) 43 | 44 | def write_state_prefix(self, buf): 45 | # KeysSerializationVersion. See Proton docs. 46 | write_binary_int64(1, buf) 47 | 48 | def _write_data(self, items, buf): 49 | index, keys = [], [] 50 | key_by_index_element = {} 51 | 52 | if self.nested_column.nullable: 53 | # First element represents NULL if column is nullable. 54 | index.append(self.nested_column.null_value) 55 | # Prevent null map writing. Reset nested column nullable flag. 56 | self.nested_column.nullable = False 57 | 58 | for x in items: 59 | if x is None: 60 | # Zero element for null. 61 | keys.append(0) 62 | 63 | else: 64 | key = key_by_index_element.get(x) 65 | # Get key from index or add it to index. 66 | if key is None: 67 | key = len(key_by_index_element) 68 | key_by_index_element[x] = key 69 | index.append(x) 70 | 71 | keys.append(key + 1) 72 | else: 73 | for x in items: 74 | key = key_by_index_element.get(x) 75 | 76 | # Get key from index or add it to index. 77 | if key is None: 78 | key = len(key_by_index_element) 79 | key_by_index_element[x] = len(key_by_index_element) 80 | index.append(x) 81 | 82 | keys.append(key) 83 | 84 | # Do not write anything for empty column. 85 | # May happen while writing empty arrays. 
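# Encoding sketch: items ['a', 'b', 'a'] with a non-nullable nested column produce index = ['a', 'b'] and keys = [0, 1, 0]; with a nullable nested column the index gains a leading NULL and the keys shift to [1, 2, 1].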
86 | if not len(index): 87 | return 88 | 89 | int_type = int(log(len(index), 2) / 8) 90 | int_column = self.int_types[int_type]() 91 | 92 | serialization_type = self.serialization_type | int_type 93 | 94 | write_binary_int64(serialization_type, buf) 95 | write_binary_int64(len(index), buf) 96 | 97 | self.nested_column.write_data(index, buf) 98 | write_binary_int64(len(items), buf) 99 | int_column.write_items(keys, buf) 100 | 101 | def _read_data(self, n_items, buf, nulls_map=None): 102 | if not n_items: 103 | return tuple() 104 | 105 | serialization_type = read_binary_uint64(buf) 106 | 107 | # Lowest byte contains info about key type. 108 | key_type = serialization_type & 0xf 109 | keys_column = self.int_types[key_type]() 110 | 111 | nullable = self.nested_column.nullable 112 | # Prevent null map reading. Reset nested column nullable flag. 113 | self.nested_column.nullable = False 114 | 115 | index_size = read_binary_uint64(buf) 116 | index = self.nested_column.read_data(index_size, buf) 117 | if nullable: 118 | index = (None, ) + index[1:] 119 | 120 | read_binary_uint64(buf) # number of keys 121 | keys = keys_column.read_data(n_items, buf) 122 | 123 | return tuple(index[x] for x in keys) 124 | -------------------------------------------------------------------------------- /proton_driver/columns/mapcolumn.py: -------------------------------------------------------------------------------- 1 | from .base import Column 2 | from .intcolumn import UInt64Column 3 | from ..util.helpers import pairwise 4 | from .util import get_inner_columns 5 | 6 | 7 | class MapColumn(Column): 8 | py_types = (dict, ) 9 | 10 | def __init__(self, key_column, value_column, **kwargs): 11 | self.offset_column = UInt64Column() 12 | self.key_column = key_column 13 | self.value_column = value_column 14 | super(MapColumn, self).__init__(**kwargs) 15 | 16 | def read_state_prefix(self, buf): 17 | self.key_column.read_state_prefix(buf) 18 | self.value_column.read_state_prefix(buf) 19 | 20 | def write_state_prefix(self, buf): 21 | self.key_column.write_state_prefix(buf) 22 | self.value_column.write_state_prefix(buf) 23 | 24 | def read_items(self, n_items, buf): 25 | offsets = list(self.offset_column.read_items(n_items, buf)) 26 | last_offset = offsets[-1] 27 | keys = self.key_column.read_data(last_offset, buf) 28 | values = self.value_column.read_data(last_offset, buf) 29 | 30 | offsets.insert(0, 0) 31 | 32 | return [ 33 | dict(zip(keys[begin:end], values[begin:end])) 34 | for begin, end in pairwise(offsets) 35 | ] 36 | 37 | def write_items(self, items, buf): 38 | offsets = [] 39 | keys = [] 40 | values = [] 41 | 42 | total = 0 43 | for x in items: 44 | total += len(x) 45 | offsets.append(total) 46 | keys.extend(x.keys()) 47 | values.extend(x.values()) 48 | 49 | self.offset_column.write_items(offsets, buf) 50 | self.key_column.write_data(keys, buf) 51 | self.value_column.write_data(values, buf) 52 | 53 | 54 | def create_map_column(spec, column_by_spec_getter): 55 | key, value = get_inner_columns('map', spec) 56 | key_column = column_by_spec_getter(key.strip()) 57 | value_column = column_by_spec_getter(value.strip()) 58 | 59 | return MapColumn(key_column, value_column) 60 | -------------------------------------------------------------------------------- /proton_driver/columns/nestedcolumn.py: -------------------------------------------------------------------------------- 1 | 2 | from .arraycolumn import create_array_column 3 | from .util import get_inner_spec 4 | 5 | 6 | def create_nested_column(spec, column_by_spec_getter, 
column_options): 7 | return create_array_column( 8 | 'array(tuple({}))'.format(get_inner_spec('nested', spec)), 9 | column_by_spec_getter, column_options 10 | ) 11 | -------------------------------------------------------------------------------- /proton_driver/columns/nothingcolumn.py: -------------------------------------------------------------------------------- 1 | from .intcolumn import FormatColumn 2 | 3 | 4 | class NothingColumn(FormatColumn): 5 | ch_type = 'nothing' 6 | format = 'B' 7 | 8 | @property 9 | def size(self): 10 | return 1 11 | 12 | def after_read_items(self, items, nulls_map=None): 13 | return (None, ) * len(items) 14 | -------------------------------------------------------------------------------- /proton_driver/columns/nullablecolumn.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | def create_nullable_column(spec, column_by_spec_getter): 4 | inner = spec[9:-1] 5 | nested = column_by_spec_getter(inner) 6 | nested.nullable = True 7 | return nested 8 | -------------------------------------------------------------------------------- /proton_driver/columns/nullcolumn.py: -------------------------------------------------------------------------------- 1 | from .intcolumn import FormatColumn 2 | 3 | 4 | # TODO: Drop Null column support in future. 5 | # Compatibility with old servers. 6 | class NullColumn(FormatColumn): 7 | ch_type = 'NULL' 8 | format = 'B' 9 | 10 | @property 11 | def size(self): 12 | return 1 13 | 14 | def after_read_items(self, items, nulls_map=None): 15 | return (None, ) * len(items) 16 | -------------------------------------------------------------------------------- /proton_driver/columns/numpy/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/timeplus-io/proton-python-driver/70a1c6313cd7af8f6fc12b4740fc6eb01027d4b9/proton_driver/columns/numpy/__init__.py -------------------------------------------------------------------------------- /proton_driver/columns/numpy/base.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | 4 | from ..base import Column 5 | 6 | 7 | class NumpyColumn(Column): 8 | dtype = None 9 | 10 | normalize_null_value = True 11 | 12 | def read_items(self, n_items, buf): 13 | data = buf.read(n_items * self.dtype.itemsize) 14 | return np.frombuffer(data, self.dtype.newbyteorder('<'), n_items) 15 | 16 | def write_items(self, items, buf): 17 | buf.write(items.astype(self.dtype.newbyteorder('<')).tobytes()) 18 | 19 | def _write_nulls_map(self, items, buf): 20 | s = self.make_null_struct(len(items)) 21 | nulls_map = self._get_nulls_map(items) 22 | buf.write(s.pack(*nulls_map)) 23 | 24 | def _get_nulls_map(self, items): 25 | return [bool(x) for x in pd.isnull(items)] 26 | 27 | def _read_data(self, n_items, buf, nulls_map=None): 28 | items = self.read_items(n_items, buf) 29 | 30 | if self.after_read_items: 31 | return self.after_read_items(items, nulls_map) 32 | elif nulls_map is not None: 33 | items = np.array(items, dtype=object) 34 | np.place(items, nulls_map, None) 35 | 36 | return items 37 | 38 | def prepare_items(self, items): 39 | nulls_map = pd.isnull(items) 40 | 41 | # Always replace null values with null_value for proper inserts into 42 | # non-nullable columns.
43 | if isinstance(items, np.ndarray) and self.normalize_null_value: 44 | items = np.array(items) 45 | np.place(items, nulls_map, self.null_value) 46 | 47 | return items 48 | -------------------------------------------------------------------------------- /proton_driver/columns/numpy/datecolumn.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from .base import NumpyColumn 4 | 5 | 6 | class NumpyDateColumn(NumpyColumn): 7 | dtype = np.dtype(np.uint16) 8 | ch_type = 'date' 9 | 10 | null_value = np.datetime64(0, 'Y') 11 | 12 | def read_items(self, n_items, buf): 13 | data = super(NumpyDateColumn, self).read_items(n_items, buf) 14 | return data.astype('datetime64[D]') 15 | 16 | def write_items(self, items, buf): 17 | super(NumpyDateColumn, self).write_items( 18 | items.astype('datetime64[D]'), buf 19 | ) 20 | -------------------------------------------------------------------------------- /proton_driver/columns/numpy/floatcolumn.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from .base import NumpyColumn 4 | 5 | # normalize_null_value = False due to float('nan') 6 | # With normalization pandas.isnull will treat float('nan') as NULL value. 7 | 8 | 9 | class NumpyFloat32Column(NumpyColumn): 10 | dtype = np.dtype(np.float32) 11 | ch_type = 'float32' 12 | normalize_null_value = False 13 | 14 | def _get_nulls_map(self, items): 15 | return [x is None for x in items] 16 | 17 | 18 | class NumpyFloat64Column(NumpyColumn): 19 | dtype = np.dtype(np.float64) 20 | ch_type = 'float64' 21 | normalize_null_value = False 22 | 23 | def _get_nulls_map(self, items): 24 | return [x is None for x in items] 25 | -------------------------------------------------------------------------------- /proton_driver/columns/numpy/intcolumn.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from .base import NumpyColumn 4 | 5 | 6 | class NumpyInt8Column(NumpyColumn): 7 | dtype = np.dtype(np.int8) 8 | ch_type = 'int8' 9 | 10 | 11 | class NumpyUInt8Column(NumpyColumn): 12 | dtype = np.dtype(np.uint8) 13 | ch_type = 'uint8' 14 | 15 | 16 | class NumpyInt16Column(NumpyColumn): 17 | dtype = np.dtype(np.int16) 18 | ch_type = 'int16' 19 | 20 | 21 | class NumpyUInt16Column(NumpyColumn): 22 | dtype = np.dtype(np.uint16) 23 | ch_type = 'uint16' 24 | 25 | 26 | class NumpyInt32Column(NumpyColumn): 27 | dtype = np.dtype(np.int32) 28 | ch_type = 'int32' 29 | 30 | 31 | class NumpyUInt32Column(NumpyColumn): 32 | dtype = np.dtype(np.uint32) 33 | ch_type = 'uint32' 34 | 35 | 36 | class NumpyInt64Column(NumpyColumn): 37 | dtype = np.dtype(np.int64) 38 | ch_type = 'int64' 39 | 40 | 41 | class NumpyUInt64Column(NumpyColumn): 42 | dtype = np.dtype(np.uint64) 43 | ch_type = 'uint64' 44 | -------------------------------------------------------------------------------- /proton_driver/columns/numpy/lowcardinalitycolumn.py: -------------------------------------------------------------------------------- 1 | from math import log 2 | 3 | import numpy as np 4 | import pandas as pd 5 | 6 | from ..lowcardinalitycolumn import LowCardinalityColumn 7 | from ...reader import read_binary_uint64 8 | from ...writer import write_binary_int64 9 | from .intcolumn import ( 10 | NumpyUInt8Column, NumpyUInt16Column, NumpyUInt32Column, NumpyUInt64Column 11 | ) 12 | 13 | 14 | class NumpyLowCardinalityColumn(LowCardinalityColumn): 15 | int_types = { 16 | 0: NumpyUInt8Column,
17 | 1: NumpyUInt16Column, 18 | 2: NumpyUInt32Column, 19 | 3: NumpyUInt64Column 20 | } 21 | 22 | def __init__(self, nested_column, **kwargs): 23 | super(NumpyLowCardinalityColumn, self).__init__(nested_column, 24 | **kwargs) 25 | 26 | def _write_data(self, items, buf): 27 | # Do not write anything for empty column. 28 | # May happen while writing empty arrays. 29 | if not len(items): 30 | return 31 | 32 | # Replace nans with defaults if not nullable. 33 | if isinstance(items, np.ndarray) and not self.nested_column.nullable: 34 | nulls = pd.isnull(items) 35 | items = np.where(nulls, self.nested_column.null_value, items) 36 | 37 | c = pd.Categorical(items) 38 | 39 | int_type = int(log(len(c.codes), 2) / 8) 40 | int_column = self.int_types[int_type]() 41 | 42 | serialization_type = self.serialization_type | int_type 43 | 44 | index = c.categories 45 | keys = c.codes 46 | 47 | if self.nested_column.nullable: 48 | # First element represents NULL if column is nullable. 49 | index = index.insert(0, self.nested_column.null_value) 50 | keys = keys + 1 51 | # Prevent null map writing. Reset nested column nullable flag. 52 | self.nested_column.nullable = False 53 | 54 | write_binary_int64(serialization_type, buf) 55 | write_binary_int64(len(index), buf) 56 | 57 | self.nested_column.write_data(index.to_numpy(items.dtype), buf) 58 | write_binary_int64(len(items), buf) 59 | int_column.write_items(keys, buf) 60 | 61 | def _read_data(self, n_items, buf, nulls_map=None): 62 | if not n_items: 63 | return tuple() 64 | 65 | serialization_type = read_binary_uint64(buf) 66 | 67 | # Lowest byte contains info about key type. 68 | key_type = serialization_type & 0xf 69 | keys_column = self.int_types[key_type]() 70 | 71 | nullable = self.nested_column.nullable 72 | # Prevent null map reading. Reset nested column nullable flag. 73 | self.nested_column.nullable = False 74 | 75 | index_size = read_binary_uint64(buf) 76 | index = self.nested_column.read_data(index_size, buf) 77 | 78 | read_binary_uint64(buf) # number of keys 79 | keys = keys_column.read_data(n_items, buf) 80 | 81 | if nullable: 82 | # Shift all codes by one ("No value" code is -1 for pandas 83 | # categorical) and drop corresponding first index; 84 | # this is the analog of the original operation: 85 | # index = (None, ) + index[1:] 86 | keys = np.array(keys, dtype='int64') # deal with possible overflow 87 | keys = keys - 1 88 | index = index[1:] 89 | return pd.Categorical.from_codes(keys, index) 90 | 91 | 92 | def create_numpy_low_cardinality_column(spec, column_by_spec_getter): 93 | inner = spec[16:-1] 94 | nested = column_by_spec_getter(inner) 95 | return NumpyLowCardinalityColumn(nested) 96 | -------------------------------------------------------------------------------- /proton_driver/columns/numpy/service.py: -------------------------------------------------------------------------------- 1 | from ...
import errors 2 | from .datecolumn import NumpyDateColumn 3 | from .datetimecolumn import create_numpy_datetime_column 4 | from .floatcolumn import NumpyFloat32Column, NumpyFloat64Column 5 | from .intcolumn import ( 6 | NumpyInt8Column, NumpyInt16Column, NumpyInt32Column, NumpyInt64Column, 7 | NumpyUInt8Column, NumpyUInt16Column, NumpyUInt32Column, NumpyUInt64Column 8 | ) 9 | from .lowcardinalitycolumn import create_numpy_low_cardinality_column 10 | from .stringcolumn import create_string_column 11 | from ..nullablecolumn import create_nullable_column 12 | 13 | column_by_type = {c.ch_type: c for c in [ 14 | NumpyDateColumn, 15 | NumpyFloat32Column, NumpyFloat64Column, 16 | NumpyInt8Column, NumpyInt16Column, NumpyInt32Column, NumpyInt64Column, 17 | NumpyUInt8Column, NumpyUInt16Column, NumpyUInt32Column, NumpyUInt64Column 18 | ]} 19 | 20 | 21 | def get_numpy_column_by_spec(spec, column_options): 22 | def create_column_with_options(x): 23 | return get_numpy_column_by_spec(x, column_options) 24 | 25 | if spec == 'string' or spec.startswith('fixed_string'): 26 | return create_string_column(spec, column_options) 27 | 28 | elif spec.startswith('datetime'): 29 | return create_numpy_datetime_column(spec, column_options) 30 | 31 | elif spec.startswith('nullable'): 32 | return create_nullable_column(spec, create_column_with_options) 33 | 34 | elif spec.startswith('low_cardinality'): 35 | return create_numpy_low_cardinality_column(spec, 36 | create_column_with_options) 37 | else: 38 | if spec in column_by_type: 39 | cls = column_by_type[spec] 40 | return cls(**column_options) 41 | 42 | raise errors.UnknownTypeError('Unknown type {}'.format(spec)) 43 | -------------------------------------------------------------------------------- /proton_driver/columns/numpy/stringcolumn.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from ... 
import defines 4 | from .base import NumpyColumn 5 | 6 | 7 | class NumpyStringColumn(NumpyColumn): 8 | null_value = '' 9 | 10 | default_encoding = defines.STRINGS_ENCODING 11 | 12 | def __init__(self, encoding=default_encoding, **kwargs): 13 | self.encoding = encoding 14 | super(NumpyStringColumn, self).__init__(**kwargs) 15 | 16 | def read_items(self, n_items, buf): 17 | return np.array( 18 | buf.read_strings(n_items, encoding=self.encoding), dtype=self.dtype 19 | ) 20 | 21 | def write_items(self, items, buf): 22 | return buf.write_strings(items.tolist(), encoding=self.encoding) 23 | 24 | 25 | class NumpyByteStringColumn(NumpyColumn): 26 | null_value = b'' 27 | 28 | def read_items(self, n_items, buf): 29 | return np.array(buf.read_strings(n_items), dtype=self.dtype) 30 | 31 | def write_items(self, items, buf): 32 | return buf.write_strings(items.tolist()) 33 | 34 | 35 | class NumpyFixedString(NumpyStringColumn): 36 | def __init__(self, length, **kwargs): 37 | self.length = length 38 | super(NumpyFixedString, self).__init__(**kwargs) 39 | 40 | def read_items(self, n_items, buf): 41 | return np.array(buf.read_fixed_strings( 42 | n_items, self.length, encoding=self.encoding 43 | ), dtype=self.dtype) 44 | 45 | def write_items(self, items, buf): 46 | return buf.write_fixed_strings( 47 | items.tolist(), self.length, encoding=self.encoding 48 | ) 49 | 50 | 51 | class NumpyByteFixedString(NumpyByteStringColumn): 52 | def __init__(self, length, **kwargs): 53 | self.length = length 54 | super(NumpyByteFixedString, self).__init__(**kwargs) 55 | 56 | def read_items(self, n_items, buf): 57 | return np.array( 58 | buf.read_fixed_strings(n_items, self.length), dtype=self.dtype 59 | ) 60 | 61 | def write_items(self, items, buf): 62 | return buf.write_fixed_strings(items.tolist(), self.length) 63 | 64 | 65 | def create_string_column(spec, column_options): 66 | client_settings = column_options['context'].client_settings 67 | strings_as_bytes = client_settings['strings_as_bytes'] 68 | encoding = client_settings.get( 69 | 'strings_encoding', NumpyStringColumn.default_encoding 70 | ) 71 | 72 | if spec == 'string': 73 | cls = NumpyByteStringColumn if strings_as_bytes else NumpyStringColumn 74 | return cls(encoding=encoding, **column_options) 75 | else: 76 | length = int(spec[13:-1]) 77 | cls = NumpyByteFixedString if strings_as_bytes else NumpyFixedString 78 | return cls(length, encoding=encoding, **column_options) 79 | -------------------------------------------------------------------------------- /proton_driver/columns/simpleaggregatefunctioncolumn.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | def create_simple_aggregate_function_column(spec, column_by_spec_getter): 4 | # SimpleAggregateFunction(Func, Type) -> Type 5 | inner = spec[24:-1].split(',', 1)[1].strip() 6 | nested = column_by_spec_getter(inner) 7 | return nested 8 | -------------------------------------------------------------------------------- /proton_driver/columns/stringcolumn.py: -------------------------------------------------------------------------------- 1 | 2 | from .. 
import defines 3 | from .base import Column 4 | 5 | 6 | class String(Column): 7 | ch_type = 'string' 8 | py_types = (str, ) 9 | null_value = '' 10 | 11 | default_encoding = defines.STRINGS_ENCODING 12 | 13 | def __init__(self, encoding=default_encoding, **kwargs): 14 | self.encoding = encoding 15 | super(String, self).__init__(**kwargs) 16 | 17 | def write_items(self, items, buf): 18 | buf.write_strings(items, encoding=self.encoding) 19 | 20 | def read_items(self, n_items, buf): 21 | return buf.read_strings(n_items, encoding=self.encoding) 22 | 23 | 24 | class ByteString(String): 25 | py_types = (bytes, ) 26 | null_value = b'' 27 | 28 | def write_items(self, items, buf): 29 | buf.write_strings(items) 30 | 31 | def read_items(self, n_items, buf): 32 | return buf.read_strings(n_items) 33 | 34 | 35 | class FixedString(String): 36 | ch_type = 'fixed_string' 37 | 38 | def __init__(self, length, **kwargs): 39 | self.length = length 40 | super(FixedString, self).__init__(**kwargs) 41 | 42 | def read_items(self, n_items, buf): 43 | return buf.read_fixed_strings( 44 | n_items, self.length, encoding=self.encoding 45 | ) 46 | 47 | def write_items(self, items, buf): 48 | buf.write_fixed_strings(items, self.length, encoding=self.encoding) 49 | 50 | 51 | class ByteFixedString(FixedString): 52 | py_types = (bytearray, bytes) 53 | null_value = b'' 54 | 55 | def read_items(self, n_items, buf): 56 | return buf.read_fixed_strings(n_items, self.length) 57 | 58 | def write_items(self, items, buf): 59 | buf.write_fixed_strings(items, self.length) 60 | 61 | 62 | def create_string_column(spec, column_options): 63 | client_settings = column_options['context'].client_settings 64 | strings_as_bytes = client_settings['strings_as_bytes'] 65 | encoding = client_settings.get('strings_encoding', String.default_encoding) 66 | 67 | if spec == 'string': 68 | cls = ByteString if strings_as_bytes else String 69 | return cls(encoding=encoding, **column_options) 70 | else: 71 | length_str = spec[12:-1] 72 | if "(" in length_str: 73 | length_str = length_str.replace("(", "") 74 | length = int(length_str) 75 | cls = ByteFixedString if strings_as_bytes else FixedString 76 | return cls(length, encoding=encoding, **column_options) 77 | -------------------------------------------------------------------------------- /proton_driver/columns/tuplecolumn.py: -------------------------------------------------------------------------------- 1 | 2 | from .base import Column 3 | from .util import get_inner_columns_with_types 4 | 5 | 6 | class TupleColumn(Column): 7 | py_types = (list, tuple) 8 | 9 | def __init__(self, names, nested_columns, **kwargs): 10 | self.names = names 11 | self.nested_columns = nested_columns 12 | client_settings = kwargs['context'].client_settings 13 | self.namedtuple_as_json = client_settings.get( 14 | 'namedtuple_as_json', False 15 | ) 16 | 17 | super(TupleColumn, self).__init__(**kwargs) 18 | self.null_value = tuple(x.null_value for x in nested_columns) 19 | 20 | def write_data(self, items, buf): 21 | items = self.prepare_items(items) 22 | items = list(zip(*items)) 23 | 24 | for i, x in enumerate(self.nested_columns): 25 | x.write_data(list(items[i]), buf) 26 | 27 | def write_items(self, items, buf): 28 | return self.write_data(items, buf) 29 | 30 | def read_data(self, n_items, buf): 31 | rv = [x.read_data(n_items, buf) for x in self.nested_columns] 32 | rv = list(zip(*rv)) 33 | 34 | if self.names[0] and self.namedtuple_as_json: 35 | return [dict(zip(self.names, x)) for x in rv] 36 | else: 37 | return rv 38 | 39 | 
def read_items(self, n_items, buf): 40 | return self.read_data(n_items, buf) 41 | 42 | def read_state_prefix(self, buf): 43 | super(TupleColumn, self).read_state_prefix(buf) 44 | 45 | for x in self.nested_columns: 46 | x.read_state_prefix(buf) 47 | 48 | def write_state_prefix(self, buf): 49 | super(TupleColumn, self).write_state_prefix(buf) 50 | 51 | for x in self.nested_columns: 52 | x.write_state_prefix(buf) 53 | 54 | 55 | def create_tuple_column(spec, column_by_spec_getter, column_options): 56 | columns_with_types = get_inner_columns_with_types('tuple', spec) 57 | names, types = zip(*columns_with_types) 58 | 59 | return TupleColumn(names, [column_by_spec_getter(x) for x in types], 60 | **column_options) 61 | -------------------------------------------------------------------------------- /proton_driver/columns/util.py: -------------------------------------------------------------------------------- 1 | 2 | def get_inner_spec(column_name, spec): 3 | brackets = 0 4 | offset = len(column_name) 5 | 6 | for i, ch in enumerate(spec[offset:], offset): 7 | if ch == '(': 8 | brackets += 1 9 | 10 | elif ch == ')': 11 | brackets -= 1 12 | 13 | if brackets == 0: 14 | break 15 | 16 | return spec[offset + 1:i] 17 | 18 | 19 | def get_inner_columns(column_name, spec): 20 | inner_spec = get_inner_spec(column_name, spec) 21 | brackets = 0 22 | column_begin = 0 23 | 24 | columns = [] 25 | for i, x in enumerate(inner_spec + ','): 26 | if x == ',': 27 | if brackets == 0: 28 | columns.append(inner_spec[column_begin:i]) 29 | column_begin = i + 1 30 | elif x == '(': 31 | brackets += 1 32 | elif x == ')': 33 | brackets -= 1 34 | elif x == ' ': 35 | if brackets == 0: 36 | column_begin = i + 1 37 | return columns 38 | 39 | 40 | def get_inner_columns_with_types(column_name, spec): 41 | inner_spec = get_inner_spec(column_name, spec) 42 | inner_spec = inner_spec.strip() 43 | brackets = 0 44 | prev_comma = 0 45 | prev_space = 0 46 | 47 | columns = [] 48 | for i, x in enumerate(inner_spec.strip() + ','): 49 | if x == ',': 50 | if brackets == 0: 51 | columns.append(( 52 | inner_spec[prev_comma:prev_space].strip(), 53 | inner_spec[prev_space:i] 54 | )) 55 | prev_comma = i + 1 56 | elif x == '(': 57 | brackets += 1 58 | elif x == ')': 59 | brackets -= 1 60 | elif x == ' ': 61 | if brackets == 0: 62 | prev_space = i + 1 63 | return columns 64 | -------------------------------------------------------------------------------- /proton_driver/columns/uuidcolumn.py: -------------------------------------------------------------------------------- 1 | from uuid import UUID 2 | 3 | from .base import FormatColumn 4 | from .. import errors 5 | from ..writer import MAX_UINT64 6 | 7 | 8 | class UUIDColumn(FormatColumn): 9 | ch_type = 'uuid' 10 | py_types = (str, UUID) 11 | format = 'Q' 12 | 13 | # UUID is stored by two uint64 numbers. 
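# e.g. UUID('00000000-0000-0001-0000-000000000002') is written as the pair (1, 2): high 64 bits first, then low 64 bits.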
14 | def write_items(self, items, buf): 15 | n_items = len(items) 16 | 17 | uint_64_pairs = [None] * 2 * n_items 18 | for i, x in enumerate(items): 19 | i2 = 2 * i 20 | uint_64_pairs[i2] = (x >> 64) & MAX_UINT64 21 | uint_64_pairs[i2 + 1] = x & MAX_UINT64 22 | 23 | s = self.make_struct(2 * n_items) 24 | buf.write(s.pack(*uint_64_pairs)) 25 | 26 | def read_items(self, n_items, buf): 27 | # TODO: cythonize 28 | s = self.make_struct(2 * n_items) 29 | items = s.unpack(buf.read(s.size)) 30 | 31 | uint_128_items = [None] * n_items 32 | for i in range(n_items): 33 | i2 = 2 * i 34 | uint_128_items[i] = (items[i2] << 64) + items[i2 + 1] 35 | 36 | return tuple(uint_128_items) 37 | 38 | def after_read_items(self, items, nulls_map=None): 39 | if nulls_map is None: 40 | return tuple(UUID(int=item) for item in items) 41 | else: 42 | return tuple( 43 | (None if is_null else UUID(int=items[i])) 44 | for i, is_null in enumerate(nulls_map) 45 | ) 46 | 47 | def before_write_items(self, items, nulls_map=None): 48 | null_value = self.null_value 49 | 50 | for i, item in enumerate(items): 51 | if nulls_map and nulls_map[i]: 52 | items[i] = null_value 53 | continue 54 | 55 | try: 56 | if not isinstance(item, UUID): 57 | item = UUID(item) 58 | 59 | except ValueError: 60 | raise errors.CannotParseUuidError( 61 | "Cannot parse uuid '{}'".format(item) 62 | ) 63 | 64 | items[i] = item.int 65 | -------------------------------------------------------------------------------- /proton_driver/compression/__init__.py: -------------------------------------------------------------------------------- 1 | import importlib 2 | 3 | from .. import errors 4 | from ..protocol import CompressionMethodByte 5 | 6 | 7 | def get_compressor_cls(alg): 8 | try: 9 | module = importlib.import_module('.' + alg, __name__) 10 | return module.Compressor 11 | 12 | except ImportError: 13 | raise errors.UnknownCompressionMethod( 14 | "Unknown compression method: '{}'".format(alg) 15 | ) 16 | 17 | 18 | def get_decompressor_cls(method_type): 19 | if method_type == CompressionMethodByte.LZ4: 20 | module = importlib.import_module('.lz4', __name__) 21 | 22 | elif method_type == CompressionMethodByte.ZSTD: 23 | module = importlib.import_module('.zstd', __name__) 24 | 25 | else: 26 | raise errors.UnknownCompressionMethod() 27 | 28 | return module.Decompressor 29 | -------------------------------------------------------------------------------- /proton_driver/compression/base.py: -------------------------------------------------------------------------------- 1 | from io import BytesIO 2 | 3 | from ..reader import read_binary_uint32 4 | from ..writer import write_binary_uint8, write_binary_uint32 5 | from .. import errors 6 | 7 | try: 8 | from clickhouse_cityhash.cityhash import CityHash128 9 | except ImportError: 10 | raise RuntimeError( 11 | 'Package clickhouse-cityhash is required to use compression' 12 | ) 13 | 14 | 15 | class BaseCompressor(object): 16 | """ 17 | Partial file-like object with write method. 
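Bytes passed to write() are only buffered; get_compressed_data() then emits one block: a uint32 total block size (header included), a uint32 uncompressed size and the compressed payload. The method byte and checksum are assumed to be written by the calling stream.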
18 | """ 19 | method = None 20 | method_byte = None 21 | 22 | def __init__(self): 23 | self.data = BytesIO() 24 | 25 | super(BaseCompressor, self).__init__() 26 | 27 | def get_value(self): 28 | value = self.data.getvalue() 29 | self.data.seek(0) 30 | self.data.truncate() 31 | return value 32 | 33 | def write(self, p_str): 34 | self.data.write(p_str) 35 | 36 | def compress_data(self, data): 37 | raise NotImplementedError 38 | 39 | def get_compressed_data(self, extra_header_size): 40 | rv = BytesIO() 41 | 42 | data = self.get_value() 43 | compressed = self.compress_data(data) 44 | 45 | header_size = extra_header_size + 4 + 4 # sizes 46 | 47 | write_binary_uint32(header_size + len(compressed), rv) 48 | write_binary_uint32(len(data), rv) 49 | rv.write(compressed) 50 | 51 | return rv.getvalue() 52 | 53 | 54 | class BaseDecompressor(object): 55 | method = None 56 | method_byte = None 57 | 58 | def __init__(self, real_stream): 59 | self.stream = real_stream 60 | super(BaseDecompressor, self).__init__() 61 | 62 | def decompress_data(self, data, uncompressed_size): 63 | raise NotImplementedError 64 | 65 | def check_hash(self, compressed_data, compressed_hash): 66 | if CityHash128(compressed_data) != compressed_hash: 67 | raise errors.ChecksumDoesntMatchError() 68 | 69 | def get_decompressed_data(self, method_byte, compressed_hash, 70 | extra_header_size): 71 | size_with_header = read_binary_uint32(self.stream) 72 | compressed_size = size_with_header - extra_header_size - 4 73 | 74 | compressed = BytesIO(self.stream.read(compressed_size)) 75 | 76 | block_check = BytesIO() 77 | write_binary_uint8(method_byte, block_check) 78 | write_binary_uint32(size_with_header, block_check) 79 | block_check.write(compressed.getvalue()) 80 | 81 | self.check_hash(block_check.getvalue(), compressed_hash) 82 | 83 | uncompressed_size = read_binary_uint32(compressed) 84 | 85 | compressed = compressed.read(compressed_size - 4) 86 | 87 | return self.decompress_data(compressed, uncompressed_size) 88 | -------------------------------------------------------------------------------- /proton_driver/compression/lz4.py: -------------------------------------------------------------------------------- 1 | from lz4 import block 2 | 3 | from .base import BaseCompressor, BaseDecompressor 4 | from ..protocol import CompressionMethod, CompressionMethodByte 5 | 6 | 7 | class Compressor(BaseCompressor): 8 | method = CompressionMethod.LZ4 9 | method_byte = CompressionMethodByte.LZ4 10 | mode = 'default' 11 | 12 | def compress_data(self, data): 13 | return block.compress(data, store_size=False, mode=self.mode) 14 | 15 | 16 | class Decompressor(BaseDecompressor): 17 | method = CompressionMethod.LZ4 18 | method_byte = CompressionMethodByte.LZ4 19 | 20 | def decompress_data(self, data, uncompressed_size): 21 | return block.decompress(data, uncompressed_size=uncompressed_size) 22 | -------------------------------------------------------------------------------- /proton_driver/compression/lz4hc.py: -------------------------------------------------------------------------------- 1 | from .lz4 import Compressor as BaseCompressor, Decompressor as BaseDecompressor 2 | 3 | 4 | class Compressor(BaseCompressor): 5 | mode = 'high_compression' 6 | 7 | 8 | class Decompressor(BaseDecompressor): 9 | pass 10 | -------------------------------------------------------------------------------- /proton_driver/compression/zstd.py: -------------------------------------------------------------------------------- 1 | import zstd 2 | 3 | from .base import 
BaseCompressor, BaseDecompressor 4 | from ..protocol import CompressionMethod, CompressionMethodByte 5 | 6 | 7 | class Compressor(BaseCompressor): 8 | method = CompressionMethod.ZSTD 9 | method_byte = CompressionMethodByte.ZSTD 10 | 11 | def compress_data(self, data): 12 | return zstd.compress(data) 13 | 14 | 15 | class Decompressor(BaseDecompressor): 16 | method = CompressionMethod.ZSTD 17 | method_byte = CompressionMethodByte.ZSTD 18 | 19 | def decompress_data(self, data, uncompressed_size): 20 | return zstd.decompress(data) 21 | -------------------------------------------------------------------------------- /proton_driver/context.py: -------------------------------------------------------------------------------- 1 | 2 | class Context(object): 3 | def __init__(self): 4 | self._server_info = None 5 | self._settings = None 6 | self._client_settings = None 7 | super(Context, self).__init__() 8 | 9 | @property 10 | def server_info(self): 11 | return self._server_info 12 | 13 | @server_info.setter 14 | def server_info(self, value): 15 | self._server_info = value 16 | 17 | @property 18 | def settings(self): 19 | return self._settings.copy() 20 | 21 | @settings.setter 22 | def settings(self, value): 23 | self._settings = value.copy() 24 | 25 | @property 26 | def client_settings(self): 27 | return self._client_settings.copy() 28 | 29 | @client_settings.setter 30 | def client_settings(self, value): 31 | self._client_settings = value.copy() 32 | 33 | def __repr__(self): 34 | return '<Context(server_info=%s, client_settings=%s, settings=%s)>' % ( 35 | self._server_info, self._client_settings, self._settings 36 | ) 37 | -------------------------------------------------------------------------------- /proton_driver/dbapi/__init__.py: -------------------------------------------------------------------------------- 1 | from .connection import Connection 2 | from .errors import ( 3 | Warning, Error, DataError, DatabaseError, ProgrammingError, IntegrityError, 4 | InterfaceError, InternalError, NotSupportedError, OperationalError 5 | ) 6 | from .. import defines 7 | 8 | apilevel = '2.0' 9 | 10 | threadsafety = 2 11 | 12 | paramstyle = 'pyformat' 13 | 14 | 15 | def connect(dsn=None, host=None, 16 | user=defines.DEFAULT_USER, password=defines.DEFAULT_PASSWORD, 17 | port=defines.DEFAULT_PORT, database=defines.DEFAULT_DATABASE, 18 | **kwargs): 19 | """ 20 | Create a new database connection. 21 | 22 | The connection can be specified via DSN: 23 | 24 | ``conn = connect("proton://localhost/test?param1=value1&...")`` 25 | 26 | or using database and credentials arguments: 27 | 28 | ``conn = connect(database="test", user="default", password="default", 29 | host="localhost", **kwargs)`` 30 | 31 | The basic connection parameters are: 32 | 33 | - *host*: host with a running Proton server. 34 | - *port*: port the Proton server is bound to. 35 | - *database*: database to connect to. 36 | - *user*: database user. 37 | - *password*: user's password. 38 | 39 | See defaults in :data:`~proton_driver.connection.Connection` 40 | constructor. 41 | 42 | DSN or host is required. 43 | 44 | Any other keyword parameter will be passed to the underlying Connection 45 | class. 46 | 47 | :return: a new connection.
48 | """ 49 | 50 | if dsn is None and host is None: 51 | raise ValueError('host or dsn is required') 52 | 53 | return Connection(dsn=dsn, user=user, password=password, host=host, 54 | port=port, database=database, **kwargs) 55 | 56 | 57 | __all__ = [ 58 | 'connect', 59 | 'Warning', 'Error', 'DataError', 'DatabaseError', 'ProgrammingError', 60 | 'IntegrityError', 'InterfaceError', 'InternalError', 'NotSupportedError', 61 | 'OperationalError' 62 | ] 63 | -------------------------------------------------------------------------------- /proton_driver/dbapi/connection.py: -------------------------------------------------------------------------------- 1 | from ..client import Client 2 | from .. import defines 3 | from .cursor import Cursor 4 | from .errors import InterfaceError 5 | 6 | 7 | class Connection(object): 8 | """ 9 | Creates new Connection for accessing Proton database. 10 | 11 | Connection is just wrapper for handling multiple cursors (clients) and 12 | do not initiate actual connections to the Proton server. 13 | 14 | See parameters description in 15 | :data:`~proton_driver.connection.Connection`. 16 | """ 17 | def __init__(self, dsn=None, host=None, 18 | user=defines.DEFAULT_USER, password=defines.DEFAULT_PASSWORD, 19 | port=defines.DEFAULT_PORT, database=defines.DEFAULT_DATABASE, 20 | **kwargs): 21 | self.cursors = [] 22 | 23 | self.dsn = dsn 24 | self.user = user 25 | self.password = password 26 | self.host = host 27 | self.port = port 28 | self.database = database 29 | self.connection_kwargs = kwargs 30 | self.is_closed = False 31 | self._hosts = None 32 | super(Connection, self).__init__() 33 | 34 | def __repr__(self): 35 | return ''.format( 36 | id(self), self.is_closed 37 | ) 38 | 39 | # Context manager integrations. 40 | def __enter__(self): 41 | return self 42 | 43 | def __exit__(self, exc_type, exc_val, exc_tb): 44 | self.close() 45 | 46 | def _make_client(self): 47 | """ 48 | :return: a new Client instance. 49 | """ 50 | if self.dsn is not None: 51 | return Client.from_url(self.dsn) 52 | 53 | return Client(self.host, port=self.port, 54 | user=self.user, password=self.password, 55 | database=self.database, **self.connection_kwargs) 56 | 57 | def close(self): 58 | """ 59 | Close the connection now. The connection will be unusable from this 60 | point forward; an :data:`~proton_driver.dbapi.Error` (or subclass) 61 | exception will be raised if any operation is attempted with the 62 | connection. The same applies to all cursor objects trying to use the 63 | connection. 64 | """ 65 | for cursor in self.cursors: 66 | cursor.close() 67 | 68 | self.is_closed = True 69 | 70 | def commit(self): 71 | """ 72 | Do nothing since Proton has no transactions. 73 | """ 74 | pass 75 | 76 | def rollback(self): 77 | """ 78 | Do nothing since Proton has no transactions. 79 | """ 80 | pass 81 | 82 | def cursor(self, cursor_factory=None): 83 | """ 84 | :param cursor_factory: Argument can be used to create non-standard 85 | cursors. 86 | :return: a new cursor object using the connection. 
87 | """ 88 | if self.is_closed: 89 | raise InterfaceError('connection already closed') 90 | 91 | client = self._make_client() 92 | if self._hosts is None: 93 | self._hosts = client.connection.hosts 94 | else: 95 | client.connection.hosts = self._hosts 96 | cursor_factory = cursor_factory or Cursor 97 | cursor = cursor_factory(client, self) 98 | self.cursors.append(cursor) 99 | return cursor 100 | -------------------------------------------------------------------------------- /proton_driver/dbapi/errors.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | class Warning(Exception): 4 | pass 5 | 6 | 7 | class Error(Exception): 8 | pass 9 | 10 | 11 | class InterfaceError(Error): 12 | pass 13 | 14 | 15 | class DatabaseError(Error): 16 | pass 17 | 18 | 19 | class InternalError(DatabaseError): 20 | pass 21 | 22 | 23 | class OperationalError(DatabaseError): 24 | pass 25 | 26 | 27 | class ProgrammingError(DatabaseError): 28 | pass 29 | 30 | 31 | class IntegrityError(DatabaseError): 32 | pass 33 | 34 | 35 | class DataError(DatabaseError): 36 | pass 37 | 38 | 39 | class NotSupportedError(DatabaseError): 40 | pass 41 | -------------------------------------------------------------------------------- /proton_driver/dbapi/extras.py: -------------------------------------------------------------------------------- 1 | import re 2 | from collections import namedtuple 3 | from functools import lru_cache 4 | 5 | from .cursor import Cursor 6 | 7 | 8 | class DictCursor(Cursor): 9 | """ 10 | A cursor that generates results as :class:`dict`. 11 | 12 | ``fetch*()`` methods will return dicts instead of tuples. 13 | """ 14 | 15 | def fetchone(self): 16 | rv = super(DictCursor, self).fetchone() 17 | if rv is not None: 18 | rv = dict(zip(self._columns, rv)) 19 | return rv 20 | 21 | def fetchmany(self, size=None): 22 | rv = super(DictCursor, self).fetchmany(size=size) 23 | return [dict(zip(self._columns, x)) for x in rv] 24 | 25 | def fetchall(self): 26 | rv = super(DictCursor, self).fetchall() 27 | return [dict(zip(self._columns, x)) for x in rv] 28 | 29 | 30 | class NamedTupleCursor(Cursor): 31 | """ 32 | A cursor that generates results as named tuples created by 33 | :func:`~collections.namedtuple`. 34 | 35 | ``fetch*()`` methods will return named tuples instead of regular tuples, so 36 | their elements can be accessed both as regular numeric items as well as 37 | attributes. 38 | """ 39 | 40 | # ascii except alnum and underscore 41 | _re_clean = re.compile( 42 | '[' + re.escape(' !"#$%&\'()*+,-./:;<=>?@[\\]^`{|}~') + ']') 43 | 44 | @classmethod 45 | @lru_cache(512) 46 | def _make_nt(self, key): 47 | fields = [] 48 | for s in key: 49 | s = self._re_clean.sub('_', s) 50 | # Python identifier cannot start with numbers, namedtuple fields 51 | # cannot start with underscore. 
52 | if s[0] == '_' or '0' <= s[0] <= '9': 53 | s = 'f' + s 54 | fields.append(s) 55 | 56 | return namedtuple('Record', fields) 57 | 58 | def fetchone(self): 59 | rv = super(NamedTupleCursor, self).fetchone() 60 | if rv is not None: 61 | nt = self._make_nt(self._columns) 62 | rv = nt(*rv) 63 | return rv 64 | 65 | def fetchmany(self, size=None): 66 | rv = super(NamedTupleCursor, self).fetchmany(size=size) 67 | nt = self._make_nt(self._columns) 68 | return [nt(*x) for x in rv] 69 | 70 | def fetchall(self): 71 | rv = super(NamedTupleCursor, self).fetchall() 72 | nt = self._make_nt(self._columns) 73 | return [nt(*x) for x in rv] 74 | -------------------------------------------------------------------------------- /proton_driver/defines.py: -------------------------------------------------------------------------------- 1 | 2 | DEFAULT_DATABASE = 'default' 3 | DEFAULT_USER = 'default' 4 | DEFAULT_PASSWORD = '' 5 | 6 | DEFAULT_PORT = 8463 7 | DEFAULT_SECURE_PORT = 9440 8 | 9 | DBMS_MIN_REVISION_WITH_TEMPORARY_TABLES = 50264 10 | DBMS_MIN_REVISION_WITH_TOTAL_ROWS_IN_PROGRESS = 51554 11 | DBMS_MIN_REVISION_WITH_BLOCK_INFO = 51903 12 | # Legacy above. 13 | DBMS_MIN_REVISION_WITH_CLIENT_INFO = 54032 14 | DBMS_MIN_REVISION_WITH_SERVER_TIMEZONE = 54058 15 | DBMS_MIN_REVISION_WITH_QUOTA_KEY_IN_CLIENT_INFO = 54060 16 | DBMS_MIN_REVISION_WITH_SERVER_DISPLAY_NAME = 54372 17 | DBMS_MIN_REVISION_WITH_VERSION_PATCH = 54401 18 | DBMS_MIN_REVISION_WITH_SERVER_LOGS = 54406 19 | DBMS_MIN_REVISION_WITH_COLUMN_DEFAULTS_METADATA = 54410 20 | DBMS_MIN_REVISION_WITH_CLIENT_WRITE_INFO = 54420 21 | DBMS_MIN_REVISION_WITH_SETTINGS_SERIALIZED_AS_STRINGS = 54429 22 | DBMS_MIN_REVISION_WITH_INTERSERVER_SECRET = 54441 23 | DBMS_MIN_REVISION_WITH_OPENTELEMETRY = 54442 24 | DBMS_MIN_PROTOCOL_VERSION_WITH_DISTRIBUTED_DEPTH = 54448 25 | DBMS_MIN_PROTOCOL_VERSION_WITH_INITIAL_QUERY_START_TIME = 54449 26 | DBMS_MIN_PROTOCOL_VERSION_WITH_INCREMENTAL_PROFILE_EVENTS = 54451 27 | DBMS_MIN_REVISION_WITH_PARALLEL_REPLICAS = 54453 28 | 29 | # Timeouts 30 | DBMS_DEFAULT_CONNECT_TIMEOUT_SEC = 10 31 | DBMS_DEFAULT_TIMEOUT_SEC = 300 32 | 33 | DBMS_DEFAULT_SYNC_REQUEST_TIMEOUT_SEC = 5 34 | 35 | DEFAULT_COMPRESS_BLOCK_SIZE = 1048576 36 | DEFAULT_INSERT_BLOCK_SIZE = 1048576 37 | 38 | DBMS_NAME = 'Proton' 39 | CLIENT_NAME = 'python-driver' 40 | CLIENT_VERSION_MAJOR = 20 41 | CLIENT_VERSION_MINOR = 10 42 | CLIENT_VERSION_PATCH = 2 43 | CLIENT_REVISION = 54453 44 | 45 | BUFFER_SIZE = 1048576 46 | 47 | STRINGS_ENCODING = 'utf-8' 48 | -------------------------------------------------------------------------------- /proton_driver/log.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | logger = logging.getLogger(__name__) 4 | 5 | 6 | log_priorities = ( 7 | 'Unknown', 8 | 'Fatal', 9 | 'Critical', 10 | 'Error', 11 | 'Warning', 12 | 'Notice', 13 | 'Information', 14 | 'Debug', 15 | 'Trace' 16 | ) 17 | 18 | 19 | def log_block(block): 20 | if block is None: 21 | return 22 | 23 | column_names = [x[0] for x in block.columns_with_types] 24 | 25 | for row in block.get_rows(): 26 | row = dict(zip(column_names, row)) 27 | 28 | if 1 <= row['priority'] <= 8: 29 | priority = log_priorities[row['priority']] 30 | else: 31 | priority = row['priority'] 32 | 33 | # thread_number in servers prior to 20.x 34 | thread_id = row.get('thread_id') or row['thread_number'] 35 | 36 | logger.info( 37 | '[ %s ] [ %s ] {%s} <%s> %s: %s', 38 | row['host_name'], 39 | thread_id, 40 | row['query_id'], 41 | priority, 42 | row['source'], 43
| row['text'] 44 | ) 45 | -------------------------------------------------------------------------------- /proton_driver/numpy/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/timeplus-io/proton-python-driver/70a1c6313cd7af8f6fc12b4740fc6eb01027d4b9/proton_driver/numpy/__init__.py -------------------------------------------------------------------------------- /proton_driver/numpy/block.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from ..block import ColumnOrientedBlock 4 | 5 | 6 | class NumpyColumnOrientedBlock(ColumnOrientedBlock): 7 | def transposed(self): 8 | return np.transpose(self.data) 9 | -------------------------------------------------------------------------------- /proton_driver/numpy/helpers.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | 4 | 5 | def column_chunks(columns, n): 6 | for column in columns: 7 | if not isinstance(column, (np.ndarray, pd.DatetimeIndex)): 8 | raise TypeError( 9 | 'Unsupported column type: {}. ' 10 | 'ndarray/DatetimeIndex is expected.' 11 | .format(type(column)) 12 | ) 13 | 14 | # create chunk generator for every column 15 | chunked = [ 16 | iter(np.array_split(c, len(c) // n) if len(c) > n else [c]) 17 | for c in columns 18 | ] 19 | 20 | while True: 21 | # get next chunk for every column 22 | item = [next(column, []) for column in chunked] 23 | if not any(len(x) for x in item): 24 | break 25 | yield item 26 | -------------------------------------------------------------------------------- /proton_driver/numpy/result.py: -------------------------------------------------------------------------------- 1 | from itertools import chain 2 | 3 | import numpy as np 4 | import pandas as pd 5 | from pandas.api.types import union_categoricals 6 | 7 | from ..progress import Progress 8 | from ..result import QueryResult 9 | 10 | 11 | class NumpyQueryResult(QueryResult): 12 | """ 13 | Stores query result from multiple blocks as numpy arrays. 14 | """ 15 | 16 | def store(self, packet): 17 | block = getattr(packet, 'block', None) 18 | if block is None: 19 | return 20 | 21 | # Header block contains no rows. Pick columns from it. 22 | if block.num_rows: 23 | if self.columnar: 24 | self.data.append(block.get_columns()) 25 | else: 26 | self.data.extend(block.get_rows()) 27 | 28 | elif not self.columns_with_types: 29 | self.columns_with_types = block.columns_with_types 30 | 31 | def get_result(self): 32 | """ 33 | :return: stored query result. 34 | """ 35 | 36 | for packet in self.packet_generator: 37 | self.store(packet) 38 | 39 | if self.columnar: 40 | data = [] 41 | # Transpose to a list of columns, each column is list of chunks 42 | for column_chunks in zip(*self.data): 43 | # Concatenate chunks for each column 44 | if isinstance(column_chunks[0], np.ndarray): 45 | column = np.concatenate(column_chunks) 46 | elif isinstance(column_chunks[0], pd.Categorical): 47 | column = union_categoricals(column_chunks) 48 | else: 49 | column = tuple(chain.from_iterable(column_chunks)) 50 | data.append(column) 51 | else: 52 | data = self.data 53 | 54 | if self.with_column_types: 55 | return data, self.columns_with_types 56 | else: 57 | return data 58 | 59 | 60 | class NumpyProgressQueryResult(NumpyQueryResult): 61 | """ 62 | Stores query result and progress information from multiple blocks. 63 | Provides iteration over query progress. 
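Usage sketch (an illustrative addition, inferred from ``__next__`` and ``get_result`` below): iterating the result yields ``(rows_read, total_rows)`` pairs as progress packets arrive; a final ``get_result()`` call drains the remaining packets and returns the stored data.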
64 | """ 65 | 66 | def __init__(self, *args, **kwargs): 67 | self.progress_totals = Progress() 68 | 69 | super(NumpyProgressQueryResult, self).__init__(*args, **kwargs) 70 | 71 | def __iter__(self): 72 | return self 73 | 74 | def __next__(self): 75 | while True: 76 | packet = next(self.packet_generator) 77 | progress_packet = getattr(packet, 'progress', None) 78 | if progress_packet: 79 | self.progress_totals.increment(progress_packet) 80 | return ( 81 | self.progress_totals.rows, self.progress_totals.total_rows 82 | ) 83 | else: 84 | self.store(packet) 85 | 86 | def get_result(self): 87 | # Read all progress packets. 88 | for _ in self: 89 | pass 90 | 91 | return super(NumpyProgressQueryResult, self).get_result() 92 | 93 | 94 | class NumpyIterQueryResult(object): 95 | """ 96 | Provides iteration over returned data by chunks (streaming by chunks). 97 | """ 98 | 99 | def __init__( 100 | self, packet_generator, 101 | with_column_types=False): 102 | self.packet_generator = packet_generator 103 | self.with_column_types = with_column_types 104 | 105 | self.first_block = True 106 | super(NumpyIterQueryResult, self).__init__() 107 | 108 | def __iter__(self): 109 | return self 110 | 111 | def __next__(self): 112 | packet = next(self.packet_generator) 113 | block = getattr(packet, 'block', None) 114 | if block is None: 115 | return [] 116 | 117 | if self.first_block and self.with_column_types: 118 | self.first_block = False 119 | rv = [block.columns_with_types] 120 | rv.extend(block.get_rows()) 121 | return rv 122 | else: 123 | return block.get_rows() 124 | -------------------------------------------------------------------------------- /proton_driver/opentelemetry.py: -------------------------------------------------------------------------------- 1 | 2 | class OpenTelemetryTraceContext(object): 3 | traceparent_tpl = 'xx-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx-xxxxxxxxxxxxxxxx-xx' 4 | translation = str.maketrans('1234567890abcdef', 'xxxxxxxxxxxxxxxx') 5 | 6 | def __init__(self, traceparent, tracestate): 7 | # xx-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx-xxxxxxxxxxxxxxxx-xx 8 | # ^ ^ ^ ^ 9 | # version trace_id span_id flags 10 | 11 | self.trace_id = None # UUID 12 | self.span_id = None # UInt64 13 | self.tracestate = tracestate # String 14 | self.trace_flags = None # UInt8 15 | 16 | if traceparent is not None: 17 | self.parse_traceparent(traceparent) 18 | 19 | super(OpenTelemetryTraceContext, self).__init__() 20 | 21 | def parse_traceparent(self, traceparent): 22 | traceparent = traceparent.lower() 23 | 24 | if len(traceparent) != len(self.traceparent_tpl): 25 | raise ValueError('unexpected length {}, expected {}'.format( 26 | len(traceparent), len(self.traceparent_tpl) 27 | )) 28 | 29 | if traceparent.translate(self.translation) != self.traceparent_tpl: 30 | raise ValueError( 31 | 'Malformed traceparant header: {}'.format(traceparent) 32 | ) 33 | 34 | parts = traceparent.split('-') 35 | version = int(parts[0], 16) 36 | if version != 0: 37 | raise ValueError( 38 | 'unexpected version {}, expected 00'.format(parts[0]) 39 | ) 40 | 41 | self.trace_id = (int(parts[1][16:], 16) << 64) + int(parts[1][:16], 16) 42 | self.span_id = int(parts[2], 16) 43 | self.trace_flags = int(parts[3], 16) 44 | -------------------------------------------------------------------------------- /proton_driver/progress.py: -------------------------------------------------------------------------------- 1 | from . 
import defines 2 | from .varint import read_varint 3 | 4 | 5 | class Progress(object): 6 | def __init__(self): 7 | self.rows = 0 8 | self.bytes = 0 9 | self.total_rows = 0 10 | self.written_rows = 0 11 | self.written_bytes = 0 12 | 13 | super(Progress, self).__init__() 14 | 15 | def read(self, server_revision, fin): 16 | self.rows = read_varint(fin) 17 | self.bytes = read_varint(fin) 18 | 19 | revision = server_revision 20 | if revision >= defines.DBMS_MIN_REVISION_WITH_TOTAL_ROWS_IN_PROGRESS: 21 | self.total_rows = read_varint(fin) 22 | 23 | if revision >= defines.DBMS_MIN_REVISION_WITH_CLIENT_WRITE_INFO: 24 | self.written_rows = read_varint(fin) 25 | self.written_bytes = read_varint(fin) 26 | 27 | def increment(self, another_progress): 28 | self.rows += another_progress.rows 29 | self.bytes += another_progress.bytes 30 | self.total_rows += another_progress.total_rows 31 | self.written_rows += another_progress.written_rows 32 | self.written_bytes += another_progress.written_bytes 33 | -------------------------------------------------------------------------------- /proton_driver/protocol.py: -------------------------------------------------------------------------------- 1 | 2 | class ClientPacketTypes(object): 3 | """ 4 | Packet types that client transmits 5 | """ 6 | # Name, version, revision, default DB 7 | HELLO = 0 8 | 9 | # Query id, query settings, stage up to which the query must be executed, 10 | # whether the compression must be used, query text 11 | # (without data for INSERTs). 12 | QUERY = 1 13 | 14 | # A block of data (compressed or not). 15 | DATA = 2 16 | 17 | # Cancel the query execution. 18 | CANCEL = 3 19 | 20 | # Check that connection to the server is alive. 21 | PING = 4 22 | 23 | # Check status of tables on the server. 24 | TABLES_STATUS_REQUEST = 5 25 | 26 | _types_str = [ 27 | 'Hello', 'Query', 'Data', 'Cancel', 'Ping', 'TablesStatusRequest' 28 | ] 29 | 30 | @classmethod 31 | def to_str(cls, packet): 32 | return 'Unknown packet' if packet > 5 else cls._types_str[packet] 33 | 34 | 35 | class ServerPacketTypes(object): 36 | """ 37 | Packet types that server transmits. 38 | """ 39 | # Name, version, revision. 40 | HELLO = 0 41 | 42 | # A block of data (compressed or not). 43 | DATA = 1 44 | 45 | # The exception during query execution. 46 | EXCEPTION = 2 47 | 48 | # Query execution progress: rows read, bytes read. 49 | PROGRESS = 3 50 | 51 | # Ping response 52 | PONG = 4 53 | 54 | # All packets were transmitted 55 | END_OF_STREAM = 5 56 | 57 | # Packet with profiling info. 58 | PROFILE_INFO = 6 59 | 60 | # A block with totals (compressed or not). 61 | TOTALS = 7 62 | 63 | # A block with minimums and maximums (compressed or not). 64 | EXTREMES = 8 65 | 66 | # A response to TablesStatus request. 67 | TABLES_STATUS_RESPONSE = 9 68 | 69 | # System logs of the query execution 70 | LOG = 10 71 | 72 | # Columns' description for default values calculation 73 | TABLE_COLUMNS = 11 74 | 75 | # List of unique parts ids. 76 | PART_UUIDS = 12 77 | 78 | # String (UUID) describes a request for which next task is needed 79 | READ_TASK_REQUEST = 13 80 | 81 | # Packet with profile events from server. 
82 | PROFILE_EVENTS = 14 83 | 84 | _types_str = [ 85 | 'Hello', 'Data', 'Exception', 'Progress', 'Pong', 'EndOfStream', 86 | 'ProfileInfo', 'Totals', 'Extremes', 'TablesStatusResponse', 'Log', 87 | 'TableColumns', 'PartUUIDs', 'ReadTaskRequest', 'ProfileEvents' 88 | ] 89 | 90 | @classmethod 91 | def to_str(cls, packet): 92 | return 'Unknown packet' if packet > 14 else cls._types_str[packet] 93 | 94 | @classmethod 95 | def strings_in_message(cls, packet): 96 | if packet == cls.TABLE_COLUMNS: 97 | return 2 98 | return 0 99 | 100 | 101 | class Compression(object): 102 | DISABLED = 0 103 | ENABLED = 1 104 | 105 | 106 | class CompressionMethod(object): 107 | LZ4 = 1 108 | LZ4HC = 2 109 | ZSTD = 3 110 | 111 | 112 | class CompressionMethodByte(object): 113 | LZ4 = 0x82 114 | ZSTD = 0x90 115 | -------------------------------------------------------------------------------- /proton_driver/queryprocessingstage.py: -------------------------------------------------------------------------------- 1 | 2 | class QueryProcessingStage(object): 3 | """ 4 | Determines up to which stage a SELECT query should be executed. 5 | """ 6 | FETCH_COLUMNS = 0 7 | WITH_MERGEABLE_STATE = 1 8 | COMPLETE = 2 9 | -------------------------------------------------------------------------------- /proton_driver/reader.py: -------------------------------------------------------------------------------- 1 | from struct import Struct 2 | 3 | from .varint import read_varint 4 | 5 | 6 | def read_binary_str(buf): 7 | length = read_varint(buf) 8 | return read_binary_str_fixed_len(buf, length) 9 | 10 | 11 | def read_binary_bytes(buf): 12 | length = read_varint(buf) 13 | return read_binary_bytes_fixed_len(buf, length) 14 | 15 | 16 | def read_binary_str_fixed_len(buf, length): 17 | return read_binary_bytes_fixed_len(buf, length).decode('utf-8') 18 | 19 | 20 | def read_binary_bytes_fixed_len(buf, length): 21 | return buf.read(length) 22 | 23 | 24 | def read_binary_int(buf, fmt): 25 | """ 26 | Reads int from buffer with provided format. 27 | """ 28 | # Little endian.
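# Illustrative examples (added): fmt='B' unpacks one unsigned byte, fmt='q' a signed 64-bit integer; the '<' prefix forces little-endian order regardless of platform.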
29 | s = Struct('<' + fmt) 30 | return s.unpack(buf.read(s.size))[0] 31 | 32 | 33 | def read_binary_int8(buf): 34 | return read_binary_int(buf, 'b') 35 | 36 | 37 | def read_binary_int16(buf): 38 | return read_binary_int(buf, 'h') 39 | 40 | 41 | def read_binary_int32(buf): 42 | return read_binary_int(buf, 'i') 43 | 44 | 45 | def read_binary_int64(buf): 46 | return read_binary_int(buf, 'q') 47 | 48 | 49 | def read_binary_uint8(buf): 50 | return read_binary_int(buf, 'B') 51 | 52 | 53 | def read_binary_uint16(buf): 54 | return read_binary_int(buf, 'H') 55 | 56 | 57 | def read_binary_uint32(buf): 58 | return read_binary_int(buf, 'I') 59 | 60 | 61 | def read_binary_uint64(buf): 62 | return read_binary_int(buf, 'Q') 63 | 64 | 65 | def read_binary_uint128(buf): 66 | hi = read_binary_int(buf, 'Q') 67 | lo = read_binary_int(buf, 'Q') 68 | 69 | return (hi << 64) + lo 70 | -------------------------------------------------------------------------------- /proton_driver/readhelpers.py: -------------------------------------------------------------------------------- 1 | from .errors import ServerException 2 | from .reader import read_binary_str, read_binary_uint8, read_binary_int32 3 | 4 | 5 | def read_exception(buf, additional_message=None): 6 | code = read_binary_int32(buf) 7 | name = read_binary_str(buf) 8 | message = read_binary_str(buf) 9 | stack_trace = read_binary_str(buf) 10 | has_nested = bool(read_binary_uint8(buf)) 11 | 12 | new_message = '' 13 | 14 | if additional_message: 15 | new_message += additional_message + '. ' 16 | 17 | if name != 'DB::Exception': 18 | new_message += name + ". " 19 | 20 | new_message += message + ". Stack trace:\n\n" + stack_trace 21 | 22 | nested = None 23 | if has_nested: 24 | nested = read_exception(buf) 25 | 26 | return ServerException(new_message, code, nested=nested) 27 | -------------------------------------------------------------------------------- /proton_driver/settings/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/timeplus-io/proton-python-driver/70a1c6313cd7af8f6fc12b4740fc6eb01027d4b9/proton_driver/settings/__init__.py -------------------------------------------------------------------------------- /proton_driver/settings/types.py: -------------------------------------------------------------------------------- 1 | from ..util.helpers import asbool 2 | from ..varint import write_varint 3 | from ..writer import write_binary_str 4 | 5 | 6 | class SettingType(object): 7 | @classmethod 8 | def write(cls, value, buf): 9 | raise NotImplementedError 10 | 11 | 12 | class SettingUInt64(SettingType): 13 | @classmethod 14 | def write(cls, value, buf): 15 | write_varint(int(value), buf) 16 | 17 | 18 | class SettingBool(SettingType): 19 | @classmethod 20 | def write(cls, value, buf): 21 | write_varint(asbool(value), buf) 22 | 23 | 24 | class SettingString(SettingType): 25 | @classmethod 26 | def write(cls, value, buf): 27 | write_binary_str(value, buf) 28 | 29 | 30 | class SettingChar(SettingType): 31 | @classmethod 32 | def write(cls, value, buf): 33 | write_binary_str(value[0], buf) 34 | 35 | 36 | class SettingFloat(SettingType): 37 | @classmethod 38 | def write(cls, value, buf): 39 | """ 40 | Float is written in string representation. 
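For example (illustrative): the value ``0.5`` is serialized as the length-prefixed string ``'0.5'`` by the ``write_binary_str`` call below.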
41 | """ 42 | write_binary_str(str(value), buf) 43 | 44 | 45 | class SettingMaxThreads(SettingUInt64): 46 | @classmethod 47 | def write(cls, value, buf): 48 | if value == 'auto': 49 | value = 0 50 | super(SettingMaxThreads, cls).write(value, buf) 51 | -------------------------------------------------------------------------------- /proton_driver/settings/writer.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from ..writer import write_binary_str, write_binary_uint8 4 | from .available import settings as available_settings 5 | 6 | 7 | logger = logging.getLogger(__name__) 8 | 9 | 10 | def write_settings(settings, buf, settings_as_strings, is_important=False): 11 | for setting, value in (settings or {}).items(): 12 | # If the server support settings as string we do not need to know 13 | # anything about them, so we can write any setting. 14 | if settings_as_strings: 15 | write_binary_str(setting, buf) 16 | write_binary_uint8(int(is_important), buf) 17 | write_binary_str(str(value), buf) 18 | 19 | else: 20 | # If the server requires string in binary, 21 | # then they cannot be written without type. 22 | setting_writer = available_settings.get(setting) 23 | if not setting_writer: 24 | logger.warning('Unknown setting %s. Skipping', setting) 25 | continue 26 | write_binary_str(setting, buf) 27 | setting_writer.write(value, buf) 28 | 29 | write_binary_str('', buf) # end of settings 30 | -------------------------------------------------------------------------------- /proton_driver/streams/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/timeplus-io/proton-python-driver/70a1c6313cd7af8f6fc12b4740fc6eb01027d4b9/proton_driver/streams/__init__.py -------------------------------------------------------------------------------- /proton_driver/streams/compressed.py: -------------------------------------------------------------------------------- 1 | from io import BytesIO 2 | 3 | try: 4 | from clickhouse_cityhash.cityhash import CityHash128 5 | except ImportError: 6 | raise RuntimeError( 7 | 'Package clickhouse-cityhash is required to use compression' 8 | ) 9 | 10 | from .native import BlockOutputStream, BlockInputStream 11 | from ..bufferedreader import CompressedBufferedReader 12 | from ..bufferedwriter import CompressedBufferedWriter 13 | from ..compression import get_decompressor_cls 14 | from ..defines import BUFFER_SIZE 15 | from ..reader import read_binary_uint8, read_binary_uint128 16 | from ..writer import write_binary_uint8, write_binary_uint128 17 | 18 | 19 | class CompressedBlockOutputStream(BlockOutputStream): 20 | def __init__(self, compressor_cls, compress_block_size, fout, context): 21 | self.compressor_cls = compressor_cls 22 | self.compress_block_size = compress_block_size 23 | self.raw_fout = fout 24 | 25 | self.compressor = self.compressor_cls() 26 | self.fout = CompressedBufferedWriter(self.compressor, BUFFER_SIZE) 27 | super(CompressedBlockOutputStream, self).__init__(self.fout, context) 28 | 29 | def get_compressed_hash(self, data): 30 | return CityHash128(data) 31 | 32 | def finalize(self): 33 | self.fout.flush() 34 | 35 | compressed = self.get_compressed() 36 | compressed_size = len(compressed) 37 | 38 | compressed_hash = self.get_compressed_hash(compressed) 39 | write_binary_uint128(compressed_hash, self.raw_fout) 40 | 41 | block_size = self.compress_block_size 42 | 43 | i = 0 44 | while i < compressed_size: 45 | 
self.raw_fout.write(compressed[i:i + block_size]) 46 | i += block_size 47 | 48 | self.raw_fout.flush() 49 | 50 | def get_compressed(self): 51 | compressed = BytesIO() 52 | 53 | if self.compressor.method_byte is not None: 54 | write_binary_uint8(self.compressor.method_byte, compressed) 55 | extra_header_size = 1 # method 56 | else: 57 | extra_header_size = 0 58 | 59 | data = self.compressor.get_compressed_data(extra_header_size) 60 | compressed.write(data) 61 | 62 | return compressed.getvalue() 63 | 64 | 65 | class CompressedBlockInputStream(BlockInputStream): 66 | def __init__(self, fin, context): 67 | self.raw_fin = fin 68 | fin = CompressedBufferedReader(self.read_block, BUFFER_SIZE) 69 | super(CompressedBlockInputStream, self).__init__(fin, context) 70 | 71 | def get_compressed_hash(self, data): 72 | return CityHash128(data) 73 | 74 | def read_block(self): 75 | compressed_hash = read_binary_uint128(self.raw_fin) 76 | method_byte = read_binary_uint8(self.raw_fin) 77 | 78 | decompressor_cls = get_decompressor_cls(method_byte) 79 | decompressor = decompressor_cls(self.raw_fin) 80 | 81 | if decompressor.method_byte is not None: 82 | extra_header_size = 1 # method 83 | else: 84 | extra_header_size = 0 85 | 86 | return decompressor.get_decompressed_data( 87 | method_byte, compressed_hash, extra_header_size 88 | ) 89 | -------------------------------------------------------------------------------- /proton_driver/streams/native.py: -------------------------------------------------------------------------------- 1 | from ..block import ColumnOrientedBlock, BlockInfo 2 | from ..columns.service import read_column, write_column 3 | from ..reader import read_binary_str 4 | from ..varint import write_varint, read_varint 5 | from ..writer import write_binary_str 6 | from .. import defines 7 | 8 | 9 | class BlockOutputStream(object): 10 | def __init__(self, fout, context): 11 | self.fout = fout 12 | self.context = context 13 | 14 | super(BlockOutputStream, self).__init__() 15 | 16 | def write(self, block): 17 | revision = self.context.server_info.revision 18 | if revision >= defines.DBMS_MIN_REVISION_WITH_BLOCK_INFO: 19 | block.info.write(self.fout) 20 | 21 | # We write transposed data. 
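# Wire layout sketch (inferred from the code below): varint column count, varint row count, then per column a length-prefixed name and type string followed by that column's serialized values.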
22 | n_columns = block.num_columns 23 | n_rows = block.num_rows 24 | 25 | write_varint(n_columns, self.fout) 26 | write_varint(n_rows, self.fout) 27 | 28 | for i, (col_name, col_type) in enumerate(block.columns_with_types): 29 | write_binary_str(col_name, self.fout) 30 | write_binary_str(col_type, self.fout) 31 | 32 | if n_columns: 33 | try: 34 | items = block.get_column_by_index(i) 35 | except IndexError: 36 | raise ValueError('Different rows length') 37 | 38 | write_column(self.context, col_name, col_type, items, 39 | self.fout, types_check=block.types_check) 40 | 41 | self.finalize() 42 | 43 | def finalize(self): 44 | self.fout.flush() 45 | 46 | 47 | class BlockInputStream(object): 48 | def __init__(self, fin, context): 49 | self.fin = fin 50 | self.context = context 51 | 52 | super(BlockInputStream, self).__init__() 53 | 54 | def read(self): 55 | info = BlockInfo() 56 | 57 | revision = self.context.server_info.revision 58 | if revision >= defines.DBMS_MIN_REVISION_WITH_BLOCK_INFO: 59 | info.read(self.fin) 60 | 61 | n_columns = read_varint(self.fin) 62 | n_rows = read_varint(self.fin) 63 | 64 | data, names, types = [], [], [] 65 | 66 | for i in range(n_columns): 67 | column_name = read_binary_str(self.fin) 68 | column_type = read_binary_str(self.fin) 69 | 70 | names.append(column_name) 71 | types.append(column_type) 72 | 73 | if n_rows: 74 | column = read_column(self.context, column_type, n_rows, 75 | self.fin) 76 | data.append(column) 77 | 78 | if self.context.client_settings['use_numpy']: 79 | from ..numpy.block import NumpyColumnOrientedBlock 80 | block_cls = NumpyColumnOrientedBlock 81 | else: 82 | block_cls = ColumnOrientedBlock 83 | 84 | block = block_cls( 85 | columns_with_types=list(zip(names, types)), 86 | data=data, 87 | info=info, 88 | ) 89 | 90 | return block 91 | -------------------------------------------------------------------------------- /proton_driver/util/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/timeplus-io/proton-python-driver/70a1c6313cd7af8f6fc12b4740fc6eb01027d4b9/proton_driver/util/__init__.py -------------------------------------------------------------------------------- /proton_driver/util/compat.py: -------------------------------------------------------------------------------- 1 | 2 | # Drop this when minimum supported version will be 3.7. 
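# Added note: dummy_threading was removed from the stdlib in Python 3.9; this fallback only matters on interpreters built without thread support.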
3 | try: 4 | import threading 5 | except ImportError: 6 | import dummy_threading as threading # noqa: F401 7 | 8 | import json # noqa: F401 9 | 10 | try: 11 | # since tzlocal 4.0+ 12 | # this will avoid warning for get_localzone().key 13 | from tzlocal import get_localzone_name 14 | 15 | def get_localzone_name_compat(): 16 | try: 17 | return get_localzone_name() 18 | except Exception: 19 | return None 20 | except ImportError: 21 | from tzlocal import get_localzone 22 | 23 | def get_localzone_name_compat(): 24 | try: 25 | return get_localzone().key 26 | except AttributeError: 27 | return get_localzone().zone 28 | except Exception: 29 | return None 30 | -------------------------------------------------------------------------------- /proton_driver/util/escape.py: -------------------------------------------------------------------------------- 1 | from datetime import date, datetime 2 | from enum import Enum 3 | from uuid import UUID 4 | 5 | from pytz import timezone 6 | 7 | 8 | escape_chars_map = { 9 | "\b": "\\b", 10 | "\f": "\\f", 11 | "\r": "\\r", 12 | "\n": "\\n", 13 | "\t": "\\t", 14 | "\0": "\\0", 15 | "\a": "\\a", 16 | "\v": "\\v", 17 | "\\": "\\\\", 18 | "'": "\\'" 19 | } 20 | 21 | 22 | def escape_datetime(item, context): 23 | server_tz = timezone(context.server_info.timezone) 24 | 25 | if item.tzinfo is not None: 26 | item = item.astimezone(server_tz) 27 | 28 | return "'%s'" % item.strftime('%Y-%m-%d %H:%M:%S') 29 | 30 | 31 | def escape_param(item, context): 32 | if item is None: 33 | return 'NULL' 34 | 35 | elif isinstance(item, datetime): 36 | return escape_datetime(item, context) 37 | 38 | elif isinstance(item, date): 39 | return "'%s'" % item.strftime('%Y-%m-%d') 40 | 41 | elif isinstance(item, str): 42 | return "'%s'" % ''.join(escape_chars_map.get(c, c) for c in item) 43 | 44 | elif isinstance(item, list): 45 | return "[%s]" % ', '.join(str(escape_param(x, context)) for x in item) 46 | 47 | elif isinstance(item, tuple): 48 | return "(%s)" % ', '.join(str(escape_param(x, context)) for x in item) 49 | 50 | elif isinstance(item, Enum): 51 | return escape_param(item.value, context) 52 | 53 | elif isinstance(item, UUID): 54 | return "'%s'" % str(item) 55 | 56 | else: 57 | return item 58 | 59 | 60 | def escape_params(params, context): 61 | escaped = {} 62 | 63 | for key, value in params.items(): 64 | escaped[key] = escape_param(value, context) 65 | 66 | return escaped 67 | -------------------------------------------------------------------------------- /proton_driver/util/helpers.py: -------------------------------------------------------------------------------- 1 | from itertools import islice, tee 2 | 3 | 4 | def chunks(seq, n): 5 | # islice is MUCH slower than slice for lists and tuples. 6 | if isinstance(seq, (list, tuple)): 7 | i = 0 8 | item = seq[i:i+n] 9 | while item: 10 | yield list(item) 11 | i += n 12 | item = seq[i:i+n] 13 | 14 | else: 15 | it = iter(seq) 16 | item = list(islice(it, n)) 17 | while item: 18 | yield item 19 | item = list(islice(it, n)) 20 | 21 | 22 | def pairwise(iterable): 23 | a, b = tee(iterable) 24 | next(b, None) 25 | return zip(a, b) 26 | 27 | 28 | def column_chunks(columns, n): 29 | for column in columns: 30 | if not isinstance(column, (list, tuple)): 31 | raise TypeError( 32 | 'Unsupported column type: {}. list or tuple is expected.' 
33 | .format(type(column)) 34 | ) 35 | 36 | # create chunk generator for every column 37 | g = [chunks(column, n) for column in columns] 38 | 39 | while True: 40 | # get next chunk for every column 41 | item = [next(column, []) for column in g] 42 | if not any(item): 43 | break 44 | yield item 45 | 46 | 47 | # from paste.deploy.converters 48 | def asbool(obj): 49 | if isinstance(obj, str): 50 | obj = obj.strip().lower() 51 | if obj in ['true', 'yes', 'on', 'y', 't', '1']: 52 | return True 53 | elif obj in ['false', 'no', 'off', 'n', 'f', '0']: 54 | return False 55 | else: 56 | raise ValueError('String is not true/false: %r' % obj) 57 | return bool(obj) 58 | -------------------------------------------------------------------------------- /proton_driver/varint.pyx: -------------------------------------------------------------------------------- 1 | from cpython cimport PyBytes_FromStringAndSize 2 | 3 | 4 | def make_varint(unsigned long long number): 5 | """ 6 | Writes integer of variable length using LEB128. 7 | """ 8 | cdef unsigned char to_write, i = 0 9 | # unsigned PY_LONG_LONG checks integer on function call and 10 | # raises OverflowError if integer overflows unsigned PY_LONG_LONG. 11 | # Long enough for handling unsigned PY_LONG_LONG. 12 | cdef unsigned char num_buf[32] 13 | 14 | while True: 15 | to_write = number & 0x7f 16 | number >>= 7 17 | if number: 18 | num_buf[i] = to_write | 0x80 19 | i += 1 20 | else: 21 | num_buf[i] = to_write 22 | i += 1 23 | break 24 | 25 | return PyBytes_FromStringAndSize(num_buf, i) 26 | 27 | 28 | def write_varint(unsigned long long number, buf): 29 | """ 30 | Writes integer of variable length using LEB128. 31 | """ 32 | cdef unsigned char to_write, i = 0 33 | # unsigned PY_LONG_LONG checks integer on function call and 34 | # raises OverflowError if integer overflows unsigned PY_LONG_LONG. 35 | # Long enough for handling unsigned PY_LONG_LONG. 36 | cdef unsigned char num_buf[32] 37 | 38 | while True: 39 | to_write = number & 0x7f 40 | number >>= 7 41 | if number: 42 | num_buf[i] = to_write | 0x80 43 | i += 1 44 | else: 45 | num_buf[i] = to_write 46 | i += 1 47 | break 48 | 49 | buf.write(PyBytes_FromStringAndSize(num_buf, i)) 50 | 51 | 52 | def read_varint(f): 53 | """ 54 | Reads integer of variable length using LEB128. 55 | """ 56 | cdef unsigned char shift = 0 57 | cdef unsigned long long i, result = 0 58 | 59 | read_one = f.read_one 60 | 61 | while True: 62 | i = read_one() 63 | result |= (i & 0x7f) << shift 64 | shift += 7 65 | if i < 0x80: 66 | break 67 | 68 | return result 69 | -------------------------------------------------------------------------------- /proton_driver/writer.py: -------------------------------------------------------------------------------- 1 | import struct 2 | 3 | from .varint import write_varint 4 | 5 | 6 | MAX_UINT64 = (1 << 64) - 1 7 | MAX_INT64 = (1 << 63) - 1 8 | 9 | 10 | def _byte(b): 11 | return bytes((b, )) 12 | 13 | 14 | def write_binary_str(text, buf): 15 | text = text.encode('utf-8') 16 | write_binary_bytes(text, buf) 17 | 18 | 19 | def write_binary_bytes(text, buf): 20 | write_varint(len(text), buf) 21 | buf.write(text) 22 | 23 | 24 | def write_binary_int(number, buf, fmt): 25 | """ 26 | Writes int to buffer with provided format.
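For example (illustrative), ``write_binary_int(255, buf, 'B')`` packs a single unsigned byte; the ``'<'`` prefix below selects little-endian order for the wider helpers.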
27 | """ 28 | fmt = '<' + fmt 29 | buf.write(struct.pack(fmt, number)) 30 | 31 | 32 | def write_binary_int8(number, buf): 33 | write_binary_int(number, buf, 'b') 34 | 35 | 36 | def write_binary_int16(number, buf): 37 | write_binary_int(number, buf, 'h') 38 | 39 | 40 | def write_binary_int32(number, buf): 41 | write_binary_int(number, buf, 'i') 42 | 43 | 44 | def write_binary_int64(number, buf): 45 | write_binary_int(number, buf, 'q') 46 | 47 | 48 | def write_binary_uint8(number, buf): 49 | write_binary_int(number, buf, 'B') 50 | 51 | 52 | def write_binary_uint16(number, buf): 53 | write_binary_int(number, buf, 'H') 54 | 55 | 56 | def write_binary_uint32(number, buf): 57 | write_binary_int(number, buf, 'I') 58 | 59 | 60 | def write_binary_uint64(number, buf): 61 | write_binary_int(number, buf, 'Q') 62 | 63 | 64 | def write_binary_uint128(number, buf): 65 | fmt = '> 64) & MAX_UINT64, number & MAX_UINT64) 67 | buf.write(packed) 68 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.black] 2 | # Flake8 default compliance with specification PEP8 3 | line-length = 79 4 | exclude = '\.git|\.hg|\.mypy_cache|\.tox|\.venv|venv|_build|buck-out|build|dist' 5 | skip-string-normalization = true 6 | 7 | [tool.cibuildwheel] 8 | build = "*" 9 | skip = "" 10 | test-skip = "" 11 | 12 | archs = ["auto64"] 13 | build-frontend = "default" 14 | config-settings = {} 15 | dependency-versions = "pinned" 16 | environment = {} 17 | environment-pass = [] 18 | build-verbosity = 0 19 | 20 | before-all = "" 21 | before-build = "" 22 | repair-wheel-command = "" 23 | 24 | test-command = "" 25 | before-test = "" 26 | test-requires = [] 27 | test-extras = [] 28 | 29 | container-engine = "docker" 30 | 31 | manylinux-x86_64-image = "manylinux2014" 32 | manylinux-i686-image = "manylinux2014" 33 | manylinux-aarch64-image = "manylinux2014" 34 | manylinux-ppc64le-image = "manylinux2014" 35 | manylinux-s390x-image = "manylinux2014" 36 | manylinux-pypy_x86_64-image = "manylinux2014" 37 | manylinux-pypy_i686-image = "manylinux2014" 38 | manylinux-pypy_aarch64-image = "manylinux2014" 39 | 40 | musllinux-x86_64-image = "musllinux_1_1" 41 | musllinux-i686-image = "musllinux_1_1" 42 | musllinux-aarch64-image = "musllinux_1_1" 43 | musllinux-ppc64le-image = "musllinux_1_1" 44 | musllinux-s390x-image = "musllinux_1_1" 45 | 46 | 47 | [tool.cibuildwheel.linux] 48 | repair-wheel-command = "auditwheel repair -w {dest_dir} {wheel}" 49 | archs = [ "aarch64", "x86_64" ] 50 | 51 | [tool.cibuildwheel.macos] 52 | repair-wheel-command = "delocate-wheel --require-archs {delocate_archs} -w {dest_dir} -v {wheel}" 53 | archs = [ "arm64", "x86_64" ] 54 | 55 | [tool.cibuildwheel.windows] 56 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [db] 2 | host=localhost 3 | port=8463 4 | database=default 5 | user=default 6 | password= 7 | compression=lz4,lz4hc,zstd 8 | client=proton-client 9 | 10 | [log] 11 | level=ERROR 12 | 13 | [bdist_wheel] 14 | universal = 0 15 | 16 | [metadata] 17 | license_file = LICENSE 18 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | import re 3 | from codecs import open 4 | 5 | from setuptools import setup, find_packages 6 | 
from distutils.extension import Extension 7 | 8 | try: 9 | from Cython.Build import cythonize 10 | except ImportError: 11 | USE_CYTHON = False 12 | else: 13 | USE_CYTHON = True 14 | 15 | CYTHON_TRACE = bool(os.getenv('CYTHON_TRACE', False)) 16 | 17 | here = os.path.abspath(os.path.dirname(__file__)) 18 | 19 | 20 | def read_version(): 21 | regexp = re.compile(r'^VERSION\W*=\W*\(([^\(\)]*)\)') 22 | init_py = os.path.join(here, 'proton_driver', '__init__.py') 23 | with open(init_py, encoding='utf-8') as f: 24 | for line in f: 25 | match = regexp.match(line) 26 | if match is not None: 27 | return match.group(1).replace(', ', '.') 28 | else: 29 | raise RuntimeError( 30 | 'Cannot find version in proton_driver/__init__.py' 31 | ) 32 | 33 | 34 | with open(os.path.join(here, 'README.rst'), encoding='utf-8') as f: 35 | long_description = f.read() 36 | 37 | # Prepare extensions. 38 | ext = '.pyx' if USE_CYTHON else '.c' 39 | extensions = [ 40 | Extension( 41 | 'proton_driver.bufferedreader', 42 | ['proton_driver/bufferedreader' + ext] 43 | ), 44 | Extension( 45 | 'proton_driver.bufferedwriter', 46 | ['proton_driver/bufferedwriter' + ext] 47 | ), 48 | Extension( 49 | 'proton_driver.columns.largeint', 50 | ['proton_driver/columns/largeint' + ext] 51 | ), 52 | Extension( 53 | 'proton_driver.varint', 54 | ['proton_driver/varint' + ext] 55 | ) 56 | ] 57 | 58 | if USE_CYTHON: 59 | compiler_directives = {'language_level': '3'} 60 | if CYTHON_TRACE: 61 | compiler_directives['linetrace'] = True 62 | 63 | extensions = cythonize(extensions, compiler_directives=compiler_directives) 64 | 65 | setup( 66 | name='proton-driver', 67 | version=read_version(), 68 | 69 | description='Python driver with native interface for Proton', 70 | long_description=long_description, 71 | 72 | url='https://github.com/timeplus-io/proton-python-driver', 73 | 74 | author='Gang Tao', 75 | author_email='gang@timeplus.com', 76 | 77 | license='MIT', 78 | 79 | classifiers=[ 80 | 'Development Status :: 4 - Beta', 81 | 82 | 83 | 'Environment :: Console', 84 | 85 | 86 | 'Intended Audience :: Developers', 87 | 'Intended Audience :: Information Technology', 88 | 89 | 90 | 'License :: OSI Approved :: MIT License', 91 | 92 | 93 | 'Operating System :: OS Independent', 94 | 95 | 96 | 'Programming Language :: SQL', 97 | 'Programming Language :: Python :: 3', 98 | 'Programming Language :: Python :: 3.8', 99 | 'Programming Language :: Python :: 3.9', 100 | 'Programming Language :: Python :: 3.10', 101 | 'Programming Language :: Python :: 3.11', 102 | 'Programming Language :: Python :: 3.12', 103 | 'Programming Language :: Python :: 3.13', 104 | 'Programming Language :: Python :: Implementation :: PyPy', 105 | 106 | 'Topic :: Database', 107 | 'Topic :: Software Development', 108 | 'Topic :: Software Development :: Libraries', 109 | 'Topic :: Software Development :: Libraries :: Application Frameworks', 110 | 'Topic :: Software Development :: Libraries :: Python Modules', 111 | 'Topic :: Scientific/Engineering :: Information Analysis' 112 | ], 113 | 114 | keywords='Proton db database cloud analytics', 115 | 116 | packages=find_packages('.', exclude=['tests*']), 117 | python_requires='>=3.8, <4', 118 | install_requires=[ 119 | 'pytz', 120 | 'tzlocal', 121 | 'tzlocal<2.1; python_version=="3.5"' 122 | ], 123 | ext_modules=extensions, 124 | extras_require={ 125 | 'lz4': [ 126 | 'lz4<=3.0.1; implementation_name=="pypy"', 127 | 'lz4; implementation_name!="pypy"', 128 | 'clickhouse-cityhash>=1.0.2.1' 129 | ], 130 | 'zstd': ['zstd', 
'clickhouse-cityhash>=1.0.2.1'], 131 | 'numpy': ['numpy>=1.12.0', 'pandas>=0.24.0'] 132 | }, 133 | test_suite='pytest' 134 | ) 135 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/timeplus-io/proton-python-driver/70a1c6313cd7af8f6fc12b4740fc6eb01027d4b9/tests/__init__.py -------------------------------------------------------------------------------- /tests/columns/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/timeplus-io/proton-python-driver/70a1c6313cd7af8f6fc12b4740fc6eb01027d4b9/tests/columns/__init__.py -------------------------------------------------------------------------------- /tests/columns/test_bool.py: -------------------------------------------------------------------------------- 1 | from tests.testcase import BaseTestCase 2 | from proton_driver import errors 3 | 4 | 5 | class BoolTestCase(BaseTestCase): 6 | # required_server_version = (21, 12) 7 | 8 | def test_simple(self): 9 | columns = ("a bool") 10 | 11 | data = [(1,), (0,), (True,), (False,), (None,), ("False",), ("",)] 12 | with self.create_stream(columns): 13 | self.client.execute('INSERT INTO test (a) VALUES', data) 14 | 15 | query = 'SELECT * FROM test' 16 | inserted = self.emit_cli(query) 17 | self.assertEqual( 18 | inserted, ( 19 | 'true\n' 20 | 'false\n' 21 | 'true\n' 22 | 'false\n' 23 | 'false\n' 24 | 'true\n' 25 | 'false\n' 26 | ) 27 | ) 28 | 29 | inserted = self.client.execute(query) 30 | self.assertEqual( 31 | inserted, [ 32 | (True, ), 33 | (False, ), 34 | (True, ), 35 | (False, ), 36 | (False, ), 37 | (True, ), 38 | (False, ), 39 | ] 40 | ) 41 | 42 | def test_errors(self): 43 | columns = "a bool" 44 | with self.create_stream(columns): 45 | with self.assertRaises(errors.TypeMismatchError): 46 | self.client.execute( 47 | 'INSERT INTO test (a) VALUES', [(1, )], 48 | types_check=True 49 | ) 50 | 51 | def test_nullable(self): 52 | columns = "a nullable(bool)" 53 | 54 | data = [(None, ), (True, ), (False, )] 55 | with self.create_stream(columns): 56 | self.client.execute('INSERT INTO test (a) VALUES', data) 57 | 58 | query = 'SELECT * FROM test' 59 | inserted = self.emit_cli(query) 60 | self.assertEqual( 61 | inserted, ( 62 | '\\N\ntrue\nfalse\n' 63 | ) 64 | ) 65 | 66 | inserted = self.client.execute(query) 67 | self.assertEqual( 68 | inserted, [ 69 | (None, ), (True, ), (False, ), 70 | ] 71 | ) 72 | -------------------------------------------------------------------------------- /tests/columns/test_common.py: -------------------------------------------------------------------------------- 1 | from tests.testcase import BaseTestCase 2 | 3 | 4 | class CommonTestCase(BaseTestCase): 5 | client_kwargs = {'settings': {'insert_block_size': 1}} 6 | 7 | def setUp(self): 8 | super(CommonTestCase, self).setUp() 9 | 10 | self.send_data_count = 0 11 | old_send_data = self.client.connection.send_data 12 | 13 | def send_data(*args, **kwargs): 14 | self.send_data_count += 1 15 | return old_send_data(*args, **kwargs) 16 | 17 | self.client.connection.send_data = send_data 18 | 19 | def test_insert_block_size(self): 20 | with self.create_stream('a uint8'): 21 | data = [(x, ) for x in range(4)] 22 | self.client.execute( 23 | 'INSERT INTO test (a) VALUES', data 24 | ) 25 | # Two empty blocks: for end of sending external tables 26 | # and data. 
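# Added note: insert_block_size=1 (set in client_kwargs above) splits the four rows into four single-row data blocks, hence 4 + 2 below.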
27 | self.assertEqual(self.send_data_count, 4 + 2) 28 | 29 | query = 'SELECT * FROM test' 30 | inserted = self.emit_cli(query) 31 | self.assertEqual(inserted, '0\n1\n2\n3\n') 32 | inserted = self.client.execute(query) 33 | self.assertEqual(inserted, data) 34 | 35 | def test_columnar_insert_block_size(self): 36 | with self.create_stream('a uint8'): 37 | data = [(0, 1, 2, 3)] 38 | self.client.execute( 39 | 'INSERT INTO test (a) VALUES', data, columnar=True 40 | ) 41 | # Two empty blocks: for end of sending external tables 42 | # and data. 43 | self.assertEqual(self.send_data_count, 4 + 2) 44 | 45 | query = 'SELECT * FROM test' 46 | inserted = self.emit_cli(query) 47 | self.assertEqual(inserted, '0\n1\n2\n3\n') 48 | inserted = self.client.execute(query) 49 | expected = [(0, ), (1, ), (2, ), (3, )] 50 | self.assertEqual(inserted, expected) 51 | -------------------------------------------------------------------------------- /tests/columns/test_date.py: -------------------------------------------------------------------------------- 1 | import os 2 | from datetime import date, datetime 3 | from unittest.mock import patch 4 | 5 | from freezegun import freeze_time 6 | 7 | from tests.testcase import BaseTestCase 8 | 9 | 10 | class DateTestCase(BaseTestCase): 11 | @freeze_time('2017-03-05 03:00:00') 12 | def test_do_not_use_timezone(self): 13 | with self.create_stream('a Date'): 14 | data = [(date(1970, 1, 2), )] 15 | self.client.execute( 16 | 'INSERT INTO test (a) VALUES', data 17 | ) 18 | 19 | query = 'SELECT * FROM test' 20 | inserted = self.emit_cli(query) 21 | self.assertEqual(inserted, '1970-01-02\n') 22 | 23 | with patch.dict(os.environ, {'TZ': 'US/Hawaii'}): 24 | inserted = self.client.execute(query) 25 | self.assertEqual(inserted, data) 26 | 27 | def test_insert_datetime_to_date(self): 28 | with self.create_stream('a Date'): 29 | testTime = datetime(2015, 6, 6, 12, 30, 54) 30 | self.client.execute( 31 | 'INSERT INTO test (a) VALUES', [(testTime, )] 32 | ) 33 | query = 'SELECT * FROM test' 34 | inserted = self.emit_cli(query) 35 | self.assertEqual(inserted, '2015-06-06\n') 36 | 37 | def test_wrong_date_insert(self): 38 | with self.create_stream('a Date'): 39 | data = [ 40 | (date(5555, 1, 1), ), 41 | (date(1, 1, 1), ), 42 | (date(2149, 6, 7), ) 43 | ] 44 | self.client.execute('INSERT INTO test (a) VALUES', data) 45 | query = 'SELECT * FROM test' 46 | inserted = self.emit_cli(query) 47 | expected = (3 * '1970-01-01\n') 48 | self.assertEqual(inserted, expected) 49 | 50 | def test_boundaries(self): 51 | 52 | with self.create_stream('a Date'): 53 | data = [ 54 | (date(1970, 1, 1), ), 55 | ((date(2149, 6, 6), )) 56 | ] 57 | self.client.execute('INSERT INTO test (a) VALUES', data) 58 | 59 | query = 'SELECT * FROM test' 60 | inserted = self.emit_cli(query) 61 | expected = '1970-01-01\n2149-06-06\n' 62 | self.assertEqual(inserted, expected) 63 | 64 | inserted = self.client.execute(query) 65 | self.assertEqual(inserted, data) 66 | 67 | 68 | class Date32TestCase(BaseTestCase): 69 | # required_server_version = (21, 9) 70 | 71 | def test_wrong_date_insert(self): 72 | with self.create_stream('a Date32'): 73 | data = [ 74 | (date(5555, 1, 1), ), 75 | (date(1, 1, 1), ), 76 | (date(2284, 1, 1), ) 77 | ] 78 | self.client.execute('INSERT INTO test (a) VALUES', data) 79 | query = 'SELECT * FROM test' 80 | inserted = self.emit_cli(query) 81 | self.assertEqual(inserted, '1970-01-01\n1970-01-01\n1970-01-01\n') 82 | 83 | def test_boundaries(self): 84 | with self.create_stream('a Date32'): 85 | data = [(date(1925, 
1, 1), ), (date(2283, 11, 11), )] 86 | self.client.execute('INSERT INTO test (a) VALUES', data) 87 | 88 | query = 'SELECT * FROM test' 89 | inserted = self.emit_cli(query) 90 | self.assertEqual(inserted, '1925-01-01\n2283-11-11\n') 91 | 92 | inserted = self.client.execute(query) 93 | self.assertEqual(inserted, data) 94 | -------------------------------------------------------------------------------- /tests/columns/test_float.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | from tests.testcase import BaseTestCase 4 | from proton_driver import errors 5 | 6 | 7 | class FloatTestCase(BaseTestCase): 8 | def test_chop_to_type(self): 9 | with self.create_stream('a float32, b float64'): 10 | data = [ 11 | (3.4028235e38, 3.4028235e38), 12 | (3.4028235e39, 3.4028235e39), 13 | (-3.4028235e39, 3.4028235e39), 14 | (1, 2) 15 | ] 16 | 17 | with self.assertRaises(errors.TypeMismatchError) as e: 18 | self.client.execute( 19 | 'INSERT INTO test (a, b) VALUES', data 20 | ) 21 | 22 | self.assertIn('Column a', str(e.exception)) 23 | 24 | def test_simple(self): 25 | with self.create_stream('a float32, b float64'): 26 | data = [ 27 | (3.4028235e38, 3.4028235e38), 28 | (3.4028235e39, 3.4028235e39), 29 | (-3.4028235e39, 3.4028235e39), 30 | (1, 2) 31 | ] 32 | self.client.execute( 33 | 'INSERT INTO test (a, b) VALUES', data, types_check=True 34 | ) 35 | 36 | query = 'SELECT * FROM test' 37 | inserted = self.emit_cli(query) 38 | self.assertEqual( 39 | inserted, ( 40 | '3.4028235e38\t3.4028235e38\n' 41 | 'inf\t3.4028235e39\n' 42 | '-inf\t3.4028235e39\n' 43 | '1\t2\n' 44 | ) 45 | ) 46 | 47 | inserted = self.client.execute(query) 48 | self.assertEqual(inserted, [ 49 | (3.4028234663852886e+38, 3.4028235e38), 50 | (float('inf'), 3.4028235e39), 51 | (-float('inf'), 3.4028235e39), 52 | (1, 2) 53 | ]) 54 | 55 | def test_nullable(self): 56 | with self.create_stream('a nullable(float32)'): 57 | data = [(None, ), (0.5, ), (None, ), (1.5, )] 58 | self.client.execute( 59 | 'INSERT INTO test (a) VALUES', data 60 | ) 61 | 62 | query = 'SELECT * FROM test' 63 | inserted = self.emit_cli(query) 64 | self.assertEqual(inserted, '\\N\n0.5\n\\N\n1.5\n') 65 | 66 | inserted = self.client.execute(query) 67 | self.assertEqual(inserted, data) 68 | 69 | def test_nan(self): 70 | with self.create_stream('a float32'): 71 | data = [(float('nan'), ), (0.5, )] 72 | self.client.execute( 73 | 'INSERT INTO test (a) VALUES', data 74 | ) 75 | 76 | query = 'SELECT * FROM test' 77 | inserted = self.emit_cli(query) 78 | self.assertEqual(inserted, 'nan\n0.5\n') 79 | 80 | inserted = self.client.execute(query) 81 | self.assertEqual(len(inserted), 2) 82 | self.assertTrue(math.isnan(inserted[0][0])) 83 | self.assertEqual(inserted[1][0], 0.5) 84 | -------------------------------------------------------------------------------- /tests/columns/test_interval.py: -------------------------------------------------------------------------------- 1 | from tests.testcase import BaseTestCase 2 | 3 | 4 | class IntervalTestCase(BaseTestCase): 5 | required_server_version = (1, 1, 54310) 6 | 7 | def test_all(self): 8 | interval = [ 9 | ('YEAR', 1), 10 | ('MONTH', 2), 11 | ('WEEK', 3), 12 | ('DAY', 4), 13 | ('HOUR', 5), 14 | ('MINUTE', 6), 15 | ('SECOND', 7) 16 | ] 17 | columns = ', '.join(['INTERVAL {} {}'.format(v, k) 18 | for k, v in interval]) 19 | query = 'SELECT {}'.format(columns) 20 | 21 | cli_result = self.emit_cli(query) 22 | self.assertEqual(cli_result, '1\t2\t3\t4\t5\t6\t7\n') 23 | 24 | client_result = 
self.client.execute(query) 25 | self.assertEqual(client_result, [(1, 2, 3, 4, 5, 6, 7)]) 26 | -------------------------------------------------------------------------------- /tests/columns/test_json.py: -------------------------------------------------------------------------------- 1 | import json 2 | from time import sleep 3 | from tests.testcase import BaseTestCase 4 | 5 | 6 | class JSONTestCase(BaseTestCase): 7 | def test_simple(self): 8 | rv = self.client.execute("SELECT '{\"bb\": {\"cc\": [255, 1]}}'::json") 9 | self.assertEqual(rv, [({'bb': {'cc': [255, 1]}},)]) 10 | 11 | def test_from_table(self): 12 | self.emit_cli('CREATE STREAM test (a json)') 13 | data = [ 14 | ({},), 15 | ({'key1': 1}, ), 16 | ({'key1': 2.1, 'key2': {'nested': 'key'}}, ), 17 | ({'key1': 3, 'key3': ['test'], 'key4': [10, 20]}, ) 18 | ] 19 | self.client.execute('INSERT INTO test (a) VALUES', data) 20 | sleep(3) 21 | query = 'SELECT a FROM table(test)' 22 | inserted = self.client.execute(query) 23 | self.assertEqual( 24 | inserted, 25 | [ 26 | ((0.0, ('',), [], []),), 27 | ((1.0, ('',), [], []),), 28 | ((2.1, ('key',), [], []),), 29 | ((3.0, ('',), ['test'], [10, 20]),) 30 | ] 31 | ) 32 | inserted = self.client.execute( 33 | query, settings=dict(namedtuple_as_json=True) 34 | ) 35 | data_with_all_keys = [ 36 | ({'key1': 0, 'key2': {'nested': ''}, 'key3': [], 'key4': []},), 37 | ({'key1': 1, 'key2': {'nested': ''}, 'key3': [], 'key4': []},), 38 | ({'key1': 2.1, 'key2': {'nested': 'key'}, 'key3': [], 39 | 'key4': []},), 40 | ({'key1': 3, 'key2': {'nested': ''}, 'key3': ['test'], 41 | 'key4': [10, 20]},) 42 | ] 43 | self.assertEqual(inserted, data_with_all_keys) 44 | self.emit_cli('DROP STREAM test') 45 | 46 | def test_insert_json_strings(self): 47 | self.emit_cli('CREATE STREAM test (a json)') 48 | data = [ 49 | (json.dumps({'i-am': 'dumped json'}),), 50 | ] 51 | self.client.execute('INSERT INTO test (a) VALUES', data) 52 | sleep(3) 53 | query = 'SELECT a FROM table(test)' 54 | inserted = self.client.execute(query) 55 | self.assertEqual( 56 | inserted, 57 | [(('dumped json',),)] 58 | ) 59 | inserted = self.client.execute( 60 | query, settings=dict(namedtuple_as_json=True) 61 | ) 62 | data_with_all_keys = [ 63 | ({'`i-am`': 'dumped json'},) 64 | ] 65 | self.assertEqual(inserted, data_with_all_keys) 66 | self.emit_cli('DROP STREAM test') 67 | 68 | def test_json_as_named_tuple(self): 69 | settings = {'namedtuple_as_json': True} 70 | query = 'SELECT a FROM table(test)' 71 | 72 | self.emit_cli('CREATE STREAM test (a json)') 73 | data = [ 74 | ({'key': 'value'}, ), 75 | ] 76 | self.client.execute('INSERT INTO test (a) VALUES', data) 77 | sleep(3) 78 | inserted = self.client.execute(query) 79 | self.assertEqual(inserted, [(('value',),)]) 80 | 81 | with self.created_client(settings=settings) as client: 82 | inserted = client.execute(query) 83 | self.assertEqual(inserted, data) 84 | self.emit_cli('DROP STREAM test') 85 | -------------------------------------------------------------------------------- /tests/columns/test_nested.py: -------------------------------------------------------------------------------- 1 | from tests.testcase import BaseTestCase 2 | from proton_driver.columns.util import ( 3 | get_inner_spec, 4 | get_inner_columns, 5 | get_inner_columns_with_types 6 | ) 7 | 8 | 9 | class NestedTestCase(BaseTestCase): 10 | def entuple(self, lst): 11 | return tuple( 12 | self.entuple(x) if isinstance(x, list) else x for x in lst 13 | ) 14 | 15 | def test_simple(self): 16 | columns = 'n nested(i int32, s string)' 17 | 
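# Added note: with flatten_nested=0 (passed to create_stream below), each row carries the whole nested value as one list of (i, s) tuples.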
18 | # INSERT INTO test VALUES ([(0, 'a'), (1, 'b')]); 19 | data = [([(0, 'a'), (1, 'b')],)] 20 | 21 | with self.create_stream(columns, flatten_nested=0): 22 | self.client.execute( 23 | 'INSERT INTO test (n) VALUES', data 24 | ) 25 | 26 | query = 'SELECT * FROM test' 27 | inserted = self.emit_cli(query) 28 | self.assertEqual(inserted, "[(0,'a'),(1,'b')]\n") 29 | 30 | inserted = self.client.execute(query) 31 | self.assertEqual(inserted, data) 32 | 33 | projected_i = self.client.execute('SELECT n.i FROM test') 34 | self.assertEqual( 35 | projected_i, 36 | [([0, 1],)] 37 | ) 38 | 39 | projected_s = self.client.execute('SELECT n.s FROM test') 40 | self.assertEqual( 41 | projected_s, 42 | [(['a', 'b'],)] 43 | ) 44 | 45 | def test_multiple_rows(self): 46 | columns = 'n nested(i int32, s string)' 47 | 48 | data = [([(0, 'a'), (1, 'b')],), ([(3, 'd'), (4, 'e')],)] 49 | 50 | with self.create_stream(columns, flatten_nested=0): 51 | self.client.execute( 52 | 'INSERT INTO test (n) VALUES', data 53 | ) 54 | 55 | query = 'SELECT * FROM test' 56 | inserted = self.emit_cli(query) 57 | self.assertEqual( 58 | inserted, 59 | "[(0,'a'),(1,'b')]\n[(3,'d'),(4,'e')]\n" 60 | ) 61 | 62 | inserted = self.client.execute(query) 63 | self.assertEqual(inserted, data) 64 | 65 | def test_dict(self): 66 | columns = 'n nested(i int32, s string)' 67 | 68 | data = [ 69 | {'n': [{'i': 0, 's': 'a'}, {'i': 1, 's': 'b'}]}, 70 | {'n': [{'i': 3, 's': 'd'}, {'i': 4, 's': 'e'}]}, 71 | ] 72 | 73 | with self.create_stream(columns, flatten_nested=0): 74 | self.client.execute( 75 | 'INSERT INTO test (n) VALUES', data 76 | ) 77 | 78 | query = 'SELECT * FROM test' 79 | inserted = self.emit_cli(query) 80 | self.assertEqual( 81 | inserted, 82 | "[(0,'a'),(1,'b')]\n[(3,'d'),(4,'e')]\n" 83 | ) 84 | 85 | inserted = self.client.execute(query) 86 | self.assertEqual( 87 | inserted, 88 | [([(0, 'a'), (1, 'b')],), ([(3, 'd'), (4, 'e')],)] 89 | ) 90 | 91 | def test_get_nested_columns(self): 92 | spec = 'nested(a tuple(array(int8)),\n b nullable(string))' 93 | columns = get_inner_columns('nested', spec) 94 | self.assertEqual( 95 | columns, 96 | ['tuple(array(int8))', 'nullable(string)'] 97 | ) 98 | 99 | def test_get_columns_with_types(self): 100 | spec = 'nested(a tuple(array(int8)),\n b nullable(string))' 101 | columns = get_inner_columns_with_types('nested', spec) 102 | self.assertEqual( 103 | columns, 104 | [('a', 'tuple(array(int8))'), ('b', 'nullable(string)')] 105 | ) 106 | 107 | def test_get_inner_spec(self): 108 | inner = 'a tuple(array(int8), array(int64)), b nullable(string)' 109 | self.assertEqual( 110 | get_inner_spec('nested', 'nested({}) dummy '.format(inner)), 111 | inner 112 | ) 113 | -------------------------------------------------------------------------------- /tests/columns/test_null.py: -------------------------------------------------------------------------------- 1 | from tests.testcase import BaseTestCase 2 | 3 | 4 | class NullTestCase(BaseTestCase): 5 | def test_select_null(self): 6 | rv = self.client.execute('SELECT NULL') 7 | self.assertEqual(rv, [(None, )]) 8 | -------------------------------------------------------------------------------- /tests/columns/test_nullable.py: -------------------------------------------------------------------------------- 1 | from tests.testcase import BaseTestCase 2 | from proton_driver import errors 3 | 4 | ErrorCodes = errors.ErrorCodes 5 | 6 | 7 | class nullableTestCase(BaseTestCase): 8 | def test_simple(self): 9 | columns = 'a nullable(int32)' 10 | 11 | data = [(3, ), (None, ),
(2, )] 12 | with self.create_stream(columns): 13 | self.client.execute( 14 | 'INSERT INTO test (a) VALUES', data 15 | ) 16 | 17 | query = 'SELECT * FROM test' 18 | inserted = self.emit_cli(query) 19 | self.assertEqual( 20 | inserted, '3\n\\N\n2\n' 21 | ) 22 | 23 | inserted = self.client.execute(query) 24 | self.assertEqual(inserted, data) 25 | 26 | def test_nullable_inside_nullable(self): 27 | columns = 'a nullable(nullable(int32))' 28 | 29 | with self.assertRaises(errors.ServerException) as e: 30 | self.client.execute( 31 | 'CREATE STREAM test ({}) ''ENGINE = Memory'.format(columns) 32 | ) 33 | 34 | self.assertEqual(e.exception.code, ErrorCodes.ILLEGAL_TYPE_OF_ARGUMENT) 35 | 36 | def test_nullable_array(self): 37 | columns = 'a nullable(array(nullable(array(nullable(int32)))))' 38 | 39 | with self.assertRaises(errors.ServerException) as e: 40 | self.client.execute( 41 | 'CREATE STREAM test ({}) ''ENGINE = Memory'.format(columns) 42 | ) 43 | 44 | self.assertEqual(e.exception.code, ErrorCodes.ILLEGAL_TYPE_OF_ARGUMENT) 45 | -------------------------------------------------------------------------------- /tests/columns/test_simpleaggregatefunction.py: -------------------------------------------------------------------------------- 1 | from enum import IntEnum 2 | 3 | from tests.testcase import BaseTestCase 4 | 5 | 6 | class SimpleAggregateFunctionTestCase(BaseTestCase): 7 | # required_server_version = (19, 8, 3) 8 | 9 | def test_simple(self): 10 | columns = 'a simple_aggregate_function(any, int32)' 11 | 12 | data = [(3, ), (2, )] 13 | with self.create_stream(columns): 14 | self.client.execute( 15 | 'INSERT INTO test (a) VALUES', data 16 | ) 17 | 18 | query = 'SELECT * FROM test' 19 | inserted = self.emit_cli(query) 20 | self.assertEqual( 21 | inserted, '3\n2\n' 22 | ) 23 | 24 | inserted = self.client.execute(query) 25 | self.assertEqual(inserted, data) 26 | 27 | def test_nullable(self): 28 | columns = 'a simple_aggregate_function(any, nullable(int32))' 29 | 30 | data = [(3, ), (None, ), (2, )] 31 | with self.create_stream(columns): 32 | self.client.execute( 33 | 'INSERT INTO test (a) VALUES', data 34 | ) 35 | 36 | query = 'SELECT * FROM test' 37 | inserted = self.emit_cli(query) 38 | self.assertEqual( 39 | inserted, '3\n\\N\n2\n' 40 | ) 41 | 42 | inserted = self.client.execute(query) 43 | self.assertEqual(inserted, data) 44 | 45 | def test_simple_agg_function(self): 46 | class A(IntEnum): 47 | hello = -1 48 | world = 2 49 | 50 | columns = "a simple_aggregate_function(any_last, " \ 51 | "enum8('hello' = -1, 'world' = 2))" 52 | 53 | data = [(A.hello,), (A.world,), (-1,), (2,)] 54 | with self.create_stream(columns): 55 | self.client.execute( 56 | 'INSERT INTO test (a) VALUES', data 57 | ) 58 | 59 | query = 'SELECT * FROM test' 60 | inserted = self.emit_cli(query) 61 | self.assertEqual( 62 | inserted, ( 63 | 'hello\n' 64 | 'world\n' 65 | 'hello\n' 66 | 'world\n' 67 | ) 68 | ) 69 | 70 | inserted = self.client.execute(query) 71 | self.assertEqual( 72 | inserted, [ 73 | ('hello',), ('world',), 74 | ('hello',), ('world',) 75 | ] 76 | ) 77 | 78 | def test_simple_agg_function_nullable(self): 79 | class A(IntEnum): 80 | hello = -1 81 | world = 2 82 | 83 | columns = "a simple_aggregate_function(any_last, " \ 84 | "nullable(enum8('hello' = -1, 'world' = 2)))" 85 | 86 | data = [(A.hello,), (A.world,), (None,), (-1,), (2,)] 87 | with self.create_stream(columns): 88 | self.client.execute( 89 | 'INSERT INTO test (a) VALUES', data 90 | ) 91 | 92 | query = 'SELECT * FROM test' 93 | inserted = 
self.emit_cli(query) 94 | self.assertEqual( 95 | inserted, ( 96 | 'hello\n' 97 | 'world\n' 98 | '\\N\n' 99 | 'hello\n' 100 | 'world\n' 101 | ) 102 | ) 103 | 104 | inserted = self.client.execute(query) 105 | self.assertEqual( 106 | inserted, [ 107 | ('hello',), ('world',), 108 | (None, ), 109 | ('hello',), ('world',) 110 | ] 111 | ) 112 | -------------------------------------------------------------------------------- /tests/columns/test_unknown.py: -------------------------------------------------------------------------------- 1 | 2 | from unittest import TestCase 3 | 4 | from proton_driver import errors 5 | from proton_driver.columns.service import get_column_by_spec 6 | 7 | 8 | class UnknownColumnTestCase(TestCase): 9 | def test_get_unknown_column(self): 10 | with self.assertRaises(errors.UnknownTypeError) as e: 11 | get_column_by_spec('Unicorn', {'context': {}}) 12 | 13 | self.assertIn('Unicorn', str(e.exception)) 14 | -------------------------------------------------------------------------------- /tests/columns/test_uuid.py: -------------------------------------------------------------------------------- 1 | from uuid import UUID 2 | from tests.testcase import BaseTestCase 3 | from proton_driver import errors 4 | 5 | 6 | class UUIDTestCase(BaseTestCase): 7 | def test_simple(self): 8 | with self.create_stream('a uuid'): 9 | data = [ 10 | (UUID('c0fcbba9-0752-44ed-a5d6-4dfb4342b89d'), ), 11 | ('2efcead4-ff55-4db5-bdb4-6b36a308d8e0', ) 12 | ] 13 | self.client.execute( 14 | 'INSERT INTO test (a) VALUES', data 15 | ) 16 | 17 | query = 'SELECT * FROM test' 18 | inserted = self.emit_cli(query) 19 | self.assertEqual(inserted, ( 20 | 'c0fcbba9-0752-44ed-a5d6-4dfb4342b89d\n' 21 | '2efcead4-ff55-4db5-bdb4-6b36a308d8e0\n' 22 | )) 23 | inserted = self.client.execute(query) 24 | self.assertEqual(inserted, [ 25 | (UUID('c0fcbba9-0752-44ed-a5d6-4dfb4342b89d'), ), 26 | (UUID('2efcead4-ff55-4db5-bdb4-6b36a308d8e0'), ) 27 | ]) 28 | 29 | def test_type_mismatch(self): 30 | data = [(62457709573696417404743346296141175008, )] 31 | with self.create_stream('a uuid'): 32 | with self.assertRaises(errors.TypeMismatchError): 33 | self.client.execute( 34 | 'INSERT INTO test (a) VALUES', data, types_check=True 35 | ) 36 | with self.assertRaises(AttributeError): 37 | self.client.execute( 38 | 'INSERT INTO test (a) VALUES', data 39 | ) 40 | 41 | def test_bad_uuid(self): 42 | data = [('a', )] 43 | with self.create_stream('a uuid'): 44 | with self.assertRaises(errors.CannotParseUuidError): 45 | self.client.execute( 46 | 'INSERT INTO test (a) VALUES', data 47 | ) 48 | 49 | def test_nullable(self): 50 | with self.create_stream('a nullable(uuid)'): 51 | data = [(UUID('2efcead4-ff55-4db5-bdb4-6b36a308d8e0'), ), (None, )] 52 | self.client.execute( 53 | 'INSERT INTO test (a) VALUES', data 54 | ) 55 | 56 | query = 'SELECT * FROM test' 57 | inserted = self.emit_cli(query) 58 | self.assertEqual(inserted, 59 | '2efcead4-ff55-4db5-bdb4-6b36a308d8e0\n\\N\n') 60 | 61 | inserted = self.client.execute(query) 62 | self.assertEqual(inserted, data) 63 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | 4 | @pytest.fixture(autouse=True) 5 | def assert_empty_output(capfd): 6 | yield 7 | 8 | captured = capfd.readouterr() 9 | 10 | assert captured.out == '' 11 | assert captured.err == '' 12 | -------------------------------------------------------------------------------- 
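A minimal sketch of the JSON round trip the tests above exercise, assuming a local timeplusd server reachable on the native port 8463 (the port published by tests/docker-compose.yml below) and a hypothetical stream name demo; the namedtuple_as_json setting is the same one test_json.py passes per query:

from time import sleep
from proton_driver.client import Client

client = Client('localhost', port=8463)

client.execute('CREATE STREAM demo (a json)')
client.execute('INSERT INTO demo (a) VALUES', [({'key': 'value'},)])
sleep(3)  # as in the tests: give the stream time to be queryable via table()

# Default shape: JSON values come back as plain tuples, e.g. [(('value',),)]
print(client.execute('SELECT a FROM table(demo)'))

# With namedtuple_as_json=True they come back as dicts, e.g. [({'key': 'value'},)]
print(client.execute(
    'SELECT a FROM table(demo)', settings=dict(namedtuple_as_json=True)
))

client.execute('DROP STREAM demo')

--------------------------------------------------------------------------------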
/tests/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3' 2 | 3 | services: 4 | proton-server: 5 | image: "timeplus/timeplusd:latest" 6 | container_name: test-proton-server 7 | environment: 8 | - TZ=Asia/Shanghai 9 | ports: 10 | - "127.0.0.1:8463:8463" 11 | command: > 12 | /bin/bash -c "echo sleeping; sleep 2; /entrypoint.sh" 13 | volumes: 14 | - /mnt/timeplusd:/var/lib/timeplusd 15 | 16 | proton-client: 17 | image: "timeplus/timeplusd:latest" 18 | container_name: test-proton-client 19 | entrypoint: /bin/sh 20 | command: [-c, 'while :; do sleep 1; done'] 21 | -------------------------------------------------------------------------------- /tests/log.py: -------------------------------------------------------------------------------- 1 | from logging.config import dictConfig 2 | 3 | 4 | def configure(level): 5 | dictConfig({ 6 | 'version': 1, 7 | 'disable_existing_loggers': False, 8 | 'formatters': { 9 | 'standard': { 10 | 'format': '%(asctime)s %(levelname)-8s %(name)s: %(message)s' 11 | }, 12 | }, 13 | 'handlers': { 14 | 'default': { 15 | 'level': level, 16 | 'formatter': 'standard', 17 | 'class': 'logging.StreamHandler', 18 | }, 19 | }, 20 | 'loggers': { 21 | '': { 22 | 'handlers': ['default'], 23 | 'level': level, 24 | 'propagate': True 25 | }, 26 | } 27 | }) 28 | -------------------------------------------------------------------------------- /tests/numpy/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/timeplus-io/proton-python-driver/70a1c6313cd7af8f6fc12b4740fc6eb01027d4b9/tests/numpy/__init__.py -------------------------------------------------------------------------------- /tests/numpy/columns/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/timeplus-io/proton-python-driver/70a1c6313cd7af8f6fc12b4740fc6eb01027d4b9/tests/numpy/columns/__init__.py -------------------------------------------------------------------------------- /tests/numpy/columns/test_float.py: -------------------------------------------------------------------------------- 1 | from parameterized import parameterized 2 | 3 | try: 4 | import numpy as np 5 | except ImportError: 6 | np = None 7 | 8 | from tests.numpy.testcase import NumpyBaseTestCase 9 | 10 | 11 | class FloatTestCase(NumpyBaseTestCase): 12 | n = 10 13 | 14 | def check_result(self, rv, col_type): 15 | self.assertarraysEqual(rv[0], np.array(range(self.n))) 16 | self.assertEqual(rv[0].dtype, col_type) 17 | 18 | def get_query(self, ch_type): 19 | with self.create_stream('a {}'.format(ch_type)): 20 | data = [np.array(range(self.n))] 21 | self.client.execute( 22 | 'INSERT INTO test (a) VALUES', data, columnar=True 23 | ) 24 | 25 | query = 'SELECT * FROM test' 26 | inserted = self.emit_cli(query) 27 | self.assertEqual( 28 | inserted, '\n'.join(str(x) for x in data[0]) + '\n' 29 | ) 30 | return self.client.execute(query, columnar=True) 31 | 32 | def test_float32(self): 33 | rv = self.get_query('float32') 34 | self.check_result(rv, np.float32) 35 | 36 | def test_float64(self): 37 | rv = self.get_query('float64') 38 | self.check_result(rv, np.float64) 39 | 40 | def test_fractional_round_trip(self): 41 | with self.create_stream('a float32'): 42 | data = [np.array([0.5, 1.5], dtype=np.float32)] 43 | self.client.execute( 44 | 'INSERT INTO test (a) VALUES', data, columnar=True 45 | ) 46 | 47 | query = 'SELECT * FROM test' 48 | inserted = 
self.emit_cli(query) 49 | self.assertEqual(inserted, '0.5\n1.5\n') 50 | 51 | inserted = self.client.execute(query, columnar=True) 52 | self.assertarraysEqual(inserted[0], data[0]) 53 | 54 | @parameterized.expand(['float32', 'float64']) 55 | def test_nullable(self, float_type): 56 | with self.create_stream('a nullable({})'.format(float_type)): 57 | data = [np.array([np.nan, 0.5, None, 1.5], dtype=object)] 58 | self.client.execute( 59 | 'INSERT INTO test (a) VALUES', data, columnar=True 60 | ) 61 | 62 | query = 'SELECT * FROM test' 63 | inserted = self.emit_cli(query) 64 | self.assertEqual(inserted, 'nan\n0.5\n\\N\n1.5\n') 65 | 66 | inserted = self.client.execute(query, columnar=True) 67 | self.assertarraysEqual( 68 | inserted[0].astype(str), data[0].astype(str) 69 | ) 70 | self.assertEqual(inserted[0].dtype, object) 71 | 72 | def test_nan(self): 73 | with self.create_stream('a float32'): 74 | data = [np.array([float('nan'), 0.5], dtype=np.float32)] 75 | self.client.execute( 76 | 'INSERT INTO test (a) VALUES', data, columnar=True 77 | ) 78 | 79 | query = 'SELECT * FROM test' 80 | inserted = self.emit_cli(query) 81 | self.assertEqual(inserted, 'nan\n0.5\n') 82 | 83 | inserted = self.client.execute(query, columnar=True) 84 | self.assertarraysEqual( 85 | inserted[0].astype(str), data[0].astype(str) 86 | ) 87 | self.assertEqual(inserted[0].dtype, np.float32) 88 | -------------------------------------------------------------------------------- /tests/numpy/columns/test_int.py: -------------------------------------------------------------------------------- 1 | try: 2 | import numpy as np 3 | except ImportError: 4 | np = None 5 | 6 | from tests.numpy.testcase import NumpyBaseTestCase 7 | 8 | 9 | class IntTestCase(NumpyBaseTestCase): 10 | n = 10 11 | 12 | def check_result(self, rv, col_type): 13 | self.assertarraysEqual(rv[0], np.array(range(self.n))) 14 | self.assertEqual(rv[0].dtype, col_type) 15 | 16 | def get_query(self, ch_type): 17 | with self.create_stream('a {}'.format(ch_type)): 18 | data = [np.array(range(self.n))] 19 | self.client.execute( 20 | 'INSERT INTO test (a) VALUES', data, columnar=True 21 | ) 22 | 23 | query = 'SELECT * FROM test' 24 | inserted = self.emit_cli(query) 25 | self.assertEqual( 26 | inserted, '\n'.join(str(x) for x in data[0]) + '\n' 27 | ) 28 | return self.client.execute(query, columnar=True) 29 | 30 | def test_int8(self): 31 | rv = self.get_query('int8') 32 | self.check_result(rv, np.int8) 33 | 34 | def test_int16(self): 35 | rv = self.get_query('int16') 36 | self.check_result(rv, np.int16) 37 | 38 | def test_int32(self): 39 | rv = self.get_query('int32') 40 | self.check_result(rv, np.int32) 41 | 42 | def test_int64(self): 43 | rv = self.get_query('int64') 44 | self.check_result(rv, np.int64) 45 | 46 | def test_uint8(self): 47 | rv = self.get_query('uint8') 48 | self.check_result(rv, np.uint8) 49 | 50 | def test_uint16(self): 51 | rv = self.get_query('uint16') 52 | self.check_result(rv, np.uint16) 53 | 54 | def test_uint32(self): 55 | rv = self.get_query('uint32') 56 | self.check_result(rv, np.uint32) 57 | 58 | def test_uint64(self): 59 | rv = self.get_query('uint64') 60 | self.check_result(rv, np.uint64) 61 | 62 | def test_insert_nan_into_non_nullable(self): 63 | with self.create_stream('a int32'): 64 | data = [ 65 | np.array([123, np.nan], dtype=object) 66 | ] 67 | self.client.execute( 68 | 'INSERT INTO test (a) VALUES', data, columnar=True 69 | ) 70 | 71 | query = 'SELECT * FROM test' 72 | inserted = self.emit_cli(query) 73 | self.assertEqual( 74 | inserted, 75 
| '123\n0\n' 76 | ) 77 | 78 | inserted = self.client.execute(query, columnar=True) 79 | self.assertarraysEqual(inserted[0], np.array([123, 0])) 80 | self.assertEqual(inserted[0].dtype, np.int32) 81 | 82 | def test_nullable(self): 83 | with self.create_stream('a nullable(int32)'): 84 | data = [np.array([2, None, 4, None, 8])] 85 | self.client.execute( 86 | 'INSERT INTO test (a) VALUES', data, columnar=True 87 | ) 88 | 89 | query = 'SELECT * FROM test' 90 | inserted = self.emit_cli(query) 91 | self.assertEqual(inserted, '2\n\\N\n4\n\\N\n8\n') 92 | 93 | inserted = self.client.execute(query, columnar=True) 94 | self.assertarraysEqual(inserted[0], data[0]) 95 | self.assertEqual(inserted[0].dtype, object) 96 | -------------------------------------------------------------------------------- /tests/numpy/columns/test_nullable.py: -------------------------------------------------------------------------------- 1 | try: 2 | import numpy as np 3 | except ImportError: 4 | np = None 5 | 6 | try: 7 | import pandas as pd 8 | except ImportError: 9 | pd = None 10 | 11 | from tests.numpy.testcase import NumpyBaseTestCase 12 | from proton_driver import errors 13 | 14 | ErrorCodes = errors.ErrorCodes 15 | 16 | 17 | class nullableTestCase(NumpyBaseTestCase): 18 | def test_simple(self): 19 | columns = 'a nullable(int32)' 20 | 21 | data = [np.array([3, None, 2], dtype=object)] 22 | with self.create_stream(columns): 23 | self.client.execute( 24 | 'INSERT INTO test (a) VALUES', data, columnar=True 25 | ) 26 | 27 | query = 'SELECT * FROM test' 28 | inserted = self.emit_cli(query) 29 | self.assertEqual( 30 | inserted, '3\n\\N\n2\n' 31 | ) 32 | 33 | inserted = self.client.execute(query, columnar=True) 34 | self.assertarraysEqual(inserted[0], data[0]) 35 | self.assertEqual(inserted[0].dtype, object) 36 | 37 | def test_simple_dataframe(self): 38 | columns = ( 39 | 'a int64, ' 40 | 'b nullable(float64), ' 41 | 'c nullable(string), ' 42 | 'd nullable(int64)' 43 | ) 44 | 45 | df = pd.DataFrame({ 46 | 'a': [1, 2, 3], 47 | 'b': [1.0, None, np.nan], 48 | 'c': ['a', None, np.nan], 49 | 'd': [1, None, None], 50 | }, dtype=object) 51 | expected = pd.DataFrame({ 52 | 'a': np.array([1, 2, 3], dtype=np.int64), 53 | 'b': np.array([1.0, None, np.nan], dtype=object), 54 | 'c': np.array(['a', None, None], dtype=object), 55 | 'd': np.array([1, None, None], dtype=object), 56 | }) 57 | 58 | with self.create_stream(columns): 59 | rv = self.client.insert_dataframe('INSERT INTO test VALUES', df) 60 | self.assertEqual(rv, 3) 61 | df2 = self.client.query_dataframe('SELECT * FROM test ORDER BY a') 62 | self.assertTrue(expected.equals(df2)) 63 | -------------------------------------------------------------------------------- /tests/numpy/columns/test_other.py: -------------------------------------------------------------------------------- 1 | from parameterized import parameterized 2 | 3 | from proton_driver import errors 4 | from proton_driver.columns.service import get_column_by_spec 5 | from proton_driver.context import Context 6 | 7 | from tests.numpy.testcase import NumpyBaseTestCase 8 | 9 | 10 | class OtherColumnsTestCase(NumpyBaseTestCase): 11 | def get_column(self, spec): 12 | ctx = Context() 13 | ctx.client_settings = {'strings_as_bytes': False, 'use_numpy': True} 14 | return get_column_by_spec(spec, {'context': ctx}) 15 | 16 | @parameterized.expand([ 17 | ("enum8('hello' = 1, 'world' = 2)", ), 18 | ('decimal(8, 4)', ), 19 | ('array(string)', ), 20 | ('tuple(string)', ), 21 | ('simple_aggregate_function(any, int32)', ), 22 | 
('map(string, string)', ), 23 | ('array(low_cardinality(string))', ) 24 | ]) 25 | def test_generic_type(self, spec): 26 | col = self.get_column(spec) 27 | self.assertIsNotNone(col) 28 | 29 | def test_get_unknown_column(self): 30 | with self.assertRaises(errors.UnknownTypeError) as e: 31 | self.get_column('Unicorn') 32 | 33 | self.assertIn('Unicorn', str(e.exception)) 34 | -------------------------------------------------------------------------------- /tests/numpy/test_external_tables.py: -------------------------------------------------------------------------------- 1 | try: 2 | import numpy as np 3 | import pandas as pd 4 | except ImportError: 5 | np = None 6 | pd = None 7 | 8 | from tests.numpy.testcase import NumpyBaseTestCase 9 | 10 | 11 | class ExternalTablesTestCase(NumpyBaseTestCase): 12 | def test_select(self): 13 | tables = [{ 14 | 'name': 'test', 15 | 'structure': [('x', 'int32'), ('y', 'string')], 16 | 'data': pd.DataFrame({ 17 | 'x': [100, 500], 18 | 'y': ['abc', 'def'] 19 | }) 20 | }] 21 | rv = self.client.execute( 22 | 'SELECT * FROM test', external_tables=tables, columnar=True 23 | ) 24 | self.assertarraysListEqual( 25 | rv, [np.array([100, 500]), np.array(['abc', 'def'])] 26 | ) 27 | 28 | def test_send_empty_table(self): 29 | tables = [{ 30 | 'name': 'test', 31 | 'structure': [('x', 'int32')], 32 | 'data': pd.DataFrame({'x': []}) 33 | }] 34 | rv = self.client.execute( 35 | 'SELECT * FROM test', external_tables=tables, columnar=True 36 | ) 37 | self.assertarraysListEqual(rv, []) 38 | 39 | def test_send_empty_table_structure(self): 40 | tables = [{ 41 | 'name': 'test', 42 | 'structure': [], 43 | 'data': pd.DataFrame() 44 | }] 45 | with self.assertRaises(ValueError) as e: 46 | self.client.execute( 47 | 'SELECT * FROM test', external_tables=tables, columnar=True 48 | ) 49 | 50 | self.assertIn('Empty table "test" structure', str(e.exception)) 51 | -------------------------------------------------------------------------------- /tests/numpy/testcase.py: -------------------------------------------------------------------------------- 1 | try: 2 | import numpy as np 3 | except ImportError: 4 | np = None 5 | 6 | from tests.numpy.util import check_numpy 7 | from tests.testcase import BaseTestCase 8 | 9 | 10 | class NumpyBaseTestCase(BaseTestCase): 11 | client_kwargs = {'settings': {'use_numpy': True}} 12 | 13 | @check_numpy 14 | def setUp(self): 15 | super(NumpyBaseTestCase, self).setUp() 16 | 17 | def assertarraysEqual(self, first, second): 18 | return self.assertTrue((first == second).all()) 19 | 20 | def assertarraysListEqual(self, first, second): 21 | self.assertEqual(len(first), len(second)) 22 | for x, y in zip(first, second): 23 | self.assertTrue((x == y).all()) 24 | -------------------------------------------------------------------------------- /tests/numpy/util.py: -------------------------------------------------------------------------------- 1 | from functools import wraps 2 | from unittest import SkipTest 3 | 4 | 5 | def check_numpy(f): 6 | @wraps(f) 7 | def wrapper(*args, **kwargs): 8 | try: 9 | return f(*args, **kwargs) 10 | except RuntimeError as e: 11 | if 'NumPy' in str(e): 12 | raise SkipTest('Numpy package is not installed') 13 | 14 | return wrapper 15 | -------------------------------------------------------------------------------- /tests/test_buffered_reader.py: -------------------------------------------------------------------------------- 1 | import socket 2 | from unittest import TestCase, mock 3 | 4 | from proton_driver.bufferedreader import 
BufferedSocketReader 5 | 6 | 7 | class BufferedReaderTestCase(TestCase): 8 | def test_overflow_signed_int_string_size(self): 9 | data = b'\xFF\xFE\xFC\xFE\xFE\xFE\xFE\xFE\x29\x80\x40\x00\x00\x01' 10 | 11 | def recv_into(buf): 12 | size = len(data) 13 | buf[0:size] = data 14 | return size 15 | 16 | with mock.patch('socket.socket') as mock_socket: 17 | mock_socket.return_value.recv_into.side_effect = recv_into 18 | reader = BufferedSocketReader(socket.socket(), 1024) 19 | 20 | # Trying to allocate huge amount of memory. 21 | with self.assertRaises(MemoryError): 22 | reader.read_strings(5, encoding='utf-8') 23 | -------------------------------------------------------------------------------- /tests/test_compression.py: -------------------------------------------------------------------------------- 1 | from datetime import date, datetime 2 | from unittest import TestCase 3 | 4 | from proton_driver import errors 5 | from proton_driver.client import Client 6 | from proton_driver.compression import get_compressor_cls 7 | from proton_driver.compression.lz4 import Compressor 8 | from .testcase import BaseTestCase, file_config 9 | 10 | 11 | class BaseCompressionTestCase(BaseTestCase): 12 | compression = False 13 | supported_compressions = file_config.get('db', 'compression').split(',') 14 | 15 | def _create_client(self): 16 | settings = None 17 | if self.compression: 18 | # Set server compression method explicitly 19 | # By default server sends blocks compressed by LZ4. 20 | method = self.compression 21 | if self.server_version > (19, ): 22 | method = method.upper() 23 | settings = {'network_compression_method': method} 24 | 25 | return Client( 26 | self.host, self.port, self.database, self.user, self.password, 27 | compression=self.compression, settings=settings 28 | ) 29 | 30 | def setUp(self): 31 | super(BaseCompressionTestCase, self).setUp() 32 | supported = ( 33 | self.compression is False or 34 | self.compression in self.supported_compressions 35 | ) 36 | 37 | if not supported: 38 | self.skipTest( 39 | 'Compression {} is not supported'.format(self.compression) 40 | ) 41 | 42 | def run_simple(self): 43 | with self.create_stream('a Date, b DateTime'): 44 | data = [(date(2012, 10, 25), datetime(2012, 10, 25, 14, 7, 19))] 45 | self.client.execute( 46 | 'INSERT INTO test (a, b) VALUES', data 47 | ) 48 | 49 | query = 'SELECT * FROM test' 50 | inserted = self.emit_cli(query) 51 | self.assertEqual(inserted, '2012-10-25\t2012-10-25 14:07:19\n') 52 | 53 | inserted = self.client.execute(query) 54 | self.assertEqual(inserted, data) 55 | 56 | def test(self): 57 | if self.compression is False: 58 | return 59 | 60 | self.run_simple() 61 | 62 | 63 | class LZ4ReadWriteTestCase(BaseCompressionTestCase): 64 | compression = 'lz4' 65 | 66 | 67 | class LZ4HCReadWriteTestCase(BaseCompressionTestCase): 68 | compression = 'lz4hc' 69 | 70 | 71 | class ZSTDReadWriteTestCase(BaseCompressionTestCase): 72 | compression = 'zstd' 73 | 74 | 75 | class MiscCompressionTestCase(TestCase): 76 | def test_default_compression(self): 77 | client = Client('localhost', compression=True) 78 | self.assertEqual(client.connection.compressor_cls, Compressor) 79 | 80 | def test_unknown_compressor(self): 81 | with self.assertRaises(errors.UnknownCompressionMethod) as e: 82 | get_compressor_cls('hello') 83 | 84 | self.assertEqual( 85 | e.exception.code, errors.ErrorCodes.UNKNOWN_COMPRESSION_METHOD 86 | ) 87 | 88 | 89 | class ReadByBlocksTestCase(BaseCompressionTestCase): 90 | compression = 'lz4' 91 | 92 | def test(self): 93 | with 
self.create_stream('a int32'): 94 | data = [(x % 200, ) for x in range(1000000)] 95 | 96 | self.client.execute( 97 | 'INSERT INTO test (a) VALUES', data 98 | ) 99 | 100 | query = 'SELECT * FROM test' 101 | 102 | inserted = self.client.execute(query) 103 | self.assertEqual(inserted, data) 104 | -------------------------------------------------------------------------------- /tests/test_errors.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | 3 | import proton_driver.errors as err 4 | 5 | 6 | def picklable(o): 7 | picked = pickle.loads(pickle.dumps(o)) 8 | assert repr(o) == repr(picked) 9 | assert str(o) == str(picked) 10 | 11 | 12 | def test_exception_picklable(): 13 | picklable(err.Error('foo')) 14 | picklable(err.Error(message='foo')) 15 | 16 | picklable(err.ServerException('foo', 0, Exception())) 17 | picklable(err.ServerException(message='foo', code=0, nested=Exception())) 18 | -------------------------------------------------------------------------------- /tests/test_external_tables.py: -------------------------------------------------------------------------------- 1 | 2 | from tests.testcase import BaseTestCase 3 | 4 | 5 | class ExternalTablesTestCase(BaseTestCase): 6 | def test_select(self): 7 | tables = [{ 8 | 'name': 'test', 9 | 'structure': [('x', 'int32'), ('y', 'array(int32)')], 10 | 'data': [ 11 | {'x': 100, 'y': [2, 4, 6, 8]}, 12 | {'x': 500, 'y': [1, 3, 5, 7]}, 13 | ] 14 | }] 15 | rv = self.client.execute('SELECT * FROM test', external_tables=tables) 16 | self.assertEqual(rv, [(100, [2, 4, 6, 8]), (500, [1, 3, 5, 7])]) 17 | 18 | def test_send_empty_table(self): 19 | tables = [{ 20 | 'name': 'test', 21 | 'structure': [('x', 'int32')], 22 | 'data': [] 23 | }] 24 | rv = self.client.execute('SELECT * FROM test', external_tables=tables) 25 | self.assertEqual(rv, []) 26 | 27 | def test_send_empty_table_structure(self): 28 | tables = [{ 29 | 'name': 'test', 30 | 'structure': [], 31 | 'data': [] 32 | }] 33 | with self.assertRaises(ValueError) as e: 34 | self.client.execute('SELECT * FROM test', external_tables=tables) 35 | 36 | self.assertIn('Empty table "test" structure', str(e.exception)) 37 | -------------------------------------------------------------------------------- /tests/test_opentelemetry.py: -------------------------------------------------------------------------------- 1 | from tests.testcase import BaseTestCase 2 | from tests.util import capture_logging 3 | 4 | 5 | class OpenTelemetryTestCase(BaseTestCase): 6 | required_server_version = (20, 11, 2) 7 | 8 | def test_server_logs(self): 9 | tracestate = 'tracestate' 10 | traceparent = '00-1af7651916cd43dd8448eb211c80319c-b7ad6b7169203331-01' 11 | 12 | settings = { 13 | 'opentelemetry_tracestate': tracestate, 14 | 'opentelemetry_traceparent': traceparent 15 | 16 | } 17 | with self.created_client(settings=settings) as client: 18 | with capture_logging('proton_driver.log', 'INFO') as buffer: 19 | settings = {'send_logs_level': 'trace'} 20 | query = 'SELECT 1' 21 | client.execute(query, settings=settings) 22 | value = buffer.getvalue() 23 | self.assertIn('OpenTelemetry', value) 24 | 25 | # ClickHouse 22.2+ use big-endian: 26 | # https://github.com/ClickHouse/ClickHouse/pull/33723 27 | if self.server_version >= (22, 2): 28 | tp = '8448eb211c80319c1af7651916cd43dd' 29 | else: 30 | tp = '1af7651916cd43dd8448eb211c80319c' 31 | self.assertIn(tp, value) 32 | 33 | def test_no_tracestate(self): 34 | traceparent = '00-1af7651916cd43dd8448eb211c80319c-b7ad6b7169203331-01' 35 | 
36 | settings = { 37 | 'opentelemetry_traceparent': traceparent 38 | 39 | } 40 | with self.created_client(settings=settings) as client: 41 | with capture_logging('proton_driver.log', 'INFO') as buffer: 42 | settings = {'send_logs_level': 'trace'} 43 | query = 'SELECT 1' 44 | client.execute(query, settings=settings) 45 | value = buffer.getvalue() 46 | self.assertIn('OpenTelemetry', value) 47 | # ClickHouse 22.2+ use big-endian: 48 | # https://github.com/ClickHouse/ClickHouse/pull/33723 49 | if self.server_version >= (22, 2): 50 | tp = '8448eb211c80319c1af7651916cd43dd' 51 | else: 52 | tp = '1af7651916cd43dd8448eb211c80319c' 53 | self.assertIn(tp, value) 54 | 55 | def test_bad_traceparent(self): 56 | settings = {'opentelemetry_traceparent': 'bad'} 57 | with self.created_client(settings=settings) as client: 58 | with self.assertRaises(ValueError) as e: 59 | client.execute('SELECT 1') 60 | 61 | self.assertEqual( 62 | str(e.exception), 63 | 'unexpected length 3, expected 55' 64 | ) 65 | 66 | traceparent = '00-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx-yyyyyyyyyyyyyyyy-01' 67 | settings = {'opentelemetry_traceparent': traceparent} 68 | with self.created_client(settings=settings) as client: 69 | with self.assertRaises(ValueError) as e: 70 | client.execute('SELECT 1') 71 | 72 | self.assertEqual( 73 | str(e.exception), 74 | 'Malformed traceparant header: {}'.format(traceparent) 75 | ) 76 | 77 | def test_bad_traceparent_version(self): 78 | settings = { 79 | 'opentelemetry_traceparent': 80 | '01-aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa-bbbbbbbbbbbbbbbb-01' 81 | } 82 | with self.created_client(settings=settings) as client: 83 | with self.assertRaises(ValueError) as e: 84 | client.execute('SELECT 1') 85 | 86 | self.assertEqual( 87 | str(e.exception), 88 | 'unexpected version 01, expected 00' 89 | ) 90 | -------------------------------------------------------------------------------- /tests/test_varint.py: -------------------------------------------------------------------------------- 1 | from io import BytesIO 2 | from unittest import TestCase 3 | 4 | from proton_driver.varint import read_varint, write_varint 5 | 6 | 7 | class VarIntTestCase(TestCase): 8 | def test_check_not_negative(self): 9 | n = 0x9FFFFFFF 10 | 11 | buf = BytesIO() 12 | write_varint(n, buf) 13 | val = buf.getvalue() 14 | self.assertEqual(b'\xFF\xFF\xFF\xFF\t', val) 15 | 16 | buf = BytesIO(val) 17 | buf.read_one = lambda: ord(buf.read(1)) 18 | m = read_varint(buf) 19 | self.assertEqual(m, n) 20 | -------------------------------------------------------------------------------- /tests/testcase.py: -------------------------------------------------------------------------------- 1 | import configparser 2 | from contextlib import contextmanager 3 | import subprocess 4 | from unittest import TestCase 5 | 6 | from proton_driver.client import Client 7 | from tests import log 8 | from tests.util import skip_by_server_version 9 | 10 | 11 | file_config = configparser.ConfigParser() 12 | file_config.read(['setup.cfg']) 13 | 14 | 15 | log.configure(file_config.get('log', 'level')) 16 | 17 | 18 | class BaseTestCase(TestCase): 19 | required_server_version = None 20 | server_version = None 21 | 22 | proton_client_binary = file_config.get('db', 'client') 23 | host = file_config.get('db', 'host') 24 | port = file_config.getint('db', 'port') 25 | database = file_config.get('db', 'database') 26 | user = file_config.get('db', 'user') 27 | password = file_config.get('db', 'password') 28 | 29 | client = None 30 | client_kwargs = None 31 | cli_client_kwargs = None 32 | 33 | 
@classmethod 34 | def emit_cli(cls, statement, database=None, encoding='utf-8', **kwargs): 35 | if database is None: 36 | database = cls.database 37 | 38 | args = [ 39 | cls.proton_client_binary, 40 | '--database', database, 41 | '--host', cls.host, 42 | '--port', str(cls.port), 43 | '--query', str(statement) 44 | ] 45 | 46 | for key, value in kwargs.items(): 47 | args.extend(['--' + key, str(value)]) 48 | 49 | process = subprocess.Popen( 50 | args, stdout=subprocess.PIPE, stderr=subprocess.PIPE 51 | ) 52 | output = process.communicate() 53 | out, err = output 54 | 55 | if err: 56 | raise RuntimeError( 57 | 'Error during communication. {}'.format(err) 58 | ) 59 | 60 | return out.decode(encoding) 61 | 62 | def _create_client(self, **kwargs): 63 | client_kwargs = { 64 | 'port': self.port, 65 | 'database': self.database, 66 | 'user': self.user, 67 | 'password': self.password 68 | } 69 | client_kwargs.update(kwargs) 70 | return Client(self.host, **client_kwargs) 71 | 72 | def created_client(self, **kwargs): 73 | return self._create_client(**kwargs) 74 | 75 | @classmethod 76 | def setUpClass(cls): 77 | version_str = cls.emit_cli('SELECT version()').strip() 78 | cls.server_version = tuple(int(x) for x in version_str.split('.')) 79 | 80 | super(BaseTestCase, cls).setUpClass() 81 | 82 | def setUp(self): 83 | super(BaseTestCase, self).setUp() 84 | 85 | required = self.required_server_version 86 | 87 | if required and required > self.server_version: 88 | skip_by_server_version(self, self.required_server_version) 89 | 90 | if callable(self.client_kwargs): 91 | client_kwargs = self.client_kwargs(self.server_version) 92 | else: 93 | client_kwargs = self.client_kwargs 94 | client_kwargs = client_kwargs or {} 95 | self.client = self._create_client(**client_kwargs) 96 | 97 | def tearDown(self): 98 | self.client.disconnect() 99 | super(BaseTestCase, self).tearDown() 100 | 101 | @contextmanager 102 | def create_stream(self, columns, **kwargs): 103 | if self.cli_client_kwargs: 104 | if callable(self.cli_client_kwargs): 105 | cli_client_kwargs = self.cli_client_kwargs() 106 | if cli_client_kwargs: 107 | kwargs.update(cli_client_kwargs) 108 | else: 109 | kwargs.update(self.cli_client_kwargs) 110 | 111 | self.emit_cli( 112 | 'CREATE STREAM test ({}) ''ENGINE = Memory'.format(columns), 113 | **kwargs 114 | ) 115 | try: 116 | yield 117 | except Exception: 118 | raise 119 | finally: 120 | self.emit_cli('DROP STREAM test') 121 | -------------------------------------------------------------------------------- /tests/util.py: -------------------------------------------------------------------------------- 1 | import os 2 | from contextlib import contextmanager 3 | from functools import wraps 4 | import logging 5 | from io import StringIO 6 | from time import tzset 7 | from unittest.mock import patch 8 | 9 | import tzlocal 10 | 11 | 12 | def skip_by_server_version(testcase, version_required): 13 | testcase.skipTest( 14 | 'Minimum revision required: {}'.format( 15 | '.'.join(str(x) for x in version_required) 16 | ) 17 | ) 18 | 19 | 20 | def require_server_version(*version_required): 21 | def check(f): 22 | @wraps(f) 23 | def wrapper(*args, **kwargs): 24 | self = args[0] 25 | self.client.connection.connect() 26 | 27 | current = self.client.connection.server_info.version_tuple() 28 | 29 | if version_required <= current: 30 | return f(*args, **kwargs) 31 | else: 32 | skip_by_server_version(self, version_required) 33 | 34 | return wrapper 35 | return check 36 | 37 | 38 | class LoggingCapturer(object): 39 | def
__init__(self, logger_name, level): 40 | self.old_stdout_handlers = [] 41 | self.logger = logging.getLogger(logger_name) 42 | self.level = level 43 | super(LoggingCapturer, self).__init__() 44 | 45 | def __enter__(self): 46 | buffer = StringIO() 47 | 48 | self.new_handler = logging.StreamHandler(buffer) 49 | self.logger.addHandler(self.new_handler) 50 | self.old_logger_level = self.logger.level 51 | self.logger.setLevel(self.level) 52 | 53 | return buffer 54 | 55 | def __exit__(self, *exc_info): 56 | self.logger.setLevel(self.old_logger_level) 57 | self.logger.removeHandler(self.new_handler) 58 | 59 | 60 | capture_logging = LoggingCapturer 61 | 62 | 63 | def bust_tzlocal_cache(): 64 | try: 65 | tzlocal.unix._cache_tz = None 66 | tzlocal.unix._cache_tz_name = None 67 | except AttributeError: 68 | pass 69 | 70 | try: 71 | tzlocal.win32._cache_tz = None 72 | tzlocal.win32._cache_tz_name = None 73 | except AttributeError: 74 | pass 75 | 76 | 77 | @contextmanager 78 | def patch_env_tz(tz_name): 79 | bust_tzlocal_cache() 80 | 81 | # Although in many cases, changing the TZ environment variable may 82 | # affect the output of functions like localtime() without calling 83 | # tzset(), this behavior should not be relied on. 84 | # https://docs.python.org/3/library/time.html#time.tzset 85 | with patch.dict(os.environ, {'TZ': tz_name}): 86 | tzset() 87 | yield 88 | 89 | tzset() 90 | -------------------------------------------------------------------------------- /testsrequire.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | USE_NUMPY = bool(int(os.getenv('USE_NUMPY', '0'))) 5 | 6 | tests_require = [ 7 | 'pytest', 8 | 'parameterized', 9 | 'freezegun', 10 | 'zstd', 11 | 'clickhouse-cityhash>=1.0.2.1' 12 | ] 13 | 14 | if sys.implementation.name == 'pypy': 15 | tests_require.append('lz4<=3.0.1') 16 | else: 17 | tests_require.append('lz4') 18 | 19 | if USE_NUMPY: 20 | tests_require.extend(['numpy', 'pandas']) 21 | 22 | try: 23 | from pip import main as pipmain 24 | except ImportError: 25 | from pip._internal import main as pipmain 26 | 27 | pipmain(['install'] + tests_require) 28 | -------------------------------------------------------------------------------- /valgrind.supp: -------------------------------------------------------------------------------- 1 | { 2 | 3 | # See https://bugs.python.org/issue42176 4 | Memcheck:Cond 5 | fun:PyUnicode_Decode 6 | } 7 | --------------------------------------------------------------------------------
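A minimal sketch of the varint round trip that tests/test_varint.py exercises, with BytesIO standing in for the socket buffer; read_varint expects a read_one method returning one byte at a time, which the shim below supplies:

from io import BytesIO

from proton_driver.varint import read_varint, write_varint

buf = BytesIO()
write_varint(0x9FFFFFFF, buf)
assert buf.getvalue() == b'\xFF\xFF\xFF\xFF\t'  # LEB128 bytes of 0x9FFFFFFF

buf = BytesIO(buf.getvalue())
buf.read_one = lambda: ord(buf.read(1))  # read_varint consumes single bytes
assert read_varint(buf) == 0x9FFFFFFF

--------------------------------------------------------------------------------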