├── .docker └── clickhouse │ ├── single_node │ ├── config.xml │ └── docker_related_config.xml │ ├── single_node_tls │ ├── Dockerfile │ ├── certificates │ │ ├── ca.crt │ │ ├── ca.key │ │ ├── client.crt │ │ ├── client.key │ │ ├── server.crt │ │ └── server.key │ ├── config.xml │ ├── docker_related_config.xml │ └── users.xml │ └── users.xml ├── .dockerignore ├── .github ├── ISSUE_TEMPLATE │ ├── bug_report.md │ └── feature_request.md ├── pull_request_template.md └── workflows │ ├── clickhouse_ci.yml │ ├── on_push.yml │ └── publish.yml ├── .gitignore ├── CHANGELOG.md ├── CONTRIBUTING.md ├── Dockerfile ├── LICENSE ├── MANIFEST.in ├── README.md ├── clickhouse_connect ├── __init__.py ├── __version__.py ├── cc_sqlalchemy │ ├── __init__.py │ ├── datatypes │ │ ├── __init__.py │ │ ├── base.py │ │ └── sqltypes.py │ ├── ddl │ │ ├── __init__.py │ │ ├── custom.py │ │ └── tableengine.py │ ├── dialect.py │ ├── inspector.py │ └── sql │ │ ├── __init__.py │ │ ├── ddlcompiler.py │ │ └── preparer.py ├── common.py ├── datatypes │ ├── __init__.py │ ├── base.py │ ├── container.py │ ├── dynamic.py │ ├── format.py │ ├── geometric.py │ ├── network.py │ ├── numeric.py │ ├── postinit.py │ ├── registry.py │ ├── special.py │ ├── string.py │ └── temporal.py ├── dbapi │ ├── __init__.py │ ├── connection.py │ └── cursor.py ├── driver │ ├── __init__.py │ ├── asyncclient.py │ ├── binding.py │ ├── buffer.py │ ├── client.py │ ├── common.py │ ├── compression.py │ ├── constants.py │ ├── context.py │ ├── ctypes.py │ ├── dataconv.py │ ├── ddl.py │ ├── errors.py │ ├── exceptions.py │ ├── external.py │ ├── httpclient.py │ ├── httputil.py │ ├── insert.py │ ├── models.py │ ├── npconv.py │ ├── npquery.py │ ├── options.py │ ├── parser.py │ ├── query.py │ ├── summary.py │ ├── tools.py │ ├── transform.py │ ├── types.py │ └── tzutil.py ├── driverc │ ├── .gitignore │ ├── __init__.pxd │ ├── __init__.py │ ├── buffer.pxd │ ├── buffer.pyx │ ├── dataconv.pyx │ └── npconv.pyx ├── entry_points.py ├── json_impl.py ├── py.typed └── tools │ ├── __init__.py │ ├── datagen.py │ └── testing.py ├── docker-compose.yml ├── examples ├── benchmark.py ├── clear_test_databases.py ├── insert_examples.py ├── pandas_examples.py ├── params_example.py ├── read_perf.py ├── run_async.py ├── ssh_tunnels.py ├── write_into_file.py └── write_perf.py ├── playtest.py ├── pylintrc ├── pyproject.toml ├── setup.py ├── test_dist ├── .gitignore └── superset_config.py └── tests ├── __init__.py ├── conftest.py ├── helpers.py ├── integration_tests ├── __init__.py ├── actors.csv ├── conftest.py ├── datasets.py ├── json_test.ndjson ├── movies.csv ├── movies.csv.gz ├── movies.parquet ├── proxy_ca_cert.crt ├── test_arrow.py ├── test_async_client.py ├── test_client.py ├── test_contexts.py ├── test_dynamic.py ├── test_external_data.py ├── test_formats.py ├── test_geometric.py ├── test_inserts.py ├── test_jwt_auth.py ├── test_multithreading.py ├── test_native.py ├── test_native_fuzz.py ├── test_numpy.py ├── test_pandas.py ├── test_params.py ├── test_protocol_version.py ├── test_proxy.py ├── test_raw_insert.py ├── test_session_id.py ├── test_sqlalchemy │ ├── __init__.py │ ├── conftest.py │ ├── test_basics.py │ ├── test_ddl.py │ ├── test_inserts.py │ └── test_reflect.py ├── test_streaming.py ├── test_timezones.py ├── test_tls.py └── test_tools.py ├── test_requirements.txt ├── timings.py └── unit_tests ├── __init__.py ├── test_chtypes.py ├── test_driver ├── __init__.py ├── binary.py ├── test_buffer.py ├── test_data.py ├── test_formats.py ├── test_insert.py ├── test_native_fuzz.py ├── 
test_native_read.py ├── test_native_write.py ├── test_params.py ├── test_parser.py ├── test_query.py └── test_settings.py └── test_sqlalchemy ├── __init__.py ├── test_ddl.py └── test_types.py /.docker/clickhouse/single_node/config.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | default 5 | default 6 | 5368709120 7 | 8 | /var/lib/clickhouse/ 9 | /var/lib/clickhouse/tmp/ 10 | /var/lib/clickhouse/user_files/ 11 | /var/lib/clickhouse/access/ 12 | 3 13 | /var/lib/clickhouse/format_schemas/ 14 | 15 | 16 | warning 17 | /var/log/clickhouse-server/clickhouse-server.log 18 | /var/log/clickhouse-server/clickhouse-server.err.log 19 | 1000M 20 | 10 21 | 1 22 | 23 | 24 | 25 | system 26 | query_log
27 | toYYYYMM(event_date) 28 | 1000 29 |
30 | 31 | 32 | system 33 | session_log
34 |
35 | 36 | 37 | 38 | users.xml 39 | 40 | 41 | 42 | SQL_ 43 |
44 | -------------------------------------------------------------------------------- /.docker/clickhouse/single_node/docker_related_config.xml: -------------------------------------------------------------------------------- 1 | 2 | 0.0.0.0 3 | 8123 4 | 9000 5 | -------------------------------------------------------------------------------- /.docker/clickhouse/single_node_tls/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM clickhouse/clickhouse-server:25.1-alpine 2 | COPY .docker/clickhouse/single_node_tls/certificates /etc/clickhouse-server/certs 3 | RUN chown clickhouse:clickhouse -R /etc/clickhouse-server/certs \ 4 | && chmod 600 /etc/clickhouse-server/certs/* \ 5 | && chmod 755 /etc/clickhouse-server/certs 6 | -------------------------------------------------------------------------------- /.docker/clickhouse/single_node_tls/certificates/ca.crt: -------------------------------------------------------------------------------- 1 | -----BEGIN CERTIFICATE----- 2 | MIICODCCAd+gAwIBAgIUBCAqnuDk7oXPEOQ+80TFOg+0DJowCgYIKoZIzj0EAwIw 3 | ajELMAkGA1UEBhMCVVMxETAPBgNVBAgMCENvbG9yYWRvMQ8wDQYDVQQHDAZEZW52 4 | ZXIxGDAWBgNVBAoMD0NsaWNrSG91c2UgSW5jLjEdMBsGA1UEAwwUY2xpY2tob3Vz 5 | ZV90ZXN0X3Jvb3QwHhcNMjUwNTI1MjA0NTEyWhcNNDUwNTI1MjA0NTEyWjBqMQsw 6 | CQYDVQQGEwJVUzERMA8GA1UECAwIQ29sb3JhZG8xDzANBgNVBAcMBkRlbnZlcjEY 7 | MBYGA1UECgwPQ2xpY2tIb3VzZSBJbmMuMR0wGwYDVQQDDBRjbGlja2hvdXNlX3Rl 8 | c3Rfcm9vdDBZMBMGByqGSM49AgEGCCqGSM49AwEHA0IABPGo86Zr9WA7KZoBnBPi 9 | owdksQECdv2sJJD/0gb48Hpw9Im0BuX8gOb8YT7+yJm56nmz0tTV8CLeBwpC1ylT 10 | w5+jYzBhMB0GA1UdDgQWBBSSPtUyuGF0HFuucyfFfWwWMAnF9jAfBgNVHSMEGDAW 11 | gBSSPtUyuGF0HFuucyfFfWwWMAnF9jAPBgNVHRMBAf8EBTADAQH/MA4GA1UdDwEB 12 | /wQEAwIChDAKBggqhkjOPQQDAgNHADBEAiBBbvc42/8dPV6JJGvEIgcg7bzO5Bbw 13 | MNdXLiuxYAqxugIgJMyiLt2i3KOb69fljOA3dhApntjf8NltDozbm3wLLWs= 14 | -----END CERTIFICATE----- 15 | 16 | -------------------------------------------------------------------------------- /.docker/clickhouse/single_node_tls/certificates/ca.key: -------------------------------------------------------------------------------- 1 | -----BEGIN EC PARAMETERS----- 2 | BggqhkjOPQMBBw== 3 | -----END EC PARAMETERS----- 4 | -----BEGIN EC PRIVATE KEY----- 5 | MHcCAQEEIJnlsMN+3VCxicEQcANLIM+4gMiItWwFam3moYINelVfoAoGCCqGSM49 6 | AwEHoUQDQgAE8ajzpmv1YDspmgGcE+KjB2SxAQJ2/awkkP/SBvjwenD0ibQG5fyA 7 | 5vxhPv7ImbnqebPS1NXwIt4HCkLXKVPDnw== 8 | -----END EC PRIVATE KEY----- 9 | -------------------------------------------------------------------------------- /.docker/clickhouse/single_node_tls/certificates/client.crt: -------------------------------------------------------------------------------- 1 | -----BEGIN CERTIFICATE----- 2 | MIICGzCCAcGgAwIBAgIUeggQ6+OCjtT3i7jASzwA1qfdDoMwCgYIKoZIzj0EAwIw 3 | ajELMAkGA1UEBhMCVVMxETAPBgNVBAgMCENvbG9yYWRvMQ8wDQYDVQQHDAZEZW52 4 | ZXIxGDAWBgNVBAoMD0NsaWNrSG91c2UgSW5jLjEdMBsGA1UEAwwUY2xpY2tob3Vz 5 | ZV90ZXN0X3Jvb3QwHhcNMjUwNTI1MjA0NTI1WhcNNDUwNTI1MjA0NTI1WjBfMQsw 6 | CQYDVQQGEwJVUzERMA8GA1UECAwIQ29sb3JhZG8xDzANBgNVBAcMBkRlbnZlcjEY 7 | MBYGA1UECgwPQ2xpY2tIb3VzZSBJbmMuMRIwEAYDVQQDDAljZXJ0X3VzZXIwWTAT 8 | BgcqhkjOPQIBBggqhkjOPQMBBwNCAASBIakdBXGwadGXUOrfXPfq8UVUvE2V5T2N 9 | pBpRTJFuQGPP2NOS9auwsZiYsGZevuFP4/JtBIbVURMsG9TxDUdto1AwTjAdBgNV 10 | HQ4EFgQUJuFP4dlFGBW3wK6vUkqvSxaLMhswDAYDVR0TAQH/BAIwADAfBgNVHSME 11 | GDAWgBSSPtUyuGF0HFuucyfFfWwWMAnF9jAKBggqhkjOPQQDAgNIADBFAiA69ags 12 | M/lvNu9mi5WkQArOqf9kuguuL9EcO3VUOXnijwIhANSbcxfVaYkfdFXvLdmlxCYS 13 | JuwjN4xF1OU+JpjJPFBk 14 | -----END CERTIFICATE----- 15 | 16 | 
-------------------------------------------------------------------------------- /.docker/clickhouse/single_node_tls/certificates/client.key: -------------------------------------------------------------------------------- 1 | -----BEGIN EC PARAMETERS----- 2 | BggqhkjOPQMBBw== 3 | -----END EC PARAMETERS----- 4 | -----BEGIN EC PRIVATE KEY----- 5 | MHcCAQEEIJOyEogF0IPoVr1hkJ9wjp/6zhUH1LDgtay+OjG1/9XnoAoGCCqGSM49 6 | AwEHoUQDQgAEgSGpHQVxsGnRl1Dq31z36vFFVLxNleU9jaQaUUyRbkBjz9jTkvWr 7 | sLGYmLBmXr7hT+PybQSG1VETLBvU8Q1HbQ== 8 | -----END EC PRIVATE KEY----- 9 | -------------------------------------------------------------------------------- /.docker/clickhouse/single_node_tls/certificates/server.crt: -------------------------------------------------------------------------------- 1 | -----BEGIN CERTIFICATE----- 2 | MIICTDCCAfOgAwIBAgIUeggQ6+OCjtT3i7jASzwA1qfdDoQwCgYIKoZIzj0EAwIw 3 | ajELMAkGA1UEBhMCVVMxETAPBgNVBAgMCENvbG9yYWRvMQ8wDQYDVQQHDAZEZW52 4 | ZXIxGDAWBgNVBAoMD0NsaWNrSG91c2UgSW5jLjEdMBsGA1UEAwwUY2xpY2tob3Vz 5 | ZV90ZXN0X3Jvb3QwHhcNMjUwNTI1MjA0NTQwWhcNNDUwNTI1MjA0NTQwWjBtMQsw 6 | CQYDVQQGEwJVUzERMA8GA1UECAwIQ29sb3JhZG8xDzANBgNVBAcMBkRlbnZlcjEY 7 | MBYGA1UECgwPQ2xpY2tIb3VzZSBJbmMuMSAwHgYDVQQDDBdjbGlja2hvdXNlX3Rl 8 | c3Rfc2VydmVyMTBZMBMGByqGSM49AgEGCCqGSM49AwEHA0IABGGOK+jLXwPjuLXw 9 | mqDqb3IjXud0xYCS7I+FcUuBbU+lTbCTcO+lRuQpTFQ+Uqw3C4cQoniRylquuvBY 10 | bEpPYOajdDByMB0GA1UdDgQWBBTE+zb6QpJkmth2BMqpf6VAsXZFkDAMBgNVHRMB 11 | Af8EAjAAMCIGA1UdEQQbMBmCF3NlcnZlcjEuY2xpY2tob3VzZS50ZXN0MB8GA1Ud 12 | IwQYMBaAFJI+1TK4YXQcW65zJ8V9bBYwCcX2MAoGCCqGSM49BAMCA0cAMEQCIFo+ 13 | iq9g/pUtmo6k/9cMvKxw1VmmLow2tdMuZtoofnHVAiAOSnN9CVQ8I9vLWwhoSRpq 14 | WsGApCnmNK/8tY1LTdQcLw== 15 | -----END CERTIFICATE----- 16 | 17 | 18 | -------------------------------------------------------------------------------- /.docker/clickhouse/single_node_tls/certificates/server.key: -------------------------------------------------------------------------------- 1 | -----BEGIN EC PARAMETERS----- 2 | BggqhkjOPQMBBw== 3 | -----END EC PARAMETERS----- 4 | -----BEGIN EC PRIVATE KEY----- 5 | MHcCAQEEIHU7VYTo4pduP7Q2wlE4zgg0Ruh3KFlwfdz1EHIanFgIoAoGCCqGSM49 6 | AwEHoUQDQgAEYY4r6MtfA+O4tfCaoOpvciNe53TFgJLsj4VxS4FtT6VNsJNw76VG 7 | 5ClMVD5SrDcLhxCieJHKWq668FhsSk9g5g== 8 | -----END EC PRIVATE KEY----- 9 | -------------------------------------------------------------------------------- /.docker/clickhouse/single_node_tls/config.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | default 5 | default 6 | 7 | 5368709120 8 | 9 | /var/lib/clickhouse/ 10 | /var/lib/clickhouse/tmp/ 11 | /var/lib/clickhouse/user_files/ 12 | /var/lib/clickhouse/access/ 13 | /var/lib/clickhouse/format_schemas/ 14 | 15 | 16 | warning 17 | /var/log/clickhouse-server/clickhouse-server.log 18 | /var/log/clickhouse-server/clickhouse-server.err.log 19 | 1000M 20 | 10 21 | 1 22 | 23 | 24 | 25 | 26 | /etc/clickhouse-server/certs/server.crt 27 | /etc/clickhouse-server/certs/server.key 28 | relaxed 29 | /etc/clickhouse-server/certs/ca.crt 30 | true 31 | sslv2,sslv3,tlsv1 32 | true 33 | 34 | 35 | 36 | 37 | system 38 | query_log
39 | toYYYYMM(event_date) 40 | 1000 41 |
42 | 43 | SQL_ 44 | 45 | 46 | 47 | users.xml 48 | 49 | 50 |
51 | -------------------------------------------------------------------------------- /.docker/clickhouse/single_node_tls/docker_related_config.xml: -------------------------------------------------------------------------------- 1 | 2 | 0.0.0.0 3 | 8443 4 | 9440 5 | -------------------------------------------------------------------------------- /.docker/clickhouse/single_node_tls/users.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | random 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | ::/0 15 | 16 | default 17 | default 18 | 1 19 | 20 | 21 | 22 | cert_user 23 | 24 | default 25 | 26 | 27 | 28 | 29 | 30 | 31 | 3600 32 | 0 33 | 0 34 | 0 35 | 0 36 | 0 37 | 38 | 39 | 40 | 41 | -------------------------------------------------------------------------------- /.docker/clickhouse/users.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | random 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | ::/0 15 | 16 | default 17 | default 18 | 1 19 | 20 | 21 | 22 | 23 | 24 | 25 | 3600 26 | 0 27 | 0 28 | 0 29 | 0 30 | 0 31 | 32 | 33 | 34 | 35 | -------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- 1 | # Repo stuff not needed for docker 2 | tests 3 | docs 4 | test_dist 5 | .github 6 | Dockerfile 7 | 8 | # Temporary directories of various kinds 9 | dev 10 | dist 11 | cache 12 | build 13 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Something not working right? Help us fix it! 4 | title: '' 5 | labels: 'bug' 6 | assignees: '' 7 | 8 | --- 9 | 10 | 11 | ### Describe the bug 12 | 13 | ### Steps to reproduce 14 | 1. 15 | 2. 16 | 3. 17 | 18 | ### Expected behaviour 19 | 20 | ### Code example 21 | ```python 22 | import clickhouse_connect 23 | ``` 24 | 25 | ### clickhouse-connect and/or ClickHouse server logs 26 | 27 | ### Configuration 28 | #### Environment 29 | * clickhouse-connect version: 30 | * Python version: 31 | * Operating system: 32 | 33 | 34 | #### ClickHouse server 35 | * ClickHouse Server version: 36 | * ClickHouse Server non-default settings, if any: 37 | * `CREATE TABLE` statements for tables involved: 38 | * Sample data for these tables, use [clickhouse-obfuscator](https://github.com/ClickHouse/ClickHouse/blob/master/programs/obfuscator/Obfuscator.cpp#L42-L80) if necessary 39 | 40 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: What would you like to add to clickhouse-connect? 4 | title: '' 5 | labels: 'enhancement' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? Please describe.** 11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 12 | 13 | **Describe the solution you'd like** 14 | A clear and concise description of what you want to happen. 15 | 16 | **Describe alternatives you've considered** 17 | A clear and concise description of any alternative solutions or features you've considered. 18 | 19 | **Additional context** 20 | Add any other context or screenshots about the feature request here. 
21 | -------------------------------------------------------------------------------- /.github/pull_request_template.md: -------------------------------------------------------------------------------- 1 | ## Summary 2 | 3 | 4 | ## Checklist 5 | Delete items not relevant to your PR: 6 | - [ ] Unit and integration tests covering the common scenarios were added 7 | - [ ] A human-readable description of the changes was provided to include in CHANGELOG 8 | - [ ] For significant changes, documentation in https://github.com/ClickHouse/clickhouse-docs was updated with further explanations or tutorials 9 | -------------------------------------------------------------------------------- /.github/workflows/clickhouse_ci.yml: -------------------------------------------------------------------------------- 1 | name: ClickHouse CI Tests 2 | 3 | on: 4 | workflow_dispatch: 5 | schedule: 6 | - cron: '34 17 * * *' 7 | 8 | jobs: 9 | head: 10 | runs-on: ubuntu-latest 11 | name: ClickHouse CI Tests 12 | env: 13 | CLICKHOUSE_CONNECT_TEST_DOCKER: 'False' 14 | CLICKHOUSE_CONNECT_TEST_FUZZ: 50 15 | steps: 16 | - name: Checkout 17 | uses: actions/checkout@v4 18 | - name: Set up Python 3.11 19 | uses: actions/setup-python@v5 20 | with: 21 | python-version: 3.11 22 | - name: Install pip 23 | run: python -m pip install --upgrade pip 24 | - name: Install Test Dependencies 25 | run: pip install -r tests/test_requirements.txt 26 | - name: Build cython extensions 27 | run: python setup.py build_ext --inplace 28 | - name: "Add distribution info" # This lets SQLAlchemy find entry points 29 | run: python setup.py develop 30 | 31 | - name: run ClickHouse Cloud tests 32 | env: 33 | CLICKHOUSE_CONNECT_TEST_PORT: 8443 34 | CLICKHOUSE_CONNECT_TEST_CLOUD: 'True' 35 | CLICKHOUSE_CONNECT_TEST_HOST: ${{ secrets.INTEGRATIONS_TEAM_TESTS_CLOUD_HOST_SMT }} 36 | CLICKHOUSE_CONNECT_TEST_PASSWORD: ${{ secrets.INTEGRATIONS_TEAM_TESTS_CLOUD_PASSWORD_SMT }} 37 | CLICKHOUSE_CONNECT_TEST_JWT_SECRET: ${{ secrets.INTEGRATIONS_TEAM_TESTS_CLOUD_JWT_DESERT_VM_43 }} 38 | SQLALCHEMY_SILENCE_UBER_WARNING: 1 39 | run: pytest tests/integration_tests 40 | 41 | - name: Run ClickHouse Container (LATEST) 42 | run: CLICKHOUSE_CONNECT_TEST_CH_VERSION=latest docker compose up -d clickhouse 43 | - name: Run LATEST tests 44 | run: pytest tests/integration_tests 45 | - name: remove latest container 46 | run: docker compose down -v 47 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Test data 2 | .pytest_cache 3 | 4 | # Development Python environments 5 | *venv 6 | 7 | # IDE files 8 | *.idea 9 | .vscode 10 | 11 | # Build artifacts 12 | /dist 13 | /build 14 | *.egg-info/ 15 | 16 | # Temporary development stuff (Manual tests, etc.) 17 | /dev 18 | test.env 19 | .env 20 | .dev_version 21 | 22 | # Python cruft 23 | *.pyc 24 | .python-version 25 | 26 | # Apple garbage 27 | .DS_Store 28 | 29 | # C extensions 30 | *.pyd 31 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | ## Contributing guidelines 2 | 3 | ClickHouse-connect is an open-source project, and we welcome any contributions from the community. 4 | Please share your ideas, contribute to the codebase, and help us maintain up-to-date documentation. 
 5 | 
 6 | ## Setting up your environment
 7 | 
 8 | ### Prerequisites
 9 | 
 10 | * Python 3.11+
 11 | * Docker and the [Compose plugin](https://docs.docker.com/compose/install/)
 12 | 
 13 | ### Create a fork of the repository and clone it
 14 | 
 15 | ```bash
 16 | git clone https://github.com/[YOUR_USERNAME]/clickhouse-connect
 17 | cd clickhouse-connect
 18 | ```
 19 | 
 20 | ### Add PYTHONPATH
 21 | 
 22 | Add the project directory to the `PYTHONPATH` environment variable to make the driver sources available for import.
 23 | 
 24 | ```bash
 25 | export PYTHONPATH="/absolute/path/to/clickhouse-connect"
 26 | ```
 27 | 
 28 | ### Prepare a new virtual environment
 29 | 
 30 | You can either use PyCharm for this, or follow [the instructions on the official website](https://docs.python.org/3/tutorial/venv.html) and set it up via the command line.
 31 | 
 32 | ### Install dependencies
 33 | 
 34 | ```bash
 35 | python -m pip install --upgrade pip
 36 | pip install setuptools wheel
 37 | pip install -r tests/test_requirements.txt
 38 | ```
 39 | 
 40 | ### Run the setup script
 41 | 
 42 | The driver uses several Cython extensions that provide additional performance improvements
 43 | (see the [clickhouse_connect/driverc](clickhouse_connect/driverc) directory).
 44 | To compile the extensions, run the following command:
 45 | 
 46 | ```bash
 47 | python setup.py build_ext --inplace
 48 | ```
 49 | 
 50 | Additionally, this command is required to provide SQLAlchemy entrypoints:
 51 | 
 52 | ```bash
 53 | python setup.py develop
 54 | ```
 55 | 
 56 | ### Add /etc/hosts entry
 57 | 
 58 | Required for TLS tests.
 59 | The generated certificates assume TLS requests use `server1.clickhouse.test` as the hostname.
 60 | See [test_tls.py](tests/integration_tests/test_tls.py) for more details.
 61 | 
 62 | ```bash
 63 | sudo -- sh -c "echo 127.0.0.1 server1.clickhouse.test >> /etc/hosts"
 64 | ```
 65 | 
 66 | ### PyCharm setup
 67 | 
 68 | If you use PyCharm as your IDE, make sure that you have `clickhouse-connect` added to the project structure as a source path.
 69 | Go to Settings -> Project (clickhouse-connect) -> Project structure, right-click on the `clickhouse-connect` folder, and mark it as "Sources".
 70 | 
 71 | ## Testing
 72 | 
 73 | ### Start ClickHouse in Docker
 74 | 
 75 | The tests require two running ClickHouse instances.
 76 | One should use default plain authentication (for the integration tests), and the other should use a TLS configuration (for the TLS tests only).
 77 | 
 78 | The integration tests will start and stop the ClickHouse instance automatically.
 79 | However, this adds a few seconds to each run, which might not be ideal when you run a single test (using PyCharm, for example).
 80 | To disable this behavior, set the `CLICKHOUSE_CONNECT_TEST_DOCKER` environment variable to `0`.
 81 | 
 82 | ```bash
 83 | export CLICKHOUSE_CONNECT_TEST_DOCKER=0
 84 | ```
 85 | 
 86 | The easiest way to start all the required ClickHouse instances is to use the provided Docker Compose file (the integration tests [setup script](tests/integration_tests/conftest.py) uses the same file).
 87 | 
 88 | ```bash
 89 | docker compose up -d
 90 | ```
 91 | 
 92 | ### Run the tests
 93 | 
 94 | The project uses [pytest](https://docs.pytest.org/) as a test runner.
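Before running the full suite, you can sanity-check your environment with a quick standalone script (a minimal sketch; it assumes the plain Docker Compose instance started above is listening on `localhost:8123` with the default user):

```python
import clickhouse_connect

# Talks to the plain (non-TLS) instance started via docker compose
client = clickhouse_connect.get_client(host='localhost', port=8123, username='default')
print(client.query('SELECT version()').result_rows)
```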
 95 | To run all the tests (unit and integration), execute the following command:
 96 | 
 97 | ```bash
 98 | pytest tests
 99 | ```
 100 | 
 101 | If you need to run the unit tests only:
 102 | 
 103 | ```bash
 104 | pytest tests/unit_tests
 105 | ```
 106 | 
 107 | Or the integration tests only:
 108 | 
 109 | ```bash
 110 | pytest tests/integration_tests
 111 | ```
 112 | 
 113 | ### Run the TLS integration tests
 114 | 
 115 | These tests require the `CLICKHOUSE_CONNECT_TEST_TLS` environment variable to be set to `1`; otherwise, they will be skipped.
 116 | Additionally, the TLS ClickHouse instance should be running (see [docker-compose.yml](docker-compose.yml)).
 117 | 
 118 | ```bash
 119 | CLICKHOUSE_CONNECT_TEST_TLS=1 pytest tests/integration_tests/test_tls.py
 120 | ```
 121 | 
 122 | ### Running the integration tests with ClickHouse Cloud
 123 | 
 124 | If you want to run the tests against your ClickHouse Cloud instance instead of the local ClickHouse instance running in Docker, you will need a few additional environment variables.
 125 | 
 126 | ```bash
 127 | export CLICKHOUSE_CONNECT_TEST_CLOUD=1
 128 | export CLICKHOUSE_CONNECT_TEST_PORT=8443
 129 | export CLICKHOUSE_CONNECT_TEST_HOST='instance.clickhouse.cloud'
 130 | export CLICKHOUSE_CONNECT_TEST_PASSWORD='secret'
 131 | ```
 132 | 
 133 | Then, you should be able to run the tests as usual:
 134 | 
 135 | ```bash
 136 | pytest tests/integration_tests
 137 | ```
 138 | 
 139 | ## Style Guide
 140 | 
 141 | The project uses [PyLint](https://pypi.org/project/pylint/) to enforce the code style.
 142 | It is always a good idea to run the linter before committing changes, as this is a mandatory CI check. For example:
 143 | 
 144 | ```bash
 145 | pip install pylint
 146 | pylint clickhouse_connect
 147 | pylint tests
 148 | ```
 149 | 
-------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.10
 2 | 
 3 | ADD . /
 4 | 
 5 | RUN pip install requests pytz
 6 | RUN pip install --upgrade pip; \
 7 |     pip install cython; \
 8 |     pip install -v .
 9 | #RUN pip install -v --index-url https://test.pypi.org/simple/ clickhouse-connect
 10 | CMD ["python", "playtest.py"]
 11 | 
-------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | global-include *.pxd *.pyx
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## ClickHouse Connect
 2 | 
 3 | A high-performance core database driver for connecting ClickHouse to Python, Pandas, and Superset
 4 | 
 5 | * Pandas DataFrames
 6 | * Numpy Arrays
 7 | * PyArrow Tables
 8 | * Superset Connector
 9 | * SQLAlchemy 1.3 and 1.4 (limited feature set)
 10 | 
 11 | ClickHouse Connect currently uses the ClickHouse HTTP interface for maximum compatibility.
 12 | 
 13 | ### Installation
 14 | 
 15 | ```
 16 | pip install clickhouse-connect
 17 | ```
 18 | 
 19 | ClickHouse Connect requires Python 3.8 or higher.
 20 | 
 21 | ### Superset Connectivity
 22 | 
 23 | ClickHouse Connect is fully integrated with Apache Superset. Previous versions of ClickHouse Connect utilized a
 24 | dynamically loaded Superset Engine Spec, but as of Superset v2.1.0 the engine spec was incorporated into the main
 25 | Apache Superset project and removed from clickhouse-connect in v0.6.0. If you have issues connecting to earlier
 26 | versions of Superset, please use clickhouse-connect v0.5.25.
27 | 28 | When creating a Superset Data Source, either use the provided connection dialog, or a SqlAlchemy DSN in the form 29 | `clickhousedb://{username}:{password}@{host}:{port}`. 30 | 31 | ### SQLAlchemy Implementation 32 | 33 | ClickHouse Connect incorporates a minimal SQLAlchemy implementation (without any ORM features) for compatibility with 34 | Superset. It has only been tested against SQLAlchemy versions 1.3.x and 1.4.x, and is unlikely to work with more 35 | complex SQLAlchemy applications. 36 | 37 | ### Asyncio Support 38 | 39 | ClickHouse Connect provides an async wrapper, so that it is possible to use the client in an `asyncio` environment. 40 | See the [run_async example](./examples/run_async.py) for more details. 41 | 42 | ### Complete Documentation 43 | 44 | The documentation for ClickHouse Connect has moved to 45 | [ClickHouse Docs](https://clickhouse.com/docs/integrations/python) 46 | -------------------------------------------------------------------------------- /clickhouse_connect/__init__.py: -------------------------------------------------------------------------------- 1 | from clickhouse_connect.driver import create_client, create_async_client 2 | 3 | driver_name = 'clickhousedb' 4 | 5 | get_client = create_client 6 | get_async_client = create_async_client 7 | -------------------------------------------------------------------------------- /clickhouse_connect/__version__.py: -------------------------------------------------------------------------------- 1 | version = '0.8.18' 2 | -------------------------------------------------------------------------------- /clickhouse_connect/cc_sqlalchemy/__init__.py: -------------------------------------------------------------------------------- 1 | from clickhouse_connect import driver_name 2 | from clickhouse_connect.cc_sqlalchemy.datatypes.base import schema_types 3 | 4 | # pylint: disable=invalid-name 5 | dialect_name = driver_name 6 | ischema_names = schema_types 7 | -------------------------------------------------------------------------------- /clickhouse_connect/cc_sqlalchemy/datatypes/__init__.py: -------------------------------------------------------------------------------- 1 | import clickhouse_connect.cc_sqlalchemy.datatypes.sqltypes 2 | -------------------------------------------------------------------------------- /clickhouse_connect/cc_sqlalchemy/ddl/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ClickHouse/clickhouse-connect/ced1ec4ddc823159648fd66ce554110192b0a023/clickhouse_connect/cc_sqlalchemy/ddl/__init__.py -------------------------------------------------------------------------------- /clickhouse_connect/cc_sqlalchemy/ddl/custom.py: -------------------------------------------------------------------------------- 1 | from sqlalchemy.sql.ddl import DDL 2 | from sqlalchemy.exc import ArgumentError 3 | 4 | from clickhouse_connect.driver.binding import quote_identifier 5 | 6 | 7 | # pylint: disable=too-many-ancestors,abstract-method 8 | class CreateDatabase(DDL): 9 | """ 10 | SqlAlchemy DDL statement that is essentially an alternative to the built in CreateSchema DDL class 11 | """ 12 | # pylint: disable-msg=too-many-arguments 13 | def __init__(self, name: str, engine: str = None, zoo_path: str = None, shard_name: str = '{shard}', 14 | replica_name: str = '{replica}'): 15 | """ 16 | :param name: Database name 17 | :param engine: Database ClickHouse engine type 18 | :param zoo_path: ClickHouse zookeeper path for Replicated 
database engine
 19 |         :param shard_name: ClickHouse shard name for Replicated database engine
 20 |         :param replica_name: Replica name for Replicated database engine
 21 |         """
 22 |         if engine and engine not in ('Ordinary', 'Atomic', 'Lazy', 'Replicated'):
 23 |             raise ArgumentError(f'Unrecognized engine type {engine}')
 24 |         stmt = f'CREATE DATABASE {quote_identifier(name)}'
 25 |         if engine:
 26 |             stmt += f' Engine {engine}'
 27 |             if engine == 'Replicated':
 28 |                 if not zoo_path:
 29 |                     raise ArgumentError('zoo_path is required for Replicated Database Engine')
 30 |                 stmt += f" ('{zoo_path}', '{shard_name}', '{replica_name}')"
 31 |         super().__init__(stmt)
 32 | 
 33 | 
 34 | # pylint: disable=too-many-ancestors,abstract-method
 35 | class DropDatabase(DDL):
 36 |     """
 37 |     Alternative DDL statement for the built-in SqlAlchemy DropSchema DDL class
 38 |     """
 39 |     def __init__(self, name: str):
 40 |         super().__init__(f'DROP DATABASE {quote_identifier(name)}')
 41 | 
-------------------------------------------------------------------------------- /clickhouse_connect/cc_sqlalchemy/dialect.py: -------------------------------------------------------------------------------- 1 | 
 2 | from sqlalchemy import text
 3 | from sqlalchemy.engine.default import DefaultDialect
 4 | 
 5 | from clickhouse_connect import dbapi
 6 | 
 7 | from clickhouse_connect.cc_sqlalchemy.inspector import ChInspector
 8 | from clickhouse_connect.cc_sqlalchemy.sql import full_table
 9 | from clickhouse_connect.cc_sqlalchemy.sql.ddlcompiler import ChDDLCompiler
 10 | from clickhouse_connect.cc_sqlalchemy import ischema_names, dialect_name
 11 | from clickhouse_connect.cc_sqlalchemy.sql.preparer import ChIdentifierPreparer
 12 | from clickhouse_connect.driver.binding import quote_identifier, format_str
 13 | 
 14 | 
 15 | # pylint: disable=too-many-public-methods,no-self-use,unused-argument
 16 | class ClickHouseDialect(DefaultDialect):
 17 |     """
 18 |     See :py:class:`sqlalchemy.engine.interfaces`
 19 |     """
 20 |     name = dialect_name
 21 |     driver = 'connect'
 22 | 
 23 |     default_schema_name = 'default'
 24 |     supports_native_decimal = True
 25 |     supports_native_boolean = True
 26 |     supports_statement_cache = False
 27 |     returns_unicode_strings = True
 28 |     postfetch_lastrowid = False
 29 |     ddl_compiler = ChDDLCompiler
 30 |     preparer = ChIdentifierPreparer
 31 |     description_encoding = None
 32 |     max_identifier_length = 127
 33 |     ischema_names = ischema_names
 34 |     inspector = ChInspector
 35 | 
 36 |     # pylint: disable=method-hidden
 37 |     @classmethod
 38 |     def dbapi(cls):
 39 |         return dbapi
 40 | 
 41 |     def initialize(self, connection):
 42 |         pass
 43 | 
 44 |     @staticmethod
 45 |     def get_schema_names(connection, **_):
 46 |         return [row.name for row in connection.execute('SHOW DATABASES')]
 47 | 
 48 |     @staticmethod
 49 |     def has_database(connection, db_name):
 50 |         return (connection.execute(text('SELECT name FROM system.databases ' +
 51 |                                         f'WHERE name = {format_str(db_name)}'))).rowcount > 0
 52 | 
 53 |     def get_table_names(self, connection, schema=None, **kw):
 54 |         cmd = 'SHOW TABLES'
 55 |         if schema:
 56 |             cmd += ' FROM ' + quote_identifier(schema)
 57 |         return [row.name for row in connection.execute(cmd)]
 58 | 
 59 |     def get_primary_keys(self, connection, table_name, schema=None, **kw):
 60 |         return []
 61 | 
 62 |     # pylint: disable=arguments-renamed
 63 |     def get_pk_constraint(self, connection, table_name, schema=None, **kw):
 64 |         return []
 65 | 
 66 |     def get_foreign_keys(self, connection, table_name, schema=None, **kw):
 67 |         return []
 68 | 
 69 |     def get_temp_table_names(self, connection, schema=None, **kw):
 70 |         return []
 71 | 
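    # The reflection hooks below have no ClickHouse equivalent and intentionally return empty
    # results; two-phase commit is likewise unsupported and raises NotImplementedError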
 72 |     def get_view_names(self, connection, schema=None, **kw):
 73 |         return []
 74 | 
 75 |     def get_temp_view_names(self, connection, schema=None, **kw):
 76 |         return []
 77 | 
 78 |     def get_view_definition(self, connection, view_name, schema=None, **kw):
 79 |         pass
 80 | 
 81 |     def get_indexes(self, connection, table_name, schema=None, **kw):
 82 |         return []
 83 | 
 84 |     def get_unique_constraints(self, connection, table_name, schema=None, **kw):
 85 |         return []
 86 | 
 87 |     def get_check_constraints(self, connection, table_name, schema=None, **kw):
 88 |         return []
 89 | 
 90 |     def has_table(self, connection, table_name, schema=None, **_kw):
 91 |         result = connection.execute(text(f'EXISTS TABLE {full_table(table_name, schema)}'))
 92 |         row = result.fetchone()
 93 |         return row[0] == 1
 94 | 
 95 |     def has_sequence(self, connection, sequence_name, schema=None, **_kw):
 96 |         return False
 97 | 
 98 |     def do_begin_twophase(self, connection, xid):
 99 |         raise NotImplementedError
 100 | 
 101 |     def do_prepare_twophase(self, connection, xid):
 102 |         raise NotImplementedError
 103 | 
 104 |     def do_rollback_twophase(self, connection, xid, is_prepared=True, recover=False):
 105 |         raise NotImplementedError
 106 | 
 107 |     def do_commit_twophase(self, connection, xid, is_prepared=True, recover=False):
 108 |         raise NotImplementedError
 109 | 
 110 |     def do_recover_twophase(self, connection):
 111 |         raise NotImplementedError
 112 | 
 113 |     def set_isolation_level(self, dbapi_conn, level):
 114 |         pass
 115 | 
 116 |     def get_isolation_level(self, dbapi_conn):
 117 |         return None
 118 | 
-------------------------------------------------------------------------------- /clickhouse_connect/cc_sqlalchemy/inspector.py: -------------------------------------------------------------------------------- 1 | import sqlalchemy.schema as sa_schema
 2 | 
 3 | from sqlalchemy.engine.reflection import Inspector
 4 | from sqlalchemy.orm.exc import NoResultFound
 5 | 
 6 | from clickhouse_connect.cc_sqlalchemy.datatypes.base import sqla_type_from_name
 7 | from clickhouse_connect.cc_sqlalchemy.ddl.tableengine import build_engine
 8 | from clickhouse_connect.cc_sqlalchemy.sql import full_table
 9 | from clickhouse_connect.cc_sqlalchemy import dialect_name as dn
 10 | 
 11 | ch_col_args = ('default_type', 'codec_expression', 'ttl_expression')
 12 | 
 13 | 
 14 | def get_engine(connection, table_name, schema=None):
 15 |     result_set = connection.execute(
 16 |         f"SELECT engine_full FROM system.tables WHERE database = '{schema}' and name = '{table_name}'")
 17 |     row = next(result_set, None)
 18 |     if not row:
 19 |         raise NoResultFound(f'Table {schema}.{table_name} does not exist')
 20 |     return build_engine(row.engine_full)
 21 | 
 22 | 
 23 | class ChInspector(Inspector):
 24 | 
 25 |     def reflect_table(self, table, include_columns, exclude_columns, *_args, **_kwargs):
 26 |         schema = table.schema
 27 |         for col in self.get_columns(table.name, schema):
 28 |             name = col.pop('name')
 29 |             if (include_columns and name not in include_columns) or (exclude_columns and name in exclude_columns):
 30 |                 continue
 31 |             col_type = col.pop('type')
 32 |             col_args = {f'{dn}_{key}' if key in ch_col_args else key: value for key, value in col.items() if value}
 33 |             table.append_column(sa_schema.Column(name, col_type, **col_args))
 34 |         table.engine = get_engine(self.bind, table.name, schema)
 35 | 
 36 |     def get_columns(self, table_name, schema=None, **_kwargs):
 37 |         table_id = full_table(table_name, schema)
 38 |         result_set = self.bind.execute(f'DESCRIBE TABLE {table_id}')
 39 |         if not result_set:
 40 |             raise NoResultFound(f'Table {table_id} does not 
exist') 41 | columns = [] 42 | for row in result_set: 43 | sqla_type = sqla_type_from_name(row.type.replace('\n', '')) 44 | col = {'name': row.name, 45 | 'type': sqla_type, 46 | 'nullable': sqla_type.nullable, 47 | 'autoincrement': False, 48 | 'default': row.default_expression, 49 | 'default_type': row.default_type, 50 | 'comment': row.comment, 51 | 'codec_expression': row.codec_expression, 52 | 'ttl_expression': row.ttl_expression} 53 | columns.append(col) 54 | return columns 55 | 56 | 57 | ChInspector.reflecttable = ChInspector.reflect_table # Hack to provide backward compatibility for SQLAlchemy 1.3 58 | -------------------------------------------------------------------------------- /clickhouse_connect/cc_sqlalchemy/sql/__init__.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | from sqlalchemy import Table 4 | 5 | from clickhouse_connect.driver.binding import quote_identifier 6 | 7 | 8 | def full_table(table_name: str, schema: Optional[str] = None) -> str: 9 | if table_name.startswith('(') or '.' in table_name or not schema: 10 | return quote_identifier(table_name) 11 | return f'{quote_identifier(schema)}.{quote_identifier(table_name)}' 12 | 13 | 14 | def format_table(table: Table): 15 | return full_table(table.name, table.schema) 16 | -------------------------------------------------------------------------------- /clickhouse_connect/cc_sqlalchemy/sql/ddlcompiler.py: -------------------------------------------------------------------------------- 1 | from sqlalchemy import Column 2 | from sqlalchemy.sql.compiler import DDLCompiler 3 | 4 | from clickhouse_connect.cc_sqlalchemy.sql import format_table 5 | from clickhouse_connect.driver.binding import quote_identifier 6 | 7 | 8 | class ChDDLCompiler(DDLCompiler): 9 | 10 | def visit_create_schema(self, create, **_): 11 | return f'CREATE DATABASE {quote_identifier(create.element)}' 12 | 13 | def visit_drop_schema(self, drop, **_): 14 | return f'DROP DATABASE {quote_identifier(drop.element)}' 15 | 16 | def visit_create_table(self, create, **_): 17 | table = create.element 18 | text = f'CREATE TABLE {format_table(table)} (' 19 | text += ', '.join([self.get_column_specification(c.element) for c in create.columns]) 20 | return text + ') ' + table.engine.compile() 21 | 22 | def get_column_specification(self, column: Column, **_): 23 | text = f'{quote_identifier(column.name)} {column.type.compile()}' 24 | return text 25 | -------------------------------------------------------------------------------- /clickhouse_connect/cc_sqlalchemy/sql/preparer.py: -------------------------------------------------------------------------------- 1 | from sqlalchemy.sql.compiler import IdentifierPreparer 2 | 3 | from clickhouse_connect.driver.binding import quote_identifier 4 | 5 | 6 | class ChIdentifierPreparer(IdentifierPreparer): 7 | 8 | quote_identifier = staticmethod(quote_identifier) 9 | 10 | def _requires_quotes(self, _value): 11 | return True 12 | -------------------------------------------------------------------------------- /clickhouse_connect/common.py: -------------------------------------------------------------------------------- 1 | import getpass 2 | import sys 3 | from dataclasses import dataclass 4 | from typing import Any, Sequence, Optional, Dict 5 | from clickhouse_connect import __version__ 6 | 7 | 8 | from clickhouse_connect.driver.exceptions import ProgrammingError 9 | 10 | 11 | def version(): 12 | return __version__.version 13 | 14 | 15 | def format_error(msg: 
str) -> str: 16 | max_size = _common_settings['max_error_size'].value 17 | if max_size: 18 | return msg[:max_size] 19 | return msg 20 | 21 | 22 | @dataclass 23 | class CommonSetting: 24 | name: str 25 | options: Sequence[Any] 26 | default: Any 27 | value: Optional[Any] = None 28 | 29 | 30 | _common_settings: Dict[str, CommonSetting] = {} 31 | 32 | 33 | def build_client_name(client_name: str): 34 | product_name = get_setting('product_name') 35 | product_name = product_name.strip() + ' ' if product_name else '' 36 | client_name = client_name.strip() + ' ' if client_name else '' 37 | py_version = sys.version.split(' ', maxsplit=1)[0] 38 | os_user = '' 39 | if get_setting('send_os_user'): 40 | try: 41 | os_user = f'; os_user:{getpass.getuser()}' 42 | except Exception: # pylint: disable=broad-except 43 | pass 44 | full_name = (f'{client_name}{product_name}clickhouse-connect/{version()}' + 45 | f' (lv:py/{py_version}; mode:sync; os:{sys.platform}{os_user})') 46 | return full_name.encode('ascii', 'ignore').decode() 47 | 48 | 49 | def get_setting(name: str): 50 | setting = _common_settings.get(name) 51 | if setting is None: 52 | raise ProgrammingError(f'Unrecognized common setting {name}') 53 | return setting.value if setting.value is not None else setting.default 54 | 55 | 56 | def set_setting(name: str, value: Any): 57 | setting = _common_settings.get(name) 58 | if setting is None: 59 | raise ProgrammingError(f'Unrecognized common setting {name}') 60 | if setting.options and value not in setting.options: 61 | raise ProgrammingError(f'Unrecognized option {value} for setting {name})') 62 | if value == setting.default: 63 | setting.value = None 64 | else: 65 | setting.value = value 66 | 67 | 68 | def _init_common(name: str, options: Sequence[Any], default: Any): 69 | _common_settings[name] = CommonSetting(name, options, default) 70 | 71 | 72 | _init_common('autogenerate_session_id', (True, False), True) 73 | _init_common('dict_parameter_format', ('json', 'map'), 'json') 74 | _init_common('invalid_setting_action', ('send', 'drop', 'error'), 'error') 75 | _init_common('max_connection_age', (), 10 * 60) # Max time in seconds to keep reusing a database TCP connection 76 | _init_common('product_name', (), '') # Product name used as part of client identification for ClickHouse query_log 77 | _init_common('readonly', (0, 1), 0) # Implied "read_only" ClickHouse settings for versions prior to 19.17 78 | _init_common('send_os_user', (True, False), True) 79 | 80 | # Use the client protocol version This is needed for DateTime timezone columns but breaks with current version of 81 | # chproxy 82 | _init_common('use_protocol_version', (True, False), True) 83 | 84 | _init_common('max_error_size', (), 1024) 85 | 86 | # HTTP raw data buffer for streaming queries. 
This should not be reduced below 64KB to ensure compatibility with LZ4 compression 87 | _init_common('http_buffer_size', (), 10 * 1024 * 1024) 88 | -------------------------------------------------------------------------------- /clickhouse_connect/datatypes/__init__.py: -------------------------------------------------------------------------------- 1 | import clickhouse_connect.datatypes.container 2 | import clickhouse_connect.datatypes.network 3 | import clickhouse_connect.datatypes.numeric 4 | import clickhouse_connect.datatypes.special 5 | import clickhouse_connect.datatypes.string 6 | import clickhouse_connect.datatypes.temporal 7 | import clickhouse_connect.datatypes.geometric 8 | import clickhouse_connect.datatypes.dynamic 9 | import clickhouse_connect.datatypes.registry 10 | import clickhouse_connect.datatypes.postinit 11 | -------------------------------------------------------------------------------- /clickhouse_connect/datatypes/format.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | from typing import Dict, Type, Sequence, Optional 4 | 5 | from clickhouse_connect.datatypes.base import ClickHouseType, type_map, ch_read_formats, ch_write_formats 6 | from clickhouse_connect.driver.exceptions import ProgrammingError 7 | 8 | json_re = re.compile('json', re.IGNORECASE) 9 | 10 | 11 | def set_default_formats(*args, **kwargs): 12 | fmt_map = format_map(_convert_arguments(*args, **kwargs)) 13 | ch_read_formats.update(fmt_map) 14 | ch_write_formats.update(fmt_map) 15 | 16 | 17 | def clear_all_formats(): 18 | ch_read_formats.clear() 19 | ch_write_formats.clear() 20 | 21 | 22 | def clear_default_format(pattern: str): 23 | for ch_type in _matching_types(pattern): 24 | ch_read_formats.pop(ch_type, None) 25 | ch_write_formats.pop(ch_type, None) 26 | 27 | 28 | def set_write_format(pattern: str, fmt: str): 29 | pattern = json_re.sub('object', pattern) 30 | for ch_type in _matching_types(pattern): 31 | ch_write_formats[ch_type] = fmt 32 | 33 | 34 | def clear_write_format(pattern: str): 35 | for ch_type in _matching_types(pattern): 36 | ch_write_formats.pop(ch_type, None) 37 | 38 | 39 | def set_read_format(pattern: str, fmt: str): 40 | for ch_type in _matching_types(pattern): 41 | ch_read_formats[ch_type] = fmt 42 | 43 | 44 | def clear_read_format(pattern: str): 45 | for ch_type in _matching_types(pattern): 46 | ch_read_formats.pop(ch_type, None) 47 | 48 | 49 | def format_map(fmt_map: Optional[Dict[str, str]]) -> Dict[Type[ClickHouseType], str]: 50 | if not fmt_map: 51 | return {} 52 | final_map = {} 53 | for pattern, fmt in fmt_map.items(): 54 | for ch_type in _matching_types(pattern, fmt): 55 | final_map[ch_type] = fmt 56 | return final_map 57 | 58 | 59 | def _convert_arguments(*args, **kwargs) -> Dict[str, str]: 60 | fmt_map = {} 61 | try: 62 | for x in range(0, len(args), 2): 63 | fmt_map[args[x]] = args[x + 1] 64 | except (IndexError, TypeError, ValueError) as ex: 65 | raise ProgrammingError('Invalid type/format arguments for format method') from ex 66 | fmt_map.update(kwargs) 67 | return fmt_map 68 | 69 | 70 | def _matching_types(pattern: str, fmt: str = None) -> Sequence[Type[ClickHouseType]]: 71 | re_pattern = re.compile(pattern.replace('*', '.*'), re.IGNORECASE) 72 | matches = [ch_type for type_name, ch_type in type_map.items() if re_pattern.match(type_name)] 73 | if not matches: 74 | raise ProgrammingError(f'Unrecognized ClickHouse type {pattern} when setting formats') 75 | if fmt: 76 | invalid = [ch_type.__name__ for ch_type in 
matches if fmt not in ch_type.valid_formats] 77 | if invalid: 78 | raise ProgrammingError(f"{fmt} is not a valid format for ClickHouse types {','.join(invalid)}.") 79 | return matches 80 | -------------------------------------------------------------------------------- /clickhouse_connect/datatypes/geometric.py: -------------------------------------------------------------------------------- 1 | from typing import Sequence, Any 2 | 3 | from clickhouse_connect.datatypes.base import ClickHouseType 4 | from clickhouse_connect.driver.insert import InsertContext 5 | from clickhouse_connect.driver.query import QueryContext 6 | from clickhouse_connect.driver.types import ByteSource 7 | 8 | POINT_DATA_TYPE: ClickHouseType 9 | RING_DATA_TYPE: ClickHouseType 10 | POLYGON_DATA_TYPE: ClickHouseType 11 | MULTI_POLYGON_DATA_TYPE: ClickHouseType 12 | 13 | 14 | class Point(ClickHouseType): 15 | def write_column(self, column: Sequence, dest: bytearray, ctx: InsertContext): 16 | return POINT_DATA_TYPE.write_column(column, dest, ctx) 17 | 18 | def read_column_prefix(self, source: ByteSource, ctx: QueryContext): 19 | return POINT_DATA_TYPE.read_column_prefix(source, ctx) 20 | 21 | def read_column_data(self, source: ByteSource, num_rows: int, ctx: QueryContext, read_state: Any) -> Sequence: 22 | return POINT_DATA_TYPE.read_column_data(source, num_rows, ctx, read_state) 23 | 24 | 25 | class Ring(ClickHouseType): 26 | def write_column(self, column: Sequence, dest: bytearray, ctx: InsertContext): 27 | return RING_DATA_TYPE.write_column(column, dest, ctx) 28 | 29 | def read_column_prefix(self, source: ByteSource, ctx: QueryContext): 30 | return RING_DATA_TYPE.read_column_prefix(source, ctx) 31 | 32 | def read_column_data(self, source: ByteSource, num_rows: int, ctx: QueryContext, read_state) -> Sequence: 33 | return RING_DATA_TYPE.read_column_data(source, num_rows, ctx, read_state) 34 | 35 | 36 | class Polygon(ClickHouseType): 37 | def write_column(self, column: Sequence, dest: bytearray, ctx: InsertContext): 38 | return POLYGON_DATA_TYPE.write_column(column, dest, ctx) 39 | 40 | def read_column_prefix(self, source: ByteSource, ctx: QueryContext): 41 | return POLYGON_DATA_TYPE.read_column_prefix(source, ctx) 42 | 43 | def read_column_data(self, source: ByteSource, num_rows: int, ctx: QueryContext, read_state:Any) -> Sequence: 44 | return POLYGON_DATA_TYPE.read_column_data(source, num_rows, ctx, read_state) 45 | 46 | 47 | class MultiPolygon(ClickHouseType): 48 | def write_column(self, column: Sequence, dest: bytearray, ctx: InsertContext): 49 | return MULTI_POLYGON_DATA_TYPE.write_column(column, dest, ctx) 50 | 51 | def read_column_prefix(self, source: ByteSource, ctx: QueryContext): 52 | return MULTI_POLYGON_DATA_TYPE.read_column_prefix(source, ctx) 53 | 54 | def read_column_data(self, source: ByteSource, num_rows: int, ctx: QueryContext, read_state:Any) -> Sequence: 55 | return MULTI_POLYGON_DATA_TYPE.read_column_data(source, num_rows, ctx, read_state) 56 | 57 | 58 | class LineString(Ring): 59 | pass 60 | 61 | 62 | class MultiLineString(Polygon): 63 | pass 64 | -------------------------------------------------------------------------------- /clickhouse_connect/datatypes/postinit.py: -------------------------------------------------------------------------------- 1 | from clickhouse_connect.datatypes import registry, dynamic, geometric 2 | 3 | dynamic.SHARED_DATA_TYPE = registry.get_from_name('Array(String, String)') 4 | dynamic.STRING_DATA_TYPE = registry.get_from_name('String') 5 | 6 | point = 'Tuple(Float64, 
Float64)' 7 | ring = f'Array({point})' 8 | polygon = f'Array({ring})' 9 | multi_polygon = f'Array({polygon})' 10 | 11 | geometric.POINT_DATA_TYPE = registry.get_from_name(point) 12 | geometric.RING_DATA_TYPE = registry.get_from_name(ring) 13 | geometric.POLYGON_DATA_TYPE = registry.get_from_name(polygon) 14 | geometric.MULTI_POLYGON_DATA_TYPE = registry.get_from_name(multi_polygon) 15 | -------------------------------------------------------------------------------- /clickhouse_connect/datatypes/registry.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from typing import Tuple, Dict 4 | from clickhouse_connect.datatypes.base import TypeDef, ClickHouseType, type_map 5 | from clickhouse_connect.driver.exceptions import InternalError 6 | from clickhouse_connect.driver.parser import parse_enum, parse_callable, parse_columns 7 | 8 | logger = logging.getLogger(__name__) 9 | type_cache: Dict[str, ClickHouseType] = {} 10 | 11 | 12 | def parse_name(name: str) -> Tuple[str, str, TypeDef]: 13 | """ 14 | Converts a ClickHouse type name into the base class and the definition (TypeDef) needed for any 15 | additional instantiation 16 | :param name: ClickHouse type name as returned by clickhouse 17 | :return: The original base name (before arguments), the full name as passed in and the TypeDef object that 18 | captures any additional arguments 19 | """ 20 | base = name 21 | wrappers = [] 22 | keys = tuple() 23 | if base.startswith('LowCardinality'): 24 | wrappers.append('LowCardinality') 25 | base = base[15:-1] 26 | if base.startswith('Nullable'): 27 | wrappers.append('Nullable') 28 | base = base[9:-1] 29 | if base.startswith('Enum'): 30 | keys, values = parse_enum(base) 31 | base = base[:base.find('(')] 32 | elif base.startswith('Nested'): 33 | keys, values = parse_columns(base[6:]) 34 | base = 'Nested' 35 | elif base.startswith('Tuple'): 36 | keys, values = parse_columns(base[5:]) 37 | base = 'Tuple' 38 | elif base.startswith('Variant'): 39 | keys, values = parse_columns(base[7:]) 40 | base = 'Variant' 41 | elif base.startswith('JSON') and len(base) > 4 and base[4] == '(': 42 | keys, values = parse_columns(base[4:]) 43 | base = 'JSON' 44 | elif base == 'Point': 45 | values = ('Float64', 'Float64') 46 | else: 47 | try: 48 | base, values, _ = parse_callable(base) 49 | except IndexError: 50 | raise InternalError(f'Can not parse ClickHouse data type: {name}') from None 51 | return base, name, TypeDef(tuple(wrappers), keys, values) 52 | 53 | 54 | def get_from_name(name: str) -> ClickHouseType: 55 | """ 56 | Returns the ClickHouseType instance parsed from the ClickHouse type name. 
Instances are cached 57 | :param name: ClickHouse type name as returned by ClickHouse in WithNamesAndTypes FORMAT or the Native protocol 58 | :return: The instance of the ClickHouse Type 59 | """ 60 | ch_type = type_cache.get(name, None) 61 | if not ch_type: 62 | base, name, type_def = parse_name(name) 63 | try: 64 | ch_type = type_map[base].build(type_def) 65 | except KeyError: 66 | err_str = f'Unrecognized ClickHouse type base: {base} name: {name}' 67 | logger.error(err_str) 68 | raise InternalError(err_str) from None 69 | type_cache[name] = ch_type 70 | return ch_type 71 | -------------------------------------------------------------------------------- /clickhouse_connect/datatypes/special.py: -------------------------------------------------------------------------------- 1 | from typing import Union, Sequence, MutableSequence, Any 2 | from uuid import UUID as PYUUID 3 | 4 | from clickhouse_connect.datatypes.base import TypeDef, ClickHouseType, ArrayType, UnsupportedType 5 | from clickhouse_connect.datatypes.registry import get_from_name 6 | from clickhouse_connect.driver.common import first_value 7 | from clickhouse_connect.driver.ctypes import data_conv 8 | from clickhouse_connect.driver.insert import InsertContext 9 | from clickhouse_connect.driver.query import QueryContext 10 | from clickhouse_connect.driver.types import ByteSource 11 | 12 | empty_uuid_b = bytes(b'\x00' * 16) 13 | 14 | 15 | class UUID(ClickHouseType): 16 | valid_formats = 'string', 'native' 17 | np_type = 'U36' 18 | byte_size = 16 19 | 20 | def python_null(self, ctx): 21 | return '' if self.read_format(ctx) == 'string' else PYUUID(int=0) 22 | 23 | def _read_column_binary(self, source: ByteSource, num_rows: int, ctx: QueryContext, _read_state: Any): 24 | if self.read_format(ctx) == 'string': 25 | return self._read_binary_str(source, num_rows) 26 | return data_conv.read_uuid_col(source, num_rows) 27 | 28 | @staticmethod 29 | def _read_binary_str(source: ByteSource, num_rows: int): 30 | v = source.read_array('Q', num_rows * 2) 31 | column = [] 32 | app = column.append 33 | for i in range(num_rows): 34 | ix = i << 1 35 | x = f'{(v[ix] << 64 | v[ix + 1]):032x}' 36 | app(f'{x[:8]}-{x[8:12]}-{x[12:16]}-{x[16:20]}-{x[20:]}') 37 | return column 38 | 39 | # pylint: disable=too-many-branches 40 | def _write_column_binary(self, column: Union[Sequence, MutableSequence], dest: bytearray, ctx: InsertContext): 41 | first = first_value(column, self.nullable) 42 | empty = empty_uuid_b 43 | if isinstance(first, str) or self.write_format(ctx) == 'string': 44 | for v in column: 45 | if v: 46 | x = int(v.replace('-', ''), 16) 47 | dest += (x >> 64).to_bytes(8, 'little') + (x & 0xffffffffffffffff).to_bytes(8, 'little') 48 | else: 49 | dest += empty 50 | elif isinstance(first, int): 51 | for x in column: 52 | if x: 53 | dest += (x >> 64).to_bytes(8, 'little') + (x & 0xffffffffffffffff).to_bytes(8, 'little') 54 | else: 55 | dest += empty 56 | elif isinstance(first, PYUUID): 57 | for v in column: 58 | if v: 59 | x = v.int 60 | dest += (x >> 64).to_bytes(8, 'little') + (x & 0xffffffffffffffff).to_bytes(8, 'little') 61 | else: 62 | dest += empty 63 | elif isinstance(first, (bytes, bytearray, memoryview)): 64 | for v in column: 65 | if v: 66 | dest += bytes(reversed(v[:8])) + bytes(reversed(v[8:])) 67 | else: 68 | dest += empty 69 | else: 70 | dest += empty * len(column) 71 | 72 | 73 | class Nothing(ArrayType): 74 | _array_type = 'b' 75 | 76 | def __init__(self, type_def: TypeDef): 77 | super().__init__(type_def) 78 | self.nullable = True 79 | 
80 | def _write_column_binary(self, column: Union[Sequence, MutableSequence], dest: bytearray, _ctx): 81 | dest += bytes(0x30 for _ in range(len(column))) 82 | 83 | 84 | class SimpleAggregateFunction(ClickHouseType): 85 | _slots = ('element_type',) 86 | 87 | def __init__(self, type_def: TypeDef): 88 | super().__init__(type_def) 89 | self.element_type: ClickHouseType = get_from_name(type_def.values[1]) 90 | self._name_suffix = type_def.arg_str 91 | self.byte_size = self.element_type.byte_size 92 | self.np_type = self.element_type.np_type 93 | self.python_type = self.element_type.python_type 94 | self.nano_divisor = self.element_type.nano_divisor 95 | 96 | def _data_size(self, sample: Sequence) -> int: 97 | return self.element_type.data_size(sample) 98 | 99 | def read_column_prefix(self, source: ByteSource, ctx: QueryContext): 100 | return self.element_type.read_column_prefix(source, ctx) 101 | 102 | def write_column_prefix(self, dest: bytearray): 103 | self.element_type.write_column_prefix(dest) 104 | 105 | def _read_column_binary(self, source: ByteSource, num_rows: int, ctx: QueryContext, read_state: Any): 106 | return self.element_type.read_column_data(source, num_rows, ctx, read_state) 107 | 108 | def _write_column_binary(self, column: Union[Sequence, MutableSequence], dest: bytearray, ctx: InsertContext): 109 | self.element_type.write_column_data(column, dest, ctx) 110 | 111 | 112 | class AggregateFunction(UnsupportedType): 113 | pass 114 | -------------------------------------------------------------------------------- /clickhouse_connect/dbapi/__init__.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | from clickhouse_connect.dbapi.connection import Connection 4 | 5 | 6 | apilevel = '2.0' # PEP 249 DB API level 7 | threadsafety = 2 # PEP 249 Threads may share the module and connections. 8 | paramstyle = 'pyformat' # PEP 249 Python extended format codes, e.g. 
...WHERE name=%(name)s 9 | 10 | 11 | class Error(Exception): 12 | pass 13 | 14 | 15 | def connect(host: Optional[str] = None, 16 | database: Optional[str] = None, 17 | username: Optional[str] = '', 18 | password: Optional[str] = '', 19 | port: Optional[int] = None, 20 | **kwargs): 21 | secure = kwargs.pop('secure', False) 22 | return Connection(host=host, 23 | database=database, 24 | username=username, 25 | password=password, 26 | port=port, 27 | secure=secure, 28 | **kwargs) 29 | -------------------------------------------------------------------------------- /clickhouse_connect/dbapi/connection.py: -------------------------------------------------------------------------------- 1 | from typing import Union 2 | 3 | from clickhouse_connect.dbapi.cursor import Cursor 4 | from clickhouse_connect.driver import create_client 5 | from clickhouse_connect.driver.query import QueryResult 6 | 7 | 8 | class Connection: 9 | """ 10 | See :ref:`https://peps.python.org/pep-0249/` 11 | """ 12 | # pylint: disable=too-many-arguments 13 | def __init__(self, 14 | dsn: str = None, 15 | username: str = '', 16 | password: str = '', 17 | host: str = None, 18 | database: str = None, 19 | interface: str = None, 20 | port: int = 0, 21 | secure: Union[bool, str] = False, 22 | **kwargs): 23 | self.client = create_client(host=host, 24 | username=username, 25 | password=password, 26 | database=database, 27 | interface=interface, 28 | port=port, 29 | secure=secure, 30 | dsn=dsn, 31 | generic_args=kwargs) 32 | self.timezone = self.client.server_tz 33 | 34 | def close(self): 35 | self.client.close() 36 | 37 | def commit(self): 38 | pass 39 | 40 | def rollback(self): 41 | pass 42 | 43 | def command(self, cmd: str): 44 | return self.client.command(cmd) 45 | 46 | def raw_query(self, query: str) -> QueryResult: 47 | return self.client.query(query) 48 | 49 | def cursor(self): 50 | return Cursor(self.client) 51 | -------------------------------------------------------------------------------- /clickhouse_connect/driver/buffer.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import array 3 | from typing import Any, Iterable 4 | 5 | from clickhouse_connect.driver.exceptions import StreamCompleteException 6 | from clickhouse_connect.driver.types import ByteSource 7 | 8 | must_swap = sys.byteorder == 'big' 9 | 10 | 11 | class ResponseBuffer(ByteSource): 12 | slots = 'slice_sz', 'buf_loc', 'end', 'gen', 'buffer', 'slice' 13 | 14 | def __init__(self, source): 15 | self.slice_sz = 4096 16 | self.buf_loc = 0 17 | self.buf_sz = 0 18 | self.source = source 19 | self.gen = source.gen 20 | self.buffer = bytes() 21 | 22 | def read_bytes(self, sz: int): 23 | if self.buf_loc + sz <= self.buf_sz: 24 | self.buf_loc += sz 25 | return self.buffer[self.buf_loc - sz: self.buf_loc] 26 | # Create a temporary buffer that bridges two or more source chunks 27 | bridge = bytearray(self.buffer[self.buf_loc: self.buf_sz]) 28 | self.buf_loc = 0 29 | self.buf_sz = 0 30 | while len(bridge) < sz: 31 | chunk = next(self.gen, None) 32 | if not chunk: 33 | raise StreamCompleteException 34 | x = len(chunk) 35 | if len(bridge) + x <= sz: 36 | bridge.extend(chunk) 37 | else: 38 | tail = sz - len(bridge) 39 | bridge.extend(chunk[:tail]) 40 | self.buffer = chunk 41 | self.buf_sz = x 42 | self.buf_loc = tail 43 | return bridge 44 | 45 | def read_byte(self) -> int: 46 | if self.buf_loc < self.buf_sz: 47 | self.buf_loc += 1 48 | return self.buffer[self.buf_loc - 1] 49 | self.buf_sz = 0 50 | self.buf_loc = 0 51 | 
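# The local buffer is exhausted and has just been reset; pull the next chunk from the
# source generator, where a missing chunk means the response stream ended prematurely.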
chunk = next(self.gen, None) 52 | if not chunk: 53 | raise StreamCompleteException 54 | x = len(chunk) 55 | if x > 1: 56 | self.buffer = chunk 57 | self.buf_loc = 1 58 | self.buf_sz = x 59 | return chunk[0] 60 | 61 | def read_leb128(self) -> int: 62 | sz = 0 63 | shift = 0 64 | while True: 65 | b = self.read_byte() 66 | sz += ((b & 0x7f) << shift) 67 | if (b & 0x80) == 0: 68 | return sz 69 | shift += 7 70 | 71 | def read_leb128_str(self) -> str: 72 | sz = self.read_leb128() 73 | return self.read_bytes(sz).decode() 74 | 75 | def read_uint64(self) -> int: 76 | return int.from_bytes(self.read_bytes(8), 'little', signed=False) 77 | 78 | def read_str_col(self, 79 | num_rows: int, 80 | encoding: str, 81 | nullable: bool = False, 82 | null_obj: Any = None) -> Iterable[str]: 83 | column = [] 84 | app = column.append 85 | null_map = self.read_bytes(num_rows) if nullable else None 86 | for ix in range(num_rows): 87 | sz = 0 88 | shift = 0 89 | while True: 90 | b = self.read_byte() 91 | sz += ((b & 0x7f) << shift) 92 | if (b & 0x80) == 0: 93 | break 94 | shift += 7 95 | x = self.read_bytes(sz) 96 | if null_map and null_map[ix]: 97 | app(null_obj) 98 | elif encoding: 99 | try: 100 | app(x.decode(encoding)) 101 | except UnicodeDecodeError: 102 | app(x.hex()) 103 | else: 104 | app(x) 105 | return column 106 | 107 | def read_bytes_col(self, sz: int, num_rows: int) -> Iterable[bytes]: 108 | source = self.read_bytes(sz * num_rows) 109 | return [bytes(source[x:x+sz]) for x in range(0, sz * num_rows, sz)] 110 | 111 | def read_fixed_str_col(self, sz: int, num_rows: int, encoding: str) -> Iterable[str]: 112 | source = self.read_bytes(sz * num_rows) 113 | column = [] 114 | app = column.append 115 | for ix in range(0, sz * num_rows, sz): 116 | try: 117 | app(str(source[ix: ix + sz], encoding).rstrip('\x00')) 118 | except UnicodeDecodeError: 119 | app(source[ix: ix + sz].hex()) 120 | return column 121 | 122 | def read_array(self, array_type: str, num_rows: int) -> Iterable[Any]: 123 | column = array.array(array_type) 124 | sz = column.itemsize * num_rows 125 | b = self.read_bytes(sz) 126 | column.frombytes(b) 127 | if must_swap: 128 | column.byteswap() 129 | return column 130 | 131 | @property 132 | def last_message(self) -> bytes: 133 | return self.buffer 134 | 135 | def close(self): 136 | if self.source: 137 | self.source.close() 138 | self.source = None 139 | -------------------------------------------------------------------------------- /clickhouse_connect/driver/compression.py: -------------------------------------------------------------------------------- 1 | import zlib 2 | from abc import abstractmethod 3 | from typing import Union 4 | 5 | import lz4 6 | import lz4.frame 7 | import zstandard 8 | 9 | try: 10 | import brotli 11 | except ImportError: 12 | brotli = None 13 | 14 | 15 | available_compression = ['lz4', 'zstd'] 16 | 17 | if brotli: 18 | available_compression.append('br') 19 | available_compression.extend(['gzip', 'deflate']) 20 | 21 | comp_map = {} 22 | 23 | 24 | class Compressor: 25 | def __init_subclass__(cls, tag: str, thread_safe: bool = True): 26 | comp_map[tag] = cls() if thread_safe else cls 27 | 28 | @abstractmethod 29 | def compress_block(self, block) -> Union[bytes, bytearray]: 30 | return block 31 | 32 | def flush(self): 33 | pass 34 | 35 | 36 | class GzipCompressor(Compressor, tag='gzip', thread_safe=False): 37 | def __init__(self, level: int = 6, wbits: int = 31): 38 | self.zlib_obj = zlib.compressobj(level=level, wbits=wbits) 39 | 40 | def compress_block(self, block): 41 | return 
self.zlib_obj.compress(block) 42 | 43 | def flush(self): 44 | return self.zlib_obj.flush() 45 | 46 | 47 | class Lz4Compressor(Compressor, tag='lz4', thread_safe=False): 48 | def __init__(self): 49 | self.comp = lz4.frame.LZ4FrameCompressor() 50 | 51 | def compress_block(self, block): 52 | output = self.comp.begin(len(block)) 53 | output += self.comp.compress(block) 54 | return output + self.comp.flush() 55 | 56 | 57 | class ZstdCompressor(Compressor, tag='zstd'): 58 | def compress_block(self, block): 59 | return zstandard.compress(block) 60 | 61 | 62 | class BrotliCompressor(Compressor, tag='br'): 63 | def compress_block(self, block): 64 | return brotli.compress(block) 65 | 66 | 67 | null_compressor = Compressor() 68 | 69 | 70 | def get_compressor(compression: str) -> Compressor: 71 | if not compression: 72 | return null_compressor 73 | comp = comp_map[compression] 74 | try: 75 | return comp() 76 | except TypeError: 77 | return comp 78 | -------------------------------------------------------------------------------- /clickhouse_connect/driver/constants.py: -------------------------------------------------------------------------------- 1 | PROTOCOL_VERSION_WITH_LOW_CARD = 54405 2 | CH_VERSION_WITH_PROTOCOL = '23.2.1.2537' 3 | -------------------------------------------------------------------------------- /clickhouse_connect/driver/context.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import re 3 | from typing import Optional, Dict, Union, Any 4 | 5 | logger = logging.getLogger(__name__) 6 | 7 | _empty_map = {} 8 | 9 | 10 | # pylint: disable=too-many-instance-attributes 11 | class BaseQueryContext: 12 | 13 | def __init__(self, 14 | settings: Optional[Dict[str, Any]] = None, 15 | query_formats: Optional[Dict[str, str]] = None, 16 | column_formats: Optional[Dict[str, Union[str, Dict[str, str]]]] = None, 17 | encoding: Optional[str] = None, 18 | use_extended_dtypes: bool = False, 19 | use_numpy: bool = False, 20 | transport_settings: Optional[Dict[str, str]] = None): 21 | self.settings = settings or {} 22 | if query_formats is None: 23 | self.type_formats = _empty_map 24 | else: 25 | self.type_formats = {re.compile(type_name.replace('*', '.*'), re.IGNORECASE): fmt 26 | for type_name, fmt in query_formats.items()} 27 | if column_formats is None: 28 | self.col_simple_formats = _empty_map 29 | self.col_type_formats = _empty_map 30 | else: 31 | self.col_simple_formats = {col_name: fmt for col_name, fmt in column_formats.items() if 32 | isinstance(fmt, str)} 33 | self.col_type_formats = {} 34 | for col_name, fmt in column_formats.items(): 35 | if not isinstance(fmt, str): 36 | self.col_type_formats[col_name] = {re.compile(type_name.replace('*', '.*'), re.IGNORECASE): fmt 37 | for type_name, fmt in fmt.items()} 38 | self.query_formats = query_formats or {} 39 | self.column_formats = column_formats or {} 40 | self.transport_settings = transport_settings 41 | self.column_name = None 42 | self.encoding = encoding 43 | self.use_numpy = use_numpy 44 | self.use_extended_dtypes = use_extended_dtypes 45 | self._active_col_fmt = None 46 | self._active_col_type_fmts = _empty_map 47 | 48 | def start_column(self, name: str): 49 | self.column_name = name 50 | self._active_col_fmt = self.col_simple_formats.get(name) 51 | self._active_col_type_fmts = self.col_type_formats.get(name, _empty_map) 52 | 53 | def active_fmt(self, ch_type): 54 | if self._active_col_fmt: 55 | return self._active_col_fmt 56 | for type_pattern, fmt in 
self._active_col_type_fmts.items(): 57 | if type_pattern.match(ch_type): 58 | return fmt 59 | for type_pattern, fmt in self.type_formats.items(): 60 | if type_pattern.match(ch_type): 61 | return fmt 62 | return None 63 | -------------------------------------------------------------------------------- /clickhouse_connect/driver/ctypes.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | 4 | import clickhouse_connect.driver.dataconv as pydc 5 | import clickhouse_connect.driver.npconv as pync 6 | from clickhouse_connect.driver.buffer import ResponseBuffer 7 | from clickhouse_connect.driver.common import coerce_bool 8 | 9 | logger = logging.getLogger(__name__) 10 | 11 | RespBuffCls = ResponseBuffer 12 | data_conv = pydc 13 | numpy_conv = pync 14 | 15 | 16 | # pylint: disable=import-outside-toplevel,global-statement 17 | 18 | def connect_c_modules(): 19 | if not coerce_bool(os.environ.get('CLICKHOUSE_CONNECT_USE_C', True)): 20 | logger.info('ClickHouse Connect C optimizations disabled') 21 | return 22 | 23 | global RespBuffCls, data_conv 24 | try: 25 | from clickhouse_connect.driverc.buffer import ResponseBuffer as CResponseBuffer 26 | import clickhouse_connect.driverc.dataconv as cdc 27 | 28 | data_conv = cdc 29 | RespBuffCls = CResponseBuffer 30 | logger.debug('Successfully imported ClickHouse Connect C data optimizations') 31 | connect_numpy() 32 | except ImportError as ex: 33 | logger.warning('Unable to connect optimized C data functions [%s], falling back to pure Python', 34 | str(ex)) 35 | 36 | 37 | def connect_numpy(): 38 | global numpy_conv 39 | try: 40 | import clickhouse_connect.driverc.npconv as cnc 41 | 42 | numpy_conv = cnc 43 | logger.debug('Successfully imported ClickHouse Connect C/Numpy optimizations') 44 | except ImportError as ex: 45 | logger.debug('Unable to connect ClickHouse Connect C to Numpy API [%s], falling back to pure Python', 46 | str(ex)) 47 | 48 | 49 | connect_c_modules() 50 | -------------------------------------------------------------------------------- /clickhouse_connect/driver/dataconv.py: -------------------------------------------------------------------------------- 1 | import array 2 | from datetime import datetime, date, tzinfo 3 | from ipaddress import IPv4Address 4 | from typing import Sequence, Optional, Any 5 | from uuid import UUID, SafeUUID 6 | 7 | from clickhouse_connect.driver.common import int_size 8 | from clickhouse_connect.driver.errors import NONE_IN_NULLABLE_COLUMN 9 | from clickhouse_connect.driver.types import ByteSource 10 | from clickhouse_connect.driver.options import np 11 | 12 | 13 | MONTH_DAYS = (0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365) 14 | MONTH_DAYS_LEAP = (0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366) 15 | 16 | 17 | def read_ipv4_col(source: ByteSource, num_rows: int): 18 | column = source.read_array('I', num_rows) 19 | fast_ip_v4 = IPv4Address.__new__ 20 | new_col = [] 21 | app = new_col.append 22 | for x in column: 23 | ipv4 = fast_ip_v4(IPv4Address) 24 | ipv4._ip = x # pylint: disable=protected-access 25 | app(ipv4) 26 | return new_col 27 | 28 | 29 | def read_datetime_col(source: ByteSource, num_rows: int, tz_info: Optional[tzinfo]): 30 | src_array = source.read_array('I', num_rows) 31 | if tz_info is None: 32 | fts = datetime.utcfromtimestamp 33 | return [fts(ts) for ts in src_array] 34 | fts = datetime.fromtimestamp 35 | return [fts(ts, tz_info) for ts in src_array] 36 | 37 | 38 | def epoch_days_to_date(days: int) -> date:
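# A pure Python Gregorian calendar conversion: shifting by 134774 days rebases the count
# from the Unix epoch (1970-01-01) to 1601-01-01, then 146097 days make up one 400-year
# cycle, 36524 days one 100-year cycle and 1461 days one 4-year cycle; the final remainder
# is resolved to a month and day with the cumulative MONTH_DAYS tables above.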
39 | cycles400, rem = divmod(days + 134774, 146097) 40 | cycles100, rem = divmod(rem, 36524) 41 | cycles, rem = divmod(rem, 1461) 42 | years, rem = divmod(rem, 365) 43 | year = (cycles << 2) + cycles400 * 400 + cycles100 * 100 + years + 1601 44 | if years == 4 or cycles100 == 4: 45 | return date(year - 1, 12, 31) 46 | m_list = MONTH_DAYS_LEAP if years == 3 and (year == 2000 or year % 100 != 0) else MONTH_DAYS 47 | month = (rem + 24) >> 5 48 | while rem < m_list[month]: 49 | month -= 1 50 | return date(year, month + 1, rem + 1 - m_list[month]) 51 | 52 | 53 | def read_date_col(source: ByteSource, num_rows: int): 54 | column = source.read_array('H', num_rows) 55 | return [epoch_days_to_date(x) for x in column] 56 | 57 | 58 | def read_date32_col(source: ByteSource, num_rows: int): 59 | column = source.read_array('l' if int_size == 2 else 'i', num_rows) 60 | return [epoch_days_to_date(x) for x in column] 61 | 62 | 63 | def read_uuid_col(source: ByteSource, num_rows: int): 64 | v = source.read_array('Q', num_rows * 2) 65 | empty_uuid = UUID(int=0) 66 | new_uuid = UUID.__new__ 67 | unsafe = SafeUUID.unsafe 68 | oset = object.__setattr__ 69 | column = [] 70 | app = column.append 71 | for i in range(num_rows): 72 | ix = i << 1 73 | int_value = v[ix] << 64 | v[ix + 1] 74 | if int_value == 0: 75 | app(empty_uuid) 76 | else: 77 | fast_uuid = new_uuid(UUID) 78 | oset(fast_uuid, 'int', int_value) 79 | oset(fast_uuid, 'is_safe', unsafe) 80 | app(fast_uuid) 81 | return column 82 | 83 | 84 | def read_nullable_array(source: ByteSource, array_type: str, num_rows: int, null_obj: Any): 85 | null_map = source.read_bytes(num_rows) 86 | column = source.read_array(array_type, num_rows) 87 | return [null_obj if null_map[ix] else column[ix] for ix in range(num_rows)] 88 | 89 | 90 | def build_nullable_column(source: Sequence, null_map: bytes, null_obj: Any): 91 | return [source[ix] if null_map[ix] == 0 else null_obj for ix in range(len(source))] 92 | 93 | 94 | def build_lc_nullable_column(index: Sequence, keys: array.array, null_obj: Any): 95 | column = [] 96 | for key in keys: 97 | if key == 0: 98 | column.append(null_obj) 99 | else: 100 | column.append(index[key]) 101 | return column 102 | 103 | 104 | def to_numpy_array(column: Sequence): 105 | arr = np.empty((len(column),), dtype=np.object_) 106 | arr[:] = column 107 | return arr 108 | 109 | 110 | def pivot(data: Sequence[Sequence], start_row: int, end_row: int) -> Sequence[Sequence]: 111 | return tuple(zip(*data[start_row: end_row])) 112 | 113 | 114 | def write_str_col(column: Sequence, nullable: bool, encoding: Optional[str], dest: bytearray) -> int: 115 | app = dest.append 116 | for x in column: 117 | if not x: 118 | if not nullable and x is None: 119 | return NONE_IN_NULLABLE_COLUMN 120 | app(0) 121 | else: 122 | if encoding: 123 | x = x.encode(encoding) 124 | else: 125 | x = bytes(x) 126 | sz = len(x) 127 | while True: 128 | b = sz & 0x7f 129 | sz >>= 7 130 | if sz == 0: 131 | app(b) 132 | break 133 | app(0x80 | b) 134 | dest += x 135 | return 0 136 | -------------------------------------------------------------------------------- /clickhouse_connect/driver/ddl.py: -------------------------------------------------------------------------------- 1 | from typing import NamedTuple, Sequence 2 | 3 | from clickhouse_connect.datatypes.base import ClickHouseType 4 | 5 | 6 | class TableColumnDef(NamedTuple): 7 | """ 8 | Simplified ClickHouse Table Column definition for DDL 9 | """ 10 | name: str 11 | ch_type: ClickHouseType 12 | expr_type: str = None 13 | expr: str =
None 14 | 15 | @property 16 | def col_expr(self): 17 | expr = f'{self.name} {self.ch_type.name}' 18 | if self.expr_type: 19 | expr += f' {self.expr_type} {self.expr}' 20 | return expr 21 | 22 | 23 | def create_table(table_name: str, columns: Sequence[TableColumnDef], engine: str, engine_params: dict): 24 | stmt = f"CREATE TABLE {table_name} ({', '.join(col.col_expr for col in columns)}) ENGINE {engine} " 25 | if engine_params: 26 | for key, value in engine_params.items(): 27 | stmt += f' {key} {value}' 28 | return stmt 29 | -------------------------------------------------------------------------------- /clickhouse_connect/driver/errors.py: -------------------------------------------------------------------------------- 1 | from clickhouse_connect.driver.context import BaseQueryContext 2 | from clickhouse_connect.driver.exceptions import DataError 3 | 4 | 5 | # Error codes used in the Cython API 6 | NO_ERROR = 0 7 | NONE_IN_NULLABLE_COLUMN = 1 8 | 9 | error_messages = {NONE_IN_NULLABLE_COLUMN: 'Invalid None value in non-Nullable column'} 10 | 11 | 12 | def handle_error(error_num: int, ctx: BaseQueryContext): 13 | if error_num > 0: 14 | msg = error_messages[error_num] 15 | if ctx.column_name: 16 | msg = f'{msg}, column name: `{ctx.column_name}`' 17 | raise DataError(msg) 18 | -------------------------------------------------------------------------------- /clickhouse_connect/driver/exceptions.py: -------------------------------------------------------------------------------- 1 | """ 2 | The driver exception classes here include all named exceptions required by the DB API 2.0 specification. It's not clear 3 | how useful that naming convention is, but the convention is used for potential improved compatibility with other 4 | libraries. In most cases the docstrings are taken from the DB API 2.0 documentation 5 | """ 6 | 7 | 8 | class ClickHouseError(Exception): 9 | """Exception related to operation with ClickHouse.""" 10 | 11 | 12 | # pylint: disable=redefined-builtin 13 | class Warning(Warning, ClickHouseError): 14 | """Exception raised for important warnings like data truncations 15 | while inserting, etc.""" 16 | 17 | 18 | class Error(ClickHouseError): 19 | """Exception that is the base class of all other error exceptions 20 | (not Warning).""" 21 | 22 | 23 | class InterfaceError(Error): 24 | """Exception raised for errors that are related to the database 25 | interface rather than the database itself.""" 26 | 27 | 28 | class DatabaseError(Error): 29 | """Exception raised for errors that are related to the 30 | database.""" 31 | 32 | 33 | class DataError(DatabaseError): 34 | """Exception raised for errors that are due to problems with the 35 | processed data like division by zero, numeric value out of range, 36 | etc.""" 37 | 38 | 39 | class OperationalError(DatabaseError): 40 | """Exception raised for errors that are related to the database's 41 | operation and not necessarily under the control of the programmer, 42 | e.g. an unexpected disconnect occurs, the data source name is not 43 | found, a transaction could not be processed, a memory allocation 44 | error occurred during processing, etc.""" 45 | 46 | 47 | class IntegrityError(DatabaseError): 48 | """Exception raised when the relational integrity of the database 49 | is affected, e.g. a foreign key check fails, duplicate key, 50 | etc.""" 51 | 52 | 53 | class InternalError(DatabaseError): 54 | """Exception raised when the database encounters an internal 55 | error, e.g.
the cursor is not valid anymore, the transaction is 56 | out of sync, etc.""" 57 | 58 | 59 | class ProgrammingError(DatabaseError): 60 | """Exception raised for programming errors, e.g. table not found 61 | or already exists, syntax error in the SQL statement, wrong number 62 | of parameters specified, etc.""" 63 | 64 | 65 | class NotSupportedError(DatabaseError): 66 | """Exception raised in case a method or database API was used 67 | which is not supported by the database, e.g. requesting a 68 | .rollback() on a connection that does not support transaction or 69 | has transactions turned off.""" 70 | 71 | 72 | class StreamClosedError(ProgrammingError): 73 | """Exception raised when a stream operation is executed on a closed stream.""" 74 | 75 | def __init__(self): 76 | super().__init__('Executing a streaming operation on a closed stream') 77 | 78 | 79 | class StreamCompleteException(Exception): 80 | """ Internal exception used to indicate the end of a ClickHouse query result stream.""" 81 | 82 | 83 | class StreamFailureError(Exception): 84 | """ Stream failed unexpectedly """ 85 | -------------------------------------------------------------------------------- /clickhouse_connect/driver/models.py: -------------------------------------------------------------------------------- 1 | from typing import NamedTuple 2 | 3 | from clickhouse_connect.datatypes.registry import get_from_name 4 | 5 | 6 | class ColumnDef(NamedTuple): 7 | """ 8 | ClickHouse column definition from DESCRIBE TABLE command 9 | """ 10 | name: str 11 | type: str 12 | default_type: str 13 | default_expression: str 14 | comment: str 15 | codec_expression: str 16 | ttl_expression: str 17 | 18 | @property 19 | def type_name(self): 20 | return self.type.replace('\n', '').strip() 21 | 22 | @property 23 | def ch_type(self): 24 | return get_from_name(self.type_name) 25 | 26 | 27 | class SettingDef(NamedTuple): 28 | """ 29 | ClickHouse setting definition from system.settings table 30 | """ 31 | name: str 32 | value: str 33 | readonly: int 34 | 35 | 36 | class SettingStatus(NamedTuple): 37 | """ 38 | Get the setting "status" from a ClickHouse server setting 39 | """ 40 | is_set: bool 41 | is_writable: bool 42 | -------------------------------------------------------------------------------- /clickhouse_connect/driver/npconv.py: -------------------------------------------------------------------------------- 1 | from clickhouse_connect.driver.options import np 2 | 3 | from clickhouse_connect.driver.types import ByteSource 4 | 5 | 6 | def read_numpy_array(source: ByteSource, np_type: str, num_rows: int): 7 | dtype = np.dtype(np_type) 8 | buffer = source.read_bytes(dtype.itemsize * num_rows) 9 | return np.frombuffer(buffer, dtype, num_rows) 10 | -------------------------------------------------------------------------------- /clickhouse_connect/driver/npquery.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import itertools 3 | from typing import Generator, Sequence, Tuple 4 | 5 | from clickhouse_connect.driver.common import empty_gen, StreamContext 6 | from clickhouse_connect.driver.exceptions import StreamClosedError 7 | from clickhouse_connect.driver.types import Closable 8 | from clickhouse_connect.driver.options import np, pd 9 | 10 | logger = logging.getLogger(__name__) 11 | 12 | 13 | # pylint: disable=too-many-instance-attributes 14 | class NumpyResult(Closable): 15 | def __init__(self, 16 | block_gen: Generator[Sequence, None, None] = None, 17 | column_names: Tuple = (), 
18 | column_types: Tuple = (), 19 | d_types: Sequence = (), 20 | source: Closable = None): 21 | self.column_names = column_names 22 | self.column_types = column_types 23 | self.np_types = d_types 24 | self.source = source 25 | self.query_id = '' 26 | self.summary = {} 27 | self._block_gen = block_gen or empty_gen() 28 | self._numpy_result = None 29 | self._df_result = None 30 | 31 | def _np_stream(self) -> Generator: 32 | if self._block_gen is None: 33 | raise StreamClosedError 34 | 35 | block_gen = self._block_gen 36 | self._block_gen = None 37 | if not self.np_types: 38 | return block_gen 39 | 40 | d_types = self.np_types 41 | first_type = d_types[0] 42 | if first_type != np.object_ and all(np.dtype(np_type) == first_type for np_type in d_types): 43 | self.np_types = first_type 44 | 45 | def numpy_blocks(): 46 | for block in block_gen: 47 | yield np.array(block, first_type).transpose() 48 | else: 49 | if any(x == np.object_ for x in d_types): 50 | self.np_types = [np.object_] * len(self.np_types) 51 | self.np_types = np.dtype(list(zip(self.column_names, d_types))) 52 | 53 | def numpy_blocks(): 54 | for block in block_gen: 55 | np_array = np.empty(len(block[0]), dtype=self.np_types) 56 | for col_name, data in zip(self.column_names, block): 57 | np_array[col_name] = data 58 | yield np_array 59 | 60 | return numpy_blocks() 61 | 62 | def _df_stream(self) -> Generator: 63 | if self._block_gen is None: 64 | raise StreamClosedError 65 | block_gen = self._block_gen 66 | 67 | def pd_blocks(): 68 | for block in block_gen: 69 | yield pd.DataFrame(dict(zip(self.column_names, block))) 70 | 71 | self._block_gen = None 72 | return pd_blocks() 73 | 74 | def close_numpy(self): 75 | if not self._block_gen: 76 | raise StreamClosedError 77 | chunk_size = 4 78 | pieces = [] 79 | blocks = [] 80 | for block in self._np_stream(): 81 | blocks.append(block) 82 | if len(blocks) == chunk_size: 83 | pieces.append(np.concatenate(blocks, dtype=self.np_types)) 84 | chunk_size *= 2 85 | blocks = [] 86 | pieces.extend(blocks) 87 | if len(pieces) > 1: 88 | self._numpy_result = np.concatenate(pieces, dtype=self.np_types) 89 | elif len(pieces) == 1: 90 | self._numpy_result = pieces[0] 91 | else: 92 | self._numpy_result = np.empty((0,)) 93 | self.close() 94 | return self 95 | 96 | def close_df(self): 97 | if self._block_gen is None: 98 | raise StreamClosedError 99 | bg = self._block_gen 100 | chain = itertools.chain 101 | chains = [chain(b) for b in zip(*bg)] 102 | new_df_series = [] 103 | for c in chains: 104 | series = [pd.Series(piece, copy=False) for piece in c if len(piece) > 0] 105 | if len(series) > 0: 106 | new_df_series.append(pd.concat(series, copy=False, ignore_index=True)) 107 | self._df_result = pd.DataFrame(dict(zip(self.column_names, new_df_series))) 108 | self.close() 109 | return self 110 | 111 | @property 112 | def np_result(self): 113 | if self._numpy_result is None: 114 | self.close_numpy() 115 | return self._numpy_result 116 | 117 | @property 118 | def df_result(self): 119 | if self._df_result is None: 120 | self.close_df() 121 | return self._df_result 122 | 123 | @property 124 | def np_stream(self) -> StreamContext: 125 | return StreamContext(self, self._np_stream()) 126 | 127 | @property 128 | def df_stream(self) -> StreamContext: 129 | return StreamContext(self, self._df_stream()) 130 | 131 | def close(self): 132 | if self._block_gen is not None: 133 | self._block_gen.close() 134 | self._block_gen = None 135 | if self.source: 136 | self.source.close() 137 | self.source = None 138 | 
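The NumpyResult batch and stream accessors above are normally reached through the client convenience methods rather than constructed directly. A minimal consumption sketch (the query is illustrative, and both numpy and pandas must be installed):

    import clickhouse_connect

    client = clickhouse_connect.get_client()
    # Stream the result as a sequence of pandas DataFrame blocks; the StreamContext
    # closes the underlying response source when the 'with' block exits
    with client.query_df_stream('SELECT number, toString(number) AS s FROM numbers(100000)') as df_stream:
        total_rows = sum(len(df) for df in df_stream)
    # Or materialize the whole result as a single numpy array
    np_array = client.query_np('SELECT number FROM numbers(1000)')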
-------------------------------------------------------------------------------- /clickhouse_connect/driver/options.py: -------------------------------------------------------------------------------- 1 | from clickhouse_connect.driver.exceptions import NotSupportedError 2 | 3 | pd_time_test = None 4 | pd_extended_dtypes = False 5 | 6 | try: 7 | import numpy as np 8 | except ImportError: 9 | np = None 10 | 11 | try: 12 | import pandas as pd 13 | pd_extended_dtypes = not pd.__version__.startswith('0') 14 | try: 15 | from pandas.core.dtypes.common import is_datetime64_dtype 16 | from pandas.core.dtypes.common import is_timedelta64_dtype 17 | 18 | def combined_test(arr_or_dtype): 19 | return is_datetime64_dtype(arr_or_dtype) or is_timedelta64_dtype(arr_or_dtype) 20 | 21 | pd_time_test = combined_test 22 | except ImportError: 23 | try: 24 | from pandas.core.dtypes.common import is_datetime_or_timedelta_dtype 25 | pd_time_test = is_datetime_or_timedelta_dtype 26 | except ImportError as ex: 27 | raise NotSupportedError('pandas version does not contain expected test for temporal types') from ex 28 | except ImportError: 29 | pd = None 30 | 31 | try: 32 | import pyarrow as arrow 33 | except ImportError: 34 | arrow = None 35 | 36 | 37 | def check_numpy(): 38 | if np: 39 | return np 40 | raise NotSupportedError('Numpy package is not installed') 41 | 42 | 43 | def check_pandas(): 44 | if pd: 45 | return pd 46 | raise NotSupportedError('Pandas package is not installed') 47 | 48 | 49 | def check_arrow(): 50 | if arrow: 51 | return arrow 52 | raise NotSupportedError('PyArrow package is not installed') 53 | -------------------------------------------------------------------------------- /clickhouse_connect/driver/summary.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | from clickhouse_connect.datatypes.registry import get_from_name 4 | 5 | from clickhouse_connect.driver.query import QueryResult 6 | 7 | 8 | class QuerySummary: 9 | summary = {} 10 | 11 | def __init__(self, summary: Optional[dict] = None): 12 | if summary is not None: 13 | self.summary = summary 14 | 15 | @property 16 | def written_rows(self) -> int: 17 | return int(self.summary.get('written_rows', 0)) 18 | @property 19 | def written_bytes(self) -> int: 20 | return int(self.summary.get('written_bytes', 0)) 21 | @property 22 | def query_id(self) -> str: 23 | return self.summary.get('query_id', '') 24 | 25 | def as_query_result(self) -> QueryResult: 26 | data = [] 27 | column_names = [] 28 | column_types = [] 29 | str_type = get_from_name('String') 30 | int_type = get_from_name('Int64') 31 | for key, value in self.summary.items(): 32 | column_names.append(key) 33 | if value.isnumeric(): 34 | data.append(int(value)) 35 | column_types.append(int_type) 36 | else: 37 | data.append(value) 38 | column_types.append(str_type) 39 | return QueryResult([data], column_names=tuple(column_names), column_types=tuple(column_types)) 40 | -------------------------------------------------------------------------------- /clickhouse_connect/driver/tools.py: -------------------------------------------------------------------------------- 1 | from typing import Optional, Sequence, Dict, Any 2 | 3 | from clickhouse_connect.driver import Client 4 | from clickhouse_connect.driver.summary import QuerySummary 5 | from clickhouse_connect.driver.binding import quote_identifier 6 | 7 | 8 | def insert_file(client: Client, 9 | table: str, 10 | file_path: str, 11 | fmt: Optional[str] = None, 12 | column_names:
Optional[Sequence[str]] = None, 13 | database: Optional[str] = None, 14 | settings: Optional[Dict[str, Any]] = None, 15 | compression: Optional[str] = None) -> QuerySummary: 16 | if not database and table[0] not in ('`', "'") and table.find('.') > 0: 17 | full_table = table 18 | elif database: 19 | full_table = f'{quote_identifier(database)}.{quote_identifier(table)}' 20 | else: 21 | full_table = quote_identifier(table) 22 | if not fmt: 23 | fmt = 'CSV' if column_names else 'CSVWithNames' 24 | if compression is None: 25 | if file_path.endswith('.gzip') or file_path.endswith('.gz'): 26 | compression = 'gzip' 27 | with open(file_path, 'rb') as file: 28 | return client.raw_insert(full_table, 29 | column_names=column_names, 30 | insert_block=file, 31 | fmt=fmt, 32 | settings=settings, 33 | compression=compression) 34 | -------------------------------------------------------------------------------- /clickhouse_connect/driver/types.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | from typing import Sequence, Any 3 | 4 | Matrix = Sequence[Sequence[Any]] 5 | 6 | 7 | class Closable(ABC): 8 | @abstractmethod 9 | def close(self): 10 | pass 11 | 12 | 13 | class ByteSource(Closable): 14 | last_message:bytes = None 15 | 16 | @abstractmethod 17 | def read_leb128(self) -> int: 18 | pass 19 | 20 | @abstractmethod 21 | def read_leb128_str(self) -> str: 22 | pass 23 | 24 | @abstractmethod 25 | def read_uint64(self) -> int: 26 | pass 27 | 28 | @abstractmethod 29 | def read_bytes(self, sz: int) -> bytes: 30 | pass 31 | 32 | @abstractmethod 33 | def read_str_col(self, num_rows: int, encoding: str, nullable: bool = False, null_obj: Any = None): 34 | pass 35 | 36 | @abstractmethod 37 | def read_bytes_col(self, sz: int, num_rows: int): 38 | pass 39 | 40 | @abstractmethod 41 | def read_fixed_str_col(self, sz: int, num_rows: int, encoding: str): 42 | pass 43 | 44 | @abstractmethod 45 | def read_array(self, array_type: str, num_rows: int): 46 | pass 47 | 48 | @abstractmethod 49 | def read_byte(self) -> int: 50 | pass 51 | -------------------------------------------------------------------------------- /clickhouse_connect/driver/tzutil.py: -------------------------------------------------------------------------------- 1 | import os 2 | from datetime import datetime 3 | from typing import Tuple 4 | 5 | import pytz 6 | 7 | tzlocal = None 8 | try: 9 | import tzlocal # Maybe we can use the tzlocal module to get a safe timezone 10 | except ImportError: 11 | pass 12 | 13 | # Set the local timezone for DateTime conversions. Note in most cases we want to use either UTC or the server 14 | # timezone, but if someone insists on using the local timezone we will try to convert. 
The problem is we 15 | # never have anything but an epoch timestamp returned from ClickHouse, so attempts to convert times when the 16 | # local timezone is "DST" aware (like 'CEST' vs 'CET') will be wrong approximately half the time 17 | local_tz: pytz.timezone 18 | local_tz_dst_safe: bool = False 19 | 20 | 21 | def normalize_timezone(timezone: pytz.timezone) -> Tuple[pytz.timezone, bool]: 22 | if timezone.tzname(None) in ('UTC', 'GMT', 'Universal', 'GMT-0', 'Zulu', 'Greenwich'): 23 | return pytz.UTC, True 24 | 25 | if timezone.tzname(None) in pytz.common_timezones: 26 | return timezone, True 27 | 28 | if tzlocal is not None: # Maybe we can use the tzlocal module to get a safe timezone 29 | local_name = tzlocal.get_localzone_name() 30 | if local_name in pytz.common_timezones: 31 | return pytz.timezone(local_name), True 32 | 33 | return timezone, False 34 | 35 | 36 | try: 37 | local_tz = pytz.timezone(os.environ.get('TZ', '')) 38 | except pytz.UnknownTimeZoneError: 39 | local_tz = datetime.now().astimezone().tzinfo 40 | 41 | local_tz, local_tz_dst_safe = normalize_timezone(local_tz) 42 | -------------------------------------------------------------------------------- /clickhouse_connect/driverc/.gitignore: -------------------------------------------------------------------------------- 1 | # Cython build output 2 | *.c 3 | *.so -------------------------------------------------------------------------------- /clickhouse_connect/driverc/__init__.pxd: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ClickHouse/clickhouse-connect/ced1ec4ddc823159648fd66ce554110192b0a023/clickhouse_connect/driverc/__init__.pxd -------------------------------------------------------------------------------- /clickhouse_connect/driverc/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ClickHouse/clickhouse-connect/ced1ec4ddc823159648fd66ce554110192b0a023/clickhouse_connect/driverc/__init__.py -------------------------------------------------------------------------------- /clickhouse_connect/driverc/buffer.pxd: -------------------------------------------------------------------------------- 1 | cdef class ResponseBuffer: 2 | cdef: 3 | unsigned long long buf_loc, buf_sz, slice_sz 4 | signed long long slice_start 5 | object gen, source 6 | char* buffer 7 | char* slice 8 | unsigned char _read_byte_load(self) except ?255 9 | char* read_bytes_c(self, unsigned long long sz) except NULL 10 | Py_buffer buff_source 11 | cdef object _read_str_col(self, unsigned long long num_rows, char * encoding) 12 | cdef object _read_nullable_str_col(self, unsigned long long num_rows, char * encoding, object null_obj) 13 | -------------------------------------------------------------------------------- /clickhouse_connect/driverc/npconv.pyx: -------------------------------------------------------------------------------- 1 | import cython 2 | 3 | import numpy as np 4 | 5 | from .buffer cimport ResponseBuffer 6 | 7 | @cython.boundscheck(False) 8 | @cython.wraparound(False) 9 | def read_numpy_array(ResponseBuffer buffer, np_type: str, unsigned long long num_rows): 10 | dtype = np.dtype(np_type) 11 | cdef sz = dtype.itemsize * num_rows 12 | cdef char * source = buffer.read_bytes_c(dtype.itemsize * num_rows) 13 | return np.frombuffer(source[:sz], dtype, num_rows) 14 | -------------------------------------------------------------------------------- /clickhouse_connect/entry_points.py: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | # This script is used for validating installed entrypoints. Note that it fails on Python 3.7 4 | import sys 5 | 6 | from importlib.metadata import PackageNotFoundError, distribution 7 | 8 | EXPECTED_EPS = {'sqlalchemy.dialects:clickhousedb', 9 | 'sqlalchemy.dialects:clickhousedb.connect'} 10 | 11 | 12 | def validate_entrypoints(): 13 | expected_eps = EXPECTED_EPS.copy() 14 | try: 15 | dist = distribution('clickhouse-connect') 16 | except PackageNotFoundError: 17 | print ('\nClickHouse Connect package not found in this Python installation') 18 | return -1 19 | print() 20 | for entry_point in dist.entry_points: 21 | name = f'{entry_point.group}:{entry_point.name}' 22 | print(f' {name}={entry_point.value}') 23 | try: 24 | expected_eps.remove(name) 25 | except KeyError: 26 | print (f'\nUnexpected entry point {name} found') 27 | return -1 28 | if expected_eps: 29 | print() 30 | for name in expected_eps: 31 | print (f'Did not find expected ep {name}') 32 | return -1 33 | print ('\nEntrypoints correctly installed') 34 | return 0 35 | 36 | 37 | if __name__ == '__main__': 38 | sys.exit(validate_entrypoints()) 39 | -------------------------------------------------------------------------------- /clickhouse_connect/json_impl.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import json as py_json 3 | from collections import OrderedDict 4 | from typing import Any 5 | 6 | try: 7 | import orjson 8 | any_to_json = orjson.dumps # pylint: disable=no-member 9 | except ImportError: 10 | orjson = None 11 | 12 | try: 13 | import ujson 14 | 15 | def _ujson_to_json(obj: Any) -> bytes: 16 | return ujson.dumps(obj).encode() # pylint: disable=c-extension-no-member 17 | except ImportError: 18 | ujson = None 19 | _ujson_to_json = None 20 | 21 | 22 | def _pyjson_to_json(obj: Any) -> bytes: 23 | return py_json.dumps(obj, separators=(',', ':')).encode() 24 | 25 | 26 | logger = logging.getLogger(__name__) 27 | _to_json = OrderedDict() 28 | _to_json['orjson'] = orjson.dumps if orjson else None # pylint: disable=no-member 29 | _to_json['ujson'] = _ujson_to_json if ujson else None 30 | _to_json['python'] = _pyjson_to_json 31 | 32 | any_to_json = _pyjson_to_json 33 | 34 | 35 | def set_json_library(impl: str = None): 36 | global any_to_json # pylint: disable=global-statement 37 | if impl: 38 | func = _to_json.get(impl) 39 | if func: 40 | any_to_json = func 41 | return 42 | raise NotImplementedError(f'JSON library {impl} is not supported') 43 | for library, func in _to_json.items(): 44 | if func: 45 | logger.debug('Using %s library for writing JSON byte strings', library) 46 | any_to_json = func 47 | break 48 | 49 | 50 | set_json_library() 51 | -------------------------------------------------------------------------------- /clickhouse_connect/py.typed: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ClickHouse/clickhouse-connect/ced1ec4ddc823159648fd66ce554110192b0a023/clickhouse_connect/py.typed -------------------------------------------------------------------------------- /clickhouse_connect/tools/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ClickHouse/clickhouse-connect/ced1ec4ddc823159648fd66ce554110192b0a023/clickhouse_connect/tools/__init__.py 
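A short sketch of how the JSON implementation selection in json_impl.py above can be exercised (illustrative; pinning 'orjson' assumes that package is installed, since set_json_library raises NotImplementedError for an unavailable library):

    from clickhouse_connect import json_impl

    json_impl.set_json_library('orjson')               # pin a specific implementation
    payload = json_impl.any_to_json({'key': 'value'})  # b'{"key":"value"}'
    json_impl.set_json_library()                       # restore the automatic preference order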
-------------------------------------------------------------------------------- /clickhouse_connect/tools/testing.py: -------------------------------------------------------------------------------- 1 | from typing import Sequence, Optional, Union, Dict, Any 2 | 3 | from clickhouse_connect.driver import Client 4 | from clickhouse_connect.driver.binding import quote_identifier, str_query_value 5 | 6 | 7 | class TableContext: 8 | def __init__(self, client: Client, 9 | table: str, 10 | columns: Union[str, Sequence[str]], 11 | column_types: Optional[Sequence[str]] = None, 12 | engine: str = 'MergeTree', 13 | order_by: str = None, 14 | settings: Optional[Dict[str, Any]] = None): 15 | self.client = client 16 | if '.' in table: 17 | self.table = table 18 | else: 19 | self.table = quote_identifier(table) 20 | self.settings = settings 21 | if isinstance(columns, str): 22 | columns = columns.split(',') 23 | if column_types is None: 24 | self.column_names = [] 25 | self.column_types = [] 26 | for col in columns: 27 | col = col.strip() 28 | ix = col.find(' ') 29 | self.column_types.append(col[ix + 1:].strip()) 30 | self.column_names.append(quote_identifier(col[:ix].strip())) 31 | else: 32 | self.column_names = [quote_identifier(name) for name in columns] 33 | self.column_types = column_types 34 | self.engine = engine 35 | self.order_by = self.column_names[0] if order_by is None else order_by 36 | 37 | def __enter__(self): 38 | if self.client.min_version('19'): 39 | self.client.command(f'DROP TABLE IF EXISTS {self.table}') 40 | else: 41 | self.client.command(f'DROP TABLE IF EXISTS {self.table} SYNC') 42 | col_defs = ','.join(f'{quote_identifier(name)} {col_type}' for name, col_type in zip(self.column_names, self.column_types)) 43 | create_cmd = f'CREATE TABLE {self.table} ({col_defs}) ENGINE {self.engine} ORDER BY {self.order_by}' 44 | if self.settings: 45 | create_cmd += ' SETTINGS ' 46 | for key, value in self.settings.items(): 47 | create_cmd += f'{key} = {str_query_value(value)}, ' 48 | if create_cmd.endswith(', '): 49 | create_cmd = create_cmd[:-2] 50 | self.client.command(create_cmd) 51 | return self 52 | 53 | def __exit__(self, exc_type, exc_val, exc_tb): 54 | self.client.command(f'DROP TABLE IF EXISTS {self.table}') 55 | -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | services: 2 | clickhouse: 3 | image: 'clickhouse/clickhouse-server:${CLICKHOUSE_CONNECT_TEST_CH_VERSION-25.1-alpine}' 4 | container_name: 'clickhouse-connect-clickhouse-server' 5 | environment: 6 | CLICKHOUSE_SKIP_USER_SETUP: 1 7 | ports: 8 | - '8123:8123' 9 | - '9000:9000' 10 | ulimits: 11 | nofile: 12 | soft: 262144 13 | hard: 262144 14 | volumes: 15 | - './.docker/clickhouse/single_node/config.xml:/etc/clickhouse-server/config.xml' 16 | - './.docker/clickhouse/users.xml:/etc/clickhouse-server/users.xml' 17 | - './.docker/clickhouse/single_node/docker_related_config.xml:/etc/clickhouse-server/config.d/docker_related_config.xml' 18 | 19 | clickhouse_tls: 20 | build: 21 | context: ./ 22 | dockerfile: .docker/clickhouse/single_node_tls/Dockerfile 23 | container_name: 'clickhouse-connect-clickhouse-server-tls' 24 | environment: 25 | CLICKHOUSE_SKIP_USER_SETUP: 1 26 | ports: 27 | - '10843:8443' 28 | - '10840:9440' 29 | ulimits: 30 | nofile: 31 | soft: 262144 32 | hard: 262144 33 | volumes: 34 | - './.docker/clickhouse/single_node_tls/config.xml:/etc/clickhouse-server/config.xml' 35 | - 
'./.docker/clickhouse/single_node_tls/users.xml:/etc/clickhouse-server/users.xml' 36 | - './.docker/clickhouse/single_node_tls/docker_related_config.xml:/etc/clickhouse-server/config.d/docker_related_config.xml' -------------------------------------------------------------------------------- /examples/benchmark.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 -u 2 | 3 | import datetime 4 | import sys 5 | import time 6 | import uuid 7 | import argparse 8 | from ipaddress import IPv6Address 9 | from typing import List 10 | 11 | import clickhouse_connect 12 | from clickhouse_connect.datatypes.format import set_default_formats 13 | from clickhouse_connect.driver.client import Client 14 | 15 | columns = { 16 | 'int8': ('Int8', -44), 17 | 'uint16': ('UInt16', 1), 18 | 'int16': ('Int16', -2), 19 | 'uint64': ('UInt64', 32489071615273482), 20 | 'float32': ('Float32', 3.14), 21 | 'str': ('String', 'hello'), 22 | 'fstr': ('FixedString(16)', b'world numkn \nman'), 23 | 'date': ('Date', datetime.date(2022, 3, 18)), 24 | 'datetime': ('DateTime', datetime.datetime.utcnow()), 25 | 'nullint': ('Nullable(Int8)', {None, 77}), 26 | 'nullstr': ('Nullable(String)', {None, 'a_null_str'}), 27 | 'enum': ("Enum16('hello' = 1, 'world' = 2)", 'hello'), 28 | 'array': ('Array(String)', ['q', 'w', 'e', 'r']), 29 | 'narray': ('Array(Array(String))', [['xkcd', 'abs', 'norbert'], ['George', 'John', 'Thomas']]), 30 | 'uuid': ('UUID', uuid.UUID('1d439f79-c57d-5f23-52c6-ffccca93e1a9')), 31 | 'bool': ('Bool', True), 32 | 'ipv4': ('IPv4', '107.34.202.7'), 33 | 'ipv6': ('IPv6', IPv6Address('fe80::f4d4:88ff:fe88:4a64')), 34 | 'tuple': ('Tuple(Nullable(String), UInt64)', ('tuple_string', 7502888)), 35 | 'dec': ('Decimal64(5)', 25774.233), 36 | 'bdec': ('Decimal128(10)', 2503.48877233), 37 | 'uint256': ('UInt256', 1057834823498238884432566), 38 | 'dt64': ('DateTime64(9)', datetime.datetime.now()), 39 | 'dt64d': ("DateTime64(6, 'America/Denver')", datetime.datetime.now()), 40 | 'lcstr': ('LowCardinality(String)', 'A simple string') 41 | } 42 | 43 | standard_cols = ['uint16', 'int16', 'float32', 'str', 'fstr', 'date', 'datetime', 'array', 'nullint', 'enum', 'uuid'] 44 | 45 | 46 | def create_table(client: Client, col_names: List[str], rows: int): 47 | if not col_names: 48 | col_names = columns.keys() 49 | col_list = ','.join([f'{col_name} {columns[col_name][0]}' for col_name in sorted(col_names)]) 50 | client.command('DROP TABLE IF EXISTS benchmark_test') 51 | client.command(f'CREATE TABLE benchmark_test ({col_list}) ENGINE Memory') 52 | insert_cols = [] 53 | for col_name in sorted(col_names): 54 | col_def = columns[col_name] 55 | if isinstance(col_def[1], set): 56 | choices = tuple(col_def[1]) 57 | cnt = len(choices) 58 | col = [choices[ix % cnt] for ix in range(rows)] 59 | else: 60 | col = [col_def[1]] * rows 61 | insert_cols.append(col) 62 | client.insert('benchmark_test', insert_cols, column_oriented=True) 63 | 64 | 65 | def check_reads(client: Client, tries: int = 50, rows: int = 100000): 66 | start_time = time.time() 67 | for _ in range(tries): 68 | result = client.query(f'SELECT * FROM benchmark_test LIMIT {rows}', column_oriented=True) 69 | assert result.row_count == rows 70 | total_time = time.time() - start_time 71 | avg_time = total_time / tries 72 | speed = int(1 / avg_time * rows) 73 | print(f'- Avg time reading {rows} rows from {tries} runs: {avg_time} sec. 
Total: {total_time}') 74 | print(f' Speed: {speed} rows/sec') 75 | 76 | 77 | def main(): 78 | parser = argparse.ArgumentParser() 79 | parser.add_argument('-t', '--tries', help='Total tries for each test', type=int, default=50) 80 | parser.add_argument('-r', '--rows', help='Total rows in dataset', type=int, default=100000) 81 | parser.add_argument('-c', '--columns', help='Column types to test', type=str, nargs='+') 82 | 83 | args = parser.parse_args() 84 | rows = args.rows 85 | tries = args.tries 86 | col_names = args.columns 87 | if col_names: 88 | if 'all' in col_names: 89 | col_names = list(columns.keys()) 90 | else: 91 | invalid = set(col_names).difference(set(columns.keys())) 92 | if invalid: 93 | print(' ,'.join(invalid) + ' columns not found') 94 | sys.exit() 95 | else: 96 | col_names = standard_cols 97 | client = clickhouse_connect.get_client(compress=False) 98 | 99 | set_default_formats('IP*', 'native', '*Int64', 'native') 100 | create_table(client, col_names, rows) 101 | check_reads(client, tries, rows) 102 | 103 | 104 | if __name__ == '__main__': 105 | main() 106 | -------------------------------------------------------------------------------- /examples/clear_test_databases.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 -u 2 | 3 | import os 4 | 5 | import clickhouse_connect 6 | 7 | 8 | def main(): 9 | host = os.getenv('CLICKHOUSE_CONNECT_TEST_HOST', 'localhost') 10 | port = int(os.getenv('CLICKHOUSE_CONNECT_TEST_PORT', '8123')) 11 | password = os.getenv('CLICKHOUSE_CONNECT_TEST_PASSWORD', '') 12 | client = clickhouse_connect.get_client(host=host, port=port, password=password) 13 | database_result = client.query("SELECT name FROM system.databases WHERE name ilike '%test%'").result_rows 14 | for database_row in database_result: 15 | database:str = database_row[0] 16 | if database.startswith('dbt_clickhouse') or database.startswith('clickhouse_connect'): 17 | print(f'DROPPING DATABASE `{database}`') 18 | client.command(f'DROP DATABASE IF EXISTS {database}') 19 | 20 | 21 | if __name__ == '__main__': 22 | main() 23 | -------------------------------------------------------------------------------- /examples/insert_examples.py: -------------------------------------------------------------------------------- 1 | import clickhouse_connect 2 | 3 | client: clickhouse_connect.driver.Client 4 | 5 | 6 | def inserted_nested_flat(): 7 | client.command('DROP TABLE IF EXISTS test_nested_flat') 8 | client.command('SET flatten_nested = 1') 9 | client.command( 10 | """ 11 | CREATE TABLE test_nested_flat 12 | ( 13 | `key` UInt32, 14 | `value` Nested(str String, int32 Int32) 15 | ) 16 | ENGINE = MergeTree 17 | ORDER BY key 18 | """) 19 | result = client.query('DESCRIBE TABLE test_nested_flat') 20 | print(result.column_names[0:2]) 21 | print(result.result_columns[0:2]) 22 | 23 | # Note the Nested 'value' column is inserted as two parallel arrays of values 24 | # into their own columns of the form `col_name.key_name` with Array data types 25 | data = [[1, ['string_1', 'string_2'], [20, 30]], 26 | [2, ['string_3', 'string_4'], [40, 50]] 27 | ] 28 | client.insert('test_nested_flat', data, 29 | column_names=['key', 'value.str', 'value.int32'], 30 | column_type_names=['UInt32', 'Array(String)', 'Array(Int32)']) 31 | 32 | result = client.query('SELECT * FROM test_nested_flat') 33 | print(result.column_names) 34 | print(result.result_columns) 35 | client.command('DROP TABLE test_nested_flat') 36 | 37 | 38 | def insert_nested_not_flat(): 39 | 
client.command('DROP TABLE IF EXISTS test_nested_not_flat') 40 | client.command('SET flatten_nested = 0') 41 | client.command( 42 | """ 43 | CREATE TABLE test_nested_not_flat 44 | ( 45 | `key` UInt32, 46 | `value` Nested(str String, int32 Int32) 47 | ) 48 | ENGINE = MergeTree 49 | ORDER BY key 50 | """) 51 | result = client.query('DESCRIBE TABLE test_nested_not_flat') 52 | print (result.column_names[0:2]) 53 | print (result.result_columns[0:2]) 54 | 55 | # Note the Nested 'value' column is inserted as a list of dictionaries for each row 56 | data = [[1, [{'str': 'nested_string_1', 'int32': 20}, 57 | {'str': 'nested_string_2', 'int32': 30}]], 58 | [2, [{'str': 'nested_string_3', 'int32': 40}, 59 | {'str': 'nested_string_4', 'int32': 50}]] 60 | ] 61 | client.insert('test_nested_not_flat', data, 62 | column_names=['key', 'value'], 63 | column_type_names=['UInt32', 'Nested(str String, int32 Int32)']) 64 | 65 | result = client.query('SELECT * FROM test_nested_not_flat') 66 | print(result.column_names) 67 | print(result.result_columns) 68 | client.command('DROP TABLE test_nested_not_flat') 69 | 70 | 71 | def main(): 72 | global client # pylint: disable=global-statement 73 | client = clickhouse_connect.get_client() 74 | print ('Nested example flatten_nested = 1 (Default)') 75 | inserted_nested_flat() 76 | print('\n\nNested example flatten_nested = 0') 77 | insert_nested_not_flat() 78 | 79 | 80 | if __name__ == '__main__': 81 | main() 82 | -------------------------------------------------------------------------------- /examples/pandas_examples.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 -u 2 | 3 | import pandas as pd 4 | import clickhouse_connect 5 | 6 | 7 | create_table_sql = """ 8 | CREATE TABLE pandas_example 9 | ( 10 | `timeseries` DateTime('UTC'), 11 | `int_value` Int32, 12 | `str_value` String, 13 | `float_value` Float64 14 | ) 15 | ENGINE = MergeTree 16 | ORDER BY timeseries 17 | """ 18 | 19 | 20 | def write_pandas_df(): 21 | client = clickhouse_connect.get_client(host='localhost', port='8123', user='default', password= '') 22 | client.command('DROP TABLE IF EXISTS pandas_example') 23 | client.command(create_table_sql) 24 | df = pd.DataFrame({'timeseries': ['04/03/2022 10:00:11', '05/03/2022 11:15:44', '06/03/2022 17:14:00'], 25 | 'int_value': [16, 19, 11], 26 | 'str_value': ['String One', 'String Two', 'A Third String'], 27 | 'float_value': [2344.288, -73002.4444, 3.14159]}) 28 | df['timeseries'] = pd.to_datetime(df['timeseries']) 29 | client.insert_df('pandas_example', df) 30 | result_df = client.query_df('SELECT * FROM pandas_example') 31 | print() 32 | print(result_df.dtypes) 33 | print() 34 | print(result_df) 35 | 36 | 37 | if __name__ == '__main__': 38 | write_pandas_df() 39 | -------------------------------------------------------------------------------- /examples/params_example.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 -u 2 | 3 | from datetime import datetime, timedelta 4 | 5 | from clickhouse_connect.driver.binding import finalize_query 6 | 7 | select_template = """ 8 | SELECT 9 | formatDateTime(started_at, '%%m/%%d/%%Y', %(time_zone)s) AS date, 10 | formatDateTime(started_at, '%%I:%%M:%%S %%p', %(time_zone)s) AS time, 11 | format('{}path/link?name={}&dev_type={}', %(web_url)s, label, device_type) AS url, 12 | device_name, 13 | description 14 | FROM sessions 15 | """ 16 | 17 | 18 | def build_device_query(time_zone: str, 19 | web_url: str, 20 | 
client: str, 21 | company_id: str = '', 22 | device_id: str = '', 23 | updated: bool = False, 24 | start_time: datetime = None, 25 | end_time: datetime = None): 26 | params = {'time_zone': time_zone, 27 | 'web_url': web_url, 28 | 'client': client 29 | } 30 | where_template = ' WHERE client = %(client)s' 31 | if company_id: 32 | where_template += ' AND company_id = %(company_id)s' 33 | params['company_id'] = company_id 34 | if device_id: 35 | where_template += ' AND dev_type = %(device_id)s' 36 | params['device_id'] = device_id 37 | if updated: 38 | where_template += ' AND updated = true' 39 | if start_time and end_time: 40 | where_template += ' AND started_at BETWEEN %(start_time)s AND %(end_time)s' 41 | params['start_time'] = start_time 42 | params['end_time'] = end_time 43 | full_query = select_template + where_template + ' ORDER BY started_at ASC' 44 | return finalize_query(full_query, params) 45 | 46 | 47 | if __name__ == '__main__': 48 | start = datetime.now() 49 | end = start + timedelta(hours=1, minutes=20) 50 | print(build_device_query('UTC', 51 | 'https://example.com', 52 | 53 | client='Client_0', 54 | company_id='Company_1', 55 | device_id='DEVICE_77', 56 | start_time=start, 57 | end_time=end 58 | ) 59 | ) 60 | -------------------------------------------------------------------------------- /examples/read_perf.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python -u 2 | 3 | """ 4 | This script is for simple timed comparisons of various queries between formats (streaming vs batch, pandas vs Python 5 | native types) based on data loaded into a local clickhouse instance from some ClickHouse Sample Datasets 6 | https://clickhouse.com/docs/en/getting-started/example-datasets/ 7 | 8 | It includes some basic comparisons with clickhouse-driver. The clickhouse-driver import and client can be 9 | commented out if clickhouse-driver is not installed 10 | 11 | Uncomment the queries and formats to measure before running. 12 | 13 | This script is not intended to be rigorous or scientific. 
For entertainment purposes only 14 | """ 15 | 16 | import time 17 | import clickhouse_driver # pylint: disable=import-error 18 | import clickhouse_connect 19 | 20 | 21 | queries = [#'SELECT trip_id, pickup, dropoff, pickup_longitude, pickup_latitude FROM taxis', 22 | #'SELECT number from numbers(500000000)', 23 | 'SELECT * FROM datasets.hits_100m_obfuscated LIMIT 2000000', 24 | #"SELECT * FROM perftest.ontime WHERE FlightDate < '2017-02-18'" 25 | ] 26 | 27 | cc_client = clickhouse_connect.get_client(compress=False) 28 | cd_client = clickhouse_driver.Client(host='localhost') 29 | 30 | 31 | def read_python_columns(query): 32 | print('\n\tclickhouse-connect Python Batch (column oriented):') 33 | start = time.time() 34 | columns = cc_client.query(query).result_columns 35 | _print_result(start, len(columns[0])) 36 | 37 | 38 | def read_python_rows(query): 39 | print('\n\tclickhouse-connect Python Batch (row oriented):') 40 | start = time.time() 41 | rows = cc_client.query(query).result_rows 42 | _print_result(start, len(rows)) 43 | 44 | 45 | def read_python_stream_columns(query): 46 | print('\n\tclickhouse-connect Python Stream (column blocks):') 47 | rows = 0 48 | start = time.time() 49 | with cc_client.query_column_block_stream(query) as stream: 50 | for block in stream: 51 | rows += len(block[0]) 52 | _print_result(start, rows) 53 | 54 | 55 | def read_python_stream_rows(query): 56 | print('\n\tclickhouse-connect Python Stream (row blocks):') 57 | rows = 0 58 | start = time.time() 59 | with cc_client.query_row_block_stream(query) as stream: 60 | for block in stream: 61 | rows += len(block) 62 | _print_result(start, rows) 63 | 64 | 65 | def read_numpy(query): 66 | print('\n\tclickhouse connect Numpy Batch:') 67 | start = time.time() 68 | arr = cc_client.query_np(query, max_str_len=100) 69 | _print_result(start, len(arr)) 70 | 71 | 72 | def read_pandas(query): 73 | print('\n\tclickhouse connect Pandas Batch:') 74 | start = time.time() 75 | rows = len(cc_client.query_df(query)) 76 | _print_result(start, rows) 77 | 78 | 79 | def read_arrow(query): 80 | print('\n\tclickhouse connect Arrow:') 81 | start = time.time() 82 | rows = len(cc_client.query_arrow(query)) 83 | _print_result(start, rows) 84 | 85 | 86 | def read_pandas_stream(query): 87 | print('\n\tclickhouse-connect Pandas Stream') 88 | start = time.time() 89 | rows = 0 90 | with cc_client.query_df_stream(query) as stream: 91 | for data_frame in stream: 92 | rows += len(data_frame) 93 | _print_result(start, rows) 94 | 95 | 96 | def dr_read_python_columns(query): 97 | print('\n\tclickhouse-driver Python Batch (column oriented):') 98 | start = time.time() 99 | result = cd_client.execute(query, columnar=True) 100 | _print_result(start, len(result[0])) 101 | 102 | 103 | def dr_read_python_rows(query): 104 | print('\n\tclickhouse-driver Python Batch (row oriented):') 105 | start = time.time() 106 | result = cd_client.execute(query) 107 | _print_result(start, len(result)) 108 | 109 | 110 | def dr_read_python_stream(query): 111 | print('\n\tclickhouse-driver Python Stream:') 112 | start = time.time() 113 | rows = 0 114 | for block in cd_client.execute_iter(query): 115 | rows += len(block) 116 | _print_result(start, rows) 117 | 118 | 119 | def dr_read_pandas(query): 120 | print('\n\tclickhouse-driver Pandas Batch:') 121 | start = time.time() 122 | data_frame = cd_client.query_dataframe(query) 123 | _print_result(start, len(data_frame)) 124 | 125 | 126 | def _print_result(start, rows): 127 | total_time = time.time() - start 128 | print(f'\t\tTime: 
{total_time:.4f} sec rows: {rows} rows/sec {rows // total_time}') 129 | 130 | 131 | def main(): 132 | for query in queries: 133 | print(f'\n{query}') 134 | # read_python_columns(query) 135 | #read_python_rows(query) 136 | read_python_stream_rows(query) 137 | #read_python_stream_columns(query) 138 | #read_pandas_stream(query) 139 | # read_numpy(query) 140 | #read_pandas(query) 141 | # read_arrow(query) 142 | #dr_read_python_columns(query) 143 | #dr_read_python_rows(query) 144 | #dr_read_python_stream(query) 145 | #dr_read_pandas(query) 146 | 147 | 148 | if __name__ == '__main__': 149 | main() 150 | -------------------------------------------------------------------------------- /examples/run_async.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python -u 2 | 3 | """ 4 | This example will execute 10 queries in total, 2 concurrent queries at a time. 5 | Each query will sleep for 2 seconds before returning. 6 | Here's a sample output that shows that the queries are executed concurrently in batches of 2: 7 | ``` 8 | Completed query 1, elapsed ms since start: 2002 9 | Completed query 0, elapsed ms since start: 2002 10 | Completed query 3, elapsed ms since start: 4004 11 | Completed query 2, elapsed ms since start: 4005 12 | Completed query 4, elapsed ms since start: 6006 13 | Completed query 5, elapsed ms since start: 6007 14 | Completed query 6, elapsed ms since start: 8009 15 | Completed query 7, elapsed ms since start: 8009 16 | Completed query 9, elapsed ms since start: 10011 17 | Completed query 8, elapsed ms since start: 10011 18 | ``` 19 | """ 20 | 21 | import asyncio 22 | from datetime import datetime 23 | 24 | import clickhouse_connect 25 | 26 | QUERIES = 10 27 | SEMAPHORE = 2 28 | 29 | 30 | async def concurrent_queries(): 31 | test_query = "SELECT sleep(2)" 32 | client = await clickhouse_connect.get_async_client() 33 | 34 | start = datetime.now() 35 | 36 | async def semaphore_wrapper(sm: asyncio.Semaphore, num: int): 37 | async with sm: 38 | await client.query(query=test_query) 39 | print(f"Completed query {num}, " 40 | f"elapsed ms since start: {int((datetime.now() - start).total_seconds() * 1000)}") 41 | 42 | semaphore = asyncio.Semaphore(SEMAPHORE) 43 | await asyncio.gather(*[semaphore_wrapper(semaphore, num) for num in range(QUERIES)]) 44 | await client.close() 45 | 46 | 47 | async def main(): 48 | await concurrent_queries() 49 | 50 | 51 | asyncio.run(main()) 52 | -------------------------------------------------------------------------------- /examples/ssh_tunnels.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python -u 2 | import os 3 | 4 | import clickhouse_connect 5 | 6 | 7 | # You can use an -L ssh tunnel directly, but to avoid HTTPS certificate errors you must add the 8 | # `server_host_name` argument to the get_client method 9 | 10 | # This example uses the following ssh tunnel command 11 | # ssh -f -N -L 1443:play.clickhouse.com:443 @ -i 12 | def direct_tunnel(): 13 | client = clickhouse_connect.get_client(host='localhost', 14 | user='play', 15 | password='clickhouse', 16 | port=1443, 17 | secure=True, 18 | server_host_name='play.clickhouse.com') 19 | print(client.query('SHOW DATABASES').result_set) 20 | client.close() 21 | 22 | 23 | # This example uses the Python sshtunnel library to create an ssh tunnel as above but within your Python code 24 | # `pip install sshtunnel` is required. 
See the sshtunnel documentation for additional configuration options 25 | # https://sshtunnel.readthedocs.io/en/latest/ 26 | 27 | try: 28 | import sshtunnel # pylint: disable=wrong-import-position 29 | except ImportError: 30 | pass 31 | 32 | 33 | def create_tunnel(): 34 | server = sshtunnel.SSHTunnelForwarder( 35 | (os.environ.get('CLICKHOUSE_TUNNEL_JUMP_HOST'), 22), # Create an ssh tunnel to your jump host/port 36 | ssh_username=os.environ.get('CLICKHOUSE_TUNNEL_USER', 'ubuntu'), # Set the user for the remote/jump host 37 | ssh_pkey=os.environ.get('CLICKHOUSE_TUNNEL_KEY_FILE', '~/.ssh/id_rsa'), # The private key file to use 38 | ssh_private_key_password=os.environ.get('CLICKHOUSE_TUNNEL_KEY_PASSWORD', None), # Private key password 39 | remote_bind_address=('play.clickhouse.com', 443), # The ClickHouse server and port you want to reach 40 | local_bind_address=('localhost', 1443) # The local address and port to bind the tunnel to 41 | ) 42 | server.start() 43 | 44 | client = clickhouse_connect.get_client(host='localhost', 45 | user='play', 46 | password='clickhouse', 47 | port=1443, 48 | secure=True, 49 | verify=True, 50 | server_host_name='play.clickhouse.com') 51 | print(client.query('SHOW DATABASES').result_set) 52 | client.close() 53 | server.close() 54 | 55 | 56 | # An example of how to use a "dynamic/SOCKS5" ssh tunnel to reach a ClickHouse server 57 | # The ssh tunnel for this example was created with the following command: 58 | # ssh -f -N -D 1443 @ -i 59 | 60 | # This example requires installing the pysocks library: 61 | # pip install pysocks 62 | # 63 | # Documentation for the SocksProxyManager here: https://urllib3.readthedocs.io/en/stable/reference/contrib/socks.html 64 | # Note there are limitations for the urllib3 SOCKSProxyManager, 65 | from urllib3.contrib.socks import SOCKSProxyManager # pylint: disable=wrong-import-position,wrong-import-order 66 | from clickhouse_connect.driver import httputil # pylint: disable=wrong-import-position 67 | 68 | 69 | def socks_proxy(): 70 | options = httputil.get_pool_manager_options() 71 | proxy_manager = SOCKSProxyManager('socks5h://localhost:1443', **options) 72 | 73 | client = clickhouse_connect.get_client(host='play.clickhouse.com', 74 | user='play', 75 | password='clickhouse', 76 | port=443, 77 | pool_mgr=proxy_manager) 78 | 79 | print(client.query('SHOW DATABASES').result_set) 80 | client.close() 81 | 82 | 83 | # Uncomment the option you want to test for local testing of your tunnel 84 | 85 | # direct_tunnel() 86 | create_tunnel() 87 | # socks_proxy() 88 | -------------------------------------------------------------------------------- /examples/write_into_file.py: -------------------------------------------------------------------------------- 1 | import clickhouse_connect 2 | 3 | if __name__ == '__main__': 4 | client = clickhouse_connect.get_client() 5 | query = 'SELECT number, toString(number) AS number_as_str FROM system.numbers LIMIT 5' 6 | fmt = 'CSVWithNames' # or any other format, see https://clickhouse.com/docs/en/interfaces/formats 7 | stream = client.raw_stream(query=query, fmt=fmt) 8 | with open("output.csv", "wb") as f: 9 | for chunk in stream: 10 | f.write(chunk) 11 | -------------------------------------------------------------------------------- /examples/write_perf.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python -u 2 | 3 | # pylint: disable=import-error,no-name-in-module 4 | import time 5 | import random 6 | import clickhouse_driver 7 | 8 | import 
clickhouse_connect 9 | from clickhouse_connect.tools.testing import TableContext 10 | 11 | 12 | inserts = [{'query': 'SELECT trip_id, pickup, dropoff, pickup_longitude, ' + 13 | 'pickup_latitude FROM taxis ORDER BY trip_id LIMIT 5000000', 14 | 'columns': 'trip_id UInt32, pickup String, dropoff String,' + 15 | ' pickup_longitude Float64, pickup_latitude Float64'}, 16 | {'query': 'SELECT number from numbers(5000000)', 17 | 'columns': 'number UInt64'}] 18 | 19 | excluded = {} 20 | cc_client = clickhouse_connect.get_client(compress=False) 21 | cd_client = clickhouse_driver.Client(host='localhost') 22 | run_id = random.randint(0, 10000000) 23 | 24 | 25 | def write_python_columns(ix, insert): 26 | print('\n\tclickhouse-connect Python Insert (column oriented):') 27 | data = cc_client.query(insert['query']).result_columns 28 | table = f'perf_test_insert_{run_id}_{ix}' 29 | with test_ctx(table, insert) as ctx: 30 | start = time.time() 31 | cc_client.insert(table, data, ctx.column_names, column_type_names=ctx.column_types, column_oriented=True) 32 | _print_result(start, len(data[0])) 33 | 34 | 35 | def write_python_rows(ix, insert): 36 | print('\n\tclickhouse-connect Python Insert (row oriented):') 37 | data = cc_client.query(insert['query']).result_rows 38 | table = f'perf_test_insert_{run_id}_{ix}' 39 | with test_ctx(table, insert) as ctx: 40 | start = time.time() 41 | cc_client.insert(table, data, ctx.column_names, column_type_names=ctx.column_types) 42 | _print_result(start, len(data)) 43 | 44 | 45 | def dr_write_python_columns(ix, insert): 46 | print('\n\tclickhouse-driver Python Insert (column oriented):') 47 | data = cd_client.execute(insert['query'], columnar=True) 48 | table = f'perf_test_insert_{run_id}_{ix}' 49 | with test_ctx(table, insert) as ctx: 50 | cols = ','.join(ctx.column_names) 51 | start = time.time() 52 | cd_client.execute(f'INSERT INTO {table} ({cols}) VALUES', data, columnar=True) 53 | _print_result(start, len(data[0])) 54 | 55 | 56 | def dr_write_python_rows(ix, insert): 57 | print('\n\tclickhouse-driver Python Insert (row oriented):') 58 | data = cd_client.execute(insert['query'], columnar=False) 59 | table = f'perf_test_insert_{run_id}_{ix}' 60 | with test_ctx(table, insert) as ctx: 61 | cols = ','.join(ctx.column_names) 62 | start = time.time() 63 | cd_client.execute(f'INSERT INTO {table} ({cols}) VALUES', data, columnar=False) 64 | _print_result(start, len(data)) 65 | 66 | 67 | def test_ctx(table, insert): 68 | return TableContext(cc_client, table, insert['columns']) 69 | 70 | 71 | def _print_result(start, rows): 72 | total_time = time.time() - start 73 | print(f'\t\tTime: {total_time:.4f} sec rows: {rows} rows/sec {rows // total_time}') 74 | 75 | 76 | def main(): 77 | for ix, insert in enumerate(inserts): 78 | if ix in excluded: 79 | continue 80 | print(f"\n{insert['query']}") 81 | # write_python_columns(ix, insert) 82 | write_python_rows(ix, insert) 83 | # dr_write_python_columns(ix, insert) 84 | dr_write_python_rows(ix, insert) 85 | 86 | 87 | class CDWrapper: 88 | def __init__(self, client): 89 | self._client = client 90 | 91 | def command(self, cmd): 92 | self._client.execute(cmd) 93 | 94 | 95 | if __name__ == '__main__': 96 | main() 97 | -------------------------------------------------------------------------------- /playtest.py: -------------------------------------------------------------------------------- 1 | import clickhouse_connect 2 | 3 | 4 | def main(): 5 | print(f'\nClickHouse Connect installed version: {clickhouse_connect.version()}') 6 | client = 
clickhouse_connect.get_client(host='play.clickhouse.com', 7 | username='play', 8 | password='clickhouse', 9 | port=443) 10 | print(f'ClickHouse Play current version and timezone: {client.server_version} ({client.server_tz})') 11 | result = client.query('SHOW DATABASES') 12 | print('ClickHouse play Databases:') 13 | for row in result.result_set: 14 | print(f' {row[0]}') 15 | client.close() 16 | 17 | 18 | if __name__ == '__main__': 19 | main() 20 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools", "cython==3.0.11"] 3 | 4 | build-backend = "setuptools.build_meta" 5 | 6 | [tool.pytest.ini_options] 7 | log_cli = true 8 | log_cli_level = "INFO" 9 | env_files = ["test.env"] 10 | asyncio_default_fixture_loop_scope = "session" 11 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | import re 3 | from setuptools import setup, find_packages 4 | 5 | c_modules = [] 6 | 7 | try: 8 | from Cython.Build import cythonize 9 | from Cython import __version__ as cython_version 10 | 11 | print(f'Using Cython {cython_version} to build cython modules') 12 | c_modules = cythonize('clickhouse_connect/driverc/*.pyx', language_level='3str') 13 | except ImportError as ex: 14 | print('Cython Install Failed, Not Building C Extensions: ', ex) 15 | cythonize = None 16 | except Exception as ex: # pylint: disable=broad-exception-caught 17 | print('Cython Build Failed, Not Building C Extensions: ', ex) 18 | cythonize = None 19 | 20 | 21 | def run_setup(try_c: bool = True): 22 | if try_c: 23 | kwargs = { 24 | 'ext_modules': c_modules, 25 | } 26 | else: 27 | kwargs = {} 28 | 29 | project_dir = os.path.abspath(os.path.dirname(__file__)) 30 | with open(os.path.join(project_dir, 'README.md'), encoding='utf-8') as read_me: 31 | long_desc = read_me.read() 32 | 33 | version = 'development' 34 | if os.path.isfile('.dev_version'): 35 | with open(os.path.join(project_dir, '.dev_version'), encoding='utf-8') as version_file: 36 | version = version_file.readline() 37 | else: 38 | with open(os.path.join(project_dir, 'clickhouse_connect', '__version__.py'), encoding='utf-8') as version_file: 39 | file_version = version_file.read().strip() 40 | match = re.search(r"version\s*=\s*'(.+)'", file_version) 41 | if match is None: 42 | raise ValueError(f'invalid version {file_version} in clickhouse_connect/__version__.py') 43 | version = match.group(1) 44 | 45 | setup( 46 | name='clickhouse-connect', 47 | author='ClickHouse Inc.', 48 | author_email='clients@clickhouse.com', 49 | keywords=['clickhouse', 'superset', 'sqlalchemy', 'http', 'driver'], 50 | description='ClickHouse Database Core Driver for Python, Pandas, and Superset', 51 | version=version, 52 | long_description=long_desc, 53 | long_description_content_type='text/markdown', 54 | package_data={'clickhouse_connect': ['VERSION', 'py.typed']}, 55 | url='https://github.com/ClickHouse/clickhouse-connect', 56 | packages=find_packages(exclude=['tests*']), 57 | python_requires='~=3.8', 58 | license='Apache License 2.0', 59 | install_requires=[ 60 | 'certifi', 61 | 'urllib3>=1.26', 62 | 'pytz', 63 | 'zstandard', 64 | 'lz4' 65 | ], 66 | extras_require={ 67 | 'sqlalchemy': ['sqlalchemy>1.3.21,<2.0'], 68 | 'numpy': ['numpy'], 69 | 'pandas': ['pandas'], 70 | 'arrow': ['pyarrow'], 71 | 
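# the extras below are optional accelerators: orjson for faster JSON serialization, tzlocal for local timezone detection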
'orjson': ['orjson'], 72 | 'tzlocal': ['tzlocal>=4.0'], 73 | }, 74 | tests_require=['pytest'], 75 | entry_points={ 76 | 'sqlalchemy.dialects': ['clickhousedb.connect=clickhouse_connect.cc_sqlalchemy.dialect:ClickHouseDialect', 77 | 'clickhousedb=clickhouse_connect.cc_sqlalchemy.dialect:ClickHouseDialect'] 78 | }, 79 | classifiers=[ 80 | 'Development Status :: 4 - Beta', 81 | 'Intended Audience :: Developers', 82 | 'License :: OSI Approved :: Apache Software License', 83 | 'Programming Language :: Python :: 3.8', 84 | 'Programming Language :: Python :: 3.9', 85 | 'Programming Language :: Python :: 3.10', 86 | 'Programming Language :: Python :: 3.11', 87 | 'Programming Language :: Python :: 3.12', 88 | 'Programming Language :: Python :: 3.13', 89 | ], 90 | **kwargs 91 | ) 92 | 93 | 94 | try: 95 | run_setup() 96 | # pylint: disable=broad-exception-caught 97 | except (Exception, IOError, SystemExit) as e: 98 | print(f'Unable to compile C extensions for faster performance due to {e}, will use pure Python') 99 | run_setup(False) 100 | -------------------------------------------------------------------------------- /test_dist/.gitignore: -------------------------------------------------------------------------------- 1 | # Ignore local configuration for superset 2 | dev_config.py 3 | 4 | # Ignore recommended symlink 5 | clickhouse_connect -------------------------------------------------------------------------------- /test_dist/superset_config.py: -------------------------------------------------------------------------------- 1 | import dev_config 2 | 3 | SUPERSET_WEBSERVER_PORT = getattr(dev_config, 'SUPERSET_WEBSERVER_PORT', 8088) 4 | SECRET_KEY = 'clickhouse_dev' 5 | db_uri = getattr(dev_config, 'SQLALCHEMY_DATABASE_URI', None) 6 | if db_uri: 7 | SQLALCHEMY_DATABASE_URI = db_uri 8 | SIP_15_ENABLED = True 9 | 10 | # Set this API key to enable Mapbox visualizations 11 | MAPBOX_API_KEY = getattr(dev_config, 'MAPBOX_API_KEY', '') 12 | PREFERRED_DATABASES = getattr(dev_config, 'PREFERRED_DATABASES', ['ClickHouse Connect', 'MySQL']) 13 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ClickHouse/clickhouse-connect/ced1ec4ddc823159648fd66ce554110192b0a023/tests/__init__.py -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | import pytz 4 | import pytest 5 | 6 | from clickhouse_connect.driver import tzutil 7 | 8 | from clickhouse_connect.datatypes.format import clear_all_formats 9 | 10 | os.environ['TZ'] = 'UTC' 11 | time.tzset() 12 | 13 | 14 | @pytest.fixture(autouse=True) 15 | def clean_global_state(): 16 | clear_all_formats() 17 | tzutil.local_tz = pytz.UTC 18 | -------------------------------------------------------------------------------- /tests/integration_tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ClickHouse/clickhouse-connect/ced1ec4ddc823159648fd66ce554110192b0a023/tests/integration_tests/__init__.py -------------------------------------------------------------------------------- /tests/integration_tests/actors.csv: -------------------------------------------------------------------------------- 1 | Robert Redford, 1936, The Sting 2 | Al Pacino, 1940, Scarface 
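The actors.csv fixture above is consumed by the external-data and raw-insert integration tests. As a minimal sketch of loading a file like it outside pytest (insert_file is the helper in clickhouse_connect.driver.tools; the table name and schema here are illustrative assumptions, not taken from the tests):

    import clickhouse_connect
    from clickhouse_connect.driver.tools import insert_file

    client = clickhouse_connect.get_client()
    # hypothetical target table matching the three CSV columns
    client.command('CREATE TABLE IF NOT EXISTS actors '
                   '(name String, birth_year UInt16, movie String) '
                   'ENGINE MergeTree ORDER BY name')
    # stream the CSV to the server as-is; no client-side parsing is performed
    insert_file(client, 'actors', 'tests/integration_tests/actors.csv', fmt='CSV')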
-------------------------------------------------------------------------------- /tests/integration_tests/datasets.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime, date 2 | 3 | null_ds = [('key1', 1000, 77.3, 'value1', datetime(2022, 10, 15, 10, 3, 2), None), 4 | ('key2', 2000, 882.00, None, None, date(1976, 5, 5)), 5 | ('key3', None, float('nan'), 'value3', datetime(2022, 7, 4), date(1999, 12, 31)), 6 | ('key4', 3000, None, 'value4', None, None)] 7 | null_ds_columns = ['key', 'num', 'flt', 'str', 'dt', 'd'] 8 | null_ds_types = ['String', 'Nullable(Int32)', 'Nullable(Float64)', 'Nullable(String)', 'Nullable(DateTime)', 9 | 'Nullable(Date)'] 10 | 11 | basic_ds = [('key1', 1000, 50.3, 'value1', datetime.now(), 'lc_1'), 12 | ('key2', 2000, -532.43, 'value2', datetime(1976, 7, 4, 12, 12, 11), 'lc_2'), 13 | ('key3', -2503, 300.00, 'value3', date(2022, 10, 15), 'lc_99')] 14 | basic_ds_columns = ['key', 'num', 'flt', 'str', 'dt', 'lc_string'] 15 | basic_ds_types = ['String', 'Int32', 'Float64', 'String', 'DateTime64(9)', 'LowCardinality(String)'] 16 | basic_ds_types_ver19 = ['String', 'Int32', 'Float64', 'String', 'DateTime', 'LowCardinality(String)'] 17 | 18 | dt_ds = [datetime(2020, 10, 10), 19 | datetime(2021, 11, 11)] 20 | dt_ds_columns = ['timestamp'] 21 | dt_ds_types = ['DateTime'] 22 | -------------------------------------------------------------------------------- /tests/integration_tests/json_test.ndjson: -------------------------------------------------------------------------------- 1 | {"key": 17, "flt_val": 5.3, "int_val": 377} 2 | {} -------------------------------------------------------------------------------- /tests/integration_tests/movies.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ClickHouse/clickhouse-connect/ced1ec4ddc823159648fd66ce554110192b0a023/tests/integration_tests/movies.csv.gz -------------------------------------------------------------------------------- /tests/integration_tests/movies.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ClickHouse/clickhouse-connect/ced1ec4ddc823159648fd66ce554110192b0a023/tests/integration_tests/movies.parquet -------------------------------------------------------------------------------- /tests/integration_tests/proxy_ca_cert.crt: -------------------------------------------------------------------------------- 1 | -----BEGIN CERTIFICATE----- 2 | MIIDTzCCAjegAwIBAgIRCgdiOrCiOU6MgzKPZvk1rB0wDQYJKoZIhvcNAQELBQAw 3 | QTEYMBYGA1UEAxMPSFRUUCBUb29sa2l0IENBMQswCQYDVQQGEwJYWDEYMBYGA1UE 4 | ChMPSFRUUCBUb29sa2l0IENBMB4XDTIzMDIwMjIzMTY0M1oXDTI0MDIwMzIzMTY0 5 | M1owQTEYMBYGA1UEAxMPSFRUUCBUb29sa2l0IENBMQswCQYDVQQGEwJYWDEYMBYG 6 | A1UEChMPSFRUUCBUb29sa2l0IENBMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIB 7 | CgKCAQEAzEMZaqeXXUXrZgn3o2pIrSavNmN6ctk5IGrx7TBrfs0BCaUpmpy6AxmI 8 | 7GayBaSFv9Kp78ORTx1rOE0+d0O5ILldMLeNjEasqAfopeQxS2GNF/rwlSMcE8Ic 9 | gi9LJk4Hh2Lwk8zJe+Xy7076irt3PPL478v1EQxRdEoe/Io8Y4eL5BoNsbxdmVHH 10 | arZD2KQvA6M/CvmoQ62DZuELOO2uE/k21lnpgTFVZMrDvNhN3L62O7tZfEz47vPN 11 | G/mCjO4lCRTkRWTGTde4p1Pr8LA2j3ENbf7WKgJS3lFPpvIgZDU6OEY+/k6unNQG 12 | ygUWbG9fO6i+zOvVADx/mBd5PGtQ7QIDAQABo0IwQDAPBgNVHRMBAf8EBTADAQH/ 13 | MA4GA1UdDwEB/wQEAwIBxjAdBgNVHQ4EFgQUBdpwLCzir2et2V5aBqu5R2WInVIw 14 | DQYJKoZIhvcNAQELBQADggEBAHOZ4JLH3bzlCj1O0Ube6n3hJ2b/O5huJsaJT27w 15 | oJz+zH7yPdRnrHwd2duQ4n8rV/rDHzVNAE7G5zHbwKRo23cMNxrzmlnOgLPMdNB5 16 | 
eL4bMHuGKa/0cvuaYYw44NdgoYO2DymySfBbOZC9XbyynUo4S1eKp7qAXeIszJcw 17 | NPtU3rg/5VQs1Lo/gbEFo0nzLb+GpNbbi6RYf9HQmXg4776Hvbn2FCF7X11zv8p3 18 | 9qPl8uZdeGFgL7Zugue9JUQbz5RRodsSVcTCxiiOJ9wJnG4PAIk2y1b88k9D2Gee 19 | 3avNeITx6wRI4HTKUC556ZJGZQ1HE/P4Ka8wxdQBSO+whYQ= 20 | -----END CERTIFICATE----- 21 | -------------------------------------------------------------------------------- /tests/integration_tests/test_arrow.py: -------------------------------------------------------------------------------- 1 | from datetime import date 2 | from typing import Callable 3 | import string 4 | 5 | import pytest 6 | 7 | from clickhouse_connect.driver import Client 8 | from clickhouse_connect.driver.options import arrow 9 | 10 | 11 | def test_arrow(test_client: Client, table_context: Callable): 12 | if not arrow: 13 | pytest.skip('PyArrow package not available') 14 | if not test_client.min_version('21'): 15 | pytest.skip(f'PyArrow is not supported in this server version {test_client.server_version}') 16 | with table_context('test_arrow_insert', ['animal String', 'legs Int64']): 17 | n_legs = arrow.array([2, 4, 5, 100] * 50) 18 | animals = arrow.array(['Flamingo', 'Horse', 'Brittle stars', 'Centipede'] * 50) 19 | names = ['legs', 'animal'] 20 | insert_table = arrow.Table.from_arrays([n_legs, animals], names=names) 21 | test_client.insert_arrow('test_arrow_insert', insert_table) 22 | result_table = test_client.query_arrow('SELECT * FROM test_arrow_insert', use_strings=False) 23 | arrow_schema = result_table.schema 24 | assert arrow_schema.field(0).name == 'animal' 25 | assert arrow_schema.field(0).type == arrow.binary() 26 | assert arrow_schema.field(1).name == 'legs' 27 | assert arrow_schema.field(1).type == arrow.int64() 28 | # pylint: disable=no-member 29 | assert arrow.compute.sum(result_table['legs']).as_py() == 5550 30 | assert len(result_table.columns) == 2 31 | 32 | arrow_table = test_client.query_arrow('SELECT number from system.numbers LIMIT 500', 33 | settings={'max_block_size': 50}) 34 | arrow_schema = arrow_table.schema 35 | assert arrow_schema.field(0).name == 'number' 36 | assert arrow_schema.field(0).type.id == 8 37 | assert arrow_table.num_rows == 500 38 | 39 | 40 | def test_arrow_stream(test_client: Client, table_context: Callable): 41 | if not arrow: 42 | pytest.skip('PyArrow package not available') 43 | if not test_client.min_version('21'): 44 | pytest.skip(f'PyArrow is not supported in this server version {test_client.server_version}') 45 | with table_context('test_arrow_insert', ['counter Int64', 'letter String']): 46 | counter = arrow.array(range(1000000)) 47 | alphabet = string.ascii_lowercase 48 | letter = arrow.array([alphabet[x % 26] for x in range(1000000)]) 49 | names = ['counter', 'letter'] 50 | insert_table = arrow.Table.from_arrays([counter, letter], names=names) 51 | test_client.insert_arrow('test_arrow_insert', insert_table) 52 | stream = test_client.query_arrow_stream('SELECT * FROM test_arrow_insert', use_strings=True) 53 | with stream: 54 | result_tables = list(stream) 55 | # Hopefully we made the table long enough we got multiple tables in the query 56 | assert len(result_tables) > 1 57 | total_rows = 0 58 | for table in result_tables: 59 | assert table.num_columns == 2 60 | arrow_schema = table.schema 61 | assert arrow_schema.field(0).name == 'counter' 62 | assert arrow_schema.field(0).type == arrow.int64() 63 | assert arrow_schema.field(1).name == 'letter' 64 | assert arrow_schema.field(1).type == arrow.string() 65 | assert table.column(1)[0].as_py() == 
alphabet[table.column(0)[0].as_py() % 26] 66 | total_rows += table.num_rows 67 | assert total_rows == 1000000 68 | 69 | 70 | def test_arrow_map(test_client: Client, table_context: Callable): 71 | if not arrow: 72 | pytest.skip('PyArrow package not available') 73 | if not test_client.min_version('21'): 74 | pytest.skip(f'PyArrow is not supported in this server version {test_client.server_version}') 75 | with table_context('test_arrow_map', ['trade_date Date, code String', 76 | 'kdj Map(String, Float32)', 77 | 'update_time DateTime DEFAULT now()']): 78 | data = [[date(2023, 10, 15), 'C1', {'k': 2.5, 'd': 0, 'j': 0}], 79 | [date(2023, 10, 16), 'C2', {'k': 3.5, 'd': 0, 'j': -.372}]] 80 | test_client.insert('test_arrow_map', data, column_names=('trade_date', 'code', 'kdj'), 81 | settings={'insert_deduplication_token': '10381'}) 82 | arrow_table = test_client.query_arrow('SELECT * FROM test_arrow_map ORDER BY trade_date', 83 | use_strings=True) 84 | assert isinstance(arrow_table.schema, arrow.Schema) 85 | test_client.insert_arrow('test_arrow_map', arrow_table, settings={'insert_deduplication_token': '10382'}) 86 | assert 4 == test_client.command('SELECT count() FROM test_arrow_map') 87 | -------------------------------------------------------------------------------- /tests/integration_tests/test_contexts.py: -------------------------------------------------------------------------------- 1 | from typing import Callable 2 | 3 | from clickhouse_connect.driver import Client 4 | 5 | 6 | def test_contexts(test_client: Client, table_context: Callable): 7 | with table_context('test_contexts', ['key Int32', 'value1 String', 'value2 String']) as ctx: 8 | data = [[1, 'v1', 'v2'], [2, 'v3', 'v4']] 9 | insert_context = test_client.create_insert_context(table=ctx.table, data=data) 10 | test_client.insert(context=insert_context) 11 | query_context = test_client.create_query_context( 12 | query=f'SELECT value1, value2 FROM {ctx.table} WHERE key = {{k:Int32}}', 13 | parameters={'k': 2}, 14 | column_oriented=True) 15 | result = test_client.query(context=query_context) 16 | assert result.result_set[1][0] == 'v4' 17 | query_context.set_parameter('k', 1) 18 | result = test_client.query(context=query_context) 19 | assert result.row_count == 1 20 | assert result.result_set[1][0] 21 | 22 | data = [[1, 'v5', 'v6'], [2, 'v7', 'v8']] 23 | test_client.insert(data=data, context=insert_context) 24 | result = test_client.query(context=query_context) 25 | assert result.row_count == 2 26 | 27 | insert_context.data = [[5, 'v5', 'v6'], [7, 'v7', 'v8']] 28 | test_client.insert(context=insert_context) 29 | assert test_client.command(f'SELECT count() FROM {ctx.table}') == 6 30 | -------------------------------------------------------------------------------- /tests/integration_tests/test_formats.py: -------------------------------------------------------------------------------- 1 | from clickhouse_connect.driver import Client, ProgrammingError 2 | 3 | 4 | def test_uint64_format(test_client: Client): 5 | # Default should be unsigned 6 | result = test_client.query('SELECT toUInt64(9523372036854775807) as value') 7 | assert result.result_set[0][0] == 9523372036854775807 8 | result = test_client.query('SELECT toUInt64(9523372036854775807) as value', query_formats={'UInt64': 'signed'}) 9 | assert result.result_set[0][0] == -8923372036854775809 10 | result = test_client.query('SELECT toUInt64(9523372036854775807) as value', query_formats={'UInt64': 'native'}) 11 | assert result.result_set[0][0] == 9523372036854775807 12 | try: 13 | 
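# an unrecognized format name should raise a ProgrammingError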
test_client.query('SELECT toUInt64(9523372036854775807) as signed', query_formats={'UInt64': 'huh'}) 14 | except ProgrammingError: 15 | pass 16 | -------------------------------------------------------------------------------- /tests/integration_tests/test_geometric.py: -------------------------------------------------------------------------------- 1 | from typing import Callable 2 | 3 | from clickhouse_connect.driver import Client 4 | 5 | 6 | def test_point_column(test_client: Client, table_context: Callable): 7 | with table_context('point_column_test', ['key Int32', 'point Point']): 8 | data = [[1, (3.55, 3.55)], [2, (4.55, 4.55)]] 9 | test_client.insert('point_column_test', data) 10 | 11 | query_result = test_client.query('SELECT * FROM point_column_test ORDER BY key').result_rows 12 | assert len(query_result) == 2 13 | assert query_result[0] == (1, (3.55, 3.55)) 14 | assert query_result[1] == (2, (4.55, 4.55)) 15 | 16 | 17 | def test_ring_column(test_client: Client, table_context: Callable): 18 | with table_context('ring_column_test', ['key Int32', 'ring Ring']): 19 | data = [[1, [(5.522, 58.472),(3.55, 3.55)]], [2, [(4.55, 4.55)]]] 20 | test_client.insert('ring_column_test', data) 21 | 22 | query_result = test_client.query('SELECT * FROM ring_column_test ORDER BY key').result_rows 23 | assert len(query_result) == 2 24 | assert query_result[0] == (1, [(5.522, 58.472),(3.55, 3.55)]) 25 | assert query_result[1] == (2, [(4.55, 4.55)]) 26 | 27 | 28 | def test_polygon_column(test_client: Client, table_context: Callable): 29 | with table_context('polygon_column_test', ['key Int32', 'polygon Polygon']): 30 | res = test_client.query("SELECT readWKTPolygon('POLYGON ((-64.8 32.3, -65.5 18.3, -80.3 25.2, -64.8 32.3))') as polygon") 31 | pg = res.first_row[0] 32 | test_client.insert('polygon_column_test', [(1, pg), (4, pg)]) 33 | query_result = test_client.query('SELECT key, polygon FROM polygon_column_test WHERE key = 4') 34 | assert query_result.first_row[1] == pg 35 | -------------------------------------------------------------------------------- /tests/integration_tests/test_inserts.py: -------------------------------------------------------------------------------- 1 | from decimal import Decimal 2 | from typing import Callable 3 | 4 | from clickhouse_connect.driver.client import Client 5 | from clickhouse_connect.driver.exceptions import DataError 6 | 7 | 8 | def test_insert(test_client: Client, test_table_engine: str): 9 | if test_client.min_version('19'): 10 | test_client.command('DROP TABLE IF EXISTS test_system_insert') 11 | else: 12 | test_client.command('DROP TABLE IF EXISTS test_system_insert SYNC') 13 | test_client.command(f'CREATE TABLE test_system_insert AS system.tables Engine {test_table_engine} ORDER BY name') 14 | tables_result = test_client.query('SELECT * from system.tables') 15 | test_client.insert(table='test_system_insert', column_names='*', data=tables_result.result_set) 16 | copy_result = test_client.command('SELECT count() from test_system_insert') 17 | assert tables_result.row_count == copy_result 18 | test_client.command('DROP TABLE IF EXISTS test_system_insert') 19 | 20 | 21 | def test_decimal_conv(test_client: Client, table_context: Callable): 22 | with table_context('test_num_conv', ['col1 UInt64', 'col2 Int32', 'f1 Float64']): 23 | data = [[Decimal(5), Decimal(-182), Decimal(55.2)], [Decimal(57238478234), Decimal(77), Decimal(-29.5773)]] 24 | test_client.insert('test_num_conv', data) 25 | result = test_client.query('SELECT * FROM test_num_conv').result_set 26 | 
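# Decimal inputs are converted to the declared UInt64/Int32/Float64 column types on insert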
assert result == [(5, -182, 55.2), (57238478234, 77, -29.5773)] 27 | 28 | 29 | def test_float_decimal_conv(test_client: Client, table_context: Callable): 30 | with table_context('test_float_to_dec_conv', ['col1 Decimal32(6)','col2 Decimal32(6)', 'col3 Decimal128(6)', 'col4 Decimal128(6)']): 31 | data = [[0.492917, 0.49291700, 0.492917, 0.49291700]] 32 | test_client.insert('test_float_to_dec_conv', data) 33 | result = test_client.query('SELECT * FROM test_float_to_dec_conv').result_set 34 | assert result == [(Decimal("0.492917"), Decimal("0.492917"), Decimal("0.492917"), Decimal("0.492917"))] 35 | 36 | 37 | def test_bad_data_insert(test_client: Client, table_context: Callable): 38 | with table_context('test_bad_insert', ['key Int32', 'float_col Float64']): 39 | data = [[1, 3.22], [2, 'nope']] 40 | try: 41 | test_client.insert('test_bad_insert', data) 42 | except DataError as ex: 43 | assert 'array' in str(ex) 44 | 45 | 46 | def test_bad_strings(test_client: Client, table_context: Callable): 47 | with table_context('test_bad_strings', 'key Int32, fs FixedString(6), nsf Nullable(FixedString(4))'): 48 | try: 49 | test_client.insert('test_bad_strings', [[1, b'\x0535', None]]) 50 | except DataError as ex: 51 | assert 'match' in str(ex) 52 | try: 53 | test_client.insert('test_bad_strings', [[1, b'\x0535abc', '😀🙃']]) 54 | except DataError as ex: 55 | assert 'encoded' in str(ex) 56 | 57 | 58 | def test_low_card_dictionary_size(test_client: Client, table_context: Callable): 59 | with table_context('test_low_card_dict', 'key Int32, lc LowCardinality(String)', 60 | settings={'index_granularity': 65536 }): 61 | data = [[x, str(x)] for x in range(30000)] 62 | test_client.insert('test_low_card_dict', data) 63 | assert 30000 == test_client.command('SELECT count() FROM test_low_card_dict') 64 | 65 | 66 | def test_column_names_spaces(test_client: Client, table_context: Callable): 67 | with table_context('test_column_spaces', 68 | columns=['key 1', 'value 1'], 69 | column_types=['Int32', 'String']): 70 | data = [[1, 'str 1'], [2, 'str 2']] 71 | test_client.insert('test_column_spaces', data) 72 | result = test_client.query('SELECT * FROM test_column_spaces').result_rows 73 | assert result[0][0] == 1 74 | assert result[1][1] == 'str 2' 75 | 76 | 77 | def test_numeric_conversion(test_client: Client, table_context: Callable): 78 | with table_context('test_numeric_convert', 79 | columns=['key Int32', 'n_int Nullable(UInt64)', 'n_flt Nullable(Float64)']): 80 | data = [[1, None, None], [2, '2', '5.32']] 81 | test_client.insert('test_numeric_convert', data) 82 | result = test_client.query('SELECT * FROM test_numeric_convert').result_rows 83 | assert result[1][1] == 2 84 | assert result[1][2] == float('5.32') 85 | test_client.command('TRUNCATE TABLE test_numeric_convert') 86 | data = [[0, '55', '532.48'], [1, None, None], [2, '2', '5.32']] 87 | test_client.insert('test_numeric_convert', data) 88 | result = test_client.query('SELECT * FROM test_numeric_convert').result_rows 89 | assert result[0][1] == 55 90 | assert result[0][2] == 532.48 91 | assert result[1][1] is None 92 | assert result[2][1] == 2 93 | assert result[2][2] == 5.32 94 | -------------------------------------------------------------------------------- /tests/integration_tests/test_multithreading.py: -------------------------------------------------------------------------------- 1 | import threading 2 | 3 | import pytest 4 | 5 | from clickhouse_connect.driver import Client 6 | from clickhouse_connect.driver.exceptions import ProgrammingError 7 | from 
tests.integration_tests.conftest import TestConfig 8 | 9 | 10 | def test_threading_error(test_config: TestConfig, test_client: Client): 11 | if test_config.cloud: 12 | pytest.skip('Skipping threading test in ClickHouse Cloud') 13 | thrown = None 14 | 15 | class QueryThread (threading.Thread): 16 | def run(self): 17 | nonlocal thrown 18 | try: 19 | test_client.command('SELECT randomString(512) FROM numbers(1000000)') 20 | except ProgrammingError as ex: 21 | thrown = ex 22 | 23 | threads = [QueryThread(), QueryThread()] 24 | for thread in threads: 25 | thread.start() 26 | for thread in threads: 27 | thread.join() 28 | 29 | assert 'concurrent' in str(thrown) 30 | -------------------------------------------------------------------------------- /tests/integration_tests/test_native_fuzz.py: -------------------------------------------------------------------------------- 1 | import os 2 | import random 3 | 4 | import pytest 5 | 6 | from clickhouse_connect.datatypes.registry import get_from_name 7 | from clickhouse_connect.driver.client import Client 8 | from clickhouse_connect.driver.ddl import TableColumnDef, create_table 9 | from tests.helpers import random_data, random_columns 10 | 11 | TEST_COLUMNS = 10 12 | MAX_DATA_ROWS = 40 13 | 14 | 15 | # pylint: disable=duplicate-code 16 | def test_query_fuzz(test_client: Client, test_table_engine: str): 17 | if not test_client.min_version('21'): 18 | pytest.skip(f'flatten_nested setting not supported in this server version {test_client.server_version}') 19 | test_runs = int(os.environ.get('CLICKHOUSE_CONNECT_TEST_FUZZ', '250')) 20 | test_client.apply_server_timezone = True 21 | try: 22 | for _ in range(test_runs): 23 | test_client.command('DROP TABLE IF EXISTS fuzz_test') 24 | data_rows = random.randint(0, MAX_DATA_ROWS) 25 | col_names, col_types = random_columns(TEST_COLUMNS) 26 | data = random_data(col_types, data_rows, test_client.server_tz) 27 | col_names = ('row_id',) + col_names 28 | col_types = (get_from_name('UInt32'),) + col_types 29 | 30 | col_defs = [TableColumnDef(name, ch_type) for name, ch_type in zip(col_names, col_types)] 31 | create_stmt = create_table('fuzz_test', col_defs, test_table_engine, {'order by': 'row_id'}) 32 | test_client.command(create_stmt, settings={'flatten_nested': 0}) 33 | test_client.insert('fuzz_test', data, col_names) 34 | 35 | data_result = test_client.query('SELECT * FROM fuzz_test') 36 | if data_rows: 37 | assert data_result.column_names == col_names 38 | assert data_result.result_set == data 39 | finally: 40 | test_client.apply_server_timezone = False 41 | -------------------------------------------------------------------------------- /tests/integration_tests/test_params.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime, date 2 | from typing import Callable 3 | 4 | from clickhouse_connect.driver import Client 5 | from clickhouse_connect.driver.binding import DT64Param 6 | 7 | 8 | def test_params(test_client: Client, table_context: Callable): 9 | result = test_client.query('SELECT name, database FROM system.tables WHERE database = {db:String}', 10 | parameters={'db': 'system'}) 11 | assert result.first_item['database'] == 'system' 12 | if test_client.min_version('21'): 13 | result = test_client.query('SELECT name, {col:String} FROM system.tables WHERE table ILIKE {t:String}', 14 | parameters={'t': '%rr%', 'col': 'database'}) 15 | assert 'rr' in result.first_item['name'] 16 | 17 | first_date = datetime.strptime('Jun 1 2005 1:33PM', '%b %d %Y 
%I:%M%p') 18 | first_date = test_client.server_tz.localize(first_date) 19 | second_date = datetime.strptime('Dec 25 2022 5:00AM', '%b %d %Y %I:%M%p') 20 | second_date = test_client.server_tz.localize(second_date) 21 | with table_context('test_bind_params', ['key UInt64', 'dt DateTime', 'value String', 't Tuple(String, String)']): 22 | test_client.insert('test_bind_params', 23 | [[1, first_date, 'v11', ('one', 'two')], 24 | [2, second_date, 'v21', ('t1', 't2')], 25 | [3, datetime.now(), 'v31', ('str1', 'str2')]]) 26 | result = test_client.query('SELECT * FROM test_bind_params WHERE dt = {dt:DateTime}', 27 | parameters={'dt': second_date}) 28 | assert result.first_item['key'] == 2 29 | result = test_client.query('SELECT * FROM test_bind_params WHERE dt = %(dt)s', 30 | parameters={'dt': first_date}) 31 | assert result.first_item['key'] == 1 32 | result = test_client.query("SELECT * FROM test_bind_params WHERE value != %(v)s AND value like '%%1'", 33 | parameters={'v': 'v11'}) 34 | assert result.row_count == 2 35 | result = test_client.query('SELECT * FROM test_bind_params WHERE value IN %(tp)s', 36 | parameters={'tp': ('v18', 'v31')}) 37 | assert result.first_item['key'] == 3 38 | 39 | result = test_client.query('SELECT number FROM numbers(10) WHERE {n:Nullable(String)} IS NULL', 40 | parameters={'n': None}).result_rows 41 | assert len(result) == 10 42 | 43 | date_params = [date(2023, 6, 1), date(2023, 8, 5)] 44 | result = test_client.query('SELECT {l:Array(Date)}', parameters={'l': date_params}).first_row 45 | assert date_params == result[0] 46 | 47 | dt_params = [datetime(2023, 6, 1, 7, 40, 2), datetime(2023, 8, 17, 20, 0, 10)] 48 | result = test_client.query('SELECT {l:Array(DateTime)}', parameters={'l': dt_params}).first_row 49 | assert dt_params == result[0] 50 | 51 | num_array_params = [2.5, 5.3, 7.4] 52 | result = test_client.query('SELECT {l:Array(Float64)}', parameters={'l': num_array_params}).first_row 53 | assert num_array_params == result[0] 54 | result = test_client.query('SELECT %(l)s', parameters={'l': num_array_params}).first_row 55 | assert num_array_params == result[0] 56 | 57 | tp_params = ('str1', 'str2') 58 | result = test_client.query('SELECT %(tp)s', parameters={'tp': tp_params}).first_row 59 | assert tp_params == result[0] 60 | 61 | num_params = {'p_0': 2, 'p_1': 100523.55} 62 | result = test_client.query( 63 | 'SELECT count() FROM system.tables WHERE total_rows > %(p_0)d and total_rows < %(p_1)f', parameters=num_params) 64 | assert result.first_row[0] > 0 65 | 66 | 67 | def test_datetime_64_params(test_client: Client): 68 | dt_values = [datetime(2023, 6, 1, 7, 40, 2, 250306), datetime(2023, 8, 17, 20, 0, 10, 777722)] 69 | dt_params = {f'd{ix}': DT64Param(v) for ix, v in enumerate(dt_values)} 70 | result = test_client.query('SELECT {d0:DateTime64(3)}, {d1:Datetime64(9)}', parameters=dt_params).first_row 71 | assert result[0] == dt_values[0].replace(microsecond=250000) 72 | assert result[1] == dt_values[1] 73 | 74 | result = test_client.query('SELECT {a1:Array(DateTime64(6))}', parameters={'a1': [dt_params['d0'], dt_params['d1']]}).first_row 75 | assert result[0] == dt_values 76 | 77 | dt_params = {f'd{ix}_64': v for ix, v in enumerate(dt_values)} 78 | result = test_client.query('SELECT {d0:DateTime64(3)}, {d1:Datetime64(9)}', parameters=dt_params).first_row 79 | assert result[0] == dt_values[0].replace(microsecond=250000) 80 | assert result[1] == dt_values[1] 81 | 82 | result = test_client.query('SELECT {a1:Array(DateTime64(6))}', 83 | parameters={'a1_64': 
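# a parameter name ending in '_64' binds datetime values with DateTime64 (sub-second) precision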
dt_values}).first_row 84 | assert result[0] == dt_values 85 | 86 | dt_params = [DT64Param(v) for v in dt_values] 87 | result = test_client.query("SELECT %s as string, toDateTime64(%s,6) as dateTime", parameters = dt_params).first_row 88 | assert result == ('2023-06-01 07:40:02.250306', dt_values[1]) 89 | -------------------------------------------------------------------------------- /tests/integration_tests/test_protocol_version.py: -------------------------------------------------------------------------------- 1 | from clickhouse_connect.driver import Client 2 | 3 | 4 | def test_protocol_version(test_client: Client): 5 | query = "select toDateTime(1676369730, 'Asia/Shanghai') as dt FORMAT Native" 6 | raw = test_client.raw_query(query) 7 | assert raw.hex() == '0101026474084461746554696d65425feb63' 8 | 9 | if test_client.min_version('23.3'): 10 | raw = test_client.raw_query(query, settings={'client_protocol_version': 54337}) 11 | ch_type = raw[14:39].decode() 12 | assert ch_type == "DateTime('Asia/Shanghai')" 13 | -------------------------------------------------------------------------------- /tests/integration_tests/test_proxy.py: -------------------------------------------------------------------------------- 1 | import os 2 | from pathlib import Path 3 | 4 | import pytest 5 | from urllib3 import ProxyManager 6 | 7 | import clickhouse_connect 8 | from tests.integration_tests.conftest import TestConfig 9 | 10 | 11 | def test_proxies(test_config: TestConfig): 12 | if not test_config.proxy_address: 13 | pytest.skip('Proxy address not configured') 14 | if test_config.port in (8123, 10723): 15 | client = clickhouse_connect.get_client(host=test_config.host, 16 | port=test_config.port, 17 | username=test_config.username, 18 | password=test_config.password, 19 | http_proxy=test_config.proxy_address) 20 | assert '2' in client.command('SELECT version()') 21 | client.close() 22 | 23 | try: 24 | os.environ['HTTP_PROXY'] = f'http://{test_config.proxy_address}' 25 | client = clickhouse_connect.get_client(host=test_config.host, 26 | port=test_config.port, 27 | username=test_config.username, 28 | password=test_config.password) 29 | assert isinstance(client.http, ProxyManager) 30 | assert '2' in client.command('SELECT version()') 31 | client.close() 32 | 33 | os.environ['no_proxy'] = f'{test_config.host}:{test_config.port}' 34 | client = clickhouse_connect.get_client(host=test_config.host, 35 | port=test_config.port, 36 | username=test_config.username, 37 | password=test_config.password) 38 | assert not isinstance(client.http, ProxyManager) 39 | assert '2' in client.command('SELECT version()') 40 | client.close() 41 | finally: 42 | os.environ.pop('HTTP_PROXY', None) 43 | os.environ.pop('no_proxy', None) 44 | else: 45 | cert_file = f'{Path(__file__).parent}/proxy_ca_cert.crt' 46 | client = clickhouse_connect.get_client(host=test_config.host, 47 | port=test_config.port, 48 | username=test_config.username, 49 | password=test_config.password, 50 | ca_cert=cert_file, 51 | https_proxy=test_config.proxy_address) 52 | assert '2' in client.command('SELECT version()') 53 | client.close() 54 | 55 | try: 56 | os.environ['HTTPS_PROXY'] = f'{test_config.proxy_address}' 57 | client = clickhouse_connect.get_client(host=test_config.host, 58 | port=test_config.port, 59 | username=test_config.username, 60 | password=test_config.password, 61 | ca_cert=cert_file) 62 | assert isinstance(client.http, ProxyManager) 63 | assert '2' in client.command('SELECT version()') 64 | client.close() 65 | finally: 66 | 
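# always remove the proxy variable so it cannot leak into other tests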
os.environ.pop('HTTPS_PROXY', None) 67 | -------------------------------------------------------------------------------- /tests/integration_tests/test_raw_insert.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | from typing import Callable 3 | 4 | from clickhouse_connect.driver import Client 5 | 6 | 7 | def test_raw_insert(test_client: Client, table_context: Callable): 8 | with table_context('test_raw_insert', ["`weir'd` String", 'value String']): 9 | csv = 'value1\nvalue2' 10 | test_client.raw_insert('test_raw_insert', ['"weir\'d"'], csv.encode(), fmt='CSV') 11 | result = test_client.query('SELECT * FROM test_raw_insert') 12 | assert result.result_set[1][0] == 'value2' 13 | 14 | test_client.command('TRUNCATE TABLE test_raw_insert') 15 | tsv = 'weird1\tvalue__`2\nweird2\tvalue77' 16 | test_client.raw_insert('test_raw_insert', ["`weir'd`", 'value'], tsv, fmt='TSV') 17 | result = test_client.query('SELECT * FROM test_raw_insert') 18 | assert result.result_set[0][1] == 'value__`2' 19 | assert result.result_set[1][1] == 'value77' 20 | 21 | 22 | def test_raw_insert_compression(test_client: Client, table_context: Callable): 23 | data_file = f'{Path(__file__).parent}/movies.csv.gz' 24 | with open(data_file, mode='rb') as movies_file: 25 | data = movies_file.read() 26 | with table_context('test_gzip_movies', ['movie String', 'year UInt16', 'rating Decimal32(3)']): 27 | test_client.raw_insert('test_gzip_movies', None, data, fmt='CSV', compression='gzip', 28 | settings={'input_format_allow_errors_ratio': .2, 29 | 'input_format_allow_errors_num': 5} 30 | ) 31 | res = test_client.query( 32 | 'SELECT count() as count, sum(rating) as rating, max(year) as year FROM test_gzip_movies').first_item 33 | assert res['count'] == 248 34 | assert res['year'] == 2022 35 | -------------------------------------------------------------------------------- /tests/integration_tests/test_session_id.py: -------------------------------------------------------------------------------- 1 | import uuid 2 | 3 | from typing import Callable 4 | 5 | import pytest 6 | 7 | from clickhouse_connect.driver import create_async_client 8 | from tests.integration_tests.conftest import TestConfig 9 | 10 | SESSION_KEY = 'session_id' 11 | 12 | 13 | def test_client_default_session_id(test_create_client: Callable): 14 | # by default, the sync client will autogenerate the session id 15 | client = test_create_client() 16 | session_id = client.get_client_setting(SESSION_KEY) 17 | try: 18 | uuid.UUID(session_id) 19 | except ValueError: 20 | pytest.fail(f"Invalid session_id: {session_id}") 21 | client.close() 22 | 23 | 24 | def test_client_autogenerate_session_id(test_create_client: Callable): 25 | client = test_create_client() 26 | session_id = client.get_client_setting(SESSION_KEY) 27 | try: 28 | uuid.UUID(session_id) 29 | except ValueError: 30 | pytest.fail(f"Invalid session_id: {session_id}") 31 | 32 | 33 | def test_client_custom_session_id(test_create_client: Callable): 34 | session_id = 'custom_session_id' 35 | client = test_create_client(session_id=session_id) 36 | assert client.get_client_setting(SESSION_KEY) == session_id 37 | client.close() 38 | 39 | 40 | @pytest.mark.asyncio 41 | async def test_async_client_default_session_id(test_config: TestConfig): 42 | # by default, the async client will NOT autogenerate the session id 43 | async_client = await create_async_client(database=test_config.test_database, 44 | host=test_config.host, 45 | port=test_config.port, 46 | 
user=test_config.username, 47 | password=test_config.password) 48 | assert async_client.get_client_setting(SESSION_KEY) is None 49 | await async_client.close() 50 | 51 | 52 | @pytest.mark.asyncio 53 | async def test_async_client_autogenerate_session_id(test_config: TestConfig): 54 | async_client = await create_async_client(database=test_config.test_database, 55 | host=test_config.host, 56 | port=test_config.port, 57 | user=test_config.username, 58 | password=test_config.password, 59 | autogenerate_session_id=True) 60 | session_id = async_client.get_client_setting(SESSION_KEY) 61 | try: 62 | uuid.UUID(session_id) 63 | except ValueError: 64 | pytest.fail(f"Invalid session_id: {session_id}") 65 | await async_client.close() 66 | 67 | 68 | @pytest.mark.asyncio 69 | async def test_async_client_custom_session_id(test_config: TestConfig): 70 | session_id = 'custom_session_id' 71 | async_client = await create_async_client(database=test_config.test_database, 72 | host=test_config.host, 73 | port=test_config.port, 74 | user=test_config.username, 75 | password=test_config.password, 76 | session_id=session_id) 77 | assert async_client.get_client_setting(SESSION_KEY) == session_id 78 | await async_client.close() 79 | -------------------------------------------------------------------------------- /tests/integration_tests/test_sqlalchemy/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ClickHouse/clickhouse-connect/ced1ec4ddc823159648fd66ce554110192b0a023/tests/integration_tests/test_sqlalchemy/__init__.py -------------------------------------------------------------------------------- /tests/integration_tests/test_sqlalchemy/conftest.py: -------------------------------------------------------------------------------- 1 | from typing import Iterator 2 | from pytest import fixture 3 | 4 | from sqlalchemy.engine import create_engine 5 | from sqlalchemy.engine.base import Engine 6 | 7 | from tests.integration_tests.conftest import TestConfig 8 | 9 | 10 | @fixture(scope='module', name='test_engine') 11 | def test_engine_fixture(test_config: TestConfig) -> Iterator[Engine]: 12 | test_engine: Engine = create_engine( 13 | f'clickhousedb://{test_config.username}:{test_config.password}@{test_config.host}:' + 14 | f'{test_config.port}/{test_config.test_database}?ch_http_max_field_name_size=99999' + 15 | '&use_skip_indexes=0&ca_cert=certifi&query_limit=2333&compression=zstd' 16 | ) 17 | 18 | yield test_engine 19 | test_engine.dispose() 20 | -------------------------------------------------------------------------------- /tests/integration_tests/test_sqlalchemy/test_basics.py: -------------------------------------------------------------------------------- 1 | from sqlalchemy.engine import Engine 2 | 3 | from clickhouse_connect import common 4 | 5 | test_query = """ 6 | -- 6dcd92a04feb50f14bbcf07c661680ba 7 | WITH dummy = 2 8 | SELECT database, name FROM system.tables LIMIT 2 9 | -- 6dcd92a04feb50f14bbcf07c661680ba 10 | """ 11 | 12 | test_query_ver19 = """ 13 | -- 6dcd92a04feb50f14bbcf07c661680ba 14 | SELECT database, name FROM system.tables LIMIT 2 15 | -- 6dcd92a04feb50f14bbcf07c661680ba 16 | """ 17 | 18 | 19 | def test_dsn_config(test_engine: Engine): 20 | common.set_setting('invalid_setting_action', 'drop') 21 | client = test_engine.raw_connection().connection.client 22 | assert client.http.connection_pool_kw['cert_reqs'] == 'CERT_REQUIRED' 23 | assert 'use_skip_indexes' in client.params 24 | assert 
client.params['http_max_field_name_size'] == '99999' 25 | assert client.query_limit == 2333 26 | assert client.compression == 'zstd' 27 | 28 | 29 | def test_cursor(test_engine: Engine): 30 | common.set_setting('invalid_setting_action', 'drop') 31 | raw_conn = test_engine.raw_connection() 32 | cursor = raw_conn.cursor() 33 | sql = test_query 34 | if not raw_conn.connection.client.min_version('21'): 35 | sql = test_query_ver19 36 | 37 | cursor.execute(sql) 38 | assert cursor.description[0][0] == 'database' 39 | assert cursor.description[1][1] == 'String' 40 | assert len(getattr(cursor, 'data')) == 2 41 | assert cursor.summary[0]["read_rows"] == '2' 42 | raw_conn.close() 43 | 44 | 45 | def test_execute(test_engine: Engine): 46 | common.set_setting('invalid_setting_action', 'drop') 47 | 48 | with test_engine.begin() as conn: 49 | sql = test_query 50 | if not conn.connection.connection.client.min_version('21'): 51 | sql = test_query_ver19 52 | rows = list(row for row in conn.execute(sql)) 53 | assert len(rows) == 2 54 | 55 | rows = list(row for row in conn.execute('DROP TABLE IF EXISTS dummy_table')) 56 | assert len(rows) > 0 # This is just the metadata from the "command" QueryResult 57 | 58 | rows = list(row for row in conn.execute('describe TABLE system.columns')) 59 | assert len(rows) > 5 60 | -------------------------------------------------------------------------------- /tests/integration_tests/test_sqlalchemy/test_ddl.py: -------------------------------------------------------------------------------- 1 | from enum import Enum as PyEnum 2 | 3 | import sqlalchemy as db 4 | from sqlalchemy import MetaData 5 | 6 | from sqlalchemy.engine.base import Engine 7 | from sqlalchemy.ext.declarative import declarative_base 8 | 9 | from tests.integration_tests.conftest import TestConfig 10 | from clickhouse_connect import common 11 | from clickhouse_connect.cc_sqlalchemy.datatypes.sqltypes import Int8, UInt16, Decimal, Enum16, Float64, Boolean, \ 12 | FixedString, String, UInt64, UUID, DateTime, DateTime64, LowCardinality, Nullable, Array, AggregateFunction, \ 13 | UInt32, IPv4 14 | from clickhouse_connect.cc_sqlalchemy.ddl.custom import CreateDatabase, DropDatabase 15 | from clickhouse_connect.cc_sqlalchemy.ddl.tableengine import engine_map 16 | 17 | 18 | def test_create_database(test_engine: Engine, test_config: TestConfig, test_db: str): 19 | if test_db: 20 | common.set_setting('invalid_setting_action', 'drop') 21 | conn = test_engine.connect() 22 | create_db = f'create_db_{test_db}' 23 | if not test_engine.dialect.has_database(conn, create_db): 24 | if test_config.host == 'localhost' and conn.connection.connection.client.min_version('20'): 25 | conn.execute(CreateDatabase(create_db, 'Atomic')) 26 | else: 27 | conn.execute(CreateDatabase(create_db)) 28 | conn.execute(DropDatabase(create_db)) 29 | 30 | 31 | class ColorEnum(PyEnum): 32 | RED = 1 33 | BLUE = 2 34 | TEAL = -4 35 | COBALT = 877 36 | 37 | 38 | def test_create_table(test_engine: Engine, test_db: str, test_table_engine: str): 39 | common.set_setting('invalid_setting_action', 'drop') 40 | conn = test_engine.connect() 41 | table_cls = engine_map[test_table_engine] 42 | metadata = db.MetaData(bind=test_engine, schema=test_db) 43 | conn.execute('DROP TABLE IF EXISTS simple_table_test') 44 | bool_type = Boolean 45 | date_tz64_type = DateTime64(3, 'Europe/Moscow') 46 | if not conn.connection.connection.client.min_version('20'): 47 | bool_type = Int8 48 | date_tz64_type = DateTime('Europe/Moscow') 49 | table = db.Table('simple_table_test', 
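# the column types below are ClickHouse-native SQLAlchemy types from cc_sqlalchemy.datatypes.sqltypes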
metadata, 50 | db.Column('key_col', Int8), 51 | db.Column('uint_col', UInt16), 52 | db.Column('dec_col', Decimal(38, 5)), # Decimal128(5) 53 | db.Column('enum_col', Enum16(ColorEnum)), 54 | db.Column('float_col', Float64), 55 | db.Column('str_col', String), 56 | db.Column('fstr_col', FixedString(17)), 57 | db.Column('bool_col', bool_type), 58 | table_cls(('key_col', 'uint_col'), primary_key='key_col')) 59 | table.create(conn) 60 | conn.execute('DROP TABLE IF EXISTS advanced_table_test') 61 | table = db.Table('advanced_table_test', metadata, 62 | db.Column('key_col', UInt64), 63 | db.Column('uuid_col', UUID), 64 | db.Column('dt_col', DateTime), 65 | db.Column('ip_col', IPv4), 66 | db.Column('dt64_col', date_tz64_type), 67 | db.Column('lc_col', LowCardinality(FixedString(16))), 68 | db.Column('lc_date_col', LowCardinality(Nullable(String))), 69 | db.Column('null_dt_col', Nullable(DateTime('America/Denver'))), 70 | db.Column('arr_col', Array(UUID)), 71 | db.Column('agg_col', AggregateFunction('uniq', LowCardinality(String))), 72 | table_cls('key_col')) 73 | table.create(conn) 74 | 75 | 76 | def test_declarative(test_engine: Engine, test_db: str, test_table_engine: str): 77 | common.set_setting('invalid_setting_action', 'drop') 78 | conn = test_engine.connect() 79 | conn.execute('DROP TABLE IF EXISTS users_test') 80 | table_cls = engine_map[test_table_engine] 81 | base_cls = declarative_base(metadata=MetaData(schema=test_db)) 82 | 83 | class User(base_cls): 84 | __tablename__ = 'users_test' 85 | __table_args__ = (table_cls(order_by=['id', 'name']),) 86 | id = db.Column(UInt32, primary_key=True) 87 | name = db.Column(String) 88 | fullname = db.Column(String) 89 | nickname = db.Column(String) 90 | 91 | base_cls.metadata.create_all(test_engine) 92 | user = User(name='Alice') 93 | assert user.name == 'Alice' 94 | -------------------------------------------------------------------------------- /tests/integration_tests/test_sqlalchemy/test_inserts.py: -------------------------------------------------------------------------------- 1 | from pytest import fixture 2 | 3 | import sqlalchemy as db 4 | from sqlalchemy import MetaData 5 | from sqlalchemy.engine import Engine 6 | from sqlalchemy.ext.declarative import declarative_base 7 | from sqlalchemy.orm import Session 8 | 9 | from clickhouse_connect.cc_sqlalchemy.datatypes.sqltypes import LowCardinality, String, UInt64 10 | from clickhouse_connect.cc_sqlalchemy.ddl.tableengine import engine_map 11 | from clickhouse_connect.driver import Client 12 | 13 | 14 | @fixture(scope='module', autouse=True, name='test_model') 15 | def test_model_fixture(test_client: Client, test_engine: Engine, test_db: str, test_table_engine: str): 16 | if not test_client.min_version('22.6.1'): 17 | yield None 18 | return 19 | engine_cls = engine_map[test_table_engine] 20 | 21 | Base = declarative_base(metadata=MetaData(schema=test_db)) # pylint: disable=invalid-name 22 | 23 | class Model(Base): 24 | __tablename__ = 'insert_model' 25 | __table_args__ = (engine_cls(order_by=['test_name', 'value_1']),) 26 | test_name = db.Column(LowCardinality(String), primary_key=True) 27 | value_1 = db.Column(String) 28 | metric_2 = db.Column(UInt64) 29 | description = db.Column(String) 30 | 31 | test_engine.execute('DROP TABLE IF EXISTS insert_model') 32 | Base.metadata.create_all(test_engine) 33 | yield Model 34 | 35 | 36 | def test_single_insert(test_engine: Engine, test_model): 37 | conn = test_engine.connect() 38 | conn.execute(db.insert(test_model).values(test_name='single_insert', 39 | 
value_1='v1', 40 | metric_2=25738, 41 | description='Single Desc')) 42 | conn.execute(db.insert(test_model), {'test_name': 'another_single_insert'}) 43 | 44 | 45 | def test_multiple_insert(test_engine: Engine, test_model): 46 | session = Session(test_engine) 47 | model_1 = test_model(test_name='multi_1', 48 | value_1='v1', 49 | metric_2=100, 50 | description='First of Many') 51 | model_2 = test_model(test_name='multi_2', 52 | value_1='v2', 53 | metric_2=100, 54 | description='Second of Many') 55 | model_3 = test_model(value_1='v7', 56 | metric_2=77, 57 | description='Third of Many', 58 | test_name='odd_one') 59 | session.add(model_1) 60 | session.add(model_2) 61 | session.add(model_3) 62 | session.commit() 63 | 64 | 65 | def test_bulk_insert(test_engine: Engine, test_model): 66 | session = Session(test_engine) 67 | model_1 = test_model(test_name='bulk_1', 68 | value_1='v1', 69 | metric_2=100, 70 | description='First of Bulk') 71 | model_2 = test_model(test_name='bulk_2', 72 | value_1='v2', 73 | metric_2=100, 74 | description='Second of Bulk') 75 | model_3 = test_model(value_1='vb78', 76 | metric_2=528, 77 | description='Third of Bulk', 78 | test_name='bulk') 79 | session.bulk_save_objects([model_1, model_2, model_3]) 80 | session.commit() 81 | -------------------------------------------------------------------------------- /tests/integration_tests/test_sqlalchemy/test_reflect.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=no-member 2 | import sqlalchemy as db 3 | from sqlalchemy.engine import Engine 4 | 5 | from clickhouse_connect import common 6 | from clickhouse_connect.cc_sqlalchemy.datatypes.sqltypes import UInt32, SimpleAggregateFunction, Point 7 | 8 | 9 | def test_basic_reflection(test_engine: Engine): 10 | common.set_setting('invalid_setting_action', 'drop') 11 | conn = test_engine.connect() 12 | metadata = db.MetaData(bind=test_engine, schema='system') 13 | table = db.Table('tables', metadata, autoload_with=test_engine) 14 | query = db.select([table.columns.create_table_query]) 15 | result = conn.execute(query) 16 | rows = result.fetchmany(100) 17 | assert rows 18 | 19 | 20 | def test_full_table_reflection(test_engine: Engine, test_db: str): 21 | common.set_setting('invalid_setting_action', 'drop') 22 | conn = test_engine.connect() 23 | conn.execute(f'DROP TABLE IF EXISTS {test_db}.reflect_test') 24 | conn.execute( 25 | f'CREATE TABLE {test_db}.reflect_test (key UInt32, value FixedString(20),'+ 26 | 'agg SimpleAggregateFunction(anyLast, String))' + 27 | 'ENGINE AggregatingMergeTree ORDER BY key') 28 | metadata = db.MetaData(bind=test_engine, schema=test_db) 29 | table = db.Table('reflect_test', metadata, autoload_with=test_engine) 30 | assert table.columns.key.type.__class__ == UInt32 31 | assert table.columns.agg.type.__class__ == SimpleAggregateFunction 32 | assert 'MergeTree' in table.engine.name 33 | 34 | 35 | def test_types_reflection(test_engine: Engine, test_db: str): 36 | common.set_setting('invalid_setting_action', 'drop') 37 | conn = test_engine.connect() 38 | conn.execute(f'DROP TABLE IF EXISTS {test_db}.sqlalchemy_types_test') 39 | conn.execute( 40 | f'CREATE TABLE {test_db}.sqlalchemy_types_test (key UInt32, pt Point) ' + 41 | 'ENGINE MergeTree ORDER BY key') 42 | metadata = db.MetaData(bind=test_engine, schema=test_db) 43 | table = db.Table('sqlalchemy_types_test', metadata, autoload_with=test_engine) 44 | assert table.columns.key.type.__class__ == UInt32 45 | assert table.columns.pt.type.__class__ == Point 
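# geometric types such as Point round-trip through table reflection just like scalar column types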
46 | assert 'MergeTree' in table.engine.name 47 | 48 | 49 | def test_table_exists(test_engine: Engine): 50 | common.set_setting('invalid_setting_action', 'drop') 51 | conn = test_engine.connect() 52 | assert test_engine.dialect.has_table(conn, 'columns', 'system') 53 | assert not test_engine.dialect.has_table(conn, 'nope', 'fake_db') 54 | -------------------------------------------------------------------------------- /tests/integration_tests/test_streaming.py: -------------------------------------------------------------------------------- 1 | import random 2 | import string 3 | 4 | from clickhouse_connect.driver import Client 5 | from clickhouse_connect.driver.exceptions import StreamClosedError, ProgrammingError, StreamFailureError 6 | 7 | 8 | def test_row_stream(test_client: Client): 9 | row_stream = test_client.query_rows_stream('SELECT number FROM numbers(10000)') 10 | total = 0 11 | with row_stream: 12 | for row in row_stream: 13 | total += row[0] 14 | try: 15 | with row_stream: 16 | pass 17 | except StreamClosedError: 18 | pass 19 | assert total == 49995000 20 | 21 | 22 | def test_column_block_stream(test_client: Client): 23 | random_string = 'randomStringUTF8(50)' 24 | if not test_client.min_version('20'): 25 | random_string = "'" + ''.join(random.choices(string.ascii_lowercase, k=50)) + "'" 26 | block_stream = test_client.query_column_block_stream(f'SELECT number, {random_string} FROM numbers(10000)', 27 | settings={'max_block_size': 4000}) 28 | total = 0 29 | block_count = 0 30 | with block_stream: 31 | for block in block_stream: 32 | block_count += 1 33 | total += sum(block[0]) 34 | assert total == 49995000 35 | assert block_count > 1 36 | 37 | 38 | def test_row_block_stream(test_client: Client): 39 | random_string = 'randomStringUTF8(50)' 40 | if not test_client.min_version('20'): 41 | random_string = "'" + ''.join(random.choices(string.ascii_lowercase, k=50)) + "'" 42 | block_stream = test_client.query_row_block_stream(f'SELECT number, {random_string} FROM numbers(10000)', 43 | settings={'max_block_size': 4000}) 44 | total = 0 45 | block_count = 0 46 | with block_stream: 47 | for block in block_stream: 48 | block_count += 1 49 | for row in block: 50 | total += row[0] 51 | assert total == 49995000 52 | assert block_count > 1 53 | 54 | 55 | def test_stream_errors(test_client: Client): 56 | query_result = test_client.query('SELECT number FROM numbers(100000)') 57 | try: 58 | for _ in query_result.row_block_stream: 59 | pass 60 | except ProgrammingError as ex: 61 | assert 'context' in str(ex) 62 | assert query_result.row_count == 100000 63 | try: 64 | with query_result.rows_stream as stream: 65 | assert sum(row[0] for row in stream) == 3882 66 | except StreamClosedError: 67 | pass 68 | 69 | 70 | def test_stream_failure(test_client: Client): 71 | with test_client.query_row_block_stream('SELECT toString(cityHash64(number)) FROM numbers(10000000)' + 72 | ' where intDiv(1,number-300000)>-100000000') as stream: 73 | blocks = 0 74 | failed = False 75 | try: 76 | for _ in stream: 77 | blocks += 1 78 | except StreamFailureError as ex: 79 | failed = True 80 | assert 'division by zero' in str(ex).lower() 81 | assert failed 82 | -------------------------------------------------------------------------------- /tests/integration_tests/test_tls.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import pytest 4 | from urllib3.exceptions import SSLError 5 | 6 | from clickhouse_connect import get_client 7 | from clickhouse_connect.driver.common import coerce_bool 8 | from 
clickhouse_connect.driver.exceptions import OperationalError 9 | from tests.helpers import PROJECT_ROOT_DIR 10 | 11 | # See .docker/clickhouse/single_node_tls for the server configuration 12 | cert_dir = f'{PROJECT_ROOT_DIR}/.docker/clickhouse/single_node_tls/certificates/' 13 | host = 'server1.clickhouse.test' 14 | 15 | 16 | def test_basic_tls(): 17 | if not coerce_bool(os.environ.get('CLICKHOUSE_CONNECT_TEST_TLS', 'False')): 18 | pytest.skip('TLS tests not enabled') 19 | client = get_client(interface='https', host=host, port=10843, verify=False) 20 | assert client.command("SELECT 'insecure'") == 'insecure' 21 | client.close_connections() 22 | 23 | client = get_client(interface='https', host=host, port=10843, ca_cert=f'{cert_dir}ca.crt') 24 | assert client.command("SELECT 'verify_server'") == 'verify_server' 25 | client.close_connections() 26 | 27 | try: 28 | get_client(interface='https', host='localhost', port=10843, ca_cert=f'{cert_dir}ca.crt') 29 | pytest.fail('Expected TLS exception with a different hostname') 30 | except OperationalError as ex: 31 | assert isinstance(ex.__cause__.reason, SSLError) # pylint: disable=no-member 32 | client.close_connections() 33 | 34 | try: 35 | get_client(interface='https', host='localhost', port=10843) 36 | pytest.fail('Expected TLS exception with a self-signed cert') 37 | except OperationalError as ex: 38 | assert isinstance(ex.__cause__.reason, SSLError) # pylint: disable=no-member 39 | 40 | 41 | def test_mutual_tls(): 42 | if not coerce_bool(os.environ.get('CLICKHOUSE_CONNECT_TEST_TLS', 'False')): 43 | pytest.skip('TLS tests not enabled') 44 | client = get_client(interface='https', 45 | username='cert_user', 46 | host=host, 47 | port=10843, 48 | ca_cert=f'{cert_dir}ca.crt', 49 | client_cert=f'{cert_dir}client.crt', 50 | client_cert_key=f'{cert_dir}client.key') 51 | assert client.command('SELECT user()') == 'cert_user' 52 | -------------------------------------------------------------------------------- /tests/integration_tests/test_tools.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | from typing import Callable 3 | 4 | from clickhouse_connect.driver import Client 5 | from clickhouse_connect.driver.tools import insert_file 6 | from tests.integration_tests.conftest import TestConfig 7 | 8 | 9 | def test_csv_upload(test_client: Client, table_context: Callable): 10 | data_file = f'{Path(__file__).parent}/movies.csv.gz' 11 | with table_context('test_csv_upload', ['movie String', 'year UInt16', 'rating Decimal32(3)']): 12 | insert_file(test_client, 'test_csv_upload', data_file, 13 | settings={'input_format_allow_errors_ratio': .2, 14 | 'input_format_allow_errors_num': 5}) 15 | res = test_client.query( 16 | 'SELECT count() as count, sum(rating) as rating, max(year) as year FROM test_csv_upload').first_item 17 | assert res['count'] == 248 18 | assert res['year'] == 2022 19 | 20 | 21 | def test_parquet_upload(test_config: TestConfig, test_client: Client, table_context: Callable): 22 | data_file = f'{Path(__file__).parent}/movies.parquet' 23 | full_table = f'{test_config.test_database}.test_parquet_upload' 24 | with table_context(full_table, ['movie String', 'year UInt16', 'rating Float64']): 25 | insert_file(test_client, full_table, data_file, 'Parquet', 26 | settings={'output_format_parquet_string_as_string': 1}) 27 | res = test_client.query( 28 | f'SELECT count() as count, sum(rating) as rating, max(year) as year FROM {full_table}').first_item 29 | assert res['count'] == 250 30 | 
assert res['year'] == 2022 31 | 32 | 33 | def test_json_insert(test_client: Client, table_context: Callable): 34 | data_file = f'{Path(__file__).parent}/json_test.ndjson' 35 | with table_context('test_json_upload', ['key UInt16', 'flt_val Float64', 'int_val Int8']): 36 | insert_file(test_client, 'test_json_upload', data_file, 'JSONEachRow') 37 | res = test_client.query('SELECT * FROM test_json_upload ORDER BY key').result_rows 38 | assert res[1][0] == 17 39 | assert res[1][1] == 5.3 40 | assert res[1][2] == 121 41 | -------------------------------------------------------------------------------- /tests/test_requirements.txt: -------------------------------------------------------------------------------- 1 | pytz 2 | urllib3>=1.26 3 | setuptools 4 | certifi 5 | sqlalchemy>1.3.21,<2.0 6 | cython==3.0.11 7 | pyarrow 8 | pytest 9 | pytest-asyncio 10 | pytest-mock 11 | pytest-dotenv 12 | pytest-cov 13 | numpy~=1.22.0; python_version >= '3.8' and python_version <= '3.10' 14 | numpy~=1.26.0; python_version >= '3.11' and python_version <= '3.12' 15 | numpy~=2.1.0; python_version >= '3.13' 16 | pandas 17 | zstandard 18 | lz4 19 | pyjwt[crypto]==2.10.1 20 | -------------------------------------------------------------------------------- /tests/timings.py: -------------------------------------------------------------------------------- 1 | 2 | import array 3 | from datetime import datetime 4 | 5 | b = bytearray() 6 | 7 | start = datetime.now() 8 | for x in range(10000): 9 | b = bytearray() 10 | a = array.array('H', list(range(5000))) 11 | b += a 12 | print (str(len(b)) + ' ' + str(datetime.now() - start)) 13 | 14 | 15 | start = datetime.now() 16 | 17 | for x in range(10000): 18 | b = bytearray() 19 | for y in range(5000): 20 | b.extend(y.to_bytes(2, 'little')) 21 | print (str(len(b)) + ' ' + str(datetime.now() - start)) 22 | -------------------------------------------------------------------------------- /tests/unit_tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ClickHouse/clickhouse-connect/ced1ec4ddc823159648fd66ce554110192b0a023/tests/unit_tests/__init__.py -------------------------------------------------------------------------------- /tests/unit_tests/test_chtypes.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=protected-access 2 | from clickhouse_connect.datatypes.container import Nested 3 | from clickhouse_connect.datatypes.registry import get_from_name as gfn 4 | 5 | 6 | def test_enum_parse(): 7 | enum_type = gfn("Enum8('OZC|8;' = -125, '6MQ4v-t' = -114, 'As7]sEg\\'' = 40, 'v~l$PR5' = 84)") 8 | assert 'OZC|8;' in enum_type._name_map 9 | enum_type = gfn('Enum8(\'\\\'"2Af\' = 93,\'KG;+\\\' = -114,\'j0\' = -40)') 10 | assert '\'"2Af' in enum_type._name_map 11 | enum_type = gfn("Enum8('value1' = 7, 'value2'=5)") 12 | assert enum_type.name == "Enum8('value2' = 5, 'value1' = 7)" 13 | assert 7 in enum_type._int_map 14 | assert 5 in enum_type._int_map 15 | enum_type = gfn(r"Enum16('beta&&' = -3, '' = 0, 'alpha\'' = 3822)") 16 | assert r"alpha'" == enum_type._int_map[3822] 17 | assert -3 == enum_type._name_map['beta&&'] 18 | 19 | 20 | def test_names(): 21 | array_type = gfn('Array(Nullable(FixedString(50)))') 22 | assert array_type.name == 'Array(Nullable(FixedString(50)))' 23 | array_type = gfn( 24 | "Array(Enum8(\'user_name\' = 1, \'ip_address\' = -2, \'forwarded_ip_address\' = 3, \'client_key\' = 4))") 25 | assert array_type.name == ( 26 | 
"Array(Enum8('ip_address' = -2, 'user_name' = 1, 'forwarded_ip_address' = 3, 'client_key' = 4))") 27 | 28 | 29 | def test_nested_parse(): 30 | nested_type = gfn('Nested(str1 String, int32 UInt32)') 31 | assert nested_type.name == 'Nested(str1 String, int32 UInt32)' 32 | assert isinstance(nested_type, Nested) 33 | nested_type = gfn('Nested(id Int64, data Nested(inner_key String, inner_map Map(String, UUID)))') 34 | assert nested_type.name == 'Nested(id Int64, data Nested(inner_key String, inner_map Map(String, UUID)))' 35 | nest = "key_0 Enum16('[m(X*' = -18773, '_9as' = 11854, '&e$LE' = 27685), key_1 Nullable(Decimal(62, 38))" 36 | nested_name = f'Nested({nest})' 37 | nested_type = gfn(nested_name) 38 | assert nested_type.name == nested_name 39 | 40 | 41 | def test_named_tuple(): 42 | tuple_type = gfn('Tuple(Int64, String)') 43 | assert tuple_type.name == 'Tuple(Int64, String)' 44 | tuple_type = gfn('Tuple(`key` Int64, `value` String)') 45 | assert tuple_type.name == 'Tuple(`key` Int64, `value` String)' 46 | -------------------------------------------------------------------------------- /tests/unit_tests/test_driver/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ClickHouse/clickhouse-connect/ced1ec4ddc823159648fd66ce554110192b0a023/tests/unit_tests/test_driver/__init__.py -------------------------------------------------------------------------------- /tests/unit_tests/test_driver/binary.py: -------------------------------------------------------------------------------- 1 | NESTED_BINARY = """ 2 | 0104 066e 6573 7465 6421 4e65 7374 6564 3 | 2873 7472 3120 5374 7269 6e67 2c20 696e 4 | 7433 3220 5549 6e74 3332 2900 0000 0000 5 | 0000 0002 0000 0000 0000 0004 0000 0000 6 | 0000 0006 0000 0000 0000 0005 7468 7265 7 | 6504 6669 7665 036f 6e65 0374 776f 036f 8 | 6e65 0374 776f 0500 0000 4d00 0000 0500 9 | 0000 3700 0000 0500 0000 3700 0000 10 | """ 11 | -------------------------------------------------------------------------------- /tests/unit_tests/test_driver/test_buffer.py: -------------------------------------------------------------------------------- 1 | from clickhouse_connect.driver.buffer import ResponseBuffer as PyResponseBuffer 2 | from clickhouse_connect.driver.exceptions import StreamCompleteException 3 | from clickhouse_connect.driverc.buffer import ResponseBuffer as CResponseBuffer # pylint: disable=no-name-in-module 4 | from tests.helpers import bytes_source, to_bytes 5 | 6 | 7 | def test_read_ints(): 8 | for cls in CResponseBuffer, PyResponseBuffer: 9 | buff = bytes_source('05 20 00 00 00 00 00 00 68 10 83 03 77', cls=cls) 10 | assert buff.read_uint64() == 8197 11 | assert buff.read_leb128() == 104 12 | assert buff.read_leb128() == 16 13 | assert buff.read_leb128() == 387 14 | assert buff.read_byte() == 0x77 15 | try: 16 | buff.read_byte() 17 | except StreamCompleteException: 18 | pass 19 | 20 | 21 | def test_read_strings(): 22 | for cls in CResponseBuffer, PyResponseBuffer: 23 | buff = bytes_source('04 43 44 4d 41', cls=cls) 24 | assert buff.read_leb128_str() == 'CDMA' 25 | try: 26 | buff.read_str_col(2, 'utf8') 27 | except StreamCompleteException: 28 | pass 29 | 30 | 31 | def test_read_bytes(): 32 | for cls in CResponseBuffer, PyResponseBuffer, : 33 | buff = bytes_source('04 43 44 4d 41 22 44 66 88 AA', cls=cls) 34 | buff.read_byte() 35 | assert buff.read_bytes(5) == to_bytes('43 44 4d 41 22') 36 | try: 37 | buff.read_bytes(10) 38 | except StreamCompleteException: 39 | pass 40 | 
-------------------------------------------------------------------------------- /tests/unit_tests/test_driver/test_data.py: -------------------------------------------------------------------------------- 1 | from datetime import date 2 | from clickhouse_connect.driver.dataconv import epoch_days_to_date as py_date, pivot as py_pivot 3 | # pylint: disable=no-name-in-module 4 | from clickhouse_connect.driverc.dataconv import epoch_days_to_date as c_date, pivot as c_pivot 5 | 6 | 7 | def test_date_conv(): 8 | for date_func in (c_date, py_date): 9 | assert date_func(11322) == date(2000, 12, 31) 10 | assert date_func(47579) == date(2100, 4, 8) 11 | assert date_func(0) == date(1970, 1, 1) 12 | assert date_func(364) == date(1970, 12, 31) 13 | assert date_func(365) == date(1971, 1, 1) 14 | assert date_func(500) == date(1971, 5, 16) 15 | assert date_func(729) == date(1971, 12, 31) 16 | assert date_func(730) == date(1972, 1, 1) 17 | assert date_func(1096) == date(1973, 1, 1) 18 | assert date_func(2250) == date(1976, 2, 29) 19 | assert date_func(10957) == date(2000, 1, 1) 20 | assert date_func(11323) == date(2001, 1, 1) 21 | assert date_func(15941) == date(2013, 8, 24) 22 | assert date_func(12477) == date(2004, 2, 29) 23 | assert date_func(12478) == date(2004, 3, 1) 24 | assert date_func(12783) == date(2004, 12, 31) 25 | assert date_func(13148) == date(2005, 12, 31) 26 | assert date_func(19378) == date(2023, 1, 21) 27 | assert date_func(47847) == date(2101, 1, 1) 28 | assert date_func(54727) == date(2119, 11, 3) 29 | assert date_func(-18165) == date(1920, 4, 8) 30 | 31 | 32 | def test_pivot(): 33 | data = [[1, 2, 3], [4, 5, 6]] 34 | for pivot in (c_pivot, py_pivot): 35 | result = pivot(data, 0, 2) 36 | assert result == ((1, 4), (2, 5), (3, 6)) 37 | -------------------------------------------------------------------------------- /tests/unit_tests/test_driver/test_formats.py: -------------------------------------------------------------------------------- 1 | from clickhouse_connect.datatypes.format import set_default_formats, set_write_format 2 | from clickhouse_connect.datatypes.network import IPv6 3 | from clickhouse_connect.datatypes.numeric import Int32 4 | from clickhouse_connect.datatypes.string import FixedString 5 | from clickhouse_connect.driver.context import BaseQueryContext 6 | from clickhouse_connect.driver.query import QueryContext 7 | 8 | 9 | def test_default_formats(): 10 | ctx = QueryContext() 11 | set_default_formats('Int32', 'string', 'IP*', 'string') 12 | assert IPv6.read_format(ctx) == 'string' 13 | assert Int32.read_format(ctx) == 'string' 14 | assert FixedString.read_format(ctx) == 'native' 15 | 16 | 17 | def test_fixed_str_format(): 18 | set_write_format('FixedString', 'string') 19 | assert FixedString.write_format(BaseQueryContext()) == 'string' 20 | -------------------------------------------------------------------------------- /tests/unit_tests/test_driver/test_insert.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | 3 | from clickhouse_connect.datatypes.registry import get_from_name 4 | 5 | from clickhouse_connect.driver.insert import InsertContext 6 | from clickhouse_connect.tools.datagen import fixed_len_ascii_str 7 | 8 | 9 | def test_block_size(): 10 | data = [(1, (datetime.date(2020, 5, 2), datetime.datetime(2020, 5, 2, 10, 5, 2)))] 11 | ctx = InsertContext('fake_table', 12 | ['key', 'date_tuple'], 13 | [get_from_name('UInt64'), get_from_name('Tuple(Date, 
DateTime)')], 14 | data) 15 | assert ctx.block_row_count == 262144 16 | 17 | data = [(x, fixed_len_ascii_str(400)) for x in range(5000)] 18 | ctx = InsertContext('fake_table', 19 | ['key', 'big_str'], 20 | [get_from_name('Int32'), get_from_name('String')], 21 | data) 22 | assert ctx.block_row_count == 8192 23 | -------------------------------------------------------------------------------- /tests/unit_tests/test_driver/test_native_fuzz.py: -------------------------------------------------------------------------------- 1 | import os 2 | import random 3 | 4 | from clickhouse_connect.datatypes.registry import get_from_name 5 | from clickhouse_connect.driver.common import coerce_bool 6 | from clickhouse_connect.driver.buffer import ResponseBuffer as PyBuff 7 | from clickhouse_connect.driverc.buffer import ResponseBuffer as CBuff # pylint: disable=no-name-in-module 8 | from tests.helpers import random_columns, random_data, native_transform, native_insert_block, bytes_source 9 | 10 | TEST_COLUMNS = 12 11 | MAX_DATA_ROWS = 100 12 | 13 | use_c = coerce_bool(os.environ.get('CLICKHOUSE_CONNECT_USE_C', True)) 14 | BuffCls = CBuff if use_c else PyBuff 15 | 16 | 17 | # pylint: disable=duplicate-code 18 | def test_native_round_trips(): 19 | test_runs = int(os.environ.get('CLICKHOUSE_CONNECT_TEST_FUZZ', '200')) 20 | 21 | for _ in range(test_runs): 22 | data_rows = random.randint(1, MAX_DATA_ROWS) 23 | col_names, col_types = random_columns(TEST_COLUMNS) 24 | data = random_data(col_types, data_rows) 25 | col_names = ('row_id',) + col_names 26 | col_types = (get_from_name('UInt32'),) + col_types 27 | assert len(data) == data_rows 28 | output = native_insert_block(data, column_names=col_names, column_types=col_types) 29 | data_result = native_transform.parse_response(bytes_source(output, cls=BuffCls)) 30 | assert data_result.column_names == col_names 31 | assert data_result.column_types == col_types 32 | dataset = data_result.result_set 33 | for row in range(data_rows): 34 | for col in range(TEST_COLUMNS): 35 | assert dataset[row][col] == data[row][col] 36 | 37 | 38 | def test_native_small(): 39 | test_runs = int(os.environ.get('CLICKHOUSE_CONNECT_TEST_FUZZ', '200')) 40 | for _ in range(test_runs): 41 | col_names, col_types = random_columns(1) 42 | data = random_data(col_types, 2) 43 | col_names = ('row_id',) + col_names 44 | col_types = (get_from_name('UInt32'),) + col_types 45 | output = native_insert_block(data, column_names=col_names, column_types=col_types) 46 | data_result = native_transform.parse_response(bytes_source(output, cls=BuffCls)) 47 | assert data_result.column_names == col_names 48 | assert data_result.column_types == col_types 49 | assert data_result.result_set == data 50 | -------------------------------------------------------------------------------- /tests/unit_tests/test_driver/test_native_read.py: -------------------------------------------------------------------------------- 1 | from ipaddress import IPv4Address 2 | from uuid import UUID 3 | 4 | from clickhouse_connect.datatypes import registry 5 | from clickhouse_connect.driver.insert import InsertContext 6 | from clickhouse_connect.driver.query import QueryContext 7 | from clickhouse_connect.driver.transform import NativeTransform 8 | from tests.helpers import bytes_source 9 | from tests.unit_tests.test_driver.binary import NESTED_BINARY 10 | 11 | UINT16_NULLS = """ 12 | 0104 0969 6e74 5f76 616c 7565 104e 756c 13 | 6c61 626c 6528 5549 6e74 3136 2901 0001 14 | 0000 0014 0000 0028 00 15 | """ 16 | 17 | LOW_CARDINALITY = """ 18 | 
0102 026c 6316 4c6f 7743 6172 6469 6e61 19 | 6c69 7479 2853 7472 696e 6729 0100 0000 20 | 0000 0000 0006 0000 0000 0000 0300 0000 21 | 0000 0000 0004 4344 4d41 0347 534d 0200 22 | 0000 0000 0000 0102 0101 026c 6316 4c6f 23 | 7743 6172 6469 6e61 6c69 7479 2853 7472 24 | 696e 6729 0100 0000 0000 0000 0006 0000 25 | 0000 0000 0200 0000 0000 0000 0004 554d 26 | 5453 0100 0000 0000 0000 01 27 | """ 28 | 29 | LOW_CARD_ARRAY = """ 30 | 0102 066c 6162 656c 731d 4172 7261 7928 31 | 4c6f 7743 6172 6469 6e61 6c69 7479 2853 32 | 7472 696e 6729 2901 0000 0000 0000 0000 33 | 0000 0000 0000 0000 0000 0000 0000 00 34 | """ 35 | 36 | SIMPLE_MAP = """ 37 | 0101 066e 6e5f 6d61 7013 4d61 7028 5374 38 | 7269 6e67 2c20 5374 7269 6e67 2902 0000 39 | 0000 0000 0004 6b65 7931 046b 6579 3206 40 | 7661 6c75 6531 0676 616c 7565 32 41 | """ 42 | 43 | LOW_CARD_MAP = """ 44 | 0102 086d 6170 5f6e 756c 6c2b 4d61 7028 45 | 4c6f 7743 6172 6469 6e61 6c69 7479 2853 46 | 7472 696e 6729 2c20 4e75 6c6c 6162 6c65 47 | 2855 5549 4429 2901 0000 0000 0000 0002 48 | 0000 0000 0000 0004 0000 0000 0000 0000 49 | 0600 0000 0000 0003 0000 0000 0000 0000 50 | 0469 676f 7206 6765 6f72 6765 0400 0000 51 | 0000 0000 0102 0102 0100 0000 0000 0000 52 | 0000 0000 0000 0000 0000 0000 235f 7dc5 53 | 799f 431d a9e1 93ca ccff c652 235f 7dc5 54 | 799f 437f a9e1 93ca ccff 0052 235f 7dc5 55 | 799f 431d a9e1 93ca ccff c652 56 | """ 57 | 58 | 59 | parse_response = NativeTransform().parse_response 60 | 61 | 62 | def check_result(result, expected, row_num=0, col_num=0): 63 | result_set = result.result_set 64 | row = result_set[row_num] 65 | value = row[col_num] 66 | assert value == expected 67 | 68 | 69 | def test_uint16_nulls(): 70 | result = parse_response(bytes_source(UINT16_NULLS)) 71 | assert result.result_set == [(None,), (20,), (None,), (40,)] 72 | 73 | 74 | def test_low_cardinality(): 75 | result = parse_response(bytes_source(LOW_CARDINALITY)) 76 | assert result.result_set == [('CDMA',), ('GSM',), ('UMTS',)] 77 | 78 | 79 | def test_low_card_array(): 80 | result = parse_response(bytes_source(LOW_CARD_ARRAY)) 81 | assert result.first_row == ([],), ([],) 82 | 83 | 84 | def test_map(): 85 | result = parse_response(bytes_source(SIMPLE_MAP)) 86 | check_result(result, {'key1': 'value1', 'key2': 'value2'}) 87 | result = parse_response(bytes_source(LOW_CARD_MAP)) 88 | check_result(result, {'george': UUID('1d439f79-c57d-5f23-52c6-ffccca93e1a9'), 'igor': None}) 89 | 90 | 91 | def test_ip(): 92 | ips = ['192.168.5.3', '202.44.8.25', '0.0.2.2'] 93 | ipv4_type = registry.get_from_name('IPv4') 94 | dest = bytearray() 95 | ipv4_type.write_column(ips, dest, InsertContext('', [], [])) 96 | python = ipv4_type.read_column_data(bytes_source(bytes(dest)), 3, QueryContext(), None) 97 | assert tuple(python) == tuple(IPv4Address(ip) for ip in ips) 98 | 99 | 100 | def test_point(): 101 | points = ((3.22, 3.22),(5.22, 5.22),(4.22, 4.22)) 102 | point_type = registry.get_from_name('Point') 103 | dest = bytearray() 104 | point_type.write_column(points, dest, InsertContext('', [], [])) 105 | python = point_type.read_column_data(bytes_source(bytes(dest)), 3, QueryContext(), [None, None]) 106 | assert tuple(python) == tuple(point for point in points) 107 | 108 | 109 | def test_nested(): 110 | result = parse_response (bytes_source(NESTED_BINARY)) 111 | check_result(result, [{'str1': 'one', 'int32': 5}, {'str1': 'two', 'int32': 55}], 2, 0) 112 | -------------------------------------------------------------------------------- /tests/unit_tests/test_driver/test_params.py: 
-------------------------------------------------------------------------------- 1 | from datetime import datetime, date 2 | 3 | import pytest 4 | 5 | from clickhouse_connect.driver.binding import finalize_query, format_bind_value 6 | 7 | 8 | def test_finalize(): 9 | hash_id = '0x772' 10 | timestamp = datetime.fromtimestamp(1661447719) 11 | parameters = {'hash_id': hash_id, 'dt': timestamp} 12 | expected = "SELECT hash_id FROM db.mytable WHERE hash_id = '0x772' AND dt = '2022-08-25 17:15:19'" 13 | query = finalize_query('SELECT hash_id FROM db.mytable WHERE hash_id = %(hash_id)s AND dt = %(dt)s', parameters) 14 | assert query == expected 15 | 16 | parameters = [hash_id, timestamp] 17 | query = finalize_query('SELECT hash_id FROM db.mytable WHERE hash_id = %s AND dt = %s', parameters) 18 | assert query == expected 19 | 20 | 21 | # pylint: disable=inconsistent-quotes 22 | @pytest.mark.parametrize('value, expected', [ 23 | ("a", "a"), 24 | ("a'", r"a\'"), 25 | ("'a'", r"\'a\'"), 26 | ("''a'", r"\'\'a\'"), 27 | ([], "[]"), 28 | ([1], "[1]"), 29 | (["a"], "['a']"), 30 | (["a'"], r"['a\'']"), 31 | ([["a"]], "[['a']]"), 32 | (date(2023, 6, 1), '2023-06-01'), 33 | (datetime(2023, 6, 1, 20, 4, 5), '2023-06-01 20:04:05'), 34 | ([date(2023, 6, 1), date(2023, 8, 5)], "['2023-06-01', '2023-08-05']") 35 | 36 | ]) 37 | def test_format_bind_value(value, expected): 38 | assert format_bind_value(value) == expected 39 | -------------------------------------------------------------------------------- /tests/unit_tests/test_driver/test_parser.py: -------------------------------------------------------------------------------- 1 | from clickhouse_connect.datatypes.registry import get_from_name 2 | from clickhouse_connect.driver.parser import parse_callable, parse_enum 3 | from clickhouse_connect.driver.query import remove_sql_comments 4 | 5 | 6 | def test_parse_callable(): 7 | assert parse_callable('CALLABLE(1, 5)') == ('CALLABLE', (1, 5), '') 8 | assert parse_callable("Enum4('v1' = 5) other stuff") == ('Enum4', ("'v1'= 5",), 'other stuff') 9 | assert parse_callable('BareThing') == ('BareThing', (), '') 10 | assert parse_callable('Tuple(Tuple (String), Int32)') == ('Tuple', ('Tuple(String)', 'Int32'), '') 11 | assert parse_callable("ReplicatedMergeTree('/clickhouse/tables/test', '{replica'}) PARTITION BY key")\ 12 | == ('ReplicatedMergeTree', ("'/clickhouse/tables/test'", "'{replica'}"), 'PARTITION BY key') 13 | 14 | 15 | def test_parse_enum(): 16 | assert parse_enum("Enum8('one' = 1)") == (('one',), (1,)) 17 | assert parse_enum("Enum16('**\\'5' = 5, '578' = 7)") == (("**'5", '578'), (5, 7)) 18 | 19 | 20 | def test_map_type(): 21 | ch_type = get_from_name('Map(String, Decimal(5, 5))') 22 | assert ch_type.name == 'Map(String, Decimal(5, 5))' 23 | 24 | 25 | def test_variant_type(): 26 | ch_type = get_from_name('Variant(UInt64, String, Array(UInt64))') 27 | assert ch_type.name == 'Variant(UInt64, String, Array(UInt64))' 28 | 29 | 30 | def test_json_type(): 31 | names = ['JSON', 32 | 'JSON(max_dynamic_paths=100, a.b UInt32, SKIP `a.e`)', 33 | "JSON(max_dynamic_types = 55, SKIP REGEXP 'a[efg]')", 34 | 'JSON(max_dynamic_types = 33, `a.b` UInt64, b.c String)'] 35 | parsed = ['JSON', 36 | 'JSON(max_dynamic_paths = 100, `a.b` UInt32, SKIP `a.e`)', 37 | "JSON(max_dynamic_types = 55, SKIP REGEXP 'a[efg]')", 38 | 'JSON(max_dynamic_types = 33, `a.b` UInt64, `b.c` String)' 39 | ] 40 | for name, x in zip(names, parsed): 41 | ch_type = get_from_name(name) 42 | assert x == ch_type.name 43 | 44 | 45 | def test_remove_comments(): 
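# remove_sql_comments should strip both -- line comments and /* */ block comments, leaving the statement text intact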
46 | sql = """SELECT -- 6dcd92a04feb50f14bbcf07c661680ba 47 | * FROM benchmark_results /*With an inline comment */ WHERE result = 'True' 48 | /* A single line */ 49 | LIMIT 50 | /* A multiline comment 51 | 52 | */ 53 | 2 54 | -- 6dcd92a04feb50f14bbcf07c661680ba 55 | """ 56 | assert remove_sql_comments(sql) == "SELECT \n* FROM benchmark_results WHERE result = 'True'\n\nLIMIT\n\n2\n\n" 57 | -------------------------------------------------------------------------------- /tests/unit_tests/test_driver/test_query.py: -------------------------------------------------------------------------------- 1 | from clickhouse_connect.driver.query import QueryContext 2 | 3 | 4 | def test_copy_context(): 5 | settings = {'max_bytes_for_external_group_by': 1024 * 1024 * 100, 6 | 'read_overflow_mode': 'throw'} 7 | parameters = {'user_id': 'user_1'} 8 | query_formats = {'IPv*': 'string'} 9 | context = QueryContext('SELECT source_ip FROM table WHERE user_id = %(user_id)s', 10 | settings=settings, 11 | parameters=parameters, 12 | query_formats=query_formats, 13 | use_none=True) 14 | assert context.use_none is True 15 | assert context.final_query == "SELECT source_ip FROM table WHERE user_id = 'user_1'" 16 | assert context.query_formats['IPv*'] == 'string' 17 | assert context.settings['max_bytes_for_external_group_by'] == 104857600 18 | 19 | context_copy = context.updated_copy( 20 | settings={'max_bytes_for_external_group_by': 1024 * 1024 * 24, 'max_execution_time': 120}, 21 | parameters={'user_id': 'user_2'} 22 | ) 23 | assert context_copy.settings['read_overflow_mode'] == 'throw' 24 | assert context_copy.settings['max_execution_time'] == 120 25 | assert context_copy.settings['max_bytes_for_external_group_by'] == 25165824 26 | assert context_copy.final_query == "SELECT source_ip FROM table WHERE user_id = 'user_2'" 27 | -------------------------------------------------------------------------------- /tests/unit_tests/test_driver/test_settings.py: -------------------------------------------------------------------------------- 1 | from clickhouse_connect import common 2 | 3 | 4 | def test_setting(): 5 | try: 6 | assert common.get_setting('autogenerate_session_id') 7 | common.set_setting('autogenerate_session_id', False) 8 | assert common.get_setting('autogenerate_session_id') is False 9 | finally: 10 | common.set_setting('autogenerate_session_id', True) 11 | -------------------------------------------------------------------------------- /tests/unit_tests/test_sqlalchemy/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ClickHouse/clickhouse-connect/ced1ec4ddc823159648fd66ce554110192b0a023/tests/unit_tests/test_sqlalchemy/__init__.py -------------------------------------------------------------------------------- /tests/unit_tests/test_sqlalchemy/test_ddl.py: -------------------------------------------------------------------------------- 1 | import sqlalchemy as db 2 | from sqlalchemy.sql.ddl import CreateTable 3 | 4 | from clickhouse_connect.cc_sqlalchemy.datatypes.sqltypes import UInt64, UInt32, DateTime 5 | from clickhouse_connect.cc_sqlalchemy.ddl.tableengine import ReplicatedMergeTree, ReplacingMergeTree 6 | from clickhouse_connect.cc_sqlalchemy.dialect import ClickHouseDialect 7 | 8 | dialect = ClickHouseDialect() 9 | 10 | replicated_mt_ddl = """\ 11 | CREATE TABLE `replicated_mt_test` (`key` UInt64) Engine ReplicatedMergeTree('/clickhouse/tables/repl_mt_test',\ 12 | '{replica}') ORDER BY key\ 13 | """ 14 | 15 | replacing_mt_ddl 
= """\ 16 | CREATE TABLE `replacing_mt_test` (`key` UInt32, `date` DateTime) Engine ReplacingMergeTree(date) ORDER BY key\ 17 | """ 18 | 19 | 20 | def test_table_def(): 21 | metadata = db.MetaData() 22 | 23 | table = db.Table('replicated_mt_test', metadata, db.Column('key', UInt64), 24 | ReplicatedMergeTree(order_by='key', zk_path='/clickhouse/tables/repl_mt_test', 25 | replica='{replica}')) 26 | ddl = str(CreateTable(table).compile('', dialect=dialect)) 27 | assert ddl == replicated_mt_ddl 28 | 29 | table = db.Table('replacing_mt_test', metadata, db.Column('key', UInt32), db.Column('date', DateTime), 30 | ReplacingMergeTree(ver='date', order_by='key')) 31 | 32 | ddl = str(CreateTable(table).compile('', dialect=dialect)) 33 | assert ddl == replacing_mt_ddl 34 | -------------------------------------------------------------------------------- /tests/unit_tests/test_sqlalchemy/test_types.py: -------------------------------------------------------------------------------- 1 | from sqlalchemy import Integer, DateTime 2 | 3 | from clickhouse_connect.cc_sqlalchemy.datatypes.sqltypes import Nullable, Int64, DateTime64, LowCardinality, String 4 | from clickhouse_connect.cc_sqlalchemy.datatypes.base import sqla_type_map, sqla_type_from_name 5 | 6 | 7 | def test_mapping(): 8 | assert issubclass(sqla_type_map['UInt64'], Integer) 9 | assert issubclass(sqla_type_map['DateTime'], DateTime) 10 | 11 | 12 | # pylint: disable=protected-access 13 | def test_sqla(): 14 | int16 = sqla_type_from_name('Int16') 15 | assert 'Int16' == int16._compiler_dispatch(None) 16 | enum = sqla_type_from_name("Enum8('value1' = 7, 'value2'=5)") 17 | assert "Enum8('value2' = 5, 'value1' = 7)" == enum._compiler_dispatch(None) 18 | 19 | 20 | # pylint: disable=no-member 21 | def test_nullable(): 22 | nullable = Nullable(Int64) 23 | assert nullable.__class__ == Int64 24 | nullable = Nullable(DateTime64(6)) 25 | assert nullable.__class__ == DateTime64 26 | assert nullable.name == 'Nullable(DateTime64(6))' 27 | 28 | 29 | # pylint: disable=no-member 30 | def test_low_cardinality(): 31 | lc_str = LowCardinality(Nullable(String)) 32 | assert lc_str.__class__ == String 33 | assert lc_str.name == 'LowCardinality(Nullable(String))' 34 | --------------------------------------------------------------------------------