├── .docker
│   └── clickhouse
│       ├── single_node
│       │   ├── config.xml
│       │   └── docker_related_config.xml
│       ├── single_node_tls
│       │   ├── Dockerfile
│       │   ├── certificates
│       │   │   ├── ca.crt
│       │   │   ├── ca.key
│       │   │   ├── client.crt
│       │   │   ├── client.key
│       │   │   ├── server.crt
│       │   │   └── server.key
│       │   ├── config.xml
│       │   ├── docker_related_config.xml
│       │   └── users.xml
│       └── users.xml
├── .dockerignore
├── .github
│   ├── ISSUE_TEMPLATE
│   │   ├── bug_report.md
│   │   └── feature_request.md
│   ├── pull_request_template.md
│   └── workflows
│       ├── clickhouse_ci.yml
│       ├── on_push.yml
│       └── publish.yml
├── .gitignore
├── CHANGELOG.md
├── CONTRIBUTING.md
├── Dockerfile
├── LICENSE
├── MANIFEST.in
├── README.md
├── clickhouse_connect
│   ├── __init__.py
│   ├── __version__.py
│   ├── cc_sqlalchemy
│   │   ├── __init__.py
│   │   ├── datatypes
│   │   │   ├── __init__.py
│   │   │   ├── base.py
│   │   │   └── sqltypes.py
│   │   ├── ddl
│   │   │   ├── __init__.py
│   │   │   ├── custom.py
│   │   │   └── tableengine.py
│   │   ├── dialect.py
│   │   ├── inspector.py
│   │   └── sql
│   │       ├── __init__.py
│   │       ├── ddlcompiler.py
│   │       └── preparer.py
│   ├── common.py
│   ├── datatypes
│   │   ├── __init__.py
│   │   ├── base.py
│   │   ├── container.py
│   │   ├── dynamic.py
│   │   ├── format.py
│   │   ├── geometric.py
│   │   ├── network.py
│   │   ├── numeric.py
│   │   ├── postinit.py
│   │   ├── registry.py
│   │   ├── special.py
│   │   ├── string.py
│   │   └── temporal.py
│   ├── dbapi
│   │   ├── __init__.py
│   │   ├── connection.py
│   │   └── cursor.py
│   ├── driver
│   │   ├── __init__.py
│   │   ├── asyncclient.py
│   │   ├── binding.py
│   │   ├── buffer.py
│   │   ├── client.py
│   │   ├── common.py
│   │   ├── compression.py
│   │   ├── constants.py
│   │   ├── context.py
│   │   ├── ctypes.py
│   │   ├── dataconv.py
│   │   ├── ddl.py
│   │   ├── errors.py
│   │   ├── exceptions.py
│   │   ├── external.py
│   │   ├── httpclient.py
│   │   ├── httputil.py
│   │   ├── insert.py
│   │   ├── models.py
│   │   ├── npconv.py
│   │   ├── npquery.py
│   │   ├── options.py
│   │   ├── parser.py
│   │   ├── query.py
│   │   ├── summary.py
│   │   ├── tools.py
│   │   ├── transform.py
│   │   ├── types.py
│   │   └── tzutil.py
│   ├── driverc
│   │   ├── .gitignore
│   │   ├── __init__.pxd
│   │   ├── __init__.py
│   │   ├── buffer.pxd
│   │   ├── buffer.pyx
│   │   ├── dataconv.pyx
│   │   └── npconv.pyx
│   ├── entry_points.py
│   ├── json_impl.py
│   ├── py.typed
│   └── tools
│       ├── __init__.py
│       ├── datagen.py
│       └── testing.py
├── docker-compose.yml
├── examples
│   ├── benchmark.py
│   ├── clear_test_databases.py
│   ├── insert_examples.py
│   ├── pandas_examples.py
│   ├── params_example.py
│   ├── read_perf.py
│   ├── run_async.py
│   ├── ssh_tunnels.py
│   ├── write_into_file.py
│   └── write_perf.py
├── playtest.py
├── pylintrc
├── pyproject.toml
├── setup.py
├── test_dist
│   ├── .gitignore
│   └── superset_config.py
└── tests
    ├── __init__.py
    ├── conftest.py
    ├── helpers.py
    ├── integration_tests
    │   ├── __init__.py
    │   ├── actors.csv
    │   ├── conftest.py
    │   ├── datasets.py
    │   ├── json_test.ndjson
    │   ├── movies.csv
    │   ├── movies.csv.gz
    │   ├── movies.parquet
    │   ├── proxy_ca_cert.crt
    │   ├── test_arrow.py
    │   ├── test_async_client.py
    │   ├── test_client.py
    │   ├── test_contexts.py
    │   ├── test_dynamic.py
    │   ├── test_external_data.py
    │   ├── test_formats.py
    │   ├── test_geometric.py
    │   ├── test_inserts.py
    │   ├── test_jwt_auth.py
    │   ├── test_multithreading.py
    │   ├── test_native.py
    │   ├── test_native_fuzz.py
    │   ├── test_numpy.py
    │   ├── test_pandas.py
    │   ├── test_params.py
    │   ├── test_protocol_version.py
    │   ├── test_proxy.py
    │   ├── test_raw_insert.py
    │   ├── test_session_id.py
    │   ├── test_sqlalchemy
    │   │   ├── __init__.py
    │   │   ├── conftest.py
    │   │   ├── test_basics.py
    │   │   ├── test_ddl.py
    │   │   ├── test_inserts.py
    │   │   └── test_reflect.py
    │   ├── test_streaming.py
    │   ├── test_timezones.py
    │   ├── test_tls.py
    │   └── test_tools.py
    ├── test_requirements.txt
    ├── timings.py
    └── unit_tests
        ├── __init__.py
        ├── test_chtypes.py
        ├── test_driver
        │   ├── __init__.py
        │   ├── binary.py
        │   ├── test_buffer.py
        │   ├── test_data.py
        │   ├── test_formats.py
        │   ├── test_insert.py
        │   ├── test_native_fuzz.py
        │   ├── test_native_read.py
        │   ├── test_native_write.py
        │   ├── test_params.py
        │   ├── test_parser.py
        │   ├── test_query.py
        │   └── test_settings.py
        └── test_sqlalchemy
            ├── __init__.py
            ├── test_ddl.py
            └── test_types.py
/.docker/clickhouse/single_node/config.xml:
--------------------------------------------------------------------------------
1 | <clickhouse>
2 |     <default_profile>default</default_profile>
3 |     <default_database>default</default_database>
4 |     <mark_cache_size>5368709120</mark_cache_size>
5 |
6 |     <path>/var/lib/clickhouse/</path>
7 |     <tmp_path>/var/lib/clickhouse/tmp/</tmp_path>
8 |     <user_files_path>/var/lib/clickhouse/user_files/</user_files_path>
9 |     <access_control_path>/var/lib/clickhouse/access/</access_control_path>
10 |     <keep_alive_timeout>3</keep_alive_timeout>
11 |     <format_schema_path>/var/lib/clickhouse/format_schemas/</format_schema_path>
12 |
13 |     <logger>
14 |         <level>warning</level>
15 |         <log>/var/log/clickhouse-server/clickhouse-server.log</log>
16 |         <errorlog>/var/log/clickhouse-server/clickhouse-server.err.log</errorlog>
17 |         <size>1000M</size>
18 |         <count>10</count>
19 |         <console>1</console>
20 |     </logger>
21 |
22 |     <query_log>
23 |         <database>system</database>
24 |         <partition_by>toYYYYMM(event_date)</partition_by>
25 |         <flush_interval_milliseconds>1000</flush_interval_milliseconds>
26 |     </query_log>
27 |
28 |     <session_log>
29 |         <database>system</database>
30 |     </session_log>
31 |
32 |     <user_directories>
33 |         <users_xml>
34 |             <path>users.xml</path>
35 |         </users_xml>
36 |     </user_directories>
37 |
38 |     <custom_settings_prefixes>SQL_</custom_settings_prefixes>
39 | </clickhouse>
--------------------------------------------------------------------------------
/.docker/clickhouse/single_node/docker_related_config.xml:
--------------------------------------------------------------------------------
1 | <clickhouse>
2 |     <listen_host>0.0.0.0</listen_host>
3 |     <http_port>8123</http_port>
4 |     <tcp_port>9000</tcp_port>
5 | </clickhouse>
--------------------------------------------------------------------------------
/.docker/clickhouse/single_node_tls/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM clickhouse/clickhouse-server:25.1-alpine
2 | COPY .docker/clickhouse/single_node_tls/certificates /etc/clickhouse-server/certs
3 | RUN chown clickhouse:clickhouse -R /etc/clickhouse-server/certs \
4 | && chmod 600 /etc/clickhouse-server/certs/* \
5 | && chmod 755 /etc/clickhouse-server/certs
6 |
--------------------------------------------------------------------------------
/.docker/clickhouse/single_node_tls/certificates/ca.crt:
--------------------------------------------------------------------------------
1 | -----BEGIN CERTIFICATE-----
2 | MIICODCCAd+gAwIBAgIUBCAqnuDk7oXPEOQ+80TFOg+0DJowCgYIKoZIzj0EAwIw
3 | ajELMAkGA1UEBhMCVVMxETAPBgNVBAgMCENvbG9yYWRvMQ8wDQYDVQQHDAZEZW52
4 | ZXIxGDAWBgNVBAoMD0NsaWNrSG91c2UgSW5jLjEdMBsGA1UEAwwUY2xpY2tob3Vz
5 | ZV90ZXN0X3Jvb3QwHhcNMjUwNTI1MjA0NTEyWhcNNDUwNTI1MjA0NTEyWjBqMQsw
6 | CQYDVQQGEwJVUzERMA8GA1UECAwIQ29sb3JhZG8xDzANBgNVBAcMBkRlbnZlcjEY
7 | MBYGA1UECgwPQ2xpY2tIb3VzZSBJbmMuMR0wGwYDVQQDDBRjbGlja2hvdXNlX3Rl
8 | c3Rfcm9vdDBZMBMGByqGSM49AgEGCCqGSM49AwEHA0IABPGo86Zr9WA7KZoBnBPi
9 | owdksQECdv2sJJD/0gb48Hpw9Im0BuX8gOb8YT7+yJm56nmz0tTV8CLeBwpC1ylT
10 | w5+jYzBhMB0GA1UdDgQWBBSSPtUyuGF0HFuucyfFfWwWMAnF9jAfBgNVHSMEGDAW
11 | gBSSPtUyuGF0HFuucyfFfWwWMAnF9jAPBgNVHRMBAf8EBTADAQH/MA4GA1UdDwEB
12 | /wQEAwIChDAKBggqhkjOPQQDAgNHADBEAiBBbvc42/8dPV6JJGvEIgcg7bzO5Bbw
13 | MNdXLiuxYAqxugIgJMyiLt2i3KOb69fljOA3dhApntjf8NltDozbm3wLLWs=
14 | -----END CERTIFICATE-----
15 |
16 |
--------------------------------------------------------------------------------
/.docker/clickhouse/single_node_tls/certificates/ca.key:
--------------------------------------------------------------------------------
1 | -----BEGIN EC PARAMETERS-----
2 | BggqhkjOPQMBBw==
3 | -----END EC PARAMETERS-----
4 | -----BEGIN EC PRIVATE KEY-----
5 | MHcCAQEEIJnlsMN+3VCxicEQcANLIM+4gMiItWwFam3moYINelVfoAoGCCqGSM49
6 | AwEHoUQDQgAE8ajzpmv1YDspmgGcE+KjB2SxAQJ2/awkkP/SBvjwenD0ibQG5fyA
7 | 5vxhPv7ImbnqebPS1NXwIt4HCkLXKVPDnw==
8 | -----END EC PRIVATE KEY-----
9 |
--------------------------------------------------------------------------------
/.docker/clickhouse/single_node_tls/certificates/client.crt:
--------------------------------------------------------------------------------
1 | -----BEGIN CERTIFICATE-----
2 | MIICGzCCAcGgAwIBAgIUeggQ6+OCjtT3i7jASzwA1qfdDoMwCgYIKoZIzj0EAwIw
3 | ajELMAkGA1UEBhMCVVMxETAPBgNVBAgMCENvbG9yYWRvMQ8wDQYDVQQHDAZEZW52
4 | ZXIxGDAWBgNVBAoMD0NsaWNrSG91c2UgSW5jLjEdMBsGA1UEAwwUY2xpY2tob3Vz
5 | ZV90ZXN0X3Jvb3QwHhcNMjUwNTI1MjA0NTI1WhcNNDUwNTI1MjA0NTI1WjBfMQsw
6 | CQYDVQQGEwJVUzERMA8GA1UECAwIQ29sb3JhZG8xDzANBgNVBAcMBkRlbnZlcjEY
7 | MBYGA1UECgwPQ2xpY2tIb3VzZSBJbmMuMRIwEAYDVQQDDAljZXJ0X3VzZXIwWTAT
8 | BgcqhkjOPQIBBggqhkjOPQMBBwNCAASBIakdBXGwadGXUOrfXPfq8UVUvE2V5T2N
9 | pBpRTJFuQGPP2NOS9auwsZiYsGZevuFP4/JtBIbVURMsG9TxDUdto1AwTjAdBgNV
10 | HQ4EFgQUJuFP4dlFGBW3wK6vUkqvSxaLMhswDAYDVR0TAQH/BAIwADAfBgNVHSME
11 | GDAWgBSSPtUyuGF0HFuucyfFfWwWMAnF9jAKBggqhkjOPQQDAgNIADBFAiA69ags
12 | M/lvNu9mi5WkQArOqf9kuguuL9EcO3VUOXnijwIhANSbcxfVaYkfdFXvLdmlxCYS
13 | JuwjN4xF1OU+JpjJPFBk
14 | -----END CERTIFICATE-----
15 |
16 |
--------------------------------------------------------------------------------
/.docker/clickhouse/single_node_tls/certificates/client.key:
--------------------------------------------------------------------------------
1 | -----BEGIN EC PARAMETERS-----
2 | BggqhkjOPQMBBw==
3 | -----END EC PARAMETERS-----
4 | -----BEGIN EC PRIVATE KEY-----
5 | MHcCAQEEIJOyEogF0IPoVr1hkJ9wjp/6zhUH1LDgtay+OjG1/9XnoAoGCCqGSM49
6 | AwEHoUQDQgAEgSGpHQVxsGnRl1Dq31z36vFFVLxNleU9jaQaUUyRbkBjz9jTkvWr
7 | sLGYmLBmXr7hT+PybQSG1VETLBvU8Q1HbQ==
8 | -----END EC PRIVATE KEY-----
9 |
--------------------------------------------------------------------------------
/.docker/clickhouse/single_node_tls/certificates/server.crt:
--------------------------------------------------------------------------------
1 | -----BEGIN CERTIFICATE-----
2 | MIICTDCCAfOgAwIBAgIUeggQ6+OCjtT3i7jASzwA1qfdDoQwCgYIKoZIzj0EAwIw
3 | ajELMAkGA1UEBhMCVVMxETAPBgNVBAgMCENvbG9yYWRvMQ8wDQYDVQQHDAZEZW52
4 | ZXIxGDAWBgNVBAoMD0NsaWNrSG91c2UgSW5jLjEdMBsGA1UEAwwUY2xpY2tob3Vz
5 | ZV90ZXN0X3Jvb3QwHhcNMjUwNTI1MjA0NTQwWhcNNDUwNTI1MjA0NTQwWjBtMQsw
6 | CQYDVQQGEwJVUzERMA8GA1UECAwIQ29sb3JhZG8xDzANBgNVBAcMBkRlbnZlcjEY
7 | MBYGA1UECgwPQ2xpY2tIb3VzZSBJbmMuMSAwHgYDVQQDDBdjbGlja2hvdXNlX3Rl
8 | c3Rfc2VydmVyMTBZMBMGByqGSM49AgEGCCqGSM49AwEHA0IABGGOK+jLXwPjuLXw
9 | mqDqb3IjXud0xYCS7I+FcUuBbU+lTbCTcO+lRuQpTFQ+Uqw3C4cQoniRylquuvBY
10 | bEpPYOajdDByMB0GA1UdDgQWBBTE+zb6QpJkmth2BMqpf6VAsXZFkDAMBgNVHRMB
11 | Af8EAjAAMCIGA1UdEQQbMBmCF3NlcnZlcjEuY2xpY2tob3VzZS50ZXN0MB8GA1Ud
12 | IwQYMBaAFJI+1TK4YXQcW65zJ8V9bBYwCcX2MAoGCCqGSM49BAMCA0cAMEQCIFo+
13 | iq9g/pUtmo6k/9cMvKxw1VmmLow2tdMuZtoofnHVAiAOSnN9CVQ8I9vLWwhoSRpq
14 | WsGApCnmNK/8tY1LTdQcLw==
15 | -----END CERTIFICATE-----
16 |
17 |
18 |
--------------------------------------------------------------------------------
/.docker/clickhouse/single_node_tls/certificates/server.key:
--------------------------------------------------------------------------------
1 | -----BEGIN EC PARAMETERS-----
2 | BggqhkjOPQMBBw==
3 | -----END EC PARAMETERS-----
4 | -----BEGIN EC PRIVATE KEY-----
5 | MHcCAQEEIHU7VYTo4pduP7Q2wlE4zgg0Ruh3KFlwfdz1EHIanFgIoAoGCCqGSM49
6 | AwEHoUQDQgAEYY4r6MtfA+O4tfCaoOpvciNe53TFgJLsj4VxS4FtT6VNsJNw76VG
7 | 5ClMVD5SrDcLhxCieJHKWq668FhsSk9g5g==
8 | -----END EC PRIVATE KEY-----
9 |
--------------------------------------------------------------------------------
/.docker/clickhouse/single_node_tls/config.xml:
--------------------------------------------------------------------------------
1 | <clickhouse>
2 |     <default_profile>default</default_profile>
3 |     <default_database>default</default_database>
4 |     <mark_cache_size>5368709120</mark_cache_size>
5 |
6 |     <path>/var/lib/clickhouse/</path>
7 |     <tmp_path>/var/lib/clickhouse/tmp/</tmp_path>
8 |     <user_files_path>/var/lib/clickhouse/user_files/</user_files_path>
9 |     <access_control_path>/var/lib/clickhouse/access/</access_control_path>
10 |     <format_schema_path>/var/lib/clickhouse/format_schemas/</format_schema_path>
11 |
12 |     <logger>
13 |         <level>warning</level>
14 |         <log>/var/log/clickhouse-server/clickhouse-server.log</log>
15 |         <errorlog>/var/log/clickhouse-server/clickhouse-server.err.log</errorlog>
16 |         <size>1000M</size>
17 |         <count>10</count>
18 |         <console>1</console>
19 |     </logger>
20 |
21 |     <openSSL>
22 |         <server>
23 |             <certificateFile>/etc/clickhouse-server/certs/server.crt</certificateFile>
24 |             <privateKeyFile>/etc/clickhouse-server/certs/server.key</privateKeyFile>
25 |             <verificationMode>relaxed</verificationMode>
26 |             <caConfig>/etc/clickhouse-server/certs/ca.crt</caConfig>
27 |             <cacheSessions>true</cacheSessions>
28 |             <disableProtocols>sslv2,sslv3,tlsv1</disableProtocols>
29 |             <preferServerCiphers>true</preferServerCiphers>
30 |         </server>
31 |     </openSSL>
32 |
33 |     <query_log>
34 |         <database>system</database>
35 |         <partition_by>toYYYYMM(event_date)</partition_by>
36 |         <flush_interval_milliseconds>1000</flush_interval_milliseconds>
37 |     </query_log>
38 |
39 |     <custom_settings_prefixes>SQL_</custom_settings_prefixes>
40 |
41 |     <user_directories>
42 |         <users_xml>
43 |             <path>users.xml</path>
44 |         </users_xml>
45 |     </user_directories>
46 | </clickhouse>
--------------------------------------------------------------------------------
/.docker/clickhouse/single_node_tls/docker_related_config.xml:
--------------------------------------------------------------------------------
1 | <clickhouse>
2 |     <listen_host>0.0.0.0</listen_host>
3 |     <https_port>8443</https_port>
4 |     <tcp_port_secure>9440</tcp_port_secure>
5 | </clickhouse>
--------------------------------------------------------------------------------
/.docker/clickhouse/single_node_tls/users.xml:
--------------------------------------------------------------------------------
1 | <clickhouse>
2 |     <profiles>
3 |         <default>
4 |             <load_balancing>random</load_balancing>
5 |         </default>
6 |     </profiles>
7 |
8 |     <users>
9 |         <default>
10 |             <password></password>
11 |             <networks>
12 |                 <ip>::/0</ip>
13 |             </networks>
14 |             <profile>default</profile>
15 |             <quota>default</quota>
16 |             <access_management>1</access_management>
17 |         </default>
18 |
19 |         <cert_user>
20 |             <ssl_certificates>
21 |                 <common_name>cert_user</common_name>
22 |             </ssl_certificates>
23 |             <profile>default</profile>
24 |         </cert_user>
25 |     </users>
26 |
27 |     <quotas>
28 |         <default>
29 |             <interval>
30 |                 <duration>3600</duration>
31 |                 <queries>0</queries>
32 |                 <errors>0</errors>
33 |                 <result_rows>0</result_rows>
34 |                 <read_rows>0</read_rows>
35 |                 <execution_time>0</execution_time>
36 |             </interval>
37 |         </default>
38 |     </quotas>
39 | </clickhouse>
--------------------------------------------------------------------------------
/.docker/clickhouse/users.xml:
--------------------------------------------------------------------------------
1 | <clickhouse>
2 |     <profiles>
3 |         <default>
4 |             <load_balancing>random</load_balancing>
5 |         </default>
6 |     </profiles>
7 |
8 |     <users>
9 |         <default>
10 |             <password></password>
11 |             <networks>
12 |                 <ip>::/0</ip>
13 |             </networks>
14 |             <profile>default</profile>
15 |             <quota>default</quota>
16 |             <access_management>1</access_management>
17 |         </default>
18 |     </users>
19 |
20 |     <quotas>
21 |         <default>
22 |             <interval>
23 |                 <duration>3600</duration>
24 |                 <queries>0</queries>
25 |                 <errors>0</errors>
26 |                 <result_rows>0</result_rows>
27 |                 <read_rows>0</read_rows>
28 |                 <execution_time>0</execution_time>
29 |             </interval>
30 |         </default>
31 |     </quotas>
32 | </clickhouse>
--------------------------------------------------------------------------------
/.dockerignore:
--------------------------------------------------------------------------------
1 | # Repo stuff not needed for docker
2 | tests
3 | docs
4 | test_dist
5 | .github
6 | Dockerfile
7 |
8 | # Temporary directories of various kinds
9 | dev
10 | dist
11 | cache
12 | build
13 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/bug_report.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Bug report
3 | about: Something not working right? Help us fix it!
4 | title: ''
5 | labels: 'bug'
6 | assignees: ''
7 |
8 | ---
9 |
10 |
11 | ### Describe the bug
12 |
13 | ### Steps to reproduce
14 | 1.
15 | 2.
16 | 3.
17 |
18 | ### Expected behaviour
19 |
20 | ### Code example
21 | ```python
22 | import clickhouse_connect
23 | ```
24 |
25 | ### clickhouse-connect and/or ClickHouse server logs
26 |
27 | ### Configuration
28 | #### Environment
29 | * clickhouse-connect version:
30 | * Python version:
31 | * Operating system:
32 |
33 |
34 | #### ClickHouse server
35 | * ClickHouse Server version:
36 | * ClickHouse Server non-default settings, if any:
37 | * `CREATE TABLE` statements for tables involved:
38 | * Sample data for these tables, use [clickhouse-obfuscator](https://github.com/ClickHouse/ClickHouse/blob/master/programs/obfuscator/Obfuscator.cpp#L42-L80) if necessary
39 |
40 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/feature_request.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Feature request
3 | about: What would you like to add to clickhouse-connect?
4 | title: ''
5 | labels: 'enhancement'
6 | assignees: ''
7 |
8 | ---
9 |
10 | **Is your feature request related to a problem? Please describe.**
11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
12 |
13 | **Describe the solution you'd like**
14 | A clear and concise description of what you want to happen.
15 |
16 | **Describe alternatives you've considered**
17 | A clear and concise description of any alternative solutions or features you've considered.
18 |
19 | **Additional context**
20 | Add any other context or screenshots about the feature request here.
21 |
--------------------------------------------------------------------------------
/.github/pull_request_template.md:
--------------------------------------------------------------------------------
1 | ## Summary
2 |
3 |
4 | ## Checklist
5 | Delete items not relevant to your PR:
6 | - [ ] Unit and integration tests covering the common scenarios were added
7 | - [ ] A human-readable description of the changes was provided to include in CHANGELOG
8 | - [ ] For significant changes, documentation in https://github.com/ClickHouse/clickhouse-docs was updated with further explanations or tutorials
9 |
--------------------------------------------------------------------------------
/.github/workflows/clickhouse_ci.yml:
--------------------------------------------------------------------------------
1 | name: ClickHouse CI Tests
2 |
3 | on:
4 | workflow_dispatch:
5 | schedule:
6 | - cron: '34 17 * * *'
7 |
8 | jobs:
9 | head:
10 | runs-on: ubuntu-latest
11 | name: ClickHouse CI Tests
12 | env:
13 | CLICKHOUSE_CONNECT_TEST_DOCKER: 'False'
14 | CLICKHOUSE_CONNECT_TEST_FUZZ: 50
15 | steps:
16 | - name: Checkout
17 | uses: actions/checkout@v4
18 | - name: Set up Python 3.11
19 | uses: actions/setup-python@v5
20 | with:
21 | python-version: 3.11
22 | - name: Install pip
23 | run: python -m pip install --upgrade pip
24 | - name: Install Test Dependencies
25 | run: pip install -r tests/test_requirements.txt
26 | - name: Build cython extensions
27 | run: python setup.py build_ext --inplace
28 | - name: "Add distribution info" # This lets SQLAlchemy find entry points
29 | run: python setup.py develop
30 |
31 | - name: run ClickHouse Cloud tests
32 | env:
33 | CLICKHOUSE_CONNECT_TEST_PORT: 8443
34 | CLICKHOUSE_CONNECT_TEST_CLOUD: 'True'
35 | CLICKHOUSE_CONNECT_TEST_HOST: ${{ secrets.INTEGRATIONS_TEAM_TESTS_CLOUD_HOST_SMT }}
36 | CLICKHOUSE_CONNECT_TEST_PASSWORD: ${{ secrets.INTEGRATIONS_TEAM_TESTS_CLOUD_PASSWORD_SMT }}
37 | CLICKHOUSE_CONNECT_TEST_JWT_SECRET: ${{ secrets.INTEGRATIONS_TEAM_TESTS_CLOUD_JWT_DESERT_VM_43 }}
38 | SQLALCHEMY_SILENCE_UBER_WARNING: 1
39 | run: pytest tests/integration_tests
40 |
41 | - name: Run ClickHouse Container (LATEST)
42 | run: CLICKHOUSE_CONNECT_TEST_CH_VERSION=latest docker compose up -d clickhouse
43 | - name: Run LATEST tests
44 | run: pytest tests/integration_tests
45 | - name: remove latest container
46 | run: docker compose down -v
47 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Test data
2 | .pytest_cache
3 |
4 | # Development Python environments
5 | *venv
6 |
7 | # IDE files
8 | *.idea
9 | .vscode
10 |
11 | # Build artifacts
12 | /dist
13 | /build
14 | *.egg-info/
15 |
16 | # Temporary development stuff (Manual tests, etc.)
17 | /dev
18 | test.env
19 | .env
20 | .dev_version
21 |
22 | # Python cruft
23 | *.pyc
24 | .python-version
25 |
26 | # Apple garbage
27 | .DS_Store
28 |
29 | # C extensions
30 | *.pyd
31 |
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | ## Contributing guidelines
2 |
3 | ClickHouse-connect is an open-source project, and we welcome any contributions from the community.
4 | Please share your ideas, contribute to the codebase, and help us maintain up-to-date documentation.
5 |
6 | ## Setting up your environment
7 |
8 | ### Prerequisites
9 |
10 | * Python 3.11+
11 | * Docker and the [Compose plugin](https://docs.docker.com/compose/install/)
12 |
13 | ### Create a fork of the repository and clone it
14 |
15 | ```bash
16 | git clone https://github.com/[YOUR_USERNAME]/clickhouse-connect
17 | cd clickhouse-connect
18 | ```
19 |
20 | ### Add PYTHONPATH
21 |
22 | Add the project directory to the `PYTHONPATH` environment variable so that the driver sources are available for import.
23 |
24 | ```bash
25 | export PYTHONPATH="/absolute/path/to/clickhouse-connect"
26 | ```
27 |
28 | ### Prepare a new virtual environment
29 |
30 | You could either use PyCharm for that, or follow [the instructions on the official website](https://docs.python.org/3/tutorial/venv.html) and set it up via the command line.
31 |
32 | ### Install dependencies
33 |
34 | ```bash
35 | python -m pip install --upgrade pip
36 | pip install setuptools wheel
37 | pip install -r tests/test_requirements.txt
38 | ```
39 |
40 | ### Run the setup script
41 |
42 | The driver uses several Cython extensions that provide additional performance improvements
43 | (see the [clickhouse_connect/driverc](clickhouse_connect/driverc) directory).
44 | To compile the extensions, run the following command:
45 |
46 | ```bash
47 | python setup.py build_ext --inplace
48 | ```
49 |
50 | Additionally, this command is required to provide the SQLAlchemy entry points:
51 |
52 | ```bash
53 | python setup.py develop
54 | ```
55 |
56 | ### Add /etc/hosts entry
57 |
58 | Required for TLS tests.
59 | The generated certificates assume TLS requests use `server1.clickhouse.test` as the hostname.
60 | See [test_tls.py](tests/integration_tests/test_tls.py) for more details.
61 |
62 | ```bash
63 | sudo -- sh -c "echo 127.0.0.1 server1.clickhouse.test >> /etc/hosts"
64 | ```
65 |
66 | ### PyCharm setup
67 |
68 | If you use PyCharm as your IDE, make sure that `clickhouse-connect` is added to the project structure as a source path.
69 | Go to Settings -> Project (clickhouse-connect) -> Project Structure, right-click the `clickhouse-connect` folder, and mark it as "Sources".
70 |
71 | ## Testing
72 |
73 | ### Start ClickHouse in Docker
74 |
75 | The tests require two running ClickHouse instances:
76 | one with default plain authentication (for the integration tests) and one with a TLS configuration (for the TLS tests only).
77 |
78 | The integration tests will start and stop the ClickHouse instance automatically.
79 | However, this adds a few seconds to each run, and this might not be ideal when you run a single test (using PyCharm, for example).
80 | To disable this behavior, set the `CLICKHOUSE_CONNECT_TEST_DOCKER` environment variable to `0`.
81 |
82 | ```bash
83 | export CLICKHOUSE_CONNECT_TEST_DOCKER=0
84 | ```
85 |
86 | The easiest way to start all the required ClickHouse instances is to use the provided Docker Compose file (the integration tests [setup script](tests/integration_tests/conftest.py) uses the same file).
87 |
88 | ```bash
89 | docker compose up -d
90 | ```
91 |
92 | ### Run the tests
93 |
94 | The project uses [pytest](https://docs.pytest.org/) as a test runner.
95 | To run all the tests (unit and integration), execute the following command:
96 |
97 | ```bash
98 | pytest tests
99 | ```
100 |
101 | If you need to run the unit tests only:
102 |
103 | ```bash
104 | pytest tests/unit_tests
105 | ```
106 |
107 | Or the integration tests only:
108 |
109 | ```bash
110 | pytest tests/integration_tests
111 | ```
112 |
113 | ### Run the TLS integration tests
114 |
115 | These tests require the `CLICKHOUSE_CONNECT_TEST_TLS` environment variable to be set to `1`; otherwise, they will be skipped.
116 | Additionally, the TLS ClickHouse instance should be running (see [docker-compose.yml](docker-compose.yml)).
117 |
118 | ```bash
119 | CLICKHOUSE_CONNECT_TEST_TLS=1 pytest tests/integration_tests/test_tls.py
120 | ```
121 |
122 | ### Running the integration tests with ClickHouse Cloud
123 |
124 | If you want to run the tests using your ClickHouse Cloud instance instead of the local ClickHouse instance running in Docker, you will need a few additional environment variables.
125 |
126 | ```bash
127 | export CLICKHOUSE_CONNECT_TEST_CLOUD=1
128 | export CLICKHOUSE_CONNECT_TEST_PORT=8443
129 | export CLICKHOUSE_CONNECT_TEST_HOST='instance.clickhouse.cloud'
130 | export CLICKHOUSE_CONNECT_TEST_PASSWORD='secret'
131 | ```
132 |
133 | Then, you should be able to run the tests as usual:
134 |
135 | ```bash
136 | pytest tests/integration_tests
137 | ```
138 |
139 | ## Style Guide
140 |
141 | The project uses [PyLint](https://pypi.org/project/pylint/) to enforce the code style.
142 | It is always a good idea to run the linter before committing the changes, as this is a mandatory CI check. For example:
143 |
144 | ```bash
145 | pip install pylint
146 | pylint clickhouse_connect
147 | pylint tests
148 | ```
149 |
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM python:3.10
2 |
3 | ADD . /
4 |
5 | RUN pip install requests pytz
6 | RUN pip install --upgrade pip; \
7 | pip install cython; \
8 | pip install -v .
9 | #RUN pip install -v --index-url https://test.pypi.org/simple/ clickhouse-connect
10 | CMD ["python", "playtest.py"]
11 |
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | global-include *.pxd *.pyx
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | ## ClickHouse Connect
2 |
3 | A high-performance core database driver for connecting ClickHouse to Python, Pandas, and Superset
4 |
5 | * Pandas DataFrames
6 | * Numpy Arrays
7 | * PyArrow Tables
8 | * Superset Connector
9 | * SQLAlchemy 1.3 and 1.4 (limited feature set)
10 |
11 | ClickHouse Connect currently uses the ClickHouse HTTP interface for maximum compatibility.
12 |
13 | ### Installation
14 |
15 | ```
16 | pip install clickhouse-connect
17 | ```
18 |
19 | ClickHouse Connect requires Python 3.8 or higher.
20 |
21 | ### Superset Connectivity
22 |
23 | ClickHouse Connect is fully integrated with Apache Superset. Previous versions of ClickHouse Connect utilized a
24 | dynamically loaded Superset Engine Spec, but as of Superset v2.1.0 the engine spec was incorporated into the main
25 | Apache Superset project and removed from clickhouse-connect in v0.6.0. If you have issues connecting to earlier
26 | versions of Superset, please use clickhouse-connect v0.5.25.
27 |
28 | When creating a Superset Data Source, either use the provided connection dialog or a SQLAlchemy DSN in the form
29 | `clickhousedb://{username}:{password}@{host}:{port}`.
30 |
31 | ### SQLAlchemy Implementation
32 |
33 | ClickHouse Connect incorporates a minimal SQLAlchemy implementation (without any ORM features) for compatibility with
34 | Superset. It has only been tested against SQLAlchemy versions 1.3.x and 1.4.x, and is unlikely to work with more
35 | complex SQLAlchemy applications.
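
A minimal engine sketch using the DSN form above (connection details are placeholders):

```python
from sqlalchemy import create_engine, text

# DSN form: clickhousedb://{username}:{password}@{host}:{port}
engine = create_engine('clickhousedb://default:@localhost:8123')
with engine.connect() as conn:
    print(conn.execute(text('SELECT 1')).fetchall())
```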
36 |
37 | ### Asyncio Support
38 |
39 | ClickHouse Connect provides an async wrapper, so that it is possible to use the client in an `asyncio` environment.
40 | See the [run_async example](./examples/run_async.py) for more details.
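
A short sketch of the pattern (connection details are placeholders):

```python
import asyncio
import clickhouse_connect

async def main():
    # The async client wraps the standard client; its methods are awaitable
    client = await clickhouse_connect.get_async_client(host='localhost')
    result = await client.query('SELECT 1')
    print(result.result_rows)

asyncio.run(main())
```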
41 |
42 | ### Complete Documentation
43 |
44 | The documentation for ClickHouse Connect has moved to
45 | [ClickHouse Docs](https://clickhouse.com/docs/integrations/python)
46 |
--------------------------------------------------------------------------------
/clickhouse_connect/__init__.py:
--------------------------------------------------------------------------------
1 | from clickhouse_connect.driver import create_client, create_async_client
2 |
3 | driver_name = 'clickhousedb'
4 |
5 | get_client = create_client
6 | get_async_client = create_async_client
7 |
--------------------------------------------------------------------------------
/clickhouse_connect/__version__.py:
--------------------------------------------------------------------------------
1 | version = '0.8.18'
2 |
--------------------------------------------------------------------------------
/clickhouse_connect/cc_sqlalchemy/__init__.py:
--------------------------------------------------------------------------------
1 | from clickhouse_connect import driver_name
2 | from clickhouse_connect.cc_sqlalchemy.datatypes.base import schema_types
3 |
4 | # pylint: disable=invalid-name
5 | dialect_name = driver_name
6 | ischema_names = schema_types
7 |
--------------------------------------------------------------------------------
/clickhouse_connect/cc_sqlalchemy/datatypes/__init__.py:
--------------------------------------------------------------------------------
1 | import clickhouse_connect.cc_sqlalchemy.datatypes.sqltypes
2 |
--------------------------------------------------------------------------------
/clickhouse_connect/cc_sqlalchemy/ddl/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ClickHouse/clickhouse-connect/ced1ec4ddc823159648fd66ce554110192b0a023/clickhouse_connect/cc_sqlalchemy/ddl/__init__.py
--------------------------------------------------------------------------------
/clickhouse_connect/cc_sqlalchemy/ddl/custom.py:
--------------------------------------------------------------------------------
1 | from sqlalchemy.sql.ddl import DDL
2 | from sqlalchemy.exc import ArgumentError
3 |
4 | from clickhouse_connect.driver.binding import quote_identifier
5 |
6 |
7 | # pylint: disable=too-many-ancestors,abstract-method
8 | class CreateDatabase(DDL):
9 | """
10 | SqlAlchemy DDL statement that is essentially an alternative to the built-in CreateSchema DDL class
11 | """
12 | # pylint: disable-msg=too-many-arguments
13 | def __init__(self, name: str, engine: str = None, zoo_path: str = None, shard_name: str = '{shard}',
14 | replica_name: str = '{replica}'):
15 | """
16 | :param name: Database name
17 | :param engine: Database ClickHouse engine type
18 | :param zoo_path: ClickHouse zookeeper path for Replicated database engine
19 | :param shard_name: Clickhouse shard name for Replicated database engine
20 | :param replica_name: Replica name for Replicated database engine
21 | """
22 | if engine and engine not in ('Ordinary', 'Atomic', 'Lazy', 'Replicated'):
23 | raise ArgumentError(f'Unrecognized engine type {engine}')
24 | stmt = f'CREATE DATABASE {quote_identifier(name)}'
25 | if engine:
26 | stmt += f' Engine {engine}'
27 | if engine == 'Replicated':
28 | if not zoo_path:
29 | raise ArgumentError('zoo_path is required for Replicated Database Engine')
30 | stmt += f" ('{zoo_path}', '{shard_name}', '{replica_name}')"
31 | super().__init__(stmt)
32 |
33 |
34 | # pylint: disable=too-many-ancestors,abstract-method
35 | class DropDatabase(DDL):
36 | """
37 | Alternative DDL statement for the built-in SqlAlchemy DropSchema DDL class
38 | """
39 | def __init__(self, name: str):
40 | super().__init__(f'DROP DATABASE {quote_identifier(name)}')
41 |
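# Illustrative usage sketch (the connection object and database name below are
# hypothetical, not part of this module):
#     conn.execute(CreateDatabase('analytics', engine='Atomic'))
#     conn.execute(DropDatabase('analytics'))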
--------------------------------------------------------------------------------
/clickhouse_connect/cc_sqlalchemy/dialect.py:
--------------------------------------------------------------------------------
1 |
2 | from sqlalchemy import text
3 | from sqlalchemy.engine.default import DefaultDialect
4 |
5 | from clickhouse_connect import dbapi
6 |
7 | from clickhouse_connect.cc_sqlalchemy.inspector import ChInspector
8 | from clickhouse_connect.cc_sqlalchemy.sql import full_table
9 | from clickhouse_connect.cc_sqlalchemy.sql.ddlcompiler import ChDDLCompiler
10 | from clickhouse_connect.cc_sqlalchemy import ischema_names, dialect_name
11 | from clickhouse_connect.cc_sqlalchemy.sql.preparer import ChIdentifierPreparer
12 | from clickhouse_connect.driver.binding import quote_identifier, format_str
13 |
14 |
15 | # pylint: disable=too-many-public-methods,no-self-use,unused-argument
16 | class ClickHouseDialect(DefaultDialect):
17 | """
18 | See :py:mod:`sqlalchemy.engine.interfaces`
19 | """
20 | name = dialect_name
21 | driver = 'connect'
22 |
23 | default_schema_name = 'default'
24 | supports_native_decimal = True
25 | supports_native_boolean = True
26 | supports_statement_cache = False
27 | returns_unicode_strings = True
28 | postfetch_lastrowid = False
29 | ddl_compiler = ChDDLCompiler
30 | preparer = ChIdentifierPreparer
31 | description_encoding = None
32 | max_identifier_length = 127
33 | ischema_names = ischema_names
34 | inspector = ChInspector
35 |
36 | # pylint: disable=method-hidden
37 | @classmethod
38 | def dbapi(cls):
39 | return dbapi
40 |
41 | def initialize(self, connection):
42 | pass
43 |
44 | @staticmethod
45 | def get_schema_names(connection, **_):
46 | return [row.name for row in connection.execute('SHOW DATABASES')]
47 |
48 | @staticmethod
49 | def has_database(connection, db_name):
50 | return (connection.execute(text('SELECT name FROM system.databases ' +
51 | f'WHERE name = {format_str(db_name)}'))).rowcount > 0
52 |
53 | def get_table_names(self, connection, schema=None, **kw):
54 | cmd = 'SHOW TABLES'
55 | if schema:
56 | cmd += ' FROM ' + quote_identifier(schema)
57 | return [row.name for row in connection.execute(cmd)]
58 |
59 | def get_primary_keys(self, connection, table_name, schema=None, **kw):
60 | return []
61 |
62 | # pylint: disable=arguments-renamed
63 | def get_pk_constraint(self, connection, table_name, schema=None, **kw):
64 | return []
65 |
66 | def get_foreign_keys(self, connection, table_name, schema=None, **kw):
67 | return []
68 |
69 | def get_temp_table_names(self, connection, schema=None, **kw):
70 | return []
71 |
72 | def get_view_names(self, connection, schema=None, **kw):
73 | return []
74 |
75 | def get_temp_view_names(self, connection, schema=None, **kw):
76 | return []
77 |
78 | def get_view_definition(self, connection, view_name, schema=None, **kw):
79 | pass
80 |
81 | def get_indexes(self, connection, table_name, schema=None, **kw):
82 | return []
83 |
84 | def get_unique_constraints(self, connection, table_name, schema=None, **kw):
85 | return []
86 |
87 | def get_check_constraints(self, connection, table_name, schema=None, **kw):
88 | return []
89 |
90 | def has_table(self, connection, table_name, schema=None, **_kw):
91 | result = connection.execute(text(f'EXISTS TABLE {full_table(table_name, schema)}'))
92 | row = result.fetchone()
93 | return row[0] == 1
94 |
95 | def has_sequence(self, connection, sequence_name, schema=None, **_kw):
96 | return False
97 |
98 | def do_begin_twophase(self, connection, xid):
99 | raise NotImplementedError
100 |
101 | def do_prepare_twophase(self, connection, xid):
102 | raise NotImplementedError
103 |
104 | def do_rollback_twophase(self, connection, xid, is_prepared=True, recover=False):
105 | raise NotImplementedError
106 |
107 | def do_commit_twophase(self, connection, xid, is_prepared=True, recover=False):
108 | raise NotImplementedError
109 |
110 | def do_recover_twophase(self, connection):
111 | raise NotImplementedError
112 |
113 | def set_isolation_level(self, dbapi_conn, level):
114 | pass
115 |
116 | def get_isolation_level(self, dbapi_conn):
117 | return None
118 |
--------------------------------------------------------------------------------
/clickhouse_connect/cc_sqlalchemy/inspector.py:
--------------------------------------------------------------------------------
1 | import sqlalchemy.schema as sa_schema
2 |
3 | from sqlalchemy.engine.reflection import Inspector
4 | from sqlalchemy.orm.exc import NoResultFound
5 |
6 | from clickhouse_connect.cc_sqlalchemy.datatypes.base import sqla_type_from_name
7 | from clickhouse_connect.cc_sqlalchemy.ddl.tableengine import build_engine
8 | from clickhouse_connect.cc_sqlalchemy.sql import full_table
9 | from clickhouse_connect.cc_sqlalchemy import dialect_name as dn
10 |
11 | ch_col_args = ('default_type', 'codec_expression', 'ttl_expression')
12 |
13 |
14 | def get_engine(connection, table_name, schema=None):
15 | result_set = connection.execute(
16 | f"SELECT engine_full FROM system.tables WHERE database = '{schema}' and name = '{table_name}'")
17 | row = next(result_set, None)
18 | if not row:
19 | raise NoResultFound(f'Table {schema}.{table_name} does not exist')
20 | return build_engine(row.engine_full)
21 |
22 |
23 | class ChInspector(Inspector):
24 |
25 | def reflect_table(self, table, include_columns, exclude_columns, *_args, **_kwargs):
26 | schema = table.schema
27 | for col in self.get_columns(table.name, schema):
28 | name = col.pop('name')
29 | if (include_columns and name not in include_columns) or (exclude_columns and name in exclude_columns):
30 | continue
31 | col_type = col.pop('type')
32 | col_args = {f'{dn}_{key}' if key in ch_col_args else key: value for key, value in col.items() if value}
33 | table.append_column(sa_schema.Column(name, col_type, **col_args))
34 | table.engine = get_engine(self.bind, table.name, schema)
35 |
36 | def get_columns(self, table_name, schema=None, **_kwargs):
37 | table_id = full_table(table_name, schema)
38 | result_set = self.bind.execute(f'DESCRIBE TABLE {table_id}')
39 | if not result_set:
40 | raise NoResultFound(f'Table {table_id} does not exist')
41 | columns = []
42 | for row in result_set:
43 | sqla_type = sqla_type_from_name(row.type.replace('\n', ''))
44 | col = {'name': row.name,
45 | 'type': sqla_type,
46 | 'nullable': sqla_type.nullable,
47 | 'autoincrement': False,
48 | 'default': row.default_expression,
49 | 'default_type': row.default_type,
50 | 'comment': row.comment,
51 | 'codec_expression': row.codec_expression,
52 | 'ttl_expression': row.ttl_expression}
53 | columns.append(col)
54 | return columns
55 |
56 |
57 | ChInspector.reflecttable = ChInspector.reflect_table # Hack to provide backward compatibility for SQLAlchemy 1.3
58 |
--------------------------------------------------------------------------------
/clickhouse_connect/cc_sqlalchemy/sql/__init__.py:
--------------------------------------------------------------------------------
1 | from typing import Optional
2 |
3 | from sqlalchemy import Table
4 |
5 | from clickhouse_connect.driver.binding import quote_identifier
6 |
7 |
8 | def full_table(table_name: str, schema: Optional[str] = None) -> str:
9 | if table_name.startswith('(') or '.' in table_name or not schema:
10 | return quote_identifier(table_name)
11 | return f'{quote_identifier(schema)}.{quote_identifier(table_name)}'
12 |
13 |
14 | def format_table(table: Table):
15 | return full_table(table.name, table.schema)
16 |
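# Illustrative behavior (assuming backtick quoting in quote_identifier):
#     full_table('events', 'db')  ->  `db`.`events`
#     full_table('events')        ->  `events`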
--------------------------------------------------------------------------------
/clickhouse_connect/cc_sqlalchemy/sql/ddlcompiler.py:
--------------------------------------------------------------------------------
1 | from sqlalchemy import Column
2 | from sqlalchemy.sql.compiler import DDLCompiler
3 |
4 | from clickhouse_connect.cc_sqlalchemy.sql import format_table
5 | from clickhouse_connect.driver.binding import quote_identifier
6 |
7 |
8 | class ChDDLCompiler(DDLCompiler):
9 |
10 | def visit_create_schema(self, create, **_):
11 | return f'CREATE DATABASE {quote_identifier(create.element)}'
12 |
13 | def visit_drop_schema(self, drop, **_):
14 | return f'DROP DATABASE {quote_identifier(drop.element)}'
15 |
16 | def visit_create_table(self, create, **_):
17 | table = create.element
18 | text = f'CREATE TABLE {format_table(table)} ('
19 | text += ', '.join([self.get_column_specification(c.element) for c in create.columns])
20 | return text + ') ' + table.engine.compile()
21 |
22 | def get_column_specification(self, column: Column, **_):
23 | text = f'{quote_identifier(column.name)} {column.type.compile()}'
24 | return text
25 |
--------------------------------------------------------------------------------
/clickhouse_connect/cc_sqlalchemy/sql/preparer.py:
--------------------------------------------------------------------------------
1 | from sqlalchemy.sql.compiler import IdentifierPreparer
2 |
3 | from clickhouse_connect.driver.binding import quote_identifier
4 |
5 |
6 | class ChIdentifierPreparer(IdentifierPreparer):
7 |
8 | quote_identifier = staticmethod(quote_identifier)
9 |
10 | def _requires_quotes(self, _value):
11 | return True
12 |
--------------------------------------------------------------------------------
/clickhouse_connect/common.py:
--------------------------------------------------------------------------------
1 | import getpass
2 | import sys
3 | from dataclasses import dataclass
4 | from typing import Any, Sequence, Optional, Dict
5 | from clickhouse_connect import __version__
6 |
7 |
8 | from clickhouse_connect.driver.exceptions import ProgrammingError
9 |
10 |
11 | def version():
12 | return __version__.version
13 |
14 |
15 | def format_error(msg: str) -> str:
16 | max_size = _common_settings['max_error_size'].value
17 | if max_size:
18 | return msg[:max_size]
19 | return msg
20 |
21 |
22 | @dataclass
23 | class CommonSetting:
24 | name: str
25 | options: Sequence[Any]
26 | default: Any
27 | value: Optional[Any] = None
28 |
29 |
30 | _common_settings: Dict[str, CommonSetting] = {}
31 |
32 |
33 | def build_client_name(client_name: str):
34 | product_name = get_setting('product_name')
35 | product_name = product_name.strip() + ' ' if product_name else ''
36 | client_name = client_name.strip() + ' ' if client_name else ''
37 | py_version = sys.version.split(' ', maxsplit=1)[0]
38 | os_user = ''
39 | if get_setting('send_os_user'):
40 | try:
41 | os_user = f'; os_user:{getpass.getuser()}'
42 | except Exception: # pylint: disable=broad-except
43 | pass
44 | full_name = (f'{client_name}{product_name}clickhouse-connect/{version()}' +
45 | f' (lv:py/{py_version}; mode:sync; os:{sys.platform}{os_user})')
46 | return full_name.encode('ascii', 'ignore').decode()
47 |
48 |
49 | def get_setting(name: str):
50 | setting = _common_settings.get(name)
51 | if setting is None:
52 | raise ProgrammingError(f'Unrecognized common setting {name}')
53 | return setting.value if setting.value is not None else setting.default
54 |
55 |
56 | def set_setting(name: str, value: Any):
57 | setting = _common_settings.get(name)
58 | if setting is None:
59 | raise ProgrammingError(f'Unrecognized common setting {name}')
60 | if setting.options and value not in setting.options:
61 | raise ProgrammingError(f'Unrecognized option {value} for setting {name}')
62 | if value == setting.default:
63 | setting.value = None
64 | else:
65 | setting.value = value
66 |
67 |
68 | def _init_common(name: str, options: Sequence[Any], default: Any):
69 | _common_settings[name] = CommonSetting(name, options, default)
70 |
71 |
72 | _init_common('autogenerate_session_id', (True, False), True)
73 | _init_common('dict_parameter_format', ('json', 'map'), 'json')
74 | _init_common('invalid_setting_action', ('send', 'drop', 'error'), 'error')
75 | _init_common('max_connection_age', (), 10 * 60) # Max time in seconds to keep reusing a database TCP connection
76 | _init_common('product_name', (), '') # Product name used as part of client identification for ClickHouse query_log
77 | _init_common('readonly', (0, 1), 0) # Implied "read_only" ClickHouse settings for versions prior to 19.17
78 | _init_common('send_os_user', (True, False), True)
79 |
80 | # Use the client protocol version. This is needed for DateTime timezone columns but breaks with the current version
81 | # of chproxy
82 | _init_common('use_protocol_version', (True, False), True)
83 |
84 | _init_common('max_error_size', (), 1024)
85 |
86 | # HTTP raw data buffer for streaming queries. This should not be reduced below 64KB to ensure compatibility with LZ4 compression
87 | _init_common('http_buffer_size', (), 10 * 1024 * 1024)
88 |
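# Illustrative usage of the settings registry defined above:
#     from clickhouse_connect import common
#     common.set_setting('autogenerate_session_id', False)
#     assert common.get_setting('autogenerate_session_id') is False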
--------------------------------------------------------------------------------
/clickhouse_connect/datatypes/__init__.py:
--------------------------------------------------------------------------------
1 | import clickhouse_connect.datatypes.container
2 | import clickhouse_connect.datatypes.network
3 | import clickhouse_connect.datatypes.numeric
4 | import clickhouse_connect.datatypes.special
5 | import clickhouse_connect.datatypes.string
6 | import clickhouse_connect.datatypes.temporal
7 | import clickhouse_connect.datatypes.geometric
8 | import clickhouse_connect.datatypes.dynamic
9 | import clickhouse_connect.datatypes.registry
10 | import clickhouse_connect.datatypes.postinit
11 |
--------------------------------------------------------------------------------
/clickhouse_connect/datatypes/format.py:
--------------------------------------------------------------------------------
1 | import re
2 |
3 | from typing import Dict, Type, Sequence, Optional
4 |
5 | from clickhouse_connect.datatypes.base import ClickHouseType, type_map, ch_read_formats, ch_write_formats
6 | from clickhouse_connect.driver.exceptions import ProgrammingError
7 |
8 | json_re = re.compile('json', re.IGNORECASE)
9 |
10 |
11 | def set_default_formats(*args, **kwargs):
12 | fmt_map = format_map(_convert_arguments(*args, **kwargs))
13 | ch_read_formats.update(fmt_map)
14 | ch_write_formats.update(fmt_map)
15 |
16 |
17 | def clear_all_formats():
18 | ch_read_formats.clear()
19 | ch_write_formats.clear()
20 |
21 |
22 | def clear_default_format(pattern: str):
23 | for ch_type in _matching_types(pattern):
24 | ch_read_formats.pop(ch_type, None)
25 | ch_write_formats.pop(ch_type, None)
26 |
27 |
28 | def set_write_format(pattern: str, fmt: str):
29 | pattern = json_re.sub('object', pattern)
30 | for ch_type in _matching_types(pattern):
31 | ch_write_formats[ch_type] = fmt
32 |
33 |
34 | def clear_write_format(pattern: str):
35 | for ch_type in _matching_types(pattern):
36 | ch_write_formats.pop(ch_type, None)
37 |
38 |
39 | def set_read_format(pattern: str, fmt: str):
40 | for ch_type in _matching_types(pattern):
41 | ch_read_formats[ch_type] = fmt
42 |
43 |
44 | def clear_read_format(pattern: str):
45 | for ch_type in _matching_types(pattern):
46 | ch_read_formats.pop(ch_type, None)
47 |
48 |
49 | def format_map(fmt_map: Optional[Dict[str, str]]) -> Dict[Type[ClickHouseType], str]:
50 | if not fmt_map:
51 | return {}
52 | final_map = {}
53 | for pattern, fmt in fmt_map.items():
54 | for ch_type in _matching_types(pattern, fmt):
55 | final_map[ch_type] = fmt
56 | return final_map
57 |
58 |
59 | def _convert_arguments(*args, **kwargs) -> Dict[str, str]:
60 | fmt_map = {}
61 | try:
62 | for x in range(0, len(args), 2):
63 | fmt_map[args[x]] = args[x + 1]
64 | except (IndexError, TypeError, ValueError) as ex:
65 | raise ProgrammingError('Invalid type/format arguments for format method') from ex
66 | fmt_map.update(kwargs)
67 | return fmt_map
68 |
69 |
70 | def _matching_types(pattern: str, fmt: str = None) -> Sequence[Type[ClickHouseType]]:
71 | re_pattern = re.compile(pattern.replace('*', '.*'), re.IGNORECASE)
72 | matches = [ch_type for type_name, ch_type in type_map.items() if re_pattern.match(type_name)]
73 | if not matches:
74 | raise ProgrammingError(f'Unrecognized ClickHouse type {pattern} when setting formats')
75 | if fmt:
76 | invalid = [ch_type.__name__ for ch_type in matches if fmt not in ch_type.valid_formats]
77 | if invalid:
78 | raise ProgrammingError(f"{fmt} is not a valid format for ClickHouse types {','.join(invalid)}.")
79 | return matches
80 |
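# Illustrative usage (patterns are matched case-insensitively, with '*' as a wildcard):
#     set_read_format('IPv*', 'string')   # return IPv4/IPv6 values as strings
#     set_write_format('UUID', 'string')  # accept string values for UUID inserts
#     clear_read_format('IPv*')           # restore the default read format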
--------------------------------------------------------------------------------
/clickhouse_connect/datatypes/geometric.py:
--------------------------------------------------------------------------------
1 | from typing import Sequence, Any
2 |
3 | from clickhouse_connect.datatypes.base import ClickHouseType
4 | from clickhouse_connect.driver.insert import InsertContext
5 | from clickhouse_connect.driver.query import QueryContext
6 | from clickhouse_connect.driver.types import ByteSource
7 |
8 | POINT_DATA_TYPE: ClickHouseType
9 | RING_DATA_TYPE: ClickHouseType
10 | POLYGON_DATA_TYPE: ClickHouseType
11 | MULTI_POLYGON_DATA_TYPE: ClickHouseType
12 |
13 |
14 | class Point(ClickHouseType):
15 | def write_column(self, column: Sequence, dest: bytearray, ctx: InsertContext):
16 | return POINT_DATA_TYPE.write_column(column, dest, ctx)
17 |
18 | def read_column_prefix(self, source: ByteSource, ctx: QueryContext):
19 | return POINT_DATA_TYPE.read_column_prefix(source, ctx)
20 |
21 | def read_column_data(self, source: ByteSource, num_rows: int, ctx: QueryContext, read_state: Any) -> Sequence:
22 | return POINT_DATA_TYPE.read_column_data(source, num_rows, ctx, read_state)
23 |
24 |
25 | class Ring(ClickHouseType):
26 | def write_column(self, column: Sequence, dest: bytearray, ctx: InsertContext):
27 | return RING_DATA_TYPE.write_column(column, dest, ctx)
28 |
29 | def read_column_prefix(self, source: ByteSource, ctx: QueryContext):
30 | return RING_DATA_TYPE.read_column_prefix(source, ctx)
31 |
32 | def read_column_data(self, source: ByteSource, num_rows: int, ctx: QueryContext, read_state) -> Sequence:
33 | return RING_DATA_TYPE.read_column_data(source, num_rows, ctx, read_state)
34 |
35 |
36 | class Polygon(ClickHouseType):
37 | def write_column(self, column: Sequence, dest: bytearray, ctx: InsertContext):
38 | return POLYGON_DATA_TYPE.write_column(column, dest, ctx)
39 |
40 | def read_column_prefix(self, source: ByteSource, ctx: QueryContext):
41 | return POLYGON_DATA_TYPE.read_column_prefix(source, ctx)
42 |
43 | def read_column_data(self, source: ByteSource, num_rows: int, ctx: QueryContext, read_state: Any) -> Sequence:
44 | return POLYGON_DATA_TYPE.read_column_data(source, num_rows, ctx, read_state)
45 |
46 |
47 | class MultiPolygon(ClickHouseType):
48 | def write_column(self, column: Sequence, dest: bytearray, ctx: InsertContext):
49 | return MULTI_POLYGON_DATA_TYPE.write_column(column, dest, ctx)
50 |
51 | def read_column_prefix(self, source: ByteSource, ctx: QueryContext):
52 | return MULTI_POLYGON_DATA_TYPE.read_column_prefix(source, ctx)
53 |
54 | def read_column_data(self, source: ByteSource, num_rows: int, ctx: QueryContext, read_state: Any) -> Sequence:
55 | return MULTI_POLYGON_DATA_TYPE.read_column_data(source, num_rows, ctx, read_state)
56 |
57 |
58 | class LineString(Ring):
59 | pass
60 |
61 |
62 | class MultiLineString(Polygon):
63 | pass
64 |
--------------------------------------------------------------------------------
/clickhouse_connect/datatypes/postinit.py:
--------------------------------------------------------------------------------
1 | from clickhouse_connect.datatypes import registry, dynamic, geometric
2 |
3 | dynamic.SHARED_DATA_TYPE = registry.get_from_name('Array(String, String)')
4 | dynamic.STRING_DATA_TYPE = registry.get_from_name('String')
5 |
6 | point = 'Tuple(Float64, Float64)'
7 | ring = f'Array({point})'
8 | polygon = f'Array({ring})'
9 | multi_polygon = f'Array({polygon})'
10 |
11 | geometric.POINT_DATA_TYPE = registry.get_from_name(point)
12 | geometric.RING_DATA_TYPE = registry.get_from_name(ring)
13 | geometric.POLYGON_DATA_TYPE = registry.get_from_name(polygon)
14 | geometric.MULTI_POLYGON_DATA_TYPE = registry.get_from_name(multi_polygon)
15 |
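# Shape reference for the geo aliases registered above (illustrative Python values):
#     Point        -> (10.0, 10.0)
#     Ring         -> [(0.0, 0.0), (10.0, 0.0), (10.0, 10.0)]
#     Polygon      -> [ring, ring, ...]
#     MultiPolygon -> [polygon, polygon, ...]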
--------------------------------------------------------------------------------
/clickhouse_connect/datatypes/registry.py:
--------------------------------------------------------------------------------
1 | import logging
2 |
3 | from typing import Tuple, Dict
4 | from clickhouse_connect.datatypes.base import TypeDef, ClickHouseType, type_map
5 | from clickhouse_connect.driver.exceptions import InternalError
6 | from clickhouse_connect.driver.parser import parse_enum, parse_callable, parse_columns
7 |
8 | logger = logging.getLogger(__name__)
9 | type_cache: Dict[str, ClickHouseType] = {}
10 |
11 |
12 | def parse_name(name: str) -> Tuple[str, str, TypeDef]:
13 | """
14 | Converts a ClickHouse type name into the base class and the definition (TypeDef) needed for any
15 | additional instantiation
16 | :param name: ClickHouse type name as returned by clickhouse
17 | :return: The original base name (before arguments), the full name as passed in and the TypeDef object that
18 | captures any additional arguments
19 | """
20 | base = name
21 | wrappers = []
22 | keys = tuple()
23 | if base.startswith('LowCardinality'):
24 | wrappers.append('LowCardinality')
25 | base = base[15:-1]
26 | if base.startswith('Nullable'):
27 | wrappers.append('Nullable')
28 | base = base[9:-1]
29 | if base.startswith('Enum'):
30 | keys, values = parse_enum(base)
31 | base = base[:base.find('(')]
32 | elif base.startswith('Nested'):
33 | keys, values = parse_columns(base[6:])
34 | base = 'Nested'
35 | elif base.startswith('Tuple'):
36 | keys, values = parse_columns(base[5:])
37 | base = 'Tuple'
38 | elif base.startswith('Variant'):
39 | keys, values = parse_columns(base[7:])
40 | base = 'Variant'
41 | elif base.startswith('JSON') and len(base) > 4 and base[4] == '(':
42 | keys, values = parse_columns(base[4:])
43 | base = 'JSON'
44 | elif base == 'Point':
45 | values = ('Float64', 'Float64')
46 | else:
47 | try:
48 | base, values, _ = parse_callable(base)
49 | except IndexError:
50 | raise InternalError(f'Can not parse ClickHouse data type: {name}') from None
51 | return base, name, TypeDef(tuple(wrappers), keys, values)
52 |
53 |
54 | def get_from_name(name: str) -> ClickHouseType:
55 | """
56 | Returns the ClickHouseType instance parsed from the ClickHouse type name. Instances are cached
57 | :param name: ClickHouse type name as returned by ClickHouse in WithNamesAndTypes FORMAT or the Native protocol
58 | :return: The instance of the ClickHouse Type
59 | """
60 | ch_type = type_cache.get(name, None)
61 | if not ch_type:
62 | base, name, type_def = parse_name(name)
63 | try:
64 | ch_type = type_map[base].build(type_def)
65 | except KeyError:
66 | err_str = f'Unrecognized ClickHouse type base: {base} name: {name}'
67 | logger.error(err_str)
68 | raise InternalError(err_str) from None
69 | type_cache[name] = ch_type
70 | return ch_type
71 |
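# Illustrative round trip through the registry cache:
#     ch_type = get_from_name('LowCardinality(Nullable(String))')
#     ch_type is get_from_name('LowCardinality(Nullable(String))')  # True; instances are cached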
--------------------------------------------------------------------------------
/clickhouse_connect/datatypes/special.py:
--------------------------------------------------------------------------------
1 | from typing import Union, Sequence, MutableSequence, Any
2 | from uuid import UUID as PYUUID
3 |
4 | from clickhouse_connect.datatypes.base import TypeDef, ClickHouseType, ArrayType, UnsupportedType
5 | from clickhouse_connect.datatypes.registry import get_from_name
6 | from clickhouse_connect.driver.common import first_value
7 | from clickhouse_connect.driver.ctypes import data_conv
8 | from clickhouse_connect.driver.insert import InsertContext
9 | from clickhouse_connect.driver.query import QueryContext
10 | from clickhouse_connect.driver.types import ByteSource
11 |
12 | empty_uuid_b = bytes(b'\x00' * 16)
13 |
14 |
15 | class UUID(ClickHouseType):
16 | valid_formats = 'string', 'native'
17 | np_type = 'U36'
18 | byte_size = 16
19 |
20 | def python_null(self, ctx):
21 | return '' if self.read_format(ctx) == 'string' else PYUUID(int=0)
22 |
23 | def _read_column_binary(self, source: ByteSource, num_rows: int, ctx: QueryContext, _read_state: Any):
24 | if self.read_format(ctx) == 'string':
25 | return self._read_binary_str(source, num_rows)
26 | return data_conv.read_uuid_col(source, num_rows)
27 |
28 | @staticmethod
29 | def _read_binary_str(source: ByteSource, num_rows: int):
30 | v = source.read_array('Q', num_rows * 2)
31 | column = []
32 | app = column.append
33 | for i in range(num_rows):
34 | ix = i << 1
35 | x = f'{(v[ix] << 64 | v[ix + 1]):032x}'
36 | app(f'{x[:8]}-{x[8:12]}-{x[12:16]}-{x[16:20]}-{x[20:]}')
37 | return column
38 |
39 | # pylint: disable=too-many-branches
40 | def _write_column_binary(self, column: Union[Sequence, MutableSequence], dest: bytearray, ctx: InsertContext):
41 | first = first_value(column, self.nullable)
42 | empty = empty_uuid_b
43 | if isinstance(first, str) or self.write_format(ctx) == 'string':
44 | for v in column:
45 | if v:
46 | x = int(v.replace('-', ''), 16)
47 | dest += (x >> 64).to_bytes(8, 'little') + (x & 0xffffffffffffffff).to_bytes(8, 'little')
48 | else:
49 | dest += empty
50 | elif isinstance(first, int):
51 | for x in column:
52 | if x:
53 | dest += (x >> 64).to_bytes(8, 'little') + (x & 0xffffffffffffffff).to_bytes(8, 'little')
54 | else:
55 | dest += empty
56 | elif isinstance(first, PYUUID):
57 | for v in column:
58 | if v:
59 | x = v.int
60 | dest += (x >> 64).to_bytes(8, 'little') + (x & 0xffffffffffffffff).to_bytes(8, 'little')
61 | else:
62 | dest += empty
63 | elif isinstance(first, (bytes, bytearray, memoryview)):
64 | for v in column:
65 | if v:
66 | dest += bytes(reversed(v[:8])) + bytes(reversed(v[8:]))
67 | else:
68 | dest += empty
69 | else:
70 | dest += empty * len(column)
71 |
72 |
73 | class Nothing(ArrayType):
74 | _array_type = 'b'
75 |
76 | def __init__(self, type_def: TypeDef):
77 | super().__init__(type_def)
78 | self.nullable = True
79 |
80 | def _write_column_binary(self, column: Union[Sequence, MutableSequence], dest: bytearray, _ctx):
81 | dest += bytes(0x30 for _ in range(len(column)))
82 |
83 |
84 | class SimpleAggregateFunction(ClickHouseType):
85 | _slots = ('element_type',)
86 |
87 | def __init__(self, type_def: TypeDef):
88 | super().__init__(type_def)
89 | self.element_type: ClickHouseType = get_from_name(type_def.values[1])
90 | self._name_suffix = type_def.arg_str
91 | self.byte_size = self.element_type.byte_size
92 | self.np_type = self.element_type.np_type
93 | self.python_type = self.element_type.python_type
94 | self.nano_divisor = self.element_type.nano_divisor
95 |
96 | def _data_size(self, sample: Sequence) -> int:
97 | return self.element_type.data_size(sample)
98 |
99 | def read_column_prefix(self, source: ByteSource, ctx: QueryContext):
100 | return self.element_type.read_column_prefix(source, ctx)
101 |
102 | def write_column_prefix(self, dest: bytearray):
103 | self.element_type.write_column_prefix(dest)
104 |
105 | def _read_column_binary(self, source: ByteSource, num_rows: int, ctx: QueryContext, read_state: Any):
106 | return self.element_type.read_column_data(source, num_rows, ctx, read_state)
107 |
108 | def _write_column_binary(self, column: Union[Sequence, MutableSequence], dest: bytearray, ctx: InsertContext):
109 | self.element_type.write_column_data(column, dest, ctx)
110 |
111 |
112 | class AggregateFunction(UnsupportedType):
113 | pass
114 |
--------------------------------------------------------------------------------
/clickhouse_connect/dbapi/__init__.py:
--------------------------------------------------------------------------------
1 | from typing import Optional
2 |
3 | from clickhouse_connect.dbapi.connection import Connection
4 |
5 |
6 | apilevel = '2.0' # PEP 249 DB API level
7 | threadsafety = 2 # PEP 249 Threads may share the module and connections.
8 | paramstyle = 'pyformat' # PEP 249 Python extended format codes, e.g. ...WHERE name=%(name)s
9 |
10 |
11 | class Error(Exception):
12 | pass
13 |
14 |
15 | def connect(host: Optional[str] = None,
16 | database: Optional[str] = None,
17 | username: Optional[str] = '',
18 | password: Optional[str] = '',
19 | port: Optional[int] = None,
20 | **kwargs):
21 | secure = kwargs.pop('secure', False)
22 | return Connection(host=host,
23 | database=database,
24 | username=username,
25 | password=password,
26 | port=port,
27 | secure=secure,
28 | **kwargs)
29 |
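A minimal usage sketch of this DB API module, assuming a reachable server on localhost:8123 with the default user, and that the `Cursor` class provides the standard PEP 249 `execute`/`fetchone` methods:

    from clickhouse_connect import dbapi

    conn = dbapi.connect(host='localhost', username='default', password='')
    cur = conn.cursor()
    cur.execute('SELECT version()')
    print(cur.fetchone())
    conn.close()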
--------------------------------------------------------------------------------
/clickhouse_connect/dbapi/connection.py:
--------------------------------------------------------------------------------
1 | from typing import Union
2 |
3 | from clickhouse_connect.dbapi.cursor import Cursor
4 | from clickhouse_connect.driver import create_client
5 | from clickhouse_connect.driver.query import QueryResult
6 |
7 |
8 | class Connection:
9 | """
10 |     See the DB API 2.0 specification: https://peps.python.org/pep-0249/
11 | """
12 | # pylint: disable=too-many-arguments
13 | def __init__(self,
14 | dsn: str = None,
15 | username: str = '',
16 | password: str = '',
17 | host: str = None,
18 | database: str = None,
19 | interface: str = None,
20 | port: int = 0,
21 | secure: Union[bool, str] = False,
22 | **kwargs):
23 | self.client = create_client(host=host,
24 | username=username,
25 | password=password,
26 | database=database,
27 | interface=interface,
28 | port=port,
29 | secure=secure,
30 | dsn=dsn,
31 | generic_args=kwargs)
32 | self.timezone = self.client.server_tz
33 |
34 | def close(self):
35 | self.client.close()
36 |
37 | def commit(self):
38 | pass
39 |
40 | def rollback(self):
41 | pass
42 |
43 | def command(self, cmd: str):
44 | return self.client.command(cmd)
45 |
46 | def raw_query(self, query: str) -> QueryResult:
47 | return self.client.query(query)
48 |
49 | def cursor(self):
50 | return Cursor(self.client)
51 |
--------------------------------------------------------------------------------
/clickhouse_connect/driver/buffer.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import array
3 | from typing import Any, Iterable
4 |
5 | from clickhouse_connect.driver.exceptions import StreamCompleteException
6 | from clickhouse_connect.driver.types import ByteSource
7 |
8 | must_swap = sys.byteorder == 'big'
9 |
10 |
11 | class ResponseBuffer(ByteSource):
12 |     __slots__ = 'slice_sz', 'buf_loc', 'buf_sz', 'source', 'gen', 'buffer'
13 |
14 | def __init__(self, source):
15 | self.slice_sz = 4096
16 | self.buf_loc = 0
17 | self.buf_sz = 0
18 | self.source = source
19 | self.gen = source.gen
20 | self.buffer = bytes()
21 |
22 | def read_bytes(self, sz: int):
23 | if self.buf_loc + sz <= self.buf_sz:
24 | self.buf_loc += sz
25 | return self.buffer[self.buf_loc - sz: self.buf_loc]
26 | # Create a temporary buffer that bridges two or more source chunks
27 | bridge = bytearray(self.buffer[self.buf_loc: self.buf_sz])
28 | self.buf_loc = 0
29 | self.buf_sz = 0
30 | while len(bridge) < sz:
31 | chunk = next(self.gen, None)
32 | if not chunk:
33 | raise StreamCompleteException
34 | x = len(chunk)
35 | if len(bridge) + x <= sz:
36 | bridge.extend(chunk)
37 | else:
38 | tail = sz - len(bridge)
39 | bridge.extend(chunk[:tail])
40 | self.buffer = chunk
41 | self.buf_sz = x
42 | self.buf_loc = tail
43 | return bridge
44 |
45 | def read_byte(self) -> int:
46 | if self.buf_loc < self.buf_sz:
47 | self.buf_loc += 1
48 | return self.buffer[self.buf_loc - 1]
49 | self.buf_sz = 0
50 | self.buf_loc = 0
51 | chunk = next(self.gen, None)
52 | if not chunk:
53 | raise StreamCompleteException
54 | x = len(chunk)
55 | if x > 1:
56 | self.buffer = chunk
57 | self.buf_loc = 1
58 | self.buf_sz = x
59 | return chunk[0]
60 |
61 | def read_leb128(self) -> int:
62 | sz = 0
63 | shift = 0
64 | while True:
65 | b = self.read_byte()
66 | sz += ((b & 0x7f) << shift)
67 | if (b & 0x80) == 0:
68 | return sz
69 | shift += 7
70 |
71 | def read_leb128_str(self) -> str:
72 | sz = self.read_leb128()
73 | return self.read_bytes(sz).decode()
74 |
75 | def read_uint64(self) -> int:
76 | return int.from_bytes(self.read_bytes(8), 'little', signed=False)
77 |
78 | def read_str_col(self,
79 | num_rows: int,
80 | encoding: str,
81 | nullable: bool = False,
82 | null_obj: Any = None) -> Iterable[str]:
83 | column = []
84 | app = column.append
85 | null_map = self.read_bytes(num_rows) if nullable else None
86 | for ix in range(num_rows):
87 | sz = 0
88 | shift = 0
89 | while True:
90 | b = self.read_byte()
91 | sz += ((b & 0x7f) << shift)
92 | if (b & 0x80) == 0:
93 | break
94 | shift += 7
95 | x = self.read_bytes(sz)
96 | if null_map and null_map[ix]:
97 | app(null_obj)
98 | elif encoding:
99 | try:
100 | app(x.decode(encoding))
101 | except UnicodeDecodeError:
102 | app(x.hex())
103 | else:
104 | app(x)
105 | return column
106 |
107 | def read_bytes_col(self, sz: int, num_rows: int) -> Iterable[bytes]:
108 | source = self.read_bytes(sz * num_rows)
109 | return [bytes(source[x:x+sz]) for x in range(0, sz * num_rows, sz)]
110 |
111 | def read_fixed_str_col(self, sz: int, num_rows: int, encoding: str) -> Iterable[str]:
112 | source = self.read_bytes(sz * num_rows)
113 | column = []
114 | app = column.append
115 | for ix in range(0, sz * num_rows, sz):
116 | try:
117 | app(str(source[ix: ix + sz], encoding).rstrip('\x00'))
118 | except UnicodeDecodeError:
119 | app(source[ix: ix + sz].hex())
120 | return column
121 |
122 | def read_array(self, array_type: str, num_rows: int) -> Iterable[Any]:
123 | column = array.array(array_type)
124 | sz = column.itemsize * num_rows
125 | b = self.read_bytes(sz)
126 | column.frombytes(b)
127 | if must_swap:
128 | column.byteswap()
129 | return column
130 |
131 | @property
132 | def last_message(self) -> bytes:
133 | return self.buffer
134 |
135 | def close(self):
136 | if self.source:
137 | self.source.close()
138 | self.source = None
139 |
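The `read_leb128` loop above is standard unsigned LEB128 decoding: each byte carries seven payload bits, and a clear high bit marks the final byte. A self-contained sketch using the classic worked example:

    def leb128_decode(data: bytes) -> int:
        value, shift = 0, 0
        for b in data:
            value += (b & 0x7f) << shift   # low 7 bits carry the payload
            if (b & 0x80) == 0:            # high bit clear means last byte
                return value
            shift += 7
        raise ValueError('truncated LEB128 sequence')

    assert leb128_decode(bytes([0xe5, 0x8e, 0x26])) == 624485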
--------------------------------------------------------------------------------
/clickhouse_connect/driver/compression.py:
--------------------------------------------------------------------------------
1 | import zlib
2 | from abc import abstractmethod
3 | from typing import Union
4 |
5 | import lz4
6 | import lz4.frame
7 | import zstandard
8 |
9 | try:
10 | import brotli
11 | except ImportError:
12 | brotli = None
13 |
14 |
15 | available_compression = ['lz4', 'zstd']
16 |
17 | if brotli:
18 | available_compression.append('br')
19 | available_compression.extend(['gzip', 'deflate'])
20 |
21 | comp_map = {}
22 |
23 |
24 | class Compressor:
25 | def __init_subclass__(cls, tag: str, thread_safe: bool = True):
26 | comp_map[tag] = cls() if thread_safe else cls
27 |
28 | @abstractmethod
29 | def compress_block(self, block) -> Union[bytes, bytearray]:
30 | return block
31 |
32 | def flush(self):
33 | pass
34 |
35 |
36 | class GzipCompressor(Compressor, tag='gzip', thread_safe=False):
37 | def __init__(self, level: int = 6, wbits: int = 31):
38 | self.zlib_obj = zlib.compressobj(level=level, wbits=wbits)
39 |
40 | def compress_block(self, block):
41 | return self.zlib_obj.compress(block)
42 |
43 | def flush(self):
44 | return self.zlib_obj.flush()
45 |
46 |
47 | class Lz4Compressor(Compressor, tag='lz4', thread_safe=False):
48 | def __init__(self):
49 | self.comp = lz4.frame.LZ4FrameCompressor()
50 |
51 | def compress_block(self, block):
52 | output = self.comp.begin(len(block))
53 | output += self.comp.compress(block)
54 | return output + self.comp.flush()
55 |
56 |
57 | class ZstdCompressor(Compressor, tag='zstd'):
58 | def compress_block(self, block):
59 | return zstandard.compress(block)
60 |
61 |
62 | class BrotliCompressor(Compressor, tag='br'):
63 | def compress_block(self, block):
64 | return brotli.compress(block)
65 |
66 |
67 | null_compressor = Compressor()
68 |
69 |
70 | def get_compressor(compression: str) -> Compressor:
71 | if not compression:
72 | return null_compressor
73 | comp = comp_map[compression]
74 | try:
75 | return comp()
76 | except TypeError:
77 | return comp
78 |
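A brief sketch of the `get_compressor` contract: thread-safe compressors are shared module-level instances, while stateful ones (stored as classes in `comp_map`) are instantiated on every call:

    from clickhouse_connect.driver.compression import get_compressor

    zstd = get_compressor('zstd')                      # shared, stateless instance
    block = zstd.compress_block(b'clickhouse ' * 1000)

    gz = get_compressor('gzip')                        # fresh instance wrapping a zlib compressobj
    data = gz.compress_block(b'clickhouse ' * 1000) + gz.flush()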
--------------------------------------------------------------------------------
/clickhouse_connect/driver/constants.py:
--------------------------------------------------------------------------------
1 | PROTOCOL_VERSION_WITH_LOW_CARD = 54405
2 | CH_VERSION_WITH_PROTOCOL = '23.2.1.2537'
3 |
--------------------------------------------------------------------------------
/clickhouse_connect/driver/context.py:
--------------------------------------------------------------------------------
1 | import logging
2 | import re
3 | from typing import Optional, Dict, Union, Any
4 |
5 | logger = logging.getLogger(__name__)
6 |
7 | _empty_map = {}
8 |
9 |
10 | # pylint: disable=too-many-instance-attributes
11 | class BaseQueryContext:
12 |
13 | def __init__(self,
14 | settings: Optional[Dict[str, Any]] = None,
15 | query_formats: Optional[Dict[str, str]] = None,
16 | column_formats: Optional[Dict[str, Union[str, Dict[str, str]]]] = None,
17 | encoding: Optional[str] = None,
18 | use_extended_dtypes: bool = False,
19 | use_numpy: bool = False,
20 | transport_settings: Optional[Dict[str, str]] = None):
21 | self.settings = settings or {}
22 | if query_formats is None:
23 | self.type_formats = _empty_map
24 | else:
25 | self.type_formats = {re.compile(type_name.replace('*', '.*'), re.IGNORECASE): fmt
26 | for type_name, fmt in query_formats.items()}
27 | if column_formats is None:
28 | self.col_simple_formats = _empty_map
29 | self.col_type_formats = _empty_map
30 | else:
31 | self.col_simple_formats = {col_name: fmt for col_name, fmt in column_formats.items() if
32 | isinstance(fmt, str)}
33 | self.col_type_formats = {}
34 | for col_name, fmt in column_formats.items():
35 | if not isinstance(fmt, str):
36 | self.col_type_formats[col_name] = {re.compile(type_name.replace('*', '.*'), re.IGNORECASE): fmt
37 | for type_name, fmt in fmt.items()}
38 | self.query_formats = query_formats or {}
39 | self.column_formats = column_formats or {}
40 | self.transport_settings = transport_settings
41 | self.column_name = None
42 | self.encoding = encoding
43 | self.use_numpy = use_numpy
44 | self.use_extended_dtypes = use_extended_dtypes
45 | self._active_col_fmt = None
46 | self._active_col_type_fmts = _empty_map
47 |
48 | def start_column(self, name: str):
49 | self.column_name = name
50 | self._active_col_fmt = self.col_simple_formats.get(name)
51 | self._active_col_type_fmts = self.col_type_formats.get(name, _empty_map)
52 |
53 | def active_fmt(self, ch_type):
54 | if self._active_col_fmt:
55 | return self._active_col_fmt
56 | for type_pattern, fmt in self._active_col_type_fmts.items():
57 | if type_pattern.match(ch_type):
58 | return fmt
59 | for type_pattern, fmt in self.type_formats.items():
60 | if type_pattern.match(ch_type):
61 | return fmt
62 | return None
63 |
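A sketch of the format resolution order implemented by `active_fmt`: a simple per-column format wins first, then per-column type patterns, then query-level type patterns (a `*` wildcard compiles to the regex `.*`):

    from clickhouse_connect.driver.context import BaseQueryContext

    ctx = BaseQueryContext(query_formats={'IP*': 'string'},
                           column_formats={'dest': {'*Int64': 'string'}})
    ctx.start_column('dest')
    print(ctx.active_fmt('UInt64'))   # 'string' via the per-column type pattern
    print(ctx.active_fmt('IPv4'))     # 'string' via the query-level pattern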
--------------------------------------------------------------------------------
/clickhouse_connect/driver/ctypes.py:
--------------------------------------------------------------------------------
1 | import logging
2 | import os
3 |
4 | import clickhouse_connect.driver.dataconv as pydc
5 | import clickhouse_connect.driver.npconv as pync
6 | from clickhouse_connect.driver.buffer import ResponseBuffer
7 | from clickhouse_connect.driver.common import coerce_bool
8 |
9 | logger = logging.getLogger(__name__)
10 |
11 | RespBuffCls = ResponseBuffer
12 | data_conv = pydc
13 | numpy_conv = pync
14 |
15 |
16 | # pylint: disable=import-outside-toplevel,global-statement
17 |
18 | def connect_c_modules():
19 | if not coerce_bool(os.environ.get('CLICKHOUSE_CONNECT_USE_C', True)):
20 | logger.info('ClickHouse Connect C optimizations disabled')
21 | return
22 |
23 | global RespBuffCls, data_conv
24 | try:
25 | from clickhouse_connect.driverc.buffer import ResponseBuffer as CResponseBuffer
26 | import clickhouse_connect.driverc.dataconv as cdc
27 |
28 | data_conv = cdc
29 | RespBuffCls = CResponseBuffer
30 | logger.debug('Successfully imported ClickHouse Connect C data optimizations')
31 | connect_numpy()
32 | except ImportError as ex:
33 | logger.warning('Unable to connect optimized C data functions [%s], falling back to pure Python',
34 | str(ex))
35 |
36 |
37 | def connect_numpy():
38 | global numpy_conv
39 | try:
40 | import clickhouse_connect.driverc.npconv as cnc
41 |
42 | numpy_conv = cnc
43 |         logger.debug('Successfully imported ClickHouse Connect C/Numpy optimizations')
44 | except ImportError as ex:
45 | logger.debug('Unable to connect ClickHouse Connect C to Numpy API [%s], falling back to pure Python',
46 | str(ex))
47 |
48 |
49 | connect_c_modules()
50 |
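Because `connect_c_modules` checks the environment at import time, the C extensions can be disabled before the package is first imported; a sketch, assuming `coerce_bool` recognizes the string 'false':

    import os
    os.environ['CLICKHOUSE_CONNECT_USE_C'] = 'false'   # must be set before the first import

    import clickhouse_connect   # the driver logs that C optimizations are disabled and stays pure Python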
--------------------------------------------------------------------------------
/clickhouse_connect/driver/dataconv.py:
--------------------------------------------------------------------------------
1 | import array
2 | from datetime import datetime, date, tzinfo
3 | from ipaddress import IPv4Address
4 | from typing import Sequence, Optional, Any
5 | from uuid import UUID, SafeUUID
6 |
7 | from clickhouse_connect.driver.common import int_size
8 | from clickhouse_connect.driver.errors import NONE_IN_NULLABLE_COLUMN
9 | from clickhouse_connect.driver.types import ByteSource
10 | from clickhouse_connect.driver.options import np
11 |
12 |
13 | MONTH_DAYS = (0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365)
14 | MONTH_DAYS_LEAP = (0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366)
15 |
16 |
17 | def read_ipv4_col(source: ByteSource, num_rows: int):
18 | column = source.read_array('I', num_rows)
19 | fast_ip_v4 = IPv4Address.__new__
20 | new_col = []
21 | app = new_col.append
22 | for x in column:
23 | ipv4 = fast_ip_v4(IPv4Address)
24 | ipv4._ip = x # pylint: disable=protected-access
25 | app(ipv4)
26 | return new_col
27 |
28 |
29 | def read_datetime_col(source: ByteSource, num_rows: int, tz_info: Optional[tzinfo]):
30 | src_array = source.read_array('I', num_rows)
31 | if tz_info is None:
32 | fts = datetime.utcfromtimestamp
33 | return [fts(ts) for ts in src_array]
34 | fts = datetime.fromtimestamp
35 | return [fts(ts, tz_info) for ts in src_array]
36 |
37 |
38 | def epoch_days_to_date(days: int) -> date:
39 | cycles400, rem = divmod(days + 134774, 146097)
40 | cycles100, rem = divmod(rem, 36524)
41 | cycles, rem = divmod(rem, 1461)
42 | years, rem = divmod(rem, 365)
43 | year = (cycles << 2) + cycles400 * 400 + cycles100 * 100 + years + 1601
44 | if years == 4 or cycles100 == 4:
45 | return date(year - 1, 12, 31)
46 | m_list = MONTH_DAYS_LEAP if years == 3 and (year == 2000 or year % 100 != 0) else MONTH_DAYS
47 | month = (rem + 24) >> 5
48 | while rem < m_list[month]:
49 | month -= 1
50 | return date(year, month + 1, rem + 1 - m_list[month])
51 |
52 |
53 | def read_date_col(source: ByteSource, num_rows: int):
54 | column = source.read_array('H', num_rows)
55 | return [epoch_days_to_date(x) for x in column]
56 |
57 |
58 | def read_date32_col(source: ByteSource, num_rows: int):
59 | column = source.read_array('l' if int_size == 2 else 'i', num_rows)
60 | return [epoch_days_to_date(x) for x in column]
61 |
62 |
63 | def read_uuid_col(source: ByteSource, num_rows: int):
64 | v = source.read_array('Q', num_rows * 2)
65 | empty_uuid = UUID(int=0)
66 | new_uuid = UUID.__new__
67 | unsafe = SafeUUID.unsafe
68 | oset = object.__setattr__
69 | column = []
70 | app = column.append
71 | for i in range(num_rows):
72 | ix = i << 1
73 | int_value = v[ix] << 64 | v[ix + 1]
74 | if int_value == 0:
75 | app(empty_uuid)
76 | else:
77 | fast_uuid = new_uuid(UUID)
78 | oset(fast_uuid, 'int', int_value)
79 | oset(fast_uuid, 'is_safe', unsafe)
80 | app(fast_uuid)
81 | return column
82 |
83 |
84 | def read_nullable_array(source: ByteSource, array_type: str, num_rows: int, null_obj: Any):
85 | null_map = source.read_bytes(num_rows)
86 | column = source.read_array(array_type, num_rows)
87 | return [null_obj if null_map[ix] else column[ix] for ix in range(num_rows)]
88 |
89 |
90 | def build_nullable_column(source: Sequence, null_map: bytes, null_obj: Any):
91 | return [source[ix] if null_map[ix] == 0 else null_obj for ix in range(len(source))]
92 |
93 |
94 | def build_lc_nullable_column(index: Sequence, keys: array.array, null_obj: Any):
95 | column = []
96 | for key in keys:
97 | if key == 0:
98 | column.append(null_obj)
99 | else:
100 | column.append(index[key])
101 | return column
102 |
103 |
104 | def to_numpy_array(column: Sequence):
105 |     arr = np.empty((len(column),), dtype=np.object_)
106 | arr[:] = column
107 | return arr
108 |
109 |
110 | def pivot(data: Sequence[Sequence], start_row: int, end_row: int) -> Sequence[Sequence]:
111 | return tuple(zip(*data[start_row: end_row]))
112 |
113 |
114 | def write_str_col(column: Sequence, nullable: bool, encoding: Optional[str], dest: bytearray) -> int:
115 | app = dest.append
116 | for x in column:
117 | if not x:
118 | if not nullable and x is None:
119 | return NONE_IN_NULLABLE_COLUMN
120 | app(0)
121 | else:
122 | if encoding:
123 | x = x.encode(encoding)
124 | else:
125 | x = bytes(x)
126 | sz = len(x)
127 | while True:
128 | b = sz & 0x7f
129 | sz >>= 7
130 | if sz == 0:
131 | app(b)
132 | break
133 | app(0x80 | b)
134 | dest += x
135 | return 0
136 |
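`epoch_days_to_date` converts a days-since-epoch count into a `date` with 400/100/4-year Gregorian cycle arithmetic, avoiding a `datetime` construction per row. A quick boundary check:

    from clickhouse_connect.driver.dataconv import epoch_days_to_date

    print(epoch_days_to_date(0))        # 1970-01-01, day zero of the Unix epoch
    print(epoch_days_to_date(19000))    # 2022-01-08 (18993 days reach 2022-01-01)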
--------------------------------------------------------------------------------
/clickhouse_connect/driver/ddl.py:
--------------------------------------------------------------------------------
1 | from typing import NamedTuple, Sequence
2 |
3 | from clickhouse_connect.datatypes.base import ClickHouseType
4 |
5 |
6 | class TableColumnDef(NamedTuple):
7 | """
8 | Simplified ClickHouse Table Column definition for DDL
9 | """
10 | name: str
11 | ch_type: ClickHouseType
12 | expr_type: str = None
13 | expr: str = None
14 |
15 | @property
16 | def col_expr(self):
17 | expr = f'{self.name} {self.ch_type.name}'
18 | if self.expr_type:
19 | expr += f' {self.expr_type} {self.expr}'
20 | return expr
21 |
22 |
23 | def create_table(table_name: str, columns: Sequence[TableColumnDef], engine: str, engine_params: dict):
24 | stmt = f"CREATE TABLE {table_name} ({', '.join(col.col_expr for col in columns)}) ENGINE {engine} "
25 | if engine_params:
26 | for key, value in engine_params.items():
27 | stmt += f' {key} {value}'
28 | return stmt
29 |
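A sketch of the DDL these helpers produce; the table and column names are illustrative:

    from clickhouse_connect.datatypes.registry import get_from_name
    from clickhouse_connect.driver.ddl import TableColumnDef, create_table

    cols = [TableColumnDef('id', get_from_name('UInt64')),
            TableColumnDef('name', get_from_name('String'))]
    print(create_table('example_table', cols, 'MergeTree', {'ORDER BY': 'id'}))
    # CREATE TABLE example_table (id UInt64, name String) ENGINE MergeTree  ORDER BY id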
--------------------------------------------------------------------------------
/clickhouse_connect/driver/errors.py:
--------------------------------------------------------------------------------
1 | from clickhouse_connect.driver.context import BaseQueryContext
2 | from clickhouse_connect.driver.exceptions import DataError
3 |
4 |
5 | # Error codes used in the Cython API
6 | NO_ERROR = 0
7 | NONE_IN_NULLABLE_COLUMN = 1
8 |
9 | error_messages = {NONE_IN_NULLABLE_COLUMN: 'Invalid None value in non-Nullable column'}
10 |
11 |
12 | def handle_error(error_num: int, ctx: BaseQueryContext):
13 | if error_num > 0:
14 | msg = error_messages[error_num]
15 | if ctx.column_name:
16 | msg = f'{msg}, column name: `{ctx.column_name}`'
17 | raise DataError(msg)
18 |
--------------------------------------------------------------------------------
/clickhouse_connect/driver/exceptions.py:
--------------------------------------------------------------------------------
1 | """
2 | The driver exception classes here include all named exceptions required by the DB API 2.0 specification. It's not clear
3 | how useful that naming convention is, but the convention is followed for improved compatibility with other
4 | libraries. In most cases the docstrings are taken from the DB API 2.0 documentation.
5 | """
6 |
7 |
8 | class ClickHouseError(Exception):
9 | """Exception related to operation with ClickHouse."""
10 |
11 |
12 | # pylint: disable=redefined-builtin
13 | class Warning(Warning, ClickHouseError):
14 | """Exception raised for important warnings like data truncations
15 | while inserting, etc."""
16 |
17 |
18 | class Error(ClickHouseError):
19 | """Exception that is the base class of all other error exceptions
20 | (not Warning)."""
21 |
22 |
23 | class InterfaceError(Error):
24 | """Exception raised for errors that are related to the database
25 | interface rather than the database itself."""
26 |
27 |
28 | class DatabaseError(Error):
29 | """Exception raised for errors that are related to the
30 | database."""
31 |
32 |
33 | class DataError(DatabaseError):
34 | """Exception raised for errors that are due to problems with the
35 | processed data like division by zero, numeric value out of range,
36 | etc."""
37 |
38 |
39 | class OperationalError(DatabaseError):
40 | """Exception raised for errors that are related to the database's
41 | operation and not necessarily under the control of the programmer,
42 | e.g. an unexpected disconnect occurs, the data source name is not
43 | found, a transaction could not be processed, a memory allocation
44 | error occurred during processing, etc."""
45 |
46 |
47 | class IntegrityError(DatabaseError):
48 | """Exception raised when the relational integrity of the database
49 | is affected, e.g. a foreign key check fails, duplicate key,
50 | etc."""
51 |
52 |
53 | class InternalError(DatabaseError):
54 | """Exception raised when the database encounters an internal
55 | error, e.g. the cursor is not valid anymore, the transaction is
56 | out of sync, etc."""
57 |
58 |
59 | class ProgrammingError(DatabaseError):
60 | """Exception raised for programming errors, e.g. table not found
61 | or already exists, syntax error in the SQL statement, wrong number
62 | of parameters specified, etc."""
63 |
64 |
65 | class NotSupportedError(DatabaseError):
66 | """Exception raised in case a method or database API was used
67 | which is not supported by the database, e.g. requesting a
68 | .rollback() on a connection that does not support transaction or
69 | has transactions turned off."""
70 |
71 |
72 | class StreamClosedError(ProgrammingError):
73 | """Exception raised when a stream operation is executed on a closed stream."""
74 |
75 | def __init__(self):
76 | super().__init__('Executing a streaming operation on a closed stream')
77 |
78 |
79 | class StreamCompleteException(Exception):
80 | """ Internal exception used to indicate the end of a ClickHouse query result stream."""
81 |
82 |
83 | class StreamFailureError(Exception):
84 | """ Stream failed unexpectedly """
85 |
--------------------------------------------------------------------------------
/clickhouse_connect/driver/models.py:
--------------------------------------------------------------------------------
1 | from typing import NamedTuple
2 |
3 | from clickhouse_connect.datatypes.registry import get_from_name
4 |
5 |
6 | class ColumnDef(NamedTuple):
7 | """
8 | ClickHouse column definition from DESCRIBE TABLE command
9 | """
10 | name: str
11 | type: str
12 | default_type: str
13 | default_expression: str
14 | comment: str
15 | codec_expression: str
16 | ttl_expression: str
17 |
18 | @property
19 | def type_name(self):
20 | return self.type.replace('\n', '').strip()
21 |
22 | @property
23 | def ch_type(self):
24 | return get_from_name(self.type_name)
25 |
26 |
27 | class SettingDef(NamedTuple):
28 | """
29 | ClickHouse setting definition from system.settings table
30 | """
31 | name: str
32 | value: str
33 | readonly: int
34 |
35 |
36 | class SettingStatus(NamedTuple):
37 | """
38 |     The "status" of a ClickHouse server setting: whether it is set and whether it is writable
39 | """
40 | is_set: bool
41 | is_writable: bool
42 |
--------------------------------------------------------------------------------
/clickhouse_connect/driver/npconv.py:
--------------------------------------------------------------------------------
1 | from clickhouse_connect.driver.options import np
2 |
3 | from clickhouse_connect.driver.types import ByteSource
4 |
5 |
6 | def read_numpy_array(source: ByteSource, np_type: str, num_rows: int):
7 | dtype = np.dtype(np_type)
8 | buffer = source.read_bytes(dtype.itemsize * num_rows)
9 | return np.frombuffer(buffer, dtype, num_rows)
10 |
--------------------------------------------------------------------------------
/clickhouse_connect/driver/npquery.py:
--------------------------------------------------------------------------------
1 | import logging
2 | import itertools
3 | from typing import Generator, Sequence, Tuple
4 |
5 | from clickhouse_connect.driver.common import empty_gen, StreamContext
6 | from clickhouse_connect.driver.exceptions import StreamClosedError
7 | from clickhouse_connect.driver.types import Closable
8 | from clickhouse_connect.driver.options import np, pd
9 |
10 | logger = logging.getLogger(__name__)
11 |
12 |
13 | # pylint: disable=too-many-instance-attributes
14 | class NumpyResult(Closable):
15 | def __init__(self,
16 | block_gen: Generator[Sequence, None, None] = None,
17 | column_names: Tuple = (),
18 | column_types: Tuple = (),
19 | d_types: Sequence = (),
20 | source: Closable = None):
21 | self.column_names = column_names
22 | self.column_types = column_types
23 | self.np_types = d_types
24 | self.source = source
25 | self.query_id = ''
26 | self.summary = {}
27 | self._block_gen = block_gen or empty_gen()
28 | self._numpy_result = None
29 | self._df_result = None
30 |
31 | def _np_stream(self) -> Generator:
32 | if self._block_gen is None:
33 | raise StreamClosedError
34 |
35 | block_gen = self._block_gen
36 | self._block_gen = None
37 | if not self.np_types:
38 | return block_gen
39 |
40 | d_types = self.np_types
41 | first_type = d_types[0]
42 | if first_type != np.object_ and all(np.dtype(np_type) == first_type for np_type in d_types):
43 | self.np_types = first_type
44 |
45 | def numpy_blocks():
46 | for block in block_gen:
47 | yield np.array(block, first_type).transpose()
48 | else:
49 | if any(x == np.object_ for x in d_types):
50 | self.np_types = [np.object_] * len(self.np_types)
51 |             self.np_types = np.dtype(list(zip(self.column_names, self.np_types)))
52 |
53 | def numpy_blocks():
54 | for block in block_gen:
55 | np_array = np.empty(len(block[0]), dtype=self.np_types)
56 | for col_name, data in zip(self.column_names, block):
57 | np_array[col_name] = data
58 | yield np_array
59 |
60 | return numpy_blocks()
61 |
62 | def _df_stream(self) -> Generator:
63 | if self._block_gen is None:
64 | raise StreamClosedError
65 | block_gen = self._block_gen
66 |
67 | def pd_blocks():
68 | for block in block_gen:
69 | yield pd.DataFrame(dict(zip(self.column_names, block)))
70 |
71 | self._block_gen = None
72 | return pd_blocks()
73 |
74 | def close_numpy(self):
75 | if not self._block_gen:
76 | raise StreamClosedError
77 | chunk_size = 4
78 | pieces = []
79 | blocks = []
80 | for block in self._np_stream():
81 | blocks.append(block)
82 | if len(blocks) == chunk_size:
83 | pieces.append(np.concatenate(blocks, dtype=self.np_types))
84 | chunk_size *= 2
85 | blocks = []
86 | pieces.extend(blocks)
87 | if len(pieces) > 1:
88 | self._numpy_result = np.concatenate(pieces, dtype=self.np_types)
89 | elif len(pieces) == 1:
90 | self._numpy_result = pieces[0]
91 | else:
92 | self._numpy_result = np.empty((0,))
93 | self.close()
94 | return self
95 |
96 | def close_df(self):
97 | if self._block_gen is None:
98 | raise StreamClosedError
99 | bg = self._block_gen
100 | chain = itertools.chain
101 | chains = [chain(b) for b in zip(*bg)]
102 | new_df_series = []
103 | for c in chains:
104 | series = [pd.Series(piece, copy=False) for piece in c if len(piece) > 0]
105 | if len(series) > 0:
106 | new_df_series.append(pd.concat(series, copy=False, ignore_index=True))
107 | self._df_result = pd.DataFrame(dict(zip(self.column_names, new_df_series)))
108 | self.close()
109 | return self
110 |
111 | @property
112 | def np_result(self):
113 | if self._numpy_result is None:
114 | self.close_numpy()
115 | return self._numpy_result
116 |
117 | @property
118 | def df_result(self):
119 | if self._df_result is None:
120 | self.close_df()
121 | return self._df_result
122 |
123 | @property
124 | def np_stream(self) -> StreamContext:
125 | return StreamContext(self, self._np_stream())
126 |
127 | @property
128 | def df_stream(self) -> StreamContext:
129 | return StreamContext(self, self._df_stream())
130 |
131 | def close(self):
132 | if self._block_gen is not None:
133 | self._block_gen.close()
134 | self._block_gen = None
135 | if self.source:
136 | self.source.close()
137 | self.source = None
138 |
--------------------------------------------------------------------------------
/clickhouse_connect/driver/options.py:
--------------------------------------------------------------------------------
1 | from clickhouse_connect.driver.exceptions import NotSupportedError
2 |
3 | pd_time_test = None
4 | pd_extended_dtypes = False
5 |
6 | try:
7 | import numpy as np
8 | except ImportError:
9 | np = None
10 |
11 | try:
12 | import pandas as pd
13 | pd_extended_dtypes = not pd.__version__.startswith('0')
14 | try:
15 | from pandas.core.dtypes.common import is_datetime64_dtype
16 | from pandas.core.dtypes.common import is_timedelta64_dtype
17 |
18 | def combined_test(arr_or_dtype):
19 | return is_datetime64_dtype(arr_or_dtype) or is_timedelta64_dtype(arr_or_dtype)
20 |
21 | pd_time_test = combined_test
22 | except ImportError:
23 | try:
24 | from pandas.core.dtypes.common import is_datetime_or_timedelta_dtype
25 | pd_time_test = is_datetime_or_timedelta_dtype
26 | except ImportError as ex:
27 | raise NotSupportedError('pandas version does not contain expected test for temporal types') from ex
28 | except ImportError:
29 | pd = None
30 |
31 | try:
32 | import pyarrow as arrow
33 | except ImportError:
34 | arrow = None
35 |
36 |
37 | def check_numpy():
38 | if np:
39 | return np
40 | raise NotSupportedError('Numpy package is not installed')
41 |
42 |
43 | def check_pandas():
44 | if pd:
45 | return pd
46 | raise NotSupportedError('Pandas package is not installed')
47 |
48 |
49 | def check_arrow():
50 | if arrow:
51 | return arrow
52 | raise NotSupportedError('PyArrow package is not installed')
53 |
--------------------------------------------------------------------------------
/clickhouse_connect/driver/summary.py:
--------------------------------------------------------------------------------
1 | from typing import Optional
2 |
3 | from clickhouse_connect.datatypes.registry import get_from_name
4 |
5 | from clickhouse_connect.driver.query import QueryResult
6 |
7 |
8 | class QuerySummary:
9 | summary = {}
10 |
11 | def __init__(self, summary: Optional[dict] = None):
12 | if summary is not None:
13 | self.summary = summary
14 |
15 | @property
16 | def written_rows(self) -> int:
17 | return int(self.summary.get('written_rows', 0))
18 |
19 |     @property
20 |     def written_bytes(self) -> int:
21 |         return int(self.summary.get('written_bytes', 0))
22 |
23 |     @property
24 |     def query_id(self) -> str:
25 |         return self.summary.get('query_id', '')
26 |
27 |     def as_query_result(self) -> QueryResult:
28 |         data = []
29 |         column_names = []
30 |         column_types = []
31 |         str_type = get_from_name('String')
32 |         int_type = get_from_name('Int64')
33 |         for key, value in self.summary.items():
34 |             column_names.append(key)
35 |             if value.isnumeric():
36 |                 data.append(int(value))
37 |                 column_types.append(int_type)
38 |             else:
39 |                 data.append(value)
40 |                 column_types.append(str_type)
41 |         return QueryResult([data], column_names=tuple(column_names), column_types=tuple(column_types))
42 |
--------------------------------------------------------------------------------
/clickhouse_connect/driver/tools.py:
--------------------------------------------------------------------------------
1 | from typing import Optional, Sequence, Dict, Any
2 |
3 | from clickhouse_connect.driver import Client
4 | from clickhouse_connect.driver.summary import QuerySummary
5 | from clickhouse_connect.driver.binding import quote_identifier
6 |
7 |
8 | def insert_file(client: Client,
9 | table: str,
10 | file_path: str,
11 | fmt: Optional[str] = None,
12 | column_names: Optional[Sequence[str]] = None,
13 | database: Optional[str] = None,
14 | settings: Optional[Dict[str, Any]] = None,
15 | compression: Optional[str] = None) -> QuerySummary:
16 | if not database and table[0] not in ('`', "'") and table.find('.') > 0:
17 | full_table = table
18 | elif database:
19 | full_table = f'{quote_identifier(database)}.{quote_identifier(table)}'
20 | else:
21 | full_table = quote_identifier(table)
22 | if not fmt:
23 | fmt = 'CSV' if column_names else 'CSVWithNames'
24 | if compression is None:
25 | if file_path.endswith('.gzip') or file_path.endswith('.gz'):
26 | compression = 'gzip'
27 | with open(file_path, 'rb') as file:
28 | return client.raw_insert(full_table,
29 | column_names=column_names,
30 | insert_block=file,
31 | fmt=fmt,
32 | settings=settings,
33 | compression=compression)
34 |
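A usage sketch, assuming a running server and an existing `movies` table matching the CSV layout; with no `fmt` or `column_names` given the helper defaults to `CSVWithNames`, and a `.gz` suffix triggers gzip compression:

    from clickhouse_connect import get_client
    from clickhouse_connect.driver.tools import insert_file

    client = get_client()
    summary = insert_file(client, 'movies', 'tests/integration_tests/movies.csv.gz')
    print(summary.written_rows)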
--------------------------------------------------------------------------------
/clickhouse_connect/driver/types.py:
--------------------------------------------------------------------------------
1 | from abc import ABC, abstractmethod
2 | from typing import Sequence, Any
3 |
4 | Matrix = Sequence[Sequence[Any]]
5 |
6 |
7 | class Closable(ABC):
8 | @abstractmethod
9 | def close(self):
10 | pass
11 |
12 |
13 | class ByteSource(Closable):
14 |     last_message: bytes = None
15 |
16 | @abstractmethod
17 | def read_leb128(self) -> int:
18 | pass
19 |
20 | @abstractmethod
21 | def read_leb128_str(self) -> str:
22 | pass
23 |
24 | @abstractmethod
25 | def read_uint64(self) -> int:
26 | pass
27 |
28 | @abstractmethod
29 | def read_bytes(self, sz: int) -> bytes:
30 | pass
31 |
32 | @abstractmethod
33 | def read_str_col(self, num_rows: int, encoding: str, nullable: bool = False, null_obj: Any = None):
34 | pass
35 |
36 | @abstractmethod
37 | def read_bytes_col(self, sz: int, num_rows: int):
38 | pass
39 |
40 | @abstractmethod
41 | def read_fixed_str_col(self, sz: int, num_rows: int, encoding: str):
42 | pass
43 |
44 | @abstractmethod
45 | def read_array(self, array_type: str, num_rows: int):
46 | pass
47 |
48 | @abstractmethod
49 | def read_byte(self) -> int:
50 | pass
51 |
--------------------------------------------------------------------------------
/clickhouse_connect/driver/tzutil.py:
--------------------------------------------------------------------------------
1 | import os
2 | from datetime import datetime
3 | from typing import Tuple
4 |
5 | import pytz
6 |
7 | tzlocal = None
8 | try:
9 | import tzlocal # Maybe we can use the tzlocal module to get a safe timezone
10 | except ImportError:
11 | pass
12 |
13 | # Set the local timezone for DateTime conversions. Note in most cases we want to use either UTC or the server
14 | # timezone, but if someone insists on using the local timezone we will try to convert. The problem is we
15 | # never have anything but an epoch timestamp returned from ClickHouse, so attempts to convert times when the
16 | # local timezone is "DST" aware (like 'CEST' vs 'CET') will be wrong approximately half the time
17 | local_tz: pytz.timezone
18 | local_tz_dst_safe: bool = False
19 |
20 |
21 | def normalize_timezone(timezone: pytz.timezone) -> Tuple[pytz.timezone, bool]:
22 | if timezone.tzname(None) in ('UTC', 'GMT', 'Universal', 'GMT-0', 'Zulu', 'Greenwich'):
23 | return pytz.UTC, True
24 |
25 | if timezone.tzname(None) in pytz.common_timezones:
26 | return timezone, True
27 |
28 | if tzlocal is not None: # Maybe we can use the tzlocal module to get a safe timezone
29 | local_name = tzlocal.get_localzone_name()
30 | if local_name in pytz.common_timezones:
31 | return pytz.timezone(local_name), True
32 |
33 | return timezone, False
34 |
35 |
36 | try:
37 | local_tz = pytz.timezone(os.environ.get('TZ', ''))
38 | except pytz.UnknownTimeZoneError:
39 | local_tz = datetime.now().astimezone().tzinfo
40 |
41 | local_tz, local_tz_dst_safe = normalize_timezone(local_tz)
42 |
--------------------------------------------------------------------------------
/clickhouse_connect/driverc/.gitignore:
--------------------------------------------------------------------------------
1 | # Cython build output
2 | *.c
3 | *.so
--------------------------------------------------------------------------------
/clickhouse_connect/driverc/__init__.pxd:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ClickHouse/clickhouse-connect/ced1ec4ddc823159648fd66ce554110192b0a023/clickhouse_connect/driverc/__init__.pxd
--------------------------------------------------------------------------------
/clickhouse_connect/driverc/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ClickHouse/clickhouse-connect/ced1ec4ddc823159648fd66ce554110192b0a023/clickhouse_connect/driverc/__init__.py
--------------------------------------------------------------------------------
/clickhouse_connect/driverc/buffer.pxd:
--------------------------------------------------------------------------------
1 | cdef class ResponseBuffer:
2 | cdef:
3 | unsigned long long buf_loc, buf_sz, slice_sz
4 | signed long long slice_start
5 | object gen, source
6 | char* buffer
7 | char* slice
8 | unsigned char _read_byte_load(self) except ?255
9 | char* read_bytes_c(self, unsigned long long sz) except NULL
10 | Py_buffer buff_source
11 | cdef object _read_str_col(self, unsigned long long num_rows, char * encoding)
12 | cdef object _read_nullable_str_col(self, unsigned long long num_rows, char * encoding, object null_obj)
13 |
--------------------------------------------------------------------------------
/clickhouse_connect/driverc/npconv.pyx:
--------------------------------------------------------------------------------
1 | import cython
2 |
3 | import numpy as np
4 |
5 | from .buffer cimport ResponseBuffer
6 |
7 | @cython.boundscheck(False)
8 | @cython.wraparound(False)
9 | def read_numpy_array(ResponseBuffer buffer, np_type: str, unsigned long long num_rows):
10 | dtype = np.dtype(np_type)
11 | cdef sz = dtype.itemsize * num_rows
12 | cdef char * source = buffer.read_bytes_c(dtype.itemsize * num_rows)
13 | return np.frombuffer(source[:sz], dtype, num_rows)
14 |
--------------------------------------------------------------------------------
/clickhouse_connect/entry_points.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | # This script is used for validating installed entrypoints. Note that it fails on Python 3.7
4 | import sys
5 |
6 | from importlib.metadata import PackageNotFoundError, distribution
7 |
8 | EXPECTED_EPS = {'sqlalchemy.dialects:clickhousedb',
9 | 'sqlalchemy.dialects:clickhousedb.connect'}
10 |
11 |
12 | def validate_entrypoints():
13 | expected_eps = EXPECTED_EPS.copy()
14 | try:
15 | dist = distribution('clickhouse-connect')
16 | except PackageNotFoundError:
17 |         print('\nClickHouse Connect package not found in this Python installation')
18 | return -1
19 | print()
20 | for entry_point in dist.entry_points:
21 | name = f'{entry_point.group}:{entry_point.name}'
22 | print(f' {name}={entry_point.value}')
23 | try:
24 | expected_eps.remove(name)
25 | except KeyError:
26 |             print(f'\nUnexpected entry point {name} found')
27 | return -1
28 | if expected_eps:
29 | print()
30 | for name in expected_eps:
31 |             print(f'Did not find expected entry point {name}')
32 | return -1
33 |     print('\nEntrypoints correctly installed')
34 | return 0
35 |
36 |
37 | if __name__ == '__main__':
38 | sys.exit(validate_entrypoints())
39 |
--------------------------------------------------------------------------------
/clickhouse_connect/json_impl.py:
--------------------------------------------------------------------------------
1 | import logging
2 | import json as py_json
3 | from collections import OrderedDict
4 | from typing import Any
5 |
6 | try:
7 | import orjson
8 | any_to_json = orjson.dumps # pylint: disable=no-member
9 | except ImportError:
10 | orjson = None
11 |
12 | try:
13 | import ujson
14 |
15 | def _ujson_to_json(obj: Any) -> bytes:
16 | return ujson.dumps(obj).encode() # pylint: disable=c-extension-no-member
17 | except ImportError:
18 | ujson = None
19 | _ujson_to_json = None
20 |
21 |
22 | def _pyjson_to_json(obj: Any) -> bytes:
23 | return py_json.dumps(obj, separators=(',', ':')).encode()
24 |
25 |
26 | logger = logging.getLogger(__name__)
27 | _to_json = OrderedDict()
28 | _to_json['orjson'] = orjson.dumps if orjson else None # pylint: disable=no-member
29 | _to_json['ujson'] = _ujson_to_json if ujson else None
30 | _to_json['python'] = _pyjson_to_json
31 |
32 | any_to_json = _pyjson_to_json
33 |
34 |
35 | def set_json_library(impl: str = None):
36 | global any_to_json # pylint: disable=global-statement
37 | if impl:
38 | func = _to_json.get(impl)
39 | if func:
40 | any_to_json = func
41 | return
42 | raise NotImplementedError(f'JSON library {impl} is not supported')
43 | for library, func in _to_json.items():
44 | if func:
45 | logger.debug('Using %s library for writing JSON byte strings', library)
46 | any_to_json = func
47 | break
48 |
49 |
50 | set_json_library()
51 |
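A sketch of pinning a specific encoder; `any_to_json` is rebound at module level, so callers that access it through the module object see the change:

    from clickhouse_connect import json_impl

    json_impl.set_json_library('python')      # force the stdlib encoder
    print(json_impl.any_to_json({'a': 1}))    # b'{"a":1}'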
--------------------------------------------------------------------------------
/clickhouse_connect/py.typed:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ClickHouse/clickhouse-connect/ced1ec4ddc823159648fd66ce554110192b0a023/clickhouse_connect/py.typed
--------------------------------------------------------------------------------
/clickhouse_connect/tools/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ClickHouse/clickhouse-connect/ced1ec4ddc823159648fd66ce554110192b0a023/clickhouse_connect/tools/__init__.py
--------------------------------------------------------------------------------
/clickhouse_connect/tools/testing.py:
--------------------------------------------------------------------------------
1 | from typing import Sequence, Optional, Union, Dict, Any
2 |
3 | from clickhouse_connect.driver import Client
4 | from clickhouse_connect.driver.binding import quote_identifier, str_query_value
5 |
6 |
7 | class TableContext:
8 | def __init__(self, client: Client,
9 | table: str,
10 | columns: Union[str, Sequence[str]],
11 | column_types: Optional[Sequence[str]] = None,
12 | engine: str = 'MergeTree',
13 | order_by: str = None,
14 | settings: Optional[Dict[str, Any]] = None):
15 | self.client = client
16 | if '.' in table:
17 | self.table = table
18 | else:
19 | self.table = quote_identifier(table)
20 | self.settings = settings
21 | if isinstance(columns, str):
22 | columns = columns.split(',')
23 | if column_types is None:
24 | self.column_names = []
25 | self.column_types = []
26 | for col in columns:
27 | col = col.strip()
28 | ix = col.find(' ')
29 | self.column_types.append(col[ix + 1:].strip())
30 | self.column_names.append(quote_identifier(col[:ix].strip()))
31 | else:
32 | self.column_names = [quote_identifier(name) for name in columns]
33 | self.column_types = column_types
34 | self.engine = engine
35 | self.order_by = self.column_names[0] if order_by is None else order_by
36 |
37 | def __enter__(self):
38 | if self.client.min_version('19'):
39 | self.client.command(f'DROP TABLE IF EXISTS {self.table}')
40 | else:
41 | self.client.command(f'DROP TABLE IF EXISTS {self.table} SYNC')
42 | col_defs = ','.join(f'{quote_identifier(name)} {col_type}' for name, col_type in zip(self.column_names, self.column_types))
43 | create_cmd = f'CREATE TABLE {self.table} ({col_defs}) ENGINE {self.engine} ORDER BY {self.order_by}'
44 | if self.settings:
45 | create_cmd += ' SETTINGS '
46 | for key, value in self.settings.items():
47 | create_cmd += f'{key} = {str_query_value(value)}, '
48 | if create_cmd.endswith(', '):
49 | create_cmd = create_cmd[:-2]
50 | self.client.command(create_cmd)
51 | return self
52 |
53 | def __exit__(self, exc_type, exc_val, exc_tb):
54 | self.client.command(f'DROP TABLE IF EXISTS {self.table}')
55 |
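A usage sketch, assuming a reachable test server; the context manager drops any existing table on entry, recreates it, and drops it again on exit:

    from clickhouse_connect import get_client
    from clickhouse_connect.tools.testing import TableContext

    client = get_client()
    with TableContext(client, 'ctx_example', 'key UInt32, value String') as ctx:
        client.insert(ctx.table, [[1, 'a'], [2, 'b']], column_names=['key', 'value'])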
--------------------------------------------------------------------------------
/docker-compose.yml:
--------------------------------------------------------------------------------
1 | services:
2 | clickhouse:
3 | image: 'clickhouse/clickhouse-server:${CLICKHOUSE_CONNECT_TEST_CH_VERSION-25.1-alpine}'
4 | container_name: 'clickhouse-connect-clickhouse-server'
5 | environment:
6 | CLICKHOUSE_SKIP_USER_SETUP: 1
7 | ports:
8 | - '8123:8123'
9 | - '9000:9000'
10 | ulimits:
11 | nofile:
12 | soft: 262144
13 | hard: 262144
14 | volumes:
15 | - './.docker/clickhouse/single_node/config.xml:/etc/clickhouse-server/config.xml'
16 | - './.docker/clickhouse/users.xml:/etc/clickhouse-server/users.xml'
17 | - './.docker/clickhouse/single_node/docker_related_config.xml:/etc/clickhouse-server/config.d/docker_related_config.xml'
18 |
19 | clickhouse_tls:
20 | build:
21 | context: ./
22 | dockerfile: .docker/clickhouse/single_node_tls/Dockerfile
23 | container_name: 'clickhouse-connect-clickhouse-server-tls'
24 | environment:
25 | CLICKHOUSE_SKIP_USER_SETUP: 1
26 | ports:
27 | - '10843:8443'
28 | - '10840:9440'
29 | ulimits:
30 | nofile:
31 | soft: 262144
32 | hard: 262144
33 | volumes:
34 | - './.docker/clickhouse/single_node_tls/config.xml:/etc/clickhouse-server/config.xml'
35 | - './.docker/clickhouse/single_node_tls/users.xml:/etc/clickhouse-server/users.xml'
36 | - './.docker/clickhouse/single_node_tls/docker_related_config.xml:/etc/clickhouse-server/config.d/docker_related_config.xml'
--------------------------------------------------------------------------------
/examples/benchmark.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3 -u
2 |
3 | import datetime
4 | import sys
5 | import time
6 | import uuid
7 | import argparse
8 | from ipaddress import IPv6Address
9 | from typing import List
10 |
11 | import clickhouse_connect
12 | from clickhouse_connect.datatypes.format import set_default_formats
13 | from clickhouse_connect.driver.client import Client
14 |
15 | columns = {
16 | 'int8': ('Int8', -44),
17 | 'uint16': ('UInt16', 1),
18 | 'int16': ('Int16', -2),
19 | 'uint64': ('UInt64', 32489071615273482),
20 | 'float32': ('Float32', 3.14),
21 | 'str': ('String', 'hello'),
22 | 'fstr': ('FixedString(16)', b'world numkn \nman'),
23 | 'date': ('Date', datetime.date(2022, 3, 18)),
24 | 'datetime': ('DateTime', datetime.datetime.utcnow()),
25 | 'nullint': ('Nullable(Int8)', {None, 77}),
26 | 'nullstr': ('Nullable(String)', {None, 'a_null_str'}),
27 | 'enum': ("Enum16('hello' = 1, 'world' = 2)", 'hello'),
28 | 'array': ('Array(String)', ['q', 'w', 'e', 'r']),
29 | 'narray': ('Array(Array(String))', [['xkcd', 'abs', 'norbert'], ['George', 'John', 'Thomas']]),
30 | 'uuid': ('UUID', uuid.UUID('1d439f79-c57d-5f23-52c6-ffccca93e1a9')),
31 | 'bool': ('Bool', True),
32 | 'ipv4': ('IPv4', '107.34.202.7'),
33 | 'ipv6': ('IPv6', IPv6Address('fe80::f4d4:88ff:fe88:4a64')),
34 | 'tuple': ('Tuple(Nullable(String), UInt64)', ('tuple_string', 7502888)),
35 | 'dec': ('Decimal64(5)', 25774.233),
36 | 'bdec': ('Decimal128(10)', 2503.48877233),
37 | 'uint256': ('UInt256', 1057834823498238884432566),
38 | 'dt64': ('DateTime64(9)', datetime.datetime.now()),
39 | 'dt64d': ("DateTime64(6, 'America/Denver')", datetime.datetime.now()),
40 | 'lcstr': ('LowCardinality(String)', 'A simple string')
41 | }
42 |
43 | standard_cols = ['uint16', 'int16', 'float32', 'str', 'fstr', 'date', 'datetime', 'array', 'nullint', 'enum', 'uuid']
44 |
45 |
46 | def create_table(client: Client, col_names: List[str], rows: int):
47 | if not col_names:
48 | col_names = columns.keys()
49 | col_list = ','.join([f'{col_name} {columns[col_name][0]}' for col_name in sorted(col_names)])
50 | client.command('DROP TABLE IF EXISTS benchmark_test')
51 | client.command(f'CREATE TABLE benchmark_test ({col_list}) ENGINE Memory')
52 | insert_cols = []
53 | for col_name in sorted(col_names):
54 | col_def = columns[col_name]
55 | if isinstance(col_def[1], set):
56 | choices = tuple(col_def[1])
57 | cnt = len(choices)
58 | col = [choices[ix % cnt] for ix in range(rows)]
59 | else:
60 | col = [col_def[1]] * rows
61 | insert_cols.append(col)
62 | client.insert('benchmark_test', insert_cols, column_oriented=True)
63 |
64 |
65 | def check_reads(client: Client, tries: int = 50, rows: int = 100000):
66 | start_time = time.time()
67 | for _ in range(tries):
68 | result = client.query(f'SELECT * FROM benchmark_test LIMIT {rows}', column_oriented=True)
69 | assert result.row_count == rows
70 | total_time = time.time() - start_time
71 | avg_time = total_time / tries
72 | speed = int(1 / avg_time * rows)
73 | print(f'- Avg time reading {rows} rows from {tries} runs: {avg_time} sec. Total: {total_time}')
74 | print(f' Speed: {speed} rows/sec')
75 |
76 |
77 | def main():
78 | parser = argparse.ArgumentParser()
79 | parser.add_argument('-t', '--tries', help='Total tries for each test', type=int, default=50)
80 | parser.add_argument('-r', '--rows', help='Total rows in dataset', type=int, default=100000)
81 | parser.add_argument('-c', '--columns', help='Column types to test', type=str, nargs='+')
82 |
83 | args = parser.parse_args()
84 | rows = args.rows
85 | tries = args.tries
86 | col_names = args.columns
87 | if col_names:
88 | if 'all' in col_names:
89 | col_names = list(columns.keys())
90 | else:
91 | invalid = set(col_names).difference(set(columns.keys()))
92 | if invalid:
93 |                 print(', '.join(invalid) + ' columns not found')
94 | sys.exit()
95 | else:
96 | col_names = standard_cols
97 | client = clickhouse_connect.get_client(compress=False)
98 |
99 | set_default_formats('IP*', 'native', '*Int64', 'native')
100 | create_table(client, col_names, rows)
101 | check_reads(client, tries, rows)
102 |
103 |
104 | if __name__ == '__main__':
105 | main()
106 |
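Example invocations; column names must come from the `columns` dict above, or `all` to test every type:

    python examples/benchmark.py --rows 100000 --columns uint16 str datetime
    python examples/benchmark.py -t 20 -c all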
--------------------------------------------------------------------------------
/examples/clear_test_databases.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3 -u
2 |
3 | import os
4 |
5 | import clickhouse_connect
6 |
7 |
8 | def main():
9 | host = os.getenv('CLICKHOUSE_CONNECT_TEST_HOST', 'localhost')
10 | port = int(os.getenv('CLICKHOUSE_CONNECT_TEST_PORT', '8123'))
11 | password = os.getenv('CLICKHOUSE_CONNECT_TEST_PASSWORD', '')
12 | client = clickhouse_connect.get_client(host=host, port=port, password=password)
13 | database_result = client.query("SELECT name FROM system.databases WHERE name ilike '%test%'").result_rows
14 | for database_row in database_result:
15 | database:str = database_row[0]
16 | if database.startswith('dbt_clickhouse') or database.startswith('clickhouse_connect'):
17 | print(f'DROPPING DATABASE `{database}`')
18 | client.command(f'DROP DATABASE IF EXISTS {database}')
19 |
20 |
21 | if __name__ == '__main__':
22 | main()
23 |
--------------------------------------------------------------------------------
/examples/insert_examples.py:
--------------------------------------------------------------------------------
1 | import clickhouse_connect
2 |
3 | client: clickhouse_connect.driver.Client
4 |
5 |
6 | def insert_nested_flat():
7 | client.command('DROP TABLE IF EXISTS test_nested_flat')
8 | client.command('SET flatten_nested = 1')
9 | client.command(
10 | """
11 | CREATE TABLE test_nested_flat
12 | (
13 | `key` UInt32,
14 | `value` Nested(str String, int32 Int32)
15 | )
16 | ENGINE = MergeTree
17 | ORDER BY key
18 | """)
19 | result = client.query('DESCRIBE TABLE test_nested_flat')
20 | print(result.column_names[0:2])
21 | print(result.result_columns[0:2])
22 |
23 | # Note the Nested 'value' column is inserted as two parallel arrays of values
24 | # into their own columns of the form `col_name.key_name` with Array data types
25 | data = [[1, ['string_1', 'string_2'], [20, 30]],
26 | [2, ['string_3', 'string_4'], [40, 50]]
27 | ]
28 | client.insert('test_nested_flat', data,
29 | column_names=['key', 'value.str', 'value.int32'],
30 | column_type_names=['UInt32', 'Array(String)', 'Array(Int32)'])
31 |
32 | result = client.query('SELECT * FROM test_nested_flat')
33 | print(result.column_names)
34 | print(result.result_columns)
35 | client.command('DROP TABLE test_nested_flat')
36 |
37 |
38 | def insert_nested_not_flat():
39 | client.command('DROP TABLE IF EXISTS test_nested_not_flat')
40 | client.command('SET flatten_nested = 0')
41 | client.command(
42 | """
43 | CREATE TABLE test_nested_not_flat
44 | (
45 | `key` UInt32,
46 | `value` Nested(str String, int32 Int32)
47 | )
48 | ENGINE = MergeTree
49 | ORDER BY key
50 | """)
51 | result = client.query('DESCRIBE TABLE test_nested_not_flat')
52 |     print(result.column_names[0:2])
53 |     print(result.result_columns[0:2])
54 |
55 | # Note the Nested 'value' column is inserted as a list of dictionaries for each row
56 | data = [[1, [{'str': 'nested_string_1', 'int32': 20},
57 | {'str': 'nested_string_2', 'int32': 30}]],
58 | [2, [{'str': 'nested_string_3', 'int32': 40},
59 | {'str': 'nested_string_4', 'int32': 50}]]
60 | ]
61 | client.insert('test_nested_not_flat', data,
62 | column_names=['key', 'value'],
63 | column_type_names=['UInt32', 'Nested(str String, int32 Int32)'])
64 |
65 | result = client.query('SELECT * FROM test_nested_not_flat')
66 | print(result.column_names)
67 | print(result.result_columns)
68 | client.command('DROP TABLE test_nested_not_flat')
69 |
70 |
71 | def main():
72 | global client # pylint: disable=global-statement
73 | client = clickhouse_connect.get_client()
74 |     print('Nested example flatten_nested = 1 (Default)')
75 |     insert_nested_flat()
76 | print('\n\nNested example flatten_nested = 0')
77 | insert_nested_not_flat()
78 |
79 |
80 | if __name__ == '__main__':
81 | main()
82 |
--------------------------------------------------------------------------------
/examples/pandas_examples.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3 -u
2 |
3 | import pandas as pd
4 | import clickhouse_connect
5 |
6 |
7 | create_table_sql = """
8 | CREATE TABLE pandas_example
9 | (
10 | `timeseries` DateTime('UTC'),
11 | `int_value` Int32,
12 | `str_value` String,
13 | `float_value` Float64
14 | )
15 | ENGINE = MergeTree
16 | ORDER BY timeseries
17 | """
18 |
19 |
20 | def write_pandas_df():
21 |     client = clickhouse_connect.get_client(host='localhost', port=8123, user='default', password='')
22 | client.command('DROP TABLE IF EXISTS pandas_example')
23 | client.command(create_table_sql)
24 | df = pd.DataFrame({'timeseries': ['04/03/2022 10:00:11', '05/03/2022 11:15:44', '06/03/2022 17:14:00'],
25 | 'int_value': [16, 19, 11],
26 | 'str_value': ['String One', 'String Two', 'A Third String'],
27 | 'float_value': [2344.288, -73002.4444, 3.14159]})
28 | df['timeseries'] = pd.to_datetime(df['timeseries'])
29 | client.insert_df('pandas_example', df)
30 | result_df = client.query_df('SELECT * FROM pandas_example')
31 | print()
32 | print(result_df.dtypes)
33 | print()
34 | print(result_df)
35 |
36 |
37 | if __name__ == '__main__':
38 | write_pandas_df()
39 |
--------------------------------------------------------------------------------
/examples/params_example.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3 -u
2 |
3 | from datetime import datetime, timedelta
4 |
5 | from clickhouse_connect.driver.binding import finalize_query
6 |
7 | select_template = """
8 | SELECT
9 | formatDateTime(started_at, '%%m/%%d/%%Y', %(time_zone)s) AS date,
10 | formatDateTime(started_at, '%%I:%%M:%%S %%p', %(time_zone)s) AS time,
11 | format('{}path/link?name={}&dev_type={}', %(web_url)s, label, device_type) AS url,
12 | device_name,
13 | description
14 | FROM sessions
15 | """
16 |
17 |
18 | def build_device_query(time_zone: str,
19 | web_url: str,
20 | client: str,
21 | company_id: str = '',
22 | device_id: str = '',
23 | updated: bool = False,
24 | start_time: datetime = None,
25 | end_time: datetime = None):
26 | params = {'time_zone': time_zone,
27 | 'web_url': web_url,
28 | 'client': client
29 | }
30 | where_template = ' WHERE client = %(client)s'
31 | if company_id:
32 | where_template += ' AND company_id = %(company_id)s'
33 | params['company_id'] = company_id
34 | if device_id:
35 | where_template += ' AND dev_type = %(device_id)s'
36 | params['device_id'] = device_id
37 | if updated:
38 | where_template += ' AND updated = true'
39 | if start_time and end_time:
40 | where_template += ' AND started_at BETWEEN %(start_time)s AND %(end_time)s'
41 | params['start_time'] = start_time
42 | params['end_time'] = end_time
43 | full_query = select_template + where_template + ' ORDER BY started_at ASC'
44 | return finalize_query(full_query, params)
45 |
46 |
47 | if __name__ == '__main__':
48 | start = datetime.now()
49 | end = start + timedelta(hours=1, minutes=20)
50 | print(build_device_query('UTC',
51 | 'https://example.com',
52 |
53 | client='Client_0',
54 | company_id='Company_1',
55 | device_id='DEVICE_77',
56 | start_time=start,
57 | end_time=end
58 | )
59 | )
60 |
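
Note: finalize_query renders the %(name)s placeholders client side and returns the finished SQL
string. The same parameters dictionary can instead be passed straight to client.query(), which
renders the placeholders itself (the integration tests in this repository use that form). A
minimal sketch, assuming a reachable ClickHouse server and the sessions table referenced by the
template above:

    import clickhouse_connect

    client = clickhouse_connect.get_client()
    # the driver substitutes %(client)s from the parameters dict before sending the query
    result = client.query('SELECT device_name FROM sessions WHERE client = %(client)s',
                          parameters={'client': 'Client_0'})
    print(result.result_rows)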
--------------------------------------------------------------------------------
/examples/read_perf.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python -u
2 |
3 | """
4 | This script is for simple timed comparisons of various queries between formats (streaming vs batch, pandas vs Python
5 | native types) based on data loaded into a local ClickHouse instance from some of the ClickHouse Sample Datasets:
6 | https://clickhouse.com/docs/en/getting-started/example-datasets/
7 |
8 | It includes some basic comparisons with clickhouse-driver. The clickhouse-driver import and client can be
9 | commented out if clickhouse-driver is not installed.
10 |
11 | Uncomment the queries and formats to measure before running.
12 |
13 | This script is not intended to be rigorous or scientific. For entertainment purposes only.
14 | """
15 |
16 | import time
17 | import clickhouse_driver # pylint: disable=import-error
18 | import clickhouse_connect
19 |
20 |
21 | queries = [#'SELECT trip_id, pickup, dropoff, pickup_longitude, pickup_latitude FROM taxis',
22 | #'SELECT number from numbers(500000000)',
23 | 'SELECT * FROM datasets.hits_100m_obfuscated LIMIT 2000000',
24 | #"SELECT * FROM perftest.ontime WHERE FlightDate < '2017-02-18'"
25 | ]
26 |
27 | cc_client = clickhouse_connect.get_client(compress=False)
28 | cd_client = clickhouse_driver.Client(host='localhost')
29 |
30 |
31 | def read_python_columns(query):
32 | print('\n\tclickhouse-connect Python Batch (column oriented):')
33 | start = time.time()
34 | columns = cc_client.query(query).result_columns
35 | _print_result(start, len(columns[0]))
36 |
37 |
38 | def read_python_rows(query):
39 | print('\n\tclickhouse-connect Python Batch (row oriented):')
40 | start = time.time()
41 | rows = cc_client.query(query).result_rows
42 | _print_result(start, len(rows))
43 |
44 |
45 | def read_python_stream_columns(query):
46 | print('\n\tclickhouse-connect Python Stream (column blocks):')
47 | rows = 0
48 | start = time.time()
49 | with cc_client.query_column_block_stream(query) as stream:
50 | for block in stream:
51 | rows += len(block[0])
52 | _print_result(start, rows)
53 |
54 |
55 | def read_python_stream_rows(query):
56 | print('\n\tclickhouse-connect Python Stream (row blocks):')
57 | rows = 0
58 | start = time.time()
59 | with cc_client.query_row_block_stream(query) as stream:
60 | for block in stream:
61 | rows += len(block)
62 | _print_result(start, rows)
63 |
64 |
65 | def read_numpy(query):
66 | print('\n\tclickhouse connect Numpy Batch:')
67 | start = time.time()
68 | arr = cc_client.query_np(query, max_str_len=100)
69 | _print_result(start, len(arr))
70 |
71 |
72 | def read_pandas(query):
73 | print('\n\tclickhouse connect Pandas Batch:')
74 | start = time.time()
75 | rows = len(cc_client.query_df(query))
76 | _print_result(start, rows)
77 |
78 |
79 | def read_arrow(query):
80 | print('\n\tclickhouse connect Arrow:')
81 | start = time.time()
82 | rows = len(cc_client.query_arrow(query))
83 | _print_result(start, rows)
84 |
85 |
86 | def read_pandas_stream(query):
87 | print('\n\tclickhouse-connect Pandas Stream')
88 | start = time.time()
89 | rows = 0
90 | with cc_client.query_df_stream(query) as stream:
91 | for data_frame in stream:
92 | rows += len(data_frame)
93 | _print_result(start, rows)
94 |
95 |
96 | def dr_read_python_columns(query):
97 | print('\n\tclickhouse-driver Python Batch (column oriented):')
98 | start = time.time()
99 | result = cd_client.execute(query, columnar=True)
100 | _print_result(start, len(result[0]))
101 |
102 |
103 | def dr_read_python_rows(query):
104 | print('\n\tclickhouse-driver Python Batch (row oriented):')
105 | start = time.time()
106 | result = cd_client.execute(query)
107 | _print_result(start, len(result))
108 |
109 |
110 | def dr_read_python_stream(query):
111 | print('\n\tclickhouse-driver Python Stream:')
112 | start = time.time()
113 | rows = 0
114 | for block in cd_client.execute_iter(query):
115 | rows += len(block)
116 | _print_result(start, rows)
117 |
118 |
119 | def dr_read_pandas(query):
120 | print('\n\tclickhouse-driver Pandas Batch:')
121 | start = time.time()
122 | data_frame = cd_client.query_dataframe(query)
123 | _print_result(start, len(data_frame))
124 |
125 |
126 | def _print_result(start, rows):
127 | total_time = time.time() - start
128 | print(f'\t\tTime: {total_time:.4f} sec rows: {rows} rows/sec {rows // total_time}')
129 |
130 |
131 | def main():
132 | for query in queries:
133 | print(f'\n{query}')
134 | # read_python_columns(query)
135 | #read_python_rows(query)
136 | read_python_stream_rows(query)
137 | #read_python_stream_columns(query)
138 | #read_pandas_stream(query)
139 | # read_numpy(query)
140 | #read_pandas(query)
141 | # read_arrow(query)
142 | #dr_read_python_columns(query)
143 | #dr_read_python_rows(query)
144 | #dr_read_python_stream(query)
145 | #dr_read_pandas(query)
146 |
147 |
148 | if __name__ == '__main__':
149 | main()
150 |
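
Note: the docstring above suggests commenting out the clickhouse-driver import and client by hand
when that package is not installed. An optional-import guard is an alternative; a sketch (not part
of the script above):

    try:
        import clickhouse_driver  # pylint: disable=import-error
        cd_client = clickhouse_driver.Client(host='localhost')
    except ImportError:
        # the dr_* comparison functions must then be left commented out in main()
        cd_client = None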
--------------------------------------------------------------------------------
/examples/run_async.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python -u
2 |
3 | """
4 | This example will execute 10 queries in total, 2 concurrent queries at a time.
5 | Each query will sleep for 2 seconds before returning.
6 | Here's a sample output that shows that the queries are executed concurrently in batches of 2:
7 | ```
8 | Completed query 1, elapsed ms since start: 2002
9 | Completed query 0, elapsed ms since start: 2002
10 | Completed query 3, elapsed ms since start: 4004
11 | Completed query 2, elapsed ms since start: 4005
12 | Completed query 4, elapsed ms since start: 6006
13 | Completed query 5, elapsed ms since start: 6007
14 | Completed query 6, elapsed ms since start: 8009
15 | Completed query 7, elapsed ms since start: 8009
16 | Completed query 9, elapsed ms since start: 10011
17 | Completed query 8, elapsed ms since start: 10011
18 | ```
19 | """
20 |
21 | import asyncio
22 | from datetime import datetime
23 |
24 | import clickhouse_connect
25 |
26 | QUERIES = 10
27 | SEMAPHORE = 2
28 |
29 |
30 | async def concurrent_queries():
31 | test_query = "SELECT sleep(2)"
32 | client = await clickhouse_connect.get_async_client()
33 |
34 | start = datetime.now()
35 |
36 | async def semaphore_wrapper(sm: asyncio.Semaphore, num: int):
37 | async with sm:
38 | await client.query(query=test_query)
39 | print(f"Completed query {num}, "
40 | f"elapsed ms since start: {int((datetime.now() - start).total_seconds() * 1000)}")
41 |
42 | semaphore = asyncio.Semaphore(SEMAPHORE)
43 | await asyncio.gather(*[semaphore_wrapper(semaphore, num) for num in range(QUERIES)])
44 | await client.close()
45 |
46 |
47 | async def main():
48 | await concurrent_queries()
49 |
50 |
51 | asyncio.run(main())
52 |
--------------------------------------------------------------------------------
/examples/ssh_tunnels.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python -u
2 | import os
3 |
4 | import clickhouse_connect
5 |
6 |
7 | # You can use an -L ssh tunnel directly, but to avoid HTTPS certificate errors you must add the
8 | # `server_host_name` argument to the get_client method
9 |
10 | # This example uses the following ssh tunnel command
11 | # ssh -f -N -L 1443:play.clickhouse.com:443 <ssh user>@<jump host> -i <private key file>
12 | def direct_tunnel():
13 | client = clickhouse_connect.get_client(host='localhost',
14 | user='play',
15 | password='clickhouse',
16 | port=1443,
17 | secure=True,
18 | server_host_name='play.clickhouse.com')
19 | print(client.query('SHOW DATABASES').result_set)
20 | client.close()
21 |
22 |
23 | # This example uses the Python sshtunnel library to create an ssh tunnel as above but within your Python code
24 | # `pip install sshtunnel` is required. See the sshtunnel documentation for additional configuration options
25 | # https://sshtunnel.readthedocs.io/en/latest/
26 |
27 | try:
28 | import sshtunnel # pylint: disable=wrong-import-position
29 | except ImportError:
30 | pass
31 |
32 |
33 | def create_tunnel():
34 | server = sshtunnel.SSHTunnelForwarder(
35 | (os.environ.get('CLICKHOUSE_TUNNEL_JUMP_HOST'), 22), # Create an ssh tunnel to your jump host/port
36 | ssh_username=os.environ.get('CLICKHOUSE_TUNNEL_USER', 'ubuntu'), # Set the user for the remote/jump host
37 | ssh_pkey=os.environ.get('CLICKHOUSE_TUNNEL_KEY_FILE', '~/.ssh/id_rsa'), # The private key file to use
38 | ssh_private_key_password=os.environ.get('CLICKHOUSE_TUNNEL_KEY_PASSWORD', None), # Private key password
39 | remote_bind_address=('play.clickhouse.com', 443), # The ClickHouse server and port you want to reach
40 | local_bind_address=('localhost', 1443) # The local address and port to bind the tunnel to
41 | )
42 | server.start()
43 |
44 | client = clickhouse_connect.get_client(host='localhost',
45 | user='play',
46 | password='clickhouse',
47 | port=1443,
48 | secure=True,
49 | verify=True,
50 | server_host_name='play.clickhouse.com')
51 | print(client.query('SHOW DATABASES').result_set)
52 | client.close()
53 | server.close()
54 |
55 |
56 | # An example of how to use a "dynamic/SOCKS5" ssh tunnel to reach a ClickHouse server
57 | # The ssh tunnel for this example was created with the following command:
58 | # ssh -f -N -D 1443 <ssh user>@<jump host> -i <private key file>
59 |
60 | # This example requires installing the pysocks library:
61 | # pip install pysocks
62 | #
63 | # Documentation for the SOCKSProxyManager here: https://urllib3.readthedocs.io/en/stable/reference/contrib/socks.html
64 | # Note there are limitations for the urllib3 SOCKSProxyManager, described in that documentation
65 | from urllib3.contrib.socks import SOCKSProxyManager # pylint: disable=wrong-import-position,wrong-import-order
66 | from clickhouse_connect.driver import httputil # pylint: disable=wrong-import-position
67 |
68 |
69 | def socks_proxy():
70 | options = httputil.get_pool_manager_options()
71 | proxy_manager = SOCKSProxyManager('socks5h://localhost:1443', **options)
72 |
73 | client = clickhouse_connect.get_client(host='play.clickhouse.com',
74 | user='play',
75 | password='clickhouse',
76 | port=443,
77 | pool_mgr=proxy_manager)
78 |
79 | print(client.query('SHOW DATABASES').result_set)
80 | client.close()
81 |
82 |
83 | # Uncomment the option you want to test for local testing of your tunnel
84 |
85 | # direct_tunnel()
86 | create_tunnel()
87 | # socks_proxy()
88 |
--------------------------------------------------------------------------------
/examples/write_into_file.py:
--------------------------------------------------------------------------------
1 | import clickhouse_connect
2 |
3 | if __name__ == '__main__':
4 | client = clickhouse_connect.get_client()
5 | query = 'SELECT number, toString(number) AS number_as_str FROM system.numbers LIMIT 5'
6 | fmt = 'CSVWithNames' # or any other format, see https://clickhouse.com/docs/en/interfaces/formats
7 | stream = client.raw_stream(query=query, fmt=fmt)
8 | with open("output.csv", "wb") as f:
9 | for chunk in stream:
10 | f.write(chunk)
11 |
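
Note: raw_stream is the right choice for results too large to hold in memory. For small results,
raw_query (used in tests/integration_tests/test_protocol_version.py) returns the formatted bytes
in a single call. A minimal sketch under the same assumptions as the script above:

    import clickhouse_connect

    client = clickhouse_connect.get_client()
    # fetch the whole formatted result as one bytes object instead of a stream of chunks
    data = client.raw_query('SELECT number FROM system.numbers LIMIT 5', fmt='CSVWithNames')
    with open('output_small.csv', 'wb') as f:
        f.write(data)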
--------------------------------------------------------------------------------
/examples/write_perf.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python -u
2 |
3 | # pylint: disable=import-error,no-name-in-module
4 | import time
5 | import random
6 | import clickhouse_driver
7 |
8 | import clickhouse_connect
9 | from clickhouse_connect.tools.testing import TableContext
10 |
11 |
12 | inserts = [{'query': 'SELECT trip_id, pickup, dropoff, pickup_longitude, ' +
13 | 'pickup_latitude FROM taxis ORDER BY trip_id LIMIT 5000000',
14 | 'columns': 'trip_id UInt32, pickup String, dropoff String,' +
15 | ' pickup_longitude Float64, pickup_latitude Float64'},
16 | {'query': 'SELECT number from numbers(5000000)',
17 | 'columns': 'number UInt64'}]
18 |
19 | excluded = set()
20 | cc_client = clickhouse_connect.get_client(compress=False)
21 | cd_client = clickhouse_driver.Client(host='localhost')
22 | run_id = random.randint(0, 10000000)
23 |
24 |
25 | def write_python_columns(ix, insert):
26 | print('\n\tclickhouse-connect Python Insert (column oriented):')
27 | data = cc_client.query(insert['query']).result_columns
28 | table = f'perf_test_insert_{run_id}_{ix}'
29 | with test_ctx(table, insert) as ctx:
30 | start = time.time()
31 | cc_client.insert(table, data, ctx.column_names, column_type_names=ctx.column_types, column_oriented=True)
32 | _print_result(start, len(data[0]))
33 |
34 |
35 | def write_python_rows(ix, insert):
36 | print('\n\tclickhouse-connect Python Insert (row oriented):')
37 | data = cc_client.query(insert['query']).result_rows
38 | table = f'perf_test_insert_{run_id}_{ix}'
39 | with test_ctx(table, insert) as ctx:
40 | start = time.time()
41 | cc_client.insert(table, data, ctx.column_names, column_type_names=ctx.column_types)
42 | _print_result(start, len(data))
43 |
44 |
45 | def dr_write_python_columns(ix, insert):
46 | print('\n\tclickhouse-driver Python Insert (column oriented):')
47 | data = cd_client.execute(insert['query'], columnar=True)
48 | table = f'perf_test_insert_{run_id}_{ix}'
49 | with test_ctx(table, insert) as ctx:
50 | cols = ','.join(ctx.column_names)
51 | start = time.time()
52 | cd_client.execute(f'INSERT INTO {table} ({cols}) VALUES', data, columnar=True)
53 | _print_result(start, len(data[0]))
54 |
55 |
56 | def dr_write_python_rows(ix, insert):
57 | print('\n\tclickhouse-driver Python Insert (row oriented):')
58 | data = cd_client.execute(insert['query'], columnar=False)
59 | table = f'perf_test_insert_{run_id}_{ix}'
60 | with test_ctx(table, insert) as ctx:
61 | cols = ','.join(ctx.column_names)
62 | start = time.time()
63 | cd_client.execute(f'INSERT INTO {table} ({cols}) VALUES', data, columnar=False)
64 | _print_result(start, len(data))
65 |
66 |
67 | def test_ctx(table, insert):
68 | return TableContext(cc_client, table, insert['columns'])
69 |
70 |
71 | def _print_result(start, rows):
72 | total_time = time.time() - start
73 | print(f'\t\tTime: {total_time:.4f} sec rows: {rows} rows/sec {rows // total_time}')
74 |
75 |
76 | def main():
77 | for ix, insert in enumerate(inserts):
78 | if ix in excluded:
79 | continue
80 | print(f"\n{insert['query']}")
81 | # write_python_columns(ix, insert)
82 | write_python_rows(ix, insert)
83 | # dr_write_python_columns(ix, insert)
84 | dr_write_python_rows(ix, insert)
85 |
86 |
87 | class CDWrapper:
88 | def __init__(self, client):
89 | self._client = client
90 |
91 | def command(self, cmd):
92 | self._client.execute(cmd)
93 |
94 |
95 | if __name__ == '__main__':
96 | main()
97 |
--------------------------------------------------------------------------------
/playtest.py:
--------------------------------------------------------------------------------
1 | import clickhouse_connect
2 |
3 |
4 | def main():
5 | print(f'\nClickHouse Connect installed version: {clickhouse_connect.version()}')
6 | client = clickhouse_connect.get_client(host='play.clickhouse.com',
7 | username='play',
8 | password='clickhouse',
9 | port=443)
10 | print(f'ClickHouse Play current version and timezone: {client.server_version} ({client.server_tz})')
11 | result = client.query('SHOW DATABASES')
12 | print('ClickHouse play Databases:')
13 | for row in result.result_set:
14 | print(f' {row[0]}')
15 | client.close()
16 |
17 |
18 | if __name__ == '__main__':
19 | main()
20 |
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [build-system]
2 | requires = ["setuptools", "cython==3.0.11"]
3 |
4 | build-backend = "setuptools.build_meta"
5 |
6 | [tool.pytest.ini_options]
7 | log_cli = true
8 | log_cli_level = "INFO"
9 | env_files = ["test.env"]
10 | asyncio_default_fixture_loop_scope = "session"
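
Note: the env_files entry loads variables from a local test.env file when pytest runs (via a
dotenv-style pytest plugin). A sketch of such a file, using the one variable the tests in this
repository actually read; any other variable names here would be assumptions:

    # test.env (local, not committed)
    # read by tests/integration_tests/test_native_fuzz.py to set the fuzz run count
    CLICKHOUSE_CONNECT_TEST_FUZZ=50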
11 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | import os
2 | import re
3 | from setuptools import setup, find_packages
4 |
5 | c_modules = []
6 |
7 | try:
8 | from Cython.Build import cythonize
9 | from Cython import __version__ as cython_version
10 |
11 | print(f'Using Cython {cython_version} to build cython modules')
12 | c_modules = cythonize('clickhouse_connect/driverc/*.pyx', language_level='3str')
13 | except ImportError as ex:
14 | print('Cython Install Failed, Not Building C Extensions: ', ex)
15 | cythonize = None
16 | except Exception as ex: # pylint: disable=broad-exception-caught
17 | print('Cython Build Failed, Not Building C Extensions: ', ex)
18 | cythonize = None
19 |
20 |
21 | def run_setup(try_c: bool = True):
22 | if try_c:
23 | kwargs = {
24 | 'ext_modules': c_modules,
25 | }
26 | else:
27 | kwargs = {}
28 |
29 | project_dir = os.path.abspath(os.path.dirname(__file__))
30 | with open(os.path.join(project_dir, 'README.md'), encoding='utf-8') as read_me:
31 | long_desc = read_me.read()
32 |
33 | version = 'development'
34 | if os.path.isfile('.dev_version'):
35 | with open(os.path.join(project_dir, '.dev_version'), encoding='utf-8') as version_file:
36 |             version = version_file.readline().strip()
37 | else:
38 | with open(os.path.join(project_dir, 'clickhouse_connect', '__version__.py'), encoding='utf-8') as version_file:
39 | file_version = version_file.read().strip()
40 | match = re.search(r"version\s*=\s*'(.+)'", file_version)
41 | if match is None:
42 | raise ValueError(f'invalid version {file_version} in clickhouse_connect/__version__.py')
43 | version = match.group(1)
44 |
45 | setup(
46 | name='clickhouse-connect',
47 | author='ClickHouse Inc.',
48 | author_email='clients@clickhouse.com',
49 | keywords=['clickhouse', 'superset', 'sqlalchemy', 'http', 'driver'],
50 | description='ClickHouse Database Core Driver for Python, Pandas, and Superset',
51 | version=version,
52 | long_description=long_desc,
53 | long_description_content_type='text/markdown',
54 | package_data={'clickhouse_connect': ['VERSION', 'py.typed']},
55 | url='https://github.com/ClickHouse/clickhouse-connect',
56 | packages=find_packages(exclude=['tests*']),
57 | python_requires='~=3.8',
58 | license='Apache License 2.0',
59 | install_requires=[
60 | 'certifi',
61 | 'urllib3>=1.26',
62 | 'pytz',
63 | 'zstandard',
64 | 'lz4'
65 | ],
66 | extras_require={
67 | 'sqlalchemy': ['sqlalchemy>1.3.21,<2.0'],
68 | 'numpy': ['numpy'],
69 | 'pandas': ['pandas'],
70 | 'arrow': ['pyarrow'],
71 | 'orjson': ['orjson'],
72 | 'tzlocal': ['tzlocal>=4.0'],
73 | },
74 | tests_require=['pytest'],
75 | entry_points={
76 | 'sqlalchemy.dialects': ['clickhousedb.connect=clickhouse_connect.cc_sqlalchemy.dialect:ClickHouseDialect',
77 | 'clickhousedb=clickhouse_connect.cc_sqlalchemy.dialect:ClickHouseDialect']
78 | },
79 | classifiers=[
80 | 'Development Status :: 4 - Beta',
81 | 'Intended Audience :: Developers',
82 | 'License :: OSI Approved :: Apache Software License',
83 | 'Programming Language :: Python :: 3.8',
84 | 'Programming Language :: Python :: 3.9',
85 | 'Programming Language :: Python :: 3.10',
86 | 'Programming Language :: Python :: 3.11',
87 | 'Programming Language :: Python :: 3.12',
88 | 'Programming Language :: Python :: 3.13',
89 | ],
90 | **kwargs
91 | )
92 |
93 |
94 | try:
95 | run_setup()
96 | # pylint: disable=broad-exception-caught
97 | except (Exception, IOError, SystemExit) as e:
98 | print(f'Unable to compile C extensions for faster performance due to {e}, will use pure Python')
99 | run_setup(False)
100 |
--------------------------------------------------------------------------------
/test_dist/.gitignore:
--------------------------------------------------------------------------------
1 | # Ignore local configuration for superset
2 | dev_config.py
3 |
4 | # Ignore recommended symlink
5 | clickhouse_connect
--------------------------------------------------------------------------------
/test_dist/superset_config.py:
--------------------------------------------------------------------------------
1 | import dev_config
2 |
3 | SUPERSET_WEBSERVER_PORT = getattr(dev_config, 'SUPERSET_WEBSERVER_PORT', 8088)
4 | SECRET_KEY = 'clickhouse_dev'
5 | db_uri = getattr(dev_config, 'SQLALCHEMY_DATABASE_URI', None)
6 | if db_uri:
7 | SQLALCHEMY_DATABASE_URI = db_uri
8 | SIP_15_ENABLED = True
9 |
10 | # Set this API key to enable Mapbox visualizations
11 | MAPBOX_API_KEY = getattr(dev_config, 'MAPBOX_API_KEY', '')
12 | PREFERRED_DATABASES = getattr(dev_config, 'PREFERRED_DATABASES', ['ClickHouse Connect', 'MySQL'])
13 |
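
Note: dev_config.py is the git-ignored local module this file reads from (see test_dist/.gitignore
above). A minimal sketch containing only the attributes superset_config.py looks up; every value
is optional, and the database URI shown is an assumption:

    # dev_config.py (git-ignored local configuration)
    SUPERSET_WEBSERVER_PORT = 8088
    SQLALCHEMY_DATABASE_URI = 'sqlite:////tmp/superset.db'  # any SQLAlchemy URI
    MAPBOX_API_KEY = ''
    PREFERRED_DATABASES = ['ClickHouse Connect', 'MySQL']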
--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ClickHouse/clickhouse-connect/ced1ec4ddc823159648fd66ce554110192b0a023/tests/__init__.py
--------------------------------------------------------------------------------
/tests/conftest.py:
--------------------------------------------------------------------------------
1 | import os
2 | import time
3 | import pytz
4 | import pytest
5 |
6 | from clickhouse_connect.driver import tzutil
7 |
8 | from clickhouse_connect.datatypes.format import clear_all_formats
9 |
10 | os.environ['TZ'] = 'UTC'
11 | time.tzset()
12 |
13 |
14 | @pytest.fixture(autouse=True)
15 | def clean_global_state():
16 | clear_all_formats()
17 | tzutil.local_tz = pytz.UTC
18 |
--------------------------------------------------------------------------------
/tests/integration_tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ClickHouse/clickhouse-connect/ced1ec4ddc823159648fd66ce554110192b0a023/tests/integration_tests/__init__.py
--------------------------------------------------------------------------------
/tests/integration_tests/actors.csv:
--------------------------------------------------------------------------------
1 | Robert Redford, 1936, The Sting
2 | Al Pacino, 1940, Scarface
--------------------------------------------------------------------------------
/tests/integration_tests/datasets.py:
--------------------------------------------------------------------------------
1 | from datetime import datetime, date
2 |
3 | null_ds = [('key1', 1000, 77.3, 'value1', datetime(2022, 10, 15, 10, 3, 2), None),
4 | ('key2', 2000, 882.00, None, None, date(1976, 5, 5)),
5 | ('key3', None, float('nan'), 'value3', datetime(2022, 7, 4), date(1999, 12, 31)),
6 | ('key4', 3000, None, 'value4', None, None)]
7 | null_ds_columns = ['key', 'num', 'flt', 'str', 'dt', 'd']
8 | null_ds_types = ['String', 'Nullable(Int32)', 'Nullable(Float64)', 'Nullable(String)', 'Nullable(DateTime)',
9 | 'Nullable(Date)']
10 |
11 | basic_ds = [('key1', 1000, 50.3, 'value1', datetime.now(), 'lc_1'),
12 | ('key2', 2000, -532.43, 'value2', datetime(1976, 7, 4, 12, 12, 11), 'lc_2'),
13 | ('key3', -2503, 300.00, 'value3', date(2022, 10, 15), 'lc_99')]
14 | basic_ds_columns = ['key', 'num', 'flt', 'str', 'dt', 'lc_string']
15 | basic_ds_types = ['String', 'Int32', 'Float64', 'String', 'DateTime64(9)', 'LowCardinality(String)']
16 | basic_ds_types_ver19 = ['String', 'Int32', 'Float64', 'String', 'DateTime', 'LowCardinality(String)']
17 |
18 | dt_ds = [datetime(2020, 10, 10),
19 | datetime(2021, 11, 11)]
20 | dt_ds_columns = ['timestamp']
21 | dt_ds_types = ['DateTime']
22 |
--------------------------------------------------------------------------------
/tests/integration_tests/json_test.ndjson:
--------------------------------------------------------------------------------
1 | {"key": 17, "flt_val": 5.3, "int_val": 377}
2 | {}
--------------------------------------------------------------------------------
/tests/integration_tests/movies.csv.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ClickHouse/clickhouse-connect/ced1ec4ddc823159648fd66ce554110192b0a023/tests/integration_tests/movies.csv.gz
--------------------------------------------------------------------------------
/tests/integration_tests/movies.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ClickHouse/clickhouse-connect/ced1ec4ddc823159648fd66ce554110192b0a023/tests/integration_tests/movies.parquet
--------------------------------------------------------------------------------
/tests/integration_tests/proxy_ca_cert.crt:
--------------------------------------------------------------------------------
1 | -----BEGIN CERTIFICATE-----
2 | MIIDTzCCAjegAwIBAgIRCgdiOrCiOU6MgzKPZvk1rB0wDQYJKoZIhvcNAQELBQAw
3 | QTEYMBYGA1UEAxMPSFRUUCBUb29sa2l0IENBMQswCQYDVQQGEwJYWDEYMBYGA1UE
4 | ChMPSFRUUCBUb29sa2l0IENBMB4XDTIzMDIwMjIzMTY0M1oXDTI0MDIwMzIzMTY0
5 | M1owQTEYMBYGA1UEAxMPSFRUUCBUb29sa2l0IENBMQswCQYDVQQGEwJYWDEYMBYG
6 | A1UEChMPSFRUUCBUb29sa2l0IENBMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIB
7 | CgKCAQEAzEMZaqeXXUXrZgn3o2pIrSavNmN6ctk5IGrx7TBrfs0BCaUpmpy6AxmI
8 | 7GayBaSFv9Kp78ORTx1rOE0+d0O5ILldMLeNjEasqAfopeQxS2GNF/rwlSMcE8Ic
9 | gi9LJk4Hh2Lwk8zJe+Xy7076irt3PPL478v1EQxRdEoe/Io8Y4eL5BoNsbxdmVHH
10 | arZD2KQvA6M/CvmoQ62DZuELOO2uE/k21lnpgTFVZMrDvNhN3L62O7tZfEz47vPN
11 | G/mCjO4lCRTkRWTGTde4p1Pr8LA2j3ENbf7WKgJS3lFPpvIgZDU6OEY+/k6unNQG
12 | ygUWbG9fO6i+zOvVADx/mBd5PGtQ7QIDAQABo0IwQDAPBgNVHRMBAf8EBTADAQH/
13 | MA4GA1UdDwEB/wQEAwIBxjAdBgNVHQ4EFgQUBdpwLCzir2et2V5aBqu5R2WInVIw
14 | DQYJKoZIhvcNAQELBQADggEBAHOZ4JLH3bzlCj1O0Ube6n3hJ2b/O5huJsaJT27w
15 | oJz+zH7yPdRnrHwd2duQ4n8rV/rDHzVNAE7G5zHbwKRo23cMNxrzmlnOgLPMdNB5
16 | eL4bMHuGKa/0cvuaYYw44NdgoYO2DymySfBbOZC9XbyynUo4S1eKp7qAXeIszJcw
17 | NPtU3rg/5VQs1Lo/gbEFo0nzLb+GpNbbi6RYf9HQmXg4776Hvbn2FCF7X11zv8p3
18 | 9qPl8uZdeGFgL7Zugue9JUQbz5RRodsSVcTCxiiOJ9wJnG4PAIk2y1b88k9D2Gee
19 | 3avNeITx6wRI4HTKUC556ZJGZQ1HE/P4Ka8wxdQBSO+whYQ=
20 | -----END CERTIFICATE-----
21 |
--------------------------------------------------------------------------------
/tests/integration_tests/test_arrow.py:
--------------------------------------------------------------------------------
1 | from datetime import date
2 | from typing import Callable
3 | import string
4 |
5 | import pytest
6 |
7 | from clickhouse_connect.driver import Client
8 | from clickhouse_connect.driver.options import arrow
9 |
10 |
11 | def test_arrow(test_client: Client, table_context: Callable):
12 | if not arrow:
13 | pytest.skip('PyArrow package not available')
14 | if not test_client.min_version('21'):
15 | pytest.skip(f'PyArrow is not supported in this server version {test_client.server_version}')
16 | with table_context('test_arrow_insert', ['animal String', 'legs Int64']):
17 | n_legs = arrow.array([2, 4, 5, 100] * 50)
18 | animals = arrow.array(['Flamingo', 'Horse', 'Brittle stars', 'Centipede'] * 50)
19 | names = ['legs', 'animal']
20 | insert_table = arrow.Table.from_arrays([n_legs, animals], names=names)
21 | test_client.insert_arrow('test_arrow_insert', insert_table)
22 | result_table = test_client.query_arrow('SELECT * FROM test_arrow_insert', use_strings=False)
23 | arrow_schema = result_table.schema
24 | assert arrow_schema.field(0).name == 'animal'
25 | assert arrow_schema.field(0).type == arrow.binary()
26 | assert arrow_schema.field(1).name == 'legs'
27 | assert arrow_schema.field(1).type == arrow.int64()
28 | # pylint: disable=no-member
29 | assert arrow.compute.sum(result_table['legs']).as_py() == 5550
30 | assert len(result_table.columns) == 2
31 |
32 | arrow_table = test_client.query_arrow('SELECT number from system.numbers LIMIT 500',
33 | settings={'max_block_size': 50})
34 | arrow_schema = arrow_table.schema
35 | assert arrow_schema.field(0).name == 'number'
36 | assert arrow_schema.field(0).type.id == 8
37 | assert arrow_table.num_rows == 500
38 |
39 |
40 | def test_arrow_stream(test_client: Client, table_context: Callable):
41 | if not arrow:
42 | pytest.skip('PyArrow package not available')
43 | if not test_client.min_version('21'):
44 | pytest.skip(f'PyArrow is not supported in this server version {test_client.server_version}')
45 | with table_context('test_arrow_insert', ['counter Int64', 'letter String']):
46 | counter = arrow.array(range(1000000))
47 | alphabet = string.ascii_lowercase
48 | letter = arrow.array([alphabet[x % 26] for x in range(1000000)])
49 | names = ['counter', 'letter']
50 | insert_table = arrow.Table.from_arrays([counter, letter], names=names)
51 | test_client.insert_arrow('test_arrow_insert', insert_table)
52 | stream = test_client.query_arrow_stream('SELECT * FROM test_arrow_insert', use_strings=True)
53 | with stream:
54 | result_tables = list(stream)
55 |         # Hopefully we made the table long enough that we get multiple tables back from the query
56 | assert len(result_tables) > 1
57 | total_rows = 0
58 | for table in result_tables:
59 | assert table.num_columns == 2
60 | arrow_schema = table.schema
61 | assert arrow_schema.field(0).name == 'counter'
62 | assert arrow_schema.field(0).type == arrow.int64()
63 | assert arrow_schema.field(1).name == 'letter'
64 | assert arrow_schema.field(1).type == arrow.string()
65 | assert table.column(1)[0].as_py() == alphabet[table.column(0)[0].as_py() % 26]
66 | total_rows += table.num_rows
67 | assert total_rows == 1000000
68 |
69 |
70 | def test_arrow_map(test_client: Client, table_context: Callable):
71 | if not arrow:
72 | pytest.skip('PyArrow package not available')
73 | if not test_client.min_version('21'):
74 | pytest.skip(f'PyArrow is not supported in this server version {test_client.server_version}')
75 | with table_context('test_arrow_map', ['trade_date Date, code String',
76 | 'kdj Map(String, Float32)',
77 | 'update_time DateTime DEFAULT now()']):
78 | data = [[date(2023, 10, 15), 'C1', {'k': 2.5, 'd': 0, 'j': 0}],
79 | [date(2023, 10, 16), 'C2', {'k': 3.5, 'd': 0, 'j': -.372}]]
80 | test_client.insert('test_arrow_map', data, column_names=('trade_date', 'code', 'kdj'),
81 | settings={'insert_deduplication_token': '10381'})
82 | arrow_table = test_client.query_arrow('SELECT * FROM test_arrow_map ORDER BY trade_date',
83 | use_strings=True)
84 | assert isinstance(arrow_table.schema, arrow.Schema)
85 | test_client.insert_arrow('test_arrow_map', arrow_table, settings={'insert_deduplication_token': '10382'})
86 | assert 4 == test_client.command('SELECT count() FROM test_arrow_map')
87 |
--------------------------------------------------------------------------------
/tests/integration_tests/test_contexts.py:
--------------------------------------------------------------------------------
1 | from typing import Callable
2 |
3 | from clickhouse_connect.driver import Client
4 |
5 |
6 | def test_contexts(test_client: Client, table_context: Callable):
7 | with table_context('test_contexts', ['key Int32', 'value1 String', 'value2 String']) as ctx:
8 | data = [[1, 'v1', 'v2'], [2, 'v3', 'v4']]
9 | insert_context = test_client.create_insert_context(table=ctx.table, data=data)
10 | test_client.insert(context=insert_context)
11 | query_context = test_client.create_query_context(
12 | query=f'SELECT value1, value2 FROM {ctx.table} WHERE key = {{k:Int32}}',
13 | parameters={'k': 2},
14 | column_oriented=True)
15 | result = test_client.query(context=query_context)
16 | assert result.result_set[1][0] == 'v4'
17 | query_context.set_parameter('k', 1)
18 | result = test_client.query(context=query_context)
19 | assert result.row_count == 1
20 |         assert result.result_set[1][0] == 'v2'
21 |
22 | data = [[1, 'v5', 'v6'], [2, 'v7', 'v8']]
23 | test_client.insert(data=data, context=insert_context)
24 | result = test_client.query(context=query_context)
25 | assert result.row_count == 2
26 |
27 | insert_context.data = [[5, 'v5', 'v6'], [7, 'v7', 'v8']]
28 | test_client.insert(context=insert_context)
29 | assert test_client.command(f'SELECT count() FROM {ctx.table}') == 6
30 |
--------------------------------------------------------------------------------
/tests/integration_tests/test_formats.py:
--------------------------------------------------------------------------------
1 | from clickhouse_connect.driver import Client, ProgrammingError
2 |
3 |
4 | def test_uint64_format(test_client: Client):
5 | # Default should be unsigned
6 | result = test_client.query('SELECT toUInt64(9523372036854775807) as value')
7 | assert result.result_set[0][0] == 9523372036854775807
8 | result = test_client.query('SELECT toUInt64(9523372036854775807) as value', query_formats={'UInt64': 'signed'})
9 | assert result.result_set[0][0] == -8923372036854775809
10 | result = test_client.query('SELECT toUInt64(9523372036854775807) as value', query_formats={'UInt64': 'native'})
11 | assert result.result_set[0][0] == 9523372036854775807
12 |     try:
13 |         test_client.query('SELECT toUInt64(9523372036854775807) as signed', query_formats={'UInt64': 'huh'})
14 |         raise AssertionError('an unknown UInt64 query format should raise ProgrammingError')
15 |     except ProgrammingError:
16 |         pass
17 |
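Note: query_formats applies per query. The same rendering can be set process-wide through the
format registry that the top-level tests/conftest.py resets with clear_all_formats; a sketch using
set_read_format from the same clickhouse_connect.datatypes.format module:

    from clickhouse_connect.datatypes.format import set_read_format

    # every UInt64 column in subsequent queries is returned as a signed Python int
    set_read_format('UInt64', 'signed')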
--------------------------------------------------------------------------------
/tests/integration_tests/test_geometric.py:
--------------------------------------------------------------------------------
1 | from typing import Callable
2 |
3 | from clickhouse_connect.driver import Client
4 |
5 |
6 | def test_point_column(test_client: Client, table_context: Callable):
7 | with table_context('point_column_test', ['key Int32', 'point Point']):
8 | data = [[1, (3.55, 3.55)], [2, (4.55, 4.55)]]
9 | test_client.insert('point_column_test', data)
10 |
11 | query_result = test_client.query('SELECT * FROM point_column_test ORDER BY key').result_rows
12 | assert len(query_result) == 2
13 | assert query_result[0] == (1, (3.55, 3.55))
14 | assert query_result[1] == (2, (4.55, 4.55))
15 |
16 |
17 | def test_ring_column(test_client: Client, table_context: Callable):
18 | with table_context('ring_column_test', ['key Int32', 'ring Ring']):
19 |         data = [[1, [(5.522, 58.472), (3.55, 3.55)]], [2, [(4.55, 4.55)]]]
20 | test_client.insert('ring_column_test', data)
21 |
22 | query_result = test_client.query('SELECT * FROM ring_column_test ORDER BY key').result_rows
23 | assert len(query_result) == 2
24 |         assert query_result[0] == (1, [(5.522, 58.472), (3.55, 3.55)])
25 | assert query_result[1] == (2, [(4.55, 4.55)])
26 |
27 |
28 | def test_polygon_column(test_client: Client, table_context: Callable):
29 | with table_context('polygon_column_test', ['key Int32', 'polygon Polygon']):
30 | res = test_client.query("SELECT readWKTPolygon('POLYGON ((-64.8 32.3, -65.5 18.3, -80.3 25.2, -64.8 32.3))') as polygon")
31 | pg = res.first_row[0]
32 | test_client.insert('polygon_column_test', [(1, pg), (4, pg)])
33 | query_result = test_client.query('SELECT key, polygon FROM polygon_column_test WHERE key = 4')
34 | assert query_result.first_row[1] == pg
35 |
--------------------------------------------------------------------------------
/tests/integration_tests/test_inserts.py:
--------------------------------------------------------------------------------
1 | from decimal import Decimal
2 | from typing import Callable
3 |
4 | from clickhouse_connect.driver.client import Client
5 | from clickhouse_connect.driver.exceptions import DataError
6 |
7 |
8 | def test_insert(test_client: Client, test_table_engine: str):
9 | if test_client.min_version('19'):
10 | test_client.command('DROP TABLE IF EXISTS test_system_insert')
11 | else:
12 | test_client.command('DROP TABLE IF EXISTS test_system_insert SYNC')
13 | test_client.command(f'CREATE TABLE test_system_insert AS system.tables Engine {test_table_engine} ORDER BY name')
14 | tables_result = test_client.query('SELECT * from system.tables')
15 | test_client.insert(table='test_system_insert', column_names='*', data=tables_result.result_set)
16 | copy_result = test_client.command('SELECT count() from test_system_insert')
17 | assert tables_result.row_count == copy_result
18 | test_client.command('DROP TABLE IF EXISTS test_system_insert')
19 |
20 |
21 | def test_decimal_conv(test_client: Client, table_context: Callable):
22 | with table_context('test_num_conv', ['col1 UInt64', 'col2 Int32', 'f1 Float64']):
23 | data = [[Decimal(5), Decimal(-182), Decimal(55.2)], [Decimal(57238478234), Decimal(77), Decimal(-29.5773)]]
24 | test_client.insert('test_num_conv', data)
25 | result = test_client.query('SELECT * FROM test_num_conv').result_set
26 | assert result == [(5, -182, 55.2), (57238478234, 77, -29.5773)]
27 |
28 |
29 | def test_float_decimal_conv(test_client: Client, table_context: Callable):
30 |     with table_context('test_float_to_dec_conv', ['col1 Decimal32(6)', 'col2 Decimal32(6)', 'col3 Decimal128(6)', 'col4 Decimal128(6)']):
31 | data = [[0.492917, 0.49291700, 0.492917, 0.49291700]]
32 | test_client.insert('test_float_to_dec_conv', data)
33 | result = test_client.query('SELECT * FROM test_float_to_dec_conv').result_set
34 | assert result == [(Decimal("0.492917"), Decimal("0.492917"), Decimal("0.492917"), Decimal("0.492917"))]
35 |
36 |
37 | def test_bad_data_insert(test_client: Client, table_context: Callable):
38 | with table_context('test_bad_insert', ['key Int32', 'float_col Float64']):
39 | data = [[1, 3.22], [2, 'nope']]
40 | try:
41 | test_client.insert('test_bad_insert', data)
42 | except DataError as ex:
43 | assert 'array' in str(ex)
44 |
45 |
46 | def test_bad_strings(test_client: Client, table_context: Callable):
47 | with table_context('test_bad_strings', 'key Int32, fs FixedString(6), nsf Nullable(FixedString(4))'):
48 | try:
49 | test_client.insert('test_bad_strings', [[1, b'\x0535', None]])
50 | except DataError as ex:
51 | assert 'match' in str(ex)
52 | try:
53 | test_client.insert('test_bad_strings', [[1, b'\x0535abc', '😀🙃']])
54 | except DataError as ex:
55 | assert 'encoded' in str(ex)
56 |
57 |
58 | def test_low_card_dictionary_size(test_client: Client, table_context: Callable):
59 | with table_context('test_low_card_dict', 'key Int32, lc LowCardinality(String)',
60 |                        settings={'index_granularity': 65536}):
61 | data = [[x, str(x)] for x in range(30000)]
62 | test_client.insert('test_low_card_dict', data)
63 | assert 30000 == test_client.command('SELECT count() FROM test_low_card_dict')
64 |
65 |
66 | def test_column_names_spaces(test_client: Client, table_context: Callable):
67 | with table_context('test_column_spaces',
68 | columns=['key 1', 'value 1'],
69 | column_types=['Int32', 'String']):
70 | data = [[1, 'str 1'], [2, 'str 2']]
71 | test_client.insert('test_column_spaces', data)
72 | result = test_client.query('SELECT * FROM test_column_spaces').result_rows
73 | assert result[0][0] == 1
74 | assert result[1][1] == 'str 2'
75 |
76 |
77 | def test_numeric_conversion(test_client: Client, table_context: Callable):
78 | with table_context('test_numeric_convert',
79 | columns=['key Int32', 'n_int Nullable(UInt64)', 'n_flt Nullable(Float64)']):
80 | data = [[1, None, None], [2, '2', '5.32']]
81 | test_client.insert('test_numeric_convert', data)
82 | result = test_client.query('SELECT * FROM test_numeric_convert').result_rows
83 | assert result[1][1] == 2
84 | assert result[1][2] == float('5.32')
85 | test_client.command('TRUNCATE TABLE test_numeric_convert')
86 | data = [[0, '55', '532.48'], [1, None, None], [2, '2', '5.32']]
87 | test_client.insert('test_numeric_convert', data)
88 | result = test_client.query('SELECT * FROM test_numeric_convert').result_rows
89 | assert result[0][1] == 55
90 | assert result[0][2] == 532.48
91 | assert result[1][1] is None
92 | assert result[2][1] == 2
93 | assert result[2][2] == 5.32
94 |
--------------------------------------------------------------------------------
/tests/integration_tests/test_multithreading.py:
--------------------------------------------------------------------------------
1 | import threading
2 |
3 | import pytest
4 |
5 | from clickhouse_connect.driver import Client
6 | from clickhouse_connect.driver.exceptions import ProgrammingError
7 | from tests.integration_tests.conftest import TestConfig
8 |
9 |
10 | def test_threading_error(test_config: TestConfig, test_client: Client):
11 | if test_config.cloud:
12 | pytest.skip('Skipping threading test in ClickHouse Cloud')
13 | thrown = None
14 |
15 |     class QueryThread(threading.Thread):
16 | def run(self):
17 | nonlocal thrown
18 | try:
19 | test_client.command('SELECT randomString(512) FROM numbers(1000000)')
20 | except ProgrammingError as ex:
21 | thrown = ex
22 |
23 | threads = [QueryThread(), QueryThread()]
24 | for thread in threads:
25 | thread.start()
26 | for thread in threads:
27 | thread.join()
28 |
29 | assert 'concurrent' in str(thrown)
30 |
--------------------------------------------------------------------------------
/tests/integration_tests/test_native_fuzz.py:
--------------------------------------------------------------------------------
1 | import os
2 | import random
3 |
4 | import pytest
5 |
6 | from clickhouse_connect.datatypes.registry import get_from_name
7 | from clickhouse_connect.driver.client import Client
8 | from clickhouse_connect.driver.ddl import TableColumnDef, create_table
9 | from tests.helpers import random_data, random_columns
10 |
11 | TEST_COLUMNS = 10
12 | MAX_DATA_ROWS = 40
13 |
14 |
15 | # pylint: disable=duplicate-code
16 | def test_query_fuzz(test_client: Client, test_table_engine: str):
17 | if not test_client.min_version('21'):
18 | pytest.skip(f'flatten_nested setting not supported in this server version {test_client.server_version}')
19 | test_runs = int(os.environ.get('CLICKHOUSE_CONNECT_TEST_FUZZ', '250'))
20 | test_client.apply_server_timezone = True
21 | try:
22 | for _ in range(test_runs):
23 | test_client.command('DROP TABLE IF EXISTS fuzz_test')
24 | data_rows = random.randint(0, MAX_DATA_ROWS)
25 | col_names, col_types = random_columns(TEST_COLUMNS)
26 | data = random_data(col_types, data_rows, test_client.server_tz)
27 | col_names = ('row_id',) + col_names
28 | col_types = (get_from_name('UInt32'),) + col_types
29 |
30 | col_defs = [TableColumnDef(name, ch_type) for name, ch_type in zip(col_names, col_types)]
31 | create_stmt = create_table('fuzz_test', col_defs, test_table_engine, {'order by': 'row_id'})
32 | test_client.command(create_stmt, settings={'flatten_nested': 0})
33 | test_client.insert('fuzz_test', data, col_names)
34 |
35 | data_result = test_client.query('SELECT * FROM fuzz_test')
36 | if data_rows:
37 | assert data_result.column_names == col_names
38 | assert data_result.result_set == data
39 | finally:
40 | test_client.apply_server_timezone = False
41 |
--------------------------------------------------------------------------------
/tests/integration_tests/test_params.py:
--------------------------------------------------------------------------------
1 | from datetime import datetime, date
2 | from typing import Callable
3 |
4 | from clickhouse_connect.driver import Client
5 | from clickhouse_connect.driver.binding import DT64Param
6 |
7 |
8 | def test_params(test_client: Client, table_context: Callable):
9 | result = test_client.query('SELECT name, database FROM system.tables WHERE database = {db:String}',
10 | parameters={'db': 'system'})
11 | assert result.first_item['database'] == 'system'
12 | if test_client.min_version('21'):
13 | result = test_client.query('SELECT name, {col:String} FROM system.tables WHERE table ILIKE {t:String}',
14 | parameters={'t': '%rr%', 'col': 'database'})
15 | assert 'rr' in result.first_item['name']
16 |
17 | first_date = datetime.strptime('Jun 1 2005 1:33PM', '%b %d %Y %I:%M%p')
18 | first_date = test_client.server_tz.localize(first_date)
19 | second_date = datetime.strptime('Dec 25 2022 5:00AM', '%b %d %Y %I:%M%p')
20 | second_date = test_client.server_tz.localize(second_date)
21 | with table_context('test_bind_params', ['key UInt64', 'dt DateTime', 'value String', 't Tuple(String, String)']):
22 | test_client.insert('test_bind_params',
23 | [[1, first_date, 'v11', ('one', 'two')],
24 | [2, second_date, 'v21', ('t1', 't2')],
25 | [3, datetime.now(), 'v31', ('str1', 'str2')]])
26 | result = test_client.query('SELECT * FROM test_bind_params WHERE dt = {dt:DateTime}',
27 | parameters={'dt': second_date})
28 | assert result.first_item['key'] == 2
29 | result = test_client.query('SELECT * FROM test_bind_params WHERE dt = %(dt)s',
30 | parameters={'dt': first_date})
31 | assert result.first_item['key'] == 1
32 | result = test_client.query("SELECT * FROM test_bind_params WHERE value != %(v)s AND value like '%%1'",
33 | parameters={'v': 'v11'})
34 | assert result.row_count == 2
35 | result = test_client.query('SELECT * FROM test_bind_params WHERE value IN %(tp)s',
36 | parameters={'tp': ('v18', 'v31')})
37 | assert result.first_item['key'] == 3
38 |
39 | result = test_client.query('SELECT number FROM numbers(10) WHERE {n:Nullable(String)} IS NULL',
40 | parameters={'n': None}).result_rows
41 | assert len(result) == 10
42 |
43 | date_params = [date(2023, 6, 1), date(2023, 8, 5)]
44 | result = test_client.query('SELECT {l:Array(Date)}', parameters={'l': date_params}).first_row
45 | assert date_params == result[0]
46 |
47 | dt_params = [datetime(2023, 6, 1, 7, 40, 2), datetime(2023, 8, 17, 20, 0, 10)]
48 | result = test_client.query('SELECT {l:Array(DateTime)}', parameters={'l': dt_params}).first_row
49 | assert dt_params == result[0]
50 |
51 | num_array_params = [2.5, 5.3, 7.4]
52 | result = test_client.query('SELECT {l:Array(Float64)}', parameters={'l': num_array_params}).first_row
53 | assert num_array_params == result[0]
54 | result = test_client.query('SELECT %(l)s', parameters={'l': num_array_params}).first_row
55 | assert num_array_params == result[0]
56 |
57 | tp_params = ('str1', 'str2')
58 | result = test_client.query('SELECT %(tp)s', parameters={'tp': tp_params}).first_row
59 | assert tp_params == result[0]
60 |
61 | num_params = {'p_0': 2, 'p_1': 100523.55}
62 | result = test_client.query(
63 | 'SELECT count() FROM system.tables WHERE total_rows > %(p_0)d and total_rows < %(p_1)f', parameters=num_params)
64 | assert result.first_row[0] > 0
65 |
66 |
67 | def test_datetime_64_params(test_client: Client):
68 | dt_values = [datetime(2023, 6, 1, 7, 40, 2, 250306), datetime(2023, 8, 17, 20, 0, 10, 777722)]
69 | dt_params = {f'd{ix}': DT64Param(v) for ix, v in enumerate(dt_values)}
70 | result = test_client.query('SELECT {d0:DateTime64(3)}, {d1:Datetime64(9)}', parameters=dt_params).first_row
71 | assert result[0] == dt_values[0].replace(microsecond=250000)
72 | assert result[1] == dt_values[1]
73 |
74 | result = test_client.query('SELECT {a1:Array(DateTime64(6))}', parameters={'a1': [dt_params['d0'], dt_params['d1']]}).first_row
75 | assert result[0] == dt_values
76 |
77 | dt_params = {f'd{ix}_64': v for ix, v in enumerate(dt_values)}
78 | result = test_client.query('SELECT {d0:DateTime64(3)}, {d1:Datetime64(9)}', parameters=dt_params).first_row
79 | assert result[0] == dt_values[0].replace(microsecond=250000)
80 | assert result[1] == dt_values[1]
81 |
82 | result = test_client.query('SELECT {a1:Array(DateTime64(6))}',
83 | parameters={'a1_64': dt_values}).first_row
84 | assert result[0] == dt_values
85 |
86 | dt_params = [DT64Param(v) for v in dt_values]
87 |     result = test_client.query("SELECT %s as string, toDateTime64(%s,6) as dateTime", parameters=dt_params).first_row
88 | assert result == ('2023-06-01 07:40:02.250306', dt_values[1])
89 |
--------------------------------------------------------------------------------
/tests/integration_tests/test_protocol_version.py:
--------------------------------------------------------------------------------
1 | from clickhouse_connect.driver import Client
2 |
3 |
4 | def test_protocol_version(test_client: Client):
5 | query = "select toDateTime(1676369730, 'Asia/Shanghai') as dt FORMAT Native"
6 | raw = test_client.raw_query(query)
7 | assert raw.hex() == '0101026474084461746554696d65425feb63'
8 |
9 | if test_client.min_version('23.3'):
10 | raw = test_client.raw_query(query, settings={'client_protocol_version': 54337})
11 | ch_type = raw[14:39].decode()
12 | assert ch_type == "DateTime('Asia/Shanghai')"
13 |
--------------------------------------------------------------------------------
/tests/integration_tests/test_proxy.py:
--------------------------------------------------------------------------------
1 | import os
2 | from pathlib import Path
3 |
4 | import pytest
5 | from urllib3 import ProxyManager
6 |
7 | import clickhouse_connect
8 | from tests.integration_tests.conftest import TestConfig
9 |
10 |
11 | def test_proxies(test_config: TestConfig):
12 | if not test_config.proxy_address:
13 | pytest.skip('Proxy address not configured')
14 | if test_config.port in (8123, 10723):
15 | client = clickhouse_connect.get_client(host=test_config.host,
16 | port=test_config.port,
17 | username=test_config.username,
18 | password=test_config.password,
19 | http_proxy=test_config.proxy_address)
20 | assert '2' in client.command('SELECT version()')
21 | client.close()
22 |
23 | try:
24 | os.environ['HTTP_PROXY'] = f'http://{test_config.proxy_address}'
25 | client = clickhouse_connect.get_client(host=test_config.host,
26 | port=test_config.port,
27 | username=test_config.username,
28 | password=test_config.password)
29 | assert isinstance(client.http, ProxyManager)
30 | assert '2' in client.command('SELECT version()')
31 | client.close()
32 |
33 | os.environ['no_proxy'] = f'{test_config.host}:{test_config.port}'
34 | client = clickhouse_connect.get_client(host=test_config.host,
35 | port=test_config.port,
36 | username=test_config.username,
37 | password=test_config.password)
38 | assert not isinstance(client.http, ProxyManager)
39 | assert '2' in client.command('SELECT version()')
40 | client.close()
41 | finally:
42 | os.environ.pop('HTTP_PROXY', None)
43 | os.environ.pop('no_proxy', None)
44 | else:
45 | cert_file = f'{Path(__file__).parent}/proxy_ca_cert.crt'
46 | client = clickhouse_connect.get_client(host=test_config.host,
47 | port=test_config.port,
48 | username=test_config.username,
49 | password=test_config.password,
50 | ca_cert=cert_file,
51 | https_proxy=test_config.proxy_address)
52 | assert '2' in client.command('SELECT version()')
53 | client.close()
54 |
55 | try:
56 | os.environ['HTTPS_PROXY'] = f'{test_config.proxy_address}'
57 | client = clickhouse_connect.get_client(host=test_config.host,
58 | port=test_config.port,
59 | username=test_config.username,
60 | password=test_config.password,
61 | ca_cert=cert_file)
62 | assert isinstance(client.http, ProxyManager)
63 | assert '2' in client.command('SELECT version()')
64 | client.close()
65 | finally:
66 | os.environ.pop('HTTPS_PROXY', None)
67 |
--------------------------------------------------------------------------------
/tests/integration_tests/test_raw_insert.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 | from typing import Callable
3 |
4 | from clickhouse_connect.driver import Client
5 |
6 |
7 | def test_raw_insert(test_client: Client, table_context: Callable):
8 | with table_context('test_raw_insert', ["`weir'd` String", 'value String']):
9 | csv = 'value1\nvalue2'
10 | test_client.raw_insert('test_raw_insert', ['"weir\'d"'], csv.encode(), fmt='CSV')
11 | result = test_client.query('SELECT * FROM test_raw_insert')
12 | assert result.result_set[1][0] == 'value2'
13 |
14 | test_client.command('TRUNCATE TABLE test_raw_insert')
15 | tsv = 'weird1\tvalue__`2\nweird2\tvalue77'
16 | test_client.raw_insert('test_raw_insert', ["`weir'd`", 'value'], tsv, fmt='TSV')
17 | result = test_client.query('SELECT * FROM test_raw_insert')
18 | assert result.result_set[0][1] == 'value__`2'
19 | assert result.result_set[1][1] == 'value77'
20 |
21 |
22 | def test_raw_insert_compression(test_client: Client, table_context: Callable):
23 | data_file = f'{Path(__file__).parent}/movies.csv.gz'
24 | with open(data_file, mode='rb') as movies_file:
25 | data = movies_file.read()
26 | with table_context('test_gzip_movies', ['movie String', 'year UInt16', 'rating Decimal32(3)']):
27 | test_client.raw_insert('test_gzip_movies', None, data, fmt='CSV', compression='gzip',
28 | settings={'input_format_allow_errors_ratio': .2,
29 | 'input_format_allow_errors_num': 5}
30 | )
31 | res = test_client.query(
32 | 'SELECT count() as count, sum(rating) as rating, max(year) as year FROM test_gzip_movies').first_item
33 | assert res['count'] == 248
34 | assert res['year'] == 2022
35 |
--------------------------------------------------------------------------------
/tests/integration_tests/test_session_id.py:
--------------------------------------------------------------------------------
1 | import uuid
2 |
3 | from typing import Callable
4 |
5 | import pytest
6 |
7 | from clickhouse_connect.driver import create_async_client
8 | from tests.integration_tests.conftest import TestConfig
9 |
10 | SESSION_KEY = 'session_id'
11 |
12 |
13 | def test_client_default_session_id(test_create_client: Callable):
14 | # by default, the sync client will autogenerate the session id
15 | client = test_create_client()
16 | session_id = client.get_client_setting(SESSION_KEY)
17 | try:
18 | uuid.UUID(session_id)
19 | except ValueError:
20 | pytest.fail(f"Invalid session_id: {session_id}")
21 | client.close()
22 |
23 |
24 | def test_client_autogenerate_session_id(test_create_client: Callable):
25 | client = test_create_client()
26 | session_id = client.get_client_setting(SESSION_KEY)
27 | try:
28 | uuid.UUID(session_id)
29 | except ValueError:
30 | pytest.fail(f"Invalid session_id: {session_id}")
31 |
32 |
33 | def test_client_custom_session_id(test_create_client: Callable):
34 | session_id = 'custom_session_id'
35 | client = test_create_client(session_id=session_id)
36 | assert client.get_client_setting(SESSION_KEY) == session_id
37 | client.close()
38 |
39 |
40 | @pytest.mark.asyncio
41 | async def test_async_client_default_session_id(test_config: TestConfig):
42 | # by default, the async client will NOT autogenerate the session id
43 | async_client = await create_async_client(database=test_config.test_database,
44 | host=test_config.host,
45 | port=test_config.port,
46 | user=test_config.username,
47 | password=test_config.password)
48 | assert async_client.get_client_setting(SESSION_KEY) is None
49 | await async_client.close()
50 |
51 |
52 | @pytest.mark.asyncio
53 | async def test_async_client_autogenerate_session_id(test_config: TestConfig):
54 | async_client = await create_async_client(database=test_config.test_database,
55 | host=test_config.host,
56 | port=test_config.port,
57 | user=test_config.username,
58 | password=test_config.password,
59 | autogenerate_session_id=True)
60 | session_id = async_client.get_client_setting(SESSION_KEY)
61 | try:
62 | uuid.UUID(session_id)
63 | except ValueError:
64 | pytest.fail(f"Invalid session_id: {session_id}")
65 | await async_client.close()
66 |
67 |
68 | @pytest.mark.asyncio
69 | async def test_async_client_custom_session_id(test_config: TestConfig):
70 | session_id = 'custom_session_id'
71 | async_client = await create_async_client(database=test_config.test_database,
72 | host=test_config.host,
73 | port=test_config.port,
74 | user=test_config.username,
75 | password=test_config.password,
76 | session_id=session_id)
77 | assert async_client.get_client_setting(SESSION_KEY) == session_id
78 | await async_client.close()
79 |
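
Note: for the sync client, the autogeneration shown above can also be disabled globally through
the common settings module used elsewhere in these tests (common.set_setting). A sketch, assuming
'autogenerate_session_id' is the relevant setting name:

    import clickhouse_connect
    from clickhouse_connect import common

    common.set_setting('autogenerate_session_id', False)  # assumed setting name
    client = clickhouse_connect.get_client()
    # with autogeneration off, no session_id client setting is populated
    assert client.get_client_setting('session_id') is None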
--------------------------------------------------------------------------------
/tests/integration_tests/test_sqlalchemy/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ClickHouse/clickhouse-connect/ced1ec4ddc823159648fd66ce554110192b0a023/tests/integration_tests/test_sqlalchemy/__init__.py
--------------------------------------------------------------------------------
/tests/integration_tests/test_sqlalchemy/conftest.py:
--------------------------------------------------------------------------------
1 | from typing import Iterator
2 | from pytest import fixture
3 |
4 | from sqlalchemy.engine import create_engine
5 | from sqlalchemy.engine.base import Engine
6 |
7 | from tests.integration_tests.conftest import TestConfig
8 |
9 |
10 | @fixture(scope='module', name='test_engine')
11 | def test_engine_fixture(test_config: TestConfig) -> Iterator[Engine]:
12 | test_engine: Engine = create_engine(
13 | f'clickhousedb://{test_config.username}:{test_config.password}@{test_config.host}:' +
14 | f'{test_config.port}/{test_config.test_database}?ch_http_max_field_name_size=99999' +
15 | '&use_skip_indexes=0&ca_cert=certifi&query_limit=2333&compression=zstd'
16 | )
17 |
18 | yield test_engine
19 | test_engine.dispose()
20 |
--------------------------------------------------------------------------------
/tests/integration_tests/test_sqlalchemy/test_basics.py:
--------------------------------------------------------------------------------
1 | from sqlalchemy.engine import Engine
2 |
3 | from clickhouse_connect import common
4 |
5 | test_query = """
6 | -- 6dcd92a04feb50f14bbcf07c661680ba
7 | WITH dummy = 2
8 | SELECT database, name FROM system.tables LIMIT 2
9 | -- 6dcd92a04feb50f14bbcf07c661680ba
10 | """
11 |
12 | test_query_ver19 = """
13 | -- 6dcd92a04feb50f14bbcf07c661680ba
14 | SELECT database, name FROM system.tables LIMIT 2
15 | -- 6dcd92a04feb50f14bbcf07c661680ba
16 | """
17 |
18 |
19 | def test_dsn_config(test_engine: Engine):
20 | common.set_setting('invalid_setting_action', 'drop')
21 | client = test_engine.raw_connection().connection.client
22 | assert client.http.connection_pool_kw['cert_reqs'] == 'CERT_REQUIRED'
23 | assert 'use_skip_indexes' in client.params
24 | assert client.params['http_max_field_name_size'] == '99999'
25 | assert client.query_limit == 2333
26 | assert client.compression == 'zstd'
27 |
28 |
29 | def test_cursor(test_engine: Engine):
30 | common.set_setting('invalid_setting_action', 'drop')
31 | raw_conn = test_engine.raw_connection()
32 | cursor = raw_conn.cursor()
33 | sql = test_query
34 | if not raw_conn.connection.client.min_version('21'):
35 | sql = test_query_ver19
36 |
37 | cursor.execute(sql)
38 | assert cursor.description[0][0] == 'database'
39 | assert cursor.description[1][1] == 'String'
40 | assert len(getattr(cursor, 'data')) == 2
41 | assert cursor.summary[0]["read_rows"] == '2'
42 | raw_conn.close()
43 |
44 |
45 | def test_execute(test_engine: Engine):
46 | common.set_setting('invalid_setting_action', 'drop')
47 |
48 | with test_engine.begin() as conn:
49 | sql = test_query
50 | if not conn.connection.connection.client.min_version('21'):
51 | sql = test_query_ver19
52 | rows = list(conn.execute(sql))
53 | assert len(rows) == 2
54 |
55 | rows = list(conn.execute('DROP TABLE IF EXISTS dummy_table'))
56 | assert len(rows) > 0 # This is just the metadata from the "command" QueryResult
57 |
58 | rows = list(conn.execute('describe TABLE system.columns'))
59 | assert len(rows) > 5
60 |
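These tests pass bare SQL strings to Connection.execute, which the SQLAlchemy 1.3/1.4 series pinned in tests/test_requirements.txt accepts; SQLAlchemy 2.0 rejects bare strings in favor of text(). A 1.x-style sketch against a hypothetical local server:

from sqlalchemy import create_engine

engine = create_engine('clickhousedb://default:@localhost:8123/default')
with engine.begin() as conn:
    rows = list(conn.execute('SELECT database, name FROM system.tables LIMIT 2'))
assert len(rows) == 2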
--------------------------------------------------------------------------------
/tests/integration_tests/test_sqlalchemy/test_ddl.py:
--------------------------------------------------------------------------------
1 | from enum import Enum as PyEnum
2 |
3 | import sqlalchemy as db
4 | from sqlalchemy import MetaData
5 |
6 | from sqlalchemy.engine.base import Engine
7 | from sqlalchemy.ext.declarative import declarative_base
8 |
9 | from tests.integration_tests.conftest import TestConfig
10 | from clickhouse_connect import common
11 | from clickhouse_connect.cc_sqlalchemy.datatypes.sqltypes import Int8, UInt16, Decimal, Enum16, Float64, Boolean, \
12 | FixedString, String, UInt64, UUID, DateTime, DateTime64, LowCardinality, Nullable, Array, AggregateFunction, \
13 | UInt32, IPv4
14 | from clickhouse_connect.cc_sqlalchemy.ddl.custom import CreateDatabase, DropDatabase
15 | from clickhouse_connect.cc_sqlalchemy.ddl.tableengine import engine_map
16 |
17 |
18 | def test_create_database(test_engine: Engine, test_config: TestConfig, test_db: str):
19 | if test_db:
20 | common.set_setting('invalid_setting_action', 'drop')
21 | conn = test_engine.connect()
22 | create_db = f'create_db_{test_db}'
23 | if not test_engine.dialect.has_database(conn, create_db):
24 | if test_config.host == 'localhost' and conn.connection.connection.client.min_version('20'):
25 | conn.execute(CreateDatabase(create_db, 'Atomic'))
26 | else:
27 | conn.execute(CreateDatabase(create_db))
28 | conn.execute(DropDatabase(create_db))
29 |
30 |
31 | class ColorEnum(PyEnum):
32 | RED = 1
33 | BLUE = 2
34 | TEAL = -4
35 | COBALT = 877
36 |
37 |
38 | def test_create_table(test_engine: Engine, test_db: str, test_table_engine: str):
39 | common.set_setting('invalid_setting_action', 'drop')
40 | conn = test_engine.connect()
41 | table_cls = engine_map[test_table_engine]
42 | metadata = db.MetaData(bind=test_engine, schema=test_db)
43 | conn.execute('DROP TABLE IF EXISTS simple_table_test')
44 | bool_type = Boolean
45 | date_tz64_type = DateTime64(3, 'Europe/Moscow')
46 | if not conn.connection.connection.client.min_version('20'):
47 | bool_type = Int8
48 | date_tz64_type = DateTime('Europe/Moscow')
49 | table = db.Table('simple_table_test', metadata,
50 | db.Column('key_col', Int8),
51 | db.Column('uint_col', UInt16),
52 | db.Column('dec_col', Decimal(38, 5)), # Decimal128(5)
53 | db.Column('enum_col', Enum16(ColorEnum)),
54 | db.Column('float_col', Float64),
55 | db.Column('str_col', String),
56 | db.Column('fstr_col', FixedString(17)),
57 | db.Column('bool_col', bool_type),
58 | table_cls(('key_col', 'uint_col'), primary_key='key_col'))
59 | table.create(conn)
60 | conn.execute('DROP TABLE IF EXISTS advanced_table_test')
61 | table = db.Table('advanced_table_test', metadata,
62 | db.Column('key_col', UInt64),
63 | db.Column('uuid_col', UUID),
64 | db.Column('dt_col', DateTime),
65 | db.Column('ip_col', IPv4),
66 | db.Column('dt64_col', date_tz64_type),
67 | db.Column('lc_col', LowCardinality(FixedString(16))),
68 | db.Column('lc_date_col', LowCardinality(Nullable(String))),
69 | db.Column('null_dt_col', Nullable(DateTime('America/Denver'))),
70 | db.Column('arr_col', Array(UUID)),
71 | db.Column('agg_col', AggregateFunction('uniq', LowCardinality(String))),
72 | table_cls('key_col'))
73 | table.create(conn)
74 |
75 |
76 | def test_declarative(test_engine: Engine, test_db: str, test_table_engine: str):
77 | common.set_setting('invalid_setting_action', 'drop')
78 | conn = test_engine.connect()
79 | conn.execute('DROP TABLE IF EXISTS users_test')
80 | table_cls = engine_map[test_table_engine]
81 | base_cls = declarative_base(metadata=MetaData(schema=test_db))
82 |
83 | class User(base_cls):
84 | __tablename__ = 'users_test'
85 | __table_args__ = (table_cls(order_by=['id', 'name']),)
86 | id = db.Column(UInt32, primary_key=True)
87 | name = db.Column(String)
88 | fullname = db.Column(String)
89 | nickname = db.Column(String)
90 |
91 | base_cls.metadata.create_all(test_engine)
92 | user = User(name='Alice')
93 | assert user.name == 'Alice'
94 |
--------------------------------------------------------------------------------
/tests/integration_tests/test_sqlalchemy/test_inserts.py:
--------------------------------------------------------------------------------
1 | from pytest import fixture
2 |
3 | import sqlalchemy as db
4 | from sqlalchemy import MetaData
5 | from sqlalchemy.engine import Engine
6 | from sqlalchemy.ext.declarative import declarative_base
7 | from sqlalchemy.orm import Session
8 |
9 | from clickhouse_connect.cc_sqlalchemy.datatypes.sqltypes import LowCardinality, String, UInt64
10 | from clickhouse_connect.cc_sqlalchemy.ddl.tableengine import engine_map
11 | from clickhouse_connect.driver import Client
12 |
13 |
14 | @fixture(scope='module', autouse=True, name='test_model')
15 | def test_model_fixture(test_client: Client, test_engine: Engine, test_db: str, test_table_engine: str):
16 | if not test_client.min_version('22.6.1'):
17 | yield None
18 | return
19 | engine_cls = engine_map[test_table_engine]
20 |
21 | Base = declarative_base(metadata=MetaData(schema=test_db)) # pylint: disable=invalid-name
22 |
23 | class Model(Base):
24 | __tablename__ = 'insert_model'
25 | __table_args__ = (engine_cls(order_by=['test_name', 'value_1']),)
26 | test_name = db.Column(LowCardinality(String), primary_key=True)
27 | value_1 = db.Column(String)
28 | metric_2 = db.Column(UInt64)
29 | description = db.Column(String)
30 |
31 | test_engine.execute('DROP TABLE IF EXISTS insert_model')
32 | Base.metadata.create_all(test_engine)
33 | yield Model
34 |
35 |
36 | def test_single_insert(test_engine: Engine, test_model):
37 | conn = test_engine.connect()
38 | conn.execute(db.insert(test_model).values(test_name='single_insert',
39 | value_1='v1',
40 | metric_2=25738,
41 | description='Single Desc'))
42 | conn.execute(db.insert(test_model), {'test_name': 'another_single_insert'})
43 |
44 |
45 | def test_multiple_insert(test_engine: Engine, test_model):
46 | session = Session(test_engine)
47 | model_1 = test_model(test_name='multi_1',
48 | value_1='v1',
49 | metric_2=100,
50 | description='First of Many')
51 | model_2 = test_model(test_name='multi_2',
52 | value_1='v2',
53 | metric_2=100,
54 | description='Second of Many')
55 | model_3 = test_model(value_1='v7',
56 | metric_2=77,
57 | description='Third of Many',
58 | test_name='odd_one')
59 | session.add(model_1)
60 | session.add(model_2)
61 | session.add(model_3)
62 | session.commit()
63 |
64 |
65 | def test_bulk_insert(test_engine: Engine, test_model):
66 | session = Session(test_engine)
67 | model_1 = test_model(test_name='bulk_1',
68 | value_1='v1',
69 | metric_2=100,
70 | description='First of Bulk')
71 | model_2 = test_model(test_name='bulk_2',
72 | value_1='v2',
73 | metric_2=100,
74 | description='Second of Bulk')
75 | model_3 = test_model(value_1='vb78',
76 | metric_2=528,
77 | description='Third of Bulk',
78 | test_name='bulk')
79 | session.bulk_save_objects([model_1, model_2, model_3])
80 | session.commit()
81 |
--------------------------------------------------------------------------------
/tests/integration_tests/test_sqlalchemy/test_reflect.py:
--------------------------------------------------------------------------------
1 | # pylint: disable=no-member
2 | import sqlalchemy as db
3 | from sqlalchemy.engine import Engine
4 |
5 | from clickhouse_connect import common
6 | from clickhouse_connect.cc_sqlalchemy.datatypes.sqltypes import UInt32, SimpleAggregateFunction, Point
7 |
8 |
9 | def test_basic_reflection(test_engine: Engine):
10 | common.set_setting('invalid_setting_action', 'drop')
11 | conn = test_engine.connect()
12 | metadata = db.MetaData(bind=test_engine, schema='system')
13 | table = db.Table('tables', metadata, autoload_with=test_engine)
14 | query = db.select([table.columns.create_table_query])
15 | result = conn.execute(query)
16 | rows = result.fetchmany(100)
17 | assert rows
18 |
19 |
20 | def test_full_table_reflection(test_engine: Engine, test_db: str):
21 | common.set_setting('invalid_setting_action', 'drop')
22 | conn = test_engine.connect()
23 | conn.execute(f'DROP TABLE IF EXISTS {test_db}.reflect_test')
24 | conn.execute(
25 | f'CREATE TABLE {test_db}.reflect_test (key UInt32, value FixedString(20), ' +
26 | 'agg SimpleAggregateFunction(anyLast, String)) ' +
27 | 'ENGINE AggregatingMergeTree ORDER BY key')
28 | metadata = db.MetaData(bind=test_engine, schema=test_db)
29 | table = db.Table('reflect_test', metadata, autoload_with=test_engine)
30 | assert table.columns.key.type.__class__ == UInt32
31 | assert table.columns.agg.type.__class__ == SimpleAggregateFunction
32 | assert 'MergeTree' in table.engine.name
33 |
34 |
35 | def test_types_reflection(test_engine: Engine, test_db: str):
36 | common.set_setting('invalid_setting_action', 'drop')
37 | conn = test_engine.connect()
38 | conn.execute(f'DROP TABLE IF EXISTS {test_db}.sqlalchemy_types_test')
39 | conn.execute(
40 | f'CREATE TABLE {test_db}.sqlalchemy_types_test (key UInt32, pt Point) ' +
41 | 'ENGINE MergeTree ORDER BY key')
42 | metadata = db.MetaData(bind=test_engine, schema=test_db)
43 | table = db.Table('sqlalchemy_types_test', metadata, autoload_with=test_engine)
44 | assert table.columns.key.type.__class__ == UInt32
45 | assert table.columns.pt.type.__class__ == Point
46 | assert 'MergeTree' in table.engine.name
47 |
48 |
49 | def test_table_exists(test_engine: Engine):
50 | common.set_setting('invalid_setting_action', 'drop')
51 | conn = test_engine.connect()
52 | assert test_engine.dialect.has_table(conn, 'columns', 'system')
53 | assert not test_engine.dialect.has_table(conn, 'nope', 'fake_db')
54 |
--------------------------------------------------------------------------------
/tests/integration_tests/test_streaming.py:
--------------------------------------------------------------------------------
1 | import random
2 | import string
3 |
4 | from clickhouse_connect.driver import Client
5 | from clickhouse_connect.driver.exceptions import StreamClosedError, ProgrammingError, StreamFailureError
6 |
7 |
8 | def test_row_stream(test_client: Client):
9 | row_stream = test_client.query_rows_stream('SELECT number FROM numbers(10000)')
10 | total = 0
11 | with row_stream:
12 | for row in row_stream:
13 | total += row[0]
14 | try:
15 | with row_stream:
16 | pass
17 | except StreamClosedError:
18 | pass
19 | assert total == 49995000
20 |
21 |
22 | def test_column_block_stream(test_client: Client):
23 | random_string = 'randomStringUTF8(50)'
24 | if not test_client.min_version('20'):
25 | random_string = random.choices(string.ascii_lowercase, k=50) # pre-20 servers lack randomStringUTF8; the list repr interpolates as a ClickHouse array literal
26 | block_stream = test_client.query_column_block_stream(f'SELECT number, {random_string} FROM numbers(10000)',
27 | settings={'max_block_size': 4000})
28 | total = 0
29 | block_count = 0
30 | with block_stream:
31 | for block in block_stream:
32 | block_count += 1
33 | total += sum(block[0])
34 | assert total == 49995000
35 | assert block_count > 1
36 |
37 |
38 | def test_row_block_stream(test_client: Client):
39 | random_string = 'randomStringUTF8(50)'
40 | if not test_client.min_version('20'):
41 | random_string = random.choices(string.ascii_lowercase, k=50) # pre-20 servers lack randomStringUTF8; the list repr interpolates as a ClickHouse array literal
42 | block_stream = test_client.query_row_block_stream(f'SELECT number, {random_string} FROM numbers(10000)',
43 | settings={'max_block_size': 4000})
44 | total = 0
45 | block_count = 0
46 | with block_stream:
47 | for block in block_stream:
48 | block_count += 1
49 | for row in block:
50 | total += row[0]
51 | assert total == 49995000
52 | assert block_count > 1
53 |
54 |
55 | def test_stream_errors(test_client: Client):
56 | query_result = test_client.query('SELECT number FROM numbers(100000)')
57 | try:
58 | for _ in query_result.row_block_stream:
59 | pass
60 | except ProgrammingError as ex:
61 | assert 'context' in str(ex)
62 | assert query_result.row_count == 100000
63 | try:
64 | with query_result.rows_stream as stream:
65 | assert sum(row[0] for row in stream) == 3882 # not reached: the consumed result raises StreamClosedError first
66 | except StreamClosedError:
67 | pass
68 |
69 |
70 | def test_stream_failure(test_client: Client):
71 | with test_client.query_row_block_stream('SELECT toString(cityHash64(number)) FROM numbers(10000000)' +
72 | ' where intDiv(1,number-300000)>-100000000') as stream:
73 | blocks = 0
74 | failed = False
75 | try:
76 | for _ in stream:
77 | blocks += 1
78 | except StreamFailureError as ex:
79 | failed = True
80 | assert 'division by zero' in str(ex).lower()
81 | assert failed
82 |
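The contract these tests pin down: a stream is only usable inside its with block (iterating outside raises ProgrammingError mentioning 'context'), a consumed or closed stream raises StreamClosedError when reopened, and a server-side error mid-stream surfaces as StreamFailureError. A minimal consumer sketch (hypothetical connection details):

from clickhouse_connect import get_client

client = get_client(host='localhost')  # hypothetical connection details
total = 0
with client.query_rows_stream('SELECT number FROM numbers(1000)') as stream:
    for row in stream:
        total += row[0]
assert total == 499500  # sum of 0..999
client.close()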
--------------------------------------------------------------------------------
/tests/integration_tests/test_tls.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | import pytest
4 | from urllib3.exceptions import SSLError
5 |
6 | from clickhouse_connect import get_client
7 | from clickhouse_connect.driver.common import coerce_bool
8 | from clickhouse_connect.driver.exceptions import OperationalError
9 | from tests.helpers import PROJECT_ROOT_DIR
10 |
11 | # See .docker/clickhouse/single_node_tls for the server configuration
12 | cert_dir = f'{PROJECT_ROOT_DIR}/.docker/clickhouse/single_node_tls/certificates/'
13 | host = 'server1.clickhouse.test'
14 |
15 |
16 | def test_basic_tls():
17 | if not coerce_bool(os.environ.get('CLICKHOUSE_CONNECT_TEST_TLS', 'False')):
18 | pytest.skip('TLS tests not enabled')
19 | client = get_client(interface='https', host=host, port=10843, verify=False)
20 | assert client.command("SELECT 'insecure'") == 'insecure'
21 | client.close_connections()
22 |
23 | client = get_client(interface='https', host=host, port=10843, ca_cert=f'{cert_dir}ca.crt')
24 | assert client.command("SELECT 'verify_server'") == 'verify_server'
25 | client.close_connections()
26 |
27 | try:
28 | get_client(interface='https', host='localhost', port=10843, ca_cert=f'{cert_dir}ca.crt')
29 | pytest.fail('Expected TLS exception with a different hostname')
30 | except OperationalError as ex:
31 | assert isinstance(ex.__cause__.reason, SSLError) # pylint: disable=no-member
32 | client.close_connections()
33 |
34 | try:
35 | get_client(interface='https', host='localhost', port=10843)
36 | pytest.fail('Expected TLS exception with a self-signed cert')
37 | except OperationalError as ex:
38 | assert isinstance(ex.__cause__.reason, SSLError) # pylint: disable=no-member
39 |
40 |
41 | def test_mutual_tls():
42 | if not coerce_bool(os.environ.get('CLICKHOUSE_CONNECT_TEST_TLS', 'False')):
43 | pytest.skip('TLS tests not enabled')
44 | client = get_client(interface='https',
45 | username='cert_user',
46 | host=host,
47 | port=10843,
48 | ca_cert=f'{cert_dir}ca.crt',
49 | client_cert=f'{cert_dir}client.crt',
50 | client_cert_key=f'{cert_dir}client.key')
51 | assert client.command('SELECT user()') == 'cert_user'
52 |
--------------------------------------------------------------------------------
/tests/integration_tests/test_tools.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 | from typing import Callable
3 |
4 | from clickhouse_connect.driver import Client
5 | from clickhouse_connect.driver.tools import insert_file
6 | from tests.integration_tests.conftest import TestConfig
7 |
8 |
9 | def test_csv_upload(test_client: Client, table_context: Callable):
10 | data_file = f'{Path(__file__).parent}/movies.csv.gz'
11 | with table_context('test_csv_upload', ['movie String', 'year UInt16', 'rating Decimal32(3)']):
12 | insert_file(test_client, 'test_csv_upload', data_file,
13 | settings={'input_format_allow_errors_ratio': .2,
14 | 'input_format_allow_errors_num': 5})
15 | res = test_client.query(
16 | 'SELECT count() as count, sum(rating) as rating, max(year) as year FROM test_csv_upload').first_item
17 | assert res['count'] == 248
18 | assert res['year'] == 2022
19 |
20 |
21 | def test_parquet_upload(test_config: TestConfig, test_client: Client, table_context: Callable):
22 | data_file = f'{Path(__file__).parent}/movies.parquet'
23 | full_table = f'{test_config.test_database}.test_parquet_upload'
24 | with table_context(full_table, ['movie String', 'year UInt16', 'rating Float64']):
25 | insert_file(test_client, full_table, data_file, 'Parquet',
26 | settings={'output_format_parquet_string_as_string': 1})
27 | res = test_client.query(
28 | f'SELECT count() as count, sum(rating) as rating, max(year) as year FROM {full_table}').first_item
29 | assert res['count'] == 250
30 | assert res['year'] == 2022
31 |
32 |
33 | def test_json_insert(test_client: Client, table_context: Callable):
34 | data_file = f'{Path(__file__).parent}/json_test.ndjson'
35 | with table_context('test_json_upload', ['key UInt16', 'flt_val Float64', 'int_val Int8']):
36 | insert_file(test_client, 'test_json_upload', data_file, 'JSONEachRow')
37 | res = test_client.query('SELECT * FROM test_json_upload ORDER BY key').result_rows
38 | assert res[1][0] == 17
39 | assert res[1][1] == 5.3
40 | assert res[1][2] == 121
41 |
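insert_file streams a local file into a table over the raw insert path; the first test above passes no explicit format for the gzipped CSV while the Parquet and NDJSON tests name theirs, and per-request ClickHouse settings ride along via the settings dict. A sketch against a hypothetical table:

from clickhouse_connect import get_client
from clickhouse_connect.driver.tools import insert_file

client = get_client(host='localhost')  # hypothetical connection details
client.command('CREATE TABLE IF NOT EXISTS movies (movie String, year UInt16, rating Float64) '
               'ENGINE MergeTree ORDER BY year')
insert_file(client, 'movies', 'movies.parquet', 'Parquet')  # hypothetical local copy of the test data file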
--------------------------------------------------------------------------------
/tests/test_requirements.txt:
--------------------------------------------------------------------------------
1 | pytz
2 | urllib3>=1.26
3 | setuptools
4 | certifi
5 | sqlalchemy>1.3.21,<2.0
6 | cython==3.0.11
7 | pyarrow
8 | pytest
9 | pytest-asyncio
10 | pytest-mock
11 | pytest-dotenv
12 | pytest-cov
13 | numpy~=1.22.0; python_version >= '3.8' and python_version <= '3.10'
14 | numpy~=1.26.0; python_version >= '3.11' and python_version <= '3.12'
15 | numpy~=2.1.0; python_version >= '3.13'
16 | pandas
17 | zstandard
18 | lz4
19 | pyjwt[crypto]==2.10.1
20 |
--------------------------------------------------------------------------------
/tests/timings.py:
--------------------------------------------------------------------------------
1 | # Compare building a binary buffer via one array.array copy against a per-value to_bytes loop
2 | import array
3 | from datetime import datetime
4 |
5 | b = bytearray()
6 |
7 | start = datetime.now()
8 | for x in range(10000):
9 | b = bytearray()
10 | a = array.array('H', list(range(5000)))
11 | b += a
12 | print(f'{len(b)} {datetime.now() - start}')
13 |
14 |
15 | start = datetime.now()
16 |
17 | for x in range(10000):
18 | b = bytearray()
19 | for y in range(5000):
20 | b.extend(y.to_bytes(2, 'little'))
21 | print(f'{len(b)} {datetime.now() - start}')
22 |
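The same comparison expressed with timeit, a sketch; note that array.array('H') uses native byte order, so the two buffers only match byte-for-byte on little-endian machines:

import array
import timeit

def bulk() -> bytes:
    b = bytearray()
    b += array.array('H', range(5000))  # one C-level copy of 5000 uint16 values
    return bytes(b)

def per_value() -> bytes:
    b = bytearray()
    for y in range(5000):
        b.extend(y.to_bytes(2, 'little'))  # 5000 interpreter-level calls
    return bytes(b)

assert bulk() == per_value()  # holds on little-endian platforms
print(timeit.timeit(bulk, number=1000), timeit.timeit(per_value, number=1000))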
--------------------------------------------------------------------------------
/tests/unit_tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ClickHouse/clickhouse-connect/ced1ec4ddc823159648fd66ce554110192b0a023/tests/unit_tests/__init__.py
--------------------------------------------------------------------------------
/tests/unit_tests/test_chtypes.py:
--------------------------------------------------------------------------------
1 | # pylint: disable=protected-access
2 | from clickhouse_connect.datatypes.container import Nested
3 | from clickhouse_connect.datatypes.registry import get_from_name as gfn
4 |
5 |
6 | def test_enum_parse():
7 | enum_type = gfn("Enum8('OZC|8;' = -125, '6MQ4v-t' = -114, 'As7]sEg\\'' = 40, 'v~l$PR5' = 84)")
8 | assert 'OZC|8;' in enum_type._name_map
9 | enum_type = gfn('Enum8(\'\\\'"2Af\' = 93,\'KG;+\\\' = -114,\'j0\' = -40)')
10 | assert '\'"2Af' in enum_type._name_map
11 | enum_type = gfn("Enum8('value1' = 7, 'value2'=5)")
12 | assert enum_type.name == "Enum8('value2' = 5, 'value1' = 7)" # the canonical name sorts entries by value
13 | assert 7 in enum_type._int_map
14 | assert 5 in enum_type._int_map
15 | enum_type = gfn(r"Enum16('beta&&' = -3, '' = 0, 'alpha\'' = 3822)")
16 | assert r"alpha'" == enum_type._int_map[3822]
17 | assert -3 == enum_type._name_map['beta&&']
18 |
19 |
20 | def test_names():
21 | array_type = gfn('Array(Nullable(FixedString(50)))')
22 | assert array_type.name == 'Array(Nullable(FixedString(50)))'
23 | array_type = gfn(
24 | "Array(Enum8(\'user_name\' = 1, \'ip_address\' = -2, \'forwarded_ip_address\' = 3, \'client_key\' = 4))")
25 | assert array_type.name == (
26 | "Array(Enum8('ip_address' = -2, 'user_name' = 1, 'forwarded_ip_address' = 3, 'client_key' = 4))")
27 |
28 |
29 | def test_nested_parse():
30 | nested_type = gfn('Nested(str1 String, int32 UInt32)')
31 | assert nested_type.name == 'Nested(str1 String, int32 UInt32)'
32 | assert isinstance(nested_type, Nested)
33 | nested_type = gfn('Nested(id Int64, data Nested(inner_key String, inner_map Map(String, UUID)))')
34 | assert nested_type.name == 'Nested(id Int64, data Nested(inner_key String, inner_map Map(String, UUID)))'
35 | nest = "key_0 Enum16('[m(X*' = -18773, '_9as' = 11854, '&e$LE' = 27685), key_1 Nullable(Decimal(62, 38))"
36 | nested_name = f'Nested({nest})'
37 | nested_type = gfn(nested_name)
38 | assert nested_type.name == nested_name
39 |
40 |
41 | def test_named_tuple():
42 | tuple_type = gfn('Tuple(Int64, String)')
43 | assert tuple_type.name == 'Tuple(Int64, String)'
44 | tuple_type = gfn('Tuple(`key` Int64, `value` String)')
45 | assert tuple_type.name == 'Tuple(`key` Int64, `value` String)'
46 |
--------------------------------------------------------------------------------
/tests/unit_tests/test_driver/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ClickHouse/clickhouse-connect/ced1ec4ddc823159648fd66ce554110192b0a023/tests/unit_tests/test_driver/__init__.py
--------------------------------------------------------------------------------
/tests/unit_tests/test_driver/binary.py:
--------------------------------------------------------------------------------
1 | NESTED_BINARY = """
2 | 0104 066e 6573 7465 6421 4e65 7374 6564
3 | 2873 7472 3120 5374 7269 6e67 2c20 696e
4 | 7433 3220 5549 6e74 3332 2900 0000 0000
5 | 0000 0002 0000 0000 0000 0004 0000 0000
6 | 0000 0006 0000 0000 0000 0005 7468 7265
7 | 6504 6669 7665 036f 6e65 0374 776f 036f
8 | 6e65 0374 776f 0500 0000 4d00 0000 0500
9 | 0000 3700 0000 0500 0000 3700 0000
10 | """
11 |
--------------------------------------------------------------------------------
/tests/unit_tests/test_driver/test_buffer.py:
--------------------------------------------------------------------------------
1 | from clickhouse_connect.driver.buffer import ResponseBuffer as PyResponseBuffer
2 | from clickhouse_connect.driver.exceptions import StreamCompleteException
3 | from clickhouse_connect.driverc.buffer import ResponseBuffer as CResponseBuffer # pylint: disable=no-name-in-module
4 | from tests.helpers import bytes_source, to_bytes
5 |
6 |
7 | def test_read_ints():
8 | for cls in CResponseBuffer, PyResponseBuffer:
9 | buff = bytes_source('05 20 00 00 00 00 00 00 68 10 83 03 77', cls=cls)
10 | assert buff.read_uint64() == 8197
11 | assert buff.read_leb128() == 104
12 | assert buff.read_leb128() == 16
13 | assert buff.read_leb128() == 387
14 | assert buff.read_byte() == 0x77
15 | try:
16 | buff.read_byte()
17 | except StreamCompleteException:
18 | pass
19 |
20 |
21 | def test_read_strings():
22 | for cls in CResponseBuffer, PyResponseBuffer:
23 | buff = bytes_source('04 43 44 4d 41', cls=cls)
24 | assert buff.read_leb128_str() == 'CDMA'
25 | try:
26 | buff.read_str_col(2, 'utf8')
27 | except StreamCompleteException:
28 | pass
29 |
30 |
31 | def test_read_bytes():
32 | for cls in CResponseBuffer, PyResponseBuffer:
33 | buff = bytes_source('04 43 44 4d 41 22 44 66 88 AA', cls=cls)
34 | buff.read_byte()
35 | assert buff.read_bytes(5) == to_bytes('43 44 4d 41 22')
36 | try:
37 | buff.read_bytes(10)
38 | except StreamCompleteException:
39 | pass
40 |
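The read_leb128 assertions decode ULEB128 varints: each byte carries seven payload bits, least-significant group first, with the high bit flagging continuation, so the bytes 83 03 decode to (0x83 & 0x7f) + (0x03 << 7) = 387. A reference decoder sketch:

def uleb128(data: bytes) -> int:
    # Accumulate 7 payload bits per byte until a byte with the high bit clear
    value = shift = 0
    for byte in data:
        value |= (byte & 0x7f) << shift
        if byte & 0x80 == 0:
            break
        shift += 7
    return value

assert uleb128(b'\x68') == 104
assert uleb128(b'\x83\x03') == 387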
--------------------------------------------------------------------------------
/tests/unit_tests/test_driver/test_data.py:
--------------------------------------------------------------------------------
1 | from datetime import date
2 | from clickhouse_connect.driver.dataconv import epoch_days_to_date as py_date, pivot as py_pivot
3 | # pylint: disable=no-name-in-module
4 | from clickhouse_connect.driverc.dataconv import epoch_days_to_date as c_date, pivot as c_pivot
5 |
6 |
7 | def test_date_conv():
8 | for date_func in (c_date, py_date,):
9 | assert date_func(11322) == date(2000, 12, 31)
10 | assert date_func(47579) == date(2100, 4, 8)
11 | assert date_func(0) == date(1970, 1, 1)
12 | assert date_func(364) == date(1970, 12, 31)
13 | assert date_func(365) == date(1971, 1, 1)
14 | assert date_func(500) == date(1971, 5, 16)
15 | assert date_func(729) == date(1971, 12, 31)
16 | assert date_func(730) == date(1972, 1, 1)
17 | assert date_func(1096) == date(1973, 1, 1)
18 | assert date_func(2250) == date(1976, 2, 29)
19 | assert date_func(10957) == date(2000, 1, 1)
20 | assert date_func(11323) == date(2001, 1, 1)
21 | assert date_func(15941) == date(2013, 8, 24)
22 | assert date_func(12477) == date(2004, 2, 29)
23 | assert date_func(12478) == date(2004, 3, 1)
24 | assert date_func(12783) == date(2004, 12, 31)
25 | assert date_func(13148) == date(2005, 12, 31)
26 | assert date_func(19378) == date(2023, 1, 21)
27 | assert date_func(47847) == date(2101, 1, 1)
28 | assert date_func(54727) == date(2119, 11, 3)
29 | assert date_func(-18165) == date(1920, 4, 8)
30 |
31 |
32 | def test_pivot():
33 | data = [[1, 2, 3], [4, 5, 6]]
34 | for pivot in (c_pivot, py_pivot):
35 | result = pivot(data, 0, 2)
36 | assert result == ((1, 4), (2, 5), (3, 6))
37 |
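epoch_days_to_date converts a signed day count relative to the Unix epoch (1970-01-01), so every fixture above can be cross-checked with plain datetime arithmetic. A reference sketch:

from datetime import date, timedelta

def reference(days: int) -> date:
    return date(1970, 1, 1) + timedelta(days=days)

assert reference(11322) == date(2000, 12, 31)
assert reference(-18165) == date(1920, 4, 8)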
--------------------------------------------------------------------------------
/tests/unit_tests/test_driver/test_formats.py:
--------------------------------------------------------------------------------
1 | from clickhouse_connect.datatypes.format import set_default_formats, set_write_format
2 | from clickhouse_connect.datatypes.network import IPv6
3 | from clickhouse_connect.datatypes.numeric import Int32
4 | from clickhouse_connect.datatypes.string import FixedString
5 | from clickhouse_connect.driver.context import BaseQueryContext
6 | from clickhouse_connect.driver.query import QueryContext
7 |
8 |
9 | def test_default_formats():
10 | ctx = QueryContext()
11 | set_default_formats('Int32', 'string', 'IP*', 'string')
12 | assert IPv6.read_format(ctx) == 'string'
13 | assert Int32.read_format(ctx) == 'string'
14 | assert FixedString.read_format(ctx) == 'native'
15 |
16 |
17 | def test_fixed_str_format():
18 | set_write_format('FixedString', 'string')
19 | assert FixedString.write_format(BaseQueryContext()) == 'string'
20 |
--------------------------------------------------------------------------------
/tests/unit_tests/test_driver/test_insert.py:
--------------------------------------------------------------------------------
1 | import datetime
2 |
3 | from clickhouse_connect.datatypes.registry import get_from_name
4 |
5 | from clickhouse_connect.driver.insert import InsertContext
6 | from clickhouse_connect.tools.datagen import fixed_len_ascii_str
7 |
8 |
9 | def test_block_size():
10 | data = [(1, (datetime.date(2020, 5, 2), datetime.datetime(2020, 5, 2, 10, 5, 2)))]
11 | ctx = InsertContext('fake_table',
12 | ['key', 'date_tuple'],
13 | [get_from_name('UInt64'), get_from_name('Tuple(Date, DateTime)')],
14 | data)
15 | assert ctx.block_row_count == 262144 # small fixed-width rows: 2 ** 18 rows per block
16 |
17 | data = [(x, fixed_len_ascii_str(400)) for x in range(5000)]
18 | ctx = InsertContext('fake_table',
19 | ['key', 'big_str'],
20 | [get_from_name('Int32'), get_from_name('String')],
21 | data)
22 | assert ctx.block_row_count == 8192 # ~400-byte string rows shrink the block to 2 ** 13 rows
23 |
--------------------------------------------------------------------------------
/tests/unit_tests/test_driver/test_native_fuzz.py:
--------------------------------------------------------------------------------
1 | import os
2 | import random
3 |
4 | from clickhouse_connect.datatypes.registry import get_from_name
5 | from clickhouse_connect.driver.common import coerce_bool
6 | from clickhouse_connect.driver.buffer import ResponseBuffer as PyBuff
7 | from clickhouse_connect.driverc.buffer import ResponseBuffer as CBuff # pylint: disable=no-name-in-module
8 | from tests.helpers import random_columns, random_data, native_transform, native_insert_block, bytes_source
9 |
10 | TEST_COLUMNS = 12
11 | MAX_DATA_ROWS = 100
12 |
13 | use_c = coerce_bool(os.environ.get('CLICKHOUSE_CONNECT_USE_C', True))
14 | BuffCls = CBuff if use_c else PyBuff
15 |
16 |
17 | # pylint: disable=duplicate-code
18 | def test_native_round_trips():
19 | test_runs = int(os.environ.get('CLICKHOUSE_CONNECT_TEST_FUZZ', '200'))
20 |
21 | for _ in range(test_runs):
22 | data_rows = random.randint(1, MAX_DATA_ROWS)
23 | col_names, col_types = random_columns(TEST_COLUMNS)
24 | data = random_data(col_types, data_rows)
25 | col_names = ('row_id',) + col_names
26 | col_types = (get_from_name('UInt32'),) + col_types
27 | assert len(data) == data_rows
28 | output = native_insert_block(data, column_names=col_names, column_types=col_types)
29 | data_result = native_transform.parse_response(bytes_source(output, cls=BuffCls))
30 | assert data_result.column_names == col_names
31 | assert data_result.column_types == col_types
32 | dataset = data_result.result_set
33 | for row in range(data_rows):
34 | for col in range(TEST_COLUMNS):
35 | assert dataset[row][col] == data[row][col]
36 |
37 |
38 | def test_native_small():
39 | test_runs = int(os.environ.get('CLICKHOUSE_CONNECT_TEST_FUZZ', '200'))
40 | for _ in range(test_runs):
41 | col_names, col_types = random_columns(1)
42 | data = random_data(col_types, 2)
43 | col_names = ('row_id',) + col_names
44 | col_types = (get_from_name('UInt32'),) + col_types
45 | output = native_insert_block(data, column_names=col_names, column_types=col_types)
46 | data_result = native_transform.parse_response(bytes_source(output, cls=BuffCls))
47 | assert data_result.column_names == col_names
48 | assert data_result.column_types == col_types
49 | assert data_result.result_set == data
50 |
--------------------------------------------------------------------------------
/tests/unit_tests/test_driver/test_native_read.py:
--------------------------------------------------------------------------------
1 | from ipaddress import IPv4Address
2 | from uuid import UUID
3 |
4 | from clickhouse_connect.datatypes import registry
5 | from clickhouse_connect.driver.insert import InsertContext
6 | from clickhouse_connect.driver.query import QueryContext
7 | from clickhouse_connect.driver.transform import NativeTransform
8 | from tests.helpers import bytes_source
9 | from tests.unit_tests.test_driver.binary import NESTED_BINARY
10 |
11 | UINT16_NULLS = """
12 | 0104 0969 6e74 5f76 616c 7565 104e 756c
13 | 6c61 626c 6528 5549 6e74 3136 2901 0001
14 | 0000 0014 0000 0028 00
15 | """
16 |
17 | LOW_CARDINALITY = """
18 | 0102 026c 6316 4c6f 7743 6172 6469 6e61
19 | 6c69 7479 2853 7472 696e 6729 0100 0000
20 | 0000 0000 0006 0000 0000 0000 0300 0000
21 | 0000 0000 0004 4344 4d41 0347 534d 0200
22 | 0000 0000 0000 0102 0101 026c 6316 4c6f
23 | 7743 6172 6469 6e61 6c69 7479 2853 7472
24 | 696e 6729 0100 0000 0000 0000 0006 0000
25 | 0000 0000 0200 0000 0000 0000 0004 554d
26 | 5453 0100 0000 0000 0000 01
27 | """
28 |
29 | LOW_CARD_ARRAY = """
30 | 0102 066c 6162 656c 731d 4172 7261 7928
31 | 4c6f 7743 6172 6469 6e61 6c69 7479 2853
32 | 7472 696e 6729 2901 0000 0000 0000 0000
33 | 0000 0000 0000 0000 0000 0000 0000 00
34 | """
35 |
36 | SIMPLE_MAP = """
37 | 0101 066e 6e5f 6d61 7013 4d61 7028 5374
38 | 7269 6e67 2c20 5374 7269 6e67 2902 0000
39 | 0000 0000 0004 6b65 7931 046b 6579 3206
40 | 7661 6c75 6531 0676 616c 7565 32
41 | """
42 |
43 | LOW_CARD_MAP = """
44 | 0102 086d 6170 5f6e 756c 6c2b 4d61 7028
45 | 4c6f 7743 6172 6469 6e61 6c69 7479 2853
46 | 7472 696e 6729 2c20 4e75 6c6c 6162 6c65
47 | 2855 5549 4429 2901 0000 0000 0000 0002
48 | 0000 0000 0000 0004 0000 0000 0000 0000
49 | 0600 0000 0000 0003 0000 0000 0000 0000
50 | 0469 676f 7206 6765 6f72 6765 0400 0000
51 | 0000 0000 0102 0102 0100 0000 0000 0000
52 | 0000 0000 0000 0000 0000 0000 235f 7dc5
53 | 799f 431d a9e1 93ca ccff c652 235f 7dc5
54 | 799f 437f a9e1 93ca ccff 0052 235f 7dc5
55 | 799f 431d a9e1 93ca ccff c652
56 | """
57 |
58 |
59 | parse_response = NativeTransform().parse_response
60 |
61 |
62 | def check_result(result, expected, row_num=0, col_num=0):
63 | result_set = result.result_set
64 | row = result_set[row_num]
65 | value = row[col_num]
66 | assert value == expected
67 |
68 |
69 | def test_uint16_nulls():
70 | result = parse_response(bytes_source(UINT16_NULLS))
71 | assert result.result_set == [(None,), (20,), (None,), (40,)]
72 |
73 |
74 | def test_low_cardinality():
75 | result = parse_response(bytes_source(LOW_CARDINALITY))
76 | assert result.result_set == [('CDMA',), ('GSM',), ('UMTS',)]
77 |
78 |
79 | def test_low_card_array():
80 | result = parse_response(bytes_source(LOW_CARD_ARRAY))
81 | assert result.first_row == ([],)
82 |
83 |
84 | def test_map():
85 | result = parse_response(bytes_source(SIMPLE_MAP))
86 | check_result(result, {'key1': 'value1', 'key2': 'value2'})
87 | result = parse_response(bytes_source(LOW_CARD_MAP))
88 | check_result(result, {'george': UUID('1d439f79-c57d-5f23-52c6-ffccca93e1a9'), 'igor': None})
89 |
90 |
91 | def test_ip():
92 | ips = ['192.168.5.3', '202.44.8.25', '0.0.2.2']
93 | ipv4_type = registry.get_from_name('IPv4')
94 | dest = bytearray()
95 | ipv4_type.write_column(ips, dest, InsertContext('', [], []))
96 | python = ipv4_type.read_column_data(bytes_source(bytes(dest)), 3, QueryContext(), None)
97 | assert tuple(python) == tuple(IPv4Address(ip) for ip in ips)
98 |
99 |
100 | def test_point():
101 | points = ((3.22, 3.22), (5.22, 5.22), (4.22, 4.22))
102 | point_type = registry.get_from_name('Point')
103 | dest = bytearray()
104 | point_type.write_column(points, dest, InsertContext('', [], []))
105 | python = point_type.read_column_data(bytes_source(bytes(dest)), 3, QueryContext(), [None, None])
106 | assert tuple(python) == tuple(points)
107 |
108 |
109 | def test_nested():
110 | result = parse_response(bytes_source(NESTED_BINARY))
111 | check_result(result, [{'str1': 'one', 'int32': 5}, {'str1': 'two', 'int32': 55}], 2, 0)
112 |
--------------------------------------------------------------------------------
/tests/unit_tests/test_driver/test_params.py:
--------------------------------------------------------------------------------
1 | from datetime import datetime, date
2 |
3 | import pytest
4 |
5 | from clickhouse_connect.driver.binding import finalize_query, format_bind_value
6 |
7 |
8 | def test_finalize():
9 | hash_id = '0x772'
10 | timestamp = datetime.fromtimestamp(1661447719)
11 | parameters = {'hash_id': hash_id, 'dt': timestamp}
12 | expected = "SELECT hash_id FROM db.mytable WHERE hash_id = '0x772' AND dt = '2022-08-25 17:15:19'"
13 | query = finalize_query('SELECT hash_id FROM db.mytable WHERE hash_id = %(hash_id)s AND dt = %(dt)s', parameters)
14 | assert query == expected
15 |
16 | parameters = [hash_id, timestamp]
17 | query = finalize_query('SELECT hash_id FROM db.mytable WHERE hash_id = %s AND dt = %s', parameters)
18 | assert query == expected
19 |
20 |
21 | # pylint: disable=inconsistent-quotes
22 | @pytest.mark.parametrize('value, expected', [
23 | ("a", "a"),
24 | ("a'", r"a\'"),
25 | ("'a'", r"\'a\'"),
26 | ("''a'", r"\'\'a\'"),
27 | ([], "[]"),
28 | ([1], "[1]"),
29 | (["a"], "['a']"),
30 | (["a'"], r"['a\'']"),
31 | ([["a"]], "[['a']]"),
32 | (date(2023, 6, 1), '2023-06-01'),
33 | (datetime(2023, 6, 1, 20, 4, 5), '2023-06-01 20:04:05'),
34 | ([date(2023, 6, 1), date(2023, 8, 5)], "['2023-06-01', '2023-08-05']")
35 |
36 | ])
37 | def test_format_bind_value(value, expected):
38 | assert format_bind_value(value) == expected
39 |
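finalize_query performs client-side binding: %(name)s and %s placeholders are replaced with SQL renderings from format_bind_value, with strings quoted and embedded quotes backslash-escaped as the table above shows. A sketch:

from datetime import date

from clickhouse_connect.driver.binding import finalize_query

query = finalize_query('SELECT * FROM t WHERE day = %(day)s AND tag = %(tag)s',
                       {'day': date(2023, 6, 1), 'tag': "o'brien"})
assert query == "SELECT * FROM t WHERE day = '2023-06-01' AND tag = 'o\\'brien'"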
--------------------------------------------------------------------------------
/tests/unit_tests/test_driver/test_parser.py:
--------------------------------------------------------------------------------
1 | from clickhouse_connect.datatypes.registry import get_from_name
2 | from clickhouse_connect.driver.parser import parse_callable, parse_enum
3 | from clickhouse_connect.driver.query import remove_sql_comments
4 |
5 |
6 | def test_parse_callable():
7 | assert parse_callable('CALLABLE(1, 5)') == ('CALLABLE', (1, 5), '')
8 | assert parse_callable("Enum4('v1' = 5) other stuff") == ('Enum4', ("'v1'= 5",), 'other stuff')
9 | assert parse_callable('BareThing') == ('BareThing', (), '')
10 | assert parse_callable('Tuple(Tuple (String), Int32)') == ('Tuple', ('Tuple(String)', 'Int32'), '')
11 | assert parse_callable("ReplicatedMergeTree('/clickhouse/tables/test', '{replica'}) PARTITION BY key")\
12 | == ('ReplicatedMergeTree', ("'/clickhouse/tables/test'", "'{replica'}"), 'PARTITION BY key')
13 |
14 |
15 | def test_parse_enum():
16 | assert parse_enum("Enum8('one' = 1)") == (('one',), (1,))
17 | assert parse_enum("Enum16('**\\'5' = 5, '578' = 7)") == (("**'5", '578'), (5, 7))
18 |
19 |
20 | def test_map_type():
21 | ch_type = get_from_name('Map(String, Decimal(5, 5))')
22 | assert ch_type.name == 'Map(String, Decimal(5, 5))'
23 |
24 |
25 | def test_variant_type():
26 | ch_type = get_from_name('Variant(UInt64, String, Array(UInt64))')
27 | assert ch_type.name == 'Variant(UInt64, String, Array(UInt64))'
28 |
29 |
30 | def test_json_type():
31 | names = ['JSON',
32 | 'JSON(max_dynamic_paths=100, a.b UInt32, SKIP `a.e`)',
33 | "JSON(max_dynamic_types = 55, SKIP REGEXP 'a[efg]')",
34 | 'JSON(max_dynamic_types = 33, `a.b` UInt64, b.c String)']
35 | parsed = ['JSON',
36 | 'JSON(max_dynamic_paths = 100, `a.b` UInt32, SKIP `a.e`)',
37 | "JSON(max_dynamic_types = 55, SKIP REGEXP 'a[efg]')",
38 | 'JSON(max_dynamic_types = 33, `a.b` UInt64, `b.c` String)'
39 | ]
40 | for name, x in zip(names, parsed):
41 | ch_type = get_from_name(name)
42 | assert x == ch_type.name
43 |
44 |
45 | def test_remove_comments():
46 | sql = """SELECT -- 6dcd92a04feb50f14bbcf07c661680ba
47 | * FROM benchmark_results /*With an inline comment */ WHERE result = 'True'
48 | /* A single line */
49 | LIMIT
50 | /* A multiline comment
51 |
52 | */
53 | 2
54 | -- 6dcd92a04feb50f14bbcf07c661680ba
55 | """
56 | assert remove_sql_comments(sql) == "SELECT \n* FROM benchmark_results WHERE result = 'True'\n\nLIMIT\n\n2\n\n"
57 |
--------------------------------------------------------------------------------
/tests/unit_tests/test_driver/test_query.py:
--------------------------------------------------------------------------------
1 | from clickhouse_connect.driver.query import QueryContext
2 |
3 |
4 | def test_copy_context():
5 | settings = {'max_bytes_for_external_group_by': 1024 * 1024 * 100,
6 | 'read_overflow_mode': 'throw'}
7 | parameters = {'user_id': 'user_1'}
8 | query_formats = {'IPv*': 'string'}
9 | context = QueryContext('SELECT source_ip FROM table WHERE user_id = %(user_id)s',
10 | settings=settings,
11 | parameters=parameters,
12 | query_formats=query_formats,
13 | use_none=True)
14 | assert context.use_none is True
15 | assert context.final_query == "SELECT source_ip FROM table WHERE user_id = 'user_1'"
16 | assert context.query_formats['IPv*'] == 'string'
17 | assert context.settings['max_bytes_for_external_group_by'] == 104857600 # 100 MiB
18 |
19 | context_copy = context.updated_copy(
20 | settings={'max_bytes_for_external_group_by': 1024 * 1024 * 24, 'max_execution_time': 120},
21 | parameters={'user_id': 'user_2'}
22 | )
23 | assert context_copy.settings['read_overflow_mode'] == 'throw'
24 | assert context_copy.settings['max_execution_time'] == 120
25 | assert context_copy.settings['max_bytes_for_external_group_by'] == 25165824 # 24 MiB
26 | assert context_copy.final_query == "SELECT source_ip FROM table WHERE user_id = 'user_2'"
27 |
--------------------------------------------------------------------------------
/tests/unit_tests/test_driver/test_settings.py:
--------------------------------------------------------------------------------
1 | from clickhouse_connect import common
2 |
3 |
4 | def test_setting():
5 | try:
6 | assert common.get_setting('autogenerate_session_id')
7 | common.set_setting('autogenerate_session_id', False)
8 | assert common.get_setting('autogenerate_session_id') is False
9 | finally:
10 | common.set_setting('autogenerate_session_id', True)
11 |
--------------------------------------------------------------------------------
/tests/unit_tests/test_sqlalchemy/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ClickHouse/clickhouse-connect/ced1ec4ddc823159648fd66ce554110192b0a023/tests/unit_tests/test_sqlalchemy/__init__.py
--------------------------------------------------------------------------------
/tests/unit_tests/test_sqlalchemy/test_ddl.py:
--------------------------------------------------------------------------------
1 | import sqlalchemy as db
2 | from sqlalchemy.sql.ddl import CreateTable
3 |
4 | from clickhouse_connect.cc_sqlalchemy.datatypes.sqltypes import UInt64, UInt32, DateTime
5 | from clickhouse_connect.cc_sqlalchemy.ddl.tableengine import ReplicatedMergeTree, ReplacingMergeTree
6 | from clickhouse_connect.cc_sqlalchemy.dialect import ClickHouseDialect
7 |
8 | dialect = ClickHouseDialect()
9 |
10 | replicated_mt_ddl = """\
11 | CREATE TABLE `replicated_mt_test` (`key` UInt64) Engine ReplicatedMergeTree('/clickhouse/tables/repl_mt_test',\
12 | '{replica}') ORDER BY key\
13 | """
14 |
15 | replacing_mt_ddl = """\
16 | CREATE TABLE `replacing_mt_test` (`key` UInt32, `date` DateTime) Engine ReplacingMergeTree(date) ORDER BY key\
17 | """
18 |
19 |
20 | def test_table_def():
21 | metadata = db.MetaData()
22 |
23 | table = db.Table('replicated_mt_test', metadata, db.Column('key', UInt64),
24 | ReplicatedMergeTree(order_by='key', zk_path='/clickhouse/tables/repl_mt_test',
25 | replica='{replica}'))
26 | ddl = str(CreateTable(table).compile('', dialect=dialect))
27 | assert ddl == replicated_mt_ddl
28 |
29 | table = db.Table('replacing_mt_test', metadata, db.Column('key', UInt32), db.Column('date', DateTime),
30 | ReplacingMergeTree(ver='date', order_by='key'))
31 |
32 | ddl = str(CreateTable(table).compile('', dialect=dialect))
33 | assert ddl == replacing_mt_ddl
34 |
--------------------------------------------------------------------------------
/tests/unit_tests/test_sqlalchemy/test_types.py:
--------------------------------------------------------------------------------
1 | from sqlalchemy import Integer, DateTime
2 |
3 | from clickhouse_connect.cc_sqlalchemy.datatypes.sqltypes import Nullable, Int64, DateTime64, LowCardinality, String
4 | from clickhouse_connect.cc_sqlalchemy.datatypes.base import sqla_type_map, sqla_type_from_name
5 |
6 |
7 | def test_mapping():
8 | assert issubclass(sqla_type_map['UInt64'], Integer)
9 | assert issubclass(sqla_type_map['DateTime'], DateTime)
10 |
11 |
12 | # pylint: disable=protected-access
13 | def test_sqla():
14 | int16 = sqla_type_from_name('Int16')
15 | assert 'Int16' == int16._compiler_dispatch(None)
16 | enum = sqla_type_from_name("Enum8('value1' = 7, 'value2'=5)")
17 | assert "Enum8('value2' = 5, 'value1' = 7)" == enum._compiler_dispatch(None)
18 |
19 |
20 | # pylint: disable=no-member
21 | def test_nullable():
22 | nullable = Nullable(Int64)
23 | assert nullable.__class__ == Int64
24 | nullable = Nullable(DateTime64(6))
25 | assert nullable.__class__ == DateTime64
26 | assert nullable.name == 'Nullable(DateTime64(6))'
27 |
28 |
29 | # pylint: disable=no-member
30 | def test_low_cardinality():
31 | lc_str = LowCardinality(Nullable(String))
32 | assert lc_str.__class__ == String
33 | assert lc_str.name == 'LowCardinality(Nullable(String))'
34 |
--------------------------------------------------------------------------------