├── .github └── workflows │ └── ci.yaml ├── .gitignore ├── .gitmodules ├── DEV_NOTES.md ├── LICENSE ├── README.md ├── pyproject.toml ├── questdb_query ├── __init__.py ├── asynchronous.py ├── endpoint.py ├── errors.py ├── pandas_util.py ├── stats.py ├── synchronous.py └── tool.py ├── test └── tests ├── mock_server.py ├── tests.py └── trips.csv /.github/workflows/ci.yaml: -------------------------------------------------------------------------------- 1 | name: Python CI 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | pull_request: 7 | branches: [ main ] 8 | 9 | jobs: 10 | build: 11 | runs-on: ${{ matrix.os }} 12 | strategy: 13 | matrix: 14 | python-version: ['3.9', '3.10', '3.11', '3.12'] 15 | os: [ubuntu-latest, macos-latest, windows-latest] 16 | 17 | steps: 18 | - uses: actions/checkout@v3 19 | with: 20 | submodules: true 21 | 22 | - name: Set up Python ${{ matrix.python-version }} 23 | uses: actions/setup-python@v4 24 | with: 25 | python-version: ${{ matrix.python-version }} 26 | 27 | - name: Set up Java 11 28 | uses: actions/setup-java@v3 29 | with: 30 | java-version: '11' 31 | distribution: 'temurin' 32 | 33 | - name: Install pipx 34 | run: python -m pip install pipx 35 | 36 | - name: Ensure pipx uses the right Python 37 | run: python -m pipx ensurepath 38 | 39 | - name: Install Poetry with pipx 40 | run: pipx install poetry==1.8.2 41 | 42 | - name: Configure Poetry 43 | run: poetry config virtualenvs.create false 44 | 45 | - name: Install dependencies 46 | run: poetry install --no-root 47 | 48 | - name: Run tests 49 | run: poetry run python -m unittest discover tests -v 50 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 
92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/#use-with-ide 110 | .pdm.toml 111 | 112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 113 | __pypackages__/ 114 | 115 | # Celery stuff 116 | celerybeat-schedule 117 | celerybeat.pid 118 | 119 | # SageMath parsed files 120 | *.sage.py 121 | 122 | # Environments 123 | .env 124 | .venv 125 | env/ 126 | venv/ 127 | ENV/ 128 | env.bak/ 129 | venv.bak/ 130 | 131 | # Spyder project settings 132 | .spyderproject 133 | .spyproject 134 | 135 | # Rope project settings 136 | .ropeproject 137 | 138 | # mkdocs documentation 139 | /site 140 | 141 | # mypy 142 | .mypy_cache/ 143 | .dmypy.json 144 | dmypy.json 145 | 146 | # Pyre type checker 147 | .pyre/ 148 | 149 | # pytype static type analyzer 150 | .pytype/ 151 | 152 | # Cython debug symbols 153 | cython_debug/ 154 | 155 | # PyCharm 156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 158 | # and can be added to the global gitignore or merged into this file. For a more nuclear 159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 160 | .idea/ 161 | 162 | # poetry 163 | .python-version 164 | poetry.lock -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "c-questdb-client"] 2 | path = c-questdb-client 3 | url = https://github.com/questdb/c-questdb-client 4 | -------------------------------------------------------------------------------- /DEV_NOTES.md: -------------------------------------------------------------------------------- 1 | # Developer's Notes 2 | 3 | ## Cloning and Running Tests 4 | 5 | ```shell 6 | git clone https://github.com/questdb/py-questdb-query.git 7 | cd py-questdb-query 8 | git submodule update --init 9 | poetry install 10 | ./test 11 | ``` 12 | 13 | The tests will automatically download and start a QuestDB instance, but you 14 | also need to have a Java 11 runtime installed. 
15 | 16 | ## Updating the dependencies 17 | 18 | Tweak the `pyproject.toml` file (if needed) and then run: 19 | 20 | ``` 21 | poetry update 22 | ``` 23 | 24 | ## Running a single test 25 | 26 | ```shell 27 | poetry run python -m unittest tests.tests.TestModule.test_basic_aut 28 | ``` 29 | 30 | ## Cutting a release 31 | 32 | ```shell 33 | poetry export --output requirements.txt 34 | ``` 35 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. 
For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # py-questdb-query 2 | This library allows you to perform fast queries over HTTP(S)/CSV for QuestDB, a high-performance time-series database. 3 | 4 | Query results are obtained as either Pandas dataframes or dicts of numpy arrays. 
5 | 
6 | ## Installation
7 | 
8 | The library can be installed using the following command:
9 | 
10 | ```shell
11 | python3 -m pip install -U git+https://github.com/questdb/py-questdb-query.git#questdb_query
12 | ```
13 | 
14 | To uninstall the library, you can use the command:
15 | 
16 | ```shell
17 | python3 -m pip uninstall questdb_query
18 | ```
19 | 
20 | ## Basic Usage, querying into Pandas
21 | 
22 | Once installed, you can use the library to query a QuestDB database. Here's an example that demonstrates how to query
23 | CPU utilization data using the library against a database running on `localhost` on the default HTTP port (9000).
24 | 
25 | ```python
26 | from questdb_query import pandas_query
27 | 
28 | df = pandas_query('select * from cpu limit 1000')
29 | ```
30 | 
31 | This allows you, for example, to pre-aggregate results:
32 | 
33 | ```python
34 | >>> df = df[['region', 'usage_user', 'usage_nice']].groupby('region').mean()
35 | >>> df
36 |                 usage_user  usage_nice
37 | region
38 | ap-northeast-1    8.163766    6.492334
39 | ap-southeast-1    6.511215    7.341863
40 | ap-southeast-2    6.788770    6.257839
41 | eu-central-1      7.392642    6.416479
42 | eu-west-1         7.213417    7.185956
43 | sa-east-1         7.143568    5.925026
44 | us-east-1         7.620643    7.243553
45 | us-west-1         6.286770    6.531977
46 | us-west-2         6.228692    6.439672
47 | ```
48 | 
49 | You can then switch over to numpy with a simple and fast conversion:
50 | 
51 | ```python
52 | >>> from questdb_query import pandas_to_numpy
53 | >>> np_arrs = pandas_to_numpy(df)
54 | >>> np_arrs
55 | {'usage_user': array([8.16376556, 6.51121543, 6.78876964, 7.3926419 , 7.21341716,
56 |        7.14356839, 7.62064304, 6.28677006, 6.22869169]), 'usage_nice': array([6.49233392, 7.34186348, 6.25783903, 6.41647863, 7.18595643,
57 |        5.92502642, 7.24355328, 6.53197733, 6.43967247]), 'region': array(['ap-northeast-1', 'ap-southeast-1', 'ap-southeast-2',
58 |        'eu-central-1', 'eu-west-1', 'sa-east-1', 'us-east-1', 'us-west-1',
59 |        'us-west-2'], dtype=object)}
60 | ```
61 | 
62 | ## Querying a remote database
63 | 
64 | If your database is running on a remote host, specify an endpoint:
65 | 
66 | ```python
67 | from questdb_query import numpy_query, Endpoint
68 | 
69 | endpoint = Endpoint(host='your.hostname.com', port=22453, https=True, username='user', password='pass')
70 | 
71 | np_arrs = numpy_query('select * from cpu limit 10', endpoint)
72 | ```
73 | 
74 | Note how the example above enables HTTPS and specifies a username and password for authentication.
75 | 
76 | The port is optional and defaults to 9000 for HTTP and 443 for HTTPS.
77 | 
78 | Alternatively, if the server is set up with token-based authentication, you can use the `token` parameter:
79 | 
80 | ```python
81 | endpoint = Endpoint(host='your.hostname.com', https=True, token='your_token')
82 | ```
83 | 
84 | ## Chunks: Query Parallelism
85 | 
86 | You can sometimes improve performance by splitting up a large query into smaller ones, running them in parallel,
87 | and joining the results together. This is especially useful if you have multiple CPUs available.
88 | 
89 | The `numpy_query` function can do this automatically for you if you specify the `chunks` parameter.
90 | 
91 | The example below splits the query into 6 parallel chunks:
92 | 
93 | ```python
94 | from questdb_query import numpy_query
95 | 
96 | np_arrs = numpy_query('select * from cpu', chunks=6)
97 | ```
98 | 
99 | The speed-up of splitting up a query into smaller ones is highly query-dependent and we recommend you experiment and
100 | benchmark, for example as sketched below.
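
A quick way to compare chunk counts is to loop over a few values and read each result's `query_stats` (a sketch; it assumes the `cpu` table from the earlier examples and a local endpoint):

```python
from questdb_query import pandas_query

# Hypothetical comparison: re-run the same query with different chunk counts.
for chunks in (1, 2, 4, 8):
    df = pandas_query('select * from cpu', chunks=chunks)
    print(f'{chunks} chunk(s): {df.query_stats.throughput_mlps:.3f} million lines/s')
```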
Mostly due to Python library limitations, not all parts of the query can be parallelized, so whilst you may
101 | see great benefits in going from 1 chunk (the default) to 8, the improvement going from 8 to 16 might be marginal.
102 | 
103 | _Benchmarking is covered in more detail later in this README._
104 | 
105 | > :warning: Setting `chunks > 1` parallelizes queries. If the table(s) queried contain fast-moving data, the
106 | > results may be inconsistent as each chunk's query would be started at slightly different times.
107 | >
108 | > To avoid consistency issues, formulate the query so that it only touches data that is not changing.
109 | > You can do this, for example, by specifying a `timestamp` range in the `WHERE` clause.
110 | 
111 | ## Querying into Numpy
112 | 
113 | You can also query directly into a dictionary of Numpy arrays.
114 | 
115 | Notice that Numpy's datatypes are more limited than those of Pandas, specifically in the
116 | handling of null values.
117 | 
118 | This is a simple shorthand for querying into Pandas and then converting to Numpy:
119 | 
120 | ```python
121 | def numpy_query(query: str, endpoint: Endpoint = None,
122 |         chunks: int = 1, timeout: int = None) -> dict[str, np.array]:
123 |     df = pandas_query(query, endpoint, chunks, timeout)
124 |     return pandas_to_numpy(df)
125 | ```
126 | 
127 | To use it, pass the query string to the `numpy_query` function, along with the
128 | same optional parameters as the `pandas_query` function.
129 | 
130 | ```python
131 | from questdb_query import numpy_query
132 | 
133 | np_arrs = numpy_query('''
134 |     select
135 |         timestamp, hostname, datacenter, usage_user, usage_nice
136 |     from
137 |         cpu
138 |     limit 10''')
139 | ```
140 | 
141 | The `np_arrs` object is a Python `dict` which holds a numpy array per column, keyed by column name:
142 | ```python
143 | >>> np_arrs
144 | {'timestamp': array(['2016-01-01T00:00:00.000000000', '2016-01-01T00:00:10.000000000',
145 |        '2016-01-01T00:00:20.000000000', '2016-01-01T00:00:30.000000000',
146 |        '2016-01-01T00:00:40.000000000', '2016-01-01T00:00:50.000000000',
147 |        '2016-01-01T00:01:00.000000000', '2016-01-01T00:01:10.000000000',
148 |        '2016-01-01T00:01:20.000000000', '2016-01-01T00:01:30.000000000'],
149 |       dtype='datetime64[ns]'), 'hostname': array(['host_0', 'host_1', 'host_2', 'host_3', 'host_4', 'host_5',
150 |        'host_6', 'host_7', 'host_8', 'host_9'], dtype=object), 'datacenter': array(['ap-southeast-2b', 'eu-west-1b', 'us-west-1b', 'us-west-2c',
151 |        'us-west-2b', 'eu-west-1b', 'eu-west-1b', 'us-west-1a',
152 |        'ap-southeast-2a', 'us-east-1a'], dtype=object), 'usage_user': array([1.39169048, 0.33846369, 0.        , 1.81511203, 0.84273104,
153 |        0.        , 0.        , 0.28085548, 0.        , 1.37192634]), 'usage_nice': array([0.30603088, 1.21496673, 0.        , 0.16688796, 0.        ,
154 |        2.77319521, 0.40332488, 1.81585253, 1.92844804, 2.12841919])}
155 | ```
156 | 
157 | If we wanted to calculate a (rather nonsensical) weighted average of `usage_user` and `usage_nice`, we can
158 | do this by accessing the `numpy` columns:
159 | 
160 | ```python
161 | >>> np_arrs['usage_user'].dot(np_arrs['usage_nice'].T)
162 | 4.5700692045031985
163 | ```
164 | 
165 | ## Benchmarking
166 | 
167 | ### From code
168 | 
169 | Each query result also carries a `Stats` object with a performance summary, which you can print:
170 | 171 | ```python 172 | >>> from questdb_query import pandas_query 173 | >>> df = pandas_query('select * from cpu', chunks=8) 174 | >>> print(df.query_stats) 175 | Duration: 2.631s 176 | Millions of lines: 5.000 177 | Millions of lines/s: 1.901 178 | MiB: 1332.144 179 | MiB/s: 506.381 180 | ``` 181 | 182 | You can also extract individual fields: 183 | 184 | ```python 185 | >>> df.query_stats 186 | Stats(duration_s=2.630711865, line_count=5000000, byte_count=1396853875, throughput_mbs=506.3814407360216, throughput_mlps=1.900626239810569) 187 | >>> df.query_stats.throughput_mlps 188 | 1.900626239810569 189 | ``` 190 | 191 | ### From the command line 192 | 193 | To get the best performance it may be useful to try queries with different hardware setups, chunk counts etc. 194 | 195 | You can run the benchmarking tool from the command line: 196 | 197 | ```bash 198 | $ python3 -m questdb_query.tool --chunks 8 "select * from cpu" 199 | ``` 200 | ``` 201 | hostname region datacenter rack os arch team service service_version service_environment usage_user usage_system usage_idle usage_nice usage_iowait usage_irq usage_softirq usage_steal usage_guest usage_guest_nice timestamp 202 | 0 host_0 ap-southeast-2 ap-southeast-2b 96 Ubuntu16.10 x86 CHI 11 0 test 1.391690 0.000000 2.644812 0.306031 1.194629 0.000000 0.000000 0.726996 0.000000 0.000000 2016-01-01 00:00:00 203 | 1 host_1 eu-west-1 eu-west-1b 52 Ubuntu16.04LTS x64 NYC 7 0 production 0.338464 1.951409 2.455378 1.214967 2.037935 0.000000 1.136997 1.022753 1.711183 0.000000 2016-01-01 00:00:10 204 | 2 host_2 us-west-1 us-west-1b 69 Ubuntu16.04LTS x64 LON 8 1 production 0.000000 2.800873 2.296324 0.000000 1.754139 1.531160 0.662572 0.000000 0.472402 0.312164 2016-01-01 00:00:20 205 | 3 host_3 us-west-2 us-west-2c 8 Ubuntu16.04LTS x86 LON 11 0 test 1.815112 4.412385 2.056344 0.166888 3.507148 3.276577 0.000000 0.000000 0.000000 1.496152 2016-01-01 00:00:30 206 | 4 host_4 us-west-2 us-west-2b 83 Ubuntu16.04LTS x64 NYC 6 0 test 0.842731 3.141248 2.199520 0.000000 2.943054 5.032342 0.391105 1.375450 0.000000 1.236811 2016-01-01 00:00:40 207 | ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... 
208 | 624995 host_3995 ap-southeast-2 ap-southeast-2a 30 Ubuntu16.04LTS x86 CHI 19 1 staging 33.238309 82.647341 17.272531 52.707720 71.718564 45.605728 100.000000 22.907723 78.130846 15.652954 2017-08-01 16:52:30 209 | 624996 host_3996 us-west-2 us-west-2a 67 Ubuntu15.10 x64 CHI 9 0 production 33.344070 81.922739 16.653731 52.107537 71.844945 45.880606 99.835977 23.045458 76.468930 17.091646 2017-08-01 16:52:40 210 | 624997 host_3997 us-west-2 us-west-2b 63 Ubuntu15.10 x86 SF 8 0 production 32.932095 80.662915 14.708377 53.354277 72.265215 44.803275 99.013038 20.375169 78.043473 17.870002 2017-08-01 16:52:50 211 | 624998 host_3998 eu-west-1 eu-west-1b 53 Ubuntu16.04LTS x86 CHI 11 1 staging 31.199818 80.994859 15.051577 51.923123 74.169828 46.453950 99.107213 21.004499 78.341154 18.880808 2017-08-01 16:53:00 212 | 624999 host_3999 us-east-1 us-east-1c 87 Ubuntu16.10 x64 SF 8 1 production 30.310735 81.727637 15.413537 51.417897 74.973555 44.882255 98.821672 19.055040 78.094993 19.263652 2017-08-01 16:53:10 213 | 214 | [5000000 rows x 21 columns] 215 | 216 | Duration: 2.547s 217 | Millions of lines: 5.000 218 | Millions of lines/s: 1.963 219 | MiB: 1332.144 220 | MiB/s: 522.962 221 | ``` 222 | 223 | These are the complete command line arguments: 224 | 225 | ```bash 226 | $ python3 -m questdb_query.tool --help 227 | ``` 228 | ``` 229 | usage: tool.py [-h] [--host HOST] [--port PORT] [--https] [--username USERNAME] [--password PASSWORD] [--chunks CHUNKS] query 230 | 231 | positional arguments: 232 | query 233 | 234 | optional arguments: 235 | -h, --help show this help message and exit 236 | --host HOST 237 | --port PORT 238 | --https 239 | --username USERNAME 240 | --password PASSWORD 241 | --chunks CHUNKS 242 | ``` 243 | 244 | 245 | ## Async operation 246 | 247 | The `numpy_query` and `pandas_query` functions are actually wrappers around `async` variants. 248 | 249 | If your application is already using `async`, then call those directly as it allows other parts of your application to 250 | perform work in parallel during the data download. 251 | 252 | The functions take identical arguments as their synchronous counterparts. 
253 | 
254 | ```python
255 | import asyncio
256 | from questdb_query import Endpoint
257 | from questdb_query.asynchronous import numpy_query
258 | 
259 | 
260 | async def main():
261 |     endpoint = Endpoint(host='your.hostname.com', https=True, username='user', password='pass')
262 |     np_arrs = await numpy_query('select * from cpu limit 10', endpoint)
263 |     print(np_arrs)
264 | 
265 | 
266 | if __name__ == '__main__':
267 |     asyncio.run(main())
268 | 
269 | ```
270 | 
-------------------------------------------------------------------------------- /pyproject.toml: --------------------------------------------------------------------------------
1 | [tool.poetry]
2 | name = "questdb-query"
3 | version = "0.1.0"
4 | description = "Fast query over HTTP(S)/CSV for QuestDB"
5 | readme = "README.md"
6 | packages = [{include = "questdb_query"}]
7 | authors = ["Adam Cimarosti "]
8 | license = "Apache License 2.0"
9 | repository = "https://github.com/questdb/py-questdb-query/"
10 | 
11 | [tool.poetry.dependencies]
12 | python = "^3.9"
13 | numpy = "^1.26.4"
14 | pandas = "^2.2.2"
15 | pyarrow = "^15.0.2"
16 | aiohttp = {extras = ["speedups"], version = "^3.8.4"}
17 | 
18 | [build-system]
19 | requires = ["poetry-core"]
20 | build-backend = "poetry.core.masonry.api"
-------------------------------------------------------------------------------- /questdb_query/__init__.py: --------------------------------------------------------------------------------
1 | """
2 | Query QuestDB over HTTP into Pandas or Numpy arrays.
3 | 
4 | The primary implementation is in the `asynchronous` module, with a synchronous wrapper in the `synchronous` module.
5 | 
6 | """
7 | 
8 | __version__ = '0.1.0'
9 | 
10 | from .endpoint import Endpoint
11 | from .errors import QueryError
12 | from .synchronous import pandas_query, numpy_query
13 | from .pandas_util import pandas_to_numpy
14 | 
-------------------------------------------------------------------------------- /questdb_query/asynchronous.py: --------------------------------------------------------------------------------
1 | """
2 | Async functions to query QuestDB over HTTP(S) via CSV into Pandas or Numpy.
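
A sketch of typical use (assumes a reachable QuestDB server with a `cpu` table):

    import asyncio
    from questdb_query.asynchronous import pandas_query

    async def main():
        df = await pandas_query('select * from cpu limit 10')
        print(df)

    asyncio.run(main())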
3 | """
4 | 
5 | __all__ = ['pandas_query', 'numpy_query']
6 | 
7 | import asyncio
8 | import time
9 | from concurrent.futures import ThreadPoolExecutor
10 | from io import BytesIO
11 | 
12 | import aiohttp
13 | import numpy as np
14 | import pandas as pd
15 | 
16 | from .endpoint import Endpoint
17 | from .errors import QueryError
18 | from .pandas_util import pandas_to_numpy
19 | from .stats import Stats
20 | 
21 | 
22 | def _new_session(endpoint, timeout: int = None):
23 |     auth = None
24 |     if endpoint.username:
25 |         auth = aiohttp.BasicAuth(endpoint.username, endpoint.password)
26 |     # `ClientTimeout` instances are always truthy, so an `or` fallback would never fire; apply the 300s default explicitly.
27 |     timeout = aiohttp.ClientTimeout(total=timeout if timeout is not None else 300)
28 |     return aiohttp.ClientSession(
29 |         auth=auth,
30 |         read_bufsize=4 * 1024 * 1024,
31 |         timeout=timeout)
32 | 
33 | 
34 | def _auth_headers(endpoint: Endpoint) -> dict[str, str]:
35 |     if endpoint.token:
36 |         return {'Authorization': f'Bearer {endpoint.token}'}
37 |     return None
38 | 
39 | 
40 | async def _pre_query(
41 |         session: aiohttp.ClientSession,
42 |         endpoint: Endpoint,
43 |         query: str
44 | ) -> tuple[list[tuple[str, tuple[str, object]]], int]:
45 |     url = f'{endpoint.url}/exec'
46 |     params = [('query', query), ('count', 'true'), ('limit', '0')]
47 |     dtypes_map = {
48 |         'STRING': ('STRING', 'string'),
49 |         'VARCHAR': ('VARCHAR', 'string'),
50 |         'SYMBOL': ('SYMBOL', 'string'),
51 |         'SHORT': ('SHORT', 'int16'),
52 |         'BYTE': ('BYTE', 'int8'),
53 |         'BOOLEAN': ('BOOLEAN', 'bool'),
54 |         'INT': ('INT', 'Int32'),
55 |         'LONG': ('LONG', 'Int64'),
56 |         'DOUBLE': ('DOUBLE', 'float64'),
57 |         'FLOAT': ('FLOAT', 'float32'),
58 |         'CHAR': ('CHAR', 'string'),
59 |         'TIMESTAMP': ('TIMESTAMP', None),
60 |         'IPV4': ('IPV4', 'string'),
61 |         # GEOHASH(n) columns are handled dynamically in `get_dtype` below.
62 |         'DATE': ('DATE', None),
63 |         'UUID': ('UUID', 'string'),
64 |         'BINARY': ('BINARY', 'string'),
65 |         'LONG256': ('LONG256', 'string'),
66 |     }
67 | 
68 |     def get_dtype(col):
69 |         ty = col['type'].upper()
70 |         if ty.startswith('GEOHASH'):
71 |             return (ty, 'string')
72 |         return dtypes_map[ty]
73 | 
74 |     async with session.get(
75 |             url=url,
76 |             params=params,
77 |             headers=_auth_headers(endpoint)) as resp:
78 |         result = await resp.json()
79 |         if resp.status != 200:
80 |             raise QueryError.from_json(result)
81 |         columns = [
82 |             (col['name'], get_dtype(col))
83 |             for col in result['columns']]
84 |         count = result['count']
85 |         return columns, count
86 | 
87 | 
88 | async def _query_pandas(
89 |         session: aiohttp.ClientSession,
90 |         executor: ThreadPoolExecutor,
91 |         endpoint: Endpoint,
92 |         query: str,
93 |         result_schema: list[tuple[str, tuple[str, object]]],
94 |         limit_range: tuple[int, int]) -> pd.DataFrame:
95 |     url = f'{endpoint.url}/exp'
96 |     params = [
97 |         ('query', query),
98 |         ('limit', f'{limit_range[0]},{limit_range[1]}')]
99 |     async with session.get(
100 |             url=url,
101 |             params=params,
102 |             headers=_auth_headers(endpoint)) as resp:
103 |         if resp.status != 200:
104 |             raise QueryError.from_json(await resp.json())
105 |         buf = await resp.content.read()
106 |         download_bytes = len(buf)
107 |     buf_reader = BytesIO(buf)
108 |     dtypes = {
109 |         col[0]: col[1][1]
110 |         for col in result_schema
111 |         if col[1][1] is not None}
112 | 
113 |     def _read_csv():
114 |         df = pd.read_csv(buf_reader, dtype=dtypes, engine='pyarrow')
115 |         # Patch up the column types.
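        # The CSV payload carries no type metadata, so TIMESTAMP and DATE
        # columns are re-parsed into `datetime64[ns]` below, using the schema
        # obtained from the earlier `/exec` pre-query.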
116 |         for col_schema in result_schema:
117 |             col_name = col_schema[0]
118 |             col_type = col_schema[1][0]
119 |             try:
120 |                 if col_type in ('TIMESTAMP', 'DATE'):
121 |                     series = df[col_name]
122 |                     # if the series is empty (or full of nulls) its csv-read
123 |                     # default dtype (float64) is not one which we can
124 |                     # convert `.to_datetime`,
125 |                     if series.empty or series.isnull().all():
126 |                         # so to work around this we first convert the series
127 |                         # to Int64 (nullable).
128 |                         series = series.astype('Int64')
129 |                         series = pd.to_datetime(series, unit='ns')
130 |                     else:
131 |                         # Drop the UTC timezone during conversion.
132 |                         # This allows `.to_numpy()` on the series to
133 |                         # yield a `datetime64` dtype column.
134 |                         series = pd.to_datetime(series).dt.tz_convert(None)
135 |                     df[col_name] = series
136 |             except Exception as e:
137 |                 raise ValueError(
138 |                     f'Failed to convert column {col_name} '
139 |                     f'to type {col_type}: {e}')
140 |         return df
141 | 
142 |     loop = asyncio.get_running_loop()
143 |     df = await loop.run_in_executor(executor, _read_csv)
144 |     return df, download_bytes
145 | 
146 | 
147 | async def pandas_query(
148 |         query: str,
149 |         endpoint: Endpoint = None,
150 |         chunks: int = 1,
151 |         timeout: int = None) -> pd.DataFrame:
152 |     """
153 |     Query QuestDB via CSV to a Pandas DataFrame.
154 | 
155 |     :param timeout: The timeout in seconds for the query, defaults to None (300 seconds).
156 |     """
157 |     endpoint = endpoint or Endpoint()
158 |     start_ts = time.perf_counter_ns()
159 |     with ThreadPoolExecutor(max_workers=chunks) as executor:
160 |         async with _new_session(endpoint, timeout) as session:
161 |             result_schema, row_count = await _pre_query(session, endpoint, query)
162 |             chunks = max(min(chunks, row_count), 1)
163 |             rows_per_spawn = row_count // chunks
164 |             limit_ranges = [
165 |                 (
166 |                     i * rows_per_spawn,
167 |                     ((i + 1) * rows_per_spawn) if i < chunks - 1 else row_count
168 |                 )
169 |                 for i in range(chunks)]
170 |             tasks = [
171 |                 asyncio.ensure_future(_query_pandas(
172 |                     session, executor, endpoint, query, result_schema, limit_range))
173 |                 for limit_range in limit_ranges]
174 |             results = await asyncio.gather(*tasks)
175 |             sub_dataframes = [result[0] for result in results]
176 |             df = pd.concat(sub_dataframes)
177 |             if chunks > 1:
178 |                 df.reset_index(drop=True, inplace=True)
179 |     end_ts = time.perf_counter_ns()
180 |     total_downloaded = sum(result[1] for result in results)
181 |     df.query_stats = Stats(end_ts - start_ts, row_count, total_downloaded)
182 |     return df
183 | 
184 | 
185 | async def numpy_query(
186 |         query: str,
187 |         endpoint: Endpoint = None,
188 |         chunks: int = 1,
189 |         timeout: int = None
190 | ) -> dict[str, np.array]:
191 |     """
192 |     Query and obtain the result as a dict of columns.
193 |     Each column is a numpy array.
194 | 
195 |     :param timeout: The timeout in seconds for the query, defaults to None (300 seconds).
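
    A sketch of typical use from within a coroutine (assumes a `cpu` table
    on the default local endpoint):

        np_arrs = await numpy_query('select usage_user from cpu limit 10')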
196 | """ 197 | df = await pandas_query(query, endpoint, chunks, timeout) 198 | return pandas_to_numpy(df) 199 | -------------------------------------------------------------------------------- /questdb_query/endpoint.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | class Endpoint: 4 | """ 5 | HTTP connection parameters into QuestDB 6 | """ 7 | def __init__( 8 | self, 9 | host='127.0.0.1', 10 | port=None, 11 | https=False, 12 | username=None, 13 | password=None, 14 | token=None): 15 | self.host = host 16 | self.port = port or (443 if https else 9000) 17 | self.https = https 18 | self.username = username 19 | self.password = password 20 | self.token = token 21 | if ((self.username or self.password) and \ 22 | not (self.username and self.password)): 23 | raise ValueError('Must provide both username and password or neither') 24 | if self.token and self.username: 25 | raise ValueError('Cannot use token with username and password') 26 | if token and not re.match(r'^[A-Za-z0-9-._~+/]+=*$', token): 27 | # https://datatracker.ietf.org/doc/html/rfc6750#section-2.1 28 | raise ValueError("Invalid characters in token") 29 | 30 | @property 31 | def url(self): 32 | protocol = 'https' if self.https else 'http' 33 | return f'{protocol}://{self.host}:{self.port}' 34 | -------------------------------------------------------------------------------- /questdb_query/errors.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | 4 | class QueryError(Exception): 5 | def __init__(self, message: str, query: str, position: Optional[int] = None): 6 | super().__init__(message) 7 | self.query = query 8 | self.position = position 9 | 10 | @classmethod 11 | def from_json(cls, json: dict): 12 | message = json.get('error') 13 | if not message: 14 | message = json.get('message') 15 | return cls( 16 | message=message, 17 | query=json.get('query'), 18 | position=json.get('position')) 19 | 20 | -------------------------------------------------------------------------------- /questdb_query/pandas_util.py: -------------------------------------------------------------------------------- 1 | __all__ = ['pandas_to_numpy'] 2 | 3 | import numpy as np 4 | import pandas as pd 5 | 6 | from .stats import StatsDict 7 | 8 | 9 | def pandas_to_numpy(df: pd.DataFrame) -> dict[str, np.array]: 10 | """ 11 | Convert a pandas dataframe into a dict containing numpy arrays, keyed by column name. 12 | 13 | If the index is named, then convert that too. 14 | """ 15 | # Calling `.to_numpy()` for each column is quite efficient and generally avoids copies. 16 | # This is because Pandas internally already usually stores columns as numpy. 17 | np_arrs = {col_name: df[col_name].to_numpy() for col_name in df} 18 | 19 | # If the index is named, then convert that too. 20 | if df.index.name: 21 | np_arrs[df.index.name] = df.index.to_numpy() 22 | 23 | # Carry across stats, if these are present. 
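    # (`query_stats` is attached dynamically by `pandas_query`; DataFrames
    # from other sources won't have it, hence the `hasattr` check.)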
24 |     if hasattr(df, 'query_stats'):
25 |         np_arrs = StatsDict(np_arrs, df.query_stats)
26 | 
27 |     return np_arrs
28 | 
-------------------------------------------------------------------------------- /questdb_query/stats.py: --------------------------------------------------------------------------------
1 | __all__ = ['Stats', 'StatsDict']
2 | 
3 | NS_IN_S = 1e9
4 | 
5 | STATS_TEMPLATE = '''Duration: {duration_s:.3f}s
6 | Millions of lines: {line_count_millions:.3f}
7 | Millions of lines/s: {throughput_mlps:.3f}
8 | MiB: {byte_count_mib:.3f}
9 | MiB/s: {throughput_mbs:.3f}'''
10 | 
11 | 
12 | class Stats:
13 |     def __init__(self, duration_ns: int, line_count: int, byte_count: int):
14 |         self.duration_ns = duration_ns
15 |         self.line_count = line_count
16 |         self.byte_count = byte_count
17 | 
18 |     @property
19 |     def duration_s(self) -> float:
20 |         """
21 |         How long the query took in seconds.
22 |         """
23 |         return self.duration_ns / NS_IN_S
24 | 
25 |     @property
26 |     def throughput_mbs(self) -> float:
27 |         """
28 |         How many MiB/s were downloaded and parsed.
29 |         """
30 |         return self.byte_count / self.duration_ns * NS_IN_S / 1024 / 1024
31 | 
32 |     @property
33 |     def throughput_mlps(self) -> float:
34 |         """
35 |         How many millions of lines per second were parsed.
36 |         """
37 |         return self.line_count / self.duration_ns * NS_IN_S / 1e6
38 | 
39 |     def __repr__(self) -> str:
40 |         return (f'Stats(duration_s={self.duration_s}, '
41 |                 f'line_count={self.line_count}, '
42 |                 f'byte_count={self.byte_count}, '
43 |                 f'throughput_mbs={self.throughput_mbs}, '
44 |                 f'throughput_mlps={self.throughput_mlps})')
45 | 
46 |     def __str__(self):
47 |         return STATS_TEMPLATE.format(
48 |             duration_s=self.duration_s,
49 |             line_count_millions=self.line_count / 1e6,
50 |             throughput_mbs=self.throughput_mbs,
51 |             byte_count_mib=self.byte_count / 1024 / 1024,
52 |             throughput_mlps=self.throughput_mlps)
53 | 
54 | 
55 | class StatsDict(dict):
56 |     """A dict with an additional .query_stats attribute."""
57 | 
58 |     def __init__(self, other: dict, query_stats: Stats):
59 |         super().__init__(other)
60 |         self.query_stats = query_stats
61 | 
-------------------------------------------------------------------------------- /questdb_query/synchronous.py: --------------------------------------------------------------------------------
1 | """
2 | A sync shim around the `asynchronous` module.
3 | """
4 | 
5 | __all__ = ['pandas_query', 'numpy_query']
6 | 
7 | import asyncio
8 | 
9 | import numpy as np
10 | import pandas as pd
11 | 
12 | from . import asynchronous as a
13 | from .endpoint import Endpoint
14 | from .pandas_util import pandas_to_numpy
15 | 
16 | 
17 | def pandas_query(
18 |         query: str,
19 |         endpoint: Endpoint = None,
20 |         chunks: int = 1,
21 |         timeout: int = None
22 | ) -> pd.DataFrame:
23 |     """
24 |     Query QuestDB via CSV to a Pandas DataFrame.
25 | 
26 |     :param timeout: The timeout in seconds for the query, defaults to None (300 seconds).
27 |     """
28 |     try:
29 |         asyncio.get_running_loop()
30 |     except RuntimeError:
31 |         return asyncio.run(a.pandas_query(query, endpoint, chunks, timeout))
32 |     # A running event loop can't be re-entered via `run_until_complete`, so
33 |     # point the caller at the async API instead of failing confusingly.
34 |     raise RuntimeError(
35 |         'pandas_query() called from a running event loop; use questdb_query.asynchronous instead')
36 | 
37 | 
38 | def numpy_query(
39 |         query: str,
40 |         endpoint: Endpoint = None,
41 |         chunks: int = 1,
42 |         timeout: int = None
43 | ) -> dict[str, np.array]:
44 |     """
45 |     Query and obtain the result as a dict of columns.
46 |     Each column is a numpy array.
47 | 48 | :param timeout: The timeout in seconds for the query, defaults to None (300 seconds). 49 | """ 50 | df = pandas_query(query, endpoint, chunks, timeout) 51 | return pandas_to_numpy(df) 52 | -------------------------------------------------------------------------------- /questdb_query/tool.py: -------------------------------------------------------------------------------- 1 | """ 2 | Benchmarking tool 3 | 4 | From the command line, run as:: 5 | 6 | python3 -m questdb_query.tool --help 7 | 8 | """ 9 | 10 | from .endpoint import Endpoint 11 | from .synchronous import pandas_query 12 | 13 | 14 | def _parse_args(): 15 | import argparse 16 | parser = argparse.ArgumentParser() 17 | parser.add_argument('--host', type=str, default='localhost') 18 | parser.add_argument('--port', type=int) 19 | parser.add_argument('--https', action='store_true') 20 | parser.add_argument('--username', type=str) 21 | parser.add_argument('--password', type=str) 22 | parser.add_argument('--token', type=str) 23 | parser.add_argument('--chunks', type=int, default=1) 24 | parser.add_argument('query', type=str) 25 | return parser.parse_args() 26 | 27 | 28 | def main(args): 29 | endpoint = Endpoint( 30 | host=args.host, 31 | port=args.port, 32 | https=args.https, 33 | username=args.username, 34 | password=args.password, 35 | token=args.token) 36 | df = pandas_query(args.query, endpoint, args.chunks) 37 | print(df) 38 | print() 39 | print(df.query_stats) 40 | 41 | 42 | if __name__ == "__main__": 43 | args = _parse_args() 44 | main(args) 45 | -------------------------------------------------------------------------------- /test: -------------------------------------------------------------------------------- 1 | poetry run python -m unittest discover tests -v -------------------------------------------------------------------------------- /tests/mock_server.py: -------------------------------------------------------------------------------- 1 | import http.server as hs 2 | import threading 3 | import time 4 | 5 | 6 | class HttpServer: 7 | def __init__(self): 8 | self.requests = [] 9 | self.responses = [] 10 | self.headers = [] 11 | self._ready_event = None 12 | self._stop_event = None 13 | self._http_server = None 14 | self._http_server_thread = None 15 | 16 | def _serve(self): 17 | self._http_server.serve_forever() 18 | self._stop_event.set() 19 | 20 | def __enter__(self): 21 | headers = self.headers 22 | requests = self.requests 23 | responses = self.responses 24 | 25 | class Handler(hs.BaseHTTPRequestHandler): 26 | def do_GET(self): 27 | try: 28 | headers.append({ 29 | key: value 30 | for key, value in self.headers.items()}) 31 | try: 32 | wait_ms, code, content_type, body = responses.pop(0) 33 | except IndexError: 34 | wait_ms, code, content_type, body = 0, 200, None, None 35 | time.sleep(wait_ms / 1000) 36 | self.send_response(code) 37 | if content_type: 38 | self.send_header('Content-Type', content_type) 39 | if body: 40 | self.send_header('Content-Length', len(body)) 41 | self.end_headers() 42 | if body: 43 | self.wfile.write(body) 44 | except BrokenPipeError: 45 | pass # Client disconnected early, no biggie. 
46 | 47 | self._stop_event = threading.Event() 48 | self._http_server = hs.HTTPServer( 49 | ('', 0), 50 | Handler, 51 | bind_and_activate=True) 52 | self._http_server_thread = threading.Thread(target=self._serve) 53 | self._http_server_thread.start() 54 | return self 55 | 56 | def __exit__(self, _ex_type, _ex_value, _ex_tb): 57 | self._http_server.shutdown() 58 | self._http_server.server_close() 59 | self._stop_event.set() 60 | 61 | @property 62 | def port(self): 63 | return self._http_server.server_port 64 | -------------------------------------------------------------------------------- /tests/tests.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | import numpy as np 4 | sys.dont_write_bytecode = True 5 | 6 | import os 7 | import unittest 8 | from pathlib import Path 9 | import io 10 | import http.client 11 | 12 | import questdb_query.asynchronous as qdbq_a 13 | import questdb_query.synchronous as qdbq_s 14 | from questdb_query import Endpoint 15 | import pandas as pd 16 | from pandas.testing import assert_frame_equal 17 | 18 | try: 19 | # When running a single test. 20 | from .mock_server import HttpServer 21 | except ImportError: 22 | # When discovered by unittest. 23 | from mock_server import HttpServer 24 | 25 | # Import the code we can use to download and run a test QuestDB instance 26 | sys.path.append(str( 27 | Path(__file__).resolve().parent.parent / 28 | 'c-questdb-client' / 'system_test')) 29 | from fixture import \ 30 | QuestDbFixture, install_questdb, install_questdb_from_repo, AUTH, retry 31 | 32 | 33 | QUESTDB_VERSION = '8.1.4' 34 | QUESTDB_INSTALL_PATH = None 35 | 36 | 37 | def may_install_questdb(): 38 | global QUESTDB_INSTALL_PATH 39 | if QUESTDB_INSTALL_PATH: 40 | return 41 | 42 | install_path = None 43 | if os.environ.get('QDB_REPO_PATH'): 44 | repo = Path(os.environ['QDB_REPO_PATH']) 45 | install_path = install_questdb_from_repo(repo) 46 | else: 47 | url = ('https://github.com/questdb/questdb/releases/download/' + 48 | QUESTDB_VERSION + 49 | '/questdb-' + 50 | QUESTDB_VERSION + 51 | '-no-jre-bin.tar.gz') 52 | install_path = install_questdb(QUESTDB_VERSION, url) 53 | QUESTDB_INSTALL_PATH = install_path 54 | 55 | 56 | def upload_csv(qdb, table, csv_path): 57 | with open(csv_path, 'rb') as file: 58 | file_data = file.read() 59 | 60 | boundary = "2cdcb4a05801c5ab05f174836624949d" 61 | body = io.BytesIO() 62 | body.write(f'--{boundary}\r\n'.encode('utf-8')) 63 | body.write(f'Content-Disposition: form-data; name="data"; filename="{table}"\r\n'.encode('utf-8')) 64 | body.write(b'Content-Type: text/csv\r\n\r\n') 65 | body.write(file_data) 66 | body.write(f'\r\n--{boundary}--\r\n'.encode('utf-8')) 67 | 68 | # Get the byte data from BytesIO 69 | body_bytes = body.getvalue() 70 | 71 | # Prepare headers 72 | headers = { 73 | 'Content-Type': f'multipart/form-data; boundary={boundary}', 74 | 'Content-Length': str(len(body_bytes)) 75 | } 76 | 77 | url = f'/imp?name={table}' 78 | 79 | # Send the HTTP POST request 80 | try: 81 | conn = http.client.HTTPConnection(qdb.host, qdb.http_server_port) 82 | conn.request('POST', url, body_bytes, headers) 83 | response = conn.getresponse() 84 | return response.read().decode() 85 | finally: 86 | conn.close() 87 | 88 | 89 | def load_all_types_table(qdb): 90 | qdb.http_sql_query(''' 91 | CREATE TABLE almost_all_types ( 92 | id int, 93 | active boolean, 94 | ip_address ipv4, 95 | age byte, 96 | temperature short, 97 | grade char, 98 | account_balance float, 99 | currency_symbol symbol, 100 | 
description string, 101 | comment varchar, 102 | record_date date, 103 | event_timestamp timestamp, 104 | revenue double, 105 | user_uuid uuid, 106 | long_number long, 107 | crypto_hash long256 108 | ) timestamp (event_timestamp) PARTITION BY DAY WAL; 109 | ''') 110 | qdb.http_sql_query(''' 111 | INSERT INTO almost_all_types ( 112 | id, 113 | active, 114 | ip_address, 115 | age, 116 | temperature, 117 | grade, 118 | account_balance, 119 | currency_symbol, 120 | description, 121 | comment, 122 | record_date, 123 | event_timestamp, 124 | revenue, 125 | user_uuid, 126 | long_number, 127 | crypto_hash 128 | ) VALUES 129 | -- id active ip_address age temp gra acc_bal curr description comment record_date event_timestamp revenue user_uuid long_number crypto_hash 130 | (1, true, '192.168.1.1', 25, 72, 'A', 1000.5, 'USD', 'Test record 1', 'pink', '2023-01-01T00:00:00.000Z', '2023-01-01T00:00:00.000000Z', 200.00, '123e4567-e89b-12d3-a456-426614174000', 123456789012345, '0x7fffffffffffffffffffffffffffffff'), 131 | (2, false, NULL, 30, 68, 'B', 1500.25, 'EUR', NULL, 'lightgoldenrodyellow', NULL, '2023-01-02T00:00:00.000000Z', 300.00, '123e4567-e89b-12d3-a456-426614174001', 987654321098765, NULL), 132 | (3, NULL, '10.0.0.1', 35, -40, 'C', NULL, 'JPY', 'Test record 3', NULL, '2023-01-03T00:00:00.000Z', '2023-01-03T00:00:00.000000Z', NULL, '123e4567-e89b-12d3-a456-426614174002', NULL, '0x1fffffffffffffffffffffffffffffff'); 133 | ''') 134 | 135 | def load_trips_table(qdb): 136 | qdb.http_sql_query(''' 137 | CREATE TABLE 'trips' ( 138 | cab_type SYMBOL capacity 256 CACHE, 139 | vendor_id SYMBOL capacity 256 CACHE, 140 | pickup_datetime TIMESTAMP, 141 | dropoff_datetime TIMESTAMP, 142 | rate_code_id SYMBOL capacity 256 CACHE, 143 | pickup_latitude DOUBLE, 144 | pickup_longitude DOUBLE, 145 | dropoff_latitude DOUBLE, 146 | dropoff_longitude DOUBLE, 147 | passenger_count INT, 148 | trip_distance DOUBLE, 149 | fare_amount DOUBLE, 150 | extra DOUBLE, 151 | mta_tax DOUBLE, 152 | tip_amount DOUBLE, 153 | tolls_amount DOUBLE, 154 | ehail_fee DOUBLE, 155 | improvement_surcharge DOUBLE, 156 | congestion_surcharge DOUBLE, 157 | total_amount DOUBLE, 158 | payment_type SYMBOL capacity 256 CACHE, 159 | trip_type SYMBOL capacity 256 CACHE, 160 | pickup_location_id INT, 161 | dropoff_location_id INT 162 | ) timestamp (pickup_datetime) PARTITION BY MONTH WAL; 163 | ''') 164 | 165 | trips_csv = Path(__file__).resolve().parent / 'trips.csv' 166 | upload_csv(qdb, 'trips', trips_csv) 167 | 168 | def check_table(): 169 | try: 170 | resp = qdb.http_sql_query('SELECT count() FROM trips') 171 | if not resp.get('dataset'): 172 | return False 173 | if resp['dataset'][0][0] == 10000: 174 | return True 175 | return False 176 | except: 177 | return None 178 | 179 | # Wait until the apply job is done. 
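    # (The 'trips' table is WAL-enabled: freshly imported rows only become
    # visible to queries once the WAL apply job has run, hence the retry.)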
180 | return retry(check_table, timeout_sec=10) 181 | 182 | 183 | class TestModule(unittest.IsolatedAsyncioTestCase): 184 | @classmethod 185 | def setUpClass(cls): 186 | cls.qdb = None 187 | may_install_questdb() 188 | 189 | cls.qdb = QuestDbFixture( 190 | QUESTDB_INSTALL_PATH, auth=False, wrap_tls=True, http=True) 191 | cls.qdb.start() 192 | 193 | load_all_types_table(cls.qdb) 194 | load_trips_table(cls.qdb) 195 | 196 | @classmethod 197 | def tearDownClass(cls): 198 | if cls.qdb: 199 | cls.qdb.stop() 200 | 201 | def _get_endpoint(self): 202 | return Endpoint(self.qdb.host, self.qdb.http_server_port) 203 | 204 | def s_numpy_query(self, query, *, chunks=1): 205 | endpoint = self._get_endpoint() 206 | return qdbq_s.numpy_query(query, endpoint=endpoint, chunks=chunks) 207 | 208 | async def a_numpy_query(self, query, *, chunks=1): 209 | endpoint = self._get_endpoint() 210 | return await qdbq_a.numpy_query(query, endpoint=endpoint, chunks=chunks) 211 | 212 | def s_pandas_query(self, query, *, chunks=1): 213 | endpoint = self._get_endpoint() 214 | return qdbq_s.pandas_query(query, endpoint=endpoint, chunks=chunks) 215 | 216 | async def a_pandas_query(self, query, *, chunks=1): 217 | endpoint = self._get_endpoint() 218 | return await qdbq_a.pandas_query(query, endpoint=endpoint, chunks=chunks) 219 | 220 | def test_count_pandas(self): 221 | act = self.s_pandas_query('SELECT count() FROM trips') 222 | exp = pd.DataFrame({'count': pd.Series([10000], dtype='Int64')}) 223 | assert_frame_equal(act, exp, check_column_type=True) 224 | 225 | def test_count_numpy(self): 226 | act = self.s_numpy_query('SELECT count() FROM trips') 227 | exp = {'count': np.array([10000], dtype='int64')} 228 | self.assertEqual(act, exp) 229 | 230 | def test_head_pandas(self): 231 | act = self.s_pandas_query('SELECT * FROM trips LIMIT 5') 232 | exp = pd.DataFrame({ 233 | 'cab_type': pd.Series([ 234 | 'yellow', 'yellow', 'green', 'yellow', 'yellow'], 235 | dtype='string'), 236 | 'vendor_id': pd.Series([ 237 | 'VTS', 'VTS', 'VTS', 'CMT', 'VTS'], 238 | dtype='string'), 239 | 'pickup_datetime': pd.Series(pd.to_datetime([ 240 | '2016-01-01T00:00:00.000000', 241 | '2016-01-01T00:00:00.000000', 242 | '2016-01-01T00:00:01.000000', 243 | '2016-01-01T00:00:01.000000', 244 | '2016-01-01T00:00:02.000000']), 245 | dtype='datetime64[ns]'), 246 | 'dropoff_datetime': pd.Series(pd.to_datetime([ 247 | '2016-01-01T00:26:45.000000', 248 | '2016-01-01T00:18:30.000000', 249 | '2016-01-01T00:02:10.000000', 250 | '2016-01-01T00:11:55.000000', 251 | '2016-01-01T00:11:08.000000']), 252 | dtype='datetime64[ns]'), 253 | 'rate_code_id': pd.Series([ 254 | 'Standard rate', 255 | 'Standard rate', 256 | 'Standard rate', 257 | 'Standard rate', 258 | 'Standard rate'], 259 | dtype='string'), 260 | 'pickup_latitude': pd.Series([ 261 | -73.9940567, -73.9801178, -73.92303467, -73.97942352, -73.99834442], 262 | dtype='float'), 263 | 'pickup_longitude': pd.Series([ 264 | 40.71998978, 40.74304962, 40.70674515, 40.74461365, 40.72389603], 265 | dtype='float'), 266 | 'dropoff_latitude': pd.Series([ 267 | 40.78987122, 40.76314163, 40.70864487, 40.7539444, 40.68840027], 268 | dtype='float'), 269 | 'dropoff_longitude': pd.Series([ 270 | -73.966362, -73.9134903, -73.92714691, -73.99203491, -73.995849610000], 271 | dtype='float'), 272 | 'passenger_count': pd.Series([ 273 | 2, 2, 1, 1, 1], 274 | dtype='Int32'), 275 | 'trip_distance': pd.Series([ 276 | 7.45, 5.52, 0.34, 1.2, 3.21], 277 | dtype='float'), 278 | 'fare_amount': pd.Series([ 279 | 26.0, 19.0, 3.5, 9.0, 11.5], 280 | 
                dtype='float'),
            'extra': pd.Series([
                0.5, 0.5, 0.5, 0.5, 0.5],
                dtype='float'),
            'mta_tax': pd.Series([
                0.5, 0.5, 0.5, 0.5, 0.5],
                dtype='float'),
            'tip_amount': pd.Series([
                0.0, 0.0, 0.0, 0.0, 0.0],
                dtype='float'),
            'tolls_amount': pd.Series([
                0.0, 0.0, 0.0, 0.0, 0.0],
                dtype='float'),
            'ehail_fee': pd.Series([
                0.0, 0.0, 0.0, 0.0, 0.0],
                dtype='float'),
            'improvement_surcharge': pd.Series([
                0.3, 0.3, 0.3, 0.3, 0.3],
                dtype='float'),
            'congestion_surcharge': pd.Series([
                0.0, 0.0, 0.0, 0.0, 0.0],
                dtype='float'),
            'total_amount': pd.Series([
                27.3, 20.3, 4.8, 10.3, 12.8],
                dtype='float'),
            'payment_type': pd.Series([
                'Cash', 'Cash', 'Cash', 'Cash', 'Cash'],
                dtype='string'),
            'trip_type': pd.Series([
                'na', 'na', 'na', 'na', 'na'],
                dtype='string'),
            'pickup_location_id': pd.Series([
                0, 0, 0, 0, 0],
                dtype='Int32'),
            'dropoff_location_id': pd.Series([
                0, 0, 0, 0, 0],
                dtype='Int32')})
        assert_frame_equal(act, exp, check_column_type=True)

    def test_head_numpy(self):
        act = self.s_numpy_query('SELECT * FROM trips LIMIT 5')
        exp = {
            'cab_type': np.array([
                'yellow', 'yellow', 'green', 'yellow', 'yellow'],
                dtype='object'),
            'vendor_id': np.array([
                'VTS', 'VTS', 'VTS', 'CMT', 'VTS'],
                dtype='object'),
            'pickup_datetime': np.array([
                '2016-01-01T00:00:00.000000',
                '2016-01-01T00:00:00.000000',
                '2016-01-01T00:00:01.000000',
                '2016-01-01T00:00:01.000000',
                '2016-01-01T00:00:02.000000'],
                dtype='datetime64[ns]'),
            'dropoff_datetime': np.array([
                '2016-01-01T00:26:45.000000',
                '2016-01-01T00:18:30.000000',
                '2016-01-01T00:02:10.000000',
                '2016-01-01T00:11:55.000000',
                '2016-01-01T00:11:08.000000'],
                dtype='datetime64[ns]'),
            'rate_code_id': np.array([
                'Standard rate',
                'Standard rate',
                'Standard rate',
                'Standard rate',
                'Standard rate'],
                dtype='object'),
            'pickup_latitude': np.array([
                -73.9940567, -73.9801178, -73.92303467, -73.97942352, -73.99834442],
                dtype='float'),
            'pickup_longitude': np.array([
                40.71998978, 40.74304962, 40.70674515, 40.74461365, 40.72389603],
                dtype='float'),
            'dropoff_latitude': np.array([
                40.78987122, 40.76314163, 40.70864487, 40.7539444, 40.68840027],
                dtype='float'),
            'dropoff_longitude': np.array([
                -73.966362, -73.9134903, -73.92714691, -73.99203491, -73.99584961],
                dtype='float'),
            'passenger_count': np.array([
                2, 2, 1, 1, 1],
                dtype='int32'),
            'trip_distance': np.array([
                7.45, 5.52, 0.34, 1.2, 3.21],
                dtype='float'),
            'fare_amount': np.array([
                26.0, 19.0, 3.5, 9.0, 11.5],
                dtype='float'),
            'extra': np.array([
                0.5, 0.5, 0.5, 0.5, 0.5],
                dtype='float'),
            'mta_tax': np.array([
                0.5, 0.5, 0.5, 0.5, 0.5],
                dtype='float'),
            'tip_amount': np.array([
                0.0, 0.0, 0.0, 0.0, 0.0],
                dtype='float'),
            'tolls_amount': np.array([
                0.0, 0.0, 0.0, 0.0, 0.0],
                dtype='float'),
            'ehail_fee': np.array([
                0.0, 0.0, 0.0, 0.0, 0.0],
                dtype='float'),
            'improvement_surcharge': np.array([
                0.3, 0.3, 0.3, 0.3, 0.3],
                dtype='float'),
            'congestion_surcharge': np.array([
                0.0, 0.0, 0.0, 0.0, 0.0],
                dtype='float'),
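            # Unlike the pandas expectations above, numpy results use plain
            # dtypes: SYMBOL/VARCHAR columns arrive as object arrays of str,
            # and INT columns are non-nullable int32 rather than Int32.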
            'total_amount': np.array([
                27.3, 20.3, 4.8, 10.3, 12.8],
                dtype='float'),
            'payment_type': np.array([
                'Cash', 'Cash', 'Cash', 'Cash', 'Cash'],
                dtype='object'),
            'trip_type': np.array([
                'na', 'na', 'na', 'na', 'na'],
                dtype='object'),
            'pickup_location_id': np.array([
                0, 0, 0, 0, 0],
                dtype='int32'),
            'dropoff_location_id': np.array([
                0, 0, 0, 0, 0],
                dtype='int32')}
        self.assertEqual(act.keys(), exp.keys())
        for k in act:
            np.testing.assert_array_equal(act[k], exp[k])
            self.assertEqual(act[k].dtype, exp[k].dtype)

    def _test_chunked_pandas(self, limit=None):
        qry = 'SELECT * FROM trips'
        if limit is not None:
            qry += f' LIMIT {limit}'
        orig = self.s_pandas_query(qry, chunks=1)
        chunkings = [1, 2, 3, 7, 10, 11, 20, 100, 117]
        others = [self.s_pandas_query(qry, chunks=c) for c in chunkings]
        for other in others:
            assert_frame_equal(orig, other, check_column_type=True)

    def test_chunked_pandas_10(self):
        self._test_chunked_pandas(10)

    def test_chunked_pandas_133(self):
        self._test_chunked_pandas(133)

    def test_chunked_pandas(self):
        self._test_chunked_pandas()

    def test_almost_all_types(self):
        act = self.s_pandas_query('SELECT * FROM almost_all_types')
        schema = {
            name: str(val)
            for name, val
            in act.dtypes.to_dict().items()}
        exp_schema = {
            'id': 'Int32',
            'active': 'bool',
            'ip_address': 'string',
            'age': 'int8',
            'temperature': 'int16',
            'grade': 'string',
            'account_balance': 'float32',
            'currency_symbol': 'string',
            'description': 'string',
            'comment': 'string',
            'record_date': 'datetime64[ns]',
            'event_timestamp': 'datetime64[ns]',
            'revenue': 'float64',
            'user_uuid': 'string',
            'long_number': 'Int64',
            'crypto_hash': 'string',
        }
        self.assertEqual(exp_schema.keys(), schema.keys())
        for key in exp_schema:
            self.assertEqual((key, exp_schema[key]), (key, schema[key]))

        exp_df = pd.DataFrame({
            'id': pd.Series([1, 2, 3], dtype='Int32'),
            # QuestDB booleans are non-nullable: the NULL inserted in row 3
            # reads back as false (the None here likewise coerces to False).
            'active': pd.Series([True, False, None], dtype='bool'),
            'ip_address': pd.Series(['192.168.1.1', None, '10.0.0.1'], dtype='string'),
            'age': pd.Series([25, 30, 35], dtype='int8'),
            'temperature': pd.Series([72, 68, -40], dtype='int16'),
            'grade': pd.Series(['A', 'B', 'C'], dtype='string'),
            'account_balance': pd.Series([1000.5, 1500.25, None], dtype='float32'),
            'currency_symbol': pd.Series(['USD', 'EUR', 'JPY'], dtype='string'),
            'description': pd.Series(['Test record 1', None, 'Test record 3'], dtype='string'),
            'comment': pd.Series(['pink', 'lightgoldenrodyellow', None], dtype='string'),
            'record_date': pd.Series(['2023-01-01T00:00:00.000', None, '2023-01-03T00:00:00.000'], dtype='datetime64[ns]'),
            'event_timestamp': pd.Series(['2023-01-01T00:00:00.000000', '2023-01-02T00:00:00.000000', '2023-01-03T00:00:00.000000'], dtype='datetime64[ns]'),
            'revenue': pd.Series([200.00, 300.00, None], dtype='float64'),
            'user_uuid': pd.Series(['123e4567-e89b-12d3-a456-426614174000', '123e4567-e89b-12d3-a456-426614174001', '123e4567-e89b-12d3-a456-426614174002'], dtype='string'),
            'long_number': pd.Series([123456789012345, 987654321098765, None], dtype='Int64'),
            'crypto_hash': pd.Series(['0x7fffffffffffffffffffffffffffffff', None,
                '0x1fffffffffffffffffffffffffffffff'], dtype='string'),
        })
        assert_frame_equal(act, exp_df, check_column_type=True)

    def test_almost_all_types_0_rows(self):
        act = self.s_pandas_query('SELECT * FROM almost_all_types WHERE id = 0')
        schema = {
            name: str(val)
            for name, val
            in act.dtypes.to_dict().items()}
        exp_schema = {
            'id': 'Int32',
            'active': 'bool',
            'ip_address': 'string',
            'age': 'int8',
            'temperature': 'int16',
            'grade': 'string',
            'account_balance': 'float32',
            'currency_symbol': 'string',
            'description': 'string',
            'comment': 'string',
            'record_date': 'datetime64[ns]',
            'event_timestamp': 'datetime64[ns]',
            'revenue': 'float64',
            'user_uuid': 'string',
            'long_number': 'Int64',
            'crypto_hash': 'string',
        }
        self.assertEqual(exp_schema.keys(), schema.keys())
        for key in exp_schema:
            self.assertEqual((key, exp_schema[key]), (key, schema[key]))

        exp_df = pd.DataFrame({
            'id': pd.Series([], dtype='Int32'),
            'active': pd.Series([], dtype='bool'),
            'ip_address': pd.Series([], dtype='string'),
            'age': pd.Series([], dtype='int8'),
            'temperature': pd.Series([], dtype='int16'),
            'grade': pd.Series([], dtype='string'),
            'account_balance': pd.Series([], dtype='float32'),
            'currency_symbol': pd.Series([], dtype='string'),
            'description': pd.Series([], dtype='string'),
            'comment': pd.Series([], dtype='string'),
            'record_date': pd.Series([], dtype='datetime64[ns]'),
            'event_timestamp': pd.Series([], dtype='datetime64[ns]'),
            'revenue': pd.Series([], dtype='float64'),
            'user_uuid': pd.Series([], dtype='string'),
            'long_number': pd.Series([], dtype='Int64'),
            'crypto_hash': pd.Series([], dtype='string'),
        })
        assert_frame_equal(act, exp_df, check_column_type=True)

    async def test_async_pandas(self):
        act = await self.a_pandas_query('SELECT count() FROM trips')
        exp = pd.DataFrame({'count': pd.Series([10000], dtype='Int64')})
        assert_frame_equal(act, exp, check_column_type=True)

    async def test_async_numpy(self):
        act = await self.a_numpy_query('SELECT count() FROM trips')
        exp = {'count': np.array([10000], dtype='int64')}
        self.assertEqual(act, exp)

    def test_basic_auth_live(self):
        # Exercises basic auth against the live QuestDB fixture; the
        # mock-server test below asserts the exact Authorization header.
        endpoint = Endpoint(self.qdb.host, self.qdb.http_server_port, auth=AUTH)
        act = qdbq_s.pandas_query('SELECT count() FROM trips', endpoint=endpoint)
        exp = pd.DataFrame({'count': pd.Series([10000], dtype='Int64')})
        assert_frame_equal(act, exp, check_column_type=True)

    def _do_auth_test(self, exp_auth_header, username=None, password=None, token=None):
        with HttpServer() as server:
            server.responses.append((
                0,
                200,
                'application/json',
                (
                    b'{"columns": [{"name": "count", "type": "LONG"}], ' +
                    b'"count": 1, "dataset": [[10000]], "query": "SELECT count() ' +
                    b'FROM trips", "timestamp": -1}'
                )))
            server.responses.append((
                0,
                200,
                'text/csv',
                b'"count"\r\n10000\r\n'
            ))

            endpoint = Endpoint(
                'localhost',
                server.port,
                username=username,
                password=password,
                token=token)
            act = qdbq_s.pandas_query('SELECT count() FROM trips', endpoint=endpoint)
            exp = pd.DataFrame({'count': pd.Series([10000], dtype='Int64')})
            assert_frame_equal(act, exp, check_column_type=True)
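            # The query triggers two HTTP requests (one answered with JSON,
            # one with CSV); both should carry the same Authorization header,
            # which the mock server records for inspection.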
            auth0 = server.headers[0]['Authorization']
            auth1 = server.headers[1]['Authorization']
            self.assertEqual(auth0, auth1)
            self.assertEqual(auth0, exp_auth_header)

    def test_basic_auth(self):
        self._do_auth_test(
            'Basic YWRtaW46cXVlc3Q=',
            username='admin',
            password='quest')

    def test_token_auth(self):
        self._do_auth_test(
            'Bearer 1234567890',
            token='1234567890')

    def test_timeout(self):
        import asyncio  # for asyncio.TimeoutError, raised by the sync API on timeout

        with HttpServer() as server:
            server.responses.append((
                2000,  # 2 seconds
                200,
                'application/json',
                (
                    b'{"columns": [{"name": "count", "type": "LONG"}], ' +
                    b'"count": 1, "dataset": [[10000]], "query": "SELECT count() ' +
                    b'FROM trips", "timestamp": -1}'
                )))
            server.responses.append((
                2000,  # 2 seconds
                200,
                'text/csv',
                b'"count"\r\n10000\r\n'
            ))

            endpoint = Endpoint('localhost', server.port)
            with self.assertRaises(asyncio.TimeoutError):
                qdbq_s.pandas_query(
                    'SELECT count() FROM trips',
                    endpoint=endpoint,
                    timeout=1)


if __name__ == '__main__':
    unittest.main()
--------------------------------------------------------------------------------