├── .gitignore ├── LICENSE ├── README.md ├── poetry.lock ├── py_tsbs_benchmark ├── __init__.py ├── bench_pandas.py ├── bench_raw_ilp.py └── common.py ├── pyproject.toml ├── results ├── ingestion.webp └── serialization.webp └── tests └── __init__.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # ILP files 7 | *.ilp 8 | 9 | # C extensions 10 | *.so 11 | 12 | # Distribution / packaging 13 | .Python 14 | build/ 15 | develop-eggs/ 16 | dist/ 17 | downloads/ 18 | eggs/ 19 | .eggs/ 20 | lib/ 21 | lib64/ 22 | parts/ 23 | sdist/ 24 | var/ 25 | wheels/ 26 | pip-wheel-metadata/ 27 | share/python-wheels/ 28 | *.egg-info/ 29 | .installed.cfg 30 | *.egg 31 | MANIFEST 32 | 33 | # PyInstaller 34 | # Usually these files are written by a python script from a template 35 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 36 | *.manifest 37 | *.spec 38 | 39 | # Installer logs 40 | pip-log.txt 41 | pip-delete-this-directory.txt 42 | 43 | # Unit test / coverage reports 44 | htmlcov/ 45 | .tox/ 46 | .nox/ 47 | .coverage 48 | .coverage.* 49 | .cache 50 | nosetests.xml 51 | coverage.xml 52 | *.cover 53 | *.py,cover 54 | .hypothesis/ 55 | .pytest_cache/ 56 | 57 | # Translations 58 | *.mo 59 | *.pot 60 | 61 | # Django stuff: 62 | *.log 63 | local_settings.py 64 | db.sqlite3 65 | db.sqlite3-journal 66 | 67 | # Flask stuff: 68 | instance/ 69 | .webassets-cache 70 | 71 | # Scrapy stuff: 72 | .scrapy 73 | 74 | # Sphinx documentation 75 | docs/_build/ 76 | 77 | # PyBuilder 78 | target/ 79 | 80 | # Jupyter Notebook 81 | .ipynb_checkpoints 82 | 83 | # IPython 84 | profile_default/ 85 | ipython_config.py 86 | 87 | # pyenv 88 | .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 98 | __pypackages__/ 99 | 100 | # Celery stuff 101 | celerybeat-schedule 102 | celerybeat.pid 103 | 104 | # SageMath parsed files 105 | *.sage.py 106 | 107 | # Environments 108 | .env 109 | .venv 110 | env/ 111 | venv/ 112 | ENV/ 113 | env.bak/ 114 | venv.bak/ 115 | 116 | # Spyder project settings 117 | .spyderproject 118 | .spyproject 119 | 120 | # Rope project settings 121 | .ropeproject 122 | 123 | # mkdocs documentation 124 | /site 125 | 126 | # mypy 127 | .mypy_cache/ 128 | .dmypy.json 129 | dmypy.json 130 | 131 | # Pyre type checker 132 | .pyre/ 133 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 
14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 
134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 
193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Benchmarking Ingestion of Pandas into QuestDB 2 | 3 | ## Background 4 | [QuestDB](https://questdb.io/) is our time-series relational database with SQL 5 | query support. We support a dedicated protocol (called 6 | [ILP](https://questdb.io/docs/reference/api/ilp/overview/)) to ingest millions 7 | of rows per second over TCP. 8 | 9 | Many of our users pre-process their data in Python using 10 | [Pandas](https://pandas.pydata.org/) dataframes. Until recently, however, 11 | they had to loop through the dataframes row by row in Python and would 12 | see quite poor performance when doing so (mere thousands of rows per second). 13 | 14 | We've recently introduced new functionality that iterates the dataframes in 15 | native code, achieving significant speedups. 16 | 17 | ## This Repo 18 | 19 | This repository hosts code to benchmark the ingestion rate of 20 | Pandas dataframes into QuestDB using the official 21 | [`questdb`](https://py-questdb-client.readthedocs.io/en/latest/) 22 | Python client library. 23 | 24 | The benchmark reproduces and ingests the "dev ops" (a.k.a. 'cpu') dataset from 25 | the [TSBS](https://github.com/timescale/tsbs) project over ILP into QuestDB. 26 | 27 | The TSBS project is written in Go, and we replicate the same logic here in 28 | Python: the generated data has the same columns, datatypes, cardinality, etc. 29 | Scroll to the end of this page to see a sample of generated data. 30 | 31 | The data consists of: 32 | * 10 SYMBOL columns (string columns with repeated values - i.e. interned) 33 | * 10 DOUBLE columns (64-bit floats) 34 | * 1 TIMESTAMP column (unix epoch nanoseconds, UTC) 35 | 36 | To run these benchmarks, you will need: 37 | * Modern hardware with multiple cores and enough 38 | RAM to hold a large Pandas dataset in memory. 39 | * Python 3.10 and [poetry](https://python-poetry.org/). 40 | * A recent version of QuestDB. 41 | 42 | You can follow the setup and run commands below, or just scroll down to see the 43 | numbers from our benchmark runs. 44 | 45 | ## Setup 46 | 47 | ### Python Client 48 | 49 | After cloning this git repo: 50 | 51 | ```bash 52 | poetry env use 3.10 53 | poetry install 54 | ``` 55 | 56 | Note that each benchmark run will delete and re-create the `'cpu'` table. 57 | 58 | ### Preparing QuestDB 59 | 60 | Start a QuestDB instance. 61 | 62 | ```bash 63 | questdb start 64 | ``` 65 | 66 | ## Running 67 | 68 | ### The hardware we used 69 | 70 | The numbers included below are from the following setup: 71 | * AMD 5950x 72 | * 64GB RAM, DDR4-3600 73 | * 2TB Samsung 980 PRO 74 | * Linux (kernel version: 5.19.0) 75 | * Python 3.10.8 76 | * QuestDB 7.0.1 77 | * 12 threads for the QuestDB server. 78 | * 6 threads for the Python client in multi-threaded benchmarks.
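For reference, the generated dataframe described in the "This Repo" section above has roughly the following shape. This is only an illustrative sketch with made-up values (the benchmark generates its own data), and the `'string[pyarrow]'` dtype choice is explained towards the end of this README:

```python
import pandas as pd

# Illustrative only: two rows shaped like the benchmark's 'cpu' dataset.
df = pd.DataFrame({
    # 10 SYMBOL columns: hostname, region, datacenter, rack, os, arch,
    # team, service, service_version, service_environment (two shown here).
    'hostname': pd.Series(['host_0', 'host_1'], dtype='string[pyarrow]'),
    'region': pd.Series(['eu-west-1', 'ap-northeast-1'], dtype='string[pyarrow]'),

    # 10 DOUBLE columns: usage_user through usage_guest_nice (two shown here).
    'usage_user': pd.Series([2.26, 2.26], dtype='float64'),
    'usage_system': pd.Series([0.77, 0.51], dtype='float64'),

    # TIMESTAMP column: nanosecond-resolution timestamps, UTC.
    'timestamp': pd.to_datetime(
        ['2016-01-01T00:00:00', '2016-01-01T00:00:10'], utc=True),
})
```

The full benchmark builds a much larger dataframe of this shape (10 million rows by default, per the `row_count` parameter shown in the runs below).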
79 | 80 | ### Configuration 81 | 82 | For this specific hardware, we benchmark with a 83 | [tweaked](https://questdb.io/docs/reference/api/ilp/tcp-receiver/#capacity-planning) 84 | QuestDB config as shown below. This is done to avoid the server instance 85 | overbooking threads, given that we'll also be running the client on the same 86 | host. 87 | 88 | ```ini 89 | # conf/server.conf 90 | shared.worker.count=6 91 | line.tcp.io.worker.count=6 92 | cairo.wal.enabled.default=true 93 | ``` 94 | 95 | In addition, we've enabled WAL tables, as these give us better ingestion 96 | performance (~1.3x faster single-threaded and ~1.6x faster 97 | multi-threaded in this specific benchmark suite on the described 98 | hardware). 99 | 100 | If your benchmarking client and QuestDB server are on separate machines, you 101 | shouldn't need any config tweaks to get the best performance. 102 | 103 | The benchmark script assumes that the instance is running on localhost on standard 104 | ports. If the instance is remote or uses different ports, you can pass the 105 | `--host`, `--ilp-port` and `--http-port` arguments to the benchmark script 106 | shown later. 107 | 108 | Your mileage may vary of course, but it's clear from the benchmarks below that 109 | it's worth using the [`sender.dataframe()`](https://py-questdb-client.readthedocs.io/en/latest/api.html#questdb.ingress.Sender.dataframe) 110 | API and not looping through the dataframe row by row in Python. 111 | 112 | ## Results 113 | 114 | By implementing the Pandas ingestion layer in native code, we're now ~28x faster 115 | in single-threaded code and ~92x faster in multi-threaded code, including 116 | database insert operations. 117 | 118 | Our performance improvements for just serializing to an in-memory ILP buffer are 119 | even better: single-threaded serialization is ~58x faster and ~284x 120 | faster when it's possible to serialize in parallel (when the Pandas column types 121 | hold data directly and not through Python objects). 122 | 123 | ### Notes 124 | * Numbers are taken from the runs shown later in this same page. 125 | * Timings *exclude* the time taken to generate the sample Pandas dataframe. 126 | * The NNx-faster figures are calculated as FAST/SLOW using the MiB/s throughputs (see the short sketch below).
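For example, taking the MiB/s throughput figures from the benchmark runs reported later in this page, the headline speedups can be recomputed with a few lines of Python (an illustrative snippet, not part of the benchmark code):

```python
# Throughputs in MiB/s, quoted from the benchmark outputs shown below.
py_row_serialize, py_row_send = 9.25, 9.13       # .row() loop via df.iterrows()
df_serialize, df_send = 543.04, 261.04           # .dataframe(), single-threaded
df_serialize_mt, df_send_mt = 2635.69, 843.18    # .dataframe(), 8 workers

# FAST / SLOW, truncated to whole multiples as quoted above.
print(int(df_serialize / py_row_serialize))      # 58   (serialization, single-threaded)
print(int(df_serialize_mt / py_row_serialize))   # 284  (serialization, multi-threaded)
print(int(df_send / py_row_send))                # 28   (ingestion, single-threaded)
print(int(df_send_mt / py_row_send))             # 92   (ingestion, multi-threaded)
```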
127 | 128 | ### Serialization to ILP in-memory buffer 129 | 130 | *No network operations.* 131 | 132 | 147 | ![chart](results/serialization.webp) 148 | 149 | ### Serialization, network send & data insertion into QuestDB 150 | 151 | 166 | ![chart](results/ingestion.webp) 167 | 168 | ### Without Pandas Support (pre-existing `.row()` API) 169 | 170 | Before Pandas support provided by the new 171 | [`questdb>=1.1.0`](https://pypi.org/project/questdb/) Python client, 172 | one had to iterate a Pandas dataframe row by row: 173 | 174 | ```python 175 | with Sender('localhost', 9009) as sender: 176 | for _index, row in df.iterrows(): 177 | sender.row( 178 | 'cpu', 179 | symbols={ 180 | 'hostname': row['hostname'], 181 | 'region': row['region'], 182 | 'datacenter': row['datacenter'], 183 | 'rack': row['rack'], 184 | 'os': row['os'], 185 | 'arch': row['arch'], 186 | 'team': row['team'], 187 | 'service': row['service'], 188 | 'service_version': row['service_version'], 189 | 'service_environment': row['service_environment']}, 190 | columns={ 191 | 'usage_user': row['usage_user'], 192 | 'usage_system': row['usage_system'], 193 | 'usage_idle': row['usage_idle'], 194 | 'usage_nice': row['usage_nice'], 195 | 'usage_iowait': row['usage_iowait'], 196 | 'usage_irq': row['usage_irq'], 197 | 'usage_softirq': row['usage_softirq'], 198 | 'usage_steal': row['usage_steal'], 199 | 'usage_guest': row['usage_guest'], 200 | 'usage_guest_nice': row['usage_guest_nice']}, 201 | at=TimestampNanos(row['timestamp'].value)) 202 | ``` 203 | 204 | This was *very* slow. 205 | 206 | ``` 207 | poetry run bench_pandas --py-row --send --row-count 1000000 208 | ``` 209 | 210 | ``` 211 | Running with params: 212 | {'debug': False, 213 | 'host': 'localhost', 214 | 'http_port': 9000, 215 | 'ilp_port': 9009, 216 | 'py_row': True, 217 | 'row_count': 1000000, 218 | 'scale': 4000, 219 | 'seed': 6484453060204943748, 220 | 'send': True, 221 | 'shell': False, 222 | 'validation_query_timeout': 120.0, 223 | 'worker_chunk_row_count': 10000, 224 | 'workers': None, 225 | 'write_ilp': None} 226 | Dropped table cpu 227 | Created table cpu 228 | Serialized: 229 | 1000000 rows in 50.27s: 0.02 mil rows/sec. 230 | ILP Buffer size: 465.27 MiB: 9.25 MiB/sec. 231 | Sent: 232 | 1000000 rows in 50.98s: 0.02 mil rows/sec. 233 | ILP Buffer size: 465.27 MiB: 9.13 MiB/sec. 234 | ``` 235 | 236 | During profiling, we found out that this was dominated (over 90% of the time) by 237 | iterating through the pandas dataframe and *not* the `.row()` method itself. 238 | 239 | ### Single-threaded test (New `.dataframe()` API) 240 | 241 | The new [`sender.dataframe()`](https://py-questdb-client.readthedocs.io/en/latest/api.html#questdb.ingress.Sender.dataframe) 242 | method resolves the performance problem by iterating through the data in native 243 | code and is also easier to use from Python. 
244 | 245 | ```python 246 | with Sender('localhost', 9009) as sender: 247 | sender.dataframe(df, table_name='cpu', symbols=True, at='timestamp') 248 | ``` 249 | 250 | *Benchmarking code: `send_one` in [`py_tsbs_benchmark/bench_pandas.py`](py_tsbs_benchmark/bench_pandas.py).* 251 | 252 | ```bash 253 | poetry run bench_pandas --send 254 | ``` 255 | 256 | ``` 257 | Running with params: 258 | {'debug': False, 259 | 'host': 'localhost', 260 | 'http_port': 9000, 261 | 'ilp_port': 9009, 262 | 'py_row': False, 263 | 'row_count': 10000000, 264 | 'scale': 4000, 265 | 'seed': 4803204514533752103, 266 | 'send': True, 267 | 'shell': False, 268 | 'validation_query_timeout': 120.0, 269 | 'worker_chunk_row_count': 10000, 270 | 'workers': None, 271 | 'write_ilp': None} 272 | Table cpu does not exist 273 | Created table cpu 274 | Serialized: 275 | 10000000 rows in 8.57s: 1.17 mil rows/sec. 276 | ILP Buffer size: 4652.50 MiB: 543.04 MiB/sec. 277 | Sent: 278 | 10000000 rows in 17.82s: 0.56 mil rows/sec. 279 | ILP Buffer size: 4652.50 MiB: 261.04 MiB/sec. 280 | ``` 281 | 282 | ### Multi-threaded test (multi-threaded use of the `.dataframe()` API) 283 | 284 | Since we release the Python GIL, it's also possible to create multiple `sender` 285 | objects and ingest in parallel. This means that the QuestDB database receives 286 | the data [out of order, but the database deals with it](https://questdb.io/docs/concept/designated-timestamp#out-of-order-policy). 287 | 288 | *Benchmarking code: `send_workers` in [`py_tsbs_benchmark/bench_pandas.py`](py_tsbs_benchmark/bench_pandas.py).* 289 | 290 | ```bash 291 | poetry run bench_pandas --send --workers 8 292 | ``` 293 | 294 | ``` 295 | Running with params: 296 | {'debug': False, 297 | 'host': 'localhost', 298 | 'http_port': 9000, 299 | 'ilp_port': 9009, 300 | 'py_row': False, 301 | 'row_count': 10000000, 302 | 'scale': 4000, 303 | 'seed': 1038685014730277296, 304 | 'send': True, 305 | 'shell': False, 306 | 'validation_query_timeout': 120.0, 307 | 'worker_chunk_row_count': 10000, 308 | 'workers': 8, 309 | 'write_ilp': None} 310 | Dropped table cpu 311 | Created table cpu 312 | Serialized: 313 | 10000000 rows in 1.77s: 5.66 mil rows/sec. 314 | ILP Buffer size: 4652.60 MiB: 2635.69 MiB/sec. 315 | Sent: 316 | 10000000 rows in 5.52s: 1.81 mil rows/sec. 317 | ILP Buffer size: 4652.60 MiB: 843.18 MiB/sec. 318 | ``` 319 | 320 | ### Full options 321 | 322 | ```bash 323 | poetry run bench_pandas --help 324 | ``` 325 | 326 | ## The `.dataframe()` method inner workings 327 | 328 | The TL;DR of how we achieve these numbers: we avoid calling into the Python 329 | interpreter within the send loop whenever possible. 330 | 331 | Data in Pandas is (usually) laid out as columns of contiguous memory. 332 | Each column (series) is accessible either as a 333 | [numpy array](https://numpy.org/), exposed via the 334 | [Python Buffer protocol](https://docs.python.org/3/c-api/buffer.html), or 335 | as an Apache Arrow array via its 336 | [C data interface](https://arrow.apache.org/docs/format/CDataInterface.html). 337 | We've done some experimentation to figure 338 | out which Pandas datatypes are best suited to each access pattern and go from 339 | there. We try to avoid copies whenever possible (which is almost always). 340 | 341 | We loop over the buffers for the series in Cython (which compiles down to C and 342 | eventually native code) and call our serialization functions, which are written 343 | in Rust and themselves have a C API.
A bit of inlining and link time 344 | optimization and we can get good numbers. 345 | 346 | As a bonus, this approach also allows us to release the Python GIL and 347 | parallelize across threads for customers that need that little bit of extra 348 | performance. 349 | 350 | If you're interested in the actual implementation, it lives here: 351 | https://github.com/questdb/py-questdb-client/blob/main/src/questdb/dataframe.pxi 352 | 353 | ## Pandas Dataframe String Column Choice 354 | 355 | We use the `'string[pyarrow]'` dtype in Pandas as it allows us to 356 | read the string column without needing to lock the GIL. 357 | 358 | Compared to using a more conventional Python `str`-object `'O'` dtype Pandas 359 | column type, this makes a significant difference in the multi-threaded benchmark 360 | as it enables parallelization, but makes little difference for a single-threaded 361 | use case scenario where we've gone the 362 | [extra mile](https://github.com/questdb/py-questdb-client/tree/main/pystr-to-utf8) 363 | to ensure fast Python `str` object to UTF-8 encoding by handling the interal 364 | UCS-1, UCS-2 and UCS-4 representations in a small helper library in Rust. 365 | 366 | ## Sample of generated ILP messages 367 | 368 | ``` 369 | cpu,hostname=host_0,region=eu-west-1,datacenter=eu-west-1c,rack=22,os=Ubuntu15.10,arch=x86,team=LON,service=11,service_version=0,service_environment=staging usage_user=2.260713995474621,usage_system=0.7742634345475894,usage_idle=0.5433421797689806,usage_nice=0.0,usage_iowait=1.8872789915891544,usage_irq=0.5362196205980163,usage_softirq=0.7432769744844461,usage_steal=0.0,usage_guest=0.0,usage_guest_nice=1.2110585427526344 1451606400000000000 370 | cpu,hostname=host_1,region=ap-northeast-1,datacenter=ap-northeast-1a,rack=53,os=Ubuntu15.10,arch=x86,team=NYC,service=1,service_version=0,service_environment=production usage_user=2.264693554570983,usage_system=0.5146965259325763,usage_idle=1.8878914216159703,usage_nice=0.0,usage_iowait=0.5884560303533308,usage_irq=0.42753305894872856,usage_softirq=0.801180194243782,usage_steal=0.8661127008514166,usage_guest=0.0,usage_guest_nice=0.5764978743281829 1451606410000000000 371 | cpu,hostname=host_2,region=us-west-1,datacenter=us-west-1b,rack=29,os=Ubuntu15.10,arch=x86,team=SF,service=2,service_version=1,service_environment=production usage_user=2.6079664747344085,usage_system=0.42609358370322725,usage_idle=0.0016162253527125525,usage_nice=0.10596370190082907,usage_iowait=0.665106751584084,usage_irq=0.0,usage_softirq=0.6311393304729056,usage_steal=0.0,usage_guest=0.0,usage_guest_nice=1.2642526620101873 1451606420000000000 372 | cpu,hostname=host_3,region=ap-southeast-2,datacenter=ap-southeast-2a,rack=68,os=Ubuntu15.10,arch=x64,team=NYC,service=12,service_version=1,service_environment=test usage_user=1.9812498570755634,usage_system=1.0573409130777713,usage_idle=0.6307345282945178,usage_nice=0.6577966205420174,usage_iowait=0.8692677309522628,usage_irq=0.0,usage_softirq=0.5188911519558501,usage_steal=0.46402279460697793,usage_guest=0.6656099875988695,usage_guest_nice=1.7476069678472128 1451606430000000000 373 | cpu,hostname=host_4,region=us-east-1,datacenter=us-east-1e,rack=40,os=Ubuntu15.10,arch=x86,team=SF,service=11,service_version=1,service_environment=staging usage_user=2.5964868241838843,usage_system=0.0,usage_idle=1.2272999339697328,usage_nice=0.12023414661389953,usage_iowait=0.8395651302668741,usage_irq=0.0,usage_softirq=0.45434802944514724,usage_steal=0.0,usage_guest=0.0,usage_guest_nice=3.2814223881823787 
1451606440000000000 374 | cpu,hostname=host_5,region=eu-central-1,datacenter=eu-central-1b,rack=32,os=Ubuntu16.04LTS,arch=x86,team=SF,service=14,service_version=1,service_environment=staging usage_user=3.072615656127865,usage_system=0.0,usage_idle=1.3812601522351302,usage_nice=0.7655212714345465,usage_iowait=2.3434629262758166,usage_irq=0.3539595541407819,usage_softirq=0.0,usage_steal=2.9262011833188217,usage_guest=1.0922871015583087,usage_guest_nice=2.7897087006502304 1451606450000000000 375 | cpu,hostname=host_6,region=us-west-2,datacenter=us-west-2c,rack=11,os=Ubuntu16.10,arch=x86,team=NYC,service=2,service_version=0,service_environment=test usage_user=2.8100880667177486,usage_system=1.0253398248948349,usage_idle=1.5919865749453264,usage_nice=0.0,usage_iowait=4.366890705367804,usage_irq=1.0361144031260785,usage_softirq=0.0,usage_steal=1.3542451068971073,usage_guest=2.8090962406357027,usage_guest_nice=5.027439036611597 1451606460000000000 376 | cpu,hostname=host_7,region=ap-southeast-1,datacenter=ap-southeast-1a,rack=97,os=Ubuntu16.10,arch=x86,team=NYC,service=19,service_version=0,service_environment=staging usage_user=3.3933324938392984,usage_system=2.674165314702581,usage_idle=1.729746564369149,usage_nice=0.0,usage_iowait=2.6295278539977893,usage_irq=0.33325995202946646,usage_softirq=0.0,usage_steal=0.8629771143071407,usage_guest=3.5565038601505514,usage_guest_nice=4.295707748569857 1451606470000000000 377 | cpu,hostname=host_8,region=eu-central-1,datacenter=eu-central-1b,rack=43,os=Ubuntu16.04LTS,arch=x86,team=SF,service=18,service_version=0,service_environment=production usage_user=2.3683820719125404,usage_system=3.1496636608187587,usage_idle=1.0714252817838013,usage_nice=0.0,usage_iowait=3.658575628441112,usage_irq=0.0,usage_softirq=0.0,usage_steal=0.9944564076833474,usage_guest=3.606177791932647,usage_guest_nice=5.665699532249171 1451606480000000000 378 | cpu,hostname=host_9,region=sa-east-1,datacenter=sa-east-1b,rack=82,os=Ubuntu15.10,arch=x86,team=CHI,service=14,service_version=1,service_environment=staging usage_user=2.711560205310839,usage_system=2.92632821713108,usage_idle=1.6924636783124183,usage_nice=0.8654306023153091,usage_iowait=5.201435533195961,usage_irq=0.0,usage_softirq=1.7215318876485612,usage_steal=0.6839422702175311,usage_guest=3.1192465146389465,usage_guest_nice=5.414096713475799 1451606490000000000 379 | ``` 380 | -------------------------------------------------------------------------------- /poetry.lock: -------------------------------------------------------------------------------- 1 | # This file is automatically @generated by Poetry and should not be changed by hand. 2 | 3 | [[package]] 4 | name = "certifi" 5 | version = "2022.12.7" 6 | description = "Python package for providing Mozilla's CA Bundle." 7 | category = "main" 8 | optional = false 9 | python-versions = ">=3.6" 10 | files = [ 11 | {file = "certifi-2022.12.7-py3-none-any.whl", hash = "sha256:4ad3232f5e926d6718ec31cfc1fcadfde020920e278684144551c91769c7bc18"}, 12 | {file = "certifi-2022.12.7.tar.gz", hash = "sha256:35824b4c3a97115964b408844d64aa14db1cc518f6562e8d7261699d1350a9e3"}, 13 | ] 14 | 15 | [[package]] 16 | name = "charset-normalizer" 17 | version = "2.1.1" 18 | description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." 
19 | category = "main" 20 | optional = false 21 | python-versions = ">=3.6.0" 22 | files = [ 23 | {file = "charset-normalizer-2.1.1.tar.gz", hash = "sha256:5a3d016c7c547f69d6f81fb0db9449ce888b418b5b9952cc5e6e66843e9dd845"}, 24 | {file = "charset_normalizer-2.1.1-py3-none-any.whl", hash = "sha256:83e9a75d1911279afd89352c68b45348559d1fc0506b054b346651b5e7fee29f"}, 25 | ] 26 | 27 | [package.extras] 28 | unicode-backport = ["unicodedata2"] 29 | 30 | [[package]] 31 | name = "idna" 32 | version = "3.4" 33 | description = "Internationalized Domain Names in Applications (IDNA)" 34 | category = "main" 35 | optional = false 36 | python-versions = ">=3.5" 37 | files = [ 38 | {file = "idna-3.4-py3-none-any.whl", hash = "sha256:90b77e79eaa3eba6de819a0c442c0b4ceefc341a7a2ab77d7562bf49f425c5c2"}, 39 | {file = "idna-3.4.tar.gz", hash = "sha256:814f528e8dead7d329833b91c5faa87d60bf71824cd12a7530b5526063d02cb4"}, 40 | ] 41 | 42 | [[package]] 43 | name = "llvmlite" 44 | version = "0.39.1" 45 | description = "lightweight wrapper around basic LLVM functionality" 46 | category = "main" 47 | optional = false 48 | python-versions = ">=3.7" 49 | files = [ 50 | {file = "llvmlite-0.39.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:6717c7a6e93c9d2c3d07c07113ec80ae24af45cde536b34363d4bcd9188091d9"}, 51 | {file = "llvmlite-0.39.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:ddab526c5a2c4ccb8c9ec4821fcea7606933dc53f510e2a6eebb45a418d3488a"}, 52 | {file = "llvmlite-0.39.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a3f331a323d0f0ada6b10d60182ef06c20a2f01be21699999d204c5750ffd0b4"}, 53 | {file = "llvmlite-0.39.1-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e2c00ff204afa721b0bb9835b5bf1ba7fba210eefcec5552a9e05a63219ba0dc"}, 54 | {file = "llvmlite-0.39.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:16f56eb1eec3cda3a5c526bc3f63594fc24e0c8d219375afeb336f289764c6c7"}, 55 | {file = "llvmlite-0.39.1-cp310-cp310-win32.whl", hash = "sha256:d0bfd18c324549c0fec2c5dc610fd024689de6f27c6cc67e4e24a07541d6e49b"}, 56 | {file = "llvmlite-0.39.1-cp310-cp310-win_amd64.whl", hash = "sha256:7ebf1eb9badc2a397d4f6a6c8717447c81ac011db00064a00408bc83c923c0e4"}, 57 | {file = "llvmlite-0.39.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:6546bed4e02a1c3d53a22a0bced254b3b6894693318b16c16c8e43e29d6befb6"}, 58 | {file = "llvmlite-0.39.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1578f5000fdce513712e99543c50e93758a954297575610f48cb1fd71b27c08a"}, 59 | {file = "llvmlite-0.39.1-cp37-cp37m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3803f11ad5f6f6c3d2b545a303d68d9fabb1d50e06a8d6418e6fcd2d0df00959"}, 60 | {file = "llvmlite-0.39.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:50aea09a2b933dab7c9df92361b1844ad3145bfb8dd2deb9cd8b8917d59306fb"}, 61 | {file = "llvmlite-0.39.1-cp37-cp37m-win32.whl", hash = "sha256:b1a0bbdb274fb683f993198775b957d29a6f07b45d184c571ef2a721ce4388cf"}, 62 | {file = "llvmlite-0.39.1-cp37-cp37m-win_amd64.whl", hash = "sha256:e172c73fccf7d6db4bd6f7de963dedded900d1a5c6778733241d878ba613980e"}, 63 | {file = "llvmlite-0.39.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:e31f4b799d530255aaf0566e3da2df5bfc35d3cd9d6d5a3dcc251663656c27b1"}, 64 | {file = "llvmlite-0.39.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:62c0ea22e0b9dffb020601bb65cb11dd967a095a488be73f07d8867f4e327ca5"}, 65 | {file = 
"llvmlite-0.39.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9ffc84ade195abd4abcf0bd3b827b9140ae9ef90999429b9ea84d5df69c9058c"}, 66 | {file = "llvmlite-0.39.1-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c0f158e4708dda6367d21cf15afc58de4ebce979c7a1aa2f6b977aae737e2a54"}, 67 | {file = "llvmlite-0.39.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:22d36591cd5d02038912321d9ab8e4668e53ae2211da5523f454e992b5e13c36"}, 68 | {file = "llvmlite-0.39.1-cp38-cp38-win32.whl", hash = "sha256:4c6ebace910410daf0bebda09c1859504fc2f33d122e9a971c4c349c89cca630"}, 69 | {file = "llvmlite-0.39.1-cp38-cp38-win_amd64.whl", hash = "sha256:fb62fc7016b592435d3e3a8f680e3ea8897c3c9e62e6e6cc58011e7a4801439e"}, 70 | {file = "llvmlite-0.39.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:fa9b26939ae553bf30a9f5c4c754db0fb2d2677327f2511e674aa2f5df941789"}, 71 | {file = "llvmlite-0.39.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:e4f212c018db951da3e1dc25c2651abc688221934739721f2dad5ff1dd5f90e7"}, 72 | {file = "llvmlite-0.39.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:39dc2160aed36e989610fc403487f11b8764b6650017ff367e45384dff88ffbf"}, 73 | {file = "llvmlite-0.39.1-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1ec3d70b3e507515936e475d9811305f52d049281eaa6c8273448a61c9b5b7e2"}, 74 | {file = "llvmlite-0.39.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:60f8dd1e76f47b3dbdee4b38d9189f3e020d22a173c00f930b52131001d801f9"}, 75 | {file = "llvmlite-0.39.1-cp39-cp39-win32.whl", hash = "sha256:03aee0ccd81735696474dc4f8b6be60774892a2929d6c05d093d17392c237f32"}, 76 | {file = "llvmlite-0.39.1-cp39-cp39-win_amd64.whl", hash = "sha256:3fc14e757bc07a919221f0cbaacb512704ce5774d7fcada793f1996d6bc75f2a"}, 77 | {file = "llvmlite-0.39.1.tar.gz", hash = "sha256:b43abd7c82e805261c425d50335be9a6c4f84264e34d6d6e475207300005d572"}, 78 | ] 79 | 80 | [[package]] 81 | name = "numba" 82 | version = "0.56.4" 83 | description = "compiling Python code using LLVM" 84 | category = "main" 85 | optional = false 86 | python-versions = ">=3.7" 87 | files = [ 88 | {file = "numba-0.56.4-cp310-cp310-macosx_10_14_x86_64.whl", hash = "sha256:9f62672145f8669ec08762895fe85f4cf0ead08ce3164667f2b94b2f62ab23c3"}, 89 | {file = "numba-0.56.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c602d015478b7958408d788ba00a50272649c5186ea8baa6cf71d4a1c761bba1"}, 90 | {file = "numba-0.56.4-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:85dbaed7a05ff96492b69a8900c5ba605551afb9b27774f7f10511095451137c"}, 91 | {file = "numba-0.56.4-cp310-cp310-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:f4cfc3a19d1e26448032049c79fc60331b104f694cf570a9e94f4e2c9d0932bb"}, 92 | {file = "numba-0.56.4-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:4e08e203b163ace08bad500b0c16f6092b1eb34fd1fce4feaf31a67a3a5ecf3b"}, 93 | {file = "numba-0.56.4-cp310-cp310-win32.whl", hash = "sha256:0611e6d3eebe4cb903f1a836ffdb2bda8d18482bcd0a0dcc56e79e2aa3fefef5"}, 94 | {file = "numba-0.56.4-cp310-cp310-win_amd64.whl", hash = "sha256:fbfb45e7b297749029cb28694abf437a78695a100e7c2033983d69f0ba2698d4"}, 95 | {file = "numba-0.56.4-cp37-cp37m-macosx_10_14_x86_64.whl", hash = "sha256:3cb1a07a082a61df80a468f232e452d818f5ae254b40c26390054e4e868556e0"}, 96 | {file = "numba-0.56.4-cp37-cp37m-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = 
"sha256:d69ad934e13c15684e7887100a8f5f0f61d7a8e57e0fd29d9993210089a5b531"}, 97 | {file = "numba-0.56.4-cp37-cp37m-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:dbcc847bac2d225265d054993a7f910fda66e73d6662fe7156452cac0325b073"}, 98 | {file = "numba-0.56.4-cp37-cp37m-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:8a95ca9cc77ea4571081f6594e08bd272b66060634b8324e99cd1843020364f9"}, 99 | {file = "numba-0.56.4-cp37-cp37m-win32.whl", hash = "sha256:fcdf84ba3ed8124eb7234adfbb8792f311991cbf8aed1cad4b1b1a7ee08380c1"}, 100 | {file = "numba-0.56.4-cp37-cp37m-win_amd64.whl", hash = "sha256:42f9e1be942b215df7e6cc9948cf9c15bb8170acc8286c063a9e57994ef82fd1"}, 101 | {file = "numba-0.56.4-cp38-cp38-macosx_10_14_x86_64.whl", hash = "sha256:553da2ce74e8862e18a72a209ed3b6d2924403bdd0fb341fa891c6455545ba7c"}, 102 | {file = "numba-0.56.4-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:4373da9757049db7c90591e9ec55a2e97b2b36ba7ae3bf9c956a513374077470"}, 103 | {file = "numba-0.56.4-cp38-cp38-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:3a993349b90569518739009d8f4b523dfedd7e0049e6838c0e17435c3e70dcc4"}, 104 | {file = "numba-0.56.4-cp38-cp38-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:720886b852a2d62619ae3900fe71f1852c62db4f287d0c275a60219e1643fc04"}, 105 | {file = "numba-0.56.4-cp38-cp38-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:e64d338b504c9394a4a34942df4627e1e6cb07396ee3b49fe7b8d6420aa5104f"}, 106 | {file = "numba-0.56.4-cp38-cp38-win32.whl", hash = "sha256:03fe94cd31e96185cce2fae005334a8cc712fc2ba7756e52dff8c9400718173f"}, 107 | {file = "numba-0.56.4-cp38-cp38-win_amd64.whl", hash = "sha256:91f021145a8081f881996818474ef737800bcc613ffb1e618a655725a0f9e246"}, 108 | {file = "numba-0.56.4-cp39-cp39-macosx_10_14_x86_64.whl", hash = "sha256:d0ae9270a7a5cc0ede63cd234b4ff1ce166c7a749b91dbbf45e0000c56d3eade"}, 109 | {file = "numba-0.56.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:c75e8a5f810ce80a0cfad6e74ee94f9fde9b40c81312949bf356b7304ef20740"}, 110 | {file = "numba-0.56.4-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:a12ef323c0f2101529d455cfde7f4135eaa147bad17afe10b48634f796d96abd"}, 111 | {file = "numba-0.56.4-cp39-cp39-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:03634579d10a6129181129de293dd6b5eaabee86881369d24d63f8fe352dd6cb"}, 112 | {file = "numba-0.56.4-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:0240f9026b015e336069329839208ebd70ec34ae5bfbf402e4fcc8e06197528e"}, 113 | {file = "numba-0.56.4-cp39-cp39-win32.whl", hash = "sha256:14dbbabf6ffcd96ee2ac827389afa59a70ffa9f089576500434c34abf9b054a4"}, 114 | {file = "numba-0.56.4-cp39-cp39-win_amd64.whl", hash = "sha256:0da583c532cd72feefd8e551435747e0e0fbb3c0530357e6845fcc11e38d6aea"}, 115 | {file = "numba-0.56.4.tar.gz", hash = "sha256:32d9fef412c81483d7efe0ceb6cf4d3310fde8b624a9cecca00f790573ac96ee"}, 116 | ] 117 | 118 | [package.dependencies] 119 | llvmlite = ">=0.39.0dev0,<0.40" 120 | numpy = ">=1.18,<1.24" 121 | setuptools = "*" 122 | 123 | [[package]] 124 | name = "numpy" 125 | version = "1.23.5" 126 | description = "NumPy is the fundamental package for array computing with Python." 
127 | category = "main" 128 | optional = false 129 | python-versions = ">=3.8" 130 | files = [ 131 | {file = "numpy-1.23.5-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:9c88793f78fca17da0145455f0d7826bcb9f37da4764af27ac945488116efe63"}, 132 | {file = "numpy-1.23.5-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:e9f4c4e51567b616be64e05d517c79a8a22f3606499941d97bb76f2ca59f982d"}, 133 | {file = "numpy-1.23.5-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7903ba8ab592b82014713c491f6c5d3a1cde5b4a3bf116404e08f5b52f6daf43"}, 134 | {file = "numpy-1.23.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5e05b1c973a9f858c74367553e236f287e749465f773328c8ef31abe18f691e1"}, 135 | {file = "numpy-1.23.5-cp310-cp310-win32.whl", hash = "sha256:522e26bbf6377e4d76403826ed689c295b0b238f46c28a7251ab94716da0b280"}, 136 | {file = "numpy-1.23.5-cp310-cp310-win_amd64.whl", hash = "sha256:dbee87b469018961d1ad79b1a5d50c0ae850000b639bcb1b694e9981083243b6"}, 137 | {file = "numpy-1.23.5-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:ce571367b6dfe60af04e04a1834ca2dc5f46004ac1cc756fb95319f64c095a96"}, 138 | {file = "numpy-1.23.5-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:56e454c7833e94ec9769fa0f86e6ff8e42ee38ce0ce1fa4cbb747ea7e06d56aa"}, 139 | {file = "numpy-1.23.5-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5039f55555e1eab31124a5768898c9e22c25a65c1e0037f4d7c495a45778c9f2"}, 140 | {file = "numpy-1.23.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:58f545efd1108e647604a1b5aa809591ccd2540f468a880bedb97247e72db387"}, 141 | {file = "numpy-1.23.5-cp311-cp311-win32.whl", hash = "sha256:b2a9ab7c279c91974f756c84c365a669a887efa287365a8e2c418f8b3ba73fb0"}, 142 | {file = "numpy-1.23.5-cp311-cp311-win_amd64.whl", hash = "sha256:0cbe9848fad08baf71de1a39e12d1b6310f1d5b2d0ea4de051058e6e1076852d"}, 143 | {file = "numpy-1.23.5-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:f063b69b090c9d918f9df0a12116029e274daf0181df392839661c4c7ec9018a"}, 144 | {file = "numpy-1.23.5-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:0aaee12d8883552fadfc41e96b4c82ee7d794949e2a7c3b3a7201e968c7ecab9"}, 145 | {file = "numpy-1.23.5-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:92c8c1e89a1f5028a4c6d9e3ccbe311b6ba53694811269b992c0b224269e2398"}, 146 | {file = "numpy-1.23.5-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d208a0f8729f3fb790ed18a003f3a57895b989b40ea4dce4717e9cf4af62c6bb"}, 147 | {file = "numpy-1.23.5-cp38-cp38-win32.whl", hash = "sha256:06005a2ef6014e9956c09ba07654f9837d9e26696a0470e42beedadb78c11b07"}, 148 | {file = "numpy-1.23.5-cp38-cp38-win_amd64.whl", hash = "sha256:ca51fcfcc5f9354c45f400059e88bc09215fb71a48d3768fb80e357f3b457e1e"}, 149 | {file = "numpy-1.23.5-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:8969bfd28e85c81f3f94eb4a66bc2cf1dbdc5c18efc320af34bffc54d6b1e38f"}, 150 | {file = "numpy-1.23.5-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:a7ac231a08bb37f852849bbb387a20a57574a97cfc7b6cabb488a4fc8be176de"}, 151 | {file = "numpy-1.23.5-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bf837dc63ba5c06dc8797c398db1e223a466c7ece27a1f7b5232ba3466aafe3d"}, 152 | {file = "numpy-1.23.5-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:33161613d2269025873025b33e879825ec7b1d831317e68f4f2f0f84ed14c719"}, 153 | {file = "numpy-1.23.5-cp39-cp39-win32.whl", hash = 
"sha256:af1da88f6bc3d2338ebbf0e22fe487821ea4d8e89053e25fa59d1d79786e7481"}, 154 | {file = "numpy-1.23.5-cp39-cp39-win_amd64.whl", hash = "sha256:09b7847f7e83ca37c6e627682f145856de331049013853f344f37b0c9690e3df"}, 155 | {file = "numpy-1.23.5-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:abdde9f795cf292fb9651ed48185503a2ff29be87770c3b8e2a14b0cd7aa16f8"}, 156 | {file = "numpy-1.23.5-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f9a909a8bae284d46bbfdefbdd4a262ba19d3bc9921b1e76126b1d21c3c34135"}, 157 | {file = "numpy-1.23.5-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:01dd17cbb340bf0fc23981e52e1d18a9d4050792e8fb8363cecbf066a84b827d"}, 158 | {file = "numpy-1.23.5.tar.gz", hash = "sha256:1b1766d6f397c18153d40015ddfc79ddb715cabadc04d2d228d4e5a8bc4ded1a"}, 159 | ] 160 | 161 | [[package]] 162 | name = "pandas" 163 | version = "1.5.2" 164 | description = "Powerful data structures for data analysis, time series, and statistics" 165 | category = "main" 166 | optional = false 167 | python-versions = ">=3.8" 168 | files = [ 169 | {file = "pandas-1.5.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:e9dbacd22555c2d47f262ef96bb4e30880e5956169741400af8b306bbb24a273"}, 170 | {file = "pandas-1.5.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e2b83abd292194f350bb04e188f9379d36b8dfac24dd445d5c87575f3beaf789"}, 171 | {file = "pandas-1.5.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:2552bffc808641c6eb471e55aa6899fa002ac94e4eebfa9ec058649122db5824"}, 172 | {file = "pandas-1.5.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1fc87eac0541a7d24648a001d553406f4256e744d92df1df8ebe41829a915028"}, 173 | {file = "pandas-1.5.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d0d8fd58df5d17ddb8c72a5075d87cd80d71b542571b5f78178fb067fa4e9c72"}, 174 | {file = "pandas-1.5.2-cp310-cp310-win_amd64.whl", hash = "sha256:4aed257c7484d01c9a194d9a94758b37d3d751849c05a0050c087a358c41ad1f"}, 175 | {file = "pandas-1.5.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:375262829c8c700c3e7cbb336810b94367b9c4889818bbd910d0ecb4e45dc261"}, 176 | {file = "pandas-1.5.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:cc3cd122bea268998b79adebbb8343b735a5511ec14efb70a39e7acbc11ccbdc"}, 177 | {file = "pandas-1.5.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:b4f5a82afa4f1ff482ab8ded2ae8a453a2cdfde2001567b3ca24a4c5c5ca0db3"}, 178 | {file = "pandas-1.5.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8092a368d3eb7116e270525329a3e5c15ae796ccdf7ccb17839a73b4f5084a39"}, 179 | {file = "pandas-1.5.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f6257b314fc14958f8122779e5a1557517b0f8e500cfb2bd53fa1f75a8ad0af2"}, 180 | {file = "pandas-1.5.2-cp311-cp311-win_amd64.whl", hash = "sha256:82ae615826da838a8e5d4d630eb70c993ab8636f0eff13cb28aafc4291b632b5"}, 181 | {file = "pandas-1.5.2-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:457d8c3d42314ff47cc2d6c54f8fc0d23954b47977b2caed09cd9635cb75388b"}, 182 | {file = "pandas-1.5.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:c009a92e81ce836212ce7aa98b219db7961a8b95999b97af566b8dc8c33e9519"}, 183 | {file = "pandas-1.5.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:71f510b0efe1629bf2f7c0eadb1ff0b9cf611e87b73cd017e6b7d6adb40e2b3a"}, 184 | {file = "pandas-1.5.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a40dd1e9f22e01e66ed534d6a965eb99546b41d4d52dbdb66565608fde48203f"}, 185 
| {file = "pandas-1.5.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5ae7e989f12628f41e804847a8cc2943d362440132919a69429d4dea1f164da0"}, 186 | {file = "pandas-1.5.2-cp38-cp38-win32.whl", hash = "sha256:530948945e7b6c95e6fa7aa4be2be25764af53fba93fe76d912e35d1c9ee46f5"}, 187 | {file = "pandas-1.5.2-cp38-cp38-win_amd64.whl", hash = "sha256:73f219fdc1777cf3c45fde7f0708732ec6950dfc598afc50588d0d285fddaefc"}, 188 | {file = "pandas-1.5.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:9608000a5a45f663be6af5c70c3cbe634fa19243e720eb380c0d378666bc7702"}, 189 | {file = "pandas-1.5.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:315e19a3e5c2ab47a67467fc0362cb36c7c60a93b6457f675d7d9615edad2ebe"}, 190 | {file = "pandas-1.5.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:e18bc3764cbb5e118be139b3b611bc3fbc5d3be42a7e827d1096f46087b395eb"}, 191 | {file = "pandas-1.5.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0183cb04a057cc38fde5244909fca9826d5d57c4a5b7390c0cc3fa7acd9fa883"}, 192 | {file = "pandas-1.5.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:344021ed3e639e017b452aa8f5f6bf38a8806f5852e217a7594417fb9bbfa00e"}, 193 | {file = "pandas-1.5.2-cp39-cp39-win32.whl", hash = "sha256:e7469271497960b6a781eaa930cba8af400dd59b62ec9ca2f4d31a19f2f91090"}, 194 | {file = "pandas-1.5.2-cp39-cp39-win_amd64.whl", hash = "sha256:c218796d59d5abd8780170c937b812c9637e84c32f8271bbf9845970f8c1351f"}, 195 | {file = "pandas-1.5.2.tar.gz", hash = "sha256:220b98d15cee0b2cd839a6358bd1f273d0356bf964c1a1aeb32d47db0215488b"}, 196 | ] 197 | 198 | [package.dependencies] 199 | numpy = [ 200 | {version = ">=1.21.0", markers = "python_version >= \"3.10\""}, 201 | {version = ">=1.23.2", markers = "python_version >= \"3.11\""}, 202 | ] 203 | python-dateutil = ">=2.8.1" 204 | pytz = ">=2020.1" 205 | 206 | [package.extras] 207 | test = ["hypothesis (>=5.5.3)", "pytest (>=6.0)", "pytest-xdist (>=1.31)"] 208 | 209 | [[package]] 210 | name = "pyarrow" 211 | version = "10.0.1" 212 | description = "Python library for Apache Arrow" 213 | category = "main" 214 | optional = false 215 | python-versions = ">=3.7" 216 | files = [ 217 | {file = "pyarrow-10.0.1-cp310-cp310-macosx_10_14_x86_64.whl", hash = "sha256:e00174764a8b4e9d8d5909b6d19ee0c217a6cf0232c5682e31fdfbd5a9f0ae52"}, 218 | {file = "pyarrow-10.0.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:6f7a7dbe2f7f65ac1d0bd3163f756deb478a9e9afc2269557ed75b1b25ab3610"}, 219 | {file = "pyarrow-10.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cb627673cb98708ef00864e2e243f51ba7b4c1b9f07a1d821f98043eccd3f585"}, 220 | {file = "pyarrow-10.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba71e6fc348c92477586424566110d332f60d9a35cb85278f42e3473bc1373da"}, 221 | {file = "pyarrow-10.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:7b4ede715c004b6fc535de63ef79fa29740b4080639a5ff1ea9ca84e9282f349"}, 222 | {file = "pyarrow-10.0.1-cp311-cp311-macosx_10_14_x86_64.whl", hash = "sha256:e3fe5049d2e9ca661d8e43fab6ad5a4c571af12d20a57dffc392a014caebef65"}, 223 | {file = "pyarrow-10.0.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:254017ca43c45c5098b7f2a00e995e1f8346b0fb0be225f042838323bb55283c"}, 224 | {file = "pyarrow-10.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:70acca1ece4322705652f48db65145b5028f2c01c7e426c5d16a30ba5d739c24"}, 225 | {file = 
"pyarrow-10.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:abb57334f2c57979a49b7be2792c31c23430ca02d24becd0b511cbe7b6b08649"}, 226 | {file = "pyarrow-10.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:1765a18205eb1e02ccdedb66049b0ec148c2a0cb52ed1fb3aac322dfc086a6ee"}, 227 | {file = "pyarrow-10.0.1-cp37-cp37m-macosx_10_14_x86_64.whl", hash = "sha256:61f4c37d82fe00d855d0ab522c685262bdeafd3fbcb5fe596fe15025fbc7341b"}, 228 | {file = "pyarrow-10.0.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e141a65705ac98fa52a9113fe574fdaf87fe0316cde2dffe6b94841d3c61544c"}, 229 | {file = "pyarrow-10.0.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bf26f809926a9d74e02d76593026f0aaeac48a65b64f1bb17eed9964bfe7ae1a"}, 230 | {file = "pyarrow-10.0.1-cp37-cp37m-win_amd64.whl", hash = "sha256:443eb9409b0cf78df10ced326490e1a300205a458fbeb0767b6b31ab3ebae6b2"}, 231 | {file = "pyarrow-10.0.1-cp38-cp38-macosx_10_14_x86_64.whl", hash = "sha256:f2d00aa481becf57098e85d99e34a25dba5a9ade2f44eb0b7d80c80f2984fc03"}, 232 | {file = "pyarrow-10.0.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:b1fc226d28c7783b52a84d03a66573d5a22e63f8a24b841d5fc68caeed6784d4"}, 233 | {file = "pyarrow-10.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:efa59933b20183c1c13efc34bd91efc6b2997377c4c6ad9272da92d224e3beb1"}, 234 | {file = "pyarrow-10.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:668e00e3b19f183394388a687d29c443eb000fb3fe25599c9b4762a0afd37775"}, 235 | {file = "pyarrow-10.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:d1bc6e4d5d6f69e0861d5d7f6cf4d061cf1069cb9d490040129877acf16d4c2a"}, 236 | {file = "pyarrow-10.0.1-cp39-cp39-macosx_10_14_x86_64.whl", hash = "sha256:42ba7c5347ce665338f2bc64685d74855900200dac81a972d49fe127e8132f75"}, 237 | {file = "pyarrow-10.0.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:b069602eb1fc09f1adec0a7bdd7897f4d25575611dfa43543c8b8a75d99d6874"}, 238 | {file = "pyarrow-10.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:94fb4a0c12a2ac1ed8e7e2aa52aade833772cf2d3de9dde685401b22cec30002"}, 239 | {file = "pyarrow-10.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:db0c5986bf0808927f49640582d2032a07aa49828f14e51f362075f03747d198"}, 240 | {file = "pyarrow-10.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:0ec7587d759153f452d5263dbc8b1af318c4609b607be2bd5127dcda6708cdb1"}, 241 | {file = "pyarrow-10.0.1.tar.gz", hash = "sha256:1a14f57a5f472ce8234f2964cd5184cccaa8df7e04568c64edc33b23eb285dd5"}, 242 | ] 243 | 244 | [package.dependencies] 245 | numpy = ">=1.16.6" 246 | 247 | [[package]] 248 | name = "python-dateutil" 249 | version = "2.8.2" 250 | description = "Extensions to the standard Python datetime module" 251 | category = "main" 252 | optional = false 253 | python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" 254 | files = [ 255 | {file = "python-dateutil-2.8.2.tar.gz", hash = "sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86"}, 256 | {file = "python_dateutil-2.8.2-py2.py3-none-any.whl", hash = "sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9"}, 257 | ] 258 | 259 | [package.dependencies] 260 | six = ">=1.5" 261 | 262 | [[package]] 263 | name = "pytz" 264 | version = "2022.7" 265 | description = "World timezone definitions, modern and historical" 266 | category = "main" 267 | optional = false 268 | python-versions = "*" 269 | files = [ 270 | {file = 
"pytz-2022.7-py2.py3-none-any.whl", hash = "sha256:93007def75ae22f7cd991c84e02d434876818661f8df9ad5df9e950ff4e52cfd"}, 271 | {file = "pytz-2022.7.tar.gz", hash = "sha256:7ccfae7b4b2c067464a6733c6261673fdb8fd1be905460396b97a073e9fa683a"}, 272 | ] 273 | 274 | [[package]] 275 | name = "questdb" 276 | version = "1.1.0" 277 | description = "QuestDB client library for Python" 278 | category = "main" 279 | optional = false 280 | python-versions = ">=3.7" 281 | files = [ 282 | {file = "questdb-1.1.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:0a0315c0389058a7be7248425b7714a1396798159b29b82c026cd4304a635b51"}, 283 | {file = "questdb-1.1.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:733fe242daae6ef06ebb519041df309525834f86e5037c03a70ce2b601cde9ca"}, 284 | {file = "questdb-1.1.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9bdbecf153d6720b4d6712f5a476ef3ca08e83e8ad473819a37798c07652de1e"}, 285 | {file = "questdb-1.1.0-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ac518cac2678faa7c2df6f82523f8f99486020fe325af29b4d52e47421589eb1"}, 286 | {file = "questdb-1.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:57d6cc7cb5e3d5c0982d3fdbe77c1090679cfe5c81bbeb3c650f51f6693e42c8"}, 287 | {file = "questdb-1.1.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:7403fb8fe910af8d340a9c9738576a290428084f60e12f1e9aa8ee2ad7d9349a"}, 288 | {file = "questdb-1.1.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:f9cc6dead6b6bc453a3636c5088685afca10e6c19bc257b48b3d116c366ef14e"}, 289 | {file = "questdb-1.1.0-cp310-cp310-win32.whl", hash = "sha256:772052ff554846f1abf73e51bc25e572f7389227fabbe6c2b190cfad1c041c90"}, 290 | {file = "questdb-1.1.0-cp310-cp310-win_amd64.whl", hash = "sha256:7cc6f625f3bd43cdf7152cf45de1f57f424101291b4b281c956d5c9a1771f4bd"}, 291 | {file = "questdb-1.1.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:07fc96315d6a76f64cf0c68ae958d2b2d3ae6f90c9f9e6b099ac3083b459c81b"}, 292 | {file = "questdb-1.1.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ced7c307a596345a1236e6cd0a0fcd620bfae583be4e1f725290bf1a93b7a9c6"}, 293 | {file = "questdb-1.1.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2c03e965b2f3108999ff42dddedd7c1f4b67ec4836efb0cbd1ac0f4ecf5f48c5"}, 294 | {file = "questdb-1.1.0-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:82741f5fd21ad441a32282f21b6cd8754eb8b3fd1d6b348aca01ca6db51ee084"}, 295 | {file = "questdb-1.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ca5ad4bfd89b9d5753f8a9cefb5fc27c1ed8126061aa4a6555c3c3815b62a130"}, 296 | {file = "questdb-1.1.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:7b3a7784248f96436ff9a0ef8201a2658d6bfafad453194d13c47e04afefb7fa"}, 297 | {file = "questdb-1.1.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:bdb15585884cca0d314606f6b4c30764eff91a712fe93070ea1f266ec36adf5f"}, 298 | {file = "questdb-1.1.0-cp311-cp311-win32.whl", hash = "sha256:ade2d0cddf4cbb7f6b86b303cb6c2e048cd14a273c57afedcf9fa2d9d38805e8"}, 299 | {file = "questdb-1.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:f7a35f4600d9f8fd4e52ced25aee5d1501b4dd44805980607fb1713a821d6edb"}, 300 | {file = "questdb-1.1.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:8a78529b74d44b68d80a45fa2d7c2f5830415b4082cb96893893678dba402248"}, 301 | {file = "questdb-1.1.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:892f150987c477293c5257f2446a77051773aa5992791211560c738699a62690"}, 302 | {file = "questdb-1.1.0-cp37-cp37m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8205d59e12778c4e1c270b1d2235e40881cd1c8991e0e79a53352fcbbc4059b2"}, 303 | {file = "questdb-1.1.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2c82c06f80ff8f895d32f1d5a5d142aa51aae1152abadb62ddb2fc1f2ee55588"}, 304 | {file = "questdb-1.1.0-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:5fd5a7f23bd673e7b4181f3884a10831fcd0d00213c52ae693d5894bb4d9db48"}, 305 | {file = "questdb-1.1.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:30542ec6ab72ffa554a44893c07b9b7e902b577496eeefef3ab6845a35275d6e"}, 306 | {file = "questdb-1.1.0-cp37-cp37m-win32.whl", hash = "sha256:120f5de58684e25617612e2b55ca7f16725400a8a76f79f1c6aaa283b2624c21"}, 307 | {file = "questdb-1.1.0-cp37-cp37m-win_amd64.whl", hash = "sha256:f0055c712a0e6e02d85ab9e72730c93c415c401fe9dc0a5b5bbaedcce93cfc0c"}, 308 | {file = "questdb-1.1.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:5ac3f8cdb98adc6a8553bab7cdf2207c591b4f4ff50eb43c0cdd3e87af08a9ad"}, 309 | {file = "questdb-1.1.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:eaa6b5c65f9f036bb8199fbc873b61fe5be6e0367a2d7771d5fceea7c437b922"}, 310 | {file = "questdb-1.1.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fdcbd1b2949df346b54d045794c1b6188bdca87e959db20f62ff58b83c10ab4c"}, 311 | {file = "questdb-1.1.0-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:15e4bd36b4f2e45541be2ff68a3ea6838129fc6dce8583f63a8e4e4ff271d31a"}, 312 | {file = "questdb-1.1.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8d90e7033d70ea4647e88997cf5816d7c5f7c3e8508b11e5bd9482d0eb7fad15"}, 313 | {file = "questdb-1.1.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:437e093d6ed76fb9e9cbada8ffc65dba0383a6a322979c9e89acf0c23c787f13"}, 314 | {file = "questdb-1.1.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:e2ddea28ce67528553c54b0c83e1fad46637236783cc65df96817b0f19db5df6"}, 315 | {file = "questdb-1.1.0-cp38-cp38-win32.whl", hash = "sha256:f2ab75d63d820af17c65320e8f240be4f7c0618a021ee806c529a7c1dfac6700"}, 316 | {file = "questdb-1.1.0-cp38-cp38-win_amd64.whl", hash = "sha256:fbf91a0bddbdc8acc4fba9a61e7300b9c4d60711ac34c9d776c73c2447226f8c"}, 317 | {file = "questdb-1.1.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:ba1423eb64207850116d66b5626ade368cd1bea374feae750403fcd395d2783c"}, 318 | {file = "questdb-1.1.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:90a350fdac80065002b9ecdd212ac8f7d52cf84c65e44f54fd4b450e4872d602"}, 319 | {file = "questdb-1.1.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c35eaa2b9009500f43f2eb52012d550db9c7dde49c6da611f0bd737c811dd281"}, 320 | {file = "questdb-1.1.0-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7706f5cf83ffc39313bac9835e424c60d179f74426e01db9a7d6fdf379a4c583"}, 321 | {file = "questdb-1.1.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:edae95e0e43e093ef0d93ed8fbce775c1182271671d2e2dc5550fb7395786051"}, 322 | {file = "questdb-1.1.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:a88eb74ff853c791ffea1a8303a79b4a6d9d1cee4b7f1c0c9181c2569686aef9"}, 323 | {file = "questdb-1.1.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:ea173e189d5c753f51ebf92d3f1b09876db2180d42a10e033f89f55c4e22487b"}, 324 | {file = "questdb-1.1.0-cp39-cp39-win32.whl", hash = 
"sha256:3270e401f73e75fec0c30641617b08fdc258b0fa307e115970bccee7b124f0da"}, 325 | {file = "questdb-1.1.0-cp39-cp39-win_amd64.whl", hash = "sha256:38cf3c2e7b47cfd2d1ff4dce4e32109f176671b904bfa1527e40b8f7c510470f"}, 326 | {file = "questdb-1.1.0-pp37-pypy37_pp73-macosx_10_9_x86_64.whl", hash = "sha256:3c08cb6527e8c9e612f5cb4732361582fef58747079a91f5cdbdc33f7588b0ec"}, 327 | {file = "questdb-1.1.0-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b180c217a34231a715846c87e83b36d041bb5016318d5e44cf89d20cce53928f"}, 328 | {file = "questdb-1.1.0-pp37-pypy37_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:00dcdbab684f5dd5346896482172d323fb7917821a966d98cfe10fcab9bd9a79"}, 329 | {file = "questdb-1.1.0-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9d2722ed78c029cd03cc48e8b86468dad821978bdf717a4aef09b1e17b1bda84"}, 330 | {file = "questdb-1.1.0-pp37-pypy37_pp73-win_amd64.whl", hash = "sha256:29f70bcbbb8dbaed4596a26bb8d7cbca43f1d5a1b6144b5a6d9fbeedff6426f9"}, 331 | {file = "questdb-1.1.0-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:6d102d8f2bace9d8e054a818b7efc2c29d57924cc5a53f41d3221bcc9e3ee9aa"}, 332 | {file = "questdb-1.1.0-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:94a7f759df05c5dd817170c4c9350fd724fe653f5938847649fdadae2dcf049c"}, 333 | {file = "questdb-1.1.0-pp38-pypy38_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7cf97631254c4d9a79c9fcb357afeecc91ec1bb5902596028663520a95b1299d"}, 334 | {file = "questdb-1.1.0-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8f9521cfb9ac25c9d12071db65e1294c99764077c3a27e11919409b20e6e92f4"}, 335 | {file = "questdb-1.1.0-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:61227e09c1ddc0e633d097c3e6951bc76cd0473420a5bd56a65895f4c671cbf5"}, 336 | {file = "questdb-1.1.0-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:70b1730483904e6e58c7ae76b28373def819b263fba33ade74c983a08f11b6db"}, 337 | {file = "questdb-1.1.0-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a4d6dfbf5676d3ca8b6df84f320bed1e756712cb14e3415d2990a8aa56998e45"}, 338 | {file = "questdb-1.1.0-pp39-pypy39_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a0fdbfeb8fd53f0fb38e1191d724e16a8b6071e146da62273820c9f063a36a71"}, 339 | {file = "questdb-1.1.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5df10af34da931877da976770d0d4ce6e20c33fe25545ce01e618d72a3ff462f"}, 340 | {file = "questdb-1.1.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:4e24bd5faf3e05cd64f4608907ac758312c356c6e0289a60a9378badd94a581b"}, 341 | {file = "questdb-1.1.0.tar.gz", hash = "sha256:3b3e967423cc33760b4eab947b042fa1e42f0161df296b35f03f7240efe37c77"}, 342 | ] 343 | 344 | [package.extras] 345 | ci = ["cibuildwheel"] 346 | publish = ["twine", "wheel"] 347 | 348 | [[package]] 349 | name = "requests" 350 | version = "2.28.1" 351 | description = "Python HTTP for Humans." 
352 | category = "main" 353 | optional = false 354 | python-versions = ">=3.7, <4" 355 | files = [ 356 | {file = "requests-2.28.1-py3-none-any.whl", hash = "sha256:8fefa2a1a1365bf5520aac41836fbee479da67864514bdb821f31ce07ce65349"}, 357 | {file = "requests-2.28.1.tar.gz", hash = "sha256:7c5599b102feddaa661c826c56ab4fee28bfd17f5abca1ebbe3e7f19d7c97983"}, 358 | ] 359 | 360 | [package.dependencies] 361 | certifi = ">=2017.4.17" 362 | charset-normalizer = ">=2,<3" 363 | idna = ">=2.5,<4" 364 | urllib3 = ">=1.21.1,<1.27" 365 | 366 | [package.extras] 367 | socks = ["PySocks (>=1.5.6,!=1.5.7)"] 368 | use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"] 369 | 370 | [[package]] 371 | name = "setuptools" 372 | version = "65.6.3" 373 | description = "Easily download, build, install, upgrade, and uninstall Python packages" 374 | category = "main" 375 | optional = false 376 | python-versions = ">=3.7" 377 | files = [ 378 | {file = "setuptools-65.6.3-py3-none-any.whl", hash = "sha256:57f6f22bde4e042978bcd50176fdb381d7c21a9efa4041202288d3737a0c6a54"}, 379 | {file = "setuptools-65.6.3.tar.gz", hash = "sha256:a7620757bf984b58deaf32fc8a4577a9bbc0850cf92c20e1ce41c38c19e5fb75"}, 380 | ] 381 | 382 | [package.extras] 383 | docs = ["furo", "jaraco.packaging (>=9)", "jaraco.tidelift (>=1.4)", "pygments-github-lexers (==0.0.5)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-favicon", "sphinx-hoverxref (<2)", "sphinx-inline-tabs", "sphinx-notfound-page (==0.8.3)", "sphinx-reredirects", "sphinxcontrib-towncrier"] 384 | testing = ["build[virtualenv]", "filelock (>=3.4.0)", "flake8 (<5)", "flake8-2020", "ini2toml[lite] (>=0.9)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "pip (>=19.1)", "pip-run (>=8.8)", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-flake8", "pytest-mypy (>=0.9.1)", "pytest-perf", "pytest-timeout", "pytest-xdist", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel"] 385 | testing-integration = ["build[virtualenv]", "filelock (>=3.4.0)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "pytest", "pytest-enabler", "pytest-xdist", "tomli", "virtualenv (>=13.0.0)", "wheel"] 386 | 387 | [[package]] 388 | name = "six" 389 | version = "1.16.0" 390 | description = "Python 2 and 3 compatibility utilities" 391 | category = "main" 392 | optional = false 393 | python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*" 394 | files = [ 395 | {file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"}, 396 | {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"}, 397 | ] 398 | 399 | [[package]] 400 | name = "urllib3" 401 | version = "1.26.13" 402 | description = "HTTP library with thread-safe connection pooling, file post, and more." 
403 | category = "main" 404 | optional = false 405 | python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*" 406 | files = [ 407 | {file = "urllib3-1.26.13-py2.py3-none-any.whl", hash = "sha256:47cc05d99aaa09c9e72ed5809b60e7ba354e64b59c9c173ac3018642d8bb41fc"}, 408 | {file = "urllib3-1.26.13.tar.gz", hash = "sha256:c083dd0dce68dbfbe1129d5271cb90f9447dea7d52097c6e0126120c521ddea8"}, 409 | ] 410 | 411 | [package.extras] 412 | brotli = ["brotli (>=1.0.9)", "brotlicffi (>=0.8.0)", "brotlipy (>=0.6.0)"] 413 | secure = ["certifi", "cryptography (>=1.3.4)", "idna (>=2.0.0)", "ipaddress", "pyOpenSSL (>=0.14)", "urllib3-secure-extra"] 414 | socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"] 415 | 416 | [metadata] 417 | lock-version = "2.0" 418 | python-versions = "^3.10" 419 | content-hash = "149172a1d2796256ff5cdf5ea3ec7e84adade99803b39ed9339f9394bbb68640" 420 | -------------------------------------------------------------------------------- /py_tsbs_benchmark/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/questdb/py-tsbs-benchmark/ecf2c02920d6a397b075ad3398f4269cd24bf0ad/py_tsbs_benchmark/__init__.py -------------------------------------------------------------------------------- /py_tsbs_benchmark/bench_pandas.py: -------------------------------------------------------------------------------- 1 | import questdb.ingress as qi 2 | import numpy as np 3 | import pandas as pd 4 | import random 5 | import time 6 | import sys 7 | import pprint 8 | import textwrap 9 | from concurrent.futures import ThreadPoolExecutor, Future 10 | from numba import vectorize, float64 11 | 12 | from .common import CpuTable 13 | 14 | 15 | @vectorize([float64(float64, float64)]) 16 | def _clip_add(x, y): 17 | z = x + y 18 | # Clip to the 0 and 100 boundaries 19 | if z < 0.0: 20 | z = 0.0 21 | elif z > 100.0: 22 | z = 100.0 23 | return z 24 | 25 | 26 | _REGIONS = { 27 | "us-east-1": [ 28 | "us-east-1a", 29 | "us-east-1b", 30 | "us-east-1c", 31 | "us-east-1e"], 32 | "us-west-1": [ 33 | "us-west-1a", 34 | "us-west-1b"], 35 | "us-west-2": [ 36 | "us-west-2a", 37 | "us-west-2b", 38 | "us-west-2c"], 39 | "eu-west-1": [ 40 | "eu-west-1a", 41 | "eu-west-1b", 42 | "eu-west-1c"], 43 | "eu-central-1": [ 44 | "eu-central-1a", 45 | "eu-central-1b"], 46 | "ap-southeast-1": [ 47 | "ap-southeast-1a", 48 | "ap-southeast-1b"], 49 | "ap-southeast-2": [ 50 | "ap-southeast-2a", 51 | "ap-southeast-2b"], 52 | "ap-northeast-1": [ 53 | "ap-northeast-1a", 54 | "ap-northeast-1c"], 55 | "sa-east-1": [ 56 | "sa-east-1a", 57 | "sa-east-1b", 58 | "sa-east-1c"], 59 | } 60 | 61 | 62 | _REGION_KEYS = list(_REGIONS.keys()) 63 | 64 | 65 | _MACHINE_RACK_CHOICES = [ 66 | str(n) 67 | for n in range(100)] 68 | 69 | 70 | _MACHINE_OS_CHOICES = [ 71 | "Ubuntu16.10", 72 | "Ubuntu16.04LTS", 73 | "Ubuntu15.10"] 74 | 75 | 76 | _MACHINE_ARCH_CHOICES = [ 77 | "x64", 78 | "x86"] 79 | 80 | 81 | _MACHINE_TEAM_CHOICES = [ 82 | "SF", 83 | "NYC", 84 | "LON", 85 | "CHI"] 86 | 87 | 88 | _MACHINE_SERVICE_CHOICES = [ 89 | str(n) 90 | for n in range(20)] 91 | 92 | 93 | _MACHINE_SERVICE_VERSION_CHOICES = [ 94 | str(n) 95 | for n in range(2)] 96 | 97 | 98 | _MACHINE_SERVICE_ENVIRONMENT_CHOICES = [ 99 | "production", 100 | "staging", 101 | "test"] 102 | 103 | 104 | def gen_dataframe(seed, row_count, scale): 105 | rand, np_rand = random.Random(seed), np.random.default_rng(seed) 106 | 107 | def mk_symbols_series(strings): 108 | return pd.Series(strings, dtype='string[pyarrow]') 109 | 110 | 
def mk_hostname(): 111 | repeated = [f'host_{n}' for n in range(scale)] 112 | repeat_count = row_count // scale + 1 113 | values = (repeated * repeat_count)[:row_count] 114 | return mk_symbols_series(values) 115 | 116 | def rep_choice(choices): 117 | return rand.choices(choices, k=row_count) 118 | 119 | def mk_cpu_series(): 120 | values = np_rand.normal(0, 1, row_count + 1) 121 | _clip_add.accumulate(values, out=values) 122 | return pd.Series(values[1:], dtype='float64') 123 | 124 | region = [] 125 | datacenter = [] 126 | for _ in range(row_count): 127 | reg = random.choice(_REGION_KEYS) 128 | region.append(reg) 129 | datacenter.append(rand.choice(_REGIONS[reg])) 130 | 131 | df = pd.DataFrame({ 132 | 'hostname': mk_hostname(), 133 | 'region': mk_symbols_series(region), 134 | 'datacenter': mk_symbols_series(datacenter), 135 | 'rack': mk_symbols_series(rep_choice(_MACHINE_RACK_CHOICES)), 136 | 'os': mk_symbols_series(rep_choice(_MACHINE_OS_CHOICES)), 137 | 'arch': mk_symbols_series(rep_choice(_MACHINE_ARCH_CHOICES)), 138 | 'team': mk_symbols_series(rep_choice(_MACHINE_TEAM_CHOICES)), 139 | 'service': mk_symbols_series(rep_choice(_MACHINE_SERVICE_CHOICES)), 140 | 'service_version': mk_symbols_series( 141 | rep_choice(_MACHINE_SERVICE_VERSION_CHOICES)), 142 | 'service_environment': mk_symbols_series( 143 | rep_choice(_MACHINE_SERVICE_ENVIRONMENT_CHOICES)), 144 | 'usage_user': mk_cpu_series(), 145 | 'usage_system': mk_cpu_series(), 146 | 'usage_idle': mk_cpu_series(), 147 | 'usage_nice': mk_cpu_series(), 148 | 'usage_iowait': mk_cpu_series(), 149 | 'usage_irq': mk_cpu_series(), 150 | 'usage_softirq': mk_cpu_series(), 151 | 'usage_steal': mk_cpu_series(), 152 | 'usage_guest': mk_cpu_series(), 153 | 'usage_guest_nice': mk_cpu_series(), 154 | 'timestamp': pd.date_range('2016-01-01', periods=row_count, freq='10s'), 155 | }) 156 | 157 | df.index.name = 'cpu' 158 | return df 159 | 160 | 161 | def parse_args(): 162 | seed = random.randrange(sys.maxsize) 163 | import argparse 164 | parser = argparse.ArgumentParser() 165 | parser.add_argument('--row-count', type=int, default=10_000_000) 166 | parser.add_argument('--scale', type=int, default=4000) 167 | parser.add_argument('--seed', type=int, default=seed) 168 | parser.add_argument('--write-ilp', type=str, default=None) 169 | parser.add_argument('--shell', action='store_true', default=False) 170 | parser.add_argument('--send', action='store_true', default=False) 171 | parser.add_argument('--host', type=str, default='localhost') 172 | parser.add_argument('--ilp-port', type=int, default=9009) 173 | parser.add_argument('--http-port', type=int, default=9000) 174 | parser.add_argument('--op', choices=['dataframe', 'iterrows', 'itertuples'], 175 | default='dataframe') 176 | parser.add_argument('--workers', type=int, default=None) 177 | parser.add_argument('--worker-chunk-row-count', type=int, default=10_000) 178 | parser.add_argument('--validation-query-timeout', type=float, default=120.0) 179 | parser.add_argument('--debug', action='store_true', default=False) 180 | return parser.parse_args() 181 | 182 | 183 | def chunk_up_dataframe(df, chunk_row_count): 184 | dfs = [] 185 | for i in range(0, len(df), chunk_row_count): 186 | dfs.append(df.iloc[i:i + chunk_row_count]) 187 | return dfs 188 | 189 | 190 | def assign_dfs_to_workers(dfs, workers): 191 | dfs_by_worker = [[] for _ in range(workers)] 192 | for i, df in enumerate(dfs): 193 | dfs_by_worker[i % workers].append(df) 194 | return dfs_by_worker 195 | 196 | 197 | def sanity_check_split(df, dfs): 198 | df2 = 
pd.concat(dfs) 199 | assert len(df) == len(df2) 200 | assert df.equals(df2) 201 | 202 | 203 | def sanity_check_split2(df, dfs_by_worker): 204 | df2 = pd.concat([ 205 | df 206 | for dfs in dfs_by_worker 207 | for df in dfs]) 208 | df2.sort_values(by='timestamp', inplace=True) 209 | assert len(df) == len(df2) 210 | assert df.equals(df2) 211 | 212 | 213 | def chunk_up_by_worker(df, workers, chunk_row_count): 214 | dfs = chunk_up_dataframe(df, chunk_row_count) 215 | sanity_check_split(df, dfs) 216 | dfs_by_worker = assign_dfs_to_workers(dfs, workers) 217 | sanity_check_split2(df, dfs_by_worker) 218 | return dfs_by_worker 219 | 220 | 221 | def send_py_row(obj, df): 222 | for _index, row in df.iterrows(): 223 | symbols = { 224 | 'hostname': row['hostname'], 225 | 'region': row['region'], 226 | 'datacenter': row['datacenter'], 227 | 'rack': row['rack'], 228 | 'os': row['os'], 229 | 'arch': row['arch'], 230 | 'team': row['team'], 231 | 'service': row['service'], 232 | 'service_version': row['service_version'], 233 | 'service_environment': row['service_environment']} 234 | columns = { 235 | 'usage_user': row['usage_user'], 236 | 'usage_system': row['usage_system'], 237 | 'usage_idle': row['usage_idle'], 238 | 'usage_nice': row['usage_nice'], 239 | 'usage_iowait': row['usage_iowait'], 240 | 'usage_irq': row['usage_irq'], 241 | 'usage_softirq': row['usage_softirq'], 242 | 'usage_steal': row['usage_steal'], 243 | 'usage_guest': row['usage_guest'], 244 | 'usage_guest_nice': row['usage_guest_nice']} 245 | obj.row( 246 | 'cpu', 247 | symbols=symbols, 248 | columns=columns, 249 | at=qi.TimestampNanos(row['timestamp'].value)) 250 | 251 | 252 | def send_py_tuple(obj, df): 253 | for row in df.itertuples(): 254 | symbols = { 255 | 'hostname': row.hostname, 256 | 'region': row.region, 257 | 'datacenter': row.datacenter, 258 | 'rack': row.rack, 259 | 'os': row.os, 260 | 'arch': row.arch, 261 | 'team': row.team, 262 | 'service': row.service, 263 | 'service_version': row.service_version, 264 | 'service_environment': row.service_environment} 265 | columns = { 266 | 'usage_user': row.usage_user, 267 | 'usage_system': row.usage_system, 268 | 'usage_idle': row.usage_idle, 269 | 'usage_nice': row.usage_nice, 270 | 'usage_iowait': row.usage_iowait, 271 | 'usage_irq': row.usage_irq, 272 | 'usage_softirq': row.usage_softirq, 273 | 'usage_steal': row.usage_steal, 274 | 'usage_guest': row.usage_guest, 275 | 'usage_guest_nice': row.usage_guest_nice} 276 | obj.row( 277 | 'cpu', 278 | symbols=symbols, 279 | columns=columns, 280 | at=qi.TimestampNanos(row.timestamp.value)) 281 | 282 | 283 | def dataframe(obj, df): 284 | obj.dataframe(df, symbols=True, at='timestamp') 285 | 286 | 287 | _OP_MAP = { 288 | 'dataframe': dataframe, 289 | 'iterrows': send_py_row, 290 | 'itertuples': send_py_tuple} 291 | 292 | 293 | def serialize_one(args, df): 294 | buf = qi.Buffer() 295 | op = _OP_MAP[args.op] 296 | t0 = time.monotonic() 297 | op(buf, df) 298 | t1 = time.monotonic() 299 | elapsed = t1 - t0 300 | if args.write_ilp: 301 | if args.write_ilp == '-': 302 | print(buf) 303 | else: 304 | with open(args.write_ilp, 'w') as f: 305 | f.write(str(buf)) 306 | row_speed = args.row_count / elapsed / 1_000_000.0 307 | print('Serialized:') 308 | print( 309 | f' {args.row_count} rows in {elapsed:.2f}s: ' 310 | f'{row_speed:.2f} mil rows/sec.') 311 | size_mb = len(buf) / 1024.0 / 1024.0 312 | throughput_mb = size_mb / elapsed 313 | print( 314 | f' ILP Buffer size: {size_mb:.2f} MiB: ' 315 | f'{throughput_mb:.2f} MiB/sec.') 316 | return len(buf) 317 | 
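# serialize_workers (below) fans the same serialization work out across threads:
# chunk_up_by_worker slices the frame into --worker-chunk-row-count pieces and deals
# them round-robin to --workers threads, each of which serializes its slices into a
# private qi.Buffer, clearing it after every chunk and summing len(buf) so the total
# ILP byte count can still be reported.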
318 | 319 | def serialize_workers(args, df): 320 | dfs_by_worker = chunk_up_by_worker( 321 | df, args.workers, args.worker_chunk_row_count) 322 | bufs = [qi.Buffer() for _ in range(args.workers)] 323 | tpe = ThreadPoolExecutor(max_workers=args.workers) 324 | 325 | # Warm up the thread pool. 326 | tpe.map(lambda e: None, [None] * args.workers) 327 | 328 | op = _OP_MAP[args.op] 329 | 330 | if args.debug: 331 | repld = [False] 332 | import threading 333 | lock = threading.Lock() 334 | 335 | def serialize_dfs(buf, dfs): 336 | size = 0 337 | for df in dfs: 338 | try: 339 | op(buf, df) 340 | except Exception as e: 341 | with lock: 342 | if not repld[0]: 343 | import code 344 | code.interact(local=locals()) 345 | repld[0] = True 346 | raise e 347 | size += len(buf) 348 | buf.clear() 349 | return size 350 | else: 351 | def serialize_dfs(buf, dfs): 352 | size = 0 353 | for df in dfs: 354 | op(buf, df) 355 | size += len(buf) 356 | buf.clear() 357 | return size 358 | 359 | t0 = time.monotonic() 360 | futures = [ 361 | tpe.submit(serialize_dfs, buf, dfs) 362 | for buf, dfs in zip(bufs, dfs_by_worker)] 363 | sizes = [fut.result() for fut in futures] 364 | t1 = time.monotonic() 365 | size = sum(sizes) 366 | elapsed = t1 - t0 367 | row_speed = args.row_count / elapsed / 1_000_000.0 368 | print('Serialized:') 369 | print( 370 | f' {args.row_count} rows in {elapsed:.2f}s: ' 371 | f'{row_speed:.2f} mil rows/sec.') 372 | throughput_mb = size / elapsed / 1024.0 / 1024.0 373 | size_mb = size / 1024.0 / 1024.0 374 | print( 375 | f' ILP Buffer size: {size_mb:.2f} MiB: ' 376 | f'{throughput_mb:.2f} MiB/sec.') 377 | return size 378 | 379 | 380 | def send_one(args, df, size): 381 | op = _OP_MAP[args.op] 382 | with qi.Sender(args.host, args.ilp_port) as sender: 383 | t0 = time.monotonic() 384 | op(sender, df) 385 | sender.flush() 386 | t1 = time.monotonic() 387 | elapsed = t1 - t0 388 | row_speed = args.row_count / elapsed / 1_000_000.0 389 | print('Sent:') 390 | print( 391 | f' {args.row_count} rows in {elapsed:.2f}s: ' 392 | f'{row_speed:.2f} mil rows/sec.') 393 | throughput_mb = size / elapsed / 1024.0 / 1024.0 394 | size_mb = size / 1024.0 / 1024.0 395 | print( 396 | f' ILP Buffer size: {size_mb:.2f} MiB: ' 397 | f'{throughput_mb:.2f} MiB/sec.') 398 | 399 | 400 | def send_workers(args, df, size): 401 | dfs_by_worker = chunk_up_by_worker( 402 | df, args.workers, args.worker_chunk_row_count) 403 | 404 | tpe = ThreadPoolExecutor(max_workers=args.workers) 405 | 406 | def connected_sender(): 407 | sender = qi.Sender(args.host, args.ilp_port) 408 | sender.connect() 409 | return sender 410 | 411 | senders = [ 412 | tpe.submit(connected_sender) 413 | for _ in range(args.workers)] 414 | senders: list[qi.Sender] = [f.result() for f in senders] 415 | 416 | def worker_job(op, sender, worker_dfs): 417 | try: 418 | for df in worker_dfs: 419 | op(sender, df) 420 | sender.flush() 421 | finally: 422 | sender.close() 423 | 424 | op = _OP_MAP[args.op] 425 | 426 | t0 = time.monotonic() 427 | futures: list[Future] = [ 428 | tpe.submit(worker_job, op, sender, dfs) 429 | for sender, dfs in zip(senders, dfs_by_worker)] 430 | for f in futures: 431 | f.result() 432 | t1 = time.monotonic() 433 | 434 | elapsed = t1 - t0 435 | row_speed = args.row_count / elapsed / 1_000_000.0 436 | print('Sent:') 437 | print( 438 | f' {args.row_count} rows in {elapsed:.2f}s: ' 439 | f'{row_speed:.2f} mil rows/sec.') 440 | throughput_mb = size / elapsed / 1024.0 / 1024.0 441 | size_mb = size / 1024.0 / 1024.0 442 | print( 443 | f' ILP Buffer size: 
{size_mb:.2f} MiB: ' 444 | f'{throughput_mb:.2f} MiB/sec.') 445 | 446 | 447 | def main(): 448 | args = parse_args() 449 | pretty_args = textwrap.indent(pprint.pformat(vars(args)), ' ') 450 | print(f'Running with params:\n{pretty_args}') 451 | 452 | cpu_table = CpuTable(args.host, args.http_port) 453 | 454 | if args.send: 455 | cpu_table.drop() 456 | cpu_table.create() 457 | 458 | df = gen_dataframe(args.seed, args.row_count, args.scale) 459 | 460 | if not args.workers: 461 | size = serialize_one(args, df) 462 | else: 463 | if args.workers < 1: 464 | raise ValueError('workers must be >= 1') 465 | size = serialize_workers(args, df) 466 | 467 | if args.shell: 468 | import code 469 | code.interact(local=locals()) 470 | 471 | if args.send: 472 | if not args.workers: 473 | send_one(args, df, size) 474 | else: 475 | send_workers(args, df, size) 476 | 477 | cpu_table.block_until_rowcount( 478 | args.row_count, timeout=args.validation_query_timeout) 479 | else: 480 | print('Not sending. Use --send to send to server.') 481 | -------------------------------------------------------------------------------- /py_tsbs_benchmark/bench_raw_ilp.py: -------------------------------------------------------------------------------- 1 | import socket 2 | import time 3 | import struct 4 | import pprint 5 | import textwrap 6 | 7 | from .common import CpuTable 8 | 9 | 10 | class RawFileSender: 11 | def __init__(self, host, port, file_path, chunk_size=64 * 1024): 12 | self.host = host 13 | self.port = port 14 | self.file_path = file_path 15 | self.chunk_size = chunk_size 16 | 17 | def send(self): 18 | with open(self.file_path, 'rb') as f: 19 | buf = f.read() 20 | 21 | row_count = buf.count(b'\n') 22 | 23 | view = memoryview(buf) 24 | sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) 25 | sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, True) 26 | linger = 120 # seconds 27 | sock.setsockopt( 28 | socket.SOL_SOCKET, socket.SO_LINGER, struct.pack('ii', 1, linger)) 29 | with sock: 30 | sock.connect((self.host, self.port)) 31 | t0 = time.monotonic() 32 | for i in range(0, len(view), self.chunk_size): 33 | sock.sendall(view[i:i + self.chunk_size]) 34 | t1 = time.monotonic() 35 | elapsed = t1 - t0 36 | row_speed = row_count / elapsed / 1_000_000.0 37 | size_mb = len(buf) / 1024.0 / 1024.0 38 | throughput_mb = size_mb / elapsed 39 | print('Sent:') 40 | print( 41 | f' {row_count} rows in {elapsed:.2f}s: ' 42 | f'{row_speed:.2f} mil rows/sec.') 43 | print( 44 | f' {len(buf)} bytes in {elapsed:.2f}s: ' 45 | f'{throughput_mb:.2f} MiB/sec.') 46 | 47 | return row_count 48 | 49 | 50 | def parse_args(): 51 | import argparse 52 | parser = argparse.ArgumentParser() 53 | parser.add_argument('--host', type=str, default='localhost') 54 | parser.add_argument('--ilp-port', type=int, default=9009) 55 | parser.add_argument('--http-port', type=int, default=9000) 56 | parser.add_argument('--chunk-size', type=int, default=64 * 1024) 57 | parser.add_argument('file_path', type=str) 58 | return parser.parse_args() 59 | 60 | 61 | def main(): 62 | args = parse_args() 63 | pretty_args = textwrap.indent(pprint.pformat(vars(args)), ' ') 64 | print(f'Running with params:\n{pretty_args}') 65 | 66 | cpu_table = CpuTable(args.host, args.http_port) 67 | cpu_table.drop() 68 | cpu_table.create() 69 | time.sleep(1) # grace period. 
70 | 71 | sender = RawFileSender(args.host, args.ilp_port, args.file_path) 72 | row_count = sender.send() 73 | 74 | cpu_table.block_until_rowcount(row_count) 75 | -------------------------------------------------------------------------------- /py_tsbs_benchmark/common.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import time 3 | 4 | class CpuTable: 5 | def __init__(self, host, port): 6 | self.host = host 7 | self.port = port 8 | 9 | def _request(self, sql): 10 | response = requests.get( 11 | f'http://{self.host}:{self.port}/exec', 12 | params={'query': sql}).json() 13 | return response 14 | 15 | def drop(self): 16 | response = self._request('drop table cpu') 17 | if response.get('ddl') == 'OK': 18 | print(f'Dropped table cpu') 19 | return True 20 | elif response.get('error', '').startswith('table does not exist'): 21 | print(f'Table cpu does not exist') 22 | return False 23 | else: 24 | raise RuntimeError(f'Failed to drop table cpu: {response}') 25 | 26 | def create(self): 27 | symbol_cols = [ 28 | 'hostname', 'region', 'datacenter', 'rack', 'os', 'arch', 29 | 'team', 'service', 'service_version', 'service_environment'] 30 | double_cols = [ 31 | 'usage_user', 'usage_system', 'usage_idle', 'usage_nice', 32 | 'usage_iowait', 'usage_irq', 'usage_softirq', 'usage_steal', 33 | 'usage_guest', 'usage_guest_nice'] 34 | sql = f''' 35 | create table cpu ( 36 | {', '.join(f'{col} symbol' for col in symbol_cols)}, 37 | {', '.join(f'{col} double' for col in double_cols)}, 38 | timestamp timestamp) 39 | timestamp(timestamp) 40 | partition by day 41 | ''' 42 | response = self._request(sql) 43 | if response.get('ddl') == 'OK': 44 | print(f'Created table cpu') 45 | else: 46 | raise RuntimeError(f'Failed to create table cpu: {response}') 47 | 48 | def get_row_count(self): 49 | response = self._request('select count(*) from cpu') 50 | return response['dataset'][0][0] 51 | 52 | def block_until_rowcount(self, target_count, timeout=30.0): 53 | t0 = time.monotonic() 54 | while True: 55 | row_count = self.get_row_count() 56 | if row_count == target_count: 57 | return 58 | elif row_count > target_count: 59 | raise RuntimeError( 60 | f'Row count {row_count} exceeds target {target_count}') 61 | if time.monotonic() - t0 > timeout: 62 | raise RuntimeError( 63 | f'Timed out waiting for row count to reach {target_count}') 64 | time.sleep(0.1) 65 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "py-tsbs-benchmark" 3 | version = "0.1.0" 4 | description = "Benchmark ingestion of the TSBS dataset into QuestDB via ILP using the `questdb` Python library and Pandas" 5 | authors = ["Adam Cimarosti "] 6 | license = "Apache License 2.0" 7 | readme = "README.md" 8 | packages = [{include = "py_tsbs_benchmark"}] 9 | 10 | [tool.poetry.dependencies] 11 | python = "^3.10" 12 | questdb = "^1.1.0" 13 | pandas = "^1.5.2" 14 | pyarrow = "^10.0.1" 15 | numba = "^0.56.4" 16 | requests = "^2.28.1" 17 | 18 | [tool.poetry.scripts] 19 | bench_pandas = "py_tsbs_benchmark.bench_pandas:main" 20 | bench_raw_ilp = "py_tsbs_benchmark.bench_raw_ilp:main" 21 | 22 | [build-system] 23 | requires = ["poetry-core"] 24 | build-backend = "poetry.core.masonry.api" 25 | -------------------------------------------------------------------------------- /results/ingestion.webp: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/questdb/py-tsbs-benchmark/ecf2c02920d6a397b075ad3398f4269cd24bf0ad/results/ingestion.webp -------------------------------------------------------------------------------- /results/serialization.webp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/questdb/py-tsbs-benchmark/ecf2c02920d6a397b075ad3398f4269cd24bf0ad/results/serialization.webp -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/questdb/py-tsbs-benchmark/ecf2c02920d6a397b075ad3398f4269cd24bf0ad/tests/__init__.py --------------------------------------------------------------------------------
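Usage note: the `[tool.poetry.scripts]` table in pyproject.toml wires `bench_pandas` and `bench_raw_ilp` up as console entry points, so the benchmarks are run through Poetry, e.g. `poetry run bench_pandas --send --workers 6`, or `poetry run bench_pandas --write-ilp cpu.ilp` followed by `poetry run bench_raw_ilp cpu.ilp`. The snippet below is a minimal sketch of the serialization path those scripts exercise; the row-count and scale values are arbitrary illustration numbers, not the benchmark defaults.

    import questdb.ingress as qi
    from py_tsbs_benchmark.bench_pandas import gen_dataframe

    # Build a small synthetic 'cpu' DataFrame and serialize it to an in-memory
    # ILP buffer, mirroring what serialize_one() does for the default --op dataframe.
    df = gen_dataframe(seed=42, row_count=1_000, scale=10)
    buf = qi.Buffer()
    buf.dataframe(df, symbols=True, at='timestamp')
    print(f'{len(buf)} bytes of ILP for {len(df)} rows')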