├── LICENSE ├── README.md ├── ch01.sql ├── ch02.sql ├── ch03.sql ├── ch04.sql ├── ch05.sql ├── ch06.sql ├── ch07.sql ├── ch08.sql ├── ch09.sql ├── ch10.sql ├── ch11.sql ├── ch12.sql ├── ch13.sql ├── ch14.sql ├── ch15.sql ├── ch16.sql ├── ch17.sql ├── ch18.sql ├── ch19.sql ├── ch20.sql ├── ch21.sql ├── ch22.sql ├── ch_horizontal_run.sh └── tpch_ch_schema.sql /LICENSE: -------------------------------------------------------------------------------- 1 | Mozilla Public License Version 2.0 2 | ================================== 3 | 4 | 1. Definitions 5 | -------------- 6 | 7 | 1.1. "Contributor" 8 | means each individual or legal entity that creates, contributes to 9 | the creation of, or owns Covered Software. 10 | 11 | 1.2. "Contributor Version" 12 | means the combination of the Contributions of others (if any) used 13 | by a Contributor and that particular Contributor's Contribution. 14 | 15 | 1.3. "Contribution" 16 | means Covered Software of a particular Contributor. 17 | 18 | 1.4. "Covered Software" 19 | means Source Code Form to which the initial Contributor has attached 20 | the notice in Exhibit A, the Executable Form of such Source Code 21 | Form, and Modifications of such Source Code Form, in each case 22 | including portions thereof. 23 | 24 | 1.5. "Incompatible With Secondary Licenses" 25 | means 26 | 27 | (a) that the initial Contributor has attached the notice described 28 | in Exhibit B to the Covered Software; or 29 | 30 | (b) that the Covered Software was made available under the terms of 31 | version 1.1 or earlier of the License, but not also under the 32 | terms of a Secondary License. 33 | 34 | 1.6. "Executable Form" 35 | means any form of the work other than Source Code Form. 36 | 37 | 1.7. "Larger Work" 38 | means a work that combines Covered Software with other material, in 39 | a separate file or files, that is not Covered Software. 40 | 41 | 1.8. "License" 42 | means this document. 43 | 44 | 1.9. "Licensable" 45 | means having the right to grant, to the maximum extent possible, 46 | whether at the time of the initial grant or subsequently, any and 47 | all of the rights conveyed by this License. 48 | 49 | 1.10. "Modifications" 50 | means any of the following: 51 | 52 | (a) any file in Source Code Form that results from an addition to, 53 | deletion from, or modification of the contents of Covered 54 | Software; or 55 | 56 | (b) any new file in Source Code Form that contains any Covered 57 | Software. 58 | 59 | 1.11. "Patent Claims" of a Contributor 60 | means any patent claim(s), including without limitation, method, 61 | process, and apparatus claims, in any patent Licensable by such 62 | Contributor that would be infringed, but for the grant of the 63 | License, by the making, using, selling, offering for sale, having 64 | made, import, or transfer of either its Contributions or its 65 | Contributor Version. 66 | 67 | 1.12. "Secondary License" 68 | means either the GNU General Public License, Version 2.0, the GNU 69 | Lesser General Public License, Version 2.1, the GNU Affero General 70 | Public License, Version 3.0, or any later versions of those 71 | licenses. 72 | 73 | 1.13. "Source Code Form" 74 | means the form of the work preferred for making modifications. 75 | 76 | 1.14. "You" (or "Your") 77 | means an individual or a legal entity exercising rights under this 78 | License. For legal entities, "You" includes any entity that 79 | controls, is controlled by, or is under common control with You. For 80 | purposes of this definition, "control" means (a) the power, direct 81 | or indirect, to cause the direction or management of such entity, 82 | whether by contract or otherwise, or (b) ownership of more than 83 | fifty percent (50%) of the outstanding shares or beneficial 84 | ownership of such entity. 85 | 86 | 2. License Grants and Conditions 87 | -------------------------------- 88 | 89 | 2.1. Grants 90 | 91 | Each Contributor hereby grants You a world-wide, royalty-free, 92 | non-exclusive license: 93 | 94 | (a) under intellectual property rights (other than patent or trademark) 95 | Licensable by such Contributor to use, reproduce, make available, 96 | modify, display, perform, distribute, and otherwise exploit its 97 | Contributions, either on an unmodified basis, with Modifications, or 98 | as part of a Larger Work; and 99 | 100 | (b) under Patent Claims of such Contributor to make, use, sell, offer 101 | for sale, have made, import, and otherwise transfer either its 102 | Contributions or its Contributor Version. 103 | 104 | 2.2. Effective Date 105 | 106 | The licenses granted in Section 2.1 with respect to any Contribution 107 | become effective for each Contribution on the date the Contributor first 108 | distributes such Contribution. 109 | 110 | 2.3. Limitations on Grant Scope 111 | 112 | The licenses granted in this Section 2 are the only rights granted under 113 | this License. No additional rights or licenses will be implied from the 114 | distribution or licensing of Covered Software under this License. 115 | Notwithstanding Section 2.1(b) above, no patent license is granted by a 116 | Contributor: 117 | 118 | (a) for any code that a Contributor has removed from Covered Software; 119 | or 120 | 121 | (b) for infringements caused by: (i) Your and any other third party's 122 | modifications of Covered Software, or (ii) the combination of its 123 | Contributions with other software (except as part of its Contributor 124 | Version); or 125 | 126 | (c) under Patent Claims infringed by Covered Software in the absence of 127 | its Contributions. 128 | 129 | This License does not grant any rights in the trademarks, service marks, 130 | or logos of any Contributor (except as may be necessary to comply with 131 | the notice requirements in Section 3.4). 132 | 133 | 2.4. Subsequent Licenses 134 | 135 | No Contributor makes additional grants as a result of Your choice to 136 | distribute the Covered Software under a subsequent version of this 137 | License (see Section 10.2) or under the terms of a Secondary License (if 138 | permitted under the terms of Section 3.3). 139 | 140 | 2.5. Representation 141 | 142 | Each Contributor represents that the Contributor believes its 143 | Contributions are its original creation(s) or it has sufficient rights 144 | to grant the rights to its Contributions conveyed by this License. 145 | 146 | 2.6. Fair Use 147 | 148 | This License is not intended to limit any rights You have under 149 | applicable copyright doctrines of fair use, fair dealing, or other 150 | equivalents. 151 | 152 | 2.7. Conditions 153 | 154 | Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted 155 | in Section 2.1. 156 | 157 | 3. Responsibilities 158 | ------------------- 159 | 160 | 3.1. Distribution of Source Form 161 | 162 | All distribution of Covered Software in Source Code Form, including any 163 | Modifications that You create or to which You contribute, must be under 164 | the terms of this License. You must inform recipients that the Source 165 | Code Form of the Covered Software is governed by the terms of this 166 | License, and how they can obtain a copy of this License. You may not 167 | attempt to alter or restrict the recipients' rights in the Source Code 168 | Form. 169 | 170 | 3.2. Distribution of Executable Form 171 | 172 | If You distribute Covered Software in Executable Form then: 173 | 174 | (a) such Covered Software must also be made available in Source Code 175 | Form, as described in Section 3.1, and You must inform recipients of 176 | the Executable Form how they can obtain a copy of such Source Code 177 | Form by reasonable means in a timely manner, at a charge no more 178 | than the cost of distribution to the recipient; and 179 | 180 | (b) You may distribute such Executable Form under the terms of this 181 | License, or sublicense it under different terms, provided that the 182 | license for the Executable Form does not attempt to limit or alter 183 | the recipients' rights in the Source Code Form under this License. 184 | 185 | 3.3. Distribution of a Larger Work 186 | 187 | You may create and distribute a Larger Work under terms of Your choice, 188 | provided that You also comply with the requirements of this License for 189 | the Covered Software. If the Larger Work is a combination of Covered 190 | Software with a work governed by one or more Secondary Licenses, and the 191 | Covered Software is not Incompatible With Secondary Licenses, this 192 | License permits You to additionally distribute such Covered Software 193 | under the terms of such Secondary License(s), so that the recipient of 194 | the Larger Work may, at their option, further distribute the Covered 195 | Software under the terms of either this License or such Secondary 196 | License(s). 197 | 198 | 3.4. Notices 199 | 200 | You may not remove or alter the substance of any license notices 201 | (including copyright notices, patent notices, disclaimers of warranty, 202 | or limitations of liability) contained within the Source Code Form of 203 | the Covered Software, except that You may alter any license notices to 204 | the extent required to remedy known factual inaccuracies. 205 | 206 | 3.5. Application of Additional Terms 207 | 208 | You may choose to offer, and to charge a fee for, warranty, support, 209 | indemnity or liability obligations to one or more recipients of Covered 210 | Software. However, You may do so only on Your own behalf, and not on 211 | behalf of any Contributor. You must make it absolutely clear that any 212 | such warranty, support, indemnity, or liability obligation is offered by 213 | You alone, and You hereby agree to indemnify every Contributor for any 214 | liability incurred by such Contributor as a result of warranty, support, 215 | indemnity or liability terms You offer. You may include additional 216 | disclaimers of warranty and limitations of liability specific to any 217 | jurisdiction. 218 | 219 | 4. Inability to Comply Due to Statute or Regulation 220 | --------------------------------------------------- 221 | 222 | If it is impossible for You to comply with any of the terms of this 223 | License with respect to some or all of the Covered Software due to 224 | statute, judicial order, or regulation then You must: (a) comply with 225 | the terms of this License to the maximum extent possible; and (b) 226 | describe the limitations and the code they affect. Such description must 227 | be placed in a text file included with all distributions of the Covered 228 | Software under this License. Except to the extent prohibited by statute 229 | or regulation, such description must be sufficiently detailed for a 230 | recipient of ordinary skill to be able to understand it. 231 | 232 | 5. Termination 233 | -------------- 234 | 235 | 5.1. The rights granted under this License will terminate automatically 236 | if You fail to comply with any of its terms. However, if You become 237 | compliant, then the rights granted under this License from a particular 238 | Contributor are reinstated (a) provisionally, unless and until such 239 | Contributor explicitly and finally terminates Your grants, and (b) on an 240 | ongoing basis, if such Contributor fails to notify You of the 241 | non-compliance by some reasonable means prior to 60 days after You have 242 | come back into compliance. Moreover, Your grants from a particular 243 | Contributor are reinstated on an ongoing basis if such Contributor 244 | notifies You of the non-compliance by some reasonable means, this is the 245 | first time You have received notice of non-compliance with this License 246 | from such Contributor, and You become compliant prior to 30 days after 247 | Your receipt of the notice. 248 | 249 | 5.2. If You initiate litigation against any entity by asserting a patent 250 | infringement claim (excluding declaratory judgment actions, 251 | counter-claims, and cross-claims) alleging that a Contributor Version 252 | directly or indirectly infringes any patent, then the rights granted to 253 | You by any and all Contributors for the Covered Software under Section 254 | 2.1 of this License shall terminate. 255 | 256 | 5.3. In the event of termination under Sections 5.1 or 5.2 above, all 257 | end user license agreements (excluding distributors and resellers) which 258 | have been validly granted by You or Your distributors under this License 259 | prior to termination shall survive termination. 260 | 261 | ************************************************************************ 262 | * * 263 | * 6. Disclaimer of Warranty * 264 | * ------------------------- * 265 | * * 266 | * Covered Software is provided under this License on an "as is" * 267 | * basis, without warranty of any kind, either expressed, implied, or * 268 | * statutory, including, without limitation, warranties that the * 269 | * Covered Software is free of defects, merchantable, fit for a * 270 | * particular purpose or non-infringing. The entire risk as to the * 271 | * quality and performance of the Covered Software is with You. * 272 | * Should any Covered Software prove defective in any respect, You * 273 | * (not any Contributor) assume the cost of any necessary servicing, * 274 | * repair, or correction. This disclaimer of warranty constitutes an * 275 | * essential part of this License. No use of any Covered Software is * 276 | * authorized under this License except under this disclaimer. * 277 | * * 278 | ************************************************************************ 279 | 280 | ************************************************************************ 281 | * * 282 | * 7. Limitation of Liability * 283 | * -------------------------- * 284 | * * 285 | * Under no circumstances and under no legal theory, whether tort * 286 | * (including negligence), contract, or otherwise, shall any * 287 | * Contributor, or anyone who distributes Covered Software as * 288 | * permitted above, be liable to You for any direct, indirect, * 289 | * special, incidental, or consequential damages of any character * 290 | * including, without limitation, damages for lost profits, loss of * 291 | * goodwill, work stoppage, computer failure or malfunction, or any * 292 | * and all other commercial damages or losses, even if such party * 293 | * shall have been informed of the possibility of such damages. This * 294 | * limitation of liability shall not apply to liability for death or * 295 | * personal injury resulting from such party's negligence to the * 296 | * extent applicable law prohibits such limitation. Some * 297 | * jurisdictions do not allow the exclusion or limitation of * 298 | * incidental or consequential damages, so this exclusion and * 299 | * limitation may not apply to You. * 300 | * * 301 | ************************************************************************ 302 | 303 | 8. Litigation 304 | ------------- 305 | 306 | Any litigation relating to this License may be brought only in the 307 | courts of a jurisdiction where the defendant maintains its principal 308 | place of business and such litigation shall be governed by laws of that 309 | jurisdiction, without reference to its conflict-of-law provisions. 310 | Nothing in this Section shall prevent a party's ability to bring 311 | cross-claims or counter-claims. 312 | 313 | 9. Miscellaneous 314 | ---------------- 315 | 316 | This License represents the complete agreement concerning the subject 317 | matter hereof. If any provision of this License is held to be 318 | unenforceable, such provision shall be reformed only to the extent 319 | necessary to make it enforceable. Any law or regulation which provides 320 | that the language of a contract shall be construed against the drafter 321 | shall not be used to construe this License against a Contributor. 322 | 323 | 10. Versions of the License 324 | --------------------------- 325 | 326 | 10.1. New Versions 327 | 328 | Mozilla Foundation is the license steward. Except as provided in Section 329 | 10.3, no one other than the license steward has the right to modify or 330 | publish new versions of this License. Each version will be given a 331 | distinguishing version number. 332 | 333 | 10.2. Effect of New Versions 334 | 335 | You may distribute the Covered Software under the terms of the version 336 | of the License under which You originally received the Covered Software, 337 | or under the terms of any subsequent version published by the license 338 | steward. 339 | 340 | 10.3. Modified Versions 341 | 342 | If you create software not governed by this License, and you want to 343 | create a new license for such software, you may create and use a 344 | modified version of this License if you rename the license and remove 345 | any references to the name of the license steward (except to note that 346 | such modified license differs from this License). 347 | 348 | 10.4. Distributing Source Code Form that is Incompatible With Secondary 349 | Licenses 350 | 351 | If You choose to distribute Source Code Form that is Incompatible With 352 | Secondary Licenses under the terms of this version of the License, the 353 | notice described in Exhibit B of this License must be attached. 354 | 355 | Exhibit A - Source Code Form License Notice 356 | ------------------------------------------- 357 | 358 | This Source Code Form is subject to the terms of the Mozilla Public 359 | License, v. 2.0. If a copy of the MPL was not distributed with this 360 | file, You can obtain one at http://mozilla.org/MPL/2.0/. 361 | 362 | If it is not possible or desirable to put the notice in a particular 363 | file, then You may include the notice in a location (such as a LICENSE 364 | file in a relevant directory) where a recipient would be likely to look 365 | for such a notice. 366 | 367 | You may add additional accurate notices of copyright ownership. 368 | 369 | Exhibit B - "Incompatible With Secondary Licenses" Notice 370 | --------------------------------------------------------- 371 | 372 | This Source Code Form is "Incompatible With Secondary Licenses", as 373 | defined by the Mozilla Public License, v. 2.0. 374 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # TPCH benchmark adapted to Clickhouse SQL syntax 2 | 3 | ## Description 4 | 5 | In this repository there is a translation of the TPCH benchmark suite to Clickhouse SQL syntax, this benchmark suite is one of the industry standards in measuring performance of analytical queries. Clickhouse is an OLAP data management system that follows a column-based approach to store information, like many other popular systems such as MonetDB or Apache Parquet. 6 | 7 | Clickhouse does not strictly follow the standard SQL syntax, and even some of the simplest features from traditional relational databases like transactions or Foreign Keys are not supported. One of the main issues that had to be addressed to make TPCH queries work in Clickhouse was nested queries, since the selected attributes in an outer query are not visible in the inner ones. This prevents many queries in the benchmarking suite from executing that had WHERE conditions that references attributes from outer queries. 8 | 9 | ## Cheatsheet 10 | 11 | To start a new server you can run the command `clickhouse server &`, it will create some files and subdirectories to manage the created database and store data so make sure you do it somewhere with enough space and where it does not matter to have additional files. 12 | 13 | Depending on the installation method, you might have separate biaries for the server and client `clickhouse-server`/`clickhouse-client`. Running any SQL code in the CLI is as easy as executing `clickhouse client -q "CREATE DATABASE IF NOT EXISTS sf3"`, but you can also execute a SQL script in the following manner: `clickhouse client -n --queries-file tpch_ch_schema.sql -d sf3` 14 | 15 | The previous commands actually create the database sf3 and loads the schema for the TPCH table layout. We still need to generate the data (this must be done with the scripts provided by the official TPCH organization, that can be found [here](https://tpc.org/TPC_Documents_Current_Versions/download_programs/tools-download-request5.asp?bm_type=TPC-H&bm_vers=3.0.0&mode=CURRENT-ONLY)) 16 | 17 | Once you have generated the data for the scale factor you are going to execute, you can run the following script `ch_horizontal_run.sh` so as to execute the different queries. By default, each query is run 3 times to account for cold start effects. The mean, min and max execution times are printed an stored into a `timings.csv` file. 18 | 19 | ## Issues 20 | 21 | By default, the client only waits for 300s for a query response from the server. Since queries 7 and 19 did not actually end for a Scale Factor of 3. You might need increase timeout creating the following file via `vim ~/.clickhouse-client/config.xml` and adding: 22 | 23 | ``` 24 | 25 | 7200 26 | 7200 27 | 28 | ``` 29 | 30 | 31 | -------------------------------------------------------------------------------- /ch01.sql: -------------------------------------------------------------------------------- 1 | select 2 | l_returnflag, 3 | l_linestatus, 4 | sum(l_quantity) as sum_qty, 5 | sum(l_extendedprice) as sum_base_price, 6 | sum(l_extendedprice * (1 - l_discount)) as sum_disc_price, 7 | sum(l_extendedprice * (1 - l_discount) * (1 + l_tax)) as sum_charge, 8 | avg(l_quantity) as avg_qty, 9 | avg(l_extendedprice) as avg_price, 10 | avg(l_discount) as avg_disc, 11 | count(*) as count_order 12 | from 13 | lineitem 14 | where 15 | l_shipdate <= date '1998-12-01' - interval '90' day 16 | group by 17 | l_returnflag, 18 | l_linestatus 19 | order by 20 | l_returnflag, 21 | l_linestatus; 22 | -------------------------------------------------------------------------------- /ch02.sql: -------------------------------------------------------------------------------- 1 | with cheapest_part as 2 | ( 3 | select 4 | min(ps_supplycost) as cp_lowest, 5 | p_partkey as cp_partkey 6 | from part, 7 | partsupp, 8 | supplier, 9 | nation, 10 | region 11 | where p_partkey = ps_partkey 12 | and s_suppkey = ps_suppkey 13 | and s_nationkey = n_nationkey 14 | and n_regionkey = r_regionkey 15 | and r_name = 'EUROPE' 16 | group by p_partkey 17 | ) 18 | select 19 | s_acctbal, s_name, n_name, p_partkey, p_mfgr, s_address, 20 | s_phone, s_comment 21 | from part, 22 | supplier, 23 | partsupp, 24 | nation, 25 | region, 26 | cheapest_part 27 | where p_partkey = ps_partkey 28 | and s_suppkey = ps_suppkey 29 | and p_size = 15 30 | and p_type like '%BRASS' 31 | and s_nationkey = n_nationkey 32 | and n_regionkey = r_regionkey 33 | and r_name = 'EUROPE' 34 | and ps_supplycost = cp_lowest 35 | and cp_partkey = p_partkey 36 | order by s_acctbal desc, 37 | n_name, 38 | s_name, 39 | p_partkey 40 | limit 10; 41 | -------------------------------------------------------------------------------- /ch03.sql: -------------------------------------------------------------------------------- 1 | select 2 | l_orderkey, 3 | sum(l_extendedprice * (1 - l_discount)) as revenue, 4 | o_orderdate, 5 | o_shippriority 6 | from customer, 7 | orders, 8 | lineitem 9 | where c_mktsegment = 'BUILDING' 10 | and c_custkey = o_custkey 11 | and l_orderkey = o_orderkey 12 | and o_orderdate < date '1995-03-15' 13 | and l_shipdate > date '1995-03-15' 14 | group by 15 | l_orderkey, 16 | o_orderdate, 17 | o_shippriority 18 | order by 19 | revenue desc, 20 | o_orderdate 21 | limit 10; 22 | -------------------------------------------------------------------------------- /ch04.sql: -------------------------------------------------------------------------------- 1 | select 2 | o_orderpriority, 3 | count(*) as order_count 4 | from 5 | orders 6 | where 7 | o_orderdate >= date '1993-07-01' 8 | and o_orderdate < date '1993-07-01' + interval '3' month 9 | and o_orderkey in ( 10 | select 11 | l_orderkey 12 | from 13 | lineitem 14 | where 15 | l_commitdate < l_receiptdate 16 | ) 17 | group by 18 | o_orderpriority 19 | order by 20 | o_orderpriority; 21 | -------------------------------------------------------------------------------- /ch05.sql: -------------------------------------------------------------------------------- 1 | select 2 | n_name, 3 | sum(l_extendedprice * (1 - l_discount)) as revenue 4 | from 5 | customer, 6 | orders, 7 | lineitem, 8 | supplier, 9 | nation, 10 | region 11 | where 12 | c_custkey = o_custkey 13 | and l_orderkey = o_orderkey 14 | and l_suppkey = s_suppkey 15 | and c_nationkey = s_nationkey 16 | and s_nationkey = n_nationkey 17 | and n_regionkey = r_regionkey 18 | and r_name = 'ASIA' 19 | and o_orderdate >= date '1994-01-01' 20 | and o_orderdate < date '1994-01-01' + interval '1' year 21 | group by 22 | n_name 23 | order by 24 | revenue desc; 25 | 26 | -------------------------------------------------------------------------------- /ch06.sql: -------------------------------------------------------------------------------- 1 | select 2 | sum(l_extendedprice * l_discount) as revenue 3 | from 4 | lineitem 5 | where 6 | l_shipdate >= date '1994-01-01' 7 | and l_shipdate < date '1994-01-01' + interval '1' year 8 | and l_discount between toDecimal64(0.05,2) and toDecimal64(0.07,2) 9 | and l_quantity < 24; 10 | -------------------------------------------------------------------------------- /ch07.sql: -------------------------------------------------------------------------------- 1 | select 2 | supp_nation, 3 | cust_nation, 4 | l_year, 5 | sum(volume) as revenue 6 | from ( select 7 | n1.n_name as supp_nation, 8 | n2.n_name as cust_nation, 9 | extract(year from l_shipdate) as l_year, 10 | l_extendedprice * (1 - l_discount) as volume 11 | from supplier, 12 | lineitem, 13 | orders, 14 | customer, 15 | nation n1, 16 | nation n2 17 | where s_suppkey = l_suppkey 18 | and o_orderkey = l_orderkey 19 | and c_custkey = o_custkey 20 | and s_nationkey = n1.n_nationkey 21 | and c_nationkey = n2.n_nationkey 22 | and ( (n1.n_name = 'FRANCE' and n2.n_name = 'GERMANY') 23 | or (n1.n_name = 'GERMANY' and n2.n_name = 'FRANCE')) 24 | and l_shipdate between date '1995-01-01' and date '1996-12-31' 25 | ) as shipping 26 | group by supp_nation, 27 | cust_nation, 28 | l_year 29 | order by supp_nation, 30 | cust_nation, 31 | l_year; 32 | -------------------------------------------------------------------------------- /ch08.sql: -------------------------------------------------------------------------------- 1 | select 2 | o_year, 3 | sum(case 4 | when nation = 'BRAZIL' then volume 5 | else 0 6 | end) / sum(volume) as mkt_share 7 | from ( select extract(year from o_orderdate) as o_year, 8 | l_extendedprice * (1 - l_discount) as volume, 9 | n2.n_name as nation 10 | from part, 11 | supplier, 12 | lineitem, 13 | orders, 14 | customer, 15 | nation n1, 16 | nation n2, 17 | region 18 | where p_partkey = l_partkey 19 | and s_suppkey = l_suppkey 20 | and l_orderkey = o_orderkey 21 | and o_custkey = c_custkey 22 | and c_nationkey = n1.n_nationkey 23 | and n1.n_regionkey = r_regionkey 24 | and r_name = 'AMERICA' 25 | and s_nationkey = n2.n_nationkey 26 | and o_orderdate between date '1995-01-01' and date '1996-12-31' 27 | and p_type = 'ECONOMY ANODIZED STEEL' 28 | ) as all_nations 29 | group by o_year 30 | order by o_year; 31 | -------------------------------------------------------------------------------- /ch09.sql: -------------------------------------------------------------------------------- 1 | select 2 | nation, 3 | o_year, 4 | sum(amount) as sum_profit 5 | from ( select n_name as nation, 6 | extract(year from o_orderdate) as o_year, 7 | l_extendedprice * (1 - l_discount) - ps_supplycost * l_quantity as amount 8 | from part, 9 | supplier, 10 | lineitem, 11 | partsupp, 12 | orders, 13 | nation 14 | where s_suppkey = l_suppkey 15 | and ps_suppkey = l_suppkey 16 | and ps_partkey = l_partkey 17 | and p_partkey = l_partkey 18 | and o_orderkey = l_orderkey 19 | and s_nationkey = n_nationkey 20 | and p_name like '%green%' 21 | ) as profit 22 | group by nation, o_year 23 | order by nation, o_year desc; 24 | -------------------------------------------------------------------------------- /ch10.sql: -------------------------------------------------------------------------------- 1 | select c_custkey, 2 | c_name, 3 | sum(l_extendedprice * (1 - l_discount)) as revenue, 4 | c_acctbal, 5 | n_name, 6 | c_address, 7 | c_phone, 8 | c_comment 9 | from customer, 10 | orders, 11 | lineitem, 12 | nation 13 | where c_custkey = o_custkey 14 | and l_orderkey = o_orderkey 15 | and o_orderdate >= date '1993-10-01' 16 | and o_orderdate < date '1993-10-01' + interval '3' month 17 | and l_returnflag = 'R' 18 | and c_nationkey = n_nationkey 19 | group by c_custkey, c_name, 20 | c_acctbal, c_phone, n_name, 21 | c_address, c_comment 22 | order by revenue desc 23 | limit 20; 24 | -------------------------------------------------------------------------------- /ch11.sql: -------------------------------------------------------------------------------- 1 | select 2 | ps_partkey, 3 | sum(ps_supplycost * ps_availqty) as value 4 | from partsupp, 5 | supplier, 6 | nation 7 | where ps_suppkey = s_suppkey 8 | and s_nationkey = n_nationkey 9 | and n_name = 'GERMANY' 10 | group by ps_partkey 11 | having sum(ps_supplycost * ps_availqty) > 12 | ( select toDecimal64(sum(ps_supplycost * ps_availqty) * 0.0000001,10) 13 | -- The above constant needs to be adjusted according 14 | -- to the scale factor (SF): constant = 0.0001 / SF. 15 | from partsupp, 16 | supplier, 17 | nation 18 | where ps_suppkey = s_suppkey 19 | and s_nationkey = n_nationkey 20 | and n_name = 'GERMANY' 21 | ) 22 | order by value desc; 23 | -------------------------------------------------------------------------------- /ch12.sql: -------------------------------------------------------------------------------- 1 | select l_shipmode, 2 | sum(case 3 | when o_orderpriority = '1-URGENT' 4 | or o_orderpriority = '2-HIGH' 5 | then 1 6 | else 0 7 | end) as high_line_count, 8 | sum(case 9 | when o_orderpriority <> '1-URGENT' 10 | and o_orderpriority <> '2-HIGH' 11 | then 1 12 | else 0 13 | end) as low_line_count 14 | from orders, lineitem 15 | where o_orderkey = l_orderkey 16 | and l_shipmode in ('MAIL', 'SHIP') 17 | and l_commitdate < l_receiptdate 18 | and l_shipdate < l_commitdate 19 | and l_receiptdate >= date '1994-01-01' 20 | and l_receiptdate < date '1994-01-01' + interval '1' year 21 | group by l_shipmode 22 | order by l_shipmode; 23 | -------------------------------------------------------------------------------- /ch13.sql: -------------------------------------------------------------------------------- 1 | with filt_orders as ( 2 | select * from orders 3 | where o_comment not like '%special%requests%' 4 | ) 5 | select c_count, 6 | count(*) as custdist 7 | from ( 8 | select c_custkey, 9 | count(o_orderkey) as c_count 10 | from customer left outer join filt_orders on 11 | c_custkey = o_custkey 12 | group by c_custkey 13 | ) as c_orders 14 | group by c_count 15 | order by custdist desc, c_count desc; 16 | -------------------------------------------------------------------------------- /ch14.sql: -------------------------------------------------------------------------------- 1 | select toDecimal64(100.00,2) * sum(case 2 | when p_type like 'PROMO%' 3 | then l_extendedprice * (1 - l_discount) 4 | else 0 5 | end) / 6 | sum(l_extendedprice * (1 - l_discount)) as promo_revenue 7 | from lineitem, part 8 | where l_partkey = p_partkey 9 | and l_shipdate >= date '1995-09-01' 10 | and l_shipdate < date '1995-09-01' + interval '1' month; 11 | -------------------------------------------------------------------------------- /ch15.sql: -------------------------------------------------------------------------------- 1 | with revenue0 as 2 | ( select l_suppkey as supplier_no, 3 | sum(l_extendedprice * (1 - l_discount)) as total_revenue 4 | from lineitem 5 | where l_shipdate >= date '1996-01-01' 6 | and l_shipdate < date '1996-01-01' + interval '3' month 7 | group by l_suppkey 8 | ) 9 | select s_suppkey, 10 | s_name, 11 | s_address, 12 | s_phone, 13 | total_revenue 14 | from supplier, revenue0 15 | where s_suppkey = supplier_no 16 | and total_revenue = ( 17 | select 18 | max(total_revenue) 19 | from 20 | revenue0 21 | ) 22 | order by s_suppkey; 23 | -------------------------------------------------------------------------------- /ch16.sql: -------------------------------------------------------------------------------- 1 | select p_brand, 2 | p_type, 3 | p_size, 4 | count(distinct ps_suppkey) as supplier_cnt 5 | from partsupp, part 6 | where p_partkey = ps_partkey 7 | and p_brand <> 'Brand#45' 8 | and p_type not like 'MEDIUM POLISHED%' 9 | and p_size in (49, 14, 23, 45, 19, 3, 36, 9) 10 | and ps_suppkey not in ( 11 | select s_suppkey 12 | from supplier 13 | where s_comment like '%Customer%Complaints%' 14 | ) 15 | group by p_brand, p_type, p_size 16 | order by supplier_cnt desc, 17 | p_brand, p_type, p_size; 18 | 19 | -------------------------------------------------------------------------------- /ch17.sql: -------------------------------------------------------------------------------- 1 | with part_avg as ( 2 | select toDecimal64(0.2 * avg(l_quantity),12) as limit_qty, l_partkey as lpk 3 | from lineitem 4 | group by l_partkey 5 | ) 6 | select sum(l_extendedprice) / toDecimal64(7.0,2) as avg_yearly 7 | from lineitem, part, part_avg 8 | where p_partkey = l_partkey 9 | and p_brand = 'Brand#23' 10 | and p_container = 'MED BOX' 11 | and p_partkey = lpk 12 | and l_quantity < limit_qty; 13 | -------------------------------------------------------------------------------- /ch18.sql: -------------------------------------------------------------------------------- 1 | select c_name, c_custkey, o_orderkey, 2 | o_orderdate, o_totalprice, sum(l_quantity) 3 | from customer, orders, lineitem 4 | where o_orderkey in ( 5 | select l_orderkey 6 | from lineitem 7 | group by l_orderkey having 8 | sum(l_quantity) > 300 9 | ) 10 | and c_custkey = o_custkey 11 | and o_orderkey = l_orderkey 12 | group by c_name, c_custkey, 13 | o_orderkey, o_orderdate, o_totalprice 14 | order by o_totalprice desc, o_orderdate 15 | limit 100; 16 | 17 | -------------------------------------------------------------------------------- /ch19.sql: -------------------------------------------------------------------------------- 1 | select sum(l_extendedprice* (1 - l_discount)) as revenue 2 | from lineitem, part 3 | where ( p_partkey = l_partkey 4 | and p_brand = 'Brand#12' 5 | and p_container in ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG') 6 | and l_quantity >= 1 and l_quantity <= 1 + 10 7 | and p_size between 1 and 5 8 | and l_shipmode in ('AIR', 'AIR REG') 9 | and l_shipinstruct = 'DELIVER IN PERSON' 10 | ) or ( p_partkey = l_partkey 11 | and p_brand = 'Brand#23' 12 | and p_container in ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK') 13 | and l_quantity >= 10 and l_quantity <= 10 + 10 14 | and p_size between 1 and 10 15 | and l_shipmode in ('AIR', 'AIR REG') 16 | and l_shipinstruct = 'DELIVER IN PERSON' 17 | ) or ( p_partkey = l_partkey 18 | and p_brand = 'Brand#34' 19 | and p_container in ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG') 20 | and l_quantity >= 20 and l_quantity <= 20 + 10 21 | and p_size between 1 and 15 22 | and l_shipmode in ('AIR', 'AIR REG') 23 | and l_shipinstruct = 'DELIVER IN PERSON' 24 | ); 25 | -------------------------------------------------------------------------------- /ch20.sql: -------------------------------------------------------------------------------- 1 | with availability_part_supp as( 2 | select 0.5 * sum(l_quantity) as ps_halfqty, l_partkey as pkey, l_suppkey as skey 3 | from lineitem 4 | where l_shipdate >= date '1994-01-01' 5 | and l_shipdate < date '1994-01-01' + interval '1' year 6 | group by pkey, skey 7 | ) 8 | select s_name, s_address 9 | from supplier, nation 10 | where s_suppkey in ( 11 | select ps_suppkey 12 | from partsupp, availability_part_supp 13 | where ps_partkey in ( 14 | select p_partkey 15 | from part 16 | where p_name like 'forest%' 17 | ) 18 | and ps_partkey = pkey 19 | and ps_suppkey = skey 20 | and ps_availqty > ps_halfqty 21 | ) 22 | and s_nationkey = n_nationkey 23 | and n_name = 'CANADA' 24 | order by s_name; 25 | -------------------------------------------------------------------------------- /ch21.sql: -------------------------------------------------------------------------------- 1 | select s_name, count(*) as numwait 2 | from supplier, lineitem l1, orders, nation 3 | where s_suppkey = l1.l_suppkey 4 | and o_orderkey = l1.l_orderkey 5 | and o_orderstatus = 'F' 6 | and l1.l_receiptdate > l1.l_commitdate 7 | and l1.l_orderkey in ( 8 | select l_orderkey 9 | from lineitem 10 | group by l_orderkey 11 | having count(l_suppkey) > 1 12 | ) 13 | and l1.l_orderkey not in ( 14 | select l_orderkey 15 | from lineitem 16 | where l_receiptdate > l_commitdate 17 | group by l_orderkey 18 | having count(l_suppkey) > 1 19 | ) 20 | and s_nationkey = n_nationkey 21 | and n_name = 'SAUDI ARABIA' 22 | group by s_name 23 | order by numwait desc, s_name 24 | limit 100; 25 | -------------------------------------------------------------------------------- /ch22.sql: -------------------------------------------------------------------------------- 1 | select cntrycode, 2 | count(*) as numcust, 3 | sum(c_acctbal) as totacctbal 4 | from ( select 5 | substring(c_phone from 1 for 2) as cntrycode, 6 | c_acctbal 7 | from customer 8 | where substring(c_phone from 1 for 2) in 9 | ('13', '31', '23', '29', '30', '18', '17') 10 | and c_acctbal > toDecimal64(( 11 | select avg(c_acctbal) 12 | from customer 13 | where c_acctbal > 0 14 | and substring(c_phone from 1 for 2) in 15 | ('13', '31', '23', '29', '30', '18', '17') 16 | ),2) 17 | and c_custkey not in ( 18 | select o_custkey 19 | from orders 20 | ) 21 | ) as custsale 22 | group by cntrycode 23 | order by cntrycode; 24 | -------------------------------------------------------------------------------- /ch_horizontal_run.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # This Source Code Form is subject to the terms of the Mozilla Public 4 | # License, v. 2.0. If a copy of the MPL was not distributed with this 5 | # file, You can obtain one at http://mozilla.org/MPL/2.0/. 6 | # 7 | # Copyright 2017-2022 MonetDB Solutions B.V. 8 | 9 | usage() { 10 | echo "Usage: $0 --db [--number ] [--db ] [--output ]" 11 | echo "Run the TPC-H queries a number of times and report timings." 12 | echo "" 13 | echo "Options:" 14 | echo " -d, --db The database" 15 | echo " -n, --number How many times to run the queries. Default=1" 16 | echo " -o, --output Where to append the output. Default=timings.csv" 17 | echo " -v, --verbose More output" 18 | echo " -h, --help This message" 19 | } 20 | 21 | dbname="SF-0_01" 22 | nruns=1 23 | 24 | 25 | while [ "$#" -gt 0 ] 26 | do 27 | case "$1" in 28 | -d|--db) 29 | dbname=$2 30 | shift 31 | shift 32 | ;; 33 | -n|--number) 34 | nruns=$2 35 | shift 36 | shift 37 | ;; 38 | -o|--output) 39 | nruns=$2 40 | shift 41 | shift 42 | ;; 43 | -v|--verbose) 44 | set -x 45 | set -v 46 | shift 47 | ;; 48 | -h|--help) 49 | usage 50 | exit 0 51 | ;; 52 | *) 53 | echo "$0: unknown argument $1" 54 | usage 55 | exit 1 56 | ;; 57 | esac 58 | done 59 | 60 | if [ -z "$dbname" ]; then 61 | usage 62 | exit 1 63 | fi 64 | 65 | output="$dbname.timings.csv" 66 | 67 | echo "# Database,Query,Min,Max,Average,Error" | tee -a "$output" 68 | 69 | 70 | for q in *.sql 71 | do 72 | 73 | max=0 74 | min=9999999 75 | sum=0 76 | 77 | for j in $(seq 1 $nruns) 78 | do 79 | s=$(date +%s.%N) 80 | timeout 3600s clickhouse client -n --queries-file $q -d $dbname 81 | err=$? 82 | 83 | x=$(date +%s.%N) 84 | elapsed=$(echo "scale=4; $x - $s" | bc) 85 | 86 | # calculate max, min, avg 87 | # using bc cmps to have floating point precission 88 | if [ $(echo "$elapsed > $max" | bc) -eq 1 ] 89 | then 90 | max=$elapsed 91 | fi 92 | 93 | if [ $(echo "$elapsed < $min" | bc) -eq 1 ] 94 | then 95 | min=$elapsed 96 | fi 97 | 98 | sum=$(echo "$elapsed + $sum" | bc) 99 | 100 | done 101 | 102 | avg=$(echo "scale=4; $sum/$nruns" | bc) 103 | 104 | echo "$dbname,"$(basename $q .sql)",$min,$max,$avg,$err" | tee -a "$output" 105 | 106 | done 107 | 108 | 109 | -------------------------------------------------------------------------------- /tpch_ch_schema.sql: -------------------------------------------------------------------------------- 1 | -- !/usr/bin/env bash 2 | 3 | -- this source code form is subject to the terms of the mozilla public 4 | -- license, v. 2.0. if a copy of the mpl was not distributed with this 5 | -- file, you can obtain one at http://mozilla.org/mpl/2.0/. 6 | 7 | -- copyright 2017-2018 monetdb solutions b.v. 8 | 9 | -- sccsid: @(#)dss.ddl 2.1.8.1 10 | create table nation ( n_nationkey integer not null, 11 | n_name char(25) not null, 12 | n_regionkey integer not null, 13 | n_comment varchar(152)) engine MergeTree() order by n_nationkey; 14 | 15 | create table region ( r_regionkey integer not null, 16 | r_name char(25) not null, 17 | r_comment varchar(152)) engine MergeTree() order by r_regionkey; 18 | 19 | create table part ( p_partkey integer not null, 20 | p_name varchar(55) not null, 21 | p_mfgr char(25) not null, 22 | p_brand char(10) not null, 23 | p_type varchar(25) not null, 24 | p_size integer not null, 25 | p_container char(10) not null, 26 | p_retailprice decimal(15,2) not null, 27 | p_comment varchar(23) not null ) engine MergeTree() order by p_partkey; 28 | 29 | create table supplier ( s_suppkey integer not null, 30 | s_name char(25) not null, 31 | s_address varchar(40) not null, 32 | s_nationkey integer not null, 33 | s_phone char(15) not null, 34 | s_acctbal decimal(15,2) not null, 35 | s_comment varchar(101) not null) engine MergeTree() order by s_suppkey; 36 | 37 | create table partsupp ( ps_partkey integer not null, 38 | ps_suppkey integer not null, 39 | ps_availqty integer not null, 40 | ps_supplycost decimal(15,2) not null, 41 | ps_comment varchar(199) not null ) engine MergeTree() order by ps_partkey; 42 | 43 | create table customer ( c_custkey integer not null, 44 | c_name varchar(25) not null, 45 | c_address varchar(40) not null, 46 | c_nationkey integer not null, 47 | c_phone char(15) not null, 48 | c_acctbal decimal(15,2) not null, 49 | c_mktsegment char(10) not null, 50 | c_comment varchar(117) not null) engine MergeTree() order by c_custkey; 51 | 52 | create table orders ( o_orderkey bigint not null, 53 | o_custkey integer not null, 54 | o_orderstatus char(1) not null, 55 | o_totalprice decimal(15,2) not null, 56 | o_orderdate date not null, 57 | o_orderpriority char(15) not null, 58 | o_clerk char(15) not null, 59 | o_shippriority integer not null, 60 | o_comment varchar(79) not null) engine MergeTree() order by o_orderkey; 61 | 62 | create table lineitem ( l_orderkey bigint not null, 63 | l_partkey integer not null, 64 | l_suppkey integer not null, 65 | l_linenumber integer not null, 66 | l_quantity decimal(15,2) not null, 67 | l_extendedprice decimal(15,2) not null, 68 | l_discount decimal(15,2) not null, 69 | l_tax decimal(15,2) not null, 70 | l_returnflag char(1) not null, 71 | l_linestatus char(1) not null, 72 | l_shipdate date not null, 73 | l_commitdate date not null, 74 | l_receiptdate date not null, 75 | l_shipinstruct char(25) not null, 76 | l_shipmode char(10) not null, 77 | l_comment varchar(44) not null) engine MergeTree() order by l_orderkey; 78 | --------------------------------------------------------------------------------