├── LICENSE
├── README.md
├── ch01.sql
├── ch02.sql
├── ch03.sql
├── ch04.sql
├── ch05.sql
├── ch06.sql
├── ch07.sql
├── ch08.sql
├── ch09.sql
├── ch10.sql
├── ch11.sql
├── ch12.sql
├── ch13.sql
├── ch14.sql
├── ch15.sql
├── ch16.sql
├── ch17.sql
├── ch18.sql
├── ch19.sql
├── ch20.sql
├── ch21.sql
├── ch22.sql
├── ch_horizontal_run.sh
└── tpch_ch_schema.sql
/LICENSE:
--------------------------------------------------------------------------------
1 | Mozilla Public License Version 2.0
2 | ==================================
3 |
4 | 1. Definitions
5 | --------------
6 |
7 | 1.1. "Contributor"
8 | means each individual or legal entity that creates, contributes to
9 | the creation of, or owns Covered Software.
10 |
11 | 1.2. "Contributor Version"
12 | means the combination of the Contributions of others (if any) used
13 | by a Contributor and that particular Contributor's Contribution.
14 |
15 | 1.3. "Contribution"
16 | means Covered Software of a particular Contributor.
17 |
18 | 1.4. "Covered Software"
19 | means Source Code Form to which the initial Contributor has attached
20 | the notice in Exhibit A, the Executable Form of such Source Code
21 | Form, and Modifications of such Source Code Form, in each case
22 | including portions thereof.
23 |
24 | 1.5. "Incompatible With Secondary Licenses"
25 | means
26 |
27 | (a) that the initial Contributor has attached the notice described
28 | in Exhibit B to the Covered Software; or
29 |
30 | (b) that the Covered Software was made available under the terms of
31 | version 1.1 or earlier of the License, but not also under the
32 | terms of a Secondary License.
33 |
34 | 1.6. "Executable Form"
35 | means any form of the work other than Source Code Form.
36 |
37 | 1.7. "Larger Work"
38 | means a work that combines Covered Software with other material, in
39 | a separate file or files, that is not Covered Software.
40 |
41 | 1.8. "License"
42 | means this document.
43 |
44 | 1.9. "Licensable"
45 | means having the right to grant, to the maximum extent possible,
46 | whether at the time of the initial grant or subsequently, any and
47 | all of the rights conveyed by this License.
48 |
49 | 1.10. "Modifications"
50 | means any of the following:
51 |
52 | (a) any file in Source Code Form that results from an addition to,
53 | deletion from, or modification of the contents of Covered
54 | Software; or
55 |
56 | (b) any new file in Source Code Form that contains any Covered
57 | Software.
58 |
59 | 1.11. "Patent Claims" of a Contributor
60 | means any patent claim(s), including without limitation, method,
61 | process, and apparatus claims, in any patent Licensable by such
62 | Contributor that would be infringed, but for the grant of the
63 | License, by the making, using, selling, offering for sale, having
64 | made, import, or transfer of either its Contributions or its
65 | Contributor Version.
66 |
67 | 1.12. "Secondary License"
68 | means either the GNU General Public License, Version 2.0, the GNU
69 | Lesser General Public License, Version 2.1, the GNU Affero General
70 | Public License, Version 3.0, or any later versions of those
71 | licenses.
72 |
73 | 1.13. "Source Code Form"
74 | means the form of the work preferred for making modifications.
75 |
76 | 1.14. "You" (or "Your")
77 | means an individual or a legal entity exercising rights under this
78 | License. For legal entities, "You" includes any entity that
79 | controls, is controlled by, or is under common control with You. For
80 | purposes of this definition, "control" means (a) the power, direct
81 | or indirect, to cause the direction or management of such entity,
82 | whether by contract or otherwise, or (b) ownership of more than
83 | fifty percent (50%) of the outstanding shares or beneficial
84 | ownership of such entity.
85 |
86 | 2. License Grants and Conditions
87 | --------------------------------
88 |
89 | 2.1. Grants
90 |
91 | Each Contributor hereby grants You a world-wide, royalty-free,
92 | non-exclusive license:
93 |
94 | (a) under intellectual property rights (other than patent or trademark)
95 | Licensable by such Contributor to use, reproduce, make available,
96 | modify, display, perform, distribute, and otherwise exploit its
97 | Contributions, either on an unmodified basis, with Modifications, or
98 | as part of a Larger Work; and
99 |
100 | (b) under Patent Claims of such Contributor to make, use, sell, offer
101 | for sale, have made, import, and otherwise transfer either its
102 | Contributions or its Contributor Version.
103 |
104 | 2.2. Effective Date
105 |
106 | The licenses granted in Section 2.1 with respect to any Contribution
107 | become effective for each Contribution on the date the Contributor first
108 | distributes such Contribution.
109 |
110 | 2.3. Limitations on Grant Scope
111 |
112 | The licenses granted in this Section 2 are the only rights granted under
113 | this License. No additional rights or licenses will be implied from the
114 | distribution or licensing of Covered Software under this License.
115 | Notwithstanding Section 2.1(b) above, no patent license is granted by a
116 | Contributor:
117 |
118 | (a) for any code that a Contributor has removed from Covered Software;
119 | or
120 |
121 | (b) for infringements caused by: (i) Your and any other third party's
122 | modifications of Covered Software, or (ii) the combination of its
123 | Contributions with other software (except as part of its Contributor
124 | Version); or
125 |
126 | (c) under Patent Claims infringed by Covered Software in the absence of
127 | its Contributions.
128 |
129 | This License does not grant any rights in the trademarks, service marks,
130 | or logos of any Contributor (except as may be necessary to comply with
131 | the notice requirements in Section 3.4).
132 |
133 | 2.4. Subsequent Licenses
134 |
135 | No Contributor makes additional grants as a result of Your choice to
136 | distribute the Covered Software under a subsequent version of this
137 | License (see Section 10.2) or under the terms of a Secondary License (if
138 | permitted under the terms of Section 3.3).
139 |
140 | 2.5. Representation
141 |
142 | Each Contributor represents that the Contributor believes its
143 | Contributions are its original creation(s) or it has sufficient rights
144 | to grant the rights to its Contributions conveyed by this License.
145 |
146 | 2.6. Fair Use
147 |
148 | This License is not intended to limit any rights You have under
149 | applicable copyright doctrines of fair use, fair dealing, or other
150 | equivalents.
151 |
152 | 2.7. Conditions
153 |
154 | Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted
155 | in Section 2.1.
156 |
157 | 3. Responsibilities
158 | -------------------
159 |
160 | 3.1. Distribution of Source Form
161 |
162 | All distribution of Covered Software in Source Code Form, including any
163 | Modifications that You create or to which You contribute, must be under
164 | the terms of this License. You must inform recipients that the Source
165 | Code Form of the Covered Software is governed by the terms of this
166 | License, and how they can obtain a copy of this License. You may not
167 | attempt to alter or restrict the recipients' rights in the Source Code
168 | Form.
169 |
170 | 3.2. Distribution of Executable Form
171 |
172 | If You distribute Covered Software in Executable Form then:
173 |
174 | (a) such Covered Software must also be made available in Source Code
175 | Form, as described in Section 3.1, and You must inform recipients of
176 | the Executable Form how they can obtain a copy of such Source Code
177 | Form by reasonable means in a timely manner, at a charge no more
178 | than the cost of distribution to the recipient; and
179 |
180 | (b) You may distribute such Executable Form under the terms of this
181 | License, or sublicense it under different terms, provided that the
182 | license for the Executable Form does not attempt to limit or alter
183 | the recipients' rights in the Source Code Form under this License.
184 |
185 | 3.3. Distribution of a Larger Work
186 |
187 | You may create and distribute a Larger Work under terms of Your choice,
188 | provided that You also comply with the requirements of this License for
189 | the Covered Software. If the Larger Work is a combination of Covered
190 | Software with a work governed by one or more Secondary Licenses, and the
191 | Covered Software is not Incompatible With Secondary Licenses, this
192 | License permits You to additionally distribute such Covered Software
193 | under the terms of such Secondary License(s), so that the recipient of
194 | the Larger Work may, at their option, further distribute the Covered
195 | Software under the terms of either this License or such Secondary
196 | License(s).
197 |
198 | 3.4. Notices
199 |
200 | You may not remove or alter the substance of any license notices
201 | (including copyright notices, patent notices, disclaimers of warranty,
202 | or limitations of liability) contained within the Source Code Form of
203 | the Covered Software, except that You may alter any license notices to
204 | the extent required to remedy known factual inaccuracies.
205 |
206 | 3.5. Application of Additional Terms
207 |
208 | You may choose to offer, and to charge a fee for, warranty, support,
209 | indemnity or liability obligations to one or more recipients of Covered
210 | Software. However, You may do so only on Your own behalf, and not on
211 | behalf of any Contributor. You must make it absolutely clear that any
212 | such warranty, support, indemnity, or liability obligation is offered by
213 | You alone, and You hereby agree to indemnify every Contributor for any
214 | liability incurred by such Contributor as a result of warranty, support,
215 | indemnity or liability terms You offer. You may include additional
216 | disclaimers of warranty and limitations of liability specific to any
217 | jurisdiction.
218 |
219 | 4. Inability to Comply Due to Statute or Regulation
220 | ---------------------------------------------------
221 |
222 | If it is impossible for You to comply with any of the terms of this
223 | License with respect to some or all of the Covered Software due to
224 | statute, judicial order, or regulation then You must: (a) comply with
225 | the terms of this License to the maximum extent possible; and (b)
226 | describe the limitations and the code they affect. Such description must
227 | be placed in a text file included with all distributions of the Covered
228 | Software under this License. Except to the extent prohibited by statute
229 | or regulation, such description must be sufficiently detailed for a
230 | recipient of ordinary skill to be able to understand it.
231 |
232 | 5. Termination
233 | --------------
234 |
235 | 5.1. The rights granted under this License will terminate automatically
236 | if You fail to comply with any of its terms. However, if You become
237 | compliant, then the rights granted under this License from a particular
238 | Contributor are reinstated (a) provisionally, unless and until such
239 | Contributor explicitly and finally terminates Your grants, and (b) on an
240 | ongoing basis, if such Contributor fails to notify You of the
241 | non-compliance by some reasonable means prior to 60 days after You have
242 | come back into compliance. Moreover, Your grants from a particular
243 | Contributor are reinstated on an ongoing basis if such Contributor
244 | notifies You of the non-compliance by some reasonable means, this is the
245 | first time You have received notice of non-compliance with this License
246 | from such Contributor, and You become compliant prior to 30 days after
247 | Your receipt of the notice.
248 |
249 | 5.2. If You initiate litigation against any entity by asserting a patent
250 | infringement claim (excluding declaratory judgment actions,
251 | counter-claims, and cross-claims) alleging that a Contributor Version
252 | directly or indirectly infringes any patent, then the rights granted to
253 | You by any and all Contributors for the Covered Software under Section
254 | 2.1 of this License shall terminate.
255 |
256 | 5.3. In the event of termination under Sections 5.1 or 5.2 above, all
257 | end user license agreements (excluding distributors and resellers) which
258 | have been validly granted by You or Your distributors under this License
259 | prior to termination shall survive termination.
260 |
261 | ************************************************************************
262 | * *
263 | * 6. Disclaimer of Warranty *
264 | * ------------------------- *
265 | * *
266 | * Covered Software is provided under this License on an "as is" *
267 | * basis, without warranty of any kind, either expressed, implied, or *
268 | * statutory, including, without limitation, warranties that the *
269 | * Covered Software is free of defects, merchantable, fit for a *
270 | * particular purpose or non-infringing. The entire risk as to the *
271 | * quality and performance of the Covered Software is with You. *
272 | * Should any Covered Software prove defective in any respect, You *
273 | * (not any Contributor) assume the cost of any necessary servicing, *
274 | * repair, or correction. This disclaimer of warranty constitutes an *
275 | * essential part of this License. No use of any Covered Software is *
276 | * authorized under this License except under this disclaimer. *
277 | * *
278 | ************************************************************************
279 |
280 | ************************************************************************
281 | * *
282 | * 7. Limitation of Liability *
283 | * -------------------------- *
284 | * *
285 | * Under no circumstances and under no legal theory, whether tort *
286 | * (including negligence), contract, or otherwise, shall any *
287 | * Contributor, or anyone who distributes Covered Software as *
288 | * permitted above, be liable to You for any direct, indirect, *
289 | * special, incidental, or consequential damages of any character *
290 | * including, without limitation, damages for lost profits, loss of *
291 | * goodwill, work stoppage, computer failure or malfunction, or any *
292 | * and all other commercial damages or losses, even if such party *
293 | * shall have been informed of the possibility of such damages. This *
294 | * limitation of liability shall not apply to liability for death or *
295 | * personal injury resulting from such party's negligence to the *
296 | * extent applicable law prohibits such limitation. Some *
297 | * jurisdictions do not allow the exclusion or limitation of *
298 | * incidental or consequential damages, so this exclusion and *
299 | * limitation may not apply to You. *
300 | * *
301 | ************************************************************************
302 |
303 | 8. Litigation
304 | -------------
305 |
306 | Any litigation relating to this License may be brought only in the
307 | courts of a jurisdiction where the defendant maintains its principal
308 | place of business and such litigation shall be governed by laws of that
309 | jurisdiction, without reference to its conflict-of-law provisions.
310 | Nothing in this Section shall prevent a party's ability to bring
311 | cross-claims or counter-claims.
312 |
313 | 9. Miscellaneous
314 | ----------------
315 |
316 | This License represents the complete agreement concerning the subject
317 | matter hereof. If any provision of this License is held to be
318 | unenforceable, such provision shall be reformed only to the extent
319 | necessary to make it enforceable. Any law or regulation which provides
320 | that the language of a contract shall be construed against the drafter
321 | shall not be used to construe this License against a Contributor.
322 |
323 | 10. Versions of the License
324 | ---------------------------
325 |
326 | 10.1. New Versions
327 |
328 | Mozilla Foundation is the license steward. Except as provided in Section
329 | 10.3, no one other than the license steward has the right to modify or
330 | publish new versions of this License. Each version will be given a
331 | distinguishing version number.
332 |
333 | 10.2. Effect of New Versions
334 |
335 | You may distribute the Covered Software under the terms of the version
336 | of the License under which You originally received the Covered Software,
337 | or under the terms of any subsequent version published by the license
338 | steward.
339 |
340 | 10.3. Modified Versions
341 |
342 | If you create software not governed by this License, and you want to
343 | create a new license for such software, you may create and use a
344 | modified version of this License if you rename the license and remove
345 | any references to the name of the license steward (except to note that
346 | such modified license differs from this License).
347 |
348 | 10.4. Distributing Source Code Form that is Incompatible With Secondary
349 | Licenses
350 |
351 | If You choose to distribute Source Code Form that is Incompatible With
352 | Secondary Licenses under the terms of this version of the License, the
353 | notice described in Exhibit B of this License must be attached.
354 |
355 | Exhibit A - Source Code Form License Notice
356 | -------------------------------------------
357 |
358 | This Source Code Form is subject to the terms of the Mozilla Public
359 | License, v. 2.0. If a copy of the MPL was not distributed with this
360 | file, You can obtain one at http://mozilla.org/MPL/2.0/.
361 |
362 | If it is not possible or desirable to put the notice in a particular
363 | file, then You may include the notice in a location (such as a LICENSE
364 | file in a relevant directory) where a recipient would be likely to look
365 | for such a notice.
366 |
367 | You may add additional accurate notices of copyright ownership.
368 |
369 | Exhibit B - "Incompatible With Secondary Licenses" Notice
370 | ---------------------------------------------------------
371 |
372 | This Source Code Form is "Incompatible With Secondary Licenses", as
373 | defined by the Mozilla Public License, v. 2.0.
374 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # TPCH benchmark adapted to Clickhouse SQL syntax
2 |
3 | ## Description
4 |
5 | In this repository there is a translation of the TPCH benchmark suite to Clickhouse SQL syntax, this benchmark suite is one of the industry standards in measuring performance of analytical queries. Clickhouse is an OLAP data management system that follows a column-based approach to store information, like many other popular systems such as MonetDB or Apache Parquet.
6 |
7 | Clickhouse does not strictly follow the standard SQL syntax, and even some of the simplest features from traditional relational databases like transactions or Foreign Keys are not supported. One of the main issues that had to be addressed to make TPCH queries work in Clickhouse was nested queries, since the selected attributes in an outer query are not visible in the inner ones. This prevents many queries in the benchmarking suite from executing that had WHERE conditions that references attributes from outer queries.
8 |
9 | ## Cheatsheet
10 |
11 | To start a new server you can run the command `clickhouse server &`, it will create some files and subdirectories to manage the created database and store data so make sure you do it somewhere with enough space and where it does not matter to have additional files.
12 |
13 | Depending on the installation method, you might have separate biaries for the server and client `clickhouse-server`/`clickhouse-client`. Running any SQL code in the CLI is as easy as executing `clickhouse client -q "CREATE DATABASE IF NOT EXISTS sf3"`, but you can also execute a SQL script in the following manner: `clickhouse client -n --queries-file tpch_ch_schema.sql -d sf3`
14 |
15 | The previous commands actually create the database sf3 and loads the schema for the TPCH table layout. We still need to generate the data (this must be done with the scripts provided by the official TPCH organization, that can be found [here](https://tpc.org/TPC_Documents_Current_Versions/download_programs/tools-download-request5.asp?bm_type=TPC-H&bm_vers=3.0.0&mode=CURRENT-ONLY))
16 |
17 | Once you have generated the data for the scale factor you are going to execute, you can run the following script `ch_horizontal_run.sh` so as to execute the different queries. By default, each query is run 3 times to account for cold start effects. The mean, min and max execution times are printed an stored into a `timings.csv` file.
18 |
19 | ## Issues
20 |
21 | By default, the client only waits for 300s for a query response from the server. Since queries 7 and 19 did not actually end for a Scale Factor of 3. You might need increase timeout creating the following file via `vim ~/.clickhouse-client/config.xml` and adding:
22 |
23 | ```
24 |
25 | 7200
26 | 7200
27 |
28 | ```
29 |
30 |
31 |
--------------------------------------------------------------------------------
/ch01.sql:
--------------------------------------------------------------------------------
1 | select
2 | l_returnflag,
3 | l_linestatus,
4 | sum(l_quantity) as sum_qty,
5 | sum(l_extendedprice) as sum_base_price,
6 | sum(l_extendedprice * (1 - l_discount)) as sum_disc_price,
7 | sum(l_extendedprice * (1 - l_discount) * (1 + l_tax)) as sum_charge,
8 | avg(l_quantity) as avg_qty,
9 | avg(l_extendedprice) as avg_price,
10 | avg(l_discount) as avg_disc,
11 | count(*) as count_order
12 | from
13 | lineitem
14 | where
15 | l_shipdate <= date '1998-12-01' - interval '90' day
16 | group by
17 | l_returnflag,
18 | l_linestatus
19 | order by
20 | l_returnflag,
21 | l_linestatus;
22 |
--------------------------------------------------------------------------------
/ch02.sql:
--------------------------------------------------------------------------------
1 | with cheapest_part as
2 | (
3 | select
4 | min(ps_supplycost) as cp_lowest,
5 | p_partkey as cp_partkey
6 | from part,
7 | partsupp,
8 | supplier,
9 | nation,
10 | region
11 | where p_partkey = ps_partkey
12 | and s_suppkey = ps_suppkey
13 | and s_nationkey = n_nationkey
14 | and n_regionkey = r_regionkey
15 | and r_name = 'EUROPE'
16 | group by p_partkey
17 | )
18 | select
19 | s_acctbal, s_name, n_name, p_partkey, p_mfgr, s_address,
20 | s_phone, s_comment
21 | from part,
22 | supplier,
23 | partsupp,
24 | nation,
25 | region,
26 | cheapest_part
27 | where p_partkey = ps_partkey
28 | and s_suppkey = ps_suppkey
29 | and p_size = 15
30 | and p_type like '%BRASS'
31 | and s_nationkey = n_nationkey
32 | and n_regionkey = r_regionkey
33 | and r_name = 'EUROPE'
34 | and ps_supplycost = cp_lowest
35 | and cp_partkey = p_partkey
36 | order by s_acctbal desc,
37 | n_name,
38 | s_name,
39 | p_partkey
40 | limit 10;
41 |
--------------------------------------------------------------------------------
/ch03.sql:
--------------------------------------------------------------------------------
1 | select
2 | l_orderkey,
3 | sum(l_extendedprice * (1 - l_discount)) as revenue,
4 | o_orderdate,
5 | o_shippriority
6 | from customer,
7 | orders,
8 | lineitem
9 | where c_mktsegment = 'BUILDING'
10 | and c_custkey = o_custkey
11 | and l_orderkey = o_orderkey
12 | and o_orderdate < date '1995-03-15'
13 | and l_shipdate > date '1995-03-15'
14 | group by
15 | l_orderkey,
16 | o_orderdate,
17 | o_shippriority
18 | order by
19 | revenue desc,
20 | o_orderdate
21 | limit 10;
22 |
--------------------------------------------------------------------------------
/ch04.sql:
--------------------------------------------------------------------------------
1 | select
2 | o_orderpriority,
3 | count(*) as order_count
4 | from
5 | orders
6 | where
7 | o_orderdate >= date '1993-07-01'
8 | and o_orderdate < date '1993-07-01' + interval '3' month
9 | and o_orderkey in (
10 | select
11 | l_orderkey
12 | from
13 | lineitem
14 | where
15 | l_commitdate < l_receiptdate
16 | )
17 | group by
18 | o_orderpriority
19 | order by
20 | o_orderpriority;
21 |
--------------------------------------------------------------------------------
/ch05.sql:
--------------------------------------------------------------------------------
1 | select
2 | n_name,
3 | sum(l_extendedprice * (1 - l_discount)) as revenue
4 | from
5 | customer,
6 | orders,
7 | lineitem,
8 | supplier,
9 | nation,
10 | region
11 | where
12 | c_custkey = o_custkey
13 | and l_orderkey = o_orderkey
14 | and l_suppkey = s_suppkey
15 | and c_nationkey = s_nationkey
16 | and s_nationkey = n_nationkey
17 | and n_regionkey = r_regionkey
18 | and r_name = 'ASIA'
19 | and o_orderdate >= date '1994-01-01'
20 | and o_orderdate < date '1994-01-01' + interval '1' year
21 | group by
22 | n_name
23 | order by
24 | revenue desc;
25 |
26 |
--------------------------------------------------------------------------------
/ch06.sql:
--------------------------------------------------------------------------------
1 | select
2 | sum(l_extendedprice * l_discount) as revenue
3 | from
4 | lineitem
5 | where
6 | l_shipdate >= date '1994-01-01'
7 | and l_shipdate < date '1994-01-01' + interval '1' year
8 | and l_discount between toDecimal64(0.05,2) and toDecimal64(0.07,2)
9 | and l_quantity < 24;
10 |
--------------------------------------------------------------------------------
/ch07.sql:
--------------------------------------------------------------------------------
1 | select
2 | supp_nation,
3 | cust_nation,
4 | l_year,
5 | sum(volume) as revenue
6 | from ( select
7 | n1.n_name as supp_nation,
8 | n2.n_name as cust_nation,
9 | extract(year from l_shipdate) as l_year,
10 | l_extendedprice * (1 - l_discount) as volume
11 | from supplier,
12 | lineitem,
13 | orders,
14 | customer,
15 | nation n1,
16 | nation n2
17 | where s_suppkey = l_suppkey
18 | and o_orderkey = l_orderkey
19 | and c_custkey = o_custkey
20 | and s_nationkey = n1.n_nationkey
21 | and c_nationkey = n2.n_nationkey
22 | and ( (n1.n_name = 'FRANCE' and n2.n_name = 'GERMANY')
23 | or (n1.n_name = 'GERMANY' and n2.n_name = 'FRANCE'))
24 | and l_shipdate between date '1995-01-01' and date '1996-12-31'
25 | ) as shipping
26 | group by supp_nation,
27 | cust_nation,
28 | l_year
29 | order by supp_nation,
30 | cust_nation,
31 | l_year;
32 |
--------------------------------------------------------------------------------
/ch08.sql:
--------------------------------------------------------------------------------
1 | select
2 | o_year,
3 | sum(case
4 | when nation = 'BRAZIL' then volume
5 | else 0
6 | end) / sum(volume) as mkt_share
7 | from ( select extract(year from o_orderdate) as o_year,
8 | l_extendedprice * (1 - l_discount) as volume,
9 | n2.n_name as nation
10 | from part,
11 | supplier,
12 | lineitem,
13 | orders,
14 | customer,
15 | nation n1,
16 | nation n2,
17 | region
18 | where p_partkey = l_partkey
19 | and s_suppkey = l_suppkey
20 | and l_orderkey = o_orderkey
21 | and o_custkey = c_custkey
22 | and c_nationkey = n1.n_nationkey
23 | and n1.n_regionkey = r_regionkey
24 | and r_name = 'AMERICA'
25 | and s_nationkey = n2.n_nationkey
26 | and o_orderdate between date '1995-01-01' and date '1996-12-31'
27 | and p_type = 'ECONOMY ANODIZED STEEL'
28 | ) as all_nations
29 | group by o_year
30 | order by o_year;
31 |
--------------------------------------------------------------------------------
/ch09.sql:
--------------------------------------------------------------------------------
1 | select
2 | nation,
3 | o_year,
4 | sum(amount) as sum_profit
5 | from ( select n_name as nation,
6 | extract(year from o_orderdate) as o_year,
7 | l_extendedprice * (1 - l_discount) - ps_supplycost * l_quantity as amount
8 | from part,
9 | supplier,
10 | lineitem,
11 | partsupp,
12 | orders,
13 | nation
14 | where s_suppkey = l_suppkey
15 | and ps_suppkey = l_suppkey
16 | and ps_partkey = l_partkey
17 | and p_partkey = l_partkey
18 | and o_orderkey = l_orderkey
19 | and s_nationkey = n_nationkey
20 | and p_name like '%green%'
21 | ) as profit
22 | group by nation, o_year
23 | order by nation, o_year desc;
24 |
--------------------------------------------------------------------------------
/ch10.sql:
--------------------------------------------------------------------------------
1 | select c_custkey,
2 | c_name,
3 | sum(l_extendedprice * (1 - l_discount)) as revenue,
4 | c_acctbal,
5 | n_name,
6 | c_address,
7 | c_phone,
8 | c_comment
9 | from customer,
10 | orders,
11 | lineitem,
12 | nation
13 | where c_custkey = o_custkey
14 | and l_orderkey = o_orderkey
15 | and o_orderdate >= date '1993-10-01'
16 | and o_orderdate < date '1993-10-01' + interval '3' month
17 | and l_returnflag = 'R'
18 | and c_nationkey = n_nationkey
19 | group by c_custkey, c_name,
20 | c_acctbal, c_phone, n_name,
21 | c_address, c_comment
22 | order by revenue desc
23 | limit 20;
24 |
--------------------------------------------------------------------------------
/ch11.sql:
--------------------------------------------------------------------------------
1 | select
2 | ps_partkey,
3 | sum(ps_supplycost * ps_availqty) as value
4 | from partsupp,
5 | supplier,
6 | nation
7 | where ps_suppkey = s_suppkey
8 | and s_nationkey = n_nationkey
9 | and n_name = 'GERMANY'
10 | group by ps_partkey
11 | having sum(ps_supplycost * ps_availqty) >
12 | ( select toDecimal64(sum(ps_supplycost * ps_availqty) * 0.0000001,10)
13 | -- The above constant needs to be adjusted according
14 | -- to the scale factor (SF): constant = 0.0001 / SF.
15 | from partsupp,
16 | supplier,
17 | nation
18 | where ps_suppkey = s_suppkey
19 | and s_nationkey = n_nationkey
20 | and n_name = 'GERMANY'
21 | )
22 | order by value desc;
23 |
--------------------------------------------------------------------------------
/ch12.sql:
--------------------------------------------------------------------------------
1 | select l_shipmode,
2 | sum(case
3 | when o_orderpriority = '1-URGENT'
4 | or o_orderpriority = '2-HIGH'
5 | then 1
6 | else 0
7 | end) as high_line_count,
8 | sum(case
9 | when o_orderpriority <> '1-URGENT'
10 | and o_orderpriority <> '2-HIGH'
11 | then 1
12 | else 0
13 | end) as low_line_count
14 | from orders, lineitem
15 | where o_orderkey = l_orderkey
16 | and l_shipmode in ('MAIL', 'SHIP')
17 | and l_commitdate < l_receiptdate
18 | and l_shipdate < l_commitdate
19 | and l_receiptdate >= date '1994-01-01'
20 | and l_receiptdate < date '1994-01-01' + interval '1' year
21 | group by l_shipmode
22 | order by l_shipmode;
23 |
--------------------------------------------------------------------------------
/ch13.sql:
--------------------------------------------------------------------------------
1 | with filt_orders as (
2 | select * from orders
3 | where o_comment not like '%special%requests%'
4 | )
5 | select c_count,
6 | count(*) as custdist
7 | from (
8 | select c_custkey,
9 | count(o_orderkey) as c_count
10 | from customer left outer join filt_orders on
11 | c_custkey = o_custkey
12 | group by c_custkey
13 | ) as c_orders
14 | group by c_count
15 | order by custdist desc, c_count desc;
16 |
--------------------------------------------------------------------------------
/ch14.sql:
--------------------------------------------------------------------------------
1 | select toDecimal64(100.00,2) * sum(case
2 | when p_type like 'PROMO%'
3 | then l_extendedprice * (1 - l_discount)
4 | else 0
5 | end) /
6 | sum(l_extendedprice * (1 - l_discount)) as promo_revenue
7 | from lineitem, part
8 | where l_partkey = p_partkey
9 | and l_shipdate >= date '1995-09-01'
10 | and l_shipdate < date '1995-09-01' + interval '1' month;
11 |
--------------------------------------------------------------------------------
/ch15.sql:
--------------------------------------------------------------------------------
1 | with revenue0 as
2 | ( select l_suppkey as supplier_no,
3 | sum(l_extendedprice * (1 - l_discount)) as total_revenue
4 | from lineitem
5 | where l_shipdate >= date '1996-01-01'
6 | and l_shipdate < date '1996-01-01' + interval '3' month
7 | group by l_suppkey
8 | )
9 | select s_suppkey,
10 | s_name,
11 | s_address,
12 | s_phone,
13 | total_revenue
14 | from supplier, revenue0
15 | where s_suppkey = supplier_no
16 | and total_revenue = (
17 | select
18 | max(total_revenue)
19 | from
20 | revenue0
21 | )
22 | order by s_suppkey;
23 |
--------------------------------------------------------------------------------
/ch16.sql:
--------------------------------------------------------------------------------
1 | select p_brand,
2 | p_type,
3 | p_size,
4 | count(distinct ps_suppkey) as supplier_cnt
5 | from partsupp, part
6 | where p_partkey = ps_partkey
7 | and p_brand <> 'Brand#45'
8 | and p_type not like 'MEDIUM POLISHED%'
9 | and p_size in (49, 14, 23, 45, 19, 3, 36, 9)
10 | and ps_suppkey not in (
11 | select s_suppkey
12 | from supplier
13 | where s_comment like '%Customer%Complaints%'
14 | )
15 | group by p_brand, p_type, p_size
16 | order by supplier_cnt desc,
17 | p_brand, p_type, p_size;
18 |
19 |
--------------------------------------------------------------------------------
/ch17.sql:
--------------------------------------------------------------------------------
1 | with part_avg as (
2 | select toDecimal64(0.2 * avg(l_quantity),12) as limit_qty, l_partkey as lpk
3 | from lineitem
4 | group by l_partkey
5 | )
6 | select sum(l_extendedprice) / toDecimal64(7.0,2) as avg_yearly
7 | from lineitem, part, part_avg
8 | where p_partkey = l_partkey
9 | and p_brand = 'Brand#23'
10 | and p_container = 'MED BOX'
11 | and p_partkey = lpk
12 | and l_quantity < limit_qty;
13 |
--------------------------------------------------------------------------------
/ch18.sql:
--------------------------------------------------------------------------------
1 | select c_name, c_custkey, o_orderkey,
2 | o_orderdate, o_totalprice, sum(l_quantity)
3 | from customer, orders, lineitem
4 | where o_orderkey in (
5 | select l_orderkey
6 | from lineitem
7 | group by l_orderkey having
8 | sum(l_quantity) > 300
9 | )
10 | and c_custkey = o_custkey
11 | and o_orderkey = l_orderkey
12 | group by c_name, c_custkey,
13 | o_orderkey, o_orderdate, o_totalprice
14 | order by o_totalprice desc, o_orderdate
15 | limit 100;
16 |
17 |
--------------------------------------------------------------------------------
/ch19.sql:
--------------------------------------------------------------------------------
1 | select sum(l_extendedprice* (1 - l_discount)) as revenue
2 | from lineitem, part
3 | where ( p_partkey = l_partkey
4 | and p_brand = 'Brand#12'
5 | and p_container in ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG')
6 | and l_quantity >= 1 and l_quantity <= 1 + 10
7 | and p_size between 1 and 5
8 | and l_shipmode in ('AIR', 'AIR REG')
9 | and l_shipinstruct = 'DELIVER IN PERSON'
10 | ) or ( p_partkey = l_partkey
11 | and p_brand = 'Brand#23'
12 | and p_container in ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK')
13 | and l_quantity >= 10 and l_quantity <= 10 + 10
14 | and p_size between 1 and 10
15 | and l_shipmode in ('AIR', 'AIR REG')
16 | and l_shipinstruct = 'DELIVER IN PERSON'
17 | ) or ( p_partkey = l_partkey
18 | and p_brand = 'Brand#34'
19 | and p_container in ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG')
20 | and l_quantity >= 20 and l_quantity <= 20 + 10
21 | and p_size between 1 and 15
22 | and l_shipmode in ('AIR', 'AIR REG')
23 | and l_shipinstruct = 'DELIVER IN PERSON'
24 | );
25 |
--------------------------------------------------------------------------------
/ch20.sql:
--------------------------------------------------------------------------------
1 | with availability_part_supp as(
2 | select 0.5 * sum(l_quantity) as ps_halfqty, l_partkey as pkey, l_suppkey as skey
3 | from lineitem
4 | where l_shipdate >= date '1994-01-01'
5 | and l_shipdate < date '1994-01-01' + interval '1' year
6 | group by pkey, skey
7 | )
8 | select s_name, s_address
9 | from supplier, nation
10 | where s_suppkey in (
11 | select ps_suppkey
12 | from partsupp, availability_part_supp
13 | where ps_partkey in (
14 | select p_partkey
15 | from part
16 | where p_name like 'forest%'
17 | )
18 | and ps_partkey = pkey
19 | and ps_suppkey = skey
20 | and ps_availqty > ps_halfqty
21 | )
22 | and s_nationkey = n_nationkey
23 | and n_name = 'CANADA'
24 | order by s_name;
25 |
--------------------------------------------------------------------------------
/ch21.sql:
--------------------------------------------------------------------------------
1 | select s_name, count(*) as numwait
2 | from supplier, lineitem l1, orders, nation
3 | where s_suppkey = l1.l_suppkey
4 | and o_orderkey = l1.l_orderkey
5 | and o_orderstatus = 'F'
6 | and l1.l_receiptdate > l1.l_commitdate
7 | and l1.l_orderkey in (
8 | select l_orderkey
9 | from lineitem
10 | group by l_orderkey
11 | having count(l_suppkey) > 1
12 | )
13 | and l1.l_orderkey not in (
14 | select l_orderkey
15 | from lineitem
16 | where l_receiptdate > l_commitdate
17 | group by l_orderkey
18 | having count(l_suppkey) > 1
19 | )
20 | and s_nationkey = n_nationkey
21 | and n_name = 'SAUDI ARABIA'
22 | group by s_name
23 | order by numwait desc, s_name
24 | limit 100;
25 |
--------------------------------------------------------------------------------
/ch22.sql:
--------------------------------------------------------------------------------
1 | select cntrycode,
2 | count(*) as numcust,
3 | sum(c_acctbal) as totacctbal
4 | from ( select
5 | substring(c_phone from 1 for 2) as cntrycode,
6 | c_acctbal
7 | from customer
8 | where substring(c_phone from 1 for 2) in
9 | ('13', '31', '23', '29', '30', '18', '17')
10 | and c_acctbal > toDecimal64((
11 | select avg(c_acctbal)
12 | from customer
13 | where c_acctbal > 0
14 | and substring(c_phone from 1 for 2) in
15 | ('13', '31', '23', '29', '30', '18', '17')
16 | ),2)
17 | and c_custkey not in (
18 | select o_custkey
19 | from orders
20 | )
21 | ) as custsale
22 | group by cntrycode
23 | order by cntrycode;
24 |
--------------------------------------------------------------------------------
/ch_horizontal_run.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | # This Source Code Form is subject to the terms of the Mozilla Public
4 | # License, v. 2.0. If a copy of the MPL was not distributed with this
5 | # file, You can obtain one at http://mozilla.org/MPL/2.0/.
6 | #
7 | # Copyright 2017-2022 MonetDB Solutions B.V.
8 |
9 | usage() {
10 | echo "Usage: $0 --db [--number ] [--db ] [--output ]"
11 | echo "Run the TPC-H queries a number of times and report timings."
12 | echo ""
13 | echo "Options:"
14 | echo " -d, --db The database"
15 | echo " -n, --number How many times to run the queries. Default=1"
16 | echo " -o, --output Where to append the output. Default=timings.csv"
17 | echo " -v, --verbose More output"
18 | echo " -h, --help This message"
19 | }
20 |
21 | dbname="SF-0_01"
22 | nruns=1
23 |
24 |
25 | while [ "$#" -gt 0 ]
26 | do
27 | case "$1" in
28 | -d|--db)
29 | dbname=$2
30 | shift
31 | shift
32 | ;;
33 | -n|--number)
34 | nruns=$2
35 | shift
36 | shift
37 | ;;
38 | -o|--output)
39 | nruns=$2
40 | shift
41 | shift
42 | ;;
43 | -v|--verbose)
44 | set -x
45 | set -v
46 | shift
47 | ;;
48 | -h|--help)
49 | usage
50 | exit 0
51 | ;;
52 | *)
53 | echo "$0: unknown argument $1"
54 | usage
55 | exit 1
56 | ;;
57 | esac
58 | done
59 |
60 | if [ -z "$dbname" ]; then
61 | usage
62 | exit 1
63 | fi
64 |
65 | output="$dbname.timings.csv"
66 |
67 | echo "# Database,Query,Min,Max,Average,Error" | tee -a "$output"
68 |
69 |
70 | for q in *.sql
71 | do
72 |
73 | max=0
74 | min=9999999
75 | sum=0
76 |
77 | for j in $(seq 1 $nruns)
78 | do
79 | s=$(date +%s.%N)
80 | timeout 3600s clickhouse client -n --queries-file $q -d $dbname
81 | err=$?
82 |
83 | x=$(date +%s.%N)
84 | elapsed=$(echo "scale=4; $x - $s" | bc)
85 |
86 | # calculate max, min, avg
87 | # using bc cmps to have floating point precission
88 | if [ $(echo "$elapsed > $max" | bc) -eq 1 ]
89 | then
90 | max=$elapsed
91 | fi
92 |
93 | if [ $(echo "$elapsed < $min" | bc) -eq 1 ]
94 | then
95 | min=$elapsed
96 | fi
97 |
98 | sum=$(echo "$elapsed + $sum" | bc)
99 |
100 | done
101 |
102 | avg=$(echo "scale=4; $sum/$nruns" | bc)
103 |
104 | echo "$dbname,"$(basename $q .sql)",$min,$max,$avg,$err" | tee -a "$output"
105 |
106 | done
107 |
108 |
109 |
--------------------------------------------------------------------------------
/tpch_ch_schema.sql:
--------------------------------------------------------------------------------
1 | -- !/usr/bin/env bash
2 |
3 | -- this source code form is subject to the terms of the mozilla public
4 | -- license, v. 2.0. if a copy of the mpl was not distributed with this
5 | -- file, you can obtain one at http://mozilla.org/mpl/2.0/.
6 |
7 | -- copyright 2017-2018 monetdb solutions b.v.
8 |
9 | -- sccsid: @(#)dss.ddl 2.1.8.1
10 | create table nation ( n_nationkey integer not null,
11 | n_name char(25) not null,
12 | n_regionkey integer not null,
13 | n_comment varchar(152)) engine MergeTree() order by n_nationkey;
14 |
15 | create table region ( r_regionkey integer not null,
16 | r_name char(25) not null,
17 | r_comment varchar(152)) engine MergeTree() order by r_regionkey;
18 |
19 | create table part ( p_partkey integer not null,
20 | p_name varchar(55) not null,
21 | p_mfgr char(25) not null,
22 | p_brand char(10) not null,
23 | p_type varchar(25) not null,
24 | p_size integer not null,
25 | p_container char(10) not null,
26 | p_retailprice decimal(15,2) not null,
27 | p_comment varchar(23) not null ) engine MergeTree() order by p_partkey;
28 |
29 | create table supplier ( s_suppkey integer not null,
30 | s_name char(25) not null,
31 | s_address varchar(40) not null,
32 | s_nationkey integer not null,
33 | s_phone char(15) not null,
34 | s_acctbal decimal(15,2) not null,
35 | s_comment varchar(101) not null) engine MergeTree() order by s_suppkey;
36 |
37 | create table partsupp ( ps_partkey integer not null,
38 | ps_suppkey integer not null,
39 | ps_availqty integer not null,
40 | ps_supplycost decimal(15,2) not null,
41 | ps_comment varchar(199) not null ) engine MergeTree() order by ps_partkey;
42 |
43 | create table customer ( c_custkey integer not null,
44 | c_name varchar(25) not null,
45 | c_address varchar(40) not null,
46 | c_nationkey integer not null,
47 | c_phone char(15) not null,
48 | c_acctbal decimal(15,2) not null,
49 | c_mktsegment char(10) not null,
50 | c_comment varchar(117) not null) engine MergeTree() order by c_custkey;
51 |
52 | create table orders ( o_orderkey bigint not null,
53 | o_custkey integer not null,
54 | o_orderstatus char(1) not null,
55 | o_totalprice decimal(15,2) not null,
56 | o_orderdate date not null,
57 | o_orderpriority char(15) not null,
58 | o_clerk char(15) not null,
59 | o_shippriority integer not null,
60 | o_comment varchar(79) not null) engine MergeTree() order by o_orderkey;
61 |
62 | create table lineitem ( l_orderkey bigint not null,
63 | l_partkey integer not null,
64 | l_suppkey integer not null,
65 | l_linenumber integer not null,
66 | l_quantity decimal(15,2) not null,
67 | l_extendedprice decimal(15,2) not null,
68 | l_discount decimal(15,2) not null,
69 | l_tax decimal(15,2) not null,
70 | l_returnflag char(1) not null,
71 | l_linestatus char(1) not null,
72 | l_shipdate date not null,
73 | l_commitdate date not null,
74 | l_receiptdate date not null,
75 | l_shipinstruct char(25) not null,
76 | l_shipmode char(10) not null,
77 | l_comment varchar(44) not null) engine MergeTree() order by l_orderkey;
78 |
--------------------------------------------------------------------------------