├── LICENSE ├── README.md ├── drop-caches.sh ├── dss ├── templates │ ├── 1.sql │ ├── 10.sql │ ├── 11.sql │ ├── 12.sql │ ├── 13.sql │ ├── 14.sql │ ├── 15.sql │ ├── 16.sql │ ├── 17.sql │ ├── 18.sql │ ├── 19.sql │ ├── 2.sql │ ├── 20.sql │ ├── 21.sql │ ├── 22.sql │ ├── 3.sql │ ├── 4.sql │ ├── 5.sql │ ├── 6.sql │ ├── 7.sql │ ├── 8.sql │ └── 9.sql ├── tpch-alter.sql ├── tpch-create.sql ├── tpch-index.sql ├── tpch-load.sql ├── tpch-load_pg.sql └── tpch-pkeys.sql ├── process.php └── tpch.sh /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2011, Tomas Vondra 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | * Redistributions of source code must retain the above copyright 7 | notice, this list of conditions and the following disclaimer. 8 | * Redistributions in binary form must reproduce the above copyright 9 | notice, this list of conditions and the following disclaimer in the 10 | documentation and/or other materials provided with the distribution. 11 | * Neither the name of the nor the 12 | names of its contributors may be used to endorse or promote products 13 | derived from this software without specific prior written permission. 14 | 15 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 16 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 17 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 18 | DISCLAIMED. IN NO EVENT SHALL BE LIABLE FOR ANY 19 | DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 20 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 21 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 22 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 24 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | TPC-H PostgreSQL benchmark 2 | ========================== 3 | This repository contains a simple implementation that runs a TPC-H-like 4 | benchmark with a PostgreSQL database. It builds on the official TPC-H 5 | benchmark available at http://tpc.org/tpch/default.asp (uses just the 6 | dbgen a qgen parts). 7 | 8 | 9 | Preparing dbgen and qgen 10 | ------------------------ 11 | The first thing you need to do is to prepare the tool that generates 12 | data and queries. This step is more thoroughly explained at my blog at 13 | 14 | http://www.fuzzy.cz/en/articles/dss-tpc-h-benchmark-with-postgresql/ 15 | 16 | but let's briefly repeat what needs to be done. 17 | 18 | First, download the TPC-H benchmark from http://tpc.org/tpch/default.asp 19 | and extract it to a directory 20 | 21 | $ wget http://tpc.org/tpch/spec/tpch_2_14_3.tgz 22 | $ mkdir tpch 23 | $ tar -xzf tpch_2_14_3.tgz -C tpch 24 | 25 | and then prepare the Makefile - create a copy from makefile.suite 26 | 27 | $ cd tpch/dbgen 28 | $ cp makefile.suite Makefile 29 | $ nano Makefile 30 | 31 | and modify it so that it contains this (around line 110) 32 | 33 | CC=gcc 34 | DATABASE=ORACLE 35 | MACHINE=LINUX 36 | WORKLOAD=TPCH 37 | 38 | and compile it using `make` as usual. Now you should have `dbgen` and 39 | `qgen` tools that generate data and queries. 40 | 41 | 42 | Generating data 43 | --------------- 44 | Right, so let's generate the data using the `dbgen` tool - there's one 45 | important parameter 'scale' that influences the amount of data. It's 46 | roughly equal to number of GB of raw data, so to generate 10GB of data 47 | just do 48 | 49 | $ ./dbgen -s 10 50 | 51 | which creates a bunch of .tbl files in Oracle-like CSV format 52 | 53 | $ ls *.tbl 54 | 55 | and to convert them to a CSV format compatible with PostgreSQL, do this 56 | 57 | $ for i in `ls *.tbl`; do sed 's/|$//' $i > ${i/tbl/csv}; echo $i; done; 58 | 59 | Finally, move these data to the 'dss/data' directory or somewhere else, 60 | and create a symlink to /tmp/dss-data (that's where tpch-load.sql is 61 | looking for for the data from). 62 | 63 | It's a good idea to place this directory on a ramdrive so that it does not 64 | influence the benchmark (e.g. it's a very bad idea to place the data on the 65 | same drive as PostgreSQL data directory). 66 | 67 | 68 | Generating queries 69 | ------------------ 70 | Now we have to generate queries from templates specified in TPC-H benchmark. 71 | The templates provided at tpch.org are not suitable for PostgreSQL. So 72 | I have provided slightly modified queries in the 'dss/templates' directory 73 | and you should place the queries in 'dss/queries' dir. 74 | 75 | for q in `seq 1 22` 76 | do 77 | DSS_QUERY=dss/templates ./qgen $q >> dss/queries/$q.sql 78 | sed 's/^select/explain select/' dss/queries/$q.sql > dss/queries/$q.explain.sql 79 | cat dss/queries/$i.sql >> dss/queries/$i.explain.sql; 80 | done 81 | 82 | Now you should have 44 files in the dss/queries directory. 22 of them will 83 | actually run the queries and the other 22 will generate EXPLAIN plan of 84 | the query (without actually running it). 85 | 86 | 87 | Running the benchmark 88 | --------------------- 89 | The actual benchmark is implemented in the 'tpch.sh' script. It expects 90 | an already prepared database and four parameters - directory where to place 91 | the results, database and user name. So to run it, do this: 92 | 93 | $ ./tpch.sh ./results tpch-db tpch-user 94 | 95 | and wait until the benchmark. 96 | 97 | 98 | Processing the results 99 | ---------------------- 100 | All the results are written into the output directory (first parameter). To get 101 | useful results (timing of each query, various statistics), you can use script 102 | process.php. It expects two parameters - input dir (with data collected by the 103 | tpch.sh script) and output file (in CSV format). For example like this: 104 | 105 | $ php process.php ./results output.csv 106 | 107 | This should give you nicely formatted CSV file. -------------------------------------------------------------------------------- /drop-caches.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | echo "3" > /proc/sys/vm/drop_caches 4 | -------------------------------------------------------------------------------- /dss/templates/1.sql: -------------------------------------------------------------------------------- 1 | -- $ID$ 2 | -- TPC-H/TPC-R Pricing Summary Report Query (Q1) 3 | -- Functional Query Definition 4 | -- Approved February 1998 5 | :x 6 | :o 7 | select 8 | l_returnflag, 9 | l_linestatus, 10 | sum(l_quantity) as sum_qty, 11 | sum(l_extendedprice) as sum_base_price, 12 | sum(l_extendedprice * (1 - l_discount)) as sum_disc_price, 13 | sum(l_extendedprice * (1 - l_discount) * (1 + l_tax)) as sum_charge, 14 | avg(l_quantity) as avg_qty, 15 | avg(l_extendedprice) as avg_price, 16 | avg(l_discount) as avg_disc, 17 | count(*) as count_order 18 | from 19 | lineitem 20 | where 21 | l_shipdate <= date '1998-12-01' - interval ':1' day 22 | group by 23 | l_returnflag, 24 | l_linestatus 25 | order by 26 | l_returnflag, 27 | l_linestatus 28 | LIMIT 1; 29 | -------------------------------------------------------------------------------- /dss/templates/10.sql: -------------------------------------------------------------------------------- 1 | -- $ID$ 2 | -- TPC-H/TPC-R Returned Item Reporting Query (Q10) 3 | -- Functional Query Definition 4 | -- Approved February 1998 5 | :x 6 | :o 7 | select 8 | c_custkey, 9 | c_name, 10 | sum(l_extendedprice * (1 - l_discount)) as revenue, 11 | c_acctbal, 12 | n_name, 13 | c_address, 14 | c_phone, 15 | c_comment 16 | from 17 | customer, 18 | orders, 19 | lineitem, 20 | nation 21 | where 22 | c_custkey = o_custkey 23 | and l_orderkey = o_orderkey 24 | and o_orderdate >= date ':1' 25 | and o_orderdate < date ':1' + interval '3' month 26 | and l_returnflag = 'R' 27 | and c_nationkey = n_nationkey 28 | group by 29 | c_custkey, 30 | c_name, 31 | c_acctbal, 32 | c_phone, 33 | n_name, 34 | c_address, 35 | c_comment 36 | order by 37 | revenue desc 38 | LIMIT 20; -------------------------------------------------------------------------------- /dss/templates/11.sql: -------------------------------------------------------------------------------- 1 | -- $ID$ 2 | -- TPC-H/TPC-R Important Stock Identification Query (Q11) 3 | -- Functional Query Definition 4 | -- Approved February 1998 5 | :x 6 | :o 7 | select 8 | ps_partkey, 9 | sum(ps_supplycost * ps_availqty) as value 10 | from 11 | partsupp, 12 | supplier, 13 | nation 14 | where 15 | ps_suppkey = s_suppkey 16 | and s_nationkey = n_nationkey 17 | and n_name = ':1' 18 | group by 19 | ps_partkey having 20 | sum(ps_supplycost * ps_availqty) > ( 21 | select 22 | sum(ps_supplycost * ps_availqty) * :2 23 | from 24 | partsupp, 25 | supplier, 26 | nation 27 | where 28 | ps_suppkey = s_suppkey 29 | and s_nationkey = n_nationkey 30 | and n_name = ':1' 31 | ) 32 | order by 33 | value desc 34 | LIMIT 1; -------------------------------------------------------------------------------- /dss/templates/12.sql: -------------------------------------------------------------------------------- 1 | -- $ID$ 2 | -- TPC-H/TPC-R Shipping Modes and Order Priority Query (Q12) 3 | -- Functional Query Definition 4 | -- Approved February 1998 5 | :x 6 | :o 7 | select 8 | l_shipmode, 9 | sum(case 10 | when o_orderpriority = '1-URGENT' 11 | or o_orderpriority = '2-HIGH' 12 | then 1 13 | else 0 14 | end) as high_line_count, 15 | sum(case 16 | when o_orderpriority <> '1-URGENT' 17 | and o_orderpriority <> '2-HIGH' 18 | then 1 19 | else 0 20 | end) as low_line_count 21 | from 22 | orders, 23 | lineitem 24 | where 25 | o_orderkey = l_orderkey 26 | and l_shipmode in (':1', ':2') 27 | and l_commitdate < l_receiptdate 28 | and l_shipdate < l_commitdate 29 | and l_receiptdate >= date ':3' 30 | and l_receiptdate < date ':3' + interval '1' year 31 | group by 32 | l_shipmode 33 | order by 34 | l_shipmode 35 | LIMIT 1; -------------------------------------------------------------------------------- /dss/templates/13.sql: -------------------------------------------------------------------------------- 1 | -- $ID$ 2 | -- TPC-H/TPC-R Customer Distribution Query (Q13) 3 | -- Functional Query Definition 4 | -- Approved February 1998 5 | :x 6 | :o 7 | select 8 | c_count, 9 | count(*) as custdist 10 | from 11 | ( 12 | select 13 | c_custkey, 14 | count(o_orderkey) 15 | from 16 | customer left outer join orders on 17 | c_custkey = o_custkey 18 | and o_comment not like '%:1%:2%' 19 | group by 20 | c_custkey 21 | ) as c_orders (c_custkey, c_count) 22 | group by 23 | c_count 24 | order by 25 | custdist desc, 26 | c_count desc 27 | LIMIT 1; -------------------------------------------------------------------------------- /dss/templates/14.sql: -------------------------------------------------------------------------------- 1 | -- $ID$ 2 | -- TPC-H/TPC-R Promotion Effect Query (Q14) 3 | -- Functional Query Definition 4 | -- Approved February 1998 5 | :x 6 | :o 7 | select 8 | 100.00 * sum(case 9 | when p_type like 'PROMO%' 10 | then l_extendedprice * (1 - l_discount) 11 | else 0 12 | end) / sum(l_extendedprice * (1 - l_discount)) as promo_revenue 13 | from 14 | lineitem, 15 | part 16 | where 17 | l_partkey = p_partkey 18 | and l_shipdate >= date ':1' 19 | and l_shipdate < date ':1' + interval '1' month 20 | LIMIT 1; -------------------------------------------------------------------------------- /dss/templates/15.sql: -------------------------------------------------------------------------------- 1 | -- $ID$ 2 | -- TPC-H/TPC-R Top Supplier Query (Q15) 3 | -- Functional Query Definition 4 | -- Approved February 1998 5 | :x 6 | create view revenue:s (supplier_no, total_revenue) as 7 | select 8 | l_suppkey, 9 | sum(l_extendedprice * (1 - l_discount)) 10 | from 11 | lineitem 12 | where 13 | l_shipdate >= date ':1' 14 | and l_shipdate < date ':1' + interval '3' month 15 | group by 16 | l_suppkey; 17 | 18 | :o 19 | select 20 | s_suppkey, 21 | s_name, 22 | s_address, 23 | s_phone, 24 | total_revenue 25 | from 26 | supplier, 27 | revenue:s 28 | where 29 | s_suppkey = supplier_no 30 | and total_revenue = ( 31 | select 32 | max(total_revenue) 33 | from 34 | revenue:s 35 | ) 36 | order by 37 | s_suppkey 38 | LIMIT 1; 39 | 40 | drop view revenue:s; -------------------------------------------------------------------------------- /dss/templates/16.sql: -------------------------------------------------------------------------------- 1 | -- $ID$ 2 | -- TPC-H/TPC-R Parts/Supplier Relationship Query (Q16) 3 | -- Functional Query Definition 4 | -- Approved February 1998 5 | :x 6 | :o 7 | select 8 | p_brand, 9 | p_type, 10 | p_size, 11 | count(distinct ps_suppkey) as supplier_cnt 12 | from 13 | partsupp, 14 | part 15 | where 16 | p_partkey = ps_partkey 17 | and p_brand <> ':1' 18 | and p_type not like ':2%' 19 | and p_size in (:3, :4, :5, :6, :7, :8, :9, :10) 20 | and ps_suppkey not in ( 21 | select 22 | s_suppkey 23 | from 24 | supplier 25 | where 26 | s_comment like '%Customer%Complaints%' 27 | ) 28 | group by 29 | p_brand, 30 | p_type, 31 | p_size 32 | order by 33 | supplier_cnt desc, 34 | p_brand, 35 | p_type, 36 | p_size 37 | LIMIT 1; -------------------------------------------------------------------------------- /dss/templates/17.sql: -------------------------------------------------------------------------------- 1 | -- $ID$ 2 | -- TPC-H/TPC-R Small-Quantity-Order Revenue Query (Q17) 3 | -- Functional Query Definition 4 | -- Approved February 1998 5 | :x 6 | :o 7 | select 8 | sum(l_extendedprice) / 7.0 as avg_yearly 9 | from 10 | lineitem, 11 | part, 12 | (SELECT l_partkey AS agg_partkey, 0.2 * avg(l_quantity) AS avg_quantity FROM lineitem GROUP BY l_partkey) part_agg 13 | where 14 | p_partkey = l_partkey 15 | and agg_partkey = l_partkey 16 | and p_brand = ':1' 17 | and p_container = ':2' 18 | and l_quantity < avg_quantity 19 | LIMIT 1; 20 | -------------------------------------------------------------------------------- /dss/templates/18.sql: -------------------------------------------------------------------------------- 1 | -- $ID$ 2 | -- TPC-H/TPC-R Large Volume Customer Query (Q18) 3 | -- Function Query Definition 4 | -- Approved February 1998 5 | :x 6 | :o 7 | select 8 | c_name, 9 | c_custkey, 10 | o_orderkey, 11 | o_orderdate, 12 | o_totalprice, 13 | sum(l_quantity) 14 | from 15 | customer, 16 | orders, 17 | lineitem 18 | where 19 | o_orderkey in ( 20 | select 21 | l_orderkey 22 | from 23 | lineitem 24 | group by 25 | l_orderkey having 26 | sum(l_quantity) > :1 27 | ) 28 | and c_custkey = o_custkey 29 | and o_orderkey = l_orderkey 30 | group by 31 | c_name, 32 | c_custkey, 33 | o_orderkey, 34 | o_orderdate, 35 | o_totalprice 36 | order by 37 | o_totalprice desc, 38 | o_orderdate 39 | LIMIT 100; -------------------------------------------------------------------------------- /dss/templates/19.sql: -------------------------------------------------------------------------------- 1 | -- $ID$ 2 | -- TPC-H/TPC-R Discounted Revenue Query (Q19) 3 | -- Functional Query Definition 4 | -- Approved February 1998 5 | :x 6 | :o 7 | select 8 | sum(l_extendedprice* (1 - l_discount)) as revenue 9 | from 10 | lineitem, 11 | part 12 | where 13 | ( 14 | p_partkey = l_partkey 15 | and p_brand = ':1' 16 | and p_container in ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG') 17 | and l_quantity >= :4 and l_quantity <= :4 + 10 18 | and p_size between 1 and 5 19 | and l_shipmode in ('AIR', 'AIR REG') 20 | and l_shipinstruct = 'DELIVER IN PERSON' 21 | ) 22 | or 23 | ( 24 | p_partkey = l_partkey 25 | and p_brand = ':2' 26 | and p_container in ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK') 27 | and l_quantity >= :5 and l_quantity <= :5 + 10 28 | and p_size between 1 and 10 29 | and l_shipmode in ('AIR', 'AIR REG') 30 | and l_shipinstruct = 'DELIVER IN PERSON' 31 | ) 32 | or 33 | ( 34 | p_partkey = l_partkey 35 | and p_brand = ':3' 36 | and p_container in ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG') 37 | and l_quantity >= :6 and l_quantity <= :6 + 10 38 | and p_size between 1 and 15 39 | and l_shipmode in ('AIR', 'AIR REG') 40 | and l_shipinstruct = 'DELIVER IN PERSON' 41 | ) 42 | LIMIT 1; -------------------------------------------------------------------------------- /dss/templates/2.sql: -------------------------------------------------------------------------------- 1 | -- $ID$ 2 | -- TPC-H/TPC-R Minimum Cost Supplier Query (Q2) 3 | -- Functional Query Definition 4 | -- Approved February 1998 5 | :x 6 | :o 7 | select 8 | s_acctbal, 9 | s_name, 10 | n_name, 11 | p_partkey, 12 | p_mfgr, 13 | s_address, 14 | s_phone, 15 | s_comment 16 | from 17 | part, 18 | supplier, 19 | partsupp, 20 | nation, 21 | region 22 | where 23 | p_partkey = ps_partkey 24 | and s_suppkey = ps_suppkey 25 | and p_size = :1 26 | and p_type like '%:2' 27 | and s_nationkey = n_nationkey 28 | and n_regionkey = r_regionkey 29 | and r_name = ':3' 30 | and ps_supplycost = ( 31 | select 32 | min(ps_supplycost) 33 | from 34 | partsupp, 35 | supplier, 36 | nation, 37 | region 38 | where 39 | p_partkey = ps_partkey 40 | and s_suppkey = ps_suppkey 41 | and s_nationkey = n_nationkey 42 | and n_regionkey = r_regionkey 43 | and r_name = ':3' 44 | ) 45 | order by 46 | s_acctbal desc, 47 | n_name, 48 | s_name, 49 | p_partkey 50 | LIMIT 100; 51 | -------------------------------------------------------------------------------- /dss/templates/20.sql: -------------------------------------------------------------------------------- 1 | -- $ID$ 2 | -- TPC-H/TPC-R Potential Part Promotion Query (Q20) 3 | -- Function Query Definition 4 | -- Approved February 1998 5 | :x 6 | :o 7 | select 8 | s_name, 9 | s_address 10 | from 11 | supplier, 12 | nation 13 | where 14 | s_suppkey in ( 15 | select 16 | ps_suppkey 17 | from 18 | partsupp, 19 | ( 20 | select 21 | l_partkey agg_partkey, 22 | l_suppkey agg_suppkey, 23 | 0.5 * sum(l_quantity) AS agg_quantity 24 | from 25 | lineitem 26 | where 27 | l_shipdate >= date ':2' 28 | and l_shipdate < date ':2' + interval '1' year 29 | group by 30 | l_partkey, 31 | l_suppkey 32 | ) agg_lineitem 33 | where 34 | agg_partkey = ps_partkey 35 | and agg_suppkey = ps_suppkey 36 | and ps_partkey in ( 37 | select 38 | p_partkey 39 | from 40 | part 41 | where 42 | p_name like ':1%' 43 | ) 44 | and ps_availqty > agg_quantity 45 | ) 46 | and s_nationkey = n_nationkey 47 | and n_name = ':3' 48 | order by 49 | s_name 50 | LIMIT 1; 51 | -------------------------------------------------------------------------------- /dss/templates/21.sql: -------------------------------------------------------------------------------- 1 | -- $ID$ 2 | -- TPC-H/TPC-R Suppliers Who Kept Orders Waiting Query (Q21) 3 | -- Functional Query Definition 4 | -- Approved February 1998 5 | :x 6 | :o 7 | select 8 | s_name, 9 | count(*) as numwait 10 | from 11 | supplier, 12 | lineitem l1, 13 | orders, 14 | nation 15 | where 16 | s_suppkey = l1.l_suppkey 17 | and o_orderkey = l1.l_orderkey 18 | and o_orderstatus = 'F' 19 | and l1.l_receiptdate > l1.l_commitdate 20 | and exists ( 21 | select 22 | * 23 | from 24 | lineitem l2 25 | where 26 | l2.l_orderkey = l1.l_orderkey 27 | and l2.l_suppkey <> l1.l_suppkey 28 | ) 29 | and not exists ( 30 | select 31 | * 32 | from 33 | lineitem l3 34 | where 35 | l3.l_orderkey = l1.l_orderkey 36 | and l3.l_suppkey <> l1.l_suppkey 37 | and l3.l_receiptdate > l3.l_commitdate 38 | ) 39 | and s_nationkey = n_nationkey 40 | and n_name = ':1' 41 | group by 42 | s_name 43 | order by 44 | numwait desc, 45 | s_name 46 | LIMIT 100; -------------------------------------------------------------------------------- /dss/templates/22.sql: -------------------------------------------------------------------------------- 1 | -- $ID$ 2 | -- TPC-H/TPC-R Global Sales Opportunity Query (Q22) 3 | -- Functional Query Definition 4 | -- Approved February 1998 5 | :x 6 | :o 7 | select 8 | cntrycode, 9 | count(*) as numcust, 10 | sum(c_acctbal) as totacctbal 11 | from 12 | ( 13 | select 14 | substring(c_phone from 1 for 2) as cntrycode, 15 | c_acctbal 16 | from 17 | customer 18 | where 19 | substring(c_phone from 1 for 2) in 20 | (':1', ':2', ':3', ':4', ':5', ':6', ':7') 21 | and c_acctbal > ( 22 | select 23 | avg(c_acctbal) 24 | from 25 | customer 26 | where 27 | c_acctbal > 0.00 28 | and substring(c_phone from 1 for 2) in 29 | (':1', ':2', ':3', ':4', ':5', ':6', ':7') 30 | ) 31 | and not exists ( 32 | select 33 | * 34 | from 35 | orders 36 | where 37 | o_custkey = c_custkey 38 | ) 39 | ) as custsale 40 | group by 41 | cntrycode 42 | order by 43 | cntrycode 44 | LIMIT 1; -------------------------------------------------------------------------------- /dss/templates/3.sql: -------------------------------------------------------------------------------- 1 | -- $ID$ 2 | -- TPC-H/TPC-R Shipping Priority Query (Q3) 3 | -- Functional Query Definition 4 | -- Approved February 1998 5 | :x 6 | :o 7 | select 8 | l_orderkey, 9 | sum(l_extendedprice * (1 - l_discount)) as revenue, 10 | o_orderdate, 11 | o_shippriority 12 | from 13 | customer, 14 | orders, 15 | lineitem 16 | where 17 | c_mktsegment = ':1' 18 | and c_custkey = o_custkey 19 | and l_orderkey = o_orderkey 20 | and o_orderdate < date ':2' 21 | and l_shipdate > date ':2' 22 | group by 23 | l_orderkey, 24 | o_orderdate, 25 | o_shippriority 26 | order by 27 | revenue desc, 28 | o_orderdate 29 | LIMIT 10; -------------------------------------------------------------------------------- /dss/templates/4.sql: -------------------------------------------------------------------------------- 1 | -- $ID$ 2 | -- TPC-H/TPC-R Order Priority Checking Query (Q4) 3 | -- Functional Query Definition 4 | -- Approved February 1998 5 | :x 6 | :o 7 | select 8 | o_orderpriority, 9 | count(*) as order_count 10 | from 11 | orders 12 | where 13 | o_orderdate >= date ':1' 14 | and o_orderdate < date ':1' + interval '3' month 15 | and exists ( 16 | select 17 | * 18 | from 19 | lineitem 20 | where 21 | l_orderkey = o_orderkey 22 | and l_commitdate < l_receiptdate 23 | ) 24 | group by 25 | o_orderpriority 26 | order by 27 | o_orderpriority 28 | LIMIT 1; 29 | -------------------------------------------------------------------------------- /dss/templates/5.sql: -------------------------------------------------------------------------------- 1 | -- $ID$ 2 | -- TPC-H/TPC-R Local Supplier Volume Query (Q5) 3 | -- Functional Query Definition 4 | -- Approved February 1998 5 | :x 6 | :o 7 | select 8 | n_name, 9 | sum(l_extendedprice * (1 - l_discount)) as revenue 10 | from 11 | customer, 12 | orders, 13 | lineitem, 14 | supplier, 15 | nation, 16 | region 17 | where 18 | c_custkey = o_custkey 19 | and l_orderkey = o_orderkey 20 | and l_suppkey = s_suppkey 21 | and c_nationkey = s_nationkey 22 | and s_nationkey = n_nationkey 23 | and n_regionkey = r_regionkey 24 | and r_name = ':1' 25 | and o_orderdate >= date ':2' 26 | and o_orderdate < date ':2' + interval '1' year 27 | group by 28 | n_name 29 | order by 30 | revenue desc 31 | LIMIT 1; 32 | -------------------------------------------------------------------------------- /dss/templates/6.sql: -------------------------------------------------------------------------------- 1 | -- $ID$ 2 | -- TPC-H/TPC-R Forecasting Revenue Change Query (Q6) 3 | -- Functional Query Definition 4 | -- Approved February 1998 5 | :x 6 | :o 7 | select 8 | sum(l_extendedprice * l_discount) as revenue 9 | from 10 | lineitem 11 | where 12 | l_shipdate >= date ':1' 13 | and l_shipdate < date ':1' + interval '1' year 14 | and l_discount between :2 - 0.01 and :2 + 0.01 15 | and l_quantity < :3 16 | LIMIT 1; 17 | -------------------------------------------------------------------------------- /dss/templates/7.sql: -------------------------------------------------------------------------------- 1 | -- $ID$ 2 | -- TPC-H/TPC-R Volume Shipping Query (Q7) 3 | -- Functional Query Definition 4 | -- Approved February 1998 5 | :x 6 | :o 7 | select 8 | supp_nation, 9 | cust_nation, 10 | l_year, 11 | sum(volume) as revenue 12 | from 13 | ( 14 | select 15 | n1.n_name as supp_nation, 16 | n2.n_name as cust_nation, 17 | extract(year from l_shipdate) as l_year, 18 | l_extendedprice * (1 - l_discount) as volume 19 | from 20 | supplier, 21 | lineitem, 22 | orders, 23 | customer, 24 | nation n1, 25 | nation n2 26 | where 27 | s_suppkey = l_suppkey 28 | and o_orderkey = l_orderkey 29 | and c_custkey = o_custkey 30 | and s_nationkey = n1.n_nationkey 31 | and c_nationkey = n2.n_nationkey 32 | and ( 33 | (n1.n_name = ':1' and n2.n_name = ':2') 34 | or (n1.n_name = ':2' and n2.n_name = ':1') 35 | ) 36 | and l_shipdate between date '1995-01-01' and date '1996-12-31' 37 | ) as shipping 38 | group by 39 | supp_nation, 40 | cust_nation, 41 | l_year 42 | order by 43 | supp_nation, 44 | cust_nation, 45 | l_year 46 | LIMIT 1; -------------------------------------------------------------------------------- /dss/templates/8.sql: -------------------------------------------------------------------------------- 1 | -- $ID$ 2 | -- TPC-H/TPC-R National Market Share Query (Q8) 3 | -- Functional Query Definition 4 | -- Approved February 1998 5 | :x 6 | :o 7 | select 8 | o_year, 9 | sum(case 10 | when nation = ':1' then volume 11 | else 0 12 | end) / sum(volume) as mkt_share 13 | from 14 | ( 15 | select 16 | extract(year from o_orderdate) as o_year, 17 | l_extendedprice * (1 - l_discount) as volume, 18 | n2.n_name as nation 19 | from 20 | part, 21 | supplier, 22 | lineitem, 23 | orders, 24 | customer, 25 | nation n1, 26 | nation n2, 27 | region 28 | where 29 | p_partkey = l_partkey 30 | and s_suppkey = l_suppkey 31 | and l_orderkey = o_orderkey 32 | and o_custkey = c_custkey 33 | and c_nationkey = n1.n_nationkey 34 | and n1.n_regionkey = r_regionkey 35 | and r_name = ':2' 36 | and s_nationkey = n2.n_nationkey 37 | and o_orderdate between date '1995-01-01' and date '1996-12-31' 38 | and p_type = ':3' 39 | ) as all_nations 40 | group by 41 | o_year 42 | order by 43 | o_year 44 | LIMIT 1; -------------------------------------------------------------------------------- /dss/templates/9.sql: -------------------------------------------------------------------------------- 1 | -- $ID$ 2 | -- TPC-H/TPC-R Product Type Profit Measure Query (Q9) 3 | -- Functional Query Definition 4 | -- Approved February 1998 5 | :x 6 | :o 7 | select 8 | nation, 9 | o_year, 10 | sum(amount) as sum_profit 11 | from 12 | ( 13 | select 14 | n_name as nation, 15 | extract(year from o_orderdate) as o_year, 16 | l_extendedprice * (1 - l_discount) - ps_supplycost * l_quantity as amount 17 | from 18 | part, 19 | supplier, 20 | lineitem, 21 | partsupp, 22 | orders, 23 | nation 24 | where 25 | s_suppkey = l_suppkey 26 | and ps_suppkey = l_suppkey 27 | and ps_partkey = l_partkey 28 | and p_partkey = l_partkey 29 | and o_orderkey = l_orderkey 30 | and s_nationkey = n_nationkey 31 | and p_name like '%:1%' 32 | ) as profit 33 | group by 34 | nation, 35 | o_year 36 | order by 37 | nation, 38 | o_year desc 39 | LIMIT 1; -------------------------------------------------------------------------------- /dss/tpch-alter.sql: -------------------------------------------------------------------------------- 1 | 2 | -- foreign keys 3 | 4 | ALTER TABLE SUPPLIER ADD FOREIGN KEY (S_NATIONKEY) REFERENCES NATION(N_NATIONKEY); 5 | 6 | ALTER TABLE PARTSUPP ADD FOREIGN KEY (PS_PARTKEY) REFERENCES PART(P_PARTKEY); 7 | ALTER TABLE PARTSUPP ADD FOREIGN KEY (PS_SUPPKEY) REFERENCES SUPPLIER(S_SUPPKEY); 8 | 9 | ALTER TABLE CUSTOMER ADD FOREIGN KEY (C_NATIONKEY) REFERENCES NATION(N_NATIONKEY); 10 | 11 | ALTER TABLE ORDERS ADD FOREIGN KEY (O_CUSTKEY) REFERENCES CUSTOMER(C_CUSTKEY); 12 | 13 | ALTER TABLE LINEITEM ADD FOREIGN KEY (L_ORDERKEY) REFERENCES ORDERS(O_ORDERKEY); 14 | ALTER TABLE LINEITEM ADD FOREIGN KEY (L_PARTKEY,L_SUPPKEY) REFERENCES PARTSUPP(PS_PARTKEY,PS_SUPPKEY); 15 | 16 | ALTER TABLE NATION ADD FOREIGN KEY (N_REGIONKEY) REFERENCES REGION(R_REGIONKEY); 17 | -------------------------------------------------------------------------------- /dss/tpch-create.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE PART ( 2 | 3 | P_PARTKEY SERIAL, 4 | P_NAME VARCHAR(55), 5 | P_MFGR CHAR(25), 6 | P_BRAND CHAR(10), 7 | P_TYPE VARCHAR(25), 8 | P_SIZE INTEGER, 9 | P_CONTAINER CHAR(10), 10 | P_RETAILPRICE DECIMAL, 11 | P_COMMENT VARCHAR(23) 12 | ); 13 | 14 | CREATE TABLE SUPPLIER ( 15 | S_SUPPKEY SERIAL, 16 | S_NAME CHAR(25), 17 | S_ADDRESS VARCHAR(40), 18 | S_NATIONKEY INTEGER NOT NULL, -- references N_NATIONKEY 19 | S_PHONE CHAR(15), 20 | S_ACCTBAL DECIMAL, 21 | S_COMMENT VARCHAR(101) 22 | ); 23 | 24 | CREATE TABLE PARTSUPP ( 25 | PS_PARTKEY INTEGER NOT NULL, -- references P_PARTKEY 26 | PS_SUPPKEY INTEGER NOT NULL, -- references S_SUPPKEY 27 | PS_AVAILQTY INTEGER, 28 | PS_SUPPLYCOST DECIMAL, 29 | PS_COMMENT VARCHAR(199) 30 | ); 31 | 32 | CREATE TABLE CUSTOMER ( 33 | C_CUSTKEY SERIAL, 34 | C_NAME VARCHAR(25), 35 | C_ADDRESS VARCHAR(40), 36 | C_NATIONKEY INTEGER NOT NULL, -- references N_NATIONKEY 37 | C_PHONE CHAR(15), 38 | C_ACCTBAL DECIMAL, 39 | C_MKTSEGMENT CHAR(10), 40 | C_COMMENT VARCHAR(117) 41 | ); 42 | 43 | CREATE TABLE ORDERS ( 44 | O_ORDERKEY SERIAL, 45 | O_CUSTKEY INTEGER NOT NULL, -- references C_CUSTKEY 46 | O_ORDERSTATUS CHAR(1), 47 | O_TOTALPRICE DECIMAL, 48 | O_ORDERDATE DATE, 49 | O_ORDERPRIORITY CHAR(15), 50 | O_CLERK CHAR(15), 51 | O_SHIPPRIORITY INTEGER, 52 | O_COMMENT VARCHAR(79) 53 | ); 54 | 55 | CREATE TABLE LINEITEM ( 56 | L_ORDERKEY INTEGER NOT NULL, -- references O_ORDERKEY 57 | L_PARTKEY INTEGER NOT NULL, -- references P_PARTKEY (compound fk to PARTSUPP) 58 | L_SUPPKEY INTEGER NOT NULL, -- references S_SUPPKEY (compound fk to PARTSUPP) 59 | L_LINENUMBER INTEGER, 60 | L_QUANTITY DECIMAL, 61 | L_EXTENDEDPRICE DECIMAL, 62 | L_DISCOUNT DECIMAL, 63 | L_TAX DECIMAL, 64 | L_RETURNFLAG CHAR(1), 65 | L_LINESTATUS CHAR(1), 66 | L_SHIPDATE DATE, 67 | L_COMMITDATE DATE, 68 | L_RECEIPTDATE DATE, 69 | L_SHIPINSTRUCT CHAR(25), 70 | L_SHIPMODE CHAR(10), 71 | L_COMMENT VARCHAR(44) 72 | ); 73 | 74 | CREATE TABLE NATION ( 75 | N_NATIONKEY SERIAL, 76 | N_NAME CHAR(25), 77 | N_REGIONKEY INTEGER NOT NULL, -- references R_REGIONKEY 78 | N_COMMENT VARCHAR(152) 79 | ); 80 | 81 | CREATE TABLE REGION ( 82 | R_REGIONKEY SERIAL, 83 | R_NAME CHAR(25), 84 | R_COMMENT VARCHAR(152) 85 | ); 86 | -------------------------------------------------------------------------------- /dss/tpch-index.sql: -------------------------------------------------------------------------------- 1 | 2 | -- indexes on the foreign keys 3 | 4 | CREATE INDEX IDX_SUPPLIER_NATION_KEY ON SUPPLIER (S_NATIONKEY); 5 | 6 | CREATE INDEX IDX_PARTSUPP_PARTKEY ON PARTSUPP (PS_PARTKEY); 7 | CREATE INDEX IDX_PARTSUPP_SUPPKEY ON PARTSUPP (PS_SUPPKEY); 8 | 9 | CREATE INDEX IDX_CUSTOMER_NATIONKEY ON CUSTOMER (C_NATIONKEY); 10 | 11 | CREATE INDEX IDX_ORDERS_CUSTKEY ON ORDERS (O_CUSTKEY); 12 | 13 | CREATE INDEX IDX_LINEITEM_ORDERKEY ON LINEITEM (L_ORDERKEY); 14 | CREATE INDEX IDX_LINEITEM_PART_SUPP ON LINEITEM (L_PARTKEY,L_SUPPKEY); 15 | 16 | CREATE INDEX IDX_NATION_REGIONKEY ON NATION (N_REGIONKEY); 17 | 18 | 19 | -- aditional indexes 20 | 21 | CREATE INDEX IDX_LINEITEM_SHIPDATE ON LINEITEM (L_SHIPDATE, L_DISCOUNT, L_QUANTITY); 22 | 23 | CREATE INDEX IDX_ORDERS_ORDERDATE ON ORDERS (O_ORDERDATE); -------------------------------------------------------------------------------- /dss/tpch-load.sql: -------------------------------------------------------------------------------- 1 | BEGIN; 2 | 3 | CREATE TABLE PART ( 4 | P_PARTKEY SERIAL8, 5 | P_NAME VARCHAR(55), 6 | P_MFGR CHAR(25), 7 | P_BRAND CHAR(10), 8 | P_TYPE VARCHAR(25), 9 | P_SIZE INTEGER, 10 | P_CONTAINER CHAR(10), 11 | P_RETAILPRICE DECIMAL, 12 | P_COMMENT VARCHAR(23) 13 | ) with (APPENDONLY=true,BLOCKSIZE=2097152,ORIENTATION=COLUMN,CHECKSUM=true,OIDS=false) DISTRIBUTED BY (p_partkey); 14 | 15 | COPY part FROM '/tmp/dss-data/part.csv' WITH csv DELIMITER '|'; 16 | 17 | COMMIT; 18 | 19 | BEGIN; 20 | 21 | CREATE TABLE REGION ( 22 | R_REGIONKEY SERIAL8, 23 | R_NAME CHAR(25), 24 | R_COMMENT VARCHAR(152) 25 | ) with (APPENDONLY=true,BLOCKSIZE=2097152,ORIENTATION=COLUMN,CHECKSUM=true,OIDS=false) DISTRIBUTED BY (r_regionkey); 26 | 27 | COPY region FROM '/tmp/dss-data/region.csv' WITH csv DELIMITER '|'; 28 | 29 | COMMIT; 30 | 31 | BEGIN; 32 | 33 | CREATE TABLE NATION ( 34 | N_NATIONKEY SERIAL8, 35 | N_NAME CHAR(25), 36 | N_REGIONKEY BIGINT NOT NULL, -- references R_REGIONKEY 37 | N_COMMENT VARCHAR(152) 38 | ) with (APPENDONLY=true,BLOCKSIZE=2097152,ORIENTATION=COLUMN,CHECKSUM=true,OIDS=false) DISTRIBUTED BY (n_nationkey); 39 | 40 | COPY nation FROM '/tmp/dss-data/nation.csv' WITH csv DELIMITER '|'; 41 | 42 | COMMIT; 43 | 44 | BEGIN; 45 | 46 | CREATE TABLE SUPPLIER ( 47 | S_SUPPKEY SERIAL8, 48 | S_NAME CHAR(25), 49 | S_ADDRESS VARCHAR(40), 50 | S_NATIONKEY BIGINT NOT NULL, -- references N_NATIONKEY 51 | S_PHONE CHAR(15), 52 | S_ACCTBAL DECIMAL, 53 | S_COMMENT VARCHAR(101) 54 | ) with (APPENDONLY=true,BLOCKSIZE=2097152,ORIENTATION=COLUMN,CHECKSUM=true,OIDS=false) DISTRIBUTED BY (s_suppkey); 55 | 56 | COPY supplier FROM '/tmp/dss-data/supplier.csv' WITH csv DELIMITER '|'; 57 | 58 | COMMIT; 59 | 60 | BEGIN; 61 | 62 | CREATE TABLE CUSTOMER ( 63 | C_CUSTKEY SERIAL8, 64 | C_NAME VARCHAR(25), 65 | C_ADDRESS VARCHAR(40), 66 | C_NATIONKEY BIGINT NOT NULL, -- references N_NATIONKEY 67 | C_PHONE CHAR(15), 68 | C_ACCTBAL DECIMAL, 69 | C_MKTSEGMENT CHAR(10), 70 | C_COMMENT VARCHAR(117) 71 | ) with (APPENDONLY=true,BLOCKSIZE=2097152,ORIENTATION=COLUMN,CHECKSUM=true,OIDS=false) DISTRIBUTED BY (c_custkey); 72 | 73 | COPY customer FROM '/tmp/dss-data/customer.csv' WITH csv DELIMITER '|'; 74 | 75 | COMMIT; 76 | 77 | BEGIN; 78 | 79 | CREATE TABLE PARTSUPP ( 80 | PS_PARTKEY BIGINT NOT NULL, -- references P_PARTKEY 81 | PS_SUPPKEY BIGINT NOT NULL, -- references S_SUPPKEY 82 | PS_AVAILQTY INTEGER, 83 | PS_SUPPLYCOST DECIMAL, 84 | PS_COMMENT VARCHAR(199) 85 | ) with (APPENDONLY=true,BLOCKSIZE=2097152,ORIENTATION=COLUMN,CHECKSUM=true,OIDS=false) DISTRIBUTED BY (ps_partkey,ps_suppkey); 86 | 87 | COPY partsupp FROM '/tmp/dss-data/partsupp.csv' WITH csv DELIMITER '|'; 88 | 89 | COMMIT; 90 | 91 | BEGIN; 92 | 93 | CREATE TABLE ORDERS ( 94 | O_ORDERKEY SERIAL8, 95 | O_CUSTKEY BIGINT NOT NULL, -- references C_CUSTKEY 96 | O_ORDERSTATUS CHAR(1), 97 | O_TOTALPRICE DECIMAL, 98 | O_ORDERDATE DATE, 99 | O_ORDERPRIORITY CHAR(15), 100 | O_CLERK CHAR(15), 101 | O_SHIPPRIORITY INTEGER, 102 | O_COMMENT VARCHAR(79) 103 | ) with (APPENDONLY=true,BLOCKSIZE=2097152,ORIENTATION=COLUMN,CHECKSUM=true,OIDS=false) DISTRIBUTED BY (o_orderkey); 104 | 105 | COPY orders FROM '/tmp/dss-data/orders.csv' WITH csv DELIMITER '|'; 106 | 107 | COMMIT; 108 | 109 | BEGIN; 110 | 111 | CREATE TABLE LINEITEM ( 112 | L_ORDERKEY BIGINT NOT NULL, -- references O_ORDERKEY 113 | L_PARTKEY BIGINT NOT NULL, -- references P_PARTKEY (compound fk to PARTSUPP) 114 | L_SUPPKEY BIGINT NOT NULL, -- references S_SUPPKEY (compound fk to PARTSUPP) 115 | L_LINENUMBER INTEGER, 116 | L_QUANTITY DECIMAL, 117 | L_EXTENDEDPRICE DECIMAL, 118 | L_DISCOUNT DECIMAL, 119 | L_TAX DECIMAL, 120 | L_RETURNFLAG CHAR(1), 121 | L_LINESTATUS CHAR(1), 122 | L_SHIPDATE DATE, 123 | L_COMMITDATE DATE, 124 | L_RECEIPTDATE DATE, 125 | L_SHIPINSTRUCT CHAR(25), 126 | L_SHIPMODE CHAR(10), 127 | L_COMMENT VARCHAR(44) 128 | ) with (APPENDONLY=true,BLOCKSIZE=2097152,ORIENTATION=COLUMN,CHECKSUM=true,OIDS=false) DISTRIBUTED BY (l_orderkey, l_linenumber); 129 | 130 | COPY lineitem FROM '/tmp/dss-data/lineitem.csv' WITH csv DELIMITER '|'; 131 | 132 | COMMIT; 133 | -------------------------------------------------------------------------------- /dss/tpch-load_pg.sql: -------------------------------------------------------------------------------- 1 | BEGIN; 2 | 3 | CREATE TABLE PART ( 4 | 5 | P_PARTKEY SERIAL, 6 | P_NAME VARCHAR(55), 7 | P_MFGR CHAR(25), 8 | P_BRAND CHAR(10), 9 | P_TYPE VARCHAR(25), 10 | P_SIZE INTEGER, 11 | P_CONTAINER CHAR(10), 12 | P_RETAILPRICE DECIMAL, 13 | P_COMMENT VARCHAR(23) 14 | ); 15 | 16 | COPY part FROM '/tmp/dss-data/part.csv' WITH csv DELIMITER '|'; 17 | 18 | COMMIT; 19 | 20 | BEGIN; 21 | 22 | CREATE TABLE REGION ( 23 | R_REGIONKEY SERIAL, 24 | R_NAME CHAR(25), 25 | R_COMMENT VARCHAR(152) 26 | ); 27 | 28 | COPY region FROM '/tmp/dss-data/region.csv' WITH (FORMAT csv, DELIMITER '|'); 29 | 30 | COMMIT; 31 | 32 | BEGIN; 33 | 34 | CREATE TABLE NATION ( 35 | N_NATIONKEY SERIAL, 36 | N_NAME CHAR(25), 37 | N_REGIONKEY BIGINT NOT NULL, -- references R_REGIONKEY 38 | N_COMMENT VARCHAR(152) 39 | ); 40 | 41 | COPY nation FROM '/tmp/dss-data/nation.csv' WITH (FORMAT csv, DELIMITER '|'); 42 | 43 | COMMIT; 44 | 45 | BEGIN; 46 | 47 | CREATE TABLE SUPPLIER ( 48 | S_SUPPKEY SERIAL, 49 | S_NAME CHAR(25), 50 | S_ADDRESS VARCHAR(40), 51 | S_NATIONKEY BIGINT NOT NULL, -- references N_NATIONKEY 52 | S_PHONE CHAR(15), 53 | S_ACCTBAL DECIMAL, 54 | S_COMMENT VARCHAR(101) 55 | ); 56 | 57 | COPY supplier FROM '/tmp/dss-data/supplier.csv' WITH (FORMAT csv, DELIMITER '|'); 58 | 59 | COMMIT; 60 | 61 | BEGIN; 62 | 63 | CREATE TABLE CUSTOMER ( 64 | C_CUSTKEY SERIAL, 65 | C_NAME VARCHAR(25), 66 | C_ADDRESS VARCHAR(40), 67 | C_NATIONKEY BIGINT NOT NULL, -- references N_NATIONKEY 68 | C_PHONE CHAR(15), 69 | C_ACCTBAL DECIMAL, 70 | C_MKTSEGMENT CHAR(10), 71 | C_COMMENT VARCHAR(117) 72 | ); 73 | 74 | COPY customer FROM '/tmp/dss-data/customer.csv' WITH (FORMAT csv, DELIMITER '|'); 75 | 76 | COMMIT; 77 | 78 | BEGIN; 79 | 80 | CREATE TABLE PARTSUPP ( 81 | PS_PARTKEY BIGINT NOT NULL, -- references P_PARTKEY 82 | PS_SUPPKEY BIGINT NOT NULL, -- references S_SUPPKEY 83 | PS_AVAILQTY INTEGER, 84 | PS_SUPPLYCOST DECIMAL, 85 | PS_COMMENT VARCHAR(199) 86 | ); 87 | 88 | COPY partsupp FROM '/tmp/dss-data/partsupp.csv' WITH (FORMAT csv, DELIMITER '|'); 89 | 90 | COMMIT; 91 | 92 | BEGIN; 93 | 94 | CREATE TABLE ORDERS ( 95 | O_ORDERKEY SERIAL, 96 | O_CUSTKEY BIGINT NOT NULL, -- references C_CUSTKEY 97 | O_ORDERSTATUS CHAR(1), 98 | O_TOTALPRICE DECIMAL, 99 | O_ORDERDATE DATE, 100 | O_ORDERPRIORITY CHAR(15), 101 | O_CLERK CHAR(15), 102 | O_SHIPPRIORITY INTEGER, 103 | O_COMMENT VARCHAR(79) 104 | ); 105 | 106 | COPY orders FROM '/tmp/dss-data/orders.csv' WITH (FORMAT csv, DELIMITER '|'); 107 | 108 | COMMIT; 109 | 110 | BEGIN; 111 | 112 | CREATE TABLE LINEITEM ( 113 | L_ORDERKEY BIGINT NOT NULL, -- references O_ORDERKEY 114 | L_PARTKEY BIGINT NOT NULL, -- references P_PARTKEY (compound fk to PARTSUPP) 115 | L_SUPPKEY BIGINT NOT NULL, -- references S_SUPPKEY (compound fk to PARTSUPP) 116 | L_LINENUMBER INTEGER, 117 | L_QUANTITY DECIMAL, 118 | L_EXTENDEDPRICE DECIMAL, 119 | L_DISCOUNT DECIMAL, 120 | L_TAX DECIMAL, 121 | L_RETURNFLAG CHAR(1), 122 | L_LINESTATUS CHAR(1), 123 | L_SHIPDATE DATE, 124 | L_COMMITDATE DATE, 125 | L_RECEIPTDATE DATE, 126 | L_SHIPINSTRUCT CHAR(25), 127 | L_SHIPMODE CHAR(10), 128 | L_COMMENT VARCHAR(44) 129 | ); 130 | 131 | COPY lineitem FROM '/tmp/dss-data/lineitem.csv' WITH (FORMAT csv, DELIMITER '|'); 132 | 133 | COMMIT; 134 | -------------------------------------------------------------------------------- /dss/tpch-pkeys.sql: -------------------------------------------------------------------------------- 1 | ALTER TABLE PART ADD PRIMARY KEY (P_PARTKEY); 2 | ALTER TABLE SUPPLIER ADD PRIMARY KEY (S_SUPPKEY); 3 | ALTER TABLE PARTSUPP ADD PRIMARY KEY (PS_PARTKEY, PS_SUPPKEY); 4 | ALTER TABLE CUSTOMER ADD PRIMARY KEY (C_CUSTKEY); 5 | ALTER TABLE ORDERS ADD PRIMARY KEY (O_ORDERKEY); 6 | ALTER TABLE LINEITEM ADD PRIMARY KEY (L_ORDERKEY, L_LINENUMBER); 7 | ALTER TABLE NATION ADD PRIMARY KEY (N_NATIONKEY); 8 | ALTER TABLE REGION ADD PRIMARY KEY (R_REGIONKEY); -------------------------------------------------------------------------------- /process.php: -------------------------------------------------------------------------------- 1 | 0) { 121 | 122 | $tmp = explode('=', $line); 123 | $qn = intval($tmp[0]); /* query id */ 124 | 125 | $queries[$qn]['duration'] = floatval($tmp[1]); 126 | $queries[$qn]['hash'] = get_plan_hash("$dir/explain/$qn"); 127 | 128 | } 129 | } 130 | 131 | return $queries; 132 | 133 | } 134 | 135 | /* loads tpc-h results from the directory */ 136 | /* param $dir - directory with benchmark results */ 137 | function load_tpch($dir) { 138 | $out = array(); 139 | $out['stats'] = load_stats($dir); 140 | $out['queries'] = load_queries($dir); 141 | return $out; 142 | } 143 | 144 | function score_eval($current, $min, $max = QUERY_TIMEOUT) { 145 | 146 | // cancelled queries should get 0 147 | if ($current >= $max) { 148 | return 0; 149 | } 150 | 151 | // otherwise use the inverse (always "current > min") 152 | return $min/$current; 153 | 154 | } 155 | 156 | function time_eval($current, $max = QUERY_TIMEOUT) { 157 | 158 | return min(floatval($current), $max); 159 | 160 | } 161 | 162 | /* loads bench.log and appends it to the results */ 163 | /* param $data - benchmark data */ 164 | /* param $logfile - logfile to read data from */ 165 | function parse_log(&$data, $logfile) { 166 | 167 | $log = file($logfile); 168 | 169 | $t = 0; 170 | 171 | $ddir = ''; 172 | $idir = ''; 173 | 174 | for ($i = 0; $i < count($log); $i++) { 175 | $matches = array(); 176 | if (preg_match('/[0-9][0-9]:[0-9][0-9]:[0-9][0-9] \[([0-9]+)\] : preparing TPC-H database/', $log[$i], $matches)) { 177 | $t = intval($matches[1]); 178 | } else if (preg_match('/[0-9][0-9]:[0-9][0-9]:[0-9][0-9] \[([0-9]+)\] : loading data/', $log[$i], $matches)) { 179 | $t = intval($matches[1]); 180 | } else if (preg_match('/[0-9][0-9]:[0-9][0-9]:[0-9][0-9] \[([0-9]+)\] : creating primary keys/', $log[$i], $matches)) { 181 | $data['stats']['load'] = intval($matches[1]) - $t; 182 | $t = intval($matches[1]); 183 | } else if (preg_match('/[0-9][0-9]:[0-9][0-9]:[0-9][0-9] \[([0-9]+)\] : creating foreign keys/', $log[$i], $matches)) { 184 | $data['stats']['pkeys'] = intval($matches[1]) - $t; 185 | $t = intval($matches[1]); 186 | } else if (preg_match('/[0-9][0-9]:[0-9][0-9]:[0-9][0-9] \[([0-9]+)\] : creating indexes/', $log[$i], $matches)) { 187 | $data['stats']['fkeys'] = intval($matches[1]) - $t; 188 | $t = intval($matches[1]); 189 | } else if (preg_match('/[0-9][0-9]:[0-9][0-9]:[0-9][0-9] \[([0-9]+)\] : analyzing/', $log[$i], $matches)) { 190 | $data['stats']['indexes'] = intval($matches[1]) - $t; 191 | $t = intval($matches[1]); 192 | } else if (preg_match('/[0-9][0-9]:[0-9][0-9]:[0-9][0-9] \[([0-9]+)\] : running TPC-H benchmark/', $log[$i], $matches)) { 193 | $data['stats']['analyze'] = intval($matches[1]) - $t; 194 | $t = intval($matches[1]); 195 | } else if (preg_match('/[0-9][0-9]:[0-9][0-9]:[0-9][0-9] \[([0-9]+)\] : finished TPC-H benchmark/', $log[$i], $matches)) { 196 | $data['stats']['benchmark'] = intval($matches[1]) - $t; 197 | $t = intval($matches[1]); 198 | } 199 | 200 | } 201 | 202 | } 203 | 204 | /* writes benchmark results to the CSV file */ 205 | /* param $data - benchmark data */ 206 | /* param $logfile - logfile to read data from */ 207 | function print_tpch_csv($data, $outfile) { 208 | 209 | $fd = fopen($outfile, "a"); 210 | 211 | fwrite($fd, 'tpch_load;tpch_pkeys;tpch_fkeys;tpch_indexes;tpch_analyze;tpch_total;' . 212 | 'query_1;query_2;query_3;query_4;query_5;query_6;query_7;query_8;query_9;query_10;query_11;query_12;query_13;' . 213 | 'query_14;query_15;query_16;query_17;query_18;query_19;query_20;query_21;query_22;'. 214 | 'query_1_hash;query_2_hash;query_3_hash;query_4_hash;query_5_hash;query_6_hash;query_7_hash;query_8_hash;query_9_hash;' . 215 | 'query_10_hash;query_11_hash;query_12_hash;query_13_hash;query_14_hash;query_15_hash;query_16_hash;query_17_hash;' . 216 | 'query_18_hash;query_19_hash;query_20_hash;query_21_hash;query_22_hash;db_cache_hit_ratio' . "\n"); 217 | 218 | $line = '%.2f;%.2f;%.2f;%.2f;%.2f;%.2f;' . 219 | '%.2f;%.2f;%.2f;%.2f;%.2f;%.2f;%.2f;%.2f;%.2f;%.2f;%.2f;%.2f;%.2f;' . 220 | '%.2f;%.2f;%.2f;%.2f;%.2f;%.2f;%.2f;%.2f;%.2f;'. 221 | '%s;%s;%s;%s;%s;%s;%s;%s;%s;%s;%s;%s;%s;%s;%s;%s;%s;%s;%s;%s;%s;%s;%.2f'; 222 | 223 | fwrite($fd, sprintf($line, 224 | 225 | // tpc-h 226 | $data['stats']['load'], 227 | $data['stats']['pkeys'], 228 | $data['stats']['fkeys'], 229 | $data['stats']['indexes'], 230 | $data['stats']['analyze'], 231 | $data['stats']['benchmark'], 232 | 233 | ($data['queries'][1]['duration'] < QUERY_TIMEOUT) ? $data['queries'][1]['duration'] : null, 234 | ($data['queries'][2]['duration'] < QUERY_TIMEOUT) ? $data['queries'][2]['duration'] : null, 235 | ($data['queries'][3]['duration'] < QUERY_TIMEOUT) ? $data['queries'][3]['duration'] : null, 236 | ($data['queries'][4]['duration'] < QUERY_TIMEOUT) ? $data['queries'][4]['duration'] : null, 237 | ($data['queries'][5]['duration'] < QUERY_TIMEOUT) ? $data['queries'][5]['duration'] : null, 238 | ($data['queries'][6]['duration'] < QUERY_TIMEOUT) ? $data['queries'][6]['duration'] : null, 239 | ($data['queries'][7]['duration'] < QUERY_TIMEOUT) ? $data['queries'][7]['duration'] : null, 240 | ($data['queries'][8]['duration'] < QUERY_TIMEOUT) ? $data['queries'][8]['duration'] : null, 241 | ($data['queries'][9]['duration'] < QUERY_TIMEOUT) ? $data['queries'][9]['duration'] : null, 242 | ($data['queries'][10]['duration'] < QUERY_TIMEOUT) ? $data['queries'][10]['duration'] : null, 243 | ($data['queries'][11]['duration'] < QUERY_TIMEOUT) ? $data['queries'][11]['duration'] : null, 244 | ($data['queries'][12]['duration'] < QUERY_TIMEOUT) ? $data['queries'][12]['duration'] : null, 245 | ($data['queries'][13]['duration'] < QUERY_TIMEOUT) ? $data['queries'][13]['duration'] : null, 246 | ($data['queries'][14]['duration'] < QUERY_TIMEOUT) ? $data['queries'][14]['duration'] : null, 247 | ($data['queries'][15]['duration'] < QUERY_TIMEOUT) ? $data['queries'][15]['duration'] : null, 248 | ($data['queries'][16]['duration'] < QUERY_TIMEOUT) ? $data['queries'][16]['duration'] : null, 249 | ($data['queries'][17]['duration'] < QUERY_TIMEOUT) ? $data['queries'][17]['duration'] : null, 250 | ($data['queries'][18]['duration'] < QUERY_TIMEOUT) ? $data['queries'][18]['duration'] : null, 251 | ($data['queries'][19]['duration'] < QUERY_TIMEOUT) ? $data['queries'][19]['duration'] : null, 252 | ($data['queries'][20]['duration'] < QUERY_TIMEOUT) ? $data['queries'][20]['duration'] : null, 253 | ($data['queries'][21]['duration'] < QUERY_TIMEOUT) ? $data['queries'][21]['duration'] : null, 254 | ($data['queries'][22]['duration'] < QUERY_TIMEOUT) ? $data['queries'][22]['duration'] : null, 255 | 256 | $data['queries'][1]['hash'], 257 | $data['queries'][2]['hash'], 258 | $data['queries'][3]['hash'], 259 | $data['queries'][4]['hash'], 260 | $data['queries'][5]['hash'], 261 | $data['queries'][6]['hash'], 262 | $data['queries'][7]['hash'], 263 | $data['queries'][8]['hash'], 264 | $data['queries'][9]['hash'], 265 | $data['queries'][10]['hash'], 266 | $data['queries'][11]['hash'], 267 | $data['queries'][12]['hash'], 268 | $data['queries'][13]['hash'], 269 | $data['queries'][14]['hash'], 270 | $data['queries'][15]['hash'], 271 | $data['queries'][16]['hash'], 272 | $data['queries'][17]['hash'], 273 | $data['queries'][18]['hash'], 274 | $data['queries'][19]['hash'], 275 | $data['queries'][20]['hash'], 276 | $data['queries'][21]['hash'], 277 | $data['queries'][22]['hash'], 278 | $data['stats']['hit_ratio'] 279 | 280 | ) . ";\n"); 281 | 282 | fclose($fd); 283 | 284 | } 285 | 286 | function load_checkpoints($logfile) { 287 | 288 | $log = file($logfile); 289 | $checkpoints = array(); 290 | 291 | foreach ($log AS $row) { 292 | $row = trim($row); 293 | 294 | if (preg_match('/^([0-9]{4})-([0-9]{2})-([0-9]{2}) ([0-9]{2}):([0-9]{2}):([0-9]{2}).([0-9]{3}) [A-Z]+ [0-9]+ :[a-z0-9]+\.[a-z0-9]+ LOG: checkpoint starting: (.*)$/', $row, $matches)) { 295 | $checkpoints[] = array('start' => mktime($matches[4], $matches[5], $matches[6], $matches[2], $matches[3], $matches[1]) + TIME_DIFF, 'cause' => $matches[8]); 296 | } else if (preg_match('/^([0-9]{4})-([0-9]{2})-([0-9]{2}) ([0-9]{2}):([0-9]{2}):([0-9]{2}).([0-9]{3}) [A-Z]+ [0-9]+ :[a-z0-9]+\.[a-z0-9]+ LOG: checkpoint complete: wrote ([0-9]+) buffers/', $row, $matches)) { 297 | $checkpoints[count($checkpoints)-1]['end'] = mktime($matches[4], $matches[5], $matches[6], $matches[2], $matches[3], $matches[1]) + TIME_DIFF; 298 | $checkpoints[count($checkpoints)-1]['buffers'] = $matches[8]; 299 | } 300 | } 301 | 302 | return $checkpoints; 303 | 304 | } 305 | 306 | /* returns a hash (used to compare multiple plans) */ 307 | function get_plan_hash($file) { 308 | 309 | $plan = file($file); 310 | 311 | $tmp = ''; 312 | foreach ($plan AS $line) { 313 | $line = preg_replace('/[0-9]/', '', $line); 314 | $line = preg_replace('/^\s+/', '', $line); 315 | $line = preg_replace('/\s+$/', '', $line); 316 | $tmp .= $line; 317 | } 318 | 319 | return md5($tmp); 320 | 321 | } 322 | 323 | ?> -------------------------------------------------------------------------------- /tpch.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | RESULTS=$1 4 | DBNAME=$2 5 | USER=$3 6 | 7 | # delay between stats collections (iostat, vmstat, ...) 8 | DELAY=15 9 | 10 | # DSS queries timeout (5 minutes or something like that) 11 | DSS_TIMEOUT=300000 # 5 minutes in seconds 12 | 13 | # log 14 | LOGFILE=bench.log 15 | 16 | function benchmark_run() { 17 | 18 | mkdir -p $RESULTS 19 | 20 | # store the settings 21 | psql -h localhost postgres -c "select name,setting from pg_settings" > $RESULTS/settings.log 2> $RESULTS/settings.err 22 | 23 | print_log "preparing TPC-H database" 24 | 25 | # create database, populate it with data and set up foreign keys 26 | # psql -h localhost tpch < dss/tpch-create.sql > $RESULTS/create.log 2> $RESULTS/create.err 27 | 28 | print_log " loading data" 29 | 30 | psql -h localhost -U $USER $DBNAME < dss/tpch-load.sql > $RESULTS/load.log 2> $RESULTS/load.err 31 | 32 | print_log " creating primary keys" 33 | 34 | psql -h localhost -U $USER $DBNAME < dss/tpch-pkeys.sql > $RESULTS/pkeys.log 2> $RESULTS/pkeys.err 35 | 36 | print_log " creating foreign keys" 37 | 38 | psql -h localhost -U $USER $DBNAME < dss/tpch-alter.sql > $RESULTS/alter.log 2> $RESULTS/alter.err 39 | 40 | print_log " creating indexes" 41 | 42 | psql -h localhost -U $USER $DBNAME < dss/tpch-index.sql > $RESULTS/index.log 2> $RESULTS/index.err 43 | 44 | print_log " analyzing" 45 | 46 | psql -h localhost -U $USER $DBNAME -c "analyze" > $RESULTS/analyze.log 2> $RESULTS/analyze.err 47 | 48 | print_log "running TPC-H benchmark" 49 | 50 | benchmark_dss $RESULTS 51 | 52 | print_log "finished TPC-H benchmark" 53 | 54 | } 55 | 56 | function benchmark_dss() { 57 | 58 | mkdir -p $RESULTS 59 | 60 | mkdir $RESULTS/vmstat-s $RESULTS/vmstat-d $RESULTS/explain $RESULTS/results $RESULTS/errors 61 | 62 | # get bgwriter stats 63 | psql postgres -c "SELECT * FROM pg_stat_bgwriter" > $RESULTS/stats-before.log 2>> $RESULTS/stats-before.err 64 | psql postgres -c "SELECT * FROM pg_stat_database WHERE datname = '$DBNAME'" >> $RESULTS/stats-before.log 2>> $RESULTS/stats-before.err 65 | 66 | vmstat -s > $RESULTS/vmstat-s-before.log 2>&1 67 | vmstat -d > $RESULTS/vmstat-d-before.log 2>&1 68 | 69 | print_log "running queries defined in TPC-H benchmark" 70 | 71 | for n in `seq 1 22` 72 | do 73 | 74 | q="dss/queries/$n.sql" 75 | qe="dss/queries/$n.explain.sql" 76 | 77 | if [ -f "$q" ]; then 78 | 79 | print_log " running query $n" 80 | 81 | echo "======= query $n =======" >> $RESULTS/data.log 2>&1; 82 | 83 | # run explain 84 | psql -h localhost -U $USER $DBNAME < $qe > $RESULTS/explain/$n 2>> $RESULTS/explain.err 85 | 86 | vmstat -s > $RESULTS/vmstat-s/before-$n.log 2>&1 87 | vmstat -d > $RESULTS/vmstat-d/before-$n.log 2>&1 88 | 89 | # run the query on background 90 | /usr/bin/time -a -f "$n = %e" -o $RESULTS/results.log psql -h localhost -U $USER $DBNAME < $q > $RESULTS/results/$n 2> $RESULTS/errors/$n & 91 | 92 | # wait up to the given number of seconds, then terminate the query if still running (don't wait for too long) 93 | for i in `seq 0 $DSS_TIMEOUT` 94 | do 95 | 96 | # the query is still running - check the time 97 | if [ -d "/proc/$!" ]; then 98 | 99 | # the time is over, kill it with fire! 100 | if [ $i -eq $DSS_TIMEOUT ]; then 101 | 102 | print_log " killing query $n (timeout)" 103 | 104 | # echo "$q : timeout" >> $RESULTS/results.log 105 | psql -h localhost postgres -c "SELECT pg_terminate_backend(procpid) FROM pg_stat_activity WHERE datname = 'tpch'" >> $RESULTS/queries.err 2>&1; 106 | 107 | # time to do a cleanup 108 | sleep 10; 109 | 110 | # just check how many backends are there (should be 0) 111 | psql -h localhost postgres -c "SELECT COUNT(*) AS tpch_backends FROM pg_stat_activity WHERE datname = 'tpch'" >> $RESULTS/queries.err 2>&1; 112 | 113 | else 114 | # the query is still running and we have time left, sleep another second 115 | sleep 1; 116 | fi; 117 | 118 | else 119 | 120 | # the query finished in time, do not wait anymore 121 | print_log " query $n finished OK ($i seconds)" 122 | break; 123 | 124 | fi; 125 | 126 | done; 127 | 128 | vmstat -s > $RESULTS/vmstat-s/after-$n.log 2>&1 129 | vmstat -d > $RESULTS/vmstat-d/after-$n.log 2>&1 130 | 131 | fi; 132 | 133 | done; 134 | 135 | # collect stats again 136 | psql postgres -c "SELECT * FROM pg_stat_bgwriter" > $RESULTS/stats-after.log 2>> $RESULTS/stats-after.err 137 | psql postgres -c "SELECT * FROM pg_stat_database WHERE datname = '$DBNAME'" >> $RESULTS/stats-after.log 2>> $RESULTS/stats-after.err 138 | 139 | vmstat -s > $RESULTS/vmstat-s-after.log 2>&1 140 | vmstat -d > $RESULTS/vmstat-d-after.log 2>&1 141 | 142 | } 143 | 144 | function stat_collection_start() 145 | { 146 | 147 | local RESULTS=$1 148 | 149 | # run some basic monitoring tools (iotop, iostat, vmstat) 150 | for dev in $DEVICES 151 | do 152 | iostat -t -x /dev/$dev $DELAY >> $RESULTS/iostat.$dev.log & 153 | done; 154 | 155 | vmstat $DELAY >> $RESULTS/vmstat.log & 156 | 157 | } 158 | 159 | function stat_collection_stop() 160 | { 161 | 162 | # wait to get a complete log from iostat etc. and then kill them 163 | sleep $DELAY 164 | 165 | for p in `jobs -p`; do 166 | kill $p; 167 | done; 168 | 169 | } 170 | 171 | function print_log() { 172 | 173 | local message=$1 174 | 175 | echo `date +"%Y-%m-%d %H:%M:%S"` "["`date +%s`"] : $message" >> $RESULTS/$LOGFILE; 176 | 177 | } 178 | 179 | mkdir $RESULTS; 180 | 181 | # start statistics collection 182 | stat_collection_start $RESULTS 183 | 184 | # run the benchmark 185 | benchmark_run $RESULTS $DBNAME $USER 186 | 187 | # stop statistics collection 188 | stat_collection_stop 189 | --------------------------------------------------------------------------------