├── load.sql ├── README.md ├── tables.sql ├── explain.sql └── explain-analyze.sql /load.sql: -------------------------------------------------------------------------------- 1 | -- 2 | -- load.sql 3 | -- Data load for SSB Tables. 4 | -- (Data format: dbgen output, (https://github.com/electrum/ssb-dbgen/) 5 | -- 6 | TRUNCATE customer, date, part, supplier, lineorder; 7 | COPY customer FROM '/tmp/customer.tbl'DELIMITER '|'; 8 | COPY date FROM '/tmp/date.tbl' DELIMITER '|'; 9 | COPY part FROM '/tmp/part.tbl' DELIMITER '|'; 10 | COPY supplier FROM '/tmp/supplier.tbl' DELIMITER '|'; 11 | COPY lineorder FROM '/tmp/lineorder.tbl' DELIMITER '|'; 12 | 13 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ssb-postgres 2 | スタースキーマベンチマーク(SSB)をPostgreSQL上で試すためのSQLスクリプト群。 3 | 4 | # Description 5 | このスクリプト群は、https://www.cs.umb.edu/~poneil/StarSchemaB.PDF の記載内容を元に、 6 | PostgreSQL上でスタースキーマベンチマークを実行するためのSQLスクリプトである。 7 | 現在、以下の4つのファイルを準備している。 8 | 9 | * テーブル定義(tables.sql) 10 | * データロード(load.sql) 11 | * EXPLAIN実行(explain-analyze.sql) 12 | * EXPLAIN ANALYZE実行(explain.sql) 13 | 14 | このリポジトリには、データ生成のスクリプトは含まれていない。 15 | データ生成用のツールは、ssb-gen(https://github.com/electrum/ssb-dbgen/)のリポジトリから 16 | ソースをダウンロードしてmakeする必要がある。 17 | 18 | # Example 19 | PostgreSQL 11.2を用いて、データベースを生成してから、SSBのクエリを実行するまでの例を示す。 20 | 21 | * ``createdb``コマンドでデータベースを生成する。 22 | 23 | ``` 24 | $ createdb ssb 25 | ``` 26 | 27 | * SSB用のテーブルを定義する。 28 | 29 | ``` 30 | $ psql ssb -f tables.sql 31 | ``` 32 | * ssb-dbgen ツールは事前にリポジトリのクローンあるいはDLしてビルドしておき、``dbgen``コマンドが動作するようにしておく。 33 | * ssb-dbgen ツールを使って任意の大きさ(Scale Facotr)のデータを生成する。この例では、Scale Factor=1のデータを生成する。 34 | 35 | ``` 36 | $ ./dbgen -s 1 -T a 37 | ``` 38 | 39 | * 上記の例の場合、``dbgen``コマンドはカレントディレクトリに以下の5つのファイルを生成する。 40 | 41 | ``` 42 | $ ls -1 *.tbl 43 | customer.tbl 44 | date.tbl 45 | head-customer.tbl 46 | head-lineorder.tbl 47 | lineorder.tbl 48 | part.tbl 49 | supplier.tbl 50 | $ 51 | ``` 52 | 53 | * 生成されたデータファイルを、``/tmp``ディレクトリに移動するか、``ln -s``コマンドによって``/tmp``からのシンボリックリンクを生成する。 54 | * ``psql``コマンドを用いて、生成したデータをロードする。 55 | 56 | 57 | `` 58 | $ psql ssb -f load.sql 59 | `` 60 | 61 | * データロードが終わった後で、必要に応じて、``VACUUM``, ``ANALYZE``等のコマンドを実行する。 62 | * SSBのクエリ(Q1~$4)の実行計画を取得する場合は、explain.sql を用いる。 63 | * SSBのクエリ(Q1~$4)の実行時間を取得する場合は、explain-analyze.sql を用いる。 64 | 65 | 66 | # TODO 67 | 68 | * orderlineを日付レンジでパーティション化することが有効化検証するための、テーブル定義の作成。 69 | * pg_hint_planが有効なケースがあった場合、そのHINT句コメントを付与した、EXPLAIN実行スクリプトの作成。 70 | 71 | # Author 72 | 73 | @nuko_yokohama (ぬこ@横浜) 74 | 75 | -------------------------------------------------------------------------------- /tables.sql: -------------------------------------------------------------------------------- 1 | -- 2 | -- SSB DDL for PostgreSQL(Simple Version) 3 | -- 4 | -- Author: @nuko_yokohama 5 | -- 6 | -- Description 7 | -- This script file registers the five tables and indexes specified in SSB in PostgreSQL. 8 | -- This script file is verified with PostgreSQL 11. 9 | -- 10 | -- Reference documents 11 | -- Star Schema Benchmark Revision 3, June 5, 2009 12 | -- (https://www.cs.umb.edu/~poneil/StarSchemaB.PDF) 13 | -- 14 | -- Implementation concept 15 | -- * "ssb-dbgen" data generater based. 16 | -- * Table names and column names are in lowercase. 17 | -- * Integer values map to integer types. 18 | -- * Fixed Text data is CHAR(n). 19 | -- * Variable Length Text data is TEXT. 20 | 21 | DROP TABLE IF EXISTS lineorder, date, part, supplier, customer CASCADE; 22 | -- 23 | -- customer 24 | -- 25 | CREATE TABLE customer ( 26 | c_custkey INTEGER PRIMARY KEY, 27 | c_name TEXT, 28 | c_address TEXT, 29 | c_city CHAR(10), 30 | c_nation CHAR(15), 31 | c_region CHAR(12), 32 | c_phone CHAR(15), 33 | c_mktsegment CHAR(10), 34 | dummy TEXT -- dbgen last delimiter 35 | ); 36 | 37 | -- 38 | -- date 39 | -- 40 | CREATE TABLE date ( 41 | d_datekey DATE PRIMARY KEY, 42 | d_date CHAR(18), 43 | d_dayofweek CHAR(9), 44 | d_month CHAR(9), 45 | d_year INTEGER, 46 | d_yearmonthnum INTEGER, 47 | d_yearmonth CHAR(7), 48 | d_daynuminweek INTEGER, 49 | d_daynuminmonth INTEGER, 50 | d_daynuminyear INTEGER, 51 | d_monthnuminyear INTEGER, 52 | d_weeknuminyear INTEGER, 53 | d_sellingseason TEXT, 54 | d_lastdayinweekfl CHAR(1), 55 | d_lastdayinmonthfl CHAR(1), 56 | d_holidayfl CHAR(1), 57 | d_weekdayfl CHAR(1), 58 | dummy TEXT -- dbgen last delimiter 59 | ); 60 | 61 | -- 62 | -- part 63 | -- 64 | CREATE TABLE part ( 65 | p_partkey INTEGER PRIMARY KEY, 66 | p_name TEXT, 67 | p_mfgr CHAR(6), 68 | p_category CHAR(7), 69 | p_brand1 CHAR(9), 70 | p_color CHAR(11), 71 | p_type TEXT, 72 | p_size INTEGER, 73 | p_container CHAR(10), 74 | dummy TEXT -- dbgen last delimiter 75 | ); 76 | 77 | -- 78 | -- supplier 79 | -- 80 | CREATE TABLE supplier ( 81 | s_suppkey INTEGER PRIMARY KEY, 82 | s_name CHAR(25), 83 | s_address TEXT, 84 | s_city CHAR(10), 85 | s_nation CHAR(15), 86 | s_region CHAR(12), 87 | s_phone CHAR(15), 88 | dummy TEXT -- dbgen last delimiter 89 | ); 90 | 91 | -- 92 | -- lineorder 93 | -- 94 | CREATE TABLE lineorder ( 95 | lo_orderkey BIGINT, -- Consider SF 300+ 96 | lo_linenumber INTEGER, 97 | lo_custkey INTEGER, -- FK to C_CUSTKEY 98 | lo_partkey INTEGER, -- FK to P_PARTKEY 99 | lo_suppkey INTEGER, -- FK to S_SUPPKEY 100 | lo_orderdate DATE, -- FK to D_DATEKEY 101 | lo_orderpriority CHAR(15), 102 | lo_shippriority CHAR(1), 103 | lo_quantity INTEGER, 104 | lo_extendedprice NUMERIC, 105 | lo_ordtotalprice NUMERIC, 106 | lo_discount NUMERIC, 107 | lo_revenue NUMERIC, 108 | lo_supplycost NUMERIC, 109 | lo_tax NUMERIC, 110 | lo_commitdate DATE, -- FK to D_DATEKEY 111 | lo_shipmod CHAR(10), 112 | dummy TEXT, -- dbgen last delimiter 113 | CONSTRAINT lo_pkey PRIMARY KEY(lo_orderkey, lo_linenumber), 114 | FOREIGN KEY (lo_custkey) REFERENCES customer (c_custkey), 115 | FOREIGN KEY (lo_partkey) REFERENCES part (p_partkey), 116 | FOREIGN KEY (lo_suppkey) REFERENCES supplier (s_suppkey), 117 | FOREIGN KEY (lo_orderdate) REFERENCES date (d_datekey) 118 | ); 119 | 120 | 121 | -------------------------------------------------------------------------------- /explain.sql: -------------------------------------------------------------------------------- 1 | -- 2 | -- explain.sql 3 | -- Q1-Q4 query with EXPLAIN 4 | -- Verified PostgreSQL 11.2 5 | -- 6 | 7 | -- Q1.1 8 | EXPLAIN VERBOSE 9 | select sum(lo_extendedprice*lo_discount) as revenue 10 | from lineorder, date 11 | where lo_orderdate = d_datekey 12 | and d_year = 1993 13 | and lo_discount between 1 and 3 14 | and lo_quantity < 25; 15 | 16 | -- Q1.2 17 | EXPLAIN VERBOSE 18 | select sum(lo_extendedprice*lo_discount) as revenue 19 | from lineorder, date 20 | where lo_orderdate = d_datekey 21 | and d_yearmonthnum = 199401 22 | and lo_discount between 4 and 6 23 | and lo_quantity between 26 and 35 24 | ; 25 | 26 | -- Q1.3 27 | EXPLAIN VERBOSE 28 | select sum(lo_extendedprice*lo_discount) as revenue 29 | from lineorder, date 30 | where lo_orderdate = d_datekey 31 | and d_weeknuminyear = 6 32 | and d_year = 1994 33 | and lo_discount between 5 and 7 34 | and lo_quantity between 26 and 35 35 | ; 36 | 37 | -- Q2.1 38 | EXPLAIN VERBOSE 39 | select sum(lo_revenue), d_year, p_brand1 40 | from lineorder, date, part, supplier 41 | where lo_orderdate = d_datekey 42 | and lo_partkey = p_partkey 43 | and lo_suppkey = s_suppkey 44 | and p_category = 'MFGR#12' 45 | and s_region = 'AMERICA' 46 | group by d_year, p_brand1 47 | order by d_year, p_brand1 48 | ; 49 | 50 | -- Q2.1 51 | EXPLAIN VERBOSE 52 | select sum(lo_revenue), d_year, p_brand1 53 | from lineorder, date, part, supplier 54 | where lo_orderdate = d_datekey 55 | and lo_partkey = p_partkey 56 | and lo_suppkey = s_suppkey 57 | and p_brand1 between 'MFGR#2221' and 'MFGR#2228' 58 | and s_region = 'ASIA' 59 | group by d_year, p_brand1 60 | order by d_year, p_brand1 61 | ; 62 | 63 | -- Q2.3 64 | EXPLAIN VERBOSE 65 | select sum(lo_revenue), d_year, p_brand1 66 | from lineorder, date, part, supplier 67 | where lo_orderdate = d_datekey 68 | and lo_partkey = p_partkey 69 | and lo_suppkey = s_suppkey 70 | and p_brand1 = 'MFGR#2221' 71 | and s_region = 'EUROPE' 72 | group by d_year, p_brand1 73 | order by d_year, p_brand1 74 | ; 75 | 76 | -- Q3.1 77 | EXPLAIN VERBOSE 78 | select c_nation, s_nation, d_year, sum(lo_revenue) as revenue 79 | from customer, lineorder, supplier, date 80 | where lo_custkey = c_custkey 81 | and lo_suppkey = s_suppkey 82 | and lo_orderdate = d_datekey 83 | and c_region = 'ASIA' 84 | and s_region = 'ASIA' 85 | and d_year >= 1992 86 | and d_year <= 1997 87 | group by c_nation, s_nation, d_year 88 | order by d_year asc, revenue desc 89 | ; 90 | 91 | -- Q3.2 92 | EXPLAIN VERBOSE 93 | select c_city, s_city, d_year, sum(lo_revenue) as revenue 94 | from customer, lineorder, supplier, date 95 | where lo_custkey = c_custkey 96 | and lo_suppkey = s_suppkey 97 | and lo_orderdate = d_datekey 98 | and c_nation = 'UNITED STATES' 99 | and s_nation = 'UNITED STATES' 100 | and d_year >= 1992 101 | and d_year <= 1997 102 | group by c_city, s_city, d_year 103 | order by d_year asc, revenue desc 104 | ; 105 | 106 | -- Q3.3 107 | EXPLAIN VERBOSE 108 | select c_city, s_city, d_year, sum(lo_revenue) as revenue 109 | from customer, lineorder, supplier, date 110 | where lo_custkey = c_custkey 111 | and lo_suppkey = s_suppkey 112 | and lo_orderdate = d_datekey 113 | and (c_city='UNITED KI1' or c_city='UNITED KI5') 114 | and (s_city='UNITED KI1' or s_city='UNITED KI5') 115 | and d_year >= 1992 116 | and d_year <= 1997 117 | group by c_city, s_city, d_year 118 | order by d_year asc, revenue desc 119 | ; 120 | 121 | -- Q3.4 122 | EXPLAIN VERBOSE 123 | select c_city, s_city, d_year, sum(lo_revenue) as revenue 124 | from customer, lineorder, supplier, date 125 | where lo_custkey = c_custkey 126 | and lo_suppkey = s_suppkey 127 | and lo_orderdate = d_datekey 128 | and (c_city='UNITED KI1' or c_city='UNITED KI5') 129 | and (s_city='UNITED KI1' or s_city='UNITED KI5') 130 | and d_yearmonth = 'Dec1997' 131 | group by c_city, s_city, d_year 132 | order by d_year asc, revenue desc 133 | ; 134 | 135 | -- Q4.1 136 | EXPLAIN VERBOSE 137 | select d_year, c_nation, sum(lo_revenue - lo_supplycost) as profit 138 | from date, customer, supplier, part, lineorder 139 | where lo_custkey = c_custkey 140 | and lo_suppkey = s_suppkey 141 | and lo_partkey = p_partkey 142 | and lo_orderdate = d_datekey 143 | and c_region = 'AMERICA' 144 | and s_region = 'AMERICA' 145 | and (p_mfgr = 'MFGR#1' or p_mfgr = 'MFGR#2') 146 | group by d_year, c_nation 147 | order by d_year, c_nation 148 | ; 149 | 150 | -- Q4.2 151 | EXPLAIN VERBOSE 152 | select d_year, s_nation, p_category, sum(lo_revenue - lo_supplycost) as profit 153 | from date, customer, supplier, part, lineorder 154 | where lo_custkey = c_custkey 155 | and lo_suppkey = s_suppkey 156 | and lo_partkey = p_partkey 157 | and lo_orderdate = d_datekey 158 | and c_region = 'AMERICA' 159 | and s_region = 'AMERICA' 160 | and (d_year = 1997 or d_year = 1998) 161 | and (p_mfgr = 'MFGR#1' or p_mfgr = 'MFGR#2') 162 | group by d_year, s_nation, p_category 163 | order by d_year, s_nation, p_category 164 | ; 165 | 166 | -- Q4.3 167 | EXPLAIN VERBOSE 168 | select d_year, s_city, p_brand1, sum(lo_revenue - lo_supplycost) as profit 169 | from date, customer, supplier, part, lineorder 170 | where lo_custkey = c_custkey 171 | and lo_suppkey = s_suppkey 172 | and lo_partkey = p_partkey 173 | and lo_orderdate = d_datekey 174 | and c_region = 'AMERICA' 175 | and s_nation = 'UNITED STATES' 176 | and (d_year = 1997 or d_year = 1998) 177 | and p_category = 'MFGR#14' 178 | group by d_year, s_city, p_brand1 179 | order by d_year, s_city, p_brand1 180 | ; 181 | -------------------------------------------------------------------------------- /explain-analyze.sql: -------------------------------------------------------------------------------- 1 | -- 2 | -- explain-analyze.sql 3 | -- Q1-Q4 query with EXPLAIN ANALYZE ANALYZE 4 | -- Verified PostgreSQL 11.2 5 | -- 6 | 7 | -- Q1.1 8 | EXPLAIN ANALYZE VERBOSE 9 | select sum(lo_extendedprice*lo_discount) as revenue 10 | from lineorder, date 11 | where lo_orderdate = d_datekey 12 | and d_year = 1993 13 | and lo_discount between 1 and 3 14 | and lo_quantity < 25; 15 | 16 | -- Q1.2 17 | EXPLAIN ANALYZE VERBOSE 18 | select sum(lo_extendedprice*lo_discount) as revenue 19 | from lineorder, date 20 | where lo_orderdate = d_datekey 21 | and d_yearmonthnum = 199401 22 | and lo_discount between 4 and 6 23 | and lo_quantity between 26 and 35 24 | ; 25 | 26 | -- Q1.3 27 | EXPLAIN ANALYZE VERBOSE 28 | select sum(lo_extendedprice*lo_discount) as revenue 29 | from lineorder, date 30 | where lo_orderdate = d_datekey 31 | and d_weeknuminyear = 6 32 | and d_year = 1994 33 | and lo_discount between 5 and 7 34 | and lo_quantity between 26 and 35 35 | ; 36 | 37 | -- Q2.1 38 | EXPLAIN ANALYZE VERBOSE 39 | select sum(lo_revenue), d_year, p_brand1 40 | from lineorder, date, part, supplier 41 | where lo_orderdate = d_datekey 42 | and lo_partkey = p_partkey 43 | and lo_suppkey = s_suppkey 44 | and p_category = 'MFGR#12' 45 | and s_region = 'AMERICA' 46 | group by d_year, p_brand1 47 | order by d_year, p_brand1 48 | ; 49 | 50 | -- Q2.1 51 | EXPLAIN ANALYZE VERBOSE 52 | select sum(lo_revenue), d_year, p_brand1 53 | from lineorder, date, part, supplier 54 | where lo_orderdate = d_datekey 55 | and lo_partkey = p_partkey 56 | and lo_suppkey = s_suppkey 57 | and p_brand1 between 'MFGR#2221' and 'MFGR#2228' 58 | and s_region = 'ASIA' 59 | group by d_year, p_brand1 60 | order by d_year, p_brand1 61 | ; 62 | 63 | -- Q2.3 64 | EXPLAIN ANALYZE VERBOSE 65 | select sum(lo_revenue), d_year, p_brand1 66 | from lineorder, date, part, supplier 67 | where lo_orderdate = d_datekey 68 | and lo_partkey = p_partkey 69 | and lo_suppkey = s_suppkey 70 | and p_brand1 = 'MFGR#2221' 71 | and s_region = 'EUROPE' 72 | group by d_year, p_brand1 73 | order by d_year, p_brand1 74 | ; 75 | 76 | -- Q3.1 77 | EXPLAIN ANALYZE VERBOSE 78 | select c_nation, s_nation, d_year, sum(lo_revenue) as revenue 79 | from customer, lineorder, supplier, date 80 | where lo_custkey = c_custkey 81 | and lo_suppkey = s_suppkey 82 | and lo_orderdate = d_datekey 83 | and c_region = 'ASIA' 84 | and s_region = 'ASIA' 85 | and d_year >= 1992 86 | and d_year <= 1997 87 | group by c_nation, s_nation, d_year 88 | order by d_year asc, revenue desc 89 | ; 90 | 91 | -- Q3.2 92 | EXPLAIN ANALYZE VERBOSE 93 | select c_city, s_city, d_year, sum(lo_revenue) as revenue 94 | from customer, lineorder, supplier, date 95 | where lo_custkey = c_custkey 96 | and lo_suppkey = s_suppkey 97 | and lo_orderdate = d_datekey 98 | and c_nation = 'UNITED STATES' 99 | and s_nation = 'UNITED STATES' 100 | and d_year >= 1992 101 | and d_year <= 1997 102 | group by c_city, s_city, d_year 103 | order by d_year asc, revenue desc 104 | ; 105 | 106 | -- Q3.3 107 | EXPLAIN ANALYZE VERBOSE 108 | select c_city, s_city, d_year, sum(lo_revenue) as revenue 109 | from customer, lineorder, supplier, date 110 | where lo_custkey = c_custkey 111 | and lo_suppkey = s_suppkey 112 | and lo_orderdate = d_datekey 113 | and (c_city='UNITED KI1' or c_city='UNITED KI5') 114 | and (s_city='UNITED KI1' or s_city='UNITED KI5') 115 | and d_year >= 1992 116 | and d_year <= 1997 117 | group by c_city, s_city, d_year 118 | order by d_year asc, revenue desc 119 | ; 120 | 121 | -- Q3.4 122 | EXPLAIN ANALYZE VERBOSE 123 | select c_city, s_city, d_year, sum(lo_revenue) as revenue 124 | from customer, lineorder, supplier, date 125 | where lo_custkey = c_custkey 126 | and lo_suppkey = s_suppkey 127 | and lo_orderdate = d_datekey 128 | and (c_city='UNITED KI1' or c_city='UNITED KI5') 129 | and (s_city='UNITED KI1' or s_city='UNITED KI5') 130 | and d_yearmonth = 'Dec1997' 131 | group by c_city, s_city, d_year 132 | order by d_year asc, revenue desc 133 | ; 134 | 135 | -- Q4.1 136 | EXPLAIN ANALYZE VERBOSE 137 | select d_year, c_nation, sum(lo_revenue - lo_supplycost) as profit 138 | from date, customer, supplier, part, lineorder 139 | where lo_custkey = c_custkey 140 | and lo_suppkey = s_suppkey 141 | and lo_partkey = p_partkey 142 | and lo_orderdate = d_datekey 143 | and c_region = 'AMERICA' 144 | and s_region = 'AMERICA' 145 | and (p_mfgr = 'MFGR#1' or p_mfgr = 'MFGR#2') 146 | group by d_year, c_nation 147 | order by d_year, c_nation 148 | ; 149 | 150 | -- Q4.2 151 | EXPLAIN ANALYZE VERBOSE 152 | select d_year, s_nation, p_category, sum(lo_revenue - lo_supplycost) as profit 153 | from date, customer, supplier, part, lineorder 154 | where lo_custkey = c_custkey 155 | and lo_suppkey = s_suppkey 156 | and lo_partkey = p_partkey 157 | and lo_orderdate = d_datekey 158 | and c_region = 'AMERICA' 159 | and s_region = 'AMERICA' 160 | and (d_year = 1997 or d_year = 1998) 161 | and (p_mfgr = 'MFGR#1' or p_mfgr = 'MFGR#2') 162 | group by d_year, s_nation, p_category 163 | order by d_year, s_nation, p_category 164 | ; 165 | 166 | -- Q4.3 167 | EXPLAIN ANALYZE VERBOSE 168 | select d_year, s_city, p_brand1, sum(lo_revenue - lo_supplycost) as profit 169 | from date, customer, supplier, part, lineorder 170 | where lo_custkey = c_custkey 171 | and lo_suppkey = s_suppkey 172 | and lo_partkey = p_partkey 173 | and lo_orderdate = d_datekey 174 | and c_region = 'AMERICA' 175 | and s_nation = 'UNITED STATES' 176 | and (d_year = 1997 or d_year = 1998) 177 | and p_category = 'MFGR#14' 178 | group by d_year, s_city, p_brand1 179 | order by d_year, s_city, p_brand1 180 | ; 181 | --------------------------------------------------------------------------------