├── doc ├── overview.png ├── evaluation_job_query.png ├── evaluation_job_total.png ├── evaluation_overview.png ├── online_offline_mode.png ├── evaluation_tpcds_1g_query.png ├── evaluation_tpcds_1g_total.png ├── evaluation_tpch_1g_query.png ├── evaluation_tpch_1g_total.png ├── evaluation_web3bench_query.png ├── evaluation_web3bench_total.png ├── evaluation_web3_customer_query.png └── evaluation_web3_customer_total.png ├── examples ├── workload_export_output │ ├── output │ │ ├── ddl.sql │ │ ├── q4.txt │ │ ├── q10.txt │ │ ├── q2.txt │ │ ├── q6.txt │ │ ├── q8.txt │ │ └── summary.txt │ ├── queries.sql │ ├── schema.sql │ └── stats │ │ ├── test1_t.json │ │ ├── test_t.json │ │ ├── test_ta.json │ │ └── test1_tb.json ├── job │ ├── queries │ │ ├── 3b.sql │ │ ├── 2a.sql │ │ ├── 2b.sql │ │ ├── 2c.sql │ │ ├── 2d.sql │ │ ├── 3a.sql │ │ ├── 32a.sql │ │ ├── 3c.sql │ │ ├── 32b.sql │ │ ├── 4a.sql │ │ ├── 4b.sql │ │ ├── 4c.sql │ │ ├── 6a.sql │ │ ├── 6c.sql │ │ ├── 6e.sql │ │ ├── 8c.sql │ │ ├── 6f.sql │ │ ├── 5b.sql │ │ ├── 17f.sql │ │ ├── 17d.sql │ │ ├── 17e.sql │ │ ├── 8d.sql │ │ ├── 10c.sql │ │ ├── 17b.sql │ │ ├── 17c.sql │ │ ├── 6b.sql │ │ ├── 6d.sql │ │ ├── 10b.sql │ │ ├── 1d.sql │ │ ├── 5a.sql │ │ ├── 5c.sql │ │ ├── 17a.sql │ │ ├── 1b.sql │ │ ├── 10a.sql │ │ ├── 1c.sql │ │ ├── 1a.sql │ │ ├── 16b.sql │ │ ├── 16c.sql │ │ ├── 8a.sql │ │ ├── 16a.sql │ │ ├── 16d.sql │ │ ├── 18a.sql │ │ ├── 9d.sql │ │ ├── 7b.sql │ │ ├── 13a.sql │ │ ├── 13d.sql │ │ ├── 18c.sql │ │ ├── 9c.sql │ │ ├── 11d.sql │ │ ├── 15d.sql │ │ ├── 7a.sql │ │ ├── 11a.sql │ │ ├── 11b.sql │ │ ├── 9b.sql │ │ ├── 12a.sql │ │ ├── 8b.sql │ │ ├── 12b.sql │ │ ├── 12c.sql │ │ ├── 13c.sql │ │ ├── 18b.sql │ │ ├── 13b.sql │ │ ├── 9a.sql │ │ ├── 11c.sql │ │ ├── 14a.sql │ │ ├── 15a.sql │ │ ├── 19d.sql │ │ ├── 15c.sql │ │ ├── 7c.sql │ │ ├── 14c.sql │ │ ├── 15b.sql │ │ ├── 14b.sql │ │ ├── 20a.sql │ │ ├── 20c.sql │ │ ├── 21b.sql │ │ ├── 20b.sql │ │ ├── 19c.sql │ │ ├── 25a.sql │ │ ├── 21a.sql │ │ ├── 19b.sql │ │ ├── 21c.sql │ │ ├── 
23a.sql │ │ ├── 23b.sql │ │ ├── 25b.sql │ │ ├── 25c.sql │ │ ├── 19a.sql │ │ ├── 23c.sql │ │ ├── 26b.sql │ │ ├── 22a.sql │ │ ├── 22b.sql │ │ ├── 22d.sql │ │ ├── 26c.sql │ │ ├── 22c.sql │ │ ├── 26a.sql │ │ ├── 24a.sql │ │ ├── 31a.sql │ │ ├── 31c.sql │ │ ├── 27b.sql │ │ ├── 27a.sql │ │ ├── 33b.sql │ │ ├── 24b.sql │ │ ├── 27c.sql │ │ ├── 33a.sql │ │ ├── 30c.sql │ │ ├── 30a.sql │ │ ├── 33c.sql │ │ ├── 31b.sql │ │ ├── 30b.sql │ │ ├── 28b.sql │ │ ├── 28c.sql │ │ ├── 28a.sql │ │ ├── 29b.sql │ │ ├── 29c.sql │ │ └── 29a.sql │ ├── output │ │ ├── ddl.sql │ │ └── summary.txt │ └── stats │ │ ├── comp_cast_type.json │ │ ├── company_type.json │ │ ├── kind_type.json │ │ ├── role_type.json │ │ └── link_type.json ├── tpch_example1 │ ├── queries │ │ ├── q6.sql │ │ ├── q17.sql │ │ ├── q14.sql │ │ ├── q4.sql │ │ ├── q13.sql │ │ ├── q3.sql │ │ ├── q5.sql │ │ ├── q11.sql │ │ ├── q16.sql │ │ ├── q18.sql │ │ ├── q10.sql │ │ ├── q1.sql │ │ ├── q12.sql │ │ ├── q9.sql │ │ ├── q2.sql │ │ ├── q21.sql │ │ ├── q20.sql │ │ ├── q22.sql │ │ ├── q8.sql │ │ ├── q7.sql │ │ └── q19.sql │ ├── output │ │ ├── ddl.sql │ │ ├── summary.txt │ │ ├── q6.txt │ │ └── q13.txt │ └── stats │ │ └── tidb_stats_by_table_1684995617.json ├── web3bench │ ├── output │ │ ├── ddl.sql │ │ └── summary.txt │ └── queries.sql ├── tpcds │ └── stats │ │ └── dbgen_version.json └── tpch_example2 │ └── stats │ └── tidb_stats_by_table_1684995617.json ├── .gitignore ├── utils ├── tidb_test.go ├── logger.go ├── utils_test.go ├── utils.go └── tidb.go ├── advisor ├── workload_info_compression_test.go ├── workload_info_compression.go ├── index_selection_auto_admin_test.go ├── index_advisor.go ├── column_selection_simple_test.go └── utils.go ├── main.go ├── optimizer ├── tidb_what_if_optimizer_test.go └── what_if_optimizer.go ├── cmd_usage.sh ├── cmd └── precheck.go └── go.mod /doc/overview.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/qw4990/index_advisor/HEAD/doc/overview.png -------------------------------------------------------------------------------- /doc/evaluation_job_query.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qw4990/index_advisor/HEAD/doc/evaluation_job_query.png -------------------------------------------------------------------------------- /doc/evaluation_job_total.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qw4990/index_advisor/HEAD/doc/evaluation_job_total.png -------------------------------------------------------------------------------- /doc/evaluation_overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qw4990/index_advisor/HEAD/doc/evaluation_overview.png -------------------------------------------------------------------------------- /doc/online_offline_mode.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qw4990/index_advisor/HEAD/doc/online_offline_mode.png -------------------------------------------------------------------------------- /examples/workload_export_output/output/ddl.sql: -------------------------------------------------------------------------------- 1 | CREATE INDEX idx_a ON test.t (a); 2 | CREATE INDEX idx_a ON test1.t (a) -------------------------------------------------------------------------------- /doc/evaluation_tpcds_1g_query.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qw4990/index_advisor/HEAD/doc/evaluation_tpcds_1g_query.png -------------------------------------------------------------------------------- /doc/evaluation_tpcds_1g_total.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/qw4990/index_advisor/HEAD/doc/evaluation_tpcds_1g_total.png -------------------------------------------------------------------------------- /doc/evaluation_tpch_1g_query.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qw4990/index_advisor/HEAD/doc/evaluation_tpch_1g_query.png -------------------------------------------------------------------------------- /doc/evaluation_tpch_1g_total.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qw4990/index_advisor/HEAD/doc/evaluation_tpch_1g_total.png -------------------------------------------------------------------------------- /doc/evaluation_web3bench_query.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qw4990/index_advisor/HEAD/doc/evaluation_web3bench_query.png -------------------------------------------------------------------------------- /doc/evaluation_web3bench_total.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qw4990/index_advisor/HEAD/doc/evaluation_web3bench_total.png -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | advise-result 3 | exec-workload-result 4 | index_advisor 5 | ./index_advisor 6 | vendor 7 | data 8 | result 9 | -------------------------------------------------------------------------------- /doc/evaluation_web3_customer_query.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qw4990/index_advisor/HEAD/doc/evaluation_web3_customer_query.png -------------------------------------------------------------------------------- /doc/evaluation_web3_customer_total.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/qw4990/index_advisor/HEAD/doc/evaluation_web3_customer_total.png -------------------------------------------------------------------------------- /examples/job/queries/3b.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(t.title) AS movie_title FROM keyword AS k, movie_info AS mi, movie_keyword AS mk, title AS t WHERE k.keyword like '%sequel%' AND mi.info IN ('Bulgaria') AND t.production_year > 2010 AND t.id = mi.movie_id AND t.id = mk.movie_id AND mk.movie_id = mi.movie_id AND k.id = mk.keyword_id; 2 | -------------------------------------------------------------------------------- /examples/workload_export_output/queries.sql: -------------------------------------------------------------------------------- 1 | use test; 2 | select * from t where a<1; 3 | 4 | use test; 5 | select * from t where a=1; 6 | 7 | use test; 8 | select * from ta where a=1 and b >1; 9 | 10 | use test1; 11 | select * from tb where c=1 and a<10; 12 | 13 | use test1; 14 | select a from t where a =1; 15 | 16 | -------------------------------------------------------------------------------- /examples/job/queries/2a.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(t.title) AS movie_title FROM company_name AS cn, keyword AS k, movie_companies AS mc, movie_keyword AS mk, title AS t WHERE cn.country_code ='[de]' AND k.keyword ='character-name-in-title' AND cn.id = mc.company_id AND mc.movie_id = t.id AND t.id = mk.movie_id AND mk.keyword_id = k.id AND mc.movie_id = mk.movie_id; 2 | -------------------------------------------------------------------------------- /examples/job/queries/2b.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(t.title) AS movie_title FROM company_name AS cn, keyword AS k, movie_companies AS mc, movie_keyword AS 
mk, title AS t WHERE cn.country_code ='[nl]' AND k.keyword ='character-name-in-title' AND cn.id = mc.company_id AND mc.movie_id = t.id AND t.id = mk.movie_id AND mk.keyword_id = k.id AND mc.movie_id = mk.movie_id; 2 | -------------------------------------------------------------------------------- /examples/job/queries/2c.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(t.title) AS movie_title FROM company_name AS cn, keyword AS k, movie_companies AS mc, movie_keyword AS mk, title AS t WHERE cn.country_code ='[sm]' AND k.keyword ='character-name-in-title' AND cn.id = mc.company_id AND mc.movie_id = t.id AND t.id = mk.movie_id AND mk.keyword_id = k.id AND mc.movie_id = mk.movie_id; 2 | -------------------------------------------------------------------------------- /examples/job/queries/2d.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(t.title) AS movie_title FROM company_name AS cn, keyword AS k, movie_companies AS mc, movie_keyword AS mk, title AS t WHERE cn.country_code ='[us]' AND k.keyword ='character-name-in-title' AND cn.id = mc.company_id AND mc.movie_id = t.id AND t.id = mk.movie_id AND mk.keyword_id = k.id AND mc.movie_id = mk.movie_id; 2 | -------------------------------------------------------------------------------- /examples/job/queries/3a.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(t.title) AS movie_title FROM keyword AS k, movie_info AS mi, movie_keyword AS mk, title AS t WHERE k.keyword like '%sequel%' AND mi.info IN ('Sweden', 'Norway', 'Germany', 'Denmark', 'Swedish', 'Denish', 'Norwegian', 'German') AND t.production_year > 2005 AND t.id = mi.movie_id AND t.id = mk.movie_id AND mk.movie_id = mi.movie_id AND k.id = mk.keyword_id; 2 | -------------------------------------------------------------------------------- /utils/tidb_test.go: 
-------------------------------------------------------------------------------- 1 | package utils 2 | 3 | import ( 4 | "testing" 5 | ) 6 | 7 | func TestStartTiDB(t *testing.T) { 8 | s, err := StartLocalTiDBServer("") 9 | if err != nil { 10 | panic(err) 11 | } 12 | if !PingLocalTiDB(s.DSN()) { 13 | panic("failed to ping TiDB") 14 | } 15 | s.Release() 16 | if PingLocalTiDB(s.DSN()) { 17 | panic("TiDB should be killed") 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /examples/job/queries/32a.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(lt.link) AS link_type, MIN(t1.title) AS first_movie, MIN(t2.title) AS second_movie FROM keyword AS k, link_type AS lt, movie_keyword AS mk, movie_link AS ml, title AS t1, title AS t2 WHERE k.keyword ='10,000-mile-club' AND mk.keyword_id = k.id AND t1.id = mk.movie_id AND ml.movie_id = t1.id AND ml.linked_movie_id = t2.id AND lt.id = ml.link_type_id AND mk.movie_id = t1.id; 2 | -------------------------------------------------------------------------------- /examples/job/queries/3c.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(t.title) AS movie_title FROM keyword AS k, movie_info AS mi, movie_keyword AS mk, title AS t WHERE k.keyword like '%sequel%' AND mi.info IN ('Sweden', 'Norway', 'Germany', 'Denmark', 'Swedish', 'Denish', 'Norwegian', 'German', 'USA', 'American') AND t.production_year > 1990 AND t.id = mi.movie_id AND t.id = mk.movie_id AND mk.movie_id = mi.movie_id AND k.id = mk.keyword_id; 2 | -------------------------------------------------------------------------------- /examples/job/queries/32b.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(lt.link) AS link_type, MIN(t1.title) AS first_movie, MIN(t2.title) AS second_movie FROM keyword AS k, link_type AS lt, movie_keyword AS mk, movie_link AS ml, title AS t1, 
title AS t2 WHERE k.keyword ='character-name-in-title' AND mk.keyword_id = k.id AND t1.id = mk.movie_id AND ml.movie_id = t1.id AND ml.linked_movie_id = t2.id AND lt.id = ml.link_type_id AND mk.movie_id = t1.id; 2 | -------------------------------------------------------------------------------- /examples/job/queries/4a.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(mi_idx.info) AS rating, MIN(t.title) AS movie_title FROM info_type AS it, keyword AS k, movie_info_idx AS mi_idx, movie_keyword AS mk, title AS t WHERE it.info ='rating' AND k.keyword like '%sequel%' AND mi_idx.info > '5.0' AND t.production_year > 2005 AND t.id = mi_idx.movie_id AND t.id = mk.movie_id AND mk.movie_id = mi_idx.movie_id AND k.id = mk.keyword_id AND it.id = mi_idx.info_type_id; 2 | -------------------------------------------------------------------------------- /examples/job/queries/4b.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(mi_idx.info) AS rating, MIN(t.title) AS movie_title FROM info_type AS it, keyword AS k, movie_info_idx AS mi_idx, movie_keyword AS mk, title AS t WHERE it.info ='rating' AND k.keyword like '%sequel%' AND mi_idx.info > '9.0' AND t.production_year > 2010 AND t.id = mi_idx.movie_id AND t.id = mk.movie_id AND mk.movie_id = mi_idx.movie_id AND k.id = mk.keyword_id AND it.id = mi_idx.info_type_id; 2 | -------------------------------------------------------------------------------- /examples/job/queries/4c.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(mi_idx.info) AS rating, MIN(t.title) AS movie_title FROM info_type AS it, keyword AS k, movie_info_idx AS mi_idx, movie_keyword AS mk, title AS t WHERE it.info ='rating' AND k.keyword like '%sequel%' AND mi_idx.info > '2.0' AND t.production_year > 1990 AND t.id = mi_idx.movie_id AND t.id = mk.movie_id AND mk.movie_id = mi_idx.movie_id AND k.id = mk.keyword_id 
AND it.id = mi_idx.info_type_id; 2 | -------------------------------------------------------------------------------- /examples/job/queries/6a.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(k.keyword) AS movie_keyword, MIN(n.name) AS actor_name, MIN(t.title) AS marvel_movie FROM cast_info AS ci, keyword AS k, movie_keyword AS mk, name AS n, title AS t WHERE k.keyword = 'marvel-cinematic-universe' AND n.name LIKE '%Downey%Robert%' AND t.production_year > 2010 AND k.id = mk.keyword_id AND t.id = mk.movie_id AND t.id = ci.movie_id AND ci.movie_id = mk.movie_id AND n.id = ci.person_id; 2 | -------------------------------------------------------------------------------- /examples/job/queries/6c.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(k.keyword) AS movie_keyword, MIN(n.name) AS actor_name, MIN(t.title) AS marvel_movie FROM cast_info AS ci, keyword AS k, movie_keyword AS mk, name AS n, title AS t WHERE k.keyword = 'marvel-cinematic-universe' AND n.name LIKE '%Downey%Robert%' AND t.production_year > 2014 AND k.id = mk.keyword_id AND t.id = mk.movie_id AND t.id = ci.movie_id AND ci.movie_id = mk.movie_id AND n.id = ci.person_id; 2 | -------------------------------------------------------------------------------- /examples/job/queries/6e.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(k.keyword) AS movie_keyword, MIN(n.name) AS actor_name, MIN(t.title) AS marvel_movie FROM cast_info AS ci, keyword AS k, movie_keyword AS mk, name AS n, title AS t WHERE k.keyword = 'marvel-cinematic-universe' AND n.name LIKE '%Downey%Robert%' AND t.production_year > 2000 AND k.id = mk.keyword_id AND t.id = mk.movie_id AND t.id = ci.movie_id AND ci.movie_id = mk.movie_id AND n.id = ci.person_id; 2 | -------------------------------------------------------------------------------- /examples/job/queries/8c.sql: 
-------------------------------------------------------------------------------- 1 | SELECT MIN(a1.name) AS writer_pseudo_name, MIN(t.title) AS movie_title FROM aka_name AS a1, cast_info AS ci, company_name AS cn, movie_companies AS mc, name AS n1, role_type AS rt, title AS t WHERE cn.country_code ='[us]' AND rt.role ='writer' AND a1.person_id = n1.id AND n1.id = ci.person_id AND ci.movie_id = t.id AND t.id = mc.movie_id AND mc.company_id = cn.id AND ci.role_id = rt.id AND a1.person_id = ci.person_id AND ci.movie_id = mc.movie_id; 2 | -------------------------------------------------------------------------------- /examples/tpch_example1/queries/q6.sql: -------------------------------------------------------------------------------- 1 | -- $ID$ 2 | -- TPC-H/TPC-R Forecasting Revenue Change Query (Q6) 3 | -- Functional Query Definition 4 | -- Approved February 1998 5 | 6 | 7 | select 8 | sum(l_extendedprice * l_discount) as revenue 9 | from 10 | lineitem 11 | where 12 | l_shipdate >= date '1993-01-01' 13 | and l_shipdate < date '1993-01-01' + interval '1' year 14 | and l_discount between 0.07 - 0.01 and 0.07 + 0.01 15 | and l_quantity < 25; -------------------------------------------------------------------------------- /examples/job/queries/6f.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(k.keyword) AS movie_keyword, MIN(n.name) AS actor_name, MIN(t.title) AS hero_movie FROM cast_info AS ci, keyword AS k, movie_keyword AS mk, name AS n, title AS t WHERE k.keyword in ('superhero', 'sequel', 'second-part', 'marvel-comics', 'based-on-comic', 'tv-special', 'fight', 'violence') AND t.production_year > 2000 AND k.id = mk.keyword_id AND t.id = mk.movie_id AND t.id = ci.movie_id AND ci.movie_id = mk.movie_id AND n.id = ci.person_id; 2 | -------------------------------------------------------------------------------- /examples/job/queries/5b.sql: 
-------------------------------------------------------------------------------- 1 | SELECT MIN(t.title) AS american_vhs_movie FROM company_type AS ct, info_type AS it, movie_companies AS mc, movie_info AS mi, title AS t WHERE ct.kind = 'production companies' AND mc.note like '%(VHS)%' and mc.note like '%(USA)%' and mc.note like '%(1994)%' AND mi.info IN ('USA', 'America') AND t.production_year > 2010 AND t.id = mi.movie_id AND t.id = mc.movie_id AND mc.movie_id = mi.movie_id AND ct.id = mc.company_type_id AND it.id = mi.info_type_id; 2 | -------------------------------------------------------------------------------- /examples/job/queries/17f.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(n.name) AS member_in_charnamed_movie FROM cast_info AS ci, company_name AS cn, keyword AS k, movie_companies AS mc, movie_keyword AS mk, name AS n, title AS t WHERE k.keyword ='character-name-in-title' AND n.name LIKE '%B%' AND n.id = ci.person_id AND ci.movie_id = t.id AND t.id = mk.movie_id AND mk.keyword_id = k.id AND t.id = mc.movie_id AND mc.company_id = cn.id AND ci.movie_id = mc.movie_id AND ci.movie_id = mk.movie_id AND mc.movie_id = mk.movie_id; 2 | -------------------------------------------------------------------------------- /examples/job/queries/17d.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(n.name) AS member_in_charnamed_movie FROM cast_info AS ci, company_name AS cn, keyword AS k, movie_companies AS mc, movie_keyword AS mk, name AS n, title AS t WHERE k.keyword ='character-name-in-title' AND n.name LIKE '%Bert%' AND n.id = ci.person_id AND ci.movie_id = t.id AND t.id = mk.movie_id AND mk.keyword_id = k.id AND t.id = mc.movie_id AND mc.company_id = cn.id AND ci.movie_id = mc.movie_id AND ci.movie_id = mk.movie_id AND mc.movie_id = mk.movie_id; 2 | -------------------------------------------------------------------------------- 
/examples/job/queries/17e.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(n.name) AS member_in_charnamed_movie FROM cast_info AS ci, company_name AS cn, keyword AS k, movie_companies AS mc, movie_keyword AS mk, name AS n, title AS t WHERE cn.country_code ='[us]' AND k.keyword ='character-name-in-title' AND n.id = ci.person_id AND ci.movie_id = t.id AND t.id = mk.movie_id AND mk.keyword_id = k.id AND t.id = mc.movie_id AND mc.company_id = cn.id AND ci.movie_id = mc.movie_id AND ci.movie_id = mk.movie_id AND mc.movie_id = mk.movie_id; 2 | -------------------------------------------------------------------------------- /examples/job/queries/8d.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(an1.name) AS costume_designer_pseudo, MIN(t.title) AS movie_with_costumes FROM aka_name AS an1, cast_info AS ci, company_name AS cn, movie_companies AS mc, name AS n1, role_type AS rt, title AS t WHERE cn.country_code ='[us]' AND rt.role ='costume designer' AND an1.person_id = n1.id AND n1.id = ci.person_id AND ci.movie_id = t.id AND t.id = mc.movie_id AND mc.company_id = cn.id AND ci.role_id = rt.id AND an1.person_id = ci.person_id AND ci.movie_id = mc.movie_id; 2 | -------------------------------------------------------------------------------- /examples/job/queries/10c.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(chn.name) AS ch, MIN(t.title) AS movie_with_american_producer FROM char_name AS chn, cast_info AS ci, company_name AS cn, company_type AS ct, movie_companies AS mc, role_type AS rt, title AS t WHERE ci.note like '%(producer)%' AND cn.country_code = '[us]' AND t.production_year > 1990 AND t.id = mc.movie_id AND t.id = ci.movie_id AND ci.movie_id = mc.movie_id AND chn.id = ci.person_role_id AND rt.id = ci.role_id AND cn.id = mc.company_id AND ct.id = mc.company_type_id; 2 | 
-------------------------------------------------------------------------------- /examples/job/queries/17b.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(n.name) AS member_in_charnamed_movie, MIN(n.name) AS a1 FROM cast_info AS ci, company_name AS cn, keyword AS k, movie_companies AS mc, movie_keyword AS mk, name AS n, title AS t WHERE k.keyword ='character-name-in-title' AND n.name LIKE 'Z%' AND n.id = ci.person_id AND ci.movie_id = t.id AND t.id = mk.movie_id AND mk.keyword_id = k.id AND t.id = mc.movie_id AND mc.company_id = cn.id AND ci.movie_id = mc.movie_id AND ci.movie_id = mk.movie_id AND mc.movie_id = mk.movie_id; 2 | -------------------------------------------------------------------------------- /examples/job/queries/17c.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(n.name) AS member_in_charnamed_movie, MIN(n.name) AS a1 FROM cast_info AS ci, company_name AS cn, keyword AS k, movie_companies AS mc, movie_keyword AS mk, name AS n, title AS t WHERE k.keyword ='character-name-in-title' AND n.name LIKE 'X%' AND n.id = ci.person_id AND ci.movie_id = t.id AND t.id = mk.movie_id AND mk.keyword_id = k.id AND t.id = mc.movie_id AND mc.company_id = cn.id AND ci.movie_id = mc.movie_id AND ci.movie_id = mk.movie_id AND mc.movie_id = mk.movie_id; 2 | -------------------------------------------------------------------------------- /examples/job/queries/6b.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(k.keyword) AS movie_keyword, MIN(n.name) AS actor_name, MIN(t.title) AS hero_movie FROM cast_info AS ci, keyword AS k, movie_keyword AS mk, name AS n, title AS t WHERE k.keyword in ('superhero', 'sequel', 'second-part', 'marvel-comics', 'based-on-comic', 'tv-special', 'fight', 'violence') AND n.name LIKE '%Downey%Robert%' AND t.production_year > 2014 AND k.id = mk.keyword_id AND t.id = mk.movie_id AND t.id = 
ci.movie_id AND ci.movie_id = mk.movie_id AND n.id = ci.person_id; 2 | -------------------------------------------------------------------------------- /examples/job/queries/6d.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(k.keyword) AS movie_keyword, MIN(n.name) AS actor_name, MIN(t.title) AS hero_movie FROM cast_info AS ci, keyword AS k, movie_keyword AS mk, name AS n, title AS t WHERE k.keyword in ('superhero', 'sequel', 'second-part', 'marvel-comics', 'based-on-comic', 'tv-special', 'fight', 'violence') AND n.name LIKE '%Downey%Robert%' AND t.production_year > 2000 AND k.id = mk.keyword_id AND t.id = mk.movie_id AND t.id = ci.movie_id AND ci.movie_id = mk.movie_id AND n.id = ci.person_id; 2 | -------------------------------------------------------------------------------- /examples/tpch_example1/output/ddl.sql: -------------------------------------------------------------------------------- 1 | CREATE INDEX idx_l_partkey_l_quantity_l_shipmode ON tpch.lineitem (l_partkey, l_quantity, l_shipmode); 2 | CREATE INDEX idx_l_partkey_l_shipdate_l_shipmode ON tpch.lineitem (l_partkey, l_shipdate, l_shipmode); 3 | CREATE INDEX idx_l_suppkey_l_shipdate ON tpch.lineitem (l_suppkey, l_shipdate); 4 | CREATE INDEX idx_o_custkey_o_orderdate_o_totalprice ON tpch.orders (o_custkey, o_orderdate, o_totalprice); 5 | CREATE INDEX idx_ps_suppkey_ps_supplycost ON tpch.partsupp (ps_suppkey, ps_supplycost) -------------------------------------------------------------------------------- /examples/job/queries/10b.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(chn.name) AS ch, MIN(t.title) AS russian_mov_with_actor_producer FROM char_name AS chn, cast_info AS ci, company_name AS cn, company_type AS ct, movie_companies AS mc, role_type AS rt, title AS t WHERE ci.note like '%(producer)%' AND cn.country_code = '[ru]' AND rt.role = 'actor' AND t.production_year > 2010 AND t.id = 
mc.movie_id AND t.id = ci.movie_id AND ci.movie_id = mc.movie_id AND chn.id = ci.person_role_id AND rt.id = ci.role_id AND cn.id = mc.company_id AND ct.id = mc.company_type_id; 2 | -------------------------------------------------------------------------------- /examples/job/queries/1d.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(mc.note) AS production_note, MIN(t.title) AS movie_title, MIN(t.production_year) AS movie_year FROM company_type AS ct, info_type AS it, movie_companies AS mc, movie_info_idx AS mi_idx, title AS t WHERE ct.kind = 'production companies' AND it.info = 'bottom 10 rank' AND mc.note not like '%(as Metro-Goldwyn-Mayer Pictures)%' AND t.production_year >2000 AND ct.id = mc.company_type_id AND t.id = mc.movie_id AND t.id = mi_idx.movie_id AND mc.movie_id = mi_idx.movie_id AND it.id = mi_idx.info_type_id; 2 | -------------------------------------------------------------------------------- /examples/job/queries/5a.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(t.title) AS typical_european_movie FROM company_type AS ct, info_type AS it, movie_companies AS mc, movie_info AS mi, title AS t WHERE ct.kind = 'production companies' AND mc.note like '%(theatrical)%' and mc.note like '%(France)%' AND mi.info IN ('Sweden', 'Norway', 'Germany', 'Denmark', 'Swedish', 'Denish', 'Norwegian', 'German') AND t.production_year > 2005 AND t.id = mi.movie_id AND t.id = mc.movie_id AND mc.movie_id = mi.movie_id AND ct.id = mc.company_type_id AND it.id = mi.info_type_id; 2 | -------------------------------------------------------------------------------- /examples/job/queries/5c.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(t.title) AS american_movie FROM company_type AS ct, info_type AS it, movie_companies AS mc, movie_info AS mi, title AS t WHERE ct.kind = 'production companies' AND mc.note not like 
'%(TV)%' and mc.note like '%(USA)%' AND mi.info IN ('Sweden', 'Norway', 'Germany', 'Denmark', 'Swedish', 'Denish', 'Norwegian', 'German', 'USA', 'American') AND t.production_year > 1990 AND t.id = mi.movie_id AND t.id = mc.movie_id AND mc.movie_id = mi.movie_id AND ct.id = mc.company_type_id AND it.id = mi.info_type_id; 2 | -------------------------------------------------------------------------------- /examples/job/queries/17a.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(n.name) AS member_in_charnamed_american_movie, MIN(n.name) AS a1 FROM cast_info AS ci, company_name AS cn, keyword AS k, movie_companies AS mc, movie_keyword AS mk, name AS n, title AS t WHERE cn.country_code ='[us]' AND k.keyword ='character-name-in-title' AND n.name LIKE 'B%' AND n.id = ci.person_id AND ci.movie_id = t.id AND t.id = mk.movie_id AND mk.keyword_id = k.id AND t.id = mc.movie_id AND mc.company_id = cn.id AND ci.movie_id = mc.movie_id AND ci.movie_id = mk.movie_id AND mc.movie_id = mk.movie_id; 2 | -------------------------------------------------------------------------------- /examples/job/queries/1b.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(mc.note) AS production_note, MIN(t.title) AS movie_title, MIN(t.production_year) AS movie_year FROM company_type AS ct, info_type AS it, movie_companies AS mc, movie_info_idx AS mi_idx, title AS t WHERE ct.kind = 'production companies' AND it.info = 'bottom 10 rank' AND mc.note not like '%(as Metro-Goldwyn-Mayer Pictures)%' AND t.production_year between 2005 and 2010 AND ct.id = mc.company_type_id AND t.id = mc.movie_id AND t.id = mi_idx.movie_id AND mc.movie_id = mi_idx.movie_id AND it.id = mi_idx.info_type_id; 2 | -------------------------------------------------------------------------------- /examples/job/queries/10a.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(chn.name) 
AS uncredited_voiced_character, MIN(t.title) AS russian_movie FROM char_name AS chn, cast_info AS ci, company_name AS cn, company_type AS ct, movie_companies AS mc, role_type AS rt, title AS t WHERE ci.note like '%(voice)%' and ci.note like '%(uncredited)%' AND cn.country_code = '[ru]' AND rt.role = 'actor' AND t.production_year > 2005 AND t.id = mc.movie_id AND t.id = ci.movie_id AND ci.movie_id = mc.movie_id AND chn.id = ci.person_role_id AND rt.id = ci.role_id AND cn.id = mc.company_id AND ct.id = mc.company_type_id; 2 | -------------------------------------------------------------------------------- /examples/job/queries/1c.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(mc.note) AS production_note, MIN(t.title) AS movie_title, MIN(t.production_year) AS movie_year FROM company_type AS ct, info_type AS it, movie_companies AS mc, movie_info_idx AS mi_idx, title AS t WHERE ct.kind = 'production companies' AND it.info = 'top 250 rank' AND mc.note not like '%(as Metro-Goldwyn-Mayer Pictures)%' and (mc.note like '%(co-production)%') AND t.production_year >2010 AND ct.id = mc.company_type_id AND t.id = mc.movie_id AND t.id = mi_idx.movie_id AND mc.movie_id = mi_idx.movie_id AND it.id = mi_idx.info_type_id; 2 | -------------------------------------------------------------------------------- /examples/job/queries/1a.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(mc.note) AS production_note, MIN(t.title) AS movie_title, MIN(t.production_year) AS movie_year FROM company_type AS ct, info_type AS it, movie_companies AS mc, movie_info_idx AS mi_idx, title AS t WHERE ct.kind = 'production companies' AND it.info = 'top 250 rank' AND mc.note not like '%(as Metro-Goldwyn-Mayer Pictures)%' and (mc.note like '%(co-production)%' or mc.note like '%(presents)%') AND ct.id = mc.company_type_id AND t.id = mc.movie_id AND t.id = mi_idx.movie_id AND mc.movie_id = mi_idx.movie_id AND 
it.id = mi_idx.info_type_id; 2 | -------------------------------------------------------------------------------- /examples/tpch_example1/queries/q17.sql: -------------------------------------------------------------------------------- 1 | -- $ID$ 2 | -- TPC-H/TPC-R Small-Quantity-Order Revenue Query (Q17) 3 | -- Functional Query Definition 4 | -- Approved February 1998 5 | 6 | 7 | select 8 | sum(l_extendedprice) / 7.0 as avg_yearly 9 | from 10 | lineitem, 11 | part 12 | where 13 | p_partkey = l_partkey 14 | and p_brand = 'Brand#12' 15 | and p_container = 'SM BAG' 16 | and l_quantity < ( 17 | select 18 | 0.2 * avg(l_quantity) 19 | from 20 | lineitem 21 | where 22 | l_partkey = p_partkey 23 | ); -------------------------------------------------------------------------------- /examples/job/queries/16b.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(an.name) AS cool_actor_pseudonym, MIN(t.title) AS series_named_after_char FROM aka_name AS an, cast_info AS ci, company_name AS cn, keyword AS k, movie_companies AS mc, movie_keyword AS mk, name AS n, title AS t WHERE cn.country_code ='[us]' AND k.keyword ='character-name-in-title' AND an.person_id = n.id AND n.id = ci.person_id AND ci.movie_id = t.id AND t.id = mk.movie_id AND mk.keyword_id = k.id AND t.id = mc.movie_id AND mc.company_id = cn.id AND an.person_id = ci.person_id AND ci.movie_id = mc.movie_id AND ci.movie_id = mk.movie_id AND mc.movie_id = mk.movie_id; 2 | -------------------------------------------------------------------------------- /advisor/workload_info_compression_test.go: -------------------------------------------------------------------------------- 1 | package advisor 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/qw4990/index_advisor/utils" 7 | ) 8 | 9 | func TestDigestCompression(t *testing.T) { 10 | s := utils.NewSet[utils.Query]() 11 | s.Add(utils.Query{Text: "select * from t1 where a = 1", Frequency: 1}) 12 | s.Add(utils.Query{Text: 
"select * from t1 where a = 2", Frequency: 2}) 13 | s.Add(utils.Query{Text: "select * from t1 where a = 3", Frequency: 3}) 14 | cs := compressBySQLDigest(s) 15 | if cs.ToList()[0].Frequency != 1+2+3 { 16 | t.Errorf("expect 6, got %v", cs.ToList()[0].Frequency) 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /examples/job/queries/16c.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(an.name) AS cool_actor_pseudonym, MIN(t.title) AS series_named_after_char FROM aka_name AS an, cast_info AS ci, company_name AS cn, keyword AS k, movie_companies AS mc, movie_keyword AS mk, name AS n, title AS t WHERE cn.country_code ='[us]' AND k.keyword ='character-name-in-title' AND t.episode_nr < 100 AND an.person_id = n.id AND n.id = ci.person_id AND ci.movie_id = t.id AND t.id = mk.movie_id AND mk.keyword_id = k.id AND t.id = mc.movie_id AND mc.company_id = cn.id AND an.person_id = ci.person_id AND ci.movie_id = mc.movie_id AND ci.movie_id = mk.movie_id AND mc.movie_id = mk.movie_id; 2 | -------------------------------------------------------------------------------- /examples/web3bench/output/ddl.sql: -------------------------------------------------------------------------------- 1 | CREATE INDEX idx_block_number ON ethereum.receipts (block_number); 2 | CREATE INDEX idx_from_address_block_number_token_address ON ethereum.token_transfers (from_address, block_number, token_address); 3 | CREATE INDEX idx_token_address ON ethereum.token_transfers (token_address); 4 | CREATE INDEX idx_block_number ON ethereum.transactions (block_number); 5 | CREATE INDEX idx_from_address ON ethereum.transactions (from_address); 6 | CREATE INDEX idx_hash ON ethereum.transactions (hash); 7 | CREATE INDEX idx_to_address_block_timestamp_value ON ethereum.transactions (to_address, block_timestamp, `value`); -------------------------------------------------------------------------------- 
/examples/job/queries/8a.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(an1.name) AS actress_pseudonym, MIN(t.title) AS japanese_movie_dubbed FROM aka_name AS an1, cast_info AS ci, company_name AS cn, movie_companies AS mc, name AS n1, role_type AS rt, title AS t WHERE ci.note ='(voice: English version)' AND cn.country_code ='[jp]' AND mc.note like '%(Japan)%' and mc.note not like '%(USA)%' AND n1.name like '%Yo%' and n1.name not like '%Yu%' AND rt.role ='actress' AND an1.person_id = n1.id AND n1.id = ci.person_id AND ci.movie_id = t.id AND t.id = mc.movie_id AND mc.company_id = cn.id AND ci.role_id = rt.id AND an1.person_id = ci.person_id AND ci.movie_id = mc.movie_id; 2 | -------------------------------------------------------------------------------- /examples/job/queries/16a.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(an.name) AS cool_actor_pseudonym, MIN(t.title) AS series_named_after_char FROM aka_name AS an, cast_info AS ci, company_name AS cn, keyword AS k, movie_companies AS mc, movie_keyword AS mk, name AS n, title AS t WHERE cn.country_code ='[us]' AND k.keyword ='character-name-in-title' AND t.episode_nr >= 50 AND t.episode_nr < 100 AND an.person_id = n.id AND n.id = ci.person_id AND ci.movie_id = t.id AND t.id = mk.movie_id AND mk.keyword_id = k.id AND t.id = mc.movie_id AND mc.company_id = cn.id AND an.person_id = ci.person_id AND ci.movie_id = mc.movie_id AND ci.movie_id = mk.movie_id AND mc.movie_id = mk.movie_id; 2 | -------------------------------------------------------------------------------- /examples/job/queries/16d.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(an.name) AS cool_actor_pseudonym, MIN(t.title) AS series_named_after_char FROM aka_name AS an, cast_info AS ci, company_name AS cn, keyword AS k, movie_companies AS mc, movie_keyword AS mk, name AS n, title AS t WHERE 
cn.country_code ='[us]' AND k.keyword ='character-name-in-title' AND t.episode_nr >= 5 AND t.episode_nr < 100 AND an.person_id = n.id AND n.id = ci.person_id AND ci.movie_id = t.id AND t.id = mk.movie_id AND mk.keyword_id = k.id AND t.id = mc.movie_id AND mc.company_id = cn.id AND an.person_id = ci.person_id AND ci.movie_id = mc.movie_id AND ci.movie_id = mk.movie_id AND mc.movie_id = mk.movie_id; 2 | -------------------------------------------------------------------------------- /examples/job/queries/18a.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(mi.info) AS movie_budget, MIN(mi_idx.info) AS movie_votes, MIN(t.title) AS movie_title FROM cast_info AS ci, info_type AS it1, info_type AS it2, movie_info AS mi, movie_info_idx AS mi_idx, name AS n, title AS t WHERE ci.note in ('(producer)', '(executive producer)') AND it1.info = 'budget' AND it2.info = 'votes' AND n.gender = 'm' and n.name like '%Tim%' AND t.id = mi.movie_id AND t.id = mi_idx.movie_id AND t.id = ci.movie_id AND ci.movie_id = mi.movie_id AND ci.movie_id = mi_idx.movie_id AND mi.movie_id = mi_idx.movie_id AND n.id = ci.person_id AND it1.id = mi.info_type_id AND it2.id = mi_idx.info_type_id; 2 | -------------------------------------------------------------------------------- /examples/tpch_example1/queries/q14.sql: -------------------------------------------------------------------------------- 1 | -- $ID$ 2 | -- TPC-H/TPC-R Promotion Effect Query (Q14) 3 | -- Functional Query Definition 4 | -- Approved February 1998 5 | 6 | 7 | select 8 | 100.00 * sum(case 9 | when p_type like 'PROMO%' 10 | then l_extendedprice * (1 - l_discount) 11 | else 0 12 | end) / sum(l_extendedprice * (1 - l_discount)) as promo_revenue 13 | from 14 | lineitem, 15 | part 16 | where 17 | l_partkey = p_partkey 18 | and l_shipdate >= date '1993-04-01' 19 | and l_shipdate < date '1993-04-01' + interval '1' month; 
-------------------------------------------------------------------------------- /examples/tpch_example1/queries/q4.sql: -------------------------------------------------------------------------------- 1 | -- $ID$ 2 | -- TPC-H/TPC-R Order Priority Checking Query (Q4) 3 | -- Functional Query Definition 4 | -- Approved February 1998 5 | 6 | 7 | select 8 | o_orderpriority, 9 | count(*) as order_count 10 | from 11 | orders 12 | where 13 | o_orderdate >= date '1995-07-01' 14 | and o_orderdate < date '1995-07-01' + interval '3' month 15 | and exists ( 16 | select 17 | * 18 | from 19 | lineitem 20 | where 21 | l_orderkey = o_orderkey 22 | and l_commitdate < l_receiptdate 23 | ) 24 | group by 25 | o_orderpriority 26 | order by 27 | o_orderpriority; -------------------------------------------------------------------------------- /examples/tpch_example1/queries/q13.sql: -------------------------------------------------------------------------------- 1 | -- $ID$ 2 | -- TPC-H/TPC-R Customer Distribution Query (Q13) 3 | -- Functional Query Definition 4 | -- Approved February 1998 5 | 6 | 7 | select 8 | c_count, 9 | count(*) as custdist 10 | from 11 | ( 12 | select 13 | c_custkey, 14 | count(o_orderkey) as c_count 15 | from 16 | customer left outer join orders on 17 | c_custkey = o_custkey 18 | and o_comment not like '%special%packages%' 19 | group by 20 | c_custkey 21 | ) as c_orders 22 | group by 23 | c_count 24 | order by 25 | custdist desc, 26 | c_count desc; -------------------------------------------------------------------------------- /main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "github.com/qw4990/index_advisor/cmd" 5 | "github.com/spf13/cobra" 6 | ) 7 | 8 | var ( 9 | rootCmd = &cobra.Command{ 10 | Use: "TiDB-index-advisor", 11 | Short: "TiDB index advisor", 12 | Long: `TiDB index advisor recommends you the best indexes for your workload`, 13 | } 14 | ) 15 | 16 | func init() { 17 | 
cobra.OnInitialize() 18 | rootCmd.AddCommand(cmd.NewAdviseOnlineCmd()) 19 | rootCmd.AddCommand(cmd.NewAdviseOfflineCmd()) 20 | rootCmd.AddCommand(cmd.NewPreCheckCmd()) 21 | rootCmd.AddCommand(cmd.NewEvaluateCmd()) 22 | rootCmd.AddCommand(cmd.NewWorkloadExportCmd()) 23 | } 24 | 25 | func main() { 26 | rootCmd.Execute() 27 | } 28 | -------------------------------------------------------------------------------- /examples/tpch_example1/queries/q3.sql: -------------------------------------------------------------------------------- 1 | -- $ID$ 2 | -- TPC-H/TPC-R Shipping Priority Query (Q3) 3 | -- Functional Query Definition 4 | -- Approved February 1998 5 | 6 | 7 | select 8 | l_orderkey, 9 | sum(l_extendedprice * (1 - l_discount)) as revenue, 10 | o_orderdate, 11 | o_shippriority 12 | from 13 | customer, 14 | orders, 15 | lineitem 16 | where 17 | c_mktsegment = 'FURNITURE' 18 | and c_custkey = o_custkey 19 | and l_orderkey = o_orderkey 20 | and o_orderdate < date '1995-03-17' 21 | and l_shipdate > date '1995-03-17' 22 | group by 23 | l_orderkey, 24 | o_orderdate, 25 | o_shippriority 26 | order by 27 | revenue desc, 28 | o_orderdate; -------------------------------------------------------------------------------- /examples/job/queries/9d.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(an.name) AS alternative_name, MIN(chn.name) AS voiced_char_name, MIN(n.name) AS voicing_actress, MIN(t.title) AS american_movie FROM aka_name AS an, char_name AS chn, cast_info AS ci, company_name AS cn, movie_companies AS mc, name AS n, role_type AS rt, title AS t WHERE ci.note in ('(voice)', '(voice: Japanese version)', '(voice) (uncredited)', '(voice: English version)') AND cn.country_code ='[us]' AND n.gender ='f' AND rt.role ='actress' AND ci.movie_id = t.id AND t.id = mc.movie_id AND ci.movie_id = mc.movie_id AND mc.company_id = cn.id AND ci.role_id = rt.id AND n.id = ci.person_id AND chn.id = ci.person_role_id AND an.person_id = 
n.id AND an.person_id = ci.person_id; 2 | -------------------------------------------------------------------------------- /examples/job/queries/7b.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(n.name) AS of_person, MIN(t.title) AS biography_movie FROM aka_name AS an, cast_info AS ci, info_type AS it, link_type AS lt, movie_link AS ml, name AS n, person_info AS pi, title AS t WHERE an.name LIKE '%a%' AND it.info ='mini biography' AND lt.link ='features' AND n.name_pcode_cf LIKE 'D%' AND n.gender='m' AND pi.note ='Volker Boehm' AND t.production_year BETWEEN 1980 AND 1984 AND n.id = an.person_id AND n.id = pi.person_id AND ci.person_id = n.id AND t.id = ci.movie_id AND ml.linked_movie_id = t.id AND lt.id = ml.link_type_id AND it.id = pi.info_type_id AND pi.person_id = an.person_id AND pi.person_id = ci.person_id AND an.person_id = ci.person_id AND ci.movie_id = ml.linked_movie_id; 2 | -------------------------------------------------------------------------------- /examples/job/queries/13a.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(mi.info) AS release_date, MIN(miidx.info) AS rating, MIN(t.title) AS german_movie FROM company_name AS cn, company_type AS ct, info_type AS it, info_type AS it2, kind_type AS kt, movie_companies AS mc, movie_info AS mi, movie_info_idx AS miidx, title AS t WHERE cn.country_code ='[de]' AND ct.kind ='production companies' AND it.info ='rating' AND it2.info ='release dates' AND kt.kind ='movie' AND mi.movie_id = t.id AND it2.id = mi.info_type_id AND kt.id = t.kind_id AND mc.movie_id = t.id AND cn.id = mc.company_id AND ct.id = mc.company_type_id AND miidx.movie_id = t.id AND it.id = miidx.info_type_id AND mi.movie_id = miidx.movie_id AND mi.movie_id = mc.movie_id AND miidx.movie_id = mc.movie_id; 2 | -------------------------------------------------------------------------------- /examples/job/queries/13d.sql: 
-------------------------------------------------------------------------------- 1 | SELECT MIN(cn.name) AS producing_company, MIN(miidx.info) AS rating, MIN(t.title) AS movie FROM company_name AS cn, company_type AS ct, info_type AS it, info_type AS it2, kind_type AS kt, movie_companies AS mc, movie_info AS mi, movie_info_idx AS miidx, title AS t WHERE cn.country_code ='[us]' AND ct.kind ='production companies' AND it.info ='rating' AND it2.info ='release dates' AND kt.kind ='movie' AND mi.movie_id = t.id AND it2.id = mi.info_type_id AND kt.id = t.kind_id AND mc.movie_id = t.id AND cn.id = mc.company_id AND ct.id = mc.company_type_id AND miidx.movie_id = t.id AND it.id = miidx.info_type_id AND mi.movie_id = miidx.movie_id AND mi.movie_id = mc.movie_id AND miidx.movie_id = mc.movie_id; 2 | -------------------------------------------------------------------------------- /examples/job/queries/18c.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(mi.info) AS movie_budget, MIN(mi_idx.info) AS movie_votes, MIN(t.title) AS movie_title FROM cast_info AS ci, info_type AS it1, info_type AS it2, movie_info AS mi, movie_info_idx AS mi_idx, name AS n, title AS t WHERE ci.note in ('(writer)', '(head writer)', '(written by)', '(story)', '(story editor)') AND it1.info = 'genres' AND it2.info = 'votes' AND mi.info in ('Horror', 'Action', 'Sci-Fi', 'Thriller', 'Crime', 'War') AND n.gender = 'm' AND t.id = mi.movie_id AND t.id = mi_idx.movie_id AND t.id = ci.movie_id AND ci.movie_id = mi.movie_id AND ci.movie_id = mi_idx.movie_id AND mi.movie_id = mi_idx.movie_id AND n.id = ci.person_id AND it1.id = mi.info_type_id AND it2.id = mi_idx.info_type_id; 2 | -------------------------------------------------------------------------------- /examples/job/queries/9c.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(an.name) AS alternative_name, MIN(chn.name) AS voiced_character_name, 
MIN(n.name) AS voicing_actress, MIN(t.title) AS american_movie FROM aka_name AS an, char_name AS chn, cast_info AS ci, company_name AS cn, movie_companies AS mc, name AS n, role_type AS rt, title AS t WHERE ci.note in ('(voice)', '(voice: Japanese version)', '(voice) (uncredited)', '(voice: English version)') AND cn.country_code ='[us]' AND n.gender ='f' and n.name like '%An%' AND rt.role ='actress' AND ci.movie_id = t.id AND t.id = mc.movie_id AND ci.movie_id = mc.movie_id AND mc.company_id = cn.id AND ci.role_id = rt.id AND n.id = ci.person_id AND chn.id = ci.person_role_id AND an.person_id = n.id AND an.person_id = ci.person_id; 2 | -------------------------------------------------------------------------------- /examples/job/queries/11d.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(cn.name) AS from_company, MIN(mc.note) AS production_note, MIN(t.title) AS movie_based_on_book FROM company_name AS cn, company_type AS ct, keyword AS k, link_type AS lt, movie_companies AS mc, movie_keyword AS mk, movie_link AS ml, title AS t WHERE cn.country_code !='[pl]' AND ct.kind != 'production companies' and ct.kind is not NULL AND k.keyword in ('sequel', 'revenge', 'based-on-novel') AND mc.note is not NULL AND t.production_year > 1950 AND lt.id = ml.link_type_id AND ml.movie_id = t.id AND t.id = mk.movie_id AND mk.keyword_id = k.id AND t.id = mc.movie_id AND mc.company_type_id = ct.id AND mc.company_id = cn.id AND ml.movie_id = mk.movie_id AND ml.movie_id = mc.movie_id AND mk.movie_id = mc.movie_id; 2 | -------------------------------------------------------------------------------- /examples/job/queries/15d.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(at.title) AS aka_title, MIN(t.title) AS internet_movie_title FROM aka_title AS at, company_name AS cn, company_type AS ct, info_type AS it1, keyword AS k, movie_companies AS mc, movie_info AS mi, movie_keyword AS mk, 
title AS t WHERE cn.country_code = '[us]' AND it1.info = 'release dates' AND mi.note like '%internet%' AND t.production_year > 1990 AND t.id = at.movie_id AND t.id = mi.movie_id AND t.id = mk.movie_id AND t.id = mc.movie_id AND mk.movie_id = mi.movie_id AND mk.movie_id = mc.movie_id AND mk.movie_id = at.movie_id AND mi.movie_id = mc.movie_id AND mi.movie_id = at.movie_id AND mc.movie_id = at.movie_id AND k.id = mk.keyword_id AND it1.id = mi.info_type_id AND cn.id = mc.company_id AND ct.id = mc.company_type_id; 2 | -------------------------------------------------------------------------------- /examples/job/queries/7a.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(n.name) AS of_person, MIN(t.title) AS biography_movie FROM aka_name AS an, cast_info AS ci, info_type AS it, link_type AS lt, movie_link AS ml, name AS n, person_info AS pi, title AS t WHERE an.name LIKE '%a%' AND it.info ='mini biography' AND lt.link ='features' AND n.name_pcode_cf BETWEEN 'A' AND 'F' AND (n.gender='m' OR (n.gender = 'f' AND n.name LIKE 'B%')) AND pi.note ='Volker Boehm' AND t.production_year BETWEEN 1980 AND 1995 AND n.id = an.person_id AND n.id = pi.person_id AND ci.person_id = n.id AND t.id = ci.movie_id AND ml.linked_movie_id = t.id AND lt.id = ml.link_type_id AND it.id = pi.info_type_id AND pi.person_id = an.person_id AND pi.person_id = ci.person_id AND an.person_id = ci.person_id AND ci.movie_id = ml.linked_movie_id; 2 | -------------------------------------------------------------------------------- /examples/job/queries/11a.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(cn.name) AS from_company, MIN(lt.link) AS movie_link_type, MIN(t.title) AS non_polish_sequel_movie FROM company_name AS cn, company_type AS ct, keyword AS k, link_type AS lt, movie_companies AS mc, movie_keyword AS mk, movie_link AS ml, title AS t WHERE cn.country_code !='[pl]' AND (cn.name LIKE '%Film%' OR 
cn.name LIKE '%Warner%') AND ct.kind ='production companies' AND k.keyword ='sequel' AND lt.link LIKE '%follow%' AND mc.note IS NULL AND t.production_year BETWEEN 1950 AND 2000 AND lt.id = ml.link_type_id AND ml.movie_id = t.id AND t.id = mk.movie_id AND mk.keyword_id = k.id AND t.id = mc.movie_id AND mc.company_type_id = ct.id AND mc.company_id = cn.id AND ml.movie_id = mk.movie_id AND ml.movie_id = mc.movie_id AND mk.movie_id = mc.movie_id; 2 | -------------------------------------------------------------------------------- /examples/job/queries/11b.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(cn.name) AS from_company, MIN(lt.link) AS movie_link_type, MIN(t.title) AS sequel_movie FROM company_name AS cn, company_type AS ct, keyword AS k, link_type AS lt, movie_companies AS mc, movie_keyword AS mk, movie_link AS ml, title AS t WHERE cn.country_code !='[pl]' AND (cn.name LIKE '%Film%' OR cn.name LIKE '%Warner%') AND ct.kind ='production companies' AND k.keyword ='sequel' AND lt.link LIKE '%follows%' AND mc.note IS NULL AND t.production_year = 1998 and t.title like '%Money%' AND lt.id = ml.link_type_id AND ml.movie_id = t.id AND t.id = mk.movie_id AND mk.keyword_id = k.id AND t.id = mc.movie_id AND mc.company_type_id = ct.id AND mc.company_id = cn.id AND ml.movie_id = mk.movie_id AND ml.movie_id = mc.movie_id AND mk.movie_id = mc.movie_id; 2 | -------------------------------------------------------------------------------- /examples/job/queries/9b.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(an.name) AS alternative_name, MIN(chn.name) AS voiced_character, MIN(n.name) AS voicing_actress, MIN(t.title) AS american_movie FROM aka_name AS an, char_name AS chn, cast_info AS ci, company_name AS cn, movie_companies AS mc, name AS n, role_type AS rt, title AS t WHERE ci.note = '(voice)' AND cn.country_code ='[us]' AND mc.note like '%(200%)%' and (mc.note like 
'%(USA)%' or mc.note like '%(worldwide)%') AND n.gender ='f' and n.name like '%Angel%' AND rt.role ='actress' AND t.production_year between 2007 and 2010 AND ci.movie_id = t.id AND t.id = mc.movie_id AND ci.movie_id = mc.movie_id AND mc.company_id = cn.id AND ci.role_id = rt.id AND n.id = ci.person_id AND chn.id = ci.person_role_id AND an.person_id = n.id AND an.person_id = ci.person_id; 2 | -------------------------------------------------------------------------------- /examples/job/queries/12a.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(cn.name) AS movie_company, MIN(mi_idx.info) AS rating, MIN(t.title) AS drama_horror_movie FROM company_name AS cn, company_type AS ct, info_type AS it1, info_type AS it2, movie_companies AS mc, movie_info AS mi, movie_info_idx AS mi_idx, title AS t WHERE cn.country_code = '[us]' AND ct.kind = 'production companies' AND it1.info = 'genres' AND it2.info = 'rating' AND mi.info in ('Drama', 'Horror') AND mi_idx.info > '8.0' AND t.production_year between 2005 and 2008 AND t.id = mi.movie_id AND t.id = mi_idx.movie_id AND mi.info_type_id = it1.id AND mi_idx.info_type_id = it2.id AND t.id = mc.movie_id AND ct.id = mc.company_type_id AND cn.id = mc.company_id AND mc.movie_id = mi.movie_id AND mc.movie_id = mi_idx.movie_id AND mi.movie_id = mi_idx.movie_id; 2 | -------------------------------------------------------------------------------- /examples/job/queries/8b.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(an.name) AS acress_pseudonym, MIN(t.title) AS japanese_anime_movie FROM aka_name AS an, cast_info AS ci, company_name AS cn, movie_companies AS mc, name AS n, role_type AS rt, title AS t WHERE ci.note ='(voice: English version)' AND cn.country_code ='[jp]' AND mc.note like '%(Japan)%' and mc.note not like '%(USA)%' and (mc.note like '%(2006)%' or mc.note like '%(2007)%') AND n.name like '%Yo%' and n.name not like '%Yu%' 
AND rt.role ='actress' AND t.production_year between 2006 and 2007 and (t.title like 'One Piece%' or t.title like 'Dragon Ball Z%') AND an.person_id = n.id AND n.id = ci.person_id AND ci.movie_id = t.id AND t.id = mc.movie_id AND mc.company_id = cn.id AND ci.role_id = rt.id AND an.person_id = ci.person_id AND ci.movie_id = mc.movie_id; 2 | -------------------------------------------------------------------------------- /examples/job/queries/12b.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(mi.info) AS budget, MIN(t.title) AS unsuccsessful_movie FROM company_name AS cn, company_type AS ct, info_type AS it1, info_type AS it2, movie_companies AS mc, movie_info AS mi, movie_info_idx AS mi_idx, title AS t WHERE cn.country_code ='[us]' AND ct.kind is not NULL and (ct.kind ='production companies' or ct.kind = 'distributors') AND it1.info ='budget' AND it2.info ='bottom 10 rank' AND t.production_year >2000 AND (t.title LIKE 'Birdemic%' OR t.title LIKE '%Movie%') AND t.id = mi.movie_id AND t.id = mi_idx.movie_id AND mi.info_type_id = it1.id AND mi_idx.info_type_id = it2.id AND t.id = mc.movie_id AND ct.id = mc.company_type_id AND cn.id = mc.company_id AND mc.movie_id = mi.movie_id AND mc.movie_id = mi_idx.movie_id AND mi.movie_id = mi_idx.movie_id; 2 | -------------------------------------------------------------------------------- /examples/tpch_example1/queries/q5.sql: -------------------------------------------------------------------------------- 1 | -- $ID$ 2 | -- TPC-H/TPC-R Local Supplier Volume Query (Q5) 3 | -- Functional Query Definition 4 | -- Approved February 1998 5 | 6 | 7 | select 8 | n_name, 9 | sum(l_extendedprice * (1 - l_discount)) as revenue 10 | from 11 | customer, 12 | orders, 13 | lineitem, 14 | supplier, 15 | nation, 16 | region 17 | where 18 | c_custkey = o_custkey 19 | and l_orderkey = o_orderkey 20 | and l_suppkey = s_suppkey 21 | and c_nationkey = s_nationkey 22 | and s_nationkey = 
n_nationkey 23 | and n_regionkey = r_regionkey 24 | and r_name = 'AMERICA' 25 | and o_orderdate >= date '1993-01-01' 26 | and o_orderdate < date '1993-01-01' + interval '1' year 27 | group by 28 | n_name 29 | order by 30 | revenue desc; -------------------------------------------------------------------------------- /examples/job/queries/12c.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(cn.name) AS movie_company, MIN(mi_idx.info) AS rating, MIN(t.title) AS mainstream_movie FROM company_name AS cn, company_type AS ct, info_type AS it1, info_type AS it2, movie_companies AS mc, movie_info AS mi, movie_info_idx AS mi_idx, title AS t WHERE cn.country_code = '[us]' AND ct.kind = 'production companies' AND it1.info = 'genres' AND it2.info = 'rating' AND mi.info in ('Drama', 'Horror', 'Western', 'Family') AND mi_idx.info > '7.0' AND t.production_year between 2000 and 2010 AND t.id = mi.movie_id AND t.id = mi_idx.movie_id AND mi.info_type_id = it1.id AND mi_idx.info_type_id = it2.id AND t.id = mc.movie_id AND ct.id = mc.company_type_id AND cn.id = mc.company_id AND mc.movie_id = mi.movie_id AND mc.movie_id = mi_idx.movie_id AND mi.movie_id = mi_idx.movie_id; 2 | -------------------------------------------------------------------------------- /examples/job/queries/13c.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(cn.name) AS producing_company, MIN(miidx.info) AS rating, MIN(t.title) AS movie_about_winning FROM company_name AS cn, company_type AS ct, info_type AS it, info_type AS it2, kind_type AS kt, movie_companies AS mc, movie_info AS mi, movie_info_idx AS miidx, title AS t WHERE cn.country_code ='[us]' AND ct.kind ='production companies' AND it.info ='rating' AND it2.info ='release dates' AND kt.kind ='movie' AND t.title != '' AND (t.title LIKE 'Champion%' OR t.title LIKE 'Loser%') AND mi.movie_id = t.id AND it2.id = mi.info_type_id AND kt.id = t.kind_id AND 
mc.movie_id = t.id AND cn.id = mc.company_id AND ct.id = mc.company_type_id AND miidx.movie_id = t.id AND it.id = miidx.info_type_id AND mi.movie_id = miidx.movie_id AND mi.movie_id = mc.movie_id AND miidx.movie_id = mc.movie_id; 2 | -------------------------------------------------------------------------------- /examples/job/queries/18b.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(mi.info) AS movie_budget, MIN(mi_idx.info) AS movie_votes, MIN(t.title) AS movie_title FROM cast_info AS ci, info_type AS it1, info_type AS it2, movie_info AS mi, movie_info_idx AS mi_idx, name AS n, title AS t WHERE ci.note in ('(writer)', '(head writer)', '(written by)', '(story)', '(story editor)') AND it1.info = 'genres' AND it2.info = 'rating' AND mi.info in ('Horror', 'Thriller') and mi.note is NULL AND mi_idx.info > '8.0' AND n.gender is not null and n.gender = 'f' AND t.production_year between 2008 and 2014 AND t.id = mi.movie_id AND t.id = mi_idx.movie_id AND t.id = ci.movie_id AND ci.movie_id = mi.movie_id AND ci.movie_id = mi_idx.movie_id AND mi.movie_id = mi_idx.movie_id AND n.id = ci.person_id AND it1.id = mi.info_type_id AND it2.id = mi_idx.info_type_id; 2 | -------------------------------------------------------------------------------- /examples/job/queries/13b.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(cn.name) AS producing_company, MIN(miidx.info) AS rating, MIN(t.title) AS movie_about_winning FROM company_name AS cn, company_type AS ct, info_type AS it, info_type AS it2, kind_type AS kt, movie_companies AS mc, movie_info AS mi, movie_info_idx AS miidx, title AS t WHERE cn.country_code ='[us]' AND ct.kind ='production companies' AND it.info ='rating' AND it2.info ='release dates' AND kt.kind ='movie' AND t.title != '' AND (t.title LIKE '%Champion%' OR t.title LIKE '%Loser%') AND mi.movie_id = t.id AND it2.id = mi.info_type_id AND kt.id = t.kind_id AND 
mc.movie_id = t.id AND cn.id = mc.company_id AND ct.id = mc.company_type_id AND miidx.movie_id = t.id AND it.id = miidx.info_type_id AND mi.movie_id = miidx.movie_id AND mi.movie_id = mc.movie_id AND miidx.movie_id = mc.movie_id; 2 | -------------------------------------------------------------------------------- /examples/job/queries/9a.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(an.name) AS alternative_name, MIN(chn.name) AS character_name, MIN(t.title) AS movie FROM aka_name AS an, char_name AS chn, cast_info AS ci, company_name AS cn, movie_companies AS mc, name AS n, role_type AS rt, title AS t WHERE ci.note in ('(voice)', '(voice: Japanese version)', '(voice) (uncredited)', '(voice: English version)') AND cn.country_code ='[us]' AND mc.note is not NULL and (mc.note like '%(USA)%' or mc.note like '%(worldwide)%') AND n.gender ='f' and n.name like '%Ang%' AND rt.role ='actress' AND t.production_year between 2005 and 2015 AND ci.movie_id = t.id AND t.id = mc.movie_id AND ci.movie_id = mc.movie_id AND mc.company_id = cn.id AND ci.role_id = rt.id AND n.id = ci.person_id AND chn.id = ci.person_role_id AND an.person_id = n.id AND an.person_id = ci.person_id; 2 | -------------------------------------------------------------------------------- /examples/job/queries/11c.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(cn.name) AS from_company, MIN(mc.note) AS production_note, MIN(t.title) AS movie_based_on_book FROM company_name AS cn, company_type AS ct, keyword AS k, link_type AS lt, movie_companies AS mc, movie_keyword AS mk, movie_link AS ml, title AS t WHERE cn.country_code !='[pl]' and (cn.name like '20th Century Fox%' or cn.name like 'Twentieth Century Fox%') AND ct.kind != 'production companies' and ct.kind is not NULL AND k.keyword in ('sequel', 'revenge', 'based-on-novel') AND mc.note is not NULL AND t.production_year > 1950 AND lt.id = ml.link_type_id 
AND ml.movie_id = t.id AND t.id = mk.movie_id AND mk.keyword_id = k.id AND t.id = mc.movie_id AND mc.company_type_id = ct.id AND mc.company_id = cn.id AND ml.movie_id = mk.movie_id AND ml.movie_id = mc.movie_id AND mk.movie_id = mc.movie_id; 2 | -------------------------------------------------------------------------------- /examples/workload_export_output/output/q4.txt: -------------------------------------------------------------------------------- 1 | Alias: q4 2 | Query: 3 | select * from t where a=1 4 | 5 | Original Cost: 5.49E+01 6 | Optimized Cost: 1.89E+01 7 | Cost Reduction Ratio: 0.34 8 | 9 | 10 | ===================== original plan ===================== 11 | TableReader_7 1.00 54.90 root data:Selection_6 12 | └─Selection_6 1.00 760.20 cop[tikv] eq(test.t.a, 1) 13 | └─TableFullScan_5 3.00 610.50 cop[tikv] table:t keep order:false 14 | 15 | ===================== optimized plan ===================== 16 | IndexReader_6 1.25 18.85 root index:IndexRangeScan_5 17 | └─IndexRangeScan_5 1.25 203.50 cop[tikv] table:t, index:idx_a(a) range:[1,1], keep order:false -------------------------------------------------------------------------------- /examples/workload_export_output/schema.sql: -------------------------------------------------------------------------------- 1 | create database if not exists test; 2 | use test; 3 | CREATE TABLE `t` ( 4 | `a` int(11) DEFAULT NULL 5 | ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin; 6 | 7 | create database if not exists test; 8 | use test; 9 | CREATE TABLE `ta` ( 10 | `a` int(11) DEFAULT NULL, 11 | `b` int(11) DEFAULT NULL 12 | ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin; 13 | 14 | create database if not exists test1; 15 | use test1; 16 | CREATE TABLE `t` ( 17 | `a` int(11) DEFAULT NULL 18 | ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin; 19 | 20 | create database if not exists test1; 21 | use test1; 22 | CREATE TABLE `tb` ( 23 | `a` int(11) DEFAULT NULL, 24 | `c` int(11) DEFAULT NULL 
25 | ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin; 26 | 27 | -------------------------------------------------------------------------------- /examples/job/output/ddl.sql: -------------------------------------------------------------------------------- 1 | CREATE INDEX idx_movie_id_person_id ON imdbload_no_fk.cast_info (movie_id, person_id); 2 | CREATE INDEX idx_person_id ON imdbload_no_fk.cast_info (person_id); 3 | CREATE INDEX idx_role_id ON imdbload_no_fk.cast_info (role_id); 4 | CREATE INDEX idx_movie_id_company_id_company_type_id ON imdbload_no_fk.movie_companies (movie_id, company_id, company_type_id); 5 | CREATE INDEX idx_info_type_id ON imdbload_no_fk.movie_info (info_type_id); 6 | CREATE INDEX idx_movie_id_info_type_id ON imdbload_no_fk.movie_info (movie_id, info_type_id); 7 | CREATE INDEX idx_movie_id_info_type_id ON imdbload_no_fk.movie_info_idx (movie_id, info_type_id); 8 | CREATE INDEX idx_keyword_id_movie_id ON imdbload_no_fk.movie_keyword (keyword_id, movie_id); 9 | CREATE INDEX idx_movie_id_keyword_id ON imdbload_no_fk.movie_keyword (movie_id, keyword_id) -------------------------------------------------------------------------------- /examples/job/queries/14a.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(mi_idx.info) AS rating, MIN(t.title) AS northern_dark_movie FROM info_type AS it1, info_type AS it2, keyword AS k, kind_type AS kt, movie_info AS mi, movie_info_idx AS mi_idx, movie_keyword AS mk, title AS t WHERE it1.info = 'countries' AND it2.info = 'rating' AND k.keyword in ('murder', 'murder-in-title', 'blood', 'violence') AND kt.kind = 'movie' AND mi.info IN ('Sweden', 'Norway', 'Germany', 'Denmark', 'Swedish', 'Denish', 'Norwegian', 'German', 'USA', 'American') AND mi_idx.info < '8.5' AND t.production_year > 2010 AND kt.id = t.kind_id AND t.id = mi.movie_id AND t.id = mk.movie_id AND t.id = mi_idx.movie_id AND mk.movie_id = mi.movie_id AND mk.movie_id = mi_idx.movie_id AND 
mi.movie_id = mi_idx.movie_id AND k.id = mk.keyword_id AND it1.id = mi.info_type_id AND it2.id = mi_idx.info_type_id; 2 | -------------------------------------------------------------------------------- /examples/workload_export_output/output/q10.txt: -------------------------------------------------------------------------------- 1 | Alias: q10 2 | Query: 3 | select a from t where a =1 4 | 5 | Original Cost: 1.10E+02 6 | Optimized Cost: 3.77E+01 7 | Cost Reduction Ratio: 0.34 8 | 9 | 10 | ===================== original plan ===================== 11 | TableReader_7 2.00 109.81 root data:Selection_6 12 | └─Selection_6 2.00 1520.40 cop[tikv] eq(test1.t.a, 1) 13 | └─TableFullScan_5 6.00 1221.00 cop[tikv] table:t keep order:false 14 | 15 | ===================== optimized plan ===================== 16 | IndexReader_6 2.50 37.69 root index:IndexRangeScan_5 17 | └─IndexRangeScan_5 2.50 407.00 cop[tikv] table:t, index:idx_a(a) range:[1,1], keep order:false -------------------------------------------------------------------------------- /examples/workload_export_output/output/q2.txt: -------------------------------------------------------------------------------- 1 | Alias: q2 2 | Query: 3 | select * from t where a<1 4 | 5 | Original Cost: 5.49E+01 6 | Optimized Cost: 1.50E+01 7 | Cost Reduction Ratio: 0.27 8 | 9 | 10 | ===================== original plan ===================== 11 | TableReader_7 0.00 54.90 root data:Selection_6 12 | └─Selection_6 0.00 760.20 cop[tikv] lt(test.t.a, 1) 13 | └─TableFullScan_5 3.00 610.50 cop[tikv] table:t keep order:false 14 | 15 | ===================== optimized plan ===================== 16 | IndexReader_6 1.00 15.03 root index:IndexRangeScan_5 17 | └─IndexRangeScan_5 1.00 162.31 cop[tikv] table:t, index:idx_a(a) range:[-inf,1), keep order:false -------------------------------------------------------------------------------- /examples/tpch_example1/queries/q11.sql: 
-------------------------------------------------------------------------------- 1 | -- $ID$ 2 | -- TPC-H/TPC-R Important Stock Identification Query (Q11) 3 | -- Functional Query Definition 4 | -- Approved February 1998 5 | 6 | 7 | select 8 | ps_partkey, 9 | sum(ps_supplycost * ps_availqty) as value 10 | from 11 | partsupp, 12 | supplier, 13 | nation 14 | where 15 | ps_suppkey = s_suppkey 16 | and s_nationkey = n_nationkey 17 | and n_name = 'JAPAN' 18 | group by 19 | ps_partkey having 20 | sum(ps_supplycost * ps_availqty) > ( 21 | select 22 | sum(ps_supplycost * ps_availqty) * 0.0001000000 23 | from 24 | partsupp, 25 | supplier, 26 | nation 27 | where 28 | ps_suppkey = s_suppkey 29 | and s_nationkey = n_nationkey 30 | and n_name = 'JAPAN' 31 | ) 32 | order by 33 | value desc; -------------------------------------------------------------------------------- /examples/tpch_example1/queries/q16.sql: -------------------------------------------------------------------------------- 1 | -- $ID$ 2 | -- TPC-H/TPC-R Parts/Supplier Relationship Query (Q16) 3 | -- Functional Query Definition 4 | -- Approved February 1998 5 | 6 | 7 | select 8 | p_brand, 9 | p_type, 10 | p_size, 11 | count(distinct ps_suppkey) as supplier_cnt 12 | from 13 | partsupp, 14 | part 15 | where 16 | p_partkey = ps_partkey 17 | and p_brand <> 'Brand#41' 18 | and p_type not like 'MEDIUM BURNISHED%' 19 | and p_size in (4, 22, 35, 31, 47, 44, 30, 11) 20 | and ps_suppkey not in ( 21 | select 22 | s_suppkey 23 | from 24 | supplier 25 | where 26 | s_comment like '%Customer%Complaints%' 27 | ) 28 | group by 29 | p_brand, 30 | p_type, 31 | p_size 32 | order by 33 | supplier_cnt desc, 34 | p_brand, 35 | p_type, 36 | p_size; -------------------------------------------------------------------------------- /examples/job/queries/15a.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(mi.info) AS release_date, MIN(t.title) AS internet_movie FROM aka_title AS at, 
company_name AS cn, company_type AS ct, info_type AS it1, keyword AS k, movie_companies AS mc, movie_info AS mi, movie_keyword AS mk, title AS t WHERE cn.country_code = '[us]' AND it1.info = 'release dates' AND mc.note like '%(200%)%' and mc.note like '%(worldwide)%' AND mi.note like '%internet%' AND mi.info like 'USA:% 200%' AND t.production_year > 2000 AND t.id = at.movie_id AND t.id = mi.movie_id AND t.id = mk.movie_id AND t.id = mc.movie_id AND mk.movie_id = mi.movie_id AND mk.movie_id = mc.movie_id AND mk.movie_id = at.movie_id AND mi.movie_id = mc.movie_id AND mi.movie_id = at.movie_id AND mc.movie_id = at.movie_id AND k.id = mk.keyword_id AND it1.id = mi.info_type_id AND cn.id = mc.company_id AND ct.id = mc.company_type_id; 2 | -------------------------------------------------------------------------------- /examples/job/queries/19d.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(n.name) AS voicing_actress, MIN(t.title) AS jap_engl_voiced_movie FROM aka_name AS an, char_name AS chn, cast_info AS ci, company_name AS cn, info_type AS it, movie_companies AS mc, movie_info AS mi, name AS n, role_type AS rt, title AS t WHERE ci.note in ('(voice)', '(voice: Japanese version)', '(voice) (uncredited)', '(voice: English version)') AND cn.country_code ='[us]' AND it.info = 'release dates' AND n.gender ='f' AND rt.role ='actress' AND t.production_year > 2000 AND t.id = mi.movie_id AND t.id = mc.movie_id AND t.id = ci.movie_id AND mc.movie_id = ci.movie_id AND mc.movie_id = mi.movie_id AND mi.movie_id = ci.movie_id AND cn.id = mc.company_id AND it.id = mi.info_type_id AND n.id = ci.person_id AND rt.id = ci.role_id AND n.id = an.person_id AND ci.person_id = an.person_id AND chn.id = ci.person_role_id; 2 | -------------------------------------------------------------------------------- /examples/tpch_example1/queries/q18.sql: -------------------------------------------------------------------------------- 1 | -- $ID$ 2 
| -- TPC-H/TPC-R Large Volume Customer Query (Q18) 3 | -- Function Query Definition 4 | -- Approved February 1998 5 | 6 | 7 | select 8 | c_name, 9 | c_custkey, 10 | o_orderkey, 11 | o_orderdate, 12 | o_totalprice, 13 | sum(l_quantity) 14 | from 15 | customer, 16 | orders, 17 | lineitem 18 | where 19 | o_orderkey in ( 20 | select 21 | l_orderkey 22 | from 23 | lineitem 24 | group by 25 | l_orderkey having 26 | sum(l_quantity) > 313 27 | ) 28 | and c_custkey = o_custkey 29 | and o_orderkey = l_orderkey 30 | group by 31 | c_name, 32 | c_custkey, 33 | o_orderkey, 34 | o_orderdate, 35 | o_totalprice 36 | order by 37 | o_totalprice desc, 38 | o_orderdate; -------------------------------------------------------------------------------- /examples/job/queries/15c.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(mi.info) AS release_date, MIN(t.title) AS modern_american_internet_movie FROM aka_title AS at, company_name AS cn, company_type AS ct, info_type AS it1, keyword AS k, movie_companies AS mc, movie_info AS mi, movie_keyword AS mk, title AS t WHERE cn.country_code = '[us]' AND it1.info = 'release dates' AND mi.note like '%internet%' AND mi.info is not NULL and (mi.info like 'USA:% 199%' or mi.info like 'USA:% 200%') AND t.production_year > 1990 AND t.id = at.movie_id AND t.id = mi.movie_id AND t.id = mk.movie_id AND t.id = mc.movie_id AND mk.movie_id = mi.movie_id AND mk.movie_id = mc.movie_id AND mk.movie_id = at.movie_id AND mi.movie_id = mc.movie_id AND mi.movie_id = at.movie_id AND mc.movie_id = at.movie_id AND k.id = mk.keyword_id AND it1.id = mi.info_type_id AND cn.id = mc.company_id AND ct.id = mc.company_type_id; 2 | -------------------------------------------------------------------------------- /examples/job/queries/7c.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(n.name) AS cast_member_name, MIN(pi.info) AS cast_member_info FROM aka_name AS an, cast_info 
AS ci, info_type AS it, link_type AS lt, movie_link AS ml, name AS n, person_info AS pi, title AS t WHERE an.name is not NULL and (an.name LIKE '%a%' or an.name LIKE 'A%') AND it.info ='mini biography' AND lt.link in ('references', 'referenced in', 'features', 'featured in') AND n.name_pcode_cf BETWEEN 'A' AND 'F' AND (n.gender='m' OR (n.gender = 'f' AND n.name LIKE 'A%')) AND pi.note is not NULL AND t.production_year BETWEEN 1980 AND 2010 AND n.id = an.person_id AND n.id = pi.person_id AND ci.person_id = n.id AND t.id = ci.movie_id AND ml.linked_movie_id = t.id AND lt.id = ml.link_type_id AND it.id = pi.info_type_id AND pi.person_id = an.person_id AND pi.person_id = ci.person_id AND an.person_id = ci.person_id AND ci.movie_id = ml.linked_movie_id; 2 | -------------------------------------------------------------------------------- /examples/job/queries/14c.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(mi_idx.info) AS rating, MIN(t.title) AS north_european_dark_production FROM info_type AS it1, info_type AS it2, keyword AS k, kind_type AS kt, movie_info AS mi, movie_info_idx AS mi_idx, movie_keyword AS mk, title AS t WHERE it1.info = 'countries' AND it2.info = 'rating' AND k.keyword is not null and k.keyword in ('murder', 'murder-in-title', 'blood', 'violence') AND kt.kind in ('movie', 'episode') AND mi.info IN ('Sweden', 'Norway', 'Germany', 'Denmark', 'Swedish', 'Danish', 'Norwegian', 'German', 'USA', 'American') AND mi_idx.info < '8.5' AND t.production_year > 2005 AND kt.id = t.kind_id AND t.id = mi.movie_id AND t.id = mk.movie_id AND t.id = mi_idx.movie_id AND mk.movie_id = mi.movie_id AND mk.movie_id = mi_idx.movie_id AND mi.movie_id = mi_idx.movie_id AND k.id = mk.keyword_id AND it1.id = mi.info_type_id AND it2.id = mi_idx.info_type_id; 2 | -------------------------------------------------------------------------------- /examples/job/queries/15b.sql: 
-------------------------------------------------------------------------------- 1 | SELECT MIN(mi.info) AS release_date, MIN(t.title) AS youtube_movie FROM aka_title AS at, company_name AS cn, company_type AS ct, info_type AS it1, keyword AS k, movie_companies AS mc, movie_info AS mi, movie_keyword AS mk, title AS t WHERE cn.country_code = '[us]' and cn.name = 'YouTube' AND it1.info = 'release dates' AND mc.note like '%(200%)%' and mc.note like '%(worldwide)%' AND mi.note like '%internet%' AND mi.info like 'USA:% 200%' AND t.production_year between 2005 and 2010 AND t.id = at.movie_id AND t.id = mi.movie_id AND t.id = mk.movie_id AND t.id = mc.movie_id AND mk.movie_id = mi.movie_id AND mk.movie_id = mc.movie_id AND mk.movie_id = at.movie_id AND mi.movie_id = mc.movie_id AND mi.movie_id = at.movie_id AND mc.movie_id = at.movie_id AND k.id = mk.keyword_id AND it1.id = mi.info_type_id AND cn.id = mc.company_id AND ct.id = mc.company_type_id; 2 | -------------------------------------------------------------------------------- /examples/tpch_example1/queries/q10.sql: -------------------------------------------------------------------------------- 1 | -- $ID$ 2 | -- TPC-H/TPC-R Returned Item Reporting Query (Q10) 3 | -- Functional Query Definition 4 | -- Approved February 1998 5 | 6 | 7 | select 8 | c_custkey, 9 | c_name, 10 | sum(l_extendedprice * (1 - l_discount)) as revenue, 11 | c_acctbal, 12 | n_name, 13 | c_address, 14 | c_phone, 15 | c_comment 16 | from 17 | customer, 18 | orders, 19 | lineitem, 20 | nation 21 | where 22 | c_custkey = o_custkey 23 | and l_orderkey = o_orderkey 24 | and o_orderdate >= date '1993-11-01' 25 | and o_orderdate < date '1993-11-01' + interval '3' month 26 | and l_returnflag = 'R' 27 | and c_nationkey = n_nationkey 28 | group by 29 | c_custkey, 30 | c_name, 31 | c_acctbal, 32 | c_phone, 33 | n_name, 34 | c_address, 35 | c_comment 36 | order by 37 | revenue desc; 
-------------------------------------------------------------------------------- /examples/job/queries/14b.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(mi_idx.info) AS rating, MIN(t.title) AS western_dark_production FROM info_type AS it1, info_type AS it2, keyword AS k, kind_type AS kt, movie_info AS mi, movie_info_idx AS mi_idx, movie_keyword AS mk, title AS t WHERE it1.info = 'countries' AND it2.info = 'rating' AND k.keyword in ('murder', 'murder-in-title') AND kt.kind = 'movie' AND mi.info IN ('Sweden', 'Norway', 'Germany', 'Denmark', 'Swedish', 'Denish', 'Norwegian', 'German', 'USA', 'American') AND mi_idx.info > '6.0' AND t.production_year > 2010 and (t.title like '%murder%' or t.title like '%Murder%' or t.title like '%Mord%') AND kt.id = t.kind_id AND t.id = mi.movie_id AND t.id = mk.movie_id AND t.id = mi_idx.movie_id AND mk.movie_id = mi.movie_id AND mk.movie_id = mi_idx.movie_id AND mi.movie_id = mi_idx.movie_id AND k.id = mk.keyword_id AND it1.id = mi.info_type_id AND it2.id = mi_idx.info_type_id; 2 | -------------------------------------------------------------------------------- /examples/job/queries/20a.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(t.title) AS complete_downey_ironman_movie FROM complete_cast AS cc, comp_cast_type AS cct1, comp_cast_type AS cct2, char_name AS chn, cast_info AS ci, keyword AS k, kind_type AS kt, movie_keyword AS mk, name AS n, title AS t WHERE cct1.kind = 'cast' AND cct2.kind like '%complete%' AND chn.name not like '%Sherlock%' and (chn.name like '%Tony%Stark%' or chn.name like '%Iron%Man%') AND k.keyword in ('superhero', 'sequel', 'second-part', 'marvel-comics', 'based-on-comic', 'tv-special', 'fight', 'violence') AND kt.kind = 'movie' AND t.production_year > 1950 AND kt.id = t.kind_id AND t.id = mk.movie_id AND t.id = ci.movie_id AND t.id = cc.movie_id AND mk.movie_id = ci.movie_id AND mk.movie_id = 
cc.movie_id AND ci.movie_id = cc.movie_id AND chn.id = ci.person_role_id AND n.id = ci.person_id AND k.id = mk.keyword_id AND cct1.id = cc.subject_id AND cct2.id = cc.status_id; 2 | -------------------------------------------------------------------------------- /examples/workload_export_output/stats/test1_t.json: -------------------------------------------------------------------------------- 1 | { 2 | "columns": { 3 | "a": { 4 | "histogram": { 5 | "ndv": 3 6 | }, 7 | "cm_sketch": { 8 | "top_n": [ 9 | { 10 | "data": "A4AAAAAAAAAB", 11 | "count": 1 12 | }, 13 | { 14 | "data": "A4AAAAAAAAAC", 15 | "count": 1 16 | }, 17 | { 18 | "data": "A4AAAAAAAAAD", 19 | "count": 1 20 | } 21 | ], 22 | "default_value": 0 23 | }, 24 | "fm_sketch": null, 25 | "stats_ver": 2, 26 | "null_count": 0, 27 | "tot_col_size": 27, 28 | "last_update_version": 444389253898829838, 29 | "correlation": 1 30 | } 31 | }, 32 | "indices": {}, 33 | "partitions": null, 34 | "database_name": "test1", 35 | "table_name": "t", 36 | "ext_stats": null, 37 | "count": 6, 38 | "modify_count": 3, 39 | "version": 444389258407706640, 40 | "is_historical_stats": false 41 | } -------------------------------------------------------------------------------- /examples/workload_export_output/stats/test_t.json: -------------------------------------------------------------------------------- 1 | { 2 | "columns": { 3 | "a": { 4 | "histogram": { 5 | "ndv": 3 6 | }, 7 | "cm_sketch": { 8 | "top_n": [ 9 | { 10 | "data": "A4AAAAAAAAAB", 11 | "count": 1 12 | }, 13 | { 14 | "data": "A4AAAAAAAAAC", 15 | "count": 1 16 | }, 17 | { 18 | "data": "A4AAAAAAAAAD", 19 | "count": 1 20 | } 21 | ], 22 | "default_value": 0 23 | }, 24 | "fm_sketch": null, 25 | "stats_ver": 2, 26 | "null_count": 0, 27 | "tot_col_size": 24, 28 | "last_update_version": 444389259416698890, 29 | "correlation": -1 30 | } 31 | }, 32 | "indices": {}, 33 | "partitions": null, 34 | "database_name": "test", 35 | "table_name": "t", 36 | "ext_stats": null, 37 | "count": 3, 
38 | "modify_count": 0, 39 | "version": 444389259416698890, 40 | "is_historical_stats": false 41 | } -------------------------------------------------------------------------------- /examples/job/queries/20c.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(n.name) AS cast_member, MIN(t.title) AS complete_dynamic_hero_movie FROM complete_cast AS cc, comp_cast_type AS cct1, comp_cast_type AS cct2, char_name AS chn, cast_info AS ci, keyword AS k, kind_type AS kt, movie_keyword AS mk, name AS n, title AS t WHERE cct1.kind = 'cast' AND cct2.kind like '%complete%' AND chn.name is not NULL and (chn.name like '%man%' or chn.name like '%Man%') AND k.keyword in ('superhero', 'marvel-comics', 'based-on-comic', 'tv-special', 'fight', 'violence', 'magnet', 'web', 'claw', 'laser') AND kt.kind = 'movie' AND t.production_year > 2000 AND kt.id = t.kind_id AND t.id = mk.movie_id AND t.id = ci.movie_id AND t.id = cc.movie_id AND mk.movie_id = ci.movie_id AND mk.movie_id = cc.movie_id AND ci.movie_id = cc.movie_id AND chn.id = ci.person_role_id AND n.id = ci.person_id AND k.id = mk.keyword_id AND cct1.id = cc.subject_id AND cct2.id = cc.status_id; 2 | -------------------------------------------------------------------------------- /examples/tpch_example1/queries/q1.sql: -------------------------------------------------------------------------------- 1 | -- TPC TPC-H Parameter Substitution (Version 2.17.3 build 0) 2 | -- using 1 as a seed to the RNG 3 | -- $ID$ 4 | -- TPC-H/TPC-R Pricing Summary Report Query (Q1) 5 | -- Functional Query Definition 6 | -- Approved February 1998 7 | 8 | 9 | select 10 | l_returnflag, 11 | l_linestatus, 12 | sum(l_quantity) as sum_qty, 13 | sum(l_extendedprice) as sum_base_price, 14 | sum(l_extendedprice * (1 - l_discount)) as sum_disc_price, 15 | sum(l_extendedprice * (1 - l_discount) * (1 + l_tax)) as sum_charge, 16 | avg(l_quantity) as avg_qty, 17 | avg(l_extendedprice) as avg_price, 18 | 
avg(l_discount) as avg_disc, 19 | count(*) as count_order 20 | from 21 | lineitem 22 | where 23 | l_shipdate <= date_sub('1998-12-01', interval '68' day) 24 | group by 25 | l_returnflag, 26 | l_linestatus 27 | order by 28 | l_returnflag, 29 | l_linestatus; -------------------------------------------------------------------------------- /optimizer/tidb_what_if_optimizer_test.go: -------------------------------------------------------------------------------- 1 | package optimizer 2 | 3 | import ( 4 | "fmt" 5 | "testing" 6 | 7 | "github.com/qw4990/index_advisor/utils" 8 | ) 9 | 10 | func TestWhatIfOptimizer(t *testing.T) { 11 | dsn := "root:@tcp(127.0.0.1:4000)/test" 12 | o, err := NewTiDBWhatIfOptimizer(dsn) 13 | must(err) 14 | defer o.Close() 15 | must(o.Execute(`create table t (a int, b int)`)) 16 | p1, err := o.Explain(`select * from t where a=1`) 17 | must(err) 18 | must(o.CreateHypoIndex(utils.NewIndex("test", "t", "idx_a", "a"))) 19 | p2, err := o.Explain(`select * from t where a=1`) 20 | must(err) 21 | must(o.DropHypoIndex(utils.NewIndex("test", "t", "idx_a", "a"))) 22 | p3, err := o.Explain(`select * from t where a=1`) 23 | must(err) 24 | fmt.Println(p1.PlanCost(), p2.PlanCost(), p3.PlanCost()) // expected: cost2 < cost1 == cost3 (p2 is planned with hypothetical idx_a; p3 after it is dropped) 25 | } 26 | 27 | func must(err error) { 28 | if err != nil { 29 | panic(err) 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /examples/job/queries/21b.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(cn.name) AS company_name, MIN(lt.link) AS link_type, MIN(t.title) AS german_follow_up FROM company_name AS cn, company_type AS ct, keyword AS k, link_type AS lt, movie_companies AS mc, movie_info AS mi, movie_keyword AS mk, movie_link AS ml, title AS t WHERE cn.country_code !='[pl]' AND (cn.name LIKE '%Film%' OR cn.name LIKE '%Warner%') AND ct.kind ='production companies' AND k.keyword ='sequel' AND lt.link LIKE '%follow%' AND mc.note IS NULL 
AND mi.info IN ('Germany', 'German') AND t.production_year BETWEEN 2000 AND 2010 AND lt.id = ml.link_type_id AND ml.movie_id = t.id AND t.id = mk.movie_id AND mk.keyword_id = k.id AND t.id = mc.movie_id AND mc.company_type_id = ct.id AND mc.company_id = cn.id AND mi.movie_id = t.id AND ml.movie_id = mk.movie_id AND ml.movie_id = mc.movie_id AND mk.movie_id = mc.movie_id AND ml.movie_id = mi.movie_id AND mk.movie_id = mi.movie_id AND mc.movie_id = mi.movie_id; 2 | -------------------------------------------------------------------------------- /examples/job/queries/20b.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(t.title) AS complete_downey_ironman_movie FROM complete_cast AS cc, comp_cast_type AS cct1, comp_cast_type AS cct2, char_name AS chn, cast_info AS ci, keyword AS k, kind_type AS kt, movie_keyword AS mk, name AS n, title AS t WHERE cct1.kind = 'cast' AND cct2.kind like '%complete%' AND chn.name not like '%Sherlock%' and (chn.name like '%Tony%Stark%' or chn.name like '%Iron%Man%') AND k.keyword in ('superhero', 'sequel', 'second-part', 'marvel-comics', 'based-on-comic', 'tv-special', 'fight', 'violence') AND kt.kind = 'movie' AND n.name LIKE '%Downey%Robert%' AND t.production_year > 2000 AND kt.id = t.kind_id AND t.id = mk.movie_id AND t.id = ci.movie_id AND t.id = cc.movie_id AND mk.movie_id = ci.movie_id AND mk.movie_id = cc.movie_id AND ci.movie_id = cc.movie_id AND chn.id = ci.person_role_id AND n.id = ci.person_id AND k.id = mk.keyword_id AND cct1.id = cc.subject_id AND cct2.id = cc.status_id; 2 | -------------------------------------------------------------------------------- /examples/job/queries/19c.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(n.name) AS voicing_actress, MIN(t.title) AS jap_engl_voiced_movie FROM aka_name AS an, char_name AS chn, cast_info AS ci, company_name AS cn, info_type AS it, movie_companies AS mc, movie_info AS mi, 
name AS n, role_type AS rt, title AS t WHERE ci.note in ('(voice)', '(voice: Japanese version)', '(voice) (uncredited)', '(voice: English version)') AND cn.country_code ='[us]' AND it.info = 'release dates' AND mi.info is not null and (mi.info like 'Japan:%200%' or mi.info like 'USA:%200%') AND n.gender ='f' and n.name like '%An%' AND rt.role ='actress' AND t.production_year > 2000 AND t.id = mi.movie_id AND t.id = mc.movie_id AND t.id = ci.movie_id AND mc.movie_id = ci.movie_id AND mc.movie_id = mi.movie_id AND mi.movie_id = ci.movie_id AND cn.id = mc.company_id AND it.id = mi.info_type_id AND n.id = ci.person_id AND rt.id = ci.role_id AND n.id = an.person_id AND ci.person_id = an.person_id AND chn.id = ci.person_role_id; 2 | -------------------------------------------------------------------------------- /examples/job/queries/25a.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(mi.info) AS movie_budget, MIN(mi_idx.info) AS movie_votes, MIN(n.name) AS male_writer, MIN(t.title) AS violent_movie_title FROM cast_info AS ci, info_type AS it1, info_type AS it2, keyword AS k, movie_info AS mi, movie_info_idx AS mi_idx, movie_keyword AS mk, name AS n, title AS t WHERE ci.note in ('(writer)', '(head writer)', '(written by)', '(story)', '(story editor)') AND it1.info = 'genres' AND it2.info = 'votes' AND k.keyword in ('murder', 'blood', 'gore', 'death', 'female-nudity') AND mi.info = 'Horror' AND n.gender = 'm' AND t.id = mi.movie_id AND t.id = mi_idx.movie_id AND t.id = ci.movie_id AND t.id = mk.movie_id AND ci.movie_id = mi.movie_id AND ci.movie_id = mi_idx.movie_id AND ci.movie_id = mk.movie_id AND mi.movie_id = mi_idx.movie_id AND mi.movie_id = mk.movie_id AND mi_idx.movie_id = mk.movie_id AND n.id = ci.person_id AND it1.id = mi.info_type_id AND it2.id = mi_idx.info_type_id AND k.id = mk.keyword_id; 2 | -------------------------------------------------------------------------------- 
/examples/workload_export_output/output/q6.txt: -------------------------------------------------------------------------------- 1 | Alias: q6 2 | Query: 3 | select * from ta where a=1 and b >1 4 | 5 | Original Cost: 1.37E+02 6 | Optimized Cost: 1.37E+02 7 | Cost Reduction Ratio: 1.00 8 | 9 | 10 | ===================== original plan ===================== 11 | TableReader_7 0.75 137.18 root data:Selection_6 12 | └─Selection_6 0.75 1962.65 cop[tikv] eq(test.ta.a, 1), gt(test.ta.b, 1) 13 | └─TableFullScan_5 6.00 1363.85 cop[tikv] table:ta keep order:false 14 | 15 | ===================== optimized plan ===================== 16 | TableReader_7 0.75 137.18 root data:Selection_6 17 | └─Selection_6 0.75 1962.65 cop[tikv] eq(test.ta.a, 1), gt(test.ta.b, 1) 18 | └─TableFullScan_5 6.00 1363.85 cop[tikv] table:ta keep order:false -------------------------------------------------------------------------------- /examples/job/queries/21a.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(cn.name) AS company_name, MIN(lt.link) AS link_type, MIN(t.title) AS western_follow_up FROM company_name AS cn, company_type AS ct, keyword AS k, link_type AS lt, movie_companies AS mc, movie_info AS mi, movie_keyword AS mk, movie_link AS ml, title AS t WHERE cn.country_code !='[pl]' AND (cn.name LIKE '%Film%' OR cn.name LIKE '%Warner%') AND ct.kind ='production companies' AND k.keyword ='sequel' AND lt.link LIKE '%follow%' AND mc.note IS NULL AND mi.info IN ('Sweden', 'Norway', 'Germany', 'Denmark', 'Swedish', 'Denish', 'Norwegian', 'German') AND t.production_year BETWEEN 1950 AND 2000 AND lt.id = ml.link_type_id AND ml.movie_id = t.id AND t.id = mk.movie_id AND mk.keyword_id = k.id AND t.id = mc.movie_id AND mc.company_type_id = ct.id AND mc.company_id = cn.id AND mi.movie_id = t.id AND ml.movie_id = mk.movie_id AND ml.movie_id = mc.movie_id AND mk.movie_id = mc.movie_id AND ml.movie_id = mi.movie_id AND mk.movie_id = mi.movie_id AND mc.movie_id 
= mi.movie_id; 2 | -------------------------------------------------------------------------------- /examples/job/queries/19b.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(n.name) AS voicing_actress, MIN(t.title) AS kung_fu_panda FROM aka_name AS an, char_name AS chn, cast_info AS ci, company_name AS cn, info_type AS it, movie_companies AS mc, movie_info AS mi, name AS n, role_type AS rt, title AS t WHERE ci.note = '(voice)' AND cn.country_code ='[us]' AND it.info = 'release dates' AND mc.note like '%(200%)%' and (mc.note like '%(USA)%' or mc.note like '%(worldwide)%') AND mi.info is not null and (mi.info like 'Japan:%2007%' or mi.info like 'USA:%2008%') AND n.gender ='f' and n.name like '%Angel%' AND rt.role ='actress' AND t.production_year between 2007 and 2008 and t.title like '%Kung%Fu%Panda%' AND t.id = mi.movie_id AND t.id = mc.movie_id AND t.id = ci.movie_id AND mc.movie_id = ci.movie_id AND mc.movie_id = mi.movie_id AND mi.movie_id = ci.movie_id AND cn.id = mc.company_id AND it.id = mi.info_type_id AND n.id = ci.person_id AND rt.id = ci.role_id AND n.id = an.person_id AND ci.person_id = an.person_id AND chn.id = ci.person_role_id; 2 | -------------------------------------------------------------------------------- /examples/job/queries/21c.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(cn.name) AS company_name, MIN(lt.link) AS link_type, MIN(t.title) AS western_follow_up FROM company_name AS cn, company_type AS ct, keyword AS k, link_type AS lt, movie_companies AS mc, movie_info AS mi, movie_keyword AS mk, movie_link AS ml, title AS t WHERE cn.country_code !='[pl]' AND (cn.name LIKE '%Film%' OR cn.name LIKE '%Warner%') AND ct.kind ='production companies' AND k.keyword ='sequel' AND lt.link LIKE '%follow%' AND mc.note IS NULL AND mi.info IN ('Sweden', 'Norway', 'Germany', 'Denmark', 'Swedish', 'Denish', 'Norwegian', 'German', 'English') AND 
t.production_year BETWEEN 1950 AND 2010 AND lt.id = ml.link_type_id AND ml.movie_id = t.id AND t.id = mk.movie_id AND mk.keyword_id = k.id AND t.id = mc.movie_id AND mc.company_type_id = ct.id AND mc.company_id = cn.id AND mi.movie_id = t.id AND ml.movie_id = mk.movie_id AND ml.movie_id = mc.movie_id AND mk.movie_id = mc.movie_id AND ml.movie_id = mi.movie_id AND mk.movie_id = mi.movie_id AND mc.movie_id = mi.movie_id; 2 | -------------------------------------------------------------------------------- /examples/job/queries/23a.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(kt.kind) AS movie_kind, MIN(t.title) AS complete_us_internet_movie FROM complete_cast AS cc, comp_cast_type AS cct1, company_name AS cn, company_type AS ct, info_type AS it1, keyword AS k, kind_type AS kt, movie_companies AS mc, movie_info AS mi, movie_keyword AS mk, title AS t WHERE cct1.kind = 'complete+verified' AND cn.country_code = '[us]' AND it1.info = 'release dates' AND kt.kind in ('movie') AND mi.note like '%internet%' AND mi.info is not NULL and (mi.info like 'USA:% 199%' or mi.info like 'USA:% 200%') AND t.production_year > 2000 AND kt.id = t.kind_id AND t.id = mi.movie_id AND t.id = mk.movie_id AND t.id = mc.movie_id AND t.id = cc.movie_id AND mk.movie_id = mi.movie_id AND mk.movie_id = mc.movie_id AND mk.movie_id = cc.movie_id AND mi.movie_id = mc.movie_id AND mi.movie_id = cc.movie_id AND mc.movie_id = cc.movie_id AND k.id = mk.keyword_id AND it1.id = mi.info_type_id AND cn.id = mc.company_id AND ct.id = mc.company_type_id AND cct1.id = cc.status_id; 2 | -------------------------------------------------------------------------------- /examples/job/queries/23b.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(kt.kind) AS movie_kind, MIN(t.title) AS complete_nerdy_internet_movie FROM complete_cast AS cc, comp_cast_type AS cct1, company_name AS cn, company_type AS ct, info_type AS 
it1, keyword AS k, kind_type AS kt, movie_companies AS mc, movie_info AS mi, movie_keyword AS mk, title AS t WHERE cct1.kind = 'complete+verified' AND cn.country_code = '[us]' AND it1.info = 'release dates' AND k.keyword in ('nerd', 'loner', 'alienation', 'dignity') AND kt.kind in ('movie') AND mi.note like '%internet%' AND mi.info like 'USA:% 200%' AND t.production_year > 2000 AND kt.id = t.kind_id AND t.id = mi.movie_id AND t.id = mk.movie_id AND t.id = mc.movie_id AND t.id = cc.movie_id AND mk.movie_id = mi.movie_id AND mk.movie_id = mc.movie_id AND mk.movie_id = cc.movie_id AND mi.movie_id = mc.movie_id AND mi.movie_id = cc.movie_id AND mc.movie_id = cc.movie_id AND k.id = mk.keyword_id AND it1.id = mi.info_type_id AND cn.id = mc.company_id AND ct.id = mc.company_type_id AND cct1.id = cc.status_id; 2 | -------------------------------------------------------------------------------- /examples/workload_export_output/output/q8.txt: -------------------------------------------------------------------------------- 1 | Alias: q8 2 | Query: 3 | select * from tb where c=1 and a<10 4 | 5 | Original Cost: 1.91E+02 6 | Optimized Cost: 1.91E+02 7 | Cost Reduction Ratio: 1.00 8 | 9 | 10 | ===================== original plan ===================== 11 | TableReader_7 2.00 191.35 root data:Selection_6 12 | └─Selection_6 2.00 2616.86 cop[tikv] eq(test1.tb.c, 1), lt(test1.tb.a, 10) 13 | └─TableFullScan_5 8.00 1818.46 cop[tikv] table:tb keep order:false 14 | 15 | ===================== optimized plan ===================== 16 | TableReader_7 2.00 191.35 root data:Selection_6 17 | └─Selection_6 2.00 2616.86 cop[tikv] eq(test1.tb.c, 1), lt(test1.tb.a, 10) 18 | └─TableFullScan_5 8.00 1818.46 cop[tikv] table:tb keep order:false -------------------------------------------------------------------------------- /examples/job/queries/25b.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(mi.info) AS movie_budget, MIN(mi_idx.info) AS 
movie_votes, MIN(n.name) AS male_writer, MIN(t.title) AS violent_movie_title FROM cast_info AS ci, info_type AS it1, info_type AS it2, keyword AS k, movie_info AS mi, movie_info_idx AS mi_idx, movie_keyword AS mk, name AS n, title AS t WHERE ci.note in ('(writer)', '(head writer)', '(written by)', '(story)', '(story editor)') AND it1.info = 'genres' AND it2.info = 'votes' AND k.keyword in ('murder', 'blood', 'gore', 'death', 'female-nudity') AND mi.info = 'Horror' AND n.gender = 'm' AND t.production_year > 2010 AND t.title like 'Vampire%' AND t.id = mi.movie_id AND t.id = mi_idx.movie_id AND t.id = ci.movie_id AND t.id = mk.movie_id AND ci.movie_id = mi.movie_id AND ci.movie_id = mi_idx.movie_id AND ci.movie_id = mk.movie_id AND mi.movie_id = mi_idx.movie_id AND mi.movie_id = mk.movie_id AND mi_idx.movie_id = mk.movie_id AND n.id = ci.person_id AND it1.id = mi.info_type_id AND it2.id = mi_idx.info_type_id AND k.id = mk.keyword_id; 2 | -------------------------------------------------------------------------------- /examples/tpch_example1/queries/q12.sql: -------------------------------------------------------------------------------- 1 | -- $ID$ 2 | -- TPC-H/TPC-R Shipping Modes and Order Priority Query (Q12) 3 | -- Functional Query Definition 4 | -- Approved February 1998 5 | 6 | 7 | select 8 | l_shipmode, 9 | sum(case 10 | when o_orderpriority = '1-URGENT' 11 | or o_orderpriority = '2-HIGH' 12 | then 1 13 | else 0 14 | end) as high_line_count, 15 | sum(case 16 | when o_orderpriority <> '1-URGENT' 17 | and o_orderpriority <> '2-HIGH' 18 | then 1 19 | else 0 20 | end) as low_line_count 21 | from 22 | orders, 23 | lineitem 24 | where 25 | o_orderkey = l_orderkey 26 | and l_shipmode in ('FOB', 'REG AIR') 27 | and l_commitdate < l_receiptdate 28 | and l_shipdate < l_commitdate 29 | and l_receiptdate >= date '1993-01-01' 30 | and l_receiptdate < date '1993-01-01' + interval '1' year 31 | group by 32 | l_shipmode 33 | order by 34 | l_shipmode; 
-------------------------------------------------------------------------------- /examples/job/queries/25c.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(mi.info) AS movie_budget, MIN(mi_idx.info) AS movie_votes, MIN(n.name) AS male_writer, MIN(t.title) AS violent_movie_title FROM cast_info AS ci, info_type AS it1, info_type AS it2, keyword AS k, movie_info AS mi, movie_info_idx AS mi_idx, movie_keyword AS mk, name AS n, title AS t WHERE ci.note in ('(writer)', '(head writer)', '(written by)', '(story)', '(story editor)') AND it1.info = 'genres' AND it2.info = 'votes' AND k.keyword in ('murder', 'violence', 'blood', 'gore', 'death', 'female-nudity', 'hospital') AND mi.info in ('Horror', 'Action', 'Sci-Fi', 'Thriller', 'Crime', 'War') AND n.gender = 'm' AND t.id = mi.movie_id AND t.id = mi_idx.movie_id AND t.id = ci.movie_id AND t.id = mk.movie_id AND ci.movie_id = mi.movie_id AND ci.movie_id = mi_idx.movie_id AND ci.movie_id = mk.movie_id AND mi.movie_id = mi_idx.movie_id AND mi.movie_id = mk.movie_id AND mi_idx.movie_id = mk.movie_id AND n.id = ci.person_id AND it1.id = mi.info_type_id AND it2.id = mi_idx.info_type_id AND k.id = mk.keyword_id; 2 | -------------------------------------------------------------------------------- /examples/job/queries/19a.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(n.name) AS voicing_actress, MIN(t.title) AS voiced_movie FROM aka_name AS an, char_name AS chn, cast_info AS ci, company_name AS cn, info_type AS it, movie_companies AS mc, movie_info AS mi, name AS n, role_type AS rt, title AS t WHERE ci.note in ('(voice)', '(voice: Japanese version)', '(voice) (uncredited)', '(voice: English version)') AND cn.country_code ='[us]' AND it.info = 'release dates' AND mc.note is not NULL and (mc.note like '%(USA)%' or mc.note like '%(worldwide)%') AND mi.info is not null and (mi.info like 'Japan:%200%' or mi.info like 'USA:%200%') 
AND n.gender ='f' and n.name like '%Ang%' AND rt.role ='actress' AND t.production_year between 2005 and 2009 AND t.id = mi.movie_id AND t.id = mc.movie_id AND t.id = ci.movie_id AND mc.movie_id = ci.movie_id AND mc.movie_id = mi.movie_id AND mi.movie_id = ci.movie_id AND cn.id = mc.company_id AND it.id = mi.info_type_id AND n.id = ci.person_id AND rt.id = ci.role_id AND n.id = an.person_id AND ci.person_id = an.person_id AND chn.id = ci.person_role_id; 2 | -------------------------------------------------------------------------------- /examples/job/queries/23c.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(kt.kind) AS movie_kind, MIN(t.title) AS complete_us_internet_movie FROM complete_cast AS cc, comp_cast_type AS cct1, company_name AS cn, company_type AS ct, info_type AS it1, keyword AS k, kind_type AS kt, movie_companies AS mc, movie_info AS mi, movie_keyword AS mk, title AS t WHERE cct1.kind = 'complete+verified' AND cn.country_code = '[us]' AND it1.info = 'release dates' AND kt.kind in ('movie', 'tv movie', 'video movie', 'video game') AND mi.note like '%internet%' AND mi.info is not NULL and (mi.info like 'USA:% 199%' or mi.info like 'USA:% 200%') AND t.production_year > 1990 AND kt.id = t.kind_id AND t.id = mi.movie_id AND t.id = mk.movie_id AND t.id = mc.movie_id AND t.id = cc.movie_id AND mk.movie_id = mi.movie_id AND mk.movie_id = mc.movie_id AND mk.movie_id = cc.movie_id AND mi.movie_id = mc.movie_id AND mi.movie_id = cc.movie_id AND mc.movie_id = cc.movie_id AND k.id = mk.keyword_id AND it1.id = mi.info_type_id AND cn.id = mc.company_id AND ct.id = mc.company_type_id AND cct1.id = cc.status_id; 2 | -------------------------------------------------------------------------------- /examples/tpch_example1/queries/q9.sql: -------------------------------------------------------------------------------- 1 | -- $ID$ 2 | -- TPC-H/TPC-R Product Type Profit Measure Query (Q9) 3 | -- Functional Query Definition 
4 | -- Approved February 1998 5 | 6 | 7 | select 8 | nation, 9 | o_year, 10 | sum(amount) as sum_profit 11 | from 12 | ( 13 | select 14 | n_name as nation, 15 | extract(year from o_orderdate) as o_year, 16 | l_extendedprice * (1 - l_discount) - ps_supplycost * l_quantity as amount 17 | from 18 | part, 19 | supplier, 20 | lineitem, 21 | partsupp, 22 | orders, 23 | nation 24 | where 25 | s_suppkey = l_suppkey 26 | and ps_suppkey = l_suppkey 27 | and ps_partkey = l_partkey 28 | and p_partkey = l_partkey 29 | and o_orderkey = l_orderkey 30 | and s_nationkey = n_nationkey 31 | and p_name like '%thistle%' 32 | ) as profit 33 | group by 34 | nation, 35 | o_year 36 | order by 37 | nation, 38 | o_year desc; -------------------------------------------------------------------------------- /examples/tpch_example1/queries/q2.sql: -------------------------------------------------------------------------------- 1 | -- $ID$ 2 | -- TPC-H/TPC-R Minimum Cost Supplier Query (Q2) 3 | -- Functional Query Definition 4 | -- Approved February 1998 5 | 6 | 7 | select 8 | s_acctbal, 9 | s_name, 10 | n_name, 11 | p_partkey, 12 | p_mfgr, 13 | s_address, 14 | s_phone, 15 | s_comment 16 | from 17 | part, 18 | supplier, 19 | partsupp, 20 | nation, 21 | region 22 | where 23 | p_partkey = ps_partkey 24 | and s_suppkey = ps_suppkey 25 | and p_size = 38 26 | and p_type like '%STEEL' 27 | and s_nationkey = n_nationkey 28 | and n_regionkey = r_regionkey 29 | and r_name = 'ASIA' 30 | and ps_supplycost = ( 31 | select 32 | min(ps_supplycost) 33 | from 34 | partsupp, 35 | supplier, 36 | nation, 37 | region 38 | where 39 | p_partkey = ps_partkey 40 | and s_suppkey = ps_suppkey 41 | and s_nationkey = n_nationkey 42 | and n_regionkey = r_regionkey 43 | and r_name = 'ASIA' 44 | ) 45 | order by 46 | s_acctbal desc, 47 | n_name, 48 | s_name, 49 | p_partkey; -------------------------------------------------------------------------------- /examples/job/queries/26b.sql: 
-------------------------------------------------------------------------------- 1 | SELECT MIN(chn.name) AS character_name, MIN(mi_idx.info) AS rating, MIN(t.title) AS complete_hero_movie FROM complete_cast AS cc, comp_cast_type AS cct1, comp_cast_type AS cct2, char_name AS chn, cast_info AS ci, info_type AS it2, keyword AS k, kind_type AS kt, movie_info_idx AS mi_idx, movie_keyword AS mk, name AS n, title AS t WHERE cct1.kind = 'cast' AND cct2.kind like '%complete%' AND chn.name is not NULL and (chn.name like '%man%' or chn.name like '%Man%') AND it2.info = 'rating' AND k.keyword in ('superhero', 'marvel-comics', 'based-on-comic', 'fight') AND kt.kind = 'movie' AND mi_idx.info > '8.0' AND t.production_year > 2005 AND kt.id = t.kind_id AND t.id = mk.movie_id AND t.id = ci.movie_id AND t.id = cc.movie_id AND t.id = mi_idx.movie_id AND mk.movie_id = ci.movie_id AND mk.movie_id = cc.movie_id AND mk.movie_id = mi_idx.movie_id AND ci.movie_id = cc.movie_id AND ci.movie_id = mi_idx.movie_id AND cc.movie_id = mi_idx.movie_id AND chn.id = ci.person_role_id AND n.id = ci.person_id AND k.id = mk.keyword_id AND cct1.id = cc.subject_id AND cct2.id = cc.status_id AND it2.id = mi_idx.info_type_id; 2 | -------------------------------------------------------------------------------- /examples/job/queries/22a.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(cn.name) AS movie_company, MIN(mi_idx.info) AS rating, MIN(t.title) AS western_violent_movie FROM company_name AS cn, company_type AS ct, info_type AS it1, info_type AS it2, keyword AS k, kind_type AS kt, movie_companies AS mc, movie_info AS mi, movie_info_idx AS mi_idx, movie_keyword AS mk, title AS t WHERE cn.country_code != '[us]' AND it1.info = 'countries' AND it2.info = 'rating' AND k.keyword in ('murder', 'murder-in-title', 'blood', 'violence') AND kt.kind in ('movie', 'episode') AND mc.note not like '%(USA)%' and mc.note like '%(200%)%' AND mi.info IN ('Germany', 
'German', 'USA', 'American') AND mi_idx.info < '7.0' AND t.production_year > 2008 AND kt.id = t.kind_id AND t.id = mi.movie_id AND t.id = mk.movie_id AND t.id = mi_idx.movie_id AND t.id = mc.movie_id AND mk.movie_id = mi.movie_id AND mk.movie_id = mi_idx.movie_id AND mk.movie_id = mc.movie_id AND mi.movie_id = mi_idx.movie_id AND mi.movie_id = mc.movie_id AND mc.movie_id = mi_idx.movie_id AND k.id = mk.keyword_id AND it1.id = mi.info_type_id AND it2.id = mi_idx.info_type_id AND ct.id = mc.company_type_id AND cn.id = mc.company_id; 2 | -------------------------------------------------------------------------------- /examples/job/queries/22b.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(cn.name) AS movie_company, MIN(mi_idx.info) AS rating, MIN(t.title) AS western_violent_movie FROM company_name AS cn, company_type AS ct, info_type AS it1, info_type AS it2, keyword AS k, kind_type AS kt, movie_companies AS mc, movie_info AS mi, movie_info_idx AS mi_idx, movie_keyword AS mk, title AS t WHERE cn.country_code != '[us]' AND it1.info = 'countries' AND it2.info = 'rating' AND k.keyword in ('murder', 'murder-in-title', 'blood', 'violence') AND kt.kind in ('movie', 'episode') AND mc.note not like '%(USA)%' and mc.note like '%(200%)%' AND mi.info IN ('Germany', 'German', 'USA', 'American') AND mi_idx.info < '7.0' AND t.production_year > 2009 AND kt.id = t.kind_id AND t.id = mi.movie_id AND t.id = mk.movie_id AND t.id = mi_idx.movie_id AND t.id = mc.movie_id AND mk.movie_id = mi.movie_id AND mk.movie_id = mi_idx.movie_id AND mk.movie_id = mc.movie_id AND mi.movie_id = mi_idx.movie_id AND mi.movie_id = mc.movie_id AND mc.movie_id = mi_idx.movie_id AND k.id = mk.keyword_id AND it1.id = mi.info_type_id AND it2.id = mi_idx.info_type_id AND ct.id = mc.company_type_id AND cn.id = mc.company_id; 2 | -------------------------------------------------------------------------------- /examples/job/queries/22d.sql: 
-------------------------------------------------------------------------------- 1 | SELECT MIN(cn.name) AS movie_company, MIN(mi_idx.info) AS rating, MIN(t.title) AS western_violent_movie FROM company_name AS cn, company_type AS ct, info_type AS it1, info_type AS it2, keyword AS k, kind_type AS kt, movie_companies AS mc, movie_info AS mi, movie_info_idx AS mi_idx, movie_keyword AS mk, title AS t WHERE cn.country_code != '[us]' AND it1.info = 'countries' AND it2.info = 'rating' AND k.keyword in ('murder', 'murder-in-title', 'blood', 'violence') AND kt.kind in ('movie', 'episode') AND mi.info IN ('Sweden', 'Norway', 'Germany', 'Denmark', 'Swedish', 'Danish', 'Norwegian', 'German', 'USA', 'American') AND mi_idx.info < '8.5' AND t.production_year > 2005 AND kt.id = t.kind_id AND t.id = mi.movie_id AND t.id = mk.movie_id AND t.id = mi_idx.movie_id AND t.id = mc.movie_id AND mk.movie_id = mi.movie_id AND mk.movie_id = mi_idx.movie_id AND mk.movie_id = mc.movie_id AND mi.movie_id = mi_idx.movie_id AND mi.movie_id = mc.movie_id AND mc.movie_id = mi_idx.movie_id AND k.id = mk.keyword_id AND it1.id = mi.info_type_id AND it2.id = mi_idx.info_type_id AND ct.id = mc.company_type_id AND cn.id = mc.company_id; 2 | -------------------------------------------------------------------------------- /examples/tpch_example1/queries/q21.sql: -------------------------------------------------------------------------------- 1 | -- $ID$ 2 | -- TPC-H/TPC-R Suppliers Who Kept Orders Waiting Query (Q21) 3 | -- Functional Query Definition 4 | -- Approved February 1998 5 | 6 | 7 | select 8 | s_name, 9 | count(*) as numwait 10 | from 11 | supplier, 12 | lineitem l1, 13 | orders, 14 | nation 15 | where 16 | s_suppkey = l1.l_suppkey 17 | and o_orderkey = l1.l_orderkey 18 | and o_orderstatus = 'F' 19 | and l1.l_receiptdate > l1.l_commitdate 20 | and exists ( 21 | select 22 | * 23 | from 24 | lineitem l2 25 | where 26 | l2.l_orderkey = l1.l_orderkey 27 | and l2.l_suppkey <> l1.l_suppkey 28 | ) 29 | 
and not exists ( 30 | select 31 | * 32 | from 33 | lineitem l3 34 | where 35 | l3.l_orderkey = l1.l_orderkey 36 | and l3.l_suppkey <> l1.l_suppkey 37 | and l3.l_receiptdate > l3.l_commitdate 38 | ) 39 | and s_nationkey = n_nationkey 40 | and n_name = 'PERU' 41 | group by 42 | s_name 43 | order by 44 | numwait desc, 45 | s_name; -------------------------------------------------------------------------------- /examples/job/queries/26c.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(chn.name) AS character_name, MIN(mi_idx.info) AS rating, MIN(t.title) AS complete_hero_movie FROM complete_cast AS cc, comp_cast_type AS cct1, comp_cast_type AS cct2, char_name AS chn, cast_info AS ci, info_type AS it2, keyword AS k, kind_type AS kt, movie_info_idx AS mi_idx, movie_keyword AS mk, name AS n, title AS t WHERE cct1.kind = 'cast' AND cct2.kind like '%complete%' AND chn.name is not NULL and (chn.name like '%man%' or chn.name like '%Man%') AND it2.info = 'rating' AND k.keyword in ('superhero', 'marvel-comics', 'based-on-comic', 'tv-special', 'fight', 'violence', 'magnet', 'web', 'claw', 'laser') AND kt.kind = 'movie' AND t.production_year > 2000 AND kt.id = t.kind_id AND t.id = mk.movie_id AND t.id = ci.movie_id AND t.id = cc.movie_id AND t.id = mi_idx.movie_id AND mk.movie_id = ci.movie_id AND mk.movie_id = cc.movie_id AND mk.movie_id = mi_idx.movie_id AND ci.movie_id = cc.movie_id AND ci.movie_id = mi_idx.movie_id AND cc.movie_id = mi_idx.movie_id AND chn.id = ci.person_role_id AND n.id = ci.person_id AND k.id = mk.keyword_id AND cct1.id = cc.subject_id AND cct2.id = cc.status_id AND it2.id = mi_idx.info_type_id; 2 | -------------------------------------------------------------------------------- /examples/tpch_example1/queries/q20.sql: -------------------------------------------------------------------------------- 1 | -- $ID$ 2 | -- TPC-H/TPC-R Potential Part Promotion Query (Q20) 3 | -- Function Query Definition 4 | -- 
Approved February 1998 5 | 6 | 7 | select 8 | s_name, 9 | s_address 10 | from 11 | supplier, 12 | nation 13 | where 14 | s_suppkey in ( 15 | select 16 | ps_suppkey 17 | from 18 | partsupp 19 | where 20 | ps_partkey in ( 21 | select 22 | p_partkey 23 | from 24 | part 25 | where 26 | p_name like 'ivory%' 27 | ) 28 | and ps_availqty > ( 29 | select 30 | 0.5 * sum(l_quantity) 31 | from 32 | lineitem 33 | where 34 | l_partkey = ps_partkey 35 | and l_suppkey = ps_suppkey 36 | and l_shipdate >= date '1996-01-01' 37 | and l_shipdate < date '1996-01-01' + interval '1' year 38 | ) 39 | ) 40 | and s_nationkey = n_nationkey 41 | and n_name = 'KENYA' 42 | order by 43 | s_name; -------------------------------------------------------------------------------- /examples/job/queries/22c.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(cn.name) AS movie_company, MIN(mi_idx.info) AS rating, MIN(t.title) AS western_violent_movie FROM company_name AS cn, company_type AS ct, info_type AS it1, info_type AS it2, keyword AS k, kind_type AS kt, movie_companies AS mc, movie_info AS mi, movie_info_idx AS mi_idx, movie_keyword AS mk, title AS t WHERE cn.country_code != '[us]' AND it1.info = 'countries' AND it2.info = 'rating' AND k.keyword in ('murder', 'murder-in-title', 'blood', 'violence') AND kt.kind in ('movie', 'episode') AND mc.note not like '%(USA)%' and mc.note like '%(200%)%' AND mi.info IN ('Sweden', 'Norway', 'Germany', 'Denmark', 'Swedish', 'Danish', 'Norwegian', 'German', 'USA', 'American') AND mi_idx.info < '8.5' AND t.production_year > 2005 AND kt.id = t.kind_id AND t.id = mi.movie_id AND t.id = mk.movie_id AND t.id = mi_idx.movie_id AND t.id = mc.movie_id AND mk.movie_id = mi.movie_id AND mk.movie_id = mi_idx.movie_id AND mk.movie_id = mc.movie_id AND mi.movie_id = mi_idx.movie_id AND mi.movie_id = mc.movie_id AND mc.movie_id = mi_idx.movie_id AND k.id = mk.keyword_id AND it1.id = mi.info_type_id AND it2.id = 
mi_idx.info_type_id AND ct.id = mc.company_type_id AND cn.id = mc.company_id; 2 | -------------------------------------------------------------------------------- /examples/job/queries/26a.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(chn.name) AS character_name, MIN(mi_idx.info) AS rating, MIN(n.name) AS playing_actor, MIN(t.title) AS complete_hero_movie FROM complete_cast AS cc, comp_cast_type AS cct1, comp_cast_type AS cct2, char_name AS chn, cast_info AS ci, info_type AS it2, keyword AS k, kind_type AS kt, movie_info_idx AS mi_idx, movie_keyword AS mk, name AS n, title AS t WHERE cct1.kind = 'cast' AND cct2.kind like '%complete%' AND chn.name is not NULL and (chn.name like '%man%' or chn.name like '%Man%') AND it2.info = 'rating' AND k.keyword in ('superhero', 'marvel-comics', 'based-on-comic', 'tv-special', 'fight', 'violence', 'magnet', 'web', 'claw', 'laser') AND kt.kind = 'movie' AND mi_idx.info > '7.0' AND t.production_year > 2000 AND kt.id = t.kind_id AND t.id = mk.movie_id AND t.id = ci.movie_id AND t.id = cc.movie_id AND t.id = mi_idx.movie_id AND mk.movie_id = ci.movie_id AND mk.movie_id = cc.movie_id AND mk.movie_id = mi_idx.movie_id AND ci.movie_id = cc.movie_id AND ci.movie_id = mi_idx.movie_id AND cc.movie_id = mi_idx.movie_id AND chn.id = ci.person_role_id AND n.id = ci.person_id AND k.id = mk.keyword_id AND cct1.id = cc.subject_id AND cct2.id = cc.status_id AND it2.id = mi_idx.info_type_id; 2 | -------------------------------------------------------------------------------- /advisor/workload_info_compression.go: -------------------------------------------------------------------------------- 1 | package advisor 2 | 3 | import ( 4 | "github.com/qw4990/index_advisor/utils" 5 | ) 6 | 7 | // NoneWorkloadInfoCompress does nothing. 
8 | func NoneWorkloadInfoCompress(workloadInfo utils.WorkloadInfo) utils.WorkloadInfo { 9 | return workloadInfo 10 | } 11 | 12 | // DigestWorkloadInfoCompress compresses queries by digest. 13 | func DigestWorkloadInfoCompress(workloadInfo utils.WorkloadInfo) utils.WorkloadInfo { 14 | compressed := workloadInfo 15 | compressed.Queries = compressBySQLDigest(compressed.Queries) 16 | return compressed 17 | } 18 | 19 | func compressBySQLDigest(sqls utils.Set[utils.Query]) utils.Set[utils.Query] { 20 | s := utils.NewSet[utils.Query]() 21 | digestFreq := make(map[string]int) 22 | digestSQL := make(map[string]utils.Query) 23 | for _, sql := range sqls.ToList() { 24 | _, digest := utils.NormalizeDigest(sql.Text) 25 | if _, ok := digestFreq[digest]; ok { 26 | digestFreq[digest] += sql.Frequency 27 | existingSQL := digestSQL[digest] 28 | existingSQL.Frequency = digestFreq[digest] 29 | s.Add(existingSQL) 30 | } else { 31 | digestFreq[digest] = sql.Frequency 32 | digestSQL[digest] = sql 33 | s.Add(sql) 34 | } 35 | } 36 | return s 37 | } 38 | -------------------------------------------------------------------------------- /examples/job/queries/24a.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(chn.name) AS voiced_char_name, MIN(n.name) AS voicing_actress_name, MIN(t.title) AS voiced_action_movie_jap_eng FROM aka_name AS an, char_name AS chn, cast_info AS ci, company_name AS cn, info_type AS it, keyword AS k, movie_companies AS mc, movie_info AS mi, movie_keyword AS mk, name AS n, role_type AS rt, title AS t WHERE ci.note in ('(voice)', '(voice: Japanese version)', '(voice) (uncredited)', '(voice: English version)') AND cn.country_code ='[us]' AND it.info = 'release dates' AND k.keyword in ('hero', 'martial-arts', 'hand-to-hand-combat') AND mi.info is not null and (mi.info like 'Japan:%201%' or mi.info like 'USA:%201%') AND n.gender ='f' and n.name like '%An%' AND rt.role ='actress' AND t.production_year > 2010 AND t.id = 
mi.movie_id AND t.id = mc.movie_id AND t.id = ci.movie_id AND t.id = mk.movie_id AND mc.movie_id = ci.movie_id AND mc.movie_id = mi.movie_id AND mc.movie_id = mk.movie_id AND mi.movie_id = ci.movie_id AND mi.movie_id = mk.movie_id AND ci.movie_id = mk.movie_id AND cn.id = mc.company_id AND it.id = mi.info_type_id AND n.id = ci.person_id AND rt.id = ci.role_id AND n.id = an.person_id AND ci.person_id = an.person_id AND chn.id = ci.person_role_id AND k.id = mk.keyword_id; 2 | -------------------------------------------------------------------------------- /examples/job/queries/31a.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(mi.info) AS movie_budget, MIN(mi_idx.info) AS movie_votes, MIN(n.name) AS writer, MIN(t.title) AS violent_liongate_movie FROM cast_info AS ci, company_name AS cn, info_type AS it1, info_type AS it2, keyword AS k, movie_companies AS mc, movie_info AS mi, movie_info_idx AS mi_idx, movie_keyword AS mk, name AS n, title AS t WHERE ci.note in ('(writer)', '(head writer)', '(written by)', '(story)', '(story editor)') AND cn.name like 'Lionsgate%' AND it1.info = 'genres' AND it2.info = 'votes' AND k.keyword in ('murder', 'violence', 'blood', 'gore', 'death', 'female-nudity', 'hospital') AND mi.info in ('Horror', 'Thriller') AND n.gender = 'm' AND t.id = mi.movie_id AND t.id = mi_idx.movie_id AND t.id = ci.movie_id AND t.id = mk.movie_id AND t.id = mc.movie_id AND ci.movie_id = mi.movie_id AND ci.movie_id = mi_idx.movie_id AND ci.movie_id = mk.movie_id AND ci.movie_id = mc.movie_id AND mi.movie_id = mi_idx.movie_id AND mi.movie_id = mk.movie_id AND mi.movie_id = mc.movie_id AND mi_idx.movie_id = mk.movie_id AND mi_idx.movie_id = mc.movie_id AND mk.movie_id = mc.movie_id AND n.id = ci.person_id AND it1.id = mi.info_type_id AND it2.id = mi_idx.info_type_id AND k.id = mk.keyword_id AND cn.id = mc.company_id; 2 | -------------------------------------------------------------------------------- 
/examples/job/queries/31c.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(mi.info) AS movie_budget, MIN(mi_idx.info) AS movie_votes, MIN(n.name) AS writer, MIN(t.title) AS violent_liongate_movie FROM cast_info AS ci, company_name AS cn, info_type AS it1, info_type AS it2, keyword AS k, movie_companies AS mc, movie_info AS mi, movie_info_idx AS mi_idx, movie_keyword AS mk, name AS n, title AS t WHERE ci.note in ('(writer)', '(head writer)', '(written by)', '(story)', '(story editor)') AND cn.name like 'Lionsgate%' AND it1.info = 'genres' AND it2.info = 'votes' AND k.keyword in ('murder', 'violence', 'blood', 'gore', 'death', 'female-nudity', 'hospital') AND mi.info in ('Horror', 'Action', 'Sci-Fi', 'Thriller', 'Crime', 'War') AND t.id = mi.movie_id AND t.id = mi_idx.movie_id AND t.id = ci.movie_id AND t.id = mk.movie_id AND t.id = mc.movie_id AND ci.movie_id = mi.movie_id AND ci.movie_id = mi_idx.movie_id AND ci.movie_id = mk.movie_id AND ci.movie_id = mc.movie_id AND mi.movie_id = mi_idx.movie_id AND mi.movie_id = mk.movie_id AND mi.movie_id = mc.movie_id AND mi_idx.movie_id = mk.movie_id AND mi_idx.movie_id = mc.movie_id AND mk.movie_id = mc.movie_id AND n.id = ci.person_id AND it1.id = mi.info_type_id AND it2.id = mi_idx.info_type_id AND k.id = mk.keyword_id AND cn.id = mc.company_id; 2 | -------------------------------------------------------------------------------- /examples/tpch_example1/queries/q22.sql: -------------------------------------------------------------------------------- 1 | -- $ID$ 2 | -- TPC-H/TPC-R Global Sales Opportunity Query (Q22) 3 | -- Functional Query Definition 4 | -- Approved February 1998 5 | 6 | 7 | select 8 | cntrycode, 9 | count(*) as numcust, 10 | sum(c_acctbal) as totacctbal 11 | from 12 | ( 13 | select 14 | substring(c_phone from 1 for 2) as cntrycode, 15 | c_acctbal 16 | from 17 | customer 18 | where 19 | substring(c_phone from 1 for 2) in 20 | ('24', '33', '31', '10', '15', 
'28', '23') 21 | and c_acctbal > ( 22 | select 23 | avg(c_acctbal) 24 | from 25 | customer 26 | where 27 | c_acctbal > 0.00 28 | and substring(c_phone from 1 for 2) in 29 | ('24', '33', '31', '10', '15', '28', '23') 30 | ) 31 | and not exists ( 32 | select 33 | * 34 | from 35 | orders 36 | where 37 | o_custkey = c_custkey 38 | ) 39 | ) as custsale 40 | group by 41 | cntrycode 42 | order by 43 | cntrycode; 44 | -------------------------------------------------------------------------------- /examples/job/queries/27b.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(cn.name) AS producing_company, MIN(lt.link) AS link_type, MIN(t.title) AS complete_western_sequel FROM complete_cast AS cc, comp_cast_type AS cct1, comp_cast_type AS cct2, company_name AS cn, company_type AS ct, keyword AS k, link_type AS lt, movie_companies AS mc, movie_info AS mi, movie_keyword AS mk, movie_link AS ml, title AS t WHERE cct1.kind in ('cast', 'crew') AND cct2.kind = 'complete' AND cn.country_code !='[pl]' AND (cn.name LIKE '%Film%' OR cn.name LIKE '%Warner%') AND ct.kind ='production companies' AND k.keyword ='sequel' AND lt.link LIKE '%follow%' AND mc.note IS NULL AND mi.info IN ('Sweden', 'Germany','Swedish', 'German') AND t.production_year = 1998 AND lt.id = ml.link_type_id AND ml.movie_id = t.id AND t.id = mk.movie_id AND mk.keyword_id = k.id AND t.id = mc.movie_id AND mc.company_type_id = ct.id AND mc.company_id = cn.id AND mi.movie_id = t.id AND t.id = cc.movie_id AND cct1.id = cc.subject_id AND cct2.id = cc.status_id AND ml.movie_id = mk.movie_id AND ml.movie_id = mc.movie_id AND mk.movie_id = mc.movie_id AND ml.movie_id = mi.movie_id AND mk.movie_id = mi.movie_id AND mc.movie_id = mi.movie_id AND ml.movie_id = cc.movie_id AND mk.movie_id = cc.movie_id AND mc.movie_id = cc.movie_id AND mi.movie_id = cc.movie_id; 2 | -------------------------------------------------------------------------------- /examples/job/queries/27a.sql: 
-------------------------------------------------------------------------------- 1 | SELECT MIN(cn.name) AS producing_company, MIN(lt.link) AS link_type, MIN(t.title) AS complete_western_sequel FROM complete_cast AS cc, comp_cast_type AS cct1, comp_cast_type AS cct2, company_name AS cn, company_type AS ct, keyword AS k, link_type AS lt, movie_companies AS mc, movie_info AS mi, movie_keyword AS mk, movie_link AS ml, title AS t WHERE cct1.kind in ('cast', 'crew') AND cct2.kind = 'complete' AND cn.country_code !='[pl]' AND (cn.name LIKE '%Film%' OR cn.name LIKE '%Warner%') AND ct.kind ='production companies' AND k.keyword ='sequel' AND lt.link LIKE '%follow%' AND mc.note IS NULL AND mi.info IN ('Sweden', 'Germany','Swedish', 'German') AND t.production_year BETWEEN 1950 AND 2000 AND lt.id = ml.link_type_id AND ml.movie_id = t.id AND t.id = mk.movie_id AND mk.keyword_id = k.id AND t.id = mc.movie_id AND mc.company_type_id = ct.id AND mc.company_id = cn.id AND mi.movie_id = t.id AND t.id = cc.movie_id AND cct1.id = cc.subject_id AND cct2.id = cc.status_id AND ml.movie_id = mk.movie_id AND ml.movie_id = mc.movie_id AND mk.movie_id = mc.movie_id AND ml.movie_id = mi.movie_id AND mk.movie_id = mi.movie_id AND mc.movie_id = mi.movie_id AND ml.movie_id = cc.movie_id AND mk.movie_id = cc.movie_id AND mc.movie_id = cc.movie_id AND mi.movie_id = cc.movie_id; 2 | -------------------------------------------------------------------------------- /examples/job/queries/33b.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(cn1.name) AS first_company, MIN(cn2.name) AS second_company, MIN(mi_idx1.info) AS first_rating, MIN(mi_idx2.info) AS second_rating, MIN(t1.title) AS first_movie, MIN(t2.title) AS second_movie FROM company_name AS cn1, company_name AS cn2, info_type AS it1, info_type AS it2, kind_type AS kt1, kind_type AS kt2, link_type AS lt, movie_companies AS mc1, movie_companies AS mc2, movie_info_idx AS mi_idx1, movie_info_idx AS 
mi_idx2, movie_link AS ml, title AS t1, title AS t2 WHERE cn1.country_code = '[nl]' AND it1.info = 'rating' AND it2.info = 'rating' AND kt1.kind in ('tv series') AND kt2.kind in ('tv series') AND lt.link LIKE '%follow%' AND mi_idx2.info < '3.0' AND t2.production_year = 2007 AND lt.id = ml.link_type_id AND t1.id = ml.movie_id AND t2.id = ml.linked_movie_id AND it1.id = mi_idx1.info_type_id AND t1.id = mi_idx1.movie_id AND kt1.id = t1.kind_id AND cn1.id = mc1.company_id AND t1.id = mc1.movie_id AND ml.movie_id = mi_idx1.movie_id AND ml.movie_id = mc1.movie_id AND mi_idx1.movie_id = mc1.movie_id AND it2.id = mi_idx2.info_type_id AND t2.id = mi_idx2.movie_id AND kt2.id = t2.kind_id AND cn2.id = mc2.company_id AND t2.id = mc2.movie_id AND ml.linked_movie_id = mi_idx2.movie_id AND ml.linked_movie_id = mc2.movie_id AND mi_idx2.movie_id = mc2.movie_id; 2 | -------------------------------------------------------------------------------- /examples/job/queries/24b.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(chn.name) AS voiced_char_name, MIN(n.name) AS voicing_actress_name, MIN(t.title) AS kung_fu_panda FROM aka_name AS an, char_name AS chn, cast_info AS ci, company_name AS cn, info_type AS it, keyword AS k, movie_companies AS mc, movie_info AS mi, movie_keyword AS mk, name AS n, role_type AS rt, title AS t WHERE ci.note in ('(voice)', '(voice: Japanese version)', '(voice) (uncredited)', '(voice: English version)') AND cn.country_code ='[us]' AND cn.name = 'DreamWorks Animation' AND it.info = 'release dates' AND k.keyword in ('hero', 'martial-arts', 'hand-to-hand-combat', 'computer-animated-movie') AND mi.info is not null and (mi.info like 'Japan:%201%' or mi.info like 'USA:%201%') AND n.gender ='f' and n.name like '%An%' AND rt.role ='actress' AND t.production_year > 2010 AND t.title like 'Kung Fu Panda%' AND t.id = mi.movie_id AND t.id = mc.movie_id AND t.id = ci.movie_id AND t.id = mk.movie_id AND mc.movie_id = 
ci.movie_id AND mc.movie_id = mi.movie_id AND mc.movie_id = mk.movie_id AND mi.movie_id = ci.movie_id AND mi.movie_id = mk.movie_id AND ci.movie_id = mk.movie_id AND cn.id = mc.company_id AND it.id = mi.info_type_id AND n.id = ci.person_id AND rt.id = ci.role_id AND n.id = an.person_id AND ci.person_id = an.person_id AND chn.id = ci.person_role_id AND k.id = mk.keyword_id; 2 | -------------------------------------------------------------------------------- /examples/tpch_example1/queries/q8.sql: -------------------------------------------------------------------------------- 1 | -- $ID$ 2 | -- TPC-H/TPC-R National Market Share Query (Q8) 3 | -- Functional Query Definition 4 | -- Approved February 1998 5 | 6 | 7 | select 8 | o_year, 9 | sum(case 10 | when nation = 'MOZAMBIQUE' then volume 11 | else 0 12 | end) / sum(volume) as mkt_share 13 | from 14 | ( 15 | select 16 | extract(year from o_orderdate) as o_year, 17 | l_extendedprice * (1 - l_discount) as volume, 18 | n2.n_name as nation 19 | from 20 | part, 21 | supplier, 22 | lineitem, 23 | orders, 24 | customer, 25 | nation n1, 26 | nation n2, 27 | region 28 | where 29 | p_partkey = l_partkey 30 | and s_suppkey = l_suppkey 31 | and l_orderkey = o_orderkey 32 | and o_custkey = c_custkey 33 | and c_nationkey = n1.n_nationkey 34 | and n1.n_regionkey = r_regionkey 35 | and r_name = 'AFRICA' 36 | and s_nationkey = n2.n_nationkey 37 | and o_orderdate between date '1995-01-01' and date '1996-12-31' 38 | and p_type = 'PROMO POLISHED TIN' 39 | ) as all_nations 40 | group by 41 | o_year 42 | order by 43 | o_year; -------------------------------------------------------------------------------- /examples/job/queries/27c.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(cn.name) AS producing_company, MIN(lt.link) AS link_type, MIN(t.title) AS complete_western_sequel FROM complete_cast AS cc, comp_cast_type AS cct1, comp_cast_type AS cct2, company_name AS cn, company_type AS ct, 
keyword AS k, link_type AS lt, movie_companies AS mc, movie_info AS mi, movie_keyword AS mk, movie_link AS ml, title AS t WHERE cct1.kind = 'cast' AND cct2.kind like 'complete%' AND cn.country_code !='[pl]' AND (cn.name LIKE '%Film%' OR cn.name LIKE '%Warner%') AND ct.kind ='production companies' AND k.keyword ='sequel' AND lt.link LIKE '%follow%' AND mc.note IS NULL AND mi.info IN ('Sweden', 'Norway', 'Germany', 'Denmark', 'Swedish', 'Denish', 'Norwegian', 'German', 'English') AND t.production_year BETWEEN 1950 AND 2010 AND lt.id = ml.link_type_id AND ml.movie_id = t.id AND t.id = mk.movie_id AND mk.keyword_id = k.id AND t.id = mc.movie_id AND mc.company_type_id = ct.id AND mc.company_id = cn.id AND mi.movie_id = t.id AND t.id = cc.movie_id AND cct1.id = cc.subject_id AND cct2.id = cc.status_id AND ml.movie_id = mk.movie_id AND ml.movie_id = mc.movie_id AND mk.movie_id = mc.movie_id AND ml.movie_id = mi.movie_id AND mk.movie_id = mi.movie_id AND mc.movie_id = mi.movie_id AND ml.movie_id = cc.movie_id AND mk.movie_id = cc.movie_id AND mc.movie_id = cc.movie_id AND mi.movie_id = cc.movie_id; 2 | -------------------------------------------------------------------------------- /examples/job/queries/33a.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(cn1.name) AS first_company, MIN(cn2.name) AS second_company, MIN(mi_idx1.info) AS first_rating, MIN(mi_idx2.info) AS second_rating, MIN(t1.title) AS first_movie, MIN(t2.title) AS second_movie FROM company_name AS cn1, company_name AS cn2, info_type AS it1, info_type AS it2, kind_type AS kt1, kind_type AS kt2, link_type AS lt, movie_companies AS mc1, movie_companies AS mc2, movie_info_idx AS mi_idx1, movie_info_idx AS mi_idx2, movie_link AS ml, title AS t1, title AS t2 WHERE cn1.country_code = '[us]' AND it1.info = 'rating' AND it2.info = 'rating' AND kt1.kind in ('tv series') AND kt2.kind in ('tv series') AND lt.link in ('sequel', 'follows', 'followed by') AND 
mi_idx2.info < '3.0' AND t2.production_year between 2005 and 2008 AND lt.id = ml.link_type_id AND t1.id = ml.movie_id AND t2.id = ml.linked_movie_id AND it1.id = mi_idx1.info_type_id AND t1.id = mi_idx1.movie_id AND kt1.id = t1.kind_id AND cn1.id = mc1.company_id AND t1.id = mc1.movie_id AND ml.movie_id = mi_idx1.movie_id AND ml.movie_id = mc1.movie_id AND mi_idx1.movie_id = mc1.movie_id AND it2.id = mi_idx2.info_type_id AND t2.id = mi_idx2.movie_id AND kt2.id = t2.kind_id AND cn2.id = mc2.company_id AND t2.id = mc2.movie_id AND ml.linked_movie_id = mi_idx2.movie_id AND ml.linked_movie_id = mc2.movie_id AND mi_idx2.movie_id = mc2.movie_id; 2 | -------------------------------------------------------------------------------- /examples/job/queries/30c.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(mi.info) AS movie_budget, MIN(mi_idx.info) AS movie_votes, MIN(n.name) AS writer, MIN(t.title) AS complete_violent_movie FROM complete_cast AS cc, comp_cast_type AS cct1, comp_cast_type AS cct2, cast_info AS ci, info_type AS it1, info_type AS it2, keyword AS k, movie_info AS mi, movie_info_idx AS mi_idx, movie_keyword AS mk, name AS n, title AS t WHERE cct1.kind = 'cast' AND cct2.kind ='complete+verified' AND ci.note in ('(writer)', '(head writer)', '(written by)', '(story)', '(story editor)') AND it1.info = 'genres' AND it2.info = 'votes' AND k.keyword in ('murder', 'violence', 'blood', 'gore', 'death', 'female-nudity', 'hospital') AND mi.info in ('Horror', 'Action', 'Sci-Fi', 'Thriller', 'Crime', 'War') AND n.gender = 'm' AND t.id = mi.movie_id AND t.id = mi_idx.movie_id AND t.id = ci.movie_id AND t.id = mk.movie_id AND t.id = cc.movie_id AND ci.movie_id = mi.movie_id AND ci.movie_id = mi_idx.movie_id AND ci.movie_id = mk.movie_id AND ci.movie_id = cc.movie_id AND mi.movie_id = mi_idx.movie_id AND mi.movie_id = mk.movie_id AND mi.movie_id = cc.movie_id AND mi_idx.movie_id = mk.movie_id AND mi_idx.movie_id = cc.movie_id 
AND mk.movie_id = cc.movie_id AND n.id = ci.person_id AND it1.id = mi.info_type_id AND it2.id = mi_idx.info_type_id AND k.id = mk.keyword_id AND cct1.id = cc.subject_id AND cct2.id = cc.status_id; 2 | -------------------------------------------------------------------------------- /examples/job/queries/30a.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(mi.info) AS movie_budget, MIN(mi_idx.info) AS movie_votes, MIN(n.name) AS writer, MIN(t.title) AS complete_violent_movie FROM complete_cast AS cc, comp_cast_type AS cct1, comp_cast_type AS cct2, cast_info AS ci, info_type AS it1, info_type AS it2, keyword AS k, movie_info AS mi, movie_info_idx AS mi_idx, movie_keyword AS mk, name AS n, title AS t WHERE cct1.kind in ('cast', 'crew') AND cct2.kind ='complete+verified' AND ci.note in ('(writer)', '(head writer)', '(written by)', '(story)', '(story editor)') AND it1.info = 'genres' AND it2.info = 'votes' AND k.keyword in ('murder', 'violence', 'blood', 'gore', 'death', 'female-nudity', 'hospital') AND mi.info in ('Horror', 'Thriller') AND n.gender = 'm' AND t.production_year > 2000 AND t.id = mi.movie_id AND t.id = mi_idx.movie_id AND t.id = ci.movie_id AND t.id = mk.movie_id AND t.id = cc.movie_id AND ci.movie_id = mi.movie_id AND ci.movie_id = mi_idx.movie_id AND ci.movie_id = mk.movie_id AND ci.movie_id = cc.movie_id AND mi.movie_id = mi_idx.movie_id AND mi.movie_id = mk.movie_id AND mi.movie_id = cc.movie_id AND mi_idx.movie_id = mk.movie_id AND mi_idx.movie_id = cc.movie_id AND mk.movie_id = cc.movie_id AND n.id = ci.person_id AND it1.id = mi.info_type_id AND it2.id = mi_idx.info_type_id AND k.id = mk.keyword_id AND cct1.id = cc.subject_id AND cct2.id = cc.status_id; 2 | -------------------------------------------------------------------------------- /examples/job/queries/33c.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(cn1.name) AS first_company, 
MIN(cn2.name) AS second_company, MIN(mi_idx1.info) AS first_rating, MIN(mi_idx2.info) AS second_rating, MIN(t1.title) AS first_movie, MIN(t2.title) AS second_movie FROM company_name AS cn1, company_name AS cn2, info_type AS it1, info_type AS it2, kind_type AS kt1, kind_type AS kt2, link_type AS lt, movie_companies AS mc1, movie_companies AS mc2, movie_info_idx AS mi_idx1, movie_info_idx AS mi_idx2, movie_link AS ml, title AS t1, title AS t2 WHERE cn1.country_code != '[us]' AND it1.info = 'rating' AND it2.info = 'rating' AND kt1.kind in ('tv series', 'episode') AND kt2.kind in ('tv series', 'episode') AND lt.link in ('sequel', 'follows', 'followed by') AND mi_idx2.info < '3.5' AND t2.production_year between 2000 and 2010 AND lt.id = ml.link_type_id AND t1.id = ml.movie_id AND t2.id = ml.linked_movie_id AND it1.id = mi_idx1.info_type_id AND t1.id = mi_idx1.movie_id AND kt1.id = t1.kind_id AND cn1.id = mc1.company_id AND t1.id = mc1.movie_id AND ml.movie_id = mi_idx1.movie_id AND ml.movie_id = mc1.movie_id AND mi_idx1.movie_id = mc1.movie_id AND it2.id = mi_idx2.info_type_id AND t2.id = mi_idx2.movie_id AND kt2.id = t2.kind_id AND cn2.id = mc2.company_id AND t2.id = mc2.movie_id AND ml.linked_movie_id = mi_idx2.movie_id AND ml.linked_movie_id = mc2.movie_id AND mi_idx2.movie_id = mc2.movie_id; 2 | -------------------------------------------------------------------------------- /examples/tpch_example1/queries/q7.sql: -------------------------------------------------------------------------------- 1 | -- $ID$ 2 | -- TPC-H/TPC-R Volume Shipping Query (Q7) 3 | -- Functional Query Definition 4 | -- Approved February 1998 5 | 6 | 7 | select 8 | supp_nation, 9 | cust_nation, 10 | l_year, 11 | sum(volume) as revenue 12 | from 13 | ( 14 | select 15 | n1.n_name as supp_nation, 16 | n2.n_name as cust_nation, 17 | extract(year from l_shipdate) as l_year, 18 | l_extendedprice * (1 - l_discount) as volume 19 | from 20 | supplier, 21 | lineitem, 22 | orders, 23 | customer, 24 | 
nation n1, 25 | nation n2 26 | where 27 | s_suppkey = l_suppkey 28 | and o_orderkey = l_orderkey 29 | and c_custkey = o_custkey 30 | and s_nationkey = n1.n_nationkey 31 | and c_nationkey = n2.n_nationkey 32 | and ( 33 | (n1.n_name = 'MOZAMBIQUE' and n2.n_name = 'UNITED KINGDOM') 34 | or (n1.n_name = 'UNITED KINGDOM' and n2.n_name = 'MOZAMBIQUE') 35 | ) 36 | and l_shipdate between date '1995-01-01' and date '1996-12-31' 37 | ) as shipping 38 | group by 39 | supp_nation, 40 | cust_nation, 41 | l_year 42 | order by 43 | supp_nation, 44 | cust_nation, 45 | l_year; -------------------------------------------------------------------------------- /examples/job/queries/31b.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(mi.info) AS movie_budget, MIN(mi_idx.info) AS movie_votes, MIN(n.name) AS writer, MIN(t.title) AS violent_liongate_movie FROM cast_info AS ci, company_name AS cn, info_type AS it1, info_type AS it2, keyword AS k, movie_companies AS mc, movie_info AS mi, movie_info_idx AS mi_idx, movie_keyword AS mk, name AS n, title AS t WHERE ci.note in ('(writer)', '(head writer)', '(written by)', '(story)', '(story editor)') AND cn.name like 'Lionsgate%' AND it1.info = 'genres' AND it2.info = 'votes' AND k.keyword in ('murder', 'violence', 'blood', 'gore', 'death', 'female-nudity', 'hospital') AND mc.note like '%(Blu-ray)%' AND mi.info in ('Horror', 'Thriller') AND n.gender = 'm' AND t.production_year > 2000 and (t.title like '%Freddy%' or t.title like '%Jason%' or t.title like 'Saw%') AND t.id = mi.movie_id AND t.id = mi_idx.movie_id AND t.id = ci.movie_id AND t.id = mk.movie_id AND t.id = mc.movie_id AND ci.movie_id = mi.movie_id AND ci.movie_id = mi_idx.movie_id AND ci.movie_id = mk.movie_id AND ci.movie_id = mc.movie_id AND mi.movie_id = mi_idx.movie_id AND mi.movie_id = mk.movie_id AND mi.movie_id = mc.movie_id AND mi_idx.movie_id = mk.movie_id AND mi_idx.movie_id = mc.movie_id AND mk.movie_id = mc.movie_id AND 
n.id = ci.person_id AND it1.id = mi.info_type_id AND it2.id = mi_idx.info_type_id AND k.id = mk.keyword_id AND cn.id = mc.company_id; 2 | -------------------------------------------------------------------------------- /examples/workload_export_output/stats/test_ta.json: -------------------------------------------------------------------------------- 1 | { 2 | "columns": { 3 | "a": { 4 | "histogram": { 5 | "ndv": 2 6 | }, 7 | "cm_sketch": { 8 | "top_n": [ 9 | { 10 | "data": "A4AAAAAAAAAB", 11 | "count": 1 12 | }, 13 | { 14 | "data": "A4AAAAAAAAAC", 15 | "count": 3 16 | } 17 | ], 18 | "default_value": 0 19 | }, 20 | "fm_sketch": null, 21 | "stats_ver": 2, 22 | "null_count": 0, 23 | "tot_col_size": 34, 24 | "last_update_version": 444389259626938378, 25 | "correlation": 1 26 | }, 27 | "b": { 28 | "histogram": { 29 | "ndv": 3 30 | }, 31 | "cm_sketch": { 32 | "top_n": [ 33 | { 34 | "data": "A4AAAAAAAAAB", 35 | "count": 2 36 | }, 37 | { 38 | "data": "A4AAAAAAAAAC", 39 | "count": 1 40 | }, 41 | { 42 | "data": "A4AAAAAAAAAD", 43 | "count": 1 44 | } 45 | ], 46 | "default_value": 0 47 | }, 48 | "fm_sketch": null, 49 | "stats_ver": 2, 50 | "null_count": 0, 51 | "tot_col_size": 34, 52 | "last_update_version": 444389259626938378, 53 | "correlation": 1 54 | } 55 | }, 56 | "indices": {}, 57 | "partitions": null, 58 | "database_name": "test", 59 | "table_name": "ta", 60 | "ext_stats": null, 61 | "count": 6, 62 | "modify_count": 2, 63 | "version": 444389274136346625, 64 | "is_historical_stats": false 65 | } -------------------------------------------------------------------------------- /examples/workload_export_output/stats/test1_tb.json: -------------------------------------------------------------------------------- 1 | { 2 | "columns": { 3 | "a": { 4 | "histogram": { 5 | "ndv": 2 6 | }, 7 | "cm_sketch": { 8 | "top_n": [ 9 | { 10 | "data": "A4AAAAAAAAAB", 11 | "count": 2 12 | }, 13 | { 14 | "data": "A4AAAAAAAAAD", 15 | "count": 2 16 | } 17 | ], 18 | "default_value": 0 19 | }, 20 
| "fm_sketch": null, 21 | "stats_ver": 2, 22 | "null_count": 0, 23 | "tot_col_size": 36, 24 | "last_update_version": 444389254409748481, 25 | "correlation": 1 26 | }, 27 | "c": { 28 | "histogram": { 29 | "ndv": 3 30 | }, 31 | "cm_sketch": { 32 | "top_n": [ 33 | { 34 | "data": "A4AAAAAAAAAB", 35 | "count": 1 36 | }, 37 | { 38 | "data": "A4AAAAAAAAAC", 39 | "count": 2 40 | }, 41 | { 42 | "data": "A4AAAAAAAAAF", 43 | "count": 1 44 | } 45 | ], 46 | "default_value": 0 47 | }, 48 | "fm_sketch": null, 49 | "stats_ver": 2, 50 | "null_count": 0, 51 | "tot_col_size": 36, 52 | "last_update_version": 444389254409748481, 53 | "correlation": 1 54 | } 55 | }, 56 | "indices": {}, 57 | "partitions": null, 58 | "database_name": "test1", 59 | "table_name": "tb", 60 | "ext_stats": null, 61 | "count": 8, 62 | "modify_count": 4, 63 | "version": 444389258407706648, 64 | "is_historical_stats": false 65 | } -------------------------------------------------------------------------------- /examples/workload_export_output/output/summary.txt: -------------------------------------------------------------------------------- 1 | Total Queries in the workload: 5 2 | Total number of indexes: 2 3 | CREATE INDEX idx_a ON test.t (a); 4 | CREATE INDEX idx_a ON test1.t (a); 5 | Total original workload cost: 5.48E+02 6 | Total optimized workload cost: 4.00E+02 7 | Total cost reduction ratio: 27.01% 8 | Top 5 queries with the most cost reduction ratio: 9 | Alias: q2, Cost Reduction Ratio: 5.49E+01->1.50E+01(0.27) 10 | Alias: q10, Cost Reduction Ratio: 1.10E+02->3.77E+01(0.34) 11 | Alias: q4, Cost Reduction Ratio: 5.49E+01->1.89E+01(0.34) 12 | Alias: q6, Cost Reduction Ratio: 1.37E+02->1.37E+02(1.00) 13 | Alias: q8, Cost Reduction Ratio: 1.91E+02->1.91E+02(1.00) 14 | Top 5 queries with the most cost reduction number: 15 | Alias: q10, Cost Reduction Ratio: 1.10E+02->3.77E+01(0.34) 16 | Alias: q2, Cost Reduction Ratio: 5.49E+01->1.50E+01(0.27) 17 | Alias: q4, Cost Reduction Ratio: 5.49E+01->1.89E+01(0.34) 18 
| Alias: q6, Cost Reduction Ratio: 1.37E+02->1.37E+02(1.00) 19 | Alias: q8, Cost Reduction Ratio: 1.91E+02->1.91E+02(1.00) 20 | Top 5 queries with the most cost: 21 | Alias: q8, Cost Reduction Ratio: 1.91E+02->1.91E+02(1.00) 22 | Alias: q6, Cost Reduction Ratio: 1.37E+02->1.37E+02(1.00) 23 | Alias: q10, Cost Reduction Ratio: 1.10E+02->3.77E+01(0.34) 24 | Alias: q4, Cost Reduction Ratio: 5.49E+01->1.89E+01(0.34) 25 | Alias: q2, Cost Reduction Ratio: 5.49E+01->1.50E+01(0.27) 26 | -------------------------------------------------------------------------------- /examples/job/queries/30b.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(mi.info) AS movie_budget, MIN(mi_idx.info) AS movie_votes, MIN(n.name) AS writer, MIN(t.title) AS complete_gore_movie FROM complete_cast AS cc, comp_cast_type AS cct1, comp_cast_type AS cct2, cast_info AS ci, info_type AS it1, info_type AS it2, keyword AS k, movie_info AS mi, movie_info_idx AS mi_idx, movie_keyword AS mk, name AS n, title AS t WHERE cct1.kind in ('cast', 'crew') AND cct2.kind ='complete+verified' AND ci.note in ('(writer)', '(head writer)', '(written by)', '(story)', '(story editor)') AND it1.info = 'genres' AND it2.info = 'votes' AND k.keyword in ('murder', 'violence', 'blood', 'gore', 'death', 'female-nudity', 'hospital') AND mi.info in ('Horror', 'Thriller') AND n.gender = 'm' AND t.production_year > 2000 and (t.title like '%Freddy%' or t.title like '%Jason%' or t.title like 'Saw%') AND t.id = mi.movie_id AND t.id = mi_idx.movie_id AND t.id = ci.movie_id AND t.id = mk.movie_id AND t.id = cc.movie_id AND ci.movie_id = mi.movie_id AND ci.movie_id = mi_idx.movie_id AND ci.movie_id = mk.movie_id AND ci.movie_id = cc.movie_id AND mi.movie_id = mi_idx.movie_id AND mi.movie_id = mk.movie_id AND mi.movie_id = cc.movie_id AND mi_idx.movie_id = mk.movie_id AND mi_idx.movie_id = cc.movie_id AND mk.movie_id = cc.movie_id AND n.id = ci.person_id AND it1.id = mi.info_type_id AND 
var (
	// logLevel is the minimum severity that is printed:
	// 0 for debug, 1 for info, 2 for warning, 3 for error.
	logLevel = 1
	// logger is the shared sink used by every helper below; it is created in init.
	logger *l.Logger
)

// init builds the package logger: it writes to stdout with the standard
// timestamp flags plus the short file name and line of the call site.
func init() {
	logger = l.New(os.Stdout, "", l.LstdFlags|l.Lshortfile)
}

// SetLogLevel sets the log level.
//
// level is matched case-insensitively after surrounding whitespace is removed.
// Accepted values are "debug", "info", "warning" and "error". An empty (or
// whitespace-only) value leaves the current level untouched. Any other value
// panics with "invalid log level: <value>".
func SetLogLevel(level string) {
	// Normalize before the emptiness check so that a blank value such as "  "
	// is treated like "" instead of falling through to the panic below.
	level = strings.TrimSpace(strings.ToLower(level))
	if level == "" {
		return // keep the current level
	}
	switch level {
	case "debug":
		logLevel = 0
	case "info":
		logLevel = 1
	case "warning":
		logLevel = 2
	case "error":
		logLevel = 3
	default:
		panic("invalid log level: " + level)
	}
}

// Debugf prints a formatted DEBUG message when the level is debug.
func Debugf(format string, args ...interface{}) {
	if logLevel <= 0 {
		log("DEBUG", format, args...)
	}
}

// Infof prints a formatted INFO message when the level is info or lower.
func Infof(format string, args ...interface{}) {
	if logLevel <= 1 {
		log("INFO", format, args...)
	}
}

// Warningf prints a formatted WARNING message when the level is warning or lower.
func Warningf(format string, args ...interface{}) {
	if logLevel <= 2 {
		log("WARNING", format, args...)
	}
}

// Errorf prints a formatted ERROR message when the level is error or lower,
// which covers every level SetLogLevel can select.
func Errorf(format string, args ...interface{}) {
	if logLevel <= 3 {
		log("ERROR", format, args...)
	}
}

// log formats the message, prefixes it with "[LEVEL]" and hands it to the
// package logger.
//
// The call depth of 3 skips this function and the exported wrapper
// (Debugf/Infof/...), so the file:line shown is the wrapper's caller. Keep the
// wrappers calling log directly, otherwise the reported location shifts.
func log(level, format string, args ...interface{}) {
	// The write error is discarded on purpose: there is no other channel to report it on.
	_ = logger.Output(3, fmt.Sprintf("[%v] %v\n", level, fmt.Sprintf(format, args...)))
}
# Example invocations of `./index_advisor advise-offline`.
# Every command below asks for at most 5 indexes and targets the 'nightly' TiDB version.

# tpch_example1: queries, schema and stats are passed as three separate paths;
# the output is written next to the example itself.
./index_advisor advise-offline --query-path='./examples/tpch_example1/queries' \
    --schema-path='./examples/tpch_example1/schema.sql' \
    --stats-path='./examples/tpch_example1/stats' \
    --tidb-version='nightly' \
    --output='./examples/tpch_example1/output' \
    --max-num-indexes=5;

# tpch_example2: same three-path form, but --query-path names a single .sql file
# and the output goes under /tmp.
./index_advisor advise-offline --query-path='./examples/tpch_example2/queries.sql' \
    --schema-path='./examples/tpch_example2/schema.sql' \
    --stats-path='./examples/tpch_example2/stats' \
    --tidb-version='nightly' \
    --output='/tmp/index_advisor_output/tpch_example2' \
    --max-num-indexes=5;

# The remaining commands use --dir-path instead of separate query/schema/stats paths.
# NOTE(review): presumably --dir-path points at a directory bundling all three inputs — confirm against the CLI help.
./index_advisor advise-offline --dir-path='./examples/job_fk' \
    --tidb-version='nightly' \
    --output='/tmp/index_advisor_output/job_fk' \
    --max-num-indexes=5;

./index_advisor advise-offline --dir-path='./examples/job_no_fk' \
    --tidb-version='nightly' \
    --output='/tmp/index_advisor_output/job_no_fk' \
    --max-num-indexes=5;

./index_advisor advise-offline --dir-path='./examples/web3bench' \
    --tidb-version='nightly' \
    --output='/tmp/index_advisor_output/web3bench' \
    --max-num-indexes=5;

# tpcds: additionally passes a comma-separated list of query aliases via --query-black-list.
# NOTE(review): presumably the listed queries are excluded from the workload — confirm against the CLI help.
./index_advisor advise-offline --dir-path='./examples/tpcds' \
    --tidb-version='nightly' \
    --output='/tmp/index_advisor_output/tpcds' \
    --max-num-indexes=5 \
    --query-black-list='q5,q14,q18,q22,q27,q77,q80,q36,q86,q23,q51,q97,q67,q70,q78,q64,q41,q38,q81,q1,q30,q39,q54,q83,q31,q60,q33,q56,q58,q24,q57,q47,q95,q2,q59,q4,q11,q74';
'Danish', 'Norwegian', 'German', 'USA', 'American') AND mi_idx.info < '8.5' AND t.production_year > 2000 AND kt.id = t.kind_id AND t.id = mi.movie_id AND t.id = mk.movie_id AND t.id = mi_idx.movie_id AND t.id = mc.movie_id AND t.id = cc.movie_id AND mk.movie_id = mi.movie_id AND mk.movie_id = mi_idx.movie_id AND mk.movie_id = mc.movie_id AND mk.movie_id = cc.movie_id AND mi.movie_id = mi_idx.movie_id AND mi.movie_id = mc.movie_id AND mi.movie_id = cc.movie_id AND mc.movie_id = mi_idx.movie_id AND mc.movie_id = cc.movie_id AND mi_idx.movie_id = cc.movie_id AND k.id = mk.keyword_id AND it1.id = mi.info_type_id AND it2.id = mi_idx.info_type_id AND ct.id = mc.company_type_id AND cn.id = mc.company_id AND cct1.id = cc.subject_id AND cct2.id = cc.status_id; 2 | -------------------------------------------------------------------------------- /examples/job/queries/29b.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(chn.name) AS voiced_char, MIN(n.name) AS voicing_actress, MIN(t.title) AS voiced_animation FROM aka_name AS an, complete_cast AS cc, comp_cast_type AS cct1, comp_cast_type AS cct2, char_name AS chn, cast_info AS ci, company_name AS cn, info_type AS it, info_type AS it3, keyword AS k, movie_companies AS mc, movie_info AS mi, movie_keyword AS mk, name AS n, person_info AS pi, role_type AS rt, title AS t WHERE cct1.kind ='cast' AND cct2.kind ='complete+verified' AND chn.name = 'Queen' AND ci.note in ('(voice)', '(voice) (uncredited)', '(voice: English version)') AND cn.country_code ='[us]' AND it.info = 'release dates' AND it3.info = 'height' AND k.keyword = 'computer-animation' AND mi.info like 'USA:%200%' AND n.gender ='f' and n.name like '%An%' AND rt.role ='actress' AND t.title = 'Shrek 2' AND t.production_year between 2000 and 2005 AND t.id = mi.movie_id AND t.id = mc.movie_id AND t.id = ci.movie_id AND t.id = mk.movie_id AND t.id = cc.movie_id AND mc.movie_id = ci.movie_id AND mc.movie_id = mi.movie_id AND 
mc.movie_id = mk.movie_id AND mc.movie_id = cc.movie_id AND mi.movie_id = ci.movie_id AND mi.movie_id = mk.movie_id AND mi.movie_id = cc.movie_id AND ci.movie_id = mk.movie_id AND ci.movie_id = cc.movie_id AND mk.movie_id = cc.movie_id AND cn.id = mc.company_id AND it.id = mi.info_type_id AND n.id = ci.person_id AND rt.id = ci.role_id AND n.id = an.person_id AND ci.person_id = an.person_id AND chn.id = ci.person_role_id AND n.id = pi.person_id AND ci.person_id = pi.person_id AND it3.id = pi.info_type_id AND k.id = mk.keyword_id AND cct1.id = cc.subject_id AND cct2.id = cc.status_id; 2 | -------------------------------------------------------------------------------- /examples/job/queries/29c.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(chn.name) AS voiced_char, MIN(n.name) AS voicing_actress, MIN(t.title) AS voiced_animation FROM aka_name AS an, complete_cast AS cc, comp_cast_type AS cct1, comp_cast_type AS cct2, char_name AS chn, cast_info AS ci, company_name AS cn, info_type AS it, info_type AS it3, keyword AS k, movie_companies AS mc, movie_info AS mi, movie_keyword AS mk, name AS n, person_info AS pi, role_type AS rt, title AS t WHERE cct1.kind ='cast' AND cct2.kind ='complete+verified' AND ci.note in ('(voice)', '(voice: Japanese version)', '(voice) (uncredited)', '(voice: English version)') AND cn.country_code ='[us]' AND it.info = 'release dates' AND it3.info = 'trivia' AND k.keyword = 'computer-animation' AND mi.info is not null and (mi.info like 'Japan:%200%' or mi.info like 'USA:%200%') AND n.gender ='f' and n.name like '%An%' AND rt.role ='actress' AND t.production_year between 2000 and 2010 AND t.id = mi.movie_id AND t.id = mc.movie_id AND t.id = ci.movie_id AND t.id = mk.movie_id AND t.id = cc.movie_id AND mc.movie_id = ci.movie_id AND mc.movie_id = mi.movie_id AND mc.movie_id = mk.movie_id AND mc.movie_id = cc.movie_id AND mi.movie_id = ci.movie_id AND mi.movie_id = mk.movie_id AND mi.movie_id = 
cc.movie_id AND ci.movie_id = mk.movie_id AND ci.movie_id = cc.movie_id AND mk.movie_id = cc.movie_id AND cn.id = mc.company_id AND it.id = mi.info_type_id AND n.id = ci.person_id AND rt.id = ci.role_id AND n.id = an.person_id AND ci.person_id = an.person_id AND chn.id = ci.person_role_id AND n.id = pi.person_id AND ci.person_id = pi.person_id AND it3.id = pi.info_type_id AND k.id = mk.keyword_id AND cct1.id = cc.subject_id AND cct2.id = cc.status_id; 2 | -------------------------------------------------------------------------------- /examples/job/queries/29a.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(chn.name) AS voiced_char, MIN(n.name) AS voicing_actress, MIN(t.title) AS voiced_animation FROM aka_name AS an, complete_cast AS cc, comp_cast_type AS cct1, comp_cast_type AS cct2, char_name AS chn, cast_info AS ci, company_name AS cn, info_type AS it, info_type AS it3, keyword AS k, movie_companies AS mc, movie_info AS mi, movie_keyword AS mk, name AS n, person_info AS pi, role_type AS rt, title AS t WHERE cct1.kind ='cast' AND cct2.kind ='complete+verified' AND chn.name = 'Queen' AND ci.note in ('(voice)', '(voice) (uncredited)', '(voice: English version)') AND cn.country_code ='[us]' AND it.info = 'release dates' AND it3.info = 'trivia' AND k.keyword = 'computer-animation' AND mi.info is not null and (mi.info like 'Japan:%200%' or mi.info like 'USA:%200%') AND n.gender ='f' and n.name like '%An%' AND rt.role ='actress' AND t.title = 'Shrek 2' AND t.production_year between 2000 and 2010 AND t.id = mi.movie_id AND t.id = mc.movie_id AND t.id = ci.movie_id AND t.id = mk.movie_id AND t.id = cc.movie_id AND mc.movie_id = ci.movie_id AND mc.movie_id = mi.movie_id AND mc.movie_id = mk.movie_id AND mc.movie_id = cc.movie_id AND mi.movie_id = ci.movie_id AND mi.movie_id = mk.movie_id AND mi.movie_id = cc.movie_id AND ci.movie_id = mk.movie_id AND ci.movie_id = cc.movie_id AND mk.movie_id = cc.movie_id AND cn.id = 
mc.company_id AND it.id = mi.info_type_id AND n.id = ci.person_id AND rt.id = ci.role_id AND n.id = an.person_id AND ci.person_id = an.person_id AND chn.id = ci.person_role_id AND n.id = pi.person_id AND ci.person_id = pi.person_id AND it3.id = pi.info_type_id AND k.id = mk.keyword_id AND cct1.id = cc.subject_id AND cct2.id = cc.status_id; 2 | -------------------------------------------------------------------------------- /advisor/index_selection_auto_admin_test.go: -------------------------------------------------------------------------------- 1 | package advisor 2 | 3 | import ( 4 | "fmt" 5 | "testing" 6 | 7 | "github.com/qw4990/index_advisor/optimizer" 8 | "github.com/qw4990/index_advisor/utils" 9 | ) 10 | 11 | func prepareTestWorkload(dsn, schemaName string, createTableStmts, rawSQLs []string) (utils.WorkloadInfo, optimizer.WhatIfOptimizer) { 12 | w, err := utils.CreateWorkloadFromRawStmt(schemaName, createTableStmts, rawSQLs) 13 | must(err) 14 | must(IndexableColumnsSelectionSimple(&w)) 15 | if dsn == "" { 16 | dsn = "root:@tcp(127.0.0.1:4000)/" 17 | } 18 | opt, err := optimizer.NewTiDBWhatIfOptimizer("root:@tcp(127.0.0.1:4000)/") 19 | must(err) 20 | for _, t := range w.TableSchemas.ToList() { 21 | must(opt.Execute("use " + t.SchemaName)) 22 | must(opt.Execute(t.CreateStmtText)) 23 | } 24 | return w, opt 25 | } 26 | 27 | func TestSimulateAndCost(t *testing.T) { 28 | _, opt := prepareTestWorkload("", "test", 29 | []string{"create table t (a int, b int, c int, d int , e int)"}, 30 | []string{ 31 | "select * from t where a = 1 and c = 1", 32 | "select * from t where b = 1 and e = 1", 33 | }) 34 | 35 | opt.CreateHypoIndex(utils.NewIndex("test", "t", "a", "a")) 36 | plan1, _ := opt.Explain("select * from t where a = 1 and c < 1") 37 | opt.DropHypoIndex(utils.NewIndex("test", "t", "a", "a")) 38 | 39 | for _, p := range plan1 { 40 | fmt.Println(">> ", p) 41 | } 42 | 43 | opt.CreateHypoIndex(utils.NewIndex("test", "t", "ac", "a", "c")) 44 | plan2, _ := 
opt.Explain("select * from t where a = 1 and c < 1") 45 | opt.DropHypoIndex(utils.NewIndex("test", "t", "ac", "a", "c")) 46 | for _, p := range plan2 { 47 | fmt.Println(">> ", p) 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /examples/job/stats/comp_cast_type.json: -------------------------------------------------------------------------------- 1 | { 2 | "is_historical_stats": false, 3 | "database_name": "imdbload_no_fk", 4 | "table_name": "comp_cast_type", 5 | "columns": { 6 | "id": { 7 | "histogram": { 8 | "ndv": 4 9 | }, 10 | "cm_sketch": { 11 | "top_n": [ 12 | { 13 | "data": "A4AAAAAAAAAB", 14 | "count": 1 15 | }, 16 | { 17 | "data": "A4AAAAAAAAAC", 18 | "count": 1 19 | }, 20 | { 21 | "data": "A4AAAAAAAAAD", 22 | "count": 1 23 | }, 24 | { 25 | "data": "A4AAAAAAAAAE", 26 | "count": 1 27 | } 28 | ], 29 | "default_value": 0 30 | }, 31 | "fm_sketch": null, 32 | "null_count": 0, 33 | "tot_col_size": 32, 34 | "last_update_version": 441988927412502536, 35 | "correlation": 1, 36 | "stats_ver": 2 37 | }, 38 | "kind": { 39 | "histogram": { 40 | "ndv": 4 41 | }, 42 | "cm_sketch": { 43 | "top_n": [ 44 | { 45 | "data": "AWNhc3QAAAAA+w==", 46 | "count": 1 47 | }, 48 | { 49 | "data": "AWNvbXBsZXRl/wAAAAAAAAAA9w==", 50 | "count": 1 51 | }, 52 | { 53 | "data": "AWNvbXBsZXRl/yt2ZXJpZmll/2QAAAAAAAAA+A==", 54 | "count": 1 55 | }, 56 | { 57 | "data": "AWNyZXcAAAAA+w==", 58 | "count": 1 59 | } 60 | ], 61 | "default_value": 0 62 | }, 63 | "fm_sketch": null, 64 | "null_count": 0, 65 | "tot_col_size": 37, 66 | "last_update_version": 441988927412502536, 67 | "correlation": 0.4, 68 | "stats_ver": 2 69 | } 70 | }, 71 | "indices": {}, 72 | "ext_stats": null, 73 | "count": 4, 74 | "modify_count": 0, 75 | "partitions": null, 76 | "version": 441988927412502536 77 | } -------------------------------------------------------------------------------- /utils/utils_test.go: 
-------------------------------------------------------------------------------- 1 | package utils 2 | 3 | import ( 4 | "fmt" 5 | "testing" 6 | ) 7 | 8 | func TestCollectTableNames(t *testing.T) { 9 | sql := ` 10 | SELECT MIN(mc.note) AS production_note, MIN(t.title) AS movie_title 11 | , MIN(t.production_year) AS movie_year 12 | FROM company_type ct, info_type it, movie_companies mc, movie_info_idx mi_idx, title t 13 | WHERE ct.kind = 'production companies' 14 | AND it.info = 'top 250 rank' 15 | AND mc.note NOT LIKE '%(as Metro-Goldwyn-Mayer Pictures)%' 16 | AND (mc.note LIKE '%(co-production)%' 17 | OR mc.note LIKE '%(presents)%') 18 | AND ct.id = mc.company_type_id 19 | AND t.id = mc.movie_id 20 | AND t.id = mi_idx.movie_id 21 | AND mc.movie_id = mi_idx.movie_id 22 | AND it.id = mi_idx.info_type_id; 23 | ` 24 | tables, _ := CollectTableNamesFromSQL("test", sql) 25 | fmt.Println(tables.ToList()) 26 | } 27 | 28 | func TestCombSet(t *testing.T) { 29 | s := NewSet[Column]() 30 | for i := 0; i < 6; i++ { 31 | s.Add(NewColumn("test", "test", fmt.Sprintf("col%d", i))) 32 | } 33 | 34 | for i := 1; i < 6; i++ { 35 | fmt.Println("======================== ", i, " ========================") 36 | result := CombSet(s, i) 37 | fmt.Println("--> ", len(result)) 38 | for _, ss := range result { 39 | fmt.Println(ss.ToList()) 40 | } 41 | } 42 | } 43 | 44 | func TestPlanCost(t *testing.T) { 45 | plan := [][]string{ 46 | {"HashJoin_37", "100", "8225.40"}, 47 | {"├─IndexHashJoin_45(Build)", "1.000", "6096.63"}, 48 | {"└─CTEFullScan_39(Probe)", "10.00", "14.97"}, 49 | {"CTE_0", "10.00", "14.97"}, 50 | {"└─IndexLookUp_31(Seed Part)", "10.00", "19530.45"}, 51 | } 52 | p := Plan(plan) 53 | if p.PlanCost() != 8225.40+19530.45 { 54 | t.Error("plan cost error") 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /examples/job/stats/company_type.json: -------------------------------------------------------------------------------- 1 | { 2 | 
"is_historical_stats": false, 3 | "database_name": "imdbload_no_fk", 4 | "table_name": "company_type", 5 | "columns": { 6 | "id": { 7 | "histogram": { 8 | "ndv": 4 9 | }, 10 | "cm_sketch": { 11 | "top_n": [ 12 | { 13 | "data": "A4AAAAAAAAAB", 14 | "count": 1 15 | }, 16 | { 17 | "data": "A4AAAAAAAAAC", 18 | "count": 1 19 | }, 20 | { 21 | "data": "A4AAAAAAAAAD", 22 | "count": 1 23 | }, 24 | { 25 | "data": "A4AAAAAAAAAE", 26 | "count": 1 27 | } 28 | ], 29 | "default_value": 0 30 | }, 31 | "fm_sketch": null, 32 | "null_count": 0, 33 | "tot_col_size": 32, 34 | "last_update_version": 441988927845302284, 35 | "correlation": 1, 36 | "stats_ver": 2 37 | }, 38 | "kind": { 39 | "histogram": { 40 | "ndv": 4 41 | }, 42 | "cm_sketch": { 43 | "top_n": [ 44 | { 45 | "data": "AWRpc3RyaWJ1/3RvcnMAAAAA+w==", 46 | "count": 1 47 | }, 48 | { 49 | "data": "AW1pc2NlbGxh/25lb3VzIGNv/21wYW5pZXMA/g==", 50 | "count": 1 51 | }, 52 | { 53 | "data": "AXByb2R1Y3Rp/29uIGNvbXBh/25pZXMAAAAA+w==", 54 | "count": 1 55 | }, 56 | { 57 | "data": "AXNwZWNpYWwg/2VmZmVjdHMg/2NvbXBhbmll/3MAAAAAAAAA+A==", 58 | "count": 1 59 | } 60 | ], 61 | "default_value": 0 62 | }, 63 | "fm_sketch": null, 64 | "null_count": 0, 65 | "tot_col_size": 84, 66 | "last_update_version": 441988927845302284, 67 | "correlation": 0.4, 68 | "stats_ver": 2 69 | } 70 | }, 71 | "indices": {}, 72 | "ext_stats": null, 73 | "count": 4, 74 | "modify_count": 0, 75 | "partitions": null, 76 | "version": 441988927845302284 77 | } -------------------------------------------------------------------------------- /cmd/precheck.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "github.com/qw4990/index_advisor/optimizer" 5 | "github.com/spf13/cobra" 6 | ) 7 | 8 | func NewPreCheckCmd() *cobra.Command { 9 | var dsn string 10 | cmd := &cobra.Command{ 11 | Use: "pre-check", 12 | Short: "check what kind of index advisor mode can fit your cluster, use `index_advisor pre-check --help` to 
see more details", 13 | Long: `check what kind of index advisor mode can fit your cluster. 14 | How it work: 15 | 1. connect to your TiDB cluster through the DSN 16 | 2. check whether you can run online-mode index advisor on your cluster`, 17 | RunE: func(cmd *cobra.Command, args []string) error { 18 | db, err := optimizer.NewTiDBWhatIfOptimizer(dsn) 19 | if err != nil { 20 | return err 21 | } 22 | reason := checkOnlineModeSupport(db) 23 | if reason == "" { 24 | cmd.Println("[pre-check] you can use online mode and offline mode on your cluster.") 25 | } else { 26 | cmd.Println("[pre-check] you can only use offline mode on your cluster.") 27 | cmd.Println("[pre-check] your TiDB cluster does not support Index Advisor Online Mode, reason:", reason) 28 | } 29 | return nil 30 | }, 31 | } 32 | 33 | cmd.Flags().StringVar(&dsn, "dsn", "root:@tcp(127.0.0.1:4000)/test", "the DSN of the TiDB cluster") 34 | return cmd 35 | } 36 | 37 | // checkOnlineModeSupport checks whether this cluster is suitable for online-mode; it returns an empty reason when it is.
38 | func checkOnlineModeSupport(db optimizer.WhatIfOptimizer) (reason string) { 39 | if !supportHypoIndex(db) { 40 | return "your TiDB version does not support hypothetical index feature, which is required by Index Advisor Online Mode" 41 | } 42 | if redactLogEnabled(db) { 43 | return "redact log is enabled, the Advisor probably cannot get the full SQL text if you use Index Advisor Online Mode" 44 | } 45 | return "" 46 | } 47 | -------------------------------------------------------------------------------- /examples/tpcds/stats/dbgen_version.json: -------------------------------------------------------------------------------- 1 | { 2 | "columns": { 3 | "dv_cmdline_args": { 4 | "histogram": { 5 | "ndv": 1 6 | }, 7 | "cm_sketch": { 8 | "top_n": [ 9 | { 10 | "data": "AS1ESVNUUklC/1VUSU9OUyAu/y90b29scy90/3BjZHMuaWR4/yAtU0NBTEUg/zEgLURJUiAu/y9kZ2VuX291/3RwdXQAAAAA+w==", 11 | "count": 1 12 | } 13 | ], 14 | "default_value": 0 15 | }, 16 | "fm_sketch": null, 17 | "stats_ver": 2, 18 | "null_count": 0, 19 | "tot_col_size": 62, 20 | "last_update_version": 442619549726277646, 21 | "correlation": 1 22 | }, 23 | "dv_create_date": { 24 | "histogram": { 25 | "ndv": 1 26 | }, 27 | "cm_sketch": { 28 | "top_n": [ 29 | { 30 | "data": "BBmwfAAAAAAA", 31 | "count": 1 32 | } 33 | ], 34 | "default_value": 0 35 | }, 36 | "fm_sketch": null, 37 | "stats_ver": 2, 38 | "null_count": 0, 39 | "tot_col_size": 8, 40 | "last_update_version": 442619549726277646, 41 | "correlation": 1 42 | }, 43 | "dv_create_time": { 44 | "histogram": { 45 | "ndv": 1 46 | }, 47 | "cm_sketch": { 48 | "top_n": [ 49 | { 50 | "data": "B4AAMYMRQcYA", 51 | "count": 1 52 | } 53 | ], 54 | "default_value": 0 55 | }, 56 | "fm_sketch": null, 57 | "stats_ver": 2, 58 | "null_count": 0, 59 | "tot_col_size": 8, 60 | "last_update_version": 442619549726277646, 61 | "correlation": 1 62 | }, 63 | "dv_version": { 64 | "histogram": { 65 | "ndv": 1 66 | }, 67 | "cm_sketch": { 68 | "top_n": [ 69 | { 70 | "data": "ATIuMTAuMAAA/Q==", 71 | 
"count": 1 72 | } 73 | ], 74 | "default_value": 0 75 | }, 76 | "fm_sketch": null, 77 | "stats_ver": 2, 78 | "null_count": 0, 79 | "tot_col_size": 7, 80 | "last_update_version": 442619549726277646, 81 | "correlation": 1 82 | } 83 | }, 84 | "indices": {}, 85 | "partitions": null, 86 | "database_name": "tpcds", 87 | "table_name": "dbgen_version", 88 | "ext_stats": null, 89 | "count": 1, 90 | "modify_count": 0, 91 | "version": 442619549726277646, 92 | "is_historical_stats": false 93 | } -------------------------------------------------------------------------------- /optimizer/what_if_optimizer.go: -------------------------------------------------------------------------------- 1 | package optimizer 2 | 3 | import ( 4 | "database/sql" 5 | "fmt" 6 | "time" 7 | 8 | _ "github.com/go-sql-driver/mysql" 9 | "github.com/qw4990/index_advisor/utils" 10 | ) 11 | 12 | // WhatIfOptimizerStats records the statistics of a what-if optimizer. 13 | type WhatIfOptimizerStats struct { 14 | ExecuteCount int // number of executed Query statements 15 | ExecuteTime time.Duration // total execution time 16 | CreateOrDropHypoIdxCount int // number of executed CreateHypoIndex/DropHypoIndex 17 | CreateOrDropHypoIdxTime time.Duration // total execution time of CreateHypoIndex/DropHypoIndex 18 | GetCostCount int // number of executed GetCost 19 | GetCostTime time.Duration // total execution time of GetCost 20 | } 21 | 22 | // Format formats the statistics. 23 | func (s WhatIfOptimizerStats) Format() string { 24 | return fmt.Sprintf(`Execute(count/time): (%v/%v), CreateOrDropHypoIndex: (%v/%v), GetCost: (%v/%v)`, 25 | s.ExecuteCount, s.ExecuteTime, s.CreateOrDropHypoIdxCount, s.CreateOrDropHypoIdxTime, s.GetCostCount, s.GetCostTime) 26 | } 27 | 28 | // WhatIfOptimizer is the interface of a what-if optimizer. 
29 | type WhatIfOptimizer interface { 30 | Query(sql string) (*sql.Rows, error) // execute the specified Query statement and return the result 31 | Execute(sql string) error // execute the specified Query statement 32 | 33 | Close() error // release the underlying database connection 34 | Clone() (WhatIfOptimizer, error) // clone this optimizer 35 | 36 | CreateHypoIndex(index utils.Index) error // create a hypothetical index 37 | DropHypoIndex(index utils.Index) error // drop a hypothetical index 38 | 39 | ExplainQ(q utils.Query) (plan utils.Plan, err error) // return the execution plan of the specified query 40 | Explain(query string) (plan utils.Plan, err error) // return the execution plan of the specified query 41 | ExplainAnalyze(query string) (plan utils.Plan, err error) // return the execution plan of the specified query with analyze 42 | 43 | ResetStats() // reset the statistics 44 | Stats() WhatIfOptimizerStats // return the statistics 45 | 46 | SetDebug(flag bool) // print each query if set to true 47 | } 48 | -------------------------------------------------------------------------------- /examples/job/stats/kind_type.json: -------------------------------------------------------------------------------- 1 | { 2 | "is_historical_stats": false, 3 | "database_name": "imdbload_no_fk", 4 | "table_name": "kind_type", 5 | "columns": { 6 | "id": { 7 | "histogram": { 8 | "ndv": 7 9 | }, 10 | "cm_sketch": { 11 | "top_n": [ 12 | { 13 | "data": "A4AAAAAAAAAB", 14 | "count": 1 15 | }, 16 | { 17 | "data": "A4AAAAAAAAAC", 18 | "count": 1 19 | }, 20 | { 21 | "data": "A4AAAAAAAAAD", 22 | "count": 1 23 | }, 24 | { 25 | "data": "A4AAAAAAAAAE", 26 | "count": 1 27 | }, 28 | { 29 | "data": "A4AAAAAAAAAF", 30 | "count": 1 31 | }, 32 | { 33 | "data": "A4AAAAAAAAAG", 34 | "count": 1 35 | }, 36 | { 37 | "data": "A4AAAAAAAAAH", 38 | "count": 1 39 | } 40 | ], 41 | "default_value": 0 42 | }, 43 | "fm_sketch": null, 44 | "null_count": 0, 45 | "tot_col_size": 56, 46 | 
"last_update_version": 441988928290684941, 47 | "correlation": 1, 48 | "stats_ver": 2 49 | }, 50 | "kind": { 51 | "histogram": { 52 | "ndv": 7 53 | }, 54 | "cm_sketch": { 55 | "top_n": [ 56 | { 57 | "data": "AWVwaXNvZGUA/g==", 58 | "count": 1 59 | }, 60 | { 61 | "data": "AW1vdmllAAAA/A==", 62 | "count": 1 63 | }, 64 | { 65 | "data": "AXR2IG1pbmkg/3NlcmllcwAA/Q==", 66 | "count": 1 67 | }, 68 | { 69 | "data": "AXR2IG1vdmll/wAAAAAAAAAA9w==", 70 | "count": 1 71 | }, 72 | { 73 | "data": "AXR2IHNlcmll/3MAAAAAAAAA+A==", 74 | "count": 1 75 | }, 76 | { 77 | "data": "AXZpZGVvIGdh/21lAAAAAAAA+Q==", 78 | "count": 1 79 | }, 80 | { 81 | "data": "AXZpZGVvIG1v/3ZpZQAAAAAA+g==", 82 | "count": 1 83 | } 84 | ], 85 | "default_value": 0 86 | }, 87 | "fm_sketch": null, 88 | "null_count": 0, 89 | "tot_col_size": 71, 90 | "last_update_version": 441988928290684941, 91 | "correlation": -0.07142857142857142, 92 | "stats_ver": 2 93 | } 94 | }, 95 | "indices": {}, 96 | "ext_stats": null, 97 | "count": 7, 98 | "modify_count": 0, 99 | "partitions": null, 100 | "version": 441988928290684941 101 | } -------------------------------------------------------------------------------- /examples/web3bench/queries.sql: -------------------------------------------------------------------------------- 1 | -- R1 2 | Select to_address, from_address from transactions where hash = '0x1f415defb2729863fd8088727900d99b7df6f03d5e22e2105fc984cac3d0fb1c'; 3 | 4 | -- R21 5 | Select * from transactions where to_address in ('0x70f0f4f40fed33420c1e4ceefa1eb482e044ba24', 6 | '0x34662f274a42a17876926bc7b0ba541535e40e5f', 7 | '0x7259c2a51a9b1f7e373dcd00898d26a44ffc2e7c'); 8 | 9 | -- R22 10 | Select * from transactions 11 | where hash in ('0x1f415defb2729863fd8088727900d99b7df6f03d5e22e2105fc984cac3d0fb1c', 12 | '0xbeff7a4cf341d10c6293a2ecfb255f39c21836bf8956c6877d0f2486794fd5b8', 13 | '0x5dee984c63cc26037a81d0f2861565c4e0c21a87ebf165b331faec347d7a76a1', 14 | 
'0xc7da1e3391e4b7769fffe8e6afc284175a6cbe5fd9b333d9c0585944a36118dd') and to_address <> from_address; 15 | 16 | -- R31 17 | SELECT * FROM token_transfers WHERE from_address = '0xfbb1b73c4f0bda4f67dca266ce6ef42f520fbb98' ORDER BY block_number DESC LIMIT 5; 18 | 19 | -- R32 20 | Select count(*) from token_transfers where token_address = '0x7a93f0d9f302c0818022f8dca6ee1eb0f1b50308'; 21 | 22 | -- R41 23 | SELECT * FROM transactions 24 | WHERE from_address = '0x31d118c5f75502b96ca21d3d0d3fb8d7b19fed24' OR to_address = '0x6364989a903f45798c7a292778285a83d0928608' 25 | ORDER BY block_timestamp DESC LIMIT 10; 26 | 27 | -- R42 28 | SELECT count(DISTINCT from_address) FROM transactions; 29 | 30 | -- R43 31 | SELECT 32 | sum(`value`) AS totalamount, 33 | count(`value`) AS transactioncount, 34 | from_address AS fromaddress 35 | FROM transactions 36 | WHERE to_address = '0xfeadad412ec5b5f62afe4b6f39a168eb5f098f41' AND 37 | block_timestamp >= 1499637035 AND block_timestamp <= 1499639599 AND `value` > 1008000000000000 38 | GROUP BY from_address 39 | ORDER BY sum(value) DESC 40 | LIMIT 10; 41 | 42 | -- R44 43 | SELECT 44 | count(*) as count 45 | FROM (SELECT * 46 | FROM token_transfers t 47 | WHERE from_address = '0xfbb1b73c4f0bda4f67dca266ce6ef42f520fbb98' 48 | UNION ALL 49 | SELECT t2.* 50 | FROM token_transfers t2 51 | INNER JOIN token_transfers t ON t2.from_address = t.to_address 52 | AND t.value < t2.value 53 | LIMIT 100) as temp; 54 | 55 | 56 | -- R45 57 | SELECT COUNT(DISTINCT block_receipts) as count 58 | FROM (SELECT block_number AS block_receipts 59 | FROM receipts 60 | WHERE NOT EXISTS ( 61 | SELECT block_number 62 | FROM transactions 63 | WHERE block_number = receipts.block_number)) as temp; 64 | -------------------------------------------------------------------------------- /examples/tpch_example1/output/summary.txt: -------------------------------------------------------------------------------- 1 | Total Queries in the workload: 21 2 | Total number of indexes: 5 3 
| CREATE INDEX idx_l_partkey_l_quantity_l_shipmode ON tpch.lineitem (l_partkey, l_quantity, l_shipmode); 4 | CREATE INDEX idx_l_partkey_l_shipdate_l_shipmode ON tpch.lineitem (l_partkey, l_shipdate, l_shipmode); 5 | CREATE INDEX idx_l_suppkey_l_shipdate ON tpch.lineitem (l_suppkey, l_shipdate); 6 | CREATE INDEX idx_o_custkey_o_orderdate_o_totalprice ON tpch.orders (o_custkey, o_orderdate, o_totalprice); 7 | CREATE INDEX idx_ps_suppkey_ps_supplycost ON tpch.partsupp (ps_suppkey, ps_supplycost); 8 | Total original workload cost: 1.37E+10 9 | Total optimized workload cost: 1.02E+10 10 | Total cost reduction ratio: 25.22% 11 | Top 10 queries with the most cost reduction ratio: 12 | Alias: q22, Cost Reduction Ratio: 1.97E+08->4.30E+06(0.02) 13 | Alias: q19, Cost Reduction Ratio: 2.89E+08->1.20E+07(0.04) 14 | Alias: q20, Cost Reduction Ratio: 3.40E+08->4.39E+07(0.13) 15 | Alias: q17, Cost Reduction Ratio: 8.36E+08->2.00E+08(0.24) 16 | Alias: q2, Cost Reduction Ratio: 1.35E+08->3.76E+07(0.28) 17 | Alias: q5, Cost Reduction Ratio: 7.79E+08->2.51E+08(0.32) 18 | Alias: q11, Cost Reduction Ratio: 7.62E+07->2.54E+07(0.33) 19 | Alias: q7, Cost Reduction Ratio: 5.99E+08->2.46E+08(0.41) 20 | Alias: q14, Cost Reduction Ratio: 2.76E+08->1.17E+08(0.43) 21 | Alias: q21, Cost Reduction Ratio: 8.62E+08->4.30E+08(0.50) 22 | Top 10 queries with the most cost reduction number: 23 | Alias: q17, Cost Reduction Ratio: 8.36E+08->2.00E+08(0.24) 24 | Alias: q5, Cost Reduction Ratio: 7.79E+08->2.51E+08(0.32) 25 | Alias: q21, Cost Reduction Ratio: 8.62E+08->4.30E+08(0.50) 26 | Alias: q7, Cost Reduction Ratio: 5.99E+08->2.46E+08(0.41) 27 | Alias: q20, Cost Reduction Ratio: 3.40E+08->4.39E+07(0.13) 28 | Alias: q19, Cost Reduction Ratio: 2.89E+08->1.20E+07(0.04) 29 | Alias: q9, Cost Reduction Ratio: 2.13E+09->1.90E+09(0.89) 30 | Alias: q22, Cost Reduction Ratio: 1.97E+08->4.30E+06(0.02) 31 | Alias: q14, Cost Reduction Ratio: 2.76E+08->1.17E+08(0.43) 32 | Alias: q18, Cost Reduction Ratio: 
3.41E+09->3.30E+09(0.97) 33 | Top 10 queries with the most cost: 34 | Alias: q18, Cost Reduction Ratio: 3.41E+09->3.30E+09(0.97) 35 | Alias: q9, Cost Reduction Ratio: 2.13E+09->1.90E+09(0.89) 36 | Alias: q3, Cost Reduction Ratio: 1.77E+09->1.75E+09(0.99) 37 | Alias: q21, Cost Reduction Ratio: 8.62E+08->4.30E+08(0.50) 38 | Alias: q17, Cost Reduction Ratio: 8.36E+08->2.00E+08(0.24) 39 | Alias: q5, Cost Reduction Ratio: 7.79E+08->2.51E+08(0.32) 40 | Alias: q7, Cost Reduction Ratio: 5.99E+08->2.46E+08(0.41) 41 | Alias: q12, Cost Reduction Ratio: 3.92E+08->3.92E+08(1.00) 42 | Alias: q8, Cost Reduction Ratio: 3.57E+08->3.13E+08(0.88) 43 | Alias: q6, Cost Reduction Ratio: 3.28E+08->3.28E+08(1.00) 44 | -------------------------------------------------------------------------------- /examples/web3bench/output/summary.txt: -------------------------------------------------------------------------------- 1 | Total Queries in the workload: 10 2 | Total number of indexes: 7 3 | CREATE INDEX idx_block_number ON ethereum.receipts (block_number); 4 | CREATE INDEX idx_from_address_block_number_token_address ON ethereum.token_transfers (from_address, block_number, token_address); 5 | CREATE INDEX idx_token_address ON ethereum.token_transfers (token_address); 6 | CREATE INDEX idx_block_number ON ethereum.transactions (block_number); 7 | CREATE INDEX idx_from_address ON ethereum.transactions (from_address); 8 | CREATE INDEX idx_hash ON ethereum.transactions (hash); 9 | CREATE INDEX idx_to_address_block_timestamp_value ON ethereum.transactions (to_address, block_timestamp, value); 10 | Total original workload cost: 1.05E+10 11 | Total optimized workload cost: 2.65E+09 12 | Total cost reduction ratio: 74.68% 13 | Top 10 queries with the most cost reduction ratio: 14 | Alias: q4, Cost Reduction Ratio: 1.39E+08->6.04E+03(0.00) 15 | Alias: q9, Cost Reduction Ratio: 2.77E+08->1.85E+04(0.00) 16 | Alias: q5, Cost Reduction Ratio: 1.39E+08->8.95E+04(0.00) 17 | Alias: q6, Cost Reduction Ratio: 
6.72E+08->2.03E+06(0.00) 18 | Alias: q8, Cost Reduction Ratio: 9.62E+08->8.02E+06(0.01) 19 | Alias: q1, Cost Reduction Ratio: 7.24E+08->4.76E+07(0.07) 20 | Alias: q2, Cost Reduction Ratio: 6.72E+08->1.44E+08(0.21) 21 | Alias: q10, Cost Reduction Ratio: 4.72E+09->1.13E+09(0.24) 22 | Alias: q3, Cost Reduction Ratio: 7.51E+08->1.92E+08(0.26) 23 | Alias: q7, Cost Reduction Ratio: 1.41E+09->1.13E+09(0.80) 24 | Top 10 queries with the most cost reduction number: 25 | Alias: q10, Cost Reduction Ratio: 4.72E+09->1.13E+09(0.24) 26 | Alias: q8, Cost Reduction Ratio: 9.62E+08->8.02E+06(0.01) 27 | Alias: q1, Cost Reduction Ratio: 7.24E+08->4.76E+07(0.07) 28 | Alias: q6, Cost Reduction Ratio: 6.72E+08->2.03E+06(0.00) 29 | Alias: q3, Cost Reduction Ratio: 7.51E+08->1.92E+08(0.26) 30 | Alias: q2, Cost Reduction Ratio: 6.72E+08->1.44E+08(0.21) 31 | Alias: q7, Cost Reduction Ratio: 1.41E+09->1.13E+09(0.80) 32 | Alias: q9, Cost Reduction Ratio: 2.77E+08->1.85E+04(0.00) 33 | Alias: q4, Cost Reduction Ratio: 1.39E+08->6.04E+03(0.00) 34 | Alias: q5, Cost Reduction Ratio: 1.39E+08->8.95E+04(0.00) 35 | Top 10 queries with the most cost: 36 | Alias: q10, Cost Reduction Ratio: 4.72E+09->1.13E+09(0.24) 37 | Alias: q7, Cost Reduction Ratio: 1.41E+09->1.13E+09(0.80) 38 | Alias: q8, Cost Reduction Ratio: 9.62E+08->8.02E+06(0.01) 39 | Alias: q3, Cost Reduction Ratio: 7.51E+08->1.92E+08(0.26) 40 | Alias: q2, Cost Reduction Ratio: 6.72E+08->1.44E+08(0.21) 41 | Alias: q1, Cost Reduction Ratio: 7.24E+08->4.76E+07(0.07) 42 | Alias: q6, Cost Reduction Ratio: 6.72E+08->2.03E+06(0.00) 43 | Alias: q9, Cost Reduction Ratio: 2.77E+08->1.85E+04(0.00) 44 | Alias: q4, Cost Reduction Ratio: 1.39E+08->6.04E+03(0.00) 45 | Alias: q5, Cost Reduction Ratio: 1.39E+08->8.95E+04(0.00) 46 | -------------------------------------------------------------------------------- /examples/job/output/summary.txt: -------------------------------------------------------------------------------- 1 | Total Queries in the workload: 
113 2 | Total number of indexes: 9 3 | CREATE INDEX idx_movie_id_person_id ON imdbload_no_fk.cast_info (movie_id, person_id); 4 | CREATE INDEX idx_person_id ON imdbload_no_fk.cast_info (person_id); 5 | CREATE INDEX idx_role_id ON imdbload_no_fk.cast_info (role_id); 6 | CREATE INDEX idx_movie_id_company_id_company_type_id ON imdbload_no_fk.movie_companies (movie_id, company_id, company_type_id); 7 | CREATE INDEX idx_info_type_id ON imdbload_no_fk.movie_info (info_type_id); 8 | CREATE INDEX idx_movie_id_info_type_id ON imdbload_no_fk.movie_info (movie_id, info_type_id); 9 | CREATE INDEX idx_movie_id_info_type_id ON imdbload_no_fk.movie_info_idx (movie_id, info_type_id); 10 | CREATE INDEX idx_keyword_id_movie_id ON imdbload_no_fk.movie_keyword (keyword_id, movie_id); 11 | CREATE INDEX idx_movie_id_keyword_id ON imdbload_no_fk.movie_keyword (movie_id, keyword_id); 12 | Total original workload cost: 1.37E+11 13 | Total optimized workload cost: 4.22E+10 14 | Total cost reduction ratio: 69.28% 15 | Top 10 queries with the most cost reduction ratio: 16 | Alias: 18b, Cost Reduction Ratio: 8.14E+08->1.43E+04(0.00) 17 | Alias: 17a, Cost Reduction Ratio: 1.78E+09->3.01E+06(0.00) 18 | Alias: 17c, Cost Reduction Ratio: 1.78E+09->3.01E+06(0.00) 19 | Alias: 17b, Cost Reduction Ratio: 1.78E+09->3.01E+06(0.00) 20 | Alias: 17e, Cost Reduction Ratio: 1.78E+09->3.02E+06(0.00) 21 | Alias: 17d, Cost Reduction Ratio: 1.78E+09->3.02E+06(0.00) 22 | Alias: 17f, Cost Reduction Ratio: 1.78E+09->3.02E+06(0.00) 23 | Alias: 6c, Cost Reduction Ratio: 1.42E+09->2.72E+06(0.00) 24 | Alias: 6a, Cost Reduction Ratio: 1.42E+09->2.72E+06(0.00) 25 | Alias: 6e, Cost Reduction Ratio: 1.42E+09->2.72E+06(0.00) 26 | Top 10 queries with the most cost reduction number: 27 | Alias: 26c, Cost Reduction Ratio: 2.18E+09->4.87E+07(0.02) 28 | Alias: 26b, Cost Reduction Ratio: 2.13E+09->8.74E+07(0.04) 29 | Alias: 26a, Cost Reduction Ratio: 2.21E+09->1.70E+08(0.08) 30 | Alias: 20a, Cost Reduction Ratio: 
1.85E+09->4.76E+07(0.03) 31 | Alias: 20b, Cost Reduction Ratio: 1.85E+09->4.80E+07(0.03) 32 | Alias: 20c, Cost Reduction Ratio: 1.85E+09->4.80E+07(0.03) 33 | Alias: 16d, Cost Reduction Ratio: 1.82E+09->4.65E+07(0.03) 34 | Alias: 16a, Cost Reduction Ratio: 1.82E+09->4.65E+07(0.03) 35 | Alias: 16c, Cost Reduction Ratio: 1.82E+09->4.65E+07(0.03) 36 | Alias: 16b, Cost Reduction Ratio: 1.82E+09->4.65E+07(0.03) 37 | Top 10 queries with the most cost: 38 | Alias: 10c, Cost Reduction Ratio: 2.21E+09->1.96E+09(0.89) 39 | Alias: 29c, Cost Reduction Ratio: 2.06E+09->1.10E+09(0.54) 40 | Alias: 24a, Cost Reduction Ratio: 1.89E+09->1.07E+09(0.56) 41 | Alias: 15d, Cost Reduction Ratio: 2.01E+09->7.88E+08(0.39) 42 | Alias: 15a, Cost Reduction Ratio: 1.66E+09->1.13E+09(0.68) 43 | Alias: 19d, Cost Reduction Ratio: 1.89E+09->8.90E+08(0.47) 44 | Alias: 15c, Cost Reduction Ratio: 1.61E+09->9.84E+08(0.61) 45 | Alias: 19a, Cost Reduction Ratio: 1.52E+09->1.02E+09(0.67) 46 | Alias: 19c, Cost Reduction Ratio: 1.52E+09->1.02E+09(0.67) 47 | Alias: 19b, Cost Reduction Ratio: 1.48E+09->1.01E+09(0.68) 48 | -------------------------------------------------------------------------------- /examples/tpch_example1/stats/tidb_stats_by_table_1684995617.json: -------------------------------------------------------------------------------- 1 | { 2 | "is_historical_stats": false, 3 | "database_name": "tpch", 4 | "table_name": "region", 5 | "columns": { 6 | "r_comment": { 7 | "histogram": { 8 | "ndv": 5 9 | }, 10 | "cm_sketch": { 11 | "top_n": [ 12 | { 13 | "data": "AWdlcy4gdGhp/25seSBldmVu/yBwaW50byBi/2VhbnMgY2EA/g==", 14 | "count": 1 15 | }, 16 | { 17 | "data": "AWhzIHVzZSBp/3JvbmljLCBl/3ZlbiByZXF1/2VzdHMuIHMA/g==", 18 | "count": 1 19 | }, 20 | { 21 | "data": "AWxhciBkZXBv/3NpdHMuIGJs/2l0aGVseSBm/2luYWwgcGFj/2thZ2VzIGNh/2pvbGUuIHJl/2d1bGFyIHdh/3RlcnMgYXJl/yBmaW5hbCBy/2VxdWVzdHMu/yByZWd1bGFy/yBhY2NvdW50/3MgYXJlIGFj/2NvcmRpbmcg/3RvAAAAAAAA+Q==", 22 | "count": 1 23 | }, 24 | { 25 | "data": 
"AWx5IGZpbmFs/yBjb3VydHMg/2Nham9sZSBm/3VyaW91c2x5/yBmaW5hbCBl/3hjdXNlAAAA/A==", 26 | "count": 1 27 | }, 28 | { 29 | "data": "AXVpY2tseSBz/3BlY2lhbCBh/2Njb3VudHMg/2Nham9sZSBj/2FyZWZ1bGx5/yBibGl0aGVs/3kgY2xvc2Ug/3JlcXVlc3Rz/y4gY2FyZWZ1/2xseSBmaW5h/2wgYXN5bXB0/290ZXMgaGFn/2dsZSBmdXJp/291c2wAAAAA+w==", 30 | "count": 1 31 | } 32 | ], 33 | "default_value": 0 34 | }, 35 | "fm_sketch": null, 36 | "null_count": 0, 37 | "tot_col_size": 337, 38 | "last_update_version": 441690031786295304, 39 | "correlation": 0.6, 40 | "stats_ver": 2 41 | }, 42 | "r_name": { 43 | "histogram": { 44 | "ndv": 5 45 | }, 46 | "cm_sketch": { 47 | "top_n": [ 48 | { 49 | "data": "AUFGUklDQQAA/Q==", 50 | "count": 1 51 | }, 52 | { 53 | "data": "AUFNRVJJQ0EA/g==", 54 | "count": 1 55 | }, 56 | { 57 | "data": "AUFTSUEAAAAA+w==", 58 | "count": 1 59 | }, 60 | { 61 | "data": "AUVVUk9QRQAA/Q==", 62 | "count": 1 63 | }, 64 | { 65 | "data": "AU1JRERMRSBF/0FTVAAAAAAA+g==", 66 | "count": 1 67 | } 68 | ], 69 | "default_value": 0 70 | }, 71 | "fm_sketch": null, 72 | "null_count": 0, 73 | "tot_col_size": 39, 74 | "last_update_version": 441690031786295304, 75 | "correlation": 1, 76 | "stats_ver": 2 77 | }, 78 | "r_regionkey": { 79 | "histogram": { 80 | "ndv": 5 81 | }, 82 | "cm_sketch": { 83 | "top_n": [ 84 | { 85 | "data": "A4AAAAAAAAAA", 86 | "count": 1 87 | }, 88 | { 89 | "data": "A4AAAAAAAAAB", 90 | "count": 1 91 | }, 92 | { 93 | "data": "A4AAAAAAAAAC", 94 | "count": 1 95 | }, 96 | { 97 | "data": "A4AAAAAAAAAD", 98 | "count": 1 99 | }, 100 | { 101 | "data": "A4AAAAAAAAAE", 102 | "count": 1 103 | } 104 | ], 105 | "default_value": 0 106 | }, 107 | "fm_sketch": null, 108 | "null_count": 0, 109 | "tot_col_size": 40, 110 | "last_update_version": 441690031786295304, 111 | "correlation": 1, 112 | "stats_ver": 2 113 | } 114 | }, 115 | "indices": {}, 116 | "ext_stats": null, 117 | "count": 5, 118 | "modify_count": 0, 119 | "partitions": null, 120 | "version": 441690031786295304 121 | } 
-------------------------------------------------------------------------------- /examples/tpch_example2/stats/tidb_stats_by_table_1684995617.json: -------------------------------------------------------------------------------- 1 | { 2 | "is_historical_stats": false, 3 | "database_name": "tpch", 4 | "table_name": "region", 5 | "columns": { 6 | "r_comment": { 7 | "histogram": { 8 | "ndv": 5 9 | }, 10 | "cm_sketch": { 11 | "top_n": [ 12 | { 13 | "data": "AWdlcy4gdGhp/25seSBldmVu/yBwaW50byBi/2VhbnMgY2EA/g==", 14 | "count": 1 15 | }, 16 | { 17 | "data": "AWhzIHVzZSBp/3JvbmljLCBl/3ZlbiByZXF1/2VzdHMuIHMA/g==", 18 | "count": 1 19 | }, 20 | { 21 | "data": "AWxhciBkZXBv/3NpdHMuIGJs/2l0aGVseSBm/2luYWwgcGFj/2thZ2VzIGNh/2pvbGUuIHJl/2d1bGFyIHdh/3RlcnMgYXJl/yBmaW5hbCBy/2VxdWVzdHMu/yByZWd1bGFy/yBhY2NvdW50/3MgYXJlIGFj/2NvcmRpbmcg/3RvAAAAAAAA+Q==", 22 | "count": 1 23 | }, 24 | { 25 | "data": "AWx5IGZpbmFs/yBjb3VydHMg/2Nham9sZSBm/3VyaW91c2x5/yBmaW5hbCBl/3hjdXNlAAAA/A==", 26 | "count": 1 27 | }, 28 | { 29 | "data": "AXVpY2tseSBz/3BlY2lhbCBh/2Njb3VudHMg/2Nham9sZSBj/2FyZWZ1bGx5/yBibGl0aGVs/3kgY2xvc2Ug/3JlcXVlc3Rz/y4gY2FyZWZ1/2xseSBmaW5h/2wgYXN5bXB0/290ZXMgaGFn/2dsZSBmdXJp/291c2wAAAAA+w==", 30 | "count": 1 31 | } 32 | ], 33 | "default_value": 0 34 | }, 35 | "fm_sketch": null, 36 | "null_count": 0, 37 | "tot_col_size": 337, 38 | "last_update_version": 441690031786295304, 39 | "correlation": 0.6, 40 | "stats_ver": 2 41 | }, 42 | "r_name": { 43 | "histogram": { 44 | "ndv": 5 45 | }, 46 | "cm_sketch": { 47 | "top_n": [ 48 | { 49 | "data": "AUFGUklDQQAA/Q==", 50 | "count": 1 51 | }, 52 | { 53 | "data": "AUFNRVJJQ0EA/g==", 54 | "count": 1 55 | }, 56 | { 57 | "data": "AUFTSUEAAAAA+w==", 58 | "count": 1 59 | }, 60 | { 61 | "data": "AUVVUk9QRQAA/Q==", 62 | "count": 1 63 | }, 64 | { 65 | "data": "AU1JRERMRSBF/0FTVAAAAAAA+g==", 66 | "count": 1 67 | } 68 | ], 69 | "default_value": 0 70 | }, 71 | "fm_sketch": null, 72 | "null_count": 0, 73 | "tot_col_size": 39, 74 | "last_update_version": 
441690031786295304, 75 | "correlation": 1, 76 | "stats_ver": 2 77 | }, 78 | "r_regionkey": { 79 | "histogram": { 80 | "ndv": 5 81 | }, 82 | "cm_sketch": { 83 | "top_n": [ 84 | { 85 | "data": "A4AAAAAAAAAA", 86 | "count": 1 87 | }, 88 | { 89 | "data": "A4AAAAAAAAAB", 90 | "count": 1 91 | }, 92 | { 93 | "data": "A4AAAAAAAAAC", 94 | "count": 1 95 | }, 96 | { 97 | "data": "A4AAAAAAAAAD", 98 | "count": 1 99 | }, 100 | { 101 | "data": "A4AAAAAAAAAE", 102 | "count": 1 103 | } 104 | ], 105 | "default_value": 0 106 | }, 107 | "fm_sketch": null, 108 | "null_count": 0, 109 | "tot_col_size": 40, 110 | "last_update_version": 441690031786295304, 111 | "correlation": 1, 112 | "stats_ver": 2 113 | } 114 | }, 115 | "indices": {}, 116 | "ext_stats": null, 117 | "count": 5, 118 | "modify_count": 0, 119 | "partitions": null, 120 | "version": 441690031786295304 121 | } -------------------------------------------------------------------------------- /examples/job/stats/role_type.json: -------------------------------------------------------------------------------- 1 | { 2 | "is_historical_stats": false, 3 | "database_name": "imdbload_no_fk", 4 | "table_name": "role_type", 5 | "columns": { 6 | "id": { 7 | "histogram": { 8 | "ndv": 12 9 | }, 10 | "cm_sketch": { 11 | "top_n": [ 12 | { 13 | "data": "A4AAAAAAAAAB", 14 | "count": 1 15 | }, 16 | { 17 | "data": "A4AAAAAAAAAC", 18 | "count": 1 19 | }, 20 | { 21 | "data": "A4AAAAAAAAAD", 22 | "count": 1 23 | }, 24 | { 25 | "data": "A4AAAAAAAAAE", 26 | "count": 1 27 | }, 28 | { 29 | "data": "A4AAAAAAAAAF", 30 | "count": 1 31 | }, 32 | { 33 | "data": "A4AAAAAAAAAG", 34 | "count": 1 35 | }, 36 | { 37 | "data": "A4AAAAAAAAAH", 38 | "count": 1 39 | }, 40 | { 41 | "data": "A4AAAAAAAAAI", 42 | "count": 1 43 | }, 44 | { 45 | "data": "A4AAAAAAAAAJ", 46 | "count": 1 47 | }, 48 | { 49 | "data": "A4AAAAAAAAAK", 50 | "count": 1 51 | }, 52 | { 53 | "data": "A4AAAAAAAAAL", 54 | "count": 1 55 | }, 56 | { 57 | "data": "A4AAAAAAAAAM", 58 | "count": 1 59 | } 60 | 
], 61 | "default_value": 0 62 | }, 63 | "fm_sketch": null, 64 | "null_count": 0, 65 | "tot_col_size": 96, 66 | "last_update_version": 441988931161161740, 67 | "correlation": 1, 68 | "stats_ver": 2 69 | }, 70 | "role": { 71 | "histogram": { 72 | "ndv": 12 73 | }, 74 | "cm_sketch": { 75 | "top_n": [ 76 | { 77 | "data": "AWFjdG9yAAAA/A==", 78 | "count": 1 79 | }, 80 | { 81 | "data": "AWFjdHJlc3MA/g==", 82 | "count": 1 83 | }, 84 | { 85 | "data": "AWNpbmVtYXRv/2dyYXBoZXIA/g==", 86 | "count": 1 87 | }, 88 | { 89 | "data": "AWNvbXBvc2Vy/wAAAAAAAAAA9w==", 90 | "count": 1 91 | }, 92 | { 93 | "data": "AWNvc3R1bWUg/2Rlc2lnbmVy/wAAAAAAAAAA9w==", 94 | "count": 1 95 | }, 96 | { 97 | "data": "AWRpcmVjdG9y/wAAAAAAAAAA9w==", 98 | "count": 1 99 | }, 100 | { 101 | "data": "AWVkaXRvcgAA/Q==", 102 | "count": 1 103 | }, 104 | { 105 | "data": "AWd1ZXN0AAAA/A==", 106 | "count": 1 107 | }, 108 | { 109 | "data": "AW1pc2NlbGxh/25lb3VzIGNy/2V3AAAAAAAA+Q==", 110 | "count": 1 111 | }, 112 | { 113 | "data": "AXByb2R1Y2Vy/wAAAAAAAAAA9w==", 114 | "count": 1 115 | }, 116 | { 117 | "data": "AXByb2R1Y3Rp/29uIGRlc2ln/25lcgAAAAAA+g==", 118 | "count": 1 119 | }, 120 | { 121 | "data": "AXdyaXRlcgAA/Q==", 122 | "count": 1 123 | } 124 | ], 125 | "default_value": 0 126 | }, 127 | "fm_sketch": null, 128 | "null_count": 0, 129 | "tot_col_size": 133, 130 | "last_update_version": 441988931161161740, 131 | "correlation": 0.4755244755244755, 132 | "stats_ver": 2 133 | } 134 | }, 135 | "indices": {}, 136 | "ext_stats": null, 137 | "count": 12, 138 | "modify_count": 0, 139 | "partitions": null, 140 | "version": 441988931161161740 141 | } -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/qw4990/index_advisor 2 | 3 | go 1.20 4 | 5 | require ( 6 | github.com/pingcap/parser v0.0.0-20210415081931-48e7f467fd74 7 | github.com/pingcap/tidb v1.1.0-beta.0.20210415113353-05e584f145f1 
8 | github.com/spf13/cobra v1.7.0 9 | ) 10 | 11 | require ( 12 | github.com/BurntSushi/toml v0.3.1 // indirect 13 | github.com/StackExchange/wmi v0.0.0-20190523213315-cbe66965904d // indirect 14 | github.com/benbjohnson/clock v1.3.5 // indirect 15 | github.com/beorn7/perks v1.0.1 // indirect 16 | github.com/cespare/xxhash/v2 v2.1.1 // indirect 17 | github.com/coreos/go-systemd v0.0.0-20190321100706-95778dfbb74e // indirect 18 | github.com/coreos/pkg v0.0.0-20180928190104-399ea9e2e55f // indirect 19 | github.com/cznic/mathutil v0.0.0-20181122101859-297441e03548 // indirect 20 | github.com/danjacques/gofslock v0.0.0-20191023191349-0a45f885bc37 // indirect 21 | github.com/go-ole/go-ole v1.2.4 // indirect 22 | github.com/go-sql-driver/mysql v1.7.1 // indirect 23 | github.com/gogo/protobuf v1.3.2 // indirect 24 | github.com/golang/protobuf v1.3.4 // indirect 25 | github.com/google/go-cmp v0.5.8 // indirect 26 | github.com/inconshreveable/mousetrap v1.1.0 // indirect 27 | github.com/konsorten/go-windows-terminal-sequences v1.0.3 // indirect 28 | github.com/matttproud/golang_protobuf_extensions v1.0.1 // indirect 29 | github.com/opentracing/opentracing-go v1.1.0 // indirect 30 | github.com/pingcap/errors v0.11.5-0.20210425183316-da1aaba5fb63 // indirect 31 | github.com/pingcap/failpoint v0.0.0-20210316064728-7acb0f0a3dfd // indirect 32 | github.com/pingcap/kvproto v0.0.0-20210308063835-39b884695fb8 // indirect 33 | github.com/pingcap/log v0.0.0-20210317133921-96f4fcab92a4 // indirect 34 | github.com/pingcap/tipb v0.0.0-20210326161441-1164ca065d1b // indirect 35 | github.com/pkg/errors v0.9.1 // indirect 36 | github.com/prometheus/client_golang v1.5.1 // indirect 37 | github.com/prometheus/client_model v0.2.0 // indirect 38 | github.com/prometheus/common v0.9.1 // indirect 39 | github.com/prometheus/procfs v0.0.8 // indirect 40 | github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect 41 | github.com/shirou/gopsutil v3.21.2+incompatible // indirect 
42 | github.com/sirupsen/logrus v1.6.0 // indirect 43 | github.com/spf13/pflag v1.0.5 // indirect 44 | github.com/stretchr/testify v1.8.2 // indirect 45 | github.com/tikv/pd v1.1.0-beta.0.20210323121136-78679e5e209d // indirect 46 | github.com/uber/jaeger-client-go v2.22.1+incompatible // indirect 47 | github.com/uber/jaeger-lib v2.4.0+incompatible // indirect 48 | go.etcd.io/etcd v0.5.0-alpha.5.0.20200824191128-ae9734ed278b // indirect 49 | go.uber.org/atomic v1.11.0 // indirect 50 | go.uber.org/multierr v1.11.0 // indirect 51 | go.uber.org/zap v1.24.0 // indirect 52 | golang.org/x/exp v0.0.0-20230519143937-03e91628a987 // indirect 53 | golang.org/x/net v0.0.0-20210316092652-d523dce5a7f4 // indirect 54 | golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9 // indirect 55 | golang.org/x/sys v0.5.0 // indirect 56 | golang.org/x/text v0.9.0 // indirect 57 | google.golang.org/genproto v0.0.0-20200224152610-e50cd9704f63 // indirect 58 | google.golang.org/grpc v1.27.1 // indirect 59 | gopkg.in/natefinch/lumberjack.v2 v2.2.1 // indirect 60 | ) 61 | -------------------------------------------------------------------------------- /advisor/index_advisor.go: -------------------------------------------------------------------------------- 1 | package advisor 2 | 3 | import ( 4 | "github.com/qw4990/index_advisor/optimizer" 5 | "github.com/qw4990/index_advisor/utils" 6 | ) 7 | 8 | // IndexSelectionAlgo is the interface for index selection algorithms. 9 | type IndexSelectionAlgo func( 10 | workloadInfo utils.WorkloadInfo, // the target workload 11 | parameter Parameter, // the input parameters 12 | optimizer optimizer.WhatIfOptimizer, // the what-if optimizer 13 | ) (utils.Set[utils.Index], error) 14 | 15 | // IndexableColumnsSelectionAlgo is the interface for indexable columns selection algorithms. 
16 | type IndexableColumnsSelectionAlgo func(workloadInfo *utils.WorkloadInfo) error 17 | 18 | // WorkloadInfoCompressionAlgo is the interface for workload info compression algorithms. 19 | type WorkloadInfoCompressionAlgo func(workloadInfo utils.WorkloadInfo) utils.WorkloadInfo 20 | 21 | var ( 22 | compressAlgorithms = map[string]WorkloadInfoCompressionAlgo{ 23 | "none": NoneWorkloadInfoCompress, 24 | "digest": DigestWorkloadInfoCompress, 25 | } 26 | 27 | findIndexableColsAlgorithms = map[string]IndexableColumnsSelectionAlgo{ 28 | "simple": IndexableColumnsSelectionSimple, 29 | } 30 | 31 | selectIndexAlgorithms = map[string]IndexSelectionAlgo{ 32 | "auto_admin": SelectIndexAAAlgo, 33 | } 34 | ) 35 | 36 | // Parameter is the input parameters of index advisor. 37 | type Parameter struct { 38 | MaxNumberIndexes int // the max number of indexes to recommend 39 | MaxIndexWidth int // the max number of columns in recommended indexes 40 | } 41 | 42 | func validateParameter(p Parameter) Parameter { 43 | if p.MaxNumberIndexes < 1 { 44 | utils.Warningf("max number of indexes should be at least 1, set from %v to 1", p.MaxNumberIndexes) 45 | p.MaxNumberIndexes = 1 46 | } 47 | if p.MaxNumberIndexes > 20 { 48 | utils.Warningf("max number of indexes should be at most 20, set from %v to 20", p.MaxNumberIndexes) 49 | p.MaxNumberIndexes = 20 50 | } 51 | if p.MaxIndexWidth < 1 { 52 | utils.Warningf("max index width should be at least 1, set from %v to 1", p.MaxIndexWidth) 53 | p.MaxIndexWidth = 1 54 | } 55 | if p.MaxIndexWidth > 5 { 56 | utils.Warningf("max index width should be at most 5, set from %v to 5", p.MaxIndexWidth) 57 | p.MaxIndexWidth = 5 58 | } 59 | return p 60 | } 61 | 62 | // IndexAdvise is the entry point of index advisor. 
63 | func IndexAdvise(db optimizer.WhatIfOptimizer, workload utils.WorkloadInfo, param Parameter) (utils.Set[utils.Index], error) { 64 | utils.Infof("start index advise for %v queries, %v tables", workload.Queries.Size(), workload.TableSchemas.Size()) 65 | param = validateParameter(param) 66 | 67 | compress := compressAlgorithms["digest"] 68 | indexable := findIndexableColsAlgorithms["simple"] 69 | selection := selectIndexAlgorithms["auto_admin"] 70 | 71 | compressedWorkloadInfo := compress(workload) 72 | utils.Infof("compress %v queries to %v queries", workload.Queries.Size(), compressedWorkloadInfo.Queries.Size()) 73 | 74 | if err := indexable(&compressedWorkloadInfo); err != nil { 75 | return nil, err 76 | } 77 | utils.Infof("find %v indexable columns", compressedWorkloadInfo.IndexableColumns.Size()) 78 | 79 | checkWorkloadInfo(compressedWorkloadInfo) 80 | recommendedIndexes, err := selection(compressedWorkloadInfo, param, db) 81 | if err != nil { 82 | return nil, err 83 | } 84 | utils.Infof("finish index advise with %v recommended indexes", recommendedIndexes.Size()) 85 | return recommendedIndexes, err 86 | } 87 | -------------------------------------------------------------------------------- /examples/tpch_example1/output/q6.txt: -------------------------------------------------------------------------------- 1 | Alias: q6 2 | Query: 3 | -- $ID$ 4 | -- TPC-H/TPC-R Forecasting Revenue Change Query (Q6) 5 | -- Functional Query Definition 6 | -- Approved February 1998 7 | 8 | 9 | select 10 | sum(l_extendedprice * l_discount) as revenue 11 | from 12 | lineitem 13 | where 14 | l_shipdate >= date '1993-01-01' 15 | and l_shipdate < date '1993-01-01' + interval '1' year 16 | and l_discount between 0.07 - 0.01 and 0.07 + 0.01 17 | and l_quantity < 25; 18 | 19 | Original Cost: 3.28E+08 20 | Optimized Cost: 3.28E+08 21 | Cost Reduction Ratio: 1.00 22 | 23 | 24 | ===================== original plan ===================== 25 | HashAgg_13 1.00 328164043.31 root 
funcs:sum(Column#18)->Column#17 26 | └─TableReader_14 1.00 328162514.77 root data:HashAgg_6 27 | └─HashAgg_6 1.00 4922437563.12 cop[tikv] funcs:sum(mul(tpch.lineitem.l_extendedprice, tpch.lineitem.l_discount))->Column#18 28 | └─Selection_12 161242.16 4919217661.10 cop[tikv] ge(tpch.lineitem.l_discount, 0.06), ge(tpch.lineitem.l_shipdate, 1993-01-01), le(tpch.lineitem.l_discount, 0.08), lt(tpch.lineitem.l_quantity, 25), lt(tpch.lineitem.l_shipdate, 1994-01-01) 29 | └─TableFullScan_11 8143998.00 2887290160.10 cop[tikv] table:lineitem keep order:false 30 | 31 | ===================== optimized plan ===================== 32 | HashAgg_13 1.00 328164043.31 root funcs:sum(Column#18)->Column#17 33 | └─TableReader_14 1.00 328162514.77 root data:HashAgg_6 34 | └─HashAgg_6 1.00 4922437563.12 cop[tikv] funcs:sum(mul(tpch.lineitem.l_extendedprice, tpch.lineitem.l_discount))->Column#18 35 | └─Selection_12 161242.16 4919217661.10 cop[tikv] ge(tpch.lineitem.l_discount, 0.06), ge(tpch.lineitem.l_shipdate, 1993-01-01), le(tpch.lineitem.l_discount, 0.08), lt(tpch.lineitem.l_quantity, 25), lt(tpch.lineitem.l_shipdate, 1994-01-01) 36 | └─TableFullScan_11 8143998.00 2887290160.10 cop[tikv] table:lineitem keep order:false -------------------------------------------------------------------------------- /utils/utils.go: -------------------------------------------------------------------------------- 1 | package utils 2 | 3 | import ( 4 | "fmt" 5 | "io" 6 | "net/http" 7 | "os" 8 | "path" 9 | "strings" 10 | ) 11 | 12 | // SaveContentTo saves the given content to the given file. 13 | func SaveContentTo(fpath, content string) error { 14 | return os.WriteFile(fpath, []byte(content), 0644) 15 | } 16 | 17 | // FileExists tests whether this file exists and is or not a directory. 
18 | func FileExists(filename string) (exist, isDir bool) { 19 | info, err := os.Stat(filename) 20 | if os.IsNotExist(err) { 21 | return false, false 22 | } 23 | return true, info.IsDir() 24 | } 25 | 26 | func PrepareDir(dirPath string) error { 27 | if err := os.RemoveAll(dirPath); err != nil { 28 | return err 29 | } 30 | return os.MkdirAll(dirPath, 0755) 31 | } 32 | 33 | func ReadURL(url string) ([]byte, error) { 34 | resp, err := http.Get(url) 35 | if err != nil { 36 | return nil, fmt.Errorf("get %v error: %v", url, err) 37 | } 38 | defer resp.Body.Close() 39 | 40 | if resp.StatusCode != http.StatusOK { 41 | return nil, fmt.Errorf("get %v error: status code is %v not OK(200)", url, resp.StatusCode) 42 | } 43 | 44 | data, err := io.ReadAll(resp.Body) 45 | if err != nil { 46 | return nil, fmt.Errorf("read %v response body error: %v", url, err) 47 | } 48 | return data, nil 49 | } 50 | 51 | // GetDBNameFromDSN extracts the database name from the given DSN. 52 | func GetDBNameFromDSN(dsn string) (dsnWithoutDB, dbName string) { 53 | idx := strings.Index(dsn, "/") 54 | if idx == -1 { 55 | return dsn, "" 56 | } 57 | return dsn[:idx+1], strings.TrimSpace(dsn[idx+1:]) 58 | } 59 | 60 | // ParseStmtsFromDir parses raw Queries from the given directory. 61 | // Each *.sql in this directory is parsed as a single Query. 62 | func ParseStmtsFromDir(dirPath string) (sqls, fileNames []string, err error) { 63 | des, err := os.ReadDir(dirPath) 64 | if err != nil { 65 | return nil, nil, err 66 | } 67 | for _, entry := range des { 68 | if entry.IsDir() || !strings.HasSuffix(entry.Name(), ".sql") { 69 | continue 70 | } 71 | fpath := path.Join(dirPath, entry.Name()) 72 | content, err := os.ReadFile(fpath) 73 | if err != nil { 74 | return nil, nil, err 75 | } 76 | sql := strings.TrimSpace(string(content)) 77 | sqls = append(sqls, sql) 78 | fileNames = append(fileNames, entry.Name()) 79 | } 80 | return 81 | } 82 | 83 | // ParseStmtsFromFile parses raw Queries from the given file. 
84 | // It ignore all comments, and assume all Queries are separated by ';'. 85 | func ParseStmtsFromFile(fpath string) ([]string, error) { 86 | data, err := os.ReadFile(fpath) 87 | if err != nil { 88 | return nil, err 89 | } 90 | lines := strings.Split(string(data), "\n") 91 | var filteredLines []string 92 | for _, line := range lines { 93 | line = strings.TrimSpace(line) 94 | if line == "" || strings.HasPrefix(line, "--") { // empty line or comment 95 | continue 96 | } 97 | filteredLines = append(filteredLines, line) 98 | } 99 | content := strings.Join(filteredLines, "\n") 100 | 101 | tmp := strings.Split(content, ";") 102 | var sqls []string 103 | for _, sql := range tmp { 104 | sql = strings.TrimSpace(sql) 105 | if sql == "" { 106 | continue 107 | } 108 | sqls = append(sqls, sql) 109 | } 110 | return sqls, nil 111 | } 112 | 113 | func Min[T int | float64](xs ...T) T { 114 | res := xs[0] 115 | for _, x := range xs { 116 | if x < res { 117 | res = x 118 | } 119 | } 120 | return res 121 | } 122 | 123 | func Max[T int | float64](xs ...T) T { 124 | res := xs[0] 125 | for _, x := range xs { 126 | if x > res { 127 | res = x 128 | } 129 | } 130 | return res 131 | } 132 | -------------------------------------------------------------------------------- /utils/tidb.go: -------------------------------------------------------------------------------- 1 | package utils 2 | 3 | import ( 4 | "bytes" 5 | "database/sql" 6 | "fmt" 7 | "net" 8 | "os" 9 | "os/exec" 10 | "path" 11 | "syscall" 12 | "time" 13 | 14 | _ "github.com/go-sql-driver/mysql" 15 | ) 16 | 17 | type LocalTiDBServer struct { 18 | cmd *exec.Cmd 19 | port int 20 | tmpDir string 21 | } 22 | 23 | func (s *LocalTiDBServer) Release() error { 24 | Infof("Kill TiDB process pid: %v", s.cmd.Process.Pid) 25 | err := syscall.Kill(s.cmd.Process.Pid, syscall.SIGQUIT) 26 | if err != nil { 27 | return err 28 | } 29 | 30 | Infof("wait for TiDB to close") 31 | time.Sleep(time.Second * 3) 32 | 33 | Infof("Clean tmpDir: %v", 
s.tmpDir) 34 | os.RemoveAll(s.tmpDir) 35 | return nil 36 | } 37 | 38 | func (s *LocalTiDBServer) DSN() string { 39 | return fmt.Sprintf("root:@tcp(127.0.0.1:%v)/", s.port) 40 | } 41 | 42 | // StartLocalTiDBServer starts a TiDB server with the given version. 43 | func StartLocalTiDBServer(ver string) (*LocalTiDBServer, error) { 44 | if ver == "" { 45 | ver = "nightly" 46 | } 47 | tiupPath, err := exec.LookPath("tiup") 48 | if err != nil { 49 | return nil, fmt.Errorf("failed to find tiup cmd: %v, please install tiup first: https://docs.pingcap.com/tidb/dev/tiup-overview", err) 50 | } 51 | 52 | port, err := GetFreePort() 53 | if err != nil { 54 | return nil, fmt.Errorf("failed to get a free port: %v", err) 55 | } 56 | statusPort, err := GetFreePort() 57 | if err != nil { 58 | return nil, fmt.Errorf("failed to get a free port: %v", err) 59 | } 60 | tmpDir, err := GetTempDir() 61 | if err != nil { 62 | return nil, fmt.Errorf("failed to get a temp dir: %v", err) 63 | } 64 | logFilePath := path.Join(tmpDir, "tidb.log") 65 | slowLogFilePath := path.Join(tmpDir, "tidb_slow.log") 66 | 67 | cmd := exec.Command(tiupPath, fmt.Sprintf("tidb:%v", ver), 68 | fmt.Sprintf("--status=%v", statusPort), 69 | fmt.Sprintf("-P=%v", port), 70 | fmt.Sprintf("--path=%v", tmpDir), 71 | fmt.Sprintf("--log-file=%v", logFilePath), 72 | fmt.Sprintf("--log-slow-query=%v", slowLogFilePath)) 73 | var stdErr bytes.Buffer 74 | cmd.Stderr = &stdErr 75 | 76 | Infof("Starting TiDB %v", cmd.String()) 77 | if err := cmd.Start(); err != nil { 78 | return nil, fmt.Errorf("failed to start TiDB: %v", err) 79 | } 80 | 81 | Infof("Wait for TiDB to start, pid: %v", cmd.Process.Pid) 82 | ok := false 83 | dsn := fmt.Sprintf("root:@tcp(127.0.0.1:%v)/test", port) 84 | for i := 0; i < 10; i++ { 85 | if PingLocalTiDB(dsn) { 86 | Infof("TiDB started, port: %v, tmpDir: %v", port, tmpDir) 87 | ok = true 88 | break 89 | } 90 | time.Sleep(time.Second * 2) 91 | Infof("Wait for TiDB to start, pid: %v", cmd.Process.Pid) 92 | } 
93 | if !ok { 94 | cmd.Process.Kill() 95 | return nil, fmt.Errorf("failed to start TiDB, stderr: %v", stdErr.String()) 96 | } 97 | 98 | return &LocalTiDBServer{ 99 | cmd: cmd, 100 | port: port, 101 | tmpDir: tmpDir, 102 | }, nil 103 | } 104 | 105 | // GetTempDir returns an temporary directory path 106 | func GetTempDir() (string, error) { 107 | return os.MkdirTemp("", "index_advisor_tidb_tmp") 108 | } 109 | 110 | // GetFreePort asks the kernel for a free open port that is ready to use. 111 | func GetFreePort() (int, error) { 112 | addr, err := net.ResolveTCPAddr("tcp", "localhost:0") 113 | if err != nil { 114 | return 0, err 115 | } 116 | 117 | l, err := net.ListenTCP("tcp", addr) 118 | if err != nil { 119 | return 0, err 120 | } 121 | defer l.Close() 122 | return l.Addr().(*net.TCPAddr).Port, nil 123 | } 124 | 125 | func PingLocalTiDB(dsn string) bool { 126 | db, err := sql.Open("mysql", dsn) 127 | if err != nil { 128 | return false 129 | } 130 | defer db.Close() 131 | return db.Ping() == nil 132 | } 133 | -------------------------------------------------------------------------------- /examples/job/stats/link_type.json: -------------------------------------------------------------------------------- 1 | { 2 | "is_historical_stats": false, 3 | "database_name": "imdbload_no_fk", 4 | "table_name": "link_type", 5 | "columns": { 6 | "id": { 7 | "histogram": { 8 | "ndv": 18 9 | }, 10 | "cm_sketch": { 11 | "top_n": [ 12 | { 13 | "data": "A4AAAAAAAAAB", 14 | "count": 1 15 | }, 16 | { 17 | "data": "A4AAAAAAAAAC", 18 | "count": 1 19 | }, 20 | { 21 | "data": "A4AAAAAAAAAD", 22 | "count": 1 23 | }, 24 | { 25 | "data": "A4AAAAAAAAAE", 26 | "count": 1 27 | }, 28 | { 29 | "data": "A4AAAAAAAAAF", 30 | "count": 1 31 | }, 32 | { 33 | "data": "A4AAAAAAAAAG", 34 | "count": 1 35 | }, 36 | { 37 | "data": "A4AAAAAAAAAH", 38 | "count": 1 39 | }, 40 | { 41 | "data": "A4AAAAAAAAAI", 42 | "count": 1 43 | }, 44 | { 45 | "data": "A4AAAAAAAAAJ", 46 | "count": 1 47 | }, 48 | { 49 | "data": 
"A4AAAAAAAAAK", 50 | "count": 1 51 | }, 52 | { 53 | "data": "A4AAAAAAAAAL", 54 | "count": 1 55 | }, 56 | { 57 | "data": "A4AAAAAAAAAM", 58 | "count": 1 59 | }, 60 | { 61 | "data": "A4AAAAAAAAAN", 62 | "count": 1 63 | }, 64 | { 65 | "data": "A4AAAAAAAAAO", 66 | "count": 1 67 | }, 68 | { 69 | "data": "A4AAAAAAAAAP", 70 | "count": 1 71 | }, 72 | { 73 | "data": "A4AAAAAAAAAQ", 74 | "count": 1 75 | }, 76 | { 77 | "data": "A4AAAAAAAAAR", 78 | "count": 1 79 | }, 80 | { 81 | "data": "A4AAAAAAAAAS", 82 | "count": 1 83 | } 84 | ], 85 | "default_value": 0 86 | }, 87 | "fm_sketch": null, 88 | "null_count": 0, 89 | "tot_col_size": 144, 90 | "last_update_version": 441988928304054286, 91 | "correlation": 1, 92 | "stats_ver": 2 93 | }, 94 | "link": { 95 | "histogram": { 96 | "ndv": 18 97 | }, 98 | "cm_sketch": { 99 | "top_n": [ 100 | { 101 | "data": "AWFsdGVybmF0/2UgbGFuZ3Vh/2dlIHZlcnNp/29uIG9mAAAA/A==", 102 | "count": 1 103 | }, 104 | { 105 | "data": "AWVkaXRlZCBm/3JvbQAAAAAA+g==", 106 | "count": 1 107 | }, 108 | { 109 | "data": "AWVkaXRlZCBp/250bwAAAAAA+g==", 110 | "count": 1 111 | }, 112 | { 113 | "data": "AWZlYXR1cmVk/yBpbgAAAAAA+g==", 114 | "count": 1 115 | }, 116 | { 117 | "data": "AWZlYXR1cmVz/wAAAAAAAAAA9w==", 118 | "count": 1 119 | }, 120 | { 121 | "data": "AWZvbGxvd2Vk/yBieQAAAAAA+g==", 122 | "count": 1 123 | }, 124 | { 125 | "data": "AWZvbGxvd3MA/g==", 126 | "count": 1 127 | }, 128 | { 129 | "data": "AXJlZmVyZW5j/2VkIGluAAAA/A==", 130 | "count": 1 131 | }, 132 | { 133 | "data": "AXJlZmVyZW5j/2VzAAAAAAAA+Q==", 134 | "count": 1 135 | }, 136 | { 137 | "data": "AXJlbWFkZSBh/3MAAAAAAAAA+A==", 138 | "count": 1 139 | }, 140 | { 141 | "data": "AXJlbWFrZSBv/2YAAAAAAAAA+A==", 142 | "count": 1 143 | }, 144 | { 145 | "data": "AXNpbWlsYXIg/3RvAAAAAAAA+Q==", 146 | "count": 1 147 | }, 148 | { 149 | "data": "AXNwaW4gb2Zm/wAAAAAAAAAA9w==", 150 | "count": 1 151 | }, 152 | { 153 | "data": "AXNwaW4gb2Zm/yBmcm9tAAAA/A==", 154 | "count": 1 155 | }, 156 | { 157 | "data": 
"AXNwb29mZWQg/2luAAAAAAAA+Q==", 158 | "count": 1 159 | }, 160 | { 161 | "data": "AXNwb29mcwAA/Q==", 162 | "count": 1 163 | }, 164 | { 165 | "data": "AXVua25vd24g/2xpbmsAAAAA+w==", 166 | "count": 1 167 | }, 168 | { 169 | "data": "AXZlcnNpb24g/29mAAAAAAAA+Q==", 170 | "count": 1 171 | } 172 | ], 173 | "default_value": 0 174 | }, 175 | "fm_sketch": null, 176 | "null_count": 0, 177 | "tot_col_size": 216, 178 | "last_update_version": 441988928304054286, 179 | "correlation": -0.021671826625386997, 180 | "stats_ver": 2 181 | } 182 | }, 183 | "indices": {}, 184 | "ext_stats": null, 185 | "count": 18, 186 | "modify_count": 0, 187 | "partitions": null, 188 | "version": 441988928304054286 189 | } -------------------------------------------------------------------------------- /advisor/column_selection_simple_test.go: -------------------------------------------------------------------------------- 1 | package advisor 2 | 3 | import ( 4 | "fmt" 5 | "sort" 6 | "strings" 7 | "testing" 8 | 9 | "github.com/qw4990/index_advisor/utils" 10 | ) 11 | 12 | func must(err error) { 13 | if err != nil { 14 | panic(err) 15 | } 16 | } 17 | 18 | func TestFindIndexableColumnsCase1(t *testing.T) { 19 | tt, err := utils.ParseCreateTableStmt("test", "create table t (a int, b int, c int, d int, e int)") 20 | must(err) 21 | 22 | workload := utils.WorkloadInfo{ 23 | TableSchemas: utils.ListToSet(tt), 24 | Queries: utils.ListToSet( 25 | utils.Query{"", "test", 26 | "select * from t where a<1 and b>1 and e like 'abc'", 1, nil}, 27 | utils.Query{"", "test", 28 | "select * from t where c in (1, 2, 3) order by d", 1, nil}), 29 | } 30 | must(IndexableColumnsSelectionSimple(&workload)) 31 | checkIndexableCols(workload.IndexableColumns, []string{"test.t.a", "test.t.b", "test.t.c", "test.t.d"}) 32 | } 33 | 34 | func TestFindIndexableColumnsCase2(t *testing.T) { 35 | t1, err := utils.ParseCreateTableStmt("test", "create table t1 (a int)") 36 | must(err) 37 | t2, err := utils.ParseCreateTableStmt("test", "create 
table t2 (a int)") 38 | must(err) 39 | workload := utils.WorkloadInfo{ 40 | TableSchemas: utils.ListToSet(t1, t2), 41 | Queries: utils.ListToSet(utils.Query{"", "test", 42 | "select * from t2 tx where a<1", 1, nil}), 43 | } 44 | must(IndexableColumnsSelectionSimple(&workload)) 45 | checkIndexableCols(workload.IndexableColumns, []string{"test.t2.a"}) 46 | } 47 | 48 | func TestFindIndexableColumnsCase3(t *testing.T) { 49 | t1, err := utils.ParseCreateTableStmt("db1", "create table t1 (a1 int)") 50 | must(err) 51 | t2, err := utils.ParseCreateTableStmt("db2", "create table t2 (a2 int)") 52 | must(err) 53 | workload := utils.WorkloadInfo{ 54 | TableSchemas: utils.ListToSet(t1, t2), 55 | Queries: utils.ListToSet(utils.Query{"", "db1", 56 | "select * from db2.t2 where a2<1", 1, nil}), 57 | } 58 | must(IndexableColumnsSelectionSimple(&workload)) 59 | checkIndexableCols(workload.IndexableColumns, []string{"db2.t2.a2"}) 60 | } 61 | 62 | func TestFindIndexableColumnsSimpleTPCH(t *testing.T) { 63 | t1, err := utils.ParseCreateTableStmt("tpch", `CREATE TABLE tpch.nation ( 64 | N_NATIONKEY bigint(20) NOT NULL, 65 | N_NAME char(25) NOT NULL, 66 | N_REGIONKEY bigint(20) NOT NULL, 67 | N_COMMENT varchar(152) DEFAULT NULL, 68 | PRIMARY KEY (N_NATIONKEY) /*T![clustered_index] CLUSTERED */)`) 69 | must(err) 70 | 71 | workload := utils.WorkloadInfo{ 72 | TableSchemas: utils.ListToSet(t1), 73 | Queries: utils.ListToSet( 74 | utils.Query{"", "tpch", `select 75 | supp_nation, 76 | cust_nation, 77 | l_year, 78 | sum(volume) as revenue 79 | from 80 | ( 81 | select 82 | n1.n_name as supp_nation, 83 | n2.n_name as cust_nation, 84 | extract(year from l_shipdate) as l_year, 85 | l_extendedprice * (1 - l_discount) as volume 86 | from 87 | supplier, 88 | lineitem, 89 | orders, 90 | customer, 91 | nation n1, 92 | nation n2 93 | where 94 | s_suppkey = l_suppkey 95 | and o_orderkey = l_orderkey 96 | and c_custkey = o_custkey 97 | and s_nationkey = n1.n_nationkey 98 | and c_nationkey = 
n2.n_nationkey 99 | and ( 100 | (n1.n_name = 'MOZAMBIQUE' and n2.n_name = 'UNITED KINGDOM') 101 | or (n1.n_name = 'UNITED KINGDOM' and n2.n_name = 'MOZAMBIQUE') 102 | ) 103 | and l_shipdate between date '1995-01-01' and date '1996-12-31' 104 | ) as shipping 105 | group by 106 | supp_nation, 107 | cust_nation, 108 | l_year 109 | order by 110 | supp_nation, 111 | cust_nation, 112 | l_year`, 1, nil})} 113 | must(IndexableColumnsSelectionSimple(&workload)) 114 | checkIndexableCols(workload.IndexableColumns, []string{"tpch.nation.n_name", "tpch.nation.n_nationkey"}) 115 | } 116 | 117 | func checkIndexableCols(got utils.Set[utils.Column], expected []string) { 118 | var gotCols []string 119 | for _, c := range got.ToList() { 120 | gotCols = append(gotCols, c.Key()) 121 | } 122 | sort.Strings(gotCols) 123 | sort.Strings(expected) 124 | gotStr := strings.Join(gotCols, ",\n") 125 | expectedStr := strings.Join(expected, ",\n") 126 | if gotStr != expectedStr { 127 | panic(fmt.Sprintf("got %s, expected %s", gotStr, expectedStr)) 128 | } 129 | } 130 | -------------------------------------------------------------------------------- /advisor/utils.go: -------------------------------------------------------------------------------- 1 | package advisor 2 | 3 | import ( 4 | "fmt" 5 | "sort" 6 | "strings" 7 | "sync" 8 | "sync/atomic" 9 | 10 | "github.com/qw4990/index_advisor/optimizer" 11 | "github.com/qw4990/index_advisor/utils" 12 | ) 13 | 14 | func evaluateIndexConfCostConcurrently(info utils.WorkloadInfo, optimizers []optimizer.WhatIfOptimizer, 15 | indexes []utils.Set[utils.Index]) (bestSet utils.Set[utils.Index], bestCost utils.IndexConfCost, err error) { 16 | bestSet = utils.NewSet[utils.Index]() 17 | errPointer := new(atomic.Pointer[error]) 18 | costs := make([]utils.IndexConfCost, len(indexes)) 19 | var wg sync.WaitGroup 20 | for id := 0; id < len(optimizers); id++ { 21 | wg.Add(1) 22 | go func(id int) { 23 | defer wg.Done() 24 | for i := id; i < len(indexes); i += 
len(optimizers) { 25 | cost, err := evaluateIndexConfCost(info, optimizers[id], indexes[i]) 26 | if err != nil { 27 | errPointer.CompareAndSwap(nil, &err) 28 | return 29 | } 30 | if errPointer.Load() != nil { 31 | return 32 | } 33 | costs[i] = cost 34 | } 35 | }(id) 36 | } 37 | wg.Wait() 38 | if errPointer.Load() != nil { 39 | return nil, bestCost, *errPointer.Load() 40 | } 41 | 42 | for i := 0; i < len(indexes); i++ { 43 | if costs[i].Less(bestCost) { 44 | bestSet = indexes[i] 45 | bestCost = costs[i] 46 | } 47 | } 48 | return bestSet, bestCost, nil 49 | } 50 | 51 | // evaluateIndexConfCost evaluates the workload cost under the given indexes. 52 | func evaluateIndexConfCost(info utils.WorkloadInfo, optimizer optimizer.WhatIfOptimizer, indexes utils.Set[utils.Index]) (utils.IndexConfCost, error) { 53 | for _, index := range indexes.ToList() { 54 | if err := optimizer.CreateHypoIndex(index); err != nil { 55 | return utils.IndexConfCost{}, err 56 | } 57 | } 58 | var workloadCost float64 59 | for _, sql := range info.Queries.ToList() { // TODO: run them concurrently to save time 60 | p, err := optimizer.ExplainQ(sql) 61 | if err != nil { 62 | return utils.IndexConfCost{}, err 63 | } 64 | workloadCost += p.PlanCost() * float64(sql.Frequency) 65 | } 66 | for _, index := range indexes.ToList() { 67 | if err := optimizer.DropHypoIndex(index); err != nil { 68 | return utils.IndexConfCost{}, err 69 | } 70 | } 71 | var totCols int 72 | var keys []string 73 | for _, index := range indexes.ToList() { 74 | totCols += len(index.Columns) 75 | keys = append(keys, index.Key()) 76 | } 77 | sort.Strings(keys) 78 | 79 | return utils.IndexConfCost{workloadCost, totCols, strings.Join(keys, ",")}, nil 80 | } 81 | 82 | var indexID atomic.Int64 83 | 84 | // tempIndexName returns a temp index name for the given columns. 
85 | func tempIndexName(cols ...utils.Column) string { 86 | var names []string 87 | for _, col := range cols { 88 | names = append(names, col.ColumnName) 89 | } 90 | idxName := fmt.Sprintf("idx_%v", strings.Join(names, "_")) 91 | if len(idxName) <= 64 { 92 | return idxName 93 | } 94 | return fmt.Sprintf("idx_%v", indexID.Add(1)) 95 | } 96 | 97 | func checkWorkloadInfo(w utils.WorkloadInfo) { 98 | for _, col := range w.IndexableColumns.ToList() { 99 | if col.SchemaName == "" || col.TableName == "" || col.ColumnName == "" { 100 | panic(fmt.Sprintf("invalid indexable column: %v", col)) 101 | } 102 | } 103 | for _, sql := range w.Queries.ToList() { 104 | if sql.Text == "" { 105 | panic(fmt.Sprintf("invalid sql: %v", sql)) 106 | } 107 | for _, col := range sql.IndexableColumns.ToList() { 108 | if col.SchemaName == "" || col.TableName == "" || col.ColumnName == "" { 109 | panic(fmt.Sprintf("invalid indexable column: %v", col)) 110 | } 111 | } 112 | } 113 | for _, tbl := range w.TableSchemas.ToList() { 114 | if tbl.SchemaName == "" || tbl.TableName == "" { 115 | panic(fmt.Sprintf("invalid table schema: %v", tbl)) 116 | } 117 | for _, col := range tbl.Columns { 118 | if col.SchemaName == "" || col.TableName == "" || col.ColumnName == "" || col.ColumnType == nil { 119 | panic(fmt.Sprintf("invalid indexable column: %v", col)) 120 | } 121 | } 122 | for _, idx := range tbl.Indexes { 123 | if idx.SchemaName == "" || idx.TableName == "" || idx.IndexName == "" { 124 | panic(fmt.Sprintf("invalid index: %v", idx)) 125 | } 126 | for _, col := range idx.Columns { 127 | if col.SchemaName == "" || col.TableName == "" || col.ColumnName == "" { 128 | panic(fmt.Sprintf("invalid indexable column: %v", col)) 129 | } 130 | } 131 | } 132 | } 133 | } 134 | -------------------------------------------------------------------------------- /examples/tpch_example1/output/q13.txt: -------------------------------------------------------------------------------- 1 | Alias: q13 2 | Query: 3 | -- $ID$ 4 
| -- TPC-H/TPC-R Customer Distribution Query (Q13) 5 | -- Functional Query Definition 6 | -- Approved February 1998 7 | 8 | 9 | select 10 | c_count, 11 | count(*) as custdist 12 | from 13 | ( 14 | select 15 | c_custkey, 16 | count(o_orderkey) as c_count 17 | from 18 | customer left outer join orders on 19 | c_custkey = o_custkey 20 | and o_comment not like '%special%packages%' 21 | group by 22 | c_custkey 23 | ) as c_orders 24 | group by 25 | c_count 26 | order by 27 | custdist desc, 28 | c_count desc; 29 | 30 | Original Cost: 2.84E+08 31 | Optimized Cost: 2.84E+08 32 | Cost Reduction Ratio: 1.00 33 | 34 | 35 | ===================== original plan ===================== 36 | Sort_10 149568.00 283698218.67 root Column#19:desc, Column#18:desc 37 | └─Projection_12 149568.00 154919713.48 root Column#18, Column#19 38 | └─HashAgg_13 149568.00 154889859.70 root group by:Column#18, funcs:count(1)->Column#19, funcs:firstrow(Column#18)->Column#18 39 | └─HashAgg_14 149568.00 145821580.46 root group by:tpch.customer.c_custkey, funcs:count(tpch.orders.o_orderkey)->Column#18 40 | └─HashJoin_17 1203465.98 106634967.01 root left outer join, equal:[eq(tpch.customer.c_custkey, tpch.orders.o_custkey)] 41 | ├─TableReader_19(Build) 150000.00 3942218.19 root data:TableFullScan_18 42 | │ └─TableFullScan_18 150000.00 49629272.82 cop[tikv] table:customer keep order:false 43 | └─TableReader_22(Probe) 1200000.00 63289251.82 root data:Selection_21 44 | └─Selection_21 1200000.00 549553017.35 cop[tikv] not(like(tpch.orders.o_comment, "%special%packages%", 92)) 45 | └─TableFullScan_20 1500000.00 474703017.35 cop[tikv] table:orders keep order:false 46 | 47 | ===================== optimized plan ===================== 48 | Sort_10 149568.00 283698218.67 root Column#19:desc, Column#18:desc 49 | └─Projection_12 149568.00 154919713.48 root Column#18, Column#19 50 | └─HashAgg_13 149568.00 154889859.70 root group by:Column#18, funcs:count(1)->Column#19, funcs:firstrow(Column#18)->Column#18 51 | 
└─HashAgg_14 149568.00 145821580.46 root group by:tpch.customer.c_custkey, funcs:count(tpch.orders.o_orderkey)->Column#18 52 | └─HashJoin_31 1203465.98 106634967.01 root left outer join, equal:[eq(tpch.customer.c_custkey, tpch.orders.o_custkey)] 53 | ├─TableReader_39(Build) 150000.00 3942218.19 root data:TableFullScan_38 54 | │ └─TableFullScan_38 150000.00 49629272.82 cop[tikv] table:customer keep order:false 55 | └─TableReader_42(Probe) 1200000.00 63289251.82 root data:Selection_41 56 | └─Selection_41 1200000.00 549553017.35 cop[tikv] not(like(tpch.orders.o_comment, "%special%packages%", 92)) 57 | └─TableFullScan_40 1500000.00 474703017.35 cop[tikv] table:orders keep order:false --------------------------------------------------------------------------------