├── src └── execute.cpp ├── engine ├── Config.hpp ├── pipeline │ ├── JoinPipeline.cpp │ ├── PipelineFunction.cpp │ ├── PipelineGen.hpp │ └── PipelineFunction.hpp ├── op │ ├── OpBase.cpp │ ├── ScanBase.cpp │ ├── OpBase.hpp │ ├── TargetBase.hpp │ ├── ScanBase.hpp │ ├── CollectorTarget.hpp │ └── CollectorTarget.cpp ├── infra │ ├── helper │ │ ├── Misc.hpp │ │ ├── Span.hpp │ │ └── BitOps.hpp │ ├── PageMemory.hpp │ ├── JoinFilter.hpp │ ├── Random.hpp │ ├── UnionFind.hpp │ ├── Mmap.hpp │ ├── AdressSanitizer.hpp │ ├── Mmap.cpp │ ├── Scheduler.hpp │ ├── QueryMemory.hpp │ └── Util.hpp ├── Execute.hpp ├── tools │ ├── DuckDB.hpp │ ├── SQL.hpp │ ├── JoinPipelineLoader.hpp │ ├── ParsedSQL.hpp │ ├── DuckDB.cpp │ └── Setting.hpp ├── query │ ├── Restriction.cpp │ ├── RuntimeValue.hpp │ ├── PlanImport.hpp │ ├── Restriction.hpp │ ├── QueryGraph.hpp │ ├── DPccp.hpp │ └── QueryPlan.hpp ├── storage │ ├── CopyLogic.hpp │ ├── RestrictionLogic.hpp │ └── BitLogic.hpp ├── Execute.cpp ├── test │ └── unit_tests3.cpp └── genpipelines.py ├── .clangd ├── listincludes.sh ├── job ├── 3b.sql ├── 2a.sql ├── 2b.sql ├── 2c.sql ├── 2d.sql ├── 3a.sql ├── 32a.sql ├── 3c.sql ├── 32b.sql ├── 4a.sql ├── 4b.sql ├── 4c.sql ├── 6a.sql ├── 6c.sql ├── 6e.sql ├── 8c.sql ├── 5b.sql ├── 6f.sql ├── 17f.sql ├── 17d.sql ├── 17e.sql ├── 8d.sql ├── 10c.sql ├── 17b.sql ├── 17c.sql ├── 6b.sql ├── 6d.sql ├── 1d.sql ├── 5a.sql ├── 5c.sql ├── 10b.sql ├── 17a.sql ├── 1b.sql ├── 10a.sql ├── 1a.sql ├── 1c.sql ├── 16b.sql ├── 16c.sql ├── 8a.sql ├── 16a.sql ├── 16d.sql ├── 18a.sql ├── 9d.sql ├── 7b.sql ├── 13d.sql ├── 13a.sql ├── 18c.sql ├── 9c.sql ├── 11d.sql ├── 15d.sql ├── 7a.sql ├── 11a.sql ├── 11b.sql ├── 8b.sql ├── 9b.sql ├── 12a.sql ├── 12b.sql ├── 12c.sql ├── 18b.sql ├── 13b.sql ├── 13c.sql ├── 9a.sql ├── 11c.sql ├── 14a.sql ├── 15a.sql ├── 19d.sql ├── 15c.sql ├── 7c.sql ├── 14c.sql ├── 15b.sql ├── 14b.sql ├── 20a.sql ├── 20c.sql ├── 21b.sql ├── 20b.sql ├── 19c.sql ├── 25a.sql ├── 21a.sql ├── 23a.sql ├── 19b.sql ├── 
21c.sql ├── 23b.sql ├── 25b.sql ├── 25c.sql ├── 23c.sql ├── 19a.sql ├── 26b.sql ├── 22a.sql ├── 22b.sql ├── 22d.sql ├── 26c.sql ├── 22c.sql ├── 26a.sql ├── 24a.sql ├── 31a.sql ├── 31c.sql ├── 27b.sql ├── 27a.sql ├── 33b.sql ├── 24b.sql ├── 27c.sql ├── 33a.sql ├── 30c.sql ├── 30a.sql ├── 33c.sql ├── 31b.sql ├── 30b.sql ├── 28b.sql ├── 28c.sql ├── 28a.sql ├── fkindexes.sql ├── 29b.sql ├── 29c.sql ├── 29a.sql └── README ├── .gitignore ├── checknolib.sh ├── checkglobals.sh ├── duckdbrunner.py ├── download_imdb.sh ├── ANNOUNCEMENTS.md ├── include ├── csv_parser.h ├── table_entity.h ├── hardware__ca09.h ├── hardware__sidon.h ├── hardware__koroneia.h ├── hardware__cp02.h ├── attribute.h ├── table.h └── common.h └── tests └── build_database.cpp /src/execute.cpp: -------------------------------------------------------------------------------- 1 | 2 | // Intentionally left blank -------------------------------------------------------------------------------- /engine/Config.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | namespace engine::config { 3 | constexpr bool handleMultiplicity = true; 4 | } -------------------------------------------------------------------------------- /.clangd: -------------------------------------------------------------------------------- 1 | CompileFlags: 2 | Remove: 3 | - -fmodules-ts 4 | - -fmodule-mapper=* 5 | - -fdeps-format=p1689r5 -------------------------------------------------------------------------------- /listincludes.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # check for files containing fmt 4 | find engine/ -type f ! -path "*/all.cpp" ! -path "*/tools/*" ! 
-path "*/test/*" -exec grep "#include" {} \; | sort | uniq 5 | -------------------------------------------------------------------------------- /job/3b.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(t.title) AS movie_title FROM keyword AS k, movie_info AS mi, movie_keyword AS mk, title AS t WHERE k.keyword like '%sequel%' AND mi.info IN ('Bulgaria') AND t.production_year > 2010 AND t.id = mi.movie_id AND t.id = mk.movie_id AND mk.movie_id = mi.movie_id AND k.id = mk.keyword_id; 2 | -------------------------------------------------------------------------------- /job/2a.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(t.title) AS movie_title FROM company_name AS cn, keyword AS k, movie_companies AS mc, movie_keyword AS mk, title AS t WHERE cn.country_code ='[de]' AND k.keyword ='character-name-in-title' AND cn.id = mc.company_id AND mc.movie_id = t.id AND t.id = mk.movie_id AND mk.keyword_id = k.id AND mc.movie_id = mk.movie_id; 2 | -------------------------------------------------------------------------------- /job/2b.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(t.title) AS movie_title FROM company_name AS cn, keyword AS k, movie_companies AS mc, movie_keyword AS mk, title AS t WHERE cn.country_code ='[nl]' AND k.keyword ='character-name-in-title' AND cn.id = mc.company_id AND mc.movie_id = t.id AND t.id = mk.movie_id AND mk.keyword_id = k.id AND mc.movie_id = mk.movie_id; 2 | -------------------------------------------------------------------------------- /job/2c.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(t.title) AS movie_title FROM company_name AS cn, keyword AS k, movie_companies AS mc, movie_keyword AS mk, title AS t WHERE cn.country_code ='[sm]' AND k.keyword ='character-name-in-title' AND cn.id = mc.company_id AND mc.movie_id = t.id AND t.id 
= mk.movie_id AND mk.keyword_id = k.id AND mc.movie_id = mk.movie_id; 2 | -------------------------------------------------------------------------------- /job/2d.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(t.title) AS movie_title FROM company_name AS cn, keyword AS k, movie_companies AS mc, movie_keyword AS mk, title AS t WHERE cn.country_code ='[us]' AND k.keyword ='character-name-in-title' AND cn.id = mc.company_id AND mc.movie_id = t.id AND t.id = mk.movie_id AND mk.keyword_id = k.id AND mc.movie_id = mk.movie_id; 2 | -------------------------------------------------------------------------------- /job/3a.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(t.title) AS movie_title FROM keyword AS k, movie_info AS mi, movie_keyword AS mk, title AS t WHERE k.keyword like '%sequel%' AND mi.info IN ('Sweden', 'Norway', 'Germany', 'Denmark', 'Swedish', 'Denish', 'Norwegian', 'German') AND t.production_year > 2005 AND t.id = mi.movie_id AND t.id = mk.movie_id AND mk.movie_id = mi.movie_id AND k.id = mk.keyword_id; 2 | -------------------------------------------------------------------------------- /job/32a.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(lt.link) AS link_type, MIN(t1.title) AS first_movie, MIN(t2.title) AS second_movie FROM keyword AS k, link_type AS lt, movie_keyword AS mk, movie_link AS ml, title AS t1, title AS t2 WHERE k.keyword ='10,000-mile-club' AND mk.keyword_id = k.id AND t1.id = mk.movie_id AND ml.movie_id = t1.id AND ml.linked_movie_id = t2.id AND lt.id = ml.link_type_id AND mk.movie_id = t1.id; 2 | -------------------------------------------------------------------------------- /job/3c.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(t.title) AS movie_title FROM keyword AS k, movie_info AS mi, movie_keyword AS mk, title AS t WHERE 
k.keyword like '%sequel%' AND mi.info IN ('Sweden', 'Norway', 'Germany', 'Denmark', 'Swedish', 'Denish', 'Norwegian', 'German', 'USA', 'American') AND t.production_year > 1990 AND t.id = mi.movie_id AND t.id = mk.movie_id AND mk.movie_id = mi.movie_id AND k.id = mk.keyword_id; 2 | -------------------------------------------------------------------------------- /job/32b.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(lt.link) AS link_type, MIN(t1.title) AS first_movie, MIN(t2.title) AS second_movie FROM keyword AS k, link_type AS lt, movie_keyword AS mk, movie_link AS ml, title AS t1, title AS t2 WHERE k.keyword ='character-name-in-title' AND mk.keyword_id = k.id AND t1.id = mk.movie_id AND ml.movie_id = t1.id AND ml.linked_movie_id = t2.id AND lt.id = ml.link_type_id AND mk.movie_id = t1.id; 2 | -------------------------------------------------------------------------------- /job/4a.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(mi_idx.info) AS rating, MIN(t.title) AS movie_title FROM info_type AS it, keyword AS k, movie_info_idx AS mi_idx, movie_keyword AS mk, title AS t WHERE it.info ='rating' AND k.keyword like '%sequel%' AND mi_idx.info > '5.0' AND t.production_year > 2005 AND t.id = mi_idx.movie_id AND t.id = mk.movie_id AND mk.movie_id = mi_idx.movie_id AND k.id = mk.keyword_id AND it.id = mi_idx.info_type_id; 2 | -------------------------------------------------------------------------------- /job/4b.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(mi_idx.info) AS rating, MIN(t.title) AS movie_title FROM info_type AS it, keyword AS k, movie_info_idx AS mi_idx, movie_keyword AS mk, title AS t WHERE it.info ='rating' AND k.keyword like '%sequel%' AND mi_idx.info > '9.0' AND t.production_year > 2010 AND t.id = mi_idx.movie_id AND t.id = mk.movie_id AND mk.movie_id = mi_idx.movie_id AND k.id = 
mk.keyword_id AND it.id = mi_idx.info_type_id; 2 | -------------------------------------------------------------------------------- /job/4c.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(mi_idx.info) AS rating, MIN(t.title) AS movie_title FROM info_type AS it, keyword AS k, movie_info_idx AS mi_idx, movie_keyword AS mk, title AS t WHERE it.info ='rating' AND k.keyword like '%sequel%' AND mi_idx.info > '2.0' AND t.production_year > 1990 AND t.id = mi_idx.movie_id AND t.id = mk.movie_id AND mk.movie_id = mi_idx.movie_id AND k.id = mk.keyword_id AND it.id = mi_idx.info_type_id; 2 | -------------------------------------------------------------------------------- /job/6a.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(k.keyword) AS movie_keyword, MIN(n.name) AS actor_name, MIN(t.title) AS marvel_movie FROM cast_info AS ci, keyword AS k, movie_keyword AS mk, name AS n, title AS t WHERE k.keyword = 'marvel-cinematic-universe' AND n.name LIKE '%Downey%Robert%' AND t.production_year > 2010 AND k.id = mk.keyword_id AND t.id = mk.movie_id AND t.id = ci.movie_id AND ci.movie_id = mk.movie_id AND n.id = ci.person_id; 2 | -------------------------------------------------------------------------------- /job/6c.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(k.keyword) AS movie_keyword, MIN(n.name) AS actor_name, MIN(t.title) AS marvel_movie FROM cast_info AS ci, keyword AS k, movie_keyword AS mk, name AS n, title AS t WHERE k.keyword = 'marvel-cinematic-universe' AND n.name LIKE '%Downey%Robert%' AND t.production_year > 2014 AND k.id = mk.keyword_id AND t.id = mk.movie_id AND t.id = ci.movie_id AND ci.movie_id = mk.movie_id AND n.id = ci.person_id; 2 | -------------------------------------------------------------------------------- /job/6e.sql: 
-------------------------------------------------------------------------------- 1 | SELECT MIN(k.keyword) AS movie_keyword, MIN(n.name) AS actor_name, MIN(t.title) AS marvel_movie FROM cast_info AS ci, keyword AS k, movie_keyword AS mk, name AS n, title AS t WHERE k.keyword = 'marvel-cinematic-universe' AND n.name LIKE '%Downey%Robert%' AND t.production_year > 2000 AND k.id = mk.keyword_id AND t.id = mk.movie_id AND t.id = ci.movie_id AND ci.movie_id = mk.movie_id AND n.id = ci.person_id; 2 | -------------------------------------------------------------------------------- /engine/pipeline/JoinPipeline.cpp: -------------------------------------------------------------------------------- 1 | #include "pipeline/JoinPipeline.hpp" 2 | #include "infra/Scheduler.hpp" 3 | //--------------------------------------------------------------------------- 4 | namespace engine { 5 | //--------------------------------------------------------------------------- 6 | //--------------------------------------------------------------------------- 7 | } 8 | //--------------------------------------------------------------------------- 9 | -------------------------------------------------------------------------------- /job/8c.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(a1.name) AS writer_pseudo_name, MIN(t.title) AS movie_title FROM aka_name AS a1, cast_info AS ci, company_name AS cn, movie_companies AS mc, name AS n1, role_type AS rt, title AS t WHERE cn.country_code ='[us]' AND rt.role ='writer' AND a1.person_id = n1.id AND n1.id = ci.person_id AND ci.movie_id = t.id AND t.id = mc.movie_id AND mc.company_id = cn.id AND ci.role_id = rt.id AND a1.person_id = ci.person_id AND ci.movie_id = mc.movie_id; 2 | -------------------------------------------------------------------------------- /job/5b.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(t.title) AS american_vhs_movie FROM 
company_type AS ct, info_type AS it, movie_companies AS mc, movie_info AS mi, title AS t WHERE ct.kind = 'production companies' AND mc.note like '%(VHS)%' and mc.note like '%(USA)%' and mc.note like '%(1994)%' AND mi.info IN ('USA', 'America') AND t.production_year > 2010 AND t.id = mi.movie_id AND t.id = mc.movie_id AND mc.movie_id = mi.movie_id AND ct.id = mc.company_type_id AND it.id = mi.info_type_id; 2 | -------------------------------------------------------------------------------- /job/6f.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(k.keyword) AS movie_keyword, MIN(n.name) AS actor_name, MIN(t.title) AS hero_movie FROM cast_info AS ci, keyword AS k, movie_keyword AS mk, name AS n, title AS t WHERE k.keyword in ('superhero', 'sequel', 'second-part', 'marvel-comics', 'based-on-comic', 'tv-special', 'fight', 'violence') AND t.production_year > 2000 AND k.id = mk.keyword_id AND t.id = mk.movie_id AND t.id = ci.movie_id AND ci.movie_id = mk.movie_id AND n.id = ci.person_id; 2 | -------------------------------------------------------------------------------- /engine/op/OpBase.cpp: -------------------------------------------------------------------------------- 1 | #include "op/OpBase.hpp" 2 | //--------------------------------------------------------------------------- 3 | namespace engine { 4 | //--------------------------------------------------------------------------- 5 | std::string OpBase::getPretty() const { 6 | return {}; 7 | } 8 | //--------------------------------------------------------------------------- 9 | } 10 | //--------------------------------------------------------------------------- -------------------------------------------------------------------------------- /job/17f.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(n.name) AS member_in_charnamed_movie FROM cast_info AS ci, company_name AS cn, keyword AS k, movie_companies AS mc, 
movie_keyword AS mk, name AS n, title AS t WHERE k.keyword ='character-name-in-title' AND n.name LIKE '%B%' AND n.id = ci.person_id AND ci.movie_id = t.id AND t.id = mk.movie_id AND mk.keyword_id = k.id AND t.id = mc.movie_id AND mc.company_id = cn.id AND ci.movie_id = mc.movie_id AND ci.movie_id = mk.movie_id AND mc.movie_id = mk.movie_id; 2 | -------------------------------------------------------------------------------- /engine/infra/helper/Misc.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | //--------------------------------------------------------------------------- 3 | namespace engine { 4 | //--------------------------------------------------------------------------- 5 | template 6 | struct type_identity { 7 | using type = T; 8 | }; 9 | //--------------------------------------------------------------------------- 10 | } 11 | //--------------------------------------------------------------------------- -------------------------------------------------------------------------------- /job/17d.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(n.name) AS member_in_charnamed_movie FROM cast_info AS ci, company_name AS cn, keyword AS k, movie_companies AS mc, movie_keyword AS mk, name AS n, title AS t WHERE k.keyword ='character-name-in-title' AND n.name LIKE '%Bert%' AND n.id = ci.person_id AND ci.movie_id = t.id AND t.id = mk.movie_id AND mk.keyword_id = k.id AND t.id = mc.movie_id AND mc.company_id = cn.id AND ci.movie_id = mc.movie_id AND ci.movie_id = mk.movie_id AND mc.movie_id = mk.movie_id; 2 | -------------------------------------------------------------------------------- /job/17e.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(n.name) AS member_in_charnamed_movie FROM cast_info AS ci, company_name AS cn, keyword AS k, movie_companies AS mc, movie_keyword AS mk, name AS n, title AS t WHERE 
cn.country_code ='[us]' AND k.keyword ='character-name-in-title' AND n.id = ci.person_id AND ci.movie_id = t.id AND t.id = mk.movie_id AND mk.keyword_id = k.id AND t.id = mc.movie_id AND mc.company_id = cn.id AND ci.movie_id = mc.movie_id AND ci.movie_id = mk.movie_id AND mc.movie_id = mk.movie_id; 2 | -------------------------------------------------------------------------------- /job/8d.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(an1.name) AS costume_designer_pseudo, MIN(t.title) AS movie_with_costumes FROM aka_name AS an1, cast_info AS ci, company_name AS cn, movie_companies AS mc, name AS n1, role_type AS rt, title AS t WHERE cn.country_code ='[us]' AND rt.role ='costume designer' AND an1.person_id = n1.id AND n1.id = ci.person_id AND ci.movie_id = t.id AND t.id = mc.movie_id AND mc.company_id = cn.id AND ci.role_id = rt.id AND an1.person_id = ci.person_id AND ci.movie_id = mc.movie_id; 2 | -------------------------------------------------------------------------------- /job/10c.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(chn.name) AS character, MIN(t.title) AS movie_with_american_producer FROM char_name AS chn, cast_info AS ci, company_name AS cn, company_type AS ct, movie_companies AS mc, role_type AS rt, title AS t WHERE ci.note like '%(producer)%' AND cn.country_code = '[us]' AND t.production_year > 1990 AND t.id = mc.movie_id AND t.id = ci.movie_id AND ci.movie_id = mc.movie_id AND chn.id = ci.person_role_id AND rt.id = ci.role_id AND cn.id = mc.company_id AND ct.id = mc.company_type_id; 2 | -------------------------------------------------------------------------------- /job/17b.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(n.name) AS member_in_charnamed_movie, MIN(n.name) AS a1 FROM cast_info AS ci, company_name AS cn, keyword AS k, movie_companies AS mc, movie_keyword AS mk, name AS n, 
title AS t WHERE k.keyword ='character-name-in-title' AND n.name LIKE 'Z%' AND n.id = ci.person_id AND ci.movie_id = t.id AND t.id = mk.movie_id AND mk.keyword_id = k.id AND t.id = mc.movie_id AND mc.company_id = cn.id AND ci.movie_id = mc.movie_id AND ci.movie_id = mk.movie_id AND mc.movie_id = mk.movie_id; 2 | -------------------------------------------------------------------------------- /job/17c.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(n.name) AS member_in_charnamed_movie, MIN(n.name) AS a1 FROM cast_info AS ci, company_name AS cn, keyword AS k, movie_companies AS mc, movie_keyword AS mk, name AS n, title AS t WHERE k.keyword ='character-name-in-title' AND n.name LIKE 'X%' AND n.id = ci.person_id AND ci.movie_id = t.id AND t.id = mk.movie_id AND mk.keyword_id = k.id AND t.id = mc.movie_id AND mc.company_id = cn.id AND ci.movie_id = mc.movie_id AND ci.movie_id = mk.movie_id AND mc.movie_id = mk.movie_id; 2 | -------------------------------------------------------------------------------- /job/6b.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(k.keyword) AS movie_keyword, MIN(n.name) AS actor_name, MIN(t.title) AS hero_movie FROM cast_info AS ci, keyword AS k, movie_keyword AS mk, name AS n, title AS t WHERE k.keyword in ('superhero', 'sequel', 'second-part', 'marvel-comics', 'based-on-comic', 'tv-special', 'fight', 'violence') AND n.name LIKE '%Downey%Robert%' AND t.production_year > 2014 AND k.id = mk.keyword_id AND t.id = mk.movie_id AND t.id = ci.movie_id AND ci.movie_id = mk.movie_id AND n.id = ci.person_id; 2 | -------------------------------------------------------------------------------- /job/6d.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(k.keyword) AS movie_keyword, MIN(n.name) AS actor_name, MIN(t.title) AS hero_movie FROM cast_info AS ci, keyword AS k, movie_keyword AS mk, name AS n, 
title AS t WHERE k.keyword in ('superhero', 'sequel', 'second-part', 'marvel-comics', 'based-on-comic', 'tv-special', 'fight', 'violence') AND n.name LIKE '%Downey%Robert%' AND t.production_year > 2000 AND k.id = mk.keyword_id AND t.id = mk.movie_id AND t.id = ci.movie_id AND ci.movie_id = mk.movie_id AND n.id = ci.person_id; 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /.vscode 2 | /build 3 | /imdb 4 | /output*.txt 5 | /*.tgz 6 | /*.db 7 | /TPC-H* 8 | /job-sample 9 | /.cache 10 | BENCHMARK_RUNTIME.txt 11 | perf.data* 12 | perfetto.trace 13 | /.cache.db 14 | /.cache.db.zst 15 | /cmake-* 16 | /.idea 17 | /record.csv 18 | /results/ 19 | /xray.trace 20 | /build_xray 21 | /build_perfetto 22 | /queries 23 | /querygen.log 24 | /errors.log 25 | /.venv 26 | /.cache.db* 27 | scaling_bench.json 28 | scaling_bench.pdf 29 | /schema-domain.json 30 | /schema-domain.json.gz -------------------------------------------------------------------------------- /job/1d.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(mc.note) AS production_note, MIN(t.title) AS movie_title, MIN(t.production_year) AS movie_year FROM company_type AS ct, info_type AS it, movie_companies AS mc, movie_info_idx AS mi_idx, title AS t WHERE ct.kind = 'production companies' AND it.info = 'bottom 10 rank' AND mc.note not like '%(as Metro-Goldwyn-Mayer Pictures)%' AND t.production_year >2000 AND ct.id = mc.company_type_id AND t.id = mc.movie_id AND t.id = mi_idx.movie_id AND mc.movie_id = mi_idx.movie_id AND it.id = mi_idx.info_type_id; 2 | -------------------------------------------------------------------------------- /job/5a.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(t.title) AS typical_european_movie FROM company_type AS ct, info_type AS it, movie_companies AS mc, 
movie_info AS mi, title AS t WHERE ct.kind = 'production companies' AND mc.note like '%(theatrical)%' and mc.note like '%(France)%' AND mi.info IN ('Sweden', 'Norway', 'Germany', 'Denmark', 'Swedish', 'Denish', 'Norwegian', 'German') AND t.production_year > 2005 AND t.id = mi.movie_id AND t.id = mc.movie_id AND mc.movie_id = mi.movie_id AND ct.id = mc.company_type_id AND it.id = mi.info_type_id; 2 | -------------------------------------------------------------------------------- /job/5c.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(t.title) AS american_movie FROM company_type AS ct, info_type AS it, movie_companies AS mc, movie_info AS mi, title AS t WHERE ct.kind = 'production companies' AND mc.note not like '%(TV)%' and mc.note like '%(USA)%' AND mi.info IN ('Sweden', 'Norway', 'Germany', 'Denmark', 'Swedish', 'Denish', 'Norwegian', 'German', 'USA', 'American') AND t.production_year > 1990 AND t.id = mi.movie_id AND t.id = mc.movie_id AND mc.movie_id = mi.movie_id AND ct.id = mc.company_type_id AND it.id = mi.info_type_id; 2 | -------------------------------------------------------------------------------- /job/10b.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(chn.name) AS character, MIN(t.title) AS russian_mov_with_actor_producer FROM char_name AS chn, cast_info AS ci, company_name AS cn, company_type AS ct, movie_companies AS mc, role_type AS rt, title AS t WHERE ci.note like '%(producer)%' AND cn.country_code = '[ru]' AND rt.role = 'actor' AND t.production_year > 2010 AND t.id = mc.movie_id AND t.id = ci.movie_id AND ci.movie_id = mc.movie_id AND chn.id = ci.person_role_id AND rt.id = ci.role_id AND cn.id = mc.company_id AND ct.id = mc.company_type_id; 2 | -------------------------------------------------------------------------------- /engine/op/ScanBase.cpp: -------------------------------------------------------------------------------- 1 | #include 
"op/ScanBase.hpp" 2 | #include "infra/Scheduler.hpp" 3 | //--------------------------------------------------------------------------- 4 | namespace engine { 5 | //--------------------------------------------------------------------------- 6 | size_t ScanBase::concurrency() const { 7 | return Scheduler::concurrency(); 8 | } 9 | //--------------------------------------------------------------------------- 10 | } 11 | //--------------------------------------------------------------------------- -------------------------------------------------------------------------------- /job/17a.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(n.name) AS member_in_charnamed_american_movie, MIN(n.name) AS a1 FROM cast_info AS ci, company_name AS cn, keyword AS k, movie_companies AS mc, movie_keyword AS mk, name AS n, title AS t WHERE cn.country_code ='[us]' AND k.keyword ='character-name-in-title' AND n.name LIKE 'B%' AND n.id = ci.person_id AND ci.movie_id = t.id AND t.id = mk.movie_id AND mk.keyword_id = k.id AND t.id = mc.movie_id AND mc.company_id = cn.id AND ci.movie_id = mc.movie_id AND ci.movie_id = mk.movie_id AND mc.movie_id = mk.movie_id; 2 | -------------------------------------------------------------------------------- /job/1b.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(mc.note) AS production_note, MIN(t.title) AS movie_title, MIN(t.production_year) AS movie_year FROM company_type AS ct, info_type AS it, movie_companies AS mc, movie_info_idx AS mi_idx, title AS t WHERE ct.kind = 'production companies' AND it.info = 'bottom 10 rank' AND mc.note not like '%(as Metro-Goldwyn-Mayer Pictures)%' AND t.production_year between 2005 and 2010 AND ct.id = mc.company_type_id AND t.id = mc.movie_id AND t.id = mi_idx.movie_id AND mc.movie_id = mi_idx.movie_id AND it.id = mi_idx.info_type_id; 2 | -------------------------------------------------------------------------------- 
/job/10a.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(chn.name) AS uncredited_voiced_character, MIN(t.title) AS russian_movie FROM char_name AS chn, cast_info AS ci, company_name AS cn, company_type AS ct, movie_companies AS mc, role_type AS rt, title AS t WHERE ci.note like '%(voice)%' and ci.note like '%(uncredited)%' AND cn.country_code = '[ru]' AND rt.role = 'actor' AND t.production_year > 2005 AND t.id = mc.movie_id AND t.id = ci.movie_id AND ci.movie_id = mc.movie_id AND chn.id = ci.person_role_id AND rt.id = ci.role_id AND cn.id = mc.company_id AND ct.id = mc.company_type_id; 2 | -------------------------------------------------------------------------------- /job/1a.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(mc.note) AS production_note, MIN(t.title) AS movie_title, MIN(t.production_year) AS movie_year FROM company_type AS ct, info_type AS it, movie_companies AS mc, movie_info_idx AS mi_idx, title AS t WHERE ct.kind = 'production companies' AND it.info = 'top 250 rank' AND mc.note not like '%(as Metro-Goldwyn-Mayer Pictures)%' and (mc.note like '%(co-production)%' or mc.note like '%(presents)%') AND ct.id = mc.company_type_id AND t.id = mc.movie_id AND t.id = mi_idx.movie_id AND mc.movie_id = mi_idx.movie_id AND it.id = mi_idx.info_type_id; 2 | -------------------------------------------------------------------------------- /job/1c.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(mc.note) AS production_note, MIN(t.title) AS movie_title, MIN(t.production_year) AS movie_year FROM company_type AS ct, info_type AS it, movie_companies AS mc, movie_info_idx AS mi_idx, title AS t WHERE ct.kind = 'production companies' AND it.info = 'top 250 rank' AND mc.note not like '%(as Metro-Goldwyn-Mayer Pictures)%' and (mc.note like '%(co-production)%') AND t.production_year >2010 AND ct.id = mc.company_type_id 
AND t.id = mc.movie_id AND t.id = mi_idx.movie_id AND mc.movie_id = mi_idx.movie_id AND it.id = mi_idx.info_type_id; 2 | -------------------------------------------------------------------------------- /job/16b.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(an.name) AS cool_actor_pseudonym, MIN(t.title) AS series_named_after_char FROM aka_name AS an, cast_info AS ci, company_name AS cn, keyword AS k, movie_companies AS mc, movie_keyword AS mk, name AS n, title AS t WHERE cn.country_code ='[us]' AND k.keyword ='character-name-in-title' AND an.person_id = n.id AND n.id = ci.person_id AND ci.movie_id = t.id AND t.id = mk.movie_id AND mk.keyword_id = k.id AND t.id = mc.movie_id AND mc.company_id = cn.id AND an.person_id = ci.person_id AND ci.movie_id = mc.movie_id AND ci.movie_id = mk.movie_id AND mc.movie_id = mk.movie_id; 2 | -------------------------------------------------------------------------------- /engine/op/OpBase.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | //--------------------------------------------------------------------------- 3 | #include 4 | #include 5 | //--------------------------------------------------------------------------- 6 | namespace engine { 7 | //--------------------------------------------------------------------------- 8 | /// Base class for all ops 9 | class OpBase { 10 | public: 11 | /// Get the pretty name 12 | virtual std::string getPretty() const; 13 | }; 14 | //--------------------------------------------------------------------------- 15 | } 16 | -------------------------------------------------------------------------------- /job/16c.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(an.name) AS cool_actor_pseudonym, MIN(t.title) AS series_named_after_char FROM aka_name AS an, cast_info AS ci, company_name AS cn, keyword AS k, movie_companies AS mc, movie_keyword 
AS mk, name AS n, title AS t WHERE cn.country_code ='[us]' AND k.keyword ='character-name-in-title' AND t.episode_nr < 100 AND an.person_id = n.id AND n.id = ci.person_id AND ci.movie_id = t.id AND t.id = mk.movie_id AND mk.keyword_id = k.id AND t.id = mc.movie_id AND mc.company_id = cn.id AND an.person_id = ci.person_id AND ci.movie_id = mc.movie_id AND ci.movie_id = mk.movie_id AND mc.movie_id = mk.movie_id; 2 | -------------------------------------------------------------------------------- /job/8a.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(an1.name) AS actress_pseudonym, MIN(t.title) AS japanese_movie_dubbed FROM aka_name AS an1, cast_info AS ci, company_name AS cn, movie_companies AS mc, name AS n1, role_type AS rt, title AS t WHERE ci.note ='(voice: English version)' AND cn.country_code ='[jp]' AND mc.note like '%(Japan)%' and mc.note not like '%(USA)%' AND n1.name like '%Yo%' and n1.name not like '%Yu%' AND rt.role ='actress' AND an1.person_id = n1.id AND n1.id = ci.person_id AND ci.movie_id = t.id AND t.id = mc.movie_id AND mc.company_id = cn.id AND ci.role_id = rt.id AND an1.person_id = ci.person_id AND ci.movie_id = mc.movie_id; 2 | -------------------------------------------------------------------------------- /job/16a.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(an.name) AS cool_actor_pseudonym, MIN(t.title) AS series_named_after_char FROM aka_name AS an, cast_info AS ci, company_name AS cn, keyword AS k, movie_companies AS mc, movie_keyword AS mk, name AS n, title AS t WHERE cn.country_code ='[us]' AND k.keyword ='character-name-in-title' AND t.episode_nr >= 50 AND t.episode_nr < 100 AND an.person_id = n.id AND n.id = ci.person_id AND ci.movie_id = t.id AND t.id = mk.movie_id AND mk.keyword_id = k.id AND t.id = mc.movie_id AND mc.company_id = cn.id AND an.person_id = ci.person_id AND ci.movie_id = mc.movie_id AND ci.movie_id = mk.movie_id 
AND mc.movie_id = mk.movie_id; 2 | -------------------------------------------------------------------------------- /job/16d.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(an.name) AS cool_actor_pseudonym, MIN(t.title) AS series_named_after_char FROM aka_name AS an, cast_info AS ci, company_name AS cn, keyword AS k, movie_companies AS mc, movie_keyword AS mk, name AS n, title AS t WHERE cn.country_code ='[us]' AND k.keyword ='character-name-in-title' AND t.episode_nr >= 5 AND t.episode_nr < 100 AND an.person_id = n.id AND n.id = ci.person_id AND ci.movie_id = t.id AND t.id = mk.movie_id AND mk.keyword_id = k.id AND t.id = mc.movie_id AND mc.company_id = cn.id AND an.person_id = ci.person_id AND ci.movie_id = mc.movie_id AND ci.movie_id = mk.movie_id AND mc.movie_id = mk.movie_id; 2 | -------------------------------------------------------------------------------- /job/18a.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(mi.info) AS movie_budget, MIN(mi_idx.info) AS movie_votes, MIN(t.title) AS movie_title FROM cast_info AS ci, info_type AS it1, info_type AS it2, movie_info AS mi, movie_info_idx AS mi_idx, name AS n, title AS t WHERE ci.note in ('(producer)', '(executive producer)') AND it1.info = 'budget' AND it2.info = 'votes' AND n.gender = 'm' and n.name like '%Tim%' AND t.id = mi.movie_id AND t.id = mi_idx.movie_id AND t.id = ci.movie_id AND ci.movie_id = mi.movie_id AND ci.movie_id = mi_idx.movie_id AND mi.movie_id = mi_idx.movie_id AND n.id = ci.person_id AND it1.id = mi.info_type_id AND it2.id = mi_idx.info_type_id; 2 | -------------------------------------------------------------------------------- /engine/Execute.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | //--------------------------------------------------------------------------- 3 | #include 4 | 
//--------------------------------------------------------------------------- 5 | namespace engine { 6 | //--------------------------------------------------------------------------- 7 | class QueryPlan; 8 | //--------------------------------------------------------------------------- 9 | ColumnarTable execute(QueryPlan plan, [[maybe_unused]] void* context); 10 | //--------------------------------------------------------------------------- 11 | } 12 | //--------------------------------------------------------------------------- -------------------------------------------------------------------------------- /job/9d.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(an.name) AS alternative_name, MIN(chn.name) AS voiced_char_name, MIN(n.name) AS voicing_actress, MIN(t.title) AS american_movie FROM aka_name AS an, char_name AS chn, cast_info AS ci, company_name AS cn, movie_companies AS mc, name AS n, role_type AS rt, title AS t WHERE ci.note in ('(voice)', '(voice: Japanese version)', '(voice) (uncredited)', '(voice: English version)') AND cn.country_code ='[us]' AND n.gender ='f' AND rt.role ='actress' AND ci.movie_id = t.id AND t.id = mc.movie_id AND ci.movie_id = mc.movie_id AND mc.company_id = cn.id AND ci.role_id = rt.id AND n.id = ci.person_id AND chn.id = ci.person_role_id AND an.person_id = n.id AND an.person_id = ci.person_id; 2 | -------------------------------------------------------------------------------- /job/7b.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(n.name) AS of_person, MIN(t.title) AS biography_movie FROM aka_name AS an, cast_info AS ci, info_type AS it, link_type AS lt, movie_link AS ml, name AS n, person_info AS pi, title AS t WHERE an.name LIKE '%a%' AND it.info ='mini biography' AND lt.link ='features' AND n.name_pcode_cf LIKE 'D%' AND n.gender='m' AND pi.note ='Volker Boehm' AND t.production_year BETWEEN 1980 AND 1984 AND n.id = 
an.person_id AND n.id = pi.person_id AND ci.person_id = n.id AND t.id = ci.movie_id AND ml.linked_movie_id = t.id AND lt.id = ml.link_type_id AND it.id = pi.info_type_id AND pi.person_id = an.person_id AND pi.person_id = ci.person_id AND an.person_id = ci.person_id AND ci.movie_id = ml.linked_movie_id; 2 | -------------------------------------------------------------------------------- /job/13d.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(cn.name) AS producing_company, MIN(miidx.info) AS rating, MIN(t.title) AS movie FROM company_name AS cn, company_type AS ct, info_type AS it, info_type AS it2, kind_type AS kt, movie_companies AS mc, movie_info AS mi, movie_info_idx AS miidx, title AS t WHERE cn.country_code ='[us]' AND ct.kind ='production companies' AND it.info ='rating' AND it2.info ='release dates' AND kt.kind ='movie' AND mi.movie_id = t.id AND it2.id = mi.info_type_id AND kt.id = t.kind_id AND mc.movie_id = t.id AND cn.id = mc.company_id AND ct.id = mc.company_type_id AND miidx.movie_id = t.id AND it.id = miidx.info_type_id AND mi.movie_id = miidx.movie_id AND mi.movie_id = mc.movie_id AND miidx.movie_id = mc.movie_id; 2 | -------------------------------------------------------------------------------- /job/13a.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(mi.info) AS release_date, MIN(miidx.info) AS rating, MIN(t.title) AS german_movie FROM company_name AS cn, company_type AS ct, info_type AS it, info_type AS it2, kind_type AS kt, movie_companies AS mc, movie_info AS mi, movie_info_idx AS miidx, title AS t WHERE cn.country_code ='[de]' AND ct.kind ='production companies' AND it.info ='rating' AND it2.info ='release dates' AND kt.kind ='movie' AND mi.movie_id = t.id AND it2.id = mi.info_type_id AND kt.id = t.kind_id AND mc.movie_id = t.id AND cn.id = mc.company_id AND ct.id = mc.company_type_id AND miidx.movie_id = t.id AND it.id = 
miidx.info_type_id AND mi.movie_id = miidx.movie_id AND mi.movie_id = mc.movie_id AND miidx.movie_id = mc.movie_id; 2 | -------------------------------------------------------------------------------- /job/18c.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(mi.info) AS movie_budget, MIN(mi_idx.info) AS movie_votes, MIN(t.title) AS movie_title FROM cast_info AS ci, info_type AS it1, info_type AS it2, movie_info AS mi, movie_info_idx AS mi_idx, name AS n, title AS t WHERE ci.note in ('(writer)', '(head writer)', '(written by)', '(story)', '(story editor)') AND it1.info = 'genres' AND it2.info = 'votes' AND mi.info in ('Horror', 'Action', 'Sci-Fi', 'Thriller', 'Crime', 'War') AND n.gender = 'm' AND t.id = mi.movie_id AND t.id = mi_idx.movie_id AND t.id = ci.movie_id AND ci.movie_id = mi.movie_id AND ci.movie_id = mi_idx.movie_id AND mi.movie_id = mi_idx.movie_id AND n.id = ci.person_id AND it1.id = mi.info_type_id AND it2.id = mi_idx.info_type_id; 2 | -------------------------------------------------------------------------------- /job/9c.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(an.name) AS alternative_name, MIN(chn.name) AS voiced_character_name, MIN(n.name) AS voicing_actress, MIN(t.title) AS american_movie FROM aka_name AS an, char_name AS chn, cast_info AS ci, company_name AS cn, movie_companies AS mc, name AS n, role_type AS rt, title AS t WHERE ci.note in ('(voice)', '(voice: Japanese version)', '(voice) (uncredited)', '(voice: English version)') AND cn.country_code ='[us]' AND n.gender ='f' and n.name like '%An%' AND rt.role ='actress' AND ci.movie_id = t.id AND t.id = mc.movie_id AND ci.movie_id = mc.movie_id AND mc.company_id = cn.id AND ci.role_id = rt.id AND n.id = ci.person_id AND chn.id = ci.person_role_id AND an.person_id = n.id AND an.person_id = ci.person_id; 2 | 
-------------------------------------------------------------------------------- /engine/tools/DuckDB.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | //--------------------------------------------------------------------------- 3 | #include 4 | #include 5 | //--------------------------------------------------------------------------- 6 | namespace engine { 7 | //--------------------------------------------------------------------------- 8 | class DuckDB { 9 | struct Impl; 10 | std::unique_ptr impl; 11 | 12 | public: 13 | DuckDB(); 14 | ~DuckDB() noexcept; 15 | 16 | ColumnarTable execute(std::string query); 17 | }; 18 | //--------------------------------------------------------------------------- 19 | } 20 | //--------------------------------------------------------------------------- -------------------------------------------------------------------------------- /job/11d.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(cn.name) AS from_company, MIN(mc.note) AS production_note, MIN(t.title) AS movie_based_on_book FROM company_name AS cn, company_type AS ct, keyword AS k, link_type AS lt, movie_companies AS mc, movie_keyword AS mk, movie_link AS ml, title AS t WHERE cn.country_code !='[pl]' AND ct.kind != 'production companies' and ct.kind is not NULL AND k.keyword in ('sequel', 'revenge', 'based-on-novel') AND mc.note is not NULL AND t.production_year > 1950 AND lt.id = ml.link_type_id AND ml.movie_id = t.id AND t.id = mk.movie_id AND mk.keyword_id = k.id AND t.id = mc.movie_id AND mc.company_type_id = ct.id AND mc.company_id = cn.id AND ml.movie_id = mk.movie_id AND ml.movie_id = mc.movie_id AND mk.movie_id = mc.movie_id; 2 | -------------------------------------------------------------------------------- /job/15d.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(at.title) AS aka_title, MIN(t.title) AS 
internet_movie_title FROM aka_title AS at, company_name AS cn, company_type AS ct, info_type AS it1, keyword AS k, movie_companies AS mc, movie_info AS mi, movie_keyword AS mk, title AS t WHERE cn.country_code = '[us]' AND it1.info = 'release dates' AND mi.note like '%internet%' AND t.production_year > 1990 AND t.id = at.movie_id AND t.id = mi.movie_id AND t.id = mk.movie_id AND t.id = mc.movie_id AND mk.movie_id = mi.movie_id AND mk.movie_id = mc.movie_id AND mk.movie_id = at.movie_id AND mi.movie_id = mc.movie_id AND mi.movie_id = at.movie_id AND mc.movie_id = at.movie_id AND k.id = mk.keyword_id AND it1.id = mi.info_type_id AND cn.id = mc.company_id AND ct.id = mc.company_type_id; 2 | -------------------------------------------------------------------------------- /job/7a.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(n.name) AS of_person, MIN(t.title) AS biography_movie FROM aka_name AS an, cast_info AS ci, info_type AS it, link_type AS lt, movie_link AS ml, name AS n, person_info AS pi, title AS t WHERE an.name LIKE '%a%' AND it.info ='mini biography' AND lt.link ='features' AND n.name_pcode_cf BETWEEN 'A' AND 'F' AND (n.gender='m' OR (n.gender = 'f' AND n.name LIKE 'B%')) AND pi.note ='Volker Boehm' AND t.production_year BETWEEN 1980 AND 1995 AND n.id = an.person_id AND n.id = pi.person_id AND ci.person_id = n.id AND t.id = ci.movie_id AND ml.linked_movie_id = t.id AND lt.id = ml.link_type_id AND it.id = pi.info_type_id AND pi.person_id = an.person_id AND pi.person_id = ci.person_id AND an.person_id = ci.person_id AND ci.movie_id = ml.linked_movie_id; 2 | -------------------------------------------------------------------------------- /job/11a.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(cn.name) AS from_company, MIN(lt.link) AS movie_link_type, MIN(t.title) AS non_polish_sequel_movie FROM company_name AS cn, company_type AS ct, keyword AS k, 
link_type AS lt, movie_companies AS mc, movie_keyword AS mk, movie_link AS ml, title AS t WHERE cn.country_code !='[pl]' AND (cn.name LIKE '%Film%' OR cn.name LIKE '%Warner%') AND ct.kind ='production companies' AND k.keyword ='sequel' AND lt.link LIKE '%follow%' AND mc.note IS NULL AND t.production_year BETWEEN 1950 AND 2000 AND lt.id = ml.link_type_id AND ml.movie_id = t.id AND t.id = mk.movie_id AND mk.keyword_id = k.id AND t.id = mc.movie_id AND mc.company_type_id = ct.id AND mc.company_id = cn.id AND ml.movie_id = mk.movie_id AND ml.movie_id = mc.movie_id AND mk.movie_id = mc.movie_id; 2 | -------------------------------------------------------------------------------- /job/11b.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(cn.name) AS from_company, MIN(lt.link) AS movie_link_type, MIN(t.title) AS sequel_movie FROM company_name AS cn, company_type AS ct, keyword AS k, link_type AS lt, movie_companies AS mc, movie_keyword AS mk, movie_link AS ml, title AS t WHERE cn.country_code !='[pl]' AND (cn.name LIKE '%Film%' OR cn.name LIKE '%Warner%') AND ct.kind ='production companies' AND k.keyword ='sequel' AND lt.link LIKE '%follows%' AND mc.note IS NULL AND t.production_year = 1998 and t.title like '%Money%' AND lt.id = ml.link_type_id AND ml.movie_id = t.id AND t.id = mk.movie_id AND mk.keyword_id = k.id AND t.id = mc.movie_id AND mc.company_type_id = ct.id AND mc.company_id = cn.id AND ml.movie_id = mk.movie_id AND ml.movie_id = mc.movie_id AND mk.movie_id = mc.movie_id; 2 | -------------------------------------------------------------------------------- /job/8b.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(an.name) AS acress_pseudonym, MIN(t.title) AS japanese_anime_movie FROM aka_name AS an, cast_info AS ci, company_name AS cn, movie_companies AS mc, name AS n, role_type AS rt, title AS t WHERE ci.note ='(voice: English version)' AND cn.country_code 
='[jp]' AND mc.note like '%(Japan)%' and mc.note not like '%(USA)%' and (mc.note like '%(2006)%' or mc.note like '%(2007)%') AND n.name like '%Yo%' and n.name not like '%Yu%' AND rt.role ='actress' AND t.production_year between 2006 and 2007 and (t.title like 'One Piece%' or t.title like 'Dragon Ball Z%') AND an.person_id = n.id AND n.id = ci.person_id AND ci.movie_id = t.id AND t.id = mc.movie_id AND mc.company_id = cn.id AND ci.role_id = rt.id AND an.person_id = ci.person_id AND ci.movie_id = mc.movie_id; 2 | -------------------------------------------------------------------------------- /job/9b.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(an.name) AS alternative_name, MIN(chn.name) AS voiced_character, MIN(n.name) AS voicing_actress, MIN(t.title) AS american_movie FROM aka_name AS an, char_name AS chn, cast_info AS ci, company_name AS cn, movie_companies AS mc, name AS n, role_type AS rt, title AS t WHERE ci.note = '(voice)' AND cn.country_code ='[us]' AND mc.note like '%(200%)%' and (mc.note like '%(USA)%' or mc.note like '%(worldwide)%') AND n.gender ='f' and n.name like '%Angel%' AND rt.role ='actress' AND t.production_year between 2007 and 2010 AND ci.movie_id = t.id AND t.id = mc.movie_id AND ci.movie_id = mc.movie_id AND mc.company_id = cn.id AND ci.role_id = rt.id AND n.id = ci.person_id AND chn.id = ci.person_role_id AND an.person_id = n.id AND an.person_id = ci.person_id; 2 | -------------------------------------------------------------------------------- /job/12a.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(cn.name) AS movie_company, MIN(mi_idx.info) AS rating, MIN(t.title) AS drama_horror_movie FROM company_name AS cn, company_type AS ct, info_type AS it1, info_type AS it2, movie_companies AS mc, movie_info AS mi, movie_info_idx AS mi_idx, title AS t WHERE cn.country_code = '[us]' AND ct.kind = 'production companies' AND it1.info = 'genres' 
AND it2.info = 'rating' AND mi.info in ('Drama', 'Horror') AND mi_idx.info > '8.0' AND t.production_year between 2005 and 2008 AND t.id = mi.movie_id AND t.id = mi_idx.movie_id AND mi.info_type_id = it1.id AND mi_idx.info_type_id = it2.id AND t.id = mc.movie_id AND ct.id = mc.company_type_id AND cn.id = mc.company_id AND mc.movie_id = mi.movie_id AND mc.movie_id = mi_idx.movie_id AND mi.movie_id = mi_idx.movie_id; 2 | -------------------------------------------------------------------------------- /job/12b.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(mi.info) AS budget, MIN(t.title) AS unsuccsessful_movie FROM company_name AS cn, company_type AS ct, info_type AS it1, info_type AS it2, movie_companies AS mc, movie_info AS mi, movie_info_idx AS mi_idx, title AS t WHERE cn.country_code ='[us]' AND ct.kind is not NULL and (ct.kind ='production companies' or ct.kind = 'distributors') AND it1.info ='budget' AND it2.info ='bottom 10 rank' AND t.production_year >2000 AND (t.title LIKE 'Birdemic%' OR t.title LIKE '%Movie%') AND t.id = mi.movie_id AND t.id = mi_idx.movie_id AND mi.info_type_id = it1.id AND mi_idx.info_type_id = it2.id AND t.id = mc.movie_id AND ct.id = mc.company_type_id AND cn.id = mc.company_id AND mc.movie_id = mi.movie_id AND mc.movie_id = mi_idx.movie_id AND mi.movie_id = mi_idx.movie_id; 2 | -------------------------------------------------------------------------------- /job/12c.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(cn.name) AS movie_company, MIN(mi_idx.info) AS rating, MIN(t.title) AS mainstream_movie FROM company_name AS cn, company_type AS ct, info_type AS it1, info_type AS it2, movie_companies AS mc, movie_info AS mi, movie_info_idx AS mi_idx, title AS t WHERE cn.country_code = '[us]' AND ct.kind = 'production companies' AND it1.info = 'genres' AND it2.info = 'rating' AND mi.info in ('Drama', 'Horror', 'Western', 'Family') AND 
mi_idx.info > '7.0' AND t.production_year between 2000 and 2010 AND t.id = mi.movie_id AND t.id = mi_idx.movie_id AND mi.info_type_id = it1.id AND mi_idx.info_type_id = it2.id AND t.id = mc.movie_id AND ct.id = mc.company_type_id AND cn.id = mc.company_id AND mc.movie_id = mi.movie_id AND mc.movie_id = mi_idx.movie_id AND mi.movie_id = mi_idx.movie_id; 2 | -------------------------------------------------------------------------------- /job/18b.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(mi.info) AS movie_budget, MIN(mi_idx.info) AS movie_votes, MIN(t.title) AS movie_title FROM cast_info AS ci, info_type AS it1, info_type AS it2, movie_info AS mi, movie_info_idx AS mi_idx, name AS n, title AS t WHERE ci.note in ('(writer)', '(head writer)', '(written by)', '(story)', '(story editor)') AND it1.info = 'genres' AND it2.info = 'rating' AND mi.info in ('Horror', 'Thriller') and mi.note is NULL AND mi_idx.info > '8.0' AND n.gender is not null and n.gender = 'f' AND t.production_year between 2008 and 2014 AND t.id = mi.movie_id AND t.id = mi_idx.movie_id AND t.id = ci.movie_id AND ci.movie_id = mi.movie_id AND ci.movie_id = mi_idx.movie_id AND mi.movie_id = mi_idx.movie_id AND n.id = ci.person_id AND it1.id = mi.info_type_id AND it2.id = mi_idx.info_type_id; 2 | -------------------------------------------------------------------------------- /job/13b.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(cn.name) AS producing_company, MIN(miidx.info) AS rating, MIN(t.title) AS movie_about_winning FROM company_name AS cn, company_type AS ct, info_type AS it, info_type AS it2, kind_type AS kt, movie_companies AS mc, movie_info AS mi, movie_info_idx AS miidx, title AS t WHERE cn.country_code ='[us]' AND ct.kind ='production companies' AND it.info ='rating' AND it2.info ='release dates' AND kt.kind ='movie' AND t.title != '' AND (t.title LIKE '%Champion%' OR t.title LIKE 
'%Loser%') AND mi.movie_id = t.id AND it2.id = mi.info_type_id AND kt.id = t.kind_id AND mc.movie_id = t.id AND cn.id = mc.company_id AND ct.id = mc.company_type_id AND miidx.movie_id = t.id AND it.id = miidx.info_type_id AND mi.movie_id = miidx.movie_id AND mi.movie_id = mc.movie_id AND miidx.movie_id = mc.movie_id; 2 | -------------------------------------------------------------------------------- /job/13c.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(cn.name) AS producing_company, MIN(miidx.info) AS rating, MIN(t.title) AS movie_about_winning FROM company_name AS cn, company_type AS ct, info_type AS it, info_type AS it2, kind_type AS kt, movie_companies AS mc, movie_info AS mi, movie_info_idx AS miidx, title AS t WHERE cn.country_code ='[us]' AND ct.kind ='production companies' AND it.info ='rating' AND it2.info ='release dates' AND kt.kind ='movie' AND t.title != '' AND (t.title LIKE 'Champion%' OR t.title LIKE 'Loser%') AND mi.movie_id = t.id AND it2.id = mi.info_type_id AND kt.id = t.kind_id AND mc.movie_id = t.id AND cn.id = mc.company_id AND ct.id = mc.company_type_id AND miidx.movie_id = t.id AND it.id = miidx.info_type_id AND mi.movie_id = miidx.movie_id AND mi.movie_id = mc.movie_id AND miidx.movie_id = mc.movie_id; 2 | -------------------------------------------------------------------------------- /job/9a.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(an.name) AS alternative_name, MIN(chn.name) AS character_name, MIN(t.title) AS movie FROM aka_name AS an, char_name AS chn, cast_info AS ci, company_name AS cn, movie_companies AS mc, name AS n, role_type AS rt, title AS t WHERE ci.note in ('(voice)', '(voice: Japanese version)', '(voice) (uncredited)', '(voice: English version)') AND cn.country_code ='[us]' AND mc.note is not NULL and (mc.note like '%(USA)%' or mc.note like '%(worldwide)%') AND n.gender ='f' and n.name like '%Ang%' AND rt.role 
='actress' AND t.production_year between 2005 and 2015 AND ci.movie_id = t.id AND t.id = mc.movie_id AND ci.movie_id = mc.movie_id AND mc.company_id = cn.id AND ci.role_id = rt.id AND n.id = ci.person_id AND chn.id = ci.person_role_id AND an.person_id = n.id AND an.person_id = ci.person_id; 2 | -------------------------------------------------------------------------------- /job/11c.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(cn.name) AS from_company, MIN(mc.note) AS production_note, MIN(t.title) AS movie_based_on_book FROM company_name AS cn, company_type AS ct, keyword AS k, link_type AS lt, movie_companies AS mc, movie_keyword AS mk, movie_link AS ml, title AS t WHERE cn.country_code !='[pl]' and (cn.name like '20th Century Fox%' or cn.name like 'Twentieth Century Fox%') AND ct.kind != 'production companies' and ct.kind is not NULL AND k.keyword in ('sequel', 'revenge', 'based-on-novel') AND mc.note is not NULL AND t.production_year > 1950 AND lt.id = ml.link_type_id AND ml.movie_id = t.id AND t.id = mk.movie_id AND mk.keyword_id = k.id AND t.id = mc.movie_id AND mc.company_type_id = ct.id AND mc.company_id = cn.id AND ml.movie_id = mk.movie_id AND ml.movie_id = mc.movie_id AND mk.movie_id = mc.movie_id; 2 | -------------------------------------------------------------------------------- /checknolib.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # check for files containing fmt 4 | find engine/ -type f ! -path "*/all.cpp" ! -path "*/tools/*" ! -path "*/test/*" -exec grep "fmt" {} \; 5 | 6 | # check for files containing ranges 7 | find engine/ -type f ! -path "*/all.cpp" ! -path "*/tools/*" ! -path "*/test/*" -exec grep "ranges" {} \; 8 | 9 | # check for files containing Setting 10 | find engine/ -type f ! -path "*/all.cpp" ! -path "*/tools/*" ! 
-path "*/test/*" -exec grep "Setting" {} \; 11 | 12 | # check for files containing PerfEvent 13 | find engine/ -type f ! -path "*/all.cpp" ! -path "*/tools/*" ! -path "*/test/*" -exec grep "PerfEvent" {} \; 14 | 15 | # check for files containing Perfetto 16 | find engine/ -type f ! -path "*/all.cpp" ! -path "*/tools/*" ! -path "*/test/*" -exec grep "Perfetto" {} \; 17 | -------------------------------------------------------------------------------- /job/14a.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(mi_idx.info) AS rating, MIN(t.title) AS northern_dark_movie FROM info_type AS it1, info_type AS it2, keyword AS k, kind_type AS kt, movie_info AS mi, movie_info_idx AS mi_idx, movie_keyword AS mk, title AS t WHERE it1.info = 'countries' AND it2.info = 'rating' AND k.keyword in ('murder', 'murder-in-title', 'blood', 'violence') AND kt.kind = 'movie' AND mi.info IN ('Sweden', 'Norway', 'Germany', 'Denmark', 'Swedish', 'Denish', 'Norwegian', 'German', 'USA', 'American') AND mi_idx.info < '8.5' AND t.production_year > 2010 AND kt.id = t.kind_id AND t.id = mi.movie_id AND t.id = mk.movie_id AND t.id = mi_idx.movie_id AND mk.movie_id = mi.movie_id AND mk.movie_id = mi_idx.movie_id AND mi.movie_id = mi_idx.movie_id AND k.id = mk.keyword_id AND it1.id = mi.info_type_id AND it2.id = mi_idx.info_type_id; 2 | -------------------------------------------------------------------------------- /job/15a.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(mi.info) AS release_date, MIN(t.title) AS internet_movie FROM aka_title AS at, company_name AS cn, company_type AS ct, info_type AS it1, keyword AS k, movie_companies AS mc, movie_info AS mi, movie_keyword AS mk, title AS t WHERE cn.country_code = '[us]' AND it1.info = 'release dates' AND mc.note like '%(200%)%' and mc.note like '%(worldwide)%' AND mi.note like '%internet%' AND mi.info like 'USA:% 200%' AND t.production_year > 
2000 AND t.id = at.movie_id AND t.id = mi.movie_id AND t.id = mk.movie_id AND t.id = mc.movie_id AND mk.movie_id = mi.movie_id AND mk.movie_id = mc.movie_id AND mk.movie_id = at.movie_id AND mi.movie_id = mc.movie_id AND mi.movie_id = at.movie_id AND mc.movie_id = at.movie_id AND k.id = mk.keyword_id AND it1.id = mi.info_type_id AND cn.id = mc.company_id AND ct.id = mc.company_type_id; 2 | -------------------------------------------------------------------------------- /job/19d.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(n.name) AS voicing_actress, MIN(t.title) AS jap_engl_voiced_movie FROM aka_name AS an, char_name AS chn, cast_info AS ci, company_name AS cn, info_type AS it, movie_companies AS mc, movie_info AS mi, name AS n, role_type AS rt, title AS t WHERE ci.note in ('(voice)', '(voice: Japanese version)', '(voice) (uncredited)', '(voice: English version)') AND cn.country_code ='[us]' AND it.info = 'release dates' AND n.gender ='f' AND rt.role ='actress' AND t.production_year > 2000 AND t.id = mi.movie_id AND t.id = mc.movie_id AND t.id = ci.movie_id AND mc.movie_id = ci.movie_id AND mc.movie_id = mi.movie_id AND mi.movie_id = ci.movie_id AND cn.id = mc.company_id AND it.id = mi.info_type_id AND n.id = ci.person_id AND rt.id = ci.role_id AND n.id = an.person_id AND ci.person_id = an.person_id AND chn.id = ci.person_role_id; 2 | -------------------------------------------------------------------------------- /engine/query/Restriction.cpp: -------------------------------------------------------------------------------- 1 | #include "query/Restriction.hpp" 2 | #include "op/Hashtable.hpp" 3 | //--------------------------------------------------------------------------- 4 | namespace engine { 5 | //--------------------------------------------------------------------------- 6 | bool Restriction::operator()(uint64_t val) const noexcept { 7 | switch (type) { 8 | case Eq: 9 | assert(cst.value != nullValue); 10 | 
return val == cst.value; 11 | case NotNull: return val != nullValue; 12 | case Join: return (val != nullValue) && joinFilter->joinFilter(val); 13 | case JoinPrecise: return (val != nullValue) && joinFilter->joinFilterPrecise(val); 14 | } 15 | __builtin_unreachable(); 16 | } 17 | //--------------------------------------------------------------------------- 18 | } -------------------------------------------------------------------------------- /job/15c.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(mi.info) AS release_date, MIN(t.title) AS modern_american_internet_movie FROM aka_title AS at, company_name AS cn, company_type AS ct, info_type AS it1, keyword AS k, movie_companies AS mc, movie_info AS mi, movie_keyword AS mk, title AS t WHERE cn.country_code = '[us]' AND it1.info = 'release dates' AND mi.note like '%internet%' AND mi.info is not NULL and (mi.info like 'USA:% 199%' or mi.info like 'USA:% 200%') AND t.production_year > 1990 AND t.id = at.movie_id AND t.id = mi.movie_id AND t.id = mk.movie_id AND t.id = mc.movie_id AND mk.movie_id = mi.movie_id AND mk.movie_id = mc.movie_id AND mk.movie_id = at.movie_id AND mi.movie_id = mc.movie_id AND mi.movie_id = at.movie_id AND mc.movie_id = at.movie_id AND k.id = mk.keyword_id AND it1.id = mi.info_type_id AND cn.id = mc.company_id AND ct.id = mc.company_type_id; 2 | -------------------------------------------------------------------------------- /engine/pipeline/PipelineFunction.cpp: -------------------------------------------------------------------------------- 1 | #include "pipeline/PipelineFunction.hpp" 2 | #include 3 | #include 4 | //--------------------------------------------------------------------------- 5 | namespace engine { 6 | //--------------------------------------------------------------------------- 7 | PipelineFunction PipelineFunctions::lookupPipeline(std::string_view name) { 8 | // Binary search in functions 9 | auto it = 
std::lower_bound(functions, functions + numFunctions, name, [](const auto& f, std::string_view name) { 10 | return f.first < name; 11 | }); 12 | if (it == functions + numFunctions || it->first != name) 13 | throw std::runtime_error("Pipeline not found"); 14 | return it->second; 15 | } 16 | //--------------------------------------------------------------------------- 17 | } 18 | -------------------------------------------------------------------------------- /checkglobals.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | if [ $# -ne 1 ]; then 4 | echo "Usage: $0 " 5 | exit 1 6 | fi 7 | 8 | BINARY=$1 9 | 10 | echo "Extracting global constructors from: $BINARY" 11 | 12 | # Get the addresses from .init_array (two addresses per line) 13 | # Get the addresses from .init_array 14 | ADDRESSES=$(objdump -s -j .init_array "$BINARY" | awk 'NR>3 {print $2 $3; print $4 $5}' | sed 's/\(..\)\(..\)\(..\)\(..\)\(..\)\(..\)\(..\)\(..\)/0x\8\7\6\5\4\3\2\1/') 15 | if [ -z "$ADDRESSES" ]; then 16 | echo "No global constructors found." 
17 | exit 1 18 | fi 19 | 20 | echo $ADDRESSES 21 | 22 | echo "Found constructor addresses:" 23 | for ADDR in $ADDRESSES; do 24 | SYMBOL=$(objdump -S --start-address=$ADDR "$BINARY" 2>/dev/null | head -n 10 | grep -Eo "<[^>]+>" | head -n 1) 25 | echo "$ADDR -> ${SYMBOL:-}" 26 | done 27 | -------------------------------------------------------------------------------- /job/7c.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(n.name) AS cast_member_name, MIN(pi.info) AS cast_member_info FROM aka_name AS an, cast_info AS ci, info_type AS it, link_type AS lt, movie_link AS ml, name AS n, person_info AS pi, title AS t WHERE an.name is not NULL and (an.name LIKE '%a%' or an.name LIKE 'A%') AND it.info ='mini biography' AND lt.link in ('references', 'referenced in', 'features', 'featured in') AND n.name_pcode_cf BETWEEN 'A' AND 'F' AND (n.gender='m' OR (n.gender = 'f' AND n.name LIKE 'A%')) AND pi.note is not NULL AND t.production_year BETWEEN 1980 AND 2010 AND n.id = an.person_id AND n.id = pi.person_id AND ci.person_id = n.id AND t.id = ci.movie_id AND ml.linked_movie_id = t.id AND lt.id = ml.link_type_id AND it.id = pi.info_type_id AND pi.person_id = an.person_id AND pi.person_id = ci.person_id AND an.person_id = ci.person_id AND ci.movie_id = ml.linked_movie_id; 2 | -------------------------------------------------------------------------------- /job/14c.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(mi_idx.info) AS rating, MIN(t.title) AS north_european_dark_production FROM info_type AS it1, info_type AS it2, keyword AS k, kind_type AS kt, movie_info AS mi, movie_info_idx AS mi_idx, movie_keyword AS mk, title AS t WHERE it1.info = 'countries' AND it2.info = 'rating' AND k.keyword is not null and k.keyword in ('murder', 'murder-in-title', 'blood', 'violence') AND kt.kind in ('movie', 'episode') AND mi.info IN ('Sweden', 'Norway', 'Germany', 'Denmark', 'Swedish', 
'Danish', 'Norwegian', 'German', 'USA', 'American') AND mi_idx.info < '8.5' AND t.production_year > 2005 AND kt.id = t.kind_id AND t.id = mi.movie_id AND t.id = mk.movie_id AND t.id = mi_idx.movie_id AND mk.movie_id = mi.movie_id AND mk.movie_id = mi_idx.movie_id AND mi.movie_id = mi_idx.movie_id AND k.id = mk.keyword_id AND it1.id = mi.info_type_id AND it2.id = mi_idx.info_type_id; 2 | -------------------------------------------------------------------------------- /job/15b.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(mi.info) AS release_date, MIN(t.title) AS youtube_movie FROM aka_title AS at, company_name AS cn, company_type AS ct, info_type AS it1, keyword AS k, movie_companies AS mc, movie_info AS mi, movie_keyword AS mk, title AS t WHERE cn.country_code = '[us]' and cn.name = 'YouTube' AND it1.info = 'release dates' AND mc.note like '%(200%)%' and mc.note like '%(worldwide)%' AND mi.note like '%internet%' AND mi.info like 'USA:% 200%' AND t.production_year between 2005 and 2010 AND t.id = at.movie_id AND t.id = mi.movie_id AND t.id = mk.movie_id AND t.id = mc.movie_id AND mk.movie_id = mi.movie_id AND mk.movie_id = mc.movie_id AND mk.movie_id = at.movie_id AND mi.movie_id = mc.movie_id AND mi.movie_id = at.movie_id AND mc.movie_id = at.movie_id AND k.id = mk.keyword_id AND it1.id = mi.info_type_id AND cn.id = mc.company_id AND ct.id = mc.company_type_id; 2 | -------------------------------------------------------------------------------- /job/14b.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(mi_idx.info) AS rating, MIN(t.title) AS western_dark_production FROM info_type AS it1, info_type AS it2, keyword AS k, kind_type AS kt, movie_info AS mi, movie_info_idx AS mi_idx, movie_keyword AS mk, title AS t WHERE it1.info = 'countries' AND it2.info = 'rating' AND k.keyword in ('murder', 'murder-in-title') AND kt.kind = 'movie' AND mi.info IN ('Sweden', 
'Norway', 'Germany', 'Denmark', 'Swedish', 'Denish', 'Norwegian', 'German', 'USA', 'American') AND mi_idx.info > '6.0' AND t.production_year > 2010 and (t.title like '%murder%' or t.title like '%Murder%' or t.title like '%Mord%') AND kt.id = t.kind_id AND t.id = mi.movie_id AND t.id = mk.movie_id AND t.id = mi_idx.movie_id AND mk.movie_id = mi.movie_id AND mk.movie_id = mi_idx.movie_id AND mi.movie_id = mi_idx.movie_id AND k.id = mk.keyword_id AND it1.id = mi.info_type_id AND it2.id = mi_idx.info_type_id; 2 | -------------------------------------------------------------------------------- /job/20a.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(t.title) AS complete_downey_ironman_movie FROM complete_cast AS cc, comp_cast_type AS cct1, comp_cast_type AS cct2, char_name AS chn, cast_info AS ci, keyword AS k, kind_type AS kt, movie_keyword AS mk, name AS n, title AS t WHERE cct1.kind = 'cast' AND cct2.kind like '%complete%' AND chn.name not like '%Sherlock%' and (chn.name like '%Tony%Stark%' or chn.name like '%Iron%Man%') AND k.keyword in ('superhero', 'sequel', 'second-part', 'marvel-comics', 'based-on-comic', 'tv-special', 'fight', 'violence') AND kt.kind = 'movie' AND t.production_year > 1950 AND kt.id = t.kind_id AND t.id = mk.movie_id AND t.id = ci.movie_id AND t.id = cc.movie_id AND mk.movie_id = ci.movie_id AND mk.movie_id = cc.movie_id AND ci.movie_id = cc.movie_id AND chn.id = ci.person_role_id AND n.id = ci.person_id AND k.id = mk.keyword_id AND cct1.id = cc.subject_id AND cct2.id = cc.status_id; 2 | -------------------------------------------------------------------------------- /duckdbrunner.py: -------------------------------------------------------------------------------- 1 | import duckdb 2 | import sys 3 | 4 | with duckdb.connect(database='imdb2.db', read_only=True, config={'access_mode': 'read_only'}) as conn: 5 | conn.execute("set memory_limit='10GB';") 6 | conn.execute("set temp_directory='';") 7 
| while sys.stdin: 8 | query = sys.stdin.readline().strip() 9 | print(f"Received query: {query}", file=sys.stderr) 10 | if not query: 11 | break 12 | try: 13 | result = conn.execute(query).fetchall() 14 | print(f"Executed query {query} with result {result}", file=sys.stderr) 15 | print(result[0][0]) 16 | sys.stdout.flush() 17 | except Exception as e: 18 | print(f"Executed query {query} with error: {e}", file=sys.stderr) 19 | print(-1) 20 | sys.stdout.flush() 21 | continue 22 | 23 | sys.stdin.close() -------------------------------------------------------------------------------- /job/20c.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(n.name) AS cast_member, MIN(t.title) AS complete_dynamic_hero_movie FROM complete_cast AS cc, comp_cast_type AS cct1, comp_cast_type AS cct2, char_name AS chn, cast_info AS ci, keyword AS k, kind_type AS kt, movie_keyword AS mk, name AS n, title AS t WHERE cct1.kind = 'cast' AND cct2.kind like '%complete%' AND chn.name is not NULL and (chn.name like '%man%' or chn.name like '%Man%') AND k.keyword in ('superhero', 'marvel-comics', 'based-on-comic', 'tv-special', 'fight', 'violence', 'magnet', 'web', 'claw', 'laser') AND kt.kind = 'movie' AND t.production_year > 2000 AND kt.id = t.kind_id AND t.id = mk.movie_id AND t.id = ci.movie_id AND t.id = cc.movie_id AND mk.movie_id = ci.movie_id AND mk.movie_id = cc.movie_id AND ci.movie_id = cc.movie_id AND chn.id = ci.person_role_id AND n.id = ci.person_id AND k.id = mk.keyword_id AND cct1.id = cc.subject_id AND cct2.id = cc.status_id; 2 | -------------------------------------------------------------------------------- /job/21b.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(cn.name) AS company_name, MIN(lt.link) AS link_type, MIN(t.title) AS german_follow_up FROM company_name AS cn, company_type AS ct, keyword AS k, link_type AS lt, movie_companies AS mc, movie_info AS mi, movie_keyword AS 
mk, movie_link AS ml, title AS t WHERE cn.country_code !='[pl]' AND (cn.name LIKE '%Film%' OR cn.name LIKE '%Warner%') AND ct.kind ='production companies' AND k.keyword ='sequel' AND lt.link LIKE '%follow%' AND mc.note IS NULL AND mi.info IN ('Germany', 'German') AND t.production_year BETWEEN 2000 AND 2010 AND lt.id = ml.link_type_id AND ml.movie_id = t.id AND t.id = mk.movie_id AND mk.keyword_id = k.id AND t.id = mc.movie_id AND mc.company_type_id = ct.id AND mc.company_id = cn.id AND mi.movie_id = t.id AND ml.movie_id = mk.movie_id AND ml.movie_id = mc.movie_id AND mk.movie_id = mc.movie_id AND ml.movie_id = mi.movie_id AND mk.movie_id = mi.movie_id AND mc.movie_id = mi.movie_id; 2 | -------------------------------------------------------------------------------- /job/20b.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(t.title) AS complete_downey_ironman_movie FROM complete_cast AS cc, comp_cast_type AS cct1, comp_cast_type AS cct2, char_name AS chn, cast_info AS ci, keyword AS k, kind_type AS kt, movie_keyword AS mk, name AS n, title AS t WHERE cct1.kind = 'cast' AND cct2.kind like '%complete%' AND chn.name not like '%Sherlock%' and (chn.name like '%Tony%Stark%' or chn.name like '%Iron%Man%') AND k.keyword in ('superhero', 'sequel', 'second-part', 'marvel-comics', 'based-on-comic', 'tv-special', 'fight', 'violence') AND kt.kind = 'movie' AND n.name LIKE '%Downey%Robert%' AND t.production_year > 2000 AND kt.id = t.kind_id AND t.id = mk.movie_id AND t.id = ci.movie_id AND t.id = cc.movie_id AND mk.movie_id = ci.movie_id AND mk.movie_id = cc.movie_id AND ci.movie_id = cc.movie_id AND chn.id = ci.person_role_id AND n.id = ci.person_id AND k.id = mk.keyword_id AND cct1.id = cc.subject_id AND cct2.id = cc.status_id; 2 | -------------------------------------------------------------------------------- /job/19c.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(n.name) 
AS voicing_actress, MIN(t.title) AS jap_engl_voiced_movie FROM aka_name AS an, char_name AS chn, cast_info AS ci, company_name AS cn, info_type AS it, movie_companies AS mc, movie_info AS mi, name AS n, role_type AS rt, title AS t WHERE ci.note in ('(voice)', '(voice: Japanese version)', '(voice) (uncredited)', '(voice: English version)') AND cn.country_code ='[us]' AND it.info = 'release dates' AND mi.info is not null and (mi.info like 'Japan:%200%' or mi.info like 'USA:%200%') AND n.gender ='f' and n.name like '%An%' AND rt.role ='actress' AND t.production_year > 2000 AND t.id = mi.movie_id AND t.id = mc.movie_id AND t.id = ci.movie_id AND mc.movie_id = ci.movie_id AND mc.movie_id = mi.movie_id AND mi.movie_id = ci.movie_id AND cn.id = mc.company_id AND it.id = mi.info_type_id AND n.id = ci.person_id AND rt.id = ci.role_id AND n.id = an.person_id AND ci.person_id = an.person_id AND chn.id = ci.person_role_id; 2 | -------------------------------------------------------------------------------- /job/25a.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(mi.info) AS movie_budget, MIN(mi_idx.info) AS movie_votes, MIN(n.name) AS male_writer, MIN(t.title) AS violent_movie_title FROM cast_info AS ci, info_type AS it1, info_type AS it2, keyword AS k, movie_info AS mi, movie_info_idx AS mi_idx, movie_keyword AS mk, name AS n, title AS t WHERE ci.note in ('(writer)', '(head writer)', '(written by)', '(story)', '(story editor)') AND it1.info = 'genres' AND it2.info = 'votes' AND k.keyword in ('murder', 'blood', 'gore', 'death', 'female-nudity') AND mi.info = 'Horror' AND n.gender = 'm' AND t.id = mi.movie_id AND t.id = mi_idx.movie_id AND t.id = ci.movie_id AND t.id = mk.movie_id AND ci.movie_id = mi.movie_id AND ci.movie_id = mi_idx.movie_id AND ci.movie_id = mk.movie_id AND mi.movie_id = mi_idx.movie_id AND mi.movie_id = mk.movie_id AND mi_idx.movie_id = mk.movie_id AND n.id = ci.person_id AND it1.id = mi.info_type_id AND 
it2.id = mi_idx.info_type_id AND k.id = mk.keyword_id; 2 | -------------------------------------------------------------------------------- /job/21a.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(cn.name) AS company_name, MIN(lt.link) AS link_type, MIN(t.title) AS western_follow_up FROM company_name AS cn, company_type AS ct, keyword AS k, link_type AS lt, movie_companies AS mc, movie_info AS mi, movie_keyword AS mk, movie_link AS ml, title AS t WHERE cn.country_code !='[pl]' AND (cn.name LIKE '%Film%' OR cn.name LIKE '%Warner%') AND ct.kind ='production companies' AND k.keyword ='sequel' AND lt.link LIKE '%follow%' AND mc.note IS NULL AND mi.info IN ('Sweden', 'Norway', 'Germany', 'Denmark', 'Swedish', 'Denish', 'Norwegian', 'German') AND t.production_year BETWEEN 1950 AND 2000 AND lt.id = ml.link_type_id AND ml.movie_id = t.id AND t.id = mk.movie_id AND mk.keyword_id = k.id AND t.id = mc.movie_id AND mc.company_type_id = ct.id AND mc.company_id = cn.id AND mi.movie_id = t.id AND ml.movie_id = mk.movie_id AND ml.movie_id = mc.movie_id AND mk.movie_id = mc.movie_id AND ml.movie_id = mi.movie_id AND mk.movie_id = mi.movie_id AND mc.movie_id = mi.movie_id; 2 | -------------------------------------------------------------------------------- /job/23a.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(kt.kind) AS movie_kind, MIN(t.title) AS complete_us_internet_movie FROM complete_cast AS cc, comp_cast_type AS cct1, company_name AS cn, company_type AS ct, info_type AS it1, keyword AS k, kind_type AS kt, movie_companies AS mc, movie_info AS mi, movie_keyword AS mk, title AS t WHERE cct1.kind = 'complete+verified' AND cn.country_code = '[us]' AND it1.info = 'release dates' AND kt.kind in ('movie') AND mi.note like '%internet%' AND mi.info is not NULL and (mi.info like 'USA:% 199%' or mi.info like 'USA:% 200%') AND t.production_year > 2000 AND kt.id = t.kind_id AND t.id 
= mi.movie_id AND t.id = mk.movie_id AND t.id = mc.movie_id AND t.id = cc.movie_id AND mk.movie_id = mi.movie_id AND mk.movie_id = mc.movie_id AND mk.movie_id = cc.movie_id AND mi.movie_id = mc.movie_id AND mi.movie_id = cc.movie_id AND mc.movie_id = cc.movie_id AND k.id = mk.keyword_id AND it1.id = mi.info_type_id AND cn.id = mc.company_id AND ct.id = mc.company_type_id AND cct1.id = cc.status_id; 2 | -------------------------------------------------------------------------------- /job/19b.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(n.name) AS voicing_actress, MIN(t.title) AS kung_fu_panda FROM aka_name AS an, char_name AS chn, cast_info AS ci, company_name AS cn, info_type AS it, movie_companies AS mc, movie_info AS mi, name AS n, role_type AS rt, title AS t WHERE ci.note = '(voice)' AND cn.country_code ='[us]' AND it.info = 'release dates' AND mc.note like '%(200%)%' and (mc.note like '%(USA)%' or mc.note like '%(worldwide)%') AND mi.info is not null and (mi.info like 'Japan:%2007%' or mi.info like 'USA:%2008%') AND n.gender ='f' and n.name like '%Angel%' AND rt.role ='actress' AND t.production_year between 2007 and 2008 and t.title like '%Kung%Fu%Panda%' AND t.id = mi.movie_id AND t.id = mc.movie_id AND t.id = ci.movie_id AND mc.movie_id = ci.movie_id AND mc.movie_id = mi.movie_id AND mi.movie_id = ci.movie_id AND cn.id = mc.company_id AND it.id = mi.info_type_id AND n.id = ci.person_id AND rt.id = ci.role_id AND n.id = an.person_id AND ci.person_id = an.person_id AND chn.id = ci.person_role_id; 2 | -------------------------------------------------------------------------------- /job/21c.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(cn.name) AS company_name, MIN(lt.link) AS link_type, MIN(t.title) AS western_follow_up FROM company_name AS cn, company_type AS ct, keyword AS k, link_type AS lt, movie_companies AS mc, movie_info AS mi, movie_keyword AS 
mk, movie_link AS ml, title AS t WHERE cn.country_code !='[pl]' AND (cn.name LIKE '%Film%' OR cn.name LIKE '%Warner%') AND ct.kind ='production companies' AND k.keyword ='sequel' AND lt.link LIKE '%follow%' AND mc.note IS NULL AND mi.info IN ('Sweden', 'Norway', 'Germany', 'Denmark', 'Swedish', 'Denish', 'Norwegian', 'German', 'English') AND t.production_year BETWEEN 1950 AND 2010 AND lt.id = ml.link_type_id AND ml.movie_id = t.id AND t.id = mk.movie_id AND mk.keyword_id = k.id AND t.id = mc.movie_id AND mc.company_type_id = ct.id AND mc.company_id = cn.id AND mi.movie_id = t.id AND ml.movie_id = mk.movie_id AND ml.movie_id = mc.movie_id AND mk.movie_id = mc.movie_id AND ml.movie_id = mi.movie_id AND mk.movie_id = mi.movie_id AND mc.movie_id = mi.movie_id; 2 | -------------------------------------------------------------------------------- /job/23b.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(kt.kind) AS movie_kind, MIN(t.title) AS complete_nerdy_internet_movie FROM complete_cast AS cc, comp_cast_type AS cct1, company_name AS cn, company_type AS ct, info_type AS it1, keyword AS k, kind_type AS kt, movie_companies AS mc, movie_info AS mi, movie_keyword AS mk, title AS t WHERE cct1.kind = 'complete+verified' AND cn.country_code = '[us]' AND it1.info = 'release dates' AND k.keyword in ('nerd', 'loner', 'alienation', 'dignity') AND kt.kind in ('movie') AND mi.note like '%internet%' AND mi.info like 'USA:% 200%' AND t.production_year > 2000 AND kt.id = t.kind_id AND t.id = mi.movie_id AND t.id = mk.movie_id AND t.id = mc.movie_id AND t.id = cc.movie_id AND mk.movie_id = mi.movie_id AND mk.movie_id = mc.movie_id AND mk.movie_id = cc.movie_id AND mi.movie_id = mc.movie_id AND mi.movie_id = cc.movie_id AND mc.movie_id = cc.movie_id AND k.id = mk.keyword_id AND it1.id = mi.info_type_id AND cn.id = mc.company_id AND ct.id = mc.company_type_id AND cct1.id = cc.status_id; 2 | 
-------------------------------------------------------------------------------- /job/25b.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(mi.info) AS movie_budget, MIN(mi_idx.info) AS movie_votes, MIN(n.name) AS male_writer, MIN(t.title) AS violent_movie_title FROM cast_info AS ci, info_type AS it1, info_type AS it2, keyword AS k, movie_info AS mi, movie_info_idx AS mi_idx, movie_keyword AS mk, name AS n, title AS t WHERE ci.note in ('(writer)', '(head writer)', '(written by)', '(story)', '(story editor)') AND it1.info = 'genres' AND it2.info = 'votes' AND k.keyword in ('murder', 'blood', 'gore', 'death', 'female-nudity') AND mi.info = 'Horror' AND n.gender = 'm' AND t.production_year > 2010 AND t.title like 'Vampire%' AND t.id = mi.movie_id AND t.id = mi_idx.movie_id AND t.id = ci.movie_id AND t.id = mk.movie_id AND ci.movie_id = mi.movie_id AND ci.movie_id = mi_idx.movie_id AND ci.movie_id = mk.movie_id AND mi.movie_id = mi_idx.movie_id AND mi.movie_id = mk.movie_id AND mi_idx.movie_id = mk.movie_id AND n.id = ci.person_id AND it1.id = mi.info_type_id AND it2.id = mi_idx.info_type_id AND k.id = mk.keyword_id; 2 | -------------------------------------------------------------------------------- /engine/infra/PageMemory.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | //--------------------------------------------------------------------------- 5 | namespace engine::pagememory { 6 | //--------------------------------------------------------------------------- 7 | /// Setup the page memory 8 | void setup(); 9 | //--------------------------------------------------------------------------- 10 | /// Prefault the page memory 11 | bool prefault(); 12 | //--------------------------------------------------------------------------- 13 | /// Start a new query 14 | void start_query(); 15 | 
//--------------------------------------------------------------------------- 16 | /// Allocate a page 17 | void* allocate(); 18 | //--------------------------------------------------------------------------- 19 | struct AllocationStealer { 20 | AllocationStealer() noexcept; 21 | ~AllocationStealer() noexcept; 22 | }; 23 | //--------------------------------------------------------------------------- 24 | } 25 | -------------------------------------------------------------------------------- /job/25c.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(mi.info) AS movie_budget, MIN(mi_idx.info) AS movie_votes, MIN(n.name) AS male_writer, MIN(t.title) AS violent_movie_title FROM cast_info AS ci, info_type AS it1, info_type AS it2, keyword AS k, movie_info AS mi, movie_info_idx AS mi_idx, movie_keyword AS mk, name AS n, title AS t WHERE ci.note in ('(writer)', '(head writer)', '(written by)', '(story)', '(story editor)') AND it1.info = 'genres' AND it2.info = 'votes' AND k.keyword in ('murder', 'violence', 'blood', 'gore', 'death', 'female-nudity', 'hospital') AND mi.info in ('Horror', 'Action', 'Sci-Fi', 'Thriller', 'Crime', 'War') AND n.gender = 'm' AND t.id = mi.movie_id AND t.id = mi_idx.movie_id AND t.id = ci.movie_id AND t.id = mk.movie_id AND ci.movie_id = mi.movie_id AND ci.movie_id = mi_idx.movie_id AND ci.movie_id = mk.movie_id AND mi.movie_id = mi_idx.movie_id AND mi.movie_id = mk.movie_id AND mi_idx.movie_id = mk.movie_id AND n.id = ci.person_id AND it1.id = mi.info_type_id AND it2.id = mi_idx.info_type_id AND k.id = mk.keyword_id; 2 | -------------------------------------------------------------------------------- /download_imdb.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | url="https://event.cwi.nl/da/job/imdb.tgz" 4 | output_file="imdb.tgz" 5 | target_dir="imdb" 6 | 7 | # Detect and select downloader 8 | if command -v wget &> /dev/null; 
then 9 | if ! wget "$url" -O "$output_file"; then 10 | echo "Error: downloading failed" >&2 11 | exit 1 12 | fi 13 | elif command -v curl &> /dev/null; then 14 | if ! curl -L "$url" -o "$output_file"; then 15 | echo "Error: downloading failed" >&2 16 | exit 1 17 | fi 18 | else 19 | echo "Error: please install wget or curl to download imdb.tgz" >&2 20 | exit 1 21 | fi 22 | 23 | # make target directory (if not exists) 24 | if ! mkdir -p "$target_dir"; then 25 | echo "Error: cannot make directory '$target_dir'" >&2 26 | exit 1 27 | fi 28 | 29 | # decompress the file to the target directory 30 | if ! tar -xf "$output_file" -C "$target_dir"; then 31 | echo "Error: failed to decompress the file" >&2 32 | exit 1 33 | fi 34 | 35 | echo "Success!" 36 | -------------------------------------------------------------------------------- /job/23c.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(kt.kind) AS movie_kind, MIN(t.title) AS complete_us_internet_movie FROM complete_cast AS cc, comp_cast_type AS cct1, company_name AS cn, company_type AS ct, info_type AS it1, keyword AS k, kind_type AS kt, movie_companies AS mc, movie_info AS mi, movie_keyword AS mk, title AS t WHERE cct1.kind = 'complete+verified' AND cn.country_code = '[us]' AND it1.info = 'release dates' AND kt.kind in ('movie', 'tv movie', 'video movie', 'video game') AND mi.note like '%internet%' AND mi.info is not NULL and (mi.info like 'USA:% 199%' or mi.info like 'USA:% 200%') AND t.production_year > 1990 AND kt.id = t.kind_id AND t.id = mi.movie_id AND t.id = mk.movie_id AND t.id = mc.movie_id AND t.id = cc.movie_id AND mk.movie_id = mi.movie_id AND mk.movie_id = mc.movie_id AND mk.movie_id = cc.movie_id AND mi.movie_id = mc.movie_id AND mi.movie_id = cc.movie_id AND mc.movie_id = cc.movie_id AND k.id = mk.keyword_id AND it1.id = mi.info_type_id AND cn.id = mc.company_id AND ct.id = mc.company_type_id AND cct1.id = cc.status_id; 2 | 
-------------------------------------------------------------------------------- /job/19a.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(n.name) AS voicing_actress, MIN(t.title) AS voiced_movie FROM aka_name AS an, char_name AS chn, cast_info AS ci, company_name AS cn, info_type AS it, movie_companies AS mc, movie_info AS mi, name AS n, role_type AS rt, title AS t WHERE ci.note in ('(voice)', '(voice: Japanese version)', '(voice) (uncredited)', '(voice: English version)') AND cn.country_code ='[us]' AND it.info = 'release dates' AND mc.note is not NULL and (mc.note like '%(USA)%' or mc.note like '%(worldwide)%') AND mi.info is not null and (mi.info like 'Japan:%200%' or mi.info like 'USA:%200%') AND n.gender ='f' and n.name like '%Ang%' AND rt.role ='actress' AND t.production_year between 2005 and 2009 AND t.id = mi.movie_id AND t.id = mc.movie_id AND t.id = ci.movie_id AND mc.movie_id = ci.movie_id AND mc.movie_id = mi.movie_id AND mi.movie_id = ci.movie_id AND cn.id = mc.company_id AND it.id = mi.info_type_id AND n.id = ci.person_id AND rt.id = ci.role_id AND n.id = an.person_id AND ci.person_id = an.person_id AND chn.id = ci.person_role_id; 2 | -------------------------------------------------------------------------------- /job/26b.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(chn.name) AS character_name, MIN(mi_idx.info) AS rating, MIN(t.title) AS complete_hero_movie FROM complete_cast AS cc, comp_cast_type AS cct1, comp_cast_type AS cct2, char_name AS chn, cast_info AS ci, info_type AS it2, keyword AS k, kind_type AS kt, movie_info_idx AS mi_idx, movie_keyword AS mk, name AS n, title AS t WHERE cct1.kind = 'cast' AND cct2.kind like '%complete%' AND chn.name is not NULL and (chn.name like '%man%' or chn.name like '%Man%') AND it2.info = 'rating' AND k.keyword in ('superhero', 'marvel-comics', 'based-on-comic', 'fight') AND kt.kind = 'movie' AND 
mi_idx.info > '8.0' AND t.production_year > 2005 AND kt.id = t.kind_id AND t.id = mk.movie_id AND t.id = ci.movie_id AND t.id = cc.movie_id AND t.id = mi_idx.movie_id AND mk.movie_id = ci.movie_id AND mk.movie_id = cc.movie_id AND mk.movie_id = mi_idx.movie_id AND ci.movie_id = cc.movie_id AND ci.movie_id = mi_idx.movie_id AND cc.movie_id = mi_idx.movie_id AND chn.id = ci.person_role_id AND n.id = ci.person_id AND k.id = mk.keyword_id AND cct1.id = cc.subject_id AND cct2.id = cc.status_id AND it2.id = mi_idx.info_type_id; 2 | -------------------------------------------------------------------------------- /job/22a.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(cn.name) AS movie_company, MIN(mi_idx.info) AS rating, MIN(t.title) AS western_violent_movie FROM company_name AS cn, company_type AS ct, info_type AS it1, info_type AS it2, keyword AS k, kind_type AS kt, movie_companies AS mc, movie_info AS mi, movie_info_idx AS mi_idx, movie_keyword AS mk, title AS t WHERE cn.country_code != '[us]' AND it1.info = 'countries' AND it2.info = 'rating' AND k.keyword in ('murder', 'murder-in-title', 'blood', 'violence') AND kt.kind in ('movie', 'episode') AND mc.note not like '%(USA)%' and mc.note like '%(200%)%' AND mi.info IN ('Germany', 'German', 'USA', 'American') AND mi_idx.info < '7.0' AND t.production_year > 2008 AND kt.id = t.kind_id AND t.id = mi.movie_id AND t.id = mk.movie_id AND t.id = mi_idx.movie_id AND t.id = mc.movie_id AND mk.movie_id = mi.movie_id AND mk.movie_id = mi_idx.movie_id AND mk.movie_id = mc.movie_id AND mi.movie_id = mi_idx.movie_id AND mi.movie_id = mc.movie_id AND mc.movie_id = mi_idx.movie_id AND k.id = mk.keyword_id AND it1.id = mi.info_type_id AND it2.id = mi_idx.info_type_id AND ct.id = mc.company_type_id AND cn.id = mc.company_id; 2 | -------------------------------------------------------------------------------- /job/22b.sql: 
-------------------------------------------------------------------------------- 1 | SELECT MIN(cn.name) AS movie_company, MIN(mi_idx.info) AS rating, MIN(t.title) AS western_violent_movie FROM company_name AS cn, company_type AS ct, info_type AS it1, info_type AS it2, keyword AS k, kind_type AS kt, movie_companies AS mc, movie_info AS mi, movie_info_idx AS mi_idx, movie_keyword AS mk, title AS t WHERE cn.country_code != '[us]' AND it1.info = 'countries' AND it2.info = 'rating' AND k.keyword in ('murder', 'murder-in-title', 'blood', 'violence') AND kt.kind in ('movie', 'episode') AND mc.note not like '%(USA)%' and mc.note like '%(200%)%' AND mi.info IN ('Germany', 'German', 'USA', 'American') AND mi_idx.info < '7.0' AND t.production_year > 2009 AND kt.id = t.kind_id AND t.id = mi.movie_id AND t.id = mk.movie_id AND t.id = mi_idx.movie_id AND t.id = mc.movie_id AND mk.movie_id = mi.movie_id AND mk.movie_id = mi_idx.movie_id AND mk.movie_id = mc.movie_id AND mi.movie_id = mi_idx.movie_id AND mi.movie_id = mc.movie_id AND mc.movie_id = mi_idx.movie_id AND k.id = mk.keyword_id AND it1.id = mi.info_type_id AND it2.id = mi_idx.info_type_id AND ct.id = mc.company_type_id AND cn.id = mc.company_id; 2 | -------------------------------------------------------------------------------- /job/22d.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(cn.name) AS movie_company, MIN(mi_idx.info) AS rating, MIN(t.title) AS western_violent_movie FROM company_name AS cn, company_type AS ct, info_type AS it1, info_type AS it2, keyword AS k, kind_type AS kt, movie_companies AS mc, movie_info AS mi, movie_info_idx AS mi_idx, movie_keyword AS mk, title AS t WHERE cn.country_code != '[us]' AND it1.info = 'countries' AND it2.info = 'rating' AND k.keyword in ('murder', 'murder-in-title', 'blood', 'violence') AND kt.kind in ('movie', 'episode') AND mi.info IN ('Sweden', 'Norway', 'Germany', 'Denmark', 'Swedish', 'Danish', 'Norwegian', 'German', 
'USA', 'American') AND mi_idx.info < '8.5' AND t.production_year > 2005 AND kt.id = t.kind_id AND t.id = mi.movie_id AND t.id = mk.movie_id AND t.id = mi_idx.movie_id AND t.id = mc.movie_id AND mk.movie_id = mi.movie_id AND mk.movie_id = mi_idx.movie_id AND mk.movie_id = mc.movie_id AND mi.movie_id = mi_idx.movie_id AND mi.movie_id = mc.movie_id AND mc.movie_id = mi_idx.movie_id AND k.id = mk.keyword_id AND it1.id = mi.info_type_id AND it2.id = mi_idx.info_type_id AND ct.id = mc.company_type_id AND cn.id = mc.company_id; 2 | -------------------------------------------------------------------------------- /job/26c.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(chn.name) AS character_name, MIN(mi_idx.info) AS rating, MIN(t.title) AS complete_hero_movie FROM complete_cast AS cc, comp_cast_type AS cct1, comp_cast_type AS cct2, char_name AS chn, cast_info AS ci, info_type AS it2, keyword AS k, kind_type AS kt, movie_info_idx AS mi_idx, movie_keyword AS mk, name AS n, title AS t WHERE cct1.kind = 'cast' AND cct2.kind like '%complete%' AND chn.name is not NULL and (chn.name like '%man%' or chn.name like '%Man%') AND it2.info = 'rating' AND k.keyword in ('superhero', 'marvel-comics', 'based-on-comic', 'tv-special', 'fight', 'violence', 'magnet', 'web', 'claw', 'laser') AND kt.kind = 'movie' AND t.production_year > 2000 AND kt.id = t.kind_id AND t.id = mk.movie_id AND t.id = ci.movie_id AND t.id = cc.movie_id AND t.id = mi_idx.movie_id AND mk.movie_id = ci.movie_id AND mk.movie_id = cc.movie_id AND mk.movie_id = mi_idx.movie_id AND ci.movie_id = cc.movie_id AND ci.movie_id = mi_idx.movie_id AND cc.movie_id = mi_idx.movie_id AND chn.id = ci.person_role_id AND n.id = ci.person_id AND k.id = mk.keyword_id AND cct1.id = cc.subject_id AND cct2.id = cc.status_id AND it2.id = mi_idx.info_type_id; 2 | -------------------------------------------------------------------------------- /engine/query/RuntimeValue.hpp: 
-------------------------------------------------------------------------------- 1 | #pragma once 2 | //--------------------------------------------------------------------------- 3 | #include 4 | #include 5 | #include 6 | //--------------------------------------------------------------------------- 7 | namespace engine { 8 | //--------------------------------------------------------------------------- 9 | struct RuntimeValue { 10 | static constexpr uint64_t nullValue = std::numeric_limits::max(); 11 | DataType type; 12 | uint64_t value; 13 | 14 | static RuntimeValue from(DataType type, uint64_t value) { 15 | return {type, value}; 16 | } 17 | 18 | /// Is null? 19 | constexpr bool isNull() const { 20 | return value == nullValue; 21 | } 22 | /// Compare 23 | bool operator==(const RuntimeValue& other) const { 24 | return type == other.type && value == other.value; 25 | } 26 | /// Compare 27 | bool operator!=(const RuntimeValue& other) const { 28 | return !operator==(other); 29 | } 30 | }; 31 | //--------------------------------------------------------------------------- 32 | } 33 | -------------------------------------------------------------------------------- /engine/infra/JoinFilter.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | //--------------------------------------------------------------------------- 3 | #include 4 | //--------------------------------------------------------------------------- 5 | namespace engine { 6 | //--------------------------------------------------------------------------- 7 | struct JoinFilter { 8 | /// Lut for popcount == 4 9 | alignas(4096) static const uint16_t bloomMasks[2048]; 10 | 11 | template 12 | [[gnu::always_inline]] static inline uint16_t getMask(T hash) { 13 | return bloomMasks[hash >> (sizeof(hash) * 8 - 11)]; 14 | } 15 | 16 | [[gnu::always_inline]] static inline bool checkMaskWithEntry(uint16_t mask, uint16_t entry) { 17 | return !(~entry & mask); 18 | } 19 | 20 
| template 21 | [[gnu::always_inline]] static inline bool checkEntry(T hash, uint16_t entry) { 22 | return checkMaskWithEntry(getMask(hash), entry); 23 | } 24 | }; 25 | //--------------------------------------------------------------------------- 26 | } 27 | //--------------------------------------------------------------------------- 28 | -------------------------------------------------------------------------------- /engine/op/TargetBase.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | //--------------------------------------------------------------------------- 3 | #include "infra/Reflection.hpp" 4 | #include "op/OpBase.hpp" 5 | #include "pipeline/PipelineConcepts.hpp" 6 | #include 7 | //--------------------------------------------------------------------------- 8 | namespace engine { 9 | //--------------------------------------------------------------------------- 10 | /// Base class for all targets 11 | class TargetBase : public OpBase { 12 | public: 13 | /// Get the target type 14 | virtual std::string_view getName() const = 0; 15 | }; 16 | //--------------------------------------------------------------------------- 17 | /// All targets must inherit from target impl 18 | template 19 | class TargetImpl : public TargetBase { 20 | public: 21 | /// Return the name of the class 22 | std::string_view getName() const override { 23 | static_assert(TargetOperator, "T must be a proper target"); 24 | return ClassInfo::getName(); 25 | } 26 | }; 27 | //--------------------------------------------------------------------------- 28 | } 29 | -------------------------------------------------------------------------------- /ANNOUNCEMENTS.md: -------------------------------------------------------------------------------- 1 | # Announcements 2 | 3 | ### 2025-02-27 4 | - The recently pushed GitHub workflow will automatically compile, test, and benchmark your solution on all four systems 5 | - Check your repository's 
pull requests 6 | - The results are currently shown at https://sigmod-contest-25.hpi-sci.de/ and will soon be published on the official contest website 7 | 8 | ### 2025-03-04 9 | - With today's changes to the main repository you forked from, we improved the performance of the evaluation phase 10 | - **Important notes:** 11 | - **Deadline change:** The deadline for the final submission has been extended to March 31 12 | - **Own source files**: The CMake file (which cannot be modified by participants) now includes all *.cpp files in the `src` directory. This way, you can add your own source files and better structure your code. 13 | - **Third-party library:** We found that some teams use third-party libraries, e.g., for logging. Please note that third-party libraries are not allowed in the contest. You are free to use them during development, but you need to remove them prior to the final submission. Otherwise, your submission is disqualified. 14 | -------------------------------------------------------------------------------- /job/22c.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(cn.name) AS movie_company, MIN(mi_idx.info) AS rating, MIN(t.title) AS western_violent_movie FROM company_name AS cn, company_type AS ct, info_type AS it1, info_type AS it2, keyword AS k, kind_type AS kt, movie_companies AS mc, movie_info AS mi, movie_info_idx AS mi_idx, movie_keyword AS mk, title AS t WHERE cn.country_code != '[us]' AND it1.info = 'countries' AND it2.info = 'rating' AND k.keyword in ('murder', 'murder-in-title', 'blood', 'violence') AND kt.kind in ('movie', 'episode') AND mc.note not like '%(USA)%' and mc.note like '%(200%)%' AND mi.info IN ('Sweden', 'Norway', 'Germany', 'Denmark', 'Swedish', 'Danish', 'Norwegian', 'German', 'USA', 'American') AND mi_idx.info < '8.5' AND t.production_year > 2005 AND kt.id = t.kind_id AND t.id = mi.movie_id AND t.id = mk.movie_id AND t.id = mi_idx.movie_id AND t.id = mc.movie_id AND 
mk.movie_id = mi.movie_id AND mk.movie_id = mi_idx.movie_id AND mk.movie_id = mc.movie_id AND mi.movie_id = mi_idx.movie_id AND mi.movie_id = mc.movie_id AND mc.movie_id = mi_idx.movie_id AND k.id = mk.keyword_id AND it1.id = mi.info_type_id AND it2.id = mi_idx.info_type_id AND ct.id = mc.company_type_id AND cn.id = mc.company_id; 2 | -------------------------------------------------------------------------------- /job/26a.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(chn.name) AS character_name, MIN(mi_idx.info) AS rating, MIN(n.name) AS playing_actor, MIN(t.title) AS complete_hero_movie FROM complete_cast AS cc, comp_cast_type AS cct1, comp_cast_type AS cct2, char_name AS chn, cast_info AS ci, info_type AS it2, keyword AS k, kind_type AS kt, movie_info_idx AS mi_idx, movie_keyword AS mk, name AS n, title AS t WHERE cct1.kind = 'cast' AND cct2.kind like '%complete%' AND chn.name is not NULL and (chn.name like '%man%' or chn.name like '%Man%') AND it2.info = 'rating' AND k.keyword in ('superhero', 'marvel-comics', 'based-on-comic', 'tv-special', 'fight', 'violence', 'magnet', 'web', 'claw', 'laser') AND kt.kind = 'movie' AND mi_idx.info > '7.0' AND t.production_year > 2000 AND kt.id = t.kind_id AND t.id = mk.movie_id AND t.id = ci.movie_id AND t.id = cc.movie_id AND t.id = mi_idx.movie_id AND mk.movie_id = ci.movie_id AND mk.movie_id = cc.movie_id AND mk.movie_id = mi_idx.movie_id AND ci.movie_id = cc.movie_id AND ci.movie_id = mi_idx.movie_id AND cc.movie_id = mi_idx.movie_id AND chn.id = ci.person_role_id AND n.id = ci.person_id AND k.id = mk.keyword_id AND cct1.id = cc.subject_id AND cct2.id = cc.status_id AND it2.id = mi_idx.info_type_id; 2 | -------------------------------------------------------------------------------- /job/24a.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(chn.name) AS voiced_char_name, MIN(n.name) AS voicing_actress_name, 
MIN(t.title) AS voiced_action_movie_jap_eng FROM aka_name AS an, char_name AS chn, cast_info AS ci, company_name AS cn, info_type AS it, keyword AS k, movie_companies AS mc, movie_info AS mi, movie_keyword AS mk, name AS n, role_type AS rt, title AS t WHERE ci.note in ('(voice)', '(voice: Japanese version)', '(voice) (uncredited)', '(voice: English version)') AND cn.country_code ='[us]' AND it.info = 'release dates' AND k.keyword in ('hero', 'martial-arts', 'hand-to-hand-combat') AND mi.info is not null and (mi.info like 'Japan:%201%' or mi.info like 'USA:%201%') AND n.gender ='f' and n.name like '%An%' AND rt.role ='actress' AND t.production_year > 2010 AND t.id = mi.movie_id AND t.id = mc.movie_id AND t.id = ci.movie_id AND t.id = mk.movie_id AND mc.movie_id = ci.movie_id AND mc.movie_id = mi.movie_id AND mc.movie_id = mk.movie_id AND mi.movie_id = ci.movie_id AND mi.movie_id = mk.movie_id AND ci.movie_id = mk.movie_id AND cn.id = mc.company_id AND it.id = mi.info_type_id AND n.id = ci.person_id AND rt.id = ci.role_id AND n.id = an.person_id AND ci.person_id = an.person_id AND chn.id = ci.person_role_id AND k.id = mk.keyword_id; 2 | -------------------------------------------------------------------------------- /job/31a.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(mi.info) AS movie_budget, MIN(mi_idx.info) AS movie_votes, MIN(n.name) AS writer, MIN(t.title) AS violent_liongate_movie FROM cast_info AS ci, company_name AS cn, info_type AS it1, info_type AS it2, keyword AS k, movie_companies AS mc, movie_info AS mi, movie_info_idx AS mi_idx, movie_keyword AS mk, name AS n, title AS t WHERE ci.note in ('(writer)', '(head writer)', '(written by)', '(story)', '(story editor)') AND cn.name like 'Lionsgate%' AND it1.info = 'genres' AND it2.info = 'votes' AND k.keyword in ('murder', 'violence', 'blood', 'gore', 'death', 'female-nudity', 'hospital') AND mi.info in ('Horror', 'Thriller') AND n.gender = 'm' AND t.id = 
mi.movie_id AND t.id = mi_idx.movie_id AND t.id = ci.movie_id AND t.id = mk.movie_id AND t.id = mc.movie_id AND ci.movie_id = mi.movie_id AND ci.movie_id = mi_idx.movie_id AND ci.movie_id = mk.movie_id AND ci.movie_id = mc.movie_id AND mi.movie_id = mi_idx.movie_id AND mi.movie_id = mk.movie_id AND mi.movie_id = mc.movie_id AND mi_idx.movie_id = mk.movie_id AND mi_idx.movie_id = mc.movie_id AND mk.movie_id = mc.movie_id AND n.id = ci.person_id AND it1.id = mi.info_type_id AND it2.id = mi_idx.info_type_id AND k.id = mk.keyword_id AND cn.id = mc.company_id; 2 | -------------------------------------------------------------------------------- /engine/op/ScanBase.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | //--------------------------------------------------------------------------- 3 | #include "pipeline/PipelineConcepts.hpp" 4 | #include "op/OpBase.hpp" 5 | #include "infra/Reflection.hpp" 6 | #include 7 | //--------------------------------------------------------------------------- 8 | namespace engine { 9 | //--------------------------------------------------------------------------- 10 | /// Base class for all scans 11 | class ScanBase : public OpBase { 12 | public: 13 | /// Get the scan type 14 | virtual std::string_view getName() const = 0; 15 | /// Get the concurrency 16 | virtual size_t concurrency() const; 17 | }; 18 | //--------------------------------------------------------------------------- 19 | /// All scans must inherit from scan impl 20 | template 21 | class ScanImpl : public ScanBase { 22 | public: 23 | /// Return the name of the class 24 | std::string_view getName() const override { 25 | static_assert(ScanOperator, "T must be a proper scan"); 26 | return ClassInfo::getName(); 27 | } 28 | }; 29 | //--------------------------------------------------------------------------- 30 | } 31 | -------------------------------------------------------------------------------- /job/31c.sql:
-------------------------------------------------------------------------------- 1 | SELECT MIN(mi.info) AS movie_budget, MIN(mi_idx.info) AS movie_votes, MIN(n.name) AS writer, MIN(t.title) AS violent_liongate_movie FROM cast_info AS ci, company_name AS cn, info_type AS it1, info_type AS it2, keyword AS k, movie_companies AS mc, movie_info AS mi, movie_info_idx AS mi_idx, movie_keyword AS mk, name AS n, title AS t WHERE ci.note in ('(writer)', '(head writer)', '(written by)', '(story)', '(story editor)') AND cn.name like 'Lionsgate%' AND it1.info = 'genres' AND it2.info = 'votes' AND k.keyword in ('murder', 'violence', 'blood', 'gore', 'death', 'female-nudity', 'hospital') AND mi.info in ('Horror', 'Action', 'Sci-Fi', 'Thriller', 'Crime', 'War') AND t.id = mi.movie_id AND t.id = mi_idx.movie_id AND t.id = ci.movie_id AND t.id = mk.movie_id AND t.id = mc.movie_id AND ci.movie_id = mi.movie_id AND ci.movie_id = mi_idx.movie_id AND ci.movie_id = mk.movie_id AND ci.movie_id = mc.movie_id AND mi.movie_id = mi_idx.movie_id AND mi.movie_id = mk.movie_id AND mi.movie_id = mc.movie_id AND mi_idx.movie_id = mk.movie_id AND mi_idx.movie_id = mc.movie_id AND mk.movie_id = mc.movie_id AND n.id = ci.person_id AND it1.id = mi.info_type_id AND it2.id = mi_idx.info_type_id AND k.id = mk.keyword_id AND cn.id = mc.company_id; 2 | -------------------------------------------------------------------------------- /engine/infra/Random.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | //--------------------------------------------------------------------------- 3 | #include 4 | //--------------------------------------------------------------------------- 5 | namespace engine { 6 | //--------------------------------------------------------------------------- 7 | struct Random { 8 | static constexpr uint64_t addConstant = 0x2d358dccaa6c78a5ull; 9 | static constexpr uint64_t xorConstant = 0x8bb84b93962eacc9ull; 10 | uint64_t seed = 0; 
11 | 12 | static uint64_t mix(uint64_t a, uint64_t b) { 13 | auto res = static_cast(a) * b; 14 | return static_cast(res >> 64) ^ static_cast(res); 15 | } 16 | 17 | uint64_t operator()() { 18 | seed += addConstant; 19 | return mix(seed, seed ^ xorConstant); 20 | } 21 | 22 | constexpr explicit Random(uint64_t s = 0) : seed(mix(s, 0x8bb84b93962eacc9ull)) {} 23 | 24 | // Generate in range [0, s) 25 | uint64_t nextRange(uint64_t s) { 26 | uint64_t val = operator()(); 27 | return (static_cast(val) * s) >> 64; 28 | } 29 | }; 30 | //--------------------------------------------------------------------------- 31 | } -------------------------------------------------------------------------------- /engine/query/PlanImport.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | //--------------------------------------------------------------------------- 3 | #include "query/QueryPlan.hpp" 4 | #include 5 | //--------------------------------------------------------------------------- 6 | struct Plan; 7 | struct ColumnarTable; 8 | //--------------------------------------------------------------------------- 9 | namespace engine { 10 | //--------------------------------------------------------------------------- 11 | struct PlanImport { 12 | static QueryPlan importPlanExistingData(DataSource& dataSource, const Plan& plan); 13 | static QueryPlan importPlan(DataSource& dataSource, const Plan& plan); 14 | 15 | static DataSource::Table importTable(const ColumnarTable& tbl); 16 | 17 | using Data = std::variant; 18 | /// Used for testing 19 | struct TableResult { 20 | virtual ~TableResult() = default; 21 | DataSource::Table table; 22 | }; 23 | /// Used for testing 24 | static std::unique_ptr makeTable(std::vector> data, std::vector types); 25 | }; 26 | //--------------------------------------------------------------------------- 27 | } -------------------------------------------------------------------------------- /job/27b.sql: 
-------------------------------------------------------------------------------- 1 | SELECT MIN(cn.name) AS producing_company, MIN(lt.link) AS link_type, MIN(t.title) AS complete_western_sequel FROM complete_cast AS cc, comp_cast_type AS cct1, comp_cast_type AS cct2, company_name AS cn, company_type AS ct, keyword AS k, link_type AS lt, movie_companies AS mc, movie_info AS mi, movie_keyword AS mk, movie_link AS ml, title AS t WHERE cct1.kind in ('cast', 'crew') AND cct2.kind = 'complete' AND cn.country_code !='[pl]' AND (cn.name LIKE '%Film%' OR cn.name LIKE '%Warner%') AND ct.kind ='production companies' AND k.keyword ='sequel' AND lt.link LIKE '%follow%' AND mc.note IS NULL AND mi.info IN ('Sweden', 'Germany','Swedish', 'German') AND t.production_year = 1998 AND lt.id = ml.link_type_id AND ml.movie_id = t.id AND t.id = mk.movie_id AND mk.keyword_id = k.id AND t.id = mc.movie_id AND mc.company_type_id = ct.id AND mc.company_id = cn.id AND mi.movie_id = t.id AND t.id = cc.movie_id AND cct1.id = cc.subject_id AND cct2.id = cc.status_id AND ml.movie_id = mk.movie_id AND ml.movie_id = mc.movie_id AND mk.movie_id = mc.movie_id AND ml.movie_id = mi.movie_id AND mk.movie_id = mi.movie_id AND mc.movie_id = mi.movie_id AND ml.movie_id = cc.movie_id AND mk.movie_id = cc.movie_id AND mc.movie_id = cc.movie_id AND mi.movie_id = cc.movie_id; 2 | -------------------------------------------------------------------------------- /job/27a.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(cn.name) AS producing_company, MIN(lt.link) AS link_type, MIN(t.title) AS complete_western_sequel FROM complete_cast AS cc, comp_cast_type AS cct1, comp_cast_type AS cct2, company_name AS cn, company_type AS ct, keyword AS k, link_type AS lt, movie_companies AS mc, movie_info AS mi, movie_keyword AS mk, movie_link AS ml, title AS t WHERE cct1.kind in ('cast', 'crew') AND cct2.kind = 'complete' AND cn.country_code !='[pl]' AND (cn.name LIKE '%Film%' 
OR cn.name LIKE '%Warner%') AND ct.kind ='production companies' AND k.keyword ='sequel' AND lt.link LIKE '%follow%' AND mc.note IS NULL AND mi.info IN ('Sweden', 'Germany','Swedish', 'German') AND t.production_year BETWEEN 1950 AND 2000 AND lt.id = ml.link_type_id AND ml.movie_id = t.id AND t.id = mk.movie_id AND mk.keyword_id = k.id AND t.id = mc.movie_id AND mc.company_type_id = ct.id AND mc.company_id = cn.id AND mi.movie_id = t.id AND t.id = cc.movie_id AND cct1.id = cc.subject_id AND cct2.id = cc.status_id AND ml.movie_id = mk.movie_id AND ml.movie_id = mc.movie_id AND mk.movie_id = mc.movie_id AND ml.movie_id = mi.movie_id AND mk.movie_id = mi.movie_id AND mc.movie_id = mi.movie_id AND ml.movie_id = cc.movie_id AND mk.movie_id = cc.movie_id AND mc.movie_id = cc.movie_id AND mi.movie_id = cc.movie_id; 2 | -------------------------------------------------------------------------------- /job/33b.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(cn1.name) AS first_company, MIN(cn2.name) AS second_company, MIN(mi_idx1.info) AS first_rating, MIN(mi_idx2.info) AS second_rating, MIN(t1.title) AS first_movie, MIN(t2.title) AS second_movie FROM company_name AS cn1, company_name AS cn2, info_type AS it1, info_type AS it2, kind_type AS kt1, kind_type AS kt2, link_type AS lt, movie_companies AS mc1, movie_companies AS mc2, movie_info_idx AS mi_idx1, movie_info_idx AS mi_idx2, movie_link AS ml, title AS t1, title AS t2 WHERE cn1.country_code = '[nl]' AND it1.info = 'rating' AND it2.info = 'rating' AND kt1.kind in ('tv series') AND kt2.kind in ('tv series') AND lt.link LIKE '%follow%' AND mi_idx2.info < '3.0' AND t2.production_year = 2007 AND lt.id = ml.link_type_id AND t1.id = ml.movie_id AND t2.id = ml.linked_movie_id AND it1.id = mi_idx1.info_type_id AND t1.id = mi_idx1.movie_id AND kt1.id = t1.kind_id AND cn1.id = mc1.company_id AND t1.id = mc1.movie_id AND ml.movie_id = mi_idx1.movie_id AND ml.movie_id = mc1.movie_id 
AND mi_idx1.movie_id = mc1.movie_id AND it2.id = mi_idx2.info_type_id AND t2.id = mi_idx2.movie_id AND kt2.id = t2.kind_id AND cn2.id = mc2.company_id AND t2.id = mc2.movie_id AND ml.linked_movie_id = mi_idx2.movie_id AND ml.linked_movie_id = mc2.movie_id AND mi_idx2.movie_id = mc2.movie_id; 2 | -------------------------------------------------------------------------------- /job/24b.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(chn.name) AS voiced_char_name, MIN(n.name) AS voicing_actress_name, MIN(t.title) AS kung_fu_panda FROM aka_name AS an, char_name AS chn, cast_info AS ci, company_name AS cn, info_type AS it, keyword AS k, movie_companies AS mc, movie_info AS mi, movie_keyword AS mk, name AS n, role_type AS rt, title AS t WHERE ci.note in ('(voice)', '(voice: Japanese version)', '(voice) (uncredited)', '(voice: English version)') AND cn.country_code ='[us]' AND cn.name = 'DreamWorks Animation' AND it.info = 'release dates' AND k.keyword in ('hero', 'martial-arts', 'hand-to-hand-combat', 'computer-animated-movie') AND mi.info is not null and (mi.info like 'Japan:%201%' or mi.info like 'USA:%201%') AND n.gender ='f' and n.name like '%An%' AND rt.role ='actress' AND t.production_year > 2010 AND t.title like 'Kung Fu Panda%' AND t.id = mi.movie_id AND t.id = mc.movie_id AND t.id = ci.movie_id AND t.id = mk.movie_id AND mc.movie_id = ci.movie_id AND mc.movie_id = mi.movie_id AND mc.movie_id = mk.movie_id AND mi.movie_id = ci.movie_id AND mi.movie_id = mk.movie_id AND ci.movie_id = mk.movie_id AND cn.id = mc.company_id AND it.id = mi.info_type_id AND n.id = ci.person_id AND rt.id = ci.role_id AND n.id = an.person_id AND ci.person_id = an.person_id AND chn.id = ci.person_role_id AND k.id = mk.keyword_id; 2 | -------------------------------------------------------------------------------- /job/27c.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(cn.name) AS 
producing_company, MIN(lt.link) AS link_type, MIN(t.title) AS complete_western_sequel FROM complete_cast AS cc, comp_cast_type AS cct1, comp_cast_type AS cct2, company_name AS cn, company_type AS ct, keyword AS k, link_type AS lt, movie_companies AS mc, movie_info AS mi, movie_keyword AS mk, movie_link AS ml, title AS t WHERE cct1.kind = 'cast' AND cct2.kind like 'complete%' AND cn.country_code !='[pl]' AND (cn.name LIKE '%Film%' OR cn.name LIKE '%Warner%') AND ct.kind ='production companies' AND k.keyword ='sequel' AND lt.link LIKE '%follow%' AND mc.note IS NULL AND mi.info IN ('Sweden', 'Norway', 'Germany', 'Denmark', 'Swedish', 'Denish', 'Norwegian', 'German', 'English') AND t.production_year BETWEEN 1950 AND 2010 AND lt.id = ml.link_type_id AND ml.movie_id = t.id AND t.id = mk.movie_id AND mk.keyword_id = k.id AND t.id = mc.movie_id AND mc.company_type_id = ct.id AND mc.company_id = cn.id AND mi.movie_id = t.id AND t.id = cc.movie_id AND cct1.id = cc.subject_id AND cct2.id = cc.status_id AND ml.movie_id = mk.movie_id AND ml.movie_id = mc.movie_id AND mk.movie_id = mc.movie_id AND ml.movie_id = mi.movie_id AND mk.movie_id = mi.movie_id AND mc.movie_id = mi.movie_id AND ml.movie_id = cc.movie_id AND mk.movie_id = cc.movie_id AND mc.movie_id = cc.movie_id AND mi.movie_id = cc.movie_id; 2 | -------------------------------------------------------------------------------- /job/33a.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(cn1.name) AS first_company, MIN(cn2.name) AS second_company, MIN(mi_idx1.info) AS first_rating, MIN(mi_idx2.info) AS second_rating, MIN(t1.title) AS first_movie, MIN(t2.title) AS second_movie FROM company_name AS cn1, company_name AS cn2, info_type AS it1, info_type AS it2, kind_type AS kt1, kind_type AS kt2, link_type AS lt, movie_companies AS mc1, movie_companies AS mc2, movie_info_idx AS mi_idx1, movie_info_idx AS mi_idx2, movie_link AS ml, title AS t1, title AS t2 WHERE cn1.country_code 
= '[us]' AND it1.info = 'rating' AND it2.info = 'rating' AND kt1.kind in ('tv series') AND kt2.kind in ('tv series') AND lt.link in ('sequel', 'follows', 'followed by') AND mi_idx2.info < '3.0' AND t2.production_year between 2005 and 2008 AND lt.id = ml.link_type_id AND t1.id = ml.movie_id AND t2.id = ml.linked_movie_id AND it1.id = mi_idx1.info_type_id AND t1.id = mi_idx1.movie_id AND kt1.id = t1.kind_id AND cn1.id = mc1.company_id AND t1.id = mc1.movie_id AND ml.movie_id = mi_idx1.movie_id AND ml.movie_id = mc1.movie_id AND mi_idx1.movie_id = mc1.movie_id AND it2.id = mi_idx2.info_type_id AND t2.id = mi_idx2.movie_id AND kt2.id = t2.kind_id AND cn2.id = mc2.company_id AND t2.id = mc2.movie_id AND ml.linked_movie_id = mi_idx2.movie_id AND ml.linked_movie_id = mc2.movie_id AND mi_idx2.movie_id = mc2.movie_id; 2 | -------------------------------------------------------------------------------- /job/30c.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(mi.info) AS movie_budget, MIN(mi_idx.info) AS movie_votes, MIN(n.name) AS writer, MIN(t.title) AS complete_violent_movie FROM complete_cast AS cc, comp_cast_type AS cct1, comp_cast_type AS cct2, cast_info AS ci, info_type AS it1, info_type AS it2, keyword AS k, movie_info AS mi, movie_info_idx AS mi_idx, movie_keyword AS mk, name AS n, title AS t WHERE cct1.kind = 'cast' AND cct2.kind ='complete+verified' AND ci.note in ('(writer)', '(head writer)', '(written by)', '(story)', '(story editor)') AND it1.info = 'genres' AND it2.info = 'votes' AND k.keyword in ('murder', 'violence', 'blood', 'gore', 'death', 'female-nudity', 'hospital') AND mi.info in ('Horror', 'Action', 'Sci-Fi', 'Thriller', 'Crime', 'War') AND n.gender = 'm' AND t.id = mi.movie_id AND t.id = mi_idx.movie_id AND t.id = ci.movie_id AND t.id = mk.movie_id AND t.id = cc.movie_id AND ci.movie_id = mi.movie_id AND ci.movie_id = mi_idx.movie_id AND ci.movie_id = mk.movie_id AND ci.movie_id = cc.movie_id AND 
mi.movie_id = mi_idx.movie_id AND mi.movie_id = mk.movie_id AND mi.movie_id = cc.movie_id AND mi_idx.movie_id = mk.movie_id AND mi_idx.movie_id = cc.movie_id AND mk.movie_id = cc.movie_id AND n.id = ci.person_id AND it1.id = mi.info_type_id AND it2.id = mi_idx.info_type_id AND k.id = mk.keyword_id AND cct1.id = cc.subject_id AND cct2.id = cc.status_id; 2 | -------------------------------------------------------------------------------- /job/30a.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(mi.info) AS movie_budget, MIN(mi_idx.info) AS movie_votes, MIN(n.name) AS writer, MIN(t.title) AS complete_violent_movie FROM complete_cast AS cc, comp_cast_type AS cct1, comp_cast_type AS cct2, cast_info AS ci, info_type AS it1, info_type AS it2, keyword AS k, movie_info AS mi, movie_info_idx AS mi_idx, movie_keyword AS mk, name AS n, title AS t WHERE cct1.kind in ('cast', 'crew') AND cct2.kind ='complete+verified' AND ci.note in ('(writer)', '(head writer)', '(written by)', '(story)', '(story editor)') AND it1.info = 'genres' AND it2.info = 'votes' AND k.keyword in ('murder', 'violence', 'blood', 'gore', 'death', 'female-nudity', 'hospital') AND mi.info in ('Horror', 'Thriller') AND n.gender = 'm' AND t.production_year > 2000 AND t.id = mi.movie_id AND t.id = mi_idx.movie_id AND t.id = ci.movie_id AND t.id = mk.movie_id AND t.id = cc.movie_id AND ci.movie_id = mi.movie_id AND ci.movie_id = mi_idx.movie_id AND ci.movie_id = mk.movie_id AND ci.movie_id = cc.movie_id AND mi.movie_id = mi_idx.movie_id AND mi.movie_id = mk.movie_id AND mi.movie_id = cc.movie_id AND mi_idx.movie_id = mk.movie_id AND mi_idx.movie_id = cc.movie_id AND mk.movie_id = cc.movie_id AND n.id = ci.person_id AND it1.id = mi.info_type_id AND it2.id = mi_idx.info_type_id AND k.id = mk.keyword_id AND cct1.id = cc.subject_id AND cct2.id = cc.status_id; 2 | -------------------------------------------------------------------------------- /job/33c.sql: 
-------------------------------------------------------------------------------- 1 | SELECT MIN(cn1.name) AS first_company, MIN(cn2.name) AS second_company, MIN(mi_idx1.info) AS first_rating, MIN(mi_idx2.info) AS second_rating, MIN(t1.title) AS first_movie, MIN(t2.title) AS second_movie FROM company_name AS cn1, company_name AS cn2, info_type AS it1, info_type AS it2, kind_type AS kt1, kind_type AS kt2, link_type AS lt, movie_companies AS mc1, movie_companies AS mc2, movie_info_idx AS mi_idx1, movie_info_idx AS mi_idx2, movie_link AS ml, title AS t1, title AS t2 WHERE cn1.country_code != '[us]' AND it1.info = 'rating' AND it2.info = 'rating' AND kt1.kind in ('tv series', 'episode') AND kt2.kind in ('tv series', 'episode') AND lt.link in ('sequel', 'follows', 'followed by') AND mi_idx2.info < '3.5' AND t2.production_year between 2000 and 2010 AND lt.id = ml.link_type_id AND t1.id = ml.movie_id AND t2.id = ml.linked_movie_id AND it1.id = mi_idx1.info_type_id AND t1.id = mi_idx1.movie_id AND kt1.id = t1.kind_id AND cn1.id = mc1.company_id AND t1.id = mc1.movie_id AND ml.movie_id = mi_idx1.movie_id AND ml.movie_id = mc1.movie_id AND mi_idx1.movie_id = mc1.movie_id AND it2.id = mi_idx2.info_type_id AND t2.id = mi_idx2.movie_id AND kt2.id = t2.kind_id AND cn2.id = mc2.company_id AND t2.id = mc2.movie_id AND ml.linked_movie_id = mi_idx2.movie_id AND ml.linked_movie_id = mc2.movie_id AND mi_idx2.movie_id = mc2.movie_id; 2 | -------------------------------------------------------------------------------- /job/31b.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(mi.info) AS movie_budget, MIN(mi_idx.info) AS movie_votes, MIN(n.name) AS writer, MIN(t.title) AS violent_liongate_movie FROM cast_info AS ci, company_name AS cn, info_type AS it1, info_type AS it2, keyword AS k, movie_companies AS mc, movie_info AS mi, movie_info_idx AS mi_idx, movie_keyword AS mk, name AS n, title AS t WHERE ci.note in ('(writer)', '(head 
writer)', '(written by)', '(story)', '(story editor)') AND cn.name like 'Lionsgate%' AND it1.info = 'genres' AND it2.info = 'votes' AND k.keyword in ('murder', 'violence', 'blood', 'gore', 'death', 'female-nudity', 'hospital') AND mc.note like '%(Blu-ray)%' AND mi.info in ('Horror', 'Thriller') AND n.gender = 'm' AND t.production_year > 2000 and (t.title like '%Freddy%' or t.title like '%Jason%' or t.title like 'Saw%') AND t.id = mi.movie_id AND t.id = mi_idx.movie_id AND t.id = ci.movie_id AND t.id = mk.movie_id AND t.id = mc.movie_id AND ci.movie_id = mi.movie_id AND ci.movie_id = mi_idx.movie_id AND ci.movie_id = mk.movie_id AND ci.movie_id = mc.movie_id AND mi.movie_id = mi_idx.movie_id AND mi.movie_id = mk.movie_id AND mi.movie_id = mc.movie_id AND mi_idx.movie_id = mk.movie_id AND mi_idx.movie_id = mc.movie_id AND mk.movie_id = mc.movie_id AND n.id = ci.person_id AND it1.id = mi.info_type_id AND it2.id = mi_idx.info_type_id AND k.id = mk.keyword_id AND cn.id = mc.company_id; 2 | -------------------------------------------------------------------------------- /job/30b.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(mi.info) AS movie_budget, MIN(mi_idx.info) AS movie_votes, MIN(n.name) AS writer, MIN(t.title) AS complete_gore_movie FROM complete_cast AS cc, comp_cast_type AS cct1, comp_cast_type AS cct2, cast_info AS ci, info_type AS it1, info_type AS it2, keyword AS k, movie_info AS mi, movie_info_idx AS mi_idx, movie_keyword AS mk, name AS n, title AS t WHERE cct1.kind in ('cast', 'crew') AND cct2.kind ='complete+verified' AND ci.note in ('(writer)', '(head writer)', '(written by)', '(story)', '(story editor)') AND it1.info = 'genres' AND it2.info = 'votes' AND k.keyword in ('murder', 'violence', 'blood', 'gore', 'death', 'female-nudity', 'hospital') AND mi.info in ('Horror', 'Thriller') AND n.gender = 'm' AND t.production_year > 2000 and (t.title like '%Freddy%' or t.title like '%Jason%' or t.title like 
'Saw%') AND t.id = mi.movie_id AND t.id = mi_idx.movie_id AND t.id = ci.movie_id AND t.id = mk.movie_id AND t.id = cc.movie_id AND ci.movie_id = mi.movie_id AND ci.movie_id = mi_idx.movie_id AND ci.movie_id = mk.movie_id AND ci.movie_id = cc.movie_id AND mi.movie_id = mi_idx.movie_id AND mi.movie_id = mk.movie_id AND mi.movie_id = cc.movie_id AND mi_idx.movie_id = mk.movie_id AND mi_idx.movie_id = cc.movie_id AND mk.movie_id = cc.movie_id AND n.id = ci.person_id AND it1.id = mi.info_type_id AND it2.id = mi_idx.info_type_id AND k.id = mk.keyword_id AND cct1.id = cc.subject_id AND cct2.id = cc.status_id; 2 | -------------------------------------------------------------------------------- /job/28b.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(cn.name) AS movie_company, MIN(mi_idx.info) AS rating, MIN(t.title) AS complete_euro_dark_movie FROM complete_cast AS cc, comp_cast_type AS cct1, comp_cast_type AS cct2, company_name AS cn, company_type AS ct, info_type AS it1, info_type AS it2, keyword AS k, kind_type AS kt, movie_companies AS mc, movie_info AS mi, movie_info_idx AS mi_idx, movie_keyword AS mk, title AS t WHERE cct1.kind = 'crew' AND cct2.kind != 'complete+verified' AND cn.country_code != '[us]' AND it1.info = 'countries' AND it2.info = 'rating' AND k.keyword in ('murder', 'murder-in-title', 'blood', 'violence') AND kt.kind in ('movie', 'episode') AND mc.note not like '%(USA)%' and mc.note like '%(200%)%' AND mi.info IN ('Sweden', 'Germany', 'Swedish', 'German') AND mi_idx.info > '6.5' AND t.production_year > 2005 AND kt.id = t.kind_id AND t.id = mi.movie_id AND t.id = mk.movie_id AND t.id = mi_idx.movie_id AND t.id = mc.movie_id AND t.id = cc.movie_id AND mk.movie_id = mi.movie_id AND mk.movie_id = mi_idx.movie_id AND mk.movie_id = mc.movie_id AND mk.movie_id = cc.movie_id AND mi.movie_id = mi_idx.movie_id AND mi.movie_id = mc.movie_id AND mi.movie_id = cc.movie_id AND mc.movie_id = mi_idx.movie_id AND 
mc.movie_id = cc.movie_id AND mi_idx.movie_id = cc.movie_id AND k.id = mk.keyword_id AND it1.id = mi.info_type_id AND it2.id = mi_idx.info_type_id AND ct.id = mc.company_type_id AND cn.id = mc.company_id AND cct1.id = cc.subject_id AND cct2.id = cc.status_id; 2 | -------------------------------------------------------------------------------- /engine/infra/UnionFind.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | //--------------------------------------------------------------------------- 3 | #include 4 | #include "infra/SmallVec.hpp" 5 | //--------------------------------------------------------------------------- 6 | namespace engine { 7 | //--------------------------------------------------------------------------- 8 | class UnionFind { 9 | struct Entry { 10 | unsigned parent = 0; 11 | unsigned rank = 0; 12 | }; 13 | /// The entries 14 | SmallVec entries; 15 | 16 | public: 17 | unsigned find(unsigned v) { 18 | if (v >= entries.size()) 19 | return v; 20 | while (entries[v].parent != v) { 21 | entries[v].parent = entries[entries[v].parent].parent; 22 | v = entries[v].parent; 23 | } 24 | return v; 25 | } 26 | unsigned merge(unsigned a, unsigned b) { 27 | a = find(a); 28 | b = find(b); 29 | if (a == b) 30 | return a; 31 | 32 | // Grow 33 | while (std::max(a, b) >= entries.size()) 34 | entries.push_back({unsigned(entries.size()), 1}); 35 | 36 | if (entries[a].rank > entries[b].rank) 37 | std::swap(a, b); 38 | 39 | entries[a].parent = b; 40 | entries[b].rank += entries[a].rank; 41 | return b; 42 | } 43 | }; 44 | //--------------------------------------------------------------------------- 45 | } -------------------------------------------------------------------------------- /include/csv_parser.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | #include 6 | 7 | class CSVParser { 8 | public: 9 | enum Error { 10 | Ok, 11 | 
QuoteNotClosed, 12 | InconsistentColumns, 13 | NoTrailingComma, 14 | }; 15 | 16 | CSVParser(char escape = '"', char sep = ',', bool has_trailing_comma = false) 17 | : escape_(escape) 18 | , comma_(sep) 19 | , has_trailing_comma_(has_trailing_comma) {} 20 | 21 | [[nodiscard]] Error execute(const char* buffer, size_t len); 22 | [[nodiscard]] Error finish(); 23 | 24 | virtual void on_field(size_t col_idx, size_t row_idx, const char* begin, size_t len) = 0; 25 | 26 | private: 27 | // configure 28 | char escape_{'"'}; // may also be '\\' 29 | char comma_{','}; // may also be '|' 30 | // true means # commas = # columns and the last comma in each line is followed by the record 31 | // separator; false means # commas + 1 = # columns 32 | bool has_trailing_comma_{false}; 33 | 34 | // states 35 | std::vector current_field_; 36 | size_t col_idx_{0}; 37 | size_t row_idx_{0}; 38 | size_t num_cols_{0}; 39 | bool after_first_row_{false}; 40 | bool quoted_{false}; 41 | bool after_field_sep_{false}; 42 | bool after_record_sep_{false}; 43 | bool escaping_{false}; 44 | bool newlining_{false}; 45 | }; 46 | -------------------------------------------------------------------------------- /engine/query/Restriction.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | //--------------------------------------------------------------------------- 3 | #include "query/RuntimeValue.hpp" 4 | //--------------------------------------------------------------------------- 5 | namespace engine { 6 | //--------------------------------------------------------------------------- 7 | class Hashtable; 8 | //--------------------------------------------------------------------------- 9 | /// A restriction 10 | struct Restriction { 11 | static constexpr uint64_t nullValue = RuntimeValue::nullValue; 12 | /// The type order is used for ordering restrictions 13 | /// Selective & cheap comes first 14 | enum Type { 15 | /// Attribute is equal to value 16 | Eq,
17 | /// Attribute is not null 18 | NotNull, 19 | /// Attribute will likely find a join partner 20 | Join, 21 | /// Attribute will definitely find a join partner 22 | JoinPrecise 23 | }; 24 | /// The type of the restriction 25 | Type type; 26 | /// The constant value compared with 27 | RuntimeValue cst; 28 | /// The hash table for join filters 29 | Hashtable* joinFilter; 30 | /// The selectivity estimation for the restriction 31 | double selectivity = 1.0; 32 | 33 | /// Check whether the restriction is satisfied by a value 34 | bool operator()(uint64_t val) const noexcept; 35 | }; 36 | //--------------------------------------------------------------------------- 37 | } -------------------------------------------------------------------------------- /job/28c.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(cn.name) AS movie_company, MIN(mi_idx.info) AS rating, MIN(t.title) AS complete_euro_dark_movie FROM complete_cast AS cc, comp_cast_type AS cct1, comp_cast_type AS cct2, company_name AS cn, company_type AS ct, info_type AS it1, info_type AS it2, keyword AS k, kind_type AS kt, movie_companies AS mc, movie_info AS mi, movie_info_idx AS mi_idx, movie_keyword AS mk, title AS t WHERE cct1.kind = 'cast' AND cct2.kind = 'complete' AND cn.country_code != '[us]' AND it1.info = 'countries' AND it2.info = 'rating' AND k.keyword in ('murder', 'murder-in-title', 'blood', 'violence') AND kt.kind in ('movie', 'episode') AND mc.note not like '%(USA)%' and mc.note like '%(200%)%' AND mi.info IN ('Sweden', 'Norway', 'Germany', 'Denmark', 'Swedish', 'Danish', 'Norwegian', 'German', 'USA', 'American') AND mi_idx.info < '8.5' AND t.production_year > 2005 AND kt.id = t.kind_id AND t.id = mi.movie_id AND t.id = mk.movie_id AND t.id = mi_idx.movie_id AND t.id = mc.movie_id AND t.id = cc.movie_id AND mk.movie_id = mi.movie_id AND mk.movie_id = mi_idx.movie_id AND mk.movie_id = mc.movie_id AND mk.movie_id = cc.movie_id AND mi.movie_id = 
mi_idx.movie_id AND mi.movie_id = mc.movie_id AND mi.movie_id = cc.movie_id AND mc.movie_id = mi_idx.movie_id AND mc.movie_id = cc.movie_id AND mi_idx.movie_id = cc.movie_id AND k.id = mk.keyword_id AND it1.id = mi.info_type_id AND it2.id = mi_idx.info_type_id AND ct.id = mc.company_type_id AND cn.id = mc.company_id AND cct1.id = cc.subject_id AND cct2.id = cc.status_id; 2 | -------------------------------------------------------------------------------- /job/28a.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(cn.name) AS movie_company, MIN(mi_idx.info) AS rating, MIN(t.title) AS complete_euro_dark_movie FROM complete_cast AS cc, comp_cast_type AS cct1, comp_cast_type AS cct2, company_name AS cn, company_type AS ct, info_type AS it1, info_type AS it2, keyword AS k, kind_type AS kt, movie_companies AS mc, movie_info AS mi, movie_info_idx AS mi_idx, movie_keyword AS mk, title AS t WHERE cct1.kind = 'crew' AND cct2.kind != 'complete+verified' AND cn.country_code != '[us]' AND it1.info = 'countries' AND it2.info = 'rating' AND k.keyword in ('murder', 'murder-in-title', 'blood', 'violence') AND kt.kind in ('movie', 'episode') AND mc.note not like '%(USA)%' and mc.note like '%(200%)%' AND mi.info IN ('Sweden', 'Norway', 'Germany', 'Denmark', 'Swedish', 'Danish', 'Norwegian', 'German', 'USA', 'American') AND mi_idx.info < '8.5' AND t.production_year > 2000 AND kt.id = t.kind_id AND t.id = mi.movie_id AND t.id = mk.movie_id AND t.id = mi_idx.movie_id AND t.id = mc.movie_id AND t.id = cc.movie_id AND mk.movie_id = mi.movie_id AND mk.movie_id = mi_idx.movie_id AND mk.movie_id = mc.movie_id AND mk.movie_id = cc.movie_id AND mi.movie_id = mi_idx.movie_id AND mi.movie_id = mc.movie_id AND mi.movie_id = cc.movie_id AND mc.movie_id = mi_idx.movie_id AND mc.movie_id = cc.movie_id AND mi_idx.movie_id = cc.movie_id AND k.id = mk.keyword_id AND it1.id = mi.info_type_id AND it2.id = mi_idx.info_type_id AND ct.id = mc.company_type_id 
/// Move-only handle for a mapped memory region.
/// The handle starts out empty, releases whatever it holds through reset() and can hand
/// its state over to another handle by move.
class Mmap {
   /// First byte of the region, nullptr while the handle is empty
   char* data_ = nullptr;
   /// Size of the region
   size_t size_ = 0;
   /// File handle belonging to the region, -1 by default
   int file = -1;

   public:
   /// Create an empty handle
   constexpr Mmap() noexcept = default;
   /// Move construction: start empty, then reuse the move assignment
   Mmap(Mmap&& other) noexcept {
      operator=(std::move(other));
   }
   /// Copying is not supported
   Mmap(const Mmap&) = delete;
   /// Copying is not supported
   Mmap& operator=(const Mmap&) = delete;
   /// Move assignment: reset this handle first, afterwards exchange all members with other
   Mmap& operator=(Mmap&& other) noexcept {
      reset();
      std::swap(file, other.file);
      std::swap(size_, other.size_);
      std::swap(data_, other.data_);
      return *this;
   }
   /// Destruction resets the handle
   ~Mmap() noexcept {
      reset();
   }

   /// Reset the handle (defined out of line)
   void reset() noexcept;

   /// Map a file (defined out of line)
   static Mmap mapFile(const std::string& fileName);
   /// Map plain memory of the given size (defined out of line)
   static Mmap mapMemory(size_t size);
   /// Prefault a memory range (defined out of line)
   static void prefault(void* data, size_t size);

   /// A handle converts to true iff it refers to a region
   constexpr operator bool() const noexcept { return !(data_ == nullptr); }

   /// Start of the region
   char* data() const { return data_; }
   /// Size of the region
   size_t size() const { return size_; }
};
/// Parsing SQL and plans.
/// Pure interface holder: the only entry point is the static parse() function, the nested
/// types describe what it returns.
class SQL {
   public:
   /// Plan maker: abstract factory that produces the QueryPlan of one query.
   /// Implementations are not part of this header.
   struct PlanMaker {
      virtual ~PlanMaker() noexcept = default;
      /// Build the plan
      virtual QueryPlan makePlan() = 0;
   };
   /// The parse result for a single query
   struct Query {
      /// Name of the query
      std::string name;
      /// The SQL query for duckdb
      std::string sql;
      /// The plan for the query (owning pointer to the factory)
      std::unique_ptr planMaker;
      /// The index for the DuckDB result relation
      unsigned resultRelation;
   };
   /// Batch of queries together with the data they run on
   struct Batch {
      /// The data (owned by the batch)
      std::unique_ptr db;
      /// The queries
      std::vector queries;
   };
   /// Parse all queries.
   /// planFile is the path of the plan file, selected is taken by value.
   /// NOTE(review): selected presumably restricts parsing to the named queries - confirm
   /// against the implementation.
   static Batch parse(const std::string& planFile, std::vector selected);
};
link_type_id_movie_link on movie_link(link_type_id); 11 | create index movie_id_aka_title on aka_title(movie_id); 12 | create index movie_id_cast_info on cast_info(movie_id); 13 | create index movie_id_complete_cast on complete_cast(movie_id); 14 | create index movie_id_movie_companies on movie_companies(movie_id); 15 | create index movie_id_movie_info_idx on movie_info_idx(movie_id); 16 | create index movie_id_movie_keyword on movie_keyword(movie_id); 17 | create index movie_id_movie_link on movie_link(movie_id); 18 | create index movie_id_movie_info on movie_info(movie_id); 19 | create index person_id_aka_name on aka_name(person_id); 20 | create index person_id_cast_info on cast_info(person_id); 21 | create index person_id_person_info on person_info(person_id); 22 | create index person_role_id_cast_info on cast_info(person_role_id); 23 | create index role_id_cast_info on cast_info(role_id); 24 | -------------------------------------------------------------------------------- /engine/tools/JoinPipelineLoader.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | //--------------------------------------------------------------------------- 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | //--------------------------------------------------------------------------- 9 | namespace engine { 10 | //--------------------------------------------------------------------------- 11 | struct ParsedSQL; 12 | class DataSource; 13 | using OutputAttrsType = std::vector>; 14 | //--------------------------------------------------------------------------- 15 | struct DataSourceBuilder { 16 | DataSource& db; 17 | std::vector columns; 18 | std::unordered_map tables; 19 | }; 20 | //--------------------------------------------------------------------------- 21 | struct JoinPipelineLoader { 22 | const ParsedSQL& parsed_sql; 23 | DataSourceBuilder& db; 24 | ::Plan ret; 25 | 26 | std::unordered_set 
extract_entities(const nlohmann::json& node); 27 | std::tuple>> recurse(const nlohmann::json& node, const OutputAttrsType& required_attrs); 28 | 29 | static ::Plan load_join_pipeline(DataSourceBuilder& db, const nlohmann::json& node, const ParsedSQL& parsed_sql); 30 | }; 31 | //--------------------------------------------------------------------------- 32 | } 33 | //--------------------------------------------------------------------------- -------------------------------------------------------------------------------- /job/29b.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(chn.name) AS voiced_char, MIN(n.name) AS voicing_actress, MIN(t.title) AS voiced_animation FROM aka_name AS an, complete_cast AS cc, comp_cast_type AS cct1, comp_cast_type AS cct2, char_name AS chn, cast_info AS ci, company_name AS cn, info_type AS it, info_type AS it3, keyword AS k, movie_companies AS mc, movie_info AS mi, movie_keyword AS mk, name AS n, person_info AS pi, role_type AS rt, title AS t WHERE cct1.kind ='cast' AND cct2.kind ='complete+verified' AND chn.name = 'Queen' AND ci.note in ('(voice)', '(voice) (uncredited)', '(voice: English version)') AND cn.country_code ='[us]' AND it.info = 'release dates' AND it3.info = 'height' AND k.keyword = 'computer-animation' AND mi.info like 'USA:%200%' AND n.gender ='f' and n.name like '%An%' AND rt.role ='actress' AND t.title = 'Shrek 2' AND t.production_year between 2000 and 2005 AND t.id = mi.movie_id AND t.id = mc.movie_id AND t.id = ci.movie_id AND t.id = mk.movie_id AND t.id = cc.movie_id AND mc.movie_id = ci.movie_id AND mc.movie_id = mi.movie_id AND mc.movie_id = mk.movie_id AND mc.movie_id = cc.movie_id AND mi.movie_id = ci.movie_id AND mi.movie_id = mk.movie_id AND mi.movie_id = cc.movie_id AND ci.movie_id = mk.movie_id AND ci.movie_id = cc.movie_id AND mk.movie_id = cc.movie_id AND cn.id = mc.company_id AND it.id = mi.info_type_id AND n.id = ci.person_id AND rt.id = ci.role_id 
AND n.id = an.person_id AND ci.person_id = an.person_id AND chn.id = ci.person_role_id AND n.id = pi.person_id AND ci.person_id = pi.person_id AND it3.id = pi.info_type_id AND k.id = mk.keyword_id AND cct1.id = cc.subject_id AND cct2.id = cc.status_id; 2 | -------------------------------------------------------------------------------- /job/29c.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(chn.name) AS voiced_char, MIN(n.name) AS voicing_actress, MIN(t.title) AS voiced_animation FROM aka_name AS an, complete_cast AS cc, comp_cast_type AS cct1, comp_cast_type AS cct2, char_name AS chn, cast_info AS ci, company_name AS cn, info_type AS it, info_type AS it3, keyword AS k, movie_companies AS mc, movie_info AS mi, movie_keyword AS mk, name AS n, person_info AS pi, role_type AS rt, title AS t WHERE cct1.kind ='cast' AND cct2.kind ='complete+verified' AND ci.note in ('(voice)', '(voice: Japanese version)', '(voice) (uncredited)', '(voice: English version)') AND cn.country_code ='[us]' AND it.info = 'release dates' AND it3.info = 'trivia' AND k.keyword = 'computer-animation' AND mi.info is not null and (mi.info like 'Japan:%200%' or mi.info like 'USA:%200%') AND n.gender ='f' and n.name like '%An%' AND rt.role ='actress' AND t.production_year between 2000 and 2010 AND t.id = mi.movie_id AND t.id = mc.movie_id AND t.id = ci.movie_id AND t.id = mk.movie_id AND t.id = cc.movie_id AND mc.movie_id = ci.movie_id AND mc.movie_id = mi.movie_id AND mc.movie_id = mk.movie_id AND mc.movie_id = cc.movie_id AND mi.movie_id = ci.movie_id AND mi.movie_id = mk.movie_id AND mi.movie_id = cc.movie_id AND ci.movie_id = mk.movie_id AND ci.movie_id = cc.movie_id AND mk.movie_id = cc.movie_id AND cn.id = mc.company_id AND it.id = mi.info_type_id AND n.id = ci.person_id AND rt.id = ci.role_id AND n.id = an.person_id AND ci.person_id = an.person_id AND chn.id = ci.person_role_id AND n.id = pi.person_id AND ci.person_id = pi.person_id AND 
it3.id = pi.info_type_id AND k.id = mk.keyword_id AND cct1.id = cc.subject_id AND cct2.id = cc.status_id; 2 | -------------------------------------------------------------------------------- /job/29a.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(chn.name) AS voiced_char, MIN(n.name) AS voicing_actress, MIN(t.title) AS voiced_animation FROM aka_name AS an, complete_cast AS cc, comp_cast_type AS cct1, comp_cast_type AS cct2, char_name AS chn, cast_info AS ci, company_name AS cn, info_type AS it, info_type AS it3, keyword AS k, movie_companies AS mc, movie_info AS mi, movie_keyword AS mk, name AS n, person_info AS pi, role_type AS rt, title AS t WHERE cct1.kind ='cast' AND cct2.kind ='complete+verified' AND chn.name = 'Queen' AND ci.note in ('(voice)', '(voice) (uncredited)', '(voice: English version)') AND cn.country_code ='[us]' AND it.info = 'release dates' AND it3.info = 'trivia' AND k.keyword = 'computer-animation' AND mi.info is not null and (mi.info like 'Japan:%200%' or mi.info like 'USA:%200%') AND n.gender ='f' and n.name like '%An%' AND rt.role ='actress' AND t.title = 'Shrek 2' AND t.production_year between 2000 and 2010 AND t.id = mi.movie_id AND t.id = mc.movie_id AND t.id = ci.movie_id AND t.id = mk.movie_id AND t.id = cc.movie_id AND mc.movie_id = ci.movie_id AND mc.movie_id = mi.movie_id AND mc.movie_id = mk.movie_id AND mc.movie_id = cc.movie_id AND mi.movie_id = ci.movie_id AND mi.movie_id = mk.movie_id AND mi.movie_id = cc.movie_id AND ci.movie_id = mk.movie_id AND ci.movie_id = cc.movie_id AND mk.movie_id = cc.movie_id AND cn.id = mc.company_id AND it.id = mi.info_type_id AND n.id = ci.person_id AND rt.id = ci.role_id AND n.id = an.person_id AND ci.person_id = an.person_id AND chn.id = ci.person_role_id AND n.id = pi.person_id AND ci.person_id = pi.person_id AND it3.id = pi.info_type_id AND k.id = mk.keyword_id AND cct1.id = cc.subject_id AND cct2.id = cc.status_id; 2 | 
-------------------------------------------------------------------------------- /tests/build_database.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | #include 5 | 6 | int main(int argc, char* argv[]) { 7 | using namespace duckdb; 8 | namespace fs = std::filesystem; 9 | 10 | if (argc < 2) { 11 | fmt::println(stderr, "Usage: {} ", argv[0]); 12 | exit(EXIT_FAILURE); 13 | } 14 | 15 | auto schema = read_file(fs::path("job") / "schema.sql"); 16 | 17 | DuckDB db(argv[1]); 18 | Connection conn(db); 19 | auto result = conn.Query(schema); 20 | if (result->HasError()) { 21 | fmt::println("Error: {}", result->GetError()); 22 | } 23 | 24 | std::vector table_names{ 25 | "char_name", 26 | "kind_type", 27 | "cast_info", 28 | "movie_companies", 29 | "role_type", 30 | "complete_cast", 31 | "comp_cast_type", 32 | "company_name", 33 | "company_type", 34 | "movie_link", 35 | "movie_keyword", 36 | "name", 37 | "info_type", 38 | "movie_info_idx", 39 | "person_info", 40 | "link_type", 41 | "title", 42 | "aka_name", 43 | "movie_info", 44 | "keyword", 45 | "aka_title", 46 | }; 47 | 48 | for (auto& table: table_names) { 49 | result = 50 | conn.Query(fmt::format("COPY {0} FROM 'imdb/{0}.csv' (ESCAPE '\\');", table)); 51 | if (result->HasError()) { 52 | fmt::println("Error: {}", result->GetError()); 53 | } else { 54 | fmt::println("Successfully loaded table {} into {}", table, argv[1]); 55 | } 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /engine/op/CollectorTarget.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | //--------------------------------------------------------------------------- 3 | #include "infra/SmallVec.hpp" 4 | #include "op/TargetBase.hpp" 5 | #include 6 | //--------------------------------------------------------------------------- 7 | namespace engine { 8 | 
/// One occurrence of a table: the table name plus a numeric id.
/// Entities are equality comparable and ordered by (table, id).
struct TableEntity {
    std::string table;
    int         id;

    friend bool operator==(const TableEntity& left, const TableEntity& right);
    friend bool operator!=(const TableEntity& left, const TableEntity& right);
    friend bool operator<(const TableEntity& left, const TableEntity& right);
};

/// Two entities are equal iff both the id and the table name match
inline bool operator==(const TableEntity& left, const TableEntity& right) {
    if (left.id != right.id) {
        return false;
    }
    return left.table == right.table;
}

/// Negation of operator==
inline bool operator!=(const TableEntity& left, const TableEntity& right) {
    const bool same = (left == right);
    return !same;
}

/// Lexicographic order: table name first, the id breaks ties
inline bool operator<(const TableEntity& left, const TableEntity& right) {
    const int by_name = left.table.compare(right.table);
    if (by_name != 0) {
        return by_name < 0;
    }
    return left.id < right.id;
}
23 | #define SPC__KERNEL "Linux 6.8.0-50-generic aarch64" 24 | 25 | // ARM: possible options are SVE, SVE2, and NEON. No ARM CPU older than Ampere Altra Max will be used. 26 | #define SPC__SUPPORTS_NEON 27 | 28 | // Cache information from `getconf -a | grep CACHE`. 29 | // As Ubuntu did not list all numbers, we also took cache sizes from `cat /sys/devices/system/cpu/cpu0/cache/index*/size` 30 | #define SPC__LEVEL1_ICACHE_SIZE 65536 31 | #define SPC__LEVEL1_ICACHE_ASSOC 32 | #define SPC__LEVEL1_ICACHE_LINESIZE 64 33 | #define SPC__LEVEL1_DCACHE_SIZE 65536 34 | #define SPC__LEVEL1_DCACHE_ASSOC 35 | #define SPC__LEVEL1_DCACHE_LINESIZE 64 36 | #define SPC__LEVEL2_CACHE_SIZE 1048576 37 | #define SPC__LEVEL2_CACHE_ASSOC 38 | #define SPC__LEVEL2_CACHE_LINESIZE 39 | #define SPC__LEVEL3_CACHE_SIZE 40 | #define SPC__LEVEL3_CACHE_ASSOC 41 | #define SPC__LEVEL3_CACHE_LINESIZE 42 | #define SPC__LEVEL4_CACHE_SIZE 43 | #define SPC__LEVEL4_CACHE_ASSOC 44 | #define SPC__LEVEL4_CACHE_LINESIZE 45 | -------------------------------------------------------------------------------- /include/hardware__sidon.h: -------------------------------------------------------------------------------- 1 | // Hardware information for Intel Xeon E7-4880 v2 node sidon. 2 | 3 | // Architecture from `uname -srm`. 4 | #define SPC__X86_64 5 | 6 | // CPU from `/proc/cpuinfo`. 7 | #define SPC__CPU_NAME "Intel(R) Xeon(R) CPU E7-4880 v2 @ 2.50GHz" 8 | 9 | // The servers might have multiple CPUs. We limit all benchmarks to a single node using numactl. The listed CPU numbers 10 | // below are for a single CPU. The listed NUMA numbers are just meant to give you a rough idea of the system. 11 | #define SPC__CORE_COUNT 15 12 | #define SPC__THREAD_COUNT 30 13 | #define SPC__NUMA_NODE_COUNT 4 14 | #define SPC__NUMA_NODES_ACTIVE_IN_BENCHMARK 1 15 | 16 | // Main memory per NUMA node (MB). 17 | #define SPC__NUMA_NODE_DRAM_MB 515809 18 | 19 | // Obtained from `lsb_release -a`. 
/// Run one join pipeline for the concrete operator types this function is instantiated with.
/// The type-erased arguments are converted back to their concrete types, the offsets are
/// copied into fixed size arrays, one probe is created per probe parameter and finally the
/// assembled JoinPipeline is invoked.
/// targetBase and scanBase are converted with dynamic_cast to a reference, so passing an
/// object of a different dynamic type throws std::bad_cast.
/// keyOffsets must contain Keys::size() entries and attrOffsets Attrs::size() entries; this
/// is only checked by assert, i.e. not at all when NDEBUG is defined.
template
void PipelineFunctions::runPipeline(TargetBase& targetBase, ScanBase& scanBase, engine::span probeParams, engine::span keyOffsets, engine::span attrOffsets) {
   static_assert(std::is_base_of_v);
   // Recover the concrete operator types
   auto& target = dynamic_cast(targetBase);
   auto& scan = dynamic_cast(scanBase);
   assert(keyOffsets.size() == Keys::size());
   assert(attrOffsets.size() == Attrs::size());
   // Copy the runtime spans into arrays whose size is known at compile time
   std::array ko;
   for (size_t i = 0; i < Keys::size(); ++i)
      ko[i] = keyOffsets[i];
   std::array ao;
   for (size_t i = 0; i < Attrs::size(); ++i)
      ao[i] = attrOffsets[i];
   // One probe per entry of probeParams, collected in a tuple
   auto probes = genProbes(probeParams, std::make_index_sequence{});
   // Assemble the pipeline and execute it
   JoinPipeline pipeline{target, scan, probes, ko, ao};
   pipeline();
}
26 | #define SPC__SUPPORTS_AVX 27 | #define SPC__SUPPORTS_AVX2 28 | 29 | // Cache information from `getconf -a | grep CACHE`. 30 | #define SPC__LEVEL1_ICACHE_SIZE 32768 31 | #define SPC__LEVEL1_ICACHE_ASSOC 32 | #define SPC__LEVEL1_ICACHE_LINESIZE 64 33 | #define SPC__LEVEL1_DCACHE_SIZE 32768 34 | #define SPC__LEVEL1_DCACHE_ASSOC 8 35 | #define SPC__LEVEL1_DCACHE_LINESIZE 64 36 | #define SPC__LEVEL2_CACHE_SIZE 524288 37 | #define SPC__LEVEL2_CACHE_ASSOC 8 38 | #define SPC__LEVEL2_CACHE_LINESIZE 64 39 | #define SPC__LEVEL3_CACHE_SIZE 16777216 40 | #define SPC__LEVEL3_CACHE_ASSOC 16 41 | #define SPC__LEVEL3_CACHE_LINESIZE 64 42 | #define SPC__LEVEL4_CACHE_SIZE 0 43 | #define SPC__LEVEL4_CACHE_ASSOC 44 | #define SPC__LEVEL4_CACHE_LINESIZE 45 | -------------------------------------------------------------------------------- /include/hardware__cp02.h: -------------------------------------------------------------------------------- 1 | // Hardware information for IBM Power8 node cp02. 2 | 3 | // Architecture from `uname -srm`. 4 | #define SPC__PPC64LE 5 | 6 | // CPU from `/proc/cpuinfo`. 7 | #define SPC__CPU_NAME "POWER8 (architected), altivec supported" 8 | 9 | // The servers might have multiple CPUs. We limit all benchmarks to a single node using numactl. The listed CPU numbers 10 | // below are for a single CPU. The listed NUMA numbers are just meant to give you a rough idea of the system. 11 | #define SPC__CORE_COUNT 12 12 | #define SPC__THREAD_COUNT 96 13 | #define SPC__NUMA_NODE_COUNT 8 14 | #define SPC__NUMA_NODES_ACTIVE_IN_BENCHMARK 1 15 | 16 | // Main memory per NUMA node (MB). 17 | #define SPC__NUMA_NODE_DRAM_MB 1039964 18 | 19 | // Obtained from `lsb_release -a`. 20 | #define SPC__OS "Ubuntu 20.04.6 LTS" 21 | 22 | // Obtained from: `uname -srm`. 23 | #define SPC__KERNEL "Linux 5.4.0-137-generic x86_64" 24 | 25 | // IBM: possible options are VSX, VMX, and MMA. No IBM CPU older than Power8 will be used. 
// Detect the address sanitizer: use __has_feature where the compiler offers it, otherwise
// fall back to the __SANITIZE_ADDRESS__ macro
#ifdef __has_feature
#if __has_feature(address_sanitizer)
#define ADDRESS_SANITIZER_ACTIVE
#endif
#else
#ifdef __SANITIZE_ADDRESS__
#define ADDRESS_SANITIZER_ACTIVE
#endif
#endif
//---------------------------------------------------------------------------
// The sanitizer runtime hooks are only declared when the sanitizer is active
#ifdef ADDRESS_SANITIZER_ACTIVE
extern "C" {
void __asan_poison_memory_region(void const volatile* p, size_t size); // NOLINT(bugprone-reserved-identifier)
void __asan_unpoison_memory_region(void const volatile* p, size_t size); // NOLINT(bugprone-reserved-identifier)
}
#endif
//---------------------------------------------------------------------------
namespace engine::AddressSanitizer {
/// Is the address sanitizer compiled into this binary?
#ifdef ADDRESS_SANITIZER_ACTIVE
constexpr const bool addressSanitizerActive = true;
#else
constexpr const bool addressSanitizerActive = false;
#endif

/// Poison a region of memory, does nothing without the address sanitizer
[[gnu::always_inline]] static inline void poisonMemoryRegion([[maybe_unused]] void const volatile* p, [[maybe_unused]] size_t size) {
#ifdef ADDRESS_SANITIZER_ACTIVE
   __asan_poison_memory_region(p, size);
#endif
}
/// Unpoison a region of memory, does nothing without the address sanitizer
[[gnu::always_inline]] static inline void unpoisonMemoryRegion([[maybe_unused]] void const volatile* p, [[maybe_unused]] size_t size) {
#ifdef ADDRESS_SANITIZER_ACTIVE
   __asan_unpoison_memory_region(p, size);
#endif
}
}
/// Masked copy routines that gather selected source values into 64 bit destination slots.
/// Source and destination positions are each described by one 64 bit bitset, so a single
/// call can address at most 64 indices on either side. Only declarations live here, the
/// implementations are in a separate translation unit.
class CopyLogic {
   public:
   /// Copy 32 bit integers with mask, each value is stored in a 64 bit destination slot
   /// srcOffsets is a bitset that describes the indices of the src array that should be copied
   /// dstOffsets is a bitset that describes the indices of the dst array that should be copied to
   /// The rest of the indices of the dst array (up to numTuples) should be set to null (~0ull)
   static void extractInt32(uint64_t* dst, const uint32_t* src, uint64_t srcOffsets, uint64_t dstOffsets, size_t numTuples) noexcept;
   /// Copy 64 bit integers with mask
   /// srcOffsets is a bitset that describes the indices of the src array that should be copied
   /// dstOffsets is a bitset that describes the indices of the dst array that should be copied to
   /// The rest of the indices of the dst array (up to numTuples) should be set to null (~0ull)
   static void extractInt64(uint64_t* dst, const uint64_t* src, uint64_t srcOffsets, uint64_t dstOffsets, size_t numTuples) noexcept;
   /// Copy strings with mask, src holds one 16 bit entry per string
   /// srcOffsets is a bitset that describes the indices of the src array that should be copied
   /// dstOffsets is a bitset that describes the indices of the dst array that should be copied to
   /// The rest of the indices of the dst array (up to numTuples) should be set to null (~0ull)
   /// All the strings are short strings
   /// NOTE(review): the 16 bit entries are presumably offsets relative to stringHead and the
   /// 64 bit result an encoded string reference - confirm against the implementation
   static void extractVarChar(uint64_t* dst, const uint16_t* src, uint64_t srcOffsets, uint64_t dstOffsets, size_t numTuples, const char* stringHead) noexcept;
};
/// Polymorphic base class for join pipelines.
///
/// The class declares nothing but a virtual destructor, so its sole purpose is to
/// allow derived objects to be owned and destroyed through a base pointer.
class JoinPipelineBase {
    public:
    /// Destructor. Must be accessible: with a private destructor no derived class
    /// could be destroyed (its own destructor would be implicitly deleted).
    virtual ~JoinPipelineBase() noexcept = default;
};
//--------------------------------------------------------------------------- 26 | namespace Contest { 27 | //--------------------------------------------------------------------------- 28 | ColumnarTable execute(const Plan& plan, [[maybe_unused]] void* context) { 29 | engine::Scheduler::start_query(); 30 | engine::pagememory::start_query(); 31 | ColumnarTable output; 32 | { 33 | engine::DataSource ds; 34 | auto imported = engine::PlanImport::importPlan(ds, plan); 35 | output = imported.run(); 36 | } 37 | engine::querymemory::end_query(); 38 | engine::Scheduler::end_query(); 39 | return std::move(output); 40 | } 41 | //--------------------------------------------------------------------------- 42 | void* build_context() { 43 | engine::Scheduler::setup(); 44 | 45 | return nullptr; 46 | } 47 | //--------------------------------------------------------------------------- 48 | void destroy_context([[maybe_unused]] void* context) { engine::Scheduler::teardown(); } 49 | //--------------------------------------------------------------------------- 50 | } // namespace Contest 51 | //--------------------------------------------------------------------------- 52 | -------------------------------------------------------------------------------- /engine/storage/RestrictionLogic.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | //--------------------------------------------------------------------------- 3 | #include "infra/QueryMemory.hpp" 4 | #include "storage/BitLogic.hpp" 5 | #include 6 | #include 7 | #include 8 | #include 9 | //--------------------------------------------------------------------------- 10 | namespace engine { 11 | //--------------------------------------------------------------------------- 12 | struct Restriction; 13 | //--------------------------------------------------------------------------- 14 | class RestrictionLogic { 15 | public: 16 | /// Filter masked values and return a bitset mask 17 | 
uint64_t run(const uint32_t* values, uint64_t mask) const { 18 | if (!mask) 19 | return 0; 20 | if (BitLogic::isDense(mask)) [[likely]] { 21 | auto [st, en] = BitLogic::getRange(mask); 22 | auto len = en - st; 23 | auto result = runDense(values + st, len); 24 | return result << st; 25 | } else { 26 | return runSparse(values, mask); 27 | } 28 | } 29 | virtual uint64_t runSparse(const uint32_t* values, uint64_t mask) const = 0; 30 | virtual uint64_t runDense(const uint32_t* values, size_t len) const = 0; 31 | virtual std::pair runAndSkip(const uint32_t* values, size_t len) const = 0; 32 | /// Estimate the selectivity very broadly 33 | virtual double estimateSelectivity() const = 0; 34 | virtual double estimateCost() const = 0; 35 | /// Destructor 36 | virtual ~RestrictionLogic() noexcept = default; 37 | 38 | /// The null restriction 39 | static const RestrictionLogic* notNullRestriction; 40 | 41 | /// Setup a restriction logic given restriction 42 | static UniquePtr setupRestriction(const Restriction& restriction); 43 | 44 | virtual std::string_view name() const = 0; 45 | }; 46 | //--------------------------------------------------------------------------- 47 | } 48 | //--------------------------------------------------------------------------- 49 | -------------------------------------------------------------------------------- /include/attribute.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | #include 7 | 8 | enum class DataType { 9 | INT32, // 4-byte integer 10 | INT64, // 8-byte integer 11 | FP64, // 8-byte floating point 12 | VARCHAR, // string of arbitary length 13 | }; 14 | 15 | template <> 16 | struct fmt::formatter { 17 | template 18 | constexpr auto parse(ParseContext& ctx) { 19 | return ctx.begin(); 20 | } 21 | 22 | template 23 | auto format(DataType value, FormatContext& ctx) const { 24 | static std::array names{ 25 | "INT32", 26 | "INT64", 27 | "FP64", 28 | 
// Dispatch on a runtime DataType value.
//
// `type` is the DataType to switch on and `TYPE` is the name of a type alias that the
// macro declares in each case: int32_t for INT32, int64_t for INT64, double for FP64
// and std::string for VARCHAR. The remaining arguments are pasted verbatim as the
// body of every case, so they are compiled once per alias and may refer to TYPE.
// Each case ends in `break`, and the whole switch is wrapped in do { } while (0) so
// that an invocation followed by a semicolon forms a single statement.
#define DISPATCH_DATA_TYPE(type, TYPE, ...) \
    do { \
        switch (type) { \
        case DataType::INT32: { \
            using TYPE = int32_t; \
            __VA_ARGS__ \
            break; \
        } \
        case DataType::INT64: { \
            using TYPE = int64_t; \
            __VA_ARGS__ \
            break; \
        } \
        case DataType::FP64: { \
            using TYPE = double; \
            __VA_ARGS__ \
            break; \
        } \
        case DataType::VARCHAR: { \
            using TYPE = std::string; \
            __VA_ARGS__ \
            break; \
        } \
        } \
    } while (0)
namespace engine {
//---------------------------------------------------------------------------
/// A non-owning view of a contiguous sequence of T.
///
/// The view stores a pointer and an element count only; it never allocates or frees.
/// Element access is bounds checked with assert, i.e. only in builds without NDEBUG.
template <typename T>
class span {
    public:
    using value_type = T;
    using iterator = T*;
    using const_iterator = const T*;

    private:
    /// First element of the viewed range
    T* data_;
    /// Number of elements in the viewed range
    size_t size_;

    public:
    /// View `size` elements starting at `data`
    constexpr span(T* data, size_t size) noexcept : data_(data), size_(size) {}
    /// View the half-open pointer range [begin, end)
    constexpr span(T* begin, T* end) noexcept : data_(begin), size_(end - begin) {}
    /// View any object that offers data() and size()
    template <typename Container>
    constexpr span(Container& arr) noexcept : data_(arr.data()), size_(arr.size()) {}
    /// View any constant object that offers data() and size()
    template <typename Container>
    constexpr span(const Container& arr) noexcept : data_(arr.data()), size_(arr.size()) {}
    /// The empty view
    constexpr span() noexcept : data_(nullptr), size_(0) {}

    /// Number of elements
    constexpr size_t size() const noexcept { return size_; }
    /// Is the view empty?
    constexpr bool empty() const noexcept { return size_ == 0; }
    /// Element access; i must be smaller than size()
    constexpr T& operator[](size_t i) const noexcept {
        assert(i < size_);
        return data_[i];
    }
    /// First element; the view must not be empty
    constexpr T& front() const noexcept {
        assert(size_ > 0);
        return data_[0];
    }
    /// Last element; the view must not be empty
    constexpr T& back() const noexcept {
        assert(size_ > 0);
        return data_[size_ - 1];
    }
    /// Pointer to the first element (nullptr for a default constructed view)
    constexpr T* data() const noexcept { return data_; }

    constexpr auto begin() const noexcept { return data_; }
    constexpr auto end() const noexcept { return data_ + size_; }
};
//---------------------------------------------------------------------------
/// Deduce a mutable view from a mutable container
template <typename Container>
span(Container&) -> span<typename Container::value_type>;
//---------------------------------------------------------------------------
/// Deduce a read-only view from a constant container
template <typename Container>
span(const Container&) -> span<const typename Container::value_type>;
//---------------------------------------------------------------------------
} // namespace engine
/// Run a collecting pipeline over the given table scan.
///
/// The number of columns produced by the scan is only known at runtime, while
/// collectCallback takes it as a compile time index sequence. The switch below
/// bridges the two for 1 to 8 columns.
/// @param op the scan whose produced columns are collected
/// @throws std::runtime_error if the scan produces 0 or more than 8 columns
void CollectorTarget::collect(engine::TableScan& op) {
    size_t cols = op.getProducedColumns();
    // Attribute offsets are simply 0, 1, ..., cols - 1
    SmallVec attrOffsets;
    for (size_t i = 0; i < cols; i++)
        attrOffsets.push_back(i);

    // Select the instantiation that matches the runtime column count
    switch (cols) {
        case 1: return collectCallback(*this, op, attrOffsets, std::make_index_sequence<1>{});
        case 2: return collectCallback(*this, op, attrOffsets, std::make_index_sequence<2>{});
        case 3: return collectCallback(*this, op, attrOffsets, std::make_index_sequence<3>{});
        case 4: return collectCallback(*this, op, attrOffsets, std::make_index_sequence<4>{});
        case 5: return collectCallback(*this, op, attrOffsets, std::make_index_sequence<5>{});
        case 6: return collectCallback(*this, op, attrOffsets, std::make_index_sequence<6>{});
        case 7: return collectCallback(*this, op, attrOffsets, std::make_index_sequence<7>{});
        case 8: return collectCallback(*this, op, attrOffsets, std::make_index_sequence<8>{});
        default:
            throw std::runtime_error("Unsupported number of columns");
    }
}
| "How Good Are Query Optimizers, Really?" 4 | by Viktor Leis, Andrey Gubichev, Atanas Mirchev, Peter Boncz, Alfons Kemper, Thomas Neumann 5 | PVLDB Volume 9, No. 3, 2015 6 | 7 | IMDB Data Set 8 | ------------- 9 | 10 | The CSV files used in the paper, which are from May 2013, can be found 11 | at http://homepages.cwi.nl/~boncz/job/imdb.tgz 12 | 13 | The license and links to the current version of the IMDB data set can be 14 | found at http://www.imdb.com/interfaces 15 | Step-by-step instructions: 16 | 1. download *gz files (unpacking not necessary) 17 | wget ftp://ftp.fu-berlin.de/pub/misc/movies/database/*gz 18 | 2. download and unpack imdbpy and the imdbpy2sql.py script 19 | wget https://bitbucket.org/alberanid/imdbpy/get/5.0.zip 20 | 3. create a PostgreSQL database (e.g., named imdbload): 21 | createdb imdbload 22 | 4. transform *gz files to relational schema (takes a while) 23 | imdbpy2sql.py -d PATH_TO_GZ_FILES -u postgres://username:password@hostname/imdbload 24 | 25 | Now you should have a PostgreSQL database named "imdbload" with the 26 | imdb data. Note that this database has some secondary indexes (but not 27 | on all foreign key attributes).
You can export all tables to CSV: 28 | 29 | \copy aka_name to 'PATH/aka_name.csv' csv 30 | \copy aka_title to 'PATH/aka_title.csv' csv 31 | \copy cast_info to 'PATH/cast_info.csv' csv 32 | \copy char_name to 'PATH/char_name.csv' csv 33 | \copy comp_cast_type to 'PATH/comp_cast_type.csv' csv 34 | \copy company_name to 'PATH/company_name.csv' csv 35 | \copy company_type to 'PATH/company_type.csv' csv 36 | \copy complete_cast to 'PATH/complete_cast.csv' csv 37 | \copy info_type to 'PATH/info_type.csv' csv 38 | \copy keyword to 'PATH/keyword.csv' csv 39 | \copy kind_type to 'PATH/kind_type.csv' csv 40 | \copy link_type to 'PATH/link_type.csv' csv 41 | \copy movie_companies to 'PATH/movie_companies.csv' csv 42 | \copy movie_info to 'PATH/movie_info.csv' csv 43 | \copy movie_info_idx to 'PATH/movie_info_idx.csv' csv 44 | \copy movie_keyword to 'PATH/movie_keyword.csv' csv 45 | \copy movie_link to 'PATH/movie_link.csv' csv 46 | \copy name to 'PATH/name.csv' csv 47 | \copy person_info to 'PATH/person_info.csv' csv 48 | \copy role_type to 'PATH/role_type.csv' csv 49 | \copy title to 'PATH/title.csv' csv 50 | 51 | To import the CSV files into another database, create all tables (see 52 | schema.sql and optionally fkindexes.sql) and run the same copy 53 | statements as above, but replace the keyword "to" with "from". 54 | 55 | Questions 56 | --------- 57 | 58 | Contact Viktor Leis (leis@in.tum.de) if you have any questions.
59 | -------------------------------------------------------------------------------- /engine/infra/Mmap.cpp: -------------------------------------------------------------------------------- 1 | #include "infra/Mmap.hpp" 2 | #include "infra/Util.hpp" 3 | #include "infra/Scheduler.hpp" 4 | #include 5 | #include 6 | #include 7 | //--------------------------------------------------------------------------- 8 | namespace engine { 9 | //--------------------------------------------------------------------------- 10 | void Mmap::prefault(void* data, size_t size) { 11 | char* mem = static_cast(data); 12 | for (std::size_t i = 0; i < size; i += 4096) 13 | mem[i] = 0; 14 | } 15 | //--------------------------------------------------------------------------- 16 | Mmap Mmap::mapFile(const std::string& filename) { 17 | Mmap result; 18 | result.file = open(filename.c_str(), O_RDONLY); 19 | if (result.file == -1) { 20 | return result; 21 | } 22 | result.size_ = lseek(result.file, 0, SEEK_END); 23 | if (result.size_ == -1) { 24 | close(result.file); 25 | result.file = -1; 26 | return result; 27 | } 28 | result.data_ = static_cast(mmap(nullptr, result.size_, PROT_READ, MAP_SHARED, result.file, 0)); 29 | if (result.data_ == MAP_FAILED) { 30 | close(result.file); 31 | result.file = -1; 32 | result.size_ = 0; 33 | result.data_ = nullptr; 34 | } 35 | #ifdef SIGMOD_LOCAL 36 | size_t morselSize = 1ull << 21; 37 | Scheduler::parallelMorsel(0, result.size_, morselSize, [&](size_t workerId, size_t pos) { 38 | auto end = std::min(pos + morselSize, result.size_); 39 | madvise(result.data_ + pos, end - pos, MADV_POPULATE_READ); 40 | }); 41 | #endif 42 | return result; 43 | } 44 | //--------------------------------------------------------------------------- 45 | Mmap Mmap::mapMemory(size_t size) { 46 | Mmap result; 47 | result.size_ = size; 48 | #ifdef NDEBUG 49 | result.data_ = static_cast(mmap(nullptr, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0)); 50 | if (size >= 
2'000'000) 51 | madvise(result.data_, size, MADV_HUGEPAGE); 52 | #else 53 | result.data_ = static_cast(aligned_alloc(4096, size)); 54 | #endif 55 | return result; 56 | } 57 | //--------------------------------------------------------------------------- 58 | void Mmap::reset() noexcept { 59 | if (data_) { 60 | #ifdef NDEBUG 61 | munmap(data_, size_); 62 | #else 63 | if (file) 64 | munmap(data_, size_); 65 | else 66 | free(data_); 67 | #endif 68 | data_ = nullptr; 69 | size_ = 0; 70 | } 71 | if (file != -1) { 72 | close(file); 73 | file = -1; 74 | } 75 | } 76 | //--------------------------------------------------------------------------- 77 | } 78 | -------------------------------------------------------------------------------- /engine/infra/Scheduler.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | //--------------------------------------------------------------------------- 3 | #include "infra/Util.hpp" 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | //--------------------------------------------------------------------------- 10 | namespace engine { 11 | class Scheduler { 12 | static __thread size_t currentWorker; 13 | 14 | public: 15 | struct Worker; 16 | struct Impl; 17 | 18 | class Task; 19 | template 20 | class TaskImpl; 21 | 22 | /// Setup the scheduler 23 | static void setup(); 24 | /// Teardown the scheduler 25 | static void teardown(); 26 | /// Start a query 27 | static void start_query(); 28 | /// End a query 29 | static void end_query(); 30 | 31 | /// Run a parallel morsel task 32 | static void parallelImpl(size_t size, FunctionRef task, bool finalizeTask = false); 33 | /// Run a parallel morsel task 34 | template 35 | static void parallelMorsel(size_t begin, size_t end, size_t morselSize, Fun&& task, bool finalizeTask = false) { 36 | return parallelImpl((end - begin + morselSize - 1) / morselSize, [&task, morselSize, begin](size_t workerId, size_t i) { return task(workerId, 
/// Helpers for working with 64 bit bitsets, either as a contiguous range of
/// positions (dense) or as an explicit list of positions (sparse).
class BitLogic {
    public:
    /// Convert the bits in a 64bit integer to an array of offsets (and the length of this array)
    /// Target should have size at least 64+7=71 bytes
    /// The result is used as the end of the written offsets (see getSparseIterators)
    static uint8_t* bitsToOffsets(uint8_t* target, uint64_t bits) noexcept;

    /// Is a bitset dense? Are all bits packed together?
    /// An empty mask is reported as dense as well.
    static constexpr bool isDense(uint64_t mask) {
        // Isolate the lowest set bit
        auto lowest = mask & -mask;
        // Adding it to a contiguous run of ones carries through the whole run
        auto incremented = mask + lowest;
        // Dense iff at most one bit is left (zero if the run reached bit 63)
        return (incremented & (incremented - 1)) == 0;
    }
    /// Get the range of bits from a bitset (assuming it is dense)
    /// Returns {0, 0} for an empty mask; otherwise the pair is built from
    /// engine::countr_zero(mask) and 64 - engine::countl_zero(mask)
    /// NOTE(review): presumably a half-open [first, last) range of bit positions,
    /// assuming the engine helpers mirror std::countr_zero / std::countl_zero - confirm
    static constexpr std::pair getRange(uint64_t mask) {
        assert(isDense(mask));
        if (!mask)
            return {0, 0};
        return {engine::countr_zero(mask), 64 - engine::countl_zero(mask)};
    }

    /// Iterator that yields consecutive indices
    struct IndexIterator {
        size_t index = 0;

        void operator++() { index++; }
        bool operator!=(const IndexIterator& other) const { return index != other.index; }
        size_t operator*() const { return index; }
    };

    /// Iterator that yields the indices stored in a byte array
    struct IndirectIterator {
        uint8_t* indices = nullptr;

        void operator++() { indices++; }
        bool operator!=(const IndirectIterator& other) const { return indices != other.indices; }
        size_t operator*() const { return *indices; }
    };


    /// Get a begin / end iterator pair over the range of a bitset (assuming it is dense)
    /// Both iterators are equal for an empty mask
    static std::pair getDenseIterators(uint64_t mask) {
        assert(isDense(mask));
        if (!mask)
            return {IndexIterator{0}, IndexIterator{0}};
        return {IndexIterator{size_t(engine::countr_zero(mask))}, IndexIterator{size_t(64 - engine::countl_zero(mask))}};
    }
    /// Get a begin / end iterator pair over the offsets of an arbitrary bitset
    /// The offsets are written into target by bitsToOffsets, so target has to satisfy
    /// the size requirement stated there. Target is left untouched for an empty mask.
    static std::pair getSparseIterators(uint8_t* target, uint64_t mask) {
        if (!mask)
            return {IndirectIterator{nullptr}, IndirectIterator{nullptr}};
        auto* end = bitsToOffsets(target, mask);
        return {IndirectIterator{target}, IndirectIterator{end}};
    }
};
//--------------------------------------------------------------------------- -------------------------------------------------------------------------------- /engine/query/QueryGraph.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | //--------------------------------------------------------------------------- 3 | #include "infra/BitSet.hpp" 4 | #include "infra/QueryMemory.hpp" 5 | #include "infra/SmallVec.hpp" 6 | #include 7 | #include 8 | //--------------------------------------------------------------------------- 9 | namespace engine { 10 | //--------------------------------------------------------------------------- 11 | class QueryPlan; 12 | //--------------------------------------------------------------------------- 13 | struct QueryGraph { 14 | struct Input { 15 | /// The equivalence classes being produced 16 | BitSet producedEq; 17 | /// The cardinality estimation 18 | double cardinality = 1.0; 19 | /// The multiplicity for hash tables 20 | double multiplicity = 1.0; 21 | /// The key that needs to be joined with 22 | unsigned joinKey = ~0u; 23 | }; 24 | struct Plan { 25 | Plan* left = nullptr; 26 | Plan* right = nullptr; 27 | BitSet set; 28 | size_t pipes; 29 | BitSet eqs; 30 | mutable BitSet neighborhood; 31 | double card = -1; 32 | double bc = 0.0; 33 | double mc = 1.0; 34 | double cost = std::numeric_limits::infinity(); 35 | 36 | bool isLeaf() const { return !left; } 37 | }; 38 | 39 | /// The maximum number of pipes in a plan 40 | static constexpr size_t maxPipelineLength = 3; 41 | 42 | /// Reference to the query plan 43 | QueryPlan& qp; 44 | /// The inputs 45 | SmallVec inputs; 46 | /// The plans 47 | Vector> plans; 48 | /// The number of pipes of the best plan 49 | size_t bestPipes = 0; 50 | 51 | explicit QueryGraph(QueryPlan& qp, SmallVec inputs); 52 | 53 | size_t size() const; 54 | 55 | BitSet computeNeighborhood(BitSet rels, BitSet eqs) const; 56 | BitSet neighborhood(BitSet bs) const; 57 | 
bool connected(BitSet bs) const; 58 | bool canJoin(BitSet left, BitSet right); 59 | void consider(BitSet left, BitSet right); 60 | static double computeCost(double card, double leftCard, [[maybe_unused]] double rightCard); 61 | 62 | /// Get a plan 63 | [[gnu::always_inline]] Plan& get(BitSet bs, size_t pipes) { 64 | assert(pipes < maxPipelineLength); 65 | assert(bs.asU64() < plans.size()); 66 | auto& result = plans[bs.asU64()][pipes]; 67 | assert(result.set == bs); 68 | assert(result.pipes == pipes); 69 | return result; 70 | } 71 | 72 | /// Compute cardinality 73 | double computeCard(BitSet rels); 74 | /// Compute cardinality 75 | void computeCard(Plan& target, const Plan& left, const Plan& right); 76 | 77 | Plan* optimize(); 78 | }; 79 | //--------------------------------------------------------------------------- 80 | } 81 | -------------------------------------------------------------------------------- /engine/query/DPccp.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | //--------------------------------------------------------------------------- 3 | #include "infra/BitSet.hpp" 4 | #include "infra/Util.hpp" 5 | #include 6 | //--------------------------------------------------------------------------- 7 | namespace engine { 8 | //--------------------------------------------------------------------------- 9 | template 10 | struct QG { 11 | static_assert(std::is_same().neighborhood(std::declval())), BitSet>::value); 12 | static_assert(std::is_same().connected(std::declval())), bool>::value); 13 | static_assert(std::is_convertible().size()), unsigned>::value); 14 | }; 15 | //--------------------------------------------------------------------------- 16 | class DPccp { 17 | public: 18 | template >> 19 | static void enumerateCsg(const QG& qg, unsigned N, Callback&& callback) { 20 | enumerateCsg(qg, BitSet::prefix(N), BitSet{}, callback); 21 | } 22 | 23 | template >> 24 | static void enumerateCsg(const QG& qg, 
BitSet::arg_type s, BitSet::arg_type x, Callback&& callback) { 25 | for (unsigned i : s.reversed()) 26 | enumerateCsgRec(qg, {i}, x + (BitSet::prefix(i) & s), callback); 27 | } 28 | 29 | template >> 30 | static void enumerateCsgRec(const QG& qg, BitSet::arg_type s, BitSet::arg_type x, Callback&& callback) { 31 | callback(s); 32 | auto n = qg.neighborhood(s) - x; 33 | for (BitSet sp : n.subsets()) 34 | enumerateCsgRec(qg, s + sp, x + n, callback); 35 | } 36 | 37 | template >> 38 | static void enumerateCmp(const QG& qg, BitSet::arg_type s, Callback&& callback) { 39 | auto x = BitSet::prefix(s.front()) + s; 40 | auto n = qg.neighborhood(s) - x; 41 | enumerateCsg(qg, n, x, callback); 42 | } 43 | 44 | template >> 45 | static void enumerateCsgCmp(const QG& qg,Callback && callback) { 46 | enumerateCsg(qg, qg.size(), [&](BitSet::arg_type s) { 47 | if (!qg.connected(s)) 48 | return; 49 | enumerateCmp(qg, s, [&](BitSet::arg_type c) { 50 | if (!qg.connected(c)) 51 | return; 52 | callback(s, c); 53 | }); 54 | }); 55 | } 56 | }; 57 | //--------------------------------------------------------------------------- 58 | } -------------------------------------------------------------------------------- /engine/genpipelines.py: -------------------------------------------------------------------------------- 1 | import itertools 2 | 3 | #&runPipeline, std::index_sequence<0, 0, 1>>; 4 | def comma(seq): 5 | return ','.join(map(str, seq)) 6 | 7 | def jj(seq): 8 | return ''.join(map(str, seq)) 9 | 10 | def subsets_with_replacement(p, r): 11 | for i in range(0, r + 1): 12 | for v in itertools.combinations_with_replacement(p, i): 13 | yield v 14 | 15 | targets = [ 16 | "engine::HashtableBuild", 17 | "engine::TableTarget" 18 | ] 19 | scans = [ 20 | "engine::TableScan" 21 | ] 22 | 23 | funcs = [] 24 | maxn = 3 25 | maxattrs = 6 26 | 27 | for i in range(0, maxn): 28 | for keys in itertools.product(range(maxn), repeat = i): 29 | print(i, keys) 30 | isValid = True 31 | for ind, v in 
enumerate(keys): 32 | if v > ind: 33 | isValid = False 34 | break 35 | if not isValid: 36 | continue 37 | 38 | for target in ["engine::TableTarget"]: 39 | for scan in scans: 40 | attrs = [] 41 | restAttrs = [] 42 | funcs.append((f'{target},{scan},{i},({jj(keys)}),({jj(attrs)})',f'PipelineFunctions::runPipeline<{target}, {scan}, {i}, std::index_sequence<{comma(keys)}>, std::index_sequence<{comma(attrs)}>>')) 43 | # Let the first attribute come from any table as it will be interpreted as the hash key by hash builds 44 | for firstAttr in range(i + 1): 45 | for restAttrs in subsets_with_replacement(range(i + 1), maxattrs): 46 | for target in targets: 47 | for scan in scans: 48 | attrs = [firstAttr] + list(restAttrs) 49 | funcs.append((f'{target},{scan},{i},({jj(keys)}),({jj(attrs)})',f'PipelineFunctions::runPipeline<{target}, {scan}, {i}, std::index_sequence<{comma(keys)}>, std::index_sequence<{comma(attrs)}>>')) 50 | 51 | 52 | funcs.sort() 53 | 54 | def inst(v): 55 | return f"template void {v}(TargetBase&, ScanBase&, engine::span, engine::span, engine::span)" 56 | 57 | step = (len(funcs) + 15) // 16 58 | for ind, i in enumerate(range(0, len(funcs), step)): 59 | with open(f"pipeline/PipelineGen{ind}.cpp", "w") as f: 60 | f.write('#include "pipeline/PipelineGen.hpp"\n') 61 | f.write('namespace engine {\n') 62 | for n, v in funcs[i:i + step]: 63 | f.write(inst(v) + ";\n") 64 | f.write("}\n") 65 | 66 | with open("pipeline/PipelineGen.cpp", "w") as f: 67 | f.write('#include "pipeline/PipelineFunction.hpp"\n') 68 | f.write('namespace engine {\n') 69 | f.write(f'size_t PipelineFunctions::numFunctions = {len(funcs)};\n') 70 | f.write('std::pair PipelineFunctions::functions[] = {\n') 71 | for n, v in funcs: 72 | f.write(f'{{std::string_view{{"{n}"}},&{v}}},\n') 73 | f.write('};\n') 74 | f.write('}\n') 75 | 76 | print("Number of generated functions:", len(funcs)) -------------------------------------------------------------------------------- /engine/infra/QueryMemory.hpp: 
-------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | //--------------------------------------------------------------------------- 10 | namespace engine::querymemory { 11 | //--------------------------------------------------------------------------- 12 | /// Setup the query memory 13 | void setup(); 14 | //--------------------------------------------------------------------------- 15 | /// Prefault the page memory 16 | bool prefault(); 17 | //--------------------------------------------------------------------------- 18 | /// End a query 19 | void end_query(); 20 | //--------------------------------------------------------------------------- 21 | /// Allocate a page 22 | void* allocate(size_t bytes); 23 | //--------------------------------------------------------------------------- 24 | /// std::allocator like wrapper for querymemory::allocator 25 | template 26 | struct Allocator { 27 | using value_type = T; 28 | using pointer = T*; 29 | using const_pointer = const T*; 30 | using reference = T&; 31 | using const_reference = const T&; 32 | using size_type = std::size_t; 33 | using difference_type = std::ptrdiff_t; 34 | 35 | Allocator() noexcept = default; 36 | template 37 | Allocator(const Allocator&) noexcept {} 38 | 39 | T* allocate(std::size_t n) { 40 | return static_cast(::engine::querymemory::allocate(n * sizeof(T))); 41 | } 42 | 43 | void deallocate(T* p, std::size_t) noexcept { } 44 | 45 | template 46 | void construct(U* p, Args&&... 
args) { 47 | ::new (static_cast(p)) U(std::forward(args)...); 48 | } 49 | 50 | template 51 | void destroy(U* p) { 52 | p->~U(); 53 | } 54 | 55 | template 56 | struct rebind { 57 | using other = Allocator; 58 | }; 59 | }; 60 | //--------------------------------------------------------------------------- 61 | struct Deleter { 62 | template 63 | void operator()(T* p) const { 64 | if (p) { 65 | p->~T(); 66 | } 67 | } 68 | }; 69 | //--------------------------------------------------------------------------- 70 | } 71 | //--------------------------------------------------------------------------- 72 | namespace engine { 73 | template 74 | using Vector = std::vector>; 75 | template 76 | using UniquePtr = std::unique_ptr; 77 | template 78 | UniquePtr makeUnique(Args&&... args) { 79 | return UniquePtr(new (querymemory::allocate(sizeof(T))) T(std::forward(args)...)); 80 | } 81 | template , typename Equal = std::equal_to> 82 | using UnorderedSet = std::unordered_set>; 83 | template , typename Equal = std::equal_to> 84 | using UnorderedMap = std::unordered_map>>; 85 | } 86 | //--------------------------------------------------------------------------- 87 | -------------------------------------------------------------------------------- /engine/infra/helper/BitOps.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | //--------------------------------------------------------------------------- 5 | namespace engine { 6 | //--------------------------------------------------------------------------- 7 | constexpr auto Nd_ull = std::numeric_limits::digits; 8 | constexpr auto Nd_ul = std::numeric_limits::digits; 9 | constexpr auto Nd_u = std::numeric_limits::digits; 10 | //--------------------------------------------------------------------------- 11 | [[noreturn]] inline void unreachable() 12 | { 13 | // Uses compiler specific extensions if possible. 
14 | // Even if no extension is used, undefined behavior is still raised by 15 | // an empty function body and the noreturn attribute. 16 | #if defined(_MSC_VER) && !defined(__clang__) // MSVC 17 | __assume(false); 18 | #else // GCC, Clang 19 | __builtin_unreachable(); 20 | #endif 21 | } 22 | //--------------------------------------------------------------------------- 23 | template 24 | constexpr int popcount(T x) noexcept { 25 | constexpr auto Nd = std::numeric_limits::digits; 26 | 27 | if constexpr (Nd <= Nd_u) 28 | return __builtin_popcount(x); 29 | else if constexpr (Nd <= Nd_ul) 30 | return __builtin_popcountl(x); 31 | else if constexpr (Nd <= Nd_ull) 32 | return __builtin_popcountll(x); 33 | else 34 | unreachable(); 35 | } 36 | //--------------------------------------------------------------------------- 37 | template 38 | constexpr int countr_zero(T x) noexcept { 39 | constexpr auto Nd = std::numeric_limits::digits; 40 | 41 | if (x == 0) 42 | return Nd; 43 | 44 | if constexpr (Nd <= Nd_u) 45 | return __builtin_ctz(x); 46 | else if constexpr (Nd <= Nd_ul) 47 | return __builtin_ctzl(x); 48 | else if constexpr (Nd <= Nd_ull) 49 | return __builtin_ctzll(x); 50 | else 51 | unreachable(); 52 | } 53 | //--------------------------------------------------------------------------- 54 | template 55 | constexpr int countl_zero(T x) noexcept { 56 | constexpr auto Nd = std::numeric_limits::digits; 57 | 58 | if (x == 0) 59 | return Nd; 60 | 61 | if constexpr (Nd <= Nd_u) { 62 | constexpr int diff = Nd_u - Nd; 63 | return __builtin_clz(x) - diff; 64 | } else if constexpr (Nd <= Nd_ul) { 65 | constexpr int diff = Nd_ul - Nd; 66 | return __builtin_clzl(x) - diff; 67 | } else if constexpr (Nd <= Nd_ull) { 68 | constexpr int diff = Nd_ull - Nd; 69 | return __builtin_clzll(x) - diff; 70 | } else { 71 | unreachable(); 72 | } 73 | } 74 | //--------------------------------------------------------------------------- 75 | template 76 | constexpr int has_single_bit(T x) noexcept { 77 
| return popcount(x) == 1; 78 | } 79 | //--------------------------------------------------------------------------- 80 | template 81 | constexpr int bit_width(T x) noexcept { 82 | constexpr auto Nd = std::numeric_limits::digits; 83 | return Nd - countl_zero(x); 84 | } 85 | //--------------------------------------------------------------------------- 86 | template 87 | constexpr int countr_one(T x) noexcept { 88 | return countr_zero((T)~x); 89 | } 90 | //--------------------------------------------------------------------------- 91 | } 92 | //--------------------------------------------------------------------------- 93 | -------------------------------------------------------------------------------- /engine/infra/Util.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | //--------------------------------------------------------------------------- 3 | #include 4 | #include 5 | #include 6 | #include 7 | //--------------------------------------------------------------------------- 8 | namespace engine { 9 | //--------------------------------------------------------------------------- 10 | template 11 | [[gnu::always_inline]] inline void unalignedStore(void* ptr, const T& value) noexcept { 12 | static_assert(std::is_trivially_copyable_v); 13 | __builtin_memcpy(ptr, &value, sizeof(T)); 14 | } 15 | //--------------------------------------------------------------------------- 16 | template 17 | [[gnu::always_inline]] inline T unalignedLoad(const void* ptr) noexcept { 18 | static_assert(std::is_trivially_copyable_v); 19 | T value; 20 | __builtin_memcpy(&value, ptr, sizeof(T)); 21 | return value; 22 | } 23 | //--------------------------------------------------------------------------- 24 | namespace detail { 25 | template 26 | struct IsFunT; 27 | 28 | template 29 | struct IsFitFor { 30 | static constexpr bool value = std::is_void_v || std::is_convertible_v; 31 | }; 32 | 33 | template 34 | struct IsFunT { 35 | private: 
36 | template 37 | static auto test(int) -> decltype(std::declval()(std::declval()...), std::true_type{}); 38 | 39 | template 40 | static std::false_type test(...); 41 | 42 | public: 43 | static constexpr bool value = decltype(test(0))::value && IsFitFor()(std::declval()...)), R>::value; 44 | }; 45 | } 46 | //--------------------------------------------------------------------------- 47 | template 48 | constexpr bool Fun = detail::IsFunT::value; 49 | //--------------------------------------------------------------------------- 50 | template 51 | class FunctionRef; 52 | template 53 | class FunctionRef { 54 | private: 55 | using Func = R (*)(const void*, Args...); 56 | Func func = nullptr; 57 | const void* obj = nullptr; 58 | 59 | public: 60 | constexpr FunctionRef() noexcept = default; 61 | template >> 62 | constexpr FunctionRef(const F& f) { 63 | obj = &f; 64 | func = [](const void* o, Args... args) -> R { 65 | return (*static_cast(o))(std::forward(args)...); 66 | }; 67 | } 68 | constexpr FunctionRef(R (*f)(Args...)) { 69 | obj = f; 70 | func = [](const void* o, Args... args) -> R { 71 | return static_cast(o)(std::forward(args)...); 72 | }; 73 | } 74 | R operator()(Args... 
args) const { return func(obj, std::forward(args)...); } 75 | explicit operator bool() const noexcept { return func != nullptr; } 76 | }; 77 | //--------------------------------------------------------------------------- 78 | // cacheline sizes for x64, ARM, and Power8 79 | #if defined(__x86_64__) || defined(_M_X64) || defined(__aarch64__) 80 | static constexpr size_t hardwareCachelineSize = 64; 81 | #elif defined(__PPC64__) 82 | static constexpr size_t hardwareCachelineSize = 128; 83 | #else 84 | static constexpr size_t hardwareCachelineSize = 64; 85 | #endif 86 | //--------------------------------------------------------------------------- 87 | } 88 | -------------------------------------------------------------------------------- /engine/query/QueryPlan.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | //--------------------------------------------------------------------------- 3 | #include "infra/BitSet.hpp" 4 | #include "infra/QueryMemory.hpp" 5 | #include "infra/SmallVec.hpp" 6 | #include "query/DataSource.hpp" 7 | #include "query/Restriction.hpp" 8 | #include "query/RuntimeValue.hpp" 9 | #include 10 | #include 11 | #include 12 | #include 13 | //--------------------------------------------------------------------------- 14 | namespace engine { 15 | //--------------------------------------------------------------------------- 16 | class TableTarget; 17 | class Hashtable; 18 | class HashtableBuild; 19 | class TableScan; 20 | class RestrictionLogic; 21 | class QueryGraph; 22 | //--------------------------------------------------------------------------- 23 | class QueryPlan { 24 | public: 25 | /// An attribute corresponding to a physical column 26 | struct Attribute; 27 | /// An input. 
It is either a base table or a hash table built on an attribute 28 | struct Input; 29 | 30 | private: 31 | /// The query graph for optimizing joins 32 | friend class QueryGraph; 33 | /// A pipeline descriptor 34 | struct PlanPipeline; 35 | struct CheapestPipelineFinder; 36 | /// The input data 37 | DataSource* db; 38 | /// All attributes provided by table scans in the query plan 39 | Vector attributes; 40 | /// The output equivalence classes 41 | SmallVec outputEqs; 42 | /// The inputs 43 | SmallVec> inputs; 44 | /// The used inputs that we have to keep alive 45 | SmallVec> graveyard; 46 | /// Sets of attributes within each equivalence class 47 | SmallVec equivalenceSets; 48 | /// Equivalence classes for which we have a constant value 49 | UnorderedMap eqConstants; 50 | /// Equivalence classes for which we have a restriction 51 | UnorderedMap eqRestrictions; 52 | /// The result 53 | ColumnarTable finalResult; 54 | 55 | /// Estimate the cardinality of a table and the selectivities of its restrictions 56 | void estimateCardinality(Input& input); 57 | /// Build a table scan for an input 58 | TableScan buildScan(Input& input, BitSet requiredEqs, double mult); 59 | /// Compute the required equivalence classes outside of a set of relations 60 | BitSet computeRequiredEq(BitSet relations); 61 | /// Eliminate singletons 62 | void eliminateSingletons(); 63 | /// Compute samples 64 | void computeSamples(); 65 | /// Run a pipeline 66 | bool runPipeline(const PlanPipeline& pipeline, double cardinalityEstimate); 67 | /// Print a query plan 68 | void printPlan(Input& root) const; 69 | 70 | public: 71 | /// Constructor 72 | explicit QueryPlan(DataSource& db); 73 | /// Destructor 74 | ~QueryPlan() noexcept; 75 | /// Move constructor 76 | QueryPlan(QueryPlan&&) noexcept; 77 | /// Move assignment 78 | QueryPlan& operator=(QueryPlan&&) noexcept; 79 | 80 | /// Add an input 81 | void addInput(DataSource::Table& table, BitSet attrs); 82 | /// Add an attribute 83 | void 
addAttribute(unsigned relation, unsigned column, DataType dataType); 84 | /// Prepare query plan after all inputs and attributes have been added 85 | void prepare(SmallVec equivalenceSets); 86 | /// Set the output attributes 87 | void setOutput(engine::span attrs); 88 | 89 | /// Run the query 90 | ColumnarTable run(); 91 | }; 92 | //--------------------------------------------------------------------------- 93 | } 94 | -------------------------------------------------------------------------------- /include/table.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | #include 8 | #include 9 | #include 10 | 11 | struct Table { 12 | public: 13 | Table() = default; 14 | /// Takes ownership of the rows and column types; both by-value arguments are moved into the members instead of being copied a second time 15 | Table(std::vector> data, std::vector types) 16 | : types_(std::move(types)) 17 | , data_(std::move(data)) {} 18 | 19 | static ColumnarTable from_csv(const std::vector& attributes, 20 | const std::filesystem::path& path, 21 | Statement* filter, 22 | bool header = false); 23 | 24 | static Table from_columnar(const ColumnarTable& input); 25 | 26 | ColumnarTable to_columnar() const; 27 | 28 | const std::vector>& table() const { return data_; } 29 | 30 | std::vector>& table() { return data_; } 31 | 32 | const std::vector& types() const { return types_; } 33 | 34 | size_t number_rows() const { return this->data_.size(); } 35 | 36 | size_t number_cols() const { return this->types_.size(); } 37 | 38 | static void print(const std::vector>& data) { 39 | namespace views = ranges::views; 40 | 41 | auto escape_string = [](const std::string& s) { 42 | std::string escaped; 43 | for (char c: s) { 44 | switch (c) { 45 | case '"': escaped += "\\\""; break; 46 | case '\\': escaped += "\\\\"; break; 47 | case '\n': escaped += "\\n"; break; 48 | case '\r': escaped += "\\r"; break; 49 | case '\t': escaped += "\\t"; break; 50 | default: escaped += c; break; 51 | } 52 | } 53 | return escaped; 54 | }; 55 | 56 | for (auto& record: data) { 57 | auto 
line = record 58 | | views::transform([&escape_string](const Data& field) -> std::string { 59 | return std::visit( 60 | [&escape_string](const auto& arg) { 61 | using T = std::decay_t; 62 | using namespace std::string_literals; 63 | if constexpr (std::is_same_v) { 64 | return "NULL"s; 65 | } else if constexpr (std::is_same_v 66 | || std::is_same_v 67 | || std::is_same_v) { 68 | return fmt::format("{}", arg); 69 | } else if constexpr (std::is_same_v) { 70 | return fmt::format("\"{}\"", escape_string(arg)); 71 | // return fmt::format("{}", arg); 72 | } 73 | }, 74 | field); 75 | }) 76 | | views::join('|') | ranges::to(); 77 | fmt::println("{}", line); 78 | } 79 | } 80 | 81 | private: 82 | std::vector types_; 83 | std::vector> data_; 84 | 85 | void set_attributes(const std::vector& attributes) { 86 | this->types_.clear(); 87 | for (auto& attr: attributes) { 88 | this->types_.push_back(attr.type); 89 | } 90 | } 91 | }; 92 | -------------------------------------------------------------------------------- /engine/tools/DuckDB.cpp: -------------------------------------------------------------------------------- 1 | #include "tools/DuckDB.hpp" 2 | #ifndef NO_DUCK 3 | #include 4 | #endif 5 | //--------------------------------------------------------------------------- 6 | namespace engine { 7 | //--------------------------------------------------------------------------- 8 | struct DuckDB::Impl { 9 | #ifndef NO_DUCK 10 | duckdb::DBConfig config; 11 | duckdb::DuckDB db; 12 | duckdb::Connection conn; 13 | 14 | Impl() : config(true), db("imdb.db", &config), conn(db) { 15 | conn.Query("SET memory_limit = '20GB';"); 16 | conn.Query("SET temp_directory = '';"); 17 | } 18 | #endif 19 | }; 20 | //--------------------------------------------------------------------------- 21 | #ifndef NO_DUCK 22 | static DataType mapType(const duckdb::LogicalType& lhs) { 23 | using namespace duckdb; 24 | switch (lhs.id()) { 25 | case LogicalTypeId::INTEGER: return DataType::INT32; 26 | case 
LogicalTypeId::BIGINT: return DataType::INT64; 27 | case LogicalTypeId::DOUBLE: return DataType::FP64; 28 | case LogicalTypeId::VARCHAR: return DataType::VARCHAR; 29 | default: 30 | throw std::runtime_error("in DuckDB is not supported"); 31 | } 32 | } 33 | #endif 34 | //--------------------------------------------------------------------------- 35 | DuckDB::DuckDB() : impl(std::make_unique()) {} 36 | DuckDB::~DuckDB() noexcept = default; 37 | //--------------------------------------------------------------------------- 38 | ColumnarTable DuckDB::execute(std::string query) { 39 | #ifndef NO_DUCK 40 | auto results = impl->conn.SendQuery(query); 41 | auto& duckdb_results = *results; 42 | auto num_cols = duckdb_results.ColumnCount(); 43 | 44 | std::vector cols; 45 | 46 | for (size_t i = 0; i < num_cols; ++i) { 47 | cols.push_back(mapType(duckdb_results.types[i])); 48 | } 49 | 50 | std::vector> duckdb_table; 51 | size_t rowCount = 0; 52 | for (auto& row : *results) { 53 | if (rowCount > 50'000'000ull) { 54 | throw std::runtime_error("Too many rows in result"); 55 | } 56 | rowCount++; 57 | std::vector record; 58 | for (size_t col_idx = 0; col_idx < num_cols; col_idx++) { 59 | auto val = row.iterator.chunk->GetValue(col_idx, row.row); 60 | if (val.IsNull()) { 61 | record.emplace_back(std::monostate{}); 62 | } else { 63 | switch (cols[col_idx]) { 64 | case DataType::INT32: { 65 | record.emplace_back(duckdb::IntegerValue::Get(val)); 66 | break; 67 | } 68 | case DataType::INT64: { 69 | record.emplace_back(duckdb::BigIntValue::Get(val)); 70 | break; 71 | } 72 | case DataType::FP64: { 73 | record.emplace_back(duckdb::DoubleValue::Get(val)); // FP64 is produced by mapType for LogicalTypeId::DOUBLE, so the value must be read with the double accessor, not the float one 74 | break; 75 | } 76 | case DataType::VARCHAR: { 77 | record.emplace_back(duckdb::StringValue::Get(val)); 78 | break; 79 | } 80 | } 81 | } 82 | } 83 | duckdb_table.emplace_back(std::move(record)); 84 | } 85 | sort(duckdb_table.begin(), duckdb_table.end()); 86 | Table tbl(std::move(duckdb_table), std::move(cols)); 87 | return 
tbl.to_columnar(); 88 | #else 89 | throw std::runtime_error("Built with NO_DUCK but trying to run a DuckDB query!"); 90 | #endif 91 | } 92 | //--------------------------------------------------------------------------- 93 | } 94 | //--------------------------------------------------------------------------- -------------------------------------------------------------------------------- /include/common.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | #include 8 | #include 9 | 10 | namespace detail { 11 | inline uint32_t rotl32(uint32_t x, uint8_t bits) { 12 | return (x << bits) | (x >> (32 - bits)); 13 | } 14 | 15 | inline void hash_combine_impl(uint32_t& h1, uint32_t k1) { 16 | constexpr uint32_t c1 = 0xcc9e2d51u; 17 | constexpr uint32_t c2 = 0x1b873593u; 18 | 19 | k1 *= c1; 20 | k1 = rotl32(k1, 15); 21 | k1 *= c2; 22 | 23 | h1 ^= k1; 24 | h1 = rotl32(h1, 13); 25 | h1 = h1 * 5u + 0xe6546b64u; 26 | } 27 | 28 | inline void hash_combine_impl(uint64_t& h, uint64_t k) { 29 | constexpr uint64_t m = UINT64_C(0xc6a4a7935bd1e995); 30 | constexpr int r = 47; 31 | 32 | k *= m; 33 | k ^= k >> r; 34 | k *= m; 35 | 36 | h ^= k; 37 | h *= m; 38 | h += 0xe6546b64; 39 | } 40 | } // namespace detail 41 | 42 | inline void hash_combine(std::size_t& seed, std::size_t k) { 43 | if constexpr (sizeof(std::size_t) == 4) { 44 | uint32_t h = static_cast(seed); 45 | detail::hash_combine_impl(h, static_cast(k)); 46 | seed = h; 47 | } else if constexpr (sizeof(std::size_t) == 8) { 48 | uint64_t h = static_cast(seed); 49 | detail::hash_combine_impl(h, static_cast(k)); 50 | seed = h; 51 | } else { 52 | static_assert(sizeof(std::size_t) == 4 || sizeof(std::size_t) == 8, 53 | "Unsupported size_t size for hash_combine"); 54 | } 55 | } 56 | 57 | class File { 58 | public: 59 | File(const std::filesystem::path& path, const char* mode) 60 | : handle(std::fopen(path.string().c_str(), mode)) { 61 | if 
(!handle) { 62 | throw std::runtime_error("Failed to open file: " + path.string()); 63 | } 64 | } 65 | 66 | operator FILE*() const noexcept { return handle; } 67 | 68 | File(File&& other) noexcept 69 | : handle(other.handle) { 70 | other.handle = nullptr; 71 | } 72 | 73 | File& operator=(File&& other) noexcept { 74 | if (this != &other) { 75 | close(); 76 | handle = other.handle; 77 | other.handle = nullptr; 78 | } 79 | return *this; 80 | } 81 | 82 | File(const File&) = delete; 83 | File& operator=(const File&) = delete; 84 | 85 | ~File() { close(); } 86 | 87 | private: 88 | FILE* handle = nullptr; 89 | 90 | void close() noexcept { 91 | if (handle) { 92 | std::fclose(handle); 93 | handle = nullptr; 94 | } 95 | } 96 | }; 97 | 98 | inline std::string read_file(const std::filesystem::path& path) { 99 | File f(path, "rb"); 100 | ::fseek(f, 0, SEEK_END); 101 | auto size = ::ftell(f); 102 | ::fseek(f, 0, SEEK_SET); 103 | std::string result; 104 | result.resize(size); 105 | std::ignore = ::fread(result.data(), 1, size, f); 106 | return result; 107 | } 108 | 109 | struct DSU { 110 | std::vector pa; 111 | 112 | explicit DSU(size_t size) 113 | : pa(size) { 114 | std::iota(pa.begin(), pa.end(), 0); 115 | } 116 | 117 | size_t find(size_t x) { return pa[x] == x ? x : pa[x] = find(pa[x]); } 118 | 119 | void unite(size_t x, size_t y) { pa[find(x)] = find(y); } 120 | }; 121 | 122 | [[noreturn]] inline void unreachable() 123 | { 124 | // Uses compiler specific extensions if possible. 125 | // Even if no extension is used, undefined behavior is still raised by 126 | // an empty function body and the noreturn attribute. 
127 | #if defined(_MSC_VER) && !defined(__clang__) // MSVC 128 | __assume(false); 129 | #else // GCC, Clang 130 | __builtin_unreachable(); 131 | #endif 132 | } -------------------------------------------------------------------------------- /engine/tools/Setting.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | //--------------------------------------------------------------------------- 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | //--------------------------------------------------------------------------- 9 | namespace engine { 10 | //--------------------------------------------------------------------------- 11 | namespace setting { 12 | struct Bool { 13 | bool defaultValue; 14 | 15 | constexpr Bool(bool defaultValue = false) noexcept : defaultValue(defaultValue) {} 16 | 17 | bool operator()(std::string_view name, std::string_view value) const; 18 | 19 | constexpr auto getDefault() const noexcept { 20 | return defaultValue; 21 | } 22 | }; 23 | struct Size { 24 | size_t defaultValue; 25 | size_t minValue; 26 | size_t maxValue; 27 | 28 | constexpr Size(size_t defaultValue = 0, size_t minValue = std::numeric_limits::min(), size_t maxValue = std::numeric_limits::max()) noexcept : defaultValue(defaultValue), minValue(minValue), maxValue(maxValue) {} 29 | 30 | size_t operator()(std::string_view name, std::string_view value) const; 31 | 32 | constexpr auto getDefault() const noexcept { 33 | return defaultValue; 34 | } 35 | }; 36 | } 37 | //--------------------------------------------------------------------------- 38 | #ifdef SIGMOD_LOCAL 39 | /// Class for runtime settings using environment variables 40 | class SettingBase { 41 | /// The name of the setting 42 | std::string name; 43 | /// The cached value of the setting 44 | mutable std::string cached; 45 | /// Whether initialized 46 | mutable std::atomic initialized{0}; 47 | 48 | /// Ensure the value is ready 49 | void ensureReadyImpl() 
const; 50 | 51 | protected: 52 | /// Compute the value 53 | virtual void computeImpl(std::string_view str) const = 0; 54 | 55 | /// Ensure the value is ready 56 | void ensureReady() const { 57 | if (initialized.load() == 1) [[likely]] 58 | return; 59 | return ensureReadyImpl(); 60 | } 61 | 62 | public: 63 | /// Constructor 64 | explicit SettingBase(std::string name); 65 | 66 | /// Get as string 67 | std::string_view getAsString() const; 68 | /// Get the name 69 | std::string_view getName() const { 70 | return name; 71 | } 72 | /// Set the value 73 | void set(std::string value); 74 | }; 75 | //--------------------------------------------------------------------------- 76 | /// Class for runtime settings using environment variables 77 | template 78 | class Setting : public SettingBase, Parser { 79 | /// The cached value 80 | mutable decltype(std::declval()(std::declval(), std::declval())) cached; 81 | 82 | /// Compute the value 83 | void computeImpl(std::string_view str) const override { 84 | cached = Parser::operator()(getName(), str); 85 | } 86 | 87 | public: 88 | /// Constructor 89 | Setting(std::string name, Parser parser) : SettingBase(std::move(name)), Parser(parser) { 90 | ensureReady(); 91 | } 92 | 93 | /// Get the value 94 | auto get() const { 95 | ensureReady(); 96 | return cached; 97 | } 98 | }; 99 | #else 100 | /// Variant used when SIGMOD_LOCAL is not defined: the setting is fixed to the parser's default value 101 | template 102 | class Setting : public Parser { 103 | public: 104 | /// Constructor; the setting name is accepted for interface compatibility but ignored 105 | constexpr Setting(std::string_view, Parser parser) noexcept : Parser(parser) {} 106 | 107 | /// Get the value (always the parser's default in this variant) 108 | constexpr auto get() const noexcept { 109 | return Parser::getDefault(); 110 | } 111 | }; 112 | #endif 113 | //--------------------------------------------------------------------------- 114 | } 115 | --------------------------------------------------------------------------------