├── src
    └── execute.cpp
├── engine
    ├── Config.hpp
    ├── pipeline
    │   ├── JoinPipeline.cpp
    │   ├── PipelineFunction.cpp
    │   ├── PipelineGen.hpp
    │   └── PipelineFunction.hpp
    ├── op
    │   ├── OpBase.cpp
    │   ├── ScanBase.cpp
    │   ├── OpBase.hpp
    │   ├── TargetBase.hpp
    │   ├── ScanBase.hpp
    │   ├── CollectorTarget.hpp
    │   └── CollectorTarget.cpp
    ├── infra
    │   ├── helper
    │   │   ├── Misc.hpp
    │   │   ├── Span.hpp
    │   │   └── BitOps.hpp
    │   ├── PageMemory.hpp
    │   ├── JoinFilter.hpp
    │   ├── Random.hpp
    │   ├── UnionFind.hpp
    │   ├── Mmap.hpp
    │   ├── AdressSanitizer.hpp
    │   ├── Mmap.cpp
    │   ├── Scheduler.hpp
    │   ├── QueryMemory.hpp
    │   └── Util.hpp
    ├── Execute.hpp
    ├── tools
    │   ├── DuckDB.hpp
    │   ├── SQL.hpp
    │   ├── JoinPipelineLoader.hpp
    │   ├── ParsedSQL.hpp
    │   ├── DuckDB.cpp
    │   └── Setting.hpp
    ├── query
    │   ├── Restriction.cpp
    │   ├── RuntimeValue.hpp
    │   ├── PlanImport.hpp
    │   ├── Restriction.hpp
    │   ├── QueryGraph.hpp
    │   ├── DPccp.hpp
    │   └── QueryPlan.hpp
    ├── storage
    │   ├── CopyLogic.hpp
    │   ├── RestrictionLogic.hpp
    │   └── BitLogic.hpp
    ├── Execute.cpp
    ├── test
    │   └── unit_tests3.cpp
    └── genpipelines.py
├── .clangd
├── listincludes.sh
├── job
    ├── 3b.sql
    ├── 2a.sql
    ├── 2b.sql
    ├── 2c.sql
    ├── 2d.sql
    ├── 3a.sql
    ├── 32a.sql
    ├── 3c.sql
    ├── 32b.sql
    ├── 4a.sql
    ├── 4b.sql
    ├── 4c.sql
    ├── 6a.sql
    ├── 6c.sql
    ├── 6e.sql
    ├── 8c.sql
    ├── 5b.sql
    ├── 6f.sql
    ├── 17f.sql
    ├── 17d.sql
    ├── 17e.sql
    ├── 8d.sql
    ├── 10c.sql
    ├── 17b.sql
    ├── 17c.sql
    ├── 6b.sql
    ├── 6d.sql
    ├── 1d.sql
    ├── 5a.sql
    ├── 5c.sql
    ├── 10b.sql
    ├── 17a.sql
    ├── 1b.sql
    ├── 10a.sql
    ├── 1a.sql
    ├── 1c.sql
    ├── 16b.sql
    ├── 16c.sql
    ├── 8a.sql
    ├── 16a.sql
    ├── 16d.sql
    ├── 18a.sql
    ├── 9d.sql
    ├── 7b.sql
    ├── 13d.sql
    ├── 13a.sql
    ├── 18c.sql
    ├── 9c.sql
    ├── 11d.sql
    ├── 15d.sql
    ├── 7a.sql
    ├── 11a.sql
    ├── 11b.sql
    ├── 8b.sql
    ├── 9b.sql
    ├── 12a.sql
    ├── 12b.sql
    ├── 12c.sql
    ├── 18b.sql
    ├── 13b.sql
    ├── 13c.sql
    ├── 9a.sql
    ├── 11c.sql
    ├── 14a.sql
    ├── 15a.sql
    ├── 19d.sql
    ├── 15c.sql
    ├── 7c.sql
    ├── 14c.sql
    ├── 15b.sql
    ├── 14b.sql
    ├── 20a.sql
    ├── 20c.sql
    ├── 21b.sql
    ├── 20b.sql
    ├── 19c.sql
    ├── 25a.sql
    ├── 21a.sql
    ├── 23a.sql
    ├── 19b.sql
    ├── 21c.sql
    ├── 23b.sql
    ├── 25b.sql
    ├── 25c.sql
    ├── 23c.sql
    ├── 19a.sql
    ├── 26b.sql
    ├── 22a.sql
    ├── 22b.sql
    ├── 22d.sql
    ├── 26c.sql
    ├── 22c.sql
    ├── 26a.sql
    ├── 24a.sql
    ├── 31a.sql
    ├── 31c.sql
    ├── 27b.sql
    ├── 27a.sql
    ├── 33b.sql
    ├── 24b.sql
    ├── 27c.sql
    ├── 33a.sql
    ├── 30c.sql
    ├── 30a.sql
    ├── 33c.sql
    ├── 31b.sql
    ├── 30b.sql
    ├── 28b.sql
    ├── 28c.sql
    ├── 28a.sql
    ├── fkindexes.sql
    ├── 29b.sql
    ├── 29c.sql
    ├── 29a.sql
    └── README
├── .gitignore
├── checknolib.sh
├── checkglobals.sh
├── duckdbrunner.py
├── download_imdb.sh
├── ANNOUNCEMENTS.md
├── include
    ├── csv_parser.h
    ├── table_entity.h
    ├── hardware__ca09.h
    ├── hardware__sidon.h
    ├── hardware__koroneia.h
    ├── hardware__cp02.h
    ├── attribute.h
    ├── table.h
    └── common.h
└── tests
    └── build_database.cpp


/src/execute.cpp:
--------------------------------------------------------------------------------
1 | 
2 | // Intentionally left blank


--------------------------------------------------------------------------------
/engine/Config.hpp:
--------------------------------------------------------------------------------
1 | #pragma once
2 | namespace engine::config { // compile-time configuration constants of the engine
3 | constexpr bool handleMultiplicity = true; // constexpr switch, currently enabled; NOTE(review): its effect is decided by the code that reads it, which is not visible here
4 | }


--------------------------------------------------------------------------------
/.clangd:
--------------------------------------------------------------------------------
1 | CompileFlags:
2 |     Remove:
3 |         - -fmodules-ts
4 |         - -fmodule-mapper=*
5 |         - -fdeps-format=p1689r5


--------------------------------------------------------------------------------
/listincludes.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | 
3 | # list every distinct #include line found under engine/ (skipping all.cpp, tools/ and test/)
4 | find engine/ -type f ! -path "*/all.cpp" ! -path "*/tools/*" ! -path "*/test/*" -exec grep "#include" {} \; | sort | uniq
5 | 


--------------------------------------------------------------------------------
/job/3b.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(t.title) AS movie_title FROM keyword AS k, movie_info AS mi, movie_keyword AS mk, title AS t WHERE k.keyword  like '%sequel%' AND mi.info  IN ('Bulgaria') AND t.production_year > 2010 AND t.id = mi.movie_id AND t.id = mk.movie_id AND mk.movie_id = mi.movie_id AND k.id = mk.keyword_id;
2 | 


--------------------------------------------------------------------------------
/job/2a.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(t.title) AS movie_title FROM company_name AS cn, keyword AS k, movie_companies AS mc, movie_keyword AS mk, title AS t WHERE cn.country_code ='[de]' AND k.keyword ='character-name-in-title' AND cn.id = mc.company_id AND mc.movie_id = t.id AND t.id = mk.movie_id AND mk.keyword_id = k.id AND mc.movie_id = mk.movie_id;
2 | 


--------------------------------------------------------------------------------
/job/2b.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(t.title) AS movie_title FROM company_name AS cn, keyword AS k, movie_companies AS mc, movie_keyword AS mk, title AS t WHERE cn.country_code ='[nl]' AND k.keyword ='character-name-in-title' AND cn.id = mc.company_id AND mc.movie_id = t.id AND t.id = mk.movie_id AND mk.keyword_id = k.id AND mc.movie_id = mk.movie_id;
2 | 


--------------------------------------------------------------------------------
/job/2c.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(t.title) AS movie_title FROM company_name AS cn, keyword AS k, movie_companies AS mc, movie_keyword AS mk, title AS t WHERE cn.country_code ='[sm]' AND k.keyword ='character-name-in-title' AND cn.id = mc.company_id AND mc.movie_id = t.id AND t.id = mk.movie_id AND mk.keyword_id = k.id AND mc.movie_id = mk.movie_id;
2 | 


--------------------------------------------------------------------------------
/job/2d.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(t.title) AS movie_title FROM company_name AS cn, keyword AS k, movie_companies AS mc, movie_keyword AS mk, title AS t WHERE cn.country_code ='[us]' AND k.keyword ='character-name-in-title' AND cn.id = mc.company_id AND mc.movie_id = t.id AND t.id = mk.movie_id AND mk.keyword_id = k.id AND mc.movie_id = mk.movie_id;
2 | 


--------------------------------------------------------------------------------
/job/3a.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(t.title) AS movie_title FROM keyword AS k, movie_info AS mi, movie_keyword AS mk, title AS t WHERE k.keyword  like '%sequel%' AND mi.info  IN ('Sweden', 'Norway', 'Germany', 'Denmark', 'Swedish', 'Denish', 'Norwegian', 'German') AND t.production_year > 2005 AND t.id = mi.movie_id AND t.id = mk.movie_id AND mk.movie_id = mi.movie_id AND k.id = mk.keyword_id;
2 | 


--------------------------------------------------------------------------------
/job/32a.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(lt.link) AS link_type, MIN(t1.title) AS first_movie, MIN(t2.title) AS second_movie FROM keyword AS k, link_type AS lt, movie_keyword AS mk, movie_link AS ml, title AS t1, title AS t2 WHERE k.keyword ='10,000-mile-club' AND mk.keyword_id = k.id AND t1.id = mk.movie_id AND ml.movie_id = t1.id AND ml.linked_movie_id = t2.id AND lt.id = ml.link_type_id AND mk.movie_id = t1.id;
2 | 


--------------------------------------------------------------------------------
/job/3c.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(t.title) AS movie_title FROM keyword AS k, movie_info AS mi, movie_keyword AS mk, title AS t WHERE k.keyword  like '%sequel%' AND mi.info  IN ('Sweden', 'Norway', 'Germany', 'Denmark', 'Swedish', 'Denish', 'Norwegian', 'German', 'USA', 'American') AND t.production_year > 1990 AND t.id = mi.movie_id AND t.id = mk.movie_id AND mk.movie_id = mi.movie_id AND k.id = mk.keyword_id;
2 | 


--------------------------------------------------------------------------------
/job/32b.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(lt.link) AS link_type, MIN(t1.title) AS first_movie, MIN(t2.title) AS second_movie FROM keyword AS k, link_type AS lt, movie_keyword AS mk, movie_link AS ml, title AS t1, title AS t2 WHERE k.keyword ='character-name-in-title' AND mk.keyword_id = k.id AND t1.id = mk.movie_id AND ml.movie_id = t1.id AND ml.linked_movie_id = t2.id AND lt.id = ml.link_type_id AND mk.movie_id = t1.id;
2 | 


--------------------------------------------------------------------------------
/job/4a.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(mi_idx.info) AS rating, MIN(t.title) AS movie_title FROM info_type AS it, keyword AS k, movie_info_idx AS mi_idx, movie_keyword AS mk, title AS t WHERE it.info ='rating' AND k.keyword  like '%sequel%' AND mi_idx.info  > '5.0' AND t.production_year > 2005 AND t.id = mi_idx.movie_id AND t.id = mk.movie_id AND mk.movie_id = mi_idx.movie_id AND k.id = mk.keyword_id AND it.id = mi_idx.info_type_id;
2 | 


--------------------------------------------------------------------------------
/job/4b.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(mi_idx.info) AS rating, MIN(t.title) AS movie_title FROM info_type AS it, keyword AS k, movie_info_idx AS mi_idx, movie_keyword AS mk, title AS t WHERE it.info ='rating' AND k.keyword  like '%sequel%' AND mi_idx.info  > '9.0' AND t.production_year > 2010 AND t.id = mi_idx.movie_id AND t.id = mk.movie_id AND mk.movie_id = mi_idx.movie_id AND k.id = mk.keyword_id AND it.id = mi_idx.info_type_id;
2 | 


--------------------------------------------------------------------------------
/job/4c.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(mi_idx.info) AS rating, MIN(t.title) AS movie_title FROM info_type AS it, keyword AS k, movie_info_idx AS mi_idx, movie_keyword AS mk, title AS t WHERE it.info ='rating' AND k.keyword  like '%sequel%' AND mi_idx.info  > '2.0' AND t.production_year > 1990 AND t.id = mi_idx.movie_id AND t.id = mk.movie_id AND mk.movie_id = mi_idx.movie_id AND k.id = mk.keyword_id AND it.id = mi_idx.info_type_id;
2 | 


--------------------------------------------------------------------------------
/job/6a.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(k.keyword) AS movie_keyword, MIN(n.name) AS actor_name, MIN(t.title) AS marvel_movie FROM cast_info AS ci, keyword AS k, movie_keyword AS mk, name AS n, title AS t WHERE k.keyword = 'marvel-cinematic-universe' AND n.name LIKE '%Downey%Robert%' AND t.production_year > 2010 AND k.id = mk.keyword_id AND t.id = mk.movie_id AND t.id = ci.movie_id AND ci.movie_id = mk.movie_id AND n.id = ci.person_id;
2 | 


--------------------------------------------------------------------------------
/job/6c.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(k.keyword) AS movie_keyword, MIN(n.name) AS actor_name, MIN(t.title) AS marvel_movie FROM cast_info AS ci, keyword AS k, movie_keyword AS mk, name AS n, title AS t WHERE k.keyword = 'marvel-cinematic-universe' AND n.name LIKE '%Downey%Robert%' AND t.production_year > 2014 AND k.id = mk.keyword_id AND t.id = mk.movie_id AND t.id = ci.movie_id AND ci.movie_id = mk.movie_id AND n.id = ci.person_id;
2 | 


--------------------------------------------------------------------------------
/job/6e.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(k.keyword) AS movie_keyword, MIN(n.name) AS actor_name, MIN(t.title) AS marvel_movie FROM cast_info AS ci, keyword AS k, movie_keyword AS mk, name AS n, title AS t WHERE k.keyword = 'marvel-cinematic-universe' AND n.name LIKE '%Downey%Robert%' AND t.production_year > 2000 AND k.id = mk.keyword_id AND t.id = mk.movie_id AND t.id = ci.movie_id AND ci.movie_id = mk.movie_id AND n.id = ci.person_id;
2 | 


--------------------------------------------------------------------------------
/engine/pipeline/JoinPipeline.cpp:
--------------------------------------------------------------------------------
1 | #include "pipeline/JoinPipeline.hpp"
2 | #include "infra/Scheduler.hpp"
3 | //---------------------------------------------------------------------------
4 | namespace engine {
5 | //---------------------------------------------------------------------------
6 | //---------------------------------------------------------------------------
7 | }
8 | //---------------------------------------------------------------------------
9 | 


--------------------------------------------------------------------------------
/job/8c.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(a1.name) AS writer_pseudo_name, MIN(t.title) AS movie_title FROM aka_name AS a1, cast_info AS ci, company_name AS cn, movie_companies AS mc, name AS n1, role_type AS rt, title AS t WHERE cn.country_code ='[us]' AND rt.role ='writer' AND a1.person_id = n1.id AND n1.id = ci.person_id AND ci.movie_id = t.id AND t.id = mc.movie_id AND mc.company_id = cn.id AND ci.role_id = rt.id AND a1.person_id = ci.person_id AND ci.movie_id = mc.movie_id;
2 | 


--------------------------------------------------------------------------------
/job/5b.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(t.title) AS american_vhs_movie FROM company_type AS ct, info_type AS it, movie_companies AS mc, movie_info AS mi, title AS t WHERE ct.kind  = 'production companies' AND mc.note  like '%(VHS)%' and mc.note like '%(USA)%' and mc.note like '%(1994)%' AND mi.info  IN ('USA', 'America') AND t.production_year > 2010 AND t.id = mi.movie_id AND t.id = mc.movie_id AND mc.movie_id = mi.movie_id AND ct.id = mc.company_type_id AND it.id = mi.info_type_id;
2 | 


--------------------------------------------------------------------------------
/job/6f.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(k.keyword) AS movie_keyword, MIN(n.name) AS actor_name, MIN(t.title) AS hero_movie FROM cast_info AS ci, keyword AS k, movie_keyword AS mk, name AS n, title AS t WHERE k.keyword in ('superhero', 'sequel', 'second-part', 'marvel-comics', 'based-on-comic', 'tv-special', 'fight', 'violence') AND t.production_year > 2000 AND k.id = mk.keyword_id AND t.id = mk.movie_id AND t.id = ci.movie_id AND ci.movie_id = mk.movie_id AND n.id = ci.person_id;
2 | 


--------------------------------------------------------------------------------
/engine/op/OpBase.cpp:
--------------------------------------------------------------------------------
 1 | #include "op/OpBase.hpp"
 2 | //---------------------------------------------------------------------------
 3 | namespace engine {
 4 | //---------------------------------------------------------------------------
 5 | std::string OpBase::getPretty() const { // base-class implementation of the pretty name
 6 |     return {}; // value-initialized std::string, i.e. the empty string
 7 | }
 8 | //---------------------------------------------------------------------------
 9 | }
10 | //---------------------------------------------------------------------------


--------------------------------------------------------------------------------
/job/17f.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(n.name) AS member_in_charnamed_movie FROM cast_info AS ci, company_name AS cn, keyword AS k, movie_companies AS mc, movie_keyword AS mk, name AS n, title AS t WHERE k.keyword ='character-name-in-title' AND n.name  LIKE '%B%' AND n.id = ci.person_id AND ci.movie_id = t.id AND t.id = mk.movie_id AND mk.keyword_id = k.id AND t.id = mc.movie_id AND mc.company_id = cn.id AND ci.movie_id = mc.movie_id AND ci.movie_id = mk.movie_id AND mc.movie_id = mk.movie_id;
2 | 


--------------------------------------------------------------------------------
/engine/infra/helper/Misc.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | //---------------------------------------------------------------------------
 3 | namespace engine {
 4 | //---------------------------------------------------------------------------
 5 | template <typename T>
 6 | struct type_identity { // wraps a type: type_identity<T>::type is T itself
 7 |     using type = T;
 8 | };
 9 | //---------------------------------------------------------------------------
10 | }
11 | //---------------------------------------------------------------------------


--------------------------------------------------------------------------------
/job/17d.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(n.name) AS member_in_charnamed_movie FROM cast_info AS ci, company_name AS cn, keyword AS k, movie_companies AS mc, movie_keyword AS mk, name AS n, title AS t WHERE k.keyword ='character-name-in-title' AND n.name  LIKE '%Bert%' AND n.id = ci.person_id AND ci.movie_id = t.id AND t.id = mk.movie_id AND mk.keyword_id = k.id AND t.id = mc.movie_id AND mc.company_id = cn.id AND ci.movie_id = mc.movie_id AND ci.movie_id = mk.movie_id AND mc.movie_id = mk.movie_id;
2 | 


--------------------------------------------------------------------------------
/job/17e.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(n.name) AS member_in_charnamed_movie FROM cast_info AS ci, company_name AS cn, keyword AS k, movie_companies AS mc, movie_keyword AS mk, name AS n, title AS t WHERE cn.country_code ='[us]' AND k.keyword ='character-name-in-title' AND n.id = ci.person_id AND ci.movie_id = t.id AND t.id = mk.movie_id AND mk.keyword_id = k.id AND t.id = mc.movie_id AND mc.company_id = cn.id AND ci.movie_id = mc.movie_id AND ci.movie_id = mk.movie_id AND mc.movie_id = mk.movie_id;
2 | 


--------------------------------------------------------------------------------
/job/8d.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(an1.name) AS costume_designer_pseudo, MIN(t.title) AS movie_with_costumes FROM aka_name AS an1, cast_info AS ci, company_name AS cn, movie_companies AS mc, name AS n1, role_type AS rt, title AS t WHERE cn.country_code ='[us]' AND rt.role ='costume designer' AND an1.person_id = n1.id AND n1.id = ci.person_id AND ci.movie_id = t.id AND t.id = mc.movie_id AND mc.company_id = cn.id AND ci.role_id = rt.id AND an1.person_id = ci.person_id AND ci.movie_id = mc.movie_id;
2 | 


--------------------------------------------------------------------------------
/job/10c.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(chn.name) AS character, MIN(t.title) AS movie_with_american_producer FROM char_name AS chn, cast_info AS ci, company_name AS cn, company_type AS ct, movie_companies AS mc, role_type AS rt, title AS t WHERE ci.note  like '%(producer)%' AND cn.country_code  = '[us]' AND t.production_year > 1990 AND t.id = mc.movie_id AND t.id = ci.movie_id AND ci.movie_id = mc.movie_id AND chn.id = ci.person_role_id AND rt.id = ci.role_id AND cn.id = mc.company_id AND ct.id = mc.company_type_id;
2 | 


--------------------------------------------------------------------------------
/job/17b.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(n.name) AS member_in_charnamed_movie, MIN(n.name) AS a1 FROM cast_info AS ci, company_name AS cn, keyword AS k, movie_companies AS mc, movie_keyword AS mk, name AS n, title AS t WHERE k.keyword ='character-name-in-title' AND n.name  LIKE 'Z%' AND n.id = ci.person_id AND ci.movie_id = t.id AND t.id = mk.movie_id AND mk.keyword_id = k.id AND t.id = mc.movie_id AND mc.company_id = cn.id AND ci.movie_id = mc.movie_id AND ci.movie_id = mk.movie_id AND mc.movie_id = mk.movie_id;
2 | 


--------------------------------------------------------------------------------
/job/17c.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(n.name) AS member_in_charnamed_movie, MIN(n.name) AS a1 FROM cast_info AS ci, company_name AS cn, keyword AS k, movie_companies AS mc, movie_keyword AS mk, name AS n, title AS t WHERE k.keyword ='character-name-in-title' AND n.name  LIKE 'X%' AND n.id = ci.person_id AND ci.movie_id = t.id AND t.id = mk.movie_id AND mk.keyword_id = k.id AND t.id = mc.movie_id AND mc.company_id = cn.id AND ci.movie_id = mc.movie_id AND ci.movie_id = mk.movie_id AND mc.movie_id = mk.movie_id;
2 | 


--------------------------------------------------------------------------------
/job/6b.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(k.keyword) AS movie_keyword, MIN(n.name) AS actor_name, MIN(t.title) AS hero_movie FROM cast_info AS ci, keyword AS k, movie_keyword AS mk, name AS n, title AS t WHERE k.keyword in ('superhero', 'sequel', 'second-part', 'marvel-comics', 'based-on-comic', 'tv-special', 'fight', 'violence') AND n.name LIKE '%Downey%Robert%' AND t.production_year > 2014 AND k.id = mk.keyword_id AND t.id = mk.movie_id AND t.id = ci.movie_id AND ci.movie_id = mk.movie_id AND n.id = ci.person_id;
2 | 


--------------------------------------------------------------------------------
/job/6d.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(k.keyword) AS movie_keyword, MIN(n.name) AS actor_name, MIN(t.title) AS hero_movie FROM cast_info AS ci, keyword AS k, movie_keyword AS mk, name AS n, title AS t WHERE k.keyword in ('superhero', 'sequel', 'second-part', 'marvel-comics', 'based-on-comic', 'tv-special', 'fight', 'violence') AND n.name LIKE '%Downey%Robert%' AND t.production_year > 2000 AND k.id = mk.keyword_id AND t.id = mk.movie_id AND t.id = ci.movie_id AND ci.movie_id = mk.movie_id AND n.id = ci.person_id;
2 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | /.vscode
 2 | /build
 3 | /imdb
 4 | /output*.txt
 5 | /*.tgz
 6 | /*.db
 7 | /TPC-H*
 8 | /job-sample
 9 | /.cache
10 | BENCHMARK_RUNTIME.txt
11 | perf.data*
12 | perfetto.trace
13 | /.cache.db
14 | /.cache.db.zst
15 | /cmake-*
16 | /.idea
17 | /record.csv
18 | /results/
19 | /xray.trace
20 | /build_xray
21 | /build_perfetto
22 | /queries
23 | /querygen.log
24 | /errors.log
25 | /.venv
26 | /.cache.db*
27 | scaling_bench.json
28 | scaling_bench.pdf
29 | /schema-domain.json
30 | /schema-domain.json.gz


--------------------------------------------------------------------------------
/job/1d.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(mc.note) AS production_note, MIN(t.title) AS movie_title, MIN(t.production_year) AS movie_year FROM company_type AS ct, info_type AS it, movie_companies AS mc, movie_info_idx AS mi_idx, title AS t WHERE ct.kind = 'production companies' AND it.info = 'bottom 10 rank' AND mc.note  not like '%(as Metro-Goldwyn-Mayer Pictures)%' AND t.production_year >2000 AND ct.id = mc.company_type_id AND t.id = mc.movie_id AND t.id = mi_idx.movie_id AND mc.movie_id = mi_idx.movie_id AND it.id = mi_idx.info_type_id;
2 | 


--------------------------------------------------------------------------------
/job/5a.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(t.title) AS typical_european_movie FROM company_type AS ct, info_type AS it, movie_companies AS mc, movie_info AS mi, title AS t WHERE ct.kind  = 'production companies' AND mc.note  like '%(theatrical)%' and mc.note like '%(France)%' AND mi.info  IN ('Sweden', 'Norway', 'Germany', 'Denmark', 'Swedish', 'Denish', 'Norwegian', 'German') AND t.production_year > 2005 AND t.id = mi.movie_id AND t.id = mc.movie_id AND mc.movie_id = mi.movie_id AND ct.id = mc.company_type_id AND it.id = mi.info_type_id;
2 | 


--------------------------------------------------------------------------------
/job/5c.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(t.title) AS american_movie FROM company_type AS ct, info_type AS it, movie_companies AS mc, movie_info AS mi, title AS t WHERE ct.kind  = 'production companies' AND mc.note  not like '%(TV)%' and mc.note like '%(USA)%' AND mi.info  IN ('Sweden', 'Norway', 'Germany', 'Denmark', 'Swedish', 'Denish', 'Norwegian', 'German', 'USA', 'American') AND t.production_year > 1990 AND t.id = mi.movie_id AND t.id = mc.movie_id AND mc.movie_id = mi.movie_id AND ct.id = mc.company_type_id AND it.id = mi.info_type_id;
2 | 


--------------------------------------------------------------------------------
/job/10b.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(chn.name) AS character, MIN(t.title) AS russian_mov_with_actor_producer FROM char_name AS chn, cast_info AS ci, company_name AS cn, company_type AS ct, movie_companies AS mc, role_type AS rt, title AS t WHERE ci.note  like '%(producer)%' AND cn.country_code  = '[ru]' AND rt.role  = 'actor' AND t.production_year > 2010 AND t.id = mc.movie_id AND t.id = ci.movie_id AND ci.movie_id = mc.movie_id AND chn.id = ci.person_role_id AND rt.id = ci.role_id AND cn.id = mc.company_id AND ct.id = mc.company_type_id;
2 | 


--------------------------------------------------------------------------------
/engine/op/ScanBase.cpp:
--------------------------------------------------------------------------------
 1 | #include "op/ScanBase.hpp"
 2 | #include "infra/Scheduler.hpp"
 3 | //---------------------------------------------------------------------------
 4 | namespace engine {
 5 | //---------------------------------------------------------------------------
 6 | size_t ScanBase::concurrency() const { // concurrency value exposed by the scan
 7 |     return Scheduler::concurrency(); // forwarded unchanged from Scheduler::concurrency()
 8 | }
 9 | //---------------------------------------------------------------------------
10 | }
11 | //---------------------------------------------------------------------------


--------------------------------------------------------------------------------
/job/17a.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(n.name) AS member_in_charnamed_american_movie, MIN(n.name) AS a1 FROM cast_info AS ci, company_name AS cn, keyword AS k, movie_companies AS mc, movie_keyword AS mk, name AS n, title AS t WHERE cn.country_code ='[us]' AND k.keyword ='character-name-in-title' AND n.name  LIKE 'B%' AND n.id = ci.person_id AND ci.movie_id = t.id AND t.id = mk.movie_id AND mk.keyword_id = k.id AND t.id = mc.movie_id AND mc.company_id = cn.id AND ci.movie_id = mc.movie_id AND ci.movie_id = mk.movie_id AND mc.movie_id = mk.movie_id;
2 | 


--------------------------------------------------------------------------------
/job/1b.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(mc.note) AS production_note, MIN(t.title) AS movie_title, MIN(t.production_year) AS movie_year FROM company_type AS ct, info_type AS it, movie_companies AS mc, movie_info_idx AS mi_idx, title AS t WHERE ct.kind = 'production companies' AND it.info = 'bottom 10 rank' AND mc.note  not like '%(as Metro-Goldwyn-Mayer Pictures)%' AND t.production_year between 2005 and 2010 AND ct.id = mc.company_type_id AND t.id = mc.movie_id AND t.id = mi_idx.movie_id AND mc.movie_id = mi_idx.movie_id AND it.id = mi_idx.info_type_id;
2 | 


--------------------------------------------------------------------------------
/job/10a.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(chn.name) AS uncredited_voiced_character, MIN(t.title) AS russian_movie FROM char_name AS chn, cast_info AS ci, company_name AS cn, company_type AS ct, movie_companies AS mc, role_type AS rt, title AS t WHERE ci.note  like '%(voice)%' and ci.note like '%(uncredited)%' AND cn.country_code  = '[ru]' AND rt.role  = 'actor' AND t.production_year > 2005 AND t.id = mc.movie_id AND t.id = ci.movie_id AND ci.movie_id = mc.movie_id AND chn.id = ci.person_role_id AND rt.id = ci.role_id AND cn.id = mc.company_id AND ct.id = mc.company_type_id;
2 | 


--------------------------------------------------------------------------------
/job/1a.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(mc.note) AS production_note, MIN(t.title) AS movie_title, MIN(t.production_year) AS movie_year FROM company_type AS ct, info_type AS it, movie_companies AS mc, movie_info_idx AS mi_idx, title AS t WHERE ct.kind = 'production companies' AND it.info = 'top 250 rank' AND mc.note  not like '%(as Metro-Goldwyn-Mayer Pictures)%' and (mc.note like '%(co-production)%' or mc.note like '%(presents)%') AND ct.id = mc.company_type_id AND t.id = mc.movie_id AND t.id = mi_idx.movie_id AND mc.movie_id = mi_idx.movie_id AND it.id = mi_idx.info_type_id;
2 | 


--------------------------------------------------------------------------------
/job/1c.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(mc.note) AS production_note, MIN(t.title) AS movie_title, MIN(t.production_year) AS movie_year FROM company_type AS ct, info_type AS it, movie_companies AS mc, movie_info_idx AS mi_idx, title AS t WHERE ct.kind = 'production companies' AND it.info = 'top 250 rank' AND mc.note  not like '%(as Metro-Goldwyn-Mayer Pictures)%' and (mc.note like '%(co-production)%') AND t.production_year >2010 AND ct.id = mc.company_type_id AND t.id = mc.movie_id AND t.id = mi_idx.movie_id AND mc.movie_id = mi_idx.movie_id AND it.id = mi_idx.info_type_id;
2 | 


--------------------------------------------------------------------------------
/job/16b.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(an.name) AS cool_actor_pseudonym, MIN(t.title) AS series_named_after_char FROM aka_name AS an, cast_info AS ci, company_name AS cn, keyword AS k, movie_companies AS mc, movie_keyword AS mk, name AS n, title AS t WHERE cn.country_code ='[us]' AND k.keyword ='character-name-in-title' AND an.person_id = n.id AND n.id = ci.person_id AND ci.movie_id = t.id AND t.id = mk.movie_id AND mk.keyword_id = k.id AND t.id = mc.movie_id AND mc.company_id = cn.id AND an.person_id = ci.person_id AND ci.movie_id = mc.movie_id AND ci.movie_id = mk.movie_id AND mc.movie_id = mk.movie_id;
2 | 


--------------------------------------------------------------------------------
/engine/op/OpBase.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | //---------------------------------------------------------------------------
 3 | #include <cassert>
 4 | #include <string>
 5 | //---------------------------------------------------------------------------
 6 | namespace engine {
 7 | //---------------------------------------------------------------------------
 8 | /// Base class for all ops. NOTE(review): the class is polymorphic (it has a virtual member) but declares no virtual destructor; confirm that ops are never deleted through an OpBase pointer
 9 | class OpBase {
10 |     public:
11 |     /// Get the pretty name as a std::string. Virtual, so subclasses may override it; only declared here, the definition is not part of this header
12 |     virtual std::string getPretty() const;
13 | };
14 | //---------------------------------------------------------------------------
15 | }
16 | 


--------------------------------------------------------------------------------
/job/16c.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(an.name) AS cool_actor_pseudonym, MIN(t.title) AS series_named_after_char FROM aka_name AS an, cast_info AS ci, company_name AS cn, keyword AS k, movie_companies AS mc, movie_keyword AS mk, name AS n, title AS t WHERE cn.country_code ='[us]' AND k.keyword ='character-name-in-title' AND t.episode_nr < 100 AND an.person_id = n.id AND n.id = ci.person_id AND ci.movie_id = t.id AND t.id = mk.movie_id AND mk.keyword_id = k.id AND t.id = mc.movie_id AND mc.company_id = cn.id AND an.person_id = ci.person_id AND ci.movie_id = mc.movie_id AND ci.movie_id = mk.movie_id AND mc.movie_id = mk.movie_id;
2 | 


--------------------------------------------------------------------------------
/job/8a.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(an1.name) AS actress_pseudonym, MIN(t.title) AS japanese_movie_dubbed FROM aka_name AS an1, cast_info AS ci, company_name AS cn, movie_companies AS mc, name AS n1, role_type AS rt, title AS t WHERE ci.note ='(voice: English version)' AND cn.country_code ='[jp]' AND mc.note like '%(Japan)%' and mc.note not like '%(USA)%' AND n1.name like '%Yo%' and n1.name not like '%Yu%' AND rt.role ='actress' AND an1.person_id = n1.id AND n1.id = ci.person_id AND ci.movie_id = t.id AND t.id = mc.movie_id AND mc.company_id = cn.id AND ci.role_id = rt.id AND an1.person_id = ci.person_id AND ci.movie_id = mc.movie_id;
2 | 


--------------------------------------------------------------------------------
/job/16a.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(an.name) AS cool_actor_pseudonym, MIN(t.title) AS series_named_after_char FROM aka_name AS an, cast_info AS ci, company_name AS cn, keyword AS k, movie_companies AS mc, movie_keyword AS mk, name AS n, title AS t WHERE cn.country_code ='[us]' AND k.keyword ='character-name-in-title' AND t.episode_nr >= 50 AND t.episode_nr < 100 AND an.person_id = n.id AND n.id = ci.person_id AND ci.movie_id = t.id AND t.id = mk.movie_id AND mk.keyword_id = k.id AND t.id = mc.movie_id AND mc.company_id = cn.id AND an.person_id = ci.person_id AND ci.movie_id = mc.movie_id AND ci.movie_id = mk.movie_id AND mc.movie_id = mk.movie_id;
2 | 


--------------------------------------------------------------------------------
/job/16d.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(an.name) AS cool_actor_pseudonym, MIN(t.title) AS series_named_after_char FROM aka_name AS an, cast_info AS ci, company_name AS cn, keyword AS k, movie_companies AS mc, movie_keyword AS mk, name AS n, title AS t WHERE cn.country_code ='[us]' AND k.keyword ='character-name-in-title' AND t.episode_nr >= 5 AND t.episode_nr < 100 AND an.person_id = n.id AND n.id = ci.person_id AND ci.movie_id = t.id AND t.id = mk.movie_id AND mk.keyword_id = k.id AND t.id = mc.movie_id AND mc.company_id = cn.id AND an.person_id = ci.person_id AND ci.movie_id = mc.movie_id AND ci.movie_id = mk.movie_id AND mc.movie_id = mk.movie_id;
2 | 


--------------------------------------------------------------------------------
/job/18a.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(mi.info) AS movie_budget, MIN(mi_idx.info) AS movie_votes, MIN(t.title) AS movie_title FROM cast_info AS ci, info_type AS it1, info_type AS it2, movie_info AS mi, movie_info_idx AS mi_idx, name AS n, title AS t WHERE ci.note  in ('(producer)', '(executive producer)') AND it1.info  = 'budget' AND it2.info  = 'votes' AND n.gender  = 'm' and n.name like '%Tim%' AND t.id = mi.movie_id AND t.id = mi_idx.movie_id AND t.id = ci.movie_id AND ci.movie_id = mi.movie_id AND ci.movie_id = mi_idx.movie_id AND mi.movie_id = mi_idx.movie_id AND n.id = ci.person_id AND it1.id = mi.info_type_id AND it2.id = mi_idx.info_type_id;
2 | 


--------------------------------------------------------------------------------
/engine/Execute.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | //---------------------------------------------------------------------------
 3 | #include <plan.h>
 4 | //---------------------------------------------------------------------------
 5 | namespace engine {
 6 | //---------------------------------------------------------------------------
 7 | class QueryPlan;
 8 | //---------------------------------------------------------------------------
 9 | ColumnarTable execute(QueryPlan plan, [[maybe_unused]] void* context); // Declaration only: takes the plan by value and returns a ColumnarTable; QueryPlan is just forward-declared above, so callers need its full definition. Presumably runs the plan; confirm in Execute.cpp
10 | //---------------------------------------------------------------------------
11 | }
12 | //---------------------------------------------------------------------------


--------------------------------------------------------------------------------
/job/9d.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(an.name) AS alternative_name, MIN(chn.name) AS voiced_char_name, MIN(n.name) AS voicing_actress, MIN(t.title) AS american_movie FROM aka_name AS an, char_name AS chn, cast_info AS ci, company_name AS cn, movie_companies AS mc, name AS n, role_type AS rt, title AS t WHERE ci.note  in ('(voice)', '(voice: Japanese version)', '(voice) (uncredited)', '(voice: English version)') AND cn.country_code ='[us]' AND n.gender ='f' AND rt.role ='actress' AND ci.movie_id = t.id AND t.id = mc.movie_id AND ci.movie_id = mc.movie_id AND mc.company_id = cn.id AND ci.role_id = rt.id AND n.id = ci.person_id AND chn.id = ci.person_role_id AND an.person_id = n.id AND an.person_id = ci.person_id;
2 | 


--------------------------------------------------------------------------------
/job/7b.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(n.name) AS of_person, MIN(t.title) AS biography_movie FROM aka_name AS an, cast_info AS ci, info_type AS it, link_type AS lt, movie_link AS ml, name AS n, person_info AS pi, title AS t WHERE an.name LIKE '%a%' AND it.info ='mini biography' AND lt.link ='features' AND n.name_pcode_cf LIKE 'D%' AND n.gender='m' AND pi.note ='Volker Boehm' AND t.production_year BETWEEN 1980 AND 1984 AND n.id = an.person_id AND n.id = pi.person_id AND ci.person_id = n.id AND t.id = ci.movie_id AND ml.linked_movie_id = t.id AND lt.id = ml.link_type_id AND it.id = pi.info_type_id AND pi.person_id = an.person_id AND pi.person_id = ci.person_id AND an.person_id = ci.person_id AND ci.movie_id = ml.linked_movie_id;
2 | 


--------------------------------------------------------------------------------
/job/13d.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(cn.name) AS producing_company, MIN(miidx.info) AS rating, MIN(t.title) AS movie FROM company_name AS cn, company_type AS ct, info_type AS it, info_type AS it2, kind_type AS kt, movie_companies AS mc, movie_info AS mi, movie_info_idx AS miidx, title AS t WHERE cn.country_code ='[us]' AND ct.kind ='production companies' AND it.info ='rating' AND it2.info ='release dates' AND kt.kind ='movie' AND mi.movie_id = t.id AND it2.id = mi.info_type_id AND kt.id = t.kind_id AND mc.movie_id = t.id AND cn.id = mc.company_id AND ct.id = mc.company_type_id AND miidx.movie_id = t.id AND it.id = miidx.info_type_id AND mi.movie_id = miidx.movie_id AND mi.movie_id = mc.movie_id AND miidx.movie_id = mc.movie_id;
2 | 


--------------------------------------------------------------------------------
/job/13a.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(mi.info) AS release_date, MIN(miidx.info) AS rating, MIN(t.title) AS german_movie FROM company_name AS cn, company_type AS ct, info_type AS it, info_type AS it2, kind_type AS kt, movie_companies AS mc, movie_info AS mi, movie_info_idx AS miidx, title AS t WHERE cn.country_code ='[de]' AND ct.kind ='production companies' AND it.info ='rating' AND it2.info ='release dates' AND kt.kind ='movie' AND mi.movie_id = t.id AND it2.id = mi.info_type_id AND kt.id = t.kind_id AND mc.movie_id = t.id AND cn.id = mc.company_id AND ct.id = mc.company_type_id AND miidx.movie_id = t.id AND it.id = miidx.info_type_id AND mi.movie_id = miidx.movie_id AND mi.movie_id = mc.movie_id AND miidx.movie_id = mc.movie_id;
2 | 


--------------------------------------------------------------------------------
/job/18c.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(mi.info) AS movie_budget, MIN(mi_idx.info) AS movie_votes, MIN(t.title) AS movie_title FROM cast_info AS ci, info_type AS it1, info_type AS it2, movie_info AS mi, movie_info_idx AS mi_idx, name AS n, title AS t WHERE ci.note  in ('(writer)', '(head writer)', '(written by)', '(story)', '(story editor)') AND it1.info  = 'genres' AND it2.info  = 'votes' AND mi.info  in ('Horror', 'Action', 'Sci-Fi', 'Thriller', 'Crime', 'War') AND n.gender  = 'm' AND t.id = mi.movie_id AND t.id = mi_idx.movie_id AND t.id = ci.movie_id AND ci.movie_id = mi.movie_id AND ci.movie_id = mi_idx.movie_id AND mi.movie_id = mi_idx.movie_id AND n.id = ci.person_id AND it1.id = mi.info_type_id AND it2.id = mi_idx.info_type_id;
2 | 


--------------------------------------------------------------------------------
/job/9c.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(an.name) AS alternative_name, MIN(chn.name) AS voiced_character_name, MIN(n.name) AS voicing_actress, MIN(t.title) AS american_movie FROM aka_name AS an, char_name AS chn, cast_info AS ci, company_name AS cn, movie_companies AS mc, name AS n, role_type AS rt, title AS t WHERE ci.note  in ('(voice)', '(voice: Japanese version)', '(voice) (uncredited)', '(voice: English version)') AND cn.country_code ='[us]' AND n.gender ='f' and n.name like '%An%' AND rt.role ='actress' AND ci.movie_id = t.id AND t.id = mc.movie_id AND ci.movie_id = mc.movie_id AND mc.company_id = cn.id AND ci.role_id = rt.id AND n.id = ci.person_id AND chn.id = ci.person_role_id AND an.person_id = n.id AND an.person_id = ci.person_id;
2 | 


--------------------------------------------------------------------------------
/engine/tools/DuckDB.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | //---------------------------------------------------------------------------
 3 | #include <string>
 4 | #include <table.h>
 5 | //---------------------------------------------------------------------------
 6 | namespace engine {
 7 | //---------------------------------------------------------------------------
 8 | class DuckDB { // Wrapper whose state lives behind the forward-declared Impl below
 9 |     struct Impl; // Opaque implementation type; declared here, defined outside this header
10 |     std::unique_ptr<Impl> impl; // Owning pointer to the implementation. NOTE(review): <memory> is not included directly by this header; presumably it arrives via <table.h>, confirm
11 | 
12 |     public:
13 |     DuckDB(); // Defined out of line
14 |     ~DuckDB() noexcept; // Defined out of line; presumably because Impl is incomplete here and unique_ptr needs the complete type to destroy it, confirm in DuckDB.cpp
15 | 
16 |     ColumnarTable execute(std::string query); // Takes the query text by value and returns a ColumnarTable; presumably runs it through DuckDB, confirm in DuckDB.cpp
17 | };
18 | //---------------------------------------------------------------------------
19 | }
20 | //---------------------------------------------------------------------------


--------------------------------------------------------------------------------
/job/11d.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(cn.name) AS from_company, MIN(mc.note) AS production_note, MIN(t.title) AS movie_based_on_book FROM company_name AS cn, company_type AS ct, keyword AS k, link_type AS lt, movie_companies AS mc, movie_keyword AS mk, movie_link AS ml, title AS t WHERE cn.country_code  !='[pl]' AND ct.kind  != 'production companies' and ct.kind is not NULL AND k.keyword  in ('sequel', 'revenge', 'based-on-novel') AND mc.note  is not NULL AND t.production_year  > 1950 AND lt.id = ml.link_type_id AND ml.movie_id = t.id AND t.id = mk.movie_id AND mk.keyword_id = k.id AND t.id = mc.movie_id AND mc.company_type_id = ct.id AND mc.company_id = cn.id AND ml.movie_id = mk.movie_id AND ml.movie_id = mc.movie_id AND mk.movie_id = mc.movie_id;
2 | 


--------------------------------------------------------------------------------
/job/15d.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(at.title) AS aka_title, MIN(t.title) AS internet_movie_title FROM aka_title AS at, company_name AS cn, company_type AS ct, info_type AS it1, keyword AS k, movie_companies AS mc, movie_info AS mi, movie_keyword AS mk, title AS t WHERE cn.country_code  = '[us]' AND it1.info  = 'release dates' AND mi.note  like '%internet%' AND t.production_year  > 1990 AND t.id = at.movie_id AND t.id = mi.movie_id AND t.id = mk.movie_id AND t.id = mc.movie_id AND mk.movie_id = mi.movie_id AND mk.movie_id = mc.movie_id AND mk.movie_id = at.movie_id AND mi.movie_id = mc.movie_id AND mi.movie_id = at.movie_id AND mc.movie_id = at.movie_id AND k.id = mk.keyword_id AND it1.id = mi.info_type_id AND cn.id = mc.company_id AND ct.id = mc.company_type_id;
2 | 


--------------------------------------------------------------------------------
/job/7a.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(n.name) AS of_person, MIN(t.title) AS biography_movie FROM aka_name AS an, cast_info AS ci, info_type AS it, link_type AS lt, movie_link AS ml, name AS n, person_info AS pi, title AS t WHERE an.name LIKE '%a%' AND it.info ='mini biography' AND lt.link ='features' AND n.name_pcode_cf BETWEEN 'A' AND 'F' AND (n.gender='m' OR (n.gender = 'f' AND n.name LIKE 'B%')) AND pi.note ='Volker Boehm' AND t.production_year BETWEEN 1980 AND 1995 AND n.id = an.person_id AND n.id = pi.person_id AND ci.person_id = n.id AND t.id = ci.movie_id AND ml.linked_movie_id = t.id AND lt.id = ml.link_type_id AND it.id = pi.info_type_id AND pi.person_id = an.person_id AND pi.person_id = ci.person_id AND an.person_id = ci.person_id AND ci.movie_id = ml.linked_movie_id;
2 | 


--------------------------------------------------------------------------------
/job/11a.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(cn.name) AS from_company, MIN(lt.link) AS movie_link_type, MIN(t.title) AS non_polish_sequel_movie FROM company_name AS cn, company_type AS ct, keyword AS k, link_type AS lt, movie_companies AS mc, movie_keyword AS mk, movie_link AS ml, title AS t WHERE cn.country_code !='[pl]' AND (cn.name LIKE '%Film%' OR cn.name LIKE '%Warner%') AND ct.kind ='production companies' AND k.keyword ='sequel' AND lt.link LIKE '%follow%' AND mc.note IS NULL AND t.production_year BETWEEN 1950 AND 2000 AND lt.id = ml.link_type_id AND ml.movie_id = t.id AND t.id = mk.movie_id AND mk.keyword_id = k.id AND t.id = mc.movie_id AND mc.company_type_id = ct.id AND mc.company_id = cn.id AND ml.movie_id = mk.movie_id AND ml.movie_id = mc.movie_id AND mk.movie_id = mc.movie_id;
2 | 


--------------------------------------------------------------------------------
/job/11b.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(cn.name) AS from_company, MIN(lt.link) AS movie_link_type, MIN(t.title) AS sequel_movie FROM company_name AS cn, company_type AS ct, keyword AS k, link_type AS lt, movie_companies AS mc, movie_keyword AS mk, movie_link AS ml, title AS t WHERE cn.country_code !='[pl]' AND (cn.name LIKE '%Film%' OR cn.name LIKE '%Warner%') AND ct.kind ='production companies' AND k.keyword ='sequel' AND lt.link LIKE '%follows%' AND mc.note IS NULL AND t.production_year  = 1998 and t.title like '%Money%' AND lt.id = ml.link_type_id AND ml.movie_id = t.id AND t.id = mk.movie_id AND mk.keyword_id = k.id AND t.id = mc.movie_id AND mc.company_type_id = ct.id AND mc.company_id = cn.id AND ml.movie_id = mk.movie_id AND ml.movie_id = mc.movie_id AND mk.movie_id = mc.movie_id;
2 | 


--------------------------------------------------------------------------------
/job/8b.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(an.name) AS acress_pseudonym, MIN(t.title) AS japanese_anime_movie FROM aka_name AS an, cast_info AS ci, company_name AS cn, movie_companies AS mc, name AS n, role_type AS rt, title AS t WHERE ci.note ='(voice: English version)' AND cn.country_code ='[jp]' AND mc.note like '%(Japan)%' and mc.note not like '%(USA)%' and (mc.note like '%(2006)%' or mc.note like '%(2007)%') AND n.name like '%Yo%' and n.name not like '%Yu%' AND rt.role ='actress' AND t.production_year between 2006 and 2007 and (t.title like 'One Piece%' or t.title like 'Dragon Ball Z%') AND an.person_id = n.id AND n.id = ci.person_id AND ci.movie_id = t.id AND t.id = mc.movie_id AND mc.company_id = cn.id AND ci.role_id = rt.id AND an.person_id = ci.person_id AND ci.movie_id = mc.movie_id;
2 | 


--------------------------------------------------------------------------------
/job/9b.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(an.name) AS alternative_name, MIN(chn.name) AS voiced_character, MIN(n.name) AS voicing_actress, MIN(t.title) AS american_movie FROM aka_name AS an, char_name AS chn, cast_info AS ci, company_name AS cn, movie_companies AS mc, name AS n, role_type AS rt, title AS t WHERE ci.note  = '(voice)' AND cn.country_code ='[us]' AND mc.note  like '%(200%)%' and (mc.note like '%(USA)%' or mc.note like '%(worldwide)%') AND n.gender ='f' and n.name like '%Angel%' AND rt.role ='actress' AND t.production_year  between 2007 and 2010 AND ci.movie_id = t.id AND t.id = mc.movie_id AND ci.movie_id = mc.movie_id AND mc.company_id = cn.id AND ci.role_id = rt.id AND n.id = ci.person_id AND chn.id = ci.person_role_id AND an.person_id = n.id AND an.person_id = ci.person_id;
2 | 


--------------------------------------------------------------------------------
/job/12a.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(cn.name) AS movie_company, MIN(mi_idx.info) AS rating, MIN(t.title) AS drama_horror_movie FROM company_name AS cn, company_type AS ct, info_type AS it1, info_type AS it2, movie_companies AS mc, movie_info AS mi, movie_info_idx AS mi_idx, title AS t WHERE cn.country_code  = '[us]' AND ct.kind  = 'production companies' AND it1.info = 'genres' AND it2.info = 'rating' AND mi.info  in ('Drama', 'Horror') AND mi_idx.info  > '8.0' AND t.production_year  between 2005 and 2008 AND t.id = mi.movie_id AND t.id = mi_idx.movie_id AND mi.info_type_id = it1.id AND mi_idx.info_type_id = it2.id AND t.id = mc.movie_id AND ct.id = mc.company_type_id AND cn.id = mc.company_id AND mc.movie_id = mi.movie_id AND mc.movie_id = mi_idx.movie_id AND mi.movie_id = mi_idx.movie_id;
2 | 


--------------------------------------------------------------------------------
/job/12b.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(mi.info) AS budget, MIN(t.title) AS unsuccsessful_movie FROM company_name AS cn, company_type AS ct, info_type AS it1, info_type AS it2, movie_companies AS mc, movie_info AS mi, movie_info_idx AS mi_idx, title AS t WHERE cn.country_code ='[us]' AND ct.kind  is not NULL and (ct.kind ='production companies' or ct.kind = 'distributors') AND it1.info ='budget' AND it2.info ='bottom 10 rank' AND t.production_year >2000 AND (t.title LIKE 'Birdemic%' OR t.title LIKE '%Movie%') AND t.id = mi.movie_id AND t.id = mi_idx.movie_id AND mi.info_type_id = it1.id AND mi_idx.info_type_id = it2.id AND t.id = mc.movie_id AND ct.id = mc.company_type_id AND cn.id = mc.company_id AND mc.movie_id = mi.movie_id AND mc.movie_id = mi_idx.movie_id AND mi.movie_id = mi_idx.movie_id;
2 | 


--------------------------------------------------------------------------------
/job/12c.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(cn.name) AS movie_company, MIN(mi_idx.info) AS rating, MIN(t.title) AS mainstream_movie FROM company_name AS cn, company_type AS ct, info_type AS it1, info_type AS it2, movie_companies AS mc, movie_info AS mi, movie_info_idx AS mi_idx, title AS t WHERE cn.country_code  = '[us]' AND ct.kind  = 'production companies' AND it1.info = 'genres' AND it2.info = 'rating' AND mi.info  in ('Drama', 'Horror', 'Western', 'Family') AND mi_idx.info  > '7.0' AND t.production_year  between 2000 and 2010 AND t.id = mi.movie_id AND t.id = mi_idx.movie_id AND mi.info_type_id = it1.id AND mi_idx.info_type_id = it2.id AND t.id = mc.movie_id AND ct.id = mc.company_type_id AND cn.id = mc.company_id AND mc.movie_id = mi.movie_id AND mc.movie_id = mi_idx.movie_id AND mi.movie_id = mi_idx.movie_id;
2 | 


--------------------------------------------------------------------------------
/job/18b.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(mi.info) AS movie_budget, MIN(mi_idx.info) AS movie_votes, MIN(t.title) AS movie_title FROM cast_info AS ci, info_type AS it1, info_type AS it2, movie_info AS mi, movie_info_idx AS mi_idx, name AS n, title AS t WHERE ci.note  in ('(writer)', '(head writer)', '(written by)', '(story)', '(story editor)') AND it1.info  = 'genres' AND it2.info  = 'rating' AND mi.info  in ('Horror', 'Thriller') and mi.note is NULL AND mi_idx.info  > '8.0' AND n.gender  is not null and n.gender = 'f' AND t.production_year  between 2008 and 2014 AND t.id = mi.movie_id AND t.id = mi_idx.movie_id AND t.id = ci.movie_id AND ci.movie_id = mi.movie_id AND ci.movie_id = mi_idx.movie_id AND mi.movie_id = mi_idx.movie_id AND n.id = ci.person_id AND it1.id = mi.info_type_id AND it2.id = mi_idx.info_type_id;
2 | 


--------------------------------------------------------------------------------
/job/13b.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(cn.name) AS producing_company, MIN(miidx.info) AS rating, MIN(t.title) AS movie_about_winning FROM company_name AS cn, company_type AS ct, info_type AS it, info_type AS it2, kind_type AS kt, movie_companies AS mc, movie_info AS mi, movie_info_idx AS miidx, title AS t WHERE cn.country_code ='[us]' AND ct.kind ='production companies' AND it.info ='rating' AND it2.info ='release dates' AND kt.kind ='movie' AND t.title  != '' AND (t.title LIKE '%Champion%' OR t.title LIKE '%Loser%') AND mi.movie_id = t.id AND it2.id = mi.info_type_id AND kt.id = t.kind_id AND mc.movie_id = t.id AND cn.id = mc.company_id AND ct.id = mc.company_type_id AND miidx.movie_id = t.id AND it.id = miidx.info_type_id AND mi.movie_id = miidx.movie_id AND mi.movie_id = mc.movie_id AND miidx.movie_id = mc.movie_id;
2 | 


--------------------------------------------------------------------------------
/job/13c.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(cn.name) AS producing_company, MIN(miidx.info) AS rating, MIN(t.title) AS movie_about_winning FROM company_name AS cn, company_type AS ct, info_type AS it, info_type AS it2, kind_type AS kt, movie_companies AS mc, movie_info AS mi, movie_info_idx AS miidx, title AS t WHERE cn.country_code ='[us]' AND ct.kind ='production companies' AND it.info ='rating' AND it2.info ='release dates' AND kt.kind ='movie' AND t.title  != '' AND (t.title LIKE 'Champion%' OR t.title LIKE 'Loser%') AND mi.movie_id = t.id AND it2.id = mi.info_type_id AND kt.id = t.kind_id AND mc.movie_id = t.id AND cn.id = mc.company_id AND ct.id = mc.company_type_id AND miidx.movie_id = t.id AND it.id = miidx.info_type_id AND mi.movie_id = miidx.movie_id AND mi.movie_id = mc.movie_id AND miidx.movie_id = mc.movie_id;
2 | 


--------------------------------------------------------------------------------
/job/9a.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(an.name) AS alternative_name, MIN(chn.name) AS character_name, MIN(t.title) AS movie FROM aka_name AS an, char_name AS chn, cast_info AS ci, company_name AS cn, movie_companies AS mc, name AS n, role_type AS rt, title AS t WHERE ci.note  in ('(voice)', '(voice: Japanese version)', '(voice) (uncredited)', '(voice: English version)') AND cn.country_code ='[us]' AND mc.note  is not NULL and (mc.note like '%(USA)%' or mc.note like '%(worldwide)%') AND n.gender ='f' and n.name like '%Ang%' AND rt.role ='actress' AND t.production_year  between 2005 and 2015 AND ci.movie_id = t.id AND t.id = mc.movie_id AND ci.movie_id = mc.movie_id AND mc.company_id = cn.id AND ci.role_id = rt.id AND n.id = ci.person_id AND chn.id = ci.person_role_id AND an.person_id = n.id AND an.person_id = ci.person_id;
2 | 


--------------------------------------------------------------------------------
/job/11c.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(cn.name) AS from_company, MIN(mc.note) AS production_note, MIN(t.title) AS movie_based_on_book FROM company_name AS cn, company_type AS ct, keyword AS k, link_type AS lt, movie_companies AS mc, movie_keyword AS mk, movie_link AS ml, title AS t WHERE cn.country_code  !='[pl]' and (cn.name like '20th Century Fox%' or cn.name like 'Twentieth Century Fox%') AND ct.kind  != 'production companies' and ct.kind is not NULL AND k.keyword  in ('sequel', 'revenge', 'based-on-novel') AND mc.note  is not NULL AND t.production_year  > 1950 AND lt.id = ml.link_type_id AND ml.movie_id = t.id AND t.id = mk.movie_id AND mk.keyword_id = k.id AND t.id = mc.movie_id AND mc.company_type_id = ct.id AND mc.company_id = cn.id AND ml.movie_id = mk.movie_id AND ml.movie_id = mc.movie_id AND mk.movie_id = mc.movie_id;
2 | 


--------------------------------------------------------------------------------
/checknolib.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | # Each find below greps every regular file under engine/ except all.cpp and anything in tools/ or test/. NOTE(review): grep receives one file per call, so matching lines print without their file name, and the script's exit status does not depend on whether anything matched; confirm both are intended
 3 | # check for files containing fmt
 4 | find engine/ -type f ! -path "*/all.cpp" ! -path "*/tools/*" ! -path "*/test/*" -exec grep "fmt" {} \;
 5 | 
 6 | # check for files containing ranges
 7 | find engine/ -type f ! -path "*/all.cpp" ! -path "*/tools/*" ! -path "*/test/*" -exec grep "ranges" {} \;
 8 | 
 9 | # check for files containing Setting
10 | find engine/ -type f ! -path "*/all.cpp" ! -path "*/tools/*" ! -path "*/test/*" -exec grep "Setting" {} \;
11 | 
12 | # check for files containing PerfEvent
13 | find engine/ -type f ! -path "*/all.cpp" ! -path "*/tools/*" ! -path "*/test/*" -exec grep "PerfEvent" {} \;
14 | 
15 | # check for files containing Perfetto
16 | find engine/ -type f ! -path "*/all.cpp" ! -path "*/tools/*" ! -path "*/test/*" -exec grep "Perfetto" {} \;
17 | 


--------------------------------------------------------------------------------
/job/14a.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(mi_idx.info) AS rating, MIN(t.title) AS northern_dark_movie FROM info_type AS it1, info_type AS it2, keyword AS k, kind_type AS kt, movie_info AS mi, movie_info_idx AS mi_idx, movie_keyword AS mk, title AS t WHERE it1.info  = 'countries' AND it2.info  = 'rating' AND k.keyword  in ('murder', 'murder-in-title', 'blood', 'violence') AND kt.kind  = 'movie' AND mi.info IN ('Sweden', 'Norway', 'Germany', 'Denmark', 'Swedish', 'Denish', 'Norwegian', 'German', 'USA', 'American') AND mi_idx.info  < '8.5' AND t.production_year  > 2010 AND kt.id = t.kind_id AND t.id = mi.movie_id AND t.id = mk.movie_id AND t.id = mi_idx.movie_id AND mk.movie_id = mi.movie_id AND mk.movie_id = mi_idx.movie_id AND mi.movie_id = mi_idx.movie_id AND k.id = mk.keyword_id AND it1.id = mi.info_type_id AND it2.id = mi_idx.info_type_id;
2 | 


--------------------------------------------------------------------------------
/job/15a.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(mi.info) AS release_date, MIN(t.title) AS internet_movie FROM aka_title AS at, company_name AS cn, company_type AS ct, info_type AS it1, keyword AS k, movie_companies AS mc, movie_info AS mi, movie_keyword AS mk, title AS t WHERE cn.country_code  = '[us]' AND it1.info  = 'release dates' AND mc.note  like '%(200%)%' and mc.note like '%(worldwide)%' AND mi.note  like '%internet%' AND mi.info  like 'USA:% 200%' AND t.production_year  > 2000 AND t.id = at.movie_id AND t.id = mi.movie_id AND t.id = mk.movie_id AND t.id = mc.movie_id AND mk.movie_id = mi.movie_id AND mk.movie_id = mc.movie_id AND mk.movie_id = at.movie_id AND mi.movie_id = mc.movie_id AND mi.movie_id = at.movie_id AND mc.movie_id = at.movie_id AND k.id = mk.keyword_id AND it1.id = mi.info_type_id AND cn.id = mc.company_id AND ct.id = mc.company_type_id;
2 | 


--------------------------------------------------------------------------------
/job/19d.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(n.name) AS voicing_actress, MIN(t.title) AS jap_engl_voiced_movie FROM aka_name AS an, char_name AS chn, cast_info AS ci, company_name AS cn, info_type AS it, movie_companies AS mc, movie_info AS mi, name AS n, role_type AS rt, title AS t WHERE ci.note  in ('(voice)', '(voice: Japanese version)', '(voice) (uncredited)', '(voice: English version)') AND cn.country_code ='[us]' AND it.info  = 'release dates' AND n.gender ='f' AND rt.role ='actress' AND t.production_year  > 2000 AND t.id = mi.movie_id AND t.id = mc.movie_id AND t.id = ci.movie_id AND mc.movie_id = ci.movie_id AND mc.movie_id = mi.movie_id AND mi.movie_id = ci.movie_id AND cn.id = mc.company_id AND it.id = mi.info_type_id AND n.id = ci.person_id AND rt.id = ci.role_id AND n.id = an.person_id AND ci.person_id = an.person_id AND chn.id = ci.person_role_id;
2 | 


--------------------------------------------------------------------------------
/engine/query/Restriction.cpp:
--------------------------------------------------------------------------------
 1 | #include "query/Restriction.hpp"
 2 | #include "op/Hashtable.hpp"
 3 | //---------------------------------------------------------------------------
 4 | namespace engine {
 5 | //---------------------------------------------------------------------------
 6 | bool Restriction::operator()(uint64_t val) const noexcept { // Evaluates this restriction against one value; true means the value passes
 7 |     switch (type) {
 8 |         case Eq: // Equality with the stored constant
 9 |             assert(cst.value != nullValue); // The constant itself must never be nullValue
10 |             return val == cst.value;
11 |         case NotNull: return val != nullValue; // Passes everything except nullValue
12 |         case Join: return (val != nullValue) && joinFilter->joinFilter(val); // nullValue fails before the filter is consulted (short-circuit)
13 |         case JoinPrecise: return (val != nullValue) && joinFilter->joinFilterPrecise(val); // Same null guard, but probes joinFilterPrecise instead
14 |     }
15 |     __builtin_unreachable(); // Every case above returns; presumably these four cover the whole enum, confirm in Restriction.hpp
16 | }
17 | //---------------------------------------------------------------------------
18 | }


--------------------------------------------------------------------------------
/job/15c.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(mi.info) AS release_date, MIN(t.title) AS modern_american_internet_movie FROM aka_title AS at, company_name AS cn, company_type AS ct, info_type AS it1, keyword AS k, movie_companies AS mc, movie_info AS mi, movie_keyword AS mk, title AS t WHERE cn.country_code  = '[us]' AND it1.info  = 'release dates' AND mi.note  like '%internet%' AND mi.info  is not NULL and (mi.info like 'USA:% 199%' or mi.info like 'USA:% 200%') AND t.production_year  > 1990 AND t.id = at.movie_id AND t.id = mi.movie_id AND t.id = mk.movie_id AND t.id = mc.movie_id AND mk.movie_id = mi.movie_id AND mk.movie_id = mc.movie_id AND mk.movie_id = at.movie_id AND mi.movie_id = mc.movie_id AND mi.movie_id = at.movie_id AND mc.movie_id = at.movie_id AND k.id = mk.keyword_id AND it1.id = mi.info_type_id AND cn.id = mc.company_id AND ct.id = mc.company_type_id;
2 | 


--------------------------------------------------------------------------------
/engine/pipeline/PipelineFunction.cpp:
--------------------------------------------------------------------------------
 1 | #include "pipeline/PipelineFunction.hpp"
 2 | #include <algorithm>
 3 | #include <stdexcept>
 4 | //---------------------------------------------------------------------------
 5 | namespace engine {
 6 | //---------------------------------------------------------------------------
 7 | PipelineFunction PipelineFunctions::lookupPipeline(std::string_view name) {
 8 |     // lower_bound needs `functions` ordered by name; it yields the only possible match
 9 |     const auto end = functions + numFunctions;
10 |     const auto byName = [](const auto& entry, std::string_view key) { return entry.first < key; };
11 |     const auto hit = std::lower_bound(functions, end, name, byName);
12 |     if (hit != end && hit->first == name)
13 |         return hit->second;
14 |     throw std::runtime_error("Pipeline not found");
15 | }
16 | //---------------------------------------------------------------------------
17 | }
18 | 


--------------------------------------------------------------------------------
/checkglobals.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | #
 3 | # List the global constructors of a binary: dump the raw contents of the
 4 | # .init_array section, decode every 8-byte little-endian entry into an
 5 | # address, and print the symbol objdump reports at that address.
 6 | #
 7 | # Usage: checkglobals.sh <binary>
 8 | 
 9 | if [ $# -ne 1 ]; then
10 |     echo "Usage: $0 <binary>"
11 |     exit 1
12 | fi
13 | 
14 | BINARY="$1"
15 | 
16 | echo "Extracting global constructors from: $BINARY"
17 | 
18 | # `objdump -s` prints rows of "<offset> <word> <word> <word> <word> <ascii>",
19 | # each word being 4 bytes written as 8 hex digits, so two consecutive words
20 | # form one 8-byte entry (two entries per full row).
21 | # A pair is only emitted when both fields really are 8-digit hex words; this
22 | # skips the textual header lines and the ASCII column that slides into the
23 | # fourth field when a row holds a single entry.
24 | # sed then reverses the eight bytes (little-endian -> 0x-prefixed address).
25 | ADDRESSES=$(objdump -s -j .init_array "$BINARY" | awk '
26 |     function isword(w) { return length(w) == 8 && w ~ /^[0-9a-fA-F]+$/ }
27 |     isword($2) && isword($3) { print $2 $3 }
28 |     isword($4) && isword($5) { print $4 $5 }
29 | ' | sed 's/\(..\)\(..\)\(..\)\(..\)\(..\)\(..\)\(..\)\(..\)/0x\8\7\6\5\4\3\2\1/')
30 | if [ -z "$ADDRESSES" ]; then
31 |     echo "No global constructors found."
32 |     exit 1
33 | fi
34 | 
35 | # Left unquoted on purpose: word splitting prints all addresses on one line.
36 | echo $ADDRESSES
37 | 
38 | echo "Found constructor addresses:"
39 | for ADDR in $ADDRESSES; do
40 |     # Take the first <symbol> label within the first 10 lines of disassembly at ADDR.
41 |     SYMBOL=$(objdump -S --start-address="$ADDR" "$BINARY" 2>/dev/null | head -n 10 | grep -Eo "<[^>]+>" | head -n 1)
42 |     echo "$ADDR -> ${SYMBOL:-<unknown>}"
43 | done
44 | 


--------------------------------------------------------------------------------
/job/7c.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(n.name) AS cast_member_name, MIN(pi.info) AS cast_member_info FROM aka_name AS an, cast_info AS ci, info_type AS it, link_type AS lt, movie_link AS ml, name AS n, person_info AS pi, title AS t WHERE an.name  is not NULL and (an.name LIKE '%a%' or an.name LIKE 'A%') AND it.info ='mini biography' AND lt.link  in ('references', 'referenced in', 'features', 'featured in') AND n.name_pcode_cf BETWEEN 'A' AND 'F' AND (n.gender='m' OR (n.gender = 'f' AND n.name LIKE 'A%')) AND pi.note  is not NULL AND t.production_year BETWEEN 1980 AND 2010 AND n.id = an.person_id AND n.id = pi.person_id AND ci.person_id = n.id AND t.id = ci.movie_id AND ml.linked_movie_id = t.id AND lt.id = ml.link_type_id AND it.id = pi.info_type_id AND pi.person_id = an.person_id AND pi.person_id = ci.person_id AND an.person_id = ci.person_id AND ci.movie_id = ml.linked_movie_id;
2 | 


--------------------------------------------------------------------------------
/job/14c.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(mi_idx.info) AS rating, MIN(t.title) AS north_european_dark_production FROM info_type AS it1, info_type AS it2, keyword AS k, kind_type AS kt, movie_info AS mi, movie_info_idx AS mi_idx, movie_keyword AS mk, title AS t WHERE it1.info  = 'countries' AND it2.info  = 'rating' AND k.keyword  is not null and k.keyword in ('murder', 'murder-in-title', 'blood', 'violence') AND kt.kind  in ('movie', 'episode') AND mi.info IN ('Sweden', 'Norway', 'Germany', 'Denmark', 'Swedish', 'Danish', 'Norwegian', 'German', 'USA', 'American') AND mi_idx.info  < '8.5' AND t.production_year  > 2005 AND kt.id = t.kind_id AND t.id = mi.movie_id AND t.id = mk.movie_id AND t.id = mi_idx.movie_id AND mk.movie_id = mi.movie_id AND mk.movie_id = mi_idx.movie_id AND mi.movie_id = mi_idx.movie_id AND k.id = mk.keyword_id AND it1.id = mi.info_type_id AND it2.id = mi_idx.info_type_id;
2 | 


--------------------------------------------------------------------------------
/job/15b.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(mi.info) AS release_date, MIN(t.title) AS youtube_movie FROM aka_title AS at, company_name AS cn, company_type AS ct, info_type AS it1, keyword AS k, movie_companies AS mc, movie_info AS mi, movie_keyword AS mk, title AS t WHERE cn.country_code  = '[us]' and cn.name = 'YouTube' AND it1.info  = 'release dates' AND mc.note  like '%(200%)%' and mc.note like '%(worldwide)%' AND mi.note  like '%internet%' AND mi.info  like 'USA:% 200%' AND t.production_year  between 2005 and 2010 AND t.id = at.movie_id AND t.id = mi.movie_id AND t.id = mk.movie_id AND t.id = mc.movie_id AND mk.movie_id = mi.movie_id AND mk.movie_id = mc.movie_id AND mk.movie_id = at.movie_id AND mi.movie_id = mc.movie_id AND mi.movie_id = at.movie_id AND mc.movie_id = at.movie_id AND k.id = mk.keyword_id AND it1.id = mi.info_type_id AND cn.id = mc.company_id AND ct.id = mc.company_type_id;
2 | 


--------------------------------------------------------------------------------
/job/14b.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(mi_idx.info) AS rating, MIN(t.title) AS western_dark_production FROM info_type AS it1, info_type AS it2, keyword AS k, kind_type AS kt, movie_info AS mi, movie_info_idx AS mi_idx, movie_keyword AS mk, title AS t WHERE it1.info  = 'countries' AND it2.info  = 'rating' AND k.keyword  in ('murder', 'murder-in-title') AND kt.kind  = 'movie' AND mi.info IN ('Sweden', 'Norway', 'Germany', 'Denmark', 'Swedish', 'Denish', 'Norwegian', 'German', 'USA', 'American') AND mi_idx.info  > '6.0' AND t.production_year  > 2010 and (t.title like '%murder%' or t.title like '%Murder%' or t.title like '%Mord%') AND kt.id = t.kind_id AND t.id = mi.movie_id AND t.id = mk.movie_id AND t.id = mi_idx.movie_id AND mk.movie_id = mi.movie_id AND mk.movie_id = mi_idx.movie_id AND mi.movie_id = mi_idx.movie_id AND k.id = mk.keyword_id AND it1.id = mi.info_type_id AND it2.id = mi_idx.info_type_id;
2 | 


--------------------------------------------------------------------------------
/job/20a.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(t.title) AS complete_downey_ironman_movie FROM complete_cast AS cc, comp_cast_type AS cct1, comp_cast_type AS cct2, char_name AS chn, cast_info AS ci, keyword AS k, kind_type AS kt, movie_keyword AS mk, name AS n, title AS t WHERE cct1.kind  = 'cast' AND cct2.kind  like '%complete%' AND chn.name  not like '%Sherlock%' and (chn.name like '%Tony%Stark%' or chn.name like '%Iron%Man%') AND k.keyword  in ('superhero', 'sequel', 'second-part', 'marvel-comics', 'based-on-comic', 'tv-special', 'fight', 'violence') AND kt.kind  = 'movie' AND t.production_year  > 1950 AND kt.id = t.kind_id AND t.id = mk.movie_id AND t.id = ci.movie_id AND t.id = cc.movie_id AND mk.movie_id = ci.movie_id AND mk.movie_id = cc.movie_id AND ci.movie_id = cc.movie_id AND chn.id = ci.person_role_id AND n.id = ci.person_id AND k.id = mk.keyword_id AND cct1.id = cc.subject_id AND cct2.id = cc.status_id;
2 | 


--------------------------------------------------------------------------------
/duckdbrunner.py:
--------------------------------------------------------------------------------
 1 | import duckdb
 2 | import sys
 3 | 
 4 | with duckdb.connect(database='imdb2.db', read_only=True, config={'access_mode': 'read_only'}) as conn:
 5 |     conn.execute("set memory_limit='10GB';")
 6 |     conn.execute("set temp_directory='';")
 7 |     while sys.stdin:
 8 |         query = sys.stdin.readline().strip()
 9 |         print(f"Received query: {query}", file=sys.stderr)
10 |         if not query:
11 |             break
12 |         try:
13 |             result = conn.execute(query).fetchall()
14 |             print(f"Executed query {query} with result {result}", file=sys.stderr)
15 |             print(result[0][0])
16 |             sys.stdout.flush()
17 |         except Exception as e:
18 |             print(f"Executed query {query} with error: {e}", file=sys.stderr)
19 |             print(-1)
20 |             sys.stdout.flush()
21 |             continue
22 | 
23 | sys.stdin.close()


--------------------------------------------------------------------------------
/job/20c.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(n.name) AS cast_member, MIN(t.title) AS complete_dynamic_hero_movie FROM complete_cast AS cc, comp_cast_type AS cct1, comp_cast_type AS cct2, char_name AS chn, cast_info AS ci, keyword AS k, kind_type AS kt, movie_keyword AS mk, name AS n, title AS t WHERE cct1.kind  = 'cast' AND cct2.kind  like '%complete%' AND chn.name  is not NULL and (chn.name like '%man%' or chn.name like '%Man%') AND k.keyword  in ('superhero', 'marvel-comics', 'based-on-comic', 'tv-special', 'fight', 'violence', 'magnet', 'web', 'claw', 'laser') AND kt.kind  = 'movie' AND t.production_year  > 2000 AND kt.id = t.kind_id AND t.id = mk.movie_id AND t.id = ci.movie_id AND t.id = cc.movie_id AND mk.movie_id = ci.movie_id AND mk.movie_id = cc.movie_id AND ci.movie_id = cc.movie_id AND chn.id = ci.person_role_id AND n.id = ci.person_id AND k.id = mk.keyword_id AND cct1.id = cc.subject_id AND cct2.id = cc.status_id;
2 | 


--------------------------------------------------------------------------------
/job/21b.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(cn.name) AS company_name, MIN(lt.link) AS link_type, MIN(t.title) AS german_follow_up FROM company_name AS cn, company_type AS ct, keyword AS k, link_type AS lt, movie_companies AS mc, movie_info AS mi, movie_keyword AS mk, movie_link AS ml, title AS t WHERE cn.country_code !='[pl]' AND (cn.name LIKE '%Film%' OR cn.name LIKE '%Warner%') AND ct.kind ='production companies' AND k.keyword ='sequel' AND lt.link LIKE '%follow%' AND mc.note IS NULL AND mi.info IN ('Germany', 'German') AND t.production_year BETWEEN 2000 AND 2010 AND lt.id = ml.link_type_id AND ml.movie_id = t.id AND t.id = mk.movie_id AND mk.keyword_id = k.id AND t.id = mc.movie_id AND mc.company_type_id = ct.id AND mc.company_id = cn.id AND mi.movie_id = t.id AND ml.movie_id = mk.movie_id AND ml.movie_id = mc.movie_id AND mk.movie_id = mc.movie_id AND ml.movie_id = mi.movie_id AND mk.movie_id = mi.movie_id AND mc.movie_id = mi.movie_id;
2 | 


--------------------------------------------------------------------------------
/job/20b.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(t.title) AS complete_downey_ironman_movie FROM complete_cast AS cc, comp_cast_type AS cct1, comp_cast_type AS cct2, char_name AS chn, cast_info AS ci, keyword AS k, kind_type AS kt, movie_keyword AS mk, name AS n, title AS t WHERE cct1.kind  = 'cast' AND cct2.kind  like '%complete%' AND chn.name  not like '%Sherlock%' and (chn.name like '%Tony%Stark%' or chn.name like '%Iron%Man%') AND k.keyword  in ('superhero', 'sequel', 'second-part', 'marvel-comics', 'based-on-comic', 'tv-special', 'fight', 'violence') AND kt.kind  = 'movie' AND n.name  LIKE '%Downey%Robert%' AND t.production_year  > 2000 AND kt.id = t.kind_id AND t.id = mk.movie_id AND t.id = ci.movie_id AND t.id = cc.movie_id AND mk.movie_id = ci.movie_id AND mk.movie_id = cc.movie_id AND ci.movie_id = cc.movie_id AND chn.id = ci.person_role_id AND n.id = ci.person_id AND k.id = mk.keyword_id AND cct1.id = cc.subject_id AND cct2.id = cc.status_id;
2 | 


--------------------------------------------------------------------------------
/job/19c.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(n.name) AS voicing_actress, MIN(t.title) AS jap_engl_voiced_movie FROM aka_name AS an, char_name AS chn, cast_info AS ci, company_name AS cn, info_type AS it, movie_companies AS mc, movie_info AS mi, name AS n, role_type AS rt, title AS t WHERE ci.note  in ('(voice)', '(voice: Japanese version)', '(voice) (uncredited)', '(voice: English version)') AND cn.country_code ='[us]' AND it.info  = 'release dates' AND mi.info  is not null and (mi.info like 'Japan:%200%' or mi.info like 'USA:%200%') AND n.gender ='f' and n.name like '%An%' AND rt.role ='actress' AND t.production_year  > 2000 AND t.id = mi.movie_id AND t.id = mc.movie_id AND t.id = ci.movie_id AND mc.movie_id = ci.movie_id AND mc.movie_id = mi.movie_id AND mi.movie_id = ci.movie_id AND cn.id = mc.company_id AND it.id = mi.info_type_id AND n.id = ci.person_id AND rt.id = ci.role_id AND n.id = an.person_id AND ci.person_id = an.person_id AND chn.id = ci.person_role_id;
2 | 


--------------------------------------------------------------------------------
/job/25a.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(mi.info) AS movie_budget, MIN(mi_idx.info) AS movie_votes, MIN(n.name) AS male_writer, MIN(t.title) AS violent_movie_title FROM cast_info AS ci, info_type AS it1, info_type AS it2, keyword AS k, movie_info AS mi, movie_info_idx AS mi_idx, movie_keyword AS mk, name AS n, title AS t WHERE ci.note  in ('(writer)', '(head writer)', '(written by)', '(story)', '(story editor)') AND it1.info  = 'genres' AND it2.info  = 'votes' AND k.keyword  in ('murder', 'blood', 'gore', 'death', 'female-nudity') AND mi.info  = 'Horror' AND n.gender  = 'm' AND t.id = mi.movie_id AND t.id = mi_idx.movie_id AND t.id = ci.movie_id AND t.id = mk.movie_id AND ci.movie_id = mi.movie_id AND ci.movie_id = mi_idx.movie_id AND ci.movie_id = mk.movie_id AND mi.movie_id = mi_idx.movie_id AND mi.movie_id = mk.movie_id AND mi_idx.movie_id = mk.movie_id AND n.id = ci.person_id AND it1.id = mi.info_type_id AND it2.id = mi_idx.info_type_id AND k.id = mk.keyword_id;
2 | 


--------------------------------------------------------------------------------
/job/21a.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(cn.name) AS company_name, MIN(lt.link) AS link_type, MIN(t.title) AS western_follow_up FROM company_name AS cn, company_type AS ct, keyword AS k, link_type AS lt, movie_companies AS mc, movie_info AS mi, movie_keyword AS mk, movie_link AS ml, title AS t WHERE cn.country_code !='[pl]' AND (cn.name LIKE '%Film%' OR cn.name LIKE '%Warner%') AND ct.kind ='production companies' AND k.keyword ='sequel' AND lt.link LIKE '%follow%' AND mc.note IS NULL AND mi.info IN ('Sweden', 'Norway', 'Germany', 'Denmark', 'Swedish', 'Denish', 'Norwegian', 'German') AND t.production_year BETWEEN 1950 AND 2000 AND lt.id = ml.link_type_id AND ml.movie_id = t.id AND t.id = mk.movie_id AND mk.keyword_id = k.id AND t.id = mc.movie_id AND mc.company_type_id = ct.id AND mc.company_id = cn.id AND mi.movie_id = t.id AND ml.movie_id = mk.movie_id AND ml.movie_id = mc.movie_id AND mk.movie_id = mc.movie_id AND ml.movie_id = mi.movie_id AND mk.movie_id = mi.movie_id AND mc.movie_id = mi.movie_id;
2 | 


--------------------------------------------------------------------------------
/job/23a.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(kt.kind) AS movie_kind, MIN(t.title) AS complete_us_internet_movie FROM complete_cast AS cc, comp_cast_type AS cct1, company_name AS cn, company_type AS ct, info_type AS it1, keyword AS k, kind_type AS kt, movie_companies AS mc, movie_info AS mi, movie_keyword AS mk, title AS t WHERE cct1.kind  = 'complete+verified' AND cn.country_code  = '[us]' AND it1.info  = 'release dates' AND kt.kind  in ('movie') AND mi.note  like '%internet%' AND mi.info  is not NULL and (mi.info like 'USA:% 199%' or mi.info like 'USA:% 200%') AND t.production_year  > 2000 AND kt.id = t.kind_id AND t.id = mi.movie_id AND t.id = mk.movie_id AND t.id = mc.movie_id AND t.id = cc.movie_id AND mk.movie_id = mi.movie_id AND mk.movie_id = mc.movie_id AND mk.movie_id = cc.movie_id AND mi.movie_id = mc.movie_id AND mi.movie_id = cc.movie_id AND mc.movie_id = cc.movie_id AND k.id = mk.keyword_id AND it1.id = mi.info_type_id AND cn.id = mc.company_id AND ct.id = mc.company_type_id AND cct1.id = cc.status_id;
2 | 


--------------------------------------------------------------------------------
/job/19b.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(n.name) AS voicing_actress, MIN(t.title) AS kung_fu_panda FROM aka_name AS an, char_name AS chn, cast_info AS ci, company_name AS cn, info_type AS it, movie_companies AS mc, movie_info AS mi, name AS n, role_type AS rt, title AS t WHERE ci.note  = '(voice)' AND cn.country_code ='[us]' AND it.info  = 'release dates' AND mc.note  like '%(200%)%' and (mc.note like '%(USA)%' or mc.note like '%(worldwide)%') AND mi.info  is not null and (mi.info like 'Japan:%2007%' or mi.info like 'USA:%2008%') AND n.gender ='f' and n.name like '%Angel%' AND rt.role ='actress' AND t.production_year  between 2007 and 2008 and t.title like '%Kung%Fu%Panda%' AND t.id = mi.movie_id AND t.id = mc.movie_id AND t.id = ci.movie_id AND mc.movie_id = ci.movie_id AND mc.movie_id = mi.movie_id AND mi.movie_id = ci.movie_id AND cn.id = mc.company_id AND it.id = mi.info_type_id AND n.id = ci.person_id AND rt.id = ci.role_id AND n.id = an.person_id AND ci.person_id = an.person_id AND chn.id = ci.person_role_id;
2 | 


--------------------------------------------------------------------------------
/job/21c.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(cn.name) AS company_name, MIN(lt.link) AS link_type, MIN(t.title) AS western_follow_up FROM company_name AS cn, company_type AS ct, keyword AS k, link_type AS lt, movie_companies AS mc, movie_info AS mi, movie_keyword AS mk, movie_link AS ml, title AS t WHERE cn.country_code !='[pl]' AND (cn.name LIKE '%Film%' OR cn.name LIKE '%Warner%') AND ct.kind ='production companies' AND k.keyword ='sequel' AND lt.link LIKE '%follow%' AND mc.note IS NULL AND mi.info IN ('Sweden', 'Norway', 'Germany', 'Denmark', 'Swedish', 'Denish', 'Norwegian', 'German', 'English') AND t.production_year BETWEEN 1950 AND 2010 AND lt.id = ml.link_type_id AND ml.movie_id = t.id AND t.id = mk.movie_id AND mk.keyword_id = k.id AND t.id = mc.movie_id AND mc.company_type_id = ct.id AND mc.company_id = cn.id AND mi.movie_id = t.id AND ml.movie_id = mk.movie_id AND ml.movie_id = mc.movie_id AND mk.movie_id = mc.movie_id AND ml.movie_id = mi.movie_id AND mk.movie_id = mi.movie_id AND mc.movie_id = mi.movie_id;
2 | 


--------------------------------------------------------------------------------
/job/23b.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(kt.kind) AS movie_kind, MIN(t.title) AS complete_nerdy_internet_movie FROM complete_cast AS cc, comp_cast_type AS cct1, company_name AS cn, company_type AS ct, info_type AS it1, keyword AS k, kind_type AS kt, movie_companies AS mc, movie_info AS mi, movie_keyword AS mk, title AS t WHERE cct1.kind  = 'complete+verified' AND cn.country_code  = '[us]' AND it1.info  = 'release dates' AND k.keyword  in ('nerd', 'loner', 'alienation', 'dignity') AND kt.kind  in ('movie') AND mi.note  like '%internet%' AND mi.info  like 'USA:% 200%' AND t.production_year  > 2000 AND kt.id = t.kind_id AND t.id = mi.movie_id AND t.id = mk.movie_id AND t.id = mc.movie_id AND t.id = cc.movie_id AND mk.movie_id = mi.movie_id AND mk.movie_id = mc.movie_id AND mk.movie_id = cc.movie_id AND mi.movie_id = mc.movie_id AND mi.movie_id = cc.movie_id AND mc.movie_id = cc.movie_id AND k.id = mk.keyword_id AND it1.id = mi.info_type_id AND cn.id = mc.company_id AND ct.id = mc.company_type_id AND cct1.id = cc.status_id;
2 | 


--------------------------------------------------------------------------------
/job/25b.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(mi.info) AS movie_budget, MIN(mi_idx.info) AS movie_votes, MIN(n.name) AS male_writer, MIN(t.title) AS violent_movie_title FROM cast_info AS ci, info_type AS it1, info_type AS it2, keyword AS k, movie_info AS mi, movie_info_idx AS mi_idx, movie_keyword AS mk, name AS n, title AS t WHERE ci.note  in ('(writer)', '(head writer)', '(written by)', '(story)', '(story editor)') AND it1.info  = 'genres' AND it2.info  = 'votes' AND k.keyword  in ('murder', 'blood', 'gore', 'death', 'female-nudity') AND mi.info  = 'Horror' AND n.gender   = 'm' AND t.production_year  > 2010 AND t.title  like 'Vampire%' AND t.id = mi.movie_id AND t.id = mi_idx.movie_id AND t.id = ci.movie_id AND t.id = mk.movie_id AND ci.movie_id = mi.movie_id AND ci.movie_id = mi_idx.movie_id AND ci.movie_id = mk.movie_id AND mi.movie_id = mi_idx.movie_id AND mi.movie_id = mk.movie_id AND mi_idx.movie_id = mk.movie_id AND n.id = ci.person_id AND it1.id = mi.info_type_id AND it2.id = mi_idx.info_type_id AND k.id = mk.keyword_id;
2 | 


--------------------------------------------------------------------------------
/engine/infra/PageMemory.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | #include <cassert>
 3 | #include <cstdlib>
 4 | //---------------------------------------------------------------------------
 5 | namespace engine::pagememory {
 6 | //---------------------------------------------------------------------------
 7 | /// Setup the page memory (declaration only; the definition is not part of this header)
 8 | void setup();
 9 | //---------------------------------------------------------------------------
10 | /// Prefault the page memory; what the bool result signals is defined by the implementation
11 | bool prefault();
12 | //---------------------------------------------------------------------------
13 | /// Start a new query
14 | void start_query();
15 | //---------------------------------------------------------------------------
16 | /// Allocate a page; returns an untyped pointer (page size and ownership are not stated here)
17 | void* allocate();
18 | //---------------------------------------------------------------------------
19 | struct AllocationStealer { // NOTE(review): presumably a scope guard around page allocation - confirm in the implementation
20 |     AllocationStealer() noexcept;
21 |     ~AllocationStealer() noexcept;
22 | };
23 | //---------------------------------------------------------------------------
24 | } // namespace engine::pagememory
25 | 


--------------------------------------------------------------------------------
/job/25c.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(mi.info) AS movie_budget, MIN(mi_idx.info) AS movie_votes, MIN(n.name) AS male_writer, MIN(t.title) AS violent_movie_title FROM cast_info AS ci, info_type AS it1, info_type AS it2, keyword AS k, movie_info AS mi, movie_info_idx AS mi_idx, movie_keyword AS mk, name AS n, title AS t WHERE ci.note  in ('(writer)', '(head writer)', '(written by)', '(story)', '(story editor)') AND it1.info  = 'genres' AND it2.info  = 'votes' AND k.keyword  in ('murder', 'violence', 'blood', 'gore', 'death', 'female-nudity', 'hospital') AND mi.info  in ('Horror', 'Action', 'Sci-Fi', 'Thriller', 'Crime', 'War') AND n.gender   = 'm' AND t.id = mi.movie_id AND t.id = mi_idx.movie_id AND t.id = ci.movie_id AND t.id = mk.movie_id AND ci.movie_id = mi.movie_id AND ci.movie_id = mi_idx.movie_id AND ci.movie_id = mk.movie_id AND mi.movie_id = mi_idx.movie_id AND mi.movie_id = mk.movie_id AND mi_idx.movie_id = mk.movie_id AND n.id = ci.person_id AND it1.id = mi.info_type_id AND it2.id = mi_idx.info_type_id AND k.id = mk.keyword_id;
2 | 


--------------------------------------------------------------------------------
/download_imdb.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | #
 3 | # Download imdb.tgz and unpack it into ./imdb.
 4 | # Uses wget when it is installed, otherwise curl; exits with status 1 as soon
 5 | # as the download, the directory creation or the extraction fails.
 6 | 
 7 | url="https://event.cwi.nl/da/job/imdb.tgz"
 8 | output_file="imdb.tgz"
 9 | target_dir="imdb"
10 | 
11 | # Detect and select downloader
12 | if command -v wget &> /dev/null; then
13 |     if ! wget "$url" -O "$output_file"; then
14 |         echo "Error: downloading failed" >&2
15 |         exit 1
16 |     fi
17 | elif command -v curl &> /dev/null; then
18 |     # --fail: without it curl exits 0 on HTTP errors (e.g. 404) and stores the
19 |     # server's error page as the archive, so the problem would only surface
20 |     # later as a confusing tar failure.
21 |     if ! curl --fail -L "$url" -o "$output_file"; then
22 |         echo "Error: downloading failed" >&2
23 |         exit 1
24 |     fi
25 | else
26 |     echo "Error: please install wget or curl to download imdb.tgz" >&2
27 |     exit 1
28 | fi
29 | 
30 | # make target directory (if not exists)
31 | if ! mkdir -p "$target_dir"; then
32 |     echo "Error: cannot make directory '$target_dir'" >&2
33 |     exit 1
34 | fi
35 | 
36 | # decompress the file to the target directory
37 | if ! tar -xf "$output_file" -C "$target_dir"; then
38 |     echo "Error: failed to decompress the file" >&2
39 |     exit 1
40 | fi
41 | 
42 | echo "Success!"
43 | 


--------------------------------------------------------------------------------
/job/23c.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(kt.kind) AS movie_kind, MIN(t.title) AS complete_us_internet_movie FROM complete_cast AS cc, comp_cast_type AS cct1, company_name AS cn, company_type AS ct, info_type AS it1, keyword AS k, kind_type AS kt, movie_companies AS mc, movie_info AS mi, movie_keyword AS mk, title AS t WHERE cct1.kind  = 'complete+verified' AND cn.country_code  = '[us]' AND it1.info  = 'release dates' AND kt.kind  in ('movie', 'tv movie', 'video movie', 'video game') AND mi.note  like '%internet%' AND mi.info  is not NULL and (mi.info like 'USA:% 199%' or mi.info like 'USA:% 200%') AND t.production_year  > 1990 AND kt.id = t.kind_id AND t.id = mi.movie_id AND t.id = mk.movie_id AND t.id = mc.movie_id AND t.id = cc.movie_id AND mk.movie_id = mi.movie_id AND mk.movie_id = mc.movie_id AND mk.movie_id = cc.movie_id AND mi.movie_id = mc.movie_id AND mi.movie_id = cc.movie_id AND mc.movie_id = cc.movie_id AND k.id = mk.keyword_id AND it1.id = mi.info_type_id AND cn.id = mc.company_id AND ct.id = mc.company_type_id AND cct1.id = cc.status_id;
2 | 


--------------------------------------------------------------------------------
/job/19a.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(n.name) AS voicing_actress, MIN(t.title) AS voiced_movie FROM aka_name AS an, char_name AS chn, cast_info AS ci, company_name AS cn, info_type AS it, movie_companies AS mc, movie_info AS mi, name AS n, role_type AS rt, title AS t WHERE ci.note  in ('(voice)', '(voice: Japanese version)', '(voice) (uncredited)', '(voice: English version)') AND cn.country_code ='[us]' AND it.info  = 'release dates' AND mc.note  is not NULL and (mc.note like '%(USA)%' or mc.note like '%(worldwide)%') AND mi.info  is not null and (mi.info like 'Japan:%200%' or mi.info like 'USA:%200%') AND n.gender ='f' and n.name like '%Ang%' AND rt.role ='actress' AND t.production_year  between 2005 and 2009 AND t.id = mi.movie_id AND t.id = mc.movie_id AND t.id = ci.movie_id AND mc.movie_id = ci.movie_id AND mc.movie_id = mi.movie_id AND mi.movie_id = ci.movie_id AND cn.id = mc.company_id AND it.id = mi.info_type_id AND n.id = ci.person_id AND rt.id = ci.role_id AND n.id = an.person_id AND ci.person_id = an.person_id AND chn.id = ci.person_role_id;
2 | 


--------------------------------------------------------------------------------
/job/26b.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(chn.name) AS character_name, MIN(mi_idx.info) AS rating, MIN(t.title) AS complete_hero_movie FROM complete_cast AS cc, comp_cast_type AS cct1, comp_cast_type AS cct2, char_name AS chn, cast_info AS ci, info_type AS it2, keyword AS k, kind_type AS kt, movie_info_idx AS mi_idx, movie_keyword AS mk, name AS n, title AS t WHERE cct1.kind  = 'cast' AND cct2.kind  like '%complete%' AND chn.name  is not NULL and (chn.name like '%man%' or chn.name like '%Man%') AND it2.info  = 'rating' AND k.keyword  in ('superhero', 'marvel-comics', 'based-on-comic', 'fight') AND kt.kind  = 'movie' AND mi_idx.info  > '8.0' AND t.production_year  > 2005 AND kt.id = t.kind_id AND t.id = mk.movie_id AND t.id = ci.movie_id AND t.id = cc.movie_id AND t.id = mi_idx.movie_id AND mk.movie_id = ci.movie_id AND mk.movie_id = cc.movie_id AND mk.movie_id = mi_idx.movie_id AND ci.movie_id = cc.movie_id AND ci.movie_id = mi_idx.movie_id AND cc.movie_id = mi_idx.movie_id AND chn.id = ci.person_role_id AND n.id = ci.person_id AND k.id = mk.keyword_id AND cct1.id = cc.subject_id AND cct2.id = cc.status_id AND it2.id = mi_idx.info_type_id;
2 | 


--------------------------------------------------------------------------------
/job/22a.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(cn.name) AS movie_company, MIN(mi_idx.info) AS rating, MIN(t.title) AS western_violent_movie FROM company_name AS cn, company_type AS ct, info_type AS it1, info_type AS it2, keyword AS k, kind_type AS kt, movie_companies AS mc, movie_info AS mi, movie_info_idx AS mi_idx, movie_keyword AS mk, title AS t WHERE cn.country_code  != '[us]' AND it1.info  = 'countries' AND it2.info  = 'rating' AND k.keyword  in ('murder', 'murder-in-title', 'blood', 'violence') AND kt.kind  in ('movie', 'episode') AND mc.note  not like '%(USA)%' and mc.note like '%(200%)%' AND mi.info IN ('Germany', 'German', 'USA', 'American') AND mi_idx.info  < '7.0' AND t.production_year  > 2008 AND kt.id = t.kind_id AND t.id = mi.movie_id AND t.id = mk.movie_id AND t.id = mi_idx.movie_id AND t.id = mc.movie_id AND mk.movie_id = mi.movie_id AND mk.movie_id = mi_idx.movie_id AND mk.movie_id = mc.movie_id AND mi.movie_id = mi_idx.movie_id AND mi.movie_id = mc.movie_id AND mc.movie_id = mi_idx.movie_id AND k.id = mk.keyword_id AND it1.id = mi.info_type_id AND it2.id = mi_idx.info_type_id AND ct.id = mc.company_type_id AND cn.id = mc.company_id;
2 | 


--------------------------------------------------------------------------------
/job/22b.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(cn.name) AS movie_company, MIN(mi_idx.info) AS rating, MIN(t.title) AS western_violent_movie FROM company_name AS cn, company_type AS ct, info_type AS it1, info_type AS it2, keyword AS k, kind_type AS kt, movie_companies AS mc, movie_info AS mi, movie_info_idx AS mi_idx, movie_keyword AS mk, title AS t WHERE cn.country_code  != '[us]' AND it1.info  = 'countries' AND it2.info  = 'rating' AND k.keyword  in ('murder', 'murder-in-title', 'blood', 'violence') AND kt.kind  in ('movie', 'episode') AND mc.note  not like '%(USA)%' and mc.note like '%(200%)%' AND mi.info IN ('Germany', 'German', 'USA', 'American') AND mi_idx.info  < '7.0' AND t.production_year  > 2009 AND kt.id = t.kind_id AND t.id = mi.movie_id AND t.id = mk.movie_id AND t.id = mi_idx.movie_id AND t.id = mc.movie_id AND mk.movie_id = mi.movie_id AND mk.movie_id = mi_idx.movie_id AND mk.movie_id = mc.movie_id AND mi.movie_id = mi_idx.movie_id AND mi.movie_id = mc.movie_id AND mc.movie_id = mi_idx.movie_id AND k.id = mk.keyword_id AND it1.id = mi.info_type_id AND it2.id = mi_idx.info_type_id AND ct.id = mc.company_type_id AND cn.id = mc.company_id;
2 | 


--------------------------------------------------------------------------------
/job/22d.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(cn.name) AS movie_company, MIN(mi_idx.info) AS rating, MIN(t.title) AS western_violent_movie FROM company_name AS cn, company_type AS ct, info_type AS it1, info_type AS it2, keyword AS k, kind_type AS kt, movie_companies AS mc, movie_info AS mi, movie_info_idx AS mi_idx, movie_keyword AS mk, title AS t WHERE cn.country_code  != '[us]' AND it1.info  = 'countries' AND it2.info  = 'rating' AND k.keyword  in ('murder', 'murder-in-title', 'blood', 'violence') AND kt.kind  in ('movie', 'episode') AND mi.info IN ('Sweden', 'Norway', 'Germany', 'Denmark', 'Swedish', 'Danish', 'Norwegian', 'German', 'USA', 'American') AND mi_idx.info  < '8.5' AND t.production_year  > 2005 AND kt.id = t.kind_id AND t.id = mi.movie_id AND t.id = mk.movie_id AND t.id = mi_idx.movie_id AND t.id = mc.movie_id AND mk.movie_id = mi.movie_id AND mk.movie_id = mi_idx.movie_id AND mk.movie_id = mc.movie_id AND mi.movie_id = mi_idx.movie_id AND mi.movie_id = mc.movie_id AND mc.movie_id = mi_idx.movie_id AND k.id = mk.keyword_id AND it1.id = mi.info_type_id AND it2.id = mi_idx.info_type_id AND ct.id = mc.company_type_id AND cn.id = mc.company_id;
2 | 


--------------------------------------------------------------------------------
/job/26c.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(chn.name) AS character_name, MIN(mi_idx.info) AS rating, MIN(t.title) AS complete_hero_movie FROM complete_cast AS cc, comp_cast_type AS cct1, comp_cast_type AS cct2, char_name AS chn, cast_info AS ci, info_type AS it2, keyword AS k, kind_type AS kt, movie_info_idx AS mi_idx, movie_keyword AS mk, name AS n, title AS t WHERE cct1.kind  = 'cast' AND cct2.kind  like '%complete%' AND chn.name  is not NULL and (chn.name like '%man%' or chn.name like '%Man%') AND it2.info  = 'rating' AND k.keyword  in ('superhero', 'marvel-comics', 'based-on-comic', 'tv-special', 'fight', 'violence', 'magnet', 'web', 'claw', 'laser') AND kt.kind  = 'movie' AND t.production_year  > 2000 AND kt.id = t.kind_id AND t.id = mk.movie_id AND t.id = ci.movie_id AND t.id = cc.movie_id AND t.id = mi_idx.movie_id AND mk.movie_id = ci.movie_id AND mk.movie_id = cc.movie_id AND mk.movie_id = mi_idx.movie_id AND ci.movie_id = cc.movie_id AND ci.movie_id = mi_idx.movie_id AND cc.movie_id = mi_idx.movie_id AND chn.id = ci.person_role_id AND n.id = ci.person_id AND k.id = mk.keyword_id AND cct1.id = cc.subject_id AND cct2.id = cc.status_id AND it2.id = mi_idx.info_type_id;
2 | 


--------------------------------------------------------------------------------
/engine/query/RuntimeValue.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | //---------------------------------------------------------------------------
 3 | #include <cstdint>
 4 | #include <memory>
 5 | #include <attribute.h>
 6 | //---------------------------------------------------------------------------
 7 | namespace engine {
 8 | //---------------------------------------------------------------------------
 9 | struct RuntimeValue { // A DataType tag paired with a raw 64-bit payload
10 |     static constexpr uint64_t nullValue = std::numeric_limits<uint64_t>::max(); // payload reserved as the NULL marker; NOTE(review): <limits> is not included in this header, presumably it arrives transitively — confirm
11 |     DataType type; // the value's DataType
12 |     uint64_t value; // raw payload; compared against nullValue by isNull()
13 |     /// Build a RuntimeValue from a type and a raw payload
14 |     static RuntimeValue from(DataType type, uint64_t value) {
15 |         return {type, value};
16 |     }
17 | 
18 |     /// Is null? True when the payload equals nullValue; the type is not consulted
19 |     constexpr bool isNull() const {
20 |         return value == nullValue;
21 |     }
22 |     /// Compare: equal only when both the type and the payload match
23 |     bool operator==(const RuntimeValue& other) const {
24 |         return type == other.type && value == other.value;
25 |     }
26 |     /// Compare: exact negation of operator==
27 |     bool operator!=(const RuntimeValue& other) const {
28 |         return !operator==(other);
29 |     }
30 | };
31 | //---------------------------------------------------------------------------
32 | }
33 | 


--------------------------------------------------------------------------------
/engine/infra/JoinFilter.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | //---------------------------------------------------------------------------
 3 | #include <cstdint>
 4 | //---------------------------------------------------------------------------
 5 | namespace engine {
 6 | //---------------------------------------------------------------------------
 7 | struct JoinFilter { // Static helpers that test 16-bit filter entries against table-provided masks
 8 |     /// Lookup table of 2048 16-bit masks, indexed by getMask; per the original note every mask presumably has popcount == 4 — confirm against the definition
 9 |     alignas(4096) static const uint16_t bloomMasks[2048]; // 4096-byte aligned; only declared here, the definition is not in this header
10 |     /// Select the mask for a hash by indexing the table with the hash's top 11 bits
11 |     template <typename T>
12 |     [[gnu::always_inline]] static inline uint16_t getMask(T hash) {
13 |         return bloomMasks[hash >> (sizeof(hash) * 8 - 11)]; // 2^11 == 2048 slots; NOTE(review): assumes T is an unsigned integer type — a negative signed hash would yield a negative index, confirm callers
14 |     }
15 |     /// True when every bit set in mask is also set in entry
16 |     [[gnu::always_inline]] static inline bool checkMaskWithEntry(uint16_t mask, uint16_t entry) {
17 |         return !(~entry & mask); // any mask bit missing from entry makes the conjunction non-zero
18 |     }
19 |     /// Convenience wrapper: look up the mask for hash and test it against entry
20 |     template<typename T>
21 |     [[gnu::always_inline]] static inline bool checkEntry(T hash, uint16_t entry) {
22 |         return checkMaskWithEntry(getMask(hash), entry);
23 |     }
24 | };
25 | //---------------------------------------------------------------------------
26 | }
27 | //---------------------------------------------------------------------------
28 | 


--------------------------------------------------------------------------------
/engine/op/TargetBase.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | //---------------------------------------------------------------------------
 3 | #include "infra/Reflection.hpp"
 4 | #include "op/OpBase.hpp"
 5 | #include "pipeline/PipelineConcepts.hpp"
 6 | #include <cassert>
 7 | //---------------------------------------------------------------------------
 8 | namespace engine {
 9 | //---------------------------------------------------------------------------
10 | /// Base class for all targets; extends OpBase with a name accessor
11 | class TargetBase : public OpBase {
12 |     public:
13 |     /// Get the name of the concrete target; NOTE(review): <string_view> is not included in this header, presumably it arrives via op/OpBase.hpp — confirm
14 |     virtual std::string_view getName() const = 0;
15 | };
16 | //---------------------------------------------------------------------------
17 | /// All targets must inherit from TargetImpl<T>; NOTE(review): T is presumably the inheriting target class itself — confirm
18 | template <typename T>
19 | class TargetImpl : public TargetBase {
20 |     public:
21 |     /// Return the name that ClassInfo reports for T
22 |     std::string_view getName() const override {
23 |         static_assert(TargetOperator<T, 1>, "T must be a proper target"); // compile-time check of TargetOperator<T, 1>
24 |         return ClassInfo::getName<T>();
25 |     }
26 | };
27 | //---------------------------------------------------------------------------
28 | }
29 | 


--------------------------------------------------------------------------------
/ANNOUNCEMENTS.md:
--------------------------------------------------------------------------------
 1 | # Announcements
 2 | 
 3 | ### 2025-02-27
 4 |   - The recently pushed GitHub workflow will automatically compile, test, and benchmark your solution on all four systems
 5 |   - Check your repository's pull requests
 6 |   - The results are currently shown at https://sigmod-contest-25.hpi-sci.de/ and will soon be published on the official contest website
 7 | 
 8 | ### 2025-03-04
 9 |   - With today's changes to the main repository you forked from, we improved the performance of the evaluation phase
10 |   - **Important notes:**
11 |     - **Deadline change:** The deadline for the final submission has been extended to March 31
12 |     - **Own source files:** The CMake file (which cannot be modified by participants) now includes all *.cpp files in the `src` directory. This way, you can add your own source files and better structure your code.
13 |     - **Third-party library:** We found that some teams use third-party libraries, e.g., for logging. Please note that third-party libraries are not allowed in the contest. You are free to use them during development, but you need to remove them prior to the final submission. Otherwise, your submission will be disqualified.
14 | 


--------------------------------------------------------------------------------
/job/22c.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(cn.name) AS movie_company, MIN(mi_idx.info) AS rating, MIN(t.title) AS western_violent_movie FROM company_name AS cn, company_type AS ct, info_type AS it1, info_type AS it2, keyword AS k, kind_type AS kt, movie_companies AS mc, movie_info AS mi, movie_info_idx AS mi_idx, movie_keyword AS mk, title AS t WHERE cn.country_code  != '[us]' AND it1.info  = 'countries' AND it2.info  = 'rating' AND k.keyword  in ('murder', 'murder-in-title', 'blood', 'violence') AND kt.kind  in ('movie', 'episode') AND mc.note  not like '%(USA)%' and mc.note like '%(200%)%' AND mi.info IN ('Sweden', 'Norway', 'Germany', 'Denmark', 'Swedish', 'Danish', 'Norwegian', 'German', 'USA', 'American') AND mi_idx.info  < '8.5' AND t.production_year  > 2005 AND kt.id = t.kind_id AND t.id = mi.movie_id AND t.id = mk.movie_id AND t.id = mi_idx.movie_id AND t.id = mc.movie_id AND mk.movie_id = mi.movie_id AND mk.movie_id = mi_idx.movie_id AND mk.movie_id = mc.movie_id AND mi.movie_id = mi_idx.movie_id AND mi.movie_id = mc.movie_id AND mc.movie_id = mi_idx.movie_id AND k.id = mk.keyword_id AND it1.id = mi.info_type_id AND it2.id = mi_idx.info_type_id AND ct.id = mc.company_type_id AND cn.id = mc.company_id;
2 | 


--------------------------------------------------------------------------------
/job/26a.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(chn.name) AS character_name, MIN(mi_idx.info) AS rating, MIN(n.name) AS playing_actor, MIN(t.title) AS complete_hero_movie FROM complete_cast AS cc, comp_cast_type AS cct1, comp_cast_type AS cct2, char_name AS chn, cast_info AS ci, info_type AS it2, keyword AS k, kind_type AS kt, movie_info_idx AS mi_idx, movie_keyword AS mk, name AS n, title AS t WHERE cct1.kind  = 'cast' AND cct2.kind  like '%complete%' AND chn.name  is not NULL and (chn.name like '%man%' or chn.name like '%Man%') AND it2.info  = 'rating' AND k.keyword  in ('superhero', 'marvel-comics', 'based-on-comic', 'tv-special', 'fight', 'violence', 'magnet', 'web', 'claw', 'laser') AND kt.kind  = 'movie' AND mi_idx.info  > '7.0' AND t.production_year  > 2000 AND kt.id = t.kind_id AND t.id = mk.movie_id AND t.id = ci.movie_id AND t.id = cc.movie_id AND t.id = mi_idx.movie_id AND mk.movie_id = ci.movie_id AND mk.movie_id = cc.movie_id AND mk.movie_id = mi_idx.movie_id AND ci.movie_id = cc.movie_id AND ci.movie_id = mi_idx.movie_id AND cc.movie_id = mi_idx.movie_id AND chn.id = ci.person_role_id AND n.id = ci.person_id AND k.id = mk.keyword_id AND cct1.id = cc.subject_id AND cct2.id = cc.status_id AND it2.id = mi_idx.info_type_id;
2 | 


--------------------------------------------------------------------------------
/job/24a.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(chn.name) AS voiced_char_name, MIN(n.name) AS voicing_actress_name, MIN(t.title) AS voiced_action_movie_jap_eng FROM aka_name AS an, char_name AS chn, cast_info AS ci, company_name AS cn, info_type AS it, keyword AS k, movie_companies AS mc, movie_info AS mi, movie_keyword AS mk, name AS n, role_type AS rt, title AS t WHERE ci.note  in ('(voice)', '(voice: Japanese version)', '(voice) (uncredited)', '(voice: English version)') AND cn.country_code ='[us]' AND it.info  = 'release dates' AND k.keyword  in ('hero', 'martial-arts', 'hand-to-hand-combat') AND mi.info  is not null and (mi.info like 'Japan:%201%' or mi.info like 'USA:%201%') AND n.gender ='f' and n.name like '%An%' AND rt.role ='actress' AND t.production_year  > 2010 AND t.id = mi.movie_id AND t.id = mc.movie_id AND t.id = ci.movie_id AND t.id = mk.movie_id AND mc.movie_id = ci.movie_id AND mc.movie_id = mi.movie_id AND mc.movie_id = mk.movie_id AND mi.movie_id = ci.movie_id AND mi.movie_id = mk.movie_id AND ci.movie_id = mk.movie_id AND cn.id = mc.company_id AND it.id = mi.info_type_id AND n.id = ci.person_id AND rt.id = ci.role_id AND n.id = an.person_id AND ci.person_id = an.person_id AND chn.id = ci.person_role_id AND k.id = mk.keyword_id;
2 | 


--------------------------------------------------------------------------------
/job/31a.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(mi.info) AS movie_budget, MIN(mi_idx.info) AS movie_votes, MIN(n.name) AS writer, MIN(t.title) AS violent_liongate_movie FROM cast_info AS ci, company_name AS cn, info_type AS it1, info_type AS it2, keyword AS k, movie_companies AS mc, movie_info AS mi, movie_info_idx AS mi_idx, movie_keyword AS mk, name AS n, title AS t WHERE ci.note  in ('(writer)', '(head writer)', '(written by)', '(story)', '(story editor)') AND cn.name  like 'Lionsgate%' AND it1.info  = 'genres' AND it2.info  = 'votes' AND k.keyword  in ('murder', 'violence', 'blood', 'gore', 'death', 'female-nudity', 'hospital') AND mi.info  in ('Horror', 'Thriller') AND n.gender   = 'm' AND t.id = mi.movie_id AND t.id = mi_idx.movie_id AND t.id = ci.movie_id AND t.id = mk.movie_id AND t.id = mc.movie_id AND ci.movie_id = mi.movie_id AND ci.movie_id = mi_idx.movie_id AND ci.movie_id = mk.movie_id AND ci.movie_id = mc.movie_id AND mi.movie_id = mi_idx.movie_id AND mi.movie_id = mk.movie_id AND mi.movie_id = mc.movie_id AND mi_idx.movie_id = mk.movie_id AND mi_idx.movie_id = mc.movie_id AND mk.movie_id = mc.movie_id AND n.id = ci.person_id AND it1.id = mi.info_type_id AND it2.id = mi_idx.info_type_id AND k.id = mk.keyword_id AND cn.id = mc.company_id;
2 | 


--------------------------------------------------------------------------------
/engine/op/ScanBase.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | //---------------------------------------------------------------------------
 3 | #include "pipeline/PipelineConcepts.hpp"
 4 | #include "op/OpBase.hpp"
 5 | #include "infra/Reflection.hpp"
 6 | #include <string_view>
 7 | //---------------------------------------------------------------------------
 8 | namespace engine {
 9 | //---------------------------------------------------------------------------
10 | /// Base class for all scans; extends OpBase with a name accessor and a concurrency query
11 | class ScanBase : public OpBase {
12 |     public:
13 |     /// Get the name of the concrete scan
14 |     virtual std::string_view getName() const = 0;
15 |     /// Get the concurrency; virtual but not pure, so a default definition must exist outside this header
16 |     virtual size_t concurrency() const;
17 | };
18 | //---------------------------------------------------------------------------
19 | /// All scans must inherit from ScanImpl<T>; NOTE(review): T is presumably the inheriting scan class itself — confirm
20 | template <typename T>
21 | class ScanImpl : public ScanBase {
22 |     public:
23 |     /// Return the name that ClassInfo reports for T
24 |     std::string_view getName() const override {
25 |         static_assert(ScanOperator<T>, "T must be a proper scan"); // compile-time check of ScanOperator<T>
26 |         return ClassInfo::getName<T>();
27 |     }
28 | };
29 | //---------------------------------------------------------------------------
30 | }
31 | 


--------------------------------------------------------------------------------
/job/31c.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(mi.info) AS movie_budget, MIN(mi_idx.info) AS movie_votes, MIN(n.name) AS writer, MIN(t.title) AS violent_liongate_movie FROM cast_info AS ci, company_name AS cn, info_type AS it1, info_type AS it2, keyword AS k, movie_companies AS mc, movie_info AS mi, movie_info_idx AS mi_idx, movie_keyword AS mk, name AS n, title AS t WHERE ci.note  in ('(writer)', '(head writer)', '(written by)', '(story)', '(story editor)') AND cn.name  like 'Lionsgate%' AND it1.info  = 'genres' AND it2.info  = 'votes' AND k.keyword  in ('murder', 'violence', 'blood', 'gore', 'death', 'female-nudity', 'hospital') AND mi.info  in ('Horror', 'Action', 'Sci-Fi', 'Thriller', 'Crime', 'War') AND t.id = mi.movie_id AND t.id = mi_idx.movie_id AND t.id = ci.movie_id AND t.id = mk.movie_id AND t.id = mc.movie_id AND ci.movie_id = mi.movie_id AND ci.movie_id = mi_idx.movie_id AND ci.movie_id = mk.movie_id AND ci.movie_id = mc.movie_id AND mi.movie_id = mi_idx.movie_id AND mi.movie_id = mk.movie_id AND mi.movie_id = mc.movie_id AND mi_idx.movie_id = mk.movie_id AND mi_idx.movie_id = mc.movie_id AND mk.movie_id = mc.movie_id AND n.id = ci.person_id AND it1.id = mi.info_type_id AND it2.id = mi_idx.info_type_id AND k.id = mk.keyword_id AND cn.id = mc.company_id;
2 | 


--------------------------------------------------------------------------------
/engine/infra/Random.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | //---------------------------------------------------------------------------
 3 | #include <cstdint>
 4 | //---------------------------------------------------------------------------
 5 | namespace engine {
 6 | //---------------------------------------------------------------------------
 7 | struct Random { // Small 64-bit pseudo-random generator whose whole state is one seed word
 8 |     static constexpr uint64_t addConstant = 0x2d358dccaa6c78a5ull; // added to seed on every draw
 9 |     static constexpr uint64_t xorConstant = 0x8bb84b93962eacc9ull; // xor-ed with seed to form the second mix operand
10 |     uint64_t seed = 0; // current generator state
11 |     /// Multiply a and b to 128 bits and fold the product: high 64 bits xor low 64 bits
12 |     static constexpr uint64_t mix(uint64_t a, uint64_t b) { // constexpr so that the constexpr constructor below can actually be constant-evaluated
13 |         auto res = static_cast<unsigned __int128>(a) * b;
14 |         return static_cast<uint64_t>(res >> 64) ^ static_cast<uint64_t>(res);
15 |     }
16 |     /// Advance the state by addConstant and return mix(seed, seed ^ xorConstant)
17 |     uint64_t operator()() {
18 |         seed += addConstant;
19 |         return mix(seed, seed ^ xorConstant);
20 |     }
21 |     /// Initial state is mix(s, xorConstant), which is 0 for the default s = 0
22 |     constexpr explicit Random(uint64_t s = 0) : seed(mix(s, xorConstant)) {}
23 | 
24 |     /// Generate in range [0, s): the high 64 bits of the 128-bit product val * s (always 0 when s == 0)
25 |     uint64_t nextRange(uint64_t s) {
26 |         uint64_t val = operator()();
27 |         return static_cast<uint64_t>((static_cast<unsigned __int128>(val) * s) >> 64); // explicit narrowing; the shifted product always fits in 64 bits
28 |     }
29 | };
30 | //---------------------------------------------------------------------------
31 | }


--------------------------------------------------------------------------------
/engine/query/PlanImport.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | //---------------------------------------------------------------------------
 3 | #include "query/QueryPlan.hpp"
 4 | #include <memory>
 5 | //---------------------------------------------------------------------------
 6 | struct Plan;
 7 | struct ColumnarTable;
 8 | //---------------------------------------------------------------------------
 9 | namespace engine {
10 | //---------------------------------------------------------------------------
11 | struct PlanImport { // Static entry points for importing plans and tables; this header only declares them
12 |     static QueryPlan importPlanExistingData(DataSource& dataSource, const Plan& plan); // NOTE(review): presumably reuses data already held by dataSource — confirm against the definition
13 |     static QueryPlan importPlan(DataSource& dataSource, const Plan& plan); // returns a QueryPlan for the given Plan; dataSource is taken by mutable reference
14 |     /// Produce a DataSource::Table from a ColumnarTable
15 |     static DataSource::Table importTable(const ColumnarTable& tbl);
16 |     /// Cell type accepted by makeTable; NOTE(review): std::monostate presumably encodes NULL — confirm
17 |     using Data = std::variant<int32_t, int64_t, double, std::string, std::monostate>; // NOTE(review): <variant>, <string> and <vector> are not included here, presumably supplied by query/QueryPlan.hpp — confirm
18 |     /// Used for testing: holder with a virtual destructor that carries the resulting table
19 |     struct TableResult {
20 |         virtual ~TableResult() = default;
21 |         DataSource::Table table;
22 |     };
23 |     /// Used for testing: returns an owning pointer to a TableResult; NOTE(review): data is presumably one inner vector per row with one DataType per column — confirm
24 |     static std::unique_ptr<TableResult> makeTable(std::vector<std::vector<Data>> data, std::vector<DataType> types);
25 | };
26 | //---------------------------------------------------------------------------
27 | }


--------------------------------------------------------------------------------
/job/27b.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(cn.name) AS producing_company, MIN(lt.link) AS link_type, MIN(t.title) AS complete_western_sequel FROM complete_cast AS cc, comp_cast_type AS cct1, comp_cast_type AS cct2, company_name AS cn, company_type AS ct, keyword AS k, link_type AS lt, movie_companies AS mc, movie_info AS mi, movie_keyword AS mk, movie_link AS ml, title AS t WHERE cct1.kind  in ('cast', 'crew') AND cct2.kind  = 'complete' AND cn.country_code !='[pl]' AND (cn.name LIKE '%Film%' OR cn.name LIKE '%Warner%') AND ct.kind ='production companies' AND k.keyword ='sequel' AND lt.link LIKE '%follow%' AND mc.note IS NULL AND mi.info IN ('Sweden', 'Germany','Swedish', 'German') AND t.production_year  = 1998 AND lt.id = ml.link_type_id AND ml.movie_id = t.id AND t.id = mk.movie_id AND mk.keyword_id = k.id AND t.id = mc.movie_id AND mc.company_type_id = ct.id AND mc.company_id = cn.id AND mi.movie_id = t.id AND t.id = cc.movie_id AND cct1.id = cc.subject_id AND cct2.id = cc.status_id AND ml.movie_id = mk.movie_id AND ml.movie_id = mc.movie_id AND mk.movie_id = mc.movie_id AND ml.movie_id = mi.movie_id AND mk.movie_id = mi.movie_id AND mc.movie_id = mi.movie_id AND ml.movie_id = cc.movie_id AND mk.movie_id = cc.movie_id AND mc.movie_id = cc.movie_id AND mi.movie_id = cc.movie_id;
2 | 


--------------------------------------------------------------------------------
/job/27a.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(cn.name) AS producing_company, MIN(lt.link) AS link_type, MIN(t.title) AS complete_western_sequel FROM complete_cast AS cc, comp_cast_type AS cct1, comp_cast_type AS cct2, company_name AS cn, company_type AS ct, keyword AS k, link_type AS lt, movie_companies AS mc, movie_info AS mi, movie_keyword AS mk, movie_link AS ml, title AS t WHERE cct1.kind  in ('cast', 'crew') AND cct2.kind  = 'complete' AND cn.country_code !='[pl]' AND (cn.name LIKE '%Film%' OR cn.name LIKE '%Warner%') AND ct.kind ='production companies' AND k.keyword ='sequel' AND lt.link LIKE '%follow%' AND mc.note IS NULL AND mi.info IN ('Sweden', 'Germany','Swedish', 'German') AND t.production_year BETWEEN 1950 AND 2000 AND lt.id = ml.link_type_id AND ml.movie_id = t.id AND t.id = mk.movie_id AND mk.keyword_id = k.id AND t.id = mc.movie_id AND mc.company_type_id = ct.id AND mc.company_id = cn.id AND mi.movie_id = t.id AND t.id = cc.movie_id AND cct1.id = cc.subject_id AND cct2.id = cc.status_id AND ml.movie_id = mk.movie_id AND ml.movie_id = mc.movie_id AND mk.movie_id = mc.movie_id AND ml.movie_id = mi.movie_id AND mk.movie_id = mi.movie_id AND mc.movie_id = mi.movie_id AND ml.movie_id = cc.movie_id AND mk.movie_id = cc.movie_id AND mc.movie_id = cc.movie_id AND mi.movie_id = cc.movie_id;
2 | 


--------------------------------------------------------------------------------
/job/33b.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(cn1.name) AS first_company, MIN(cn2.name) AS second_company, MIN(mi_idx1.info) AS first_rating, MIN(mi_idx2.info) AS second_rating, MIN(t1.title) AS first_movie, MIN(t2.title) AS second_movie FROM company_name AS cn1, company_name AS cn2, info_type AS it1, info_type AS it2, kind_type AS kt1, kind_type AS kt2, link_type AS lt, movie_companies AS mc1, movie_companies AS mc2, movie_info_idx AS mi_idx1, movie_info_idx AS mi_idx2, movie_link AS ml, title AS t1, title AS t2 WHERE cn1.country_code  = '[nl]' AND it1.info  = 'rating' AND it2.info  = 'rating' AND kt1.kind  in ('tv series') AND kt2.kind  in ('tv series') AND lt.link  LIKE '%follow%' AND mi_idx2.info  < '3.0' AND t2.production_year  = 2007 AND lt.id = ml.link_type_id AND t1.id = ml.movie_id AND t2.id = ml.linked_movie_id AND it1.id = mi_idx1.info_type_id AND t1.id = mi_idx1.movie_id AND kt1.id = t1.kind_id AND cn1.id = mc1.company_id AND t1.id = mc1.movie_id AND ml.movie_id = mi_idx1.movie_id AND ml.movie_id = mc1.movie_id AND mi_idx1.movie_id = mc1.movie_id AND it2.id = mi_idx2.info_type_id AND t2.id = mi_idx2.movie_id AND kt2.id = t2.kind_id AND cn2.id = mc2.company_id AND t2.id = mc2.movie_id AND ml.linked_movie_id = mi_idx2.movie_id AND ml.linked_movie_id = mc2.movie_id AND mi_idx2.movie_id = mc2.movie_id;
2 | 


--------------------------------------------------------------------------------
/job/24b.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(chn.name) AS voiced_char_name, MIN(n.name) AS voicing_actress_name, MIN(t.title) AS kung_fu_panda FROM aka_name AS an, char_name AS chn, cast_info AS ci, company_name AS cn, info_type AS it, keyword AS k, movie_companies AS mc, movie_info AS mi, movie_keyword AS mk, name AS n, role_type AS rt, title AS t WHERE ci.note  in ('(voice)', '(voice: Japanese version)', '(voice) (uncredited)', '(voice: English version)') AND cn.country_code ='[us]' AND cn.name  = 'DreamWorks Animation' AND it.info  = 'release dates' AND k.keyword  in ('hero', 'martial-arts', 'hand-to-hand-combat', 'computer-animated-movie') AND mi.info  is not null and (mi.info like 'Japan:%201%' or mi.info like 'USA:%201%') AND n.gender ='f' and n.name like '%An%' AND rt.role ='actress' AND t.production_year  > 2010 AND t.title like 'Kung Fu Panda%' AND t.id = mi.movie_id AND t.id = mc.movie_id AND t.id = ci.movie_id AND t.id = mk.movie_id AND mc.movie_id = ci.movie_id AND mc.movie_id = mi.movie_id AND mc.movie_id = mk.movie_id AND mi.movie_id = ci.movie_id AND mi.movie_id = mk.movie_id AND ci.movie_id = mk.movie_id AND cn.id = mc.company_id AND it.id = mi.info_type_id AND n.id = ci.person_id AND rt.id = ci.role_id AND n.id = an.person_id AND ci.person_id = an.person_id AND chn.id = ci.person_role_id AND k.id = mk.keyword_id;
2 | 


--------------------------------------------------------------------------------
/job/27c.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(cn.name) AS producing_company, MIN(lt.link) AS link_type, MIN(t.title) AS complete_western_sequel FROM complete_cast AS cc, comp_cast_type AS cct1, comp_cast_type AS cct2, company_name AS cn, company_type AS ct, keyword AS k, link_type AS lt, movie_companies AS mc, movie_info AS mi, movie_keyword AS mk, movie_link AS ml, title AS t WHERE cct1.kind  = 'cast' AND cct2.kind  like 'complete%' AND cn.country_code !='[pl]' AND (cn.name LIKE '%Film%' OR cn.name LIKE '%Warner%') AND ct.kind ='production companies' AND k.keyword ='sequel' AND lt.link LIKE '%follow%' AND mc.note IS NULL AND mi.info IN ('Sweden', 'Norway', 'Germany', 'Denmark', 'Swedish', 'Denish', 'Norwegian', 'German', 'English') AND t.production_year BETWEEN 1950 AND 2010 AND lt.id = ml.link_type_id AND ml.movie_id = t.id AND t.id = mk.movie_id AND mk.keyword_id = k.id AND t.id = mc.movie_id AND mc.company_type_id = ct.id AND mc.company_id = cn.id AND mi.movie_id = t.id AND t.id = cc.movie_id AND cct1.id = cc.subject_id AND cct2.id = cc.status_id AND ml.movie_id = mk.movie_id AND ml.movie_id = mc.movie_id AND mk.movie_id = mc.movie_id AND ml.movie_id = mi.movie_id AND mk.movie_id = mi.movie_id AND mc.movie_id = mi.movie_id AND ml.movie_id = cc.movie_id AND mk.movie_id = cc.movie_id AND mc.movie_id = cc.movie_id AND mi.movie_id = cc.movie_id;
2 | 


--------------------------------------------------------------------------------
/job/33a.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(cn1.name) AS first_company, MIN(cn2.name) AS second_company, MIN(mi_idx1.info) AS first_rating, MIN(mi_idx2.info) AS second_rating, MIN(t1.title) AS first_movie, MIN(t2.title) AS second_movie FROM company_name AS cn1, company_name AS cn2, info_type AS it1, info_type AS it2, kind_type AS kt1, kind_type AS kt2, link_type AS lt, movie_companies AS mc1, movie_companies AS mc2, movie_info_idx AS mi_idx1, movie_info_idx AS mi_idx2, movie_link AS ml, title AS t1, title AS t2 WHERE cn1.country_code  = '[us]' AND it1.info  = 'rating' AND it2.info  = 'rating' AND kt1.kind  in ('tv series') AND kt2.kind  in ('tv series') AND lt.link  in ('sequel', 'follows', 'followed by') AND mi_idx2.info  < '3.0' AND t2.production_year  between 2005 and 2008 AND lt.id = ml.link_type_id AND t1.id = ml.movie_id AND t2.id = ml.linked_movie_id AND it1.id = mi_idx1.info_type_id AND t1.id = mi_idx1.movie_id AND kt1.id = t1.kind_id AND cn1.id = mc1.company_id AND t1.id = mc1.movie_id AND ml.movie_id = mi_idx1.movie_id AND ml.movie_id = mc1.movie_id AND mi_idx1.movie_id = mc1.movie_id AND it2.id = mi_idx2.info_type_id AND t2.id = mi_idx2.movie_id AND kt2.id = t2.kind_id AND cn2.id = mc2.company_id AND t2.id = mc2.movie_id AND ml.linked_movie_id = mi_idx2.movie_id AND ml.linked_movie_id = mc2.movie_id AND mi_idx2.movie_id = mc2.movie_id;
2 | 


--------------------------------------------------------------------------------
/job/30c.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(mi.info) AS movie_budget, MIN(mi_idx.info) AS movie_votes, MIN(n.name) AS writer, MIN(t.title) AS complete_violent_movie FROM complete_cast AS cc, comp_cast_type AS cct1, comp_cast_type AS cct2, cast_info AS ci, info_type AS it1, info_type AS it2, keyword AS k, movie_info AS mi, movie_info_idx AS mi_idx, movie_keyword AS mk, name AS n, title AS t WHERE cct1.kind  = 'cast' AND cct2.kind  ='complete+verified' AND ci.note  in ('(writer)', '(head writer)', '(written by)', '(story)', '(story editor)') AND it1.info  = 'genres' AND it2.info  = 'votes' AND k.keyword  in ('murder', 'violence', 'blood', 'gore', 'death', 'female-nudity', 'hospital') AND mi.info  in ('Horror', 'Action', 'Sci-Fi', 'Thriller', 'Crime', 'War') AND n.gender  = 'm' AND t.id = mi.movie_id AND t.id = mi_idx.movie_id AND t.id = ci.movie_id AND t.id = mk.movie_id AND t.id = cc.movie_id AND ci.movie_id = mi.movie_id AND ci.movie_id = mi_idx.movie_id AND ci.movie_id = mk.movie_id AND ci.movie_id = cc.movie_id AND mi.movie_id = mi_idx.movie_id AND mi.movie_id = mk.movie_id AND mi.movie_id = cc.movie_id AND mi_idx.movie_id = mk.movie_id AND mi_idx.movie_id = cc.movie_id AND mk.movie_id = cc.movie_id AND n.id = ci.person_id AND it1.id = mi.info_type_id AND it2.id = mi_idx.info_type_id AND k.id = mk.keyword_id AND cct1.id = cc.subject_id AND cct2.id = cc.status_id;
2 | 


--------------------------------------------------------------------------------
/job/30a.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(mi.info) AS movie_budget, MIN(mi_idx.info) AS movie_votes, MIN(n.name) AS writer, MIN(t.title) AS complete_violent_movie FROM complete_cast AS cc, comp_cast_type AS cct1, comp_cast_type AS cct2, cast_info AS ci, info_type AS it1, info_type AS it2, keyword AS k, movie_info AS mi, movie_info_idx AS mi_idx, movie_keyword AS mk, name AS n, title AS t WHERE cct1.kind  in ('cast', 'crew') AND cct2.kind  ='complete+verified' AND ci.note  in ('(writer)', '(head writer)', '(written by)', '(story)', '(story editor)') AND it1.info  = 'genres' AND it2.info  = 'votes' AND k.keyword  in ('murder', 'violence', 'blood', 'gore', 'death', 'female-nudity', 'hospital') AND mi.info  in ('Horror', 'Thriller') AND n.gender  = 'm' AND t.production_year  > 2000 AND t.id = mi.movie_id AND t.id = mi_idx.movie_id AND t.id = ci.movie_id AND t.id = mk.movie_id AND t.id = cc.movie_id AND ci.movie_id = mi.movie_id AND ci.movie_id = mi_idx.movie_id AND ci.movie_id = mk.movie_id AND ci.movie_id = cc.movie_id AND mi.movie_id = mi_idx.movie_id AND mi.movie_id = mk.movie_id AND mi.movie_id = cc.movie_id AND mi_idx.movie_id = mk.movie_id AND mi_idx.movie_id = cc.movie_id AND mk.movie_id = cc.movie_id AND n.id = ci.person_id AND it1.id = mi.info_type_id AND it2.id = mi_idx.info_type_id AND k.id = mk.keyword_id AND cct1.id = cc.subject_id AND cct2.id = cc.status_id;
2 | 


--------------------------------------------------------------------------------
/job/33c.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(cn1.name) AS first_company, MIN(cn2.name) AS second_company, MIN(mi_idx1.info) AS first_rating, MIN(mi_idx2.info) AS second_rating, MIN(t1.title) AS first_movie, MIN(t2.title) AS second_movie FROM company_name AS cn1, company_name AS cn2, info_type AS it1, info_type AS it2, kind_type AS kt1, kind_type AS kt2, link_type AS lt, movie_companies AS mc1, movie_companies AS mc2, movie_info_idx AS mi_idx1, movie_info_idx AS mi_idx2, movie_link AS ml, title AS t1, title AS t2 WHERE cn1.country_code  != '[us]' AND it1.info  = 'rating' AND it2.info  = 'rating' AND kt1.kind  in ('tv series', 'episode') AND kt2.kind  in ('tv series', 'episode') AND lt.link  in ('sequel', 'follows', 'followed by') AND mi_idx2.info  < '3.5' AND t2.production_year  between 2000 and 2010 AND lt.id = ml.link_type_id AND t1.id = ml.movie_id AND t2.id = ml.linked_movie_id AND it1.id = mi_idx1.info_type_id AND t1.id = mi_idx1.movie_id AND kt1.id = t1.kind_id AND cn1.id = mc1.company_id AND t1.id = mc1.movie_id AND ml.movie_id = mi_idx1.movie_id AND ml.movie_id = mc1.movie_id AND mi_idx1.movie_id = mc1.movie_id AND it2.id = mi_idx2.info_type_id AND t2.id = mi_idx2.movie_id AND kt2.id = t2.kind_id AND cn2.id = mc2.company_id AND t2.id = mc2.movie_id AND ml.linked_movie_id = mi_idx2.movie_id AND ml.linked_movie_id = mc2.movie_id AND mi_idx2.movie_id = mc2.movie_id;
2 | 


--------------------------------------------------------------------------------
/job/31b.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(mi.info) AS movie_budget, MIN(mi_idx.info) AS movie_votes, MIN(n.name) AS writer, MIN(t.title) AS violent_liongate_movie FROM cast_info AS ci, company_name AS cn, info_type AS it1, info_type AS it2, keyword AS k, movie_companies AS mc, movie_info AS mi, movie_info_idx AS mi_idx, movie_keyword AS mk, name AS n, title AS t WHERE ci.note  in ('(writer)', '(head writer)', '(written by)', '(story)', '(story editor)') AND cn.name  like 'Lionsgate%' AND it1.info  = 'genres' AND it2.info  = 'votes' AND k.keyword  in ('murder', 'violence', 'blood', 'gore', 'death', 'female-nudity', 'hospital') AND mc.note  like '%(Blu-ray)%' AND mi.info  in ('Horror', 'Thriller') AND n.gender  = 'm' AND t.production_year  > 2000 and (t.title like '%Freddy%' or t.title like '%Jason%' or t.title like 'Saw%') AND t.id = mi.movie_id AND t.id = mi_idx.movie_id AND t.id = ci.movie_id AND t.id = mk.movie_id AND t.id = mc.movie_id AND ci.movie_id = mi.movie_id AND ci.movie_id = mi_idx.movie_id AND ci.movie_id = mk.movie_id AND ci.movie_id = mc.movie_id AND mi.movie_id = mi_idx.movie_id AND mi.movie_id = mk.movie_id AND mi.movie_id = mc.movie_id AND mi_idx.movie_id = mk.movie_id AND mi_idx.movie_id = mc.movie_id AND mk.movie_id = mc.movie_id AND n.id = ci.person_id AND it1.id = mi.info_type_id AND it2.id = mi_idx.info_type_id AND k.id = mk.keyword_id AND cn.id = mc.company_id;
2 | 


--------------------------------------------------------------------------------
/job/30b.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(mi.info) AS movie_budget, MIN(mi_idx.info) AS movie_votes, MIN(n.name) AS writer, MIN(t.title) AS complete_gore_movie FROM complete_cast AS cc, comp_cast_type AS cct1, comp_cast_type AS cct2, cast_info AS ci, info_type AS it1, info_type AS it2, keyword AS k, movie_info AS mi, movie_info_idx AS mi_idx, movie_keyword AS mk, name AS n, title AS t WHERE cct1.kind  in ('cast', 'crew') AND cct2.kind  ='complete+verified' AND ci.note  in ('(writer)', '(head writer)', '(written by)', '(story)', '(story editor)') AND it1.info  = 'genres' AND it2.info  = 'votes' AND k.keyword  in ('murder', 'violence', 'blood', 'gore', 'death', 'female-nudity', 'hospital') AND mi.info  in ('Horror', 'Thriller') AND n.gender  = 'm' AND t.production_year  > 2000 and (t.title like '%Freddy%' or t.title like '%Jason%' or t.title like 'Saw%') AND t.id = mi.movie_id AND t.id = mi_idx.movie_id AND t.id = ci.movie_id AND t.id = mk.movie_id AND t.id = cc.movie_id AND ci.movie_id = mi.movie_id AND ci.movie_id = mi_idx.movie_id AND ci.movie_id = mk.movie_id AND ci.movie_id = cc.movie_id AND mi.movie_id = mi_idx.movie_id AND mi.movie_id = mk.movie_id AND mi.movie_id = cc.movie_id AND mi_idx.movie_id = mk.movie_id AND mi_idx.movie_id = cc.movie_id AND mk.movie_id = cc.movie_id AND n.id = ci.person_id AND it1.id = mi.info_type_id AND it2.id = mi_idx.info_type_id AND k.id = mk.keyword_id AND cct1.id = cc.subject_id AND cct2.id = cc.status_id;
2 | 


--------------------------------------------------------------------------------
/job/28b.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(cn.name) AS movie_company, MIN(mi_idx.info) AS rating, MIN(t.title) AS complete_euro_dark_movie FROM complete_cast AS cc, comp_cast_type AS cct1, comp_cast_type AS cct2, company_name AS cn, company_type AS ct, info_type AS it1, info_type AS it2, keyword AS k, kind_type AS kt, movie_companies AS mc, movie_info AS mi, movie_info_idx AS mi_idx, movie_keyword AS mk, title AS t WHERE cct1.kind  = 'crew' AND cct2.kind  != 'complete+verified' AND cn.country_code  != '[us]' AND it1.info  = 'countries' AND it2.info  = 'rating' AND k.keyword  in ('murder', 'murder-in-title', 'blood', 'violence') AND kt.kind  in ('movie', 'episode') AND mc.note  not like '%(USA)%' and mc.note like '%(200%)%' AND mi.info  IN ('Sweden', 'Germany', 'Swedish', 'German') AND mi_idx.info  > '6.5' AND t.production_year  > 2005 AND kt.id = t.kind_id AND t.id = mi.movie_id AND t.id = mk.movie_id AND t.id = mi_idx.movie_id AND t.id = mc.movie_id AND t.id = cc.movie_id AND mk.movie_id = mi.movie_id AND mk.movie_id = mi_idx.movie_id AND mk.movie_id = mc.movie_id AND mk.movie_id = cc.movie_id AND mi.movie_id = mi_idx.movie_id AND mi.movie_id = mc.movie_id AND mi.movie_id = cc.movie_id AND mc.movie_id = mi_idx.movie_id AND mc.movie_id = cc.movie_id AND mi_idx.movie_id = cc.movie_id AND k.id = mk.keyword_id AND it1.id = mi.info_type_id AND it2.id = mi_idx.info_type_id AND ct.id = mc.company_type_id AND cn.id = mc.company_id AND cct1.id = cc.subject_id AND cct2.id = cc.status_id;
2 | 


--------------------------------------------------------------------------------
/engine/infra/UnionFind.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | //---------------------------------------------------------------------------
 3 | #include <vector>
 4 | #include "infra/SmallVec.hpp"
 5 | //---------------------------------------------------------------------------
 6 | namespace engine {
 7 | //---------------------------------------------------------------------------
 8 | class UnionFind {
 9 |     struct Entry {
10 |         unsigned parent = 0;
11 |         unsigned rank = 0;
12 |     };
13 |     /// The entries
14 |     SmallVec<Entry, 16> entries;
15 | 
16 |     public:
17 |     unsigned find(unsigned v) {
18 |         if (v >= entries.size())
19 |             return v;
20 |         while (entries[v].parent != v) {
21 |             entries[v].parent = entries[entries[v].parent].parent;
22 |             v = entries[v].parent;
23 |         }
24 |         return v;
25 |     }
26 |     unsigned merge(unsigned a, unsigned b) {
27 |         a = find(a);
28 |         b = find(b);
29 |         if (a == b)
30 |             return a;
31 | 
32 |         // Grow
33 |         while (std::max(a, b) >= entries.size())
34 |             entries.push_back({unsigned(entries.size()), 1});
35 | 
36 |         if (entries[a].rank > entries[b].rank)
37 |             std::swap(a, b);
38 | 
39 |         entries[a].parent = b;
40 |         entries[b].rank += entries[a].rank;
41 |         return b;
42 |     }
43 | };
44 | //---------------------------------------------------------------------------
45 | }


--------------------------------------------------------------------------------
/include/csv_parser.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <vector>
 4 | 
 5 | #include <cstdlib>
 6 | 
 7 | class CSVParser {
 8 | public:
 9 |     enum Error {
10 |         Ok,
11 |         QuoteNotClosed,
12 |         InconsistentColumns,
13 |         NoTrailingComma,
14 |     };
15 | 
16 |     CSVParser(char escape = '"', char sep = ',', bool has_trailing_comma = false)
17 |     : escape_(escape)
18 |     , comma_(sep)
19 |     , has_trailing_comma_(has_trailing_comma) {}
20 | 
21 |     [[nodiscard]] Error execute(const char* buffer, size_t len);
22 |     [[nodiscard]] Error finish();
23 | 
24 |     virtual void on_field(size_t col_idx, size_t row_idx, const char* begin, size_t len) = 0;
25 | 
26 | private:
27 |     // configure
28 |     char escape_{'"'}; // may also be '\\'
29 |     char comma_{','};  // may also be '|'
30 |     // true means # commas = # columns and the last comma in each line is followed by the record
31 |     // seperator; false means # commas + 1 = # columns
32 |     bool has_trailing_comma_{false};
33 | 
34 |     // states
35 |     std::vector<char> current_field_;
36 |     size_t            col_idx_{0};
37 |     size_t            row_idx_{0};
38 |     size_t            num_cols_{0};
39 |     bool              after_first_row_{false};
40 |     bool              quoted_{false};
41 |     bool              after_field_sep_{false};
42 |     bool              after_record_sep_{false};
43 |     bool              escaping_{false};
44 |     bool              newlining_{false};
45 | };
46 | 


--------------------------------------------------------------------------------
/engine/query/Restriction.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | //---------------------------------------------------------------------------
 3 | #include "query/RuntimeValue.hpp"
 4 | //---------------------------------------------------------------------------
 5 | namespace engine {
 6 | //---------------------------------------------------------------------------
 7 | class Hashtable;
 8 | //---------------------------------------------------------------------------
 9 | /// A restriction
10 | struct Restriction {
11 |     static constexpr uint64_t nullValue = RuntimeValue::nullValue;
12 |     /// The type order is used for ordering restrictions
13 |     /// Selective & cheap comes first
14 |     enum Type {
15 |         /// Attribute is equal to value
16 |         Eq,
17 |         /// Attribute is not null
18 |         NotNull,
19 |         /// Attribute will likely find a join partner
20 |         Join,
21 |         /// Attribute will definitely find a join partner
22 |         JoinPrecise
23 |     };
24 |     /// The type of the restriction
25 |     Type type;
26 |     /// The constant value compared with
27 |     RuntimeValue cst;
28 |     /// The hash table for join filters
29 |     Hashtable* joinFilter;
30 |     /// The selectivity estimation for the restriction
31 |     double selectivity = 1.0;
32 | 
33 |     /// Check whether the restriction is satisfied by a value
34 |     bool operator()(uint64_t val) const noexcept;
35 | };
36 | //---------------------------------------------------------------------------
37 | }


--------------------------------------------------------------------------------
/job/28c.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(cn.name) AS movie_company, MIN(mi_idx.info) AS rating, MIN(t.title) AS complete_euro_dark_movie FROM complete_cast AS cc, comp_cast_type AS cct1, comp_cast_type AS cct2, company_name AS cn, company_type AS ct, info_type AS it1, info_type AS it2, keyword AS k, kind_type AS kt, movie_companies AS mc, movie_info AS mi, movie_info_idx AS mi_idx, movie_keyword AS mk, title AS t WHERE cct1.kind  = 'cast' AND cct2.kind  = 'complete' AND cn.country_code  != '[us]' AND it1.info  = 'countries' AND it2.info  = 'rating' AND k.keyword  in ('murder', 'murder-in-title', 'blood', 'violence') AND kt.kind  in ('movie', 'episode') AND mc.note  not like '%(USA)%' and mc.note like '%(200%)%' AND mi.info IN ('Sweden', 'Norway', 'Germany', 'Denmark', 'Swedish', 'Danish', 'Norwegian', 'German', 'USA', 'American') AND mi_idx.info  < '8.5' AND t.production_year  > 2005 AND kt.id = t.kind_id AND t.id = mi.movie_id AND t.id = mk.movie_id AND t.id = mi_idx.movie_id AND t.id = mc.movie_id AND t.id = cc.movie_id AND mk.movie_id = mi.movie_id AND mk.movie_id = mi_idx.movie_id AND mk.movie_id = mc.movie_id AND mk.movie_id = cc.movie_id AND mi.movie_id = mi_idx.movie_id AND mi.movie_id = mc.movie_id AND mi.movie_id = cc.movie_id AND mc.movie_id = mi_idx.movie_id AND mc.movie_id = cc.movie_id AND mi_idx.movie_id = cc.movie_id AND k.id = mk.keyword_id AND it1.id = mi.info_type_id AND it2.id = mi_idx.info_type_id AND ct.id = mc.company_type_id AND cn.id = mc.company_id AND cct1.id = cc.subject_id AND cct2.id = cc.status_id;
2 | 


--------------------------------------------------------------------------------
/job/28a.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(cn.name) AS movie_company, MIN(mi_idx.info) AS rating, MIN(t.title) AS complete_euro_dark_movie FROM complete_cast AS cc, comp_cast_type AS cct1, comp_cast_type AS cct2, company_name AS cn, company_type AS ct, info_type AS it1, info_type AS it2, keyword AS k, kind_type AS kt, movie_companies AS mc, movie_info AS mi, movie_info_idx AS mi_idx, movie_keyword AS mk, title AS t WHERE cct1.kind  = 'crew' AND cct2.kind  != 'complete+verified' AND cn.country_code  != '[us]' AND it1.info  = 'countries' AND it2.info  = 'rating' AND k.keyword  in ('murder', 'murder-in-title', 'blood', 'violence') AND kt.kind  in ('movie', 'episode') AND mc.note  not like '%(USA)%' and mc.note like '%(200%)%' AND mi.info IN ('Sweden', 'Norway', 'Germany', 'Denmark', 'Swedish', 'Danish', 'Norwegian', 'German', 'USA', 'American') AND mi_idx.info  < '8.5' AND t.production_year  > 2000 AND kt.id = t.kind_id AND t.id = mi.movie_id AND t.id = mk.movie_id AND t.id = mi_idx.movie_id AND t.id = mc.movie_id AND t.id = cc.movie_id AND mk.movie_id = mi.movie_id AND mk.movie_id = mi_idx.movie_id AND mk.movie_id = mc.movie_id AND mk.movie_id = cc.movie_id AND mi.movie_id = mi_idx.movie_id AND mi.movie_id = mc.movie_id AND mi.movie_id = cc.movie_id AND mc.movie_id = mi_idx.movie_id AND mc.movie_id = cc.movie_id AND mi_idx.movie_id = cc.movie_id AND k.id = mk.keyword_id AND it1.id = mi.info_type_id AND it2.id = mi_idx.info_type_id AND ct.id = mc.company_type_id AND cn.id = mc.company_id AND cct1.id = cc.subject_id AND cct2.id = cc.status_id;
2 | 


--------------------------------------------------------------------------------
/engine/infra/Mmap.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | //---------------------------------------------------------------------------
 3 | #include <cassert>
 4 | #include <cstddef>
 5 | #include <cstdio>
 6 | #include <string>
 7 | #include <utility>
 8 | //---------------------------------------------------------------------------
 9 | namespace engine {
10 | //---------------------------------------------------------------------------
/// Move-only handle for a mapped region created by mapFile() or mapMemory().
/// The region is released through reset(), which the destructor calls.
/// (The mapping functions and reset() are defined outside this header.)
class Mmap {
    /// Start of the region, nullptr while the handle is empty
    char* data_ = nullptr;
    /// Size of the region
    size_t size_ = 0;
    /// Handle of the backing file, -1 when there is none (presumably a file descriptor)
    int file = -1;

    public:
    Mmap(const Mmap&) = delete;
    Mmap& operator=(const Mmap&) = delete;

    constexpr Mmap() noexcept = default;
    ~Mmap() noexcept {
        reset();
    }
    /// Take over the region of `other`, leaving `other` empty
    Mmap(Mmap&& other) noexcept {
        *this = std::move(other);
    }
    /// Release the current region and take over the one of `other`
    Mmap& operator=(Mmap&& other) noexcept {
        // Self-move must be a no-op: resetting first would throw away the very
        // region this handle is supposed to keep.
        if (this != &other) {
            reset();
            std::swap(data_, other.data_);
            std::swap(size_, other.size_);
            std::swap(file, other.file);
        }
        return *this;
    }

    static void prefault(void* data, size_t size);


    static Mmap mapFile(const std::string& fileName);
    static Mmap mapMemory(size_t size);

    /// Release whatever the handle currently holds
    void reset() noexcept;

    /// True while the handle refers to a region
    constexpr operator bool() const noexcept { return data_ != nullptr; }

    char* data() const { return data_; }
    size_t size() const { return size_; }
};
48 | //---------------------------------------------------------------------------
49 | }
50 | 


--------------------------------------------------------------------------------
/engine/tools/SQL.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | //---------------------------------------------------------------------------
#include "query/DataSource.hpp"
#include "query/QueryPlan.hpp"
#include <memory>
#include <string>
#include <unordered_map>
#include <vector>
 8 | //---------------------------------------------------------------------------
 9 | namespace engine {
10 | //---------------------------------------------------------------------------
11 | /// Parsing SQL and plans
12 | class SQL {
13 |     public:
14 |     /// Plan maker
15 |     struct PlanMaker {
16 |         virtual ~PlanMaker() noexcept = default;
17 |         virtual QueryPlan makePlan() = 0;
18 |     };
19 |     /// The parse result
20 |     struct Query {
21 |         /// Name of the query
22 |         std::string name;
23 |         /// The SQL query for duckdb
24 |         std::string sql;
25 |         /// The plan for the query
26 |         std::unique_ptr<PlanMaker> planMaker;
27 |         /// The index for the DuckDB result relation
28 |         unsigned resultRelation;
29 |     };
30 |     /// Batch of queries
31 |     struct Batch {
32 |         /// The data
33 |         std::unique_ptr<DataSource> db;
34 |         /// The queries
35 |         std::vector<Query> queries;
36 |     };
37 |     /// Parse all queries
38 |     static Batch parse(const std::string& planFile, std::vector<std::string> selected);
39 | };
40 | //---------------------------------------------------------------------------
41 | }
42 | //---------------------------------------------------------------------------


--------------------------------------------------------------------------------
/job/fkindexes.sql:
--------------------------------------------------------------------------------
 1 | create index company_id_movie_companies on movie_companies(company_id);
 2 | create index company_type_id_movie_companies on movie_companies(company_type_id);
 3 | create index info_type_id_movie_info_idx on movie_info_idx(info_type_id);
 4 | create index info_type_id_movie_info on movie_info(info_type_id);
 5 | create index info_type_id_person_info on person_info(info_type_id);
 6 | create index keyword_id_movie_keyword on movie_keyword(keyword_id);
 7 | create index kind_id_aka_title on aka_title(kind_id);
 8 | create index kind_id_title on title(kind_id);
 9 | create index linked_movie_id_movie_link on movie_link(linked_movie_id);
10 | create index link_type_id_movie_link on movie_link(link_type_id);
11 | create index movie_id_aka_title on aka_title(movie_id);
12 | create index movie_id_cast_info on cast_info(movie_id);
13 | create index movie_id_complete_cast on complete_cast(movie_id);
14 | create index movie_id_movie_companies on movie_companies(movie_id);
15 | create index movie_id_movie_info_idx on movie_info_idx(movie_id);
16 | create index movie_id_movie_keyword on movie_keyword(movie_id);
17 | create index movie_id_movie_link on movie_link(movie_id);
18 | create index movie_id_movie_info on movie_info(movie_id);
19 | create index person_id_aka_name on aka_name(person_id);
20 | create index person_id_cast_info on cast_info(person_id);
21 | create index person_id_person_info on person_info(person_id);
22 | create index person_role_id_cast_info on cast_info(person_role_id);
23 | create index role_id_cast_info on cast_info(role_id);
24 | 


--------------------------------------------------------------------------------
/engine/tools/JoinPipelineLoader.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | //---------------------------------------------------------------------------
 3 | #include <nlohmann/json_fwd.hpp>
 4 | #include <plan.h>
 5 | #include <table_entity.h>
 6 | #include <table.h>
 7 | #include <unordered_set>
 8 | //---------------------------------------------------------------------------
 9 | namespace engine {
10 | //---------------------------------------------------------------------------
11 | struct ParsedSQL;
12 | class DataSource;
13 | using OutputAttrsType = std::vector<std::tuple<TableEntity, std::string>>;
14 | //---------------------------------------------------------------------------
15 | struct DataSourceBuilder {
16 |     DataSource& db;
17 |     std::vector<ColumnarTable> columns;
18 |     std::unordered_map<std::string, uint32_t> tables;
19 | };
20 | //---------------------------------------------------------------------------
21 | struct JoinPipelineLoader {
22 |     const ParsedSQL& parsed_sql;
23 |     DataSourceBuilder& db;
24 |     ::Plan ret;
25 | 
26 |     std::unordered_set<TableEntity> extract_entities(const nlohmann::json& node);
27 |     std::tuple<size_t, std::vector<std::tuple<TableEntity, std::string, DataType>>> recurse(const nlohmann::json& node, const OutputAttrsType& required_attrs);
28 | 
29 |     static ::Plan load_join_pipeline(DataSourceBuilder& db, const nlohmann::json& node, const ParsedSQL& parsed_sql);
30 | };
31 | //---------------------------------------------------------------------------
32 | }
33 | //---------------------------------------------------------------------------


--------------------------------------------------------------------------------
/job/29b.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(chn.name) AS voiced_char, MIN(n.name) AS voicing_actress, MIN(t.title) AS voiced_animation FROM aka_name AS an, complete_cast AS cc, comp_cast_type AS cct1, comp_cast_type AS cct2, char_name AS chn, cast_info AS ci, company_name AS cn, info_type AS it, info_type AS it3, keyword AS k, movie_companies AS mc, movie_info AS mi, movie_keyword AS mk, name AS n, person_info AS pi, role_type AS rt, title AS t WHERE cct1.kind  ='cast' AND cct2.kind  ='complete+verified' AND chn.name  = 'Queen' AND ci.note  in ('(voice)', '(voice) (uncredited)', '(voice: English version)') AND cn.country_code ='[us]' AND it.info  = 'release dates' AND it3.info  = 'height' AND k.keyword  = 'computer-animation' AND mi.info  like 'USA:%200%' AND n.gender ='f' and n.name like '%An%' AND rt.role ='actress' AND t.title  = 'Shrek 2' AND t.production_year  between 2000 and 2005 AND t.id = mi.movie_id AND t.id = mc.movie_id AND t.id = ci.movie_id AND t.id = mk.movie_id AND t.id = cc.movie_id AND mc.movie_id = ci.movie_id AND mc.movie_id = mi.movie_id AND mc.movie_id = mk.movie_id AND mc.movie_id = cc.movie_id AND mi.movie_id = ci.movie_id AND mi.movie_id = mk.movie_id AND mi.movie_id = cc.movie_id AND ci.movie_id = mk.movie_id AND ci.movie_id = cc.movie_id AND mk.movie_id = cc.movie_id AND cn.id = mc.company_id AND it.id = mi.info_type_id AND n.id = ci.person_id AND rt.id = ci.role_id AND n.id = an.person_id AND ci.person_id = an.person_id AND chn.id = ci.person_role_id AND n.id = pi.person_id AND ci.person_id = pi.person_id AND it3.id = pi.info_type_id AND k.id = mk.keyword_id AND cct1.id = cc.subject_id AND cct2.id = cc.status_id;
2 | 


--------------------------------------------------------------------------------
/job/29c.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(chn.name) AS voiced_char, MIN(n.name) AS voicing_actress, MIN(t.title) AS voiced_animation FROM aka_name AS an, complete_cast AS cc, comp_cast_type AS cct1, comp_cast_type AS cct2, char_name AS chn, cast_info AS ci, company_name AS cn, info_type AS it, info_type AS it3, keyword AS k, movie_companies AS mc, movie_info AS mi, movie_keyword AS mk, name AS n, person_info AS pi, role_type AS rt, title AS t WHERE cct1.kind  ='cast' AND cct2.kind  ='complete+verified' AND ci.note  in ('(voice)', '(voice: Japanese version)', '(voice) (uncredited)', '(voice: English version)') AND cn.country_code ='[us]' AND it.info  = 'release dates' AND it3.info  = 'trivia' AND k.keyword  = 'computer-animation' AND mi.info  is not null and (mi.info like 'Japan:%200%' or mi.info like 'USA:%200%') AND n.gender ='f' and n.name like '%An%' AND rt.role ='actress' AND t.production_year  between 2000 and 2010 AND t.id = mi.movie_id AND t.id = mc.movie_id AND t.id = ci.movie_id AND t.id = mk.movie_id AND t.id = cc.movie_id AND mc.movie_id = ci.movie_id AND mc.movie_id = mi.movie_id AND mc.movie_id = mk.movie_id AND mc.movie_id = cc.movie_id AND mi.movie_id = ci.movie_id AND mi.movie_id = mk.movie_id AND mi.movie_id = cc.movie_id AND ci.movie_id = mk.movie_id AND ci.movie_id = cc.movie_id AND mk.movie_id = cc.movie_id AND cn.id = mc.company_id AND it.id = mi.info_type_id AND n.id = ci.person_id AND rt.id = ci.role_id AND n.id = an.person_id AND ci.person_id = an.person_id AND chn.id = ci.person_role_id AND n.id = pi.person_id AND ci.person_id = pi.person_id AND it3.id = pi.info_type_id AND k.id = mk.keyword_id AND cct1.id = cc.subject_id AND cct2.id = cc.status_id;
2 | 


--------------------------------------------------------------------------------
/job/29a.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(chn.name) AS voiced_char, MIN(n.name) AS voicing_actress, MIN(t.title) AS voiced_animation FROM aka_name AS an, complete_cast AS cc, comp_cast_type AS cct1, comp_cast_type AS cct2, char_name AS chn, cast_info AS ci, company_name AS cn, info_type AS it, info_type AS it3, keyword AS k, movie_companies AS mc, movie_info AS mi, movie_keyword AS mk, name AS n, person_info AS pi, role_type AS rt, title AS t WHERE cct1.kind  ='cast' AND cct2.kind  ='complete+verified' AND chn.name  = 'Queen' AND ci.note  in ('(voice)', '(voice) (uncredited)', '(voice: English version)') AND cn.country_code ='[us]' AND it.info  = 'release dates' AND it3.info  = 'trivia' AND k.keyword  = 'computer-animation' AND mi.info  is not null and (mi.info like 'Japan:%200%' or mi.info like 'USA:%200%') AND n.gender ='f' and n.name like '%An%' AND rt.role ='actress' AND t.title  = 'Shrek 2' AND t.production_year  between 2000 and 2010 AND t.id = mi.movie_id AND t.id = mc.movie_id AND t.id = ci.movie_id AND t.id = mk.movie_id AND t.id = cc.movie_id AND mc.movie_id = ci.movie_id AND mc.movie_id = mi.movie_id AND mc.movie_id = mk.movie_id AND mc.movie_id = cc.movie_id AND mi.movie_id = ci.movie_id AND mi.movie_id = mk.movie_id AND mi.movie_id = cc.movie_id AND ci.movie_id = mk.movie_id AND ci.movie_id = cc.movie_id AND mk.movie_id = cc.movie_id AND cn.id = mc.company_id AND it.id = mi.info_type_id AND n.id = ci.person_id AND rt.id = ci.role_id AND n.id = an.person_id AND ci.person_id = an.person_id AND chn.id = ci.person_role_id AND n.id = pi.person_id AND ci.person_id = pi.person_id AND it3.id = pi.info_type_id AND k.id = mk.keyword_id AND cct1.id = cc.subject_id AND cct2.id = cc.status_id;
2 | 


--------------------------------------------------------------------------------
/tests/build_database.cpp:
--------------------------------------------------------------------------------
 1 | #include <common.h>
 2 | 
 3 | #include <duckdb.hpp>
 4 | #include <fmt/core.h>
 5 | 
 6 | int main(int argc, char* argv[]) {
 7 |     using namespace duckdb;
 8 |     namespace fs = std::filesystem;
 9 | 
10 |     if (argc < 2) {
11 |         fmt::println(stderr, "Usage: {} <DuckDB database file>", argv[0]);
12 |         exit(EXIT_FAILURE);
13 |     }
14 | 
15 |     auto schema = read_file(fs::path("job") / "schema.sql");
16 | 
17 |     DuckDB     db(argv[1]);
18 |     Connection conn(db);
19 |     auto       result = conn.Query(schema);
20 |     if (result->HasError()) {
21 |         fmt::println("Error: {}", result->GetError());
22 |     }
23 | 
24 |     std::vector<std::string> table_names{
25 |         "char_name",
26 |         "kind_type",
27 |         "cast_info",
28 |         "movie_companies",
29 |         "role_type",
30 |         "complete_cast",
31 |         "comp_cast_type",
32 |         "company_name",
33 |         "company_type",
34 |         "movie_link",
35 |         "movie_keyword",
36 |         "name",
37 |         "info_type",
38 |         "movie_info_idx",
39 |         "person_info",
40 |         "link_type",
41 |         "title",
42 |         "aka_name",
43 |         "movie_info",
44 |         "keyword",
45 |         "aka_title",
46 |     };
47 | 
48 |     for (auto& table: table_names) {
49 |         result =
50 |             conn.Query(fmt::format("COPY {0} FROM 'imdb/{0}.csv' (ESCAPE '\\');", table));
51 |         if (result->HasError()) {
52 |             fmt::println("Error: {}", result->GetError());
53 |         } else {
54 |             fmt::println("Successfully loaded table {} into {}", table, argv[1]);
55 |         }
56 |     }
57 | }
58 | 


--------------------------------------------------------------------------------
/engine/op/CollectorTarget.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | //---------------------------------------------------------------------------
 3 | #include "infra/SmallVec.hpp"
 4 | #include "op/TargetBase.hpp"
 5 | #include <atomic>
 6 | //---------------------------------------------------------------------------
 7 | namespace engine {
 8 | //---------------------------------------------------------------------------
 9 | class TableScan;
10 | //---------------------------------------------------------------------------
11 | /// A very simple collector target
12 | class CollectorTarget : public TargetImpl<CollectorTarget> {
13 |     public:
14 |     struct LocalState {
15 |         /// The values
16 |         SmallVec<uint64_t> values;
17 |         /// The next local state
18 |         LocalState* next = nullptr;
19 | 
20 |         LocalState(CollectorTarget& target);
21 |     };
22 |     /// The local states
23 |     std::atomic<LocalState*> localStates = nullptr;
24 |     /// The values
25 |     SmallVec<uint64_t> values;
26 | 
27 |     /// Consume attributes
28 |     template <typename... AttrT>
29 |     void operator()(LocalState& ls, uint64_t multiplicity, AttrT... attrs) {
30 |         for (uint64_t i = 0; i < multiplicity; i++)
31 |             (ls.values.push_back(attrs), ...);
32 |     }
33 | 
34 |     /// Flush collected
35 |     void finishConsume();
36 | 
37 |     /// Collect the output from a table scan
38 |     void collect(TableScan& op);
39 | };
40 | //---------------------------------------------------------------------------
41 | static_assert(TargetOperator<CollectorTarget, 1>);
42 | //---------------------------------------------------------------------------
43 | }
44 | //---------------------------------------------------------------------------
45 | 


--------------------------------------------------------------------------------
/include/table_entity.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <fmt/core.h>
 4 | #include <string>
 5 | 
 6 | #include "common.h"
 7 | 
 8 | struct TableEntity {
 9 |     std::string table;
10 |     int         id;
11 | 
12 |     friend bool operator==(const TableEntity& left, const TableEntity& right);
13 |     friend bool operator!=(const TableEntity& left, const TableEntity& right);
14 |     friend bool operator<(const TableEntity& left, const TableEntity& right);
15 | };
16 | 
17 | inline bool operator==(const TableEntity& left, const TableEntity& right) {
18 |     return left.table == right.table && left.id == right.id;
19 | }
20 | 
21 | inline bool operator!=(const TableEntity& left, const TableEntity& right) {
22 |     return !(left == right);
23 | }
24 | 
25 | inline bool operator<(const TableEntity& left, const TableEntity& right) {
26 |     if (left.table < right.table) {
27 |         return true;
28 |     } else if (left.table > right.table) {
29 |         return false;
30 |     } else {
31 |         return left.id < right.id;
32 |     }
33 | }
34 | 
35 | namespace std {
36 | template <>
37 | struct hash<TableEntity> {
38 |     size_t operator()(const TableEntity& te) const noexcept {
39 |         size_t seed = 0;
40 |         hash_combine(seed, hash<string>{}(te.table));
41 |         hash_combine(seed, hash<int>{}(te.id));
42 |         return seed;
43 |     }
44 | };
45 | 
46 | } // namespace std
47 | 
48 | template <>
49 | struct fmt::formatter<TableEntity> {
50 |     template <class ParseContext>
51 |     constexpr auto parse(ParseContext& ctx) {
52 |         return ctx.begin();
53 |     }
54 | 
55 |     template <class FormatContext>
56 |     auto format(const TableEntity& te, FormatContext& ctx) const {
57 |         return fmt::format_to(ctx.out(), "({}, {})", te.table, te.id);
58 |     }
59 | };
60 | 


--------------------------------------------------------------------------------
/include/hardware__ca09.h:
--------------------------------------------------------------------------------
 1 | // Hardware information for Ampere Altra Max node ca09.
 2 | 
 3 | // Architecture from `uname -srm`.
 4 | #define SPC__AARCH64
 5 | 
 6 | // CPU from `/proc/cpuinfo`.
 7 | #define SPC__CPU_NAME ""
 8 | 
 9 | // The servers might have multiple CPUs. We limit all benchmarks to a single node using numactl. The listed CPU numbers
10 | // below are for a single CPU. The listed NUMA numbers are just meant to give you a rough idea of the system.
11 | #define SPC__CORE_COUNT 128
12 | #define SPC__THREAD_COUNT 128
13 | #define SPC__NUMA_NODE_COUNT 1
14 | #define SPC__NUMA_NODES_ACTIVE_IN_BENCHMARK 1
15 | 
16 | // Main memory per NUMA node (MB).
17 | #define SPC__NUMA_NODE_DRAM_MB 515809
18 | 
19 | // Obtained from `lsb_release -a`.
20 | #define SPC__OS "Ubuntu 24.04.1 LTS"
21 | 
22 | // Obtained from: `uname -srm`.
23 | #define SPC__KERNEL "Linux 6.8.0-50-generic aarch64"
24 | 
25 | // ARM: possible options are SVE, SVE2, and NEON. No ARM CPU older than Ampere Altra Max will be used.
26 | #define SPC__SUPPORTS_NEON
27 | 
28 | // Cache information from `getconf -a | grep CACHE`.
29 | // As Ubuntu did not list all numbers, we also took cache sizes from `cat /sys/devices/system/cpu/cpu0/cache/index*/size`
30 | #define SPC__LEVEL1_ICACHE_SIZE                 65536
31 | #define SPC__LEVEL1_ICACHE_ASSOC
32 | #define SPC__LEVEL1_ICACHE_LINESIZE             64
33 | #define SPC__LEVEL1_DCACHE_SIZE                 65536
34 | #define SPC__LEVEL1_DCACHE_ASSOC
35 | #define SPC__LEVEL1_DCACHE_LINESIZE             64
36 | #define SPC__LEVEL2_CACHE_SIZE                  1048576
37 | #define SPC__LEVEL2_CACHE_ASSOC
38 | #define SPC__LEVEL2_CACHE_LINESIZE
39 | #define SPC__LEVEL3_CACHE_SIZE
40 | #define SPC__LEVEL3_CACHE_ASSOC
41 | #define SPC__LEVEL3_CACHE_LINESIZE
42 | #define SPC__LEVEL4_CACHE_SIZE 
43 | #define SPC__LEVEL4_CACHE_ASSOC
44 | #define SPC__LEVEL4_CACHE_LINESIZE
45 | 


--------------------------------------------------------------------------------
/include/hardware__sidon.h:
--------------------------------------------------------------------------------
 1 | // Hardware information for Intel Xeon E7-4880 v2 node sidon.
 2 | 
 3 | // Architecture from `uname -srm`.
 4 | #define SPC__X86_64
 5 | 
 6 | // CPU from `/proc/cpuinfo`.
 7 | #define SPC__CPU_NAME "Intel(R) Xeon(R) CPU E7-4880 v2 @ 2.50GHz"
 8 | 
 9 | // The servers might have multiple CPUs. We limit all benchmarks to a single node using numactl. The listed CPU numbers
10 | // below are for a single CPU. The listed NUMA numbers are just meant to give you a rough idea of the system.
11 | #define SPC__CORE_COUNT 15
12 | #define SPC__THREAD_COUNT 30
13 | #define SPC__NUMA_NODE_COUNT 4
14 | #define SPC__NUMA_NODES_ACTIVE_IN_BENCHMARK 1
15 | 
16 | // Main memory per NUMA node (MB).
17 | #define SPC__NUMA_NODE_DRAM_MB 515809
18 | 
19 | // Obtained from `lsb_release -a`.
20 | #define SPC__OS "Ubuntu 22.04.4 LTS"
21 | 
22 | // Obtained from: `uname -srm`.
23 | #define SPC__KERNEL "Linux 5.15.0-116-generic x86_64"
24 | 
25 | // Intel: possible options are AVX, AVX2, and AVX512. No Intel CPU older than Intel Xeon E7-4880 v2 will be used.
26 | #define SPC__SUPPORTS_AVX
27 | 
28 | // Cache information from `getconf -a | grep CACHE`. Macros without a value below are defined but expand to nothing.
29 | #define SPC__LEVEL1_ICACHE_SIZE                 32768
30 | #define SPC__LEVEL1_ICACHE_ASSOC
31 | #define SPC__LEVEL1_ICACHE_LINESIZE             64
32 | #define SPC__LEVEL1_DCACHE_SIZE                 32768
33 | #define SPC__LEVEL1_DCACHE_ASSOC                8
34 | #define SPC__LEVEL1_DCACHE_LINESIZE             64
35 | #define SPC__LEVEL2_CACHE_SIZE                  262144
36 | #define SPC__LEVEL2_CACHE_ASSOC                 8
37 | #define SPC__LEVEL2_CACHE_LINESIZE              64
38 | #define SPC__LEVEL3_CACHE_SIZE                  39321600
39 | #define SPC__LEVEL3_CACHE_ASSOC                 20
40 | #define SPC__LEVEL3_CACHE_LINESIZE              64
41 | #define SPC__LEVEL4_CACHE_SIZE                  0
42 | #define SPC__LEVEL4_CACHE_ASSOC
43 | #define SPC__LEVEL4_CACHE_LINESIZE
44 | 


--------------------------------------------------------------------------------
/engine/pipeline/PipelineGen.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | //---------------------------------------------------------------------------
 3 | #include "op/Hashtable.hpp"
 4 | #include "op/TableScan.hpp"
 5 | #include "op/TableTarget.hpp"
 6 | #include "pipeline/JoinPipeline.hpp"
 7 | #include "pipeline/PipelineFunction.hpp"
 8 | //---------------------------------------------------------------------------
 9 | namespace engine {
10 | //---------------------------------------------------------------------------
11 | /// Build a tuple holding one DefaultProbe per index in Idx, each constructed from probeParams[index]
12 | template <size_t... Idx>
13 | auto genProbes(engine::span<const DefaultProbeParameter>& probeParams, std::index_sequence<Idx...>) {
14 |     return std::tuple{DefaultProbe{probeParams[Idx]}...};
15 | }
16 | //---------------------------------------------------------------------------
17 | /// Cast target and scan to their concrete types, copy the offsets into fixed-size arrays, then build and invoke the JoinPipeline
18 | template <typename Target, typename Scan, size_t NumJoins, typename Keys, typename Attrs>
19 | void PipelineFunctions::runPipeline(TargetBase& targetBase, ScanBase& scanBase, engine::span<const DefaultProbeParameter> probeParams, engine::span<const unsigned> keyOffsets, engine::span<const unsigned> attrOffsets) {
20 |     static_assert(std::is_base_of_v<TargetBase, Target>);
21 |     auto& concreteTarget = dynamic_cast<Target&>(targetBase);
22 |     auto& concreteScan = dynamic_cast<Scan&>(scanBase);
23 |     assert(keyOffsets.size() == Keys::size());
24 |     assert(attrOffsets.size() == Attrs::size());
25 |     std::array<unsigned, Keys::size()> keyArray;
26 |     for (size_t k = 0; k != keyArray.size(); ++k) keyArray[k] = keyOffsets[k];
27 |     std::array<unsigned, Attrs::size()> attrArray;
28 |     for (size_t a = 0; a != attrArray.size(); ++a) attrArray[a] = attrOffsets[a];
29 |     auto probes = genProbes(probeParams, std::make_index_sequence<NumJoins>{});
30 |     JoinPipeline<Target, Scan, decltype(probes), Keys, Attrs> pipeline{concreteTarget, concreteScan, probes, keyArray, attrArray};
31 |     pipeline();
32 | }
33 | //---------------------------------------------------------------------------
34 | }
35 | 


--------------------------------------------------------------------------------
/include/hardware__koroneia.h:
--------------------------------------------------------------------------------
 1 | // Hardware information for AMD EPYC 7F72 node koroneia.
 2 | 
 3 | // Architecture from `uname -srm`.
 4 | #define SPC__X86_64
 5 | 
 6 | // CPU from `/proc/cpuinfo`.
 7 | #define SPC__CPU_NAME "AMD EPYC 7F72 24-Core Processor"
 8 | 
 9 | // The servers might have multiple CPUs. We limit all benchmarks to a single node using numactl. The listed CPU numbers
10 | // below are for a single CPU. The listed NUMA numbers are just meant to give you a rough idea of the system.
11 | #define SPC__CORE_COUNT 24
12 | #define SPC__THREAD_COUNT 48
13 | #define SPC__NUMA_NODE_COUNT 2
14 | #define SPC__NUMA_NODES_ACTIVE_IN_BENCHMARK 1
15 | 
16 | // Main memory per NUMA node (MB).
17 | #define SPC__NUMA_NODE_DRAM_MB 257699
18 | 
19 | // Obtained from `lsb_release -a`.
20 | #define SPC__OS "Ubuntu 24.04.2 LTS"
21 | 
22 | // Obtained from: `uname -srm`.
23 | #define SPC__KERNEL "Linux 5.15.0-106-generic x86_64"
24 | 
25 | // AMD: possible options are AVX, AVX2, and AVX512. No AMD CPU older than AMD EPYC 7F72 will be used.
26 | #define SPC__SUPPORTS_AVX
27 | #define SPC__SUPPORTS_AVX2
28 | 
29 | // Cache information from `getconf -a | grep CACHE`. Macros without a value below are defined but expand to nothing.
30 | #define SPC__LEVEL1_ICACHE_SIZE                 32768
31 | #define SPC__LEVEL1_ICACHE_ASSOC
32 | #define SPC__LEVEL1_ICACHE_LINESIZE             64
33 | #define SPC__LEVEL1_DCACHE_SIZE                 32768
34 | #define SPC__LEVEL1_DCACHE_ASSOC                8
35 | #define SPC__LEVEL1_DCACHE_LINESIZE             64
36 | #define SPC__LEVEL2_CACHE_SIZE                  524288
37 | #define SPC__LEVEL2_CACHE_ASSOC                 8
38 | #define SPC__LEVEL2_CACHE_LINESIZE              64
39 | #define SPC__LEVEL3_CACHE_SIZE                  16777216
40 | #define SPC__LEVEL3_CACHE_ASSOC                 16
41 | #define SPC__LEVEL3_CACHE_LINESIZE              64
42 | #define SPC__LEVEL4_CACHE_SIZE                  0
43 | #define SPC__LEVEL4_CACHE_ASSOC
44 | #define SPC__LEVEL4_CACHE_LINESIZE
45 | 


--------------------------------------------------------------------------------
/include/hardware__cp02.h:
--------------------------------------------------------------------------------
 1 | // Hardware information for IBM Power8 node cp02.
 2 | 
 3 | // Architecture from `uname -srm`.
 4 | #define SPC__PPC64LE
 5 | 
 6 | // CPU from `/proc/cpuinfo`.
 7 | #define SPC__CPU_NAME "POWER8 (architected), altivec supported"
 8 | 
 9 | // The servers might have multiple CPUs. We limit all benchmarks to a single node using numactl. The listed CPU numbers
10 | // below are for a single CPU. The listed NUMA numbers are just meant to give you a rough idea of the system.
11 | #define SPC__CORE_COUNT 12
12 | #define SPC__THREAD_COUNT 96
13 | #define SPC__NUMA_NODE_COUNT 8
14 | #define SPC__NUMA_NODES_ACTIVE_IN_BENCHMARK 1
15 | 
16 | // Main memory per NUMA node (MB).
17 | #define SPC__NUMA_NODE_DRAM_MB 1039964
18 | 
19 | // Obtained from `lsb_release -a`.
20 | #define SPC__OS "Ubuntu 20.04.6 LTS"
21 | 
22 | // Obtained from: `uname -srm`. NOTE(review): the string below says x86_64 although this header defines SPC__PPC64LE - confirm.
23 | #define SPC__KERNEL "Linux 5.4.0-137-generic x86_64"
24 | 
25 | // IBM: possible options are VSX, VMX, and MMA. No IBM CPU older than Power8 will be used.
26 | #define SPC__SUPPORTS_VSX
27 | #define SPC__SUPPORTS_VMX
28 | 
29 | // Cache information from `getconf -a | grep CACHE`.
30 | #define SPC__LEVEL1_ICACHE_SIZE                 32768
31 | #define SPC__LEVEL1_ICACHE_ASSOC                8
32 | #define SPC__LEVEL1_ICACHE_LINESIZE             128
33 | #define SPC__LEVEL1_DCACHE_SIZE                 65536
34 | #define SPC__LEVEL1_DCACHE_ASSOC                8
35 | #define SPC__LEVEL1_DCACHE_LINESIZE             128
36 | #define SPC__LEVEL2_CACHE_SIZE                  524288
37 | #define SPC__LEVEL2_CACHE_ASSOC                 8
38 | #define SPC__LEVEL2_CACHE_LINESIZE              128
39 | #define SPC__LEVEL3_CACHE_SIZE                  8388608
40 | #define SPC__LEVEL3_CACHE_ASSOC                 8
41 | #define SPC__LEVEL3_CACHE_LINESIZE              128
42 | #define SPC__LEVEL4_CACHE_SIZE                  0
43 | #define SPC__LEVEL4_CACHE_ASSOC                 0
44 | #define SPC__LEVEL4_CACHE_LINESIZE              0
45 | 


--------------------------------------------------------------------------------
/engine/infra/AdressSanitizer.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | //---------------------------------------------------------------------------
 3 | #include <cstddef>
 4 | //---------------------------------------------------------------------------
 5 | #if defined(__has_feature) // a feature-test macro is available: ask it directly
 6 | #if __has_feature(address_sanitizer)
 7 | #define ADDRESS_SANITIZER_ACTIVE
 8 | #endif
 9 | #elif defined(__SANITIZE_ADDRESS__) // no __has_feature: fall back to the predefined sanitizer macro
10 | #define ADDRESS_SANITIZER_ACTIVE
11 | #endif
12 | //---------------------------------------------------------------------------
13 | #ifdef ADDRESS_SANITIZER_ACTIVE
14 | extern "C" void __asan_poison_memory_region(void const volatile* p, size_t size); // NOLINT(bugprone-reserved-identifier)
15 | extern "C" void __asan_unpoison_memory_region(void const volatile* p, size_t size); // NOLINT(bugprone-reserved-identifier)
16 | #endif
17 | //---------------------------------------------------------------------------
18 | namespace engine::AddressSanitizer {
19 | /// Is the address sanitizer compiled into this binary?
20 | #ifdef ADDRESS_SANITIZER_ACTIVE
21 | constexpr const bool addressSanitizerActive = true;
22 | #else
23 | constexpr const bool addressSanitizerActive = false;
24 | #endif
25 | 
26 | /// Poison a region of memory.
27 | /// Calls __asan_poison_memory_region when the sanitizer is compiled in
28 | /// and does nothing otherwise.
29 | /// @param p start of the region
30 | /// @param size size of the region
31 | [[gnu::always_inline]] static inline void poisonMemoryRegion([[maybe_unused]] void const volatile* p,
32 |                                                              [[maybe_unused]] size_t size) {
33 | #ifdef ADDRESS_SANITIZER_ACTIVE
34 |     __asan_poison_memory_region(p, size);
35 | #endif
36 | }
37 | 
38 | /// Unpoison a region of memory.
39 | /// Calls __asan_unpoison_memory_region when the sanitizer is compiled in
40 | /// and does nothing otherwise.
41 | /// @param p start of the region
42 | /// @param size size of the region
43 | [[gnu::always_inline]] static inline void unpoisonMemoryRegion([[maybe_unused]] void const volatile* p,
44 |                                                                [[maybe_unused]] size_t size) {
45 | #ifdef ADDRESS_SANITIZER_ACTIVE
46 |     __asan_unpoison_memory_region(p, size);
47 | #endif
48 | }
49 | }
50 | //---------------------------------------------------------------------------
51 | 


--------------------------------------------------------------------------------
/engine/tools/ParsedSQL.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | //---------------------------------------------------------------------------
 3 | #include <nlohmann/json_fwd.hpp>
 4 | #include <plan.h>
 5 | #include <table_entity.h>
 6 | #include <tuple>
 7 | #include <vector>
 8 | //---------------------------------------------------------------------------
 9 | namespace engine {
10 | //---------------------------------------------------------------------------
11 | using OutputAttrsType = std::vector<std::tuple<TableEntity, std::string>>; // list of (entity, string) pairs
12 | using AliasMapType = std::unordered_map<std::string, TableEntity>; // string -> entity; presumably alias name -> table instance, TODO confirm
13 | using FilterMapType = std::unordered_map<TableEntity, std::unique_ptr<Statement>>; // entity -> owned Statement
14 | using JoinGraphType = std::unordered_map<TableEntity,
15 |                                          std::unordered_map<TableEntity, std::tuple<std::string, std::string>>>; // entity -> (entity -> two strings); presumably the joined column names, TODO confirm
16 | using ColumnMapType = std::unordered_map<TableEntity, std::unordered_map<std::string, size_t>>; // entity -> (string -> index)
17 | //---------------------------------------------------------------------------
18 | struct ParsedSQL { // Containers describing one SQL text; NOTE(review): presumably filled by parse_sql - confirm in the implementation
19 |     static const std::unordered_map<std::string, std::vector<Attribute>> attributes_map; // declared only; the definition is not in this header
20 | 
21 |     const std::unordered_map<std::string, std::vector<std::string>>& column_to_tables; // reference member: the referenced map must outlive this object
22 |     std::unordered_map<std::string, int> table_counts;
23 |     AliasMapType alias_map;
24 |     std::unordered_map<TableEntity, std::string> entity_to_alias; // NOTE(review): looks like the inverse of alias_map - confirm
25 |     JoinGraphType join_graph;
26 |     FilterMapType filters;
27 |     OutputAttrsType output_attrs;
28 |     ColumnMapType column_map;
29 |     std::vector<std::tuple<TableEntity, std::string>> column_vec;
30 | 
31 |     ParsedSQL(const std::unordered_map<std::string, std::vector<std::string>>& column_to_tables); // not explicit: also usable as an implicit conversion from the map type
32 | 
33 |     std::string executed_sql(const std::string& sql);
34 | 
35 |     void parse_sql(const std::string& sql, std::string_view name);
36 | };
37 | //---------------------------------------------------------------------------
38 | }
39 | //---------------------------------------------------------------------------


--------------------------------------------------------------------------------
/engine/storage/CopyLogic.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | //---------------------------------------------------------------------------
 3 | #include <cstdint>
 4 | #include <cstddef>
 5 | //---------------------------------------------------------------------------
 6 | namespace engine {
 7 | //---------------------------------------------------------------------------
 8 | class CopyLogic {
 9 |     public:
10 |     /// Copy 32 bit integers with mask; every copied value is stored in a 64 bit dst slot
11 |     /// srcOffsets is a bitset that describes the indices of the src array that should be copied
12 |     /// dstOffsets is a bitset that describes the indices of the dst array that should be copied to
13 |     /// The rest of the indices of the dst array (up to numTuples) should be set to null (~0ull)
14 |     static void extractInt32(uint64_t* dst, const uint32_t* src, uint64_t srcOffsets, uint64_t dstOffsets, size_t numTuples) noexcept;
15 |     /// Copy 64 bit integers with mask
16 |     /// srcOffsets is a bitset that describes the indices of the src array that should be copied
17 |     /// dstOffsets is a bitset that describes the indices of the dst array that should be copied to
18 |     /// The rest of the indices of the dst array (up to numTuples) should be set to null (~0ull)
19 |     static void extractInt64(uint64_t* dst, const uint64_t* src, uint64_t srcOffsets, uint64_t dstOffsets, size_t numTuples) noexcept;
20 |     /// Copy strings with mask; src holds 16 bit entries (NOTE(review): presumably offsets or lengths relative to stringHead - confirm)
21 |     /// srcOffsets is a bitset that describes the indices of the src array that should be copied
22 |     /// dstOffsets is a bitset that describes the indices of the dst array that should be copied to
23 |     /// The rest of the indices of the dst array (up to numTuples) should be set to null (~0ull)
24 |     /// All the strings are short strings
25 |     static void extractVarChar(uint64_t* dst, const uint16_t* src, uint64_t srcOffsets, uint64_t dstOffsets, size_t numTuples, const char* stringHead) noexcept;
26 | };
27 | //---------------------------------------------------------------------------
28 | }
29 | //---------------------------------------------------------------------------


--------------------------------------------------------------------------------
/engine/pipeline/PipelineFunction.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | //---------------------------------------------------------------------------
 3 | #include "pipeline/PipelineConcepts.hpp"
 4 | #include "infra/helper/Span.hpp"
 5 | #include "infra/Util.hpp"
 6 | #include "op/ScanBase.hpp"
 7 | #include "op/TargetBase.hpp"
 8 | #include <cassert>
 9 | #include <string_view>
10 | //---------------------------------------------------------------------------
11 | namespace engine {
12 | //---------------------------------------------------------------------------
13 | class Hashtable;
14 | class HashtableBuild;
15 | class HashtableProbe;
16 | class TableScan;
17 | class TableTarget;
18 | //---------------------------------------------------------------------------
19 | using DefaultProbe = HashtableProbe; // probe implementation used by default
20 | //---------------------------------------------------------------------------
21 | using DefaultProbeParameter = const Hashtable*; // per-probe argument: pointer to the hash table to probe
22 | //---------------------------------------------------------------------------
23 | using PipelineFunction = void (*)(TargetBase& target, ScanBase& scan, engine::span<const DefaultProbeParameter> probes, engine::span<const unsigned> keyOffsets, engine::span<const unsigned> outputAttributeOffsets);
24 | //---------------------------------------------------------------------------
25 | class JoinPipelineBase { // polymorphic base type for join pipelines
26 |     public:
27 |     /// Destructor; public so that derived pipelines can be destroyed, also through a base pointer
28 |     virtual ~JoinPipelineBase() noexcept = default;
29 | };
30 | //---------------------------------------------------------------------------
31 | struct PipelineFunctions { // table of named pipeline functions plus the generic pipeline runner
32 |     static size_t numFunctions; // NOTE(review): presumably the number of entries in functions - confirm
33 |     static std::pair<std::string_view, PipelineFunction> functions[]; // (name, function) pairs; defined elsewhere
34 |     static PipelineFunction lookupPipeline(std::string_view name); // NOTE(review): result for an unknown name is not visible here
35 |     template <typename Target, typename Scan, size_t NumJoins, typename Keys, typename Attrs>
36 |     static void runPipeline(TargetBase& target, ScanBase& scan, engine::span<const DefaultProbeParameter> probeParams, engine::span<const unsigned> keyOffsets, engine::span<const unsigned> attrOffsets);
37 | };
38 | //---------------------------------------------------------------------------
39 | }
40 | 


--------------------------------------------------------------------------------
/engine/Execute.cpp:
--------------------------------------------------------------------------------
 1 | #include "infra/PageMemory.hpp"
 2 | #include "infra/QueryMemory.hpp"
 3 | #include "infra/Scheduler.hpp"
 4 | #include "query/PlanImport.hpp"
 5 | #include "query/QueryPlan.hpp"
 6 | #include <plan.h>
 7 | #include <unistd.h>
 8 | //---------------------------------------------------------------------------
 9 | namespace engine {
10 | //---------------------------------------------------------------------------
11 | ColumnarTable execute(QueryPlan plan, [[maybe_unused]] void* context) { // Run an already built plan between the per-query start/end hooks
12 |     Scheduler::start_query();
13 |     pagememory::start_query();
14 |     ColumnarTable output;
15 |     { // inner scope: the plan object is destroyed before the end-of-query hooks run
16 |         QueryPlan pp = std::move(plan);
17 |         output = pp.run();
18 |     }
19 |     querymemory::end_query(); // NOTE(review): the start hook is pagememory, the end hook is querymemory - confirm this pairing is intended
20 |     Scheduler::end_query();
21 |     return output; // returned by name: a local is moved implicitly, and std::move here would inhibit copy elision
22 | }
23 | //---------------------------------------------------------------------------
24 | }
25 | //---------------------------------------------------------------------------
26 | namespace Contest {
27 | //---------------------------------------------------------------------------
28 | ColumnarTable execute(const Plan& plan, [[maybe_unused]] void* context) { // Import the plan into the engine and run it between the per-query hooks
29 |     engine::Scheduler::start_query();
30 |     engine::pagememory::start_query();
31 |     ColumnarTable output;
32 |     { // inner scope: data source and imported plan are destroyed before the end-of-query hooks run
33 |         engine::DataSource ds;
34 |         auto imported = engine::PlanImport::importPlan(ds, plan);
35 |         output = imported.run();
36 |     }
37 |     engine::querymemory::end_query();
38 |     engine::Scheduler::end_query();
39 |     return output; // returned by name: a local is moved implicitly, and std::move here would inhibit copy elision
40 | }
41 | //---------------------------------------------------------------------------
42 | void* build_context() { // Set up the scheduler; no context object is allocated
43 |     engine::Scheduler::setup();
44 | 
45 |     return nullptr;
46 | }
47 | //---------------------------------------------------------------------------
48 | void destroy_context([[maybe_unused]] void* context) { engine::Scheduler::teardown(); } // counterpart of build_context
49 | //---------------------------------------------------------------------------
50 | } // namespace Contest
51 | //---------------------------------------------------------------------------
52 | 


--------------------------------------------------------------------------------
/engine/storage/RestrictionLogic.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | //---------------------------------------------------------------------------
 3 | #include "infra/QueryMemory.hpp"
 4 | #include "storage/BitLogic.hpp"
 5 | #include <cstddef>
 6 | #include <cstdint>
 7 | #include <memory>
 8 | #include <string_view>
 9 | //---------------------------------------------------------------------------
10 | namespace engine {
11 | //---------------------------------------------------------------------------
12 | struct Restriction;
13 | //---------------------------------------------------------------------------
14 | class RestrictionLogic {
15 |     public:
16 |     /// Evaluate the restriction on the values selected by mask and return the result as a bitset
17 |     uint64_t run(const uint32_t* values, uint64_t mask) const {
18 |         // An empty mask selects nothing
19 |         if (mask == 0)
20 |             return 0;
21 |         // Non-dense masks take the sparse implementation
22 |         if (!BitLogic::isDense(mask)) [[unlikely]]
23 |             return runSparse(values, mask);
24 |         // Dense mask: evaluate the range reported by getRange, then shift
25 |         // the result so that it lines up with the positions of the input mask
26 |         auto [first, last] = BitLogic::getRange(mask);
27 |         return runDense(values + first, last - first) << first;
28 |     }
29 |     virtual uint64_t runSparse(const uint32_t* values, uint64_t mask) const = 0; // used by run for non-dense masks
30 |     virtual uint64_t runDense(const uint32_t* values, size_t len) const = 0; // used by run for dense masks
31 |     virtual std::pair<uint64_t, size_t> runAndSkip(const uint32_t* values, size_t len) const = 0;
32 |     /// Broad estimate of the selectivity
33 |     virtual double estimateSelectivity() const = 0;
34 |     virtual double estimateCost() const = 0;
35 |     /// Virtual destructor
36 |     virtual ~RestrictionLogic() noexcept = default;
37 | 
38 |     /// The not-null restriction (NOTE(review): meaning inferred from the name)
39 |     static const RestrictionLogic* notNullRestriction;
40 | 
41 |     /// Create the restriction logic for a given restriction
42 |     static UniquePtr<RestrictionLogic> setupRestriction(const Restriction& restriction);
43 | 
44 |     virtual std::string_view name() const = 0;
45 | };
46 | //---------------------------------------------------------------------------
47 | }
48 | //---------------------------------------------------------------------------
49 | 


--------------------------------------------------------------------------------
/include/attribute.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <array>
 4 | #include <string>
 5 | 
 6 | #include <fmt/core.h>
 7 | 
 8 | enum class DataType { // enumerator values index the name table in fmt::formatter<DataType>, so the order matters
 9 |     INT32,       // 4-byte integer
10 |     INT64,       // 8-byte integer
11 |     FP64,        // 8-byte floating point
12 |     VARCHAR,     // string of arbitrary length
13 | };
14 | 
15 | /// fmt formatter that prints a DataType as its enumerator name
16 | template <>
17 | struct fmt::formatter<DataType> {
18 |     template <class ParseContext>
19 |     constexpr auto parse(ParseContext& ctx) { // no format specification is interpreted
20 |         return ctx.begin();
21 |     }
22 | 
23 |     /// Write the name of value; values outside the table are printed as UNKNOWN instead of reading out of bounds
24 |     template <class FormatContext>
25 |     auto format(DataType value, FormatContext& ctx) const {
26 |         // Immutable compile-time table, indexed by enumerator value (order must match DataType)
27 |         static constexpr std::array<std::string_view, 4> names{"INT32", "INT64", "FP64", "VARCHAR"};
28 |         const auto index = static_cast<size_t>(value);
29 |         // Negative underlying values wrap to a large index and are rejected by the same check
30 |         return fmt::format_to(ctx.out(), "{}", index < names.size() ? names[index] : std::string_view{"UNKNOWN"});
31 |     }
32 | };
33 | 
34 | #define DISPATCH_DATA_TYPE(type, TYPE, ...) \
35 |     do {                                    \
36 |         switch (type) {                     \
37 |         case DataType::INT32: {             \
38 |             using TYPE = int32_t;           \
39 |             __VA_ARGS__                     \
40 |             break;                          \
41 |         }                                   \
42 |         case DataType::INT64: {             \
43 |             using TYPE = int64_t;           \
44 |             __VA_ARGS__                     \
45 |             break;                          \
46 |         }                                   \
47 |         case DataType::FP64: {              \
48 |             using TYPE = double;            \
49 |             __VA_ARGS__                     \
50 |             break;                          \
51 |         }                                   \
52 |         case DataType::VARCHAR: {           \
53 |             using TYPE = std::string;       \
54 |             __VA_ARGS__                     \
55 |             break;                          \
56 |         }                                   \
57 |         }                                   \
58 |     } while (0) // expands __VA_ARGS__ once per DataType case with TYPE aliased to the matching C++ type; do/while(0) makes the macro usable as a single statement
59 | 
60 | struct Attribute { // a typed, named attribute
61 |     DataType    type; // data type of the attribute
62 |     std::string name; // name of the attribute
63 | };


--------------------------------------------------------------------------------
/engine/test/unit_tests3.cpp:
--------------------------------------------------------------------------------
 1 | #include "infra/SmallVec.hpp"
 2 | #include "infra/helper/BitOps.hpp"
 3 | #include "plan.h"
 4 | #include "storage/BitLogic.hpp"
 5 | #include <algorithm>
 6 | #include <array>
 7 | #include <catch2/catch_test_macros.hpp>
 8 | 
 9 | using namespace std;
10 | using namespace engine;
11 | 
12 | // Checks that bitsToOffsets writes the positions of the set bits of `bits` in ascending order
13 | void testBits(uint64_t bits) {
14 |     array<uint8_t, 71> offsets{};
15 |     auto count = BitLogic::bitsToOffsets(offsets.data(), bits) - offsets.data();
16 |     REQUIRE(count == engine::popcount(bits));
17 |     uint64_t remaining = bits;
18 |     for (int idx = 0; idx < count; ++idx, remaining &= remaining - 1) {
19 |         REQUIRE(offsets[idx] == countr_zero(remaining));
20 |     }
21 |     REQUIRE(remaining == 0);
22 | }
23 | 
24 | TEST_CASE("BitLogic") {
25 |     // Exercise testBits on a small value, only the top bit, both extreme bits,
26 |     // and a mixed multi-bit pattern
27 |     testBits(5);
28 |     testBits(0x8000000000000000);
29 |     testBits(0x8000000000000001);
30 |     testBits(0x19c0003040502);
31 | }
32 | 
33 | TEST_CASE("SmallVec") {
34 |     SmallVec<uint8_t> smallVec{};
35 |     smallVec.push_back(3);
36 |     auto x = smallVec.emplace_back(2);
37 |     REQUIRE(x == 2);
38 |     REQUIRE(smallVec[0] == 3);
39 |     REQUIRE(smallVec.size() == 2);
40 | 
41 |     for (auto i = 0; i < 10; i++) { // grow from 2 to 12 elements, checking each append
42 |         smallVec.push_back(i);
43 |         REQUIRE(smallVec[i + 2] == i);
44 |         REQUIRE(smallVec.size() == 3 + i);
45 |     }
46 | 
47 |     for (auto i = 0; i < 12; i++) // remove all 12 elements again
48 |         smallVec.pop_back();
49 | 
50 |     REQUIRE(smallVec.size() == 0);
51 | 
52 |     std::vector<uint8_t> stdVec; // reference container filled with the same values
53 |     for (auto i : {7, 3, 1, 6, 2, 9, 4, 8, 2, 3, 10, 6}) {
54 |         smallVec.push_back(i);
55 |         stdVec.push_back(i);
56 |         REQUIRE(smallVec.back() == i);
57 |     }
58 | 
59 |     // Can we sort?
60 |     std::sort(smallVec.begin(), smallVec.end());
61 |     REQUIRE(std::is_sorted(smallVec.begin(), smallVec.end()));
62 | 
63 |     // Sort the standard vector and compare
64 |     std::sort(stdVec.begin(), stdVec.end());
65 |     for (size_t i = 0; i < stdVec.size(); i++) // unsigned index: matches the type of size()
66 |         REQUIRE(smallVec[i] == stdVec[i]);
67 | 
68 |     stdVec.erase(stdVec.begin(), stdVec.end());
69 | 
70 |     smallVec.reserve(512);
71 |     REQUIRE(smallVec.capacity() >= 512);
72 | 
73 |     smallVec.erase(smallVec.begin(), smallVec.end());
74 |     REQUIRE(smallVec.empty());
75 | }
76 | 


--------------------------------------------------------------------------------
/engine/infra/helper/Span.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | //---------------------------------------------------------------------------
 3 | #include <array>
 4 | #include <cstddef>
 5 | #include <cassert>
 6 | #include <type_traits>
 7 | #include <iterator>
 8 | #include <cstdint>
 9 | //---------------------------------------------------------------------------
10 | namespace engine {
11 | //---------------------------------------------------------------------------
12 | template <typename T>
13 | class span { // view over size_ elements starting at data_; stores only pointer and size and never frees the elements
14 |     public:
15 |     using value_type = T;
16 |     using iterator = T*;
17 |     using const_iterator = const T*;
18 | 
19 |     private:
20 |     T* data_; // first element (nullptr for a default-constructed span)
21 |     size_t size_; // number of elements
22 | 
23 |     public:
24 |     constexpr span(T* data, size_t size) noexcept : data_(data), size_(size) {} // pointer plus element count
25 |     constexpr span(T* begin, T* end) noexcept : data_(begin), size_(end - begin) {} // pointer range; begin <= end is not checked
26 |     template <typename Container>
27 |     constexpr span(Container& arr) noexcept : data_(arr.data()), size_(arr.size()) {} // any type offering data() and size()
28 |     template <typename Container>
29 |     constexpr span(const Container& arr) noexcept : data_(arr.data()), size_(arr.size()) {} // const overload of the constructor above
30 |     constexpr span() noexcept : data_(nullptr), size_(0) {} // empty span
31 | 
32 |     constexpr size_t size() const noexcept { return size_; }
33 |     constexpr bool empty() const noexcept { return size_ == 0; }
34 |     constexpr T& operator[](size_t i) const noexcept { // const on the span does not make the elements const
35 |         assert(i < size_); // bounds are checked by assert only, i.e. not when NDEBUG is defined
36 |         return data_[i];
37 |     }
38 |     constexpr T& front() const noexcept { // first element; the span must not be empty
39 |         assert(size_ > 0);
40 |         return data_[0];
41 |     }
42 |     constexpr T& back() const noexcept { // last element; the span must not be empty
43 |         assert(size_ > 0);
44 |         return data_[size_ - 1];
45 |     }
46 |     constexpr T* data() const noexcept { return data_; }
47 | 
48 |     constexpr auto begin() const noexcept { return data_; } // iterators are plain pointers
49 |     constexpr auto end() const noexcept { return data_ + size_; }
50 | };
51 | //---------------------------------------------------------------------------
52 | template <class Container>
53 | span(Container&) -> span<typename Container::value_type>; // deduction guide: mutable container gives span<value_type>
54 | //---------------------------------------------------------------------------
55 | template <class Container>
56 | span(const Container&) -> span<const typename Container::value_type>; // deduction guide: const container gives span<const value_type>
57 | //---------------------------------------------------------------------------
58 | } // namespace engine
59 | //---------------------------------------------------------------------------
60 | 


--------------------------------------------------------------------------------
/engine/op/CollectorTarget.cpp:
--------------------------------------------------------------------------------
 1 | #include "op/CollectorTarget.hpp"
 2 | #include "pipeline/PipelineGen.hpp"
 3 | //---------------------------------------------------------------------------
 4 | namespace engine {
 5 | //---------------------------------------------------------------------------
 6 | CollectorTarget::LocalState::LocalState(CollectorTarget& target) {
 7 |     next = target.localStates.exchange(this); // this state becomes the new head; the previous head is kept as successor
 8 | }
 9 | //---------------------------------------------------------------------------
10 | void CollectorTarget::finishConsume() { // Append the values of every registered local state to values
11 |     for (auto* state = localStates.load(); state != nullptr; state = state->next)
12 |         values.insert(values.end(), state->values.begin(), state->values.end());
13 | }
14 | //---------------------------------------------------------------------------
15 | template <size_t... Is>
16 | static auto collectCallback(CollectorTarget& collector, engine::TableScan& op, SmallVec<unsigned, 8>& attrOffsets, std::index_sequence<Is...>) {
17 |     using AttrPlaceholders = std::index_sequence<(Is - Is)...>; // one zero entry per produced column
18 |     PipelineFunctions::runPipeline<CollectorTarget, TableScan, 0, std::index_sequence<>, AttrPlaceholders>(collector, op, {}, {}, {attrOffsets.data(), attrOffsets.size()});
19 | }
20 | //---------------------------------------------------------------------------
21 | void CollectorTarget::collect(engine::TableScan& op) {
22 |     const size_t cols = op.getProducedColumns();
23 |     SmallVec<unsigned, 8> attrOffsets;
24 |     for (size_t col = 0; col < cols; ++col) attrOffsets.push_back(col);
25 |     // The column count is a template argument, so map the runtime value onto the supported instantiations
26 |     auto run = [&](auto columns) { collectCallback(*this, op, attrOffsets, columns); };
27 |     switch (cols) {
28 |         case 1: return run(std::make_index_sequence<1>{});
29 |         case 2: return run(std::make_index_sequence<2>{});
30 |         case 3: return run(std::make_index_sequence<3>{});
31 |         case 4: return run(std::make_index_sequence<4>{});
32 |         case 5: return run(std::make_index_sequence<5>{});
33 |         case 6: return run(std::make_index_sequence<6>{});
34 |         case 7: return run(std::make_index_sequence<7>{});
35 |         case 8: return run(std::make_index_sequence<8>{});
36 |         default: throw std::runtime_error("Unsupported number of columns");
37 |     }
38 | }
39 | //---------------------------------------------------------------------------
40 | }
41 | //---------------------------------------------------------------------------
42 | 


--------------------------------------------------------------------------------
/job/README:
--------------------------------------------------------------------------------
 1 | This package contains the Join Order Benchmark (JOB) queries from:
 2 | 
 3 | "How Good Are Query Optimizers, Really?"
 4 | by Viktor Leis, Andrey Gubichev, Atanas Mirchev, Peter Boncz, Alfons Kemper, Thomas Neumann
 5 | PVLDB Volume 9, No. 3, 2015
 6 | 
 7 | IMDB Data Set
 8 | -------------
 9 | 
10 | The CSV files used in the paper, which are from May 2013, can be found
11 | at http://homepages.cwi.nl/~boncz/job/imdb.tgz
12 | 
13 | The license and links to the current version of the IMDB data set can be
14 | found at http://www.imdb.com/interfaces
15 | Step-by-step instructions:
16 | 1. download *gz files (unpacking not necessary)
17 |   wget ftp://ftp.fu-berlin.de/pub/misc/movies/database/*gz
18 | 2. download and unpack imdbpy and the imdbpy2sql.py script
19 |   wget https://bitbucket.org/alberanid/imdbpy/get/5.0.zip
20 | 3. create PostgreSQL database (e.g., name imdbload):
21 |   createdb imdbload
22 | 4. transform *gz files to relational schema (takes a while)
23 |   imdbpy2sql.py -d PATH_TO_GZ_FILES -u postgres://username:password@hostname/imdbload
24 | 
25 | Now you should have a PostgreSQL database named "imdbload" with the
26 | imdb data. Note that this database has some secondary indexes (but not
27 | on all foreign key attributes). You can export all tables to CSV:
28 | 
29 | \copy aka_name to 'PATH/aka_name.csv' csv
30 | \copy aka_title to 'PATH/aka_title.csv' csv
31 | \copy cast_info to 'PATH/cast_info.csv' csv
32 | \copy char_name to 'PATH/char_name.csv' csv
33 | \copy comp_cast_type to 'PATH/comp_cast_type.csv' csv
34 | \copy company_name to 'PATH/company_name.csv' csv
35 | \copy company_type to 'PATH/company_type.csv' csv
36 | \copy complete_cast to 'PATH/complete_cast.csv' csv
37 | \copy info_type to 'PATH/info_type.csv' csv
38 | \copy keyword to 'PATH/keyword.csv' csv
39 | \copy kind_type to 'PATH/kind_type.csv' csv
40 | \copy link_type to 'PATH/link_type.csv' csv
41 | \copy movie_companies to 'PATH/movie_companies.csv' csv
42 | \copy movie_info to 'PATH/movie_info.csv' csv
43 | \copy movie_info_idx to 'PATH/movie_info_idx.csv' csv
44 | \copy movie_keyword to 'PATH/movie_keyword.csv' csv
45 | \copy movie_link to 'PATH/movie_link.csv' csv
46 | \copy name to 'PATH/name.csv' csv
47 | \copy person_info to 'PATH/person_info.csv' csv
48 | \copy role_type to 'PATH/role_type.csv' csv
49 | \copy title to 'PATH/title.csv' csv
50 | 
51 | To import the CSV files into another database, create all tables (see
52 | schema.sql and optionally fkindexes.sql) and run the same copy
53 | statements as above, but replace the keyword "to" with "from".
54 | 
55 | Questions
56 | ---------
57 | 
58 | Contact Viktor Leis (leis@in.tum.de) if you have any questions.
59 | 


--------------------------------------------------------------------------------
/engine/infra/Mmap.cpp:
--------------------------------------------------------------------------------
 1 | #include "infra/Mmap.hpp"
 2 | #include "infra/Util.hpp"
 3 | #include "infra/Scheduler.hpp"
 4 | #include <fcntl.h>
 5 | #include <sys/mman.h>
 6 | #include <unistd.h>
 7 | //---------------------------------------------------------------------------
 8 | namespace engine {
 9 | //---------------------------------------------------------------------------
10 | void Mmap::prefault(void* data, size_t size) {
11 |     char* mem = static_cast<char*>(data);
12 |     for (std::size_t i = 0; i < size; i += 4096)
13 |         mem[i] = 0;
14 | }
15 | //---------------------------------------------------------------------------
16 | Mmap Mmap::mapFile(const std::string& filename) {
17 |     Mmap result;
18 |     result.file = open(filename.c_str(), O_RDONLY);
19 |     if (result.file == -1) {
20 |         return result;
21 |     }
22 |     result.size_ = lseek(result.file, 0, SEEK_END);
23 |     if (result.size_ == -1) {
24 |         close(result.file);
25 |         result.file = -1;
26 |         return result;
27 |     }
28 |     result.data_ = static_cast<char*>(mmap(nullptr, result.size_, PROT_READ, MAP_SHARED, result.file, 0));
29 |     if (result.data_ == MAP_FAILED) {
30 |         close(result.file);
31 |         result.file = -1;
32 |         result.size_ = 0;
33 |         result.data_ = nullptr;
34 |     }
35 | #ifdef SIGMOD_LOCAL
36 |     size_t morselSize = 1ull << 21;
37 |     Scheduler::parallelMorsel(0, result.size_, morselSize, [&](size_t workerId, size_t pos) {
38 |         auto end = std::min(pos + morselSize, result.size_);
39 |         madvise(result.data_ + pos, end - pos, MADV_POPULATE_READ);
40 |     });
41 | #endif
42 |     return result;
43 | }
44 | //---------------------------------------------------------------------------
45 | Mmap Mmap::mapMemory(size_t size) {
46 |     Mmap result;
47 |     result.size_ = size;
48 | #ifdef NDEBUG
49 |     result.data_ = static_cast<char*>(mmap(nullptr, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0));
50 |     if (size >= 2'000'000)
51 |         madvise(result.data_, size, MADV_HUGEPAGE);
52 | #else
53 |     result.data_ = static_cast<char*>(aligned_alloc(4096, size));
54 | #endif
55 |     return result;
56 | }
57 | //---------------------------------------------------------------------------
58 | void Mmap::reset() noexcept {
59 |     if (data_) {
60 | #ifdef NDEBUG
61 |         munmap(data_, size_);
62 | #else
63 |         if (file)
64 |             munmap(data_, size_);
65 |         else
66 |             free(data_);
67 | #endif
68 |         data_ = nullptr;
69 |         size_ = 0;
70 |     }
71 |     if (file != -1) {
72 |         close(file);
73 |         file = -1;
74 |     }
75 | }
76 | //---------------------------------------------------------------------------
77 | }
78 | 


--------------------------------------------------------------------------------
/engine/infra/Scheduler.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | //---------------------------------------------------------------------------
 3 | #include "infra/Util.hpp"
 4 | #include <algorithm>
 5 | #include <atomic>
 6 | #include <cassert>
 7 | #include <cstddef>
 8 | #include <utility>
 9 | //---------------------------------------------------------------------------
10 | namespace engine {
11 | class Scheduler {
12 |     static __thread size_t currentWorker;
13 | 
14 |     public:
15 |     struct Worker;
16 |     struct Impl;
17 | 
18 |     class Task;
19 |     template <typename Fun>
20 |     class TaskImpl;
21 | 
22 |     /// Setup the scheduler
23 |     static void setup();
24 |     /// Teardown the scheduler
25 |     static void teardown();
26 |     /// Start a query
27 |     static void start_query();
28 |     /// End a query
29 |     static void end_query();
30 | 
31 |     /// Run a parallel morsel task
32 |     static void parallelImpl(size_t size, FunctionRef<void(size_t, size_t)> task, bool finalizeTask = false);
33 |     /// Run a parallel morsel task
34 |     template <typename Fun>
35 |     static void parallelMorsel(size_t begin, size_t end, size_t morselSize, Fun&& task, bool finalizeTask = false) {
36 |         return parallelImpl((end - begin + morselSize - 1) / morselSize, [&task, morselSize, begin](size_t workerId, size_t i) { return task(workerId, i >= ~0ull - 1 ? i : begin + i * morselSize); }, finalizeTask);
37 |     }
38 |     /// Run a parallel for task
39 |     template <typename Fun>
40 |     static void parallelFor(size_t begin, size_t end, size_t stepSize, size_t morselSize, Fun&& task) {
41 |         assert(stepSize > 0);
42 |         assert(morselSize > 0);
43 |         assert(stepSize % morselSize == 0);
44 |         return parallelMorsel(begin, end, stepSize, [stepSize, morselSize, &task](size_t workerId, size_t i) {
45 |             for (size_t j = 0; j < stepSize; j += morselSize)
46 |                 task(workerId, i + j);
47 |         });
48 |     }
49 |     /// Run a parallel for task
50 |     template <typename Fun>
51 |     static void parallelFor(size_t begin, size_t end, size_t stepSize, Fun&& task) {
52 |         return parallelFor(begin, end, stepSize, 1, std::forward<Fun>(task));
53 |     }
54 |     /// Run a parallel for task
55 |     template <typename Fun>
56 |     static void parallelFor(size_t begin, size_t end, Fun&& task) {
57 |         return parallelFor(begin, end, 1, std::forward<Fun>(task));
58 |     }
59 | 
60 |     /// Get thread id
61 |     static size_t threadId() noexcept { return currentWorker; }
62 | 
63 |     /// Get the hardware concurrency
64 |     static size_t concurrency() noexcept;
65 |     /// Get the ratio of cores we are using
66 |     static size_t unusedRatio() noexcept;
67 | };
68 | //---------------------------------------------------------------------------
69 | }
70 | 


--------------------------------------------------------------------------------
/engine/storage/BitLogic.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | //---------------------------------------------------------------------------
 3 | #include "infra/helper/BitOps.hpp"
 4 | #include <cassert>
 5 | #include <cstdint>
 6 | #include <cstddef>
 7 | #include <utility>
 8 | //---------------------------------------------------------------------------
 9 | namespace engine {
10 | //---------------------------------------------------------------------------
11 | class BitLogic {
12 |     public:
13 |     /// Convert the bits in a 64bit integer to an array of offsets (and the length of this array)
14 |     /// Target should have size at least 64+7=71 bytes
15 |     static uint8_t* bitsToOffsets(uint8_t* target, uint64_t bits) noexcept;
16 | 
17 |     /// Is a bitset dense? Are all bits packed together?
18 |     static constexpr bool isDense(uint64_t mask) {
19 |         auto lowest = mask & -mask;
20 |         auto incremented = mask + lowest;
21 |         return (incremented & (incremented - 1)) == 0;
22 |     }
23 |     /// Get the range of bits from a bitset (assuming it is dense)
24 |     static constexpr std::pair<size_t, size_t> getRange(uint64_t mask) {
25 |         assert(isDense(mask));
26 |         if (!mask)
27 |             return {0, 0};
28 |         return {engine::countr_zero(mask), 64 - engine::countl_zero(mask)};
29 |     }
30 | 
31 |     struct IndexIterator {
32 |         size_t index = 0;
33 | 
34 |         void operator++() { index++; }
35 |         bool operator!=(const IndexIterator& other) const { return index != other.index; }
36 |         size_t operator*() const { return index; }
37 |     };
38 | 
39 |     struct IndirectIterator {
40 |         uint8_t* indices = nullptr;
41 | 
42 |         void operator++() { indices++; }
43 |         bool operator!=(const IndirectIterator& other) const { return indices != other.indices; }
44 |         size_t operator*() const { return *indices; }
45 |     };
46 | 
47 | 
48 |     /// Get the range of bits from a bitset (assuming it is dense)
49 |     static std::pair<IndexIterator, IndexIterator> getDenseIterators(uint64_t mask) {
50 |         assert(isDense(mask));
51 |         if (!mask)
52 |             return {IndexIterator{0}, IndexIterator{0}};
53 |         return {IndexIterator{size_t(engine::countr_zero(mask))}, IndexIterator{size_t(64 - engine::countl_zero(mask))}};
54 |     }
55 |     /// Get the range of bits from a bitset (assuming it is dense)
56 |     static std::pair<IndirectIterator, IndirectIterator> getSparseIterators(uint8_t* target, uint64_t mask) {
57 |         if (!mask)
58 |             return {IndirectIterator{nullptr}, IndirectIterator{nullptr}};
59 |         auto* end = bitsToOffsets(target, mask);
60 |         return {IndirectIterator{target}, IndirectIterator{end}};
61 |     }
62 | };
63 | //---------------------------------------------------------------------------
64 | }
65 | //---------------------------------------------------------------------------


--------------------------------------------------------------------------------
/engine/query/QueryGraph.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | //---------------------------------------------------------------------------
 3 | #include "infra/BitSet.hpp"
 4 | #include "infra/QueryMemory.hpp"
 5 | #include "infra/SmallVec.hpp"
 6 | #include <vector>
 7 | #include <map>
 8 | //---------------------------------------------------------------------------
 9 | namespace engine {
10 | //---------------------------------------------------------------------------
11 | class QueryPlan;
12 | //---------------------------------------------------------------------------
13 | struct QueryGraph {
14 |     struct Input {
15 |         /// The equivalence classes being produced
16 |         BitSet producedEq;
17 |         /// The cardinality estimation
18 |         double cardinality = 1.0;
19 |         /// The multiplicity for hash tables
20 |         double multiplicity = 1.0;
21 |         /// The key that needs to be joined with
22 |         unsigned joinKey = ~0u;
23 |     };
24 |     struct Plan {
25 |         Plan* left = nullptr;
26 |         Plan* right = nullptr;
27 |         BitSet set;
28 |         size_t pipes;
29 |         BitSet eqs;
30 |         mutable BitSet neighborhood;
31 |         double card = -1;
32 |         double bc = 0.0;
33 |         double mc = 1.0;
34 |         double cost = std::numeric_limits<double>::infinity();
35 | 
36 |         bool isLeaf() const { return !left; }
37 |     };
38 | 
39 |     /// The maximum number of pipes in a plan
40 |     static constexpr size_t maxPipelineLength = 3;
41 | 
42 |     /// Reference to the query plan
43 |     QueryPlan& qp;
44 |     /// The inputs
45 |     SmallVec<Input> inputs;
46 |     /// The plans
47 |     Vector<std::array<Plan, maxPipelineLength>> plans;
48 |     /// The number of pipes of the best plan
49 |     size_t bestPipes = 0;
50 | 
51 |     explicit QueryGraph(QueryPlan& qp, SmallVec<Input> inputs);
52 | 
53 |     size_t size() const;
54 | 
55 |     BitSet computeNeighborhood(BitSet rels, BitSet eqs) const;
56 |     BitSet neighborhood(BitSet bs) const;
57 |     bool connected(BitSet bs) const;
58 |     bool canJoin(BitSet left, BitSet right);
59 |     void consider(BitSet left, BitSet right);
60 |     static double computeCost(double card, double leftCard, [[maybe_unused]] double rightCard);
61 | 
62 |     /// Get a plan
63 |     [[gnu::always_inline]] Plan& get(BitSet bs, size_t pipes) {
64 |         assert(pipes < maxPipelineLength);
65 |         assert(bs.asU64() < plans.size());
66 |         auto& result = plans[bs.asU64()][pipes];
67 |         assert(result.set == bs);
68 |         assert(result.pipes == pipes);
69 |         return result;
70 |     }
71 | 
72 |     /// Compute cardinality
73 |     double computeCard(BitSet rels);
74 |     /// Compute cardinality
75 |     void computeCard(Plan& target, const Plan& left, const Plan& right);
76 | 
77 |     Plan* optimize();
78 | };
79 | //---------------------------------------------------------------------------
80 | }
81 | 


--------------------------------------------------------------------------------
/engine/query/DPccp.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | //---------------------------------------------------------------------------
 3 | #include "infra/BitSet.hpp"
 4 | #include "infra/Util.hpp"
 5 | #include <concepts>
 6 | //---------------------------------------------------------------------------
 7 | namespace engine {
 8 | //---------------------------------------------------------------------------
 9 | template <typename T>
10 | struct QG {
11 |     static_assert(std::is_same<decltype(std::declval<const T>().neighborhood(std::declval<typename BitSet::arg_type>())), BitSet>::value);
12 |     static_assert(std::is_same<decltype(std::declval<const T>().connected(std::declval<typename BitSet::arg_type>())), bool>::value);
13 |     static_assert(std::is_convertible<decltype(std::declval<const T>().size()), unsigned>::value);
14 | };
15 | //---------------------------------------------------------------------------
/// Enumeration of subsets of a query graph and of pairs of such subsets.
/// All functions report their results through a callback.
/// NOTE(review): the comments below read BitSet's `+`, `-` and `&` as set
/// union, difference and intersection, and prefix(i) as the elements below i -
/// confirm against infra/BitSet.hpp.
class DPccp {
    public:
    /// Enumerate subsets starting from each of the first N elements, with an empty exclusion set
    template <typename QG, typename Callback, typename = std::enable_if_t<Fun<Callback, void(BitSet::arg_type)>>>
    static void enumerateCsg(const QG& qg, unsigned N, Callback&& callback) {
        enumerateCsg(qg, BitSet::prefix(N), BitSet{}, callback);
    }

    /// Enumerate subsets starting from each element of s (visited via s.reversed()).
    /// For start element i, x plus the part of s inside prefix(i) is excluded.
    template <typename QG, typename Callback, typename = std::enable_if_t<Fun<Callback, void(BitSet::arg_type)>>>
    static void enumerateCsg(const QG& qg, BitSet::arg_type s, BitSet::arg_type x, Callback&& callback) {
        for (unsigned i : s.reversed())
            enumerateCsgRec(qg, {i}, x + (BitSet::prefix(i) & s), callback);
    }

    /// Report s, then recursively grow it by every subset yielded by subsets()
    /// of its not-excluded neighborhood. The whole neighborhood is excluded in the recursive calls.
    template <typename QG, typename Callback, typename = std::enable_if_t<Fun<Callback, void(BitSet::arg_type)>>>
    static void enumerateCsgRec(const QG& qg, BitSet::arg_type s, BitSet::arg_type x, Callback&& callback) {
        callback(s);
        auto n = qg.neighborhood(s) - x;
        for (BitSet sp : n.subsets())
            enumerateCsgRec(qg, s + sp, x + n, callback);
    }

    /// Enumerate the counterparts of s: subsets grown from the neighborhood of s,
    /// excluding s itself and everything in prefix(s.front())
    template <typename QG, typename Callback, typename = std::enable_if_t<Fun<Callback, void(BitSet::arg_type)>>>
    static void enumerateCmp(const QG& qg, BitSet::arg_type s, Callback&& callback) {
        auto x = BitSet::prefix(s.front()) + s;
        auto n = qg.neighborhood(s) - x;
        enumerateCsg(qg, n, x, callback);
    }

    /// Call callback(s, c) for every enumerated subset s and every counterpart c of s,
    /// skipping any s or c for which qg.connected() is false
    template <typename QG, typename Callback, typename = std::enable_if_t<Fun<Callback, void(BitSet::arg_type, BitSet::arg_type)>>>
    static void enumerateCsgCmp(const QG& qg,Callback && callback) {
        enumerateCsg(qg, qg.size(), [&](BitSet::arg_type s) {
            if (!qg.connected(s))
                return;
            enumerateCmp(qg, s, [&](BitSet::arg_type c) {
                if (!qg.connected(c))
                    return;
                callback(s, c);
            });
        });
    }
};
57 | //---------------------------------------------------------------------------
58 | }


--------------------------------------------------------------------------------
/engine/genpipelines.py:
--------------------------------------------------------------------------------
 1 | import itertools
 2 | 
 3 | #&runPipeline<BasicScanner, 2, std::index_sequence<0, 1>, std::index_sequence<0, 0, 1>>;
 4 | def comma(seq):
 5 |     return ','.join(map(str, seq))
 6 | 
 7 | def jj(seq):
 8 |     return ''.join(map(str, seq))
 9 | 
def subsets_with_replacement(p, r):
    """Yield all combinations with replacement of ``p`` for every length 0..r.

    Shorter combinations come first; the empty tuple is always the first result.
    """
    for length in range(r + 1):
        yield from itertools.combinations_with_replacement(p, length)
14 | 
# Pipeline targets and scans to generate specializations for
targets = [
    "engine::HashtableBuild",
    "engine::TableTarget"
]
scans = [
    "engine::TableScan"
]

# (lookup name, C++ specialization) pairs, sorted at the end
funcs = []
maxn = 3
maxattrs = 6


def _pipeline_entry(target, scan, joins, keys, attrs):
    """Build the (lookup name, C++ specialization) pair for one pipeline variant."""
    name = f'{target},{scan},{joins},({jj(keys)}),({jj(attrs)})'
    spec = f'PipelineFunctions::runPipeline<{target}, {scan}, {joins}, std::index_sequence<{comma(keys)}>, std::index_sequence<{comma(attrs)}>>'
    return (name, spec)


for joins in range(maxn):
    for keys in itertools.product(range(maxn), repeat=joins):
        print(joins, keys)
        # Keep only key tuples where the key at position k is at most k
        if any(key > position for position, key in enumerate(keys)):
            continue

        # Variant without any attributes, generated for the table target only
        for scan in scans:
            funcs.append(_pipeline_entry("engine::TableTarget", scan, joins, keys, []))

        # Let the first attribute come from any table as it will be interpreted as the hash key by hash builds
        for first_attr in range(joins + 1):
            for rest_attrs in subsets_with_replacement(range(joins + 1), maxattrs):
                attrs = [first_attr] + list(rest_attrs)
                for target in targets:
                    for scan in scans:
                        funcs.append(_pipeline_entry(target, scan, joins, keys, attrs))


funcs.sort()
53 | 
def inst(v):
    """Return the explicit template instantiation declaration for specialization ``v`` (without the trailing semicolon)."""
    parameters = "TargetBase&, ScanBase&, engine::span<const DefaultProbeParameter>, engine::span<const unsigned>, engine::span<const unsigned>"
    return "template void {}({})".format(v, parameters)
56 | 
# Spread the explicit instantiations over at most 16 translation units
step = (len(funcs) + 15) // 16
for file_index, start in enumerate(range(0, len(funcs), step)):
    chunk = funcs[start:start + step]
    with open(f"pipeline/PipelineGen{file_index}.cpp", "w") as out:
        out.write('#include "pipeline/PipelineGen.hpp"\n')
        out.write('namespace engine {\n')
        out.writelines(inst(spec) + ";\n" for _, spec in chunk)
        out.write("}\n")

# The lookup table mapping each name to its specialization
with open("pipeline/PipelineGen.cpp", "w") as out:
    out.write('#include "pipeline/PipelineFunction.hpp"\n')
    out.write('namespace engine {\n')
    out.write(f'size_t PipelineFunctions::numFunctions = {len(funcs)};\n')
    out.write('std::pair<std::string_view, PipelineFunction> PipelineFunctions::functions[] = {\n')
    out.writelines(f'{{std::string_view{{"{name}"}},&{spec}}},\n' for name, spec in funcs)
    out.write('};\n')
    out.write('}\n')

print("Number of generated functions:", len(funcs))


--------------------------------------------------------------------------------
/engine/infra/QueryMemory.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | #include <cassert>
 3 | #include <cstdlib>
 4 | #include <limits>
 5 | #include <vector>
 6 | #include <unordered_set>
 7 | #include <unordered_map>
 8 | #include <memory>
 9 | //---------------------------------------------------------------------------
10 | namespace engine::querymemory {
11 | //---------------------------------------------------------------------------
12 | /// Setup the query memory
13 | void setup();
14 | //---------------------------------------------------------------------------
15 | /// Prefault the page memory
16 | bool prefault();
17 | //---------------------------------------------------------------------------
18 | /// End a query
19 | void end_query();
20 | //---------------------------------------------------------------------------
21 | /// Allocate a page
22 | void* allocate(size_t bytes);
23 | //---------------------------------------------------------------------------
24 | /// std::allocator like wrapper for querymemory::allocator
25 | template <typename T>
26 | struct Allocator {
27 |     using value_type = T;
28 |     using pointer = T*;
29 |     using const_pointer = const T*;
30 |     using reference = T&;
31 |     using const_reference = const T&;
32 |     using size_type = std::size_t;
33 |     using difference_type = std::ptrdiff_t;
34 | 
35 |     Allocator() noexcept = default;
36 |     template <typename U>
37 |     Allocator(const Allocator<U>&) noexcept {}
38 | 
39 |     T* allocate(std::size_t n) {
40 |         return static_cast<T*>(::engine::querymemory::allocate(n * sizeof(T)));
41 |     }
42 | 
43 |     void deallocate(T* p, std::size_t) noexcept { }
44 | 
45 |     template <typename U, typename... Args>
46 |     void construct(U* p, Args&&... args) {
47 |         ::new (static_cast<void*>(p)) U(std::forward<Args>(args)...);
48 |     }
49 | 
50 |     template <typename U>
51 |     void destroy(U* p) {
52 |         p->~U();
53 |     }
54 | 
55 |     template <typename U>
56 |     struct rebind {
57 |         using other = Allocator<U>;
58 |     };
59 | };
60 | //---------------------------------------------------------------------------
/// Deleter that only runs the destructor of the object and leaves its
/// storage alone. Null pointers are ignored.
struct Deleter {
    template <typename T>
    void operator()(T* p) const {
        if (!p)
            return;
        p->~T();
    }
};
69 | //---------------------------------------------------------------------------
70 | }
71 | //---------------------------------------------------------------------------
72 | namespace engine {
73 | template <typename T>
74 | using Vector = std::vector<T, querymemory::Allocator<T>>;
75 | template <typename T>
76 | using UniquePtr = std::unique_ptr<T, querymemory::Deleter>;
77 | template <typename T, typename... Args>
78 | UniquePtr<T> makeUnique(Args&&... args) {
79 |     return UniquePtr<T>(new (querymemory::allocate(sizeof(T))) T(std::forward<Args>(args)...));
80 | }
81 | template <typename T, typename Hash = std::hash<T>, typename Equal = std::equal_to<T>>
82 | using UnorderedSet = std::unordered_set<T, Hash, Equal, querymemory::Allocator<T>>;
83 | template <typename K, typename V, typename Hash = std::hash<K>, typename Equal = std::equal_to<K>>
84 | using UnorderedMap = std::unordered_map<K, V, Hash, Equal, querymemory::Allocator<std::pair<const K, V>>>;
85 | }
86 | //---------------------------------------------------------------------------
87 | 


--------------------------------------------------------------------------------
/engine/infra/helper/BitOps.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | #include <bit>
 3 | #include <limits>
 4 | //---------------------------------------------------------------------------
 5 | namespace engine {
 6 | //---------------------------------------------------------------------------
 7 | constexpr auto Nd_ull = std::numeric_limits<unsigned long long>::digits;
 8 | constexpr auto Nd_ul = std::numeric_limits<unsigned long>::digits;
 9 | constexpr auto Nd_u =  std::numeric_limits<unsigned>::digits;
10 | //---------------------------------------------------------------------------
11 | [[noreturn]] inline void unreachable()
12 | {
13 |     // Uses compiler specific extensions if possible.
14 |     // Even if no extension is used, undefined behavior is still raised by
15 |     // an empty function body and the noreturn attribute.
16 | #if defined(_MSC_VER) && !defined(__clang__) // MSVC
17 |     __assume(false);
18 | #else // GCC, Clang
19 |     __builtin_unreachable();
20 | #endif
21 | }
22 | //---------------------------------------------------------------------------
23 | template <typename T>
24 | constexpr int popcount(T x) noexcept {
25 |     constexpr auto Nd = std::numeric_limits<T>::digits;
26 | 
27 |     if constexpr (Nd <= Nd_u)
28 |         return __builtin_popcount(x);
29 |     else if constexpr (Nd <= Nd_ul)
30 |         return __builtin_popcountl(x);
31 |     else if constexpr (Nd <= Nd_ull)
32 |         return __builtin_popcountll(x);
33 |     else
34 |         unreachable();
35 | }
36 | //---------------------------------------------------------------------------
37 | template <typename T>
38 | constexpr int countr_zero(T x) noexcept {
39 |     constexpr auto Nd = std::numeric_limits<T>::digits;
40 | 
41 |     if (x == 0)
42 |         return Nd;
43 | 
44 |     if constexpr (Nd <= Nd_u)
45 |         return __builtin_ctz(x);
46 |     else if constexpr (Nd <= Nd_ul)
47 |         return __builtin_ctzl(x);
48 |     else if constexpr (Nd <= Nd_ull)
49 |         return __builtin_ctzll(x);
50 |     else
51 |         unreachable();
52 | }
53 | //---------------------------------------------------------------------------
54 | template <typename T>
55 | constexpr int countl_zero(T x) noexcept {
56 |     constexpr auto Nd = std::numeric_limits<T>::digits;
57 | 
58 |     if (x == 0)
59 |         return Nd;
60 | 
61 |     if constexpr (Nd <= Nd_u) {
62 |         constexpr int diff = Nd_u - Nd;
63 |         return __builtin_clz(x) - diff;
64 |     } else if constexpr (Nd <= Nd_ul) {
65 |         constexpr int diff = Nd_ul - Nd;
66 |         return __builtin_clzl(x) - diff;
67 |     } else if constexpr (Nd <= Nd_ull) {
68 |         constexpr int diff = Nd_ull - Nd;
69 |         return __builtin_clzll(x) - diff;
70 |     } else {
71 |         unreachable();
72 |     }
73 | }
74 | //---------------------------------------------------------------------------
75 | template <typename T>
76 | constexpr int has_single_bit(T x) noexcept {
77 |     return popcount(x) == 1;
78 | }
79 | //---------------------------------------------------------------------------
80 | template <typename T>
81 | constexpr int bit_width(T x) noexcept {
82 |     constexpr auto Nd = std::numeric_limits<T>::digits;
83 |     return Nd - countl_zero(x);
84 | }
85 | //---------------------------------------------------------------------------
86 | template <typename T>
87 | constexpr int countr_one(T x) noexcept {
88 |     return countr_zero((T)~x);
89 | }
90 | //---------------------------------------------------------------------------
91 | }
92 | //---------------------------------------------------------------------------
93 | 


--------------------------------------------------------------------------------
/engine/infra/Util.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | //---------------------------------------------------------------------------
 3 | #include <concepts>
 4 | #include <cstddef>
 5 | #include <type_traits>
 6 | #include <utility>
 7 | //---------------------------------------------------------------------------
 8 | namespace engine {
 9 | //---------------------------------------------------------------------------
/// Write `value` to `ptr` byte-wise, so that `ptr` does not have to be aligned for T
template <typename T>
[[gnu::always_inline]] inline void unalignedStore(void* ptr, const T& value) noexcept {
    static_assert(std::is_trivially_copyable_v<T>);
    const void* source = &value;
    __builtin_memcpy(ptr, source, sizeof(value));
}
15 | //---------------------------------------------------------------------------
/// Read a T from `ptr` byte-wise, so that `ptr` does not have to be aligned for T
template <typename T>
[[gnu::always_inline]] inline T unalignedLoad(const void* ptr) noexcept {
    static_assert(std::is_trivially_copyable_v<T>);
    T loaded;
    __builtin_memcpy(&loaded, ptr, sizeof(loaded));
    return loaded;
}
23 | //---------------------------------------------------------------------------
namespace detail {
/// Checks whether a T can be called like the function signature Def
template <typename T, typename Def>
struct IsFunT;

/// A call result of type T fits the return type Dest if Dest is void
/// (the result is ignored) or T converts to Dest
template <typename T, typename Dest>
struct IsFitFor {
    static constexpr bool value = std::is_void_v<Dest> || std::is_convertible_v<T, Dest>;
};

template <typename T, typename R, typename... Args>
struct IsFunT<T, R(Args...)> {
    private:
    /// Chosen if a U can be called with Args; the result type is checked
    /// inside the return type, so a failing call expression only removes this
    /// overload instead of being a hard error
    template <typename U>
    static auto test(int) -> std::bool_constant<IsFitFor<decltype(std::declval<U>()(std::declval<Args>()...)), R>::value>;

    /// Fallback: not callable with Args
    template <typename>
    static std::false_type test(...);

    public:
    /// True if T is callable with Args and the result fits R; false otherwise,
    /// including for types that are not callable at all
    static constexpr bool value = decltype(test<T>(0))::value;
};
}
46 | //---------------------------------------------------------------------------
47 | template <typename T, typename Def>
48 | constexpr bool Fun = detail::IsFunT<T, Def>::value;
49 | //---------------------------------------------------------------------------
50 | template <typename T>
51 | class FunctionRef;
52 | template <typename R, typename... Args>
53 | class FunctionRef<R(Args...)> {
54 |     private:
55 |     using Func = R (*)(const void*, Args...);
56 |     Func func = nullptr;
57 |     const void* obj = nullptr;
58 | 
59 |     public:
60 |     constexpr FunctionRef() noexcept = default;
61 |     template <typename F, typename = std::enable_if_t<Fun<F, R(Args...)>>>
62 |     constexpr FunctionRef(const F& f) {
63 |         obj = &f;
64 |         func = [](const void* o, Args... args) -> R {
65 |             return (*static_cast<const F*>(o))(std::forward<Args>(args)...);
66 |         };
67 |     }
68 |     constexpr FunctionRef(R (*f)(Args...)) {
69 |         obj = f;
70 |         func = [](const void* o, Args... args) -> R {
71 |             return static_cast<decltype(f)>(o)(std::forward<Args>(args)...);
72 |         };
73 |     }
74 |     R operator()(Args... args) const { return func(obj, std::forward<Args>(args)...); }
75 |     explicit operator bool() const noexcept { return func != nullptr; }
76 | };
77 | //---------------------------------------------------------------------------
// Cache line size in bytes: 128 on 64-bit PowerPC, 64 on x64, AArch64 and everything else
#if defined(__PPC64__) && !defined(__x86_64__) && !defined(_M_X64) && !defined(__aarch64__)
static constexpr size_t hardwareCachelineSize = 128;
#else
static constexpr size_t hardwareCachelineSize = 64;
#endif
86 | //---------------------------------------------------------------------------
87 | }
88 | 


--------------------------------------------------------------------------------
/engine/query/QueryPlan.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | //---------------------------------------------------------------------------
 3 | #include "infra/BitSet.hpp"
 4 | #include "infra/QueryMemory.hpp"
 5 | #include "infra/SmallVec.hpp"
 6 | #include "query/DataSource.hpp"
 7 | #include "query/Restriction.hpp"
 8 | #include "query/RuntimeValue.hpp"
 9 | #include <variant>
10 | #include <vector>
11 | #include <attribute.h>
12 | #include <plan.h>
13 | //---------------------------------------------------------------------------
14 | namespace engine {
15 | //---------------------------------------------------------------------------
16 | class TableTarget;
17 | class Hashtable;
18 | class HashtableBuild;
19 | class TableScan;
20 | class RestrictionLogic;
21 | class QueryGraph;
22 | //---------------------------------------------------------------------------
23 | class QueryPlan { // Collects the inputs, attributes and equivalence classes of one query and executes it via run()
24 |     public:
25 |     /// An attribute corresponding to a physical column
26 |     struct Attribute;
27 |     /// An input. It is either a base table or a hash table built on an attribute
28 |     struct Input;
29 | 
30 |     private:
31 |     /// The query graph for optimizing joins
32 |     friend class QueryGraph;
33 |     /// A pipeline descriptor
34 |     struct PlanPipeline;
35 |     struct CheapestPipelineFinder; // Only declared here; defined outside this header
36 |     /// The input data
37 |     DataSource* db;
38 |     /// All attributes provided by table scans in the query plan
39 |     Vector<Attribute> attributes;
40 |     /// The output equivalence classes
41 |     SmallVec<unsigned> outputEqs;
42 |     /// The inputs
43 |     SmallVec<UniquePtr<Input>> inputs;
44 |     /// The used inputs that we have to keep alive
45 |     SmallVec<UniquePtr<Input>> graveyard;
46 |     /// Sets of attributes within each equivalence class
47 |     SmallVec<BitSet> equivalenceSets;
48 |     /// Equivalence classes for which we have a constant value
49 |     UnorderedMap<unsigned, uint64_t> eqConstants;
50 |     /// Equivalence classes for which we have a restriction
51 |     UnorderedMap<unsigned, const RestrictionLogic*> eqRestrictions;
52 |     /// The result
53 |     ColumnarTable finalResult;
54 | 
55 |     /// Estimate the cardinality of a table and the selectivities of its restrictions
56 |     void estimateCardinality(Input& input);
57 |     /// Build a table scan for an input
58 |     TableScan buildScan(Input& input, BitSet requiredEqs, double mult);
59 |     /// Compute the required equivalence classes outside of a set of relations
60 |     BitSet computeRequiredEq(BitSet relations);
61 |     /// Eliminate singletons
62 |     void eliminateSingletons();
63 |     /// Compute samples
64 |     void computeSamples();
65 |     /// Run a pipeline
66 |     bool runPipeline(const PlanPipeline& pipeline, double cardinalityEstimate);
67 |     /// Print a query plan
68 |     void printPlan(Input& root) const;
69 | 
70 |     public:
71 |     /// Constructor
72 |     explicit QueryPlan(DataSource& db);
73 |     /// Destructor
74 |     ~QueryPlan() noexcept;
75 |     /// Move constructor
76 |     QueryPlan(QueryPlan&&) noexcept;
77 |     /// Move assignment
78 |     QueryPlan& operator=(QueryPlan&&) noexcept;
79 | 
80 |     /// Add an input
81 |     void addInput(DataSource::Table& table, BitSet attrs);
82 |     /// Add an attribute
83 |     void addAttribute(unsigned relation, unsigned column, DataType dataType);
84 |     /// Prepare query plan after all inputs and attributes have been added
85 |     void prepare(SmallVec<BitSet> equivalenceSets);
86 |     /// Set the output attributes
87 |     void setOutput(engine::span<const unsigned> attrs);
88 | 
89 |     /// Run the query
90 |     ColumnarTable run();
91 | };
92 | //---------------------------------------------------------------------------
93 | }
94 | 


--------------------------------------------------------------------------------
/include/table.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
#include <filesystem>
#include <utility>

#include <fmt/core.h>
#include <range/v3/all.hpp>

#include <attribute.h>
#include <plan.h>
#include <statement.h>
10 | 
11 | struct Table {
12 | public:
13 |     Table() = default;
14 | 
15 |     Table(std::vector<std::vector<Data>> data, std::vector<DataType> types)
16 |     : types_(types)
17 |     , data_(data) {}
18 | 
19 |     static ColumnarTable from_csv(const std::vector<Attribute>& attributes,
20 |         const std::filesystem::path&                            path,
21 |         Statement*                                              filter,
22 |         bool                                                    header = false);
23 | 
24 |     static Table from_columnar(const ColumnarTable& input);
25 | 
26 |     ColumnarTable to_columnar() const;
27 | 
28 |     const std::vector<std::vector<Data>>& table() const { return data_; }
29 | 
30 |     std::vector<std::vector<Data>>& table() { return data_; }
31 | 
32 |     const std::vector<DataType>& types() const { return types_; }
33 | 
34 |     size_t number_rows() const { return this->data_.size(); }
35 | 
36 |     size_t number_cols() const { return this->types_.size(); }
37 | 
38 |     static void print(const std::vector<std::vector<Data>>& data) {
39 |         namespace views = ranges::views;
40 | 
41 |         auto escape_string = [](const std::string& s) {
42 |             std::string escaped;
43 |             for (char c: s) {
44 |                 switch (c) {
45 |                 case '"':  escaped += "\\\""; break;
46 |                 case '\\': escaped += "\\\\"; break;
47 |                 case '\n': escaped += "\\n"; break;
48 |                 case '\r': escaped += "\\r"; break;
49 |                 case '\t': escaped += "\\t"; break;
50 |                 default:   escaped += c; break;
51 |                 }
52 |             }
53 |             return escaped;
54 |         };
55 | 
56 |         for (auto& record: data) {
57 |             auto line = record
58 |                       | views::transform([&escape_string](const Data& field) -> std::string {
59 |                             return std::visit(
60 |                                 [&escape_string](const auto& arg) {
61 |                                     using T = std::decay_t<decltype(arg)>;
62 |                                     using namespace std::string_literals;
63 |                                     if constexpr (std::is_same_v<T, std::monostate>) {
64 |                                         return "NULL"s;
65 |                                     } else if constexpr (std::is_same_v<T, int32_t>
66 |                                                          || std::is_same_v<T, int64_t>
67 |                                                          || std::is_same_v<T, double>) {
68 |                                         return fmt::format("{}", arg);
69 |                                     } else if constexpr (std::is_same_v<T, std::string>) {
70 |                                         return fmt::format("\"{}\"", escape_string(arg));
71 |                                         // return fmt::format("{}", arg);
72 |                                     }
73 |                                 },
74 |                                 field);
75 |                         })
76 |                       | views::join('|') | ranges::to<std::string>();
77 |             fmt::println("{}", line);
78 |         }
79 |     }
80 | 
81 | private:
82 |     std::vector<DataType>          types_;
83 |     std::vector<std::vector<Data>> data_;
84 | 
85 |     void set_attributes(const std::vector<Attribute>& attributes) {
86 |         this->types_.clear();
87 |         for (auto& attr: attributes) {
88 |             this->types_.push_back(attr.type);
89 |         }
90 |     }
91 | };
92 | 


--------------------------------------------------------------------------------
/engine/tools/DuckDB.cpp:
--------------------------------------------------------------------------------
 1 | #include "tools/DuckDB.hpp"
 2 | #ifndef NO_DUCK
 3 | #include <duckdb.hpp>
 4 | #endif
 5 | //---------------------------------------------------------------------------
 6 | namespace engine {
 7 | //---------------------------------------------------------------------------
 8 | struct DuckDB::Impl {
 9 | #ifndef NO_DUCK
10 |     duckdb::DBConfig config;
11 |     duckdb::DuckDB db;
12 |     duckdb::Connection conn;
13 | 
14 |     Impl() : config(true), db("imdb.db", &config), conn(db) {
15 |         conn.Query("SET memory_limit = '20GB';");
16 |         conn.Query("SET temp_directory = '';");
17 |     }
18 | #endif
19 | };
20 | //---------------------------------------------------------------------------
21 | #ifndef NO_DUCK
22 | static DataType mapType(const duckdb::LogicalType& lhs) {
23 |     using namespace duckdb;
24 |     switch (lhs.id()) {
25 |         case LogicalTypeId::INTEGER: return DataType::INT32;
26 |         case LogicalTypeId::BIGINT:  return DataType::INT64;
27 |         case LogicalTypeId::DOUBLE:  return DataType::FP64;
28 |         case LogicalTypeId::VARCHAR: return DataType::VARCHAR;
29 |         default:
30 |             throw std::runtime_error("in DuckDB is not supported");
31 |     }
32 | }
33 | #endif
34 | //---------------------------------------------------------------------------
35 | DuckDB::DuckDB() : impl(std::make_unique<Impl>()) {} // Creates the Impl; unless built with NO_DUCK this opens "imdb.db" and applies the session settings
36 | DuckDB::~DuckDB() noexcept = default; // Defaulted in this file, after Impl has been defined
37 | //---------------------------------------------------------------------------
38 | ColumnarTable DuckDB::execute(std::string query) {
39 | #ifndef NO_DUCK
40 |     auto results = impl->conn.SendQuery(query);
41 |     auto& duckdb_results = *results;
42 |     auto num_cols = duckdb_results.ColumnCount();
43 | 
44 |     std::vector<DataType> cols;
45 | 
46 |     for (size_t i = 0; i < num_cols; ++i) {
47 |         cols.push_back(mapType(duckdb_results.types[i]));
48 |     }
49 | 
50 |     std::vector<std::vector<Data>> duckdb_table;
51 |     size_t rowCount = 0;
52 |     for (auto& row : *results) {
53 |         if (rowCount > 50'000'000ull) {
54 |             throw std::runtime_error("Too many rows in result");
55 |         }
56 |         rowCount++;
57 |         std::vector<Data> record;
58 |         for (size_t col_idx = 0; col_idx < num_cols; col_idx++) {
59 |             auto val = row.iterator.chunk->GetValue(col_idx, row.row);
60 |             if (val.IsNull()) {
61 |                 record.emplace_back(std::monostate{});
62 |             } else {
63 |                 switch (cols[col_idx]) {
64 |                     case DataType::INT32: {
65 |                         record.emplace_back(duckdb::IntegerValue::Get(val));
66 |                         break;
67 |                     }
68 |                     case DataType::INT64: {
69 |                         record.emplace_back(duckdb::BigIntValue::Get(val));
70 |                         break;
71 |                     }
72 |                     case DataType::FP64: {
73 |                         record.emplace_back(duckdb::FloatValue::Get(val));
74 |                         break;
75 |                     }
76 |                     case DataType::VARCHAR: {
77 |                         record.emplace_back(duckdb::StringValue::Get(val));
78 |                         break;
79 |                     }
80 |                 }
81 |             }
82 |         }
83 |         duckdb_table.emplace_back(std::move(record));
84 |     }
85 |     sort(duckdb_table.begin(), duckdb_table.end());
86 |     Table tbl(std::move(duckdb_table), std::move(cols));
87 |     return tbl.to_columnar();
88 | #else
89 |     throw std::runtime_error("Built with NO_DUCK but trying to run a DuckDB query!");
90 | #endif
91 | }
92 | //---------------------------------------------------------------------------
93 | }
94 | //---------------------------------------------------------------------------


--------------------------------------------------------------------------------
/include/common.h:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | 
  3 | #include <filesystem>
  4 | #include <numeric>
  5 | #include <vector>
  6 | 
  7 | #include <cstdint>
  8 | #include <cstdlib>
  9 | 
 10 | namespace detail {
 11 | inline uint32_t rotl32(uint32_t x, uint8_t bits) {
 12 |     return (x << bits) | (x >> (32 - bits));
 13 | }
 14 | 
 15 | inline void hash_combine_impl(uint32_t& h1, uint32_t k1) {
 16 |     constexpr uint32_t c1 = 0xcc9e2d51u;
 17 |     constexpr uint32_t c2 = 0x1b873593u;
 18 | 
 19 |     k1 *= c1;
 20 |     k1  = rotl32(k1, 15);
 21 |     k1 *= c2;
 22 | 
 23 |     h1 ^= k1;
 24 |     h1  = rotl32(h1, 13);
 25 |     h1  = h1 * 5u + 0xe6546b64u;
 26 | }
 27 | 
 28 | inline void hash_combine_impl(uint64_t& h, uint64_t k) {
 29 |     constexpr uint64_t m = UINT64_C(0xc6a4a7935bd1e995);
 30 |     constexpr int      r = 47;
 31 | 
 32 |     k *= m;
 33 |     k ^= k >> r;
 34 |     k *= m;
 35 | 
 36 |     h ^= k;
 37 |     h *= m;
 38 |     h += 0xe6546b64;
 39 | }
 40 | } // namespace detail
 41 | 
 42 | inline void hash_combine(std::size_t& seed, std::size_t k) {
 43 |     if constexpr (sizeof(std::size_t) == 4) {
 44 |         uint32_t h = static_cast<uint32_t>(seed);
 45 |         detail::hash_combine_impl(h, static_cast<uint32_t>(k));
 46 |         seed = h;
 47 |     } else if constexpr (sizeof(std::size_t) == 8) {
 48 |         uint64_t h = static_cast<uint64_t>(seed);
 49 |         detail::hash_combine_impl(h, static_cast<uint64_t>(k));
 50 |         seed = h;
 51 |     } else {
 52 |         static_assert(sizeof(std::size_t) == 4 || sizeof(std::size_t) == 8,
 53 |             "Unsupported size_t size for hash_combine");
 54 |     }
 55 | }
 56 | 
 57 | class File {
 58 | public:
 59 |     File(const std::filesystem::path& path, const char* mode)
 60 |     : handle(std::fopen(path.string().c_str(), mode)) {
 61 |         if (!handle) {
 62 |             throw std::runtime_error("Failed to open file: " + path.string());
 63 |         }
 64 |     }
 65 | 
 66 |     operator FILE*() const noexcept { return handle; }
 67 | 
 68 |     File(File&& other) noexcept
 69 |     : handle(other.handle) {
 70 |         other.handle = nullptr;
 71 |     }
 72 | 
 73 |     File& operator=(File&& other) noexcept {
 74 |         if (this != &other) {
 75 |             close();
 76 |             handle       = other.handle;
 77 |             other.handle = nullptr;
 78 |         }
 79 |         return *this;
 80 |     }
 81 | 
 82 |     File(const File&)            = delete;
 83 |     File& operator=(const File&) = delete;
 84 | 
 85 |     ~File() { close(); }
 86 | 
 87 | private:
 88 |     FILE* handle = nullptr;
 89 | 
 90 |     void close() noexcept {
 91 |         if (handle) {
 92 |             std::fclose(handle);
 93 |             handle = nullptr;
 94 |         }
 95 |     }
 96 | };
 97 | 
 98 | inline std::string read_file(const std::filesystem::path& path) {
 99 |     File f(path, "rb");
100 |     ::fseek(f, 0, SEEK_END);
101 |     auto size = ::ftell(f);
102 |     ::fseek(f, 0, SEEK_SET);
103 |     std::string result;
104 |     result.resize(size);
105 |     std::ignore = ::fread(result.data(), 1, size, f);
106 |     return result;
107 | }
108 | 
// Disjoint-set union (union-find) over the elements 0 .. size-1 with path compression.
struct DSU {
    // pa[x] is the parent of x; a root is its own parent
    std::vector<size_t> pa;

    // Start with every element in its own set
    explicit DSU(size_t size)
    : pa(size) {
        std::iota(pa.begin(), pa.end(), size_t{0});
    }

    // Return the root of the set containing x and point every node on the path at it.
    // Iterative on purpose: unite() can build parent chains as long as the element count,
    // which a recursive walk would turn into equally deep call stacks.
    size_t find(size_t x) {
        size_t root = x;
        while (pa[root] != root) {
            root = pa[root];
        }
        while (pa[x] != root) {
            const size_t next = pa[x];
            pa[x]             = root;
            x                 = next;
        }
        return root;
    }

    // Merge the set containing x into the set containing y
    void unite(size_t x, size_t y) {
        const size_t target = find(y);
        pa[find(x)]         = target;
    }
};
121 | 
// Marks a code path that must never execute; reaching it is undefined behaviour.
[[noreturn]] inline void unreachable()
{
    // GCC and Clang (including clang-cl) provide the builtin; plain MSVC uses __assume
#if !defined(_MSC_VER) || defined(__clang__)
    __builtin_unreachable();
#else
    __assume(false);
#endif
}


--------------------------------------------------------------------------------
/engine/tools/Setting.hpp:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | //---------------------------------------------------------------------------
  3 | #include <atomic>
  4 | #include <cstdint>
  5 | #include <string>
  6 | #include <string_view>
  7 | #include <limits>
  8 | //---------------------------------------------------------------------------
  9 | namespace engine {
 10 | //---------------------------------------------------------------------------
 11 | namespace setting {
 12 | struct Bool {
 13 |     bool defaultValue;
 14 | 
 15 |     constexpr Bool(bool defaultValue = false) noexcept : defaultValue(defaultValue) {}
 16 | 
 17 |     bool operator()(std::string_view name, std::string_view value) const;
 18 | 
 19 |     constexpr auto getDefault() const noexcept {
 20 |         return defaultValue;
 21 |     }
 22 | };
 23 | struct Size {
 24 |     size_t defaultValue;
 25 |     size_t minValue;
 26 |     size_t maxValue;
 27 | 
 28 |     constexpr Size(size_t defaultValue = 0, size_t minValue = std::numeric_limits<size_t>::min(), size_t maxValue = std::numeric_limits<size_t>::max()) noexcept : defaultValue(defaultValue), minValue(minValue), maxValue(maxValue) {}
 29 | 
 30 |     size_t operator()(std::string_view name, std::string_view value) const;
 31 | 
 32 |     constexpr auto getDefault() const noexcept {
 33 |         return defaultValue;
 34 |     }
 35 | };
 36 | }
 37 | //---------------------------------------------------------------------------
 38 | #ifdef SIGMOD_LOCAL
 39 | /// Class for runtime settings using environment variables
 40 | class SettingBase {
 41 |     /// The name of the setting
 42 |     std::string name;
 43 |     /// The cached value of the setting
 44 |     mutable std::string cached;
 45 |     /// Whether initialized
 46 |     mutable std::atomic<uint8_t> initialized{0};
 47 | 
 48 |     /// Ensure the value is ready
 49 |     void ensureReadyImpl() const;
 50 | 
 51 |     protected:
 52 |     /// Compute the value
 53 |     virtual void computeImpl(std::string_view str) const = 0;
 54 | 
 55 |     /// Ensure the value is ready
 56 |     void ensureReady() const {
 57 |         if (initialized.load() == 1) [[likely]]
 58 |             return;
 59 |         return ensureReadyImpl();
 60 |     }
 61 | 
 62 |     public:
 63 |     /// Constructor
 64 |     explicit SettingBase(std::string name);
 65 | 
 66 |     /// Get as string
 67 |     std::string_view getAsString() const;
 68 |     /// Get the name
 69 |     std::string_view getName() const {
 70 |         return name;
 71 |     }
 72 |     /// Set the value
 73 |     void set(std::string value);
 74 | };
 75 | //---------------------------------------------------------------------------
 76 | /// Class for runtime settings using environment variables
 77 | template <typename Parser>
 78 | class Setting : public SettingBase, Parser {
 79 |     /// The cached value
 80 |     mutable decltype(std::declval<Parser>()(std::declval<std::string_view>(), std::declval<std::string_view>())) cached;
 81 | 
 82 |     /// Compute the value
 83 |     void computeImpl(std::string_view str) const override {
 84 |         cached = Parser::operator()(getName(), str);
 85 |     }
 86 | 
 87 |     public:
 88 |     /// Constructor
 89 |     Setting(std::string name, Parser parser) : SettingBase(std::move(name)), Parser(parser) {
 90 |         ensureReady();
 91 |     }
 92 | 
 93 |     /// Get the value
 94 |     auto get() const {
 95 |         ensureReady();
 96 |         return cached;
 97 |     }
 98 | };
 99 | #else
/// Setting for builds without SIGMOD_LOCAL: the name is ignored and
/// get() always yields the parser's default
template <typename Parser>
class Setting : public Parser {
    public:
    /// Constructor; the name argument is accepted but unused
    constexpr Setting([[maybe_unused]] std::string_view name, Parser parser) noexcept
        : Parser(parser) {}

    /// Get the value, i.e. the parser's default
    constexpr auto get() const noexcept { return Parser::getDefault(); }
};
112 | #endif
113 | //---------------------------------------------------------------------------
114 | }
115 | 


--------------------------------------------------------------------------------