├── .clang-format
├── .clangd
├── .gitignore
├── ANNOUNCEMENTS.md
├── CMakeLists.txt
├── README.md
├── download_imdb.sh
├── include
    ├── attribute.h
    ├── common.h
    ├── csv_parser.h
    ├── hardware__ca09.h
    ├── hardware__cp02.h
    ├── hardware__koroneia.h
    ├── hardware__sidon.h
    ├── inner_column.h
    ├── plan.h
    ├── statement.h
    ├── table.h
    └── table_entity.h
├── job
    ├── 10a.sql
    ├── 10b.sql
    ├── 10c.sql
    ├── 11a.sql
    ├── 11b.sql
    ├── 11c.sql
    ├── 11d.sql
    ├── 12a.sql
    ├── 12b.sql
    ├── 12c.sql
    ├── 13a.sql
    ├── 13b.sql
    ├── 13c.sql
    ├── 13d.sql
    ├── 14a.sql
    ├── 14b.sql
    ├── 14c.sql
    ├── 15a.sql
    ├── 15b.sql
    ├── 15c.sql
    ├── 15d.sql
    ├── 16a.sql
    ├── 16b.sql
    ├── 16c.sql
    ├── 16d.sql
    ├── 17a.sql
    ├── 17b.sql
    ├── 17c.sql
    ├── 17d.sql
    ├── 17e.sql
    ├── 17f.sql
    ├── 18a.sql
    ├── 18b.sql
    ├── 18c.sql
    ├── 19a.sql
    ├── 19b.sql
    ├── 19c.sql
    ├── 19d.sql
    ├── 1a.sql
    ├── 1b.sql
    ├── 1c.sql
    ├── 1d.sql
    ├── 20a.sql
    ├── 20b.sql
    ├── 20c.sql
    ├── 21a.sql
    ├── 21b.sql
    ├── 21c.sql
    ├── 22a.sql
    ├── 22b.sql
    ├── 22c.sql
    ├── 22d.sql
    ├── 23a.sql
    ├── 23b.sql
    ├── 23c.sql
    ├── 24a.sql
    ├── 24b.sql
    ├── 25a.sql
    ├── 25b.sql
    ├── 25c.sql
    ├── 26a.sql
    ├── 26b.sql
    ├── 26c.sql
    ├── 27a.sql
    ├── 27b.sql
    ├── 27c.sql
    ├── 28a.sql
    ├── 28b.sql
    ├── 28c.sql
    ├── 29a.sql
    ├── 29b.sql
    ├── 29c.sql
    ├── 2a.sql
    ├── 2b.sql
    ├── 2c.sql
    ├── 2d.sql
    ├── 30a.sql
    ├── 30b.sql
    ├── 30c.sql
    ├── 31a.sql
    ├── 31b.sql
    ├── 31c.sql
    ├── 32a.sql
    ├── 32b.sql
    ├── 33a.sql
    ├── 33b.sql
    ├── 33c.sql
    ├── 3a.sql
    ├── 3b.sql
    ├── 3c.sql
    ├── 4a.sql
    ├── 4b.sql
    ├── 4c.sql
    ├── 5a.sql
    ├── 5b.sql
    ├── 5c.sql
    ├── 6a.sql
    ├── 6b.sql
    ├── 6c.sql
    ├── 6d.sql
    ├── 6e.sql
    ├── 6f.sql
    ├── 7a.sql
    ├── 7b.sql
    ├── 7c.sql
    ├── 8a.sql
    ├── 8b.sql
    ├── 8c.sql
    ├── 8d.sql
    ├── 9a.sql
    ├── 9b.sql
    ├── 9c.sql
    ├── 9d.sql
    ├── README
    ├── fkindexes.sql
    └── schema.sql
├── plans.json
├── src
    ├── build_table.cpp
    ├── csv_parser.cpp
    ├── execute.cpp
    └── statement.cpp
└── tests
    ├── build_database.cpp
    ├── read_sql.cpp
    └── unit_tests.cpp


/.clang-format:
--------------------------------------------------------------------------------
  1 | ---
  2 | IndentWidth: 4
  3 | ---
  4 | Language: Cpp
  5 | Standard: Latest
  6 | AccessModifierOffset: -4
  7 | AlignAfterOpenBracket: DontAlign
  8 | AlignArrayOfStructures: Left
  9 | AlignConsecutiveAssignments:
 10 |   Enabled: true
 11 |   AcrossEmptyLines: false
 12 |   AcrossComments: false
 13 |   AlignCompound: true
 14 |   PadOperators: true
 15 | AlignConsecutiveBitFields: Consecutive
 16 | AlignConsecutiveDeclarations:
 17 |   Enabled: true
 18 |   AcrossEmptyLines: false
 19 |   AcrossComments: false
 20 |   # AlignFunctionDeclarations: true
 21 |   AlignFunctionPointers: true
 22 | AlignConsecutiveMacros: Consecutive
 23 | AlignConsecutiveShortCaseStatements:
 24 |   Enabled: true
 25 |   AcrossEmptyLines: false
 26 |   AcrossComments: false
 27 |   AlignCaseColons: false
 28 | AlignEscapedNewlines: LeftWithLastLine
 29 | AlignOperands: AlignAfterOperator
 30 | AlignTrailingComments:
 31 |   Kind: Always
 32 |   OverEmptyLines: 1
 33 | AllowAllArgumentsOnNextLine: false
 34 | AllowAllParametersOfDeclarationOnNextLine: false
 35 | AllowBreakBeforeNoexceptSpecifier: OnlyWithParen
 36 | AllowShortBlocksOnASingleLine: Empty
 37 | AllowShortCaseLabelsOnASingleLine: true
 38 | AllowShortCompoundRequirementOnASingleLine: true
 39 | AllowShortEnumsOnASingleLine: false
 40 | AllowShortFunctionsOnASingleLine: Inline
 41 | AllowShortIfStatementsOnASingleLine: Never
 42 | AllowShortLambdasOnASingleLine: All
 43 | AllowShortLoopsOnASingleLine: false
 44 | AlwaysBreakBeforeMultilineStrings: true
 45 | BinPackArguments: false
 46 | BinPackParameters: false
 47 | BitFieldColonSpacing: After
 48 | BraceWrapping:
 49 |   AfterCaseLabel: false
 50 |   AfterClass: false
 51 |   AfterControlStatement: Never
 52 |   AfterEnum: false
 53 |   AfterFunction: false
 54 |   AfterNamespace: false
 55 |   AfterStruct: false
 56 |   AfterUnion: false
 57 |   AfterExternBlock: false
 58 |   BeforeCatch: false
 59 |   BeforeElse: false
 60 |   BeforeLambdaBody: false
 61 |   BeforeWhile: false
 62 |   SplitEmptyFunction: false
 63 |   SplitEmptyRecord: false
 64 |   SplitEmptyNamespace: true
 65 | BracedInitializerIndentWidth: 4
 66 | BreakAdjacentStringLiterals: true
 67 | BreakAfterAttributes: Leave
 68 | BreakAfterReturnType: Automatic
 69 | BreakBeforeBinaryOperators: NonAssignment
 70 | BreakBeforeBraces: Custom
 71 | BreakBeforeConceptDeclarations: Always
 72 | BreakBeforeInlineASMColon: Always
 73 | BreakBeforeTernaryOperators: true
 74 | BreakConstructorInitializers: BeforeComma
 75 | BreakFunctionDefinitionParameters: false
 76 | BreakInheritanceList: BeforeComma
 77 | BreakStringLiterals: true
 78 | BreakTemplateDeclarations: Yes
 79 | ColumnLimit: 96
 80 | CompactNamespaces: false
 81 | ConstructorInitializerIndentWidth: 0
 82 | ContinuationIndentWidth: 4
 83 | Cpp11BracedListStyle: true
 84 | DerivePointerAlignment: false
 85 | EmptyLineAfterAccessModifier: Never
 86 | FixNamespaceComments: true
 87 | IncludeBlocks: Preserve
 88 | IndentAccessModifiers: false
 89 | IndentCaseBlocks: false
 90 | IndentCaseLabels: false
 91 | IndentExternBlock: false
 92 | IndentGotoLabels: false
 93 | IndentPPDirectives: BeforeHash
 94 | IndentRequiresClause: false
 95 | IndentWrappedFunctionNames: false
 96 | InsertBraces: true
 97 | InsertNewlineAtEOF: true
 98 | InsertTrailingCommas: Wrapped
 99 | KeepEmptyLines:
100 |   AtEndOfFile: false
101 |   AtStartOfBlock: false
102 |   AtStartOfFile: false
103 | LambdaBodyIndentation: Signature
104 | LineEnding: LF
105 | MaxEmptyLinesToKeep: 2
106 | NamespaceIndentation: None
107 | PPIndentWidth: 4
108 | PackConstructorInitializers: Never
109 | PointerAlignment: Left
110 | QualifierAlignment: Left
111 | ReferenceAlignment: Left
112 | ReflowComments: true
113 | # RemoveEmptyLinesInUnwrappedLines: true
114 | RemoveParentheses: MultipleParentheses
115 | RemoveSemicolon: true
116 | RequiresClausePosition: OwnLine
117 | RequiresExpressionIndentation: OuterScope
118 | SeparateDefinitionBlocks: Always
119 | SortIncludes: CaseInsensitive
120 | SortUsingDeclarations: Lexicographic
121 | SpaceAfterCStyleCast: false
122 | SpaceAfterLogicalNot: false
123 | SpaceAfterTemplateKeyword: true
124 | SpaceBeforeAssignmentOperators: true
125 | SpaceBeforeCaseColon: false
126 | SpaceBeforeCpp11BracedList: false
127 | SpaceBeforeCtorInitializerColon: false
128 | SpaceBeforeInheritanceColon: false
129 | SpaceBeforeParens: Custom
130 | SpaceBeforeParensOptions:
131 |   AfterControlStatements: true
132 |   AfterForeachMacros: false
133 |   AfterFunctionDeclarationName: false
134 |   AfterFunctionDefinitionName: false
135 |   AfterIfMacros: false
136 |   AfterOverloadedOperator: false
137 |   AfterPlacementOperator: true
138 |   AfterRequiresInClause: true
139 |   AfterRequiresInExpression: true
140 |   BeforeNonEmptyParentheses: false
141 | SpaceBeforeRangeBasedForLoopColon: false
142 | SpaceBeforeSquareBrackets: false
143 | SpaceInEmptyBlock: false
144 | SpacesBeforeTrailingComments: 1
145 | SpacesInAngles: Never
146 | SpacesInContainerLiterals: false
147 | SpacesInLineCommentPrefix:
148 |   Minimum: 1
149 |   Maximum: -1
150 | SpacesInParens: Custom
151 | SpacesInParensOptions:
152 |   ExceptDoubleParentheses: true
153 |   InConditionalStatements: false
154 |   InCStyleCasts: false
155 |   InEmptyParentheses: false
156 |   Other: false
157 | SpacesInSquareBrackets: false
158 | TabWidth: 4
159 | UseTab: Never


--------------------------------------------------------------------------------
/.clangd:
--------------------------------------------------------------------------------
1 | CompileFlags:
2 |     Remove:
3 |         - -fmodules-ts
4 |         - -fmodule-mapper=*
5 |         - -fdeps-format=p1689r5


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | /.vscode
2 | /build
3 | /imdb
4 | /output*.txt
5 | /*.tgz
6 | /*.db
7 | /TPC-H*
8 | /job-sample
9 | /.cache


--------------------------------------------------------------------------------
/ANNOUNCEMENTS.md:
--------------------------------------------------------------------------------
 1 | # Announcements
 2 | 
 3 | ### 2025-03-13
 4 |   - We provide a new header, `hardware.h`. This header contains basic hardware information which enables optimizing for a server's cache sizes or vectorization capabilities.
 5 |     - If you miss any information or find issues with the headers, please do not hesitate to contact us.
 6 |   - As one of the goals of this contest is to write efficient code for multiple platforms (some of those are kept secret until the final evaluation), we encourage you to read about vector extensions (e.g., Clang's "Vectors and Extended Vectors").
 7 |   - We are considering changing the benchmark to include all queries of the standard JOB benchmark. We will reset the leaderboard in this case. We will let you know upfront when this change is about to land.
 8 |   - **Third-party libraries:**
 9 |     - We want to reiterate our last notes from 2025-03-04: third-party libraries are **not allowed in your final submission**.
10 |   - **Evaluation workload:**
11 |     -  While there will be a larger variety of queries in the final evaluation workload, we will not add any "surprises". For example, as in the original JoinOrder Benchmark, there will be no joins on string columns.
12 | 
13 | ### 2025-03-04
14 |   - With today's changes to the main repository you forked from, we improved the performance of the evaluation phase
15 |   - **Important notes:**
16 |     - **Deadline change:** The deadline for the final submission has been extended to March 31
17 |     - **Own source files**: The CMake file (which cannot be modified by participants) now includes all *.cpp files in the `src` directory. This way, you can add your own source files and better structure your code.
18 |     - **Third-party library:** We found that some teams use third-party libraries, e.g., for logging. Please note that third-party libraries are not allowed in the contest. You are free to use them during development, but you need to remove them prior to the final submission. Otherwise, your submission is disqualified.
19 | 
20 | ### 2025-02-27
21 |   - The recently pushed GitHub workflow will automatically compile, test, and benchmark your solution on all four systems
22 |   - Check your repository's pull requests
23 |   - The results are currently shown at https://sigmod-contest-25.hpi-sci.de/ and will soon be published on the official contest website
24 | 


--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
  1 | cmake_minimum_required(VERSION 3.16)
  2 | 
  3 | project(SigmodContest)
  4 | 
  5 | set(CMAKE_CXX_STANDARD 17)
  6 | set(CMAKE_CXX_STANDARD_REQUIRED ON)
  7 | set(CMAKE_CXX_EXTENSIONS OFF)
  8 | 
  9 | Include(FetchContent)
 10 | 
 11 | FetchContent_Declare(
 12 |     Catch2
 13 |     URL https://github.com/catchorg/Catch2/archive/refs/tags/v3.8.0.tar.gz
 14 | )
 15 | 
 16 | FetchContent_MakeAvailable(Catch2)
 17 | 
 18 | FetchContent_Declare(
 19 |     abseil
 20 |     URL https://github.com/abseil/abseil-cpp/releases/download/20240722.1/abseil-cpp-20240722.1.tar.gz
 21 | )
 22 | 
 23 | set(ABSL_PROPAGATE_CXX_STD ON)
 24 | set(ABSL_ENABLE_INSTALL ON)
 25 | FetchContent_MakeAvailable(abseil)
 26 | 
 27 | FetchContent_Declare(
 28 |     re2
 29 |     URL https://github.com/google/re2/releases/download/2024-07-02/re2-2024-07-02.tar.gz
 30 | )
 31 | 
 32 | FetchContent_MakeAvailable(re2)
 33 | 
 34 | FetchContent_Declare(
 35 |     json
 36 |     URL https://github.com/nlohmann/json/releases/download/v3.11.3/json.tar.xz
 37 | )
 38 | 
 39 | FetchContent_MakeAvailable(json)
 40 | 
 41 | FetchContent_Declare(
 42 |     sql-parser
 43 |     URL https://github.com/a858438680/sql-parser/archive/refs/tags/win-port-2.tar.gz
 44 | )
 45 | set(HSQL_ENABLE_WERROR OFF)
 46 | FetchContent_MakeAvailable(sql-parser)
 47 | 
 48 | FetchContent_Declare(
 49 |     range-v3
 50 |     URL https://github.com/ericniebler/range-v3/archive/refs/tags/0.12.0.tar.gz
 51 | )
 52 | 
 53 | FetchContent_MakeAvailable(range-v3)
 54 | 
 55 | FetchContent_Declare(
 56 |     fmtlib
 57 |     URL https://github.com/fmtlib/fmt/releases/download/11.1.3/fmt-11.1.3.zip
 58 | )
 59 | 
 60 | FetchContent_MakeAvailable(fmtlib)
 61 | 
 62 | FetchContent_Declare(
 63 |     duckdb
 64 |     URL https://github.com/duckdb/duckdb/archive/refs/tags/v1.2.0.tar.gz
 65 | )
 66 | set(ENABLE_SANITIZER OFF)
 67 | set(ENABLE_UBSAN OFF)
 68 | if(CMAKE_SYSTEM_PROCESSOR MATCHES "ppc|powerpc|ppc64|ppc64le")
 69 |     message("Disabling jemalloc extension of DuckDB on Power.")
 70 |     set(SKIP_EXTENSIONS jemalloc)
 71 | endif()
 72 | FetchContent_MakeAvailable(duckdb)
 73 | 
 74 | # Include all sources from /src directory. CONFIGURE_DEPENDS can be unreliable.
 75 | # Try re-running cmake in case changes are not recognized.
 76 | file(GLOB SIGMODPC_SRC
 77 |     CONFIGURE_DEPENDS
 78 |     "src/*.cpp"
 79 | )
 80 | 
 81 | # Enable server-specific compiler optimizations.
 82 | # Must precede add_executable(run): add_compile_options() only affects targets created after it.
 83 | # Use march=native for all but Power servers, which results in the following error:
 84 | #    clang++-18: error: unsupported option '-march=' for target 'powerpc64le-unknown-linux-gnu'
 85 | # This flag works on other Power systems, but for now, we disable march=native on all Power machines.
 86 | if(NOT CMAKE_SYSTEM_PROCESSOR MATCHES "ppc|powerpc|ppc64|ppc64le")
 87 |     add_compile_options(-march=native)
 88 | endif()
 89 | 
 90 | add_executable(
 91 |     run
 92 |     ${SIGMODPC_SRC}
 93 |     tests/read_sql.cpp
 94 | )
 95 | 
 96 | target_include_directories(run PRIVATE include)
 97 | target_link_libraries(run PRIVATE re2 fmt range-v3 nlohmann_json::nlohmann_json sqlparser duckdb)
 98 | if(CMAKE_SYSTEM_NAME STREQUAL "Windows")
 99 |     target_compile_definitions(run PRIVATE _CRT_SECURE_NO_WARNINGS)
100 | endif()
101 | 
102 | add_executable(
103 |     build_database
104 | 
105 |     tests/build_database.cpp
106 | )
107 | 
108 | target_include_directories(build_database PRIVATE include)
109 | target_link_libraries(build_database PRIVATE fmt duckdb)
110 | if(CMAKE_SYSTEM_NAME STREQUAL "Windows")
111 |     target_compile_definitions(build_database PRIVATE _CRT_SECURE_NO_WARNINGS)
112 | endif()
113 | 
114 | add_executable(
115 |     unit_tests
116 | 
117 |     ${SIGMODPC_SRC}
118 |     tests/unit_tests.cpp
119 | )
120 | 
121 | target_include_directories(unit_tests PRIVATE include)
122 | target_link_libraries(unit_tests PRIVATE range-v3 fmt Catch2::Catch2WithMain duckdb)
123 | if(CMAKE_SYSTEM_NAME STREQUAL "Windows")
124 |     target_compile_definitions(unit_tests PRIVATE _CRT_SECURE_NO_WARNINGS)
125 | endif()
126 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # SIGMOD Contest 2025
  2 | 
  3 | ## Task
  4 | 
  5 | Given the joining pipeline and the pre-filtered input data, your task is to implement an efficient joining algorithm to accelerate the execution time of the joining pipeline. Specifically, you need to implement the following function in `src/execute.cpp`:
  6 | 
  7 | ```C++
  8 | ColumnarTable execute(const Plan& plan, void* context);
  9 | ```
 10 | 
 11 | Optionally, you can implement these two functions as well to prepare any global context (e.g., thread pool) to accelerate the execution.
 12 | 
 13 | ```C++
 14 | void* build_context();
 15 | void destroy_context(void*);
 16 | ```
 17 | 
 18 | ### Input format
 19 | 
 20 | The input plan in the above function is defined as the following struct.
 21 | 
 22 | ```C++
 23 | struct ScanNode {
 24 |     size_t base_table_id;
 25 | };
 26 | 
 27 | struct JoinNode {
 28 |     bool   build_left;
 29 |     size_t left;
 30 |     size_t right;
 31 |     size_t left_attr;
 32 |     size_t right_attr;
 33 | };
 34 | 
 35 | struct PlanNode {
 36 |     std::variant<ScanNode, JoinNode>          data;
 37 |     std::vector<std::tuple<size_t, DataType>> output_attrs;
 38 | };
 39 | 
 40 | struct Plan {
 41 |     std::vector<PlanNode>      nodes;
 42 |     std::vector<ColumnarTable> inputs;
 43 |     size_t root;
 44 | };
 45 | ```
 46 | 
 47 | **Scan**:
 48 | - The `base_table_id` member refers to which input table in the `inputs` member of a plan is used by the Scan node.
 49 | - Each item in the `output_attrs` indicates which column in the base table should be output and what type it is.
 50 | 
 51 | **Join**:
 52 | - The `build_left` member refers to which side the hash table should be built on, where `true` indicates building the hash table on the left child, and `false` indicates the opposite.
 53 | - The `left` and `right` members are the indexes of the left and right child of the Join node in the `nodes` member of a plan, respectively.
 54 | - The `left_attr` and `right_attr` members are the join condition of the Join node. Suppose that there are two records, `left_record` and `right_record`, from the intermediate results of the left and right child, respectively. The members indicate that the two records should be joined when `left_record[left_attr] == right_record[right_attr]`.
 55 | - Each item in the `output_attrs` indicates which column in the result of the children should be output and what type it is. Suppose that the left child has $n_l$ columns and the right child has $n_r$ columns. The index $i$ then takes a value in $\{0, \dots, n_l + n_r - 1\}$, where the ranges $\{0, \dots, n_l - 1\}$ and $\{n_l, \dots, n_l + n_r - 1\}$ indicate that the output column comes from the left and the right child, respectively.
 56 | 
 57 | **Root**: The `root` member of a plan indicates which node is the root node of the execution plan tree.
 58 | 
 59 | ### Data format
 60 | 
 61 | The input and output data both follow a simple columnar data format.
 62 | 
 63 | ```C++
 64 | enum class DataType {
 65 |     INT32,       // 4-byte integer
 66 |     INT64,       // 8-byte integer
 67 |     FP64,        // 8-byte floating point
 68 |     VARCHAR,     // string of arbitrary length
 69 | };
 70 | 
 71 | constexpr size_t PAGE_SIZE = 8192;
 72 | 
 73 | struct alignas(8) Page {
 74 |     std::byte data[PAGE_SIZE];
 75 | };
 76 | 
 77 | struct Column {
 78 |     DataType           type;
 79 |     std::vector<Page*> pages;
 80 | };
 81 | 
 82 | struct ColumnarTable {
 83 |     size_t              num_rows;
 84 |     std::vector<Column> columns;
 85 | };
 86 | ```
 87 | 
 88 | A `ColumnarTable` first stores how many rows the table has in the `num_rows` member, then stores each column separately as a `Column`. Each `Column` has a type and stores the items of the column in several pages. Each page is 8192 bytes. In each page:
 89 | 
 90 | - The first 2 bytes are a `uint16_t` which is the number of rows $n_r$ in the page.
 91 | - The following 2 bytes are a `uint16_t` which is the number of non-`NULL` values $n_v$ in the page.
 92 | - The first $n_r$ bits in the last $\left\lfloor\frac{(n_r + 7)}{8}\right\rfloor$ bytes form a bitmap indicating whether the corresponding row has a value or is `NULL`.
 93 | 
 94 | **Fixed-length attribute**: There are $n_v$ contiguous values beginning at the first aligned position. For example, in a `Page` of `INT32`, the first value is at `data + 4`, while in a `Page` of `INT64` or `FP64`, the first value is at `data + 8`.
 95 | 
 96 | **Variable-length attribute**: There are $n_v$ contiguous offsets (`uint16_t`) beginning at `data + 4` in a `Page`, followed by the content of the varchars, which begins at `char_begin = data + 4 + n_r * 2`. Each offset indicates the ending offset of the corresponding `VARCHAR` with respect to `char_begin`.
 97 | 
 98 | **Long string**: When the length of a string is longer than `PAGE_SIZE - 7`, it cannot fit in a normal page. Special pages will be used to store such a string. If $n_r$ `== 0xffff` or $n_r$ `== 0xfffe`, the `Page` is a special page for a long string. `0xffff` means the page is the first page of a long string and `0xfffe` means the page is a following page of a long string. The following 2 bytes are a `uint16_t` indicating the number of chars in the page, beginning at `data + 4`.
 99 | 
100 | ## Requirement
101 | 
102 | - You can only modify the file `src/execute.cpp` in the project.
103 | - You must not use any third-party libraries. If you are using libraries for development (e.g., for logging), ensure to remove them before the final submission.
104 | - The joining pipeline (including order and build side) is optimized by PostgreSQL for `Hash Join` only. However, in the `execute` function, you are free to use other algorithms and change the pipeline, as long as the result is equivalent.
105 | - For any struct listed above, all of its members are public. You can manipulate them in free functions as desired as long as the original files are not changed and the manipulated objects can be destructed properly.
106 | - Your program will be evaluated on an unpublished benchmark sampled from the original JOB benchmark. You will not be able to access the test benchmark.
107 | 
108 | ## Quick start
109 | 
110 | > [!TIP]
111 | > Run all the following commands in the root directory of this project.
112 | 
113 | First, download the imdb dataset.
114 | 
115 | ```bash
116 | ./download_imdb.sh
117 | ```
118 | 
119 | Second, build the project.
120 | 
121 | ```bash
122 | cmake -S . -B build -DCMAKE_BUILD_TYPE=Release -Wno-dev
123 | cmake --build build -- -j $(nproc)
124 | ```
125 | 
126 | Third, prepare the DuckDB database for correctness checking.
127 | 
128 | ```bash
129 | ./build/build_database imdb.db
130 | ```
131 | 
132 | Now, you can run the tests:
133 | ```bash
134 | ./build/run plans.json
135 | ```
136 | > [!TIP]
137 | > If you want to use `Ninja Multi-Config` as the generator, the commands will look like:
138 | > 
139 | >```bash
140 | > cmake -S . -B build -Wno-dev -G "Ninja Multi-Config"
141 | > cmake --build build --config Release -- -j $(nproc)
142 | > ./build/Release/build_database imdb.db
143 | > ./build/Release/run plans.json
144 | > ```
145 | 
146 | # Hardware
147 | 
148 | The evaluation is automatically executed on four different servers. On multi-socket machines, the benchmarks are bound to a single socket (using `numactl -m 0 -N 0`).
149 | 
150 |  * **AMD #1**
151 |     * CPU: 2x AMD EPYC 7F72 (SMT 2, 24 cores, 48 threads)
152 |     * Main memory: 256 GB
153 |  * **ARM #1**
154 |     * CPU: 1x Ampere Altra Max (SMT 1, 128 cores, 128 threads)
155 |     * Main memory: 512 GB
156 |  * **IBM #1**
157 |     * CPU: 8x IBM Power8 (SMT 8, 12 cores, 96 threads)
158 |     * Main memory: 1024 GB
159 |  * **Intel #1**
160 |     * CPU: 4x Intel Xeon E7-4880 v2 (SMT 2, 15 cores, 30 threads)
161 |     * Main memory: 512 GB
162 | 
163 | Additional evaluation servers:
164 |  * **AMD #2**
165 |     * CPU: 1x AMD EPYC 7343 (SMT 2, 16 cores, 32 threads; 20 threads enabled)
166 |     * Main memory: 96 GB
167 |  * **ARM #2**
168 |     * CPU: 1x NVIDIA GH200 Grace Hopper (SMT 1, 72 cores, 72 threads)
169 |     * Main memory: 480 GB
170 |  * **IBM #2**
171 |     * CPU: 2x IBM Power10 (SMT 8, 12 cores, 96 threads)
172 |     * Main memory: 512 GB
173 |  * **Intel #2**
174 |     * CPU: 2x Intel Xeon Platinum 8352Y (SMT 2, 32 cores, 64 threads)
175 |     * Main memory: 256 GB
176 | 
177 | Code is compiled with Clang 18.
178 | 


--------------------------------------------------------------------------------
/download_imdb.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | # Downloads imdb.tgz with wget or curl and extracts it into the imdb directory; exits 1 on any failure.
 3 | url="https://event.cwi.nl/da/job/imdb.tgz"
 4 | output_file="imdb.tgz"
 5 | target_dir="imdb"
 6 | 
 7 | # Detect and select downloader
 8 | if command -v wget &> /dev/null; then
 9 |     if ! wget "$url" -O "$output_file"; then
10 |         echo "Error: downloading failed" >&2
11 |         exit 1
12 |     fi
13 | elif command -v curl &> /dev/null; then
14 |     if ! curl -L "$url" -o "$output_file"; then
15 |         echo "Error: downloading failed" >&2
16 |         exit 1
17 |     fi
18 | else
19 |     echo "Error: please install wget or curl to download imdb.tgz" >&2
20 |     exit 1
21 | fi
22 | 
23 | # make target directory (if not exists)
24 | if ! mkdir -p "$target_dir"; then
25 |     echo "Error: cannot make directory '$target_dir'" >&2
26 |     exit 1
27 | fi
28 | 
29 | # decompress the file to the target directory
30 | if ! tar -xf "$output_file" -C "$target_dir"; then
31 |     echo "Error: failed to decompress the file" >&2
32 |     exit 1
33 | fi
34 | 
35 | echo "Success!"
36 | 


--------------------------------------------------------------------------------
/include/attribute.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <array>
 4 | #include <string>
 5 | 
 6 | #include <fmt/core.h>
 7 | 
 8 | enum class DataType {
 9 |     INT32,       // 4-byte integer
10 |     INT64,       // 8-byte integer
11 |     FP64,        // 8-byte floating point
12 |     VARCHAR,     // string of arbitrary length
13 | };
14 | 
15 | template <>
16 | struct fmt::formatter<DataType> { // fmt formatter that prints a DataType as its enumerator name
17 |     template <class ParseContext>
18 |     constexpr auto parse(ParseContext& ctx) { // no format specification is interpreted
19 |         return ctx.begin();
20 |     }
21 | 
22 |     template <class FormatContext>
23 |     auto format(DataType value, FormatContext& ctx) const {
24 |         static constexpr std::array<std::string_view, 4> names{ // indexed by enumerator value
25 |             "INT32",
26 |             "INT64",
27 |             "FP64",
28 |             "VARCHAR",
29 |         };
30 |         return fmt::format_to(ctx.out(), "{}", names[static_cast<std::size_t>(value)]);
31 |     }
32 | };
33 | // Switches on `type`; runs __VA_ARGS__ in the matching case with TYPE aliased to int32_t, int64_t, double or std::string.
34 | #define DISPATCH_DATA_TYPE(type, TYPE, ...) \
35 |     do {                                    \
36 |         switch (type) {                     \
37 |         case DataType::INT32: {             \
38 |             using TYPE = int32_t;           \
39 |             __VA_ARGS__                     \
40 |             break;                          \
41 |         }                                   \
42 |         case DataType::INT64: {             \
43 |             using TYPE = int64_t;           \
44 |             __VA_ARGS__                     \
45 |             break;                          \
46 |         }                                   \
47 |         case DataType::FP64: {              \
48 |             using TYPE = double;            \
49 |             __VA_ARGS__                     \
50 |             break;                          \
51 |         }                                   \
52 |         case DataType::VARCHAR: {           \
53 |             using TYPE = std::string;       \
54 |             __VA_ARGS__                     \
55 |             break;                          \
56 |         }                                   \
57 |         }                                   \
58 |     } while (0)
59 | 
60 | struct Attribute { // A typed, named attribute.
61 |     DataType    type; // data type of the attribute
62 |     std::string name; // name of the attribute
63 | };


--------------------------------------------------------------------------------
/include/common.h:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | 
  3 | #include <filesystem>
  4 | #include <numeric>
  5 | #include <vector>
  6 | 
  7 | #include <cstdint>
  8 | #include <cstdlib>
  9 | 
 10 | namespace detail { // mixing primitives used by hash_combine
 11 | inline uint32_t rotl32(uint32_t x, uint8_t bits) { // rotates x left by `bits` (taken modulo 32)
 12 |     return (x << (bits & 31u)) | (x >> ((32u - bits) & 31u)); // masked: never shifts by >= 32 (UB)
 13 | }
 14 | 
 15 | inline void hash_combine_impl(uint32_t& h1, uint32_t k1) { // folds k1 into the 32-bit state h1
 16 |     constexpr uint32_t c1 = 0xcc9e2d51u;
 17 |     constexpr uint32_t c2 = 0x1b873593u;
 18 | 
 19 |     k1 *= c1;
 20 |     k1  = rotl32(k1, 15);
 21 |     k1 *= c2;
 22 | 
 23 |     h1 ^= k1;
 24 |     h1  = rotl32(h1, 13);
 25 |     h1  = h1 * 5u + 0xe6546b64u;
 26 | }
 27 | 
 28 | inline void hash_combine_impl(uint64_t& h, uint64_t k) { // folds k into the 64-bit state h
 29 |     constexpr uint64_t m = UINT64_C(0xc6a4a7935bd1e995);
 30 |     constexpr int      r = 47;
 31 | 
 32 |     k *= m;
 33 |     k ^= k >> r;
 34 |     k *= m;
 35 | 
 36 |     h ^= k;
 37 |     h *= m;
 38 |     h += 0xe6546b64;
 39 | }
 40 | } // namespace detail
 41 | 
 42 | inline void hash_combine(std::size_t& seed, std::size_t k) {
 43 |     // Only 32-bit and 64-bit std::size_t are supported; any other width fails to compile.
 44 |     static_assert(sizeof(std::size_t) == 4 || sizeof(std::size_t) == 8,
 45 |         "Unsupported size_t size for hash_combine");
 46 |     if constexpr (sizeof(std::size_t) == 8) {
 47 |         auto mixed = static_cast<uint64_t>(seed);
 48 |         detail::hash_combine_impl(mixed, static_cast<uint64_t>(k));
 49 |         seed = mixed;
 50 |     } else {
 51 |         auto mixed = static_cast<uint32_t>(seed);
 52 |         detail::hash_combine_impl(mixed, static_cast<uint32_t>(k));
 53 |         seed = mixed;
 54 |     }
 55 | }
 56 | 
 57 | class File { // Move-only owner of a C stdio stream; the stream is closed on destruction.
 58 | public:
 59 |     File(const std::filesystem::path& path, const char* mode)
 60 |     : handle(std::fopen(path.string().c_str(), mode)) {
 61 |         if (handle == nullptr) { // std::fopen yields a null stream on failure
 62 |             throw std::runtime_error("Failed to open file: " + path.string());
 63 |         }
 64 |     }
 65 | 
 66 |     File(const File&)            = delete;
 67 |     File& operator=(const File&) = delete;
 68 | 
 69 |     File(File&& other) noexcept
 70 |     : handle(other.handle) {
 71 |         other.handle = nullptr; // the moved-from object must not close the stream
 72 |     }
 73 | 
 74 |     File& operator=(File&& other) noexcept {
 75 |         if (this == &other) {
 76 |             return *this;
 77 |         }
 78 |         close(); // release the stream currently held before adopting the other one
 79 |         handle       = other.handle;
 80 |         other.handle = nullptr;
 81 |         return *this;
 82 |     }
 83 | 
 84 |     ~File() { close(); }
 85 |     operator FILE*() const noexcept { return handle; } // lets a File be passed to C stdio calls
 86 | 
 87 | private:
 88 |     FILE* handle = nullptr;
 89 | 
 90 |     void close() noexcept {
 91 |         if (handle != nullptr) {
 92 |             std::fclose(handle);
 93 |         }
 94 |         handle = nullptr;
 95 |     }
 96 | };
 97 | 
 98 | inline std::string read_file(const std::filesystem::path& path) { // reads the whole file as bytes
 99 |     File f(path, "rb");
100 |     const long size = ::fseek(f, 0, SEEK_END) == 0 ? ::ftell(f) : -1L; // ftell yields -1 on failure
101 |     if (size < 0 || ::fseek(f, 0, SEEK_SET) != 0) {
102 |         throw std::runtime_error("Failed to determine size of file: " + path.string());
103 |     }
104 |     std::string result(static_cast<std::size_t>(size), '\0');
105 |     result.resize(::fread(result.data(), 1, result.size(), f)); // drop bytes a short read left unset
106 |     return result;
107 | }
108 | 
109 | struct DSU { // Disjoint-set union (union-find) over the indices 0 .. size - 1.
110 |     std::vector<size_t> pa; // pa[i] is the parent of i; a root is its own parent
111 | 
112 |     explicit DSU(size_t size) : pa(size) { std::iota(pa.begin(), pa.end(), 0); } // all singletons
113 |     size_t find(size_t x) { // returns the root of x's set and compresses the path walked
114 |         size_t root = x;
115 |         while (pa[root] != root) { root = pa[root]; } // iterative: deep chains cannot overflow the stack
116 |         for (size_t next; pa[x] != root; x = next) { next = pa[x]; pa[x] = root; }
117 |         return root;
118 |     }
119 |     void unite(size_t x, size_t y) { pa[find(x)] = find(y); } // x's root is attached under y's root
120 | };
121 | 
122 | [[noreturn]] inline void unreachable() {
123 |     // Marks a point that control flow must never reach.
124 |     // A compiler-specific extension is used where one is available.
125 |     // Should no extension be used, an empty body combined with the noreturn
126 |     // attribute still results in undefined behavior.
127 | #if !defined(_MSC_VER) || defined(__clang__) // GCC, Clang
128 |     __builtin_unreachable();
129 | #else // MSVC
130 |     __assume(false);
131 | #endif
132 | }


--------------------------------------------------------------------------------
/include/csv_parser.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <vector>
 4 | 
 5 | #include <cstdlib>
 6 | 
 7 | class CSVParser {
 8 | public:
 9 |     enum Error {
10 |         Ok,
11 |         QuoteNotClosed,
12 |         InconsistentColumns,
13 |         NoTrailingComma,
14 |     };
15 | 
16 |     CSVParser(char escape = '"', char sep = ',', bool has_trailing_comma = false)
17 |     : escape_(escape)
18 |     , comma_(sep)
19 |     , has_trailing_comma_(has_trailing_comma) {}
20 | 
21 |     [[nodiscard]] Error execute(const char* buffer, size_t len);
22 |     [[nodiscard]] Error finish();
23 | 
24 |     virtual void on_field(size_t col_idx, size_t row_idx, const char* begin, size_t len) = 0;
25 | 
26 | private:
27 |     // configure
28 |     char escape_{'"'}; // may also be '\\'
29 |     char comma_{','};  // may also be '|'
30 |     // true means # commas = # columns and the last comma in each line is followed by the record
31 |     // seperator; false means # commas + 1 = # columns
32 |     bool has_trailing_comma_{false};
33 | 
34 |     // states
35 |     std::vector<char> current_field_;
36 |     size_t            col_idx_{0};
37 |     size_t            row_idx_{0};
38 |     size_t            num_cols_{0};
39 |     bool              after_first_row_{false};
40 |     bool              quoted_{false};
41 |     bool              after_field_sep_{false};
42 |     bool              after_record_sep_{false};
43 |     bool              escaping_{false};
44 |     bool              newlining_{false};
45 | };
46 | 


--------------------------------------------------------------------------------
/include/hardware__ca09.h:
--------------------------------------------------------------------------------
// Hardware information for Ampere Altra Max node ca09.
//
// This header only defines SPC__* preprocessor macros describing the node.
// Several macros below are defined WITHOUT a value: they expand to nothing,
// so they can be tested with #ifdef but cannot be used as a number.

// Architecture from `uname -srm`.
#define SPC__AARCH64

// CPU from `/proc/cpuinfo`.
// NOTE(review): the CPU name is an empty string for this node — confirm whether
// that is intentional.
#define SPC__CPU_NAME ""

// The servers might have multiple CPUs. We limit all benchmarks to a single node using numactl. The listed CPU numbers
// below are for a single CPU. The listed NUMA numbers are just meant to give you a rough idea of the system.
#define SPC__CORE_COUNT 128
#define SPC__THREAD_COUNT 128
#define SPC__NUMA_NODE_COUNT 1
#define SPC__NUMA_NODES_ACTIVE_IN_BENCHMARK 1

// Main memory per NUMA node (MB).
#define SPC__NUMA_NODE_DRAM_MB 515809

// Obtained from `lsb_release -a`.
#define SPC__OS "Ubuntu 24.04.1 LTS"

// Obtained from: `uname -srm`.
#define SPC__KERNEL "Linux 6.8.0-50-generic aarch64"

// ARM: possible options are SVE, SVE2, and NEON. No ARM CPU older than Ampere Altra Max will be used.
#define SPC__SUPPORTS_NEON

// Cache information from `getconf -a | grep CACHE`.
// As Ubuntu did not list all numbers, we also took cache sizes from `cat /sys/devices/system/cpu/cpu0/cache/index*/size`
// Entries for which no number is available are defined without a value (all *_ASSOC macros, the L2 line size, and
// every L3/L4 entry).
#define SPC__LEVEL1_ICACHE_SIZE                 65536
#define SPC__LEVEL1_ICACHE_ASSOC
#define SPC__LEVEL1_ICACHE_LINESIZE             64
#define SPC__LEVEL1_DCACHE_SIZE                 65536
#define SPC__LEVEL1_DCACHE_ASSOC
#define SPC__LEVEL1_DCACHE_LINESIZE             64
#define SPC__LEVEL2_CACHE_SIZE                  1048576
#define SPC__LEVEL2_CACHE_ASSOC
#define SPC__LEVEL2_CACHE_LINESIZE
#define SPC__LEVEL3_CACHE_SIZE
#define SPC__LEVEL3_CACHE_ASSOC
#define SPC__LEVEL3_CACHE_LINESIZE
#define SPC__LEVEL4_CACHE_SIZE 
#define SPC__LEVEL4_CACHE_ASSOC
#define SPC__LEVEL4_CACHE_LINESIZE
45 | 


--------------------------------------------------------------------------------
/include/hardware__cp02.h:
--------------------------------------------------------------------------------
// Hardware information for IBM Power8 node cp02.
//
// This header only defines SPC__* preprocessor macros describing the node.

// Architecture from `uname -srm`.
#define SPC__PPC64LE

// CPU from `/proc/cpuinfo`.
#define SPC__CPU_NAME "POWER8 (architected), altivec supported"

// The servers might have multiple CPUs. We limit all benchmarks to a single node using numactl. The listed CPU numbers
// below are for a single CPU. The listed NUMA numbers are just meant to give you a rough idea of the system.
#define SPC__CORE_COUNT 12
#define SPC__THREAD_COUNT 96
#define SPC__NUMA_NODE_COUNT 8
#define SPC__NUMA_NODES_ACTIVE_IN_BENCHMARK 1

// Main memory per NUMA node (MB).
#define SPC__NUMA_NODE_DRAM_MB 1039964

// Obtained from `lsb_release -a`.
#define SPC__OS "Ubuntu 20.04.6 LTS"

// Obtained from: `uname -srm`.
// NOTE(review): the machine field of this string says x86_64, which disagrees with SPC__PPC64LE above — looks like a
// copy-paste slip; confirm against the actual `uname -srm` output of cp02.
#define SPC__KERNEL "Linux 5.4.0-137-generic x86_64"

// IBM: possible options are VSX, VMX, and MMA. No IBM CPU older than Power8 will be used.
#define SPC__SUPPORTS_VSX
#define SPC__SUPPORTS_VMX

// Cache information from `getconf -a | grep CACHE`.
// All level-4 entries are 0 on this node.
#define SPC__LEVEL1_ICACHE_SIZE                 32768
#define SPC__LEVEL1_ICACHE_ASSOC                8
#define SPC__LEVEL1_ICACHE_LINESIZE             128
#define SPC__LEVEL1_DCACHE_SIZE                 65536
#define SPC__LEVEL1_DCACHE_ASSOC                8
#define SPC__LEVEL1_DCACHE_LINESIZE             128
#define SPC__LEVEL2_CACHE_SIZE                  524288
#define SPC__LEVEL2_CACHE_ASSOC                 8
#define SPC__LEVEL2_CACHE_LINESIZE              128
#define SPC__LEVEL3_CACHE_SIZE                  8388608
#define SPC__LEVEL3_CACHE_ASSOC                 8
#define SPC__LEVEL3_CACHE_LINESIZE              128
#define SPC__LEVEL4_CACHE_SIZE                  0
#define SPC__LEVEL4_CACHE_ASSOC                 0
#define SPC__LEVEL4_CACHE_LINESIZE              0
45 | 


--------------------------------------------------------------------------------
/include/hardware__koroneia.h:
--------------------------------------------------------------------------------
// Hardware information for AMD EPYC 7F72 node koroneia.
//
// This header only defines SPC__* preprocessor macros describing the node.
// Macros that are defined WITHOUT a value expand to nothing, so they can be
// tested with #ifdef but cannot be used as a number.

// Architecture from `uname -srm`.
#define SPC__X86_64

// CPU from `/proc/cpuinfo`.
#define SPC__CPU_NAME "AMD EPYC 7F72 24-Core Processor"

// The servers might have multiple CPUs. We limit all benchmarks to a single node using numactl. The listed CPU numbers
// below are for a single CPU. The listed NUMA numbers are just meant to give you a rough idea of the system.
#define SPC__CORE_COUNT 24
#define SPC__THREAD_COUNT 48
#define SPC__NUMA_NODE_COUNT 2
#define SPC__NUMA_NODES_ACTIVE_IN_BENCHMARK 1

// Main memory per NUMA node (MB).
#define SPC__NUMA_NODE_DRAM_MB 257699

// Obtained from `lsb_release -a`.
#define SPC__OS "Ubuntu 24.04.2 LTS"

// Obtained from: `uname -srm`.
#define SPC__KERNEL "Linux 5.15.0-106-generic x86_64"

// AMD: possible options are AVX, AVX2, and AVX512. No AMD CPU older than AMD EPYC 7F72 will be used.
#define SPC__SUPPORTS_AVX
#define SPC__SUPPORTS_AVX2

// Cache information from `getconf -a | grep CACHE`.
// NOTE(review): the L1 instruction-cache associativity and the L4 associativity/line size carry no value here —
// presumably not reported by getconf; confirm.
#define SPC__LEVEL1_ICACHE_SIZE                 32768
#define SPC__LEVEL1_ICACHE_ASSOC
#define SPC__LEVEL1_ICACHE_LINESIZE             64
#define SPC__LEVEL1_DCACHE_SIZE                 32768
#define SPC__LEVEL1_DCACHE_ASSOC                8
#define SPC__LEVEL1_DCACHE_LINESIZE             64
#define SPC__LEVEL2_CACHE_SIZE                  524288
#define SPC__LEVEL2_CACHE_ASSOC                 8
#define SPC__LEVEL2_CACHE_LINESIZE              64
#define SPC__LEVEL3_CACHE_SIZE                  16777216
#define SPC__LEVEL3_CACHE_ASSOC                 16
#define SPC__LEVEL3_CACHE_LINESIZE              64
#define SPC__LEVEL4_CACHE_SIZE                  0
#define SPC__LEVEL4_CACHE_ASSOC
#define SPC__LEVEL4_CACHE_LINESIZE
45 | 


--------------------------------------------------------------------------------
/include/hardware__sidon.h:
--------------------------------------------------------------------------------
// Hardware information for Intel Xeon E7-4880 v2 node sidon.
//
// This header only defines SPC__* preprocessor macros describing the node.
// Macros that are defined WITHOUT a value expand to nothing, so they can be
// tested with #ifdef but cannot be used as a number.

// Architecture from `uname -srm`.
#define SPC__X86_64

// CPU from `/proc/cpuinfo`.
#define SPC__CPU_NAME "Intel(R) Xeon(R) CPU E7-4880 v2 @ 2.50GHz"

// The servers might have multiple CPUs. We limit all benchmarks to a single node using numactl. The listed CPU numbers
// below are for a single CPU. The listed NUMA numbers are just meant to give you a rough idea of the system.
#define SPC__CORE_COUNT 15
#define SPC__THREAD_COUNT 30
#define SPC__NUMA_NODE_COUNT 4
#define SPC__NUMA_NODES_ACTIVE_IN_BENCHMARK 1

// Main memory per NUMA node (MB).
#define SPC__NUMA_NODE_DRAM_MB 515809

// Obtained from `lsb_release -a`.
#define SPC__OS "Ubuntu 22.04.4 LTS"

// Obtained from: `uname -srm`.
#define SPC__KERNEL "Linux 5.15.0-116-generic x86_64"

// Intel: possible options are AVX, AVX2, and AVX512. No Intel CPU older than Intel Xeon E7-4880 v2 will be used.
#define SPC__SUPPORTS_AVX

// Cache information from `getconf -a | grep CACHE`.
// NOTE(review): the L1 instruction-cache associativity and the L4 associativity/line size carry no value here —
// presumably not reported by getconf; confirm.
#define SPC__LEVEL1_ICACHE_SIZE                 32768
#define SPC__LEVEL1_ICACHE_ASSOC
#define SPC__LEVEL1_ICACHE_LINESIZE             64
#define SPC__LEVEL1_DCACHE_SIZE                 32768
#define SPC__LEVEL1_DCACHE_ASSOC                8
#define SPC__LEVEL1_DCACHE_LINESIZE             64
#define SPC__LEVEL2_CACHE_SIZE                  262144
#define SPC__LEVEL2_CACHE_ASSOC                 8
#define SPC__LEVEL2_CACHE_LINESIZE              64
#define SPC__LEVEL3_CACHE_SIZE                  39321600
#define SPC__LEVEL3_CACHE_ASSOC                 20
#define SPC__LEVEL3_CACHE_LINESIZE              64
#define SPC__LEVEL4_CACHE_SIZE                  0
#define SPC__LEVEL4_CACHE_ASSOC
#define SPC__LEVEL4_CACHE_LINESIZE
44 | 


--------------------------------------------------------------------------------
/include/plan.h:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright 2025 Matthias Boehm, TU Berlin
  3 |  *
  4 |  * Licensed under the Apache License, Version 2.0 (the "License");
  5 |  * you may not use this file except in compliance with the License.
  6 |  * You may obtain a copy of the License at
  7 |  *
  8 |  *     http://www.apache.org/licenses/LICENSE-2.0
  9 |  *
 10 |  * Unless required by applicable law or agreed to in writing, software
 11 |  * distributed under the License is distributed on an "AS IS" BASIS,
 12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 |  * See the License for the specific language governing permissions and
 14 |  * limitations under the License.
 15 |  */
 16 | 
 17 | // API of the SIGMOD 2025 Programming Contest,
 18 | // See https://sigmod-contest-2025.github.io/index.html
 19 | #pragma once
 20 | 
 21 | #include <attribute.h>
 22 | #include <statement.h>
 23 | // #include <table.h>
 24 | 
 25 | // supported attribute data types
 26 | 
 27 | enum class NodeType {
 28 |     HashJoin,
 29 |     Scan,
 30 | };
 31 | 
 32 | struct ScanNode {
 33 |     size_t base_table_id;
 34 | };
 35 | 
 36 | struct JoinNode {
 37 |     bool   build_left;
 38 |     size_t left;
 39 |     size_t right;
 40 |     size_t left_attr;
 41 |     size_t right_attr;
 42 | };
 43 | 
 44 | struct PlanNode {
 45 |     std::variant<ScanNode, JoinNode>          data;
 46 |     std::vector<std::tuple<size_t, DataType>> output_attrs;
 47 | 
 48 |     PlanNode(std::variant<ScanNode, JoinNode>     data,
 49 |         std::vector<std::tuple<size_t, DataType>> output_attrs)
 50 |     : data(std::move(data))
 51 |     , output_attrs(std::move(output_attrs)) {}
 52 | };
 53 | 
 54 | constexpr size_t PAGE_SIZE = 8192;
 55 | 
 56 | struct alignas(8) Page {
 57 |     std::byte data[PAGE_SIZE];
 58 | };
 59 | 
 60 | struct Column {
 61 |     DataType           type;
 62 |     std::vector<Page*> pages;
 63 | 
 64 |     Page* new_page() {
 65 |         auto ret = new Page;
 66 |         pages.push_back(ret);
 67 |         return ret;
 68 |     }
 69 | 
 70 |     Column(DataType data_type)
 71 |     : type(data_type)
 72 |     , pages() {}
 73 | 
 74 |     Column(Column&& other) noexcept
 75 |     : type(other.type)
 76 |     , pages(std::move(other.pages)) {
 77 |         other.pages.clear();
 78 |     }
 79 | 
 80 |     Column& operator=(Column&& other) noexcept {
 81 |         if (this != &other) {
 82 |             for (auto* page: pages) {
 83 |                 delete page;
 84 |             }
 85 |             type  = other.type;
 86 |             pages = std::move(other.pages);
 87 |             other.pages.clear();
 88 |         }
 89 |         return *this;
 90 |     }
 91 | 
 92 |     Column(const Column&)            = delete;
 93 |     Column& operator=(const Column&) = delete;
 94 | 
 95 |     ~Column() {
 96 |         for (auto* page: pages) {
 97 |             delete page;
 98 |         }
 99 |     }
100 | };
101 | 
102 | struct ColumnarTable {
103 |     size_t              num_rows{0};
104 |     std::vector<Column> columns;
105 | };
106 | 
// Conversion helpers between the paged columnar representation and a
// row-wise representation; both are only declared in this header.

// Takes a ColumnarTable and returns a vector of rows (each a vector of Data)
// together with a vector of DataType.
// NOTE(review): presumably one DataType per column — confirm against the definition.
std::tuple<std::vector<std::vector<Data>>, std::vector<DataType>> from_columnar(
    const ColumnarTable& table);
// Takes rows of Data plus a vector of DataType and returns a ColumnarTable.
// NOTE(review): presumably the inverse of from_columnar — confirm against the definition.
ColumnarTable from_table(const std::vector<std::vector<Data>>& table,
    const std::vector<DataType>&                               data_types);
111 | 
112 | struct Plan {
113 |     std::vector<PlanNode>      nodes;
114 |     std::vector<ColumnarTable> inputs;
115 |     // std::vector<Table>         tables;
116 |     size_t root;
117 | 
118 |     size_t new_join_node(bool                     build_left,
119 |         size_t                                    left,
120 |         size_t                                    right,
121 |         size_t                                    left_attr,
122 |         size_t                                    right_attr,
123 |         std::vector<std::tuple<size_t, DataType>> output_attrs) {
124 |         JoinNode join{
125 |             .build_left = build_left,
126 |             .left       = left,
127 |             .right      = right,
128 |             .left_attr  = left_attr,
129 |             .right_attr = right_attr,
130 |         };
131 |         auto ret = nodes.size();
132 |         nodes.emplace_back(join, std::move(output_attrs));
133 |         return ret;
134 |     }
135 | 
136 |     size_t new_scan_node(size_t                   base_table_id,
137 |         std::vector<std::tuple<size_t, DataType>> output_attrs) {
138 |         ScanNode scan{.base_table_id = base_table_id};
139 |         auto     ret = nodes.size();
140 |         nodes.emplace_back(scan, std::move(output_attrs));
141 |         return ret;
142 |     }
143 | 
144 |     size_t new_input(ColumnarTable input) {
145 |         auto ret = inputs.size();
146 |         inputs.emplace_back(std::move(input));
147 |         return ret;
148 |     }
149 | };
150 | 
151 | template <class T>
152 | struct ColumnInserter {
153 |     Column&              column;
154 |     size_t               last_page_idx = 0;
155 |     uint16_t             num_rows      = 0;
156 |     size_t               data_end      = data_begin();
157 |     std::vector<uint8_t> bitmap;
158 | 
159 |     constexpr static size_t data_begin() {
160 |         if (sizeof(T) < 4) {
161 |             return 4;
162 |         } else {
163 |             return sizeof(T);
164 |         }
165 |     }
166 | 
167 |     ColumnInserter(Column& column)
168 |     : column(column) {
169 |         bitmap.resize(PAGE_SIZE);
170 |     }
171 | 
172 |     std::byte* get_page() {
173 |         if (last_page_idx == column.pages.size()) [[unlikely]] {
174 |             column.new_page();
175 |         }
176 |         auto* page = column.pages[last_page_idx];
177 |         return page->data;
178 |     }
179 | 
180 |     void save_page() {
181 |         auto* page                         = get_page();
182 |         *reinterpret_cast<uint16_t*>(page) = num_rows;
183 |         *reinterpret_cast<uint16_t*>(page + 2) =
184 |             static_cast<uint16_t>((data_end - data_begin()) / sizeof(T));
185 |         size_t bitmap_size = (num_rows + 7) / 8;
186 |         memcpy(page + PAGE_SIZE - bitmap_size, bitmap.data(), bitmap_size);
187 |         ++last_page_idx;
188 |         num_rows = 0;
189 |         data_end = data_begin();
190 |     }
191 | 
192 |     void set_bitmap(size_t idx) {
193 |         size_t byte_idx   = idx / 8;
194 |         size_t bit_idx    = idx % 8;
195 |         bitmap[byte_idx] |= (0x1 << bit_idx);
196 |     }
197 | 
198 |     void unset_bitmap(size_t idx) {
199 |         size_t byte_idx   = idx / 8;
200 |         size_t bit_idx    = idx % 8;
201 |         bitmap[byte_idx] &= ~(0x1 << bit_idx);
202 |     }
203 | 
204 |     void insert(T value) {
205 |         if (data_end + 4 + num_rows / 8 + 1 > PAGE_SIZE) [[unlikely]] {
206 |             save_page();
207 |         }
208 |         auto* page                              = get_page();
209 |         *reinterpret_cast<T*>(page + data_end)  = value;
210 |         data_end                               += sizeof(T);
211 |         set_bitmap(num_rows);
212 |         ++num_rows;
213 |     }
214 | 
215 |     void insert_null() {
216 |         if (data_end + num_rows / 8 + 1 > PAGE_SIZE) [[unlikely]] {
217 |             save_page();
218 |         }
219 |         unset_bitmap(num_rows);
220 |         ++num_rows;
221 |     }
222 | 
223 |     void finalize() {
224 |         if (num_rows != 0) {
225 |             save_page();
226 |         }
227 |     }
228 | };
229 | 
230 | template <>
231 | struct ColumnInserter<std::string> {
232 |     Column&              column;
233 |     size_t               last_page_idx = 0;
234 |     uint16_t             num_rows      = 0;
235 |     uint16_t             data_size     = 0;
236 |     size_t               offset_end    = 4;
237 |     std::vector<char>    data;
238 |     std::vector<uint8_t> bitmap;
239 | 
240 |     constexpr static size_t offset_begin() { return 4; }
241 | 
242 |     ColumnInserter(Column& column)
243 |     : column(column) {
244 |         data.resize(PAGE_SIZE);
245 |         bitmap.resize(PAGE_SIZE);
246 |     }
247 | 
248 |     std::byte* get_page() {
249 |         if (last_page_idx == column.pages.size()) [[unlikely]] {
250 |             column.new_page();
251 |         }
252 |         auto* page = column.pages[last_page_idx];
253 |         return page->data;
254 |     }
255 | 
256 |     void save_long_string(std::string_view value) {
257 |         size_t offset     = 0;
258 |         auto   first_page = true;
259 |         while (offset < value.size()) {
260 |             auto* page = get_page();
261 |             if (first_page) {
262 |                 *reinterpret_cast<uint16_t*>(page) = 0xffff;
263 |                 first_page                         = false;
264 |             } else {
265 |                 *reinterpret_cast<uint16_t*>(page) = 0xfffe;
266 |             }
267 |             auto page_data_len = std::min(value.size() - offset, PAGE_SIZE - 4);
268 |             *reinterpret_cast<uint16_t*>(page + 2) = page_data_len;
269 |             memcpy(page + 4, value.data() + offset, page_data_len);
270 |             offset += page_data_len;
271 |             ++last_page_idx;
272 |         }
273 |     }
274 | 
275 |     void save_page() {
276 |         auto* page                         = get_page();
277 |         *reinterpret_cast<uint16_t*>(page) = num_rows;
278 |         *reinterpret_cast<uint16_t*>(page + 2) =
279 |             static_cast<uint16_t>((offset_end - offset_begin()) / 2);
280 |         size_t bitmap_size = (num_rows + 7) / 8;
281 |         memcpy(page + offset_end, data.data(), data_size);
282 |         memcpy(page + PAGE_SIZE - bitmap_size, bitmap.data(), bitmap_size);
283 |         ++last_page_idx;
284 |         num_rows   = 0;
285 |         data_size  = 0;
286 |         offset_end = offset_begin();
287 |     }
288 | 
289 |     void set_bitmap(size_t idx) {
290 |         size_t byte_idx   = idx / 8;
291 |         size_t bit_idx    = idx % 8;
292 |         bitmap[byte_idx] |= (0x1 << bit_idx);
293 |     }
294 | 
295 |     void unset_bitmap(size_t idx) {
296 |         size_t byte_idx   = idx / 8;
297 |         size_t bit_idx    = idx % 8;
298 |         bitmap[byte_idx] &= ~(0x1 << bit_idx);
299 |     }
300 | 
301 |     void insert(std::string_view value) {
302 |         if (value.size() > PAGE_SIZE - 7) {
303 |             if (num_rows > 0) {
304 |                 save_page();
305 |             }
306 |             save_long_string(value);
307 |         } else {
308 |             if (offset_end + sizeof(uint16_t) + data_size + value.size() + num_rows / 8 + 1
309 |                 > PAGE_SIZE) {
310 |                 save_page();
311 |             }
312 |             memcpy(data.data() + data_size, value.data(), value.size());
313 |             data_size  += static_cast<uint16_t>(value.size());
314 |             auto* page  = get_page();
315 |             *reinterpret_cast<uint16_t*>(page + offset_end)  = data_size;
316 |             offset_end                                      += sizeof(uint16_t);
317 |             set_bitmap(num_rows);
318 |             ++num_rows;
319 |         }
320 |     }
321 | 
322 |     void insert_null() {
323 |         if (offset_end + data_size + num_rows / 8 + 1 > PAGE_SIZE) [[unlikely]] {
324 |             save_page();
325 |         }
326 |         unset_bitmap(num_rows);
327 |         ++num_rows;
328 |     }
329 | 
330 |     void finalize() {
331 |         if (num_rows != 0) {
332 |             save_page();
333 |         }
334 |     }
335 | };
336 | 
// Functions of the contest API; they are only declared in this header.
namespace Contest {

// Returns an opaque context pointer.
// NOTE(review): presumably created once, handed to execute(), and released with destroy_context() — confirm.
void* build_context();
// Takes a context pointer.
// NOTE(review): presumably releases what build_context() created — confirm.
void  destroy_context(void*);

// Takes a plan and a context pointer and returns a ColumnarTable.
// NOTE(review): presumably the result of evaluating the plan's root node — confirm.
ColumnarTable execute(const Plan& plan, void* context);

} // namespace Contest
345 | 


--------------------------------------------------------------------------------
/include/statement.h:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | 
  3 | #include <memory>
  4 | #include <optional>
  5 | #include <string>
  6 | #include <unordered_map>
  7 | #include <variant>
  8 | #include <vector>
  9 | 
 10 | #include <fmt/core.h>
 11 | #include <re2/re2.h>
 12 | 
 13 | using Data    = std::variant<int32_t, int64_t, double, std::string, std::monostate>;
 14 | using Literal = std::variant<int64_t, double, std::string, std::monostate>;
 15 | 
 16 | template <>
 17 | struct fmt::formatter<Data> {
 18 |     template <class ParseContext>
 19 |     constexpr auto parse(ParseContext& ctx) {
 20 |         return ctx.begin();
 21 |     }
 22 | 
 23 |     template <class FormatContext>
 24 |     auto format(const Data& value, FormatContext& ctx) const {
 25 |         return std::visit(
 26 |             [&ctx](const auto& value) {
 27 |                 using T = std::decay_t<decltype(value)>;
 28 |                 if constexpr (std::is_same_v<T, std::monostate>) {
 29 |                     return fmt::format_to(ctx.out(), "NULL");
 30 |                 } else {
 31 |                     return fmt::format_to(ctx.out(), "{}", value);
 32 |                 }
 33 |             },
 34 |             value);
 35 |     }
 36 | };
 37 | 
 38 | struct Attribute;
 39 | struct Statement;
 40 | struct Comparison;
 41 | struct LogicalOperation;
 42 | struct InnerColumnBase;
 43 | 
 44 | // AST Node
 45 | struct Statement {
 46 |     virtual ~Statement()                                                     = default;
 47 |     virtual std::string          pretty_print(int indent = 0) const          = 0;
 48 |     virtual bool                 eval(const std::vector<Data>& record) const = 0;
 49 |     virtual std::vector<uint8_t> eval(
 50 |         const std::vector<const InnerColumnBase*>& table) const = 0;
 51 | };
 52 | 
 53 | struct Comparison: Statement {
 54 |     size_t column;
 55 | 
 56 |     enum Op {
 57 |         EQ,
 58 |         NEQ,
 59 |         LT,
 60 |         GT,
 61 |         LEQ,
 62 |         GEQ,
 63 |         LIKE,
 64 |         NOT_LIKE,
 65 |         IS_NULL,
 66 |         IS_NOT_NULL
 67 |     };
 68 | 
 69 |     Op      op;
 70 |     Literal value;
 71 | 
 72 |     Comparison(size_t col, Op o, Literal val)
 73 |     : column(col)
 74 |     , op(o)
 75 |     , value(std::move(val)) {}
 76 | 
 77 |     std::string pretty_print(int indent) const override {
 78 |         return fmt::format("{:{}}{} {} {}", "", indent, column, opToString(), valueToString());
 79 |     }
 80 | 
 81 |     bool                 eval(const std::vector<Data>& record) const override;
 82 |     std::vector<uint8_t> eval(const std::vector<const InnerColumnBase*>& table) const override;
 83 | 
 84 |     std::string opToString() const {
 85 |         switch (op) {
 86 |         case EQ:          return "=";
 87 |         case NEQ:         return "!=";
 88 |         case LT:          return "<";
 89 |         case GT:          return ">";
 90 |         case LEQ:         return "<=";
 91 |         case GEQ:         return ">=";
 92 |         case LIKE:        return "LIKE";
 93 |         case NOT_LIKE:    return "NOT LIKE";
 94 |         case IS_NULL:     return "IS NULL";
 95 |         case IS_NOT_NULL: return "IS NOT NULL";
 96 |         default:          return "??";
 97 |         }
 98 |     }
 99 | 
100 |     std::string valueToString() const {
101 |         if (op == IS_NULL || op == IS_NOT_NULL) {
102 |             return "";
103 |         }
104 |         return visit(
105 |             [](auto&& arg) -> std::string {
106 |                 using T = std::decay_t<decltype(arg)>;
107 |                 if constexpr (std::is_same_v<T, std::string>) {
108 |                     return fmt::format("'{}'", arg);
109 |                 } else if constexpr (std::is_same_v<T, std::monostate>) {
110 |                     return "";
111 |                 } else {
112 |                     return fmt::format("{}", arg);
113 |                 }
114 |             },
115 |             value);
116 |     }
117 | 
118 |     static bool like_match(std::string_view str, const std::string& pattern) {
119 |         // static cache and mutex
120 |         thread_local auto regex_cache = std::unordered_map<std::string, std::unique_ptr<RE2>>{};
121 | 
122 |         const RE2* re = nullptr;
123 |         auto       it = regex_cache.find(pattern);
124 |         if (it != regex_cache.end()) {
125 |             re = it->second.get();
126 |         }
127 | 
128 |         // cache miss and compile
129 |         if (!re) {
130 |             // conver to regex
131 |             std::string regex_str;
132 |             for (char c: pattern) {
133 |                 if (c == '%') {
134 |                     regex_str += ".*";
135 |                 } else if (c == '_') {
136 |                     regex_str += '.';
137 |                 } else {
138 |                     // escape sepcical characters
139 |                     if (c == '\\' || c == '.' || c == '^' || c == '$' || c == '|' || c == '?'
140 |                         || c == '*' || c == '+' || c == '(' || c == ')' || c == '[' || c == ']'
141 |                         || c == '{' || c == '}') {
142 |                         regex_str += '\\';
143 |                     }
144 |                     regex_str += c;
145 |                 }
146 |             }
147 | 
148 |             RE2::Options options;
149 | 
150 |             auto new_re = std::make_unique<RE2>(regex_str, options);
151 |             if (!new_re->ok()) {
152 |                 return false; // invalid regex
153 |             }
154 | 
155 |             re = new_re.get();
156 |             regex_cache.emplace(pattern, std::move(new_re));
157 |         }
158 | 
159 |         // execute full match
160 |         return RE2::FullMatch(str, *re);
161 |     }
162 | 
163 |     static std::optional<double> get_numeric_value(const Data& data) {
164 |         if (auto* i32 = std::get_if<int32_t>(&data)) {
165 |             return *i32;
166 |         } else if (auto* i64 = std::get_if<int64_t>(&data)) {
167 |             return static_cast<double>(*i64);
168 |         } else if (auto* d = std::get_if<double>(&data)) {
169 |             return *d;
170 |         } else {
171 |             return std::nullopt;
172 |         }
173 |     }
174 | 
175 |     static std::optional<double> get_numeric_value(const Literal& value) {
176 |         if (auto* i = std::get_if<int64_t>(&value)) {
177 |             return *i;
178 |         } else if (auto* d = std::get_if<double>(&value)) {
179 |             return *d;
180 |         } else {
181 |             return std::nullopt;
182 |         }
183 |     }
184 | };
185 | 
186 | struct LogicalOperation: Statement {
187 |     enum Type {
188 |         AND,
189 |         OR,
190 |         NOT
191 |     };
192 | 
193 |     Type                                    op_type;
194 |     std::vector<std::unique_ptr<Statement>> children;
195 | 
196 |     static std::unique_ptr<LogicalOperation> makeAnd(std::unique_ptr<Statement> l,
197 |         std::unique_ptr<Statement>                                              r) {
198 |         auto node     = std::make_unique<LogicalOperation>();
199 |         node->op_type = AND;
200 |         node->children.push_back(std::move(l));
201 |         node->children.push_back(std::move(r));
202 |         return node;
203 |     }
204 | 
205 |     static std::unique_ptr<LogicalOperation> makeOr(std::unique_ptr<Statement> l,
206 |         std::unique_ptr<Statement>                                             r) {
207 |         auto node     = std::make_unique<LogicalOperation>();
208 |         node->op_type = OR;
209 |         node->children.push_back(std::move(l));
210 |         node->children.push_back(std::move(r));
211 |         return node;
212 |     }
213 | 
214 |     static std::unique_ptr<LogicalOperation> makeNot(std::unique_ptr<Statement> child) {
215 |         auto node     = std::make_unique<LogicalOperation>();
216 |         node->op_type = NOT;
217 |         node->children.push_back(std::move(child));
218 |         return node;
219 |     }
220 | 
221 |     std::string pretty_print(int indent) const override {
222 |         std::string op_str = [this] {
223 |             switch (op_type) {
224 |             case AND: return "AND";
225 |             case OR:  return "OR";
226 |             case NOT: return "NOT";
227 |             default:  return "UNKNOWN";
228 |             }
229 |         }();
230 | 
231 |         std::string result = fmt::format("{:{}}[{}]\n", "", indent, op_str);
232 | 
233 |         for (auto& child: children) {
234 |             result += child->pretty_print(indent + 2) + "\n";
235 |         }
236 | 
237 |         if (!children.empty()) {
238 |             result.pop_back();
239 |         }
240 |         return result;
241 |     }
242 | 
243 |     bool                 eval(const std::vector<Data>& record) const override;
244 |     std::vector<uint8_t> eval(const std::vector<const InnerColumnBase*>& table) const override;
245 | };
246 | 


--------------------------------------------------------------------------------
/include/table.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <filesystem>
 4 | #include <fmt/core.h>
 5 | #include <range/v3/all.hpp>
 6 | 
 7 | #include <attribute.h>
 8 | #include <plan.h>
 9 | #include <statement.h>
10 | 
11 | struct Table {
12 | public:
13 |     Table() = default;
14 | 
15 |     // The vectors are taken by value and moved into the members, so callers that hand over
16 |     // temporaries (or std::move their arguments) are not charged a second deep copy.
17 |     Table(std::vector<std::vector<Data>> data, std::vector<DataType> types)
18 |     : types_(std::move(types))
19 |     , data_(std::move(data)) {}
20 | 
21 |     static ColumnarTable from_csv(const std::vector<Attribute>& attributes,
22 |         const std::filesystem::path&                            path,
23 |         Statement*                                              filter,
24 |         bool                                                    header = false);
25 | 
26 |     static Table from_columnar(const ColumnarTable& input);
27 | 
28 |     ColumnarTable to_columnar() const;
29 | 
30 |     // Accessors for the record storage (one inner vector per record) and the DataType list.
31 |     const std::vector<std::vector<Data>>& table() const { return data_; }
32 |     std::vector<std::vector<Data>>&       table() { return data_; }
33 |     const std::vector<DataType>&          types() const { return types_; }
34 | 
35 |     size_t number_rows() const { return this->data_.size(); }
36 |     size_t number_cols() const { return this->types_.size(); }
37 | 
38 |     // Prints each record on its own stdout line as '|'-joined fields; strings are quoted and escaped.
39 |     static void print(const std::vector<std::vector<Data>>& data) {
40 |         namespace views = ranges::views;
41 | 
42 |         auto escape_string = [](const std::string& s) {
43 |             std::string escaped;
44 |             for (char c: s) {
45 |                 switch (c) {
46 |                 case '"':  escaped += "\\\""; break;
47 |                 case '\\': escaped += "\\\\"; break;
48 |                 case '\n': escaped += "\\n"; break;
49 |                 case '\r': escaped += "\\r"; break;
50 |                 case '\t': escaped += "\\t"; break;
51 |                 default:   escaped += c; break;
52 |                 }
53 |             }
54 |             return escaped;
55 |         };
56 | 
57 |         for (auto& record: data) {
58 |             auto line = record
59 |                       | views::transform([&escape_string](const Data& field) -> std::string {
60 |                             return std::visit(
61 |                                 [&escape_string](const auto& arg) {
62 |                                     using T = std::decay_t<decltype(arg)>;
63 |                                     using namespace std::string_literals;
64 |                                     if constexpr (std::is_same_v<T, std::monostate>) {
65 |                                         return "NULL"s;
66 |                                     } else if constexpr (std::is_same_v<T, int32_t>
67 |                                                          || std::is_same_v<T, int64_t>
68 |                                                          || std::is_same_v<T, double>) {
69 |                                         return fmt::format("{}", arg);
70 |                                     } else if constexpr (std::is_same_v<T, std::string>) {
71 |                                         return fmt::format("\"{}\"", escape_string(arg));
72 |                                     }
73 |                                 },
74 |                                 field);
75 |                         })
76 |                       | views::join('|') | ranges::to<std::string>();
77 |             fmt::println("{}", line);
78 |         }
79 |     }
80 | 
81 | private:
82 |     std::vector<DataType>          types_;
83 |     std::vector<std::vector<Data>> data_;
84 | 
85 |     void set_attributes(const std::vector<Attribute>& attributes) {
86 |         this->types_.clear();
87 |         for (auto& attr: attributes) {
88 |             this->types_.push_back(attr.type);
89 |         }
90 |     }
91 | };
92 | 


--------------------------------------------------------------------------------
/include/table_entity.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <fmt/core.h>
 4 | #include <string>
 5 | 
 6 | #include "common.h"
 7 | 
 8 | struct TableEntity {
 9 |     std::string table; // compared first by operator<; also part of == and std::hash
10 |     int         id;    // breaks ties in operator<; also part of == and std::hash
11 | 
12 |     friend bool operator==(const TableEntity& left, const TableEntity& right);
13 |     friend bool operator!=(const TableEntity& left, const TableEntity& right);
14 |     friend bool operator<(const TableEntity& left, const TableEntity& right);
15 | };
16 | 
17 | inline bool operator==(const TableEntity& left, const TableEntity& right) {
18 |     return left.id == right.id && left.table == right.table; // equal only when both fields match
19 | }
20 | 
21 | inline bool operator!=(const TableEntity& left, const TableEntity& right) {
22 |     return !operator==(left, right); // exact negation of the equality test
23 | }
24 | 
25 | inline bool operator<(const TableEntity& left, const TableEntity& right) {
26 |     // Lexicographic order: `table` decides first and `id` only breaks ties.
27 |     // One three-way string comparison settles both the "less" and the "greater" case.
28 |     const int table_order = left.table.compare(right.table);
29 |     if (table_order != 0) {
30 |         return table_order < 0;
31 |     }
32 |     return left.id < right.id;
33 | }
34 | 
35 | namespace std {
36 | template <>
37 | struct hash<TableEntity> {
38 |     // Folds the hashes of `table` and `id`, in that order, into a seed that starts at 0.
39 |     size_t operator()(const TableEntity& te) const noexcept {
40 |         size_t digest = 0;
41 |         hash_combine(digest, hash<string>{}(te.table));
42 |         hash_combine(digest, hash<int>{}(te.id));
43 |         return digest;
44 |     }
45 | };
46 | } // namespace std
47 | 
48 | template <>
49 | struct fmt::formatter<TableEntity> {
50 |     // No format specifiers are consumed: the parse position is returned unchanged.
51 |     template <typename ParseCtx>
52 |     constexpr auto parse(ParseCtx& parse_ctx) { return parse_ctx.begin(); }
53 | 
54 |     // Writes the entity as "(table, id)" to the context's output iterator.
55 |     template <typename FormatCtx>
56 |     auto format(const TableEntity& te, FormatCtx& format_ctx) const {
57 |         return fmt::format_to(format_ctx.out(), "({}, {})", te.table, te.id);
58 |     }
59 | };
60 | 


--------------------------------------------------------------------------------
/job/10a.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(chn.name) AS uncredited_voiced_character, MIN(t.title) AS russian_movie FROM char_name AS chn, cast_info AS ci, company_name AS cn, company_type AS ct, movie_companies AS mc, role_type AS rt, title AS t WHERE ci.note  like '%(voice)%' and ci.note like '%(uncredited)%' AND cn.country_code  = '[ru]' AND rt.role  = 'actor' AND t.production_year > 2005 AND t.id = mc.movie_id AND t.id = ci.movie_id AND ci.movie_id = mc.movie_id AND chn.id = ci.person_role_id AND rt.id = ci.role_id AND cn.id = mc.company_id AND ct.id = mc.company_type_id;
2 | 


--------------------------------------------------------------------------------
/job/10b.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(chn.name) AS character, MIN(t.title) AS russian_mov_with_actor_producer FROM char_name AS chn, cast_info AS ci, company_name AS cn, company_type AS ct, movie_companies AS mc, role_type AS rt, title AS t WHERE ci.note  like '%(producer)%' AND cn.country_code  = '[ru]' AND rt.role  = 'actor' AND t.production_year > 2010 AND t.id = mc.movie_id AND t.id = ci.movie_id AND ci.movie_id = mc.movie_id AND chn.id = ci.person_role_id AND rt.id = ci.role_id AND cn.id = mc.company_id AND ct.id = mc.company_type_id;
2 | 


--------------------------------------------------------------------------------
/job/10c.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(chn.name) AS character, MIN(t.title) AS movie_with_american_producer FROM char_name AS chn, cast_info AS ci, company_name AS cn, company_type AS ct, movie_companies AS mc, role_type AS rt, title AS t WHERE ci.note  like '%(producer)%' AND cn.country_code  = '[us]' AND t.production_year > 1990 AND t.id = mc.movie_id AND t.id = ci.movie_id AND ci.movie_id = mc.movie_id AND chn.id = ci.person_role_id AND rt.id = ci.role_id AND cn.id = mc.company_id AND ct.id = mc.company_type_id;
2 | 


--------------------------------------------------------------------------------
/job/11a.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(cn.name) AS from_company, MIN(lt.link) AS movie_link_type, MIN(t.title) AS non_polish_sequel_movie FROM company_name AS cn, company_type AS ct, keyword AS k, link_type AS lt, movie_companies AS mc, movie_keyword AS mk, movie_link AS ml, title AS t WHERE cn.country_code !='[pl]' AND (cn.name LIKE '%Film%' OR cn.name LIKE '%Warner%') AND ct.kind ='production companies' AND k.keyword ='sequel' AND lt.link LIKE '%follow%' AND mc.note IS NULL AND t.production_year BETWEEN 1950 AND 2000 AND lt.id = ml.link_type_id AND ml.movie_id = t.id AND t.id = mk.movie_id AND mk.keyword_id = k.id AND t.id = mc.movie_id AND mc.company_type_id = ct.id AND mc.company_id = cn.id AND ml.movie_id = mk.movie_id AND ml.movie_id = mc.movie_id AND mk.movie_id = mc.movie_id;
2 | 


--------------------------------------------------------------------------------
/job/11b.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(cn.name) AS from_company, MIN(lt.link) AS movie_link_type, MIN(t.title) AS sequel_movie FROM company_name AS cn, company_type AS ct, keyword AS k, link_type AS lt, movie_companies AS mc, movie_keyword AS mk, movie_link AS ml, title AS t WHERE cn.country_code !='[pl]' AND (cn.name LIKE '%Film%' OR cn.name LIKE '%Warner%') AND ct.kind ='production companies' AND k.keyword ='sequel' AND lt.link LIKE '%follows%' AND mc.note IS NULL AND t.production_year  = 1998 and t.title like '%Money%' AND lt.id = ml.link_type_id AND ml.movie_id = t.id AND t.id = mk.movie_id AND mk.keyword_id = k.id AND t.id = mc.movie_id AND mc.company_type_id = ct.id AND mc.company_id = cn.id AND ml.movie_id = mk.movie_id AND ml.movie_id = mc.movie_id AND mk.movie_id = mc.movie_id;
2 | 


--------------------------------------------------------------------------------
/job/11c.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(cn.name) AS from_company, MIN(mc.note) AS production_note, MIN(t.title) AS movie_based_on_book FROM company_name AS cn, company_type AS ct, keyword AS k, link_type AS lt, movie_companies AS mc, movie_keyword AS mk, movie_link AS ml, title AS t WHERE cn.country_code  !='[pl]' and (cn.name like '20th Century Fox%' or cn.name like 'Twentieth Century Fox%') AND ct.kind  != 'production companies' and ct.kind is not NULL AND k.keyword  in ('sequel', 'revenge', 'based-on-novel') AND mc.note  is not NULL AND t.production_year  > 1950 AND lt.id = ml.link_type_id AND ml.movie_id = t.id AND t.id = mk.movie_id AND mk.keyword_id = k.id AND t.id = mc.movie_id AND mc.company_type_id = ct.id AND mc.company_id = cn.id AND ml.movie_id = mk.movie_id AND ml.movie_id = mc.movie_id AND mk.movie_id = mc.movie_id;
2 | 


--------------------------------------------------------------------------------
/job/11d.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(cn.name) AS from_company, MIN(mc.note) AS production_note, MIN(t.title) AS movie_based_on_book FROM company_name AS cn, company_type AS ct, keyword AS k, link_type AS lt, movie_companies AS mc, movie_keyword AS mk, movie_link AS ml, title AS t WHERE cn.country_code  !='[pl]' AND ct.kind  != 'production companies' and ct.kind is not NULL AND k.keyword  in ('sequel', 'revenge', 'based-on-novel') AND mc.note  is not NULL AND t.production_year  > 1950 AND lt.id = ml.link_type_id AND ml.movie_id = t.id AND t.id = mk.movie_id AND mk.keyword_id = k.id AND t.id = mc.movie_id AND mc.company_type_id = ct.id AND mc.company_id = cn.id AND ml.movie_id = mk.movie_id AND ml.movie_id = mc.movie_id AND mk.movie_id = mc.movie_id;
2 | 


--------------------------------------------------------------------------------
/job/12a.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(cn.name) AS movie_company, MIN(mi_idx.info) AS rating, MIN(t.title) AS drama_horror_movie FROM company_name AS cn, company_type AS ct, info_type AS it1, info_type AS it2, movie_companies AS mc, movie_info AS mi, movie_info_idx AS mi_idx, title AS t WHERE cn.country_code  = '[us]' AND ct.kind  = 'production companies' AND it1.info = 'genres' AND it2.info = 'rating' AND mi.info  in ('Drama', 'Horror') AND mi_idx.info  > '8.0' AND t.production_year  between 2005 and 2008 AND t.id = mi.movie_id AND t.id = mi_idx.movie_id AND mi.info_type_id = it1.id AND mi_idx.info_type_id = it2.id AND t.id = mc.movie_id AND ct.id = mc.company_type_id AND cn.id = mc.company_id AND mc.movie_id = mi.movie_id AND mc.movie_id = mi_idx.movie_id AND mi.movie_id = mi_idx.movie_id;
2 | 


--------------------------------------------------------------------------------
/job/12b.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(mi.info) AS budget, MIN(t.title) AS unsuccsessful_movie FROM company_name AS cn, company_type AS ct, info_type AS it1, info_type AS it2, movie_companies AS mc, movie_info AS mi, movie_info_idx AS mi_idx, title AS t WHERE cn.country_code ='[us]' AND ct.kind  is not NULL and (ct.kind ='production companies' or ct.kind = 'distributors') AND it1.info ='budget' AND it2.info ='bottom 10 rank' AND t.production_year >2000 AND (t.title LIKE 'Birdemic%' OR t.title LIKE '%Movie%') AND t.id = mi.movie_id AND t.id = mi_idx.movie_id AND mi.info_type_id = it1.id AND mi_idx.info_type_id = it2.id AND t.id = mc.movie_id AND ct.id = mc.company_type_id AND cn.id = mc.company_id AND mc.movie_id = mi.movie_id AND mc.movie_id = mi_idx.movie_id AND mi.movie_id = mi_idx.movie_id;
2 | 


--------------------------------------------------------------------------------
/job/12c.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(cn.name) AS movie_company, MIN(mi_idx.info) AS rating, MIN(t.title) AS mainstream_movie FROM company_name AS cn, company_type AS ct, info_type AS it1, info_type AS it2, movie_companies AS mc, movie_info AS mi, movie_info_idx AS mi_idx, title AS t WHERE cn.country_code  = '[us]' AND ct.kind  = 'production companies' AND it1.info = 'genres' AND it2.info = 'rating' AND mi.info  in ('Drama', 'Horror', 'Western', 'Family') AND mi_idx.info  > '7.0' AND t.production_year  between 2000 and 2010 AND t.id = mi.movie_id AND t.id = mi_idx.movie_id AND mi.info_type_id = it1.id AND mi_idx.info_type_id = it2.id AND t.id = mc.movie_id AND ct.id = mc.company_type_id AND cn.id = mc.company_id AND mc.movie_id = mi.movie_id AND mc.movie_id = mi_idx.movie_id AND mi.movie_id = mi_idx.movie_id;
2 | 


--------------------------------------------------------------------------------
/job/13a.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(mi.info) AS release_date, MIN(miidx.info) AS rating, MIN(t.title) AS german_movie FROM company_name AS cn, company_type AS ct, info_type AS it, info_type AS it2, kind_type AS kt, movie_companies AS mc, movie_info AS mi, movie_info_idx AS miidx, title AS t WHERE cn.country_code ='[de]' AND ct.kind ='production companies' AND it.info ='rating' AND it2.info ='release dates' AND kt.kind ='movie' AND mi.movie_id = t.id AND it2.id = mi.info_type_id AND kt.id = t.kind_id AND mc.movie_id = t.id AND cn.id = mc.company_id AND ct.id = mc.company_type_id AND miidx.movie_id = t.id AND it.id = miidx.info_type_id AND mi.movie_id = miidx.movie_id AND mi.movie_id = mc.movie_id AND miidx.movie_id = mc.movie_id;
2 | 


--------------------------------------------------------------------------------
/job/13b.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(cn.name) AS producing_company, MIN(miidx.info) AS rating, MIN(t.title) AS movie_about_winning FROM company_name AS cn, company_type AS ct, info_type AS it, info_type AS it2, kind_type AS kt, movie_companies AS mc, movie_info AS mi, movie_info_idx AS miidx, title AS t WHERE cn.country_code ='[us]' AND ct.kind ='production companies' AND it.info ='rating' AND it2.info ='release dates' AND kt.kind ='movie' AND t.title  != '' AND (t.title LIKE '%Champion%' OR t.title LIKE '%Loser%') AND mi.movie_id = t.id AND it2.id = mi.info_type_id AND kt.id = t.kind_id AND mc.movie_id = t.id AND cn.id = mc.company_id AND ct.id = mc.company_type_id AND miidx.movie_id = t.id AND it.id = miidx.info_type_id AND mi.movie_id = miidx.movie_id AND mi.movie_id = mc.movie_id AND miidx.movie_id = mc.movie_id;
2 | 


--------------------------------------------------------------------------------
/job/13c.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(cn.name) AS producing_company, MIN(miidx.info) AS rating, MIN(t.title) AS movie_about_winning FROM company_name AS cn, company_type AS ct, info_type AS it, info_type AS it2, kind_type AS kt, movie_companies AS mc, movie_info AS mi, movie_info_idx AS miidx, title AS t WHERE cn.country_code ='[us]' AND ct.kind ='production companies' AND it.info ='rating' AND it2.info ='release dates' AND kt.kind ='movie' AND t.title  != '' AND (t.title LIKE 'Champion%' OR t.title LIKE 'Loser%') AND mi.movie_id = t.id AND it2.id = mi.info_type_id AND kt.id = t.kind_id AND mc.movie_id = t.id AND cn.id = mc.company_id AND ct.id = mc.company_type_id AND miidx.movie_id = t.id AND it.id = miidx.info_type_id AND mi.movie_id = miidx.movie_id AND mi.movie_id = mc.movie_id AND miidx.movie_id = mc.movie_id;
2 | 


--------------------------------------------------------------------------------
/job/13d.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(cn.name) AS producing_company, MIN(miidx.info) AS rating, MIN(t.title) AS movie FROM company_name AS cn, company_type AS ct, info_type AS it, info_type AS it2, kind_type AS kt, movie_companies AS mc, movie_info AS mi, movie_info_idx AS miidx, title AS t WHERE cn.country_code ='[us]' AND ct.kind ='production companies' AND it.info ='rating' AND it2.info ='release dates' AND kt.kind ='movie' AND mi.movie_id = t.id AND it2.id = mi.info_type_id AND kt.id = t.kind_id AND mc.movie_id = t.id AND cn.id = mc.company_id AND ct.id = mc.company_type_id AND miidx.movie_id = t.id AND it.id = miidx.info_type_id AND mi.movie_id = miidx.movie_id AND mi.movie_id = mc.movie_id AND miidx.movie_id = mc.movie_id;
2 | 


--------------------------------------------------------------------------------
/job/14a.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(mi_idx.info) AS rating, MIN(t.title) AS northern_dark_movie FROM info_type AS it1, info_type AS it2, keyword AS k, kind_type AS kt, movie_info AS mi, movie_info_idx AS mi_idx, movie_keyword AS mk, title AS t WHERE it1.info  = 'countries' AND it2.info  = 'rating' AND k.keyword  in ('murder', 'murder-in-title', 'blood', 'violence') AND kt.kind  = 'movie' AND mi.info IN ('Sweden', 'Norway', 'Germany', 'Denmark', 'Swedish', 'Denish', 'Norwegian', 'German', 'USA', 'American') AND mi_idx.info  < '8.5' AND t.production_year  > 2010 AND kt.id = t.kind_id AND t.id = mi.movie_id AND t.id = mk.movie_id AND t.id = mi_idx.movie_id AND mk.movie_id = mi.movie_id AND mk.movie_id = mi_idx.movie_id AND mi.movie_id = mi_idx.movie_id AND k.id = mk.keyword_id AND it1.id = mi.info_type_id AND it2.id = mi_idx.info_type_id;
2 | 


--------------------------------------------------------------------------------
/job/14b.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(mi_idx.info) AS rating, MIN(t.title) AS western_dark_production FROM info_type AS it1, info_type AS it2, keyword AS k, kind_type AS kt, movie_info AS mi, movie_info_idx AS mi_idx, movie_keyword AS mk, title AS t WHERE it1.info  = 'countries' AND it2.info  = 'rating' AND k.keyword  in ('murder', 'murder-in-title') AND kt.kind  = 'movie' AND mi.info IN ('Sweden', 'Norway', 'Germany', 'Denmark', 'Swedish', 'Denish', 'Norwegian', 'German', 'USA', 'American') AND mi_idx.info  > '6.0' AND t.production_year  > 2010 and (t.title like '%murder%' or t.title like '%Murder%' or t.title like '%Mord%') AND kt.id = t.kind_id AND t.id = mi.movie_id AND t.id = mk.movie_id AND t.id = mi_idx.movie_id AND mk.movie_id = mi.movie_id AND mk.movie_id = mi_idx.movie_id AND mi.movie_id = mi_idx.movie_id AND k.id = mk.keyword_id AND it1.id = mi.info_type_id AND it2.id = mi_idx.info_type_id;
2 | 


--------------------------------------------------------------------------------
/job/14c.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(mi_idx.info) AS rating, MIN(t.title) AS north_european_dark_production FROM info_type AS it1, info_type AS it2, keyword AS k, kind_type AS kt, movie_info AS mi, movie_info_idx AS mi_idx, movie_keyword AS mk, title AS t WHERE it1.info  = 'countries' AND it2.info  = 'rating' AND k.keyword  is not null and k.keyword in ('murder', 'murder-in-title', 'blood', 'violence') AND kt.kind  in ('movie', 'episode') AND mi.info IN ('Sweden', 'Norway', 'Germany', 'Denmark', 'Swedish', 'Danish', 'Norwegian', 'German', 'USA', 'American') AND mi_idx.info  < '8.5' AND t.production_year  > 2005 AND kt.id = t.kind_id AND t.id = mi.movie_id AND t.id = mk.movie_id AND t.id = mi_idx.movie_id AND mk.movie_id = mi.movie_id AND mk.movie_id = mi_idx.movie_id AND mi.movie_id = mi_idx.movie_id AND k.id = mk.keyword_id AND it1.id = mi.info_type_id AND it2.id = mi_idx.info_type_id;
2 | 


--------------------------------------------------------------------------------
/job/15a.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(mi.info) AS release_date, MIN(t.title) AS internet_movie FROM aka_title AS at, company_name AS cn, company_type AS ct, info_type AS it1, keyword AS k, movie_companies AS mc, movie_info AS mi, movie_keyword AS mk, title AS t WHERE cn.country_code  = '[us]' AND it1.info  = 'release dates' AND mc.note  like '%(200%)%' and mc.note like '%(worldwide)%' AND mi.note  like '%internet%' AND mi.info  like 'USA:% 200%' AND t.production_year  > 2000 AND t.id = at.movie_id AND t.id = mi.movie_id AND t.id = mk.movie_id AND t.id = mc.movie_id AND mk.movie_id = mi.movie_id AND mk.movie_id = mc.movie_id AND mk.movie_id = at.movie_id AND mi.movie_id = mc.movie_id AND mi.movie_id = at.movie_id AND mc.movie_id = at.movie_id AND k.id = mk.keyword_id AND it1.id = mi.info_type_id AND cn.id = mc.company_id AND ct.id = mc.company_type_id;
2 | 


--------------------------------------------------------------------------------
/job/15b.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(mi.info) AS release_date, MIN(t.title) AS youtube_movie FROM aka_title AS at, company_name AS cn, company_type AS ct, info_type AS it1, keyword AS k, movie_companies AS mc, movie_info AS mi, movie_keyword AS mk, title AS t WHERE cn.country_code  = '[us]' and cn.name = 'YouTube' AND it1.info  = 'release dates' AND mc.note  like '%(200%)%' and mc.note like '%(worldwide)%' AND mi.note  like '%internet%' AND mi.info  like 'USA:% 200%' AND t.production_year  between 2005 and 2010 AND t.id = at.movie_id AND t.id = mi.movie_id AND t.id = mk.movie_id AND t.id = mc.movie_id AND mk.movie_id = mi.movie_id AND mk.movie_id = mc.movie_id AND mk.movie_id = at.movie_id AND mi.movie_id = mc.movie_id AND mi.movie_id = at.movie_id AND mc.movie_id = at.movie_id AND k.id = mk.keyword_id AND it1.id = mi.info_type_id AND cn.id = mc.company_id AND ct.id = mc.company_type_id;
2 | 


--------------------------------------------------------------------------------
/job/15c.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(mi.info) AS release_date, MIN(t.title) AS modern_american_internet_movie FROM aka_title AS at, company_name AS cn, company_type AS ct, info_type AS it1, keyword AS k, movie_companies AS mc, movie_info AS mi, movie_keyword AS mk, title AS t WHERE cn.country_code  = '[us]' AND it1.info  = 'release dates' AND mi.note  like '%internet%' AND mi.info  is not NULL and (mi.info like 'USA:% 199%' or mi.info like 'USA:% 200%') AND t.production_year  > 1990 AND t.id = at.movie_id AND t.id = mi.movie_id AND t.id = mk.movie_id AND t.id = mc.movie_id AND mk.movie_id = mi.movie_id AND mk.movie_id = mc.movie_id AND mk.movie_id = at.movie_id AND mi.movie_id = mc.movie_id AND mi.movie_id = at.movie_id AND mc.movie_id = at.movie_id AND k.id = mk.keyword_id AND it1.id = mi.info_type_id AND cn.id = mc.company_id AND ct.id = mc.company_type_id;
2 | 


--------------------------------------------------------------------------------
/job/15d.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(at.title) AS aka_title, MIN(t.title) AS internet_movie_title FROM aka_title AS at, company_name AS cn, company_type AS ct, info_type AS it1, keyword AS k, movie_companies AS mc, movie_info AS mi, movie_keyword AS mk, title AS t WHERE cn.country_code  = '[us]' AND it1.info  = 'release dates' AND mi.note  like '%internet%' AND t.production_year  > 1990 AND t.id = at.movie_id AND t.id = mi.movie_id AND t.id = mk.movie_id AND t.id = mc.movie_id AND mk.movie_id = mi.movie_id AND mk.movie_id = mc.movie_id AND mk.movie_id = at.movie_id AND mi.movie_id = mc.movie_id AND mi.movie_id = at.movie_id AND mc.movie_id = at.movie_id AND k.id = mk.keyword_id AND it1.id = mi.info_type_id AND cn.id = mc.company_id AND ct.id = mc.company_type_id;
2 | 


--------------------------------------------------------------------------------
/job/16a.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(an.name) AS cool_actor_pseudonym, MIN(t.title) AS series_named_after_char FROM aka_name AS an, cast_info AS ci, company_name AS cn, keyword AS k, movie_companies AS mc, movie_keyword AS mk, name AS n, title AS t WHERE cn.country_code ='[us]' AND k.keyword ='character-name-in-title' AND t.episode_nr >= 50 AND t.episode_nr < 100 AND an.person_id = n.id AND n.id = ci.person_id AND ci.movie_id = t.id AND t.id = mk.movie_id AND mk.keyword_id = k.id AND t.id = mc.movie_id AND mc.company_id = cn.id AND an.person_id = ci.person_id AND ci.movie_id = mc.movie_id AND ci.movie_id = mk.movie_id AND mc.movie_id = mk.movie_id;
2 | 


--------------------------------------------------------------------------------
/job/16b.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(an.name) AS cool_actor_pseudonym, MIN(t.title) AS series_named_after_char FROM aka_name AS an, cast_info AS ci, company_name AS cn, keyword AS k, movie_companies AS mc, movie_keyword AS mk, name AS n, title AS t WHERE cn.country_code ='[us]' AND k.keyword ='character-name-in-title' AND an.person_id = n.id AND n.id = ci.person_id AND ci.movie_id = t.id AND t.id = mk.movie_id AND mk.keyword_id = k.id AND t.id = mc.movie_id AND mc.company_id = cn.id AND an.person_id = ci.person_id AND ci.movie_id = mc.movie_id AND ci.movie_id = mk.movie_id AND mc.movie_id = mk.movie_id;
2 | 


--------------------------------------------------------------------------------
/job/16c.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(an.name) AS cool_actor_pseudonym, MIN(t.title) AS series_named_after_char FROM aka_name AS an, cast_info AS ci, company_name AS cn, keyword AS k, movie_companies AS mc, movie_keyword AS mk, name AS n, title AS t WHERE cn.country_code ='[us]' AND k.keyword ='character-name-in-title' AND t.episode_nr < 100 AND an.person_id = n.id AND n.id = ci.person_id AND ci.movie_id = t.id AND t.id = mk.movie_id AND mk.keyword_id = k.id AND t.id = mc.movie_id AND mc.company_id = cn.id AND an.person_id = ci.person_id AND ci.movie_id = mc.movie_id AND ci.movie_id = mk.movie_id AND mc.movie_id = mk.movie_id;
2 | 


--------------------------------------------------------------------------------
/job/16d.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(an.name) AS cool_actor_pseudonym, MIN(t.title) AS series_named_after_char FROM aka_name AS an, cast_info AS ci, company_name AS cn, keyword AS k, movie_companies AS mc, movie_keyword AS mk, name AS n, title AS t WHERE cn.country_code ='[us]' AND k.keyword ='character-name-in-title' AND t.episode_nr >= 5 AND t.episode_nr < 100 AND an.person_id = n.id AND n.id = ci.person_id AND ci.movie_id = t.id AND t.id = mk.movie_id AND mk.keyword_id = k.id AND t.id = mc.movie_id AND mc.company_id = cn.id AND an.person_id = ci.person_id AND ci.movie_id = mc.movie_id AND ci.movie_id = mk.movie_id AND mc.movie_id = mk.movie_id;
2 | 


--------------------------------------------------------------------------------
/job/17a.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(n.name) AS member_in_charnamed_american_movie, MIN(n.name) AS a1 FROM cast_info AS ci, company_name AS cn, keyword AS k, movie_companies AS mc, movie_keyword AS mk, name AS n, title AS t WHERE cn.country_code ='[us]' AND k.keyword ='character-name-in-title' AND n.name  LIKE 'B%' AND n.id = ci.person_id AND ci.movie_id = t.id AND t.id = mk.movie_id AND mk.keyword_id = k.id AND t.id = mc.movie_id AND mc.company_id = cn.id AND ci.movie_id = mc.movie_id AND ci.movie_id = mk.movie_id AND mc.movie_id = mk.movie_id;
2 | 


--------------------------------------------------------------------------------
/job/17b.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(n.name) AS member_in_charnamed_movie, MIN(n.name) AS a1 FROM cast_info AS ci, company_name AS cn, keyword AS k, movie_companies AS mc, movie_keyword AS mk, name AS n, title AS t WHERE k.keyword ='character-name-in-title' AND n.name  LIKE 'Z%' AND n.id = ci.person_id AND ci.movie_id = t.id AND t.id = mk.movie_id AND mk.keyword_id = k.id AND t.id = mc.movie_id AND mc.company_id = cn.id AND ci.movie_id = mc.movie_id AND ci.movie_id = mk.movie_id AND mc.movie_id = mk.movie_id;
2 | 


--------------------------------------------------------------------------------
/job/17c.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(n.name) AS member_in_charnamed_movie, MIN(n.name) AS a1 FROM cast_info AS ci, company_name AS cn, keyword AS k, movie_companies AS mc, movie_keyword AS mk, name AS n, title AS t WHERE k.keyword ='character-name-in-title' AND n.name  LIKE 'X%' AND n.id = ci.person_id AND ci.movie_id = t.id AND t.id = mk.movie_id AND mk.keyword_id = k.id AND t.id = mc.movie_id AND mc.company_id = cn.id AND ci.movie_id = mc.movie_id AND ci.movie_id = mk.movie_id AND mc.movie_id = mk.movie_id;
2 | 


--------------------------------------------------------------------------------
/job/17d.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(n.name) AS member_in_charnamed_movie FROM cast_info AS ci, company_name AS cn, keyword AS k, movie_companies AS mc, movie_keyword AS mk, name AS n, title AS t WHERE k.keyword ='character-name-in-title' AND n.name  LIKE '%Bert%' AND n.id = ci.person_id AND ci.movie_id = t.id AND t.id = mk.movie_id AND mk.keyword_id = k.id AND t.id = mc.movie_id AND mc.company_id = cn.id AND ci.movie_id = mc.movie_id AND ci.movie_id = mk.movie_id AND mc.movie_id = mk.movie_id;
2 | 


--------------------------------------------------------------------------------
/job/17e.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(n.name) AS member_in_charnamed_movie FROM cast_info AS ci, company_name AS cn, keyword AS k, movie_companies AS mc, movie_keyword AS mk, name AS n, title AS t WHERE cn.country_code ='[us]' AND k.keyword ='character-name-in-title' AND n.id = ci.person_id AND ci.movie_id = t.id AND t.id = mk.movie_id AND mk.keyword_id = k.id AND t.id = mc.movie_id AND mc.company_id = cn.id AND ci.movie_id = mc.movie_id AND ci.movie_id = mk.movie_id AND mc.movie_id = mk.movie_id;
2 | 


--------------------------------------------------------------------------------
/job/17f.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(n.name) AS member_in_charnamed_movie FROM cast_info AS ci, company_name AS cn, keyword AS k, movie_companies AS mc, movie_keyword AS mk, name AS n, title AS t WHERE k.keyword ='character-name-in-title' AND n.name  LIKE '%B%' AND n.id = ci.person_id AND ci.movie_id = t.id AND t.id = mk.movie_id AND mk.keyword_id = k.id AND t.id = mc.movie_id AND mc.company_id = cn.id AND ci.movie_id = mc.movie_id AND ci.movie_id = mk.movie_id AND mc.movie_id = mk.movie_id;
2 | 


--------------------------------------------------------------------------------
/job/18a.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(mi.info) AS movie_budget, MIN(mi_idx.info) AS movie_votes, MIN(t.title) AS movie_title FROM cast_info AS ci, info_type AS it1, info_type AS it2, movie_info AS mi, movie_info_idx AS mi_idx, name AS n, title AS t WHERE ci.note  in ('(producer)', '(executive producer)') AND it1.info  = 'budget' AND it2.info  = 'votes' AND n.gender  = 'm' and n.name like '%Tim%' AND t.id = mi.movie_id AND t.id = mi_idx.movie_id AND t.id = ci.movie_id AND ci.movie_id = mi.movie_id AND ci.movie_id = mi_idx.movie_id AND mi.movie_id = mi_idx.movie_id AND n.id = ci.person_id AND it1.id = mi.info_type_id AND it2.id = mi_idx.info_type_id;
2 | 


--------------------------------------------------------------------------------
/job/18b.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(mi.info) AS movie_budget, MIN(mi_idx.info) AS movie_votes, MIN(t.title) AS movie_title FROM cast_info AS ci, info_type AS it1, info_type AS it2, movie_info AS mi, movie_info_idx AS mi_idx, name AS n, title AS t WHERE ci.note  in ('(writer)', '(head writer)', '(written by)', '(story)', '(story editor)') AND it1.info  = 'genres' AND it2.info  = 'rating' AND mi.info  in ('Horror', 'Thriller') and mi.note is NULL AND mi_idx.info  > '8.0' AND n.gender  is not null and n.gender = 'f' AND t.production_year  between 2008 and 2014 AND t.id = mi.movie_id AND t.id = mi_idx.movie_id AND t.id = ci.movie_id AND ci.movie_id = mi.movie_id AND ci.movie_id = mi_idx.movie_id AND mi.movie_id = mi_idx.movie_id AND n.id = ci.person_id AND it1.id = mi.info_type_id AND it2.id = mi_idx.info_type_id;
2 | 


--------------------------------------------------------------------------------
/job/18c.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(mi.info) AS movie_budget, MIN(mi_idx.info) AS movie_votes, MIN(t.title) AS movie_title FROM cast_info AS ci, info_type AS it1, info_type AS it2, movie_info AS mi, movie_info_idx AS mi_idx, name AS n, title AS t WHERE ci.note  in ('(writer)', '(head writer)', '(written by)', '(story)', '(story editor)') AND it1.info  = 'genres' AND it2.info  = 'votes' AND mi.info  in ('Horror', 'Action', 'Sci-Fi', 'Thriller', 'Crime', 'War') AND n.gender  = 'm' AND t.id = mi.movie_id AND t.id = mi_idx.movie_id AND t.id = ci.movie_id AND ci.movie_id = mi.movie_id AND ci.movie_id = mi_idx.movie_id AND mi.movie_id = mi_idx.movie_id AND n.id = ci.person_id AND it1.id = mi.info_type_id AND it2.id = mi_idx.info_type_id;
2 | 


--------------------------------------------------------------------------------
/job/19a.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(n.name) AS voicing_actress, MIN(t.title) AS voiced_movie FROM aka_name AS an, char_name AS chn, cast_info AS ci, company_name AS cn, info_type AS it, movie_companies AS mc, movie_info AS mi, name AS n, role_type AS rt, title AS t WHERE ci.note  in ('(voice)', '(voice: Japanese version)', '(voice) (uncredited)', '(voice: English version)') AND cn.country_code ='[us]' AND it.info  = 'release dates' AND mc.note  is not NULL and (mc.note like '%(USA)%' or mc.note like '%(worldwide)%') AND mi.info  is not null and (mi.info like 'Japan:%200%' or mi.info like 'USA:%200%') AND n.gender ='f' and n.name like '%Ang%' AND rt.role ='actress' AND t.production_year  between 2005 and 2009 AND t.id = mi.movie_id AND t.id = mc.movie_id AND t.id = ci.movie_id AND mc.movie_id = ci.movie_id AND mc.movie_id = mi.movie_id AND mi.movie_id = ci.movie_id AND cn.id = mc.company_id AND it.id = mi.info_type_id AND n.id = ci.person_id AND rt.id = ci.role_id AND n.id = an.person_id AND ci.person_id = an.person_id AND chn.id = ci.person_role_id;
2 | 


--------------------------------------------------------------------------------
/job/19b.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(n.name) AS voicing_actress, MIN(t.title) AS kung_fu_panda FROM aka_name AS an, char_name AS chn, cast_info AS ci, company_name AS cn, info_type AS it, movie_companies AS mc, movie_info AS mi, name AS n, role_type AS rt, title AS t WHERE ci.note  = '(voice)' AND cn.country_code ='[us]' AND it.info  = 'release dates' AND mc.note  like '%(200%)%' and (mc.note like '%(USA)%' or mc.note like '%(worldwide)%') AND mi.info  is not null and (mi.info like 'Japan:%2007%' or mi.info like 'USA:%2008%') AND n.gender ='f' and n.name like '%Angel%' AND rt.role ='actress' AND t.production_year  between 2007 and 2008 and t.title like '%Kung%Fu%Panda%' AND t.id = mi.movie_id AND t.id = mc.movie_id AND t.id = ci.movie_id AND mc.movie_id = ci.movie_id AND mc.movie_id = mi.movie_id AND mi.movie_id = ci.movie_id AND cn.id = mc.company_id AND it.id = mi.info_type_id AND n.id = ci.person_id AND rt.id = ci.role_id AND n.id = an.person_id AND ci.person_id = an.person_id AND chn.id = ci.person_role_id;
2 | 


--------------------------------------------------------------------------------
/job/19c.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(n.name) AS voicing_actress, MIN(t.title) AS jap_engl_voiced_movie FROM aka_name AS an, char_name AS chn, cast_info AS ci, company_name AS cn, info_type AS it, movie_companies AS mc, movie_info AS mi, name AS n, role_type AS rt, title AS t WHERE ci.note  in ('(voice)', '(voice: Japanese version)', '(voice) (uncredited)', '(voice: English version)') AND cn.country_code ='[us]' AND it.info  = 'release dates' AND mi.info  is not null and (mi.info like 'Japan:%200%' or mi.info like 'USA:%200%') AND n.gender ='f' and n.name like '%An%' AND rt.role ='actress' AND t.production_year  > 2000 AND t.id = mi.movie_id AND t.id = mc.movie_id AND t.id = ci.movie_id AND mc.movie_id = ci.movie_id AND mc.movie_id = mi.movie_id AND mi.movie_id = ci.movie_id AND cn.id = mc.company_id AND it.id = mi.info_type_id AND n.id = ci.person_id AND rt.id = ci.role_id AND n.id = an.person_id AND ci.person_id = an.person_id AND chn.id = ci.person_role_id;
2 | 


--------------------------------------------------------------------------------
/job/19d.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(n.name) AS voicing_actress, MIN(t.title) AS jap_engl_voiced_movie FROM aka_name AS an, char_name AS chn, cast_info AS ci, company_name AS cn, info_type AS it, movie_companies AS mc, movie_info AS mi, name AS n, role_type AS rt, title AS t WHERE ci.note  in ('(voice)', '(voice: Japanese version)', '(voice) (uncredited)', '(voice: English version)') AND cn.country_code ='[us]' AND it.info  = 'release dates' AND n.gender ='f' AND rt.role ='actress' AND t.production_year  > 2000 AND t.id = mi.movie_id AND t.id = mc.movie_id AND t.id = ci.movie_id AND mc.movie_id = ci.movie_id AND mc.movie_id = mi.movie_id AND mi.movie_id = ci.movie_id AND cn.id = mc.company_id AND it.id = mi.info_type_id AND n.id = ci.person_id AND rt.id = ci.role_id AND n.id = an.person_id AND ci.person_id = an.person_id AND chn.id = ci.person_role_id;
2 | 


--------------------------------------------------------------------------------
/job/1a.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(mc.note) AS production_note, MIN(t.title) AS movie_title, MIN(t.production_year) AS movie_year FROM company_type AS ct, info_type AS it, movie_companies AS mc, movie_info_idx AS mi_idx, title AS t WHERE ct.kind = 'production companies' AND it.info = 'top 250 rank' AND mc.note  not like '%(as Metro-Goldwyn-Mayer Pictures)%' and (mc.note like '%(co-production)%' or mc.note like '%(presents)%') AND ct.id = mc.company_type_id AND t.id = mc.movie_id AND t.id = mi_idx.movie_id AND mc.movie_id = mi_idx.movie_id AND it.id = mi_idx.info_type_id;
2 | 


--------------------------------------------------------------------------------
/job/1b.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(mc.note) AS production_note, MIN(t.title) AS movie_title, MIN(t.production_year) AS movie_year FROM company_type AS ct, info_type AS it, movie_companies AS mc, movie_info_idx AS mi_idx, title AS t WHERE ct.kind = 'production companies' AND it.info = 'bottom 10 rank' AND mc.note  not like '%(as Metro-Goldwyn-Mayer Pictures)%' AND t.production_year between 2005 and 2010 AND ct.id = mc.company_type_id AND t.id = mc.movie_id AND t.id = mi_idx.movie_id AND mc.movie_id = mi_idx.movie_id AND it.id = mi_idx.info_type_id;
2 | 


--------------------------------------------------------------------------------
/job/1c.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(mc.note) AS production_note, MIN(t.title) AS movie_title, MIN(t.production_year) AS movie_year FROM company_type AS ct, info_type AS it, movie_companies AS mc, movie_info_idx AS mi_idx, title AS t WHERE ct.kind = 'production companies' AND it.info = 'top 250 rank' AND mc.note  not like '%(as Metro-Goldwyn-Mayer Pictures)%' and (mc.note like '%(co-production)%') AND t.production_year >2010 AND ct.id = mc.company_type_id AND t.id = mc.movie_id AND t.id = mi_idx.movie_id AND mc.movie_id = mi_idx.movie_id AND it.id = mi_idx.info_type_id;
2 | 


--------------------------------------------------------------------------------
/job/1d.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(mc.note) AS production_note, MIN(t.title) AS movie_title, MIN(t.production_year) AS movie_year FROM company_type AS ct, info_type AS it, movie_companies AS mc, movie_info_idx AS mi_idx, title AS t WHERE ct.kind = 'production companies' AND it.info = 'bottom 10 rank' AND mc.note  not like '%(as Metro-Goldwyn-Mayer Pictures)%' AND t.production_year >2000 AND ct.id = mc.company_type_id AND t.id = mc.movie_id AND t.id = mi_idx.movie_id AND mc.movie_id = mi_idx.movie_id AND it.id = mi_idx.info_type_id;
2 | 


--------------------------------------------------------------------------------
/job/20a.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(t.title) AS complete_downey_ironman_movie FROM complete_cast AS cc, comp_cast_type AS cct1, comp_cast_type AS cct2, char_name AS chn, cast_info AS ci, keyword AS k, kind_type AS kt, movie_keyword AS mk, name AS n, title AS t WHERE cct1.kind  = 'cast' AND cct2.kind  like '%complete%' AND chn.name  not like '%Sherlock%' and (chn.name like '%Tony%Stark%' or chn.name like '%Iron%Man%') AND k.keyword  in ('superhero', 'sequel', 'second-part', 'marvel-comics', 'based-on-comic', 'tv-special', 'fight', 'violence') AND kt.kind  = 'movie' AND t.production_year  > 1950 AND kt.id = t.kind_id AND t.id = mk.movie_id AND t.id = ci.movie_id AND t.id = cc.movie_id AND mk.movie_id = ci.movie_id AND mk.movie_id = cc.movie_id AND ci.movie_id = cc.movie_id AND chn.id = ci.person_role_id AND n.id = ci.person_id AND k.id = mk.keyword_id AND cct1.id = cc.subject_id AND cct2.id = cc.status_id;
2 | 


--------------------------------------------------------------------------------
/job/20b.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(t.title) AS complete_downey_ironman_movie FROM complete_cast AS cc, comp_cast_type AS cct1, comp_cast_type AS cct2, char_name AS chn, cast_info AS ci, keyword AS k, kind_type AS kt, movie_keyword AS mk, name AS n, title AS t WHERE cct1.kind  = 'cast' AND cct2.kind  like '%complete%' AND chn.name  not like '%Sherlock%' and (chn.name like '%Tony%Stark%' or chn.name like '%Iron%Man%') AND k.keyword  in ('superhero', 'sequel', 'second-part', 'marvel-comics', 'based-on-comic', 'tv-special', 'fight', 'violence') AND kt.kind  = 'movie' AND n.name  LIKE '%Downey%Robert%' AND t.production_year  > 2000 AND kt.id = t.kind_id AND t.id = mk.movie_id AND t.id = ci.movie_id AND t.id = cc.movie_id AND mk.movie_id = ci.movie_id AND mk.movie_id = cc.movie_id AND ci.movie_id = cc.movie_id AND chn.id = ci.person_role_id AND n.id = ci.person_id AND k.id = mk.keyword_id AND cct1.id = cc.subject_id AND cct2.id = cc.status_id;
2 | 


--------------------------------------------------------------------------------
/job/20c.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(n.name) AS cast_member, MIN(t.title) AS complete_dynamic_hero_movie FROM complete_cast AS cc, comp_cast_type AS cct1, comp_cast_type AS cct2, char_name AS chn, cast_info AS ci, keyword AS k, kind_type AS kt, movie_keyword AS mk, name AS n, title AS t WHERE cct1.kind  = 'cast' AND cct2.kind  like '%complete%' AND chn.name  is not NULL and (chn.name like '%man%' or chn.name like '%Man%') AND k.keyword  in ('superhero', 'marvel-comics', 'based-on-comic', 'tv-special', 'fight', 'violence', 'magnet', 'web', 'claw', 'laser') AND kt.kind  = 'movie' AND t.production_year  > 2000 AND kt.id = t.kind_id AND t.id = mk.movie_id AND t.id = ci.movie_id AND t.id = cc.movie_id AND mk.movie_id = ci.movie_id AND mk.movie_id = cc.movie_id AND ci.movie_id = cc.movie_id AND chn.id = ci.person_role_id AND n.id = ci.person_id AND k.id = mk.keyword_id AND cct1.id = cc.subject_id AND cct2.id = cc.status_id;
2 | 


--------------------------------------------------------------------------------
/job/21a.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(cn.name) AS company_name, MIN(lt.link) AS link_type, MIN(t.title) AS western_follow_up FROM company_name AS cn, company_type AS ct, keyword AS k, link_type AS lt, movie_companies AS mc, movie_info AS mi, movie_keyword AS mk, movie_link AS ml, title AS t WHERE cn.country_code !='[pl]' AND (cn.name LIKE '%Film%' OR cn.name LIKE '%Warner%') AND ct.kind ='production companies' AND k.keyword ='sequel' AND lt.link LIKE '%follow%' AND mc.note IS NULL AND mi.info IN ('Sweden', 'Norway', 'Germany', 'Denmark', 'Swedish', 'Denish', 'Norwegian', 'German') AND t.production_year BETWEEN 1950 AND 2000 AND lt.id = ml.link_type_id AND ml.movie_id = t.id AND t.id = mk.movie_id AND mk.keyword_id = k.id AND t.id = mc.movie_id AND mc.company_type_id = ct.id AND mc.company_id = cn.id AND mi.movie_id = t.id AND ml.movie_id = mk.movie_id AND ml.movie_id = mc.movie_id AND mk.movie_id = mc.movie_id AND ml.movie_id = mi.movie_id AND mk.movie_id = mi.movie_id AND mc.movie_id = mi.movie_id;
2 | 


--------------------------------------------------------------------------------
/job/21b.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(cn.name) AS company_name, MIN(lt.link) AS link_type, MIN(t.title) AS german_follow_up FROM company_name AS cn, company_type AS ct, keyword AS k, link_type AS lt, movie_companies AS mc, movie_info AS mi, movie_keyword AS mk, movie_link AS ml, title AS t WHERE cn.country_code !='[pl]' AND (cn.name LIKE '%Film%' OR cn.name LIKE '%Warner%') AND ct.kind ='production companies' AND k.keyword ='sequel' AND lt.link LIKE '%follow%' AND mc.note IS NULL AND mi.info IN ('Germany', 'German') AND t.production_year BETWEEN 2000 AND 2010 AND lt.id = ml.link_type_id AND ml.movie_id = t.id AND t.id = mk.movie_id AND mk.keyword_id = k.id AND t.id = mc.movie_id AND mc.company_type_id = ct.id AND mc.company_id = cn.id AND mi.movie_id = t.id AND ml.movie_id = mk.movie_id AND ml.movie_id = mc.movie_id AND mk.movie_id = mc.movie_id AND ml.movie_id = mi.movie_id AND mk.movie_id = mi.movie_id AND mc.movie_id = mi.movie_id;
2 | 


--------------------------------------------------------------------------------
/job/21c.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(cn.name) AS company_name, MIN(lt.link) AS link_type, MIN(t.title) AS western_follow_up FROM company_name AS cn, company_type AS ct, keyword AS k, link_type AS lt, movie_companies AS mc, movie_info AS mi, movie_keyword AS mk, movie_link AS ml, title AS t WHERE cn.country_code !='[pl]' AND (cn.name LIKE '%Film%' OR cn.name LIKE '%Warner%') AND ct.kind ='production companies' AND k.keyword ='sequel' AND lt.link LIKE '%follow%' AND mc.note IS NULL AND mi.info IN ('Sweden', 'Norway', 'Germany', 'Denmark', 'Swedish', 'Denish', 'Norwegian', 'German', 'English') AND t.production_year BETWEEN 1950 AND 2010 AND lt.id = ml.link_type_id AND ml.movie_id = t.id AND t.id = mk.movie_id AND mk.keyword_id = k.id AND t.id = mc.movie_id AND mc.company_type_id = ct.id AND mc.company_id = cn.id AND mi.movie_id = t.id AND ml.movie_id = mk.movie_id AND ml.movie_id = mc.movie_id AND mk.movie_id = mc.movie_id AND ml.movie_id = mi.movie_id AND mk.movie_id = mi.movie_id AND mc.movie_id = mi.movie_id;
2 | 


--------------------------------------------------------------------------------
/job/22a.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(cn.name) AS movie_company, MIN(mi_idx.info) AS rating, MIN(t.title) AS western_violent_movie FROM company_name AS cn, company_type AS ct, info_type AS it1, info_type AS it2, keyword AS k, kind_type AS kt, movie_companies AS mc, movie_info AS mi, movie_info_idx AS mi_idx, movie_keyword AS mk, title AS t WHERE cn.country_code  != '[us]' AND it1.info  = 'countries' AND it2.info  = 'rating' AND k.keyword  in ('murder', 'murder-in-title', 'blood', 'violence') AND kt.kind  in ('movie', 'episode') AND mc.note  not like '%(USA)%' and mc.note like '%(200%)%' AND mi.info IN ('Germany', 'German', 'USA', 'American') AND mi_idx.info  < '7.0' AND t.production_year  > 2008 AND kt.id = t.kind_id AND t.id = mi.movie_id AND t.id = mk.movie_id AND t.id = mi_idx.movie_id AND t.id = mc.movie_id AND mk.movie_id = mi.movie_id AND mk.movie_id = mi_idx.movie_id AND mk.movie_id = mc.movie_id AND mi.movie_id = mi_idx.movie_id AND mi.movie_id = mc.movie_id AND mc.movie_id = mi_idx.movie_id AND k.id = mk.keyword_id AND it1.id = mi.info_type_id AND it2.id = mi_idx.info_type_id AND ct.id = mc.company_type_id AND cn.id = mc.company_id;
2 | 


--------------------------------------------------------------------------------
/job/22b.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(cn.name) AS movie_company, MIN(mi_idx.info) AS rating, MIN(t.title) AS western_violent_movie FROM company_name AS cn, company_type AS ct, info_type AS it1, info_type AS it2, keyword AS k, kind_type AS kt, movie_companies AS mc, movie_info AS mi, movie_info_idx AS mi_idx, movie_keyword AS mk, title AS t WHERE cn.country_code  != '[us]' AND it1.info  = 'countries' AND it2.info  = 'rating' AND k.keyword  in ('murder', 'murder-in-title', 'blood', 'violence') AND kt.kind  in ('movie', 'episode') AND mc.note  not like '%(USA)%' and mc.note like '%(200%)%' AND mi.info IN ('Germany', 'German', 'USA', 'American') AND mi_idx.info  < '7.0' AND t.production_year  > 2009 AND kt.id = t.kind_id AND t.id = mi.movie_id AND t.id = mk.movie_id AND t.id = mi_idx.movie_id AND t.id = mc.movie_id AND mk.movie_id = mi.movie_id AND mk.movie_id = mi_idx.movie_id AND mk.movie_id = mc.movie_id AND mi.movie_id = mi_idx.movie_id AND mi.movie_id = mc.movie_id AND mc.movie_id = mi_idx.movie_id AND k.id = mk.keyword_id AND it1.id = mi.info_type_id AND it2.id = mi_idx.info_type_id AND ct.id = mc.company_type_id AND cn.id = mc.company_id;
2 | 


--------------------------------------------------------------------------------
/job/22c.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(cn.name) AS movie_company, MIN(mi_idx.info) AS rating, MIN(t.title) AS western_violent_movie FROM company_name AS cn, company_type AS ct, info_type AS it1, info_type AS it2, keyword AS k, kind_type AS kt, movie_companies AS mc, movie_info AS mi, movie_info_idx AS mi_idx, movie_keyword AS mk, title AS t WHERE cn.country_code  != '[us]' AND it1.info  = 'countries' AND it2.info  = 'rating' AND k.keyword  in ('murder', 'murder-in-title', 'blood', 'violence') AND kt.kind  in ('movie', 'episode') AND mc.note  not like '%(USA)%' and mc.note like '%(200%)%' AND mi.info IN ('Sweden', 'Norway', 'Germany', 'Denmark', 'Swedish', 'Danish', 'Norwegian', 'German', 'USA', 'American') AND mi_idx.info  < '8.5' AND t.production_year  > 2005 AND kt.id = t.kind_id AND t.id = mi.movie_id AND t.id = mk.movie_id AND t.id = mi_idx.movie_id AND t.id = mc.movie_id AND mk.movie_id = mi.movie_id AND mk.movie_id = mi_idx.movie_id AND mk.movie_id = mc.movie_id AND mi.movie_id = mi_idx.movie_id AND mi.movie_id = mc.movie_id AND mc.movie_id = mi_idx.movie_id AND k.id = mk.keyword_id AND it1.id = mi.info_type_id AND it2.id = mi_idx.info_type_id AND ct.id = mc.company_type_id AND cn.id = mc.company_id;
2 | 


--------------------------------------------------------------------------------
/job/22d.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(cn.name) AS movie_company, MIN(mi_idx.info) AS rating, MIN(t.title) AS western_violent_movie FROM company_name AS cn, company_type AS ct, info_type AS it1, info_type AS it2, keyword AS k, kind_type AS kt, movie_companies AS mc, movie_info AS mi, movie_info_idx AS mi_idx, movie_keyword AS mk, title AS t WHERE cn.country_code  != '[us]' AND it1.info  = 'countries' AND it2.info  = 'rating' AND k.keyword  in ('murder', 'murder-in-title', 'blood', 'violence') AND kt.kind  in ('movie', 'episode') AND mi.info IN ('Sweden', 'Norway', 'Germany', 'Denmark', 'Swedish', 'Danish', 'Norwegian', 'German', 'USA', 'American') AND mi_idx.info  < '8.5' AND t.production_year  > 2005 AND kt.id = t.kind_id AND t.id = mi.movie_id AND t.id = mk.movie_id AND t.id = mi_idx.movie_id AND t.id = mc.movie_id AND mk.movie_id = mi.movie_id AND mk.movie_id = mi_idx.movie_id AND mk.movie_id = mc.movie_id AND mi.movie_id = mi_idx.movie_id AND mi.movie_id = mc.movie_id AND mc.movie_id = mi_idx.movie_id AND k.id = mk.keyword_id AND it1.id = mi.info_type_id AND it2.id = mi_idx.info_type_id AND ct.id = mc.company_type_id AND cn.id = mc.company_id;
2 | 


--------------------------------------------------------------------------------
/job/23a.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(kt.kind) AS movie_kind, MIN(t.title) AS complete_us_internet_movie FROM complete_cast AS cc, comp_cast_type AS cct1, company_name AS cn, company_type AS ct, info_type AS it1, keyword AS k, kind_type AS kt, movie_companies AS mc, movie_info AS mi, movie_keyword AS mk, title AS t WHERE cct1.kind  = 'complete+verified' AND cn.country_code  = '[us]' AND it1.info  = 'release dates' AND kt.kind  in ('movie') AND mi.note  like '%internet%' AND mi.info  is not NULL and (mi.info like 'USA:% 199%' or mi.info like 'USA:% 200%') AND t.production_year  > 2000 AND kt.id = t.kind_id AND t.id = mi.movie_id AND t.id = mk.movie_id AND t.id = mc.movie_id AND t.id = cc.movie_id AND mk.movie_id = mi.movie_id AND mk.movie_id = mc.movie_id AND mk.movie_id = cc.movie_id AND mi.movie_id = mc.movie_id AND mi.movie_id = cc.movie_id AND mc.movie_id = cc.movie_id AND k.id = mk.keyword_id AND it1.id = mi.info_type_id AND cn.id = mc.company_id AND ct.id = mc.company_type_id AND cct1.id = cc.status_id;
2 | 


--------------------------------------------------------------------------------
/job/23b.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(kt.kind) AS movie_kind, MIN(t.title) AS complete_nerdy_internet_movie FROM complete_cast AS cc, comp_cast_type AS cct1, company_name AS cn, company_type AS ct, info_type AS it1, keyword AS k, kind_type AS kt, movie_companies AS mc, movie_info AS mi, movie_keyword AS mk, title AS t WHERE cct1.kind  = 'complete+verified' AND cn.country_code  = '[us]' AND it1.info  = 'release dates' AND k.keyword  in ('nerd', 'loner', 'alienation', 'dignity') AND kt.kind  in ('movie') AND mi.note  like '%internet%' AND mi.info  like 'USA:% 200%' AND t.production_year  > 2000 AND kt.id = t.kind_id AND t.id = mi.movie_id AND t.id = mk.movie_id AND t.id = mc.movie_id AND t.id = cc.movie_id AND mk.movie_id = mi.movie_id AND mk.movie_id = mc.movie_id AND mk.movie_id = cc.movie_id AND mi.movie_id = mc.movie_id AND mi.movie_id = cc.movie_id AND mc.movie_id = cc.movie_id AND k.id = mk.keyword_id AND it1.id = mi.info_type_id AND cn.id = mc.company_id AND ct.id = mc.company_type_id AND cct1.id = cc.status_id;
2 | 


--------------------------------------------------------------------------------
/job/23c.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(kt.kind) AS movie_kind, MIN(t.title) AS complete_us_internet_movie FROM complete_cast AS cc, comp_cast_type AS cct1, company_name AS cn, company_type AS ct, info_type AS it1, keyword AS k, kind_type AS kt, movie_companies AS mc, movie_info AS mi, movie_keyword AS mk, title AS t WHERE cct1.kind  = 'complete+verified' AND cn.country_code  = '[us]' AND it1.info  = 'release dates' AND kt.kind  in ('movie', 'tv movie', 'video movie', 'video game') AND mi.note  like '%internet%' AND mi.info  is not NULL and (mi.info like 'USA:% 199%' or mi.info like 'USA:% 200%') AND t.production_year  > 1990 AND kt.id = t.kind_id AND t.id = mi.movie_id AND t.id = mk.movie_id AND t.id = mc.movie_id AND t.id = cc.movie_id AND mk.movie_id = mi.movie_id AND mk.movie_id = mc.movie_id AND mk.movie_id = cc.movie_id AND mi.movie_id = mc.movie_id AND mi.movie_id = cc.movie_id AND mc.movie_id = cc.movie_id AND k.id = mk.keyword_id AND it1.id = mi.info_type_id AND cn.id = mc.company_id AND ct.id = mc.company_type_id AND cct1.id = cc.status_id;
2 | 


--------------------------------------------------------------------------------
/job/24a.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(chn.name) AS voiced_char_name, MIN(n.name) AS voicing_actress_name, MIN(t.title) AS voiced_action_movie_jap_eng FROM aka_name AS an, char_name AS chn, cast_info AS ci, company_name AS cn, info_type AS it, keyword AS k, movie_companies AS mc, movie_info AS mi, movie_keyword AS mk, name AS n, role_type AS rt, title AS t WHERE ci.note  in ('(voice)', '(voice: Japanese version)', '(voice) (uncredited)', '(voice: English version)') AND cn.country_code ='[us]' AND it.info  = 'release dates' AND k.keyword  in ('hero', 'martial-arts', 'hand-to-hand-combat') AND mi.info  is not null and (mi.info like 'Japan:%201%' or mi.info like 'USA:%201%') AND n.gender ='f' and n.name like '%An%' AND rt.role ='actress' AND t.production_year  > 2010 AND t.id = mi.movie_id AND t.id = mc.movie_id AND t.id = ci.movie_id AND t.id = mk.movie_id AND mc.movie_id = ci.movie_id AND mc.movie_id = mi.movie_id AND mc.movie_id = mk.movie_id AND mi.movie_id = ci.movie_id AND mi.movie_id = mk.movie_id AND ci.movie_id = mk.movie_id AND cn.id = mc.company_id AND it.id = mi.info_type_id AND n.id = ci.person_id AND rt.id = ci.role_id AND n.id = an.person_id AND ci.person_id = an.person_id AND chn.id = ci.person_role_id AND k.id = mk.keyword_id;
2 | 


--------------------------------------------------------------------------------
/job/24b.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(chn.name) AS voiced_char_name, MIN(n.name) AS voicing_actress_name, MIN(t.title) AS kung_fu_panda FROM aka_name AS an, char_name AS chn, cast_info AS ci, company_name AS cn, info_type AS it, keyword AS k, movie_companies AS mc, movie_info AS mi, movie_keyword AS mk, name AS n, role_type AS rt, title AS t WHERE ci.note  in ('(voice)', '(voice: Japanese version)', '(voice) (uncredited)', '(voice: English version)') AND cn.country_code ='[us]' AND cn.name  = 'DreamWorks Animation' AND it.info  = 'release dates' AND k.keyword  in ('hero', 'martial-arts', 'hand-to-hand-combat', 'computer-animated-movie') AND mi.info  is not null and (mi.info like 'Japan:%201%' or mi.info like 'USA:%201%') AND n.gender ='f' and n.name like '%An%' AND rt.role ='actress' AND t.production_year  > 2010 AND t.title like 'Kung Fu Panda%' AND t.id = mi.movie_id AND t.id = mc.movie_id AND t.id = ci.movie_id AND t.id = mk.movie_id AND mc.movie_id = ci.movie_id AND mc.movie_id = mi.movie_id AND mc.movie_id = mk.movie_id AND mi.movie_id = ci.movie_id AND mi.movie_id = mk.movie_id AND ci.movie_id = mk.movie_id AND cn.id = mc.company_id AND it.id = mi.info_type_id AND n.id = ci.person_id AND rt.id = ci.role_id AND n.id = an.person_id AND ci.person_id = an.person_id AND chn.id = ci.person_role_id AND k.id = mk.keyword_id;
2 | 


--------------------------------------------------------------------------------
/job/25a.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(mi.info) AS movie_budget, MIN(mi_idx.info) AS movie_votes, MIN(n.name) AS male_writer, MIN(t.title) AS violent_movie_title FROM cast_info AS ci, info_type AS it1, info_type AS it2, keyword AS k, movie_info AS mi, movie_info_idx AS mi_idx, movie_keyword AS mk, name AS n, title AS t WHERE ci.note  in ('(writer)', '(head writer)', '(written by)', '(story)', '(story editor)') AND it1.info  = 'genres' AND it2.info  = 'votes' AND k.keyword  in ('murder', 'blood', 'gore', 'death', 'female-nudity') AND mi.info  = 'Horror' AND n.gender  = 'm' AND t.id = mi.movie_id AND t.id = mi_idx.movie_id AND t.id = ci.movie_id AND t.id = mk.movie_id AND ci.movie_id = mi.movie_id AND ci.movie_id = mi_idx.movie_id AND ci.movie_id = mk.movie_id AND mi.movie_id = mi_idx.movie_id AND mi.movie_id = mk.movie_id AND mi_idx.movie_id = mk.movie_id AND n.id = ci.person_id AND it1.id = mi.info_type_id AND it2.id = mi_idx.info_type_id AND k.id = mk.keyword_id;
2 | 


--------------------------------------------------------------------------------
/job/25b.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(mi.info) AS movie_budget, MIN(mi_idx.info) AS movie_votes, MIN(n.name) AS male_writer, MIN(t.title) AS violent_movie_title FROM cast_info AS ci, info_type AS it1, info_type AS it2, keyword AS k, movie_info AS mi, movie_info_idx AS mi_idx, movie_keyword AS mk, name AS n, title AS t WHERE ci.note  in ('(writer)', '(head writer)', '(written by)', '(story)', '(story editor)') AND it1.info  = 'genres' AND it2.info  = 'votes' AND k.keyword  in ('murder', 'blood', 'gore', 'death', 'female-nudity') AND mi.info  = 'Horror' AND n.gender   = 'm' AND t.production_year  > 2010 AND t.title  like 'Vampire%' AND t.id = mi.movie_id AND t.id = mi_idx.movie_id AND t.id = ci.movie_id AND t.id = mk.movie_id AND ci.movie_id = mi.movie_id AND ci.movie_id = mi_idx.movie_id AND ci.movie_id = mk.movie_id AND mi.movie_id = mi_idx.movie_id AND mi.movie_id = mk.movie_id AND mi_idx.movie_id = mk.movie_id AND n.id = ci.person_id AND it1.id = mi.info_type_id AND it2.id = mi_idx.info_type_id AND k.id = mk.keyword_id;
2 | 


--------------------------------------------------------------------------------
/job/25c.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(mi.info) AS movie_budget, MIN(mi_idx.info) AS movie_votes, MIN(n.name) AS male_writer, MIN(t.title) AS violent_movie_title FROM cast_info AS ci, info_type AS it1, info_type AS it2, keyword AS k, movie_info AS mi, movie_info_idx AS mi_idx, movie_keyword AS mk, name AS n, title AS t WHERE ci.note  in ('(writer)', '(head writer)', '(written by)', '(story)', '(story editor)') AND it1.info  = 'genres' AND it2.info  = 'votes' AND k.keyword  in ('murder', 'violence', 'blood', 'gore', 'death', 'female-nudity', 'hospital') AND mi.info  in ('Horror', 'Action', 'Sci-Fi', 'Thriller', 'Crime', 'War') AND n.gender   = 'm' AND t.id = mi.movie_id AND t.id = mi_idx.movie_id AND t.id = ci.movie_id AND t.id = mk.movie_id AND ci.movie_id = mi.movie_id AND ci.movie_id = mi_idx.movie_id AND ci.movie_id = mk.movie_id AND mi.movie_id = mi_idx.movie_id AND mi.movie_id = mk.movie_id AND mi_idx.movie_id = mk.movie_id AND n.id = ci.person_id AND it1.id = mi.info_type_id AND it2.id = mi_idx.info_type_id AND k.id = mk.keyword_id;
2 | 


--------------------------------------------------------------------------------
/job/26a.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(chn.name) AS character_name, MIN(mi_idx.info) AS rating, MIN(n.name) AS playing_actor, MIN(t.title) AS complete_hero_movie FROM complete_cast AS cc, comp_cast_type AS cct1, comp_cast_type AS cct2, char_name AS chn, cast_info AS ci, info_type AS it2, keyword AS k, kind_type AS kt, movie_info_idx AS mi_idx, movie_keyword AS mk, name AS n, title AS t WHERE cct1.kind  = 'cast' AND cct2.kind  like '%complete%' AND chn.name  is not NULL and (chn.name like '%man%' or chn.name like '%Man%') AND it2.info  = 'rating' AND k.keyword  in ('superhero', 'marvel-comics', 'based-on-comic', 'tv-special', 'fight', 'violence', 'magnet', 'web', 'claw', 'laser') AND kt.kind  = 'movie' AND mi_idx.info  > '7.0' AND t.production_year  > 2000 AND kt.id = t.kind_id AND t.id = mk.movie_id AND t.id = ci.movie_id AND t.id = cc.movie_id AND t.id = mi_idx.movie_id AND mk.movie_id = ci.movie_id AND mk.movie_id = cc.movie_id AND mk.movie_id = mi_idx.movie_id AND ci.movie_id = cc.movie_id AND ci.movie_id = mi_idx.movie_id AND cc.movie_id = mi_idx.movie_id AND chn.id = ci.person_role_id AND n.id = ci.person_id AND k.id = mk.keyword_id AND cct1.id = cc.subject_id AND cct2.id = cc.status_id AND it2.id = mi_idx.info_type_id;
2 | 


--------------------------------------------------------------------------------
/job/26b.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(chn.name) AS character_name, MIN(mi_idx.info) AS rating, MIN(t.title) AS complete_hero_movie FROM complete_cast AS cc, comp_cast_type AS cct1, comp_cast_type AS cct2, char_name AS chn, cast_info AS ci, info_type AS it2, keyword AS k, kind_type AS kt, movie_info_idx AS mi_idx, movie_keyword AS mk, name AS n, title AS t WHERE cct1.kind  = 'cast' AND cct2.kind  like '%complete%' AND chn.name  is not NULL and (chn.name like '%man%' or chn.name like '%Man%') AND it2.info  = 'rating' AND k.keyword  in ('superhero', 'marvel-comics', 'based-on-comic', 'fight') AND kt.kind  = 'movie' AND mi_idx.info  > '8.0' AND t.production_year  > 2005 AND kt.id = t.kind_id AND t.id = mk.movie_id AND t.id = ci.movie_id AND t.id = cc.movie_id AND t.id = mi_idx.movie_id AND mk.movie_id = ci.movie_id AND mk.movie_id = cc.movie_id AND mk.movie_id = mi_idx.movie_id AND ci.movie_id = cc.movie_id AND ci.movie_id = mi_idx.movie_id AND cc.movie_id = mi_idx.movie_id AND chn.id = ci.person_role_id AND n.id = ci.person_id AND k.id = mk.keyword_id AND cct1.id = cc.subject_id AND cct2.id = cc.status_id AND it2.id = mi_idx.info_type_id;
2 | 


--------------------------------------------------------------------------------
/job/26c.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(chn.name) AS character_name, MIN(mi_idx.info) AS rating, MIN(t.title) AS complete_hero_movie FROM complete_cast AS cc, comp_cast_type AS cct1, comp_cast_type AS cct2, char_name AS chn, cast_info AS ci, info_type AS it2, keyword AS k, kind_type AS kt, movie_info_idx AS mi_idx, movie_keyword AS mk, name AS n, title AS t WHERE cct1.kind  = 'cast' AND cct2.kind  like '%complete%' AND chn.name  is not NULL and (chn.name like '%man%' or chn.name like '%Man%') AND it2.info  = 'rating' AND k.keyword  in ('superhero', 'marvel-comics', 'based-on-comic', 'tv-special', 'fight', 'violence', 'magnet', 'web', 'claw', 'laser') AND kt.kind  = 'movie' AND t.production_year  > 2000 AND kt.id = t.kind_id AND t.id = mk.movie_id AND t.id = ci.movie_id AND t.id = cc.movie_id AND t.id = mi_idx.movie_id AND mk.movie_id = ci.movie_id AND mk.movie_id = cc.movie_id AND mk.movie_id = mi_idx.movie_id AND ci.movie_id = cc.movie_id AND ci.movie_id = mi_idx.movie_id AND cc.movie_id = mi_idx.movie_id AND chn.id = ci.person_role_id AND n.id = ci.person_id AND k.id = mk.keyword_id AND cct1.id = cc.subject_id AND cct2.id = cc.status_id AND it2.id = mi_idx.info_type_id;
2 | 


--------------------------------------------------------------------------------
/job/27a.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(cn.name) AS producing_company, MIN(lt.link) AS link_type, MIN(t.title) AS complete_western_sequel FROM complete_cast AS cc, comp_cast_type AS cct1, comp_cast_type AS cct2, company_name AS cn, company_type AS ct, keyword AS k, link_type AS lt, movie_companies AS mc, movie_info AS mi, movie_keyword AS mk, movie_link AS ml, title AS t WHERE cct1.kind  in ('cast', 'crew') AND cct2.kind  = 'complete' AND cn.country_code !='[pl]' AND (cn.name LIKE '%Film%' OR cn.name LIKE '%Warner%') AND ct.kind ='production companies' AND k.keyword ='sequel' AND lt.link LIKE '%follow%' AND mc.note IS NULL AND mi.info IN ('Sweden', 'Germany','Swedish', 'German') AND t.production_year BETWEEN 1950 AND 2000 AND lt.id = ml.link_type_id AND ml.movie_id = t.id AND t.id = mk.movie_id AND mk.keyword_id = k.id AND t.id = mc.movie_id AND mc.company_type_id = ct.id AND mc.company_id = cn.id AND mi.movie_id = t.id AND t.id = cc.movie_id AND cct1.id = cc.subject_id AND cct2.id = cc.status_id AND ml.movie_id = mk.movie_id AND ml.movie_id = mc.movie_id AND mk.movie_id = mc.movie_id AND ml.movie_id = mi.movie_id AND mk.movie_id = mi.movie_id AND mc.movie_id = mi.movie_id AND ml.movie_id = cc.movie_id AND mk.movie_id = cc.movie_id AND mc.movie_id = cc.movie_id AND mi.movie_id = cc.movie_id;
2 | 


--------------------------------------------------------------------------------
/job/27b.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(cn.name) AS producing_company, MIN(lt.link) AS link_type, MIN(t.title) AS complete_western_sequel FROM complete_cast AS cc, comp_cast_type AS cct1, comp_cast_type AS cct2, company_name AS cn, company_type AS ct, keyword AS k, link_type AS lt, movie_companies AS mc, movie_info AS mi, movie_keyword AS mk, movie_link AS ml, title AS t WHERE cct1.kind  in ('cast', 'crew') AND cct2.kind  = 'complete' AND cn.country_code !='[pl]' AND (cn.name LIKE '%Film%' OR cn.name LIKE '%Warner%') AND ct.kind ='production companies' AND k.keyword ='sequel' AND lt.link LIKE '%follow%' AND mc.note IS NULL AND mi.info IN ('Sweden', 'Germany','Swedish', 'German') AND t.production_year  = 1998 AND lt.id = ml.link_type_id AND ml.movie_id = t.id AND t.id = mk.movie_id AND mk.keyword_id = k.id AND t.id = mc.movie_id AND mc.company_type_id = ct.id AND mc.company_id = cn.id AND mi.movie_id = t.id AND t.id = cc.movie_id AND cct1.id = cc.subject_id AND cct2.id = cc.status_id AND ml.movie_id = mk.movie_id AND ml.movie_id = mc.movie_id AND mk.movie_id = mc.movie_id AND ml.movie_id = mi.movie_id AND mk.movie_id = mi.movie_id AND mc.movie_id = mi.movie_id AND ml.movie_id = cc.movie_id AND mk.movie_id = cc.movie_id AND mc.movie_id = cc.movie_id AND mi.movie_id = cc.movie_id;
2 | 


--------------------------------------------------------------------------------
/job/27c.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(cn.name) AS producing_company, MIN(lt.link) AS link_type, MIN(t.title) AS complete_western_sequel FROM complete_cast AS cc, comp_cast_type AS cct1, comp_cast_type AS cct2, company_name AS cn, company_type AS ct, keyword AS k, link_type AS lt, movie_companies AS mc, movie_info AS mi, movie_keyword AS mk, movie_link AS ml, title AS t WHERE cct1.kind  = 'cast' AND cct2.kind  like 'complete%' AND cn.country_code !='[pl]' AND (cn.name LIKE '%Film%' OR cn.name LIKE '%Warner%') AND ct.kind ='production companies' AND k.keyword ='sequel' AND lt.link LIKE '%follow%' AND mc.note IS NULL AND mi.info IN ('Sweden', 'Norway', 'Germany', 'Denmark', 'Swedish', 'Denish', 'Norwegian', 'German', 'English') AND t.production_year BETWEEN 1950 AND 2010 AND lt.id = ml.link_type_id AND ml.movie_id = t.id AND t.id = mk.movie_id AND mk.keyword_id = k.id AND t.id = mc.movie_id AND mc.company_type_id = ct.id AND mc.company_id = cn.id AND mi.movie_id = t.id AND t.id = cc.movie_id AND cct1.id = cc.subject_id AND cct2.id = cc.status_id AND ml.movie_id = mk.movie_id AND ml.movie_id = mc.movie_id AND mk.movie_id = mc.movie_id AND ml.movie_id = mi.movie_id AND mk.movie_id = mi.movie_id AND mc.movie_id = mi.movie_id AND ml.movie_id = cc.movie_id AND mk.movie_id = cc.movie_id AND mc.movie_id = cc.movie_id AND mi.movie_id = cc.movie_id;
2 | 


--------------------------------------------------------------------------------
/job/28a.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(cn.name) AS movie_company, MIN(mi_idx.info) AS rating, MIN(t.title) AS complete_euro_dark_movie FROM complete_cast AS cc, comp_cast_type AS cct1, comp_cast_type AS cct2, company_name AS cn, company_type AS ct, info_type AS it1, info_type AS it2, keyword AS k, kind_type AS kt, movie_companies AS mc, movie_info AS mi, movie_info_idx AS mi_idx, movie_keyword AS mk, title AS t WHERE cct1.kind  = 'crew' AND cct2.kind  != 'complete+verified' AND cn.country_code  != '[us]' AND it1.info  = 'countries' AND it2.info  = 'rating' AND k.keyword  in ('murder', 'murder-in-title', 'blood', 'violence') AND kt.kind  in ('movie', 'episode') AND mc.note  not like '%(USA)%' and mc.note like '%(200%)%' AND mi.info IN ('Sweden', 'Norway', 'Germany', 'Denmark', 'Swedish', 'Danish', 'Norwegian', 'German', 'USA', 'American') AND mi_idx.info  < '8.5' AND t.production_year  > 2000 AND kt.id = t.kind_id AND t.id = mi.movie_id AND t.id = mk.movie_id AND t.id = mi_idx.movie_id AND t.id = mc.movie_id AND t.id = cc.movie_id AND mk.movie_id = mi.movie_id AND mk.movie_id = mi_idx.movie_id AND mk.movie_id = mc.movie_id AND mk.movie_id = cc.movie_id AND mi.movie_id = mi_idx.movie_id AND mi.movie_id = mc.movie_id AND mi.movie_id = cc.movie_id AND mc.movie_id = mi_idx.movie_id AND mc.movie_id = cc.movie_id AND mi_idx.movie_id = cc.movie_id AND k.id = mk.keyword_id AND it1.id = mi.info_type_id AND it2.id = mi_idx.info_type_id AND ct.id = mc.company_type_id AND cn.id = mc.company_id AND cct1.id = cc.subject_id AND cct2.id = cc.status_id;
2 | 


--------------------------------------------------------------------------------
/job/28b.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(cn.name) AS movie_company, MIN(mi_idx.info) AS rating, MIN(t.title) AS complete_euro_dark_movie FROM complete_cast AS cc, comp_cast_type AS cct1, comp_cast_type AS cct2, company_name AS cn, company_type AS ct, info_type AS it1, info_type AS it2, keyword AS k, kind_type AS kt, movie_companies AS mc, movie_info AS mi, movie_info_idx AS mi_idx, movie_keyword AS mk, title AS t WHERE cct1.kind  = 'crew' AND cct2.kind  != 'complete+verified' AND cn.country_code  != '[us]' AND it1.info  = 'countries' AND it2.info  = 'rating' AND k.keyword  in ('murder', 'murder-in-title', 'blood', 'violence') AND kt.kind  in ('movie', 'episode') AND mc.note  not like '%(USA)%' and mc.note like '%(200%)%' AND mi.info  IN ('Sweden', 'Germany', 'Swedish', 'German') AND mi_idx.info  > '6.5' AND t.production_year  > 2005 AND kt.id = t.kind_id AND t.id = mi.movie_id AND t.id = mk.movie_id AND t.id = mi_idx.movie_id AND t.id = mc.movie_id AND t.id = cc.movie_id AND mk.movie_id = mi.movie_id AND mk.movie_id = mi_idx.movie_id AND mk.movie_id = mc.movie_id AND mk.movie_id = cc.movie_id AND mi.movie_id = mi_idx.movie_id AND mi.movie_id = mc.movie_id AND mi.movie_id = cc.movie_id AND mc.movie_id = mi_idx.movie_id AND mc.movie_id = cc.movie_id AND mi_idx.movie_id = cc.movie_id AND k.id = mk.keyword_id AND it1.id = mi.info_type_id AND it2.id = mi_idx.info_type_id AND ct.id = mc.company_type_id AND cn.id = mc.company_id AND cct1.id = cc.subject_id AND cct2.id = cc.status_id;
2 | 


--------------------------------------------------------------------------------
/job/28c.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(cn.name) AS movie_company, MIN(mi_idx.info) AS rating, MIN(t.title) AS complete_euro_dark_movie FROM complete_cast AS cc, comp_cast_type AS cct1, comp_cast_type AS cct2, company_name AS cn, company_type AS ct, info_type AS it1, info_type AS it2, keyword AS k, kind_type AS kt, movie_companies AS mc, movie_info AS mi, movie_info_idx AS mi_idx, movie_keyword AS mk, title AS t WHERE cct1.kind  = 'cast' AND cct2.kind  = 'complete' AND cn.country_code  != '[us]' AND it1.info  = 'countries' AND it2.info  = 'rating' AND k.keyword  in ('murder', 'murder-in-title', 'blood', 'violence') AND kt.kind  in ('movie', 'episode') AND mc.note  not like '%(USA)%' and mc.note like '%(200%)%' AND mi.info IN ('Sweden', 'Norway', 'Germany', 'Denmark', 'Swedish', 'Danish', 'Norwegian', 'German', 'USA', 'American') AND mi_idx.info  < '8.5' AND t.production_year  > 2005 AND kt.id = t.kind_id AND t.id = mi.movie_id AND t.id = mk.movie_id AND t.id = mi_idx.movie_id AND t.id = mc.movie_id AND t.id = cc.movie_id AND mk.movie_id = mi.movie_id AND mk.movie_id = mi_idx.movie_id AND mk.movie_id = mc.movie_id AND mk.movie_id = cc.movie_id AND mi.movie_id = mi_idx.movie_id AND mi.movie_id = mc.movie_id AND mi.movie_id = cc.movie_id AND mc.movie_id = mi_idx.movie_id AND mc.movie_id = cc.movie_id AND mi_idx.movie_id = cc.movie_id AND k.id = mk.keyword_id AND it1.id = mi.info_type_id AND it2.id = mi_idx.info_type_id AND ct.id = mc.company_type_id AND cn.id = mc.company_id AND cct1.id = cc.subject_id AND cct2.id = cc.status_id;
2 | 


--------------------------------------------------------------------------------
/job/29a.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(chn.name) AS voiced_char, MIN(n.name) AS voicing_actress, MIN(t.title) AS voiced_animation FROM aka_name AS an, complete_cast AS cc, comp_cast_type AS cct1, comp_cast_type AS cct2, char_name AS chn, cast_info AS ci, company_name AS cn, info_type AS it, info_type AS it3, keyword AS k, movie_companies AS mc, movie_info AS mi, movie_keyword AS mk, name AS n, person_info AS pi, role_type AS rt, title AS t WHERE cct1.kind  ='cast' AND cct2.kind  ='complete+verified' AND chn.name  = 'Queen' AND ci.note  in ('(voice)', '(voice) (uncredited)', '(voice: English version)') AND cn.country_code ='[us]' AND it.info  = 'release dates' AND it3.info  = 'trivia' AND k.keyword  = 'computer-animation' AND mi.info  is not null and (mi.info like 'Japan:%200%' or mi.info like 'USA:%200%') AND n.gender ='f' and n.name like '%An%' AND rt.role ='actress' AND t.title  = 'Shrek 2' AND t.production_year  between 2000 and 2010 AND t.id = mi.movie_id AND t.id = mc.movie_id AND t.id = ci.movie_id AND t.id = mk.movie_id AND t.id = cc.movie_id AND mc.movie_id = ci.movie_id AND mc.movie_id = mi.movie_id AND mc.movie_id = mk.movie_id AND mc.movie_id = cc.movie_id AND mi.movie_id = ci.movie_id AND mi.movie_id = mk.movie_id AND mi.movie_id = cc.movie_id AND ci.movie_id = mk.movie_id AND ci.movie_id = cc.movie_id AND mk.movie_id = cc.movie_id AND cn.id = mc.company_id AND it.id = mi.info_type_id AND n.id = ci.person_id AND rt.id = ci.role_id AND n.id = an.person_id AND ci.person_id = an.person_id AND chn.id = ci.person_role_id AND n.id = pi.person_id AND ci.person_id = pi.person_id AND it3.id = pi.info_type_id AND k.id = mk.keyword_id AND cct1.id = cc.subject_id AND cct2.id = cc.status_id;
2 | 


--------------------------------------------------------------------------------
/job/29b.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(chn.name) AS voiced_char, MIN(n.name) AS voicing_actress, MIN(t.title) AS voiced_animation FROM aka_name AS an, complete_cast AS cc, comp_cast_type AS cct1, comp_cast_type AS cct2, char_name AS chn, cast_info AS ci, company_name AS cn, info_type AS it, info_type AS it3, keyword AS k, movie_companies AS mc, movie_info AS mi, movie_keyword AS mk, name AS n, person_info AS pi, role_type AS rt, title AS t WHERE cct1.kind  ='cast' AND cct2.kind  ='complete+verified' AND chn.name  = 'Queen' AND ci.note  in ('(voice)', '(voice) (uncredited)', '(voice: English version)') AND cn.country_code ='[us]' AND it.info  = 'release dates' AND it3.info  = 'height' AND k.keyword  = 'computer-animation' AND mi.info  like 'USA:%200%' AND n.gender ='f' and n.name like '%An%' AND rt.role ='actress' AND t.title  = 'Shrek 2' AND t.production_year  between 2000 and 2005 AND t.id = mi.movie_id AND t.id = mc.movie_id AND t.id = ci.movie_id AND t.id = mk.movie_id AND t.id = cc.movie_id AND mc.movie_id = ci.movie_id AND mc.movie_id = mi.movie_id AND mc.movie_id = mk.movie_id AND mc.movie_id = cc.movie_id AND mi.movie_id = ci.movie_id AND mi.movie_id = mk.movie_id AND mi.movie_id = cc.movie_id AND ci.movie_id = mk.movie_id AND ci.movie_id = cc.movie_id AND mk.movie_id = cc.movie_id AND cn.id = mc.company_id AND it.id = mi.info_type_id AND n.id = ci.person_id AND rt.id = ci.role_id AND n.id = an.person_id AND ci.person_id = an.person_id AND chn.id = ci.person_role_id AND n.id = pi.person_id AND ci.person_id = pi.person_id AND it3.id = pi.info_type_id AND k.id = mk.keyword_id AND cct1.id = cc.subject_id AND cct2.id = cc.status_id;
2 | 


--------------------------------------------------------------------------------
/job/29c.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(chn.name) AS voiced_char, MIN(n.name) AS voicing_actress, MIN(t.title) AS voiced_animation FROM aka_name AS an, complete_cast AS cc, comp_cast_type AS cct1, comp_cast_type AS cct2, char_name AS chn, cast_info AS ci, company_name AS cn, info_type AS it, info_type AS it3, keyword AS k, movie_companies AS mc, movie_info AS mi, movie_keyword AS mk, name AS n, person_info AS pi, role_type AS rt, title AS t WHERE cct1.kind  ='cast' AND cct2.kind  ='complete+verified' AND ci.note  in ('(voice)', '(voice: Japanese version)', '(voice) (uncredited)', '(voice: English version)') AND cn.country_code ='[us]' AND it.info  = 'release dates' AND it3.info  = 'trivia' AND k.keyword  = 'computer-animation' AND mi.info  is not null and (mi.info like 'Japan:%200%' or mi.info like 'USA:%200%') AND n.gender ='f' and n.name like '%An%' AND rt.role ='actress' AND t.production_year  between 2000 and 2010 AND t.id = mi.movie_id AND t.id = mc.movie_id AND t.id = ci.movie_id AND t.id = mk.movie_id AND t.id = cc.movie_id AND mc.movie_id = ci.movie_id AND mc.movie_id = mi.movie_id AND mc.movie_id = mk.movie_id AND mc.movie_id = cc.movie_id AND mi.movie_id = ci.movie_id AND mi.movie_id = mk.movie_id AND mi.movie_id = cc.movie_id AND ci.movie_id = mk.movie_id AND ci.movie_id = cc.movie_id AND mk.movie_id = cc.movie_id AND cn.id = mc.company_id AND it.id = mi.info_type_id AND n.id = ci.person_id AND rt.id = ci.role_id AND n.id = an.person_id AND ci.person_id = an.person_id AND chn.id = ci.person_role_id AND n.id = pi.person_id AND ci.person_id = pi.person_id AND it3.id = pi.info_type_id AND k.id = mk.keyword_id AND cct1.id = cc.subject_id AND cct2.id = cc.status_id;
2 | 


--------------------------------------------------------------------------------
/job/2a.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(t.title) AS movie_title FROM company_name AS cn, keyword AS k, movie_companies AS mc, movie_keyword AS mk, title AS t WHERE cn.country_code ='[de]' AND k.keyword ='character-name-in-title' AND cn.id = mc.company_id AND mc.movie_id = t.id AND t.id = mk.movie_id AND mk.keyword_id = k.id AND mc.movie_id = mk.movie_id;
2 | 


--------------------------------------------------------------------------------
/job/2b.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(t.title) AS movie_title FROM company_name AS cn, keyword AS k, movie_companies AS mc, movie_keyword AS mk, title AS t WHERE cn.country_code ='[nl]' AND k.keyword ='character-name-in-title' AND cn.id = mc.company_id AND mc.movie_id = t.id AND t.id = mk.movie_id AND mk.keyword_id = k.id AND mc.movie_id = mk.movie_id;
2 | 


--------------------------------------------------------------------------------
/job/2c.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(t.title) AS movie_title FROM company_name AS cn, keyword AS k, movie_companies AS mc, movie_keyword AS mk, title AS t WHERE cn.country_code ='[sm]' AND k.keyword ='character-name-in-title' AND cn.id = mc.company_id AND mc.movie_id = t.id AND t.id = mk.movie_id AND mk.keyword_id = k.id AND mc.movie_id = mk.movie_id;
2 | 


--------------------------------------------------------------------------------
/job/2d.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(t.title) AS movie_title FROM company_name AS cn, keyword AS k, movie_companies AS mc, movie_keyword AS mk, title AS t WHERE cn.country_code ='[us]' AND k.keyword ='character-name-in-title' AND cn.id = mc.company_id AND mc.movie_id = t.id AND t.id = mk.movie_id AND mk.keyword_id = k.id AND mc.movie_id = mk.movie_id;
2 | 


--------------------------------------------------------------------------------
/job/30a.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(mi.info) AS movie_budget, MIN(mi_idx.info) AS movie_votes, MIN(n.name) AS writer, MIN(t.title) AS complete_violent_movie FROM complete_cast AS cc, comp_cast_type AS cct1, comp_cast_type AS cct2, cast_info AS ci, info_type AS it1, info_type AS it2, keyword AS k, movie_info AS mi, movie_info_idx AS mi_idx, movie_keyword AS mk, name AS n, title AS t WHERE cct1.kind  in ('cast', 'crew') AND cct2.kind  ='complete+verified' AND ci.note  in ('(writer)', '(head writer)', '(written by)', '(story)', '(story editor)') AND it1.info  = 'genres' AND it2.info  = 'votes' AND k.keyword  in ('murder', 'violence', 'blood', 'gore', 'death', 'female-nudity', 'hospital') AND mi.info  in ('Horror', 'Thriller') AND n.gender  = 'm' AND t.production_year  > 2000 AND t.id = mi.movie_id AND t.id = mi_idx.movie_id AND t.id = ci.movie_id AND t.id = mk.movie_id AND t.id = cc.movie_id AND ci.movie_id = mi.movie_id AND ci.movie_id = mi_idx.movie_id AND ci.movie_id = mk.movie_id AND ci.movie_id = cc.movie_id AND mi.movie_id = mi_idx.movie_id AND mi.movie_id = mk.movie_id AND mi.movie_id = cc.movie_id AND mi_idx.movie_id = mk.movie_id AND mi_idx.movie_id = cc.movie_id AND mk.movie_id = cc.movie_id AND n.id = ci.person_id AND it1.id = mi.info_type_id AND it2.id = mi_idx.info_type_id AND k.id = mk.keyword_id AND cct1.id = cc.subject_id AND cct2.id = cc.status_id;
2 | 


--------------------------------------------------------------------------------
/job/30b.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(mi.info) AS movie_budget, MIN(mi_idx.info) AS movie_votes, MIN(n.name) AS writer, MIN(t.title) AS complete_gore_movie FROM complete_cast AS cc, comp_cast_type AS cct1, comp_cast_type AS cct2, cast_info AS ci, info_type AS it1, info_type AS it2, keyword AS k, movie_info AS mi, movie_info_idx AS mi_idx, movie_keyword AS mk, name AS n, title AS t WHERE cct1.kind  in ('cast', 'crew') AND cct2.kind  ='complete+verified' AND ci.note  in ('(writer)', '(head writer)', '(written by)', '(story)', '(story editor)') AND it1.info  = 'genres' AND it2.info  = 'votes' AND k.keyword  in ('murder', 'violence', 'blood', 'gore', 'death', 'female-nudity', 'hospital') AND mi.info  in ('Horror', 'Thriller') AND n.gender  = 'm' AND t.production_year  > 2000 and (t.title like '%Freddy%' or t.title like '%Jason%' or t.title like 'Saw%') AND t.id = mi.movie_id AND t.id = mi_idx.movie_id AND t.id = ci.movie_id AND t.id = mk.movie_id AND t.id = cc.movie_id AND ci.movie_id = mi.movie_id AND ci.movie_id = mi_idx.movie_id AND ci.movie_id = mk.movie_id AND ci.movie_id = cc.movie_id AND mi.movie_id = mi_idx.movie_id AND mi.movie_id = mk.movie_id AND mi.movie_id = cc.movie_id AND mi_idx.movie_id = mk.movie_id AND mi_idx.movie_id = cc.movie_id AND mk.movie_id = cc.movie_id AND n.id = ci.person_id AND it1.id = mi.info_type_id AND it2.id = mi_idx.info_type_id AND k.id = mk.keyword_id AND cct1.id = cc.subject_id AND cct2.id = cc.status_id;
2 | 


--------------------------------------------------------------------------------
/job/30c.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(mi.info) AS movie_budget, MIN(mi_idx.info) AS movie_votes, MIN(n.name) AS writer, MIN(t.title) AS complete_violent_movie FROM complete_cast AS cc, comp_cast_type AS cct1, comp_cast_type AS cct2, cast_info AS ci, info_type AS it1, info_type AS it2, keyword AS k, movie_info AS mi, movie_info_idx AS mi_idx, movie_keyword AS mk, name AS n, title AS t WHERE cct1.kind  = 'cast' AND cct2.kind  ='complete+verified' AND ci.note  in ('(writer)', '(head writer)', '(written by)', '(story)', '(story editor)') AND it1.info  = 'genres' AND it2.info  = 'votes' AND k.keyword  in ('murder', 'violence', 'blood', 'gore', 'death', 'female-nudity', 'hospital') AND mi.info  in ('Horror', 'Action', 'Sci-Fi', 'Thriller', 'Crime', 'War') AND n.gender  = 'm' AND t.id = mi.movie_id AND t.id = mi_idx.movie_id AND t.id = ci.movie_id AND t.id = mk.movie_id AND t.id = cc.movie_id AND ci.movie_id = mi.movie_id AND ci.movie_id = mi_idx.movie_id AND ci.movie_id = mk.movie_id AND ci.movie_id = cc.movie_id AND mi.movie_id = mi_idx.movie_id AND mi.movie_id = mk.movie_id AND mi.movie_id = cc.movie_id AND mi_idx.movie_id = mk.movie_id AND mi_idx.movie_id = cc.movie_id AND mk.movie_id = cc.movie_id AND n.id = ci.person_id AND it1.id = mi.info_type_id AND it2.id = mi_idx.info_type_id AND k.id = mk.keyword_id AND cct1.id = cc.subject_id AND cct2.id = cc.status_id;
2 | 


--------------------------------------------------------------------------------
/job/31a.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(mi.info) AS movie_budget, MIN(mi_idx.info) AS movie_votes, MIN(n.name) AS writer, MIN(t.title) AS violent_liongate_movie FROM cast_info AS ci, company_name AS cn, info_type AS it1, info_type AS it2, keyword AS k, movie_companies AS mc, movie_info AS mi, movie_info_idx AS mi_idx, movie_keyword AS mk, name AS n, title AS t WHERE ci.note  in ('(writer)', '(head writer)', '(written by)', '(story)', '(story editor)') AND cn.name  like 'Lionsgate%' AND it1.info  = 'genres' AND it2.info  = 'votes' AND k.keyword  in ('murder', 'violence', 'blood', 'gore', 'death', 'female-nudity', 'hospital') AND mi.info  in ('Horror', 'Thriller') AND n.gender   = 'm' AND t.id = mi.movie_id AND t.id = mi_idx.movie_id AND t.id = ci.movie_id AND t.id = mk.movie_id AND t.id = mc.movie_id AND ci.movie_id = mi.movie_id AND ci.movie_id = mi_idx.movie_id AND ci.movie_id = mk.movie_id AND ci.movie_id = mc.movie_id AND mi.movie_id = mi_idx.movie_id AND mi.movie_id = mk.movie_id AND mi.movie_id = mc.movie_id AND mi_idx.movie_id = mk.movie_id AND mi_idx.movie_id = mc.movie_id AND mk.movie_id = mc.movie_id AND n.id = ci.person_id AND it1.id = mi.info_type_id AND it2.id = mi_idx.info_type_id AND k.id = mk.keyword_id AND cn.id = mc.company_id;
2 | 


--------------------------------------------------------------------------------
/job/31b.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(mi.info) AS movie_budget, MIN(mi_idx.info) AS movie_votes, MIN(n.name) AS writer, MIN(t.title) AS violent_liongate_movie FROM cast_info AS ci, company_name AS cn, info_type AS it1, info_type AS it2, keyword AS k, movie_companies AS mc, movie_info AS mi, movie_info_idx AS mi_idx, movie_keyword AS mk, name AS n, title AS t WHERE ci.note  in ('(writer)', '(head writer)', '(written by)', '(story)', '(story editor)') AND cn.name  like 'Lionsgate%' AND it1.info  = 'genres' AND it2.info  = 'votes' AND k.keyword  in ('murder', 'violence', 'blood', 'gore', 'death', 'female-nudity', 'hospital') AND mc.note  like '%(Blu-ray)%' AND mi.info  in ('Horror', 'Thriller') AND n.gender  = 'm' AND t.production_year  > 2000 and (t.title like '%Freddy%' or t.title like '%Jason%' or t.title like 'Saw%') AND t.id = mi.movie_id AND t.id = mi_idx.movie_id AND t.id = ci.movie_id AND t.id = mk.movie_id AND t.id = mc.movie_id AND ci.movie_id = mi.movie_id AND ci.movie_id = mi_idx.movie_id AND ci.movie_id = mk.movie_id AND ci.movie_id = mc.movie_id AND mi.movie_id = mi_idx.movie_id AND mi.movie_id = mk.movie_id AND mi.movie_id = mc.movie_id AND mi_idx.movie_id = mk.movie_id AND mi_idx.movie_id = mc.movie_id AND mk.movie_id = mc.movie_id AND n.id = ci.person_id AND it1.id = mi.info_type_id AND it2.id = mi_idx.info_type_id AND k.id = mk.keyword_id AND cn.id = mc.company_id;
2 | 


--------------------------------------------------------------------------------
/job/31c.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(mi.info) AS movie_budget, MIN(mi_idx.info) AS movie_votes, MIN(n.name) AS writer, MIN(t.title) AS violent_liongate_movie FROM cast_info AS ci, company_name AS cn, info_type AS it1, info_type AS it2, keyword AS k, movie_companies AS mc, movie_info AS mi, movie_info_idx AS mi_idx, movie_keyword AS mk, name AS n, title AS t WHERE ci.note  in ('(writer)', '(head writer)', '(written by)', '(story)', '(story editor)') AND cn.name  like 'Lionsgate%' AND it1.info  = 'genres' AND it2.info  = 'votes' AND k.keyword  in ('murder', 'violence', 'blood', 'gore', 'death', 'female-nudity', 'hospital') AND mi.info  in ('Horror', 'Action', 'Sci-Fi', 'Thriller', 'Crime', 'War') AND t.id = mi.movie_id AND t.id = mi_idx.movie_id AND t.id = ci.movie_id AND t.id = mk.movie_id AND t.id = mc.movie_id AND ci.movie_id = mi.movie_id AND ci.movie_id = mi_idx.movie_id AND ci.movie_id = mk.movie_id AND ci.movie_id = mc.movie_id AND mi.movie_id = mi_idx.movie_id AND mi.movie_id = mk.movie_id AND mi.movie_id = mc.movie_id AND mi_idx.movie_id = mk.movie_id AND mi_idx.movie_id = mc.movie_id AND mk.movie_id = mc.movie_id AND n.id = ci.person_id AND it1.id = mi.info_type_id AND it2.id = mi_idx.info_type_id AND k.id = mk.keyword_id AND cn.id = mc.company_id;
2 | 


--------------------------------------------------------------------------------
/job/32a.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(lt.link) AS link_type, MIN(t1.title) AS first_movie, MIN(t2.title) AS second_movie FROM keyword AS k, link_type AS lt, movie_keyword AS mk, movie_link AS ml, title AS t1, title AS t2 WHERE k.keyword ='10,000-mile-club' AND mk.keyword_id = k.id AND t1.id = mk.movie_id AND ml.movie_id = t1.id AND ml.linked_movie_id = t2.id AND lt.id = ml.link_type_id AND mk.movie_id = t1.id;
2 | 


--------------------------------------------------------------------------------
/job/32b.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(lt.link) AS link_type, MIN(t1.title) AS first_movie, MIN(t2.title) AS second_movie FROM keyword AS k, link_type AS lt, movie_keyword AS mk, movie_link AS ml, title AS t1, title AS t2 WHERE k.keyword ='character-name-in-title' AND mk.keyword_id = k.id AND t1.id = mk.movie_id AND ml.movie_id = t1.id AND ml.linked_movie_id = t2.id AND lt.id = ml.link_type_id AND mk.movie_id = t1.id;
2 | 


--------------------------------------------------------------------------------
/job/33a.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(cn1.name) AS first_company, MIN(cn2.name) AS second_company, MIN(mi_idx1.info) AS first_rating, MIN(mi_idx2.info) AS second_rating, MIN(t1.title) AS first_movie, MIN(t2.title) AS second_movie FROM company_name AS cn1, company_name AS cn2, info_type AS it1, info_type AS it2, kind_type AS kt1, kind_type AS kt2, link_type AS lt, movie_companies AS mc1, movie_companies AS mc2, movie_info_idx AS mi_idx1, movie_info_idx AS mi_idx2, movie_link AS ml, title AS t1, title AS t2 WHERE cn1.country_code  = '[us]' AND it1.info  = 'rating' AND it2.info  = 'rating' AND kt1.kind  in ('tv series') AND kt2.kind  in ('tv series') AND lt.link  in ('sequel', 'follows', 'followed by') AND mi_idx2.info  < '3.0' AND t2.production_year  between 2005 and 2008 AND lt.id = ml.link_type_id AND t1.id = ml.movie_id AND t2.id = ml.linked_movie_id AND it1.id = mi_idx1.info_type_id AND t1.id = mi_idx1.movie_id AND kt1.id = t1.kind_id AND cn1.id = mc1.company_id AND t1.id = mc1.movie_id AND ml.movie_id = mi_idx1.movie_id AND ml.movie_id = mc1.movie_id AND mi_idx1.movie_id = mc1.movie_id AND it2.id = mi_idx2.info_type_id AND t2.id = mi_idx2.movie_id AND kt2.id = t2.kind_id AND cn2.id = mc2.company_id AND t2.id = mc2.movie_id AND ml.linked_movie_id = mi_idx2.movie_id AND ml.linked_movie_id = mc2.movie_id AND mi_idx2.movie_id = mc2.movie_id;
2 | 


--------------------------------------------------------------------------------
/job/33b.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(cn1.name) AS first_company, MIN(cn2.name) AS second_company, MIN(mi_idx1.info) AS first_rating, MIN(mi_idx2.info) AS second_rating, MIN(t1.title) AS first_movie, MIN(t2.title) AS second_movie FROM company_name AS cn1, company_name AS cn2, info_type AS it1, info_type AS it2, kind_type AS kt1, kind_type AS kt2, link_type AS lt, movie_companies AS mc1, movie_companies AS mc2, movie_info_idx AS mi_idx1, movie_info_idx AS mi_idx2, movie_link AS ml, title AS t1, title AS t2 WHERE cn1.country_code  = '[nl]' AND it1.info  = 'rating' AND it2.info  = 'rating' AND kt1.kind  in ('tv series') AND kt2.kind  in ('tv series') AND lt.link  LIKE '%follow%' AND mi_idx2.info  < '3.0' AND t2.production_year  = 2007 AND lt.id = ml.link_type_id AND t1.id = ml.movie_id AND t2.id = ml.linked_movie_id AND it1.id = mi_idx1.info_type_id AND t1.id = mi_idx1.movie_id AND kt1.id = t1.kind_id AND cn1.id = mc1.company_id AND t1.id = mc1.movie_id AND ml.movie_id = mi_idx1.movie_id AND ml.movie_id = mc1.movie_id AND mi_idx1.movie_id = mc1.movie_id AND it2.id = mi_idx2.info_type_id AND t2.id = mi_idx2.movie_id AND kt2.id = t2.kind_id AND cn2.id = mc2.company_id AND t2.id = mc2.movie_id AND ml.linked_movie_id = mi_idx2.movie_id AND ml.linked_movie_id = mc2.movie_id AND mi_idx2.movie_id = mc2.movie_id;
2 | 


--------------------------------------------------------------------------------
/job/33c.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(cn1.name) AS first_company, MIN(cn2.name) AS second_company, MIN(mi_idx1.info) AS first_rating, MIN(mi_idx2.info) AS second_rating, MIN(t1.title) AS first_movie, MIN(t2.title) AS second_movie FROM company_name AS cn1, company_name AS cn2, info_type AS it1, info_type AS it2, kind_type AS kt1, kind_type AS kt2, link_type AS lt, movie_companies AS mc1, movie_companies AS mc2, movie_info_idx AS mi_idx1, movie_info_idx AS mi_idx2, movie_link AS ml, title AS t1, title AS t2 WHERE cn1.country_code  != '[us]' AND it1.info  = 'rating' AND it2.info  = 'rating' AND kt1.kind  in ('tv series', 'episode') AND kt2.kind  in ('tv series', 'episode') AND lt.link  in ('sequel', 'follows', 'followed by') AND mi_idx2.info  < '3.5' AND t2.production_year  between 2000 and 2010 AND lt.id = ml.link_type_id AND t1.id = ml.movie_id AND t2.id = ml.linked_movie_id AND it1.id = mi_idx1.info_type_id AND t1.id = mi_idx1.movie_id AND kt1.id = t1.kind_id AND cn1.id = mc1.company_id AND t1.id = mc1.movie_id AND ml.movie_id = mi_idx1.movie_id AND ml.movie_id = mc1.movie_id AND mi_idx1.movie_id = mc1.movie_id AND it2.id = mi_idx2.info_type_id AND t2.id = mi_idx2.movie_id AND kt2.id = t2.kind_id AND cn2.id = mc2.company_id AND t2.id = mc2.movie_id AND ml.linked_movie_id = mi_idx2.movie_id AND ml.linked_movie_id = mc2.movie_id AND mi_idx2.movie_id = mc2.movie_id;
2 | 


--------------------------------------------------------------------------------
/job/3a.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(t.title) AS movie_title FROM keyword AS k, movie_info AS mi, movie_keyword AS mk, title AS t WHERE k.keyword  like '%sequel%' AND mi.info  IN ('Sweden', 'Norway', 'Germany', 'Denmark', 'Swedish', 'Denish', 'Norwegian', 'German') AND t.production_year > 2005 AND t.id = mi.movie_id AND t.id = mk.movie_id AND mk.movie_id = mi.movie_id AND k.id = mk.keyword_id;
2 | 


--------------------------------------------------------------------------------
/job/3b.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(t.title) AS movie_title FROM keyword AS k, movie_info AS mi, movie_keyword AS mk, title AS t WHERE k.keyword  like '%sequel%' AND mi.info  IN ('Bulgaria') AND t.production_year > 2010 AND t.id = mi.movie_id AND t.id = mk.movie_id AND mk.movie_id = mi.movie_id AND k.id = mk.keyword_id;
2 | 


--------------------------------------------------------------------------------
/job/3c.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(t.title) AS movie_title FROM keyword AS k, movie_info AS mi, movie_keyword AS mk, title AS t WHERE k.keyword  like '%sequel%' AND mi.info  IN ('Sweden', 'Norway', 'Germany', 'Denmark', 'Swedish', 'Denish', 'Norwegian', 'German', 'USA', 'American') AND t.production_year > 1990 AND t.id = mi.movie_id AND t.id = mk.movie_id AND mk.movie_id = mi.movie_id AND k.id = mk.keyword_id;
2 | 


--------------------------------------------------------------------------------
/job/4a.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(mi_idx.info) AS rating, MIN(t.title) AS movie_title FROM info_type AS it, keyword AS k, movie_info_idx AS mi_idx, movie_keyword AS mk, title AS t WHERE it.info ='rating' AND k.keyword  like '%sequel%' AND mi_idx.info  > '5.0' AND t.production_year > 2005 AND t.id = mi_idx.movie_id AND t.id = mk.movie_id AND mk.movie_id = mi_idx.movie_id AND k.id = mk.keyword_id AND it.id = mi_idx.info_type_id;
2 | 


--------------------------------------------------------------------------------
/job/4b.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(mi_idx.info) AS rating, MIN(t.title) AS movie_title FROM info_type AS it, keyword AS k, movie_info_idx AS mi_idx, movie_keyword AS mk, title AS t WHERE it.info ='rating' AND k.keyword  like '%sequel%' AND mi_idx.info  > '9.0' AND t.production_year > 2010 AND t.id = mi_idx.movie_id AND t.id = mk.movie_id AND mk.movie_id = mi_idx.movie_id AND k.id = mk.keyword_id AND it.id = mi_idx.info_type_id;
2 | 


--------------------------------------------------------------------------------
/job/4c.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(mi_idx.info) AS rating, MIN(t.title) AS movie_title FROM info_type AS it, keyword AS k, movie_info_idx AS mi_idx, movie_keyword AS mk, title AS t WHERE it.info ='rating' AND k.keyword  like '%sequel%' AND mi_idx.info  > '2.0' AND t.production_year > 1990 AND t.id = mi_idx.movie_id AND t.id = mk.movie_id AND mk.movie_id = mi_idx.movie_id AND k.id = mk.keyword_id AND it.id = mi_idx.info_type_id;
2 | 


--------------------------------------------------------------------------------
/job/5a.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(t.title) AS typical_european_movie FROM company_type AS ct, info_type AS it, movie_companies AS mc, movie_info AS mi, title AS t WHERE ct.kind  = 'production companies' AND mc.note  like '%(theatrical)%' and mc.note like '%(France)%' AND mi.info  IN ('Sweden', 'Norway', 'Germany', 'Denmark', 'Swedish', 'Denish', 'Norwegian', 'German') AND t.production_year > 2005 AND t.id = mi.movie_id AND t.id = mc.movie_id AND mc.movie_id = mi.movie_id AND ct.id = mc.company_type_id AND it.id = mi.info_type_id;
2 | 


--------------------------------------------------------------------------------
/job/5b.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(t.title) AS american_vhs_movie FROM company_type AS ct, info_type AS it, movie_companies AS mc, movie_info AS mi, title AS t WHERE ct.kind  = 'production companies' AND mc.note  like '%(VHS)%' and mc.note like '%(USA)%' and mc.note like '%(1994)%' AND mi.info  IN ('USA', 'America') AND t.production_year > 2010 AND t.id = mi.movie_id AND t.id = mc.movie_id AND mc.movie_id = mi.movie_id AND ct.id = mc.company_type_id AND it.id = mi.info_type_id;
2 | 


--------------------------------------------------------------------------------
/job/5c.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(t.title) AS american_movie FROM company_type AS ct, info_type AS it, movie_companies AS mc, movie_info AS mi, title AS t WHERE ct.kind  = 'production companies' AND mc.note  not like '%(TV)%' and mc.note like '%(USA)%' AND mi.info  IN ('Sweden', 'Norway', 'Germany', 'Denmark', 'Swedish', 'Denish', 'Norwegian', 'German', 'USA', 'American') AND t.production_year > 1990 AND t.id = mi.movie_id AND t.id = mc.movie_id AND mc.movie_id = mi.movie_id AND ct.id = mc.company_type_id AND it.id = mi.info_type_id;
2 | 


--------------------------------------------------------------------------------
/job/6a.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(k.keyword) AS movie_keyword, MIN(n.name) AS actor_name, MIN(t.title) AS marvel_movie FROM cast_info AS ci, keyword AS k, movie_keyword AS mk, name AS n, title AS t WHERE k.keyword = 'marvel-cinematic-universe' AND n.name LIKE '%Downey%Robert%' AND t.production_year > 2010 AND k.id = mk.keyword_id AND t.id = mk.movie_id AND t.id = ci.movie_id AND ci.movie_id = mk.movie_id AND n.id = ci.person_id;
2 | 


--------------------------------------------------------------------------------
/job/6b.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(k.keyword) AS movie_keyword, MIN(n.name) AS actor_name, MIN(t.title) AS hero_movie FROM cast_info AS ci, keyword AS k, movie_keyword AS mk, name AS n, title AS t WHERE k.keyword in ('superhero', 'sequel', 'second-part', 'marvel-comics', 'based-on-comic', 'tv-special', 'fight', 'violence') AND n.name LIKE '%Downey%Robert%' AND t.production_year > 2014 AND k.id = mk.keyword_id AND t.id = mk.movie_id AND t.id = ci.movie_id AND ci.movie_id = mk.movie_id AND n.id = ci.person_id;
2 | 


--------------------------------------------------------------------------------
/job/6c.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(k.keyword) AS movie_keyword, MIN(n.name) AS actor_name, MIN(t.title) AS marvel_movie FROM cast_info AS ci, keyword AS k, movie_keyword AS mk, name AS n, title AS t WHERE k.keyword = 'marvel-cinematic-universe' AND n.name LIKE '%Downey%Robert%' AND t.production_year > 2014 AND k.id = mk.keyword_id AND t.id = mk.movie_id AND t.id = ci.movie_id AND ci.movie_id = mk.movie_id AND n.id = ci.person_id;
2 | 


--------------------------------------------------------------------------------
/job/6d.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(k.keyword) AS movie_keyword, MIN(n.name) AS actor_name, MIN(t.title) AS hero_movie FROM cast_info AS ci, keyword AS k, movie_keyword AS mk, name AS n, title AS t WHERE k.keyword in ('superhero', 'sequel', 'second-part', 'marvel-comics', 'based-on-comic', 'tv-special', 'fight', 'violence') AND n.name LIKE '%Downey%Robert%' AND t.production_year > 2000 AND k.id = mk.keyword_id AND t.id = mk.movie_id AND t.id = ci.movie_id AND ci.movie_id = mk.movie_id AND n.id = ci.person_id;
2 | 


--------------------------------------------------------------------------------
/job/6e.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(k.keyword) AS movie_keyword, MIN(n.name) AS actor_name, MIN(t.title) AS marvel_movie FROM cast_info AS ci, keyword AS k, movie_keyword AS mk, name AS n, title AS t WHERE k.keyword = 'marvel-cinematic-universe' AND n.name LIKE '%Downey%Robert%' AND t.production_year > 2000 AND k.id = mk.keyword_id AND t.id = mk.movie_id AND t.id = ci.movie_id AND ci.movie_id = mk.movie_id AND n.id = ci.person_id;
2 | 


--------------------------------------------------------------------------------
/job/6f.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(k.keyword) AS movie_keyword, MIN(n.name) AS actor_name, MIN(t.title) AS hero_movie FROM cast_info AS ci, keyword AS k, movie_keyword AS mk, name AS n, title AS t WHERE k.keyword in ('superhero', 'sequel', 'second-part', 'marvel-comics', 'based-on-comic', 'tv-special', 'fight', 'violence') AND t.production_year > 2000 AND k.id = mk.keyword_id AND t.id = mk.movie_id AND t.id = ci.movie_id AND ci.movie_id = mk.movie_id AND n.id = ci.person_id;
2 | 


--------------------------------------------------------------------------------
/job/7a.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(n.name) AS of_person, MIN(t.title) AS biography_movie FROM aka_name AS an, cast_info AS ci, info_type AS it, link_type AS lt, movie_link AS ml, name AS n, person_info AS pi, title AS t WHERE an.name LIKE '%a%' AND it.info ='mini biography' AND lt.link ='features' AND n.name_pcode_cf BETWEEN 'A' AND 'F' AND (n.gender='m' OR (n.gender = 'f' AND n.name LIKE 'B%')) AND pi.note ='Volker Boehm' AND t.production_year BETWEEN 1980 AND 1995 AND n.id = an.person_id AND n.id = pi.person_id AND ci.person_id = n.id AND t.id = ci.movie_id AND ml.linked_movie_id = t.id AND lt.id = ml.link_type_id AND it.id = pi.info_type_id AND pi.person_id = an.person_id AND pi.person_id = ci.person_id AND an.person_id = ci.person_id AND ci.movie_id = ml.linked_movie_id;
2 | 


--------------------------------------------------------------------------------
/job/7b.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(n.name) AS of_person, MIN(t.title) AS biography_movie FROM aka_name AS an, cast_info AS ci, info_type AS it, link_type AS lt, movie_link AS ml, name AS n, person_info AS pi, title AS t WHERE an.name LIKE '%a%' AND it.info ='mini biography' AND lt.link ='features' AND n.name_pcode_cf LIKE 'D%' AND n.gender='m' AND pi.note ='Volker Boehm' AND t.production_year BETWEEN 1980 AND 1984 AND n.id = an.person_id AND n.id = pi.person_id AND ci.person_id = n.id AND t.id = ci.movie_id AND ml.linked_movie_id = t.id AND lt.id = ml.link_type_id AND it.id = pi.info_type_id AND pi.person_id = an.person_id AND pi.person_id = ci.person_id AND an.person_id = ci.person_id AND ci.movie_id = ml.linked_movie_id;
2 | 


--------------------------------------------------------------------------------
/job/7c.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(n.name) AS cast_member_name, MIN(pi.info) AS cast_member_info FROM aka_name AS an, cast_info AS ci, info_type AS it, link_type AS lt, movie_link AS ml, name AS n, person_info AS pi, title AS t WHERE an.name  is not NULL and (an.name LIKE '%a%' or an.name LIKE 'A%') AND it.info ='mini biography' AND lt.link  in ('references', 'referenced in', 'features', 'featured in') AND n.name_pcode_cf BETWEEN 'A' AND 'F' AND (n.gender='m' OR (n.gender = 'f' AND n.name LIKE 'A%')) AND pi.note  is not NULL AND t.production_year BETWEEN 1980 AND 2010 AND n.id = an.person_id AND n.id = pi.person_id AND ci.person_id = n.id AND t.id = ci.movie_id AND ml.linked_movie_id = t.id AND lt.id = ml.link_type_id AND it.id = pi.info_type_id AND pi.person_id = an.person_id AND pi.person_id = ci.person_id AND an.person_id = ci.person_id AND ci.movie_id = ml.linked_movie_id;
2 | 


--------------------------------------------------------------------------------
/job/8a.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(an1.name) AS actress_pseudonym, MIN(t.title) AS japanese_movie_dubbed FROM aka_name AS an1, cast_info AS ci, company_name AS cn, movie_companies AS mc, name AS n1, role_type AS rt, title AS t WHERE ci.note ='(voice: English version)' AND cn.country_code ='[jp]' AND mc.note like '%(Japan)%' and mc.note not like '%(USA)%' AND n1.name like '%Yo%' and n1.name not like '%Yu%' AND rt.role ='actress' AND an1.person_id = n1.id AND n1.id = ci.person_id AND ci.movie_id = t.id AND t.id = mc.movie_id AND mc.company_id = cn.id AND ci.role_id = rt.id AND an1.person_id = ci.person_id AND ci.movie_id = mc.movie_id;
2 | 


--------------------------------------------------------------------------------
/job/8b.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(an.name) AS acress_pseudonym, MIN(t.title) AS japanese_anime_movie FROM aka_name AS an, cast_info AS ci, company_name AS cn, movie_companies AS mc, name AS n, role_type AS rt, title AS t WHERE ci.note ='(voice: English version)' AND cn.country_code ='[jp]' AND mc.note like '%(Japan)%' and mc.note not like '%(USA)%' and (mc.note like '%(2006)%' or mc.note like '%(2007)%') AND n.name like '%Yo%' and n.name not like '%Yu%' AND rt.role ='actress' AND t.production_year between 2006 and 2007 and (t.title like 'One Piece%' or t.title like 'Dragon Ball Z%') AND an.person_id = n.id AND n.id = ci.person_id AND ci.movie_id = t.id AND t.id = mc.movie_id AND mc.company_id = cn.id AND ci.role_id = rt.id AND an.person_id = ci.person_id AND ci.movie_id = mc.movie_id;
2 | 


--------------------------------------------------------------------------------
/job/8c.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(a1.name) AS writer_pseudo_name, MIN(t.title) AS movie_title FROM aka_name AS a1, cast_info AS ci, company_name AS cn, movie_companies AS mc, name AS n1, role_type AS rt, title AS t WHERE cn.country_code ='[us]' AND rt.role ='writer' AND a1.person_id = n1.id AND n1.id = ci.person_id AND ci.movie_id = t.id AND t.id = mc.movie_id AND mc.company_id = cn.id AND ci.role_id = rt.id AND a1.person_id = ci.person_id AND ci.movie_id = mc.movie_id;
2 | 


--------------------------------------------------------------------------------
/job/8d.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(an1.name) AS costume_designer_pseudo, MIN(t.title) AS movie_with_costumes FROM aka_name AS an1, cast_info AS ci, company_name AS cn, movie_companies AS mc, name AS n1, role_type AS rt, title AS t WHERE cn.country_code ='[us]' AND rt.role ='costume designer' AND an1.person_id = n1.id AND n1.id = ci.person_id AND ci.movie_id = t.id AND t.id = mc.movie_id AND mc.company_id = cn.id AND ci.role_id = rt.id AND an1.person_id = ci.person_id AND ci.movie_id = mc.movie_id;
2 | 


--------------------------------------------------------------------------------
/job/9a.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(an.name) AS alternative_name, MIN(chn.name) AS character_name, MIN(t.title) AS movie FROM aka_name AS an, char_name AS chn, cast_info AS ci, company_name AS cn, movie_companies AS mc, name AS n, role_type AS rt, title AS t WHERE ci.note  in ('(voice)', '(voice: Japanese version)', '(voice) (uncredited)', '(voice: English version)') AND cn.country_code ='[us]' AND mc.note  is not NULL and (mc.note like '%(USA)%' or mc.note like '%(worldwide)%') AND n.gender ='f' and n.name like '%Ang%' AND rt.role ='actress' AND t.production_year  between 2005 and 2015 AND ci.movie_id = t.id AND t.id = mc.movie_id AND ci.movie_id = mc.movie_id AND mc.company_id = cn.id AND ci.role_id = rt.id AND n.id = ci.person_id AND chn.id = ci.person_role_id AND an.person_id = n.id AND an.person_id = ci.person_id;
2 | 


--------------------------------------------------------------------------------
/job/9b.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(an.name) AS alternative_name, MIN(chn.name) AS voiced_character, MIN(n.name) AS voicing_actress, MIN(t.title) AS american_movie FROM aka_name AS an, char_name AS chn, cast_info AS ci, company_name AS cn, movie_companies AS mc, name AS n, role_type AS rt, title AS t WHERE ci.note  = '(voice)' AND cn.country_code ='[us]' AND mc.note  like '%(200%)%' and (mc.note like '%(USA)%' or mc.note like '%(worldwide)%') AND n.gender ='f' and n.name like '%Angel%' AND rt.role ='actress' AND t.production_year  between 2007 and 2010 AND ci.movie_id = t.id AND t.id = mc.movie_id AND ci.movie_id = mc.movie_id AND mc.company_id = cn.id AND ci.role_id = rt.id AND n.id = ci.person_id AND chn.id = ci.person_role_id AND an.person_id = n.id AND an.person_id = ci.person_id;
2 | 


--------------------------------------------------------------------------------
/job/9c.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(an.name) AS alternative_name, MIN(chn.name) AS voiced_character_name, MIN(n.name) AS voicing_actress, MIN(t.title) AS american_movie FROM aka_name AS an, char_name AS chn, cast_info AS ci, company_name AS cn, movie_companies AS mc, name AS n, role_type AS rt, title AS t WHERE ci.note  in ('(voice)', '(voice: Japanese version)', '(voice) (uncredited)', '(voice: English version)') AND cn.country_code ='[us]' AND n.gender ='f' and n.name like '%An%' AND rt.role ='actress' AND ci.movie_id = t.id AND t.id = mc.movie_id AND ci.movie_id = mc.movie_id AND mc.company_id = cn.id AND ci.role_id = rt.id AND n.id = ci.person_id AND chn.id = ci.person_role_id AND an.person_id = n.id AND an.person_id = ci.person_id;
2 | 


--------------------------------------------------------------------------------
/job/9d.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(an.name) AS alternative_name, MIN(chn.name) AS voiced_char_name, MIN(n.name) AS voicing_actress, MIN(t.title) AS american_movie FROM aka_name AS an, char_name AS chn, cast_info AS ci, company_name AS cn, movie_companies AS mc, name AS n, role_type AS rt, title AS t WHERE ci.note  in ('(voice)', '(voice: Japanese version)', '(voice) (uncredited)', '(voice: English version)') AND cn.country_code ='[us]' AND n.gender ='f' AND rt.role ='actress' AND ci.movie_id = t.id AND t.id = mc.movie_id AND ci.movie_id = mc.movie_id AND mc.company_id = cn.id AND ci.role_id = rt.id AND n.id = ci.person_id AND chn.id = ci.person_role_id AND an.person_id = n.id AND an.person_id = ci.person_id;
2 | 


--------------------------------------------------------------------------------
/job/README:
--------------------------------------------------------------------------------
 1 | This package contains the Join Order Benchmark (JOB) queries from:
 2 | 
 3 | "How Good Are Query Optimizers, Really?"
 4 | by Viktor Leis, Andrey Gubichev, Atanas Mirchev, Peter Boncz, Alfons Kemper, Thomas Neumann
 5 | PVLDB Volume 9, No. 3, 2015
 6 | 
 7 | IMDB Data Set
 8 | -------------
 9 | 
10 | The CSV files used in the paper, which are from May 2013, can be found
11 | at http://homepages.cwi.nl/~boncz/job/imdb.tgz
12 | 
13 | The license and links to the current version of the IMDB data set can
14 | be found at http://www.imdb.com/interfaces
15 | Step-by-step instructions:
16 | 1. download *gz files (unpacking not necessary)
17 |   wget ftp://ftp.fu-berlin.de/pub/misc/movies/database/*gz
18 | 2. download and unpack imdbpy and the imdbpy2sql.py script
19 |   wget https://bitbucket.org/alberanid/imdbpy/get/5.0.zip
20 | 3. create a PostgreSQL database (e.g., named imdbload):
21 |   createdb imdbload
22 | 4. transform *gz files to relational schema (takes a while)
23 |   imdbpy2sql.py -d PATH_TO_GZ_FILES -u postgres://username:password@hostname/imdbload
24 | 
25 | Now you should have a PostgreSQL database named "imdbload" with the
26 | imdb data. Note that this database has some secondary indexes (but not
27 | on all foreign key attributes). You can export all tables to CSV:
28 | 
29 | \copy aka_name to 'PATH/aka_name.csv' csv
30 | \copy aka_title to 'PATH/aka_title.csv' csv
31 | \copy cast_info to 'PATH/cast_info.csv' csv
32 | \copy char_name to 'PATH/char_name.csv' csv
33 | \copy comp_cast_type to 'PATH/comp_cast_type.csv' csv
34 | \copy company_name to 'PATH/company_name.csv' csv
35 | \copy company_type to 'PATH/company_type.csv' csv
36 | \copy complete_cast to 'PATH/complete_cast.csv' csv
37 | \copy info_type to 'PATH/info_type.csv' csv
38 | \copy keyword to 'PATH/keyword.csv' csv
39 | \copy kind_type to 'PATH/kind_type.csv' csv
40 | \copy link_type to 'PATH/link_type.csv' csv
41 | \copy movie_companies to 'PATH/movie_companies.csv' csv
42 | \copy movie_info to 'PATH/movie_info.csv' csv
43 | \copy movie_info_idx to 'PATH/movie_info_idx.csv' csv
44 | \copy movie_keyword to 'PATH/movie_keyword.csv' csv
45 | \copy movie_link to 'PATH/movie_link.csv' csv
46 | \copy name to 'PATH/name.csv' csv
47 | \copy person_info to 'PATH/person_info.csv' csv
48 | \copy role_type to 'PATH/role_type.csv' csv
49 | \copy title to 'PATH/title.csv' csv
50 | 
51 | To import the CSV files into another database, create all tables (see
52 | schema.sql and optionally fkindexes.sql) and run the same copy statements
53 | as above, but replace the keyword "to" with "from".
54 | 
55 | Questions
56 | ---------
57 | 
58 | Contact Viktor Leis (leis@in.tum.de) if you have any questions.
59 | 


--------------------------------------------------------------------------------
/job/fkindexes.sql:
--------------------------------------------------------------------------------
 1 | create index company_id_movie_companies on movie_companies(company_id);
 2 | create index company_type_id_movie_companies on movie_companies(company_type_id);
 3 | create index info_type_id_movie_info_idx on movie_info_idx(info_type_id);
 4 | create index info_type_id_movie_info on movie_info(info_type_id);
 5 | create index info_type_id_person_info on person_info(info_type_id);
 6 | create index keyword_id_movie_keyword on movie_keyword(keyword_id);
 7 | create index kind_id_aka_title on aka_title(kind_id);
 8 | create index kind_id_title on title(kind_id);
 9 | create index linked_movie_id_movie_link on movie_link(linked_movie_id);
10 | create index link_type_id_movie_link on movie_link(link_type_id);
11 | create index movie_id_aka_title on aka_title(movie_id);
12 | create index movie_id_cast_info on cast_info(movie_id);
13 | create index movie_id_complete_cast on complete_cast(movie_id);
14 | create index movie_id_movie_companies on movie_companies(movie_id);
15 | create index movie_id_movie_info_idx on movie_info_idx(movie_id);
16 | create index movie_id_movie_keyword on movie_keyword(movie_id);
17 | create index movie_id_movie_link on movie_link(movie_id);
18 | create index movie_id_movie_info on movie_info(movie_id);
19 | create index person_id_aka_name on aka_name(person_id);
20 | create index person_id_cast_info on cast_info(person_id);
21 | create index person_id_person_info on person_info(person_id);
22 | create index person_role_id_cast_info on cast_info(person_role_id);
23 | create index role_id_cast_info on cast_info(role_id);
24 | 


--------------------------------------------------------------------------------
/job/schema.sql:
--------------------------------------------------------------------------------
  1 | CREATE TABLE aka_name (
  2 |     id integer NOT NULL PRIMARY KEY,
  3 |     person_id integer NOT NULL,
  4 |     name text NOT NULL,
  5 |     imdb_index character varying(12),
  6 |     name_pcode_cf character varying(5),
  7 |     name_pcode_nf character varying(5),
  8 |     surname_pcode character varying(5),
  9 |     md5sum character varying(32)
 10 | );
 11 | 
 12 | CREATE TABLE aka_title (
 13 |     id integer NOT NULL PRIMARY KEY,
 14 |     movie_id integer NOT NULL,
 15 |     title text NOT NULL,
 16 |     imdb_index character varying(12),
 17 |     kind_id integer NOT NULL,
 18 |     production_year integer,
 19 |     phonetic_code character varying(5),
 20 |     episode_of_id integer,
 21 |     season_nr integer,
 22 |     episode_nr integer,
 23 |     note text,
 24 |     md5sum character varying(32)
 25 | );
 26 | 
 27 | CREATE TABLE cast_info (
 28 |     id integer NOT NULL PRIMARY KEY,
 29 |     person_id integer NOT NULL,
 30 |     movie_id integer NOT NULL,
 31 |     person_role_id integer,
 32 |     note text,
 33 |     nr_order integer,
 34 |     role_id integer NOT NULL
 35 | );
 36 | 
 37 | CREATE TABLE char_name (
 38 |     id integer NOT NULL PRIMARY KEY,
 39 |     name text NOT NULL,
 40 |     imdb_index character varying(12),
 41 |     imdb_id integer,
 42 |     name_pcode_nf character varying(5),
 43 |     surname_pcode character varying(5),
 44 |     md5sum character varying(32)
 45 | );
 46 | 
 47 | CREATE TABLE comp_cast_type (
 48 |     id integer NOT NULL PRIMARY KEY,
 49 |     kind character varying(32) NOT NULL
 50 | );
 51 | 
 52 | CREATE TABLE company_name (
 53 |     id integer NOT NULL PRIMARY KEY,
 54 |     name text NOT NULL,
 55 |     country_code character varying(255),
 56 |     imdb_id integer,
 57 |     name_pcode_nf character varying(5),
 58 |     name_pcode_sf character varying(5),
 59 |     md5sum character varying(32)
 60 | );
 61 | 
 62 | CREATE TABLE company_type (
 63 |     id integer NOT NULL PRIMARY KEY,
 64 |     kind character varying(32) NOT NULL
 65 | );
 66 | 
 67 | CREATE TABLE complete_cast (
 68 |     id integer NOT NULL PRIMARY KEY,
 69 |     movie_id integer,
 70 |     subject_id integer NOT NULL,
 71 |     status_id integer NOT NULL
 72 | );
 73 | 
 74 | CREATE TABLE info_type (
 75 |     id integer NOT NULL PRIMARY KEY,
 76 |     info character varying(32) NOT NULL
 77 | );
 78 | 
 79 | CREATE TABLE keyword (
 80 |     id integer NOT NULL PRIMARY KEY,
 81 |     keyword text NOT NULL,
 82 |     phonetic_code character varying(5)
 83 | );
 84 | 
 85 | CREATE TABLE kind_type (
 86 |     id integer NOT NULL PRIMARY KEY,
 87 |     kind character varying(15) NOT NULL
 88 | );
 89 | 
 90 | CREATE TABLE link_type (
 91 |     id integer NOT NULL PRIMARY KEY,
 92 |     link character varying(32) NOT NULL
 93 | );
 94 | 
 95 | CREATE TABLE movie_companies (
 96 |     id integer NOT NULL PRIMARY KEY,
 97 |     movie_id integer NOT NULL,
 98 |     company_id integer NOT NULL,
 99 |     company_type_id integer NOT NULL,
100 |     note text
101 | );
102 | 
103 | CREATE TABLE movie_info (
104 |     id integer NOT NULL PRIMARY KEY,
105 |     movie_id integer NOT NULL,
106 |     info_type_id integer NOT NULL,
107 |     info text NOT NULL,
108 |     note text
109 | );
110 | 
111 | CREATE TABLE movie_info_idx (
112 |     id integer NOT NULL PRIMARY KEY,
113 |     movie_id integer NOT NULL,
114 |     info_type_id integer NOT NULL,
115 |     info text NOT NULL,
116 |     note text
117 | );
118 | 
119 | CREATE TABLE movie_keyword (
120 |     id integer NOT NULL PRIMARY KEY,
121 |     movie_id integer NOT NULL,
122 |     keyword_id integer NOT NULL
123 | );
124 | 
125 | CREATE TABLE movie_link (
126 |     id integer NOT NULL PRIMARY KEY,
127 |     movie_id integer NOT NULL,
128 |     linked_movie_id integer NOT NULL,
129 |     link_type_id integer NOT NULL
130 | );
131 | 
132 | CREATE TABLE name (
133 |     id integer NOT NULL PRIMARY KEY,
134 |     name text NOT NULL,
135 |     imdb_index character varying(12),
136 |     imdb_id integer,
137 |     gender character varying(1),
138 |     name_pcode_cf character varying(5),
139 |     name_pcode_nf character varying(5),
140 |     surname_pcode character varying(5),
141 |     md5sum character varying(32)
142 | );
143 | 
144 | CREATE TABLE person_info (
145 |     id integer NOT NULL PRIMARY KEY,
146 |     person_id integer NOT NULL,
147 |     info_type_id integer NOT NULL,
148 |     info text NOT NULL,
149 |     note text
150 | );
151 | 
152 | CREATE TABLE role_type (
153 |     id integer NOT NULL PRIMARY KEY,
154 |     role character varying(32) NOT NULL
155 | );
156 | 
157 | CREATE TABLE title (
158 |     id integer NOT NULL PRIMARY KEY,
159 |     title text NOT NULL,
160 |     imdb_index character varying(12),
161 |     kind_id integer NOT NULL,
162 |     production_year integer,
163 |     imdb_id integer,
164 |     phonetic_code character varying(5),
165 |     episode_of_id integer,
166 |     season_nr integer,
167 |     episode_nr integer,
168 |     series_years character varying(49),
169 |     md5sum character varying(32)
170 | );
171 | 


--------------------------------------------------------------------------------
/src/csv_parser.cpp:
--------------------------------------------------------------------------------
  1 | #include <csv_parser.h>
  2 | 
  3 | CSVParser::Error CSVParser::execute(const char* buffer, size_t len) {
    |     // Feeds the next `len` bytes of CSV input to the parser. Every completed field is
    |     // handed to on_field(col_idx_, row_idx_, data, size). Returns Ok, NoTrailingComma or
    |     // InconsistentColumns.
    |     //
    |     // Two members carry a decision across chunk boundaries when the deciding byte has
    |     // not arrived yet:
    |     //   escaping_  - the previous chunk ended, inside a quoted field, on escape_
    |     //                (or on '"' when escape_ is '"');
    |     //   newlining_ - the previous chunk ended on an unquoted '\r'.
  4 |     size_t i = 0;
    |     // Resume a pending escape. `len > 0` keeps an empty chunk from reading buffer[0];
    |     // the escape then simply stays pending until a non-empty chunk arrives.
  5 |     if (this->escaping_ and len > 0) {
  6 |         if (this->escape_ == '"') {
  7 |             if (buffer[0] == '"') {
    |                 // The two quotes form an escaped quote: keep one literal '"'.
  8 |                 ++i;
  9 |                 this->current_field_.push_back('"');
 10 |             } else {
    |                 // The pending quote closed the field; buffer[0] is parsed normally below.
 11 |                 this->quoted_ = false;
 12 |             }
 13 |         } else {
 14 |             char c = buffer[0];
 15 |             if (c == '"' or c == this->escape_) {
 16 |                 this->current_field_.push_back(c);
 17 |                 ++i;
 18 |             } else {
    |                 // Nothing escapable follows: the escape character is kept literally and
    |                 // buffer[0] is parsed normally below.
 19 |                 this->current_field_.push_back(this->escape_);
 20 |             }
 21 |         }
 22 |         this->escaping_ = false;
 23 |     }
    |     // Finish the record whose '\r' ended the previous chunk, swallowing the '\n' of a
    |     // "\r\n" pair that was split across the two chunks.
 24 |     if (this->newlining_) {
 25 |         if (len > 0 and buffer[0] == '\n') {
 26 |             ++i;
 27 |         }
 28 |         if (this->has_trailing_comma_) {
    |             // Every record must end with a separator, so the last field was already
    |             // emitted and col_idx_ equals the number of columns.
 29 |             if (not this->after_field_sep_) {
 30 |                 return NoTrailingComma;
 31 |             }
 32 |             if (not this->after_first_row_) {
 33 |                 this->after_first_row_ = true;
 34 |                 this->num_cols_        = this->col_idx_;
 35 |             } else [[likely]] {
 36 |                 if (this->col_idx_ != this->num_cols_) {
 37 |                     return InconsistentColumns;
 38 |                 }
 39 |             }
 40 |         } else {
    |             // The last field is still buffered: the row has col_idx_ + 1 columns and the
    |             // buffered field is emitted here.
 41 |             if (not this->after_first_row_) {
 42 |                 this->after_first_row_ = true;
 43 |                 this->num_cols_        = this->col_idx_ + 1;
 44 |             } else [[likely]] {
 45 |                 if (this->col_idx_ + 1 != this->num_cols_) {
 46 |                     return InconsistentColumns;
 47 |                 }
 48 |             }
 49 |             this->on_field(this->col_idx_,
 50 |                 this->row_idx_,
 51 |                 this->current_field_.data(),
 52 |                 this->current_field_.size());
 53 |             this->current_field_.clear();
 54 |         }
 55 |         this->col_idx_ = 0;
 56 |         ++this->row_idx_;
 57 |         this->after_record_sep_ = true;
 58 |         this->newlining_ = false;
 59 |     }
 60 |     for (; i < len; ++i) {
    |         // Both flags describe only the byte handled in this iteration; they are stored
    |         // at the bottom of the loop and therefore reset by any non-separator byte.
 61 |         bool set_after_record_sep = false;
 62 |         bool set_after_field_sep  = false;
 63 |         char c                    = buffer[i];
 64 |         if (c != this->comma_ and c != '\n' and c != '\r' and c != '"' and c != this->escape_)
 65 |             [[likely]] {
 66 |             this->current_field_.push_back(c);
 67 |         } else if (c == this->comma_) {
 68 |             if (not this->quoted_) [[likely]] {
 69 |                 this->on_field(this->col_idx_,
 70 |                     this->row_idx_,
 71 |                     this->current_field_.data(),
 72 |                     this->current_field_.size());
 73 |                 this->current_field_.clear();
 74 |                 ++this->col_idx_;
 75 |                 set_after_field_sep = true;
 76 |             } else {
    |                 // A separator inside quotes is ordinary field content.
 77 |                 this->current_field_.push_back(c);
 78 |             }
 79 |         } else if (c == '\n' or c == '\r') {
 80 |             if (not this->quoted_) [[likely]] {
 81 |                 if (c == '\r') {
 82 |                     if (i + 1 == len) {
    |                         // '\r' is the last byte: "\r" and "\r\n" cannot be told apart
    |                         // yet, so the record is finished by the next call.
 83 |                         this->newlining_ = true;
 84 |                         return Ok;
 85 |                     }
 86 |                     if (buffer[i + 1] == '\n') {
 87 |                         ++i;
 88 |                     }
 89 |                 }
    |                 // Same end-of-record handling as the newlining_ block above.
 90 |                 if (this->has_trailing_comma_) {
 91 |                     if (not this->after_field_sep_) {
 92 |                         return NoTrailingComma;
 93 |                     }
 94 |                     if (not this->after_first_row_) {
 95 |                         this->after_first_row_ = true;
 96 |                         this->num_cols_        = this->col_idx_;
 97 |                     } else [[likely]] {
 98 |                         if (this->col_idx_ != this->num_cols_) {
 99 |                             return InconsistentColumns;
100 |                         }
101 |                     }
102 |                 } else {
103 |                     if (not this->after_first_row_) {
104 |                         this->after_first_row_ = true;
105 |                         this->num_cols_        = this->col_idx_ + 1;
106 |                     } else [[likely]] {
107 |                         if (this->col_idx_ + 1 != this->num_cols_) {
108 |                             return InconsistentColumns;
109 |                         }
110 |                     }
111 |                     this->on_field(this->col_idx_,
112 |                         this->row_idx_,
113 |                         this->current_field_.data(),
114 |                         this->current_field_.size());
115 |                     this->current_field_.clear();
116 |                 }
117 |                 this->col_idx_ = 0;
118 |                 ++this->row_idx_;
119 |                 set_after_record_sep = true;
120 |             } else {
    |                 // Line breaks inside quotes belong to the field.
121 |                 this->current_field_.push_back(c);
122 |             }
123 |         } else if (c == '"') {
124 |             if (this->escape_ == '"') {
125 |                 if (not this->quoted_) {
126 |                     this->quoted_ = true;
127 |                 } else {
128 |                     if (i + 1 == len) {
    |                         // Closing quote or first half of ""? Decided by the next chunk.
129 |                         this->escaping_ = true;
130 |                         return Ok;
131 |                     }
132 |                     if (buffer[i + 1] == '"') {
133 |                         ++i;
134 |                         this->current_field_.push_back(c);
135 |                     } else {
136 |                         this->quoted_ = false;
137 |                     }
138 |                 }
139 |             } else {
    |                 // With a separate escape character an unescaped quote just toggles quoting.
140 |                 this->quoted_ = not this->quoted_;
141 |             }
142 |         } else {
    |             // Only c == escape_ (with escape_ other than '"') reaches this branch.
143 |             if (this->quoted_) [[likely]] {
144 |                 if (i + 1 == len) {
145 |                     this->escaping_ = true;
146 |                     return Ok;
147 |                 }
148 |                 char c = buffer[i + 1];
149 |                 if (c == '"' or c == this->escape_) {
150 |                     this->current_field_.push_back(c);
151 |                     ++i;
152 |                 } else {
153 |                     this->current_field_.push_back(this->escape_);
154 |                 }
155 |             } else {
    |                 // Outside quotes the escape character has no special meaning.
156 |                 this->current_field_.push_back(c);
157 |             }
158 |         }
159 |         this->after_field_sep_  = set_after_field_sep;
160 |         this->after_record_sep_ = set_after_record_sep;
161 |     }
162 |     return Ok;
163 | }
164 | 
165 | // Signals end of input and flushes the record that is still open, if any.
166 | //
167 | // Returns QuoteNotClosed when a quoted field is left open; otherwise the result of the
168 | // final execute() call, or Ok when the input already ended on a record separator.
169 | CSVParser::Error CSVParser::finish() {
170 |     // With escape_ == '"', a quote that was the last byte given to execute() is parked in
171 |     // escaping_ (quoted_ still set) until the next byte shows whether it starts a "" pair.
172 |     // At end of input no such byte exists, so the parked quote is the closing quote.
173 |     // Without this step, input ending in `"` (e.g. `a,"b"` with no final newline) was
174 |     // reported as QuoteNotClosed.
175 |     if (this->escaping_ and this->escape_ == '"') {
176 |         this->escaping_ = false;
177 |         this->quoted_   = false;
178 |     }
179 |     if (this->quoted_) {
180 |         return QuoteNotClosed;
181 |     } else if (this->newlining_) {
182 |         // The input ended on '\r': an empty chunk lets execute() close that record.
183 |         return this->execute("", 0);
184 |     } else if (not this->after_record_sep_) {
185 |         // No final line break: feed one so the last record is emitted.
186 |         return this->execute("\n", 1);
187 |     } else {
188 |         return Ok;
189 |     }
190 | }
176 | 


--------------------------------------------------------------------------------
/src/execute.cpp:
--------------------------------------------------------------------------------
  1 | #include <hardware.h>
  2 | #include <plan.h>
  3 | #include <table.h>
  4 | 
  5 | namespace Contest {
  6 | 
  7 | using ExecuteResult = std::vector<std::vector<Data>>;
  8 | 
  9 | ExecuteResult execute_impl(const Plan& plan, size_t node_idx);
 10 | 
 11 | // Hash-join kernel over two materialized row sets.
 12 | //
 13 | // run<T>() indexes the join column of the build side (left when build_left, otherwise
 14 | // right), probes that index with every row of the other side, and appends one projected
 15 | // row to `results` per match. A key holding std::monostate never matches; a key holding
 16 | // any other type than T raises std::runtime_error.
 17 | struct JoinAlgorithm {
 18 |     bool                                             build_left;
 19 |     ExecuteResult&                                   left;
 20 |     ExecuteResult&                                   right;
 21 |     ExecuteResult&                                   results;
 22 |     size_t                                           left_col, right_col;
 23 |     const std::vector<std::tuple<size_t, DataType>>& output_attrs;
 24 | 
 25 |     // Maps each key of type T found in column `key_col` of `rows` to the indices of the
 26 |     // rows carrying it, in row order.
 27 |     template <class T>
 28 |     std::unordered_map<T, std::vector<size_t>> index_rows(ExecuteResult& rows,
 29 |         size_t                                                          key_col) {
 30 |         std::unordered_map<T, std::vector<size_t>> index;
 31 |         for (size_t row = 0; row < rows.size(); ++row) {
 32 |             std::visit(
 33 |                 [&index, row](const auto& key) {
 34 |                     using Tk = std::decay_t<decltype(key)>;
 35 |                     if constexpr (std::is_same_v<Tk, T>) {
 36 |                         index[key].push_back(row);
 37 |                     } else if constexpr (not std::is_same_v<Tk, std::monostate>) {
 38 |                         throw std::runtime_error("wrong type of field");
 39 |                     }
 40 |                 },
 41 |                 rows[row][key_col]);
 42 |         }
 43 |         return index;
 44 |     }
 45 | 
 46 |     // Appends the projection of one matching (left, right) pair to `results`. Column
 47 |     // indices below the left row's width select from the left row, the rest from the
 48 |     // right row.
 49 |     void emit(const std::vector<Data>& left_record, const std::vector<Data>& right_record) {
 50 |         std::vector<Data> joined;
 51 |         joined.reserve(output_attrs.size());
 52 |         for (const auto& attr: output_attrs) {
 53 |             const size_t col_idx = std::get<0>(attr);
 54 |             joined.emplace_back(col_idx < left_record.size()
 55 |                                     ? left_record[col_idx]
 56 |                                     : right_record[col_idx - left_record.size()]);
 57 |         }
 58 |         results.emplace_back(std::move(joined));
 59 |     }
 60 | 
 61 |     template <class T>
 62 |     auto run() {
 63 |         ExecuteResult& build_side = build_left ? left : right;
 64 |         ExecuteResult& probe_side = build_left ? right : left;
 65 |         const size_t   build_col  = build_left ? left_col : right_col;
 66 |         const size_t   probe_col  = build_left ? right_col : left_col;
 67 | 
 68 |         const auto index = index_rows<T>(build_side, build_col);
 69 |         for (auto& probe_record: probe_side) {
 70 |             std::visit(
 71 |                 [&](const auto& key) {
 72 |                     using Tk = std::decay_t<decltype(key)>;
 73 |                     if constexpr (std::is_same_v<Tk, T>) {
 74 |                         const auto hit = index.find(key);
 75 |                         if (hit == index.end()) {
 76 |                             return;
 77 |                         }
 78 |                         for (size_t build_row: hit->second) {
 79 |                             auto& build_record = build_side[build_row];
 80 |                             // emit() always takes the left row first, whichever side
 81 |                             // was used to build the index.
 82 |                             if (build_left) {
 83 |                                 emit(build_record, probe_record);
 84 |                             } else {
 85 |                                 emit(probe_record, build_record);
 86 |                             }
 87 |                         }
 88 |                     } else if constexpr (not std::is_same_v<Tk, std::monostate>) {
 89 |                         throw std::runtime_error("wrong type of field");
 90 |                     }
 91 |                 },
 92 |                 probe_record[probe_col]);
 93 |         }
 94 |     }
 95 | };
114 | 
115 | // Executes a join node: materializes the left child, then the right child, and
116 | // hash-joins them on join.left_attr / join.right_attr, building on the side selected by
117 | // join.build_left. The key type comes from the build side's declared output attribute.
118 | ExecuteResult execute_hash_join(const Plan&          plan,
119 |     const JoinNode&                                  join,
120 |     const std::vector<std::tuple<size_t, DataType>>& output_attrs) {
121 |     ExecuteResult left_rows  = execute_impl(plan, join.left);
122 |     ExecuteResult right_rows = execute_impl(plan, join.right);
123 |     ExecuteResult joined;
124 | 
125 |     JoinAlgorithm algorithm{.build_left = join.build_left,
126 |         .left                           = left_rows,
127 |         .right                          = right_rows,
128 |         .results                        = joined,
129 |         .left_col                       = join.left_attr,
130 |         .right_col                      = join.right_attr,
131 |         .output_attrs                   = output_attrs};
132 | 
133 |     // Pick the attribute list and key column of whichever side gets indexed.
134 |     const auto& build_attrs = join.build_left ? plan.nodes[join.left].output_attrs
135 |                                               : plan.nodes[join.right].output_attrs;
136 |     const auto  build_key   = join.build_left ? join.left_attr : join.right_attr;
137 | 
138 |     switch (std::get<1>(build_attrs[build_key])) {
139 |     case DataType::INT32:   algorithm.run<int32_t>(); break;
140 |     case DataType::INT64:   algorithm.run<int64_t>(); break;
141 |     case DataType::FP64:    algorithm.run<double>(); break;
142 |     case DataType::VARCHAR: algorithm.run<std::string>(); break;
143 |     }
144 | 
145 |     return joined;
146 | }
153 | 
154 | // Executes a scan node: converts the columnar input plan.inputs[scan.base_table_id]
155 | // to rows and keeps, per row, the columns named by `output_attrs` in that order.
156 | ExecuteResult execute_scan(const Plan&               plan,
157 |     const ScanNode&                                  scan,
158 |     const std::vector<std::tuple<size_t, DataType>>& output_attrs) {
159 |     auto          row_table = Table::from_columnar(plan.inputs[scan.base_table_id]);
160 |     ExecuteResult projected;
161 |     for (auto& source_row: row_table.table()) {
162 |         auto& target_row = projected.emplace_back();
163 |         target_row.reserve(output_attrs.size());
164 |         for (const auto& attr: output_attrs) {
165 |             target_row.emplace_back(source_row[std::get<0>(attr)]);
166 |         }
167 |     }
168 |     return projected;
169 | }
171 | 
172 | // Evaluates plan node `node_idx`: a JoinNode payload goes to execute_hash_join, any
173 | // other payload to execute_scan. Both receive the node's own output attribute list.
174 | ExecuteResult execute_impl(const Plan& plan, size_t node_idx) {
175 |     const auto& node  = plan.nodes[node_idx];
176 |     const auto& attrs = node.output_attrs;
177 | 
178 |     auto dispatch = [&plan, &attrs](const auto& payload) -> ExecuteResult {
179 |         if constexpr (std::is_same_v<std::decay_t<decltype(payload)>, JoinNode>) {
180 |             return execute_hash_join(plan, payload, attrs);
181 |         } else {
182 |             return execute_scan(plan, payload, attrs);
183 |         }
184 |     };
185 |     return std::visit(dispatch, node.data);
186 | }
185 | 
186 | // Entry point: evaluates the plan from plan.root and returns the result in columnar
187 | // form, typed by the root node's output attributes. `context` is not used.
188 | ColumnarTable execute(const Plan& plan, [[maybe_unused]] void* context) {
189 |     auto rows = execute_impl(plan, plan.root);
190 | 
191 |     const auto&           root_attrs = plan.nodes[plan.root].output_attrs;
192 |     std::vector<DataType> column_types;
193 |     column_types.reserve(root_attrs.size());
194 |     for (const auto& attr: root_attrs) {
195 |         column_types.push_back(std::get<1>(attr));
196 |     }
197 | 
198 |     Table result_table{std::move(rows), std::move(column_types)};
199 |     return result_table.to_columnar();
200 | }
195 | 
196 | // Creates the per-run context handed to execute(); this implementation keeps no state
197 | // and therefore returns a null pointer.
198 | void* build_context() {
199 |     void* no_context = nullptr;
200 |     return no_context;
201 | }
199 | 
200 | // Releases a context obtained from build_context(); there is nothing to free.
201 | void destroy_context(void* context) {
202 |     static_cast<void>(context);
203 | }
201 | 
202 | } // namespace Contest
203 | 


--------------------------------------------------------------------------------
/src/statement.cpp:
--------------------------------------------------------------------------------
  1 | #include <cassert>
  2 | 
  3 | #include <common.h>
  4 | #include <inner_column.h>
  5 | #include <plan.h>
  6 | #include <statement.h>
  7 | 
  8 | // Returns `bitmap` with every byte bitwise-complemented. The work is handed to
  9 | // filter_tp.run() as a [begin, end) range task over bitmap.size() bytes (filter_tp is
 10 | // defined elsewhere; presumably a thread pool that splits the range).
 11 | std::vector<uint8_t> bitmap_not(std::vector<uint8_t> bitmap) {
 12 |     auto flip_range = [&bitmap](size_t begin, size_t end) {
 13 |         while (begin < end) {
 14 |             bitmap[begin] = ~bitmap[begin];
 15 |             ++begin;
 16 |         }
 17 |     };
 18 |     filter_tp.run(flip_range, bitmap.size());
 19 |     return bitmap;
 20 | }
 17 | 
 18 | // Byte-wise AND of two bitmaps of equal size (asserted). The work is handed to
 19 | // filter_tp.run() as a [begin, end) range task.
 20 | std::vector<uint8_t> bitmap_and(const std::vector<uint8_t>& lhs,
 21 |     const std::vector<uint8_t>&                             rhs) {
 22 |     assert(lhs.size() == rhs.size());
 23 |     std::vector<uint8_t> merged(lhs.size());
 24 |     auto                 and_range = [&lhs, &rhs, &merged](size_t begin, size_t end) {
 25 |         while (begin < end) {
 26 |             merged[begin] = lhs[begin] & rhs[begin];
 27 |             ++begin;
 28 |         }
 29 |     };
 30 |     filter_tp.run(and_range, lhs.size());
 31 |     return merged;
 32 | }
 31 | 
 32 | // Byte-wise OR of two bitmaps of equal size (asserted). The work is handed to
 33 | // filter_tp.run() as a [begin, end) range task.
 34 | std::vector<uint8_t> bitmap_or(const std::vector<uint8_t>& lhs,
 35 |     const std::vector<uint8_t>&                            rhs) {
 36 |     assert(lhs.size() == rhs.size());
 37 |     std::vector<uint8_t> merged(lhs.size());
 38 |     auto                 or_range = [&lhs, &rhs, &merged](size_t begin, size_t end) {
 39 |         while (begin < end) {
 40 |             merged[begin] = lhs[begin] | rhs[begin];
 41 |             ++begin;
 42 |         }
 43 |     };
 44 |     filter_tp.run(or_range, lhs.size());
 45 |     return merged;
 46 | }
 45 | 
 46 | // Evaluates this comparison against a whole column and returns the resulting bitmap.
 47 | //
 48 | // table[column] is viewed as the InnerColumn<T> matching its `type` tag. IS_NULL returns
 49 | // the complement of the column's own bitmap and IS_NOT_NULL the bitmap itself; every
 50 | // other operator is forwarded to the column's method of the same meaning. LIKE and
 51 | // NOT_LIKE are only handled for VARCHAR columns; any other combination is unreachable().
 52 | std::vector<uint8_t> Comparison::eval(const std::vector<const InnerColumnBase*>& table) const {
 53 |     const InnerColumnBase* base = table[column];
 54 |     switch (base->type) {
 55 |     case DataType::INT32: {
 56 |         auto typed = reinterpret_cast<const InnerColumn<int32_t>*>(base);
 57 |         if (op == IS_NULL) {
 58 |             return bitmap_not(typed->bitmap);
 59 |         }
 60 |         if (op == IS_NOT_NULL) {
 61 |             return typed->bitmap;
 62 |         }
 63 |         // The literal is read as int64_t and narrowed to the column's width.
 64 |         auto literal = static_cast<int32_t>(std::get<int64_t>(value));
 65 |         switch (op) {
 66 |         case EQ:  return typed->equal(literal);
 67 |         case NEQ: return typed->not_equal(literal);
 68 |         case LT:  return typed->less(literal);
 69 |         case GT:  return typed->greater(literal);
 70 |         case LEQ: return typed->less_equal(literal);
 71 |         case GEQ: return typed->greater_equal(literal);
 72 |         default:  unreachable();
 73 |         }
 74 |         break;
 75 |     }
 76 |     case DataType::INT64: {
 77 |         auto typed = reinterpret_cast<const InnerColumn<int64_t>*>(base);
 78 |         if (op == IS_NULL) {
 79 |             return bitmap_not(typed->bitmap);
 80 |         }
 81 |         if (op == IS_NOT_NULL) {
 82 |             return typed->bitmap;
 83 |         }
 84 |         auto literal = std::get<int64_t>(value);
 85 |         switch (op) {
 86 |         case EQ:  return typed->equal(literal);
 87 |         case NEQ: return typed->not_equal(literal);
 88 |         case LT:  return typed->less(literal);
 89 |         case GT:  return typed->greater(literal);
 90 |         case LEQ: return typed->less_equal(literal);
 91 |         case GEQ: return typed->greater_equal(literal);
 92 |         default:  unreachable();
 93 |         }
 94 |         break;
 95 |     }
 96 |     case DataType::FP64: {
 97 |         auto typed = reinterpret_cast<const InnerColumn<double>*>(base);
 98 |         if (op == IS_NULL) {
 99 |             return bitmap_not(typed->bitmap);
100 |         }
101 |         if (op == IS_NOT_NULL) {
102 |             return typed->bitmap;
103 |         }
104 |         auto literal = std::get<double>(value);
105 |         switch (op) {
106 |         case EQ:  return typed->equal(literal);
107 |         case NEQ: return typed->not_equal(literal);
108 |         case LT:  return typed->less(literal);
109 |         case GT:  return typed->greater(literal);
110 |         case LEQ: return typed->less_equal(literal);
111 |         case GEQ: return typed->greater_equal(literal);
112 |         default:  unreachable();
113 |         }
114 |         break;
115 |     }
116 |     case DataType::VARCHAR: {
117 |         auto typed = reinterpret_cast<const InnerColumn<std::string>*>(base);
118 |         if (op == IS_NULL) {
119 |             return bitmap_not(typed->bitmap);
120 |         }
121 |         if (op == IS_NOT_NULL) {
122 |             return typed->bitmap;
123 |         }
124 |         auto& literal = std::get<std::string>(value);
125 |         switch (op) {
126 |         case EQ:       return typed->equal(literal);
127 |         case NEQ:      return typed->not_equal(literal);
128 |         case LT:       return typed->less(literal);
129 |         case GT:       return typed->greater(literal);
130 |         case LEQ:      return typed->less_equal(literal);
131 |         case GEQ:      return typed->greater_equal(literal);
132 |         case LIKE:     return typed->like(literal);
133 |         case NOT_LIKE: return typed->not_like(literal);
134 |         default:       unreachable();
135 |         }
136 |         break;
137 |     }
138 |     }
139 |     unreachable();
140 | }
134 | 
135 | // Evaluates this comparison against one row.
136 | //
137 | // IS_NULL / IS_NOT_NULL test whether record[column] holds std::monostate. LIKE and
138 | // NOT_LIKE require both the field and the literal to be strings and are false
139 | // otherwise. The relational operators compare numerically when get_numeric_value()
140 | // yields a value for both sides, as strings when both sides are strings, and are
141 | // false in every other case.
142 | bool Comparison::eval(const std::vector<Data>& record) const {
143 |     const Data& field = record[column];
144 | 
145 |     if (op == IS_NULL) {
146 |         return std::holds_alternative<std::monostate>(field);
147 |     }
148 |     if (op == IS_NOT_NULL) {
149 |         return !std::holds_alternative<std::monostate>(field);
150 |     }
151 | 
152 |     const std::string* field_text   = std::get_if<std::string>(&field);
153 |     const std::string* literal_text = std::get_if<std::string>(&value);
154 | 
155 |     if (op == LIKE || op == NOT_LIKE) {
156 |         if (!field_text || !literal_text) {
157 |             return false;
158 |         }
159 |         const bool matched = like_match(*field_text, *literal_text);
160 |         // LIKE yields `matched`, NOT_LIKE its negation.
161 |         return (op == NOT_LIKE) != matched;
162 |     }
163 | 
164 |     // Applies the relational operator to two operands of the same kind; operators
165 |     // other than the six relational ones yield false.
166 |     const auto relate = [this](const auto& lhs, const auto& rhs) -> bool {
167 |         switch (op) {
168 |         case EQ:  return lhs == rhs;
169 |         case NEQ: return lhs != rhs;
170 |         case LT:  return lhs < rhs;
171 |         case GT:  return lhs > rhs;
172 |         case LEQ: return lhs <= rhs;
173 |         case GEQ: return lhs >= rhs;
174 |         default:  return false;
175 |         }
176 |     };
177 | 
178 |     auto field_num   = get_numeric_value(field);
179 |     auto literal_num = get_numeric_value(value);
180 |     if (field_num.has_value() && literal_num.has_value()) {
181 |         return relate(*field_num, *literal_num);
182 |     }
183 |     if (field_text && literal_text) {
184 |         return relate(*field_text, *literal_text);
185 |     }
186 |     return false;
187 | }
185 | 
186 | // Bitmap form of the logical operator, evaluated over whole columns.
187 | //
188 | // AND / OR combine the bitmaps of all children, as the per-record overload does.
189 | // Previously only children[0] and children[1] were combined, so further operands were
190 | // silently dropped and a single operand indexed past the end of `children`.
191 | // NOT complements the bitmap of children[0].
192 | // At least one child is required (asserted for AND / OR).
193 | std::vector<uint8_t> LogicalOperation::eval(
194 |     const std::vector<const InnerColumnBase*>& table) const {
195 |     switch (op_type) {
196 |     case AND:
197 |     case OR: {
198 |         assert(not children.empty());
199 |         auto combined = children[0]->eval(table);
200 |         for (size_t i = 1; i < children.size(); ++i) {
201 |             auto next = children[i]->eval(table);
202 |             combined  = (op_type == AND) ? bitmap_and(combined, next)
203 |                                          : bitmap_or(combined, next);
204 |         }
205 |         return combined;
206 |     }
207 |     case NOT: {
208 |         return bitmap_not(children[0]->eval(table));
209 |     }
210 |     }
211 |     unreachable();
212 | }
201 | 
202 | // Per-record form of the logical operator.
203 | //
204 | // AND is true when no child evaluates to false (so also for zero children); OR is true
205 | // when some child evaluates to true. Both stop at the first deciding child. NOT needs
206 | // exactly one child and is false otherwise. Any other operator value yields false.
207 | bool LogicalOperation::eval(const std::vector<Data>& record) const {
208 |     if (op_type == NOT) {
209 |         return children.size() == 1 and not children[0]->eval(record);
210 |     }
211 |     if (op_type != AND and op_type != OR) {
212 |         return false;
213 |     }
214 | 
215 |     // The child value that settles the result early: true for OR, false for AND.
216 |     const bool deciding = (op_type == OR);
217 |     for (const auto& child: children) {
218 |         if (child->eval(record) == deciding) {
219 |             return deciding;
220 |         }
221 |     }
222 |     return not deciding;
223 | }
229 | 


--------------------------------------------------------------------------------
/tests/build_database.cpp:
--------------------------------------------------------------------------------
 1 | #include <common.h>
 2 | 
 3 | #include <duckdb.hpp>
 4 | #include <fmt/core.h>
 5 | 
 6 | // Creates the schema in a DuckDB database file and bulk-loads the CSV tables.
 7 | //
 8 | // argv[1] names the database file. The schema text is read from job/schema.sql and
 9 | // each table is loaded from imdb/<table>.csv, both relative to the working directory.
10 | // A failing query is reported and the run continues with the next step.
11 | int main(int argc, char* argv[]) {
12 |     using namespace duckdb;
13 |     namespace fs = std::filesystem;
14 | 
15 |     if (argc < 2) {
16 |         fmt::println(stderr, "Usage: {} <DuckDB database file>", argv[0]);
17 |         return EXIT_FAILURE;
18 |     }
19 | 
20 |     // Tables are loaded in exactly this order.
21 |     static constexpr const char* kTableNames[] = {
22 |         "char_name",
23 |         "kind_type",
24 |         "cast_info",
25 |         "movie_companies",
26 |         "role_type",
27 |         "complete_cast",
28 |         "comp_cast_type",
29 |         "company_name",
30 |         "company_type",
31 |         "movie_link",
32 |         "movie_keyword",
33 |         "name",
34 |         "info_type",
35 |         "movie_info_idx",
36 |         "person_info",
37 |         "link_type",
38 |         "title",
39 |         "aka_name",
40 |         "movie_info",
41 |         "keyword",
42 |         "aka_title",
43 |     };
44 | 
45 |     const auto schema_sql = read_file(fs::path("job") / "schema.sql");
46 | 
47 |     DuckDB     db(argv[1]);
48 |     Connection conn(db);
49 | 
50 |     auto schema_result = conn.Query(schema_sql);
51 |     if (schema_result->HasError()) {
52 |         fmt::println("Error: {}", schema_result->GetError());
53 |     }
54 | 
55 |     for (const char* table: kTableNames) {
56 |         auto copy_result =
57 |             conn.Query(fmt::format("COPY {0} FROM 'imdb/{0}.csv' (ESCAPE '\\');", table));
58 |         if (not copy_result->HasError()) {
59 |             fmt::println("Successfully loaded table {} into {}", table, argv[1]);
60 |             continue;
61 |         }
62 |         fmt::println("Error: {}", copy_result->GetError());
63 |     }
64 | }
58 | 


--------------------------------------------------------------------------------
/tests/unit_tests.cpp:
--------------------------------------------------------------------------------
  1 | #include <catch2/catch_test_macros.hpp>
  2 | 
  3 | #include <table.h>
  4 | #include <plan.h>
  5 | 
  6 | // Orders the rows of `table` ascending with the default comparison of row vectors, so
  7 | // that a join result can be compared with a ground truth regardless of output order.
  8 | void sort(std::vector<std::vector<Data>>& table) {
  9 |     auto first_row = table.begin();
 10 |     auto past_last = table.end();
 11 |     std::sort(first_row, past_last);
 12 | }
  9 | 
 10 | // Joining two empty single-column INT32 inputs must give zero rows while still
 11 | // producing both INT32 output columns.
 12 | TEST_CASE("Empty join", "[join]") {
 13 |     Plan plan;
 14 |     plan.new_scan_node(0, {{0, DataType::INT32}});
 15 |     plan.new_scan_node(1, {{0, DataType::INT32}});
 16 |     plan.new_join_node(true, 0, 1, 0, 0, {{0, DataType::INT32}, {1, DataType::INT32}});
 17 |     plan.root = 2;
 18 | 
 19 |     // Two inputs, each with one INT32 column and no rows.
 20 |     for (int input = 0; input < 2; ++input) {
 21 |         ColumnarTable empty_input;
 22 |         empty_input.columns.emplace_back(DataType::INT32);
 23 |         plan.inputs.emplace_back(std::move(empty_input));
 24 |     }
 25 | 
 26 |     auto* context = Contest::build_context();
 27 |     auto  result  = Contest::execute(plan, context);
 28 |     Contest::destroy_context(context);
 29 | 
 30 |     REQUIRE(result.num_rows == 0);
 31 |     REQUIRE(result.columns.size() == 2);
 32 |     for (const auto& column: result.columns) {
 33 |         REQUIRE(column.type == DataType::INT32);
 34 |     }
 35 | }
 29 | 
 30 | // Joining a one-row INT32 table with a copy of itself on its only column must give
 31 | // the single row {1, 1}.
 32 | TEST_CASE("One line join", "[join]") {
 33 |     Plan plan;
 34 |     plan.new_scan_node(0, {{0, DataType::INT32}});
 35 |     plan.new_scan_node(1, {{0, DataType::INT32}});
 36 |     plan.new_join_node(true, 0, 1, 0, 0, {{0, DataType::INT32}, {1, DataType::INT32}});
 37 |     plan.root = 2;
 38 | 
 39 |     // Both inputs are columnar copies of the same one-row table.
 40 |     std::vector<std::vector<Data>> input_rows{{1}};
 41 |     std::vector<DataType>          input_types{DataType::INT32};
 42 |     Table                          input(std::move(input_rows), std::move(input_types));
 43 |     plan.inputs.emplace_back(input.to_columnar());
 44 |     plan.inputs.emplace_back(input.to_columnar());
 45 | 
 46 |     auto* context = Contest::build_context();
 47 |     auto  result  = Contest::execute(plan, context);
 48 |     Contest::destroy_context(context);
 49 | 
 50 |     REQUIRE(result.num_rows == 1);
 51 |     REQUIRE(result.columns.size() == 2);
 52 |     for (const auto& column: result.columns) {
 53 |         REQUIRE(column.type == DataType::INT32);
 54 |     }
 55 | 
 56 |     const std::vector<std::vector<Data>> expected{{1, 1}};
 57 |     auto                                 actual = Table::from_columnar(result);
 58 |     REQUIRE(actual.table() == expected);
 59 | }
 58 | 
 59 | TEST_CASE("Simple join", "[join]") {
 60 |     // Self-join on three distinct INT32 keys: every key finds exactly one partner.
 61 |     Plan join_plan;
 62 |     join_plan.new_scan_node(0, {{0, DataType::INT32}});
 63 |     join_plan.new_scan_node(1, {{0, DataType::INT32}});
 64 |     join_plan.new_join_node(true, 0, 1, 0, 0, {{0, DataType::INT32}, {1, DataType::INT32}});
 65 |     std::vector<std::vector<Data>> rows{
 66 |         {1},
 67 |         {2},
 68 |         {3},
 69 |     };
 70 |     std::vector<DataType> schema{DataType::INT32};
 71 |     Table source(std::move(rows), std::move(schema));
 72 |     join_plan.inputs.emplace_back(source.to_columnar());
 73 |     join_plan.inputs.emplace_back(source.to_columnar());
 74 |     join_plan.root = 2;
 75 |     auto* ctx = Contest::build_context();
 76 |     auto result = Contest::execute(join_plan, ctx);
 77 |     Contest::destroy_context(ctx);
 78 |     REQUIRE(result.num_rows == 3);
 79 |     REQUIRE(result.columns.size() == 2);
 80 |     REQUIRE(result.columns[0].type == DataType::INT32);
 81 |     REQUIRE(result.columns[1].type == DataType::INT32);
 82 |     auto result_table = Table::from_columnar(result);
 83 |     std::vector<std::vector<Data>> ground_truth{
 84 |         {1, 1},
 85 |         {2, 2},
 86 |         {3, 3},
 87 |     };
 88 |     // Sort the produced rows so the comparison does not depend on the order they were emitted in.
 89 |     sort(result_table.table());
 90 |     REQUIRE(result_table.table() == ground_truth);
 91 | }
 92 | 
 93 | TEST_CASE("Empty Result", "[join]") {
 94 |     // Keys {1,2,3} versus {4,5,6} share nothing: zero rows expected, schema still checked.
 95 |     Plan join_plan;
 96 |     join_plan.new_scan_node(0, {{0, DataType::INT32}});
 97 |     join_plan.new_scan_node(1, {{0, DataType::INT32}});
 98 |     join_plan.new_join_node(true, 0, 1, 0, 0, {{0, DataType::INT32}, {1, DataType::INT32}});
 99 |     std::vector<std::vector<Data>> left_rows{
100 |         {1},
101 |         {2},
102 |         {3},
103 |     };
104 |     std::vector<std::vector<Data>> right_rows{
105 |         {4},
106 |         {5},
107 |         {6},
108 |     };
109 |     std::vector<DataType> schema{DataType::INT32};
110 |     Table left_source(std::move(left_rows), schema);  // schema is reused below, so it is not moved here
111 |     Table right_source(std::move(right_rows), std::move(schema));
112 |     // Inputs are appended in scan order: left table first, right table second.
113 |     join_plan.inputs.emplace_back(left_source.to_columnar());
114 |     join_plan.inputs.emplace_back(right_source.to_columnar());
115 |     join_plan.root = 2;
116 |     auto* ctx = Contest::build_context();
117 |     auto result = Contest::execute(join_plan, ctx);
118 |     Contest::destroy_context(ctx);
119 |     REQUIRE(result.num_rows == 0);
120 |     REQUIRE(result.columns.size() == 2);
121 |     REQUIRE(result.columns[0].type == DataType::INT32);
122 |     REQUIRE(result.columns[1].type == DataType::INT32);
123 | }
124 | 
125 | TEST_CASE("Multiple same keys", "[join]") {
126 |     // Self-join where key 1 occurs twice: 2 x 2 pairs for key 1 plus one pair each for 2 and 3.
127 |     Plan join_plan;
128 |     join_plan.new_scan_node(0, {{0, DataType::INT32}});
129 |     join_plan.new_scan_node(1, {{0, DataType::INT32}});
130 |     join_plan.new_join_node(true, 0, 1, 0, 0, {{0, DataType::INT32}, {1, DataType::INT32}});
131 |     std::vector<std::vector<Data>> rows{
132 |         {1},
133 |         {1},
134 |         {2},
135 |         {3},
136 |     };
137 |     std::vector<DataType> schema{DataType::INT32};
138 |     Table source(std::move(rows), std::move(schema));
139 |     join_plan.inputs.emplace_back(source.to_columnar());
140 |     join_plan.inputs.emplace_back(source.to_columnar());
141 |     join_plan.root = 2;
142 |     auto* ctx = Contest::build_context();
143 |     auto result = Contest::execute(join_plan, ctx);
144 |     Contest::destroy_context(ctx);
145 |     REQUIRE(result.num_rows == 6);
146 |     REQUIRE(result.columns.size() == 2);
147 |     REQUIRE(result.columns[0].type == DataType::INT32);
148 |     REQUIRE(result.columns[1].type == DataType::INT32);
149 |     auto result_table = Table::from_columnar(result);
150 |     std::vector<std::vector<Data>> ground_truth{
151 |         {1, 1},
152 |         {1, 1},
153 |         {1, 1},
154 |         {1, 1},
155 |         {2, 2},
156 |         {3, 3},
157 |     };
158 |     // Sort the produced rows so the comparison does not depend on the order they were emitted in.
159 |     sort(result_table.table());
160 |     REQUIRE(result_table.table() == ground_truth);
161 | }
162 | 
163 | TEST_CASE("NULL keys", "[join]") {
164 |     // The std::monostate key row must not join with anything: only the six non-NULL pairs are expected.
165 |     Plan join_plan;
166 |     join_plan.new_scan_node(0, {{0, DataType::INT32}});
167 |     join_plan.new_scan_node(1, {{0, DataType::INT32}});
168 |     join_plan.new_join_node(true, 0, 1, 0, 0, {{0, DataType::INT32}, {1, DataType::INT32}});
169 |     std::vector<std::vector<Data>> rows{
170 |         {1},
171 |         {1},
172 |         {std::monostate{}},
173 |         {2},
174 |         {3},
175 |     };
176 |     std::vector<DataType> schema{DataType::INT32};
177 |     Table source(std::move(rows), std::move(schema));
178 |     join_plan.inputs.emplace_back(source.to_columnar());
179 |     join_plan.inputs.emplace_back(source.to_columnar());
180 |     join_plan.root = 2;
181 |     auto* ctx = Contest::build_context();
182 |     auto result = Contest::execute(join_plan, ctx);
183 |     Contest::destroy_context(ctx);
184 |     REQUIRE(result.num_rows == 6);
185 |     REQUIRE(result.columns.size() == 2);
186 |     REQUIRE(result.columns[0].type == DataType::INT32);
187 |     REQUIRE(result.columns[1].type == DataType::INT32);
188 |     auto result_table = Table::from_columnar(result);
189 |     std::vector<std::vector<Data>> ground_truth{
190 |         {1, 1},
191 |         {1, 1},
192 |         {1, 1},
193 |         {1, 1},
194 |         {2, 2},
195 |         {3, 3},
196 |     };
197 |     // Sort the produced rows so the comparison does not depend on the order they were emitted in.
198 |     sort(result_table.table());
199 |     REQUIRE(result_table.table() == ground_truth);
200 | }
201 | 
202 | TEST_CASE("Multiple columns", "[join]") {
203 |     // Second scan projects (VARCHAR, INT32) and joins on its INT32 column; the NULL-keyed "zzz" row drops out.
204 |     Plan join_plan;
205 |     join_plan.new_scan_node(0, {{0, DataType::INT32}});
206 |     join_plan.new_scan_node(1, {{1, DataType::VARCHAR}, {0, DataType::INT32}});
207 |     join_plan.new_join_node(true, 0, 1, 0, 1, {{0, DataType::INT32}, {2, DataType::INT32}, {1, DataType::VARCHAR}});
208 |     using namespace std::string_literals;
209 |     std::vector<std::vector<Data>> rows{
210 |         {1, "xxx"s},
211 |         {1, "yyy"s},
212 |         {std::monostate{}, "zzz"s},
213 |         {2, "uuu"s},
214 |         {3, "vvv"s},
215 |     };
216 |     std::vector<DataType> schema{DataType::INT32, DataType::VARCHAR};
217 |     Table source(std::move(rows), std::move(schema));
218 |     join_plan.inputs.emplace_back(source.to_columnar());
219 |     join_plan.inputs.emplace_back(source.to_columnar());
220 |     join_plan.root = 2;
221 |     auto* ctx = Contest::build_context();
222 |     auto result = Contest::execute(join_plan, ctx);
223 |     Contest::destroy_context(ctx);
224 |     REQUIRE(result.num_rows == 6);
225 |     REQUIRE(result.columns.size() == 3);
226 |     REQUIRE(result.columns[0].type == DataType::INT32);
227 |     REQUIRE(result.columns[1].type == DataType::INT32);
228 |     REQUIRE(result.columns[2].type == DataType::VARCHAR);
229 |     auto result_table = Table::from_columnar(result);
230 |     std::vector<std::vector<Data>> ground_truth{
231 |         {1, 1, "xxx"s},
232 |         {1, 1, "xxx"s},
233 |         {1, 1, "yyy"s},
234 |         {1, 1, "yyy"s},
235 |         {2, 2, "uuu"s},
236 |         {3, 3, "vvv"s},
237 |     };
238 |     // Sort the produced rows so the comparison does not depend on the order they were emitted in.
239 |     sort(result_table.table());
240 |     REQUIRE(result_table.table() == ground_truth);
241 | }
242 | 
243 | TEST_CASE("Build on right", "[join]") {
244 |     // Passes false as the first new_join_node argument (presumably build-on-right, per the test name - confirm).
245 |     Plan join_plan;
246 |     join_plan.new_scan_node(0, {{0, DataType::INT32}});
247 |     join_plan.new_scan_node(1, {{1, DataType::VARCHAR}, {0, DataType::INT32}});
248 |     join_plan.new_join_node(false, 0, 1, 0, 1, {{0, DataType::INT32}, {2, DataType::INT32}, {1, DataType::VARCHAR}});
249 |     using namespace std::string_literals;
250 |     std::vector<std::vector<Data>> rows{
251 |         {1, "xxx"s},
252 |         {1, "yyy"s},
253 |         {std::monostate{}, "zzz"s},
254 |         {2, "uuu"s},
255 |         {3, "vvv"s},
256 |     };
257 |     std::vector<DataType> schema{DataType::INT32, DataType::VARCHAR};
258 |     Table source(std::move(rows), std::move(schema));
259 |     join_plan.inputs.emplace_back(source.to_columnar());
260 |     join_plan.inputs.emplace_back(source.to_columnar());
261 |     join_plan.root = 2;
262 |     auto* ctx = Contest::build_context();
263 |     auto result = Contest::execute(join_plan, ctx);
264 |     Contest::destroy_context(ctx);
265 |     REQUIRE(result.num_rows == 6);
266 |     REQUIRE(result.columns.size() == 3);
267 |     REQUIRE(result.columns[0].type == DataType::INT32);
268 |     REQUIRE(result.columns[1].type == DataType::INT32);
269 |     REQUIRE(result.columns[2].type == DataType::VARCHAR);
270 |     auto result_table = Table::from_columnar(result);
271 |     std::vector<std::vector<Data>> ground_truth{
272 |         {1, 1, "xxx"s},
273 |         {1, 1, "xxx"s},
274 |         {1, 1, "yyy"s},
275 |         {1, 1, "yyy"s},
276 |         {2, 2, "uuu"s},
277 |         {3, 3, "vvv"s},
278 |     };
279 |     // Sort the produced rows so the comparison does not depend on the order they were emitted in.
280 |     sort(result_table.table());
281 |     REQUIRE(result_table.table() == ground_truth);
282 | }
283 | 


--------------------------------------------------------------------------------