├── .clang-format ├── .clangd ├── .gitignore ├── ANNOUNCEMENTS.md ├── CMakeLists.txt ├── README.md ├── download_imdb.sh ├── include ├── attribute.h ├── common.h ├── csv_parser.h ├── hardware__ca09.h ├── hardware__cp02.h ├── hardware__koroneia.h ├── hardware__sidon.h ├── inner_column.h ├── plan.h ├── statement.h ├── table.h └── table_entity.h ├── job ├── 10a.sql ├── 10b.sql ├── 10c.sql ├── 11a.sql ├── 11b.sql ├── 11c.sql ├── 11d.sql ├── 12a.sql ├── 12b.sql ├── 12c.sql ├── 13a.sql ├── 13b.sql ├── 13c.sql ├── 13d.sql ├── 14a.sql ├── 14b.sql ├── 14c.sql ├── 15a.sql ├── 15b.sql ├── 15c.sql ├── 15d.sql ├── 16a.sql ├── 16b.sql ├── 16c.sql ├── 16d.sql ├── 17a.sql ├── 17b.sql ├── 17c.sql ├── 17d.sql ├── 17e.sql ├── 17f.sql ├── 18a.sql ├── 18b.sql ├── 18c.sql ├── 19a.sql ├── 19b.sql ├── 19c.sql ├── 19d.sql ├── 1a.sql ├── 1b.sql ├── 1c.sql ├── 1d.sql ├── 20a.sql ├── 20b.sql ├── 20c.sql ├── 21a.sql ├── 21b.sql ├── 21c.sql ├── 22a.sql ├── 22b.sql ├── 22c.sql ├── 22d.sql ├── 23a.sql ├── 23b.sql ├── 23c.sql ├── 24a.sql ├── 24b.sql ├── 25a.sql ├── 25b.sql ├── 25c.sql ├── 26a.sql ├── 26b.sql ├── 26c.sql ├── 27a.sql ├── 27b.sql ├── 27c.sql ├── 28a.sql ├── 28b.sql ├── 28c.sql ├── 29a.sql ├── 29b.sql ├── 29c.sql ├── 2a.sql ├── 2b.sql ├── 2c.sql ├── 2d.sql ├── 30a.sql ├── 30b.sql ├── 30c.sql ├── 31a.sql ├── 31b.sql ├── 31c.sql ├── 32a.sql ├── 32b.sql ├── 33a.sql ├── 33b.sql ├── 33c.sql ├── 3a.sql ├── 3b.sql ├── 3c.sql ├── 4a.sql ├── 4b.sql ├── 4c.sql ├── 5a.sql ├── 5b.sql ├── 5c.sql ├── 6a.sql ├── 6b.sql ├── 6c.sql ├── 6d.sql ├── 6e.sql ├── 6f.sql ├── 7a.sql ├── 7b.sql ├── 7c.sql ├── 8a.sql ├── 8b.sql ├── 8c.sql ├── 8d.sql ├── 9a.sql ├── 9b.sql ├── 9c.sql ├── 9d.sql ├── README ├── fkindexes.sql └── schema.sql ├── plans.json ├── src ├── build_table.cpp ├── csv_parser.cpp ├── execute.cpp └── statement.cpp └── tests ├── build_database.cpp ├── read_sql.cpp └── unit_tests.cpp /.clang-format: -------------------------------------------------------------------------------- 1 | --- 2 | 
IndentWidth: 4 3 | --- 4 | Language: Cpp 5 | Standard: Latest 6 | AccessModifierOffset: -4 7 | AlignAfterOpenBracket: DontAlign 8 | AlignArrayOfStructures: Left 9 | AlignConsecutiveAssignments: 10 | Enabled: true 11 | AcrossEmptyLines: false 12 | AcrossComments: false 13 | AlignCompound: true 14 | PadOperators: true 15 | AlignConsecutiveBitFields: Consecutive 16 | AlignConsecutiveDeclarations: 17 | Enabled: true 18 | AcrossEmptyLines: false 19 | AcrossComments: false 20 | # AlignFunctionDeclarations: true 21 | AlignFunctionPointers: true 22 | AlignConsecutiveMacros: Consecutive 23 | AlignConsecutiveShortCaseStatements: 24 | Enabled: true 25 | AcrossEmptyLines: false 26 | AcrossComments: false 27 | AlignCaseColons: false 28 | AlignEscapedNewlines: LeftWithLastLine 29 | AlignOperands: AlignAfterOperator 30 | AlignTrailingComments: 31 | Kind: Always 32 | OverEmptyLines: 1 33 | AllowAllArgumentsOnNextLine: false 34 | AllowAllParametersOfDeclarationOnNextLine: false 35 | AllowBreakBeforeNoexceptSpecifier: OnlyWithParen 36 | AllowShortBlocksOnASingleLine: Empty 37 | AllowShortCaseLabelsOnASingleLine: true 38 | AllowShortCompoundRequirementOnASingleLine: true 39 | AllowShortEnumsOnASingleLine: false 40 | AllowShortFunctionsOnASingleLine: Inline 41 | AllowShortIfStatementsOnASingleLine: Never 42 | AllowShortLambdasOnASingleLine: All 43 | AllowShortLoopsOnASingleLine: false 44 | AlwaysBreakBeforeMultilineStrings: true 45 | BinPackArguments: false 46 | BinPackParameters: false 47 | BitFieldColonSpacing: After 48 | BraceWrapping: 49 | AfterCaseLabel: false 50 | AfterClass: false 51 | AfterControlStatement: Never 52 | AfterEnum: false 53 | AfterFunction: false 54 | AfterNamespace: false 55 | AfterStruct: false 56 | AfterUnion: false 57 | AfterExternBlock: false 58 | BeforeCatch: false 59 | BeforeElse: false 60 | BeforeLambdaBody: false 61 | BeforeWhile: false 62 | SplitEmptyFunction: false 63 | SplitEmptyRecord: false 64 | SplitEmptyNamespace: true 65 | 
BracedInitializerIndentWidth: 4 66 | BreakAdjacentStringLiterals: true 67 | BreakAfterAttributes: Leave 68 | BreakAfterReturnType: Automatic 69 | BreakBeforeBinaryOperators: NonAssignment 70 | BreakBeforeBraces: Custom 71 | BreakBeforeConceptDeclarations: Always 72 | BreakBeforeInlineASMColon: Always 73 | BreakBeforeTernaryOperators: true 74 | BreakConstructorInitializers: BeforeComma 75 | BreakFunctionDefinitionParameters: false 76 | BreakInheritanceList: BeforeComma 77 | BreakStringLiterals: true 78 | BreakTemplateDeclarations: Yes 79 | ColumnLimit: 96 80 | CompactNamespaces: false 81 | ConstructorInitializerIndentWidth: 0 82 | ContinuationIndentWidth: 4 83 | Cpp11BracedListStyle: true 84 | DerivePointerAlignment: false 85 | EmptyLineAfterAccessModifier: Never 86 | FixNamespaceComments: true 87 | IncludeBlocks: Preserve 88 | IndentAccessModifiers: false 89 | IndentCaseBlocks: false 90 | IndentCaseLabels: false 91 | IndentExternBlock: false 92 | IndentGotoLabels: false 93 | IndentPPDirectives: BeforeHash 94 | IndentRequiresClause: false 95 | IndentWrappedFunctionNames: false 96 | InsertBraces: true 97 | InsertNewlineAtEOF: true 98 | InsertTrailingCommas: Wrapped 99 | KeepEmptyLines: 100 | AtEndOfFile: false 101 | AtStartOfBlock: false 102 | AtStartOfFile: false 103 | LambdaBodyIndentation: Signature 104 | LineEnding: LF 105 | MaxEmptyLinesToKeep: 2 106 | NamespaceIndentation: None 107 | PPIndentWidth: 4 108 | PackConstructorInitializers: Never 109 | PointerAlignment: Left 110 | QualifierAlignment: Left 111 | ReferenceAlignment: Left 112 | ReflowComments: true 113 | # RemoveEmptyLinesInUnwrappedLines: true 114 | RemoveParentheses: MultipleParentheses 115 | RemoveSemicolon: true 116 | RequiresClausePosition: OwnLine 117 | RequiresExpressionIndentation: OuterScope 118 | SeparateDefinitionBlocks: Always 119 | SortIncludes: CaseInsensitive 120 | SortUsingDeclarations: Lexicographic 121 | SpaceAfterCStyleCast: false 122 | SpaceAfterLogicalNot: false 123 | 
SpaceAfterTemplateKeyword: true 124 | SpaceBeforeAssignmentOperators: true 125 | SpaceBeforeCaseColon: false 126 | SpaceBeforeCpp11BracedList: false 127 | SpaceBeforeCtorInitializerColon: false 128 | SpaceBeforeInheritanceColon: false 129 | SpaceBeforeParens: Custom 130 | SpaceBeforeParensOptions: 131 | AfterControlStatements: true 132 | AfterForeachMacros: false 133 | AfterFunctionDeclarationName: false 134 | AfterFunctionDefinitionName: false 135 | AfterIfMacros: false 136 | AfterOverloadedOperator: false 137 | AfterPlacementOperator: true 138 | AfterRequiresInClause: true 139 | AfterRequiresInExpression: true 140 | BeforeNonEmptyParentheses: false 141 | SpaceBeforeRangeBasedForLoopColon: false 142 | SpaceBeforeSquareBrackets: false 143 | SpaceInEmptyBlock: false 144 | SpacesBeforeTrailingComments: 1 145 | SpacesInAngles: Never 146 | SpacesInContainerLiterals: false 147 | SpacesInLineCommentPrefix: 148 | Minimum: 1 149 | Maximum: -1 150 | SpacesInParens: Custom 151 | SpacesInParensOptions: 152 | ExceptDoubleParentheses: true 153 | InConditionalStatements: false 154 | InCStyleCasts: false 155 | InEmptyParentheses: false 156 | Other: false 157 | SpacesInSquareBrackets: false 158 | TabWidth: 4 159 | UseTab: Never -------------------------------------------------------------------------------- /.clangd: -------------------------------------------------------------------------------- 1 | CompileFlags: 2 | Remove: 3 | - -fmodules-ts 4 | - -fmodule-mapper=* 5 | - -fdeps-format=p1689r5 -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /.vscode 2 | /build 3 | /imdb 4 | /output*.txt 5 | /*.tgz 6 | /*.db 7 | /TPC-H* 8 | /job-sample 9 | /.cache -------------------------------------------------------------------------------- /ANNOUNCEMENTS.md: -------------------------------------------------------------------------------- 1 | # Announcements 2 | 3 
| ### 2025-03-13 4 | - We provide a new header, `hardware.h`. This header contains basic hardware information which enables optimizing for a server's cache sizes or vectorization capabilities. 5 | - If you are missing any information or find issues with the headers, please do not hesitate to contact us. 6 | - As one of the goals of this contest is to write efficient code for multiple platforms (some of those are kept secret until the final evaluation), we encourage you to read about vector extensions (e.g., Clang's "Vectors and Extended Vectors"). 7 | - We are considering changing the benchmark to include all queries of the standard JOB benchmark. We will reset the leaderboard in this case. We will let you know upfront when this change is about to land. 8 | - **Third-party libraries:** 9 | - We want to re-iterate our last notes from 2025-03-04: third-party libraries are **not allowed in your final submission**. 10 | - **Evaluation workload:** 11 | - While there will be a larger variety of queries in the final evaluation workload, we will not add any "surprises". For example, as in the original JoinOrder Benchmark, there will be no joins on string columns. 12 | 13 | ### 2025-03-04 14 | - With today's changes to the main repository you forked from, we improved the performance of the evaluation phase 15 | - **Important notes:** 16 | - **Deadline change:** The deadline for the final submission has been extended to March 31 17 | - **Own source files**: The CMake file (which cannot be modified by participants) now includes all *.cpp files in the `src` directory. This way, you can add your own source files and better structure your code. 18 | - **Third-party library:** We found that some teams use third-party libraries, e.g., for logging. Please note that third-party libraries are not allowed in the contest. You are free to use them during development, but you need to remove them prior to the final submission. Otherwise, your submission will be disqualified. 
19 | 20 | ### 2025-02-27 21 | - The recently pushed GitHub workflow will automatically compile, test, and benchmark your solution on all four systems 22 | - Check your repository's pull requests 23 | - The results are currently shown at https://sigmod-contest-25.hpi-sci.de/ and will soon be published on the official contest website 24 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.16) 2 | 3 | project(SigmodContest) 4 | 5 | set(CMAKE_CXX_STANDARD 17) 6 | set(CMAKE_CXX_STANDARD_REQUIRED ON) 7 | set(CMAKE_CXX_EXTENSIONS OFF) 8 | 9 | Include(FetchContent) 10 | 11 | FetchContent_Declare( 12 | Catch2 13 | URL https://github.com/catchorg/Catch2/archive/refs/tags/v3.8.0.tar.gz 14 | ) 15 | 16 | FetchContent_MakeAvailable(Catch2) 17 | 18 | FetchContent_Declare( 19 | abseil 20 | URL https://github.com/abseil/abseil-cpp/releases/download/20240722.1/abseil-cpp-20240722.1.tar.gz 21 | ) 22 | 23 | set(ABSL_PROPAGATE_CXX_STD ON) 24 | set(ABSL_ENABLE_INSTALL ON) 25 | FetchContent_MakeAvailable(abseil) 26 | 27 | FetchContent_Declare( 28 | re2 29 | URL https://github.com/google/re2/releases/download/2024-07-02/re2-2024-07-02.tar.gz 30 | ) 31 | 32 | FetchContent_MakeAvailable(re2) 33 | 34 | FetchContent_Declare( 35 | json 36 | URL https://github.com/nlohmann/json/releases/download/v3.11.3/json.tar.xz 37 | ) 38 | 39 | FetchContent_MakeAvailable(json) 40 | 41 | FetchContent_Declare( 42 | sql-parser 43 | URL https://github.com/a858438680/sql-parser/archive/refs/tags/win-port-2.tar.gz 44 | ) 45 | set(HSQL_ENABLE_WERROR OFF) 46 | FetchContent_MakeAvailable(sql-parser) 47 | 48 | FetchContent_Declare( 49 | range-v3 50 | URL https://github.com/ericniebler/range-v3/archive/refs/tags/0.12.0.tar.gz 51 | ) 52 | 53 | FetchContent_MakeAvailable(range-v3) 54 | 55 | FetchContent_Declare( 56 | fmtlib 57 | URL 
https://github.com/fmtlib/fmt/releases/download/11.1.3/fmt-11.1.3.zip 58 | ) 59 | 60 | FetchContent_MakeAvailable(fmtlib) 61 | 62 | FetchContent_Declare( 63 | duckdb 64 | URL https://github.com/duckdb/duckdb/archive/refs/tags/v1.2.0.tar.gz 65 | ) 66 | set(ENABLE_SANITIZER OFF) 67 | set(ENABLE_UBSAN OFF) 68 | if(CMAKE_SYSTEM_PROCESSOR MATCHES "ppc|powerpc|ppc64|ppc64le") 69 | message("Disabling jemalloc extension of DuckDB on Power.") 70 | set(SKIP_EXTENSIONS jemalloc) 71 | endif() 72 | FetchContent_MakeAvailable(duckdb) 73 | 74 | # Include all sources from /src directory. CONFIGURE_DEPENDS can be unreliable. 75 | # Try re-running cmake in case changes are not recognized. 76 | file(GLOB SIGMODPC_SRC 77 | CONFIGURE_DEPENDS 78 | "src/*.cpp" 79 | ) 80 | 81 | add_executable( 82 | run 83 | 84 | ${SIGMODPC_SRC} 85 | tests/read_sql.cpp 86 | ) 87 | 88 | target_include_directories(run PRIVATE include) 89 | target_link_libraries(run PRIVATE re2 fmt range-v3 nlohmann_json::nlohmann_json sqlparser duckdb) 90 | if(CMAKE_SYSTEM_NAME STREQUAL "Windows") 91 | target_compile_definitions(run PRIVATE _CRT_SECURE_NO_WARNINGS) 92 | endif() 93 | 94 | # Enable server-specific compiler optimizations. 95 | # Use march=native for all but Power servers, which results in the following error: 96 | # clang++-18: error: unsupported option '-march=' for target 'powerpc64le-unknown-linux-gnu' 97 | # This flag works on other Power systems, but for now, we disable march=native on all Power machines. 
98 | if(NOT CMAKE_SYSTEM_PROCESSOR MATCHES "ppc|powerpc|ppc64|ppc64le") 99 | add_compile_options(-march=native) 100 | endif() 101 | 102 | add_executable( 103 | build_database 104 | 105 | tests/build_database.cpp 106 | ) 107 | 108 | target_include_directories(build_database PRIVATE include) 109 | target_link_libraries(build_database PRIVATE fmt duckdb) 110 | if(CMAKE_SYSTEM_NAME STREQUAL "Windows") 111 | target_compile_definitions(build_database PRIVATE _CRT_SECURE_NO_WARNINGS) 112 | endif() 113 | 114 | add_executable( 115 | unit_tests 116 | 117 | ${SIGMODPC_SRC} 118 | tests/unit_tests.cpp 119 | ) 120 | 121 | target_include_directories(unit_tests PRIVATE include) 122 | target_link_libraries(unit_tests PRIVATE range-v3 fmt Catch2::Catch2WithMain duckdb) 123 | if(CMAKE_SYSTEM_NAME STREQUAL "Windows") 124 | target_compile_definitions(unit_tests PRIVATE _CRT_SECURE_NO_WARNINGS) 125 | endif() 126 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # SIGMOD Contest 2025 2 | 3 | ## Task 4 | 5 | Given the joining pipeline and the pre-filtered input data, your task is to implement an efficient joining algorithm to accelerate the execution time of the joining pipeline. Specifically, you need to implement the following function in `src/execute.cpp`: 6 | 7 | ```C++ 8 | ColumnarTable execute(const Plan& plan, void* context); 9 | ``` 10 | 11 | Optionally, you can implement these two functions as well to prepare any global context (e.g., thread pool) to accelerate the execution. 12 | 13 | ```C++ 14 | void* build_context(); 15 | void destroy_context(void*); 16 | ``` 17 | 18 | ### Input format 19 | 20 | The input plan in the above function is defined as the following struct. 
21 | 22 | ```C++ 23 | struct ScanNode { 24 | size_t base_table_id; 25 | }; 26 | 27 | struct JoinNode { 28 | bool build_left; 29 | size_t left; 30 | size_t right; 31 | size_t left_attr; 32 | size_t right_attr; 33 | }; 34 | 35 | struct PlanNode { 36 | std::variant data; 37 | std::vector> output_attrs; 38 | }; 39 | 40 | struct Plan { 41 | std::vector nodes; 42 | std::vector inputs; 43 | size_t root; 44 | } 45 | ``` 46 | 47 | **Scan**: 48 | - The `base_table_id` member refers to which input table in the `inputs` member of a plan is used by the Scan node. 49 | - Each item in the `output_attrs` indicates which column in the base table should be output and what type it is. 50 | 51 | **Join**: 52 | - The `build_left` member refers to which side the hash table should be built on, where `true` indicates building the hash table on the left child, and `false` indicates the opposite. 53 | - The `left` and `right` members are the indexes of the left and right child of the Join node in the `nodes` member of a plan, respectively. 54 | - The `left_attr` and `right_attr` members are the join condition of Join node. Supposing that there are two records, `left_record` and `right_record`, from the intermediate results of the left and right child, respectively. The members indicate that the two records should be joined when `left_record[left_attr] == right_record[right_attr]`. 55 | - Each item in the `output_attrs` indicates which column in the result of children should be output and what type it is. Supposing that the left child has $n_l$ columns and the right child has $n_r$ columns, the value of the index $i \in \{0, \dots, n_l + n_r - 1\}$, where the ranges $\{0, \dots, n_l - 1\}$ and $\{n_l, \dots, n_l + n_r - 1\}$ indicate the output column is from left and right child respectively. 56 | 57 | **Root**: The `root` member of a plan indicates which node is the root node of the execution plan tree. 
58 | 59 | ### Data format 60 | 61 | The input and output data both follow a simple columnar data format. 62 | 63 | ```C++ 64 | enum class DataType { 65 | INT32, // 4-byte integer 66 | INT64, // 8-byte integer 67 | FP64, // 8-byte floating point 68 | VARCHAR, // string of arbitrary length 69 | }; 70 | 71 | constexpr size_t PAGE_SIZE = 8192; 72 | 73 | struct alignas(8) Page { 74 | std::byte data[PAGE_SIZE]; 75 | }; 76 | 77 | struct Column { 78 | DataType type; 79 | std::vector pages; 80 | }; 81 | 82 | struct ColumnarTable { 83 | size_t num_rows; 84 | std::vector columns; 85 | }; 86 | ``` 87 | 88 | A `ColumnarTable` first stores how many rows the table has in the `num_rows` member, then stores each column separately as a `Column`. Each `Column` has a type and stores the items of the column into several pages. Each page is 8192 bytes. In each page: 89 | 90 | - The first 2 bytes are a `uint16_t` which is the number of rows $n_r$ in the page. 91 | - The following 2 bytes are a `uint16_t` which is the number of non-`NULL` values $n_v$ in the page. 92 | - The first $n_r$ bits in the last $\left\lfloor\frac{(n_r + 7)}{8}\right\rfloor$ bytes form a bitmap indicating whether the corresponding row has a value or is `NULL`. 93 | 94 | **Fixed-length attribute**: There are $n_v$ contiguous values beginning at the first aligned position. For example, in a `Page` of `INT32`, the first value is at `data + 4`, while in a `Page` of `INT64` or `FP64`, the first value is at `data + 8`. 95 | 96 | **Variable-length attribute**: There are $n_v$ contiguous offsets (`uint16_t`) beginning at `data + 4` in a `Page`, followed by the content of the varchars which begins at `char_begin = data + 4 + n_r * 2`. Each offset indicates the ending offset of the corresponding `VARCHAR` with respect to the `char_begin`. 97 | 98 | **Long string**: When the length of a string is longer than `PAGE_SIZE - 7`, it cannot fit in a normal page. Special pages will be used to store such a string. 
If $n_r$ `== 0xffff` or $n_r$ `== 0xfffe`, the `Page` is a special page for a long string. `0xffff` means the page is the first page of a long string and `0xfffe` means the page is a following page of a long string. The following 2 bytes are a `uint16_t` indicating the number of chars in the page, beginning at `data + 4`. 99 | 100 | ## Requirement 101 | 102 | - You can only modify the file `src/execute.cpp` in the project. 103 | - You must not use any third-party libraries. If you are using libraries for development (e.g., for logging), make sure to remove them before the final submission. 104 | - The joining pipeline (including order and build side) is optimized by PostgreSQL for `Hash Join` only. However, in the `execute` function, you are free to use other algorithms and change the pipeline, as long as the result is equivalent. 105 | - For any struct listed above, all of their members are public. You can manipulate them in free functions as desired as long as the original files are not changed and the manipulated objects can be destroyed properly. 106 | - Your program will be evaluated on an unpublished benchmark sampled from the original JOB benchmark. You will not be able to access the test benchmark. 107 | 108 | ## Quick start 109 | 110 | > [!TIP] 111 | > Run all the following commands in the root directory of this project. 112 | 113 | First, download the imdb dataset. 114 | 115 | ```bash 116 | ./download_imdb.sh 117 | ``` 118 | 119 | Second, build the project. 120 | 121 | ```bash 122 | cmake -S . -B build -DCMAKE_BUILD_TYPE=Release -Wno-dev 123 | cmake --build build -- -j $(nproc) 124 | ``` 125 | 126 | Third, prepare the DuckDB database for correctness checking. 127 | 128 | ```bash 129 | ./build/build_database imdb.db 130 | ``` 131 | 132 | Now, you can run the tests: 133 | ```bash 134 | ./build/run plans.json 135 | ``` 136 | > [!TIP] 137 | > If you want to use `Ninja Multi-Config` as the generator. 
The commands will look like: 138 | > 139 | >```bash 140 | > cmake -S . -B build -Wno-dev -G "Ninja Multi-Config" 141 | > cmake --build build --config Release -- -j $(nproc) 142 | > ./build/Release/build_database imdb.db 143 | > ./build/Release/run plans.json 144 | > ``` 145 | 146 | # Hardware 147 | 148 | The evaluation is automatically executed on four different servers. On multi-socket machines, the benchmarks are bound to a single socket (using `numactl -m 0 -N 0`). 149 | 150 | * **AMD #1** 151 | * CPU: 2x AMD EPYC 7F72 (SMT 2, 24 cores, 48 threads) 152 | * Main memory: 256 GB 153 | * **ARM #1** 154 | * CPU: 1x Ampere Altra Max (SMT 1, 128 cores, 128 threads) 155 | * Main memory: 512 GB 156 | * **IBM #1** 157 | * CPU: 8x IBM Power8 (SMT 8, 12 cores, 96 threads) 158 | * Main memory: 1024 GB 159 | * **Intel #1** 160 | * CPU: 4x Intel Xeon E7-4880 v2 (SMT 2, 15 cores, 30 threads) 161 | * Main memory: 512 GB 162 | 163 | Additional Evaluation Server: 164 | * **AMD #2** 165 | * CPU: 1x AMD EPYC 7343 (SMT 2, 16 cores, 32 threads; 20 threads enabled) 166 | * Main memory: 96 GB 167 | * **ARM #2** 168 | * CPU: 1x NVIDIA GH200 Grace Hopper (SMT 1, 72 cores, 72 threads) 169 | * Main memory: 480 GB 170 | * **IBM #2** 171 | * CPU: 2x IBM Power10 (SMT 8, 12 cores, 96 threads) 172 | * Main memory: 512 GB 173 | * **Intel #2** 174 | * CPU: 2x Intel Xeon Platinum 8352Y (SMT 2, 32 cores, 64 threads) 175 | * Main memory: 256 GB 176 | 177 | Code is compiled with Clang 18. 178 | -------------------------------------------------------------------------------- /download_imdb.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | url="https://event.cwi.nl/da/job/imdb.tgz" 4 | output_file="imdb.tgz" 5 | target_dir="imdb" 6 | 7 | # Detect and select downloader 8 | if command -v wget &> /dev/null; then 9 | if ! 
wget "$url" -O "$output_file"; then 10 | echo "Error: downloading failed" >&2 11 | exit 1 12 | fi 13 | elif command -v curl &> /dev/null; then 14 | if ! curl -L "$url" -o "$output_file"; then 15 | echo "Error: downloading failed" >&2 16 | exit 1 17 | fi 18 | else 19 | echo "Error: please install wget or curl to download imdb.tgz" >&2 20 | exit 1 21 | fi 22 | 23 | # make target directory (if not exists) 24 | if ! mkdir -p "$target_dir"; then 25 | echo "Error: cannot make directory '$target_dir'" >&2 26 | exit 1 27 | fi 28 | 29 | # decompress the file to the target directory 30 | if ! tar -xf "$output_file" -C "$target_dir"; then 31 | echo "Error: failed to decompress the file" >&2 32 | exit 1 33 | fi 34 | 35 | echo "Success!" 36 | -------------------------------------------------------------------------------- /include/attribute.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | #include 7 | 8 | enum class DataType { 9 | INT32, // 4-byte integer 10 | INT64, // 8-byte integer 11 | FP64, // 8-byte floating point 12 | VARCHAR, // string of arbitary length 13 | }; 14 | 15 | template <> 16 | struct fmt::formatter { 17 | template 18 | constexpr auto parse(ParseContext& ctx) { 19 | return ctx.begin(); 20 | } 21 | 22 | template 23 | auto format(DataType value, FormatContext& ctx) const { 24 | static std::array names{ 25 | "INT32", 26 | "INT64", 27 | "FP64", 28 | "VARCHAR", 29 | }; 30 | return fmt::format_to(ctx.out(), "{}", names[int(value)]); 31 | } 32 | }; 33 | 34 | #define DISPATCH_DATA_TYPE(type, TYPE, ...) 
\ 35 | do { \ 36 | switch (type) { \ 37 | case DataType::INT32: { \ 38 | using TYPE = int32_t; \ 39 | __VA_ARGS__ \ 40 | break; \ 41 | } \ 42 | case DataType::INT64: { \ 43 | using TYPE = int64_t; \ 44 | __VA_ARGS__ \ 45 | break; \ 46 | } \ 47 | case DataType::FP64: { \ 48 | using TYPE = double; \ 49 | __VA_ARGS__ \ 50 | break; \ 51 | } \ 52 | case DataType::VARCHAR: { \ 53 | using TYPE = std::string; \ 54 | __VA_ARGS__ \ 55 | break; \ 56 | } \ 57 | } \ 58 | } while (0) 59 | 60 | struct Attribute { 61 | DataType type; 62 | std::string name; 63 | }; -------------------------------------------------------------------------------- /include/common.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | #include 8 | #include 9 | 10 | namespace detail { 11 | inline uint32_t rotl32(uint32_t x, uint8_t bits) { 12 | return (x << bits) | (x >> (32 - bits)); 13 | } 14 | 15 | inline void hash_combine_impl(uint32_t& h1, uint32_t k1) { 16 | constexpr uint32_t c1 = 0xcc9e2d51u; 17 | constexpr uint32_t c2 = 0x1b873593u; 18 | 19 | k1 *= c1; 20 | k1 = rotl32(k1, 15); 21 | k1 *= c2; 22 | 23 | h1 ^= k1; 24 | h1 = rotl32(h1, 13); 25 | h1 = h1 * 5u + 0xe6546b64u; 26 | } 27 | 28 | inline void hash_combine_impl(uint64_t& h, uint64_t k) { 29 | constexpr uint64_t m = UINT64_C(0xc6a4a7935bd1e995); 30 | constexpr int r = 47; 31 | 32 | k *= m; 33 | k ^= k >> r; 34 | k *= m; 35 | 36 | h ^= k; 37 | h *= m; 38 | h += 0xe6546b64; 39 | } 40 | } // namespace detail 41 | 42 | inline void hash_combine(std::size_t& seed, std::size_t k) { 43 | if constexpr (sizeof(std::size_t) == 4) { 44 | uint32_t h = static_cast(seed); 45 | detail::hash_combine_impl(h, static_cast(k)); 46 | seed = h; 47 | } else if constexpr (sizeof(std::size_t) == 8) { 48 | uint64_t h = static_cast(seed); 49 | detail::hash_combine_impl(h, static_cast(k)); 50 | seed = h; 51 | } else { 52 | static_assert(sizeof(std::size_t) == 4 || 
sizeof(std::size_t) == 8, 53 | "Unsupported size_t size for hash_combine"); 54 | } 55 | } 56 | 57 | class File { 58 | public: 59 | File(const std::filesystem::path& path, const char* mode) 60 | : handle(std::fopen(path.string().c_str(), mode)) { 61 | if (!handle) { 62 | throw std::runtime_error("Failed to open file: " + path.string()); 63 | } 64 | } 65 | 66 | operator FILE*() const noexcept { return handle; } 67 | 68 | File(File&& other) noexcept 69 | : handle(other.handle) { 70 | other.handle = nullptr; 71 | } 72 | 73 | File& operator=(File&& other) noexcept { 74 | if (this != &other) { 75 | close(); 76 | handle = other.handle; 77 | other.handle = nullptr; 78 | } 79 | return *this; 80 | } 81 | 82 | File(const File&) = delete; 83 | File& operator=(const File&) = delete; 84 | 85 | ~File() { close(); } 86 | 87 | private: 88 | FILE* handle = nullptr; 89 | 90 | void close() noexcept { 91 | if (handle) { 92 | std::fclose(handle); 93 | handle = nullptr; 94 | } 95 | } 96 | }; 97 | 98 | inline std::string read_file(const std::filesystem::path& path) { 99 | File f(path, "rb"); 100 | ::fseek(f, 0, SEEK_END); 101 | auto size = ::ftell(f); 102 | ::fseek(f, 0, SEEK_SET); 103 | std::string result; 104 | result.resize(size); 105 | std::ignore = ::fread(result.data(), 1, size, f); 106 | return result; 107 | } 108 | 109 | struct DSU { 110 | std::vector pa; 111 | 112 | explicit DSU(size_t size) 113 | : pa(size) { 114 | std::iota(pa.begin(), pa.end(), 0); 115 | } 116 | 117 | size_t find(size_t x) { return pa[x] == x ? x : pa[x] = find(pa[x]); } 118 | 119 | void unite(size_t x, size_t y) { pa[find(x)] = find(y); } 120 | }; 121 | 122 | [[noreturn]] inline void unreachable() 123 | { 124 | // Uses compiler specific extensions if possible. 125 | // Even if no extension is used, undefined behavior is still raised by 126 | // an empty function body and the noreturn attribute. 
127 | #if defined(_MSC_VER) && !defined(__clang__) // MSVC 128 | __assume(false); 129 | #else // GCC, Clang 130 | __builtin_unreachable(); 131 | #endif 132 | } -------------------------------------------------------------------------------- /include/csv_parser.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | #include 6 | 7 | class CSVParser { 8 | public: 9 | enum Error { 10 | Ok, 11 | QuoteNotClosed, 12 | InconsistentColumns, 13 | NoTrailingComma, 14 | }; 15 | 16 | CSVParser(char escape = '"', char sep = ',', bool has_trailing_comma = false) 17 | : escape_(escape) 18 | , comma_(sep) 19 | , has_trailing_comma_(has_trailing_comma) {} 20 | 21 | [[nodiscard]] Error execute(const char* buffer, size_t len); 22 | [[nodiscard]] Error finish(); 23 | 24 | virtual void on_field(size_t col_idx, size_t row_idx, const char* begin, size_t len) = 0; 25 | 26 | private: 27 | // configure 28 | char escape_{'"'}; // may also be '\\' 29 | char comma_{','}; // may also be '|' 30 | // true means # commas = # columns and the last comma in each line is followed by the record 31 | // seperator; false means # commas + 1 = # columns 32 | bool has_trailing_comma_{false}; 33 | 34 | // states 35 | std::vector current_field_; 36 | size_t col_idx_{0}; 37 | size_t row_idx_{0}; 38 | size_t num_cols_{0}; 39 | bool after_first_row_{false}; 40 | bool quoted_{false}; 41 | bool after_field_sep_{false}; 42 | bool after_record_sep_{false}; 43 | bool escaping_{false}; 44 | bool newlining_{false}; 45 | }; 46 | -------------------------------------------------------------------------------- /include/hardware__ca09.h: -------------------------------------------------------------------------------- 1 | // Hardware information for Ampere Altra Max node ca09. 2 | 3 | // Architecture from `uname -srm`. 4 | #define SPC__AARCH64 5 | 6 | // CPU from `/proc/cpuinfo`. 
7 | #define SPC__CPU_NAME "" 8 | 9 | // The servers might have multiple CPUs. We limit all benchmarks to a single node using numactl. The listed CPU numbers 10 | // below are for a single CPU. The listed NUMA numbers are just meant to give you a rough idea of the system. 11 | #define SPC__CORE_COUNT 128 12 | #define SPC__THREAD_COUNT 128 13 | #define SPC__NUMA_NODE_COUNT 1 14 | #define SPC__NUMA_NODES_ACTIVE_IN_BENCHMARK 1 15 | 16 | // Main memory per NUMA node (MB). 17 | #define SPC__NUMA_NODE_DRAM_MB 515809 18 | 19 | // Obtained from `lsb_release -a`. 20 | #define SPC__OS "Ubuntu 24.04.1 LTS" 21 | 22 | // Obtained from: `uname -srm`. 23 | #define SPC__KERNEL "Linux 6.8.0-50-generic aarch64" 24 | 25 | // ARM: possible options are SVE, SVE2, and NEON. No ARM CPU older than Ampere Altra Max will be used. 26 | #define SPC__SUPPORTS_NEON 27 | 28 | // Cache information from `getconf -a | grep CACHE`. 29 | // As Ubuntu did not list all numbers, we also took cache sizes from `cat /sys/devices/system/cpu/cpu0/cache/index*/size` 30 | #define SPC__LEVEL1_ICACHE_SIZE 65536 31 | #define SPC__LEVEL1_ICACHE_ASSOC 32 | #define SPC__LEVEL1_ICACHE_LINESIZE 64 33 | #define SPC__LEVEL1_DCACHE_SIZE 65536 34 | #define SPC__LEVEL1_DCACHE_ASSOC 35 | #define SPC__LEVEL1_DCACHE_LINESIZE 64 36 | #define SPC__LEVEL2_CACHE_SIZE 1048576 37 | #define SPC__LEVEL2_CACHE_ASSOC 38 | #define SPC__LEVEL2_CACHE_LINESIZE 39 | #define SPC__LEVEL3_CACHE_SIZE 40 | #define SPC__LEVEL3_CACHE_ASSOC 41 | #define SPC__LEVEL3_CACHE_LINESIZE 42 | #define SPC__LEVEL4_CACHE_SIZE 43 | #define SPC__LEVEL4_CACHE_ASSOC 44 | #define SPC__LEVEL4_CACHE_LINESIZE 45 | -------------------------------------------------------------------------------- /include/hardware__cp02.h: -------------------------------------------------------------------------------- 1 | // Hardware information for IBM Power8 node cp02. 2 | 3 | // Architecture from `uname -srm`. 4 | #define SPC__PPC64LE 5 | 6 | // CPU from `/proc/cpuinfo`. 
7 | #define SPC__CPU_NAME "POWER8 (architected), altivec supported" 8 | 9 | // The servers might have multiple CPUs. We limit all benchmarks to a single node using numactl. The listed CPU numbers 10 | // below are for a single CPU. The listed NUMA numbers are just meant to give you a rough idea of the system. 11 | #define SPC__CORE_COUNT 12 12 | #define SPC__THREAD_COUNT 96 13 | #define SPC__NUMA_NODE_COUNT 8 14 | #define SPC__NUMA_NODES_ACTIVE_IN_BENCHMARK 1 15 | 16 | // Main memory per NUMA node (MB). 17 | #define SPC__NUMA_NODE_DRAM_MB 1039964 18 | 19 | // Obtained from `lsb_release -a`. 20 | #define SPC__OS "Ubuntu 20.04.6 LTS" 21 | 22 | // Obtained from: `uname -srm`. 23 | #define SPC__KERNEL "Linux 5.4.0-137-generic x86_64" 24 | 25 | // IBM: possible options are VSX, VMX, and MMA. No IBM CPU older than Power8 will be used. 26 | #define SPC__SUPPORTS_VSX 27 | #define SPC__SUPPORTS_VMX 28 | 29 | // Cache information from `getconf -a | grep CACHE`. 30 | #define SPC__LEVEL1_ICACHE_SIZE 32768 31 | #define SPC__LEVEL1_ICACHE_ASSOC 8 32 | #define SPC__LEVEL1_ICACHE_LINESIZE 128 33 | #define SPC__LEVEL1_DCACHE_SIZE 65536 34 | #define SPC__LEVEL1_DCACHE_ASSOC 8 35 | #define SPC__LEVEL1_DCACHE_LINESIZE 128 36 | #define SPC__LEVEL2_CACHE_SIZE 524288 37 | #define SPC__LEVEL2_CACHE_ASSOC 8 38 | #define SPC__LEVEL2_CACHE_LINESIZE 128 39 | #define SPC__LEVEL3_CACHE_SIZE 8388608 40 | #define SPC__LEVEL3_CACHE_ASSOC 8 41 | #define SPC__LEVEL3_CACHE_LINESIZE 128 42 | #define SPC__LEVEL4_CACHE_SIZE 0 43 | #define SPC__LEVEL4_CACHE_ASSOC 0 44 | #define SPC__LEVEL4_CACHE_LINESIZE 0 45 | -------------------------------------------------------------------------------- /include/hardware__koroneia.h: -------------------------------------------------------------------------------- 1 | // Hardware information for AMD EPYC 7F72 node koroneia. 2 | 3 | // Architecture from `uname -srm`. 4 | #define SPC__X86_64 5 | 6 | // CPU from `/proc/cpuinfo`. 
7 | #define SPC__CPU_NAME "AMD EPYC 7F72 24-Core Processor" 8 | 9 | // The servers might have multiple CPUs. We limit all benchmarks to a single node using numactl. The listed CPU numbers 10 | // below are for a single CPU. The listed NUMA numbers are just meant to give you a rough idea of the system. 11 | #define SPC__CORE_COUNT 24 12 | #define SPC__THREAD_COUNT 48 13 | #define SPC__NUMA_NODE_COUNT 2 14 | #define SPC__NUMA_NODES_ACTIVE_IN_BENCHMARK 1 15 | 16 | // Main memory per NUMA node (MB). 17 | #define SPC__NUMA_NODE_DRAM_MB 257699 18 | 19 | // Obtained from `lsb_release -a`. 20 | #define SPC__OS "Ubuntu 24.04.2 LTS" 21 | 22 | // Obtained from: `uname -srm`. 23 | #define SPC__KERNEL "Linux 5.15.0-106-generic x86_64" 24 | 25 | // AMD: possible options are AVX, AVX2, and AVX512. No AMD CPU older than AMD EPYC 7F72 will be used. 26 | #define SPC__SUPPORTS_AVX 27 | #define SPC__SUPPORTS_AVX2 28 | 29 | // Cache information from `getconf -a | grep CACHE`. 30 | #define SPC__LEVEL1_ICACHE_SIZE 32768 31 | #define SPC__LEVEL1_ICACHE_ASSOC 32 | #define SPC__LEVEL1_ICACHE_LINESIZE 64 33 | #define SPC__LEVEL1_DCACHE_SIZE 32768 34 | #define SPC__LEVEL1_DCACHE_ASSOC 8 35 | #define SPC__LEVEL1_DCACHE_LINESIZE 64 36 | #define SPC__LEVEL2_CACHE_SIZE 524288 37 | #define SPC__LEVEL2_CACHE_ASSOC 8 38 | #define SPC__LEVEL2_CACHE_LINESIZE 64 39 | #define SPC__LEVEL3_CACHE_SIZE 16777216 40 | #define SPC__LEVEL3_CACHE_ASSOC 16 41 | #define SPC__LEVEL3_CACHE_LINESIZE 64 42 | #define SPC__LEVEL4_CACHE_SIZE 0 43 | #define SPC__LEVEL4_CACHE_ASSOC 44 | #define SPC__LEVEL4_CACHE_LINESIZE 45 | -------------------------------------------------------------------------------- /include/hardware__sidon.h: -------------------------------------------------------------------------------- 1 | // Hardware information for Intel Xeon E7-4880 v2 node sidon. 2 | 3 | // Architecture from `uname -srm`. 4 | #define SPC__X86_64 5 | 6 | // CPU from `/proc/cpuinfo`. 
7 | #define SPC__CPU_NAME "Intel(R) Xeon(R) CPU E7-4880 v2 @ 2.50GHz" 8 | 9 | // The servers might have multiple CPUs. We limit all benchmarks to a single node using numactl. The listed CPU numbers 10 | // below are for a single CPU. The listed NUMA numbers are just meant to give you a rough idea of the system. 11 | #define SPC__CORE_COUNT 15 12 | #define SPC__THREAD_COUNT 30 13 | #define SPC__NUMA_NODE_COUNT 4 14 | #define SPC__NUMA_NODES_ACTIVE_IN_BENCHMARK 1 15 | 16 | // Main memory per NUMA node (MB). 17 | #define SPC__NUMA_NODE_DRAM_MB 515809 18 | 19 | // Obtained from `lsb_release -a`. 20 | #define SPC__OS "Ubuntu 22.04.4 LTS" 21 | 22 | // Obtained from: `uname -srm`. 23 | #define SPC__KERNEL "Linux 5.15.0-116-generic x86_64" 24 | 25 | // Intel: possible options are AVX, AVX2, and AVX512. No Intel CPU older than Intel Xeon E7-4880 v2 will be used. 26 | #define SPC__SUPPORTS_AVX 27 | 28 | // Cache information from `getconf -a | grep CACHE`. 29 | #define SPC__LEVEL1_ICACHE_SIZE 32768 30 | #define SPC__LEVEL1_ICACHE_ASSOC 31 | #define SPC__LEVEL1_ICACHE_LINESIZE 64 32 | #define SPC__LEVEL1_DCACHE_SIZE 32768 33 | #define SPC__LEVEL1_DCACHE_ASSOC 8 34 | #define SPC__LEVEL1_DCACHE_LINESIZE 64 35 | #define SPC__LEVEL2_CACHE_SIZE 262144 36 | #define SPC__LEVEL2_CACHE_ASSOC 8 37 | #define SPC__LEVEL2_CACHE_LINESIZE 64 38 | #define SPC__LEVEL3_CACHE_SIZE 39321600 39 | #define SPC__LEVEL3_CACHE_ASSOC 20 40 | #define SPC__LEVEL3_CACHE_LINESIZE 64 41 | #define SPC__LEVEL4_CACHE_SIZE 0 42 | #define SPC__LEVEL4_CACHE_ASSOC 43 | #define SPC__LEVEL4_CACHE_LINESIZE 44 | -------------------------------------------------------------------------------- /include/plan.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2025 Matthias Boehm, TU Berlin 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | // API of the SIGMOD 2025 Programming Contest, 18 | // See https://sigmod-contest-2025.github.io/index.html 19 | #pragma once 20 | 21 | #include 22 | #include 23 | // #include 24 | 25 | // supported attribute data types 26 | 27 | enum class NodeType { 28 | HashJoin, 29 | Scan, 30 | }; 31 | 32 | struct ScanNode { 33 | size_t base_table_id; 34 | }; 35 | 36 | struct JoinNode { 37 | bool build_left; 38 | size_t left; 39 | size_t right; 40 | size_t left_attr; 41 | size_t right_attr; 42 | }; 43 | 44 | struct PlanNode { 45 | std::variant data; 46 | std::vector> output_attrs; 47 | 48 | PlanNode(std::variant data, 49 | std::vector> output_attrs) 50 | : data(std::move(data)) 51 | , output_attrs(std::move(output_attrs)) {} 52 | }; 53 | 54 | constexpr size_t PAGE_SIZE = 8192; 55 | 56 | struct alignas(8) Page { 57 | std::byte data[PAGE_SIZE]; 58 | }; 59 | 60 | struct Column { 61 | DataType type; 62 | std::vector pages; 63 | 64 | Page* new_page() { 65 | auto ret = new Page; 66 | pages.push_back(ret); 67 | return ret; 68 | } 69 | 70 | Column(DataType data_type) 71 | : type(data_type) 72 | , pages() {} 73 | 74 | Column(Column&& other) noexcept 75 | : type(other.type) 76 | , pages(std::move(other.pages)) { 77 | other.pages.clear(); 78 | } 79 | 80 | Column& operator=(Column&& other) noexcept { 81 | if (this != &other) { 82 | for (auto* page: pages) { 83 | delete page; 84 | } 85 | type = other.type; 86 | pages = std::move(other.pages); 87 | other.pages.clear(); 88 | } 89 | return *this; 90 | } 91 | 92 | Column(const Column&) = 
delete; 93 | Column& operator=(const Column&) = delete; 94 | 95 | ~Column() { 96 | for (auto* page: pages) { 97 | delete page; 98 | } 99 | } 100 | }; 101 | 102 | struct ColumnarTable { 103 | size_t num_rows{0}; 104 | std::vector columns; 105 | }; 106 | 107 | std::tuple>, std::vector> from_columnar( 108 | const ColumnarTable& table); 109 | ColumnarTable from_table(const std::vector>& table, 110 | const std::vector& data_types); 111 | 112 | struct Plan { 113 | std::vector nodes; 114 | std::vector inputs; 115 | // std::vector tables; 116 | size_t root; 117 | 118 | size_t new_join_node(bool build_left, 119 | size_t left, 120 | size_t right, 121 | size_t left_attr, 122 | size_t right_attr, 123 | std::vector> output_attrs) { 124 | JoinNode join{ 125 | .build_left = build_left, 126 | .left = left, 127 | .right = right, 128 | .left_attr = left_attr, 129 | .right_attr = right_attr, 130 | }; 131 | auto ret = nodes.size(); 132 | nodes.emplace_back(join, std::move(output_attrs)); 133 | return ret; 134 | } 135 | 136 | size_t new_scan_node(size_t base_table_id, 137 | std::vector> output_attrs) { 138 | ScanNode scan{.base_table_id = base_table_id}; 139 | auto ret = nodes.size(); 140 | nodes.emplace_back(scan, std::move(output_attrs)); 141 | return ret; 142 | } 143 | 144 | size_t new_input(ColumnarTable input) { 145 | auto ret = inputs.size(); 146 | inputs.emplace_back(std::move(input)); 147 | return ret; 148 | } 149 | }; 150 | 151 | template 152 | struct ColumnInserter { 153 | Column& column; 154 | size_t last_page_idx = 0; 155 | uint16_t num_rows = 0; 156 | size_t data_end = data_begin(); 157 | std::vector bitmap; 158 | 159 | constexpr static size_t data_begin() { 160 | if (sizeof(T) < 4) { 161 | return 4; 162 | } else { 163 | return sizeof(T); 164 | } 165 | } 166 | 167 | ColumnInserter(Column& column) 168 | : column(column) { 169 | bitmap.resize(PAGE_SIZE); 170 | } 171 | 172 | std::byte* get_page() { 173 | if (last_page_idx == column.pages.size()) [[unlikely]] { 174 | 
column.new_page(); 175 | } 176 | auto* page = column.pages[last_page_idx]; 177 | return page->data; 178 | } 179 | 180 | void save_page() { 181 | auto* page = get_page(); 182 | *reinterpret_cast(page) = num_rows; 183 | *reinterpret_cast(page + 2) = 184 | static_cast((data_end - data_begin()) / sizeof(T)); 185 | size_t bitmap_size = (num_rows + 7) / 8; 186 | memcpy(page + PAGE_SIZE - bitmap_size, bitmap.data(), bitmap_size); 187 | ++last_page_idx; 188 | num_rows = 0; 189 | data_end = data_begin(); 190 | } 191 | 192 | void set_bitmap(size_t idx) { 193 | size_t byte_idx = idx / 8; 194 | size_t bit_idx = idx % 8; 195 | bitmap[byte_idx] |= (0x1 << bit_idx); 196 | } 197 | 198 | void unset_bitmap(size_t idx) { 199 | size_t byte_idx = idx / 8; 200 | size_t bit_idx = idx % 8; 201 | bitmap[byte_idx] &= ~(0x1 << bit_idx); 202 | } 203 | 204 | void insert(T value) { 205 | if (data_end + 4 + num_rows / 8 + 1 > PAGE_SIZE) [[unlikely]] { 206 | save_page(); 207 | } 208 | auto* page = get_page(); 209 | *reinterpret_cast(page + data_end) = value; 210 | data_end += sizeof(T); 211 | set_bitmap(num_rows); 212 | ++num_rows; 213 | } 214 | 215 | void insert_null() { 216 | if (data_end + num_rows / 8 + 1 > PAGE_SIZE) [[unlikely]] { 217 | save_page(); 218 | } 219 | unset_bitmap(num_rows); 220 | ++num_rows; 221 | } 222 | 223 | void finalize() { 224 | if (num_rows != 0) { 225 | save_page(); 226 | } 227 | } 228 | }; 229 | 230 | template <> 231 | struct ColumnInserter { 232 | Column& column; 233 | size_t last_page_idx = 0; 234 | uint16_t num_rows = 0; 235 | uint16_t data_size = 0; 236 | size_t offset_end = 4; 237 | std::vector data; 238 | std::vector bitmap; 239 | 240 | constexpr static size_t offset_begin() { return 4; } 241 | 242 | ColumnInserter(Column& column) 243 | : column(column) { 244 | data.resize(PAGE_SIZE); 245 | bitmap.resize(PAGE_SIZE); 246 | } 247 | 248 | std::byte* get_page() { 249 | if (last_page_idx == column.pages.size()) [[unlikely]] { 250 | column.new_page(); 251 | } 252 | 
auto* page = column.pages[last_page_idx]; 253 | return page->data; 254 | } 255 | 256 | void save_long_string(std::string_view value) { 257 | size_t offset = 0; 258 | auto first_page = true; 259 | while (offset < value.size()) { 260 | auto* page = get_page(); 261 | if (first_page) { 262 | *reinterpret_cast(page) = 0xffff; 263 | first_page = false; 264 | } else { 265 | *reinterpret_cast(page) = 0xfffe; 266 | } 267 | auto page_data_len = std::min(value.size() - offset, PAGE_SIZE - 4); 268 | *reinterpret_cast(page + 2) = page_data_len; 269 | memcpy(page + 4, value.data() + offset, page_data_len); 270 | offset += page_data_len; 271 | ++last_page_idx; 272 | } 273 | } 274 | 275 | void save_page() { 276 | auto* page = get_page(); 277 | *reinterpret_cast(page) = num_rows; 278 | *reinterpret_cast(page + 2) = 279 | static_cast((offset_end - offset_begin()) / 2); 280 | size_t bitmap_size = (num_rows + 7) / 8; 281 | memcpy(page + offset_end, data.data(), data_size); 282 | memcpy(page + PAGE_SIZE - bitmap_size, bitmap.data(), bitmap_size); 283 | ++last_page_idx; 284 | num_rows = 0; 285 | data_size = 0; 286 | offset_end = offset_begin(); 287 | } 288 | 289 | void set_bitmap(size_t idx) { 290 | size_t byte_idx = idx / 8; 291 | size_t bit_idx = idx % 8; 292 | bitmap[byte_idx] |= (0x1 << bit_idx); 293 | } 294 | 295 | void unset_bitmap(size_t idx) { 296 | size_t byte_idx = idx / 8; 297 | size_t bit_idx = idx % 8; 298 | bitmap[byte_idx] &= ~(0x1 << bit_idx); 299 | } 300 | 301 | void insert(std::string_view value) { 302 | if (value.size() > PAGE_SIZE - 7) { 303 | if (num_rows > 0) { 304 | save_page(); 305 | } 306 | save_long_string(value); 307 | } else { 308 | if (offset_end + sizeof(uint16_t) + data_size + value.size() + num_rows / 8 + 1 309 | > PAGE_SIZE) { 310 | save_page(); 311 | } 312 | memcpy(data.data() + data_size, value.data(), value.size()); 313 | data_size += static_cast(value.size()); 314 | auto* page = get_page(); 315 | *reinterpret_cast(page + offset_end) = data_size; 316 
| offset_end += sizeof(uint16_t); 317 | set_bitmap(num_rows); 318 | ++num_rows; 319 | } 320 | } 321 | 322 | void insert_null() { 323 | if (offset_end + data_size + num_rows / 8 + 1 > PAGE_SIZE) [[unlikely]] { 324 | save_page(); 325 | } 326 | unset_bitmap(num_rows); 327 | ++num_rows; 328 | } 329 | 330 | void finalize() { 331 | if (num_rows != 0) { 332 | save_page(); 333 | } 334 | } 335 | }; 336 | 337 | namespace Contest { 338 | 339 | void* build_context(); 340 | void destroy_context(void*); 341 | 342 | ColumnarTable execute(const Plan& plan, void* context); 343 | 344 | } // namespace Contest 345 | -------------------------------------------------------------------------------- /include/statement.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #include 11 | #include 12 | 13 | using Data = std::variant; 14 | using Literal = std::variant; 15 | 16 | template <> 17 | struct fmt::formatter { 18 | template 19 | constexpr auto parse(ParseContext& ctx) { 20 | return ctx.begin(); 21 | } 22 | 23 | template 24 | auto format(const Data& value, FormatContext& ctx) const { 25 | return std::visit( 26 | [&ctx](const auto& value) { 27 | using T = std::decay_t; 28 | if constexpr (std::is_same_v) { 29 | return fmt::format_to(ctx.out(), "NULL"); 30 | } else { 31 | return fmt::format_to(ctx.out(), "{}", value); 32 | } 33 | }, 34 | value); 35 | } 36 | }; 37 | 38 | struct Attribute; 39 | struct Statement; 40 | struct Comparison; 41 | struct LogicalOperation; 42 | struct InnerColumnBase; 43 | 44 | // AST Node 45 | struct Statement { 46 | virtual ~Statement() = default; 47 | virtual std::string pretty_print(int indent = 0) const = 0; 48 | virtual bool eval(const std::vector& record) const = 0; 49 | virtual std::vector eval( 50 | const std::vector& table) const = 0; 51 | }; 52 | 53 | struct Comparison: Statement { 54 | size_t column; 55 | 56 | enum Op 
{ 57 | EQ, 58 | NEQ, 59 | LT, 60 | GT, 61 | LEQ, 62 | GEQ, 63 | LIKE, 64 | NOT_LIKE, 65 | IS_NULL, 66 | IS_NOT_NULL 67 | }; 68 | 69 | Op op; 70 | Literal value; 71 | 72 | Comparison(size_t col, Op o, Literal val) 73 | : column(col) 74 | , op(o) 75 | , value(std::move(val)) {} 76 | 77 | std::string pretty_print(int indent) const override { 78 | return fmt::format("{:{}}{} {} {}", "", indent, column, opToString(), valueToString()); 79 | } 80 | 81 | bool eval(const std::vector& record) const override; 82 | std::vector eval(const std::vector& table) const override; 83 | 84 | std::string opToString() const { 85 | switch (op) { 86 | case EQ: return "="; 87 | case NEQ: return "!="; 88 | case LT: return "<"; 89 | case GT: return ">"; 90 | case LEQ: return "<="; 91 | case GEQ: return ">="; 92 | case LIKE: return "LIKE"; 93 | case NOT_LIKE: return "NOT LIKE"; 94 | case IS_NULL: return "IS NULL"; 95 | case IS_NOT_NULL: return "IS NOT NULL"; 96 | default: return "??"; 97 | } 98 | } 99 | 100 | std::string valueToString() const { 101 | if (op == IS_NULL || op == IS_NOT_NULL) { 102 | return ""; 103 | } 104 | return visit( 105 | [](auto&& arg) -> std::string { 106 | using T = std::decay_t; 107 | if constexpr (std::is_same_v) { 108 | return fmt::format("'{}'", arg); 109 | } else if constexpr (std::is_same_v) { 110 | return ""; 111 | } else { 112 | return fmt::format("{}", arg); 113 | } 114 | }, 115 | value); 116 | } 117 | 118 | static bool like_match(std::string_view str, const std::string& pattern) { 119 | // per-thread cache of compiled patterns (thread_local, so no mutex is needed) 120 | thread_local auto regex_cache = std::unordered_map>{}; 121 | 122 | const RE2* re = nullptr; 123 | auto it = regex_cache.find(pattern); 124 | if (it != regex_cache.end()) { 125 | re = it->second.get(); 126 | } 127 | 128 | // cache miss: compile the pattern 129 | if (!re) { 130 | // convert to regex 131 | std::string regex_str; 132 | for (char c: pattern) { 133 | if (c == '%') { 134 | regex_str += ".*"; 135 | } else if (c == '_') { 136 | regex_str += '.'; 137 
| } else { 138 | // escape special characters 139 | if (c == '\\' || c == '.' || c == '^' || c == '$' || c == '|' || c == '?' 140 | || c == '*' || c == '+' || c == '(' || c == ')' || c == '[' || c == ']' 141 | || c == '{' || c == '}') { 142 | regex_str += '\\'; 143 | } 144 | regex_str += c; 145 | } 146 | } 147 | 148 | RE2::Options options; 149 | 150 | auto new_re = std::make_unique(regex_str, options); 151 | if (!new_re->ok()) { 152 | return false; // invalid regex 153 | } 154 | 155 | re = new_re.get(); 156 | regex_cache.emplace(pattern, std::move(new_re)); 157 | } 158 | 159 | // execute full match 160 | return RE2::FullMatch(str, *re); 161 | } 162 | 163 | static std::optional get_numeric_value(const Data& data) { 164 | if (auto* i32 = std::get_if(&data)) { 165 | return *i32; 166 | } else if (auto* i64 = std::get_if(&data)) { 167 | return static_cast(*i64); 168 | } else if (auto* d = std::get_if(&data)) { 169 | return *d; 170 | } else { 171 | return std::nullopt; 172 | } 173 | } 174 | 175 | static std::optional get_numeric_value(const Literal& value) { 176 | if (auto* i = std::get_if(&value)) { 177 | return *i; 178 | } else if (auto* d = std::get_if(&value)) { 179 | return *d; 180 | } else { 181 | return std::nullopt; 182 | } 183 | } 184 | }; 185 | 186 | struct LogicalOperation: Statement { 187 | enum Type { 188 | AND, 189 | OR, 190 | NOT 191 | }; 192 | 193 | Type op_type; 194 | std::vector> children; 195 | 196 | static std::unique_ptr makeAnd(std::unique_ptr l, 197 | std::unique_ptr r) { 198 | auto node = std::make_unique(); 199 | node->op_type = AND; 200 | node->children.push_back(std::move(l)); 201 | node->children.push_back(std::move(r)); 202 | return node; 203 | } 204 | 205 | static std::unique_ptr makeOr(std::unique_ptr l, 206 | std::unique_ptr r) { 207 | auto node = std::make_unique(); 208 | node->op_type = OR; 209 | node->children.push_back(std::move(l)); 210 | node->children.push_back(std::move(r)); 211 | return node; 212 | } 213 | 214 | static 
std::unique_ptr makeNot(std::unique_ptr child) { 215 | auto node = std::make_unique(); 216 | node->op_type = NOT; 217 | node->children.push_back(std::move(child)); 218 | return node; 219 | } 220 | 221 | std::string pretty_print(int indent) const override { 222 | std::string op_str = [this] { 223 | switch (op_type) { 224 | case AND: return "AND"; 225 | case OR: return "OR"; 226 | case NOT: return "NOT"; 227 | default: return "UNKNOWN"; 228 | } 229 | }(); 230 | 231 | std::string result = fmt::format("{:{}}[{}]\n", "", indent, op_str); 232 | 233 | for (auto& child: children) { 234 | result += child->pretty_print(indent + 2) + "\n"; 235 | } 236 | 237 | if (!children.empty()) { 238 | result.pop_back(); 239 | } 240 | return result; 241 | } 242 | 243 | bool eval(const std::vector& record) const override; 244 | std::vector eval(const std::vector& table) const override; 245 | }; 246 | -------------------------------------------------------------------------------- /include/table.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | #include 8 | #include 9 | #include 10 | 11 | struct Table { 12 | public: 13 | Table() = default; 14 | 15 | Table(std::vector> data, std::vector types) 16 | : types_(types) 17 | , data_(data) {} 18 | 19 | static ColumnarTable from_csv(const std::vector& attributes, 20 | const std::filesystem::path& path, 21 | Statement* filter, 22 | bool header = false); 23 | 24 | static Table from_columnar(const ColumnarTable& input); 25 | 26 | ColumnarTable to_columnar() const; 27 | 28 | const std::vector>& table() const { return data_; } 29 | 30 | std::vector>& table() { return data_; } 31 | 32 | const std::vector& types() const { return types_; } 33 | 34 | size_t number_rows() const { return this->data_.size(); } 35 | 36 | size_t number_cols() const { return this->types_.size(); } 37 | 38 | static void print(const std::vector>& data) { 39 | namespace views = 
ranges::views; 40 | 41 | auto escape_string = [](const std::string& s) { 42 | std::string escaped; 43 | for (char c: s) { 44 | switch (c) { 45 | case '"': escaped += "\\\""; break; 46 | case '\\': escaped += "\\\\"; break; 47 | case '\n': escaped += "\\n"; break; 48 | case '\r': escaped += "\\r"; break; 49 | case '\t': escaped += "\\t"; break; 50 | default: escaped += c; break; 51 | } 52 | } 53 | return escaped; 54 | }; 55 | 56 | for (auto& record: data) { 57 | auto line = record 58 | | views::transform([&escape_string](const Data& field) -> std::string { 59 | return std::visit( 60 | [&escape_string](const auto& arg) { 61 | using T = std::decay_t; 62 | using namespace std::string_literals; 63 | if constexpr (std::is_same_v) { 64 | return "NULL"s; 65 | } else if constexpr (std::is_same_v 66 | || std::is_same_v 67 | || std::is_same_v) { 68 | return fmt::format("{}", arg); 69 | } else if constexpr (std::is_same_v) { 70 | return fmt::format("\"{}\"", escape_string(arg)); 71 | // return fmt::format("{}", arg); 72 | } 73 | }, 74 | field); 75 | }) 76 | | views::join('|') | ranges::to(); 77 | fmt::println("{}", line); 78 | } 79 | } 80 | 81 | private: 82 | std::vector types_; 83 | std::vector> data_; 84 | 85 | void set_attributes(const std::vector& attributes) { 86 | this->types_.clear(); 87 | for (auto& attr: attributes) { 88 | this->types_.push_back(attr.type); 89 | } 90 | } 91 | }; 92 | -------------------------------------------------------------------------------- /include/table_entity.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | #include "common.h" 7 | 8 | struct TableEntity { 9 | std::string table; 10 | int id; 11 | 12 | friend bool operator==(const TableEntity& left, const TableEntity& right); 13 | friend bool operator!=(const TableEntity& left, const TableEntity& right); 14 | friend bool operator<(const TableEntity& left, const TableEntity& right); 15 | }; 16 | 17 | inline 
bool operator==(const TableEntity& left, const TableEntity& right) { 18 | return left.table == right.table && left.id == right.id; 19 | } 20 | 21 | inline bool operator!=(const TableEntity& left, const TableEntity& right) { 22 | return !(left == right); 23 | } 24 | 25 | inline bool operator<(const TableEntity& left, const TableEntity& right) { 26 | if (left.table < right.table) { 27 | return true; 28 | } else if (left.table > right.table) { 29 | return false; 30 | } else { 31 | return left.id < right.id; 32 | } 33 | } 34 | 35 | namespace std { 36 | template <> 37 | struct hash { 38 | size_t operator()(const TableEntity& te) const noexcept { 39 | size_t seed = 0; 40 | hash_combine(seed, hash{}(te.table)); 41 | hash_combine(seed, hash{}(te.id)); 42 | return seed; 43 | } 44 | }; 45 | 46 | } // namespace std 47 | 48 | template <> 49 | struct fmt::formatter { 50 | template 51 | constexpr auto parse(ParseContext& ctx) { 52 | return ctx.begin(); 53 | } 54 | 55 | template 56 | auto format(const TableEntity& te, FormatContext& ctx) const { 57 | return fmt::format_to(ctx.out(), "({}, {})", te.table, te.id); 58 | } 59 | }; 60 | -------------------------------------------------------------------------------- /job/10a.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(chn.name) AS uncredited_voiced_character, MIN(t.title) AS russian_movie FROM char_name AS chn, cast_info AS ci, company_name AS cn, company_type AS ct, movie_companies AS mc, role_type AS rt, title AS t WHERE ci.note like '%(voice)%' and ci.note like '%(uncredited)%' AND cn.country_code = '[ru]' AND rt.role = 'actor' AND t.production_year > 2005 AND t.id = mc.movie_id AND t.id = ci.movie_id AND ci.movie_id = mc.movie_id AND chn.id = ci.person_role_id AND rt.id = ci.role_id AND cn.id = mc.company_id AND ct.id = mc.company_type_id; 2 | -------------------------------------------------------------------------------- /job/10b.sql: 
-------------------------------------------------------------------------------- 1 | SELECT MIN(chn.name) AS character, MIN(t.title) AS russian_mov_with_actor_producer FROM char_name AS chn, cast_info AS ci, company_name AS cn, company_type AS ct, movie_companies AS mc, role_type AS rt, title AS t WHERE ci.note like '%(producer)%' AND cn.country_code = '[ru]' AND rt.role = 'actor' AND t.production_year > 2010 AND t.id = mc.movie_id AND t.id = ci.movie_id AND ci.movie_id = mc.movie_id AND chn.id = ci.person_role_id AND rt.id = ci.role_id AND cn.id = mc.company_id AND ct.id = mc.company_type_id; 2 | -------------------------------------------------------------------------------- /job/10c.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(chn.name) AS character, MIN(t.title) AS movie_with_american_producer FROM char_name AS chn, cast_info AS ci, company_name AS cn, company_type AS ct, movie_companies AS mc, role_type AS rt, title AS t WHERE ci.note like '%(producer)%' AND cn.country_code = '[us]' AND t.production_year > 1990 AND t.id = mc.movie_id AND t.id = ci.movie_id AND ci.movie_id = mc.movie_id AND chn.id = ci.person_role_id AND rt.id = ci.role_id AND cn.id = mc.company_id AND ct.id = mc.company_type_id; 2 | -------------------------------------------------------------------------------- /job/11a.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(cn.name) AS from_company, MIN(lt.link) AS movie_link_type, MIN(t.title) AS non_polish_sequel_movie FROM company_name AS cn, company_type AS ct, keyword AS k, link_type AS lt, movie_companies AS mc, movie_keyword AS mk, movie_link AS ml, title AS t WHERE cn.country_code !='[pl]' AND (cn.name LIKE '%Film%' OR cn.name LIKE '%Warner%') AND ct.kind ='production companies' AND k.keyword ='sequel' AND lt.link LIKE '%follow%' AND mc.note IS NULL AND t.production_year BETWEEN 1950 AND 2000 AND lt.id = ml.link_type_id AND 
ml.movie_id = t.id AND t.id = mk.movie_id AND mk.keyword_id = k.id AND t.id = mc.movie_id AND mc.company_type_id = ct.id AND mc.company_id = cn.id AND ml.movie_id = mk.movie_id AND ml.movie_id = mc.movie_id AND mk.movie_id = mc.movie_id; 2 | -------------------------------------------------------------------------------- /job/11b.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(cn.name) AS from_company, MIN(lt.link) AS movie_link_type, MIN(t.title) AS sequel_movie FROM company_name AS cn, company_type AS ct, keyword AS k, link_type AS lt, movie_companies AS mc, movie_keyword AS mk, movie_link AS ml, title AS t WHERE cn.country_code !='[pl]' AND (cn.name LIKE '%Film%' OR cn.name LIKE '%Warner%') AND ct.kind ='production companies' AND k.keyword ='sequel' AND lt.link LIKE '%follows%' AND mc.note IS NULL AND t.production_year = 1998 and t.title like '%Money%' AND lt.id = ml.link_type_id AND ml.movie_id = t.id AND t.id = mk.movie_id AND mk.keyword_id = k.id AND t.id = mc.movie_id AND mc.company_type_id = ct.id AND mc.company_id = cn.id AND ml.movie_id = mk.movie_id AND ml.movie_id = mc.movie_id AND mk.movie_id = mc.movie_id; 2 | -------------------------------------------------------------------------------- /job/11c.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(cn.name) AS from_company, MIN(mc.note) AS production_note, MIN(t.title) AS movie_based_on_book FROM company_name AS cn, company_type AS ct, keyword AS k, link_type AS lt, movie_companies AS mc, movie_keyword AS mk, movie_link AS ml, title AS t WHERE cn.country_code !='[pl]' and (cn.name like '20th Century Fox%' or cn.name like 'Twentieth Century Fox%') AND ct.kind != 'production companies' and ct.kind is not NULL AND k.keyword in ('sequel', 'revenge', 'based-on-novel') AND mc.note is not NULL AND t.production_year > 1950 AND lt.id = ml.link_type_id AND ml.movie_id = t.id AND t.id = mk.movie_id AND 
mk.keyword_id = k.id AND t.id = mc.movie_id AND mc.company_type_id = ct.id AND mc.company_id = cn.id AND ml.movie_id = mk.movie_id AND ml.movie_id = mc.movie_id AND mk.movie_id = mc.movie_id; 2 | -------------------------------------------------------------------------------- /job/11d.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(cn.name) AS from_company, MIN(mc.note) AS production_note, MIN(t.title) AS movie_based_on_book FROM company_name AS cn, company_type AS ct, keyword AS k, link_type AS lt, movie_companies AS mc, movie_keyword AS mk, movie_link AS ml, title AS t WHERE cn.country_code !='[pl]' AND ct.kind != 'production companies' and ct.kind is not NULL AND k.keyword in ('sequel', 'revenge', 'based-on-novel') AND mc.note is not NULL AND t.production_year > 1950 AND lt.id = ml.link_type_id AND ml.movie_id = t.id AND t.id = mk.movie_id AND mk.keyword_id = k.id AND t.id = mc.movie_id AND mc.company_type_id = ct.id AND mc.company_id = cn.id AND ml.movie_id = mk.movie_id AND ml.movie_id = mc.movie_id AND mk.movie_id = mc.movie_id; 2 | -------------------------------------------------------------------------------- /job/12a.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(cn.name) AS movie_company, MIN(mi_idx.info) AS rating, MIN(t.title) AS drama_horror_movie FROM company_name AS cn, company_type AS ct, info_type AS it1, info_type AS it2, movie_companies AS mc, movie_info AS mi, movie_info_idx AS mi_idx, title AS t WHERE cn.country_code = '[us]' AND ct.kind = 'production companies' AND it1.info = 'genres' AND it2.info = 'rating' AND mi.info in ('Drama', 'Horror') AND mi_idx.info > '8.0' AND t.production_year between 2005 and 2008 AND t.id = mi.movie_id AND t.id = mi_idx.movie_id AND mi.info_type_id = it1.id AND mi_idx.info_type_id = it2.id AND t.id = mc.movie_id AND ct.id = mc.company_type_id AND cn.id = mc.company_id AND mc.movie_id = mi.movie_id AND 
mc.movie_id = mi_idx.movie_id AND mi.movie_id = mi_idx.movie_id; 2 | -------------------------------------------------------------------------------- /job/12b.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(mi.info) AS budget, MIN(t.title) AS unsuccsessful_movie FROM company_name AS cn, company_type AS ct, info_type AS it1, info_type AS it2, movie_companies AS mc, movie_info AS mi, movie_info_idx AS mi_idx, title AS t WHERE cn.country_code ='[us]' AND ct.kind is not NULL and (ct.kind ='production companies' or ct.kind = 'distributors') AND it1.info ='budget' AND it2.info ='bottom 10 rank' AND t.production_year >2000 AND (t.title LIKE 'Birdemic%' OR t.title LIKE '%Movie%') AND t.id = mi.movie_id AND t.id = mi_idx.movie_id AND mi.info_type_id = it1.id AND mi_idx.info_type_id = it2.id AND t.id = mc.movie_id AND ct.id = mc.company_type_id AND cn.id = mc.company_id AND mc.movie_id = mi.movie_id AND mc.movie_id = mi_idx.movie_id AND mi.movie_id = mi_idx.movie_id; 2 | -------------------------------------------------------------------------------- /job/12c.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(cn.name) AS movie_company, MIN(mi_idx.info) AS rating, MIN(t.title) AS mainstream_movie FROM company_name AS cn, company_type AS ct, info_type AS it1, info_type AS it2, movie_companies AS mc, movie_info AS mi, movie_info_idx AS mi_idx, title AS t WHERE cn.country_code = '[us]' AND ct.kind = 'production companies' AND it1.info = 'genres' AND it2.info = 'rating' AND mi.info in ('Drama', 'Horror', 'Western', 'Family') AND mi_idx.info > '7.0' AND t.production_year between 2000 and 2010 AND t.id = mi.movie_id AND t.id = mi_idx.movie_id AND mi.info_type_id = it1.id AND mi_idx.info_type_id = it2.id AND t.id = mc.movie_id AND ct.id = mc.company_type_id AND cn.id = mc.company_id AND mc.movie_id = mi.movie_id AND mc.movie_id = mi_idx.movie_id AND mi.movie_id = mi_idx.movie_id; 2 
| -------------------------------------------------------------------------------- /job/13a.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(mi.info) AS release_date, MIN(miidx.info) AS rating, MIN(t.title) AS german_movie FROM company_name AS cn, company_type AS ct, info_type AS it, info_type AS it2, kind_type AS kt, movie_companies AS mc, movie_info AS mi, movie_info_idx AS miidx, title AS t WHERE cn.country_code ='[de]' AND ct.kind ='production companies' AND it.info ='rating' AND it2.info ='release dates' AND kt.kind ='movie' AND mi.movie_id = t.id AND it2.id = mi.info_type_id AND kt.id = t.kind_id AND mc.movie_id = t.id AND cn.id = mc.company_id AND ct.id = mc.company_type_id AND miidx.movie_id = t.id AND it.id = miidx.info_type_id AND mi.movie_id = miidx.movie_id AND mi.movie_id = mc.movie_id AND miidx.movie_id = mc.movie_id; 2 | -------------------------------------------------------------------------------- /job/13b.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(cn.name) AS producing_company, MIN(miidx.info) AS rating, MIN(t.title) AS movie_about_winning FROM company_name AS cn, company_type AS ct, info_type AS it, info_type AS it2, kind_type AS kt, movie_companies AS mc, movie_info AS mi, movie_info_idx AS miidx, title AS t WHERE cn.country_code ='[us]' AND ct.kind ='production companies' AND it.info ='rating' AND it2.info ='release dates' AND kt.kind ='movie' AND t.title != '' AND (t.title LIKE '%Champion%' OR t.title LIKE '%Loser%') AND mi.movie_id = t.id AND it2.id = mi.info_type_id AND kt.id = t.kind_id AND mc.movie_id = t.id AND cn.id = mc.company_id AND ct.id = mc.company_type_id AND miidx.movie_id = t.id AND it.id = miidx.info_type_id AND mi.movie_id = miidx.movie_id AND mi.movie_id = mc.movie_id AND miidx.movie_id = mc.movie_id; 2 | -------------------------------------------------------------------------------- /job/13c.sql: 
-------------------------------------------------------------------------------- 1 | SELECT MIN(cn.name) AS producing_company, MIN(miidx.info) AS rating, MIN(t.title) AS movie_about_winning FROM company_name AS cn, company_type AS ct, info_type AS it, info_type AS it2, kind_type AS kt, movie_companies AS mc, movie_info AS mi, movie_info_idx AS miidx, title AS t WHERE cn.country_code ='[us]' AND ct.kind ='production companies' AND it.info ='rating' AND it2.info ='release dates' AND kt.kind ='movie' AND t.title != '' AND (t.title LIKE 'Champion%' OR t.title LIKE 'Loser%') AND mi.movie_id = t.id AND it2.id = mi.info_type_id AND kt.id = t.kind_id AND mc.movie_id = t.id AND cn.id = mc.company_id AND ct.id = mc.company_type_id AND miidx.movie_id = t.id AND it.id = miidx.info_type_id AND mi.movie_id = miidx.movie_id AND mi.movie_id = mc.movie_id AND miidx.movie_id = mc.movie_id; 2 | -------------------------------------------------------------------------------- /job/13d.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(cn.name) AS producing_company, MIN(miidx.info) AS rating, MIN(t.title) AS movie FROM company_name AS cn, company_type AS ct, info_type AS it, info_type AS it2, kind_type AS kt, movie_companies AS mc, movie_info AS mi, movie_info_idx AS miidx, title AS t WHERE cn.country_code ='[us]' AND ct.kind ='production companies' AND it.info ='rating' AND it2.info ='release dates' AND kt.kind ='movie' AND mi.movie_id = t.id AND it2.id = mi.info_type_id AND kt.id = t.kind_id AND mc.movie_id = t.id AND cn.id = mc.company_id AND ct.id = mc.company_type_id AND miidx.movie_id = t.id AND it.id = miidx.info_type_id AND mi.movie_id = miidx.movie_id AND mi.movie_id = mc.movie_id AND miidx.movie_id = mc.movie_id; 2 | -------------------------------------------------------------------------------- /job/14a.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(mi_idx.info) AS rating, 
MIN(t.title) AS northern_dark_movie FROM info_type AS it1, info_type AS it2, keyword AS k, kind_type AS kt, movie_info AS mi, movie_info_idx AS mi_idx, movie_keyword AS mk, title AS t WHERE it1.info = 'countries' AND it2.info = 'rating' AND k.keyword in ('murder', 'murder-in-title', 'blood', 'violence') AND kt.kind = 'movie' AND mi.info IN ('Sweden', 'Norway', 'Germany', 'Denmark', 'Swedish', 'Denish', 'Norwegian', 'German', 'USA', 'American') AND mi_idx.info < '8.5' AND t.production_year > 2010 AND kt.id = t.kind_id AND t.id = mi.movie_id AND t.id = mk.movie_id AND t.id = mi_idx.movie_id AND mk.movie_id = mi.movie_id AND mk.movie_id = mi_idx.movie_id AND mi.movie_id = mi_idx.movie_id AND k.id = mk.keyword_id AND it1.id = mi.info_type_id AND it2.id = mi_idx.info_type_id; 2 | -------------------------------------------------------------------------------- /job/14b.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(mi_idx.info) AS rating, MIN(t.title) AS western_dark_production FROM info_type AS it1, info_type AS it2, keyword AS k, kind_type AS kt, movie_info AS mi, movie_info_idx AS mi_idx, movie_keyword AS mk, title AS t WHERE it1.info = 'countries' AND it2.info = 'rating' AND k.keyword in ('murder', 'murder-in-title') AND kt.kind = 'movie' AND mi.info IN ('Sweden', 'Norway', 'Germany', 'Denmark', 'Swedish', 'Denish', 'Norwegian', 'German', 'USA', 'American') AND mi_idx.info > '6.0' AND t.production_year > 2010 and (t.title like '%murder%' or t.title like '%Murder%' or t.title like '%Mord%') AND kt.id = t.kind_id AND t.id = mi.movie_id AND t.id = mk.movie_id AND t.id = mi_idx.movie_id AND mk.movie_id = mi.movie_id AND mk.movie_id = mi_idx.movie_id AND mi.movie_id = mi_idx.movie_id AND k.id = mk.keyword_id AND it1.id = mi.info_type_id AND it2.id = mi_idx.info_type_id; 2 | -------------------------------------------------------------------------------- /job/14c.sql: 
-------------------------------------------------------------------------------- 1 | SELECT MIN(mi_idx.info) AS rating, MIN(t.title) AS north_european_dark_production FROM info_type AS it1, info_type AS it2, keyword AS k, kind_type AS kt, movie_info AS mi, movie_info_idx AS mi_idx, movie_keyword AS mk, title AS t WHERE it1.info = 'countries' AND it2.info = 'rating' AND k.keyword is not null and k.keyword in ('murder', 'murder-in-title', 'blood', 'violence') AND kt.kind in ('movie', 'episode') AND mi.info IN ('Sweden', 'Norway', 'Germany', 'Denmark', 'Swedish', 'Danish', 'Norwegian', 'German', 'USA', 'American') AND mi_idx.info < '8.5' AND t.production_year > 2005 AND kt.id = t.kind_id AND t.id = mi.movie_id AND t.id = mk.movie_id AND t.id = mi_idx.movie_id AND mk.movie_id = mi.movie_id AND mk.movie_id = mi_idx.movie_id AND mi.movie_id = mi_idx.movie_id AND k.id = mk.keyword_id AND it1.id = mi.info_type_id AND it2.id = mi_idx.info_type_id; 2 | -------------------------------------------------------------------------------- /job/15a.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(mi.info) AS release_date, MIN(t.title) AS internet_movie FROM aka_title AS at, company_name AS cn, company_type AS ct, info_type AS it1, keyword AS k, movie_companies AS mc, movie_info AS mi, movie_keyword AS mk, title AS t WHERE cn.country_code = '[us]' AND it1.info = 'release dates' AND mc.note like '%(200%)%' and mc.note like '%(worldwide)%' AND mi.note like '%internet%' AND mi.info like 'USA:% 200%' AND t.production_year > 2000 AND t.id = at.movie_id AND t.id = mi.movie_id AND t.id = mk.movie_id AND t.id = mc.movie_id AND mk.movie_id = mi.movie_id AND mk.movie_id = mc.movie_id AND mk.movie_id = at.movie_id AND mi.movie_id = mc.movie_id AND mi.movie_id = at.movie_id AND mc.movie_id = at.movie_id AND k.id = mk.keyword_id AND it1.id = mi.info_type_id AND cn.id = mc.company_id AND ct.id = mc.company_type_id; 2 | 
-------------------------------------------------------------------------------- /job/15b.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(mi.info) AS release_date, MIN(t.title) AS youtube_movie FROM aka_title AS at, company_name AS cn, company_type AS ct, info_type AS it1, keyword AS k, movie_companies AS mc, movie_info AS mi, movie_keyword AS mk, title AS t WHERE cn.country_code = '[us]' and cn.name = 'YouTube' AND it1.info = 'release dates' AND mc.note like '%(200%)%' and mc.note like '%(worldwide)%' AND mi.note like '%internet%' AND mi.info like 'USA:% 200%' AND t.production_year between 2005 and 2010 AND t.id = at.movie_id AND t.id = mi.movie_id AND t.id = mk.movie_id AND t.id = mc.movie_id AND mk.movie_id = mi.movie_id AND mk.movie_id = mc.movie_id AND mk.movie_id = at.movie_id AND mi.movie_id = mc.movie_id AND mi.movie_id = at.movie_id AND mc.movie_id = at.movie_id AND k.id = mk.keyword_id AND it1.id = mi.info_type_id AND cn.id = mc.company_id AND ct.id = mc.company_type_id; 2 | -------------------------------------------------------------------------------- /job/15c.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(mi.info) AS release_date, MIN(t.title) AS modern_american_internet_movie FROM aka_title AS at, company_name AS cn, company_type AS ct, info_type AS it1, keyword AS k, movie_companies AS mc, movie_info AS mi, movie_keyword AS mk, title AS t WHERE cn.country_code = '[us]' AND it1.info = 'release dates' AND mi.note like '%internet%' AND mi.info is not NULL and (mi.info like 'USA:% 199%' or mi.info like 'USA:% 200%') AND t.production_year > 1990 AND t.id = at.movie_id AND t.id = mi.movie_id AND t.id = mk.movie_id AND t.id = mc.movie_id AND mk.movie_id = mi.movie_id AND mk.movie_id = mc.movie_id AND mk.movie_id = at.movie_id AND mi.movie_id = mc.movie_id AND mi.movie_id = at.movie_id AND mc.movie_id = at.movie_id AND k.id = mk.keyword_id AND it1.id = 
mi.info_type_id AND cn.id = mc.company_id AND ct.id = mc.company_type_id; 2 | -------------------------------------------------------------------------------- /job/15d.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(at.title) AS aka_title, MIN(t.title) AS internet_movie_title FROM aka_title AS at, company_name AS cn, company_type AS ct, info_type AS it1, keyword AS k, movie_companies AS mc, movie_info AS mi, movie_keyword AS mk, title AS t WHERE cn.country_code = '[us]' AND it1.info = 'release dates' AND mi.note like '%internet%' AND t.production_year > 1990 AND t.id = at.movie_id AND t.id = mi.movie_id AND t.id = mk.movie_id AND t.id = mc.movie_id AND mk.movie_id = mi.movie_id AND mk.movie_id = mc.movie_id AND mk.movie_id = at.movie_id AND mi.movie_id = mc.movie_id AND mi.movie_id = at.movie_id AND mc.movie_id = at.movie_id AND k.id = mk.keyword_id AND it1.id = mi.info_type_id AND cn.id = mc.company_id AND ct.id = mc.company_type_id; 2 | -------------------------------------------------------------------------------- /job/16a.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(an.name) AS cool_actor_pseudonym, MIN(t.title) AS series_named_after_char FROM aka_name AS an, cast_info AS ci, company_name AS cn, keyword AS k, movie_companies AS mc, movie_keyword AS mk, name AS n, title AS t WHERE cn.country_code ='[us]' AND k.keyword ='character-name-in-title' AND t.episode_nr >= 50 AND t.episode_nr < 100 AND an.person_id = n.id AND n.id = ci.person_id AND ci.movie_id = t.id AND t.id = mk.movie_id AND mk.keyword_id = k.id AND t.id = mc.movie_id AND mc.company_id = cn.id AND an.person_id = ci.person_id AND ci.movie_id = mc.movie_id AND ci.movie_id = mk.movie_id AND mc.movie_id = mk.movie_id; 2 | -------------------------------------------------------------------------------- /job/16b.sql: -------------------------------------------------------------------------------- 1 
| SELECT MIN(an.name) AS cool_actor_pseudonym, MIN(t.title) AS series_named_after_char FROM aka_name AS an, cast_info AS ci, company_name AS cn, keyword AS k, movie_companies AS mc, movie_keyword AS mk, name AS n, title AS t WHERE cn.country_code ='[us]' AND k.keyword ='character-name-in-title' AND an.person_id = n.id AND n.id = ci.person_id AND ci.movie_id = t.id AND t.id = mk.movie_id AND mk.keyword_id = k.id AND t.id = mc.movie_id AND mc.company_id = cn.id AND an.person_id = ci.person_id AND ci.movie_id = mc.movie_id AND ci.movie_id = mk.movie_id AND mc.movie_id = mk.movie_id; 2 | -------------------------------------------------------------------------------- /job/16c.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(an.name) AS cool_actor_pseudonym, MIN(t.title) AS series_named_after_char FROM aka_name AS an, cast_info AS ci, company_name AS cn, keyword AS k, movie_companies AS mc, movie_keyword AS mk, name AS n, title AS t WHERE cn.country_code ='[us]' AND k.keyword ='character-name-in-title' AND t.episode_nr < 100 AND an.person_id = n.id AND n.id = ci.person_id AND ci.movie_id = t.id AND t.id = mk.movie_id AND mk.keyword_id = k.id AND t.id = mc.movie_id AND mc.company_id = cn.id AND an.person_id = ci.person_id AND ci.movie_id = mc.movie_id AND ci.movie_id = mk.movie_id AND mc.movie_id = mk.movie_id; 2 | -------------------------------------------------------------------------------- /job/16d.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(an.name) AS cool_actor_pseudonym, MIN(t.title) AS series_named_after_char FROM aka_name AS an, cast_info AS ci, company_name AS cn, keyword AS k, movie_companies AS mc, movie_keyword AS mk, name AS n, title AS t WHERE cn.country_code ='[us]' AND k.keyword ='character-name-in-title' AND t.episode_nr >= 5 AND t.episode_nr < 100 AND an.person_id = n.id AND n.id = ci.person_id AND ci.movie_id = t.id AND t.id = mk.movie_id AND 
mk.keyword_id = k.id AND t.id = mc.movie_id AND mc.company_id = cn.id AND an.person_id = ci.person_id AND ci.movie_id = mc.movie_id AND ci.movie_id = mk.movie_id AND mc.movie_id = mk.movie_id; 2 | -------------------------------------------------------------------------------- /job/17a.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(n.name) AS member_in_charnamed_american_movie, MIN(n.name) AS a1 FROM cast_info AS ci, company_name AS cn, keyword AS k, movie_companies AS mc, movie_keyword AS mk, name AS n, title AS t WHERE cn.country_code ='[us]' AND k.keyword ='character-name-in-title' AND n.name LIKE 'B%' AND n.id = ci.person_id AND ci.movie_id = t.id AND t.id = mk.movie_id AND mk.keyword_id = k.id AND t.id = mc.movie_id AND mc.company_id = cn.id AND ci.movie_id = mc.movie_id AND ci.movie_id = mk.movie_id AND mc.movie_id = mk.movie_id; 2 | -------------------------------------------------------------------------------- /job/17b.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(n.name) AS member_in_charnamed_movie, MIN(n.name) AS a1 FROM cast_info AS ci, company_name AS cn, keyword AS k, movie_companies AS mc, movie_keyword AS mk, name AS n, title AS t WHERE k.keyword ='character-name-in-title' AND n.name LIKE 'Z%' AND n.id = ci.person_id AND ci.movie_id = t.id AND t.id = mk.movie_id AND mk.keyword_id = k.id AND t.id = mc.movie_id AND mc.company_id = cn.id AND ci.movie_id = mc.movie_id AND ci.movie_id = mk.movie_id AND mc.movie_id = mk.movie_id; 2 | -------------------------------------------------------------------------------- /job/17c.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(n.name) AS member_in_charnamed_movie, MIN(n.name) AS a1 FROM cast_info AS ci, company_name AS cn, keyword AS k, movie_companies AS mc, movie_keyword AS mk, name AS n, title AS t WHERE k.keyword ='character-name-in-title' AND 
n.name LIKE 'X%' AND n.id = ci.person_id AND ci.movie_id = t.id AND t.id = mk.movie_id AND mk.keyword_id = k.id AND t.id = mc.movie_id AND mc.company_id = cn.id AND ci.movie_id = mc.movie_id AND ci.movie_id = mk.movie_id AND mc.movie_id = mk.movie_id; 2 | -------------------------------------------------------------------------------- /job/17d.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(n.name) AS member_in_charnamed_movie FROM cast_info AS ci, company_name AS cn, keyword AS k, movie_companies AS mc, movie_keyword AS mk, name AS n, title AS t WHERE k.keyword ='character-name-in-title' AND n.name LIKE '%Bert%' AND n.id = ci.person_id AND ci.movie_id = t.id AND t.id = mk.movie_id AND mk.keyword_id = k.id AND t.id = mc.movie_id AND mc.company_id = cn.id AND ci.movie_id = mc.movie_id AND ci.movie_id = mk.movie_id AND mc.movie_id = mk.movie_id; 2 | -------------------------------------------------------------------------------- /job/17e.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(n.name) AS member_in_charnamed_movie FROM cast_info AS ci, company_name AS cn, keyword AS k, movie_companies AS mc, movie_keyword AS mk, name AS n, title AS t WHERE cn.country_code ='[us]' AND k.keyword ='character-name-in-title' AND n.id = ci.person_id AND ci.movie_id = t.id AND t.id = mk.movie_id AND mk.keyword_id = k.id AND t.id = mc.movie_id AND mc.company_id = cn.id AND ci.movie_id = mc.movie_id AND ci.movie_id = mk.movie_id AND mc.movie_id = mk.movie_id; 2 | -------------------------------------------------------------------------------- /job/17f.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(n.name) AS member_in_charnamed_movie FROM cast_info AS ci, company_name AS cn, keyword AS k, movie_companies AS mc, movie_keyword AS mk, name AS n, title AS t WHERE k.keyword ='character-name-in-title' AND n.name LIKE '%B%' AND n.id = 
ci.person_id AND ci.movie_id = t.id AND t.id = mk.movie_id AND mk.keyword_id = k.id AND t.id = mc.movie_id AND mc.company_id = cn.id AND ci.movie_id = mc.movie_id AND ci.movie_id = mk.movie_id AND mc.movie_id = mk.movie_id; 2 | -------------------------------------------------------------------------------- /job/18a.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(mi.info) AS movie_budget, MIN(mi_idx.info) AS movie_votes, MIN(t.title) AS movie_title FROM cast_info AS ci, info_type AS it1, info_type AS it2, movie_info AS mi, movie_info_idx AS mi_idx, name AS n, title AS t WHERE ci.note in ('(producer)', '(executive producer)') AND it1.info = 'budget' AND it2.info = 'votes' AND n.gender = 'm' and n.name like '%Tim%' AND t.id = mi.movie_id AND t.id = mi_idx.movie_id AND t.id = ci.movie_id AND ci.movie_id = mi.movie_id AND ci.movie_id = mi_idx.movie_id AND mi.movie_id = mi_idx.movie_id AND n.id = ci.person_id AND it1.id = mi.info_type_id AND it2.id = mi_idx.info_type_id; 2 | -------------------------------------------------------------------------------- /job/18b.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(mi.info) AS movie_budget, MIN(mi_idx.info) AS movie_votes, MIN(t.title) AS movie_title FROM cast_info AS ci, info_type AS it1, info_type AS it2, movie_info AS mi, movie_info_idx AS mi_idx, name AS n, title AS t WHERE ci.note in ('(writer)', '(head writer)', '(written by)', '(story)', '(story editor)') AND it1.info = 'genres' AND it2.info = 'rating' AND mi.info in ('Horror', 'Thriller') and mi.note is NULL AND mi_idx.info > '8.0' AND n.gender is not null and n.gender = 'f' AND t.production_year between 2008 and 2014 AND t.id = mi.movie_id AND t.id = mi_idx.movie_id AND t.id = ci.movie_id AND ci.movie_id = mi.movie_id AND ci.movie_id = mi_idx.movie_id AND mi.movie_id = mi_idx.movie_id AND n.id = ci.person_id AND it1.id = mi.info_type_id AND it2.id = 
mi_idx.info_type_id; 2 | -------------------------------------------------------------------------------- /job/18c.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(mi.info) AS movie_budget, MIN(mi_idx.info) AS movie_votes, MIN(t.title) AS movie_title FROM cast_info AS ci, info_type AS it1, info_type AS it2, movie_info AS mi, movie_info_idx AS mi_idx, name AS n, title AS t WHERE ci.note in ('(writer)', '(head writer)', '(written by)', '(story)', '(story editor)') AND it1.info = 'genres' AND it2.info = 'votes' AND mi.info in ('Horror', 'Action', 'Sci-Fi', 'Thriller', 'Crime', 'War') AND n.gender = 'm' AND t.id = mi.movie_id AND t.id = mi_idx.movie_id AND t.id = ci.movie_id AND ci.movie_id = mi.movie_id AND ci.movie_id = mi_idx.movie_id AND mi.movie_id = mi_idx.movie_id AND n.id = ci.person_id AND it1.id = mi.info_type_id AND it2.id = mi_idx.info_type_id; 2 | -------------------------------------------------------------------------------- /job/19a.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(n.name) AS voicing_actress, MIN(t.title) AS voiced_movie FROM aka_name AS an, char_name AS chn, cast_info AS ci, company_name AS cn, info_type AS it, movie_companies AS mc, movie_info AS mi, name AS n, role_type AS rt, title AS t WHERE ci.note in ('(voice)', '(voice: Japanese version)', '(voice) (uncredited)', '(voice: English version)') AND cn.country_code ='[us]' AND it.info = 'release dates' AND mc.note is not NULL and (mc.note like '%(USA)%' or mc.note like '%(worldwide)%') AND mi.info is not null and (mi.info like 'Japan:%200%' or mi.info like 'USA:%200%') AND n.gender ='f' and n.name like '%Ang%' AND rt.role ='actress' AND t.production_year between 2005 and 2009 AND t.id = mi.movie_id AND t.id = mc.movie_id AND t.id = ci.movie_id AND mc.movie_id = ci.movie_id AND mc.movie_id = mi.movie_id AND mi.movie_id = ci.movie_id AND cn.id = mc.company_id AND it.id = 
mi.info_type_id AND n.id = ci.person_id AND rt.id = ci.role_id AND n.id = an.person_id AND ci.person_id = an.person_id AND chn.id = ci.person_role_id; 2 | -------------------------------------------------------------------------------- /job/19b.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(n.name) AS voicing_actress, MIN(t.title) AS kung_fu_panda FROM aka_name AS an, char_name AS chn, cast_info AS ci, company_name AS cn, info_type AS it, movie_companies AS mc, movie_info AS mi, name AS n, role_type AS rt, title AS t WHERE ci.note = '(voice)' AND cn.country_code ='[us]' AND it.info = 'release dates' AND mc.note like '%(200%)%' and (mc.note like '%(USA)%' or mc.note like '%(worldwide)%') AND mi.info is not null and (mi.info like 'Japan:%2007%' or mi.info like 'USA:%2008%') AND n.gender ='f' and n.name like '%Angel%' AND rt.role ='actress' AND t.production_year between 2007 and 2008 and t.title like '%Kung%Fu%Panda%' AND t.id = mi.movie_id AND t.id = mc.movie_id AND t.id = ci.movie_id AND mc.movie_id = ci.movie_id AND mc.movie_id = mi.movie_id AND mi.movie_id = ci.movie_id AND cn.id = mc.company_id AND it.id = mi.info_type_id AND n.id = ci.person_id AND rt.id = ci.role_id AND n.id = an.person_id AND ci.person_id = an.person_id AND chn.id = ci.person_role_id; 2 | -------------------------------------------------------------------------------- /job/19c.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(n.name) AS voicing_actress, MIN(t.title) AS jap_engl_voiced_movie FROM aka_name AS an, char_name AS chn, cast_info AS ci, company_name AS cn, info_type AS it, movie_companies AS mc, movie_info AS mi, name AS n, role_type AS rt, title AS t WHERE ci.note in ('(voice)', '(voice: Japanese version)', '(voice) (uncredited)', '(voice: English version)') AND cn.country_code ='[us]' AND it.info = 'release dates' AND mi.info is not null and (mi.info like 'Japan:%200%' or mi.info 
like 'USA:%200%') AND n.gender ='f' and n.name like '%An%' AND rt.role ='actress' AND t.production_year > 2000 AND t.id = mi.movie_id AND t.id = mc.movie_id AND t.id = ci.movie_id AND mc.movie_id = ci.movie_id AND mc.movie_id = mi.movie_id AND mi.movie_id = ci.movie_id AND cn.id = mc.company_id AND it.id = mi.info_type_id AND n.id = ci.person_id AND rt.id = ci.role_id AND n.id = an.person_id AND ci.person_id = an.person_id AND chn.id = ci.person_role_id; 2 | -------------------------------------------------------------------------------- /job/19d.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(n.name) AS voicing_actress, MIN(t.title) AS jap_engl_voiced_movie FROM aka_name AS an, char_name AS chn, cast_info AS ci, company_name AS cn, info_type AS it, movie_companies AS mc, movie_info AS mi, name AS n, role_type AS rt, title AS t WHERE ci.note in ('(voice)', '(voice: Japanese version)', '(voice) (uncredited)', '(voice: English version)') AND cn.country_code ='[us]' AND it.info = 'release dates' AND n.gender ='f' AND rt.role ='actress' AND t.production_year > 2000 AND t.id = mi.movie_id AND t.id = mc.movie_id AND t.id = ci.movie_id AND mc.movie_id = ci.movie_id AND mc.movie_id = mi.movie_id AND mi.movie_id = ci.movie_id AND cn.id = mc.company_id AND it.id = mi.info_type_id AND n.id = ci.person_id AND rt.id = ci.role_id AND n.id = an.person_id AND ci.person_id = an.person_id AND chn.id = ci.person_role_id; 2 | -------------------------------------------------------------------------------- /job/1a.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(mc.note) AS production_note, MIN(t.title) AS movie_title, MIN(t.production_year) AS movie_year FROM company_type AS ct, info_type AS it, movie_companies AS mc, movie_info_idx AS mi_idx, title AS t WHERE ct.kind = 'production companies' AND it.info = 'top 250 rank' AND mc.note not like '%(as Metro-Goldwyn-Mayer Pictures)%' 
and (mc.note like '%(co-production)%' or mc.note like '%(presents)%') AND ct.id = mc.company_type_id AND t.id = mc.movie_id AND t.id = mi_idx.movie_id AND mc.movie_id = mi_idx.movie_id AND it.id = mi_idx.info_type_id; 2 | -------------------------------------------------------------------------------- /job/1b.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(mc.note) AS production_note, MIN(t.title) AS movie_title, MIN(t.production_year) AS movie_year FROM company_type AS ct, info_type AS it, movie_companies AS mc, movie_info_idx AS mi_idx, title AS t WHERE ct.kind = 'production companies' AND it.info = 'bottom 10 rank' AND mc.note not like '%(as Metro-Goldwyn-Mayer Pictures)%' AND t.production_year between 2005 and 2010 AND ct.id = mc.company_type_id AND t.id = mc.movie_id AND t.id = mi_idx.movie_id AND mc.movie_id = mi_idx.movie_id AND it.id = mi_idx.info_type_id; 2 | -------------------------------------------------------------------------------- /job/1c.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(mc.note) AS production_note, MIN(t.title) AS movie_title, MIN(t.production_year) AS movie_year FROM company_type AS ct, info_type AS it, movie_companies AS mc, movie_info_idx AS mi_idx, title AS t WHERE ct.kind = 'production companies' AND it.info = 'top 250 rank' AND mc.note not like '%(as Metro-Goldwyn-Mayer Pictures)%' and (mc.note like '%(co-production)%') AND t.production_year >2010 AND ct.id = mc.company_type_id AND t.id = mc.movie_id AND t.id = mi_idx.movie_id AND mc.movie_id = mi_idx.movie_id AND it.id = mi_idx.info_type_id; 2 | -------------------------------------------------------------------------------- /job/1d.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(mc.note) AS production_note, MIN(t.title) AS movie_title, MIN(t.production_year) AS movie_year FROM company_type AS ct, info_type AS it, 
movie_companies AS mc, movie_info_idx AS mi_idx, title AS t WHERE ct.kind = 'production companies' AND it.info = 'bottom 10 rank' AND mc.note not like '%(as Metro-Goldwyn-Mayer Pictures)%' AND t.production_year >2000 AND ct.id = mc.company_type_id AND t.id = mc.movie_id AND t.id = mi_idx.movie_id AND mc.movie_id = mi_idx.movie_id AND it.id = mi_idx.info_type_id; 2 | -------------------------------------------------------------------------------- /job/20a.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(t.title) AS complete_downey_ironman_movie FROM complete_cast AS cc, comp_cast_type AS cct1, comp_cast_type AS cct2, char_name AS chn, cast_info AS ci, keyword AS k, kind_type AS kt, movie_keyword AS mk, name AS n, title AS t WHERE cct1.kind = 'cast' AND cct2.kind like '%complete%' AND chn.name not like '%Sherlock%' and (chn.name like '%Tony%Stark%' or chn.name like '%Iron%Man%') AND k.keyword in ('superhero', 'sequel', 'second-part', 'marvel-comics', 'based-on-comic', 'tv-special', 'fight', 'violence') AND kt.kind = 'movie' AND t.production_year > 1950 AND kt.id = t.kind_id AND t.id = mk.movie_id AND t.id = ci.movie_id AND t.id = cc.movie_id AND mk.movie_id = ci.movie_id AND mk.movie_id = cc.movie_id AND ci.movie_id = cc.movie_id AND chn.id = ci.person_role_id AND n.id = ci.person_id AND k.id = mk.keyword_id AND cct1.id = cc.subject_id AND cct2.id = cc.status_id; 2 | -------------------------------------------------------------------------------- /job/20b.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(t.title) AS complete_downey_ironman_movie FROM complete_cast AS cc, comp_cast_type AS cct1, comp_cast_type AS cct2, char_name AS chn, cast_info AS ci, keyword AS k, kind_type AS kt, movie_keyword AS mk, name AS n, title AS t WHERE cct1.kind = 'cast' AND cct2.kind like '%complete%' AND chn.name not like '%Sherlock%' and (chn.name like '%Tony%Stark%' or chn.name like 
'%Iron%Man%') AND k.keyword in ('superhero', 'sequel', 'second-part', 'marvel-comics', 'based-on-comic', 'tv-special', 'fight', 'violence') AND kt.kind = 'movie' AND n.name LIKE '%Downey%Robert%' AND t.production_year > 2000 AND kt.id = t.kind_id AND t.id = mk.movie_id AND t.id = ci.movie_id AND t.id = cc.movie_id AND mk.movie_id = ci.movie_id AND mk.movie_id = cc.movie_id AND ci.movie_id = cc.movie_id AND chn.id = ci.person_role_id AND n.id = ci.person_id AND k.id = mk.keyword_id AND cct1.id = cc.subject_id AND cct2.id = cc.status_id; 2 | -------------------------------------------------------------------------------- /job/20c.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(n.name) AS cast_member, MIN(t.title) AS complete_dynamic_hero_movie FROM complete_cast AS cc, comp_cast_type AS cct1, comp_cast_type AS cct2, char_name AS chn, cast_info AS ci, keyword AS k, kind_type AS kt, movie_keyword AS mk, name AS n, title AS t WHERE cct1.kind = 'cast' AND cct2.kind like '%complete%' AND chn.name is not NULL and (chn.name like '%man%' or chn.name like '%Man%') AND k.keyword in ('superhero', 'marvel-comics', 'based-on-comic', 'tv-special', 'fight', 'violence', 'magnet', 'web', 'claw', 'laser') AND kt.kind = 'movie' AND t.production_year > 2000 AND kt.id = t.kind_id AND t.id = mk.movie_id AND t.id = ci.movie_id AND t.id = cc.movie_id AND mk.movie_id = ci.movie_id AND mk.movie_id = cc.movie_id AND ci.movie_id = cc.movie_id AND chn.id = ci.person_role_id AND n.id = ci.person_id AND k.id = mk.keyword_id AND cct1.id = cc.subject_id AND cct2.id = cc.status_id; 2 | -------------------------------------------------------------------------------- /job/21a.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(cn.name) AS company_name, MIN(lt.link) AS link_type, MIN(t.title) AS western_follow_up FROM company_name AS cn, company_type AS ct, keyword AS k, link_type AS lt, movie_companies 
AS mc, movie_info AS mi, movie_keyword AS mk, movie_link AS ml, title AS t WHERE cn.country_code !='[pl]' AND (cn.name LIKE '%Film%' OR cn.name LIKE '%Warner%') AND ct.kind ='production companies' AND k.keyword ='sequel' AND lt.link LIKE '%follow%' AND mc.note IS NULL AND mi.info IN ('Sweden', 'Norway', 'Germany', 'Denmark', 'Swedish', 'Denish', 'Norwegian', 'German') AND t.production_year BETWEEN 1950 AND 2000 AND lt.id = ml.link_type_id AND ml.movie_id = t.id AND t.id = mk.movie_id AND mk.keyword_id = k.id AND t.id = mc.movie_id AND mc.company_type_id = ct.id AND mc.company_id = cn.id AND mi.movie_id = t.id AND ml.movie_id = mk.movie_id AND ml.movie_id = mc.movie_id AND mk.movie_id = mc.movie_id AND ml.movie_id = mi.movie_id AND mk.movie_id = mi.movie_id AND mc.movie_id = mi.movie_id; 2 | -------------------------------------------------------------------------------- /job/21b.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(cn.name) AS company_name, MIN(lt.link) AS link_type, MIN(t.title) AS german_follow_up FROM company_name AS cn, company_type AS ct, keyword AS k, link_type AS lt, movie_companies AS mc, movie_info AS mi, movie_keyword AS mk, movie_link AS ml, title AS t WHERE cn.country_code !='[pl]' AND (cn.name LIKE '%Film%' OR cn.name LIKE '%Warner%') AND ct.kind ='production companies' AND k.keyword ='sequel' AND lt.link LIKE '%follow%' AND mc.note IS NULL AND mi.info IN ('Germany', 'German') AND t.production_year BETWEEN 2000 AND 2010 AND lt.id = ml.link_type_id AND ml.movie_id = t.id AND t.id = mk.movie_id AND mk.keyword_id = k.id AND t.id = mc.movie_id AND mc.company_type_id = ct.id AND mc.company_id = cn.id AND mi.movie_id = t.id AND ml.movie_id = mk.movie_id AND ml.movie_id = mc.movie_id AND mk.movie_id = mc.movie_id AND ml.movie_id = mi.movie_id AND mk.movie_id = mi.movie_id AND mc.movie_id = mi.movie_id; 2 | -------------------------------------------------------------------------------- 
/job/21c.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(cn.name) AS company_name, MIN(lt.link) AS link_type, MIN(t.title) AS western_follow_up FROM company_name AS cn, company_type AS ct, keyword AS k, link_type AS lt, movie_companies AS mc, movie_info AS mi, movie_keyword AS mk, movie_link AS ml, title AS t WHERE cn.country_code !='[pl]' AND (cn.name LIKE '%Film%' OR cn.name LIKE '%Warner%') AND ct.kind ='production companies' AND k.keyword ='sequel' AND lt.link LIKE '%follow%' AND mc.note IS NULL AND mi.info IN ('Sweden', 'Norway', 'Germany', 'Denmark', 'Swedish', 'Denish', 'Norwegian', 'German', 'English') AND t.production_year BETWEEN 1950 AND 2010 AND lt.id = ml.link_type_id AND ml.movie_id = t.id AND t.id = mk.movie_id AND mk.keyword_id = k.id AND t.id = mc.movie_id AND mc.company_type_id = ct.id AND mc.company_id = cn.id AND mi.movie_id = t.id AND ml.movie_id = mk.movie_id AND ml.movie_id = mc.movie_id AND mk.movie_id = mc.movie_id AND ml.movie_id = mi.movie_id AND mk.movie_id = mi.movie_id AND mc.movie_id = mi.movie_id; 2 | -------------------------------------------------------------------------------- /job/22a.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(cn.name) AS movie_company, MIN(mi_idx.info) AS rating, MIN(t.title) AS western_violent_movie FROM company_name AS cn, company_type AS ct, info_type AS it1, info_type AS it2, keyword AS k, kind_type AS kt, movie_companies AS mc, movie_info AS mi, movie_info_idx AS mi_idx, movie_keyword AS mk, title AS t WHERE cn.country_code != '[us]' AND it1.info = 'countries' AND it2.info = 'rating' AND k.keyword in ('murder', 'murder-in-title', 'blood', 'violence') AND kt.kind in ('movie', 'episode') AND mc.note not like '%(USA)%' and mc.note like '%(200%)%' AND mi.info IN ('Germany', 'German', 'USA', 'American') AND mi_idx.info < '7.0' AND t.production_year > 2008 AND kt.id = t.kind_id AND t.id = mi.movie_id 
AND t.id = mk.movie_id AND t.id = mi_idx.movie_id AND t.id = mc.movie_id AND mk.movie_id = mi.movie_id AND mk.movie_id = mi_idx.movie_id AND mk.movie_id = mc.movie_id AND mi.movie_id = mi_idx.movie_id AND mi.movie_id = mc.movie_id AND mc.movie_id = mi_idx.movie_id AND k.id = mk.keyword_id AND it1.id = mi.info_type_id AND it2.id = mi_idx.info_type_id AND ct.id = mc.company_type_id AND cn.id = mc.company_id; 2 | -------------------------------------------------------------------------------- /job/22b.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(cn.name) AS movie_company, MIN(mi_idx.info) AS rating, MIN(t.title) AS western_violent_movie FROM company_name AS cn, company_type AS ct, info_type AS it1, info_type AS it2, keyword AS k, kind_type AS kt, movie_companies AS mc, movie_info AS mi, movie_info_idx AS mi_idx, movie_keyword AS mk, title AS t WHERE cn.country_code != '[us]' AND it1.info = 'countries' AND it2.info = 'rating' AND k.keyword in ('murder', 'murder-in-title', 'blood', 'violence') AND kt.kind in ('movie', 'episode') AND mc.note not like '%(USA)%' and mc.note like '%(200%)%' AND mi.info IN ('Germany', 'German', 'USA', 'American') AND mi_idx.info < '7.0' AND t.production_year > 2009 AND kt.id = t.kind_id AND t.id = mi.movie_id AND t.id = mk.movie_id AND t.id = mi_idx.movie_id AND t.id = mc.movie_id AND mk.movie_id = mi.movie_id AND mk.movie_id = mi_idx.movie_id AND mk.movie_id = mc.movie_id AND mi.movie_id = mi_idx.movie_id AND mi.movie_id = mc.movie_id AND mc.movie_id = mi_idx.movie_id AND k.id = mk.keyword_id AND it1.id = mi.info_type_id AND it2.id = mi_idx.info_type_id AND ct.id = mc.company_type_id AND cn.id = mc.company_id; 2 | -------------------------------------------------------------------------------- /job/22c.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(cn.name) AS movie_company, MIN(mi_idx.info) AS rating, MIN(t.title) AS 
western_violent_movie FROM company_name AS cn, company_type AS ct, info_type AS it1, info_type AS it2, keyword AS k, kind_type AS kt, movie_companies AS mc, movie_info AS mi, movie_info_idx AS mi_idx, movie_keyword AS mk, title AS t WHERE cn.country_code != '[us]' AND it1.info = 'countries' AND it2.info = 'rating' AND k.keyword in ('murder', 'murder-in-title', 'blood', 'violence') AND kt.kind in ('movie', 'episode') AND mc.note not like '%(USA)%' and mc.note like '%(200%)%' AND mi.info IN ('Sweden', 'Norway', 'Germany', 'Denmark', 'Swedish', 'Danish', 'Norwegian', 'German', 'USA', 'American') AND mi_idx.info < '8.5' AND t.production_year > 2005 AND kt.id = t.kind_id AND t.id = mi.movie_id AND t.id = mk.movie_id AND t.id = mi_idx.movie_id AND t.id = mc.movie_id AND mk.movie_id = mi.movie_id AND mk.movie_id = mi_idx.movie_id AND mk.movie_id = mc.movie_id AND mi.movie_id = mi_idx.movie_id AND mi.movie_id = mc.movie_id AND mc.movie_id = mi_idx.movie_id AND k.id = mk.keyword_id AND it1.id = mi.info_type_id AND it2.id = mi_idx.info_type_id AND ct.id = mc.company_type_id AND cn.id = mc.company_id; 2 | -------------------------------------------------------------------------------- /job/22d.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(cn.name) AS movie_company, MIN(mi_idx.info) AS rating, MIN(t.title) AS western_violent_movie FROM company_name AS cn, company_type AS ct, info_type AS it1, info_type AS it2, keyword AS k, kind_type AS kt, movie_companies AS mc, movie_info AS mi, movie_info_idx AS mi_idx, movie_keyword AS mk, title AS t WHERE cn.country_code != '[us]' AND it1.info = 'countries' AND it2.info = 'rating' AND k.keyword in ('murder', 'murder-in-title', 'blood', 'violence') AND kt.kind in ('movie', 'episode') AND mi.info IN ('Sweden', 'Norway', 'Germany', 'Denmark', 'Swedish', 'Danish', 'Norwegian', 'German', 'USA', 'American') AND mi_idx.info < '8.5' AND t.production_year > 2005 AND kt.id = t.kind_id AND t.id 
= mi.movie_id AND t.id = mk.movie_id AND t.id = mi_idx.movie_id AND t.id = mc.movie_id AND mk.movie_id = mi.movie_id AND mk.movie_id = mi_idx.movie_id AND mk.movie_id = mc.movie_id AND mi.movie_id = mi_idx.movie_id AND mi.movie_id = mc.movie_id AND mc.movie_id = mi_idx.movie_id AND k.id = mk.keyword_id AND it1.id = mi.info_type_id AND it2.id = mi_idx.info_type_id AND ct.id = mc.company_type_id AND cn.id = mc.company_id; 2 | -------------------------------------------------------------------------------- /job/23a.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(kt.kind) AS movie_kind, MIN(t.title) AS complete_us_internet_movie FROM complete_cast AS cc, comp_cast_type AS cct1, company_name AS cn, company_type AS ct, info_type AS it1, keyword AS k, kind_type AS kt, movie_companies AS mc, movie_info AS mi, movie_keyword AS mk, title AS t WHERE cct1.kind = 'complete+verified' AND cn.country_code = '[us]' AND it1.info = 'release dates' AND kt.kind in ('movie') AND mi.note like '%internet%' AND mi.info is not NULL and (mi.info like 'USA:% 199%' or mi.info like 'USA:% 200%') AND t.production_year > 2000 AND kt.id = t.kind_id AND t.id = mi.movie_id AND t.id = mk.movie_id AND t.id = mc.movie_id AND t.id = cc.movie_id AND mk.movie_id = mi.movie_id AND mk.movie_id = mc.movie_id AND mk.movie_id = cc.movie_id AND mi.movie_id = mc.movie_id AND mi.movie_id = cc.movie_id AND mc.movie_id = cc.movie_id AND k.id = mk.keyword_id AND it1.id = mi.info_type_id AND cn.id = mc.company_id AND ct.id = mc.company_type_id AND cct1.id = cc.status_id; 2 | -------------------------------------------------------------------------------- /job/23b.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(kt.kind) AS movie_kind, MIN(t.title) AS complete_nerdy_internet_movie FROM complete_cast AS cc, comp_cast_type AS cct1, company_name AS cn, company_type AS ct, info_type AS it1, keyword AS k, kind_type AS 
kt, movie_companies AS mc, movie_info AS mi, movie_keyword AS mk, title AS t WHERE cct1.kind = 'complete+verified' AND cn.country_code = '[us]' AND it1.info = 'release dates' AND k.keyword in ('nerd', 'loner', 'alienation', 'dignity') AND kt.kind in ('movie') AND mi.note like '%internet%' AND mi.info like 'USA:% 200%' AND t.production_year > 2000 AND kt.id = t.kind_id AND t.id = mi.movie_id AND t.id = mk.movie_id AND t.id = mc.movie_id AND t.id = cc.movie_id AND mk.movie_id = mi.movie_id AND mk.movie_id = mc.movie_id AND mk.movie_id = cc.movie_id AND mi.movie_id = mc.movie_id AND mi.movie_id = cc.movie_id AND mc.movie_id = cc.movie_id AND k.id = mk.keyword_id AND it1.id = mi.info_type_id AND cn.id = mc.company_id AND ct.id = mc.company_type_id AND cct1.id = cc.status_id; 2 | -------------------------------------------------------------------------------- /job/23c.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(kt.kind) AS movie_kind, MIN(t.title) AS complete_us_internet_movie FROM complete_cast AS cc, comp_cast_type AS cct1, company_name AS cn, company_type AS ct, info_type AS it1, keyword AS k, kind_type AS kt, movie_companies AS mc, movie_info AS mi, movie_keyword AS mk, title AS t WHERE cct1.kind = 'complete+verified' AND cn.country_code = '[us]' AND it1.info = 'release dates' AND kt.kind in ('movie', 'tv movie', 'video movie', 'video game') AND mi.note like '%internet%' AND mi.info is not NULL and (mi.info like 'USA:% 199%' or mi.info like 'USA:% 200%') AND t.production_year > 1990 AND kt.id = t.kind_id AND t.id = mi.movie_id AND t.id = mk.movie_id AND t.id = mc.movie_id AND t.id = cc.movie_id AND mk.movie_id = mi.movie_id AND mk.movie_id = mc.movie_id AND mk.movie_id = cc.movie_id AND mi.movie_id = mc.movie_id AND mi.movie_id = cc.movie_id AND mc.movie_id = cc.movie_id AND k.id = mk.keyword_id AND it1.id = mi.info_type_id AND cn.id = mc.company_id AND ct.id = mc.company_type_id AND cct1.id = cc.status_id; 2 
| -------------------------------------------------------------------------------- /job/24a.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(chn.name) AS voiced_char_name, MIN(n.name) AS voicing_actress_name, MIN(t.title) AS voiced_action_movie_jap_eng FROM aka_name AS an, char_name AS chn, cast_info AS ci, company_name AS cn, info_type AS it, keyword AS k, movie_companies AS mc, movie_info AS mi, movie_keyword AS mk, name AS n, role_type AS rt, title AS t WHERE ci.note in ('(voice)', '(voice: Japanese version)', '(voice) (uncredited)', '(voice: English version)') AND cn.country_code ='[us]' AND it.info = 'release dates' AND k.keyword in ('hero', 'martial-arts', 'hand-to-hand-combat') AND mi.info is not null and (mi.info like 'Japan:%201%' or mi.info like 'USA:%201%') AND n.gender ='f' and n.name like '%An%' AND rt.role ='actress' AND t.production_year > 2010 AND t.id = mi.movie_id AND t.id = mc.movie_id AND t.id = ci.movie_id AND t.id = mk.movie_id AND mc.movie_id = ci.movie_id AND mc.movie_id = mi.movie_id AND mc.movie_id = mk.movie_id AND mi.movie_id = ci.movie_id AND mi.movie_id = mk.movie_id AND ci.movie_id = mk.movie_id AND cn.id = mc.company_id AND it.id = mi.info_type_id AND n.id = ci.person_id AND rt.id = ci.role_id AND n.id = an.person_id AND ci.person_id = an.person_id AND chn.id = ci.person_role_id AND k.id = mk.keyword_id; 2 | -------------------------------------------------------------------------------- /job/24b.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(chn.name) AS voiced_char_name, MIN(n.name) AS voicing_actress_name, MIN(t.title) AS kung_fu_panda FROM aka_name AS an, char_name AS chn, cast_info AS ci, company_name AS cn, info_type AS it, keyword AS k, movie_companies AS mc, movie_info AS mi, movie_keyword AS mk, name AS n, role_type AS rt, title AS t WHERE ci.note in ('(voice)', '(voice: Japanese version)', '(voice) (uncredited)', 
'(voice: English version)') AND cn.country_code ='[us]' AND cn.name = 'DreamWorks Animation' AND it.info = 'release dates' AND k.keyword in ('hero', 'martial-arts', 'hand-to-hand-combat', 'computer-animated-movie') AND mi.info is not null and (mi.info like 'Japan:%201%' or mi.info like 'USA:%201%') AND n.gender ='f' and n.name like '%An%' AND rt.role ='actress' AND t.production_year > 2010 AND t.title like 'Kung Fu Panda%' AND t.id = mi.movie_id AND t.id = mc.movie_id AND t.id = ci.movie_id AND t.id = mk.movie_id AND mc.movie_id = ci.movie_id AND mc.movie_id = mi.movie_id AND mc.movie_id = mk.movie_id AND mi.movie_id = ci.movie_id AND mi.movie_id = mk.movie_id AND ci.movie_id = mk.movie_id AND cn.id = mc.company_id AND it.id = mi.info_type_id AND n.id = ci.person_id AND rt.id = ci.role_id AND n.id = an.person_id AND ci.person_id = an.person_id AND chn.id = ci.person_role_id AND k.id = mk.keyword_id; 2 | -------------------------------------------------------------------------------- /job/25a.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(mi.info) AS movie_budget, MIN(mi_idx.info) AS movie_votes, MIN(n.name) AS male_writer, MIN(t.title) AS violent_movie_title FROM cast_info AS ci, info_type AS it1, info_type AS it2, keyword AS k, movie_info AS mi, movie_info_idx AS mi_idx, movie_keyword AS mk, name AS n, title AS t WHERE ci.note in ('(writer)', '(head writer)', '(written by)', '(story)', '(story editor)') AND it1.info = 'genres' AND it2.info = 'votes' AND k.keyword in ('murder', 'blood', 'gore', 'death', 'female-nudity') AND mi.info = 'Horror' AND n.gender = 'm' AND t.id = mi.movie_id AND t.id = mi_idx.movie_id AND t.id = ci.movie_id AND t.id = mk.movie_id AND ci.movie_id = mi.movie_id AND ci.movie_id = mi_idx.movie_id AND ci.movie_id = mk.movie_id AND mi.movie_id = mi_idx.movie_id AND mi.movie_id = mk.movie_id AND mi_idx.movie_id = mk.movie_id AND n.id = ci.person_id AND it1.id = mi.info_type_id AND it2.id = 
mi_idx.info_type_id AND k.id = mk.keyword_id; 2 | -------------------------------------------------------------------------------- /job/25b.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(mi.info) AS movie_budget, MIN(mi_idx.info) AS movie_votes, MIN(n.name) AS male_writer, MIN(t.title) AS violent_movie_title FROM cast_info AS ci, info_type AS it1, info_type AS it2, keyword AS k, movie_info AS mi, movie_info_idx AS mi_idx, movie_keyword AS mk, name AS n, title AS t WHERE ci.note in ('(writer)', '(head writer)', '(written by)', '(story)', '(story editor)') AND it1.info = 'genres' AND it2.info = 'votes' AND k.keyword in ('murder', 'blood', 'gore', 'death', 'female-nudity') AND mi.info = 'Horror' AND n.gender = 'm' AND t.production_year > 2010 AND t.title like 'Vampire%' AND t.id = mi.movie_id AND t.id = mi_idx.movie_id AND t.id = ci.movie_id AND t.id = mk.movie_id AND ci.movie_id = mi.movie_id AND ci.movie_id = mi_idx.movie_id AND ci.movie_id = mk.movie_id AND mi.movie_id = mi_idx.movie_id AND mi.movie_id = mk.movie_id AND mi_idx.movie_id = mk.movie_id AND n.id = ci.person_id AND it1.id = mi.info_type_id AND it2.id = mi_idx.info_type_id AND k.id = mk.keyword_id; 2 | -------------------------------------------------------------------------------- /job/25c.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(mi.info) AS movie_budget, MIN(mi_idx.info) AS movie_votes, MIN(n.name) AS male_writer, MIN(t.title) AS violent_movie_title FROM cast_info AS ci, info_type AS it1, info_type AS it2, keyword AS k, movie_info AS mi, movie_info_idx AS mi_idx, movie_keyword AS mk, name AS n, title AS t WHERE ci.note in ('(writer)', '(head writer)', '(written by)', '(story)', '(story editor)') AND it1.info = 'genres' AND it2.info = 'votes' AND k.keyword in ('murder', 'violence', 'blood', 'gore', 'death', 'female-nudity', 'hospital') AND mi.info in ('Horror', 'Action', 'Sci-Fi', 
'Thriller', 'Crime', 'War') AND n.gender = 'm' AND t.id = mi.movie_id AND t.id = mi_idx.movie_id AND t.id = ci.movie_id AND t.id = mk.movie_id AND ci.movie_id = mi.movie_id AND ci.movie_id = mi_idx.movie_id AND ci.movie_id = mk.movie_id AND mi.movie_id = mi_idx.movie_id AND mi.movie_id = mk.movie_id AND mi_idx.movie_id = mk.movie_id AND n.id = ci.person_id AND it1.id = mi.info_type_id AND it2.id = mi_idx.info_type_id AND k.id = mk.keyword_id; 2 | -------------------------------------------------------------------------------- /job/26a.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(chn.name) AS character_name, MIN(mi_idx.info) AS rating, MIN(n.name) AS playing_actor, MIN(t.title) AS complete_hero_movie FROM complete_cast AS cc, comp_cast_type AS cct1, comp_cast_type AS cct2, char_name AS chn, cast_info AS ci, info_type AS it2, keyword AS k, kind_type AS kt, movie_info_idx AS mi_idx, movie_keyword AS mk, name AS n, title AS t WHERE cct1.kind = 'cast' AND cct2.kind like '%complete%' AND chn.name is not NULL and (chn.name like '%man%' or chn.name like '%Man%') AND it2.info = 'rating' AND k.keyword in ('superhero', 'marvel-comics', 'based-on-comic', 'tv-special', 'fight', 'violence', 'magnet', 'web', 'claw', 'laser') AND kt.kind = 'movie' AND mi_idx.info > '7.0' AND t.production_year > 2000 AND kt.id = t.kind_id AND t.id = mk.movie_id AND t.id = ci.movie_id AND t.id = cc.movie_id AND t.id = mi_idx.movie_id AND mk.movie_id = ci.movie_id AND mk.movie_id = cc.movie_id AND mk.movie_id = mi_idx.movie_id AND ci.movie_id = cc.movie_id AND ci.movie_id = mi_idx.movie_id AND cc.movie_id = mi_idx.movie_id AND chn.id = ci.person_role_id AND n.id = ci.person_id AND k.id = mk.keyword_id AND cct1.id = cc.subject_id AND cct2.id = cc.status_id AND it2.id = mi_idx.info_type_id; 2 | -------------------------------------------------------------------------------- /job/26b.sql: 
-------------------------------------------------------------------------------- 1 | SELECT MIN(chn.name) AS character_name, MIN(mi_idx.info) AS rating, MIN(t.title) AS complete_hero_movie FROM complete_cast AS cc, comp_cast_type AS cct1, comp_cast_type AS cct2, char_name AS chn, cast_info AS ci, info_type AS it2, keyword AS k, kind_type AS kt, movie_info_idx AS mi_idx, movie_keyword AS mk, name AS n, title AS t WHERE cct1.kind = 'cast' AND cct2.kind like '%complete%' AND chn.name is not NULL and (chn.name like '%man%' or chn.name like '%Man%') AND it2.info = 'rating' AND k.keyword in ('superhero', 'marvel-comics', 'based-on-comic', 'fight') AND kt.kind = 'movie' AND mi_idx.info > '8.0' AND t.production_year > 2005 AND kt.id = t.kind_id AND t.id = mk.movie_id AND t.id = ci.movie_id AND t.id = cc.movie_id AND t.id = mi_idx.movie_id AND mk.movie_id = ci.movie_id AND mk.movie_id = cc.movie_id AND mk.movie_id = mi_idx.movie_id AND ci.movie_id = cc.movie_id AND ci.movie_id = mi_idx.movie_id AND cc.movie_id = mi_idx.movie_id AND chn.id = ci.person_role_id AND n.id = ci.person_id AND k.id = mk.keyword_id AND cct1.id = cc.subject_id AND cct2.id = cc.status_id AND it2.id = mi_idx.info_type_id; 2 | -------------------------------------------------------------------------------- /job/26c.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(chn.name) AS character_name, MIN(mi_idx.info) AS rating, MIN(t.title) AS complete_hero_movie FROM complete_cast AS cc, comp_cast_type AS cct1, comp_cast_type AS cct2, char_name AS chn, cast_info AS ci, info_type AS it2, keyword AS k, kind_type AS kt, movie_info_idx AS mi_idx, movie_keyword AS mk, name AS n, title AS t WHERE cct1.kind = 'cast' AND cct2.kind like '%complete%' AND chn.name is not NULL and (chn.name like '%man%' or chn.name like '%Man%') AND it2.info = 'rating' AND k.keyword in ('superhero', 'marvel-comics', 'based-on-comic', 'tv-special', 'fight', 'violence', 'magnet', 'web', 
'claw', 'laser') AND kt.kind = 'movie' AND t.production_year > 2000 AND kt.id = t.kind_id AND t.id = mk.movie_id AND t.id = ci.movie_id AND t.id = cc.movie_id AND t.id = mi_idx.movie_id AND mk.movie_id = ci.movie_id AND mk.movie_id = cc.movie_id AND mk.movie_id = mi_idx.movie_id AND ci.movie_id = cc.movie_id AND ci.movie_id = mi_idx.movie_id AND cc.movie_id = mi_idx.movie_id AND chn.id = ci.person_role_id AND n.id = ci.person_id AND k.id = mk.keyword_id AND cct1.id = cc.subject_id AND cct2.id = cc.status_id AND it2.id = mi_idx.info_type_id; 2 | -------------------------------------------------------------------------------- /job/27a.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(cn.name) AS producing_company, MIN(lt.link) AS link_type, MIN(t.title) AS complete_western_sequel FROM complete_cast AS cc, comp_cast_type AS cct1, comp_cast_type AS cct2, company_name AS cn, company_type AS ct, keyword AS k, link_type AS lt, movie_companies AS mc, movie_info AS mi, movie_keyword AS mk, movie_link AS ml, title AS t WHERE cct1.kind in ('cast', 'crew') AND cct2.kind = 'complete' AND cn.country_code !='[pl]' AND (cn.name LIKE '%Film%' OR cn.name LIKE '%Warner%') AND ct.kind ='production companies' AND k.keyword ='sequel' AND lt.link LIKE '%follow%' AND mc.note IS NULL AND mi.info IN ('Sweden', 'Germany','Swedish', 'German') AND t.production_year BETWEEN 1950 AND 2000 AND lt.id = ml.link_type_id AND ml.movie_id = t.id AND t.id = mk.movie_id AND mk.keyword_id = k.id AND t.id = mc.movie_id AND mc.company_type_id = ct.id AND mc.company_id = cn.id AND mi.movie_id = t.id AND t.id = cc.movie_id AND cct1.id = cc.subject_id AND cct2.id = cc.status_id AND ml.movie_id = mk.movie_id AND ml.movie_id = mc.movie_id AND mk.movie_id = mc.movie_id AND ml.movie_id = mi.movie_id AND mk.movie_id = mi.movie_id AND mc.movie_id = mi.movie_id AND ml.movie_id = cc.movie_id AND mk.movie_id = cc.movie_id AND mc.movie_id = cc.movie_id AND mi.movie_id = 
cc.movie_id; 2 | -------------------------------------------------------------------------------- /job/27b.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(cn.name) AS producing_company, MIN(lt.link) AS link_type, MIN(t.title) AS complete_western_sequel FROM complete_cast AS cc, comp_cast_type AS cct1, comp_cast_type AS cct2, company_name AS cn, company_type AS ct, keyword AS k, link_type AS lt, movie_companies AS mc, movie_info AS mi, movie_keyword AS mk, movie_link AS ml, title AS t WHERE cct1.kind in ('cast', 'crew') AND cct2.kind = 'complete' AND cn.country_code !='[pl]' AND (cn.name LIKE '%Film%' OR cn.name LIKE '%Warner%') AND ct.kind ='production companies' AND k.keyword ='sequel' AND lt.link LIKE '%follow%' AND mc.note IS NULL AND mi.info IN ('Sweden', 'Germany','Swedish', 'German') AND t.production_year = 1998 AND lt.id = ml.link_type_id AND ml.movie_id = t.id AND t.id = mk.movie_id AND mk.keyword_id = k.id AND t.id = mc.movie_id AND mc.company_type_id = ct.id AND mc.company_id = cn.id AND mi.movie_id = t.id AND t.id = cc.movie_id AND cct1.id = cc.subject_id AND cct2.id = cc.status_id AND ml.movie_id = mk.movie_id AND ml.movie_id = mc.movie_id AND mk.movie_id = mc.movie_id AND ml.movie_id = mi.movie_id AND mk.movie_id = mi.movie_id AND mc.movie_id = mi.movie_id AND ml.movie_id = cc.movie_id AND mk.movie_id = cc.movie_id AND mc.movie_id = cc.movie_id AND mi.movie_id = cc.movie_id; 2 | -------------------------------------------------------------------------------- /job/27c.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(cn.name) AS producing_company, MIN(lt.link) AS link_type, MIN(t.title) AS complete_western_sequel FROM complete_cast AS cc, comp_cast_type AS cct1, comp_cast_type AS cct2, company_name AS cn, company_type AS ct, keyword AS k, link_type AS lt, movie_companies AS mc, movie_info AS mi, movie_keyword AS mk, movie_link AS ml, title AS t WHERE 
cct1.kind = 'cast' AND cct2.kind like 'complete%' AND cn.country_code !='[pl]' AND (cn.name LIKE '%Film%' OR cn.name LIKE '%Warner%') AND ct.kind ='production companies' AND k.keyword ='sequel' AND lt.link LIKE '%follow%' AND mc.note IS NULL AND mi.info IN ('Sweden', 'Norway', 'Germany', 'Denmark', 'Swedish', 'Denish', 'Norwegian', 'German', 'English') AND t.production_year BETWEEN 1950 AND 2010 AND lt.id = ml.link_type_id AND ml.movie_id = t.id AND t.id = mk.movie_id AND mk.keyword_id = k.id AND t.id = mc.movie_id AND mc.company_type_id = ct.id AND mc.company_id = cn.id AND mi.movie_id = t.id AND t.id = cc.movie_id AND cct1.id = cc.subject_id AND cct2.id = cc.status_id AND ml.movie_id = mk.movie_id AND ml.movie_id = mc.movie_id AND mk.movie_id = mc.movie_id AND ml.movie_id = mi.movie_id AND mk.movie_id = mi.movie_id AND mc.movie_id = mi.movie_id AND ml.movie_id = cc.movie_id AND mk.movie_id = cc.movie_id AND mc.movie_id = cc.movie_id AND mi.movie_id = cc.movie_id; 2 | -------------------------------------------------------------------------------- /job/28a.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(cn.name) AS movie_company, MIN(mi_idx.info) AS rating, MIN(t.title) AS complete_euro_dark_movie FROM complete_cast AS cc, comp_cast_type AS cct1, comp_cast_type AS cct2, company_name AS cn, company_type AS ct, info_type AS it1, info_type AS it2, keyword AS k, kind_type AS kt, movie_companies AS mc, movie_info AS mi, movie_info_idx AS mi_idx, movie_keyword AS mk, title AS t WHERE cct1.kind = 'crew' AND cct2.kind != 'complete+verified' AND cn.country_code != '[us]' AND it1.info = 'countries' AND it2.info = 'rating' AND k.keyword in ('murder', 'murder-in-title', 'blood', 'violence') AND kt.kind in ('movie', 'episode') AND mc.note not like '%(USA)%' and mc.note like '%(200%)%' AND mi.info IN ('Sweden', 'Norway', 'Germany', 'Denmark', 'Swedish', 'Danish', 'Norwegian', 'German', 'USA', 'American') AND mi_idx.info < 
'8.5' AND t.production_year > 2000 AND kt.id = t.kind_id AND t.id = mi.movie_id AND t.id = mk.movie_id AND t.id = mi_idx.movie_id AND t.id = mc.movie_id AND t.id = cc.movie_id AND mk.movie_id = mi.movie_id AND mk.movie_id = mi_idx.movie_id AND mk.movie_id = mc.movie_id AND mk.movie_id = cc.movie_id AND mi.movie_id = mi_idx.movie_id AND mi.movie_id = mc.movie_id AND mi.movie_id = cc.movie_id AND mc.movie_id = mi_idx.movie_id AND mc.movie_id = cc.movie_id AND mi_idx.movie_id = cc.movie_id AND k.id = mk.keyword_id AND it1.id = mi.info_type_id AND it2.id = mi_idx.info_type_id AND ct.id = mc.company_type_id AND cn.id = mc.company_id AND cct1.id = cc.subject_id AND cct2.id = cc.status_id; 2 | -------------------------------------------------------------------------------- /job/28b.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(cn.name) AS movie_company, MIN(mi_idx.info) AS rating, MIN(t.title) AS complete_euro_dark_movie FROM complete_cast AS cc, comp_cast_type AS cct1, comp_cast_type AS cct2, company_name AS cn, company_type AS ct, info_type AS it1, info_type AS it2, keyword AS k, kind_type AS kt, movie_companies AS mc, movie_info AS mi, movie_info_idx AS mi_idx, movie_keyword AS mk, title AS t WHERE cct1.kind = 'crew' AND cct2.kind != 'complete+verified' AND cn.country_code != '[us]' AND it1.info = 'countries' AND it2.info = 'rating' AND k.keyword in ('murder', 'murder-in-title', 'blood', 'violence') AND kt.kind in ('movie', 'episode') AND mc.note not like '%(USA)%' and mc.note like '%(200%)%' AND mi.info IN ('Sweden', 'Germany', 'Swedish', 'German') AND mi_idx.info > '6.5' AND t.production_year > 2005 AND kt.id = t.kind_id AND t.id = mi.movie_id AND t.id = mk.movie_id AND t.id = mi_idx.movie_id AND t.id = mc.movie_id AND t.id = cc.movie_id AND mk.movie_id = mi.movie_id AND mk.movie_id = mi_idx.movie_id AND mk.movie_id = mc.movie_id AND mk.movie_id = cc.movie_id AND mi.movie_id = mi_idx.movie_id AND mi.movie_id = 
mc.movie_id AND mi.movie_id = cc.movie_id AND mc.movie_id = mi_idx.movie_id AND mc.movie_id = cc.movie_id AND mi_idx.movie_id = cc.movie_id AND k.id = mk.keyword_id AND it1.id = mi.info_type_id AND it2.id = mi_idx.info_type_id AND ct.id = mc.company_type_id AND cn.id = mc.company_id AND cct1.id = cc.subject_id AND cct2.id = cc.status_id; 2 | -------------------------------------------------------------------------------- /job/28c.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(cn.name) AS movie_company, MIN(mi_idx.info) AS rating, MIN(t.title) AS complete_euro_dark_movie FROM complete_cast AS cc, comp_cast_type AS cct1, comp_cast_type AS cct2, company_name AS cn, company_type AS ct, info_type AS it1, info_type AS it2, keyword AS k, kind_type AS kt, movie_companies AS mc, movie_info AS mi, movie_info_idx AS mi_idx, movie_keyword AS mk, title AS t WHERE cct1.kind = 'cast' AND cct2.kind = 'complete' AND cn.country_code != '[us]' AND it1.info = 'countries' AND it2.info = 'rating' AND k.keyword in ('murder', 'murder-in-title', 'blood', 'violence') AND kt.kind in ('movie', 'episode') AND mc.note not like '%(USA)%' and mc.note like '%(200%)%' AND mi.info IN ('Sweden', 'Norway', 'Germany', 'Denmark', 'Swedish', 'Danish', 'Norwegian', 'German', 'USA', 'American') AND mi_idx.info < '8.5' AND t.production_year > 2005 AND kt.id = t.kind_id AND t.id = mi.movie_id AND t.id = mk.movie_id AND t.id = mi_idx.movie_id AND t.id = mc.movie_id AND t.id = cc.movie_id AND mk.movie_id = mi.movie_id AND mk.movie_id = mi_idx.movie_id AND mk.movie_id = mc.movie_id AND mk.movie_id = cc.movie_id AND mi.movie_id = mi_idx.movie_id AND mi.movie_id = mc.movie_id AND mi.movie_id = cc.movie_id AND mc.movie_id = mi_idx.movie_id AND mc.movie_id = cc.movie_id AND mi_idx.movie_id = cc.movie_id AND k.id = mk.keyword_id AND it1.id = mi.info_type_id AND it2.id = mi_idx.info_type_id AND ct.id = mc.company_type_id AND cn.id = mc.company_id AND cct1.id = 
cc.subject_id AND cct2.id = cc.status_id; 2 | -------------------------------------------------------------------------------- /job/29a.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(chn.name) AS voiced_char, MIN(n.name) AS voicing_actress, MIN(t.title) AS voiced_animation FROM aka_name AS an, complete_cast AS cc, comp_cast_type AS cct1, comp_cast_type AS cct2, char_name AS chn, cast_info AS ci, company_name AS cn, info_type AS it, info_type AS it3, keyword AS k, movie_companies AS mc, movie_info AS mi, movie_keyword AS mk, name AS n, person_info AS pi, role_type AS rt, title AS t WHERE cct1.kind ='cast' AND cct2.kind ='complete+verified' AND chn.name = 'Queen' AND ci.note in ('(voice)', '(voice) (uncredited)', '(voice: English version)') AND cn.country_code ='[us]' AND it.info = 'release dates' AND it3.info = 'trivia' AND k.keyword = 'computer-animation' AND mi.info is not null and (mi.info like 'Japan:%200%' or mi.info like 'USA:%200%') AND n.gender ='f' and n.name like '%An%' AND rt.role ='actress' AND t.title = 'Shrek 2' AND t.production_year between 2000 and 2010 AND t.id = mi.movie_id AND t.id = mc.movie_id AND t.id = ci.movie_id AND t.id = mk.movie_id AND t.id = cc.movie_id AND mc.movie_id = ci.movie_id AND mc.movie_id = mi.movie_id AND mc.movie_id = mk.movie_id AND mc.movie_id = cc.movie_id AND mi.movie_id = ci.movie_id AND mi.movie_id = mk.movie_id AND mi.movie_id = cc.movie_id AND ci.movie_id = mk.movie_id AND ci.movie_id = cc.movie_id AND mk.movie_id = cc.movie_id AND cn.id = mc.company_id AND it.id = mi.info_type_id AND n.id = ci.person_id AND rt.id = ci.role_id AND n.id = an.person_id AND ci.person_id = an.person_id AND chn.id = ci.person_role_id AND n.id = pi.person_id AND ci.person_id = pi.person_id AND it3.id = pi.info_type_id AND k.id = mk.keyword_id AND cct1.id = cc.subject_id AND cct2.id = cc.status_id; 2 | -------------------------------------------------------------------------------- 
/job/29b.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(chn.name) AS voiced_char, MIN(n.name) AS voicing_actress, MIN(t.title) AS voiced_animation FROM aka_name AS an, complete_cast AS cc, comp_cast_type AS cct1, comp_cast_type AS cct2, char_name AS chn, cast_info AS ci, company_name AS cn, info_type AS it, info_type AS it3, keyword AS k, movie_companies AS mc, movie_info AS mi, movie_keyword AS mk, name AS n, person_info AS pi, role_type AS rt, title AS t WHERE cct1.kind ='cast' AND cct2.kind ='complete+verified' AND chn.name = 'Queen' AND ci.note in ('(voice)', '(voice) (uncredited)', '(voice: English version)') AND cn.country_code ='[us]' AND it.info = 'release dates' AND it3.info = 'height' AND k.keyword = 'computer-animation' AND mi.info like 'USA:%200%' AND n.gender ='f' and n.name like '%An%' AND rt.role ='actress' AND t.title = 'Shrek 2' AND t.production_year between 2000 and 2005 AND t.id = mi.movie_id AND t.id = mc.movie_id AND t.id = ci.movie_id AND t.id = mk.movie_id AND t.id = cc.movie_id AND mc.movie_id = ci.movie_id AND mc.movie_id = mi.movie_id AND mc.movie_id = mk.movie_id AND mc.movie_id = cc.movie_id AND mi.movie_id = ci.movie_id AND mi.movie_id = mk.movie_id AND mi.movie_id = cc.movie_id AND ci.movie_id = mk.movie_id AND ci.movie_id = cc.movie_id AND mk.movie_id = cc.movie_id AND cn.id = mc.company_id AND it.id = mi.info_type_id AND n.id = ci.person_id AND rt.id = ci.role_id AND n.id = an.person_id AND ci.person_id = an.person_id AND chn.id = ci.person_role_id AND n.id = pi.person_id AND ci.person_id = pi.person_id AND it3.id = pi.info_type_id AND k.id = mk.keyword_id AND cct1.id = cc.subject_id AND cct2.id = cc.status_id; 2 | -------------------------------------------------------------------------------- /job/29c.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(chn.name) AS voiced_char, MIN(n.name) AS voicing_actress, MIN(t.title) AS 
voiced_animation FROM aka_name AS an, complete_cast AS cc, comp_cast_type AS cct1, comp_cast_type AS cct2, char_name AS chn, cast_info AS ci, company_name AS cn, info_type AS it, info_type AS it3, keyword AS k, movie_companies AS mc, movie_info AS mi, movie_keyword AS mk, name AS n, person_info AS pi, role_type AS rt, title AS t WHERE cct1.kind ='cast' AND cct2.kind ='complete+verified' AND ci.note in ('(voice)', '(voice: Japanese version)', '(voice) (uncredited)', '(voice: English version)') AND cn.country_code ='[us]' AND it.info = 'release dates' AND it3.info = 'trivia' AND k.keyword = 'computer-animation' AND mi.info is not null and (mi.info like 'Japan:%200%' or mi.info like 'USA:%200%') AND n.gender ='f' and n.name like '%An%' AND rt.role ='actress' AND t.production_year between 2000 and 2010 AND t.id = mi.movie_id AND t.id = mc.movie_id AND t.id = ci.movie_id AND t.id = mk.movie_id AND t.id = cc.movie_id AND mc.movie_id = ci.movie_id AND mc.movie_id = mi.movie_id AND mc.movie_id = mk.movie_id AND mc.movie_id = cc.movie_id AND mi.movie_id = ci.movie_id AND mi.movie_id = mk.movie_id AND mi.movie_id = cc.movie_id AND ci.movie_id = mk.movie_id AND ci.movie_id = cc.movie_id AND mk.movie_id = cc.movie_id AND cn.id = mc.company_id AND it.id = mi.info_type_id AND n.id = ci.person_id AND rt.id = ci.role_id AND n.id = an.person_id AND ci.person_id = an.person_id AND chn.id = ci.person_role_id AND n.id = pi.person_id AND ci.person_id = pi.person_id AND it3.id = pi.info_type_id AND k.id = mk.keyword_id AND cct1.id = cc.subject_id AND cct2.id = cc.status_id; 2 | -------------------------------------------------------------------------------- /job/2a.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(t.title) AS movie_title FROM company_name AS cn, keyword AS k, movie_companies AS mc, movie_keyword AS mk, title AS t WHERE cn.country_code ='[de]' AND k.keyword ='character-name-in-title' AND cn.id = mc.company_id AND 
mc.movie_id = t.id AND t.id = mk.movie_id AND mk.keyword_id = k.id AND mc.movie_id = mk.movie_id; 2 | -------------------------------------------------------------------------------- /job/2b.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(t.title) AS movie_title FROM company_name AS cn, keyword AS k, movie_companies AS mc, movie_keyword AS mk, title AS t WHERE cn.country_code ='[nl]' AND k.keyword ='character-name-in-title' AND cn.id = mc.company_id AND mc.movie_id = t.id AND t.id = mk.movie_id AND mk.keyword_id = k.id AND mc.movie_id = mk.movie_id; 2 | -------------------------------------------------------------------------------- /job/2c.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(t.title) AS movie_title FROM company_name AS cn, keyword AS k, movie_companies AS mc, movie_keyword AS mk, title AS t WHERE cn.country_code ='[sm]' AND k.keyword ='character-name-in-title' AND cn.id = mc.company_id AND mc.movie_id = t.id AND t.id = mk.movie_id AND mk.keyword_id = k.id AND mc.movie_id = mk.movie_id; 2 | -------------------------------------------------------------------------------- /job/2d.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(t.title) AS movie_title FROM company_name AS cn, keyword AS k, movie_companies AS mc, movie_keyword AS mk, title AS t WHERE cn.country_code ='[us]' AND k.keyword ='character-name-in-title' AND cn.id = mc.company_id AND mc.movie_id = t.id AND t.id = mk.movie_id AND mk.keyword_id = k.id AND mc.movie_id = mk.movie_id; 2 | -------------------------------------------------------------------------------- /job/30a.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(mi.info) AS movie_budget, MIN(mi_idx.info) AS movie_votes, MIN(n.name) AS writer, MIN(t.title) AS complete_violent_movie FROM complete_cast AS cc, comp_cast_type AS cct1, 
comp_cast_type AS cct2, cast_info AS ci, info_type AS it1, info_type AS it2, keyword AS k, movie_info AS mi, movie_info_idx AS mi_idx, movie_keyword AS mk, name AS n, title AS t WHERE cct1.kind in ('cast', 'crew') AND cct2.kind ='complete+verified' AND ci.note in ('(writer)', '(head writer)', '(written by)', '(story)', '(story editor)') AND it1.info = 'genres' AND it2.info = 'votes' AND k.keyword in ('murder', 'violence', 'blood', 'gore', 'death', 'female-nudity', 'hospital') AND mi.info in ('Horror', 'Thriller') AND n.gender = 'm' AND t.production_year > 2000 AND t.id = mi.movie_id AND t.id = mi_idx.movie_id AND t.id = ci.movie_id AND t.id = mk.movie_id AND t.id = cc.movie_id AND ci.movie_id = mi.movie_id AND ci.movie_id = mi_idx.movie_id AND ci.movie_id = mk.movie_id AND ci.movie_id = cc.movie_id AND mi.movie_id = mi_idx.movie_id AND mi.movie_id = mk.movie_id AND mi.movie_id = cc.movie_id AND mi_idx.movie_id = mk.movie_id AND mi_idx.movie_id = cc.movie_id AND mk.movie_id = cc.movie_id AND n.id = ci.person_id AND it1.id = mi.info_type_id AND it2.id = mi_idx.info_type_id AND k.id = mk.keyword_id AND cct1.id = cc.subject_id AND cct2.id = cc.status_id; 2 | -------------------------------------------------------------------------------- /job/30b.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(mi.info) AS movie_budget, MIN(mi_idx.info) AS movie_votes, MIN(n.name) AS writer, MIN(t.title) AS complete_gore_movie FROM complete_cast AS cc, comp_cast_type AS cct1, comp_cast_type AS cct2, cast_info AS ci, info_type AS it1, info_type AS it2, keyword AS k, movie_info AS mi, movie_info_idx AS mi_idx, movie_keyword AS mk, name AS n, title AS t WHERE cct1.kind in ('cast', 'crew') AND cct2.kind ='complete+verified' AND ci.note in ('(writer)', '(head writer)', '(written by)', '(story)', '(story editor)') AND it1.info = 'genres' AND it2.info = 'votes' AND k.keyword in ('murder', 'violence', 'blood', 'gore', 'death', 
'female-nudity', 'hospital') AND mi.info in ('Horror', 'Thriller') AND n.gender = 'm' AND t.production_year > 2000 and (t.title like '%Freddy%' or t.title like '%Jason%' or t.title like 'Saw%') AND t.id = mi.movie_id AND t.id = mi_idx.movie_id AND t.id = ci.movie_id AND t.id = mk.movie_id AND t.id = cc.movie_id AND ci.movie_id = mi.movie_id AND ci.movie_id = mi_idx.movie_id AND ci.movie_id = mk.movie_id AND ci.movie_id = cc.movie_id AND mi.movie_id = mi_idx.movie_id AND mi.movie_id = mk.movie_id AND mi.movie_id = cc.movie_id AND mi_idx.movie_id = mk.movie_id AND mi_idx.movie_id = cc.movie_id AND mk.movie_id = cc.movie_id AND n.id = ci.person_id AND it1.id = mi.info_type_id AND it2.id = mi_idx.info_type_id AND k.id = mk.keyword_id AND cct1.id = cc.subject_id AND cct2.id = cc.status_id; 2 | -------------------------------------------------------------------------------- /job/30c.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(mi.info) AS movie_budget, MIN(mi_idx.info) AS movie_votes, MIN(n.name) AS writer, MIN(t.title) AS complete_violent_movie FROM complete_cast AS cc, comp_cast_type AS cct1, comp_cast_type AS cct2, cast_info AS ci, info_type AS it1, info_type AS it2, keyword AS k, movie_info AS mi, movie_info_idx AS mi_idx, movie_keyword AS mk, name AS n, title AS t WHERE cct1.kind = 'cast' AND cct2.kind ='complete+verified' AND ci.note in ('(writer)', '(head writer)', '(written by)', '(story)', '(story editor)') AND it1.info = 'genres' AND it2.info = 'votes' AND k.keyword in ('murder', 'violence', 'blood', 'gore', 'death', 'female-nudity', 'hospital') AND mi.info in ('Horror', 'Action', 'Sci-Fi', 'Thriller', 'Crime', 'War') AND n.gender = 'm' AND t.id = mi.movie_id AND t.id = mi_idx.movie_id AND t.id = ci.movie_id AND t.id = mk.movie_id AND t.id = cc.movie_id AND ci.movie_id = mi.movie_id AND ci.movie_id = mi_idx.movie_id AND ci.movie_id = mk.movie_id AND ci.movie_id = cc.movie_id AND mi.movie_id = 
mi_idx.movie_id AND mi.movie_id = mk.movie_id AND mi.movie_id = cc.movie_id AND mi_idx.movie_id = mk.movie_id AND mi_idx.movie_id = cc.movie_id AND mk.movie_id = cc.movie_id AND n.id = ci.person_id AND it1.id = mi.info_type_id AND it2.id = mi_idx.info_type_id AND k.id = mk.keyword_id AND cct1.id = cc.subject_id AND cct2.id = cc.status_id; 2 | -------------------------------------------------------------------------------- /job/31a.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(mi.info) AS movie_budget, MIN(mi_idx.info) AS movie_votes, MIN(n.name) AS writer, MIN(t.title) AS violent_liongate_movie FROM cast_info AS ci, company_name AS cn, info_type AS it1, info_type AS it2, keyword AS k, movie_companies AS mc, movie_info AS mi, movie_info_idx AS mi_idx, movie_keyword AS mk, name AS n, title AS t WHERE ci.note in ('(writer)', '(head writer)', '(written by)', '(story)', '(story editor)') AND cn.name like 'Lionsgate%' AND it1.info = 'genres' AND it2.info = 'votes' AND k.keyword in ('murder', 'violence', 'blood', 'gore', 'death', 'female-nudity', 'hospital') AND mi.info in ('Horror', 'Thriller') AND n.gender = 'm' AND t.id = mi.movie_id AND t.id = mi_idx.movie_id AND t.id = ci.movie_id AND t.id = mk.movie_id AND t.id = mc.movie_id AND ci.movie_id = mi.movie_id AND ci.movie_id = mi_idx.movie_id AND ci.movie_id = mk.movie_id AND ci.movie_id = mc.movie_id AND mi.movie_id = mi_idx.movie_id AND mi.movie_id = mk.movie_id AND mi.movie_id = mc.movie_id AND mi_idx.movie_id = mk.movie_id AND mi_idx.movie_id = mc.movie_id AND mk.movie_id = mc.movie_id AND n.id = ci.person_id AND it1.id = mi.info_type_id AND it2.id = mi_idx.info_type_id AND k.id = mk.keyword_id AND cn.id = mc.company_id; 2 | -------------------------------------------------------------------------------- /job/31b.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(mi.info) AS movie_budget, MIN(mi_idx.info) AS 
movie_votes, MIN(n.name) AS writer, MIN(t.title) AS violent_liongate_movie FROM cast_info AS ci, company_name AS cn, info_type AS it1, info_type AS it2, keyword AS k, movie_companies AS mc, movie_info AS mi, movie_info_idx AS mi_idx, movie_keyword AS mk, name AS n, title AS t WHERE ci.note in ('(writer)', '(head writer)', '(written by)', '(story)', '(story editor)') AND cn.name like 'Lionsgate%' AND it1.info = 'genres' AND it2.info = 'votes' AND k.keyword in ('murder', 'violence', 'blood', 'gore', 'death', 'female-nudity', 'hospital') AND mc.note like '%(Blu-ray)%' AND mi.info in ('Horror', 'Thriller') AND n.gender = 'm' AND t.production_year > 2000 and (t.title like '%Freddy%' or t.title like '%Jason%' or t.title like 'Saw%') AND t.id = mi.movie_id AND t.id = mi_idx.movie_id AND t.id = ci.movie_id AND t.id = mk.movie_id AND t.id = mc.movie_id AND ci.movie_id = mi.movie_id AND ci.movie_id = mi_idx.movie_id AND ci.movie_id = mk.movie_id AND ci.movie_id = mc.movie_id AND mi.movie_id = mi_idx.movie_id AND mi.movie_id = mk.movie_id AND mi.movie_id = mc.movie_id AND mi_idx.movie_id = mk.movie_id AND mi_idx.movie_id = mc.movie_id AND mk.movie_id = mc.movie_id AND n.id = ci.person_id AND it1.id = mi.info_type_id AND it2.id = mi_idx.info_type_id AND k.id = mk.keyword_id AND cn.id = mc.company_id; 2 | -------------------------------------------------------------------------------- /job/31c.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(mi.info) AS movie_budget, MIN(mi_idx.info) AS movie_votes, MIN(n.name) AS writer, MIN(t.title) AS violent_liongate_movie FROM cast_info AS ci, company_name AS cn, info_type AS it1, info_type AS it2, keyword AS k, movie_companies AS mc, movie_info AS mi, movie_info_idx AS mi_idx, movie_keyword AS mk, name AS n, title AS t WHERE ci.note in ('(writer)', '(head writer)', '(written by)', '(story)', '(story editor)') AND cn.name like 'Lionsgate%' AND it1.info = 'genres' AND it2.info = 'votes' 
AND k.keyword in ('murder', 'violence', 'blood', 'gore', 'death', 'female-nudity', 'hospital') AND mi.info in ('Horror', 'Action', 'Sci-Fi', 'Thriller', 'Crime', 'War') AND t.id = mi.movie_id AND t.id = mi_idx.movie_id AND t.id = ci.movie_id AND t.id = mk.movie_id AND t.id = mc.movie_id AND ci.movie_id = mi.movie_id AND ci.movie_id = mi_idx.movie_id AND ci.movie_id = mk.movie_id AND ci.movie_id = mc.movie_id AND mi.movie_id = mi_idx.movie_id AND mi.movie_id = mk.movie_id AND mi.movie_id = mc.movie_id AND mi_idx.movie_id = mk.movie_id AND mi_idx.movie_id = mc.movie_id AND mk.movie_id = mc.movie_id AND n.id = ci.person_id AND it1.id = mi.info_type_id AND it2.id = mi_idx.info_type_id AND k.id = mk.keyword_id AND cn.id = mc.company_id; 2 | -------------------------------------------------------------------------------- /job/32a.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(lt.link) AS link_type, MIN(t1.title) AS first_movie, MIN(t2.title) AS second_movie FROM keyword AS k, link_type AS lt, movie_keyword AS mk, movie_link AS ml, title AS t1, title AS t2 WHERE k.keyword ='10,000-mile-club' AND mk.keyword_id = k.id AND t1.id = mk.movie_id AND ml.movie_id = t1.id AND ml.linked_movie_id = t2.id AND lt.id = ml.link_type_id AND mk.movie_id = t1.id; 2 | -------------------------------------------------------------------------------- /job/32b.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(lt.link) AS link_type, MIN(t1.title) AS first_movie, MIN(t2.title) AS second_movie FROM keyword AS k, link_type AS lt, movie_keyword AS mk, movie_link AS ml, title AS t1, title AS t2 WHERE k.keyword ='character-name-in-title' AND mk.keyword_id = k.id AND t1.id = mk.movie_id AND ml.movie_id = t1.id AND ml.linked_movie_id = t2.id AND lt.id = ml.link_type_id AND mk.movie_id = t1.id; 2 | -------------------------------------------------------------------------------- /job/33a.sql: 
-------------------------------------------------------------------------------- 1 | SELECT MIN(cn1.name) AS first_company, MIN(cn2.name) AS second_company, MIN(mi_idx1.info) AS first_rating, MIN(mi_idx2.info) AS second_rating, MIN(t1.title) AS first_movie, MIN(t2.title) AS second_movie FROM company_name AS cn1, company_name AS cn2, info_type AS it1, info_type AS it2, kind_type AS kt1, kind_type AS kt2, link_type AS lt, movie_companies AS mc1, movie_companies AS mc2, movie_info_idx AS mi_idx1, movie_info_idx AS mi_idx2, movie_link AS ml, title AS t1, title AS t2 WHERE cn1.country_code = '[us]' AND it1.info = 'rating' AND it2.info = 'rating' AND kt1.kind in ('tv series') AND kt2.kind in ('tv series') AND lt.link in ('sequel', 'follows', 'followed by') AND mi_idx2.info < '3.0' AND t2.production_year between 2005 and 2008 AND lt.id = ml.link_type_id AND t1.id = ml.movie_id AND t2.id = ml.linked_movie_id AND it1.id = mi_idx1.info_type_id AND t1.id = mi_idx1.movie_id AND kt1.id = t1.kind_id AND cn1.id = mc1.company_id AND t1.id = mc1.movie_id AND ml.movie_id = mi_idx1.movie_id AND ml.movie_id = mc1.movie_id AND mi_idx1.movie_id = mc1.movie_id AND it2.id = mi_idx2.info_type_id AND t2.id = mi_idx2.movie_id AND kt2.id = t2.kind_id AND cn2.id = mc2.company_id AND t2.id = mc2.movie_id AND ml.linked_movie_id = mi_idx2.movie_id AND ml.linked_movie_id = mc2.movie_id AND mi_idx2.movie_id = mc2.movie_id; 2 | -------------------------------------------------------------------------------- /job/33b.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(cn1.name) AS first_company, MIN(cn2.name) AS second_company, MIN(mi_idx1.info) AS first_rating, MIN(mi_idx2.info) AS second_rating, MIN(t1.title) AS first_movie, MIN(t2.title) AS second_movie FROM company_name AS cn1, company_name AS cn2, info_type AS it1, info_type AS it2, kind_type AS kt1, kind_type AS kt2, link_type AS lt, movie_companies AS mc1, movie_companies AS mc2, movie_info_idx 
AS mi_idx1, movie_info_idx AS mi_idx2, movie_link AS ml, title AS t1, title AS t2 WHERE cn1.country_code = '[nl]' AND it1.info = 'rating' AND it2.info = 'rating' AND kt1.kind in ('tv series') AND kt2.kind in ('tv series') AND lt.link LIKE '%follow%' AND mi_idx2.info < '3.0' AND t2.production_year = 2007 AND lt.id = ml.link_type_id AND t1.id = ml.movie_id AND t2.id = ml.linked_movie_id AND it1.id = mi_idx1.info_type_id AND t1.id = mi_idx1.movie_id AND kt1.id = t1.kind_id AND cn1.id = mc1.company_id AND t1.id = mc1.movie_id AND ml.movie_id = mi_idx1.movie_id AND ml.movie_id = mc1.movie_id AND mi_idx1.movie_id = mc1.movie_id AND it2.id = mi_idx2.info_type_id AND t2.id = mi_idx2.movie_id AND kt2.id = t2.kind_id AND cn2.id = mc2.company_id AND t2.id = mc2.movie_id AND ml.linked_movie_id = mi_idx2.movie_id AND ml.linked_movie_id = mc2.movie_id AND mi_idx2.movie_id = mc2.movie_id; 2 | -------------------------------------------------------------------------------- /job/33c.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(cn1.name) AS first_company, MIN(cn2.name) AS second_company, MIN(mi_idx1.info) AS first_rating, MIN(mi_idx2.info) AS second_rating, MIN(t1.title) AS first_movie, MIN(t2.title) AS second_movie FROM company_name AS cn1, company_name AS cn2, info_type AS it1, info_type AS it2, kind_type AS kt1, kind_type AS kt2, link_type AS lt, movie_companies AS mc1, movie_companies AS mc2, movie_info_idx AS mi_idx1, movie_info_idx AS mi_idx2, movie_link AS ml, title AS t1, title AS t2 WHERE cn1.country_code != '[us]' AND it1.info = 'rating' AND it2.info = 'rating' AND kt1.kind in ('tv series', 'episode') AND kt2.kind in ('tv series', 'episode') AND lt.link in ('sequel', 'follows', 'followed by') AND mi_idx2.info < '3.5' AND t2.production_year between 2000 and 2010 AND lt.id = ml.link_type_id AND t1.id = ml.movie_id AND t2.id = ml.linked_movie_id AND it1.id = mi_idx1.info_type_id AND t1.id = mi_idx1.movie_id AND kt1.id = 
t1.kind_id AND cn1.id = mc1.company_id AND t1.id = mc1.movie_id AND ml.movie_id = mi_idx1.movie_id AND ml.movie_id = mc1.movie_id AND mi_idx1.movie_id = mc1.movie_id AND it2.id = mi_idx2.info_type_id AND t2.id = mi_idx2.movie_id AND kt2.id = t2.kind_id AND cn2.id = mc2.company_id AND t2.id = mc2.movie_id AND ml.linked_movie_id = mi_idx2.movie_id AND ml.linked_movie_id = mc2.movie_id AND mi_idx2.movie_id = mc2.movie_id; 2 | -------------------------------------------------------------------------------- /job/3a.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(t.title) AS movie_title FROM keyword AS k, movie_info AS mi, movie_keyword AS mk, title AS t WHERE k.keyword like '%sequel%' AND mi.info IN ('Sweden', 'Norway', 'Germany', 'Denmark', 'Swedish', 'Denish', 'Norwegian', 'German') AND t.production_year > 2005 AND t.id = mi.movie_id AND t.id = mk.movie_id AND mk.movie_id = mi.movie_id AND k.id = mk.keyword_id; 2 | -------------------------------------------------------------------------------- /job/3b.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(t.title) AS movie_title FROM keyword AS k, movie_info AS mi, movie_keyword AS mk, title AS t WHERE k.keyword like '%sequel%' AND mi.info IN ('Bulgaria') AND t.production_year > 2010 AND t.id = mi.movie_id AND t.id = mk.movie_id AND mk.movie_id = mi.movie_id AND k.id = mk.keyword_id; 2 | -------------------------------------------------------------------------------- /job/3c.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(t.title) AS movie_title FROM keyword AS k, movie_info AS mi, movie_keyword AS mk, title AS t WHERE k.keyword like '%sequel%' AND mi.info IN ('Sweden', 'Norway', 'Germany', 'Denmark', 'Swedish', 'Denish', 'Norwegian', 'German', 'USA', 'American') AND t.production_year > 1990 AND t.id = mi.movie_id AND t.id = mk.movie_id AND mk.movie_id = mi.movie_id 
AND k.id = mk.keyword_id; 2 | -------------------------------------------------------------------------------- /job/4a.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(mi_idx.info) AS rating, MIN(t.title) AS movie_title FROM info_type AS it, keyword AS k, movie_info_idx AS mi_idx, movie_keyword AS mk, title AS t WHERE it.info ='rating' AND k.keyword like '%sequel%' AND mi_idx.info > '5.0' AND t.production_year > 2005 AND t.id = mi_idx.movie_id AND t.id = mk.movie_id AND mk.movie_id = mi_idx.movie_id AND k.id = mk.keyword_id AND it.id = mi_idx.info_type_id; 2 | -------------------------------------------------------------------------------- /job/4b.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(mi_idx.info) AS rating, MIN(t.title) AS movie_title FROM info_type AS it, keyword AS k, movie_info_idx AS mi_idx, movie_keyword AS mk, title AS t WHERE it.info ='rating' AND k.keyword like '%sequel%' AND mi_idx.info > '9.0' AND t.production_year > 2010 AND t.id = mi_idx.movie_id AND t.id = mk.movie_id AND mk.movie_id = mi_idx.movie_id AND k.id = mk.keyword_id AND it.id = mi_idx.info_type_id; 2 | -------------------------------------------------------------------------------- /job/4c.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(mi_idx.info) AS rating, MIN(t.title) AS movie_title FROM info_type AS it, keyword AS k, movie_info_idx AS mi_idx, movie_keyword AS mk, title AS t WHERE it.info ='rating' AND k.keyword like '%sequel%' AND mi_idx.info > '2.0' AND t.production_year > 1990 AND t.id = mi_idx.movie_id AND t.id = mk.movie_id AND mk.movie_id = mi_idx.movie_id AND k.id = mk.keyword_id AND it.id = mi_idx.info_type_id; 2 | -------------------------------------------------------------------------------- /job/5a.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(t.title) AS 
typical_european_movie FROM company_type AS ct, info_type AS it, movie_companies AS mc, movie_info AS mi, title AS t WHERE ct.kind = 'production companies' AND mc.note like '%(theatrical)%' and mc.note like '%(France)%' AND mi.info IN ('Sweden', 'Norway', 'Germany', 'Denmark', 'Swedish', 'Denish', 'Norwegian', 'German') AND t.production_year > 2005 AND t.id = mi.movie_id AND t.id = mc.movie_id AND mc.movie_id = mi.movie_id AND ct.id = mc.company_type_id AND it.id = mi.info_type_id; 2 | -------------------------------------------------------------------------------- /job/5b.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(t.title) AS american_vhs_movie FROM company_type AS ct, info_type AS it, movie_companies AS mc, movie_info AS mi, title AS t WHERE ct.kind = 'production companies' AND mc.note like '%(VHS)%' and mc.note like '%(USA)%' and mc.note like '%(1994)%' AND mi.info IN ('USA', 'America') AND t.production_year > 2010 AND t.id = mi.movie_id AND t.id = mc.movie_id AND mc.movie_id = mi.movie_id AND ct.id = mc.company_type_id AND it.id = mi.info_type_id; 2 | -------------------------------------------------------------------------------- /job/5c.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(t.title) AS american_movie FROM company_type AS ct, info_type AS it, movie_companies AS mc, movie_info AS mi, title AS t WHERE ct.kind = 'production companies' AND mc.note not like '%(TV)%' and mc.note like '%(USA)%' AND mi.info IN ('Sweden', 'Norway', 'Germany', 'Denmark', 'Swedish', 'Denish', 'Norwegian', 'German', 'USA', 'American') AND t.production_year > 1990 AND t.id = mi.movie_id AND t.id = mc.movie_id AND mc.movie_id = mi.movie_id AND ct.id = mc.company_type_id AND it.id = mi.info_type_id; 2 | -------------------------------------------------------------------------------- /job/6a.sql: 
-------------------------------------------------------------------------------- 1 | SELECT MIN(k.keyword) AS movie_keyword, MIN(n.name) AS actor_name, MIN(t.title) AS marvel_movie FROM cast_info AS ci, keyword AS k, movie_keyword AS mk, name AS n, title AS t WHERE k.keyword = 'marvel-cinematic-universe' AND n.name LIKE '%Downey%Robert%' AND t.production_year > 2010 AND k.id = mk.keyword_id AND t.id = mk.movie_id AND t.id = ci.movie_id AND ci.movie_id = mk.movie_id AND n.id = ci.person_id; 2 | -------------------------------------------------------------------------------- /job/6b.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(k.keyword) AS movie_keyword, MIN(n.name) AS actor_name, MIN(t.title) AS hero_movie FROM cast_info AS ci, keyword AS k, movie_keyword AS mk, name AS n, title AS t WHERE k.keyword in ('superhero', 'sequel', 'second-part', 'marvel-comics', 'based-on-comic', 'tv-special', 'fight', 'violence') AND n.name LIKE '%Downey%Robert%' AND t.production_year > 2014 AND k.id = mk.keyword_id AND t.id = mk.movie_id AND t.id = ci.movie_id AND ci.movie_id = mk.movie_id AND n.id = ci.person_id; 2 | -------------------------------------------------------------------------------- /job/6c.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(k.keyword) AS movie_keyword, MIN(n.name) AS actor_name, MIN(t.title) AS marvel_movie FROM cast_info AS ci, keyword AS k, movie_keyword AS mk, name AS n, title AS t WHERE k.keyword = 'marvel-cinematic-universe' AND n.name LIKE '%Downey%Robert%' AND t.production_year > 2014 AND k.id = mk.keyword_id AND t.id = mk.movie_id AND t.id = ci.movie_id AND ci.movie_id = mk.movie_id AND n.id = ci.person_id; 2 | -------------------------------------------------------------------------------- /job/6d.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(k.keyword) AS movie_keyword, MIN(n.name) AS 
actor_name, MIN(t.title) AS hero_movie FROM cast_info AS ci, keyword AS k, movie_keyword AS mk, name AS n, title AS t WHERE k.keyword in ('superhero', 'sequel', 'second-part', 'marvel-comics', 'based-on-comic', 'tv-special', 'fight', 'violence') AND n.name LIKE '%Downey%Robert%' AND t.production_year > 2000 AND k.id = mk.keyword_id AND t.id = mk.movie_id AND t.id = ci.movie_id AND ci.movie_id = mk.movie_id AND n.id = ci.person_id; 2 | -------------------------------------------------------------------------------- /job/6e.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(k.keyword) AS movie_keyword, MIN(n.name) AS actor_name, MIN(t.title) AS marvel_movie FROM cast_info AS ci, keyword AS k, movie_keyword AS mk, name AS n, title AS t WHERE k.keyword = 'marvel-cinematic-universe' AND n.name LIKE '%Downey%Robert%' AND t.production_year > 2000 AND k.id = mk.keyword_id AND t.id = mk.movie_id AND t.id = ci.movie_id AND ci.movie_id = mk.movie_id AND n.id = ci.person_id; 2 | -------------------------------------------------------------------------------- /job/6f.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(k.keyword) AS movie_keyword, MIN(n.name) AS actor_name, MIN(t.title) AS hero_movie FROM cast_info AS ci, keyword AS k, movie_keyword AS mk, name AS n, title AS t WHERE k.keyword in ('superhero', 'sequel', 'second-part', 'marvel-comics', 'based-on-comic', 'tv-special', 'fight', 'violence') AND t.production_year > 2000 AND k.id = mk.keyword_id AND t.id = mk.movie_id AND t.id = ci.movie_id AND ci.movie_id = mk.movie_id AND n.id = ci.person_id; 2 | -------------------------------------------------------------------------------- /job/7a.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(n.name) AS of_person, MIN(t.title) AS biography_movie FROM aka_name AS an, cast_info AS ci, info_type AS it, link_type AS lt, movie_link 
AS ml, name AS n, person_info AS pi, title AS t WHERE an.name LIKE '%a%' AND it.info ='mini biography' AND lt.link ='features' AND n.name_pcode_cf BETWEEN 'A' AND 'F' AND (n.gender='m' OR (n.gender = 'f' AND n.name LIKE 'B%')) AND pi.note ='Volker Boehm' AND t.production_year BETWEEN 1980 AND 1995 AND n.id = an.person_id AND n.id = pi.person_id AND ci.person_id = n.id AND t.id = ci.movie_id AND ml.linked_movie_id = t.id AND lt.id = ml.link_type_id AND it.id = pi.info_type_id AND pi.person_id = an.person_id AND pi.person_id = ci.person_id AND an.person_id = ci.person_id AND ci.movie_id = ml.linked_movie_id; 2 | -------------------------------------------------------------------------------- /job/7b.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(n.name) AS of_person, MIN(t.title) AS biography_movie FROM aka_name AS an, cast_info AS ci, info_type AS it, link_type AS lt, movie_link AS ml, name AS n, person_info AS pi, title AS t WHERE an.name LIKE '%a%' AND it.info ='mini biography' AND lt.link ='features' AND n.name_pcode_cf LIKE 'D%' AND n.gender='m' AND pi.note ='Volker Boehm' AND t.production_year BETWEEN 1980 AND 1984 AND n.id = an.person_id AND n.id = pi.person_id AND ci.person_id = n.id AND t.id = ci.movie_id AND ml.linked_movie_id = t.id AND lt.id = ml.link_type_id AND it.id = pi.info_type_id AND pi.person_id = an.person_id AND pi.person_id = ci.person_id AND an.person_id = ci.person_id AND ci.movie_id = ml.linked_movie_id; 2 | -------------------------------------------------------------------------------- /job/7c.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(n.name) AS cast_member_name, MIN(pi.info) AS cast_member_info FROM aka_name AS an, cast_info AS ci, info_type AS it, link_type AS lt, movie_link AS ml, name AS n, person_info AS pi, title AS t WHERE an.name is not NULL and (an.name LIKE '%a%' or an.name LIKE 'A%') AND it.info ='mini biography' AND 
lt.link in ('references', 'referenced in', 'features', 'featured in') AND n.name_pcode_cf BETWEEN 'A' AND 'F' AND (n.gender='m' OR (n.gender = 'f' AND n.name LIKE 'A%')) AND pi.note is not NULL AND t.production_year BETWEEN 1980 AND 2010 AND n.id = an.person_id AND n.id = pi.person_id AND ci.person_id = n.id AND t.id = ci.movie_id AND ml.linked_movie_id = t.id AND lt.id = ml.link_type_id AND it.id = pi.info_type_id AND pi.person_id = an.person_id AND pi.person_id = ci.person_id AND an.person_id = ci.person_id AND ci.movie_id = ml.linked_movie_id; 2 | -------------------------------------------------------------------------------- /job/8a.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(an1.name) AS actress_pseudonym, MIN(t.title) AS japanese_movie_dubbed FROM aka_name AS an1, cast_info AS ci, company_name AS cn, movie_companies AS mc, name AS n1, role_type AS rt, title AS t WHERE ci.note ='(voice: English version)' AND cn.country_code ='[jp]' AND mc.note like '%(Japan)%' and mc.note not like '%(USA)%' AND n1.name like '%Yo%' and n1.name not like '%Yu%' AND rt.role ='actress' AND an1.person_id = n1.id AND n1.id = ci.person_id AND ci.movie_id = t.id AND t.id = mc.movie_id AND mc.company_id = cn.id AND ci.role_id = rt.id AND an1.person_id = ci.person_id AND ci.movie_id = mc.movie_id; 2 | -------------------------------------------------------------------------------- /job/8b.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(an.name) AS acress_pseudonym, MIN(t.title) AS japanese_anime_movie FROM aka_name AS an, cast_info AS ci, company_name AS cn, movie_companies AS mc, name AS n, role_type AS rt, title AS t WHERE ci.note ='(voice: English version)' AND cn.country_code ='[jp]' AND mc.note like '%(Japan)%' and mc.note not like '%(USA)%' and (mc.note like '%(2006)%' or mc.note like '%(2007)%') AND n.name like '%Yo%' and n.name not like '%Yu%' AND rt.role ='actress' AND 
t.production_year between 2006 and 2007 and (t.title like 'One Piece%' or t.title like 'Dragon Ball Z%') AND an.person_id = n.id AND n.id = ci.person_id AND ci.movie_id = t.id AND t.id = mc.movie_id AND mc.company_id = cn.id AND ci.role_id = rt.id AND an.person_id = ci.person_id AND ci.movie_id = mc.movie_id; 2 | -------------------------------------------------------------------------------- /job/8c.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(a1.name) AS writer_pseudo_name, MIN(t.title) AS movie_title FROM aka_name AS a1, cast_info AS ci, company_name AS cn, movie_companies AS mc, name AS n1, role_type AS rt, title AS t WHERE cn.country_code ='[us]' AND rt.role ='writer' AND a1.person_id = n1.id AND n1.id = ci.person_id AND ci.movie_id = t.id AND t.id = mc.movie_id AND mc.company_id = cn.id AND ci.role_id = rt.id AND a1.person_id = ci.person_id AND ci.movie_id = mc.movie_id; 2 | -------------------------------------------------------------------------------- /job/8d.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(an1.name) AS costume_designer_pseudo, MIN(t.title) AS movie_with_costumes FROM aka_name AS an1, cast_info AS ci, company_name AS cn, movie_companies AS mc, name AS n1, role_type AS rt, title AS t WHERE cn.country_code ='[us]' AND rt.role ='costume designer' AND an1.person_id = n1.id AND n1.id = ci.person_id AND ci.movie_id = t.id AND t.id = mc.movie_id AND mc.company_id = cn.id AND ci.role_id = rt.id AND an1.person_id = ci.person_id AND ci.movie_id = mc.movie_id; 2 | -------------------------------------------------------------------------------- /job/9a.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(an.name) AS alternative_name, MIN(chn.name) AS character_name, MIN(t.title) AS movie FROM aka_name AS an, char_name AS chn, cast_info AS ci, company_name AS cn, movie_companies AS mc, name AS n, 
role_type AS rt, title AS t WHERE ci.note in ('(voice)', '(voice: Japanese version)', '(voice) (uncredited)', '(voice: English version)') AND cn.country_code ='[us]' AND mc.note is not NULL and (mc.note like '%(USA)%' or mc.note like '%(worldwide)%') AND n.gender ='f' and n.name like '%Ang%' AND rt.role ='actress' AND t.production_year between 2005 and 2015 AND ci.movie_id = t.id AND t.id = mc.movie_id AND ci.movie_id = mc.movie_id AND mc.company_id = cn.id AND ci.role_id = rt.id AND n.id = ci.person_id AND chn.id = ci.person_role_id AND an.person_id = n.id AND an.person_id = ci.person_id; 2 | -------------------------------------------------------------------------------- /job/9b.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(an.name) AS alternative_name, MIN(chn.name) AS voiced_character, MIN(n.name) AS voicing_actress, MIN(t.title) AS american_movie FROM aka_name AS an, char_name AS chn, cast_info AS ci, company_name AS cn, movie_companies AS mc, name AS n, role_type AS rt, title AS t WHERE ci.note = '(voice)' AND cn.country_code ='[us]' AND mc.note like '%(200%)%' and (mc.note like '%(USA)%' or mc.note like '%(worldwide)%') AND n.gender ='f' and n.name like '%Angel%' AND rt.role ='actress' AND t.production_year between 2007 and 2010 AND ci.movie_id = t.id AND t.id = mc.movie_id AND ci.movie_id = mc.movie_id AND mc.company_id = cn.id AND ci.role_id = rt.id AND n.id = ci.person_id AND chn.id = ci.person_role_id AND an.person_id = n.id AND an.person_id = ci.person_id; 2 | -------------------------------------------------------------------------------- /job/9c.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(an.name) AS alternative_name, MIN(chn.name) AS voiced_character_name, MIN(n.name) AS voicing_actress, MIN(t.title) AS american_movie FROM aka_name AS an, char_name AS chn, cast_info AS ci, company_name AS cn, movie_companies AS mc, name AS n, role_type AS 
rt, title AS t WHERE ci.note in ('(voice)', '(voice: Japanese version)', '(voice) (uncredited)', '(voice: English version)') AND cn.country_code ='[us]' AND n.gender ='f' and n.name like '%An%' AND rt.role ='actress' AND ci.movie_id = t.id AND t.id = mc.movie_id AND ci.movie_id = mc.movie_id AND mc.company_id = cn.id AND ci.role_id = rt.id AND n.id = ci.person_id AND chn.id = ci.person_role_id AND an.person_id = n.id AND an.person_id = ci.person_id; 2 | -------------------------------------------------------------------------------- /job/9d.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(an.name) AS alternative_name, MIN(chn.name) AS voiced_char_name, MIN(n.name) AS voicing_actress, MIN(t.title) AS american_movie FROM aka_name AS an, char_name AS chn, cast_info AS ci, company_name AS cn, movie_companies AS mc, name AS n, role_type AS rt, title AS t WHERE ci.note in ('(voice)', '(voice: Japanese version)', '(voice) (uncredited)', '(voice: English version)') AND cn.country_code ='[us]' AND n.gender ='f' AND rt.role ='actress' AND ci.movie_id = t.id AND t.id = mc.movie_id AND ci.movie_id = mc.movie_id AND mc.company_id = cn.id AND ci.role_id = rt.id AND n.id = ci.person_id AND chn.id = ci.person_role_id AND an.person_id = n.id AND an.person_id = ci.person_id; 2 | -------------------------------------------------------------------------------- /job/README: -------------------------------------------------------------------------------- 1 | This package contains the Join Order Benchmark (JOB) queries from: 2 | 3 | "How Good Are Query Optimizers, Really?" 4 | by Viktor Leis, Andrey Gubichev, Atanas Mirchev, Peter Boncz, Alfons Kemper, Thomas Neumann 5 | PVLDB Volume 9, No. 
3, 2015 6 | 7 | IMDB Data Set 8 | ------------- 9 | 10 | The CSV files used in the paper, which are from May 2013, can be found 11 | at http://homepages.cwi.nl/~boncz/job/imdb.tgz 12 | 13 | The license and links to the current version of the IMDB data set can be 14 | found at http://www.imdb.com/interfaces 15 | Step-by-step instructions: 16 | 1. download *gz files (unpacking not necessary) 17 | wget ftp://ftp.fu-berlin.de/pub/misc/movies/database/*gz 18 | 2. download and unpack imdbpy and the imdbpy2sql.py script 19 | wget https://bitbucket.org/alberanid/imdbpy/get/5.0.zip 20 | 3. create PostgreSQL database (e.g., name imdbload): 21 | createdb imdbload 22 | 4. transform *gz files to relational schema (takes a while) 23 | imdbpy2sql.py -d PATH_TO_GZ_FILES -u postgres://username:password@hostname/imdbload 24 | 25 | Now you should have a PostgreSQL database named "imdbload" with the 26 | imdb data. Note that this database has some secondary indexes (but not 27 | on all foreign key attributes). You can export all tables to CSV: 28 | 29 | \copy aka_name to 'PATH/aka_name.csv' csv 30 | \copy aka_title to 'PATH/aka_title.csv' csv 31 | \copy cast_info to 'PATH/cast_info.csv' csv 32 | \copy char_name to 'PATH/char_name.csv' csv 33 | \copy comp_cast_type to 'PATH/comp_cast_type.csv' csv 34 | \copy company_name to 'PATH/company_name.csv' csv 35 | \copy company_type to 'PATH/company_type.csv' csv 36 | \copy complete_cast to 'PATH/complete_cast.csv' csv 37 | \copy info_type to 'PATH/info_type.csv' csv 38 | \copy keyword to 'PATH/keyword.csv' csv 39 | \copy kind_type to 'PATH/kind_type.csv' csv 40 | \copy link_type to 'PATH/link_type.csv' csv 41 | \copy movie_companies to 'PATH/movie_companies.csv' csv 42 | \copy movie_info to 'PATH/movie_info.csv' csv 43 | \copy movie_info_idx to 'PATH/movie_info_idx.csv' csv 44 | \copy movie_keyword to 'PATH/movie_keyword.csv' csv 45 | \copy movie_link to 'PATH/movie_link.csv' csv 46 | \copy name to 'PATH/name.csv' csv 47 | \copy person_info to 
'PATH/person_info.csv' csv 48 | \copy role_type to 'PATH/role_type.csv' csv 49 | \copy title to 'PATH/title.csv' csv 50 | 51 | To import the CSV files into another database, create all tables (see 52 | schema.sql and optionally fkindexes.sql) and run the same \copy 53 | statements as above, but replace the keyword "to" with "from". 54 | 55 | Questions 56 | --------- 57 | 58 | Contact Viktor Leis (leis@in.tum.de) if you have any questions. 59 | -------------------------------------------------------------------------------- /job/fkindexes.sql: -------------------------------------------------------------------------------- 1 | create index company_id_movie_companies on movie_companies(company_id); 2 | create index company_type_id_movie_companies on movie_companies(company_type_id); 3 | create index info_type_id_movie_info_idx on movie_info_idx(info_type_id); 4 | create index info_type_id_movie_info on movie_info(info_type_id); 5 | create index info_type_id_person_info on person_info(info_type_id); 6 | create index keyword_id_movie_keyword on movie_keyword(keyword_id); 7 | create index kind_id_aka_title on aka_title(kind_id); 8 | create index kind_id_title on title(kind_id); 9 | create index linked_movie_id_movie_link on movie_link(linked_movie_id); 10 | create index link_type_id_movie_link on movie_link(link_type_id); 11 | create index movie_id_aka_title on aka_title(movie_id); 12 | create index movie_id_cast_info on cast_info(movie_id); 13 | create index movie_id_complete_cast on complete_cast(movie_id); 14 | create index movie_id_movie_companies on movie_companies(movie_id); 15 | create index movie_id_movie_info_idx on movie_info_idx(movie_id); 16 | create index movie_id_movie_keyword on movie_keyword(movie_id); 17 | create index movie_id_movie_link on movie_link(movie_id); 18 | create index movie_id_movie_info on movie_info(movie_id); 19 | create index person_id_aka_name on aka_name(person_id); 20 | create index person_id_cast_info on cast_info(person_id); 21 | create 
index person_id_person_info on person_info(person_id); 22 | create index person_role_id_cast_info on cast_info(person_role_id); 23 | create index role_id_cast_info on cast_info(role_id); 24 | -------------------------------------------------------------------------------- /job/schema.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE aka_name ( 2 | id integer NOT NULL PRIMARY KEY, 3 | person_id integer NOT NULL, 4 | name text NOT NULL, 5 | imdb_index character varying(12), 6 | name_pcode_cf character varying(5), 7 | name_pcode_nf character varying(5), 8 | surname_pcode character varying(5), 9 | md5sum character varying(32) 10 | ); 11 | 12 | CREATE TABLE aka_title ( 13 | id integer NOT NULL PRIMARY KEY, 14 | movie_id integer NOT NULL, 15 | title text NOT NULL, 16 | imdb_index character varying(12), 17 | kind_id integer NOT NULL, 18 | production_year integer, 19 | phonetic_code character varying(5), 20 | episode_of_id integer, 21 | season_nr integer, 22 | episode_nr integer, 23 | note text, 24 | md5sum character varying(32) 25 | ); 26 | 27 | CREATE TABLE cast_info ( 28 | id integer NOT NULL PRIMARY KEY, 29 | person_id integer NOT NULL, 30 | movie_id integer NOT NULL, 31 | person_role_id integer, 32 | note text, 33 | nr_order integer, 34 | role_id integer NOT NULL 35 | ); 36 | 37 | CREATE TABLE char_name ( 38 | id integer NOT NULL PRIMARY KEY, 39 | name text NOT NULL, 40 | imdb_index character varying(12), 41 | imdb_id integer, 42 | name_pcode_nf character varying(5), 43 | surname_pcode character varying(5), 44 | md5sum character varying(32) 45 | ); 46 | 47 | CREATE TABLE comp_cast_type ( 48 | id integer NOT NULL PRIMARY KEY, 49 | kind character varying(32) NOT NULL 50 | ); 51 | 52 | CREATE TABLE company_name ( 53 | id integer NOT NULL PRIMARY KEY, 54 | name text NOT NULL, 55 | country_code character varying(255), 56 | imdb_id integer, 57 | name_pcode_nf character varying(5), 58 | name_pcode_sf character varying(5), 
59 | md5sum character varying(32) 60 | ); 61 | 62 | CREATE TABLE company_type ( 63 | id integer NOT NULL PRIMARY KEY, 64 | kind character varying(32) NOT NULL 65 | ); 66 | 67 | CREATE TABLE complete_cast ( 68 | id integer NOT NULL PRIMARY KEY, 69 | movie_id integer, 70 | subject_id integer NOT NULL, 71 | status_id integer NOT NULL 72 | ); 73 | 74 | CREATE TABLE info_type ( 75 | id integer NOT NULL PRIMARY KEY, 76 | info character varying(32) NOT NULL 77 | ); 78 | 79 | CREATE TABLE keyword ( 80 | id integer NOT NULL PRIMARY KEY, 81 | keyword text NOT NULL, 82 | phonetic_code character varying(5) 83 | ); 84 | 85 | CREATE TABLE kind_type ( 86 | id integer NOT NULL PRIMARY KEY, 87 | kind character varying(15) NOT NULL 88 | ); 89 | 90 | CREATE TABLE link_type ( 91 | id integer NOT NULL PRIMARY KEY, 92 | link character varying(32) NOT NULL 93 | ); 94 | 95 | CREATE TABLE movie_companies ( 96 | id integer NOT NULL PRIMARY KEY, 97 | movie_id integer NOT NULL, 98 | company_id integer NOT NULL, 99 | company_type_id integer NOT NULL, 100 | note text 101 | ); 102 | 103 | CREATE TABLE movie_info ( 104 | id integer NOT NULL PRIMARY KEY, 105 | movie_id integer NOT NULL, 106 | info_type_id integer NOT NULL, 107 | info text NOT NULL, 108 | note text 109 | ); 110 | 111 | CREATE TABLE movie_info_idx ( 112 | id integer NOT NULL PRIMARY KEY, 113 | movie_id integer NOT NULL, 114 | info_type_id integer NOT NULL, 115 | info text NOT NULL, 116 | note text 117 | ); 118 | 119 | CREATE TABLE movie_keyword ( 120 | id integer NOT NULL PRIMARY KEY, 121 | movie_id integer NOT NULL, 122 | keyword_id integer NOT NULL 123 | ); 124 | 125 | CREATE TABLE movie_link ( 126 | id integer NOT NULL PRIMARY KEY, 127 | movie_id integer NOT NULL, 128 | linked_movie_id integer NOT NULL, 129 | link_type_id integer NOT NULL 130 | ); 131 | 132 | CREATE TABLE name ( 133 | id integer NOT NULL PRIMARY KEY, 134 | name text NOT NULL, 135 | imdb_index character varying(12), 136 | imdb_id integer, 137 | gender character 
varying(1), 138 | name_pcode_cf character varying(5), 139 | name_pcode_nf character varying(5), 140 | surname_pcode character varying(5), 141 | md5sum character varying(32) 142 | ); 143 | 144 | CREATE TABLE person_info ( 145 | id integer NOT NULL PRIMARY KEY, 146 | person_id integer NOT NULL, 147 | info_type_id integer NOT NULL, 148 | info text NOT NULL, 149 | note text 150 | ); 151 | 152 | CREATE TABLE role_type ( 153 | id integer NOT NULL PRIMARY KEY, 154 | role character varying(32) NOT NULL 155 | ); 156 | 157 | CREATE TABLE title ( 158 | id integer NOT NULL PRIMARY KEY, 159 | title text NOT NULL, 160 | imdb_index character varying(12), 161 | kind_id integer NOT NULL, 162 | production_year integer, 163 | imdb_id integer, 164 | phonetic_code character varying(5), 165 | episode_of_id integer, 166 | season_nr integer, 167 | episode_nr integer, 168 | series_years character varying(49), 169 | md5sum character varying(32) 170 | ); 171 | -------------------------------------------------------------------------------- /src/csv_parser.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | CSVParser::Error CSVParser::execute(const char* buffer, size_t len) { 4 | size_t i = 0; 5 | if (this->escaping_) { 6 | if (this->escape_ == '"') { 7 | if (buffer[0] == '"') { 8 | ++i; 9 | this->current_field_.push_back('"'); 10 | } else { 11 | this->quoted_ = false; 12 | } 13 | } else { 14 | char c = buffer[0]; 15 | if (c == '"' or c == this->escape_) { 16 | this->current_field_.push_back(c); 17 | ++i; 18 | } else { 19 | this->current_field_.push_back(this->escape_); 20 | } 21 | } 22 | this->escaping_ = false; 23 | } 24 | if (this->newlining_) { 25 | if (len > 0 and buffer[0] == '\n') { 26 | ++i; 27 | } 28 | if (this->has_trailing_comma_) { 29 | if (not this->after_field_sep_) { 30 | return NoTrailingComma; 31 | } 32 | if (not this->after_first_row_) { 33 | this->after_first_row_ = true; 34 | this->num_cols_ = this->col_idx_; 35 | } 
else [[likely]] { 36 | if (this->col_idx_ != this->num_cols_) { 37 | return InconsistentColumns; 38 | } 39 | } 40 | } else { 41 | if (not this->after_first_row_) { 42 | this->after_first_row_ = true; 43 | this->num_cols_ = this->col_idx_ + 1; 44 | } else [[likely]] { 45 | if (this->col_idx_ + 1 != this->num_cols_) { 46 | return InconsistentColumns; 47 | } 48 | } 49 | this->on_field(this->col_idx_, 50 | this->row_idx_, 51 | this->current_field_.data(), 52 | this->current_field_.size()); 53 | this->current_field_.clear(); 54 | } 55 | this->col_idx_ = 0; 56 | ++this->row_idx_; 57 | this->after_record_sep_ = true; 58 | this->newlining_ = false; 59 | } 60 | for (; i < len; ++i) { 61 | bool set_after_record_sep = false; 62 | bool set_after_field_sep = false; 63 | char c = buffer[i]; 64 | if (c != this->comma_ and c != '\n' and c != '\r' and c != '"' and c != this->escape_) 65 | [[likely]] { 66 | this->current_field_.push_back(c); 67 | } else if (c == this->comma_) { 68 | if (not this->quoted_) [[likely]] { 69 | this->on_field(this->col_idx_, 70 | this->row_idx_, 71 | this->current_field_.data(), 72 | this->current_field_.size()); 73 | this->current_field_.clear(); 74 | ++this->col_idx_; 75 | set_after_field_sep = true; 76 | } else { 77 | this->current_field_.push_back(c); 78 | } 79 | } else if (c == '\n' or c == '\r') { 80 | if (not this->quoted_) [[likely]] { 81 | if (c == '\r') { 82 | if (i + 1 == len) { 83 | this->newlining_ = true; 84 | return Ok; 85 | } 86 | if (buffer[i + 1] == '\n') { 87 | ++i; 88 | } 89 | } 90 | if (this->has_trailing_comma_) { 91 | if (not this->after_field_sep_) { 92 | return NoTrailingComma; 93 | } 94 | if (not this->after_first_row_) { 95 | this->after_first_row_ = true; 96 | this->num_cols_ = this->col_idx_; 97 | } else [[likely]] { 98 | if (this->col_idx_ != this->num_cols_) { 99 | return InconsistentColumns; 100 | } 101 | } 102 | } else { 103 | if (not this->after_first_row_) { 104 | this->after_first_row_ = true; 105 | this->num_cols_ = 
this->col_idx_ + 1; 106 | } else [[likely]] { 107 | if (this->col_idx_ + 1 != this->num_cols_) { 108 | return InconsistentColumns; 109 | } 110 | } 111 | this->on_field(this->col_idx_, 112 | this->row_idx_, 113 | this->current_field_.data(), 114 | this->current_field_.size()); 115 | this->current_field_.clear(); 116 | } 117 | this->col_idx_ = 0; 118 | ++this->row_idx_; 119 | set_after_record_sep = true; 120 | } else { 121 | this->current_field_.push_back(c); 122 | } 123 | } else if (c == '"') { 124 | if (this->escape_ == '"') { 125 | if (not this->quoted_) { 126 | this->quoted_ = true; 127 | } else { 128 | if (i + 1 == len) { 129 | this->escaping_ = true; 130 | return Ok; 131 | } 132 | if (buffer[i + 1] == '"') { 133 | ++i; 134 | this->current_field_.push_back(c); 135 | } else { 136 | this->quoted_ = false; 137 | } 138 | } 139 | } else { 140 | this->quoted_ = not this->quoted_; 141 | } 142 | } else { 143 | if (this->quoted_) [[likely]] { 144 | if (i + 1 == len) { 145 | this->escaping_ = true; 146 | return Ok; 147 | } 148 | char c = buffer[i + 1]; 149 | if (c == '"' or c == this->escape_) { 150 | this->current_field_.push_back(c); 151 | ++i; 152 | } else { 153 | this->current_field_.push_back(this->escape_); 154 | } 155 | } else { 156 | this->current_field_.push_back(c); 157 | } 158 | } 159 | this->after_field_sep_ = set_after_field_sep; 160 | this->after_record_sep_ = set_after_record_sep; 161 | } 162 | return Ok; 163 | } 164 | 165 | CSVParser::Error CSVParser::finish() { 166 | if (this->quoted_) { 167 | return QuoteNotClosed; 168 | } else if (this->newlining_) { 169 | return this->execute("", 0); 170 | } else if (not this->after_record_sep_) { 171 | return this->execute("\n", 1); 172 | } else { 173 | return Ok; 174 | } 175 | } 176 | -------------------------------------------------------------------------------- /src/execute.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | namespace 
Contest { 6 | 7 | using ExecuteResult = std::vector>; 8 | 9 | ExecuteResult execute_impl(const Plan& plan, size_t node_idx); 10 | /* Hash-join helper. Holds references to the two materialized inputs (`left`, `right`), the output vector `results`, the join column index on each side and the output column list. */ 11 | struct JoinAlgorithm { 12 | bool build_left; 13 | ExecuteResult& left; 14 | ExecuteResult& right; 15 | ExecuteResult& results; 16 | size_t left_col, right_col; 17 | const std::vector>& output_attrs; 18 | /* run(): builds a hash table from join key to row indices over `left` (build_left == true) or `right`, probes it with every record of the other side and appends one row to `results` per matching pair. An output column index below left_record.size() selects from the left record, anything else from the right record at (index - left_record.size()). A key that fails the compile-time type checks raises std::runtime_error("wrong type of field"). NOTE(review): the template arguments of the type checks are not visible in this listing - confirm which alternatives are accepted/ignored. */ 19 | template 20 | auto run() { 21 | namespace views = ranges::views; 22 | std::unordered_map> hash_table; 23 | if (build_left) { 24 | for (auto&& [idx, record]: left | views::enumerate) { 25 | std::visit( 26 | [&hash_table, idx = idx](const auto& key) { 27 | using Tk = std::decay_t; 28 | if constexpr (std::is_same_v) { 29 | if (auto itr = hash_table.find(key); itr == hash_table.end()) { 30 | hash_table.emplace(key, std::vector(1, idx)); 31 | } else { 32 | itr->second.push_back(idx); 33 | } 34 | } else if constexpr (not std::is_same_v) { 35 | throw std::runtime_error("wrong type of field"); 36 | } 37 | }, 38 | record[left_col]); 39 | } 40 | for (auto& right_record: right) { 41 | std::visit( 42 | [&](const auto& key) { 43 | using Tk = std::decay_t; 44 | if constexpr (std::is_same_v) { 45 | if (auto itr = hash_table.find(key); itr != hash_table.end()) { 46 | for (auto left_idx: itr->second) { 47 | auto& left_record = left[left_idx]; 48 | std::vector new_record; 49 | new_record.reserve(output_attrs.size()); 50 | for (auto [col_idx, _]: output_attrs) { 51 | if (col_idx < left_record.size()) { 52 | new_record.emplace_back(left_record[col_idx]); 53 | } else { 54 | new_record.emplace_back( 55 | right_record[col_idx - left_record.size()]); 56 | } 57 | } 58 | results.emplace_back(std::move(new_record)); 59 | } 60 | } 61 | } else if constexpr (not std::is_same_v) { 62 | throw std::runtime_error("wrong type of field"); 63 | } 64 | }, 65 | right_record[right_col]); 66 | } 67 | } else { /* mirror image of the branch above: build on `right`, probe with `left` */ 68 | for (auto&& [idx, record]: right | views::enumerate) { 69 | std::visit( 70 | [&hash_table, idx = idx](const auto& key) { 71 | using Tk = std::decay_t; 72 | 
if constexpr (std::is_same_v) { 73 | if (auto itr = hash_table.find(key); itr == hash_table.end()) { 74 | hash_table.emplace(key, std::vector(1, idx)); 75 | } else { 76 | itr->second.push_back(idx); 77 | } 78 | } else if constexpr (not std::is_same_v) { 79 | throw std::runtime_error("wrong type of field"); 80 | } 81 | }, 82 | record[right_col]); 83 | } 84 | for (auto& left_record: left) { 85 | std::visit( 86 | [&](const auto& key) { 87 | using Tk = std::decay_t; 88 | if constexpr (std::is_same_v) { 89 | if (auto itr = hash_table.find(key); itr != hash_table.end()) { 90 | for (auto right_idx: itr->second) { 91 | auto& right_record = right[right_idx]; 92 | std::vector new_record; 93 | new_record.reserve(output_attrs.size()); 94 | for (auto [col_idx, _]: output_attrs) { 95 | if (col_idx < left_record.size()) { 96 | new_record.emplace_back(left_record[col_idx]); 97 | } else { 98 | new_record.emplace_back( 99 | right_record[col_idx - left_record.size()]); 100 | } 101 | } 102 | results.emplace_back(std::move(new_record)); 103 | } 104 | } 105 | } else if constexpr (not std::is_same_v) { 106 | throw std::runtime_error("wrong type of field"); 107 | } 108 | }, 109 | left_record[left_col]); 110 | } 111 | } 112 | } 113 | }; 114 | /* Executes both children of `join` through execute_impl, then runs JoinAlgorithm; the run() instantiation is chosen by the declared DataType of the join column on the build side. Returns the joined rows. */ 115 | ExecuteResult execute_hash_join(const Plan& plan, 116 | const JoinNode& join, 117 | const std::vector>& output_attrs) { 118 | auto left_idx = join.left; 119 | auto right_idx = join.right; 120 | auto& left_node = plan.nodes[left_idx]; 121 | auto& right_node = plan.nodes[right_idx]; 122 | auto& left_types = left_node.output_attrs; 123 | auto& right_types = right_node.output_attrs; 124 | auto left = execute_impl(plan, left_idx); 125 | auto right = execute_impl(plan, right_idx); 126 | std::vector> results; 127 | /* JoinAlgorithm only stores references, so left/right/results must stay alive until run() returns */ 128 | JoinAlgorithm join_algorithm{.build_left = join.build_left, 129 | .left = left, 130 | .right = right, 131 | .results = results, 132 | .left_col = join.left_attr, 133 | .right_col = join.right_attr, 134 | .output_attrs = output_attrs}; 
135 | if (join.build_left) { 136 | switch (std::get<1>(left_types[join.left_attr])) { 137 | case DataType::INT32: join_algorithm.run(); break; 138 | case DataType::INT64: join_algorithm.run(); break; 139 | case DataType::FP64: join_algorithm.run(); break; 140 | case DataType::VARCHAR: join_algorithm.run(); break; 141 | } 142 | } else { 143 | switch (std::get<1>(right_types[join.right_attr])) { 144 | case DataType::INT32: join_algorithm.run(); break; 145 | case DataType::INT64: join_algorithm.run(); break; 146 | case DataType::FP64: join_algorithm.run(); break; 147 | case DataType::VARCHAR: join_algorithm.run(); break; 148 | } 149 | } 150 | 151 | return results; 152 | } 153 | /* Materializes base table plan.inputs[scan.base_table_id] with Table::from_columnar and projects every record onto the column indices listed in output_attrs. */ 154 | ExecuteResult execute_scan(const Plan& plan, 155 | const ScanNode& scan, 156 | const std::vector>& output_attrs) { 157 | auto table_id = scan.base_table_id; 158 | auto& input = plan.inputs[table_id]; 159 | auto table = Table::from_columnar(input); 160 | std::vector> results; 161 | for (auto& record: table.table()) { 162 | std::vector new_record; 163 | new_record.reserve(output_attrs.size()); 164 | for (auto [col_idx, _]: output_attrs) { 165 | new_record.emplace_back(record[col_idx]); 166 | } 167 | results.emplace_back(std::move(new_record)); 168 | } 169 | return results; 170 | } 171 | /* Executes plan node `node_idx` by visiting node.data: one alternative is forwarded to execute_hash_join, every other alternative to execute_scan, each with the node's own output_attrs. */ 172 | ExecuteResult execute_impl(const Plan& plan, size_t node_idx) { 173 | auto& node = plan.nodes[node_idx]; 174 | return std::visit( 175 | [&](const auto& value) { 176 | using T = std::decay_t; 177 | if constexpr (std::is_same_v) { 178 | return execute_hash_join(plan, value, node.output_attrs); 179 | } else { 180 | return execute_scan(plan, value, node.output_attrs); 181 | } 182 | }, 183 | node.data); 184 | } 185 | /* Entry point: executes the plan starting at plan.root and converts the resulting rows, together with the root node's output types, into a ColumnarTable. `context` is unused. */ 186 | ColumnarTable execute(const Plan& plan, [[maybe_unused]] void* context) { 187 | namespace views = ranges::views; 188 | auto ret = execute_impl(plan, plan.root); 189 | auto ret_types = plan.nodes[plan.root].output_attrs 190 | | views::transform([](const auto& v) { return std::get<1>(v); }) 
191 | | ranges::to>(); 192 | Table table{std::move(ret), std::move(ret_types)}; 193 | return table.to_columnar(); 194 | } 195 | /* Creates the execution context; this implementation has none and returns nullptr. */ 196 | void* build_context() { 197 | return nullptr; 198 | } 199 | /* Counterpart of build_context(); nothing to release. */ 200 | void destroy_context([[maybe_unused]] void* context) {} 201 | 202 | } // namespace Contest 203 | -------------------------------------------------------------------------------- /src/statement.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | /* Returns the element-wise bitwise complement of `bitmap`. The argument is taken by value, so the caller's vector is not modified; the work is handed to filter_tp.run as [begin, end) index ranges (filter_tp is declared elsewhere - presumably a thread pool, TODO confirm). */ 8 | std::vector bitmap_not(std::vector bitmap) { 9 | auto task = [&bitmap](size_t begin, size_t end) { 10 | for (size_t i = begin; i < end; ++i) { 11 | bitmap[i] = ~bitmap[i]; 12 | } 13 | }; 14 | filter_tp.run(task, bitmap.size()); 15 | return bitmap; 16 | } 17 | /* Returns the element-wise bitwise AND of two equally sized bitmaps (size equality is only checked by assert). */ 18 | std::vector bitmap_and(const std::vector& lhs, 19 | const std::vector& rhs) { 20 | std::vector ret; 21 | assert(lhs.size() == rhs.size()); 22 | ret.resize(lhs.size()); 23 | auto task = [&lhs, &rhs, &ret](size_t begin, size_t end) { 24 | for (size_t i = begin; i < end; ++i) { 25 | ret[i] = lhs[i] & rhs[i]; 26 | } 27 | }; 28 | filter_tp.run(task, lhs.size()); 29 | return ret; 30 | } 31 | /* Returns the element-wise bitwise OR of two equally sized bitmaps (size equality is only checked by assert). */ 32 | std::vector bitmap_or(const std::vector& lhs, 33 | const std::vector& rhs) { 34 | std::vector ret; 35 | assert(lhs.size() == rhs.size()); 36 | ret.resize(lhs.size()); 37 | auto task = [&lhs, &rhs, &ret](size_t begin, size_t end) { 38 | for (size_t i = begin; i < end; ++i) { 39 | ret[i] = lhs[i] | rhs[i]; 40 | } 41 | }; 42 | filter_tp.run(task, lhs.size()); 43 | return ret; 44 | } 45 | /* Column-wise evaluation: looks up table[column], switches on its DataType and returns a bitmap. IS_NULL yields the complement of the column's own bitmap, IS_NOT_NULL the bitmap itself; every other operator is delegated to the typed column's comparison method. */ 46 | std::vector Comparison::eval(const std::vector& table) const { 47 | auto* c = table[column]; 48 | switch (c->type) { 49 | case DataType::INT32: { 50 | auto column = reinterpret_cast*>(c); 51 | if (op == IS_NULL) { 52 | return bitmap_not(column->bitmap); 53 | } else if (op == IS_NOT_NULL) { 54 | return column->bitmap; 55 | } else { 56 | auto comp_value = static_cast(std::get(value)); 57 | 
switch (op) { 58 | case EQ: return column->equal(comp_value); 59 | case NEQ: return column->not_equal(comp_value); 60 | case LT: return column->less(comp_value); 61 | case GT: return column->greater(comp_value); 62 | case LEQ: return column->less_equal(comp_value); 63 | case GEQ: return column->greater_equal(comp_value); 64 | default: unreachable(); /* LIKE / NOT_LIKE are only dispatched in the VARCHAR case */ 65 | } 66 | } 67 | break; 68 | } 69 | case DataType::INT64: { 70 | auto column = reinterpret_cast*>(c); 71 | if (op == IS_NULL) { 72 | return bitmap_not(column->bitmap); 73 | } else if (op == IS_NOT_NULL) { 74 | return column->bitmap; 75 | } else { 76 | auto comp_value = std::get(value); 77 | switch (op) { 78 | case EQ: return column->equal(comp_value); 79 | case NEQ: return column->not_equal(comp_value); 80 | case LT: return column->less(comp_value); 81 | case GT: return column->greater(comp_value); 82 | case LEQ: return column->less_equal(comp_value); 83 | case GEQ: return column->greater_equal(comp_value); 84 | default: unreachable(); 85 | } 86 | } 87 | break; 88 | } 89 | case DataType::FP64: { 90 | auto column = reinterpret_cast*>(c); 91 | if (op == IS_NULL) { 92 | return bitmap_not(column->bitmap); 93 | } else if (op == IS_NOT_NULL) { 94 | return column->bitmap; 95 | } else { 96 | auto comp_value = std::get(value); 97 | switch (op) { 98 | case EQ: return column->equal(comp_value); 99 | case NEQ: return column->not_equal(comp_value); 100 | case LT: return column->less(comp_value); 101 | case GT: return column->greater(comp_value); 102 | case LEQ: return column->less_equal(comp_value); 103 | case GEQ: return column->greater_equal(comp_value); 104 | default: unreachable(); 105 | } 106 | } 107 | break; 108 | } 109 | case DataType::VARCHAR: { 110 | auto column = reinterpret_cast*>(c); 111 | if (op == IS_NULL) { 112 | return bitmap_not(column->bitmap); 113 | } else if (op == IS_NOT_NULL) { 114 | return column->bitmap; 115 | } else { 116 | auto& comp_value = std::get(value); 117 | switch (op) { 118 | case EQ: return 
column->equal(comp_value); 119 | case NEQ: return column->not_equal(comp_value); 120 | case LT: return column->less(comp_value); 121 | case GT: return column->greater(comp_value); 122 | case LEQ: return column->less_equal(comp_value); 123 | case GEQ: return column->greater_equal(comp_value); 124 | case LIKE: return column->like(comp_value); 125 | case NOT_LIKE: return column->not_like(comp_value); 126 | default: unreachable(); 127 | } 128 | } 129 | break; 130 | } 131 | } 132 | unreachable(); 133 | } 134 | /* Row-wise evaluation of this comparison against record[column]. IS_NULL / IS_NOT_NULL only inspect which variant alternative the cell holds (presumably the null alternative - its type is not visible in this listing). LIKE / NOT_LIKE require both the cell and the comparison value to be strings and use like_match. All other operators compare numerically when get_numeric_value yields a value for both sides, otherwise as strings when both are strings; any other combination evaluates to false. */ 135 | bool Comparison::eval(const std::vector& record) const { 136 | const Data& record_data = record[column]; 137 | const auto& comp_value = value; 138 | 139 | switch (op) { 140 | case IS_NULL: return std::holds_alternative(record_data); 141 | case IS_NOT_NULL: return !std::holds_alternative(record_data); 142 | default: break; 143 | } 144 | 145 | if (op == LIKE || op == NOT_LIKE) { 146 | const std::string* record_str = std::get_if(&record_data); 147 | const std::string* comp_str = std::get_if(&comp_value); 148 | if (!record_str || !comp_str) { 149 | return false; 150 | } 151 | bool match = like_match(*record_str, *comp_str); 152 | return (op == LIKE) ? 
match : !match; 153 | } else { 154 | auto record_num = get_numeric_value(record_data); 155 | auto comp_num = get_numeric_value(comp_value); 156 | if (record_num.has_value() && comp_num.has_value()) { 157 | switch (op) { 158 | case EQ: return *record_num == *comp_num; 159 | case NEQ: return *record_num != *comp_num; 160 | case LT: return *record_num < *comp_num; 161 | case GT: return *record_num > *comp_num; 162 | case LEQ: return *record_num <= *comp_num; 163 | case GEQ: return *record_num >= *comp_num; 164 | default: return false; 165 | } 166 | } else { 167 | const std::string* record_str = std::get_if(&record_data); 168 | const std::string* comp_str = std::get_if(&comp_value); 169 | if (record_str && comp_str) { 170 | switch (op) { 171 | case EQ: return *record_str == *comp_str; 172 | case NEQ: return *record_str != *comp_str; 173 | case LT: return *record_str < *comp_str; 174 | case GT: return *record_str > *comp_str; 175 | case LEQ: return *record_str <= *comp_str; 176 | case GEQ: return *record_str >= *comp_str; 177 | default: return false; 178 | } 179 | } else { 180 | return false; 181 | } 182 | } 183 | } 184 | } 185 | /* Column-wise evaluation of a logical node: combines the children's bitmaps with bitmap_and / bitmap_or / bitmap_not. NOTE(review): AND and OR combine only children[0] and children[1], whereas the per-record overload below iterates over all children - confirm that AND/OR nodes always have exactly two children. */ 186 | std::vector LogicalOperation::eval( 187 | const std::vector& table) const { 188 | switch (op_type) { 189 | case AND: { 190 | return bitmap_and(children[0]->eval(table), children[1]->eval(table)); 191 | } 192 | case OR: { 193 | return bitmap_or(children[0]->eval(table), children[1]->eval(table)); 194 | } 195 | case NOT: { 196 | return bitmap_not(children[0]->eval(table)); 197 | } 198 | } 199 | unreachable(); 200 | } 201 | /* Row-wise evaluation of a logical node with short-circuiting: AND is true only if every child is true, OR is true if any child is true. */ 202 | bool LogicalOperation::eval(const std::vector& record) const { 203 | switch (op_type) { 204 | case AND: { 205 | for (const auto& child: children) { 206 | if (!child->eval(record)) { 207 | return false; 208 | } 209 | } 210 | return true; 211 | } 212 | case OR: { 213 | for (const auto& child: children) { 214 | if (child->eval(record)) { 215 | return true; 216 | } 217 | } 218 | return false; 219 | } 220 | case NOT: { 
221 | if (children.size() != 1) { 222 | return false; 223 | } 224 | return !children[0]->eval(record); 225 | } 226 | default: return false; 227 | } 228 | } 229 | -------------------------------------------------------------------------------- /tests/build_database.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | #include 5 | 6 | int main(int argc, char* argv[]) { 7 | using namespace duckdb; 8 | namespace fs = std::filesystem; 9 | 10 | if (argc < 2) { 11 | fmt::println(stderr, "Usage: {} ", argv[0]); 12 | exit(EXIT_FAILURE); 13 | } 14 | 15 | auto schema = read_file(fs::path("job") / "schema.sql"); 16 | 17 | DuckDB db(argv[1]); 18 | Connection conn(db); 19 | auto result = conn.Query(schema); 20 | if (result->HasError()) { 21 | fmt::println("Error: {}", result->GetError()); 22 | } 23 | 24 | std::vector table_names{ 25 | "char_name", 26 | "kind_type", 27 | "cast_info", 28 | "movie_companies", 29 | "role_type", 30 | "complete_cast", 31 | "comp_cast_type", 32 | "company_name", 33 | "company_type", 34 | "movie_link", 35 | "movie_keyword", 36 | "name", 37 | "info_type", 38 | "movie_info_idx", 39 | "person_info", 40 | "link_type", 41 | "title", 42 | "aka_name", 43 | "movie_info", 44 | "keyword", 45 | "aka_title", 46 | }; 47 | 48 | for (auto& table: table_names) { 49 | result = 50 | conn.Query(fmt::format("COPY {0} FROM 'imdb/{0}.csv' (ESCAPE '\\');", table)); 51 | if (result->HasError()) { 52 | fmt::println("Error: {}", result->GetError()); 53 | } else { 54 | fmt::println("Successfully loaded table {} into {}", table, argv[1]); 55 | } 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /tests/unit_tests.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | #include 5 | 6 | void sort(std::vector>& table) { 7 | std::sort(table.begin(), table.end()); 8 | } 9 | 10 | TEST_CASE("Empty join", 
"[join]") { 11 | Plan plan; 12 | plan.new_scan_node(0, {{0, DataType::INT32}}); 13 | plan.new_scan_node(1, {{0, DataType::INT32}}); 14 | plan.new_join_node(true, 0, 1, 0, 0, {{0, DataType::INT32}, {1, DataType::INT32}}); 15 | ColumnarTable table1, table2; 16 | table1.columns.emplace_back(DataType::INT32); 17 | table2.columns.emplace_back(DataType::INT32); 18 | plan.inputs.emplace_back(std::move(table1)); 19 | plan.inputs.emplace_back(std::move(table2)); 20 | plan.root = 2; 21 | auto* context = Contest::build_context(); 22 | auto result = Contest::execute(plan, context); 23 | Contest::destroy_context(context); 24 | REQUIRE(result.num_rows == 0); 25 | REQUIRE(result.columns.size() == 2); 26 | REQUIRE(result.columns[0].type == DataType::INT32); 27 | REQUIRE(result.columns[1].type == DataType::INT32); 28 | } 29 | 30 | TEST_CASE("One line join", "[join]") { 31 | Plan plan; 32 | plan.new_scan_node(0, {{0, DataType::INT32}}); 33 | plan.new_scan_node(1, {{0, DataType::INT32}}); 34 | plan.new_join_node(true, 0, 1, 0, 0, {{0, DataType::INT32}, {1, DataType::INT32}}); 35 | std::vector> data{ 36 | {1, }, 37 | }; 38 | std::vector types{DataType::INT32}; 39 | Table table(std::move(data), std::move(types)); 40 | ColumnarTable table1 = table.to_columnar(); 41 | ColumnarTable table2 = table.to_columnar(); 42 | plan.inputs.emplace_back(std::move(table1)); 43 | plan.inputs.emplace_back(std::move(table2)); 44 | plan.root = 2; 45 | auto* context = Contest::build_context(); 46 | auto result = Contest::execute(plan, context); 47 | Contest::destroy_context(context); 48 | REQUIRE(result.num_rows == 1); 49 | REQUIRE(result.columns.size() == 2); 50 | REQUIRE(result.columns[0].type == DataType::INT32); 51 | REQUIRE(result.columns[1].type == DataType::INT32); 52 | auto result_table = Table::from_columnar(result); 53 | std::vector> ground_truth{ 54 | {1, 1,}, 55 | }; 56 | REQUIRE(result_table.table() == ground_truth); 57 | } 58 | 59 | TEST_CASE("Simple join", "[join]") { 60 | Plan plan; 61 | 
plan.new_scan_node(0, {{0, DataType::INT32}}); 62 | plan.new_scan_node(1, {{0, DataType::INT32}}); 63 | plan.new_join_node(true, 0, 1, 0, 0, {{0, DataType::INT32}, {1, DataType::INT32}}); 64 | std::vector> data{ 65 | {1,}, 66 | {2,}, 67 | {3,}, 68 | }; 69 | std::vector types{DataType::INT32}; 70 | Table table(std::move(data), std::move(types)); 71 | ColumnarTable table1 = table.to_columnar(); 72 | ColumnarTable table2 = table.to_columnar(); 73 | plan.inputs.emplace_back(std::move(table1)); 74 | plan.inputs.emplace_back(std::move(table2)); 75 | plan.root = 2; 76 | auto* context = Contest::build_context(); 77 | auto result = Contest::execute(plan, context); 78 | Contest::destroy_context(context); 79 | REQUIRE(result.num_rows == 3); 80 | REQUIRE(result.columns.size() == 2); 81 | REQUIRE(result.columns[0].type == DataType::INT32); 82 | REQUIRE(result.columns[1].type == DataType::INT32); 83 | auto result_table = Table::from_columnar(result); 84 | std::vector> ground_truth{ 85 | {1, 1,}, 86 | {2, 2,}, 87 | {3, 3,}, 88 | }; 89 | sort(result_table.table()); 90 | REQUIRE(result_table.table() == ground_truth); 91 | } 92 | 93 | TEST_CASE("Empty Result", "[join]") { 94 | Plan plan; 95 | plan.new_scan_node(0, {{0, DataType::INT32}}); 96 | plan.new_scan_node(1, {{0, DataType::INT32}}); 97 | plan.new_join_node(true, 0, 1, 0, 0, {{0, DataType::INT32}, {1, DataType::INT32}}); 98 | std::vector> data1{ 99 | {1,}, 100 | {2,}, 101 | {3,}, 102 | }; 103 | std::vector> data2{ 104 | {4,}, 105 | {5,}, 106 | {6,}, 107 | }; 108 | std::vector types{DataType::INT32}; 109 | Table table1(std::move(data1), types); 110 | Table table2(std::move(data2), std::move(types)); 111 | ColumnarTable input1 = table1.to_columnar(); 112 | ColumnarTable input2 = table2.to_columnar(); 113 | plan.inputs.emplace_back(std::move(input1)); 114 | plan.inputs.emplace_back(std::move(input2)); 115 | plan.root = 2; 116 | auto* context = Contest::build_context(); 117 | auto result = Contest::execute(plan, context); 118 | 
Contest::destroy_context(context); 119 | REQUIRE(result.num_rows == 0); 120 | REQUIRE(result.columns.size() == 2); 121 | REQUIRE(result.columns[0].type == DataType::INT32); 122 | REQUIRE(result.columns[1].type == DataType::INT32); 123 | } 124 | 125 | TEST_CASE("Multiple same keys", "[join]") { 126 | Plan plan; 127 | plan.new_scan_node(0, {{0, DataType::INT32}}); 128 | plan.new_scan_node(1, {{0, DataType::INT32}}); 129 | plan.new_join_node(true, 0, 1, 0, 0, {{0, DataType::INT32}, {1, DataType::INT32}}); 130 | std::vector> data1{ 131 | {1,}, 132 | {1,}, 133 | {2,}, 134 | {3,}, 135 | }; 136 | std::vector types{DataType::INT32}; 137 | Table table1(std::move(data1), std::move(types)); 138 | ColumnarTable input1 = table1.to_columnar(); 139 | ColumnarTable input2 = table1.to_columnar(); 140 | plan.inputs.emplace_back(std::move(input1)); 141 | plan.inputs.emplace_back(std::move(input2)); 142 | plan.root = 2; 143 | auto* context = Contest::build_context(); 144 | auto result = Contest::execute(plan, context); 145 | Contest::destroy_context(context); 146 | REQUIRE(result.num_rows == 6); 147 | REQUIRE(result.columns.size() == 2); 148 | REQUIRE(result.columns[0].type == DataType::INT32); 149 | REQUIRE(result.columns[1].type == DataType::INT32); 150 | auto result_table = Table::from_columnar(result); 151 | std::vector> ground_truth{ 152 | {1, 1,}, 153 | {1, 1,}, 154 | {1, 1,}, 155 | {1, 1,}, 156 | {2, 2,}, 157 | {3, 3,}, 158 | }; 159 | sort(result_table.table()); 160 | REQUIRE(result_table.table() == ground_truth); 161 | } 162 | 163 | TEST_CASE("NULL keys", "[join]") { 164 | Plan plan; 165 | plan.new_scan_node(0, {{0, DataType::INT32}}); 166 | plan.new_scan_node(1, {{0, DataType::INT32}}); 167 | plan.new_join_node(true, 0, 1, 0, 0, {{0, DataType::INT32}, {1, DataType::INT32}}); 168 | std::vector> data1{ 169 | {1, }, 170 | {1, }, 171 | {std::monostate{},}, 172 | {2, }, 173 | {3, }, 174 | }; 175 | std::vector types{DataType::INT32}; 176 | Table table1(std::move(data1), 
std::move(types)); 177 | ColumnarTable input1 = table1.to_columnar(); 178 | ColumnarTable input2 = table1.to_columnar(); 179 | plan.inputs.emplace_back(std::move(input1)); 180 | plan.inputs.emplace_back(std::move(input2)); 181 | plan.root = 2; 182 | auto* context = Contest::build_context(); 183 | auto result = Contest::execute(plan, context); 184 | Contest::destroy_context(context); 185 | REQUIRE(result.num_rows == 6); 186 | REQUIRE(result.columns.size() == 2); 187 | REQUIRE(result.columns[0].type == DataType::INT32); 188 | REQUIRE(result.columns[1].type == DataType::INT32); 189 | auto result_table = Table::from_columnar(result); 190 | std::vector> ground_truth{ 191 | {1, 1,}, 192 | {1, 1,}, 193 | {1, 1,}, 194 | {1, 1,}, 195 | {2, 2,}, 196 | {3, 3,}, 197 | }; 198 | sort(result_table.table()); 199 | REQUIRE(result_table.table() == ground_truth); 200 | } 201 | 202 | TEST_CASE("Multiple columns", "[join]") { 203 | Plan plan; 204 | plan.new_scan_node(0, {{0, DataType::INT32}}); 205 | plan.new_scan_node(1, {{1, DataType::VARCHAR}, {0, DataType::INT32}}); 206 | plan.new_join_node(true, 0, 1, 0, 1, {{0, DataType::INT32}, {2, DataType::INT32}, {1, DataType::VARCHAR}}); 207 | using namespace std::string_literals; 208 | std::vector> data1{ 209 | {1 , "xxx"s,}, 210 | {1 , "yyy"s,}, 211 | {std::monostate{}, "zzz"s,}, 212 | {2 , "uuu"s,}, 213 | {3 , "vvv"s,}, 214 | }; 215 | std::vector types{DataType::INT32, DataType::VARCHAR}; 216 | Table table1(std::move(data1), std::move(types)); 217 | ColumnarTable input1 = table1.to_columnar(); 218 | ColumnarTable input2 = table1.to_columnar(); 219 | plan.inputs.emplace_back(std::move(input1)); 220 | plan.inputs.emplace_back(std::move(input2)); 221 | plan.root = 2; 222 | auto* context = Contest::build_context(); 223 | auto result = Contest::execute(plan, context); 224 | Contest::destroy_context(context); 225 | REQUIRE(result.num_rows == 6); 226 | REQUIRE(result.columns.size() == 3); 227 | REQUIRE(result.columns[0].type == DataType::INT32); 
228 | REQUIRE(result.columns[1].type == DataType::INT32); 229 | REQUIRE(result.columns[2].type == DataType::VARCHAR); 230 | auto result_table = Table::from_columnar(result); 231 | std::vector> ground_truth{ 232 | {1, 1, "xxx"s}, 233 | {1, 1, "xxx"s}, 234 | {1, 1, "yyy"s}, 235 | {1, 1, "yyy"s}, 236 | {2, 2, "uuu"s}, 237 | {3, 3, "vvv"s}, 238 | }; 239 | sort(result_table.table()); 240 | REQUIRE(result_table.table() == ground_truth); 241 | } 242 | 243 | TEST_CASE("Build on right", "[join]") { 244 | Plan plan; 245 | plan.new_scan_node(0, {{0, DataType::INT32}}); 246 | plan.new_scan_node(1, {{1, DataType::VARCHAR}, {0, DataType::INT32}}); 247 | plan.new_join_node(false, 0, 1, 0, 1, {{0, DataType::INT32}, {2, DataType::INT32}, {1, DataType::VARCHAR}}); 248 | using namespace std::string_literals; 249 | std::vector> data1{ 250 | {1 , "xxx"s,}, 251 | {1 , "yyy"s,}, 252 | {std::monostate{}, "zzz"s,}, 253 | {2 , "uuu"s,}, 254 | {3 , "vvv"s,}, 255 | }; 256 | std::vector types{DataType::INT32, DataType::VARCHAR}; 257 | Table table1(std::move(data1), std::move(types)); 258 | ColumnarTable input1 = table1.to_columnar(); 259 | ColumnarTable input2 = table1.to_columnar(); 260 | plan.inputs.emplace_back(std::move(input1)); 261 | plan.inputs.emplace_back(std::move(input2)); 262 | plan.root = 2; 263 | auto* context = Contest::build_context(); 264 | auto result = Contest::execute(plan, context); 265 | Contest::destroy_context(context); 266 | REQUIRE(result.num_rows == 6); 267 | REQUIRE(result.columns.size() == 3); 268 | REQUIRE(result.columns[0].type == DataType::INT32); 269 | REQUIRE(result.columns[1].type == DataType::INT32); 270 | REQUIRE(result.columns[2].type == DataType::VARCHAR); 271 | auto result_table = Table::from_columnar(result); 272 | std::vector> ground_truth{ 273 | {1, 1, "xxx"s}, 274 | {1, 1, "xxx"s}, 275 | {1, 1, "yyy"s}, 276 | {1, 1, "yyy"s}, 277 | {2, 2, "uuu"s}, 278 | {3, 3, "vvv"s}, 279 | }; 280 | sort(result_table.table()); 281 | REQUIRE(result_table.table() == 
ground_truth); 282 | } 283 | --------------------------------------------------------------------------------