├── .github ├── ISSUE_TEMPLATE │ ├── 100_feature_request.yml │ ├── 200_bug_report.yml │ ├── 300_RFC.yml │ └── 400_explain_discrepancy_report.yml └── PULL_REQUEST_TEMPLATE.md ├── .gitignore ├── CHANGELOG.md ├── LICENSE ├── LICENSE.GPL-2.0 └── LICENSE.MIT ├── LICENSES.md ├── README.md ├── README_zh.md ├── build ├── Dockerfile.build_env ├── Dockerfile.videx ├── build.sh ├── build_videx.sh ├── config.sh ├── init_server.sh ├── my.cnf └── start_server.sh ├── data ├── tpch_sf1 │ ├── explain_tpch_sf1_q21_innodb.json │ ├── explain_tpch_sf1_q21_videx.json │ └── videx_metadata_tpch_sf1.json └── tpch_tiny │ ├── explain_tpch_tiny_q21_innodb.json │ ├── explain_tpch_tiny_q21_innodb_57.json │ ├── explain_tpch_tiny_q21_videx.json │ ├── explain_tpch_tiny_q21_videx_57.json │ ├── tpch_tiny.sql.tar.gz │ └── videx_metadata_tpch_tiny.json ├── doc ├── explain_tpch_sf1_compare.png ├── explain_tpch_tiny_compare.png ├── explain_tpch_tiny_compare_alter_index.png ├── explain_tpch_tiny_mysql57_compare.png ├── explain_tpch_tiny_table_for_mysql57.png ├── installation.md ├── installation_zh.md └── videx-structure.png ├── pyproject.toml ├── requirements.txt ├── src ├── mysql │ └── videx │ │ ├── CMakeLists.txt │ │ ├── ha_videx.cc │ │ ├── ha_videx.h │ │ ├── videx_json_item.cc │ │ ├── videx_json_item.h │ │ ├── videx_log_utils.cc │ │ └── videx_log_utils.h └── sub_platforms │ ├── __init__.py │ └── sql_opt │ ├── __init__.py │ ├── column_statastics │ ├── __init__.py │ └── statistics_info.py │ ├── common │ ├── __init__.py │ ├── db_variable.py │ ├── exceptions.py │ ├── pydantic_utils.py │ ├── sample_file_info.py │ └── sample_info.py │ ├── databases │ ├── __init__.py │ └── mysql │ │ ├── __init__.py │ │ ├── common_operation.py │ │ ├── explain_result.py │ │ └── mysql_command.py │ ├── env │ ├── __init__.py │ └── rds_env.py │ ├── histogram │ ├── __init__.py │ ├── histogram_utils.py │ └── ndv_estimator.py │ ├── meta.py │ ├── sql_opt_utils │ ├── __init__.py │ └── sqlbrain_constants.py │ └── videx │ ├── __init__.py 
│ ├── common │ ├── __init__.py │ └── estimate_stats_length.py │ ├── model │ ├── __init__.py │ ├── videx_model_example.py │ ├── videx_model_innodb.py │ └── videx_strategy.py │ ├── scripts │ ├── __init__.py │ ├── analyze │ │ ├── __init__.py │ │ ├── analyze_delete_rows.py │ │ ├── analyze_linear_distribution.py │ │ └── analyze_trace_utils.py │ ├── clear_mysql57_env.sql │ ├── setup_mysql57_env.sql │ ├── start_videx_server.py │ └── videx_build_env.py │ ├── videx_histogram.py │ ├── videx_logging.py │ ├── videx_metadata.py │ ├── videx_mysql_utils.py │ ├── videx_service.py │ └── videx_utils.py └── test └── videx ├── __init__.py ├── data ├── test_imdbload_1024_b10 │ ├── videx_imdbload_histogram_b10.json │ ├── videx_imdbload_info_stats.json │ ├── videx_imdbload_ndv_mulcol.json │ └── videx_imdbload_ndv_single.json ├── test_info_item.json ├── test_info_item2.json ├── test_result_range_rows_gt.json ├── test_tpch_1024 │ ├── gt_rec_in_ranges_wo_idx_innodb.json │ ├── gt_req_resp.json │ ├── test_cases_tpch_rec_in_range_requests.json │ ├── videx_tpch_histogram.json │ ├── videx_tpch_info_stats.json │ ├── videx_tpch_info_stats_with_pct_cached.json │ ├── videx_tpch_ndv_mulcol.json │ └── videx_tpch_ndv_single.json ├── test_trace_range_rows_gt1.json ├── test_trace_range_rows_gt2.json ├── test_videx_meta_record_in_ranges_tpcc.json ├── test_videx_trace_check.json ├── tpch_64 │ ├── videx_tpch_histogram.json │ ├── videx_tpch_info_stats.json │ ├── videx_tpch_ndv_mulcol.json │ └── videx_tpch_ndv_single.json ├── videx_metadata_desc_index.json ├── videx_metadata_test_null_db.json └── whatif │ ├── join_trace.json │ ├── normal_union_trace.json │ └── uk_union_trace.json ├── test_desc_index.py ├── test_info_low.py ├── test_mulcol_ndv_singleton.py ├── test_rec_in_ranges_singleton.py ├── test_records_in_range.py ├── test_records_in_range_nullable.py └── test_videx_utils.py /.github/ISSUE_TEMPLATE/100_feature_request.yml: -------------------------------------------------------------------------------- 1 
| name: "✨ Feature Request" 2 | description: Suggest new functionality for VIDEX engine 3 | labels: ["enhancement"] 4 | body: 5 | - type: markdown 6 | attributes: 7 | value: "## Feature Proposal Guidelines" 8 | - type: textarea 9 | attributes: 10 | label: "Feature Description and Motivation" 11 | description: | 12 | Describe what you want to achieve and why this is valuable. 13 | Example: "Add support for virtual indexes on JSON columns" 14 | placeholder: | 15 | I propose [feature summary] because [motivation]. 16 | This would help with [problem scenario] by [expected outcome]. 17 | validations: 18 | required: true 19 | - type: textarea 20 | attributes: 21 | label: "System Version" 22 | description: | 23 | Specify relevant environment details: 24 | - MySQL/Percona versions 25 | - Plugin/Standalone mode 26 | placeholder: | 27 | Target Versions: MySQL 8.0.34 28 | Deployment Mode: Standalone 29 | - type: textarea 30 | attributes: 31 | label: "Use Case Scenario" 32 | description: | 33 | Describe a real-world application or academic scenario 34 | placeholder: | 35 | In my environment with [dataset characteristics], 36 | this feature would enable [specific workflow]. 37 | validations: 38 | required: true 39 | - type: textarea 40 | attributes: 41 | label: "Proposed Implementation (Optional)" 42 | description: | 43 | Share technical suggestions if available 44 | placeholder: | 45 | Potential approaches could involve [technical strategy] 46 | leveraging [existing interfaces/components]. 
47 | validations: 48 | required: false -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/200_bug_report.yml: -------------------------------------------------------------------------------- 1 | name: "🐛 Bug Report" 2 | description: Report unexpected behavior in VIDEX 3 | labels: ["bug"] 4 | body: 5 | - type: textarea 6 | attributes: 7 | label: "Environment Setup" 8 | description: | 9 | Required information for reproduction: 10 | placeholder: | 11 | VIDEX Version: [commit SHA / Release-x.y.z / tag] 12 | Database: [MySQL|Percona|Others] / 8.0.xx/5.7.xx 13 | Deployment Mode: Plugin/Standalone 14 | OS: Ubuntu xx / Debian xx / macOS xx / Others 15 | Container: Docker 24.0.6 (if applicable) 16 | Workload: JOB benchmark / TPC-H / Others 17 | validations: 18 | required: true 19 | - type: textarea 20 | attributes: 21 | label: "Observed vs Expected Behavior" 22 | description: | 23 | Include actual results, expected outcomes, and evidence: 24 | - Query plans 25 | - Error logs 26 | - Performance metrics 27 | placeholder: | 28 | [Describe test scenario] 29 | 30 | Actual Behavior: 31 | - VIDEX output: [result/plan/trace] 32 | - Error message: [if applicable] 33 | 34 | Expected Behavior: 35 | - MySQL output: [baseline result] 36 | - Expected plan: [correct behavior] 37 | 38 | Supporting Evidence: 39 | [Attach logs/scripts/screenshots] 40 | validations: 41 | required: true -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/300_RFC.yml: -------------------------------------------------------------------------------- 1 | name: "💡 Architecture Proposal" 2 | description: Propose significant design changes 3 | labels: ["RFC"] 4 | body: 5 | - type: textarea 6 | attributes: 7 | label: "Proposal Summary" 8 | description: | 9 | One-paragraph overview of the proposed change 10 | validations: 11 | required: true 12 | - type: textarea 13 | attributes: 14 | label: "Motivation and 
Impact" 15 | description: | 16 | Explain why this change is needed and affected components: 17 | [ ] Optimizer Plugin 18 | [ ] Statistic Server RESTFul API 19 | [ ] Statistic Server 20 | [ ] Algorithm Interfaces 21 | [ ] Algorithm Implementations 22 | [ ] Others 23 | placeholder: | 24 | This addresses [problem statement] affecting [component]. 25 | Will impact [specific workflow/scenario]. 26 | validations: 27 | required: true 28 | - type: textarea 29 | attributes: 30 | label: "Technical Plan" 31 | description: | 32 | Describe implementation strategy and alternatives 33 | placeholder: | 34 | Implementation Roadmap: 35 | 1. Phase 1: [Core changes] 36 | 2. Phase 2: [Interface updates] 37 | 38 | Alternative Approaches: 39 | - [Option 1] [Pros/Cons] 40 | - [Option 2] [Pros/Cons] -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/400_explain_discrepancy_report.yml: -------------------------------------------------------------------------------- 1 | name: "📊 EXPLAIN Discrepancy Report" 2 | description: Report differences between VIDEX and MySQL/Percona/MariaDB query plans 3 | labels: [ "discrepancy" ] 4 | body: 5 | - type: dropdown 6 | attributes: 7 | label: "VIDEX Mode" 8 | description: "Which VIDEX deployment mode are you using?" 9 | options: 10 | - Standalone Mode 11 | - Plugin Mode 12 | validations: 13 | required: true 14 | 15 | - type: input 16 | attributes: 17 | label: "Target Database" 18 | description: "Database type and version you're comparing against. e.g. MySQL 5.7, Percona 8.0.34" 19 | placeholder: "MySQL 8.0.34, Percona 5.7.42, MariaDB 10.6.12, etc." 
20 | validations: 21 | required: true 22 | 23 | - type: textarea 24 | attributes: 25 | label: "Table Schema" 26 | description: "Provide the relevant table definitions/schemas" 27 | placeholder: | 28 | ```sql 29 | CREATE TABLE example ( 30 | id INT PRIMARY KEY, 31 | value VARCHAR(255), 32 | created_at DATETIME, 33 | KEY idx_created (created_at) 34 | ); 35 | ``` 36 | validations: 37 | required: true 38 | 39 | - type: textarea 40 | attributes: 41 | label: "SQL Query" 42 | description: "The SQL query showing different plans" 43 | placeholder: | 44 | ```sql 45 | SELECT * FROM example 46 | WHERE created_at > '2024-01-01' 47 | ORDER BY id LIMIT 10; 48 | ``` 49 | validations: 50 | required: true 51 | 52 | - type: textarea 53 | attributes: 54 | label: "EXPLAIN Results Comparison" 55 | description: "EXPLAIN FORMAT=JSON output from both VIDEX and the target database" 56 | placeholder: | 57 | **Target Database:** 58 | ```json 59 | [EXPLAIN FORMAT=JSON SQL] 60 | ``` 61 | 62 | **VIDEX:** 63 | ```json 64 | [EXPLAIN FORMAT=JSON SQL] 65 | ``` 66 | 67 | validations: 68 | required: true 69 | 70 | - type: textarea 71 | attributes: 72 | label: "Optimizer Trace Comparison [Optional]" 73 | description: "Provide optimizer trace output if available" 74 | placeholder: | 75 | Run following scripts on both systems, if supported: 76 | ```sql 77 | SET optimizer_trace="enabled=on"; 78 | SET optimizer_trace_max_mem_size=4294967295; 79 | EXPLAIN YOUR_SQL; 80 | SELECT trace FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE\G 81 | ``` 82 | 83 | validations: 84 | required: false 85 | 86 | - type: textarea 87 | attributes: 88 | label: "Demo Data Generation [Optional]" 89 | description: "Code to generate sample data that reproduces the issue (SQL procedure, shell script, Python code, or SQL file)" 90 | placeholder: | 91 | ```sql 92 | -- SQL procedure 93 | DELIMITER // 94 | CREATE PROCEDURE generate_test_data() 95 | BEGIN 96 | DECLARE i INT DEFAULT 1; 97 | WHILE i <= 1000 DO 98 | INSERT INTO example VALUES (i, 
CONCAT('val_', i), DATE_ADD('2023-01-01', INTERVAL i DAY)); 99 | SET i = i + 1; 100 | END WHILE; 101 | END // 102 | DELIMITER ; 103 | 104 | CALL generate_test_data(); 105 | ``` 106 | 107 | Or attach a data generation script (shell/Python/etc.) 108 | 109 | Or an Example SQL file to load data: 110 | INSERT INTO example (id, value, created_at) 111 | VALUES 112 | (1, 'test1', '2023-01-01'), 113 | (2, 'test2', '2023-02-01'), 114 | (3, 'test3', '2023-03-01'); 115 | 116 | validations: 117 | required: false 118 | 119 | - type: textarea 120 | attributes: 121 | label: "Additional Context [Optional]" 122 | description: "Any other information that might help diagnose the discrepancy" 123 | placeholder: | 124 | - Statistics differences 125 | - Indexes present in one system but not the other 126 | - Configuration differences 127 | - Expected behavior based on documentation 128 | validations: 129 | required: false -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | ## Pull Request Summary 2 | 3 | [Provide a clear and concise summary of your changes (1-3 sentences)] 4 | 5 | ## Related Issues 6 | 7 | Resolves: #[Insert issue number(s)] 8 | 9 | ## Detailed Description 10 | 11 | [Explain your changes in detail, including: 12 | 13 | - What problem does this PR solve? 14 | - How does your solution work? 15 | - Any trade-offs or alternative approaches considered?] 16 | 17 | **Important: Before submitting, please complete the description above and review the checklist below.** 18 | 19 | --- 20 | 21 |
22 | Contribution Guidelines (Expand for Details) 23 | 24 |

We appreciate your contribution to VIDEX! To ensure a smooth review process and maintain high code quality, please adhere to the following guidelines:

25 | 26 |

Pull Request Title Format

27 |

Your PR title should start with one of these prefixes to indicate the nature of the change:

28 | 41 |

Note: For changes spanning multiple categories, use the most specific prefix or multiple prefixes in order of importance (e.g., [Algorithm][Stats]).

42 | 43 |

Submission Checklist

44 | 55 | 56 |

By submitting this PR, you confirm that you've read these guidelines and your changes align with the project's contribution standards.

57 | 58 |
59 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .idea/ 2 | .DS_store 3 | *.iml 4 | .idea/workspace.xml 5 | .vscode 6 | .coverage 7 | __pycache__/ 8 | *.pyc 9 | span.log 10 | .DS_Store 11 | output/ 12 | *span.log 13 | *nohup.out 14 | *.ipynb 15 | large_data/ 16 | *.csv 17 | cache 18 | *.gzip 19 | .lock 20 | *.vscode 21 | *__pycache__/ 22 | *.log 23 | *.out 24 | videx.egg-info 25 | tpch_tiny.sql 26 | videx_metadata_tpch_tiny.json 27 | dist 28 | data -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | All notable changes to this project will be documented in this file. 4 | 5 | Including `Added`, `Changed`, `Fixed`, `Deprecated`, `Removed`, `Security`. 6 | 7 | ## [Unreleased] 8 | 9 | ### Added 10 | - refactor videx-server codes, more decoupling with SQLBrain. 11 | - Add a single-stack script to fetch metadata, mirror schema into VIDEX-MySQL, and import metadata into Videx-Server. 12 | 13 | ### Fixed 14 | - fix ndv calculation bugs when ndv information is missing. 15 | 16 | ### Use 17 | - Add an example metadata file based on TPC-H (scale-factor=1) for onboarding. 18 | - provide a simple videx model implementation: `VidexModelExample` for onboarding. 19 | 20 | ## [0.1.0] - 2025-02-12 21 | ### Added 22 | - VIDEX-MySQL plugin 23 | - VIDEX-Server basic code 24 | 25 | -------------------------------------------------------------------------------- /LICENSE/LICENSE.MIT: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 Bytedance Ltd. 
and/or its affiliates 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /LICENSES.md: -------------------------------------------------------------------------------- 1 | # Licensing Information 2 | 3 | This project contains software under two different licenses: 4 | 5 | 1. MySQL Engine Implementation 6 | - Licensed under GNU General Public License v2.0 7 | - Located in: src/mysql 8 | - SPDX-License-Identifier: GPL-2.0 9 | 10 | 2. 
Python Implementation 11 | - Licensed under MIT License 12 | - Located in: all other codes and scripts 13 | - SPDX-License-Identifier: MIT 14 | 15 | ## Notice 16 | - When using or distributing the MySQL engine components, you must comply with GPL-2.0 terms 17 | - The other components can be used under the more permissive MIT license 18 | - If you combine both components, the resulting work must comply with GPL-2.0 19 | 20 | For full license texts, see the LICENSE directory. -------------------------------------------------------------------------------- /build/Dockerfile.build_env: -------------------------------------------------------------------------------- 1 | # build GCC 2 | FROM debian:bullseye AS gcc-builder 3 | ENV GCC_VERSION=9.3.0 4 | RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \ 5 | build-essential wget \ 6 | libgmp-dev libmpfr-dev libmpc-dev \ 7 | && apt-get clean && rm -rf /var/lib/apt/lists/* 8 | 9 | WORKDIR /build 10 | RUN wget https://mirrors.tuna.tsinghua.edu.cn/gnu/gcc/gcc-${GCC_VERSION}/gcc-${GCC_VERSION}.tar.gz \ 11 | && tar -xf gcc-${GCC_VERSION}.tar.gz \ 12 | && cd gcc-${GCC_VERSION} \ 13 | && mkdir build && cd build \ 14 | && ../configure \ 15 | --prefix=/usr/local/gcc-${GCC_VERSION} \ 16 | --disable-multilib \ 17 | --enable-languages=c,c++ \ 18 | --disable-bootstrap \ 19 | --disable-nls \ 20 | --disable-libsanitizer \ 21 | --disable-libvtv \ 22 | --disable-libssp \ 23 | --disable-libquadmath \ 24 | --disable-libgomp \ 25 | --disable-libada \ 26 | --disable-libstdcxx-pch \ 27 | && make -j $(nproc) \ 28 | && make install-strip DESTDIR=/gcc-install \ 29 | && cd /build && rm -rf * 30 | 31 | # build Bison 32 | FROM debian:bullseye AS bison-builder 33 | ENV BISON_VERSION=3.4.2 34 | RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \ 35 | build-essential wget \ 36 | flex \ 37 | m4 \ 38 | && apt-get clean && rm -rf /var/lib/apt/lists/* 39 | 40 | WORKDIR /build 41 | RUN wget --no-check-certificate 
https://ftp.gnu.org/gnu/bison/bison-${BISON_VERSION}.tar.gz \ 42 | && tar -xf bison-${BISON_VERSION}.tar.gz \ 43 | && cd bison-${BISON_VERSION} \ 44 | && ./configure --prefix=/usr/local \ 45 | && make -j $(nproc) \ 46 | && make install DESTDIR=/bison-install \ 47 | && cd /build && rm -rf * 48 | 49 | # final stage 50 | FROM debian:bullseye 51 | COPY --from=gcc-builder /gcc-install/usr/local/gcc-9.3.0 /usr/local/gcc-9.3.0 52 | COPY --from=bison-builder /bison-install/usr/local/bin/bison /usr/local/bin/bison 53 | COPY --from=bison-builder /bison-install/usr/local/lib/liby.* /usr/local/lib/ 54 | COPY --from=bison-builder /bison-install/usr/local/share/bison /usr/local/share/bison 55 | 56 | # online required part 57 | RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \ 58 | cmake \ 59 | curl \ 60 | python3.9 \ 61 | python3.9-dev \ 62 | python3.9-venv \ 63 | libgmp10 \ 64 | libmpfr6 \ 65 | libmpc3 \ 66 | m4 \ 67 | pkg-config \ 68 | git \ 69 | git-lfs \ 70 | libssl-dev \ 71 | libreadline-dev \ 72 | zlib1g-dev \ 73 | libcurl4-openssl-dev \ 74 | libldap2-dev \ 75 | libsasl2-dev \ 76 | libsasl2-modules-gssapi-mit \ 77 | libkrb5-dev \ 78 | libnuma-dev \ 79 | libmecab-dev \ 80 | libaio-dev \ 81 | libncurses-dev \ 82 | libtirpc-dev \ 83 | && apt-get clean && rm -rf /var/lib/apt/lists/* 84 | 85 | # 配置 Python 和其他工具 86 | RUN ln -sf /usr/local/gcc-9.3.0/bin/gcc /usr/local/bin/gcc \ 87 | && ln -sf /usr/bin/python3.9 /usr/bin/python3 \ 88 | && ln -sf /usr/bin/python3.9 /usr/bin/python \ 89 | && curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py \ 90 | && python3.9 get-pip.py \ 91 | && rm get-pip.py 92 | 93 | # validation 94 | RUN gcc --version | grep "9.3.0" \ 95 | && bison --version \ 96 | && curl --version | grep "curl" \ 97 | && python3.9 --version | grep "3.9" \ 98 | && cmake --version | grep "cmake version" \ 99 | && echo "All version checks passed!" 
100 | 101 | ENV PATH=/usr/local/gcc-9.3.0/bin:/opt/tiger/typhoon-blade:/opt/common_tools:$PATH 102 | ENV LD_LIBRARY_PATH=/usr/local/gcc-9.3.0/lib64 -------------------------------------------------------------------------------- /build/Dockerfile.videx: -------------------------------------------------------------------------------- 1 | # Build MySQL 2 | FROM videx_build:latest AS builder 3 | 4 | WORKDIR /root 5 | 6 | # copy MySQL source files 7 | COPY mysql_server /root/mysql_server 8 | COPY videx_server/build/config.sh /root/videx_server/build/ 9 | COPY videx_server/src/mysql/videx /root/videx_server/src/mysql/videx 10 | COPY videx_server/build/build.sh /root/videx_server/build/ 11 | 12 | # build 13 | WORKDIR /root/videx_server/build 14 | RUN chmod +x *.sh && \ 15 | ./build.sh 16 | 17 | # collect dependency 18 | RUN mkdir -p /root/mysql_server/mysql_build_output/lib64 && \ 19 | cp /usr/local/gcc-9.3.0/lib64/libstdc++.so* /root/mysql_server/mysql_build_output/lib64/ && \ 20 | cp /usr/local/gcc-9.3.0/lib64/libgcc_s.so* /root/mysql_server/mysql_build_output/lib64/ 21 | 22 | # Build final 23 | FROM videx_build:latest 24 | 25 | WORKDIR /root 26 | 27 | COPY --from=builder /root/mysql_server/mysql_build_output /root/mysql_server/mysql_build_output 28 | COPY videx_server /root/videx_server 29 | 30 | WORKDIR /root/videx_server/build 31 | 32 | RUN chmod +x *.sh 33 | 34 | RUN ./init_server.sh 35 | 36 | EXPOSE 13308 5001 37 | 38 | CMD ["./start_server.sh"] -------------------------------------------------------------------------------- /build/build.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" 3 | source "${SCRIPT_DIR}/config.sh" 4 | 5 | # Error handling 6 | set -e # Exit on error 7 | set -x # Print commands for debugging 8 | 9 | if [ -d "$MYSQL_HOME/storage/videx" ]; then 10 | echo "Deleting existing $MYSQL_HOME/storage/videx directory..." 
11 | rm -rf "$MYSQL_HOME/storage/videx" 12 | fi 13 | 14 | echo "Copying $VIDEX_HOME/src/mysql/videx to $MYSQL_HOME/storage..." 15 | cp -r "$VIDEX_HOME/src/mysql/videx" "$MYSQL_HOME/storage" 16 | 17 | BOOST_DIR=$MYSQL_HOME/boost 18 | 19 | # Clean previous build if exists 20 | if [ -d "$MYSQL_BUILD_DIR" ]; then 21 | echo "Cleaning previous build directory..." 22 | rm -rf "$MYSQL_BUILD_DIR" 23 | fi 24 | 25 | # Create necessary directories 26 | mkdir -p "$BOOST_DIR" 27 | mkdir -p "$MYSQL_BUILD_DIR"/{etc,build,lib64} 28 | 29 | # Change to MySQL source directory 30 | cd "$MYSQL_BUILD_DIR" 31 | 32 | # Configure MySQL build with CMake 33 | # -DWITH_DEBUG=OFF: Disable debug build 34 | # -DCMAKE_BUILD_TYPE=Release: Build release version 35 | # -DBUILD_CONFIG=mysql_release: Use release configuration 36 | # -DFEATURE_SET=community: Build community edition 37 | # -DCMAKE_INSTALL_PREFIX: Set installation directory 38 | # -DMYSQL_DATADIR: Set data directory 39 | # -DSYSCONFDIR: Set configuration directory 40 | # -DWITH_BOOST: Specify boost directory 41 | # -DDOWNLOAD_BOOST: Automatically download boost if needed 42 | cmake .. \ 43 | -B./build \ 44 | -DWITH_DEBUG=OFF \ 45 | -DCMAKE_BUILD_TYPE=Release \ 46 | -DBUILD_CONFIG=mysql_release \ 47 | -DFEATURE_SET=community \ 48 | -DCMAKE_INSTALL_PREFIX=. \ 49 | -DMYSQL_DATADIR=./data \ 50 | -DSYSCONFDIR=./etc \ 51 | -DWITH_BOOST="$BOOST_DIR" \ 52 | -DDOWNLOAD_BOOST=ON \ 53 | -DWITH_ROCKSDB=OFF \ 54 | -DDOWNLOAD_BOOST_TIMEOUT=3600 \ 55 | -DWITH_VIDEX_STORAGE_ENGINE=1 56 | 57 | # Build MySQL server (mysqld) 58 | echo "Building MySQL server..." 59 | cmake --build build --target mysqld -- -j "$(nproc)" 60 | 61 | # Build MySQL client 62 | echo "Building MySQL client..." 63 | cmake --build build --target mysql -- -j "$(nproc)" 64 | 65 | # build videx 66 | cmake --build build --target videx -- -j "$(nproc)" 67 | 68 | # Check if build was successful 69 | if [ ! 
-f "build/runtime_output_directory/mysqld" ]; then 70 | echo "Error: MySQL server build failed!" 71 | exit 1 72 | fi 73 | 74 | if [ ! -f "build/runtime_output_directory/mysql" ]; then 75 | echo "Error: MySQL client build failed!" 76 | exit 1 77 | fi 78 | 79 | # Copy necessary libraries and scripts 80 | echo "Copying libraries and scripts..." 81 | # cp /usr/local/gcc-9.3.0/lib64/* "$MYSQL_BUILD_DIR/lib64/" 82 | # cp ../build/scripts/* "$MYSQL_BUILD_DIR/" 83 | 84 | # Add MySQL and mysqld to PATH environment variable 85 | echo "Adding MySQL client and server to PATH..." 86 | MYSQL_BIN_DIR="$MYSQL_BUILD_DIR/build/runtime_output_directory/" 87 | 88 | if [ -d "$MYSQL_BIN_DIR" ]; then 89 | export PATH="$MYSQL_BIN_DIR:$PATH" 90 | echo "export PATH=\"$MYSQL_BIN_DIR:\$PATH\"" >> ~/.bashrc 91 | echo "MySQL binaries added to PATH. Please restart your shell or run 'source ~/.bashrc' to apply." 92 | else 93 | echo "Error: MySQL binary directory not found! Ensure the build was successful." 94 | exit 1 95 | fi 96 | 97 | echo "MySQL build completed successfully!" 98 | echo "Build directory: $MYSQL_BUILD_DIR" -------------------------------------------------------------------------------- /build/build_videx.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" 3 | source "${SCRIPT_DIR}/config.sh" 4 | 5 | # Error handling 6 | set -e # Exit on error 7 | set -x # Print commands for debugging 8 | 9 | if [ -d "$MYSQL_HOME/storage/videx" ]; then 10 | echo "Deleting existing $MYSQL_HOME/storage/videx directory..." 11 | rm -rf "$MYSQL_HOME/storage/videx" 12 | fi 13 | 14 | echo "Copying $VIDEX_HOME/src/mysql/videx to $MYSQL_HOME/storage..." 15 | cp -r "$VIDEX_HOME/src/mysql/videx" "$MYSQL_HOME/storage" 16 | 17 | BOOST_DIR=$MYSQL_HOME/boost 18 | 19 | # Clean previous build 20 | if [ -d "$MYSQL_BUILD_DIR" ]; then 21 | echo "Cleaning previous build directory..." 
22 | rm -rf "$MYSQL_BUILD_DIR" 23 | fi 24 | 25 | # Create necessary directories 26 | mkdir -p "$BOOST_DIR" 27 | mkdir -p "$MYSQL_BUILD_DIR"/{etc,build,lib64} 28 | 29 | # Change to MySQL source directory 30 | cd "$MYSQL_BUILD_DIR" 31 | 32 | 33 | cmake .. \ 34 | -B./build \ 35 | -DWITH_DEBUG=OFF \ 36 | -DCMAKE_BUILD_TYPE=Release \ 37 | -DBUILD_CONFIG=mysql_release \ 38 | -DFEATURE_SET=community \ 39 | -DCMAKE_INSTALL_PREFIX=. \ 40 | -DMYSQL_DATADIR=./data \ 41 | -DSYSCONFDIR=./etc \ 42 | -DWITH_BOOST="$BOOST_DIR" \ 43 | -DDOWNLOAD_BOOST=ON \ 44 | -DWITH_ROCKSDB=OFF \ 45 | -DDOWNLOAD_BOOST_TIMEOUT=3600 \ 46 | -DWITH_VIDEX_STORAGE_ENGINE=1 \ 47 | -DPLUGIN_VIDEX=DYNAMIC 48 | 49 | echo "Building MySQL server..." 50 | cmake --build build --target videx -- -j "$(nproc)" 51 | -------------------------------------------------------------------------------- /build/config.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Global configurations 3 | export MYSQL_HOME=/root/mysql_server 4 | export VIDEX_HOME=/root/videx_server 5 | export MYSQL_BUILD_DIR=$MYSQL_HOME/mysql_build_output 6 | export MYSQL_PORT=13308 7 | export VIDEX_PORT=5001 8 | export LD_LIBRARY_PATH=$MYSQL_BUILD_DIR/lib64:$MYSQL_BUILD_DIR/build/plugin_output_directory:$MYSQL_BUILD_DIR/build/library_output_directory:$LD_LIBRARY_PATH 9 | export MYSQL_LOG=/var/log/mysql.log 10 | export VIDEX_LOG=/var/log/videx.log 11 | -------------------------------------------------------------------------------- /build/init_server.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" 4 | source "${SCRIPT_DIR}/config.sh" 5 | 6 | set -e 7 | set -x 8 | # Copy configuration file 9 | if [ -f "$SCRIPT_DIR/my.cnf" ]; then 10 | cp "$SCRIPT_DIR/my.cnf" "$MYSQL_BUILD_DIR/etc/my.cnf" 11 | else 12 | echo "Warning: my.cnf not found!" 
13 | exit 1 14 | fi 15 | 16 | [ ! -d "$MYSQL_BUILD_DIR" ] && echo "MySQL build directory not found!" && exit 1 17 | [ ! -f "$MYSQL_BUILD_DIR/etc/my.cnf" ] && echo "my.cnf not found!" && exit 1 18 | 19 | cd $MYSQL_BUILD_DIR 20 | 21 | # Clean previous build if exists 22 | if [ -d ./data ]; then 23 | echo "Cleaning data..." 24 | rm -rf ./data 25 | fi 26 | 27 | mkdir -p ./data 28 | mkdir -p ./log 29 | 30 | 31 | echo "Starting initialization process..." > $MYSQL_LOG 32 | 33 | ./build/runtime_output_directory/mysqld --defaults-file=./etc/my.cnf --initialize-insecure --user=root --basedir="$MYSQL_BUILD_DIR" --datadir=./data || exit 1 34 | 35 | 36 | # 启动 MySQL 服务 37 | echo "Starting MySQL server..." 38 | ./build/runtime_output_directory/mysqld --defaults-file=./etc/my.cnf --user=root --basedir="$MYSQL_BUILD_DIR" --datadir=./data --socket=./mysql_80.sock --port=$MYSQL_PORT & 39 | MYSQL_PID=$! # 获取 MySQL 进程的 PID 40 | 41 | # 等待 MySQL 服务启动完成 42 | echo "Waiting for MySQL to be ready..." 43 | for i in {1..30}; do 44 | ./build/runtime_output_directory/mysql -h127.0.0.1 -uroot -P$MYSQL_PORT -e "SELECT 1" && break 45 | sleep 2 46 | done 47 | 48 | # 检查 MySQL 是否启动成功 49 | if ! ./build/runtime_output_directory/mysql -h127.0.0.1 -uroot -P$MYSQL_PORT -e "SELECT 1"; then 50 | echo "MySQL failed to start." 51 | kill $MYSQL_PID 52 | exit 1 53 | fi 54 | 55 | # 创建用户 videx 56 | echo "Creating user videx..." 57 | echo "CREATE USER 'videx'@'%' IDENTIFIED WITH mysql_native_password BY 'password'; GRANT ALL ON *.* TO 'videx'@'%'; FLUSH PRIVILEGES;" | \ 58 | ./build/runtime_output_directory/mysql -h127.0.0.1 -uroot -P$MYSQL_PORT 59 | 60 | if [ $? -eq 0 ]; then 61 | echo "User videx created successfully" 62 | else 63 | echo "Failed to create user videx!" 64 | kill $MYSQL_PID 65 | exit 1 66 | fi 67 | 68 | # 直接杀死 MySQL 进程 69 | echo "Shutting down MySQL server..." 
#!/usr/bin/env bash
# Start the locally built MySQL server and the VIDEX Python server in the
# background, then follow the VIDEX log (keeps the foreground process alive,
# e.g. as a container entrypoint).
#
# Expects config.sh (next to this script) to define:
#   MYSQL_BUILD_DIR, MYSQL_LOG, VIDEX_HOME, VIDEX_LOG
SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
source "${SCRIPT_DIR}/config.sh"


# Unset proxy environment variables to avoid interference
unset http_proxy HTTP_PROXY
unset https_proxy HTTPS_PROXY
unset no_proxy NO_PROXY
unset all_proxy ALL_PROXY
unset ftp_proxy FTP_PROXY

# Fail fast if the build directory is missing (SC2164) so mysqld is never
# launched from the wrong working directory; quote expansions so paths with
# spaces survive word splitting (SC2086).
cd "$MYSQL_BUILD_DIR" || exit 1
echo "Starting MySQL..." >> "$MYSQL_LOG"
build/runtime_output_directory/mysqld \
    --defaults-file="$MYSQL_BUILD_DIR/etc/my.cnf" \
    --user=root \
    --basedir="$MYSQL_BUILD_DIR" \
    --datadir="$MYSQL_BUILD_DIR/data" >> "$MYSQL_LOG" 2>&1 &

echo "Starting videx_server..." >> "$VIDEX_LOG"

echo "$VIDEX_HOME/src/sub_platforms/sql_opt/videx/scripts"
cd "$VIDEX_HOME/src/sub_platforms/sql_opt/videx/scripts" || exit 1
echo "Starting Videx server on port 5001..." >> "$VIDEX_LOG"
python3.9 start_videx_server.py --port 5001 >> "$VIDEX_LOG" 2>&1 &

# Block on the VIDEX log so the script (and any container running it) stays up.
tail -f "$VIDEX_LOG"
66 | "L_ID" 67 | ], 68 | "attached_condition": "(`tpch_sf1`.`l1`.`L_RECEIPTDATE` > `tpch_sf1`.`l1`.`L_COMMITDATE`)" 69 | } 70 | }, 71 | { 72 | "table": { 73 | "table_name": "nation", 74 | "access_type": "ALL", 75 | "possible_keys": [ 76 | "PRIMARY" 77 | ], 78 | "rows_examined_per_scan": 25, 79 | "rows_produced_per_join": 481679, 80 | "filtered": "10.00", 81 | "using_join_buffer": "hash join", 82 | "cost_info": { 83 | "read_cost": "55.80", 84 | "eval_cost": "48167.93", 85 | "prefix_cost": "845725.36", 86 | "data_read_per_join": "330M" 87 | }, 88 | "used_columns": [ 89 | "N_NATIONKEY", 90 | "N_NAME" 91 | ], 92 | "attached_condition": "(`tpch_sf1`.`nation`.`N_NAME` = 'IRAQ')" 93 | } 94 | }, 95 | { 96 | "table": { 97 | "table_name": "supplier", 98 | "access_type": "eq_ref", 99 | "possible_keys": [ 100 | "PRIMARY", 101 | "SUPPLIER_FK1", 102 | "idx_S_NATIONKEY_S_SUPPKEY_S_NAME" 103 | ], 104 | "key": "PRIMARY", 105 | "used_key_parts": [ 106 | "S_SUPPKEY" 107 | ], 108 | "key_length": "4", 109 | "ref": [ 110 | "tpch_sf1.l1.L_SUPPKEY" 111 | ], 112 | "rows_examined_per_scan": 1, 113 | "rows_produced_per_join": 24083, 114 | "filtered": "5.00", 115 | "cost_info": { 116 | "read_cost": "192671.73", 117 | "eval_cost": "2408.40", 118 | "prefix_cost": "1086565.03", 119 | "data_read_per_join": "17M" 120 | }, 121 | "used_columns": [ 122 | "S_SUPPKEY", 123 | "S_NAME", 124 | "S_NATIONKEY" 125 | ], 126 | "attached_condition": "(`tpch_sf1`.`supplier`.`S_NATIONKEY` = `tpch_sf1`.`nation`.`N_NATIONKEY`)" 127 | } 128 | }, 129 | { 130 | "table": { 131 | "table_name": "l2", 132 | "access_type": "ref", 133 | "possible_keys": [ 134 | "LINEITEM_UK1", 135 | "LINEITEM_FK1" 136 | ], 137 | "key": "LINEITEM_FK1", 138 | "used_key_parts": [ 139 | "L_ORDERKEY" 140 | ], 141 | "key_length": "4", 142 | "ref": [ 143 | "tpch_sf1.orders.O_ORDERKEY" 144 | ], 145 | "rows_examined_per_scan": 3, 146 | "rows_produced_per_join": 24083, 147 | "filtered": "90.00", 148 | "first_match": "supplier", 149 | "cost_info": { 
150 | "read_cost": "93257.14", 151 | "eval_cost": "2408.40", 152 | "prefix_cost": "1189147.88", 153 | "data_read_per_join": "8M" 154 | }, 155 | "used_columns": [ 156 | "L_ORDERKEY", 157 | "L_SUPPKEY", 158 | "L_ID" 159 | ], 160 | "attached_condition": "(`tpch_sf1`.`l2`.`L_SUPPKEY` <> `tpch_sf1`.`l1`.`L_SUPPKEY`)" 161 | } 162 | }, 163 | { 164 | "table": { 165 | "table_name": "l3", 166 | "access_type": "ref", 167 | "possible_keys": [ 168 | "LINEITEM_UK1", 169 | "LINEITEM_FK1" 170 | ], 171 | "key": "LINEITEM_FK1", 172 | "used_key_parts": [ 173 | "L_ORDERKEY" 174 | ], 175 | "key_length": "4", 176 | "ref": [ 177 | "tpch_sf1.orders.O_ORDERKEY" 178 | ], 179 | "rows_examined_per_scan": 3, 180 | "rows_produced_per_join": 24083, 181 | "filtered": "100.00", 182 | "not_exists": true, 183 | "cost_info": { 184 | "read_cost": "93257.14", 185 | "eval_cost": "2408.40", 186 | "prefix_cost": "1291730.73", 187 | "data_read_per_join": "8M" 188 | }, 189 | "used_columns": [ 190 | "L_ORDERKEY", 191 | "L_SUPPKEY", 192 | "L_COMMITDATE", 193 | "L_RECEIPTDATE", 194 | "L_ID" 195 | ], 196 | "attached_condition": "((is_not_null_compl(l3), ((`tpch_sf1`.`l3`.`L_SUPPKEY` <> `tpch_sf1`.`l1`.`L_SUPPKEY`) and (`tpch_sf1`.`l3`.`L_RECEIPTDATE` > `tpch_sf1`.`l3`.`L_COMMITDATE`)), true) and (is_not_null_compl(l3), (found_match(l3), false, true), true))" 197 | } 198 | } 199 | ] 200 | } 201 | } 202 | } 203 | } -------------------------------------------------------------------------------- /data/tpch_sf1/explain_tpch_sf1_q21_videx.json: -------------------------------------------------------------------------------- 1 | { 2 | "query_block": { 3 | "select_id": 1, 4 | "cost_info": { 5 | "query_cost": "1291730.72" 6 | }, 7 | "ordering_operation": { 8 | "using_filesort": true, 9 | "grouping_operation": { 10 | "using_temporary_table": true, 11 | "using_filesort": false, 12 | "nested_loop": [ 13 | { 14 | "table": { 15 | "table_name": "orders", 16 | "access_type": "ALL", 17 | "possible_keys": [ 18 | "PRIMARY" 19 | 
], 20 | "rows_examined_per_scan": 1492893, 21 | "rows_produced_per_join": 149289, 22 | "filtered": "10.00", 23 | "cost_info": { 24 | "read_cost": "146692.37", 25 | "eval_cost": "14928.93", 26 | "prefix_cost": "161621.30", 27 | "data_read_per_join": "67M" 28 | }, 29 | "used_columns": [ 30 | "O_ORDERKEY", 31 | "O_ORDERSTATUS" 32 | ], 33 | "attached_condition": "(`videx_tpch_sf1`.`orders`.`O_ORDERSTATUS` = 'F')" 34 | } 35 | }, 36 | { 37 | "table": { 38 | "table_name": "l1", 39 | "access_type": "ref", 40 | "possible_keys": [ 41 | "LINEITEM_UK1", 42 | "LINEITEM_FK1" 43 | ], 44 | "key": "LINEITEM_FK1", 45 | "used_key_parts": [ 46 | "L_ORDERKEY" 47 | ], 48 | "key_length": "4", 49 | "ref": [ 50 | "videx_tpch_sf1.orders.O_ORDERKEY" 51 | ], 52 | "rows_examined_per_scan": 3, 53 | "rows_produced_per_join": 192671, 54 | "filtered": "33.33", 55 | "cost_info": { 56 | "read_cost": "578073.02", 57 | "eval_cost": "19267.17", 58 | "prefix_cost": "797501.62", 59 | "data_read_per_join": "70M" 60 | }, 61 | "used_columns": [ 62 | "L_ORDERKEY", 63 | "L_SUPPKEY", 64 | "L_COMMITDATE", 65 | "L_RECEIPTDATE", 66 | "L_ID" 67 | ], 68 | "attached_condition": "(`videx_tpch_sf1`.`l1`.`L_RECEIPTDATE` > `videx_tpch_sf1`.`l1`.`L_COMMITDATE`)" 69 | } 70 | }, 71 | { 72 | "table": { 73 | "table_name": "nation", 74 | "access_type": "ALL", 75 | "possible_keys": [ 76 | "PRIMARY" 77 | ], 78 | "rows_examined_per_scan": 25, 79 | "rows_produced_per_join": 481679, 80 | "filtered": "10.00", 81 | "using_join_buffer": "hash join", 82 | "cost_info": { 83 | "read_cost": "55.80", 84 | "eval_cost": "48167.93", 85 | "prefix_cost": "845725.36", 86 | "data_read_per_join": "330M" 87 | }, 88 | "used_columns": [ 89 | "N_NATIONKEY", 90 | "N_NAME" 91 | ], 92 | "attached_condition": "(`videx_tpch_sf1`.`nation`.`N_NAME` = 'IRAQ')" 93 | } 94 | }, 95 | { 96 | "table": { 97 | "table_name": "supplier", 98 | "access_type": "eq_ref", 99 | "possible_keys": [ 100 | "PRIMARY", 101 | "SUPPLIER_FK1", 102 | 
"idx_S_NATIONKEY_S_SUPPKEY_S_NAME" 103 | ], 104 | "key": "PRIMARY", 105 | "used_key_parts": [ 106 | "S_SUPPKEY" 107 | ], 108 | "key_length": "4", 109 | "ref": [ 110 | "videx_tpch_sf1.l1.L_SUPPKEY" 111 | ], 112 | "rows_examined_per_scan": 1, 113 | "rows_produced_per_join": 24083, 114 | "filtered": "5.00", 115 | "cost_info": { 116 | "read_cost": "192671.73", 117 | "eval_cost": "2408.40", 118 | "prefix_cost": "1086565.03", 119 | "data_read_per_join": "17M" 120 | }, 121 | "used_columns": [ 122 | "S_SUPPKEY", 123 | "S_NAME", 124 | "S_NATIONKEY" 125 | ], 126 | "attached_condition": "(`videx_tpch_sf1`.`supplier`.`S_NATIONKEY` = `videx_tpch_sf1`.`nation`.`N_NATIONKEY`)" 127 | } 128 | }, 129 | { 130 | "table": { 131 | "table_name": "l2", 132 | "access_type": "ref", 133 | "possible_keys": [ 134 | "LINEITEM_UK1", 135 | "LINEITEM_FK1" 136 | ], 137 | "key": "LINEITEM_FK1", 138 | "used_key_parts": [ 139 | "L_ORDERKEY" 140 | ], 141 | "key_length": "4", 142 | "ref": [ 143 | "videx_tpch_sf1.orders.O_ORDERKEY" 144 | ], 145 | "rows_examined_per_scan": 3, 146 | "rows_produced_per_join": 24083, 147 | "filtered": "90.00", 148 | "first_match": "supplier", 149 | "cost_info": { 150 | "read_cost": "93257.14", 151 | "eval_cost": "2408.40", 152 | "prefix_cost": "1189147.88", 153 | "data_read_per_join": "8M" 154 | }, 155 | "used_columns": [ 156 | "L_ORDERKEY", 157 | "L_SUPPKEY", 158 | "L_ID" 159 | ], 160 | "attached_condition": "(`videx_tpch_sf1`.`l2`.`L_SUPPKEY` <> `videx_tpch_sf1`.`l1`.`L_SUPPKEY`)" 161 | } 162 | }, 163 | { 164 | "table": { 165 | "table_name": "l3", 166 | "access_type": "ref", 167 | "possible_keys": [ 168 | "LINEITEM_UK1", 169 | "LINEITEM_FK1" 170 | ], 171 | "key": "LINEITEM_FK1", 172 | "used_key_parts": [ 173 | "L_ORDERKEY" 174 | ], 175 | "key_length": "4", 176 | "ref": [ 177 | "videx_tpch_sf1.orders.O_ORDERKEY" 178 | ], 179 | "rows_examined_per_scan": 3, 180 | "rows_produced_per_join": 24083, 181 | "filtered": "100.00", 182 | "not_exists": true, 183 | "cost_info": { 184 | 
"read_cost": "93257.14", 185 | "eval_cost": "2408.40", 186 | "prefix_cost": "1291730.73", 187 | "data_read_per_join": "8M" 188 | }, 189 | "used_columns": [ 190 | "L_ORDERKEY", 191 | "L_SUPPKEY", 192 | "L_COMMITDATE", 193 | "L_RECEIPTDATE", 194 | "L_ID" 195 | ], 196 | "attached_condition": "((is_not_null_compl(l3), ((`videx_tpch_sf1`.`l3`.`L_SUPPKEY` <> `videx_tpch_sf1`.`l1`.`L_SUPPKEY`) and (`videx_tpch_sf1`.`l3`.`L_RECEIPTDATE` > `videx_tpch_sf1`.`l3`.`L_COMMITDATE`)), true) and (is_not_null_compl(l3), (found_match(l3), false, true), true))" 197 | } 198 | } 199 | ] 200 | } 201 | } 202 | } 203 | } -------------------------------------------------------------------------------- /data/tpch_tiny/explain_tpch_tiny_q21_innodb.json: -------------------------------------------------------------------------------- 1 | { 2 | "query_block": { 3 | "select_id": 1, 4 | "cost_info": { 5 | "query_cost": "2503.34" 6 | }, 7 | "ordering_operation": { 8 | "using_filesort": true, 9 | "grouping_operation": { 10 | "using_temporary_table": true, 11 | "using_filesort": false, 12 | "nested_loop": [ 13 | { 14 | "table": { 15 | "table_name": "orders", 16 | "access_type": "ALL", 17 | "possible_keys": [ 18 | "PRIMARY" 19 | ], 20 | "rows_examined_per_scan": 15103, 21 | "rows_produced_per_join": 1510, 22 | "filtered": "10.00", 23 | "cost_info": { 24 | "read_cost": "1399.52", 25 | "eval_cost": "151.03", 26 | "prefix_cost": "1550.55", 27 | "data_read_per_join": "696K" 28 | }, 29 | "used_columns": [ 30 | "O_ORDERKEY", 31 | "O_ORDERSTATUS" 32 | ], 33 | "attached_condition": "(`tpch_tiny`.`orders`.`O_ORDERSTATUS` = 'F')" 34 | } 35 | }, 36 | { 37 | "table": { 38 | "table_name": "l1", 39 | "access_type": "ref", 40 | "possible_keys": [ 41 | "LINEITEM_UK1", 42 | "LINEITEM_FK1" 43 | ], 44 | "key": "LINEITEM_UK1", 45 | "used_key_parts": [ 46 | "L_ORDERKEY" 47 | ], 48 | "key_length": "4", 49 | "ref": [ 50 | "tpch_tiny.orders.O_ORDERKEY" 51 | ], 52 | "rows_examined_per_scan": 1, 53 | 
"rows_produced_per_join": 503, 54 | "filtered": "33.33", 55 | "cost_info": { 56 | "read_cost": "377.58", 57 | "eval_cost": "50.34", 58 | "prefix_cost": "2079.16", 59 | "data_read_per_join": "188K" 60 | }, 61 | "used_columns": [ 62 | "L_ORDERKEY", 63 | "L_SUPPKEY", 64 | "L_COMMITDATE", 65 | "L_RECEIPTDATE", 66 | "L_ID" 67 | ], 68 | "attached_condition": "(`tpch_tiny`.`l1`.`L_RECEIPTDATE` > `tpch_tiny`.`l1`.`L_COMMITDATE`)" 69 | } 70 | }, 71 | { 72 | "table": { 73 | "table_name": "nation", 74 | "access_type": "ALL", 75 | "possible_keys": [ 76 | "PRIMARY" 77 | ], 78 | "rows_examined_per_scan": 25, 79 | "rows_produced_per_join": 1258, 80 | "filtered": "10.00", 81 | "using_join_buffer": "hash join", 82 | "cost_info": { 83 | "read_cost": "2.61", 84 | "eval_cost": "125.85", 85 | "prefix_cost": "2207.61", 86 | "data_read_per_join": "884K" 87 | }, 88 | "used_columns": [ 89 | "N_NATIONKEY", 90 | "N_NAME" 91 | ], 92 | "attached_condition": "(`tpch_tiny`.`nation`.`N_NAME` = 'IRAQ')" 93 | } 94 | }, 95 | { 96 | "table": { 97 | "table_name": "supplier", 98 | "access_type": "eq_ref", 99 | "possible_keys": [ 100 | "PRIMARY", 101 | "SUPPLIER_FK1", 102 | "idx_S_NATIONKEY_S_SUPPKEY_S_NAME" 103 | ], 104 | "key": "PRIMARY", 105 | "used_key_parts": [ 106 | "S_SUPPKEY" 107 | ], 108 | "key_length": "4", 109 | "ref": [ 110 | "tpch_tiny.l1.L_SUPPKEY" 111 | ], 112 | "rows_examined_per_scan": 1, 113 | "rows_produced_per_join": 62, 114 | "filtered": "5.00", 115 | "cost_info": { 116 | "read_cost": "125.85", 117 | "eval_cost": "6.29", 118 | "prefix_cost": "2459.30", 119 | "data_read_per_join": "45K" 120 | }, 121 | "used_columns": [ 122 | "S_SUPPKEY", 123 | "S_NAME", 124 | "S_NATIONKEY" 125 | ], 126 | "attached_condition": "(`tpch_tiny`.`supplier`.`S_NATIONKEY` = `tpch_tiny`.`nation`.`N_NATIONKEY`)" 127 | } 128 | }, 129 | { 130 | "table": { 131 | "table_name": "l2", 132 | "access_type": "ref", 133 | "possible_keys": [ 134 | "LINEITEM_UK1", 135 | "LINEITEM_FK1" 136 | ], 137 | "key": "LINEITEM_UK1", 
138 | "used_key_parts": [ 139 | "L_ORDERKEY" 140 | ], 141 | "key_length": "4", 142 | "ref": [ 143 | "tpch_tiny.orders.O_ORDERKEY" 144 | ], 145 | "rows_examined_per_scan": 1, 146 | "rows_produced_per_join": 62, 147 | "filtered": "90.00", 148 | "first_match": "supplier", 149 | "cost_info": { 150 | "read_cost": "15.73", 151 | "eval_cost": "6.29", 152 | "prefix_cost": "2481.32", 153 | "data_read_per_join": "23K" 154 | }, 155 | "used_columns": [ 156 | "L_ORDERKEY", 157 | "L_SUPPKEY", 158 | "L_ID" 159 | ], 160 | "attached_condition": "(`tpch_tiny`.`l2`.`L_SUPPKEY` <> `tpch_tiny`.`l1`.`L_SUPPKEY`)" 161 | } 162 | }, 163 | { 164 | "table": { 165 | "table_name": "l3", 166 | "access_type": "ref", 167 | "possible_keys": [ 168 | "LINEITEM_UK1", 169 | "LINEITEM_FK1" 170 | ], 171 | "key": "LINEITEM_UK1", 172 | "used_key_parts": [ 173 | "L_ORDERKEY" 174 | ], 175 | "key_length": "4", 176 | "ref": [ 177 | "tpch_tiny.orders.O_ORDERKEY" 178 | ], 179 | "rows_examined_per_scan": 1, 180 | "rows_produced_per_join": 62, 181 | "filtered": "100.00", 182 | "not_exists": true, 183 | "cost_info": { 184 | "read_cost": "15.73", 185 | "eval_cost": "6.29", 186 | "prefix_cost": "2503.34", 187 | "data_read_per_join": "23K" 188 | }, 189 | "used_columns": [ 190 | "L_ORDERKEY", 191 | "L_SUPPKEY", 192 | "L_COMMITDATE", 193 | "L_RECEIPTDATE", 194 | "L_ID" 195 | ], 196 | "attached_condition": "((is_not_null_compl(l3), ((`tpch_tiny`.`l3`.`L_SUPPKEY` <> `tpch_tiny`.`l1`.`L_SUPPKEY`) and (`tpch_tiny`.`l3`.`L_RECEIPTDATE` > `tpch_tiny`.`l3`.`L_COMMITDATE`)), true) and (is_not_null_compl(l3), (found_match(l3), false, true), true))" 197 | } 198 | } 199 | ] 200 | } 201 | } 202 | } 203 | } -------------------------------------------------------------------------------- /data/tpch_tiny/explain_tpch_tiny_q21_innodb_57.json: -------------------------------------------------------------------------------- 1 | { 2 | "query_block": { 3 | "select_id": 1, 4 | "cost_info": { 5 | "query_cost": "5859.31" 6 | }, 7 | 
"ordering_operation": { 8 | "using_filesort": true, 9 | "grouping_operation": { 10 | "using_temporary_table": true, 11 | "using_filesort": false, 12 | "nested_loop": [ 13 | { 14 | "table": { 15 | "table_name": "orders", 16 | "access_type": "ALL", 17 | "possible_keys": [ 18 | "PRIMARY" 19 | ], 20 | "rows_examined_per_scan": 15066, 21 | "rows_produced_per_join": 1506, 22 | "filtered": "10.00", 23 | "cost_info": { 24 | "read_cost": "2872.88", 25 | "eval_cost": "301.32", 26 | "prefix_cost": "3174.20", 27 | "data_read_per_join": "694K" 28 | }, 29 | "used_columns": [ 30 | "O_ORDERKEY", 31 | "O_ORDERSTATUS" 32 | ], 33 | "attached_condition": "(`tpch_tiny_57`.`orders`.`O_ORDERSTATUS` = 'F')" 34 | } 35 | }, 36 | { 37 | "table": { 38 | "table_name": "l1", 39 | "access_type": "ref", 40 | "possible_keys": [ 41 | "LINEITEM_UK1", 42 | "LINEITEM_FK1" 43 | ], 44 | "key": "LINEITEM_FK1", 45 | "used_key_parts": [ 46 | "L_ORDERKEY" 47 | ], 48 | "key_length": "4", 49 | "ref": [ 50 | "tpch_tiny_57.orders.O_ORDERKEY" 51 | ], 52 | "rows_examined_per_scan": 1, 53 | "rows_produced_per_join": 505, 54 | "filtered": "33.33", 55 | "cost_info": { 56 | "read_cost": "1516.78", 57 | "eval_cost": "101.11", 58 | "prefix_cost": "4994.34", 59 | "data_read_per_join": "189K" 60 | }, 61 | "used_columns": [ 62 | "L_ORDERKEY", 63 | "L_SUPPKEY", 64 | "L_COMMITDATE", 65 | "L_RECEIPTDATE", 66 | "L_ID" 67 | ], 68 | "attached_condition": "((`tpch_tiny_57`.`l1`.`L_RECEIPTDATE` > `tpch_tiny_57`.`l1`.`L_COMMITDATE`) and exists(/* select#2 */ select 1 from `tpch_tiny_57`.`lineitem` `l2` where ((`tpch_tiny_57`.`l2`.`L_ORDERKEY` = `tpch_tiny_57`.`l1`.`L_ORDERKEY`) and (`tpch_tiny_57`.`l2`.`L_SUPPKEY` <> `tpch_tiny_57`.`l1`.`L_SUPPKEY`))) and (not(exists(/* select#3 */ select 1 from `tpch_tiny_57`.`lineitem` `l3` where ((`tpch_tiny_57`.`l3`.`L_ORDERKEY` = `tpch_tiny_57`.`l1`.`L_ORDERKEY`) and (`tpch_tiny_57`.`l3`.`L_SUPPKEY` <> `tpch_tiny_57`.`l1`.`L_SUPPKEY`) and (`tpch_tiny_57`.`l3`.`L_RECEIPTDATE` > 
`tpch_tiny_57`.`l3`.`L_COMMITDATE`))))))", 69 | "attached_subqueries": [ 70 | { 71 | "dependent": true, 72 | "cacheable": false, 73 | "query_block": { 74 | "select_id": 3, 75 | "cost_info": { 76 | "query_cost": "1.21" 77 | }, 78 | "table": { 79 | "table_name": "l3", 80 | "access_type": "ref", 81 | "possible_keys": [ 82 | "LINEITEM_UK1", 83 | "LINEITEM_FK1" 84 | ], 85 | "key": "LINEITEM_FK1", 86 | "used_key_parts": [ 87 | "L_ORDERKEY" 88 | ], 89 | "key_length": "4", 90 | "ref": [ 91 | "tpch_tiny_57.l1.L_ORDERKEY" 92 | ], 93 | "rows_examined_per_scan": 1, 94 | "rows_produced_per_join": 0, 95 | "filtered": "30.00", 96 | "cost_info": { 97 | "read_cost": "1.01", 98 | "eval_cost": "0.06", 99 | "prefix_cost": "1.21", 100 | "data_read_per_join": "115" 101 | }, 102 | "used_columns": [ 103 | "L_ORDERKEY", 104 | "L_SUPPKEY", 105 | "L_COMMITDATE", 106 | "L_RECEIPTDATE" 107 | ], 108 | "attached_condition": "((`tpch_tiny_57`.`l3`.`L_SUPPKEY` <> `tpch_tiny_57`.`l1`.`L_SUPPKEY`) and (`tpch_tiny_57`.`l3`.`L_RECEIPTDATE` > `tpch_tiny_57`.`l3`.`L_COMMITDATE`))" 109 | } 110 | } 111 | }, 112 | { 113 | "dependent": true, 114 | "cacheable": false, 115 | "query_block": { 116 | "select_id": 2, 117 | "cost_info": { 118 | "query_cost": "1.21" 119 | }, 120 | "table": { 121 | "table_name": "l2", 122 | "access_type": "ref", 123 | "possible_keys": [ 124 | "LINEITEM_UK1", 125 | "LINEITEM_FK1" 126 | ], 127 | "key": "LINEITEM_FK1", 128 | "used_key_parts": [ 129 | "L_ORDERKEY" 130 | ], 131 | "key_length": "4", 132 | "ref": [ 133 | "tpch_tiny_57.l1.L_ORDERKEY" 134 | ], 135 | "rows_examined_per_scan": 1, 136 | "rows_produced_per_join": 0, 137 | "filtered": "90.00", 138 | "cost_info": { 139 | "read_cost": "1.01", 140 | "eval_cost": "0.18", 141 | "prefix_cost": "1.21", 142 | "data_read_per_join": "347" 143 | }, 144 | "used_columns": [ 145 | "L_ORDERKEY", 146 | "L_SUPPKEY" 147 | ], 148 | "attached_condition": "(`tpch_tiny_57`.`l2`.`L_SUPPKEY` <> `tpch_tiny_57`.`l1`.`L_SUPPKEY`)" 149 | } 150 | } 151 | } 
152 | ] 153 | } 154 | }, 155 | { 156 | "table": { 157 | "table_name": "supplier", 158 | "access_type": "eq_ref", 159 | "possible_keys": [ 160 | "PRIMARY", 161 | "SUPPLIER_FK1", 162 | "idx_S_NATIONKEY_S_SUPPKEY_S_NAME" 163 | ], 164 | "key": "PRIMARY", 165 | "used_key_parts": [ 166 | "S_SUPPKEY" 167 | ], 168 | "key_length": "4", 169 | "ref": [ 170 | "tpch_tiny_57.l1.L_SUPPKEY" 171 | ], 172 | "rows_examined_per_scan": 1, 173 | "rows_produced_per_join": 505, 174 | "filtered": "100.00", 175 | "cost_info": { 176 | "read_cost": "505.54", 177 | "eval_cost": "101.11", 178 | "prefix_cost": "5600.99", 179 | "data_read_per_join": "367K" 180 | }, 181 | "used_columns": [ 182 | "S_SUPPKEY", 183 | "S_NAME", 184 | "S_NATIONKEY" 185 | ] 186 | } 187 | }, 188 | { 189 | "table": { 190 | "table_name": "nation", 191 | "access_type": "ALL", 192 | "possible_keys": [ 193 | "PRIMARY" 194 | ], 195 | "rows_examined_per_scan": 25, 196 | "rows_produced_per_join": 505, 197 | "filtered": "4.00", 198 | "using_join_buffer": "Block Nested Loop", 199 | "cost_info": { 200 | "read_cost": "5.54", 201 | "eval_cost": "101.11", 202 | "prefix_cost": "5859.31", 203 | "data_read_per_join": "355K" 204 | }, 205 | "used_columns": [ 206 | "N_NATIONKEY", 207 | "N_NAME" 208 | ], 209 | "attached_condition": "((`tpch_tiny_57`.`nation`.`N_NATIONKEY` = `tpch_tiny_57`.`supplier`.`S_NATIONKEY`) and (`tpch_tiny_57`.`nation`.`N_NAME` = 'IRAQ'))" 210 | } 211 | } 212 | ] 213 | } 214 | } 215 | } 216 | } -------------------------------------------------------------------------------- /data/tpch_tiny/explain_tpch_tiny_q21_videx.json: -------------------------------------------------------------------------------- 1 | { 2 | "query_block": { 3 | "select_id": 1, 4 | "cost_info": { 5 | "query_cost": "2503.34" 6 | }, 7 | "ordering_operation": { 8 | "using_filesort": true, 9 | "grouping_operation": { 10 | "using_temporary_table": true, 11 | "using_filesort": false, 12 | "nested_loop": [ 13 | { 14 | "table": { 15 | "table_name": 
"orders", 16 | "access_type": "ALL", 17 | "possible_keys": [ 18 | "PRIMARY" 19 | ], 20 | "rows_examined_per_scan": 15103, 21 | "rows_produced_per_join": 1510, 22 | "filtered": "10.00", 23 | "cost_info": { 24 | "read_cost": "1399.52", 25 | "eval_cost": "151.03", 26 | "prefix_cost": "1550.55", 27 | "data_read_per_join": "696K" 28 | }, 29 | "used_columns": [ 30 | "O_ORDERKEY", 31 | "O_ORDERSTATUS" 32 | ], 33 | "attached_condition": "(`videx_tpch_tiny`.`orders`.`O_ORDERSTATUS` = 'F')" 34 | } 35 | }, 36 | { 37 | "table": { 38 | "table_name": "l1", 39 | "access_type": "ref", 40 | "possible_keys": [ 41 | "LINEITEM_UK1", 42 | "LINEITEM_FK1" 43 | ], 44 | "key": "LINEITEM_UK1", 45 | "used_key_parts": [ 46 | "L_ORDERKEY" 47 | ], 48 | "key_length": "4", 49 | "ref": [ 50 | "videx_tpch_tiny.orders.O_ORDERKEY" 51 | ], 52 | "rows_examined_per_scan": 1, 53 | "rows_produced_per_join": 503, 54 | "filtered": "33.33", 55 | "cost_info": { 56 | "read_cost": "377.58", 57 | "eval_cost": "50.34", 58 | "prefix_cost": "2079.16", 59 | "data_read_per_join": "188K" 60 | }, 61 | "used_columns": [ 62 | "L_ORDERKEY", 63 | "L_SUPPKEY", 64 | "L_COMMITDATE", 65 | "L_RECEIPTDATE", 66 | "L_ID" 67 | ], 68 | "attached_condition": "(`videx_tpch_tiny`.`l1`.`L_RECEIPTDATE` > `videx_tpch_tiny`.`l1`.`L_COMMITDATE`)" 69 | } 70 | }, 71 | { 72 | "table": { 73 | "table_name": "nation", 74 | "access_type": "ALL", 75 | "possible_keys": [ 76 | "PRIMARY" 77 | ], 78 | "rows_examined_per_scan": 25, 79 | "rows_produced_per_join": 1258, 80 | "filtered": "10.00", 81 | "using_join_buffer": "hash join", 82 | "cost_info": { 83 | "read_cost": "2.61", 84 | "eval_cost": "125.85", 85 | "prefix_cost": "2207.61", 86 | "data_read_per_join": "884K" 87 | }, 88 | "used_columns": [ 89 | "N_NATIONKEY", 90 | "N_NAME" 91 | ], 92 | "attached_condition": "(`videx_tpch_tiny`.`nation`.`N_NAME` = 'IRAQ')" 93 | } 94 | }, 95 | { 96 | "table": { 97 | "table_name": "supplier", 98 | "access_type": "eq_ref", 99 | "possible_keys": [ 100 | "PRIMARY", 
101 | "SUPPLIER_FK1", 102 | "idx_S_NATIONKEY_S_SUPPKEY_S_NAME" 103 | ], 104 | "key": "PRIMARY", 105 | "used_key_parts": [ 106 | "S_SUPPKEY" 107 | ], 108 | "key_length": "4", 109 | "ref": [ 110 | "videx_tpch_tiny.l1.L_SUPPKEY" 111 | ], 112 | "rows_examined_per_scan": 1, 113 | "rows_produced_per_join": 62, 114 | "filtered": "5.00", 115 | "cost_info": { 116 | "read_cost": "125.85", 117 | "eval_cost": "6.29", 118 | "prefix_cost": "2459.30", 119 | "data_read_per_join": "45K" 120 | }, 121 | "used_columns": [ 122 | "S_SUPPKEY", 123 | "S_NAME", 124 | "S_NATIONKEY" 125 | ], 126 | "attached_condition": "(`videx_tpch_tiny`.`supplier`.`S_NATIONKEY` = `videx_tpch_tiny`.`nation`.`N_NATIONKEY`)" 127 | } 128 | }, 129 | { 130 | "table": { 131 | "table_name": "l2", 132 | "access_type": "ref", 133 | "possible_keys": [ 134 | "LINEITEM_UK1", 135 | "LINEITEM_FK1" 136 | ], 137 | "key": "LINEITEM_UK1", 138 | "used_key_parts": [ 139 | "L_ORDERKEY" 140 | ], 141 | "key_length": "4", 142 | "ref": [ 143 | "videx_tpch_tiny.orders.O_ORDERKEY" 144 | ], 145 | "rows_examined_per_scan": 1, 146 | "rows_produced_per_join": 62, 147 | "filtered": "90.00", 148 | "first_match": "supplier", 149 | "cost_info": { 150 | "read_cost": "15.73", 151 | "eval_cost": "6.29", 152 | "prefix_cost": "2481.32", 153 | "data_read_per_join": "23K" 154 | }, 155 | "used_columns": [ 156 | "L_ORDERKEY", 157 | "L_SUPPKEY", 158 | "L_ID" 159 | ], 160 | "attached_condition": "(`videx_tpch_tiny`.`l2`.`L_SUPPKEY` <> `videx_tpch_tiny`.`l1`.`L_SUPPKEY`)" 161 | } 162 | }, 163 | { 164 | "table": { 165 | "table_name": "l3", 166 | "access_type": "ref", 167 | "possible_keys": [ 168 | "LINEITEM_UK1", 169 | "LINEITEM_FK1" 170 | ], 171 | "key": "LINEITEM_UK1", 172 | "used_key_parts": [ 173 | "L_ORDERKEY" 174 | ], 175 | "key_length": "4", 176 | "ref": [ 177 | "videx_tpch_tiny.orders.O_ORDERKEY" 178 | ], 179 | "rows_examined_per_scan": 1, 180 | "rows_produced_per_join": 62, 181 | "filtered": "100.00", 182 | "not_exists": true, 183 | "cost_info": 
{ 184 | "read_cost": "15.73", 185 | "eval_cost": "6.29", 186 | "prefix_cost": "2503.34", 187 | "data_read_per_join": "23K" 188 | }, 189 | "used_columns": [ 190 | "L_ORDERKEY", 191 | "L_SUPPKEY", 192 | "L_COMMITDATE", 193 | "L_RECEIPTDATE", 194 | "L_ID" 195 | ], 196 | "attached_condition": "((is_not_null_compl(l3), ((`videx_tpch_tiny`.`l3`.`L_SUPPKEY` <> `videx_tpch_tiny`.`l1`.`L_SUPPKEY`) and (`videx_tpch_tiny`.`l3`.`L_RECEIPTDATE` > `videx_tpch_tiny`.`l3`.`L_COMMITDATE`)), true) and (is_not_null_compl(l3), (found_match(l3), false, true), true))" 197 | } 198 | } 199 | ] 200 | } 201 | } 202 | } 203 | } -------------------------------------------------------------------------------- /data/tpch_tiny/explain_tpch_tiny_q21_videx_57.json: -------------------------------------------------------------------------------- 1 | { 2 | "query_block": { 3 | "select_id": 1, 4 | "cost_info": { 5 | "query_cost": "5860.64" 6 | }, 7 | "ordering_operation": { 8 | "using_filesort": true, 9 | "grouping_operation": { 10 | "using_temporary_table": true, 11 | "using_filesort": false, 12 | "nested_loop": [ 13 | { 14 | "table": { 15 | "table_name": "orders", 16 | "access_type": "ALL", 17 | "possible_keys": [ 18 | "PRIMARY" 19 | ], 20 | "rows_examined_per_scan": 15066, 21 | "rows_produced_per_join": 1506, 22 | "filtered": "10.00", 23 | "cost_info": { 24 | "read_cost": "2872.88", 25 | "eval_cost": "301.32", 26 | "prefix_cost": "3174.20", 27 | "data_read_per_join": "694K" 28 | }, 29 | "used_columns": [ 30 | "O_ORDERKEY", 31 | "O_ORDERSTATUS" 32 | ], 33 | "attached_condition": "(`videx_tpch_tiny`.`orders`.`O_ORDERSTATUS` = 'F')" 34 | } 35 | }, 36 | { 37 | "table": { 38 | "table_name": "l1", 39 | "access_type": "ref", 40 | "possible_keys": [ 41 | "LINEITEM_UK1", 42 | "LINEITEM_FK1" 43 | ], 44 | "key": "LINEITEM_FK1", 45 | "used_key_parts": [ 46 | "L_ORDERKEY" 47 | ], 48 | "key_length": "4", 49 | "ref": [ 50 | "videx_tpch_tiny.orders.O_ORDERKEY" 51 | ], 52 | "rows_examined_per_scan": 1, 53 | 
"rows_produced_per_join": 505, 54 | "filtered": "33.33", 55 | "cost_info": { 56 | "read_cost": "1516.78", 57 | "eval_cost": "101.11", 58 | "prefix_cost": "4994.34", 59 | "data_read_per_join": "189K" 60 | }, 61 | "used_columns": [ 62 | "L_ORDERKEY", 63 | "L_SUPPKEY", 64 | "L_COMMITDATE", 65 | "L_RECEIPTDATE", 66 | "L_ID" 67 | ], 68 | "attached_condition": "((`videx_tpch_tiny`.`l1`.`L_RECEIPTDATE` > `videx_tpch_tiny`.`l1`.`L_COMMITDATE`) and exists(/* select#2 */ select 1 from `videx_tpch_tiny`.`lineitem` `l2` where ((`videx_tpch_tiny`.`l2`.`L_ORDERKEY` = `videx_tpch_tiny`.`l1`.`L_ORDERKEY`) and (`videx_tpch_tiny`.`l2`.`L_SUPPKEY` <> `videx_tpch_tiny`.`l1`.`L_SUPPKEY`))) and exists(/* select#3 */ select 1 from `videx_tpch_tiny`.`lineitem` `l3` where ((`videx_tpch_tiny`.`l3`.`L_ORDERKEY` = `videx_tpch_tiny`.`l1`.`L_ORDERKEY`) and (`videx_tpch_tiny`.`l3`.`L_SUPPKEY` <> `videx_tpch_tiny`.`l1`.`L_SUPPKEY`) and (`videx_tpch_tiny`.`l3`.`L_RECEIPTDATE` > `videx_tpch_tiny`.`l3`.`L_COMMITDATE`))) is false)", 69 | "attached_subqueries": [ 70 | { 71 | "dependent": true, 72 | "cacheable": false, 73 | "query_block": { 74 | "select_id": 3, 75 | "cost_info": { 76 | "query_cost": "1.21" 77 | }, 78 | "table": { 79 | "table_name": "l3", 80 | "access_type": "ref", 81 | "possible_keys": [ 82 | "LINEITEM_UK1", 83 | "LINEITEM_FK1" 84 | ], 85 | "key": "LINEITEM_FK1", 86 | "used_key_parts": [ 87 | "L_ORDERKEY" 88 | ], 89 | "key_length": "4", 90 | "ref": [ 91 | "videx_tpch_tiny.l1.L_ORDERKEY" 92 | ], 93 | "rows_examined_per_scan": 1, 94 | "rows_produced_per_join": 0, 95 | "filtered": "30.00", 96 | "cost_info": { 97 | "read_cost": "1.01", 98 | "eval_cost": "0.06", 99 | "prefix_cost": "1.21", 100 | "data_read_per_join": "115" 101 | }, 102 | "used_columns": [ 103 | "L_ORDERKEY", 104 | "L_SUPPKEY", 105 | "L_COMMITDATE", 106 | "L_RECEIPTDATE" 107 | ], 108 | "attached_condition": "((`videx_tpch_tiny`.`l3`.`L_SUPPKEY` <> `videx_tpch_tiny`.`l1`.`L_SUPPKEY`) and 
(`videx_tpch_tiny`.`l3`.`L_RECEIPTDATE` > `videx_tpch_tiny`.`l3`.`L_COMMITDATE`))" 109 | } 110 | } 111 | }, 112 | { 113 | "dependent": true, 114 | "cacheable": false, 115 | "query_block": { 116 | "select_id": 2, 117 | "cost_info": { 118 | "query_cost": "1.21" 119 | }, 120 | "table": { 121 | "table_name": "l2", 122 | "access_type": "ref", 123 | "possible_keys": [ 124 | "LINEITEM_UK1", 125 | "LINEITEM_FK1" 126 | ], 127 | "key": "LINEITEM_FK1", 128 | "used_key_parts": [ 129 | "L_ORDERKEY" 130 | ], 131 | "key_length": "4", 132 | "ref": [ 133 | "videx_tpch_tiny.l1.L_ORDERKEY" 134 | ], 135 | "rows_examined_per_scan": 1, 136 | "rows_produced_per_join": 0, 137 | "filtered": "90.00", 138 | "cost_info": { 139 | "read_cost": "1.01", 140 | "eval_cost": "0.18", 141 | "prefix_cost": "1.21", 142 | "data_read_per_join": "347" 143 | }, 144 | "used_columns": [ 145 | "L_ORDERKEY", 146 | "L_SUPPKEY" 147 | ], 148 | "attached_condition": "(`videx_tpch_tiny`.`l2`.`L_SUPPKEY` <> `videx_tpch_tiny`.`l1`.`L_SUPPKEY`)" 149 | } 150 | } 151 | } 152 | ] 153 | } 154 | }, 155 | { 156 | "table": { 157 | "table_name": "supplier", 158 | "access_type": "eq_ref", 159 | "possible_keys": [ 160 | "PRIMARY", 161 | "SUPPLIER_FK1", 162 | "idx_S_NATIONKEY_S_SUPPKEY_S_NAME" 163 | ], 164 | "key": "PRIMARY", 165 | "used_key_parts": [ 166 | "S_SUPPKEY" 167 | ], 168 | "key_length": "4", 169 | "ref": [ 170 | "videx_tpch_tiny.l1.L_SUPPKEY" 171 | ], 172 | "rows_examined_per_scan": 1, 173 | "rows_produced_per_join": 505, 174 | "filtered": "100.00", 175 | "cost_info": { 176 | "read_cost": "505.54", 177 | "eval_cost": "101.11", 178 | "prefix_cost": "5600.99", 179 | "data_read_per_join": "367K" 180 | }, 181 | "used_columns": [ 182 | "S_SUPPKEY", 183 | "S_NAME", 184 | "S_NATIONKEY" 185 | ] 186 | } 187 | }, 188 | { 189 | "table": { 190 | "table_name": "nation", 191 | "access_type": "ALL", 192 | "possible_keys": [ 193 | "PRIMARY" 194 | ], 195 | "rows_examined_per_scan": 25, 196 | "rows_produced_per_join": 505, 197 | 
"filtered": "4.00", 198 | "using_join_buffer": "hash join", 199 | "cost_info": { 200 | "read_cost": "6.88", 201 | "eval_cost": "101.11", 202 | "prefix_cost": "5860.64", 203 | "data_read_per_join": "355K" 204 | }, 205 | "used_columns": [ 206 | "N_NATIONKEY", 207 | "N_NAME" 208 | ], 209 | "attached_condition": "((`videx_tpch_tiny`.`nation`.`N_NAME` = 'IRAQ') and (`videx_tpch_tiny`.`nation`.`N_NATIONKEY` = `videx_tpch_tiny`.`supplier`.`S_NATIONKEY`))" 210 | } 211 | } 212 | ] 213 | } 214 | } 215 | } 216 | } -------------------------------------------------------------------------------- /data/tpch_tiny/tpch_tiny.sql.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/videx/6ccab9a4b8339d1cf96e20385414522dee349478/data/tpch_tiny/tpch_tiny.sql.tar.gz -------------------------------------------------------------------------------- /doc/explain_tpch_sf1_compare.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/videx/6ccab9a4b8339d1cf96e20385414522dee349478/doc/explain_tpch_sf1_compare.png -------------------------------------------------------------------------------- /doc/explain_tpch_tiny_compare.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/videx/6ccab9a4b8339d1cf96e20385414522dee349478/doc/explain_tpch_tiny_compare.png -------------------------------------------------------------------------------- /doc/explain_tpch_tiny_compare_alter_index.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/videx/6ccab9a4b8339d1cf96e20385414522dee349478/doc/explain_tpch_tiny_compare_alter_index.png -------------------------------------------------------------------------------- /doc/explain_tpch_tiny_mysql57_compare.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/videx/6ccab9a4b8339d1cf96e20385414522dee349478/doc/explain_tpch_tiny_mysql57_compare.png -------------------------------------------------------------------------------- /doc/explain_tpch_tiny_table_for_mysql57.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/videx/6ccab9a4b8339d1cf96e20385414522dee349478/doc/explain_tpch_tiny_table_for_mysql57.png -------------------------------------------------------------------------------- /doc/installation.md: -------------------------------------------------------------------------------- 1 | # Installation Guide 2 | 3 |

4 | English | 5 | 简体中文 6 |

7 | 8 | 9 | VIDEX supports the following installation methods: 10 | 11 | 1. Compile a complete MySQL Server (including VIDEX engine) 12 | 2. Compile VIDEX plugin and install it on an existing MySQL Server 13 | 3. Use Docker image installation method 14 | 15 | ## 1. Preparation 16 | 17 | ### 1.1 Download Code 18 | 19 | > Using Percona-MySQL 8.0.34-26 as an example. 20 | 21 | ```bash 22 | cd ~/code 23 | VIDEX_HOME=$(pwd)/videx_server 24 | MySQL8_HOME=$(pwd)/mysql_server 25 | 26 | # Clone repositories 27 | git clone git@github.com:bytedance/videx.git $VIDEX_HOME 28 | git clone --depth=1 --recursive -b release-8.0.34-26 https://github.com/percona/percona-server.git $MySQL8_HOME 29 | 30 | # Download Boost library to speed up installation 31 | mkdir $MySQL8_HOME/boost && cd $MySQL8_HOME/boost 32 | wget https://archives.boost.io/release/1.77.0/source/boost_1_77_0.tar.bz2 33 | tar -xvjf boost_1_77_0.tar.bz2 34 | ``` 35 | 36 | ### 1.2 Install Dependencies 37 | 38 | VIDEX depends on the following components: 39 | - MySQL build environment: gcc, cmake, bison, etc. (see `build/Dockerfile.build_env`) 40 | - Python 3.9 41 | 42 | > Tip: Refer to `build/Dockerfile.build_env` for setting up a complete build environment 43 | 44 | ## 2. Installation Method 1: Compile Complete MySQL Server 45 | 46 | This step is an alternative to [3. Installation Method 2: Compile VIDEX Plugin](#3-installation-method-2-compile-videx-plugin). 47 | This method will compile a complete MySQL Server with the VIDEX engine included. 48 | 49 | ```bash 50 | # Build 51 | cd $VIDEX_HOME/build && bash build.sh 52 | ``` 53 | 54 | > Note: You can customize the following in `build/config.sh`: 55 | > - VIDEX/MySQL repository locations 56 | > - MySQL/VIDEX service ports 57 | > - Other configuration options 58 | 59 | ## 3. Installation Method 2: Compile VIDEX Plugin 60 | 61 | This method only compiles the VIDEX plugin, which can be installed on an existing MySQL Server. 62 | 63 | This step is an alternative to [2. 
Installation Method 1: Compile Complete MySQL Server](#2-installation-method-1-compile-complete-mysql-server). 64 | Users can compile just the videx plugin and install it on a running mysql-server. 65 | 66 | > Important: The MySQL version in the build environment must exactly match the target MySQL Server version 67 | 68 | ### 3.1 Compile Plugin 69 | ```bash 70 | cd $VIDEX_HOME/build && bash build_videx.sh 71 | ``` 72 | The plugin file `ha_videx.so` will be generated in `mysql_build_output/build/plugin_output_directory/` 73 | 74 | ### 3.2 Install Plugin 75 | 76 | 1. Check MySQL plugin directory: 77 | 78 | ```sql 79 | SHOW VARIABLES LIKE "%plugin%" 80 | +-----------------+---------------------------------------+ 81 | | Variable_name | Value | 82 | +-----------------+---------------------------------------+ 83 | | plugin_dir | /path/to/percona-mysql-8/lib/plugin/ | 84 | +-----------------+---------------------------------------+ 85 | ``` 86 | 87 | 2. Copy plugin to plugin directory: 88 | ```bash 89 | cp ha_videx.so /path/to/percona-mysql-8/lib/plugin/ 90 | ``` 91 | 92 | 3. Install plugin: 93 | ```sql 94 | INSTALL PLUGIN VIDEX SONAME 'ha_videx.so'; 95 | ``` 96 | 97 | 4. Verify installation: 98 | ```sql 99 | SHOW ENGINES; -- VIDEX should appear in the engine list 100 | ``` 101 | 102 | ## 4. Start Service 103 | 104 | ### 4.1 Complete Environment Startup 105 | 106 | If you compiled a complete MySQL Server (Installation Method 1), you can use scripts to start everything with one command: 107 | 108 | 1. Initialize service: 109 | ```bash 110 | cd $VIDEX_HOME/build && bash init_server.sh 111 | ``` 112 | 113 | 2. Start service: 114 | ```bash 115 | cd $VIDEX_HOME/build && bash start_server.sh 116 | ``` 117 | 118 | ### 4.2 Start VIDEX Server Independently 119 | 120 | 1. Prepare Python environment: 121 | ```bash 122 | cd $VIDEX_HOME 123 | conda create -n videx_py39 python=3.9 124 | conda activate videx_py39 125 | python3.9 -m pip install -e . 
--use-pep517 126 | ``` 127 | 128 | 2. Start service: 129 | ```bash 130 | cd $VIDEX_HOME/src/sub_platforms/sql_opt/videx/scripts 131 | python start_videx_server.py --port 5001 132 | ``` 133 | 134 | ## 5. Installation Method 3: Using Docker Image 135 | 136 | ### 5.1 Preparation 137 | 138 | First, complete step 1: download the VIDEX and MySQL code. 139 | Ensure that the VIDEX and MySQL code are in the same directory, named `videx_server` and `mysql_server` respectively. 140 | You may use symbolic links. 141 | 142 | ### 5.2 Build and Run Docker Image 143 | 144 | 1. Build the environment image: 145 | ```bash 146 | cd videx_server 147 | docker build -t videx_build:latest -f build/Dockerfile.build_env . 148 | ``` 149 | 150 | 2. Build the VIDEX image: 151 | ```bash 152 | docker build -t videx:latest -f build/Dockerfile.videx .. 153 | ``` 154 | 155 | > Note: This process requires significant memory resources (at least 8GB Docker memory is recommended). 156 | 157 | 3. Run the Docker image: 158 | ```bash 159 | docker run -d --name videx-server \ 160 | -p 13308:13308 \ 161 | -p 5001:5001 \ 162 | videx:latest 163 | ``` -------------------------------------------------------------------------------- /doc/installation_zh.md: -------------------------------------------------------------------------------- 1 | # Installation Guide 2 | 3 |

4 | English | 5 | 简体中文 6 |

7 | 8 | 9 | VIDEX 支持以下安装方式: 10 | 11 | 1. 编译完整的 MySQL Server (包含 VIDEX 引擎) 12 | 2. 编译 VIDEX 插件并安装到现有 MySQL Server 13 | 3. 使用 Docker 镜像方式安装 14 | 15 | ## 1. 准备工作 16 | 17 | ### 1.1 下载代码 18 | 19 | > 以 Percona-MySQL 8.0.34-26 为例。 20 | 21 | ```bash 22 | cd ~/code 23 | VIDEX_HOME=$(pwd)/videx_server 24 | MySQL8_HOME=$(pwd)/mysql_server 25 | 26 | # 克隆代码仓库 27 | git clone git@github.com:bytedance/videx.git $VIDEX_HOME 28 | git clone --depth=1 --recursive -b release-8.0.34-26 https://github.com/percona/percona-server.git $MySQL8_HOME 29 | 30 | # 下载 Boost 库,加速安装 31 | mkdir $MySQL8_HOME/boost && cd $MySQL8_HOME/boost 32 | wget https://archives.boost.io/release/1.77.0/source/boost_1_77_0.tar.bz2 33 | tar -xvjf boost_1_77_0.tar.bz2 34 | ``` 35 | 36 | ### 1.2 安装依赖 37 | 38 | VIDEX 依赖以下组件: 39 | - MySQL 编译环境:gcc, cmake, bison 等(详见 `build/Dockerfile.build_env`) 40 | - Python 3.9 41 | 42 | > 提示:可参考 `build/Dockerfile.build_env` 准备完整的编译环境 43 | 44 | ## 2. 安装方式一:编译完整 MySQL Server 45 | 46 | 这一步是 [3. 安装方式二:编译 VIDEX 插件](#3-安装方式二编译-videx-插件) 的替代项。这种方式会编译一个包含 VIDEX 引擎的完整 MySQL Server。 47 | 48 | ```bash 49 | # 编译 50 | cd $VIDEX_HOME/build && bash build.sh 51 | ``` 52 | 53 | > 注:可通过修改 `build/config.sh` 自定义: 54 | > - VIDEX/MySQL 代码仓库位置 55 | > - MySQL/VIDEX 服务端口 56 | > - 其他配置项 57 | 58 | ## 3. 安装方式二:编译 VIDEX 插件 59 | 60 | 这种方式仅编译 VIDEX 插件,可安装到现有的 MySQL Server。 61 | 62 | 这一步是 [2. 安装方式一:编译完整 MySQL Server](#2-安装方式一编译完整-mysql-server) 的替代项。 63 | 用户可以仅编译一个 videx 插件,然后安装到正在运行的 mysql-server 上。 64 | 65 | 66 | > 重要:编译环境的 MySQL 版本必须与目标 MySQL Server 完全一致 67 | 68 | ### 3.1 编译插件 69 | ```bash 70 | cd $VIDEX_HOME/build && bash build_videx.sh 71 | ``` 72 | 插件文件 `ha_videx.so` 将生成在 `mysql_build_output/build/plugin_output_directory/` 73 | 74 | ### 3.2 安装插件 75 | 76 | 1. 
查看 MySQL 插件目录: 77 | 78 | ```sql 79 | SHOW VARIABLES LIKE "%plugin%" 80 | +-----------------+---------------------------------------+ 81 | | Variable_name | Value | 82 | +-----------------+---------------------------------------+ 83 | | plugin_dir | /path/to/percona-mysql-8/lib/plugin/ | 84 | +-----------------+---------------------------------------+ 85 | ``` 86 | 87 | 2. 拷贝插件到插件目录: 88 | ```bash 89 | cp ha_videx.so /path/to/percona-mysql-8/lib/plugin/ 90 | ``` 91 | 92 | 3. 安装插件: 93 | ```sql 94 | INSTALL PLUGIN VIDEX SONAME 'ha_videx.so'; 95 | ``` 96 | 97 | 4. 验证安装: 98 | ```sql 99 | SHOW ENGINES; -- VIDEX 应出现在引擎列表中 100 | ``` 101 | 102 | ## 4. 启动服务 103 | 104 | ### 4.1 完整环境启动 105 | 106 | 如果您编译了完整的 MySQL Server(安装方式一),可以使用脚本一键启动: 107 | 108 | 1. 初始化服务: 109 | ```bash 110 | cd $VIDEX_HOME/build && bash init_server.sh 111 | ``` 112 | 113 | 2. 启动服务: 114 | ```bash 115 | cd $VIDEX_HOME/build && bash start_server.sh 116 | ``` 117 | 118 | ### 4.2 独立启动 VIDEX Server 119 | 120 | 1. 准备 Python 环境: 121 | ```bash 122 | cd $VIDEX_HOME 123 | conda create -n videx_py39 python=3.9 124 | conda activate videx_py39 125 | python3.9 -m pip install -e . --use-pep517 126 | ``` 127 | 128 | 2. 启动服务: 129 | ```bash 130 | cd $VIDEX_HOME/src/sub_platforms/sql_opt/videx/scripts 131 | python start_videx_server.py --port 5001 132 | ``` 133 | 134 | ## 5. 安装方式三:使用 Docker 镜像 135 | 136 | ### 5.1 准备工作 137 | 138 | 首先完成步骤 1:下载 VIDEX 和 MySQL 的代码。 139 | 请确保 VIDEX 和 MySQL 的代码位于同一目录下。并且目录名分别为 `videx_server` 和 `mysql_server`。你可以使用软链接。 140 | 141 | ### 5.2 构建和运行 Docker 镜像 142 | 143 | 1. 构建环境镜像: 144 | ```bash 145 | cd videx_server 146 | docker build -t videx_build:latest -f build/Dockerfile.build_env . 147 | ``` 148 | 149 | 2. 构建 VIDEX 镜像: 150 | ```bash 151 | docker build -t videx:latest -f build/Dockerfile.videx .. 152 | ``` 153 | 154 | > 注意:此过程需要较大内存资源(建议至少 8GB Docker 内存)。 155 | 156 | 3. 
运行 Docker 镜像: 157 | ```bash 158 | docker run -d --name videx-server \ 159 | -p 13308:13308 \ 160 | -p 5001:5001 \ 161 | videx:latest 162 | ``` -------------------------------------------------------------------------------- /doc/videx-structure.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/videx/6ccab9a4b8339d1cf96e20385414522dee349478/doc/videx-structure.png -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools>=61.0"] 3 | build-backend = "setuptools.build_meta" 4 | 5 | [project] 6 | name = "videx" 7 | version = "0.0.3" 8 | description = "videx, the Disaggregated, Extensible Virtual Index Engine for What-If Analysis" 9 | readme = "README.md" 10 | requires-python = ">=3.9" 11 | license = {text = "MIT and GPL-2.0"} 12 | authors = [ 13 | {name = "kangrong", email = "kr11thss@gmail.com"} 14 | ] 15 | keywords = ["index", "database", "what-if", "mysql", "virtual index"] 16 | classifiers = [ 17 | "Development Status :: 3 - Alpha", 18 | "Intended Audience :: Developers", 19 | "Topic :: Software Development :: Build Tools", 20 | "License :: OSI Approved :: MIT License", 21 | "License :: OSI Approved :: GNU General Public License v2 (GPLv2)", 22 | "Programming Language :: Python :: 3", 23 | "Programming Language :: Python :: 3.9", 24 | "Programming Language :: Python :: 3.10", 25 | "Programming Language :: Python :: 3 :: Only", 26 | ] 27 | dependencies = [ 28 | "DBUtils==3.0.3", 29 | "msgpack==1.0.5", 30 | "numpy==1.24.4", 31 | "pandas==2.0.3", 32 | "pyarrow==12.0.1", 33 | "PyMySQL==0.9.3", 34 | "requests==2.29.0", 35 | "SQLAlchemy==2.0.18", 36 | "dataclasses-json==0.5.13", 37 | "pyyaml==6.0.1", 38 | "tqdm==4.65.2", 39 | "matplotlib==3.7.2", 40 | "Flask==2.3.3", 41 | "flask-restx==1.3.0", 42 | "gunicorn~=21.2.0", 43 | 
"psutil==5.9.5", 44 | "retrying==1.3.4", 45 | "setuptools==75.6.0", 46 | "Werkzeug==3.1.3", 47 | "pyDes~=2.0.1", 48 | "cachetools~=5.3.3", 49 | "estndv==0.0.2", 50 | "sqlglot==25.4.1", 51 | "pydantic==2.10.4" 52 | ] 53 | 54 | [project.urls] 55 | Homepage = "https://github.com/bytedance/videx" 56 | Repository = "https://github.com/bytedance/videx" 57 | Documentation = "https://github.com/bytedance/videx/blob/main/README.md" 58 | "Bug Tracker" = "https://github.com/bytedance/videx/issues" 59 | 60 | [tool.setuptools.packages.find] 61 | where = ["src"] 62 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | DBUtils==3.0.3 2 | msgpack==1.0.5 3 | numpy==1.24.4 4 | pandas==2.0.3 5 | pyarrow==12.0.1 6 | PyMySQL==0.9.3 7 | requests==2.29.0 8 | SQLAlchemy==2.0.18 9 | dataclasses-json==0.5.13 10 | pyyaml==6.0.1 11 | tqdm==4.65.2 12 | matplotlib==3.7.2 13 | Flask==2.3.3 14 | flask-restx==1.3.0 15 | gunicorn~=21.2.0 16 | psutil==5.9.5 17 | retrying==1.3.4 18 | setuptools==75.6.0 19 | Werkzeug==3.1.3 20 | pyDes~=2.0.1 21 | cachetools~=5.3.3 22 | estndv==0.0.2 23 | sqlglot==25.4.1 24 | pydantic==2.10.4 -------------------------------------------------------------------------------- /src/mysql/videx/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2006, 2023, Oracle and/or its affiliates. 2 | # 3 | # This program is free software; you can redistribute it and/or modify 4 | # it under the terms of the GNU General Public License, version 2.0, 5 | # as published by the Free Software Foundation. 6 | # 7 | # This program is also distributed with certain software (including 8 | # but not limited to OpenSSL) that is licensed under separate terms, 9 | # as designated in a particular file or component or in included license 10 | # documentation. 
# The authors of MySQL hereby grant you an additional
# permission to link the program and your derivative works with the
# separately licensed software that they have included with MySQL.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License, version 2.0, for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA

# Build configuration for the VIDEX storage engine plugin. This file is
# consumed by the MySQL server's CMake tree (it relies on MYSQL_ADD_PLUGIN
# and the ext::zlib / ext::curl targets defined there).
DISABLE_MISSING_PROFILE_WARNING()
# Compile against server-internal (non-plugin-API) headers.
ADD_DEFINITIONS(-DMYSQL_SERVER)
ADD_DEFINITIONS(-DMUTEX_FUTEX)

# Server-internal headers plus the rapidjson and curl copies bundled with
# the MySQL source tree.
INCLUDE_DIRECTORIES(
${CMAKE_SOURCE_DIR}/sql
${CMAKE_SOURCE_DIR}/sql/auth
${CMAKE_SOURCE_DIR}/extra/rapidjson/include
${CMAKE_SOURCE_DIR}/extra/curl/curl-8.1.2/include
)

IF (WITH_VIDEX_STORAGE_ENGINE AND NOT WITHOUT_VIDEX_STORAGE_ENGINE)
# Check if the dynamic library is explicitly specified
IF (PLUGIN_VIDEX STREQUAL "DYNAMIC")
# Build only the dynamic library (loadable ha_videx.so)
MESSAGE(STATUS "Building VIDEX as dynamic plugin only")
MYSQL_ADD_PLUGIN(videx
videx_json_item.cc
videx_log_utils.cc
ha_videx.cc
STORAGE_ENGINE
MODULE_ONLY
LINK_LIBRARIES ext::zlib ext::curl
)
ELSE ()
# Default: Build both static and dynamic libraries
MESSAGE(STATUS "Building VIDEX as static plugin (default)")
MYSQL_ADD_PLUGIN(videx_static
videx_json_item.cc
videx_log_utils.cc
ha_videx.cc
STORAGE_ENGINE
DEFAULT
STATIC_ONLY
LINK_LIBRARIES ext::zlib ext::curl
)
# STATIC_VIDEX lets the sources distinguish the statically linked build.
target_compile_definitions(videx_static PRIVATE STATIC_VIDEX)

MESSAGE(STATUS "Building VIDEX as dynamic plugin")
# Generate the dynamic library at the same time
MYSQL_ADD_PLUGIN(videx
videx_json_item.cc
videx_log_utils.cc
ha_videx.cc
STORAGE_ENGINE
MODULE_ONLY
LINK_LIBRARIES ext::zlib ext::curl
)
ENDIF ()
ENDIF ()
18 | 19 | You should have received a copy of the GNU General Public License 20 | along with this program; if not, write to the Free Software 21 | Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ 22 | 23 | #include "videx_json_item.h" 24 | 25 | /** 26 | * A simple parsing function is written here instead, 27 | * since rapid_json always encounters strange segmentation faults across platforms, 28 | * 29 | * @param json 30 | * @param code 31 | * @param message 32 | * @param data_dict 33 | * @return 34 | */ 35 | int videx_parse_simple_json(const std::string &json, int &code, std::string &message, 36 | std::map &data_dict) { 37 | try { 38 | // find code and message 39 | std::size_t pos_code = json.find("\"code\":"); 40 | std::size_t pos_message = json.find("\"message\":"); 41 | std::size_t pos_data = json.find("\"data\":"); 42 | 43 | if (pos_code == std::string::npos || pos_message == std::string::npos || pos_data == std::string::npos) { 44 | throw std::invalid_argument("Missing essential components in JSON."); 45 | } 46 | 47 | // parse code 48 | std::size_t start = json.find_first_of("0123456789", pos_code); 49 | std::size_t end = json.find(',', start); 50 | code = std::stoi(json.substr(start, end - start)); 51 | 52 | // parse message 53 | start = json.find('\"', pos_message + 10) + 1; 54 | end = json.find('\"', start); 55 | message = json.substr(start, end - start); 56 | 57 | // parse data 58 | start = json.find('{', pos_data) + 1; 59 | end = json.find('}', start); 60 | std::string data_content = json.substr(start, end - start); 61 | std::istringstream data_stream(data_content); 62 | std::string line; 63 | 64 | while (std::getline(data_stream, line, ',')) { 65 | std::size_t colon_pos = line.find(':'); 66 | if (colon_pos == std::string::npos) { 67 | continue; // Skip malformed line 68 | } 69 | std::string key = line.substr(0, colon_pos); 70 | std::string value = line.substr(colon_pos + 1); 71 | 72 | // clean key 和 value 73 | auto 
trim_quotes_and_space = [](std::string &str) { 74 | // Trim whitespace and surrounding quotes 75 | size_t first = str.find_first_not_of(" \t\n\""); 76 | size_t last = str.find_last_not_of(" \t\n\""); 77 | if (first == std::string::npos || last == std::string::npos) { 78 | str.clear(); // All whitespace or empty 79 | } else { 80 | str = str.substr(first, last - first + 1); 81 | } 82 | }; 83 | 84 | trim_quotes_and_space(key); 85 | trim_quotes_and_space(value); 86 | 87 | data_dict[key] = value; 88 | } 89 | 90 | return 0; 91 | } catch (std::exception &e) { 92 | std::cerr << "Failed to parse JSON: " << e.what() << std::endl; 93 | message = e.what(); 94 | code = -1; 95 | return 1; 96 | } 97 | } 98 | 99 | 100 | /** 101 | * This function is used to escape double quotes in a string. 102 | * @param input 103 | * @param len 104 | * @return 105 | */ 106 | std::string videx_escape_double_quotes(const std::string &input, size_t len) { 107 | if (len == std::string::npos) len = input.length(); 108 | 109 | // if (len > input.length()) { 110 | // throw std::invalid_argument("Length exceeds input string size"); 111 | // } 112 | 113 | std::string output = input.substr(0, len); 114 | size_t pos = output.find('\\'); 115 | while (pos != std::string::npos) { 116 | output.replace(pos, 1, "\\\\"); 117 | pos = output.find('\\', pos + 2); 118 | } 119 | // replace " 120 | pos = output.find('\"'); 121 | while (pos != std::string::npos) { 122 | output.replace(pos, 1, "\\\""); 123 | pos = output.find('\"', pos + 2); 124 | } 125 | 126 | // replace \n with space 127 | pos = output.find('\n'); 128 | while (pos != std::string::npos) { 129 | output.replace(pos, 1, " "); 130 | pos = output.find('\n', pos + 1); 131 | } 132 | 133 | // replace \t with space 134 | pos = output.find('\t'); 135 | while (pos != std::string::npos) { 136 | output.replace(pos, 1, " "); 137 | pos = output.find('\t', pos + 1); 138 | } 139 | return output; 140 | } 141 | 
-------------------------------------------------------------------------------- /src/mysql/videx/videx_json_item.h: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2024 Bytedance Ltd. and/or its affiliates 2 | 3 | This program is free software; you can redistribute it and/or modify 4 | it under the terms of the GNU General Public License, version 2.0, 5 | as published by the Free Software Foundation. 6 | 7 | This program is also distributed with certain software (including 8 | but not limited to OpenSSL) that is licensed under separate terms, 9 | as designated in a particular file or component or in included license 10 | documentation. The authors of MySQL hereby grant you an additional 11 | permission to link the program and your derivative works with the 12 | separately licensed software that they have included with MySQL. 13 | 14 | This program is distributed in the hope that it will be useful, 15 | but WITHOUT ANY WARRANTY; without even the implied warranty of 16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17 | GNU General Public License, version 2.0, for more details. 
18 | 19 | You should have received a copy of the GNU General Public License 20 | along with this program; if not, write to the Free Software 21 | Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ 22 | 23 | #ifndef VIDEX_JSON_ITEM_H 24 | #define VIDEX_JSON_ITEM_H 25 | 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | #include 33 | #include 34 | #include 35 | #include 36 | #include 37 | #include 38 | #include 39 | #include 40 | #include 41 | #include "sql_string.h" 42 | #include 43 | 44 | typedef std::map VidexStringMap; 45 | 46 | inline bool videx_contains_key(const VidexStringMap &myMap, const std::string &key) { 47 | return myMap.find(key) != myMap.end(); 48 | } 49 | 50 | int videx_parse_simple_json(const std::string &json, int &code, std::string &message, 51 | std::map &data_dict); 52 | 53 | 54 | std::string videx_escape_double_quotes(const std::string &input, 55 | size_t len = std::string::npos); 56 | 57 | class VidexJsonItem { 58 | public: 59 | std::string item_type; 60 | std::map properties; 61 | std::list data; 62 | int depth; 63 | 64 | VidexJsonItem() 65 | : item_type("empty"), depth(0) {} 66 | 67 | /// specify item_type 68 | VidexJsonItem(const std::string &item_type, int depth) 69 | : item_type(item_type), depth(depth) {} 70 | 71 | /// create a new VidexJsonItem,插入data,然后返回这个VidexJsonItem的引用 72 | VidexJsonItem *create(const std::string &new_item_type) { 73 | data.push_back(VidexJsonItem(new_item_type, depth + 1)); 74 | return &data.back(); 75 | } 76 | 77 | VidexJsonItem *create(const std::string &item_type, const char *prompt) { 78 | VidexJsonItem newOne = VidexJsonItem(item_type, depth + 1); 79 | newOne.add_property("prompt", prompt); 80 | data.push_back(newOne); 81 | return &data.back(); 82 | } 83 | 84 | /// add to properties 85 | void add_property(const std::string &key, const std::string &value) { 86 | properties[key] = videx_escape_double_quotes(value); 87 | } 88 | 89 | void 
add_property(const std::string &key, const char *value) { 90 | if (value != NULL) { 91 | properties[key] = videx_escape_double_quotes(value); 92 | } else { 93 | properties[key] = "NULL"; 94 | } 95 | } 96 | 97 | void add_property(const std::string &key, const Simple_cstring &value) { 98 | if (value.is_set() && value.ptr() != NULL) { 99 | properties[key] = videx_escape_double_quotes(value.ptr(), value.length()); 100 | } else { 101 | properties[key] = "NULL"; 102 | } 103 | } 104 | 105 | void add_property(const std::string &key, const String &value) { 106 | if (!value.is_alloced() || !value.ptr() || !value.alloced_length() || 107 | (value.alloced_length() < (value.length() + 1))) { 108 | properties[key] = "NULL"; 109 | } else { 110 | properties[key] = videx_escape_double_quotes(value.ptr(), value.length()); 111 | } 112 | } 113 | 114 | void add_property(const std::string &key, const String *value) { 115 | if (value == NULL) { 116 | properties[key] = "NULL"; 117 | } else { 118 | add_property(key, *value); 119 | } 120 | } 121 | 122 | template 123 | // Except for string which might be empty and needs to be converted to NULL separately, 124 | // all other values can be handled using this function. 
125 | void add_property_nonan(const std::string &key, V value) { 126 | std::stringstream ss; 127 | ss << value; 128 | properties[key] = ss.str(); 129 | } 130 | 131 | std::string to_json() const { 132 | std::string json = "{"; 133 | 134 | json += "\"item_type\":\"" + item_type + "\","; 135 | 136 | json += "\"properties\":{"; 137 | for (std::map::const_iterator it = 138 | properties.begin(); 139 | it != properties.end(); ++it) { 140 | json += "\"" + it->first + "\":\"" + it->second + "\","; 141 | } 142 | if (!properties.empty()) { 143 | json.erase(json.length() - 1); // remove trailing comma 144 | } 145 | json += "},"; 146 | 147 | json += "\"data\":["; 148 | for (std::list::const_iterator it = data.begin(); 149 | it != data.end(); ++it) { 150 | json += it->to_json() + ","; 151 | } 152 | if (!data.empty()) { 153 | json.erase(json.length() - 1); // remove trailing comma 154 | } 155 | json += "]}"; 156 | 157 | return json; 158 | } 159 | }; 160 | 161 | /** 162 | * construct a basic request, and other parameters can be conveniently added externally. 163 | */ 164 | inline VidexJsonItem construct_request(const std::string &db_name, 165 | const std::string &table_name, 166 | const std::string &function, 167 | const std::string &target_storage_engine = "INNODB") { 168 | VidexJsonItem req("videx_request", 0); 169 | req.add_property("dbname", db_name); 170 | req.add_property("table_name", table_name); 171 | req.add_property("function", function); 172 | req.add_property("target_storage_engine", target_storage_engine); 173 | return req; 174 | } 175 | 176 | #endif // VIDEX_JSON_ITEM_H 177 | -------------------------------------------------------------------------------- /src/mysql/videx/videx_log_utils.h: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2024 Bytedance Ltd. 
and/or its affiliates 2 | 3 | This program is free software; you can redistribute it and/or modify 4 | it under the terms of the GNU General Public License, version 2.0, 5 | as published by the Free Software Foundation. 6 | 7 | This program is also distributed with certain software (including 8 | but not limited to OpenSSL) that is licensed under separate terms, 9 | as designated in a particular file or component or in included license 10 | documentation. The authors of MySQL hereby grant you an additional 11 | permission to link the program and your derivative works with the 12 | separately licensed software that they have included with MySQL. 13 | 14 | This program is distributed in the hope that it will be useful, 15 | but WITHOUT ANY WARRANTY; without even the implied warranty of 16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17 | GNU General Public License, version 2.0, for more details. 18 | 19 | You should have received a copy of the GNU General Public License 20 | along with this program; if not, write to the Free Software 21 | Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ 22 | 23 | #ifndef VIDEX_LOG_UTILS 24 | #define VIDEX_LOG_UTILS 25 | 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include "sql/key.h" 31 | #include "sql_string.h" 32 | #include 33 | #include "join_optimizer/bit_utils.h" 34 | #include "sql/current_thd.h" 35 | #include "sql/field.h" 36 | #include "sql/item_func.h" 37 | #include "sql/sql_class.h" 38 | #include "videx_json_item.h" 39 | 40 | #define FUNC_FILE_LINE __PRETTY_FUNCTION__, __FILE__, __LINE__ 41 | 42 | 43 | class VidexLogUtils { 44 | private: 45 | int count = 0; 46 | std::string tag = "^_^"; 47 | bool enable_cout = true; 48 | bool enable_trace = false; 49 | std::unordered_map data; 50 | public: 51 | 52 | /** 53 | * whether to show cout 54 | * @param p_show_cout 55 | */ 56 | void set_cout(bool p_show_cout) { 57 | this->enable_cout = p_show_cout; 58 | } 59 | 60 | /** 61 
| * whether to show trace 62 | * @param p_enable_trace 63 | */ 64 | void set_enable_trace(bool p_enable_trace) { 65 | this->enable_trace = p_enable_trace; 66 | } 67 | 68 | /** 69 | * set tag and to be displayed in markHaFuncPassby 70 | * @param new_tag 71 | */ 72 | void set_tag(const std::string &new_tag) { 73 | this->tag = new_tag; 74 | } 75 | 76 | void markHaFuncPassby(const std::string &func, const std::string &file, const int line, 77 | const std::string &others = "", bool silent = true); 78 | 79 | void markPassbyUnexpected(const std::string &func, const std::string &file, const int line); 80 | 81 | void NotMarkPassby(const std::string &, const std::string &, const int ); 82 | 83 | template 84 | void markPassby_DBTB_otherType(const std::string &func, const std::string &file, const int line, 85 | const std::string &db_name, const std::string &tb_name, 86 | V value) { 87 | // markHaFuncPassby(func, file, line, std::to_string(value)); 88 | std::ostringstream oss; 89 | oss << "db=" << db_name << ", tb=" << tb_name << ", value=" << value; 90 | markHaFuncPassby(func, file, line, oss.str(), false); 91 | } 92 | 93 | template 94 | void markPassby_otherType(const std::string &func, const std::string &file, const int line, V value) { 95 | // markHaFuncPassby(func, file, line, std::to_string(value)); 96 | std::ostringstream oss; 97 | oss << value; 98 | markHaFuncPassby(func, file, line, oss.str()); 99 | } 100 | 101 | void markRecordInRange([[maybe_unused]]const std::string &func, [[maybe_unused]]const std::string &file, 102 | [[maybe_unused]]const int line, key_range *min_key, key_range *max_key, 103 | KEY *key, VidexJsonItem *req_json); 104 | }; 105 | 106 | 107 | extern VidexLogUtils videx_log_ins; 108 | 109 | 110 | #endif // VIDEX_LOG_UTILS -------------------------------------------------------------------------------- /src/sub_platforms/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/bytedance/videx/6ccab9a4b8339d1cf96e20385414522dee349478/src/sub_platforms/__init__.py -------------------------------------------------------------------------------- /src/sub_platforms/sql_opt/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/videx/6ccab9a4b8339d1cf96e20385414522dee349478/src/sub_platforms/sql_opt/__init__.py -------------------------------------------------------------------------------- /src/sub_platforms/sql_opt/column_statastics/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (c) 2024 Bytedance Ltd. and/or its affiliates 3 | SPDX-License-Identifier: MIT 4 | """ -------------------------------------------------------------------------------- /src/sub_platforms/sql_opt/column_statastics/statistics_info.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (c) 2024 Bytedance Ltd. 
def large_number_decoder(y):
    """BeforeValidator for TableStatisticsInfo.max_pk / min_pk.

    Normalizes the incoming value into the canonical shape
    ``[{"ColumnName": ..., "Value": "<str>"}]``, stringifying each ``Value``
    so very large integers survive JSON round-trips without precision loss.

    Bug fix: ``None`` (the fields' declared default) was previously wrapped
    into ``[{"ColumnName": "id", "Value": "None"}]``, producing a bogus pk
    entry; it is now passed through unchanged.
    """
    if y is None:
        return None
    if isinstance(y, list):
        # Stringify in place; entries without a "Value" key are left as-is.
        for item in y:
            if isinstance(item, dict) and "Value" in item:
                item['Value'] = str(item['Value'])
        return y
    # Scalar input: assumes a single-column primary key named "id"
    # — TODO confirm against callers.
    return [{"ColumnName": "id", "Value": str(y)}]
def trans_dict_to_statistics(numerical_info: Dict[str, Any]) -> TableStatisticsInfo:
    """A temporary converter from a raw numerical-info dict to TableStatisticsInfo.

    Bug fix: ``TableStatisticsInfo`` declares ``db_name`` and ``table_name`` as
    required pydantic fields, so the original bare ``TableStatisticsInfo()``
    raised a ValidationError. They are now taken from the input dict (empty
    string when absent, preserving the original "temp converter" spirit).
    """
    table_statistics = TableStatisticsInfo(
        db_name=numerical_info.get('db_name', ''),
        table_name=numerical_info.get('table_name', ''),
    )
    table_statistics.ndv_dict = numerical_info['ndv_dict']
    table_statistics.histogram_dict = numerical_info['histogram']
    table_statistics.not_null_ratio_dict = numerical_info['not_null_ratio_dict']
    table_statistics.num_of_rows = numerical_info['num_of_rows']
    table_statistics.is_sample_success = numerical_info['is_sample_succ']
    table_statistics.shard_no = numerical_info['shard_no']
    return table_statistics
class RequestFormatException(Exception):
    """The input optimization-task information is incomplete or malformed."""

    def __init__(self, message):
        self.message = message
        super().__init__(self.message)

    def __str__(self):
        return f"Optimize task format Exception: {self.message}"


class TableNotFoundException(Exception):
    """A referenced table could not be resolved."""

    def __init__(self, message, table_name):
        self.message = message
        self.table_name = table_name
        super().__init__(self.message)

    def __str__(self):
        return f"Table not found Exception: {self.message}, table : {self.table_name}"


class UnsupportedException(Exception):
    """Base class for all 'feature not supported' errors."""

    def __init__(self, message):
        super(UnsupportedException, self).__init__(message)


class UnsupportedQueryException(UnsupportedException):
    """The query (identified by its fingerprint) cannot be processed."""

    def __init__(self, message, fingerprint_md5, sample_sql):
        self.message = message
        self.fingerprint_md5 = fingerprint_md5
        self.sample_sql = sample_sql
        super().__init__(self.message)

    def __str__(self):
        return f"Unsupported Query Exception: {self.message}, finger: {self.fingerprint_md5}, sql text: {self.sample_sql}"


class UnsupportedSamplingException(UnsupportedException):
    """Sampling is not supported for the given table/column."""

    def __init__(self, message):
        self.message = message
        super().__init__(self.message)

    def __str__(self):
        return f"Unsupported sampling Exception: {self.message}"


class UnsupportedParseEngine(UnsupportedException):
    """The requested parse engine is not supported."""

    def __init__(self, message):
        self.message = message
        super().__init__(self.message)

    def __str__(self):
        return f"Unsupported Parse Engine Exception: {self.message}"


class TraceLoadException(ValueError):
    """Failed to load the optimizer trace from OPTIMIZE_TRACE."""

    def __init__(self, message):
        self.message = message
        super().__init__(self.message)

    def __str__(self):
        # Bug fix: the message was stored but never rendered (the f-string
        # had no placeholder); include it so the cause is visible.
        return f"Failed load trace from OPTIMIZE_TRACE: {self.message}"
class CollationQueryException(ValueError):
    """Querying the ASCII collation weight failed."""

    def __init__(self, message):
        super().__init__(message)
        self.message = message

    def __str__(self):
        return f"Failed Query ASCII Collation Weight: {self.message}"


class CollationGenerateStrException(ValueError):
    """Generating a string for a collation failed."""

    def __init__(self, message):
        super().__init__(message)
        self.message = message

    def __str__(self):
        return f"Failed Generated String: {self.message}"


class GenerateNumException(ValueError):
    """Generating a numeric value failed."""

    def __init__(self, message):
        super().__init__(message)
        self.message = message

    def __str__(self):
        return f"Failed Generated Numeric: {self.message}"
def pydantic_dataclass_json(_cls=None):
    """Class decorator that grafts PydanticDataClassJsonMixin's JSON/dict
    helpers onto a pydantic model class.

    Usable both with and without parentheses (same optional-parens handling
    as the stdlib ``dataclasses`` module):

        @pydantic_dataclass_json
        class Example:
            ...
    """

    def wrap(cls):
        mixin = PydanticDataClassJsonMixin
        # Instance-level helpers are copied directly.
        cls.to_json = mixin.to_json
        cls.to_dict = mixin.to_dict
        # Classmethods must be re-wrapped around the underlying function.
        cls._validate_cls = classmethod(mixin._validate_cls.__func__)  # type: ignore
        cls.from_json = classmethod(mixin.from_json.__func__)  # type: ignore
        cls.from_dict = classmethod(mixin.from_dict.__func__)  # type: ignore

        mixin.register(cls)
        return cls

    return wrap if _cls is None else wrap(_cls)
# Returned by get_table_load_row when the load-row count is not available.
UNKNOWN_LOAD_ROWS: int = -1


class SampleFileInfo(BaseModel, PydanticDataClassJsonMixin):
    """Locations and per-table row budgets of sampled parquet data."""
    local_path_prefix: str
    tos_path_prefix: str
    # {db: {table: [sample files]}}
    sample_file_dict: Dict[str, Dict[str, List[str]]]
    # To preserve the relative row counts between joined tables, only
    # `table_load_rows` rows per table are imported from the sampled parquet data.
    table_load_rows: Optional[Dict[str, Dict[str, int]]] = None

    def get_table_load_row(self, db: str, table: str):
        """Rows to load for `db`.`table`; UNKNOWN_LOAD_ROWS when not recorded.

        Bug fix (consistency): previously returned a hard-coded -1 instead of
        the UNKNOWN_LOAD_ROWS constant declared for exactly this purpose.
        """
        if self.table_load_rows is None \
                or self.table_load_rows.get(db, None) is None \
                or self.table_load_rows.get(db).get(table) is None:
            return UNKNOWN_LOAD_ROWS
        else:
            return self.table_load_rows.get(db).get(table)
@dataclass
class SampleColumnInfo:
    """Identifies a single column to sample, with an optional prefix length."""
    table_id: TableId
    column_name: str
    data_type: Optional[str] = None
    # For large fields: prefix length to sample; 0 disables prefix sampling.
    sample_length: Optional[int] = 0

    @property
    def db_name(self):
        return self.table_id.db_name

    @property
    def table_name(self):
        return self.table_id.table_name

    @classmethod
    def from_column(cls, column: Column, sample_length: int = 0):
        """Build an instance from a meta Column."""
        ident = TableId(db_name=column.db, table_name=column.table)
        info = SampleColumnInfo(ident, column.name)
        info.data_type = column.data_type
        info.sample_length = sample_length
        return info

    @classmethod
    def new_ins(cls, db_name, table_name, column_name: str, sample_length: int = 0, data_type: str = None):
        """Build an instance from raw db/table/column identifiers."""
        ident = TableId(db_name=db_name, table_name=table_name)
        info = SampleColumnInfo(ident, column_name)
        info.data_type = data_type
        info.sample_length = sample_length
        return info

    def __hash__(self):
        # Identity is (table, column); data_type and sample_length are ignored.
        return hash((self.table_id, self.column_name))

    def __eq__(self, other):
        if not isinstance(other, SampleColumnInfo):
            return False
        return (self.table_id, self.column_name) == (other.table_id, other.column_name)


def sample_info_set_to_name_list(col_set: Set[SampleColumnInfo]):
    """Project a set of SampleColumnInfo down to their column names."""
    return [col.column_name for col in col_set]
@dataclass
class SampleResult:
    """Result of executing one sampling DML statement."""
    # fingerprint of the sampled query
    sample_fingerprint: str = None
    # raw result tuple returned by the DML
    result: tuple = None
    # the DML text that was executed
    DML: str = None
    # Bug fix: `numerical_info = {}` was an un-annotated class attribute on a
    # dataclass, i.e. ONE mutable dict shared by every instance (and by the
    # class). Now a proper per-instance field; still defaults to an empty dict,
    # so construction remains backward-compatible.
    numerical_info: Dict = field(default_factory=dict)
def parse_from_expression(expression):
    """Return the first column name referenced by a MySQL expression,
    or None when the expression references no column.
    """
    tree = sqlglot.parse_one(expression, read=MySQL)
    return next(
        (node.name for node in tree.dfs() if isinstance(node, sqlglot.expressions.Column)),
        None,
    )
def replace_illegal_value(data, expected_pd_type):
    """Rewrite MySQL zero-dates, which pandas cannot represent, to the epoch.

    `data` is a pandas Series in the caller (Series.replace swaps whole cell
    values); plain strings also work via str.replace.
    """
    zero_date_fixes = {
        'date': ('0000-00-00', '1970-01-01'),
        'datetime': ('0000-00-00 00:00:00', '1970-01-01 00:00:00'),
    }
    fix = zero_date_fixes.get(expected_pd_type)
    if fix is not None:
        data = data.replace(fix[0], fix[1])
    return data
def parse_sample_data_to_dataframe(data: List[Dict[str, str]], table_meta: Table) -> pd.DataFrame:
    """Convert sampled rows (list of column->value dicts) into a DataFrame
    whose dtypes follow the MySQL column types described by `table_meta`.

    Returns an empty DataFrame when `data` is None.
    """
    if data is None:
        return pd.DataFrame({})

    # Accumulate column-wise; dtypes are corrected afterwards because
    # pre-declaring per-column dtypes ({col: pd.Series(dtype=...)}) proved
    # very slow (per the original note).
    columns = {}
    for record in data:
        for name, value in record.items():
            columns.setdefault(name, []).append(value)

    frame = pd.DataFrame(columns)
    return correct_df_type_by_mysql_type(frame, table_meta)
class MySQLExplainItem(BaseModel, PydanticDataClassJsonMixin):
    """One row of MySQL's tabular EXPLAIN output (fields mirror EXPLAIN columns)."""
    id: Optional[int] = None
    select_type: Optional[str] = None
    table: Optional[str] = None
    partitions: Optional[str] = None
    type: Optional[str] = None
    possible_keys: Optional[str] = None
    key: Optional[str] = None
    key_len: Optional[int] = None
    ref: Optional[str] = None
    rows: Optional[int] = None
    filtered: Optional[Union[int, float]] = None
    extra: Optional[str] = None


class MySQLExplainResult(BaseModel, PydanticDataClassJsonMixin):
    """Container for the result of one EXPLAIN invocation."""
    format: Optional[str] = None  # "json" or None (None => tabular rows)
    # if format is None, the result is filled into explain_items
    explain_items: List[MySQLExplainItem] = None
    # if format is "json", the result is filled into explain_json
    explain_json: Optional[dict] = None
    # NOTE(review): `skip_dumps` is not a standard pydantic Field argument —
    # presumably consumed by project tooling; confirm.
    trace_dict: Optional[dict] = Field(default=None, exclude=True, skip_dumps=True)

    @staticmethod
    def from_df(explain_df: pd.DataFrame) -> 'MySQLExplainResult':
        """Build a MySQLExplainResult from a DataFrame-like EXPLAIN result."""
        result = MySQLExplainResult()
        result.format = None
        result.explain_items = []
        for rid, row in explain_df.iterrows():
            item = MySQLExplainItem()
            item.id = row['id']
            item.select_type = row['select_type']
            item.table = row['table']
            item.partitions = row['partitions']
            item.type = row['type']
            item.possible_keys = row['possible_keys']
            item.key = row['key']
            item.key_len = row['key_len']
            item.ref = row['ref']
            item.rows = row['rows']
            item.filtered = row['filtered']
            item.extra = row['Extra']

            result.explain_items.append(item)
        return result

    def to_print(self, explain_format='normal'):
        """Render the explain result like the MySQL client output.

        Args:
            explain_format (str, optional): [normal, tree, json]. Defaults to 'normal'.

        Bug fix: data rows previously printed `in_item.type` under the
        `select_type` header, so select_type was never rendered; rows now
        print select_type to match the header.
        """
        if explain_format not in ['normal']:
            raise NotImplementedError(f"{explain_format} haven't supported")
        if len(self.explain_items) == 0:
            return "empty explain result"

        def col_width(attr: str) -> int:
            # widest rendered value in the column, plus one space of padding
            return max(len(str(getattr(it, attr))) for it in self.explain_items) + 1

        table_max_len = col_width('table')
        key_max_len = col_width('key')
        key_len_max = col_width('key_len')
        ref_max_len = col_width('ref')
        rows_max_len = col_width('rows')
        filtered_max_len = col_width('filtered')
        extra_max_len = col_width('extra')

        res = [f"id\t{'select_type':>{12}}\t{'table':>{table_max_len}}\t{'key':>{key_max_len}}\t"
               f"{'key_len':>{key_len_max}}\t{'ref':>{ref_max_len}}\t{'rows':>{rows_max_len}}\t"
               f"{'filtered':>{filtered_max_len}}\t{'extra':>{extra_max_len}}\tpossible_keys"]

        for in_item in self.explain_items:
            in_item: MySQLExplainItem
            res.append(
                f"{in_item.id}\t{str(in_item.select_type):>{12}}"
                f"\t{str(in_item.table):>{table_max_len}}\t"
                f"{str(in_item.key):>{key_max_len}}\t"
                f"{str(in_item.key_len):>{key_len_max}}\t"
                f"{str(in_item.ref):>{ref_max_len}}\t"
                f"{str(in_item.rows):>{rows_max_len}}\t"
                f"{str(in_item.filtered):>{filtered_max_len}}\t"
                f"{str(in_item.extra):>{extra_max_len}}\t"
                f"{in_item.possible_keys}"
            )
        return '\n'.join(res)
def load_sample_file(table_stats: VidexTableStats):
    """Load the sampled data backing `table_stats`.

    Placeholder: sample-file loading ships in a later stage.

    Raises:
        NotImplementedError: always, until the feature lands.
    """
    # Fixed message grammar ("Will integrated" -> "Will be integrated").
    raise NotImplementedError("Will be integrated in next stage")
# System schemas shipped with MySQL itself.
# NOTE(review): names are upper-case here; presumably compared
# case-insensitively by callers — confirm against usage.
SYSTEM_DB_LIST = [
    'INFORMATION_SCHEMA',
    'PERFORMANCE_SCHEMA',
    'MYSQL',
    'SYS'
]

# Spatial data types are not supported.
UNSUPPORTED_MYSQL_DATATYPE = [  # not support Spatial Data Types
    "GEOMETRY",
    "POINT",
    "LINESTRING",
    "POLYGON",
    "MULTIPOINT",
    "MULTILINESTRING",
    "MULTIPOLYGON",
    "GEOMETRYCOLLECTION"
]
class VidexModelExample(VidexModelInnoDB):
    """
    A deliberately naive estimator for NDV, scan_time, and cardinality.

    Unlike VidexModelInnoDB, it needs none of the costly-to-fetch statistics
    (NDV, histograms, where clauses). It still inherits system variables,
    schema, and metadata handling from VidexModelInnoDB, which only requires
    querying the MySQL information tables once.

    The trade-off is accuracy: it does not characterize data distribution at
    all, but it is a simple starting point for new users.

    References:
        MySQL, storage/example/ha_example.cc
    """

    def __init__(self, stats: VidexTableStats, **kwargs):
        super().__init__(stats, **kwargs)
        self.strategy = VidexStrategy.example

    def scan_time(self, req_json_item: dict) -> float:
        # Same formula as ha_example.cc: (rows + deleted) / 20 + 10.
        total_rows = self.table_stats.records + self.table_stats.deleted
        return total_rows / 20.0 + 10

    def get_memory_buffer_size(self, req_json_item: dict) -> int:
        return -1

    def cardinality(self, idx_range_cond: IndexRangeCond) -> int:
        """Corresponds to the cardinality methods; fixed naive estimate."""
        return 10

    def ndv(self, index_name, field_list: List[str]) -> int:
        return 1
class VidexModelBase(ABC):
    """
    Abstract cost model class. VIDEX-Statistic-Server receives requests from
    VIDEX-MySQL for cardinality and NDV estimates, and parses them into
    structured data for ease of use by developers.

    Implement these methods to inject cardinality and NDV algorithms into MySQL.
    """

    def __init__(self, stats: VidexTableStats, strategy: VidexStrategy):
        self.table_stats: VidexTableStats = stats
        self.strategy: VidexStrategy = strategy

    @property
    def table_name(self):
        return self.table_stats.table_name

    @abstractmethod
    def cardinality(self, idx_range_cond: IndexRangeCond) -> int:
        """Estimate the number of rows matching an index range condition.

        Parameters:
            idx_range_cond (IndexRangeCond): the index range condition.

        Returns:
            int: estimated number of matching rows.

        Example:
            where c1 = 3 and c2 < 3 and c2 > 1 ->
            ranges = [RangeCond(c1 = 3), RangeCond(c2 < 3 and c2 > 1)]
        """
        pass

    @abstractmethod
    def ndv(self, index_name: str, field_list: List[str]) -> int:
        """Estimate the number of distinct values (NDV) for fields of an index.

        Parameters:
            index_name (str): name of the index.
            field_list (List[str]): fields for which NDV is estimated.

        Returns:
            int: estimated number of distinct values.

        Example:
            index_name = 'idx_c1c2', field_list = ['c1', 'c2']
        """
        raise NotImplementedError()

    @abstractmethod
    def scan_time(self, req_json_item: dict) -> float:
        """virtual double scan_time();"""
        raise NotImplementedError()

    @abstractmethod
    def get_memory_buffer_size(self, req_json_item: dict) -> int:
        """virtual double get_memory_buffer_size();"""
        raise NotImplementedError()

    @abstractmethod
    def info_low(self, req_json_item: dict) -> int:
        """virtual ull info();"""
        raise NotImplementedError()

    def get_index_schema(self, index_name: str) -> Optional[Index]:
        """Look up an index by name in the table metadata; None if absent."""
        meta = self.table_stats.table_meta
        if meta and meta.indexes:
            for idx in meta.indexes:
                if idx.name == index_name:
                    return idx
        return None

    def records_in_range(self, req_json_item: dict) -> int:
        """virtual ull records_in_range();

        The request carries a min_key/max_key pair. With a key present, each
        key item looks like:
            {"item_type": "min_key",
             "properties": {"index_name": ..., "length": ..., "operator": ...},
             "data": [{"item_type": "column_and_bound",
                       "properties": {"column": ..., "value": ...}, "data": []}, ...]}
        With NO_KEY_RANGE the item is just:
            {"item_type": "max_key", "properties": {}, "data": []}
        """
        assert str_lower_eq(req_json_item.get('properties').get('table_name'), self.table_stats.table_name)
        assert len(req_json_item['data']) == 2
        min_key, max_key = req_json_item['data']
        assert min_key['item_type'] == 'min_key'
        assert max_key['item_type'] == 'max_key'
        index_name = min_key['properties'].get('index_name', max_key['properties'].get('index_name'))
        assert index_name is not None, f"both min and max key has no index_name, {req_json_item=}"

        idx_range_cond = IndexRangeCond.from_dict(min_key, max_key,
                                                  index_meta=self.get_index_schema(index_name))
        return self.cardinality(idx_range_cond)
| { 132 | 'item_type': 'column_and_bound', 133 | 'properties': { 134 | 'column': 'S_YTD', 135 | 'value': '123.00' 136 | }, 137 | 'data': [] 138 | }, 139 | { 140 | 'item_type': 'column_and_bound', 141 | 'properties': { 142 | 'column': 'S_DIST_10', 143 | 'value': "'123'" 144 | }, 145 | 'data': [] 146 | } 147 | ] 148 | } 149 | NO_KEY_RANGE 的格式是下面这样: 150 | { 151 | "item_type": "max_key", 152 | "properties": {}, 153 | "data": [] 154 | } 155 | """ 156 | return self.cardinality(idx_range_cond) 157 | 158 | 159 | def record_range_request_to_str(min_key: dict, max_key: dict) -> str: 160 | """ 161 | `min_key` and `max_key` are derived from underlying MySQL function calls. 162 | Convert them into the standard format expression: `col OP val`, for comparison with trace data. 163 | Args: 164 | min_key: 165 | max_key: 166 | 167 | Returns: 168 | 169 | """ 170 | return "" 171 | 172 | 173 | def calc_mulcol_ndv_independent(col_names: List[str], ndvs_single: Dict[str, int], table_rows: int) -> int: 174 | """ 175 | Based on the assumption of independent distribution across multiple column NDVs, 176 | calculate the NDV for multiple columns from the single column NDV. 177 | 178 | Args: 179 | col_names: column name list 180 | ndvs_single: col_name -> ndv 181 | table_rows 182 | 183 | Returns: 184 | 185 | """ 186 | 187 | ndv_product = 1 188 | for col in col_names: 189 | if col in ndvs_single: 190 | ndv_product *= ndvs_single[col] 191 | else: 192 | logging.warning(f"Column {col} not found in ndvs_single when calc_mulcol_ndv_independent") 193 | # If a column ndv is missing, we tend to overestimate its cost, implying `ndv(col) as 1`, and `cardinality as table_rows`. 194 | ndv_product *= 1 195 | 196 | # The combined NDV cannot exceed the total number of rows in the table. 
197 | return min(ndv_product, table_rows) 198 | 199 | 200 | if __name__ == '__main__': 201 | pass 202 | -------------------------------------------------------------------------------- /src/sub_platforms/sql_opt/videx/scripts/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Copyright (c) 2024 Bytedance Ltd. and/or its affiliates 4 | SPDX-License-Identifier: MIT 5 | """ -------------------------------------------------------------------------------- /src/sub_platforms/sql_opt/videx/scripts/analyze/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Copyright (c) 2024 Bytedance Ltd. and/or its affiliates 4 | SPDX-License-Identifier: MIT 5 | 6 | __init__.py 7 | """ 8 | -------------------------------------------------------------------------------- /src/sub_platforms/sql_opt/videx/scripts/analyze/analyze_delete_rows.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Copyright (c) 2024 Bytedance Ltd. 
and/or its affiliates 4 | SPDX-License-Identifier: MIT 5 | 6 | CREATE TABLE `request_info` ( 7 | `id` bigint NOT NULL AUTO_INCREMENT COMMENT '主键 ID', 8 | `query` text COMMENT '请求query信息', 9 | `created_at` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '创建时间', 10 | PRIMARY KEY (`id`), 11 | KEY `idx_created_at` (`created_at`) 12 | ) ENGINE=InnoDB 13 | """ 14 | import logging 15 | import random 16 | import string 17 | import time 18 | from datetime import datetime, timedelta 19 | 20 | import pandas as pd 21 | 22 | from sub_platforms.sql_opt.env.rds_env import OpenMySQLEnv 23 | from sub_platforms.sql_opt.videx import videx_logging 24 | 25 | 26 | def generate_random_query(length=200): 27 | """generate random query only contains lowercase letters a-z""" 28 | return ''.join(random.choice(string.ascii_lowercase) for _ in range(length)) 29 | 30 | 31 | def get_system_info(parse_env): 32 | query = """ 33 | SELECT TABLE_SCHEMA, TABLE_NAME, TABLE_ROWS, AVG_ROW_LENGTH, DATA_LENGTH, 34 | MAX_DATA_LENGTH, INDEX_LENGTH, DATA_FREE 35 | FROM information_schema.TABLES 36 | WHERE TABLE_NAME = 'request_info' and ENGINE = 'InnoDB' 37 | """ 38 | 39 | res = parse_env.query_for_dataframe(query) 40 | res = res.iloc[0].to_dict() 41 | 42 | query = """ 43 | select TABLE_NAME, N_ROWS,CLUSTERED_INDEX_SIZE, SUM_OF_OTHER_INDEX_SIZES 44 | from `mysql`.`innodb_table_stats` where TABLE_NAME = 'request_info' 45 | """ 46 | 47 | res2 = parse_env.query_for_dataframe(query) 48 | res2 = res2.iloc[0].to_dict() 49 | res.update(res2) 50 | return res 51 | 52 | 53 | def insert_batch_data(parse_env, current_time, batch_size): 54 | queries = [generate_random_query() for _ in range(batch_size)] 55 | 56 | df = pd.DataFrame({ 57 | 'created_at': current_time, 58 | 'query': queries 59 | }) 60 | 61 | insert_sql = "INSERT INTO request_info (created_at, query) VALUES " 62 | values = [] 63 | for _, row in df.iterrows(): 64 | values.append(f"('{row['created_at']}', '{row['query']}')") 65 | 
66 | insert_sql += ",".join(values) 67 | 68 | parse_env.execute(insert_sql) 69 | return len(df) 70 | 71 | 72 | def delete_old_data(parse_env, current_time, days): 73 | delete_time = current_time - timedelta(days=days) 74 | delete_sql = f"DELETE FROM request_info WHERE created_at < '{delete_time}'" 75 | 76 | start_time = time.time() 77 | result = parse_env.execute(delete_sql) 78 | delete_duration = time.time() - start_time 79 | 80 | return result, delete_duration 81 | 82 | 83 | def main(): 84 | videx_logging.initial_config() 85 | scale_factor = 10000 86 | start_time = datetime(2022, 1, 1) # 从2024年1月1日开始 87 | end_time = start_time + timedelta(days=300) 88 | batch_size = int(scale_factor) 89 | 90 | # 数据库连接 91 | parse_env = None # blabla 92 | 93 | parse_env.execute("drop table if exists request_info") 94 | 95 | has_create_index = True 96 | logging.info(f"#@#@# has_create_index: {has_create_index}") 97 | if has_create_index: 98 | create_ddl = """ 99 | CREATE TABLE `request_info` ( 100 | `id` bigint NOT NULL AUTO_INCREMENT COMMENT 'pk ID', 101 | `query` text COMMENT 'request query', 102 | `created_at` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '创建时间', 103 | PRIMARY KEY (`id`), 104 | KEY `idx_created_at` (`created_at`) 105 | ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci 106 | """ 107 | else: 108 | create_ddl = """ 109 | CREATE TABLE `request_info` ( 110 | `id` bigint NOT NULL AUTO_INCREMENT COMMENT 'pk ID', 111 | `query` text COMMENT 'request query', 112 | `created_at` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '创建时间', 113 | PRIMARY KEY (`id`) 114 | ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci 115 | """ 116 | parse_env.execute(create_ddl) 117 | 118 | last_check_time = start_time 119 | total_inserted = 0 120 | current_time = start_time 121 | 122 | try: 123 | while current_time < end_time: 124 | # insert data each minute 125 | inserted = 
insert_batch_data(parse_env, current_time, batch_size) 126 | total_inserted += inserted 127 | logging.info(f"Time: {current_time}, Inserted {inserted} records, total: {total_inserted}") 128 | 129 | # Check the system information every two days. 130 | if (current_time - last_check_time).days >= 2: 131 | logging.info(f"\n=== System Info Before Deletion at {current_time} ===") 132 | system_info = get_system_info(parse_env) 133 | logging.info(system_info) 134 | 135 | deleted, delete_duration = delete_old_data(parse_env, current_time, 2) 136 | logging.info(f"\nDeleted {deleted} records in {delete_duration:.2f} seconds") 137 | 138 | logging.info(f"\n=== System Info After Deletion at {current_time} ===") 139 | system_info = get_system_info(parse_env) 140 | logging.info(system_info) 141 | 142 | last_check_time = current_time 143 | 144 | current_time += timedelta(days=1) 145 | 146 | except KeyboardInterrupt: 147 | logging.info("\nScript interrupted by user") 148 | finally: 149 | logging.info(f"\nFinal System Info at {current_time}:") 150 | system_info = get_system_info(parse_env) 151 | logging.info(system_info) 152 | 153 | 154 | if __name__ == "__main__": 155 | main() 156 | -------------------------------------------------------------------------------- /src/sub_platforms/sql_opt/videx/scripts/analyze/analyze_linear_distribution.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (c) 2024 Bytedance Ltd. and/or its affiliates 3 | SPDX-License-Identifier: MIT 4 | 5 | Function description: 6 | 1. Obtain the event_time data from the stream_event table. 7 | 2. Analyze the distribution of event_time: 8 | - Calculate the correlation coefficient between the actual data and the theoretical uniform distribution. 9 | - Draw a cumulative distribution plot to compare the actual distribution with the uniform distribution. 10 | - Output the basic statistical information. 11 | 3. 
Visualize the results to help determine whether the data conforms to a uniform distribution. 12 | 13 | """ 14 | 15 | import matplotlib.pyplot as plt 16 | import numpy as np 17 | 18 | from sub_platforms.sql_opt.env.rds_env import OpenMySQLEnv 19 | 20 | 21 | def fetch_data(env): 22 | query = "SELECT event_time FROM stream_event where event_time > '2000-01-01' and event_time < '2030-01-01'" 23 | df = env.query_for_dataframe(query) 24 | return df['event_time'] 25 | 26 | 27 | def analyze_distribution(data): 28 | # convert timestamp to relative time in hours 29 | min_time = data.min() 30 | time_deltas = [(t - min_time).total_seconds() / 3600 for t in data] # 转换为小时 31 | sorted_data = np.sort(time_deltas) 32 | 33 | cumulative_prob = np.arange(1, len(sorted_data) + 1) / len(sorted_data) 34 | 35 | uniform_theoretical = np.linspace(min(sorted_data), max(sorted_data), len(sorted_data)) 36 | uniform_prob = np.arange(1, len(uniform_theoretical) + 1) / len(uniform_theoretical) 37 | 38 | correlation = np.corrcoef(sorted_data, cumulative_prob)[0,1] 39 | 40 | plt.figure(figsize=(10, 6)) 41 | plt.scatter(sorted_data, cumulative_prob, alpha=0.5, label='Actual Distribution') 42 | plt.plot(uniform_theoretical, uniform_prob, 'r--', label='Uniform Distribution') 43 | 44 | plt.xlabel('Hours since start') 45 | plt.ylabel('Cumulative Probability') 46 | plt.title(f'Cumulative Distribution of Event Times\nCorrelation with Uniform: {correlation:.4f}') 47 | plt.legend() 48 | plt.grid(True) 49 | 50 | plt.figtext(0.02, 0.02, f'Start time: {min_time}', size='small') 51 | 52 | plt.show() 53 | 54 | return correlation 55 | 56 | 57 | def main(): 58 | env = None # blabla 59 | 60 | data = fetch_data(env) 61 | 62 | if not data.empty: 63 | correlation = analyze_distribution(data) 64 | print(f"Correlation coefficient with uniform distribution: {correlation:.4f}") 65 | 66 | print("\nBasic statistics:") 67 | print(f"Count: {len(data)}") 68 | print(f"Mean: {data.mean():.2f}") 69 | print(f"Std: {data.std()}") 
70 | print(f"Min: {data.min():.2f}") 71 | print(f"Max: {data.max():.2f}") 72 | 73 | 74 | if __name__ == "__main__": 75 | main() 76 | -------------------------------------------------------------------------------- /src/sub_platforms/sql_opt/videx/scripts/analyze/analyze_trace_utils.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Copyright (c) 2024 Bytedance Ltd. and/or its affiliates 4 | SPDX-License-Identifier: MIT 5 | """ 6 | import json 7 | 8 | 9 | def process_unknown_dict(unknown_dict): 10 | """process dict starting with unknown """ 11 | processed = {} 12 | if 'request' in unknown_dict: 13 | try: 14 | request_json = json.loads(unknown_dict['request']) 15 | if 'properties' in request_json and 'function' in request_json['properties']: 16 | function_full = request_json['properties']['function'] 17 | # 按 :: 分割并取最后一部分 18 | function_name = function_full.split('::')[-1].strip('()') 19 | processed['function'] = function_name 20 | except json.JSONDecodeError: 21 | processed['function'] = None 22 | 23 | if 'detail' in unknown_dict: 24 | try: 25 | # processed['detail'] = json.loads(unknown_dict['detail']) 26 | detail = json.loads(unknown_dict['detail']) 27 | new_detail = {'data': detail.get('data', {}).get('value', None), 'message': detail.get('message')} 28 | processed['detail'] = new_detail 29 | except json.JSONDecodeError: 30 | processed['detail'] = unknown_dict['detail'] 31 | 32 | return processed 33 | 34 | 35 | def process_videx_info_recursively(d): 36 | if not isinstance(d, dict): 37 | return d 38 | 39 | result = {} 40 | for key, value in d.items(): 41 | if key.startswith('unknown_key_'): 42 | number = key.split('_')[-1] 43 | new_key = f'videx_{number}' 44 | result[new_key] = process_unknown_dict(value) 45 | else: 46 | if isinstance(value, dict): 47 | result[key] = process_videx_info_recursively(value) 48 | elif isinstance(value, list): 49 | result[key] = 
[process_videx_info_recursively(item) if isinstance(item, dict) else item for item in 50 | value] 51 | else: 52 | result[key] = value 53 | 54 | return result 55 | 56 | 57 | def find_key_paths(data, current_path=None, result=None): 58 | if current_path is None: 59 | current_path = [] 60 | if result is None: 61 | result = [] 62 | 63 | if isinstance(data, dict): 64 | # 如果遇到 attached_conditions_computation,保存整个字典 65 | if 'attached_conditions_computation' in data: 66 | result.append({ 67 | 'path': current_path + ['attached_conditions_computation'], 68 | 'data': data # 保存整个包含 attached_conditions_computation 的字典 69 | }) 70 | return result # 提前返回,不再继续递归处理这个分支 71 | 72 | # 检查是否包含其他关键的分析信息 73 | if 'range_analysis' in data and 'table_scan' in data['range_analysis']: 74 | result.append({ 75 | 'path': current_path + ['range_analysis', 'table_scan'], 76 | 'data': data['range_analysis']['table_scan'] 77 | }) 78 | 79 | if 'range_analysis' in data and 'analyzing_range_alternatives' in data['range_analysis']: 80 | if 'range_scan_alternatives' in data['range_analysis']['analyzing_range_alternatives']: 81 | result.append({ 82 | 'path': current_path + ['range_analysis', 'analyzing_range_alternatives', 83 | 'range_scan_alternatives'], 84 | 'data': data['range_analysis']['analyzing_range_alternatives']['range_scan_alternatives'] 85 | }) 86 | 87 | if 'considered_execution_plans' in data and 'best_access_path' in data['considered_execution_plans']: 88 | result.append({ 89 | 'path': current_path + ['considered_execution_plans', 'best_access_path'], 90 | 'data': data['considered_execution_plans']['best_access_path'] 91 | }) 92 | 93 | for key, value in data.items(): 94 | find_key_paths(value, current_path + [key], result) 95 | 96 | elif isinstance(data, list): 97 | for item in data: 98 | find_key_paths(item, current_path, result) 99 | 100 | return result 101 | 102 | 103 | def extract_key_trace_info(trace): 104 | """ 105 | Extract key trace info from trace dict. 
106 | The key parts include range_analysis, best_access_path, considered_execution_plans, 107 | and complete attached_conditions_computation structure. 108 | All of these reveal the reasons for index selection. 109 | 110 | :param trace: 111 | :return: 112 | """ 113 | paths_and_data = find_key_paths(trace) 114 | 115 | result = {} 116 | for path_info in paths_and_data: 117 | current = result 118 | 119 | for i, path_part in enumerate(path_info['path'][:-1]): 120 | if isinstance(current, list): 121 | if not current: 122 | current.append({}) 123 | current = current[-1] 124 | 125 | if path_part not in current: 126 | if path_part == 'steps': 127 | current[path_part] = [] 128 | else: 129 | current[path_part] = {} 130 | current = current[path_part] 131 | 132 | if isinstance(current, list): 133 | current.append(path_info['data']) 134 | else: 135 | current[path_info['path'][-1]] = path_info['data'] 136 | 137 | result = process_videx_info_recursively(result) 138 | return result 139 | -------------------------------------------------------------------------------- /src/sub_platforms/sql_opt/videx/scripts/clear_mysql57_env.sql: -------------------------------------------------------------------------------- 1 | -- reset optimizer cost back to MySQL 8.0 default value 2 | SET GLOBAL optimizer_switch='subquery_to_derived=on'; 3 | SET GLOBAL optimizer_switch='block_nested_loop=on,hash_join=on'; 4 | SET GLOBAL optimizer_switch='semijoin=on'; 5 | SET GLOBAL optimizer_switch='firstmatch=on,loosescan=on,duplicateweedout=on'; 6 | SET GLOBAL optimizer_switch='materialization=on'; 7 | 8 | -- reset mysql.server_cost back to MySQL 8.0 default value 9 | UPDATE mysql.server_cost SET cost_value = DEFAULT; 10 | FLUSH OPTIMIZER_COSTS; -------------------------------------------------------------------------------- /src/sub_platforms/sql_opt/videx/scripts/setup_mysql57_env.sql: -------------------------------------------------------------------------------- 1 | -- modify optimizer cost for 
mysql5.7
SET GLOBAL optimizer_switch='subquery_to_derived=off';
SET GLOBAL optimizer_switch='block_nested_loop=on,hash_join=off';
SET GLOBAL optimizer_switch='semijoin=off';
SET GLOBAL optimizer_switch='firstmatch=off,loosescan=off,duplicateweedout=off';
SET GLOBAL optimizer_switch='materialization=off';


-- Scale mysql.server_cost entries to mimic MySQL 5.7 cost constants
-- (each value below is 2x the MySQL 8.0 default).
UPDATE mysql.server_cost
SET cost_value = 40.0,
    last_update = NOW(),
    comment = 'Modified to 2x default value for mysql5.7'
WHERE cost_name = 'disk_temptable_create_cost';

UPDATE mysql.server_cost
SET cost_value = 1.0,
    last_update = NOW(),
    comment = 'Modified to 2x default value for mysql5.7'
WHERE cost_name = 'disk_temptable_row_cost';

UPDATE mysql.server_cost
SET cost_value = 0.1,
    last_update = NOW(),
    comment = 'Modified to 2x default value for mysql5.7'
WHERE cost_name = 'key_compare_cost';

UPDATE mysql.server_cost
SET cost_value = 2.0,
    last_update = NOW(),
    comment = 'Modified to 2x default value for mysql5.7'
WHERE cost_name = 'memory_temptable_create_cost';

UPDATE mysql.server_cost
SET cost_value = 0.2,
    last_update = NOW(),
    comment = 'Modified to 2x default value for mysql5.7'
WHERE cost_name = 'memory_temptable_row_cost';

UPDATE mysql.server_cost
SET cost_value = 0.2,
    last_update = NOW(),
    comment = 'Modified to 2x default value for mysql5.7'
WHERE cost_name = 'row_evaluate_cost';

-- Make the new cost constants take effect without a restart.
FLUSH OPTIMIZER_COSTS;
-------------------------------------------------------------------------------- /src/sub_platforms/sql_opt/videx/scripts/start_videx_server.py: --------------------------------------------------------------------------------
"""
Copyright (c) 2024 Bytedance Ltd.
and/or its affiliates 3 | SPDX-License-Identifier: MIT 4 | """ 5 | import argparse 6 | from typing import Type 7 | 8 | from sub_platforms.sql_opt.videx.videx_metadata import PCT_CACHED_MODE_PREFER_META 9 | from sub_platforms.sql_opt.videx.videx_service import startup_videx_server 10 | from sub_platforms.sql_opt.videx.model.videx_strategy import VidexStrategy, VidexModelBase 11 | from sub_platforms.sql_opt.videx.model.videx_model_innodb import VidexModelInnoDB 12 | from sub_platforms.sql_opt.videx.model.videx_model_example import VidexModelExample 13 | 14 | if __name__ == '__main__': 15 | """ 16 | Examples: 17 | python start_videx_server.py --port 5001 18 | """ 19 | parser = argparse.ArgumentParser(description='Start the Videx stats server.') 20 | parser.add_argument('--server_ip', type=str, default='0.0.0.0', help='The IP address to bind the server to.') 21 | parser.add_argument('--debug', action='store_true', help='Run the server in debug mode.') 22 | parser.add_argument('--port', type=int, default=5001, help='The port number to run the server on.') 23 | parser.add_argument('--strategy', type=str, default="innodb", help='innodb or other model') 24 | parser.add_argument('--cache_pct', type=float, default=PCT_CACHED_MODE_PREFER_META, 25 | help='Table loaded cache percentage can significantly impact table scan costs. ' 26 | 'If set to -1, it prefers to use values calculated from the system table. ' 27 | 'If set to a float between 0 and 1, it forces the use of the specified value.') 28 | 29 | args = parser.parse_args() 30 | 31 | MainVidexModelClass: Type[VidexModelBase] 32 | """ 33 | N.B. You can inherit VidexModeInnoDB, and then re-implement ndv and cardinality method. 
34 | """ 35 | if args.strategy == VidexStrategy.example.value: 36 | MainVidexModelClass = VidexModelExample 37 | elif args.strategy == VidexStrategy.innodb.value: 38 | MainVidexModelClass = VidexModelInnoDB 39 | else: 40 | raise NotImplementedError(f"Unsupported strategy: {args.strategy}") 41 | 42 | startup_videx_server(start_ip=args.server_ip, debug=args.debug, port=args.port, 43 | VidexModelClass=MainVidexModelClass, 44 | cache_pct=args.cache_pct, 45 | ) 46 | -------------------------------------------------------------------------------- /src/sub_platforms/sql_opt/videx/videx_logging.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (c) 2024 Bytedance Ltd. and/or its affiliates 3 | SPDX-License-Identifier: MIT 4 | 5 | only for open-source, not for SQLBrain 6 | """ 7 | import logging 8 | import logging.config 9 | import logging.handlers 10 | import os 11 | import threading 12 | 13 | import six 14 | import yaml 15 | 16 | videx_log_context = threading.local() 17 | videx_log_context.__setattr__('videx_trace_id', '-') 18 | videx_log_context.__setattr__('x_tt_log_id', '-') 19 | videx_log_context.__setattr__('x_tt_trace_id', '-') 20 | 21 | 22 | def get_trace_id(): 23 | if hasattr(videx_log_context, 'videx_trace_id'): 24 | return videx_log_context.videx_trace_id 25 | else: 26 | return '-' 27 | 28 | 29 | class VidexTraceIdFilter(logging.Filter): 30 | def filter(self, record): 31 | trace_id = get_trace_id() 32 | record._videx_trace_id = six.ensure_text(trace_id) 33 | for key in videx_log_context.__dict__.keys(): 34 | if key == 'videx_trace_id': 35 | continue 36 | else: 37 | setattr(record, key, getattr(videx_log_context, key)) 38 | return True 39 | 40 | 41 | def set_thread_trace_id(trace_id: str): 42 | """ 43 | 设置日志TraceId,线程内有效 44 | Args: 45 | trace_id: trace_id 46 | Returns: 47 | None 48 | """ 49 | videx_log_context.videx_trace_id = trace_id 50 | 51 | 52 | def set_thread_local_property(property_name: str, 
property_value: str): 53 | """ 54 | 设置日志 roperty,线程内有效 55 | Args: 56 | Returns: 57 | None 58 | """ 59 | setattr(videx_log_context, property_name, property_value) 60 | 61 | 62 | def get_thread_local_property(property_name: str): 63 | """ 64 | 设置日志roperty,线程内有效 65 | Args: 66 | Returns: 67 | None 68 | """ 69 | return getattr(videx_log_context, property_name) 70 | 71 | 72 | def _read_config_from_file(file_path, log_file_prefix: str = None): 73 | print(os.getcwd()) 74 | if not os.path.exists(file_path): 75 | raise Exception(f'not find log config file {file_path}') 76 | 77 | with open(file_path, 'rt') as f: 78 | config = yaml.safe_load(f.read()) 79 | videx_trace_id_filters = { 80 | "videx_trace_filter": { 81 | "()": VidexTraceIdFilter, 82 | } 83 | } 84 | if 'filters' in config: 85 | config.get('filters').update(videx_trace_id_filters) 86 | else: 87 | config['filters'] = videx_trace_id_filters 88 | 89 | log_path = config.pop('log_path', './') 90 | try: 91 | if not os.path.exists(log_path): 92 | os.makedirs(log_path) 93 | except Exception as e1: 94 | print(f'can not create {log_path} {e1}') 95 | 96 | log_file_prefix_in_config = config.pop('log_file_prefix', 'videx_app') 97 | if log_file_prefix is not None: 98 | log_file_prefix_in_config = log_file_prefix 99 | 100 | for k, v in config.get('handlers', {}).items(): 101 | if 'filters' in v: 102 | v.get('filters').append('videx_trace_filter') 103 | else: 104 | v['filters'] = ['videx_trace_filter'] 105 | 106 | if 'filename' in v: 107 | v['filename'] = os.path.join(log_path, 108 | v['filename'].replace('${log_file_prefix}', log_file_prefix_in_config)) 109 | 110 | return config 111 | 112 | 113 | def default_videx_logging_config(log_file_prefix: str, log_path: str): 114 | return { 115 | 'version': 1, 116 | 'formatters': { 117 | 'default': { 118 | 'format': '%(asctime)s [%(process)d:%(thread)d] %(levelname)-8s %(name)-15s [%(filename)s:%(lineno)d] %(_videx_trace_id)s %(message)s', 119 | 'datefmt': '%Y-%m-%d %H:%M:%S' 120 | } 121 | 
}, 122 | 'filters': { 123 | 'videx_trace_filter': { 124 | '()': VidexTraceIdFilter 125 | }, 126 | }, 127 | 'handlers': { 128 | 'console': { 129 | 'class': 'logging.StreamHandler', 130 | 'formatter': 'default', 131 | 'level': 'INFO', 132 | 'stream': 'ext://sys.stdout', 133 | 'filters': ['videx_trace_filter'] 134 | }, 135 | 'info_log_file': { 136 | 'class': 'logging.handlers.RotatingFileHandler', 137 | 'formatter': 'default', 138 | 'filename': f'{log_path}/{log_file_prefix}_info.log', 139 | 'maxBytes': 100000000, 140 | 'backupCount': 5, 141 | 'filters': ['videx_trace_filter'] 142 | }, 143 | 'error_log_file': { 144 | 'class': 'logging.handlers.RotatingFileHandler', 145 | 'formatter': 'default', 146 | 'filename': f'{log_path}/{log_file_prefix}_error.log', 147 | 'maxBytes': 100000000, 148 | 'backupCount': 5, 149 | 'filters': ['videx_trace_filter'] 150 | }, 151 | }, 152 | 'root': { 153 | 'level': 'NOTSET', 154 | 'handlers': ['console', 'info_log_file'], 155 | 'propagate': False 156 | }, 157 | 'loggers': { 158 | 'error_logger': { 159 | 'level': 'ERROR', 160 | 'handlers': ['error_log_file'], 161 | 'propagate': True 162 | } 163 | } 164 | } 165 | 166 | 167 | def initial_config(config_file: str = None, log_file_prefix: str = 'videx_app', log_path: str = './log'): 168 | """ 169 | 根据配置文件初始化日志配置 170 | Args: 171 | config_file: 配置文件地址,默认读取根目录log_config.yaml 172 | log_file_prefix: 日志文件前缀,默认videx_app 173 | log_path: 日志路径,默认 ./logs 174 | 175 | Returns: 176 | None 177 | """ 178 | if config_file is None: 179 | config = default_videx_logging_config(log_file_prefix=log_file_prefix, log_path=log_path) 180 | else: 181 | config = _read_config_from_file(config_file, log_file_prefix=log_file_prefix) 182 | 183 | try: 184 | os.makedirs(log_path, exist_ok=True) 185 | except Exception as e: 186 | print(f'can not create {log_path=} {e}') 187 | 188 | print('logging config: ', config) 189 | logging.config.dictConfig(config) 190 | 191 | 192 | if __name__ == '__main__': 193 | 
initial_config(log_file_prefix='try_videx', log_path='./videx_logs') 194 | import time 195 | set_thread_trace_id(f"<>") 196 | for i in range(5): 197 | logging.info(f"info {i}") 198 | logging.warning(f"info {i}") 199 | logging.error(f"info {i}") 200 | time.sleep(1) 201 | -------------------------------------------------------------------------------- /test/videx/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/videx/6ccab9a4b8339d1cf96e20385414522dee349478/test/videx/__init__.py -------------------------------------------------------------------------------- /test/videx/data/test_imdbload_1024_b10/videx_imdbload_ndv_single.json: -------------------------------------------------------------------------------- 1 | { 2 | "aka_name": { 3 | "id": 901343, 4 | "imdb_index": 6, 5 | "md5sum": 810625, 6 | "name": 801037, 7 | "name_pcode_cf": 22018, 8 | "name_pcode_nf": 21139, 9 | "person_id": 588222, 10 | "surname_pcode": 4313 11 | }, 12 | "aka_title": { 13 | "episode_nr": 163, 14 | "episode_of_id": 755, 15 | "id": 361472, 16 | "imdb_index": 13, 17 | "kind_id": 6, 18 | "md5sum": 346289, 19 | "movie_id": 205631, 20 | "note": 3349, 21 | "phonetic_code": 20014, 22 | "production_year": 129, 23 | "season_nr": 41, 24 | "title": 306166 25 | }, 26 | "cast_info": { 27 | "id": 36244344, 28 | "movie_id": 2331601, 29 | "note": 707388, 30 | "nr_order": 1094, 31 | "person_id": 4051810, 32 | "person_role_id": 3140340, 33 | "role_id": 11 34 | }, 35 | "char_name": { 36 | "id": 3140339, 37 | "imdb_id": 1, 38 | "imdb_index": 7, 39 | "md5sum": 3140339, 40 | "name": 3078757, 41 | "name_pcode_nf": 23077, 42 | "surname_pcode": 17199 43 | }, 44 | "comp_cast_type": { 45 | "id": 4, 46 | "kind": 4 47 | }, 48 | "company_name": { 49 | "country_code": 216, 50 | "id": 234997, 51 | "imdb_id": 1, 52 | "md5sum": 234997, 53 | "name": 222968, 54 | "name_pcode_nf": 18487, 55 | "name_pcode_sf": 17447 56 | }, 57 | 
"company_type": { 58 | "id": 4, 59 | "kind": 4 60 | }, 61 | "complete_cast": { 62 | "id": 135086, 63 | "movie_id": 93514, 64 | "status_id": 2, 65 | "subject_id": 2 66 | }, 67 | "info_type": { 68 | "id": 113, 69 | "info": 113 70 | }, 71 | "keyword": { 72 | "id": 134170, 73 | "keyword": 134135, 74 | "phonetic_code": 15482 75 | }, 76 | "kind_type": { 77 | "id": 7, 78 | "kind": 7 79 | }, 80 | "link_type": { 81 | "id": 18, 82 | "link": 18 83 | }, 84 | "movie_companies": { 85 | "company_id": 234997, 86 | "company_type_id": 2, 87 | "id": 2609129, 88 | "movie_id": 1087236, 89 | "note": 65506 90 | }, 91 | "movie_info": { 92 | "id": 14835720, 93 | "info": 2719293, 94 | "info_type_id": 71, 95 | "movie_id": 2468825, 96 | "note": 131356 97 | }, 98 | "movie_info_idx": { 99 | "id": 1380035, 100 | "info": 146245, 101 | "info_type_id": 5, 102 | "movie_id": 459925, 103 | "note": 1 104 | }, 105 | "movie_keyword": { 106 | "id": 4523930, 107 | "keyword_id": 134170, 108 | "movie_id": 476794 109 | }, 110 | "movie_link": { 111 | "id": 29997, 112 | "link_type_id": 16, 113 | "linked_movie_id": 16169, 114 | "movie_id": 6411 115 | }, 116 | "name": { 117 | "gender": 3, 118 | "id": 4167491, 119 | "imdb_id": 1, 120 | "imdb_index": 180, 121 | "md5sum": 4167491, 122 | "name": 3567986, 123 | "name_pcode_cf": 23604, 124 | "name_pcode_nf": 23262, 125 | "surname_pcode": 4672 126 | }, 127 | "person_info": { 128 | "id": 2963664, 129 | "info": 1921778, 130 | "info_type_id": 22, 131 | "note": 49327, 132 | "person_id": 550721 133 | }, 134 | "role_type": { 135 | "id": 12, 136 | "role": 12 137 | }, 138 | "title": { 139 | "episode_nr": 14907, 140 | "episode_of_id": 51482, 141 | "id": 2528312, 142 | "imdb_id": 1, 143 | "imdb_index": 26, 144 | "kind_id": 7, 145 | "md5sum": 2528311, 146 | "phonetic_code": 23260, 147 | "production_year": 133, 148 | "season_nr": 97, 149 | "series_years": 1409, 150 | "title": 1476893 151 | } 152 | } -------------------------------------------------------------------------------- 
/test/videx/data/test_info_item.json: -------------------------------------------------------------------------------- 1 | { 2 | "item_type": "videx_request", 3 | "properties": { 4 | "dbname": "tpcc_4", 5 | "function": "virtual int ha_videx::info_low(uint, bool)", 6 | "table_name": "ITEM", 7 | "target_storage_engine": "INNODB" 8 | }, 9 | "data": [ 10 | { 11 | "item_type": "key", 12 | "properties": { 13 | "key_length": "4", 14 | "name": "PRIMARY" 15 | }, 16 | "data": [ 17 | { 18 | "item_type": "field", 19 | "properties": { 20 | "name": "I_ID", 21 | "store_length": "4" 22 | }, 23 | "data": [] 24 | } 25 | ] 26 | }, 27 | { 28 | "item_type": "key", 29 | "properties": { 30 | "key_length": "7", 31 | "name": "idx_I_IM_ID_I_PRICE" 32 | }, 33 | "data": [ 34 | { 35 | "item_type": "field", 36 | "properties": { 37 | "name": "I_IM_ID", 38 | "store_length": "4" 39 | }, 40 | "data": [] 41 | }, 42 | { 43 | "item_type": "field", 44 | "properties": { 45 | "name": "I_PRICE", 46 | "store_length": "3" 47 | }, 48 | "data": [] 49 | } 50 | ] 51 | }, 52 | { 53 | "item_type": "key", 54 | "properties": { 55 | "key_length": "4", 56 | "name": "idx_I_IM_ID" 57 | }, 58 | "data": [ 59 | { 60 | "item_type": "field", 61 | "properties": { 62 | "name": "I_IM_ID", 63 | "store_length": "4" 64 | }, 65 | "data": [] 66 | } 67 | ] 68 | }, 69 | { 70 | "item_type": "key", 71 | "properties": { 72 | "key_length": "26", 73 | "name": "idx_I_NAME" 74 | }, 75 | "data": [ 76 | { 77 | "item_type": "field", 78 | "properties": { 79 | "name": "I_NAME", 80 | "store_length": "26" 81 | }, 82 | "data": [] 83 | } 84 | ] 85 | } 86 | ] 87 | } -------------------------------------------------------------------------------- /test/videx/data/test_info_item2.json: -------------------------------------------------------------------------------- 1 | { 2 | "item_type": "videx_request", 3 | "properties": { 4 | "dbname": "videx_tpch", 5 | "function": "virtual int ha_videx::info_low(uint, bool)", 6 | "table_name": "lineitem", 7 | 
"target_storage_engine": "INNODB", 8 | "videx_options": "{\"task_id\": \"127_0_0_1_13308@@@demo_tpch\", \"use_gt\": true}" 9 | }, 10 | "data": [ 11 | { 12 | "item_type": "key", 13 | "properties": { 14 | "key_length": "4", 15 | "name": "PRIMARY" 16 | }, 17 | "data": [ 18 | { 19 | "item_type": "field", 20 | "properties": { 21 | "name": "L_ID", 22 | "store_length": "4" 23 | }, 24 | "data": [] 25 | } 26 | ] 27 | }, 28 | { 29 | "item_type": "key", 30 | "properties": { 31 | "key_length": "8", 32 | "name": "LINEITEM_UK1" 33 | }, 34 | "data": [ 35 | { 36 | "item_type": "field", 37 | "properties": { 38 | "name": "L_ORDERKEY", 39 | "store_length": "4" 40 | }, 41 | "data": [] 42 | }, 43 | { 44 | "item_type": "field", 45 | "properties": { 46 | "name": "L_LINENUMBER", 47 | "store_length": "4" 48 | }, 49 | "data": [] 50 | } 51 | ] 52 | }, 53 | { 54 | "item_type": "key", 55 | "properties": { 56 | "key_length": "4", 57 | "name": "LINEITEM_FK1" 58 | }, 59 | "data": [ 60 | { 61 | "item_type": "field", 62 | "properties": { 63 | "name": "L_ORDERKEY", 64 | "store_length": "4" 65 | }, 66 | "data": [] 67 | }, 68 | { 69 | "item_type": "field", 70 | "properties": { 71 | "name": "L_ID", 72 | "store_length": "4" 73 | }, 74 | "data": [] 75 | } 76 | ] 77 | }, 78 | { 79 | "item_type": "key", 80 | "properties": { 81 | "key_length": "8", 82 | "name": "LINEITEM_FK2" 83 | }, 84 | "data": [ 85 | { 86 | "item_type": "field", 87 | "properties": { 88 | "name": "L_PARTKEY", 89 | "store_length": "4" 90 | }, 91 | "data": [] 92 | }, 93 | { 94 | "item_type": "field", 95 | "properties": { 96 | "name": "L_SUPPKEY", 97 | "store_length": "4" 98 | }, 99 | "data": [] 100 | }, 101 | { 102 | "item_type": "field", 103 | "properties": { 104 | "name": "L_ID", 105 | "store_length": "4" 106 | }, 107 | "data": [] 108 | } 109 | ] 110 | } 111 | ] 112 | } 113 | -------------------------------------------------------------------------------- /test/videx/data/test_result_range_rows_gt.json: 
-------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "index": "idx_I_IM_ID", 4 | "ranges": [ 5 | "I_IM_ID = 70", 6 | "I_IM_ID = 80" 7 | ], 8 | "index_dives_for_eq_ranges": true, 9 | "rowid_ordered": false, 10 | "using_mrr": false, 11 | "index_only": true, 12 | "in_memory": 0.0119048, 13 | "rows": 21, 14 | "cost": 3.12041, 15 | "chosen": true, 16 | "table": "`item`" 17 | }, 18 | { 19 | "index": "idx_s_test", 20 | "ranges": [ 21 | "I_IM_ID = 70", 22 | "I_IM_ID = 80" 23 | ], 24 | "index_dives_for_eq_ranges": true, 25 | "rowid_ordered": false, 26 | "using_mrr": false, 27 | "index_only": true, 28 | "in_memory": 0.00980392, 29 | "rows": 21, 30 | "cost": 3.1293, 31 | "chosen": false, 32 | "cause": "cost", 33 | "table": "`item`" 34 | }, 35 | { 36 | "index": "idx_test_im", 37 | "ranges": [ 38 | "I_IM_ID = 70", 39 | "I_IM_ID = 80" 40 | ], 41 | "index_dives_for_eq_ranges": true, 42 | "rowid_ordered": false, 43 | "using_mrr": false, 44 | "index_only": true, 45 | "in_memory": 0.00980392, 46 | "rows": 21, 47 | "cost": 3.1293, 48 | "chosen": false, 49 | "cause": "cost", 50 | "table": "`item`" 51 | }, 52 | { 53 | "index": "1_2_idx_I_IM_ID", 54 | "ranges": [ 55 | "I_IM_ID = 70", 56 | "I_IM_ID = 80" 57 | ], 58 | "index_dives_for_eq_ranges": true, 59 | "rowid_ordered": false, 60 | "using_mrr": false, 61 | "index_only": true, 62 | "in_memory": 0.0119048, 63 | "rows": 21, 64 | "cost": 3.12041, 65 | "chosen": true, 66 | "table": "`item`" 67 | }, 68 | { 69 | "index": "2_1_idx_I_IM_ID", 70 | "ranges": [ 71 | "I_IM_ID = 70", 72 | "I_IM_ID = 80" 73 | ], 74 | "index_dives_for_eq_ranges": true, 75 | "rowid_ordered": false, 76 | "using_mrr": false, 77 | "index_only": true, 78 | "in_memory": 0.0119048, 79 | "rows": 21, 80 | "cost": 3.12041, 81 | "chosen": true, 82 | "table": "`item2`" 83 | }, 84 | { 85 | "index": "2_2_idx_I_IM_ID", 86 | "ranges": [ 87 | "I_IM_ID = 70", 88 | "I_IM_ID = 80" 89 | ], 90 | "index_dives_for_eq_ranges": true, 91 | 
"rowid_ordered": false, 92 | "using_mrr": false, 93 | "index_only": true, 94 | "in_memory": 0.0119048, 95 | "rows": 21, 96 | "cost": 3.12041, 97 | "chosen": true, 98 | "table": "`item4`" 99 | } 100 | ] -------------------------------------------------------------------------------- /test/videx/data/test_tpch_1024/gt_rec_in_ranges_wo_idx_innodb.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "index": "idx_L_SHIPDATE", 4 | "ranges": [ 5 | "'1995-01-01' <= L_SHIPDATE <= '1996-12-31'" 6 | ], 7 | "index_dives_for_eq_ranges": true, 8 | "rowid_ordered": false, 9 | "using_mrr": true, 10 | "index_only": false, 11 | "in_memory": 0.0023825, 12 | "rows": 2896768, 13 | "cost": 2799680.0, 14 | "chosen": false, 15 | "cause": "cost", 16 | "table": "`lineitem`" 17 | }, 18 | { 19 | "index": "idx_N_NAME_N_NATIONKEY", 20 | "ranges": [ 21 | "N_NAME = 'CANADA'", 22 | "N_NAME = 'IRAN'" 23 | ], 24 | "index_dives_for_eq_ranges": true, 25 | "rowid_ordered": false, 26 | "using_mrr": false, 27 | "index_only": true, 28 | "in_memory": 1, 29 | "rows": 2, 30 | "cost": 0.463289, 31 | "chosen": true, 32 | "table": "`nation` `n1`" 33 | }, 34 | { 35 | "index": "idx_N_NAME_N_NATIONKEY", 36 | "ranges": [ 37 | "N_NAME = 'CANADA'", 38 | "N_NAME = 'IRAN'" 39 | ], 40 | "index_dives_for_eq_ranges": true, 41 | "rowid_ordered": false, 42 | "using_mrr": false, 43 | "index_only": true, 44 | "in_memory": 1, 45 | "rows": 2, 46 | "cost": 0.463289, 47 | "chosen": true, 48 | "table": "`nation` `n2`" 49 | } 50 | ] -------------------------------------------------------------------------------- /test/videx/data/test_tpch_1024/gt_req_resp.json: -------------------------------------------------------------------------------- 1 | [] -------------------------------------------------------------------------------- /test/videx/data/test_tpch_1024/videx_tpch_info_stats.json: -------------------------------------------------------------------------------- 1 | { 2 | 
"customer": { 3 | "TABLE_CATALOG": "def", 4 | "TABLE_SCHEMA": "tpch", 5 | "TABLE_NAME": "customer", 6 | "TABLE_TYPE": "BASE TABLE", 7 | "ENGINE": "InnoDB", 8 | "VERSION": 10, 9 | "ROW_FORMAT": "Dynamic", 10 | "TABLE_ROWS": 147763, 11 | "AVG_ROW_LENGTH": 202, 12 | "DATA_LENGTH": 29933568, 13 | "MAX_DATA_LENGTH": 0, 14 | "INDEX_LENGTH": 3686400, 15 | "DATA_FREE": 0, 16 | "AUTO_INCREMENT": NaN, 17 | "CREATE_TIME": 1701871739, 18 | "UPDATE_TIME": null, 19 | "CHECK_TIME": null, 20 | "TABLE_COLLATION": "utf8mb4_0900_ai_ci", 21 | "CHECKSUM": null, 22 | "CREATE_OPTIONS": "", 23 | "TABLE_COMMENT": "", 24 | "innodb_page_size": 16384, 25 | "myisam_max_sort_file_size": 9223372036853727232, 26 | "innodb_buffer_pool_size": 134217728, 27 | "N_ROWS": 147763, 28 | "CLUSTERED_INDEX_SIZE": 1827, 29 | "SUM_OF_OTHER_INDEX_SIZES": 225, 30 | "DDL": "old_ut has no ddl, add it for compatibility" 31 | }, 32 | "lineitem": { 33 | "TABLE_CATALOG": "def", 34 | "TABLE_SCHEMA": "tpch", 35 | "TABLE_NAME": "lineitem", 36 | "TABLE_TYPE": "BASE TABLE", 37 | "ENGINE": "InnoDB", 38 | "VERSION": 10, 39 | "ROW_FORMAT": "Dynamic", 40 | "TABLE_ROWS": 5793536, 41 | "AVG_ROW_LENGTH": 161, 42 | "DATA_LENGTH": 936378368, 43 | "MAX_DATA_LENGTH": 0, 44 | "INDEX_LENGTH": 347127808, 45 | "DATA_FREE": 2097152, 46 | "AUTO_INCREMENT": 6002014.0, 47 | "CREATE_TIME": 1701870237, 48 | "UPDATE_TIME": null, 49 | "CHECK_TIME": null, 50 | "TABLE_COLLATION": "utf8mb4_0900_ai_ci", 51 | "CHECKSUM": null, 52 | "CREATE_OPTIONS": "", 53 | "TABLE_COMMENT": "", 54 | "innodb_page_size": 16384, 55 | "myisam_max_sort_file_size": 9223372036853727232, 56 | "innodb_buffer_pool_size": 134217728, 57 | "N_ROWS": 5793536, 58 | "CLUSTERED_INDEX_SIZE": 57152, 59 | "SUM_OF_OTHER_INDEX_SIZES": 21187, 60 | "DDL": "old_ut has no ddl, add it for compatibility" 61 | }, 62 | "nation": { 63 | "TABLE_CATALOG": "def", 64 | "TABLE_SCHEMA": "tpch", 65 | "TABLE_NAME": "nation", 66 | "TABLE_TYPE": "BASE TABLE", 67 | "ENGINE": "InnoDB", 68 | "VERSION": 10, 
69 | "ROW_FORMAT": "Dynamic", 70 | "TABLE_ROWS": 25, 71 | "AVG_ROW_LENGTH": 655, 72 | "DATA_LENGTH": 16384, 73 | "MAX_DATA_LENGTH": 0, 74 | "INDEX_LENGTH": 16384, 75 | "DATA_FREE": 0, 76 | "AUTO_INCREMENT": NaN, 77 | "CREATE_TIME": 1701871751, 78 | "UPDATE_TIME": null, 79 | "CHECK_TIME": null, 80 | "TABLE_COLLATION": "utf8mb4_0900_ai_ci", 81 | "CHECKSUM": null, 82 | "CREATE_OPTIONS": "", 83 | "TABLE_COMMENT": "", 84 | "innodb_page_size": 16384, 85 | "myisam_max_sort_file_size": 9223372036853727232, 86 | "innodb_buffer_pool_size": 134217728, 87 | "N_ROWS": 25, 88 | "CLUSTERED_INDEX_SIZE": 1, 89 | "SUM_OF_OTHER_INDEX_SIZES": 1, 90 | "DDL": "old_ut has no ddl, add it for compatibility" 91 | }, 92 | "orders": { 93 | "TABLE_CATALOG": "def", 94 | "TABLE_SCHEMA": "tpch", 95 | "TABLE_NAME": "orders", 96 | "TABLE_TYPE": "BASE TABLE", 97 | "ENGINE": "InnoDB", 98 | "VERSION": 10, 99 | "ROW_FORMAT": "Dynamic", 100 | "TABLE_ROWS": 1481853, 101 | "AVG_ROW_LENGTH": 136, 102 | "DATA_LENGTH": 202047488, 103 | "MAX_DATA_LENGTH": 0, 104 | "INDEX_LENGTH": 39403520, 105 | "DATA_FREE": 3145728, 106 | "AUTO_INCREMENT": NaN, 107 | "CREATE_TIME": 1701870864, 108 | "UPDATE_TIME": null, 109 | "CHECK_TIME": null, 110 | "TABLE_COLLATION": "utf8mb4_0900_ai_ci", 111 | "CHECKSUM": null, 112 | "CREATE_OPTIONS": "", 113 | "TABLE_COMMENT": "", 114 | "innodb_page_size": 16384, 115 | "myisam_max_sort_file_size": 9223372036853727232, 116 | "innodb_buffer_pool_size": 134217728, 117 | "N_ROWS": 1481853, 118 | "CLUSTERED_INDEX_SIZE": 12332, 119 | "SUM_OF_OTHER_INDEX_SIZES": 2405, 120 | "DDL": "old_ut has no ddl, add it for compatibility" 121 | }, 122 | "part": { 123 | "TABLE_CATALOG": "def", 124 | "TABLE_SCHEMA": "tpch", 125 | "TABLE_NAME": "part", 126 | "TABLE_TYPE": "BASE TABLE", 127 | "ENGINE": "InnoDB", 128 | "VERSION": 10, 129 | "ROW_FORMAT": "Dynamic", 130 | "TABLE_ROWS": 198200, 131 | "AVG_ROW_LENGTH": 166, 132 | "DATA_LENGTH": 33095680, 133 | "MAX_DATA_LENGTH": 0, 134 | "INDEX_LENGTH": 0, 135 | 
"DATA_FREE": 3145728, 136 | "AUTO_INCREMENT": NaN, 137 | "CREATE_TIME": 1701871365, 138 | "UPDATE_TIME": null, 139 | "CHECK_TIME": null, 140 | "TABLE_COLLATION": "utf8mb4_0900_ai_ci", 141 | "CHECKSUM": null, 142 | "CREATE_OPTIONS": "", 143 | "TABLE_COMMENT": "", 144 | "innodb_page_size": 16384, 145 | "myisam_max_sort_file_size": 9223372036853727232, 146 | "innodb_buffer_pool_size": 134217728, 147 | "N_ROWS": 198200, 148 | "CLUSTERED_INDEX_SIZE": 2020, 149 | "SUM_OF_OTHER_INDEX_SIZES": 0, 150 | "DDL": "old_ut has no ddl, add it for compatibility" 151 | }, 152 | "partsupp": { 153 | "TABLE_CATALOG": "def", 154 | "TABLE_SCHEMA": "tpch", 155 | "TABLE_NAME": "partsupp", 156 | "TABLE_TYPE": "BASE TABLE", 157 | "ENGINE": "InnoDB", 158 | "VERSION": 10, 159 | "ROW_FORMAT": "Dynamic", 160 | "TABLE_ROWS": 784436, 161 | "AVG_ROW_LENGTH": 185, 162 | "DATA_LENGTH": 145375232, 163 | "MAX_DATA_LENGTH": 0, 164 | "INDEX_LENGTH": 45727744, 165 | "DATA_FREE": 0, 166 | "AUTO_INCREMENT": 800124.0, 167 | "CREATE_TIME": 1701871129, 168 | "UPDATE_TIME": null, 169 | "CHECK_TIME": null, 170 | "TABLE_COLLATION": "utf8mb4_0900_ai_ci", 171 | "CHECKSUM": null, 172 | "CREATE_OPTIONS": "", 173 | "TABLE_COMMENT": "", 174 | "innodb_page_size": 16384, 175 | "myisam_max_sort_file_size": 9223372036853727232, 176 | "innodb_buffer_pool_size": 134217728, 177 | "N_ROWS": 784436, 178 | "CLUSTERED_INDEX_SIZE": 8873, 179 | "SUM_OF_OTHER_INDEX_SIZES": 2791, 180 | "DDL": "old_ut has no ddl, add it for compatibility" 181 | }, 182 | "region": { 183 | "TABLE_CATALOG": "def", 184 | "TABLE_SCHEMA": "tpch", 185 | "TABLE_NAME": "region", 186 | "TABLE_TYPE": "BASE TABLE", 187 | "ENGINE": "InnoDB", 188 | "VERSION": 10, 189 | "ROW_FORMAT": "Dynamic", 190 | "TABLE_ROWS": 5, 191 | "AVG_ROW_LENGTH": 3276, 192 | "DATA_LENGTH": 16384, 193 | "MAX_DATA_LENGTH": 0, 194 | "INDEX_LENGTH": 0, 195 | "DATA_FREE": 0, 196 | "AUTO_INCREMENT": NaN, 197 | "CREATE_TIME": 1701871751, 198 | "UPDATE_TIME": null, 199 | "CHECK_TIME": null, 200 | 
"TABLE_COLLATION": "utf8mb4_0900_ai_ci", 201 | "CHECKSUM": null, 202 | "CREATE_OPTIONS": "", 203 | "TABLE_COMMENT": "", 204 | "innodb_page_size": 16384, 205 | "myisam_max_sort_file_size": 9223372036853727232, 206 | "innodb_buffer_pool_size": 134217728, 207 | "N_ROWS": 5, 208 | "CLUSTERED_INDEX_SIZE": 1, 209 | "SUM_OF_OTHER_INDEX_SIZES": 0, 210 | "DDL": "old_ut has no ddl, add it for compatibility" 211 | }, 212 | "supplier": { 213 | "TABLE_CATALOG": "def", 214 | "TABLE_SCHEMA": "tpch", 215 | "TABLE_NAME": "supplier", 216 | "TABLE_TYPE": "BASE TABLE", 217 | "ENGINE": "InnoDB", 218 | "VERSION": 10, 219 | "ROW_FORMAT": "Dynamic", 220 | "TABLE_ROWS": 10015, 221 | "AVG_ROW_LENGTH": 263, 222 | "DATA_LENGTH": 2637824, 223 | "MAX_DATA_LENGTH": 0, 224 | "INDEX_LENGTH": 1867776, 225 | "DATA_FREE": 0, 226 | "AUTO_INCREMENT": NaN, 227 | "CREATE_TIME": 1701871739, 228 | "UPDATE_TIME": null, 229 | "CHECK_TIME": null, 230 | "TABLE_COLLATION": "utf8mb4_0900_ai_ci", 231 | "CHECKSUM": null, 232 | "CREATE_OPTIONS": "", 233 | "TABLE_COMMENT": "", 234 | "innodb_page_size": 16384, 235 | "myisam_max_sort_file_size": 9223372036853727232, 236 | "innodb_buffer_pool_size": 134217728, 237 | "N_ROWS": 10015, 238 | "CLUSTERED_INDEX_SIZE": 161, 239 | "SUM_OF_OTHER_INDEX_SIZES": 114, 240 | "DDL": "old_ut has no ddl, add it for compatibility" 241 | } 242 | } -------------------------------------------------------------------------------- /test/videx/data/test_tpch_1024/videx_tpch_ndv_single.json: -------------------------------------------------------------------------------- 1 | { 2 | "customer": { 3 | "C_CUSTKEY": 150000, 4 | "C_NAME": 150000, 5 | "C_ADDRESS": 150000, 6 | "C_NATIONKEY": 25, 7 | "C_PHONE": 150000, 8 | "C_ACCTBAL": 140187, 9 | "C_MKTSEGMENT": 5, 10 | "C_COMMENT": 149968 11 | }, 12 | "lineitem": { 13 | "L_ORDERKEY": 1500000, 14 | "L_PARTKEY": 200000, 15 | "L_SUPPKEY": 10000, 16 | "L_LINENUMBER": 7, 17 | "L_QUANTITY": 50, 18 | "L_EXTENDEDPRICE": 933900, 19 | "L_DISCOUNT": 11, 20 | 
"L_TAX": 9, 21 | "L_RETURNFLAG": 3, 22 | "L_LINESTATUS": 2, 23 | "L_SHIPDATE": 2526, 24 | "L_COMMITDATE": 2466, 25 | "L_RECEIPTDATE": 2554, 26 | "L_SHIPINSTRUCT": 4, 27 | "L_SHIPMODE": 7, 28 | "L_COMMENT": 4580554, 29 | "L_ID": 6001215 30 | }, 31 | "nation": { 32 | "N_NATIONKEY": 25, 33 | "N_NAME": 25, 34 | "N_REGIONKEY": 5, 35 | "N_COMMENT": 25 36 | }, 37 | "orders": { 38 | "O_ORDERKEY": 1500000, 39 | "O_CUSTKEY": 99996, 40 | "O_ORDERSTATUS": 3, 41 | "O_TOTALPRICE": 1464556, 42 | "O_ORDERDATE": 2406, 43 | "O_ORDERPRIORITY": 5, 44 | "O_CLERK": 1000, 45 | "O_SHIPPRIORITY": 1, 46 | "O_COMMENT": 1482071 47 | }, 48 | "part": { 49 | "P_PARTKEY": 200000, 50 | "P_NAME": 199997, 51 | "P_MFGR": 5, 52 | "P_BRAND": 25, 53 | "P_TYPE": 150, 54 | "P_SIZE": 50, 55 | "P_CONTAINER": 40, 56 | "P_RETAILPRICE": 20899, 57 | "P_COMMENT": 131749 58 | }, 59 | "partsupp": { 60 | "PS_PARTKEY": 200000, 61 | "PS_SUPPKEY": 10000, 62 | "PS_AVAILQTY": 9999, 63 | "PS_SUPPLYCOST": 99865, 64 | "PS_COMMENT": 799124, 65 | "PS_ID": 800000 66 | }, 67 | "region": { 68 | "R_REGIONKEY": 5, 69 | "R_NAME": 5, 70 | "R_COMMENT": 5 71 | }, 72 | "supplier": { 73 | "S_SUPPKEY": 10000, 74 | "S_NAME": 10000, 75 | "S_ADDRESS": 10000, 76 | "S_NATIONKEY": 25, 77 | "S_PHONE": 10000, 78 | "S_ACCTBAL": 9955, 79 | "S_COMMENT": 10000 80 | } 81 | } -------------------------------------------------------------------------------- /test/videx/data/test_trace_range_rows_gt2.json: -------------------------------------------------------------------------------- 1 | { 2 | "steps": [ 3 | { 4 | "lex_json_preparse_tables": "" 5 | }, 6 | { 7 | "join_preparation": { 8 | "select#": 1, 9 | "steps": [ 10 | ] 11 | } 12 | }, 13 | { 14 | "lex_json_before_optimize": "" 15 | }, 16 | { 17 | "join_optimization": { 18 | "select#": 1, 19 | "steps": [ 20 | { 21 | "condition_processing": { 22 | } 23 | }, 24 | { 25 | "substitute_generated_columns": { 26 | } 27 | }, 28 | { 29 | "table_dependencies": [ 30 | ] 31 | }, 32 | { 33 | 
"ref_optimizer_key_uses": [ 34 | ] 35 | }, 36 | { 37 | "rows_estimation": [ 38 | { 39 | "table": "`item`", 40 | "range_analysis": { 41 | "analyzing_range_alternatives": { 42 | "range_scan_alternatives": [ 43 | { 44 | "index": "idx_I_IM_ID", 45 | "ranges": [ 46 | "I_IM_ID = 70", 47 | "I_IM_ID = 80" 48 | ], 49 | "index_dives_for_eq_ranges": true, 50 | "rowid_ordered": false, 51 | "using_mrr": false, 52 | "index_only": true, 53 | "in_memory": 0.0119048, 54 | "rows": 21, 55 | "cost": 3.12041, 56 | "chosen": true 57 | }, 58 | { 59 | "index": "idx_s_test", 60 | "ranges": [ 61 | "I_IM_ID = 70", 62 | "I_IM_ID = 80" 63 | ], 64 | "index_dives_for_eq_ranges": true, 65 | "rowid_ordered": false, 66 | "using_mrr": false, 67 | "index_only": true, 68 | "in_memory": 0.00980392, 69 | "rows": 21, 70 | "cost": 3.1293, 71 | "chosen": false, 72 | "cause": "cost" 73 | }, 74 | { 75 | "index": "idx_test_im", 76 | "ranges": [ 77 | "I_IM_ID = 70", 78 | "I_IM_ID = 80" 79 | ], 80 | "index_dives_for_eq_ranges": true, 81 | "rowid_ordered": false, 82 | "using_mrr": false, 83 | "index_only": true, 84 | "in_memory": 0.00980392, 85 | "rows": 21, 86 | "cost": 3.1293, 87 | "chosen": false, 88 | "cause": "cost" 89 | } 90 | ] 91 | } 92 | } 93 | }, 94 | { 95 | "table": "`item`", 96 | "range_analysis": { 97 | "analyzing_range_alternatives": { 98 | "range_scan_alternatives": [ 99 | { 100 | "index": "1_2_idx_I_IM_ID", 101 | "ranges": [ 102 | "I_IM_ID = 70", 103 | "I_IM_ID = 80" 104 | ], 105 | "index_dives_for_eq_ranges": true, 106 | "rowid_ordered": false, 107 | "using_mrr": false, 108 | "index_only": true, 109 | "in_memory": 0.0119048, 110 | "rows": 21, 111 | "cost": 3.12041, 112 | "chosen": true 113 | } 114 | ] 115 | } 116 | } 117 | } 118 | ] 119 | } 120 | ] 121 | } 122 | }, 123 | { 124 | "join_optimization": { 125 | "select#": 2, 126 | "steps": [ 127 | { 128 | "condition_processing": { 129 | } 130 | }, 131 | { 132 | "substitute_generated_columns": { 133 | } 134 | }, 135 | { 136 | "table_dependencies": [ 
137 | ] 138 | }, 139 | { 140 | "ref_optimizer_key_uses": [ 141 | ] 142 | }, 143 | { 144 | "rows_estimation": [ 145 | { 146 | "table": "`item2`", 147 | "range_analysis": { 148 | "analyzing_range_alternatives": { 149 | "range_scan_alternatives": [ 150 | { 151 | "index": "2_1_idx_I_IM_ID", 152 | "ranges": [ 153 | "I_IM_ID = 70", 154 | "I_IM_ID = 80" 155 | ], 156 | "index_dives_for_eq_ranges": true, 157 | "rowid_ordered": false, 158 | "using_mrr": false, 159 | "index_only": true, 160 | "in_memory": 0.0119048, 161 | "rows": 21, 162 | "cost": 3.12041, 163 | "chosen": true 164 | } 165 | ] 166 | } 167 | } 168 | }, 169 | { 170 | "table": "`item4`", 171 | "range_analysis": { 172 | "analyzing_range_alternatives": { 173 | "range_scan_alternatives": [ 174 | { 175 | "index": "2_2_idx_I_IM_ID", 176 | "ranges": [ 177 | "I_IM_ID = 70", 178 | "I_IM_ID = 80" 179 | ], 180 | "index_dives_for_eq_ranges": true, 181 | "rowid_ordered": false, 182 | "using_mrr": false, 183 | "index_only": true, 184 | "in_memory": 0.0119048, 185 | "rows": 21, 186 | "cost": 3.12041, 187 | "chosen": true 188 | } 189 | ] 190 | } 191 | } 192 | } 193 | ] 194 | } 195 | ] 196 | } 197 | } 198 | ] 199 | } 200 | -------------------------------------------------------------------------------- /test/videx/data/test_videx_trace_check.json: -------------------------------------------------------------------------------- 1 | { 2 | "trace_wo_idx_videx_with_gt": { 3 | "steps": [ 4 | { 5 | "lex_json_preparse_tables": "{\"item_type\":\"table_list\",\"properties\":{},\"data\":[{\"item_type\":\"table\",\"properties\":{\"address\":\"0x7f9ef493f040\",\"alias\":\"orders\",\"db\":\"videx_tpch\",\"prompt\":\"select_precheck\",\"table\":\"orders\",\"tableno\":\"0\"},\"data\":[]},{\"item_type\":\"table\",\"properties\":{\"address\":\"0x7f9ef4947630\",\"alias\":\"lineitem\",\"db\":\"videx_tpch\",\"prompt\":\"select_precheck\",\"table\":\"lineitem\",\"tableno\":\"0\"},\"data\":[]}]}" 6 | }, 7 | { 8 | "dict_name": "videx_http", 9 | "url": 
"http://127.0.0.1:5001/ask_videx", 10 | "request": "{\"item_type\":\"videx_request\",\"properties\":{\"dbname\":\"videx_tpch\",\"function\":\"virtual int ha_videx::info_low(uint, bool)\",\"table_name\":\"orders\",\"target_storage_engine\":\"INNODB\",\"videx_options\":\"{\\\"task_id\\\": \\\"127_0_0_1_13308@@@demo_tpch\\\", \\\"use_gt\\\": true}\"},\"data\":[{\"item_type\":\"key\",\"properties\":{\"key_length\":\"4\",\"name\":\"PRIMARY\"},\"data\":[{\"item_type\":\"field\",\"properties\":{\"name\":\"O_ORDERKEY\",\"store_length\":\"4\"},\"data\":[]}]},{\"item_type\":\"key\",\"properties\":{\"key_length\":\"4\",\"name\":\"ORDERS_FK1\"},\"data\":[{\"item_type\":\"field\",\"properties\":{\"name\":\"O_CUSTKEY\",\"store_length\":\"4\"},\"data\":[]},{\"item_type\":\"field\",\"properties\":{\"name\":\"O_ORDERKEY\",\"store_length\":\"4\"},\"data\":[]}]}]}", 11 | "success": true, 12 | "detail": "{\n \"code\": 200, \n \"data\": {\n \"ORDERS_FK1 #@# O_CUSTKEY\": \"15.14957685096032\", \n \"ORDERS_FK1 #@# O_ORDERKEY\": \"1.0147397762828907\", \n \"PRIMARY #@# O_ORDERKEY\": \"1.0\", \n \"data_file_length\": \"202047488\", \n \"data_free_length\": \"4194304\", \n \"index_file_length\": \"39403520\", \n \"stat_clustered_index_size\": \"12332\", \n \"stat_n_rows\": \"1494733\", \n \"stat_sum_of_other_index_sizes\": \"2405\"\n }, \n \"message\": \"OK\"\n}\n" 13 | }, 14 | { 15 | "dict_name": "videx_http", 16 | "url": "http://127.0.0.1:5001/ask_videx", 17 | "request": "{\"item_type\":\"videx_request\",\"properties\":{\"dbname\":\"videx_tpch\",\"function\":\"virtual double ha_videx::scan_time()\",\"table_name\":\"region\",\"target_storage_engine\":\"INNODB\",\"videx_options\":\"{\\\"task_id\\\": \\\"127_0_0_1_13308@@@demo_tpch\\\", \\\"use_gt\\\": true}\"},\"data\":[]}", 18 | "success": false, 19 | "reason": "res_code != CURLE_OK", 20 | "detail": "A libcurl function was given a bad argument" 21 | } 22 | ] 23 | }, 24 | "trace_wo_idx_videx_wo_gt": { 25 | "steps": [ 26 | { 27 | 
"join_optimization": { 28 | "select#": 1, 29 | "steps": [ 30 | { 31 | "rows_estimation": [ 32 | { 33 | "table": "`lineitem`", 34 | "unknown_key_2": { 35 | "dict_name": "videx_http", 36 | "url": "http://127.0.0.1:5001/ask_videx", 37 | "request": "{\"item_type\":\"videx_request\",\"properties\":{\"dbname\":\"videx_tpch\",\"function\":\"virtual double ha_videx::scan_time()\",\"table_name\":\"lineitem\",\"target_storage_engine\":\"INNODB\",\"videx_options\":\"{\\\"task_id\\\": \\\"127_0_0_1_13308@@@demo_tpch\\\", \\\"use_gt\\\": false}\"},\"data\":[]}", 38 | "success": true, 39 | "detail": "{\n \"code\": 200, \n \"data\": {\n \"value\": \"56640\"\n }, \n \"message\": \"OK\"\n}\n" 40 | }, 41 | "unknown_key_7": { 42 | "dict_name": "videx_http", 43 | "url": "http://127.0.0.1:5001/ask_videx", 44 | "request": "{\"item_type\":\"videx_request\",\"properties\":{\"dbname\":\"videx_tpch\",\"function\":\"virtual double ha_videx::scan_time()\",\"table_name\":\"region\",\"target_storage_engine\":\"INNODB\",\"videx_options\":\"{\\\"task_id\\\": \\\"127_0_0_1_13308@@@demo_tpch\\\", \\\"use_gt\\\": true}\"},\"data\":[]}", 45 | "success": false, 46 | "reason": "res_code != CURLE_OK", 47 | "detail": "A libcurl function was given a bad argument" 48 | }, 49 | "table_scan": { 50 | "rows": 5750485, 51 | "cost": 56640 52 | } 53 | } 54 | ] 55 | }, 56 | { 57 | "considering_tmp_tables": [ 58 | { 59 | "adding_tmp_table_in_plan_at_position": 2, 60 | "write_method": "continuously_update_group_row" 61 | }, 62 | { 63 | "adding_sort_to_table": "" 64 | } 65 | ] 66 | } 67 | ] 68 | } 69 | } 70 | 71 | ] 72 | } 73 | } -------------------------------------------------------------------------------- /test/videx/data/tpch_64/videx_tpch_ndv_single.json: -------------------------------------------------------------------------------- 1 | { 2 | "customer": { 3 | "C_CUSTKEY": 150000, 4 | "C_NAME": 150000, 5 | "C_ADDRESS": 150000, 6 | "C_NATIONKEY": 25, 7 | "C_PHONE": 150000, 8 | "C_ACCTBAL": 140187, 9 | 
"C_MKTSEGMENT": 5, 10 | "C_COMMENT": 149968 11 | }, 12 | "lineitem": { 13 | "L_ORDERKEY": 1500000, 14 | "L_PARTKEY": 200000, 15 | "L_SUPPKEY": 10000, 16 | "L_LINENUMBER": 7, 17 | "L_QUANTITY": 50, 18 | "L_EXTENDEDPRICE": 933900, 19 | "L_DISCOUNT": 11, 20 | "L_TAX": 9, 21 | "L_RETURNFLAG": 3, 22 | "L_LINESTATUS": 2, 23 | "L_SHIPDATE": 2526, 24 | "L_COMMITDATE": 2466, 25 | "L_RECEIPTDATE": 2554, 26 | "L_SHIPINSTRUCT": 4, 27 | "L_SHIPMODE": 7, 28 | "L_COMMENT": 4580554, 29 | "L_ID": 6001215 30 | }, 31 | "nation": { 32 | "N_NATIONKEY": 25, 33 | "N_NAME": 25, 34 | "N_REGIONKEY": 5, 35 | "N_COMMENT": 25 36 | }, 37 | "orders": { 38 | "O_ORDERKEY": 1500000, 39 | "O_CUSTKEY": 99996, 40 | "O_ORDERSTATUS": 3, 41 | "O_TOTALPRICE": 1464556, 42 | "O_ORDERDATE": 2406, 43 | "O_ORDERPRIORITY": 5, 44 | "O_CLERK": 1000, 45 | "O_SHIPPRIORITY": 1, 46 | "O_COMMENT": 1482071 47 | }, 48 | "part": { 49 | "P_PARTKEY": 200000, 50 | "P_NAME": 199997, 51 | "P_MFGR": 5, 52 | "P_BRAND": 25, 53 | "P_TYPE": 150, 54 | "P_SIZE": 50, 55 | "P_CONTAINER": 40, 56 | "P_RETAILPRICE": 20899, 57 | "P_COMMENT": 131749 58 | }, 59 | "partsupp": { 60 | "PS_PARTKEY": 200000, 61 | "PS_SUPPKEY": 10000, 62 | "PS_AVAILQTY": 9999, 63 | "PS_SUPPLYCOST": 99865, 64 | "PS_COMMENT": 799124, 65 | "PS_ID": 800000 66 | }, 67 | "region": { 68 | "R_REGIONKEY": 5, 69 | "R_NAME": 5, 70 | "R_COMMENT": 5 71 | }, 72 | "supplier": { 73 | "S_SUPPKEY": 10000, 74 | "S_NAME": 10000, 75 | "S_ADDRESS": 10000, 76 | "S_NATIONKEY": 25, 77 | "S_PHONE": 10000, 78 | "S_ACCTBAL": 9955, 79 | "S_COMMENT": 10000 80 | } 81 | } -------------------------------------------------------------------------------- /test/videx/test_info_low.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Copyright (c) 2024 Bytedance Ltd. 
and/or its affiliates 4 | SPDX-License-Identifier: MIT 5 | """ 6 | import json 7 | import os 8 | import unittest 9 | 10 | from sub_platforms.sql_opt.videx.videx_service import VidexSingleton 11 | 12 | 13 | class Test_info_low(unittest.TestCase): 14 | def setUp(self): 15 | # 替换 ITEM 的 histogram,便于测试。测试范围是 I_PRICE、I_IM_ID 16 | self.singleton = VidexSingleton(pct_cached=1) 17 | self.task_id = '127_0_0_1_13308@@@demo_tpch' 18 | self.raw_db = 'tpch' 19 | self.videx_db = 'videx_tpch' 20 | self.test_meta_dir = os.path.join(os.path.dirname(__file__), 21 | "data/test_tpch_1024") 22 | 23 | def test_info_with_ndv_mulcol(self): 24 | """ 25 | explain SELECT I_IM_ID FROM tpcc.ITEM force index(idx_I_IM_ID_I_PRICE) where I_IM_ID > 20 26 | Returns: 27 | 28 | """ 29 | loaded = self.singleton.add_task_meta_from_local_files( 30 | task_id=self.task_id, 31 | raw_db=self.raw_db, 32 | videx_db=self.videx_db, 33 | stats_file=os.path.join(self.test_meta_dir, 'videx_tpch_info_stats_with_pct_cached.json'), 34 | hist_file=os.path.join(self.test_meta_dir, 'videx_tpch_histogram.json'), 35 | ndv_single_file=os.path.join(self.test_meta_dir, 'videx_tpch_ndv_single.json'), 36 | ndv_mulcol_file=os.path.join(self.test_meta_dir, 'videx_tpch_ndv_mulcol.json'), 37 | gt_rec_in_ranges_file=None, 38 | gt_req_resp_file=None, 39 | ) 40 | self.assertTrue(loaded) 41 | 42 | with open(os.path.join(os.path.dirname(__file__), 43 | "data/test_info_item2.json"), "r") as f: 44 | req_json = json.load(f) 45 | req_json['properties']["dbname"] = self.videx_db 46 | req_json['properties']["videx_options"] = json.dumps({ 47 | "task_id": self.task_id, 48 | "use_gt": True 49 | }) 50 | res = self.singleton.ask( 51 | req_json_item=req_json, 52 | raise_out=True, 53 | result2str=False 54 | ) 55 | print(json.dumps(res, indent=4)) 56 | expect_gt = [ 57 | 200, 58 | "OK", 59 | { 60 | 'stat_n_rows': 5799239, 61 | "stat_clustered_index_size": 57152, 62 | 'stat_sum_of_other_index_sizes': 69962, 63 | "data_file_length": 936378368, 64 | 
'index_file_length': 1146257408, 65 | "data_free_length": 2097152, 66 | # note, you can specify pct_cached to use gt, 0, or 1 as default 67 | 'pct_cached #@# LINEITEM_FK1': 1, 68 | 'pct_cached #@# LINEITEM_FK2': 1, 69 | 'pct_cached #@# LINEITEM_UK1': 1, 70 | 'pct_cached #@# PRIMARY': 1, 71 | # 'pct_cached #@# LINEITEM_FK1': 0.0, 72 | # 'pct_cached #@# LINEITEM_FK2': 1.0, 73 | # 'pct_cached #@# LINEITEM_UK1': 1.0, 74 | # 'pct_cached #@# PRIMARY': 0.8475, 75 | 'rec_per_key #@# LINEITEM_FK1 #@# L_ID': 1.0, 76 | 'rec_per_key #@# LINEITEM_FK1 #@# L_ORDERKEY': 3.880374438359189, 77 | 'rec_per_key #@# LINEITEM_FK2 #@# L_ID': 1.0, 78 | 'rec_per_key #@# LINEITEM_FK2 #@# L_PARTKEY': 30.66093020550806, 79 | 'rec_per_key #@# LINEITEM_FK2 #@# L_SUPPKEY': 7.353160308518232, 80 | 'rec_per_key #@# LINEITEM_UK1 #@# L_LINENUMBER': 1.0, 81 | 'rec_per_key #@# LINEITEM_UK1 #@# L_ORDERKEY': 3.9050747044547935, 82 | 'rec_per_key #@# PRIMARY #@# L_ID': 1.000984372928726, 83 | } 84 | ] 85 | self.assertEqual(tuple(expect_gt), res) 86 | 87 | def test_info_without_ndv_mulcol(self): 88 | """ 89 | explain SELECT I_IM_ID FROM tpcc.ITEM force index(idx_I_IM_ID_I_PRICE) where I_IM_ID > 20 90 | Returns: 91 | 92 | """ 93 | loaded = self.singleton.add_task_meta_from_local_files( 94 | task_id=self.task_id, 95 | raw_db=self.raw_db, 96 | videx_db=self.videx_db, 97 | stats_file=os.path.join(self.test_meta_dir, 'videx_tpch_info_stats_with_pct_cached.json'), 98 | hist_file=os.path.join(self.test_meta_dir, 'videx_tpch_histogram.json'), 99 | ndv_single_file=os.path.join(self.test_meta_dir, 'videx_tpch_ndv_single.json'), 100 | ndv_mulcol_file=None, # without ndv,则此处留 None 101 | gt_rec_in_ranges_file=None, 102 | gt_req_resp_file=None, 103 | ) 104 | self.assertTrue(loaded) 105 | 106 | with open(os.path.join(os.path.dirname(__file__), 107 | "data/test_info_item2.json"), "r") as f: 108 | req_json = json.load(f) 109 | req_json['properties']["dbname"] = self.videx_db 110 | req_json['properties']["videx_options"] = 
json.dumps({ 111 | "task_id": self.task_id, 112 | # "use_gt": False 113 | }) 114 | res = self.singleton.ask( 115 | req_json_item=req_json, 116 | raise_out=True, 117 | result2str=False 118 | ) 119 | print(json.dumps(res, indent=4)) 120 | expect_nogt = [ 121 | 200, 122 | 'OK', 123 | {'data_file_length': 936378368, 124 | 'data_free_length': 2097152, 125 | 'index_file_length': 1146257408, 126 | # 注意,pct_cached 是否使用 gt、默认值取 0 或者 1 ,是根据策略而定的。当策略更新后,直接把 expect 改掉即可 127 | # 修改后 128 | # 'pct_cached #@# LINEITEM_FK1': 0, 129 | # 'pct_cached #@# LINEITEM_FK2': 0, 130 | # 'pct_cached #@# LINEITEM_UK1': 0, 131 | # 'pct_cached #@# PRIMARY': 0, 132 | # 修改前 133 | 'pct_cached #@# LINEITEM_FK1': 1, 134 | 'pct_cached #@# LINEITEM_FK2': 1, 135 | 'pct_cached #@# LINEITEM_UK1': 1, 136 | 'pct_cached #@# PRIMARY': 1, 137 | 'rec_per_key #@# LINEITEM_FK1 #@# L_ID': 1.0, 138 | 'rec_per_key #@# LINEITEM_FK1 #@# L_ORDERKEY': 3.8661593333333335, 139 | 'rec_per_key #@# LINEITEM_FK2 #@# L_ID': 1.0, 140 | 'rec_per_key #@# LINEITEM_FK2 #@# L_PARTKEY': 28.996195, 141 | 'rec_per_key #@# LINEITEM_FK2 #@# L_SUPPKEY': 1.0, 142 | 'rec_per_key #@# LINEITEM_UK1 #@# L_LINENUMBER': 1.0, 143 | 'rec_per_key #@# LINEITEM_UK1 #@# L_ORDERKEY': 3.8661593333333335, 144 | 'rec_per_key #@# PRIMARY #@# L_ID': 1.0, 145 | 'stat_clustered_index_size': 57152, 146 | 'stat_n_rows': 5799239, 147 | 'stat_sum_of_other_index_sizes': 69962}, 148 | ] 149 | self.assertEqual(tuple(expect_nogt), res) 150 | 151 | 152 | if __name__ == '__main__': 153 | pass 154 | -------------------------------------------------------------------------------- /test/videx/test_records_in_range_nullable.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Copyright (c) 2024 Bytedance Ltd. 
and/or its affiliates 4 | SPDX-License-Identifier: MIT 5 | """ 6 | import json 7 | import os 8 | import unittest 9 | from typing import List 10 | 11 | from sub_platforms.sql_opt.videx.videx_histogram import HistogramBucket, HistogramStats, init_bucket_by_type 12 | from sub_platforms.sql_opt.videx.videx_metadata import construct_videx_task_meta_from_local_files 13 | from sub_platforms.sql_opt.videx.videx_service import VidexSingleton 14 | from sub_platforms.sql_opt.videx.videx_utils import IndexRangeCond, GT_Table_Return, load_json_from_file, \ 15 | BTreeKeyOp, BTreeKeySide, join_path 16 | 17 | 18 | class Test_record_in_ranges_algorithm(unittest.TestCase): 19 | """ 20 | 测试在 histogram 包含 null 值时,records_in_range 算法的表现 21 | """ 22 | 23 | def setUp(self): 24 | self.singleton = VidexSingleton() 25 | req_dict = load_json_from_file(join_path(__file__, 'data/videx_metadata_test_null_db.json')) 26 | 27 | meta = construct_videx_task_meta_from_local_files(task_id=None, 28 | videx_db='videx_test_null_db', 29 | stats_file=req_dict.get('stats_dict', {}), 30 | hist_file=req_dict.get('hist_dict', {}), 31 | ndv_single_file=req_dict.get('ndv_single_dict', {}), 32 | ndv_mulcol_file=req_dict.get('ndv_mulcol_dict', {}), 33 | gt_rec_in_ranges_file=None, 34 | gt_req_resp_file=None, 35 | raise_error=True, 36 | ) 37 | self.singleton.add_task_meta(meta.to_dict()) 38 | 39 | def test_gt_NULL_and_lt_empty_str(self): 40 | """ 41 | explain select nullable_code from test_columns where nullable_code != ''; 42 | 43 | data: 50% NULL, 10% 'A', 10% 'B', 10% 'C', 10% 'D', 10% 'E' 44 | this trace: NULL < nullable_code < '' 45 | 46 | Returns: 47 | freq is 0, take max(1, freq), so return 1 48 | 49 | """ 50 | req_json = {"item_type": "videx_request", 51 | "properties": {"dbname": "videx_test_null_db", 52 | "function": "virtual ha_rows ha_videx::records_in_range(uint, key_range *, key_range *)", 53 | "table_name": "test_columns", 54 | "target_storage_engine": "INNODB", 55 | "videx_options": "{}" 56 | }, 57 
| "data": [ 58 | {"item_type": "min_key", 59 | "properties": {"index_name": "idx_nullable_code", "length": "5", "operator": ">"}, 60 | "data": [{"item_type": "column_and_bound", 61 | "properties": {"column": "nullable_code", "value": "NULL"}, 62 | "data": []}]}, 63 | {"item_type": "max_key", 64 | "properties": {"index_name": "idx_nullable_code", "length": "5", "operator": "<"}, 65 | "data": [ 66 | {"item_type": "column_and_bound", "properties": {"column": "nullable_code", "value": "''"}, 67 | "data": []}]} 68 | ]} 69 | res = self.singleton.ask( 70 | req_json_item=req_json, 71 | raise_out=True 72 | ) 73 | print(res) 74 | self.assertEqual(res, (200, 'OK', {'value': '1'})) 75 | 76 | def test_eq_NULL(self): 77 | """ 78 | data: 50% NULL, 10% 'A', 10% 'B', 10% 'C', 10% 'D', 10% 'E' 79 | 80 | explain select nullable_code from test_columns where nullable_code is NULL; 81 | 82 | trace condition: nullable_code = NULL 83 | 84 | Returns: 85 | 86 | """ 87 | req_json = {"item_type": "videx_request", 88 | "properties": {"dbname": "videx_test_null_db", 89 | "function": "virtual ha_rows ha_videx::records_in_range(uint, key_range *, key_range *)", 90 | "table_name": "test_columns", 91 | "target_storage_engine": "INNODB", 92 | "videx_options": "{}"}, 93 | "data": [ 94 | {"item_type": "min_key", 95 | "properties": {"index_name": "idx_nullable_code", "length": "5", "operator": "="}, 96 | "data": [{"item_type": "column_and_bound", 97 | "properties": {"column": "nullable_code", "value": "NULL"}, 98 | "data": []}]}, 99 | {"item_type": "max_key", 100 | "properties": {"index_name": "idx_nullable_code", "length": "5", "operator": ">"}, 101 | "data": [{"item_type": "column_and_bound", 102 | "properties": {"column": "nullable_code", "value": "NULL"}, 103 | "data": []}]}]} 104 | res = self.singleton.ask( 105 | req_json_item=req_json, 106 | raise_out=True 107 | ) 108 | print(res) 109 | self.assertEqual(res, (200, 'OK', {'value': '25'})) 110 | 111 | 112 | def test_neq_NULL(self): 113 | """ 114 | 
data: 50% NULL, 10% 'A', 10% 'B', 10% 'C', 10% 'D', 10% 'E' 115 | 116 | explain select nullable_code from test_columns where nullable_code is not NULL; 117 | 118 | trace condition: NULL < nullable_code 119 | 120 | Returns: 121 | 122 | """ 123 | req_json = {"item_type": "videx_request", 124 | "properties": { 125 | "dbname": "videx_test_null_db", 126 | "function": "virtual ha_rows ha_videx::records_in_range(uint, key_range *, key_range *)", 127 | "table_name": "test_columns", "target_storage_engine": "INNODB", "videx_options": "{}"}, 128 | "data": [{"item_type": "min_key", 129 | "properties": {"index_name": "idx_nullable_code", "length": "5", "operator": ">"}, 130 | "data": [{"item_type": "column_and_bound", 131 | "properties": {"column": "nullable_code", "value": "NULL"}, "data": []}]}, 132 | {"item_type": "max_key", "properties": {}, "data": []} 133 | ]} 134 | 135 | res = self.singleton.ask( 136 | req_json_item=req_json, 137 | raise_out=True 138 | ) 139 | print(res) 140 | self.assertEqual(res, (200, 'OK', {'value': '25'})) 141 | --------------------------------------------------------------------------------