├── .gitignore ├── .gitmodules ├── CMakeLists.txt ├── LICENSE ├── Makefile ├── PixelsScanFunction.cpp ├── README.md ├── env.sh ├── experiments ├── clion_guide.md ├── lab1.md ├── lab2.pdf └── tips.md ├── figure ├── 1.png ├── 2.png ├── 3.png ├── 4.png ├── 5.png └── lab1-ref.png ├── include ├── PixelsReadBindData.hpp ├── PixelsReadGlobalState.hpp ├── PixelsReadLocalState.hpp ├── PixelsScanFunction.hpp └── pixels_extension.hpp ├── pixels-cli ├── CMakeLists.txt ├── include │ ├── executor │ │ ├── CommandExecutor.h │ │ └── LoadExecutor.h │ └── load │ │ ├── Parameters.h │ │ └── PixelsConsumer.h ├── lib │ ├── executor │ │ └── LoadExecutor.cpp │ └── load │ │ ├── Parameters.cpp │ │ └── PixelsConsumer.cpp └── main.cpp ├── pixels-common ├── CMakeLists.txt ├── include │ ├── exception │ │ └── InvalidArgumentException.h │ ├── physical │ │ ├── BufferPool.h │ │ ├── FilePath.h │ │ ├── MergedRequest.h │ │ ├── PhysicalReader.h │ │ ├── PhysicalReaderUtil.h │ │ ├── PhysicalWriter.h │ │ ├── PhysicalWriterOption.h │ │ ├── PhysicalWriterUtil.h │ │ ├── Request.h │ │ ├── RequestBatch.h │ │ ├── Scheduler.h │ │ ├── SchedulerFactory.h │ │ ├── Status.h │ │ ├── Storage.h │ │ ├── StorageArrayScheduler.h │ │ ├── StorageFactory.h │ │ ├── StorageProvider.h │ │ ├── allocator │ │ │ ├── Allocator.h │ │ │ ├── BufferPoolAllocator.h │ │ │ └── OrdinaryAllocator.h │ │ ├── io │ │ │ └── PhysicalLocalReader.h │ │ ├── natives │ │ │ ├── ByteBuffer.h │ │ │ ├── ByteOrder.h │ │ │ ├── DirectIoLib.h │ │ │ ├── DirectRandomAccessFile.h │ │ │ ├── DirectUringRandomAccessFile.h │ │ │ └── PixelsRandomAccessFile.h │ │ ├── scheduler │ │ │ ├── NoopScheduler.h │ │ │ └── SortMergeScheduler.h │ │ └── storage │ │ │ ├── LocalFS.h │ │ │ ├── LocalFSProvider.h │ │ │ └── PhysicalLocalWriter.h │ ├── profiler │ │ ├── AbstractProfiler.h │ │ ├── CountProfiler.h │ │ └── TimeProfiler.h │ └── utils │ │ ├── ColumnSizeCSVReader.h │ │ ├── ConfigFactory.h │ │ ├── Constants.h │ │ └── String.h └── lib │ ├── MergedRequest.cpp │ ├── exception │ └── 
InvalidArgumentException.cpp │ ├── physical │ ├── BufferPool.cpp │ ├── FilePath.cpp │ ├── PhysicalWriterOption.cpp │ ├── Request.cpp │ ├── RequestBatch.cpp │ ├── SchedulerFactory.cpp │ ├── Status.cpp │ ├── Storage.cpp │ ├── StorageArrayScheduler.cpp │ ├── StorageFactory.cpp │ ├── allocator │ │ ├── BufferPoolAllocator.cpp │ │ └── OrdinaryAllocator.cpp │ ├── io │ │ └── PhysicalLocalReader.cpp │ ├── natives │ │ ├── ByteBuffer.cpp │ │ ├── DirectIoLib.cpp │ │ ├── DirectRandomAccessFile.cpp │ │ ├── DirectUringRandomAccessFile.cpp │ │ └── PixelsRandomAccessFile.cpp │ ├── scheduler │ │ ├── NoopScheduler.cpp │ │ └── SortMergeScheduler.cpp │ └── storage │ │ ├── LocalFS.cpp │ │ ├── LocalFSProvider.cpp │ │ └── PhysicalLocalWriter.cpp │ ├── profiler │ ├── CountProfiler.cpp │ └── TimeProfiler.cpp │ └── utils │ ├── ColumnSizeCSVReader.cpp │ ├── ConfigFactory.cpp │ ├── Constants.cpp │ └── String.cpp ├── pixels-core ├── CMakeLists.txt ├── include │ ├── Category.h │ ├── PixelsBitMask.h │ ├── PixelsFilter.h │ ├── PixelsFooterCache.h │ ├── PixelsReader.h │ ├── PixelsReaderBuilder.h │ ├── PixelsReaderImpl.h │ ├── PixelsVersion.h │ ├── PixelsWriter.h │ ├── PixelsWriterImpl.h │ ├── TypeDescription.h │ ├── encoding │ │ ├── Decoder.h │ │ ├── Encoder.h │ │ ├── EncodingLevel.h │ │ ├── RunLenIntDecoder.h │ │ └── RunLenIntEncoder.h │ ├── exception │ │ ├── PixelsFileMagicInvalidException.h │ │ ├── PixelsFileVersionInvalidException.h │ │ └── PixelsReaderException.h │ ├── reader │ │ ├── CharColumnReader.h │ │ ├── ColumnReader.h │ │ ├── ColumnReaderBuilder.h │ │ ├── DateColumnReader.h │ │ ├── DecimalColumnReader.h │ │ ├── IntegerColumnReader.h │ │ ├── PixelsReaderOption.h │ │ ├── PixelsRecordReader.h │ │ ├── PixelsRecordReaderImpl.h │ │ ├── StringColumnReader.h │ │ ├── TimestampColumnReader.h │ │ └── VarcharColumnReader.h │ ├── stats │ │ └── StatsRecorder.h │ ├── utils │ │ ├── BitUtils.h │ │ ├── DynamicIntArray.h │ │ └── EncodingUtils.h │ ├── vector │ │ ├── BinaryColumnVector.h │ │ ├── 
ByteColumnVector.h │ │ ├── ColumnVector.h │ │ ├── DateColumnVector.h │ │ ├── DecimalColumnVector.h │ │ ├── LongColumnVector.h │ │ ├── TimestampColumnVector.h │ │ └── VectorizedRowBatch.h │ └── writer │ │ ├── ByteColumnWriter.h │ │ ├── CharColumnWriter.h │ │ ├── ColumnWriter.h │ │ ├── ColumnWriterBuilder.h │ │ ├── DateColumnWriter.h │ │ ├── DecimalColumnWriter.h │ │ ├── DoubleColumnWriter.h │ │ ├── FloatColumnWriter.h │ │ ├── IntegerColumnWriter.h │ │ ├── PixelsWriterOption.h │ │ ├── StringColumnWriter.h │ │ └── TimestampColumnWriter.h └── lib │ ├── Category.cpp │ ├── PixelsBitMask.cpp │ ├── PixelsFilter.cpp │ ├── PixelsFooterCache.cpp │ ├── PixelsReaderBuilder.cpp │ ├── PixelsReaderImpl.cpp │ ├── PixelsVersion.cpp │ ├── PixelsWriterImpl.cpp │ ├── TypeDescription.cpp │ ├── encoding │ ├── Decoder.cpp │ ├── Encoder.cpp │ ├── EncodingLevel.cpp │ ├── RunLenIntDecoder.cpp │ └── RunLenIntEncoder.cpp │ ├── exception │ ├── PixelsFileMagicInvalidException.cpp │ ├── PixelsFileVersionInvalidException.cpp │ └── PixelsReaderException.cpp │ ├── reader │ ├── CharColumnReader.cpp │ ├── ColumnReader.cpp │ ├── ColumnReaderBuilder.cpp │ ├── DateColumnReader.cpp │ ├── DecimalColumnReader.cpp │ ├── IntegerColumnReader.cpp │ ├── PixelsReaderOption.cpp │ ├── PixelsRecordReaderImpl.cpp │ ├── StringColumnReader.cpp │ ├── TimestampColumnReader.cpp │ └── VarcharColumnReader.cpp │ ├── stats │ └── StatsRecorder.cpp │ ├── utils │ ├── BitUtils.cpp │ ├── DynamicIntArray.cpp │ └── EncodingUtils.cpp │ ├── vector │ ├── BinaryColumnVector.cpp │ ├── ByteColumnVector.cpp │ ├── ColumnVector.cpp │ ├── DateColumnVector.cpp │ ├── DecimalColumnVector.cpp │ ├── LongColumnVector.cpp │ ├── TimestampColumnVector.cpp │ └── VectorizedRowBatch.cpp │ └── writer │ ├── ByteColumnWriter.cpp │ ├── CharColumnWriter.cpp │ ├── ColumnWriter.cpp │ ├── ColumnWriterBuilder.cpp │ ├── DateColumnWriter.cpp │ ├── DecimalColumnWriter.cpp │ ├── DoubleColumnWriter.cpp │ ├── FloatColumnWriter.cpp │ ├── IntegerColumnWriter.cpp │ ├── 
LongDecimalColumnWriter.cpp │ ├── PixelsWriterOption.cpp │ ├── StringColumnWriter.cpp │ └── TimestampColumnWriter.cpp ├── pixels-cxx.properties ├── pixels-proto └── pixels.proto ├── pixels_extension.cpp ├── sync_files.sh └── tests ├── CMakeLists.txt ├── PerformanceTest.cpp ├── data └── example.pxl ├── generate └── generate_tbl.py ├── physical └── Dev.cpp ├── test_date.tbl ├── test_decimal.tbl ├── test_int.tbl ├── test_string.tbl ├── test_timestamp.tbl ├── unit_tests.cpp └── writer ├── CMakeLists.txt ├── IntegerWriterTest.cpp └── PixelsWriterTest.cpp /.gitignore: -------------------------------------------------------------------------------- 1 | # Prerequisites 2 | *.d 3 | 4 | # Compiled Object files 5 | *.slo 6 | *.lo 7 | *.o 8 | *.obj 9 | 10 | # Precompiled Headers 11 | *.gch 12 | *.pch 13 | 14 | # Compiled Dynamic libraries 15 | *.so 16 | *.dylib 17 | *.dll 18 | 19 | # Fortran module files 20 | *.mod 21 | *.smod 22 | 23 | # Compiled Static libraries 24 | *.lai 25 | *.la 26 | *.a 27 | *.lib 28 | 29 | # Executables 30 | *.exe 31 | *.out 32 | *.app 33 | 34 | 35 | build/ 36 | cmake-build-*/ 37 | .idea/ 38 | .vscode/ 39 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "third-party/protobuf"] 2 | path = third-party/protobuf 3 | url = git@github.com:protocolbuffers/protobuf.git 4 | branch = 21.x 5 | [submodule "pixels-duckdb"] 6 | path = pixels-duckdb 7 | url = git@github.com:pixelsdb/duckdb.git 8 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.19) 2 | set(CMAKE_CXX_STANDARD 14) 3 | # Set extension name here 4 | set(TARGET_NAME pixels) 5 | set(CMAKE_EXPORT_COMPILE_COMMANDS ON) # emit compile_commands.json for IDE/tooling support 6 | set(EXTENSION_NAME ${TARGET_NAME}_extension) 7 | project(${TARGET_NAME}) 8 | 
include_directories(include) 9 | 10 | set(ENV{PIXELS_SRC} ${CMAKE_CURRENT_SOURCE_DIR}) 11 | set(ENV{PIXELS_HOME} ${CMAKE_CURRENT_SOURCE_DIR}) 12 | set(EXTENSION_SOURCES 13 | pixels_extension.cpp 14 | PixelsScanFunction.cpp 15 | ) 16 | add_library(${EXTENSION_NAME} STATIC ${EXTENSION_SOURCES}) 17 | 18 | find_package(Protobuf REQUIRED) 19 | include_directories(${Protobuf_INCLUDE_DIRS}) 20 | include_directories(${CMAKE_CURRENT_BINARY_DIR}) 21 | 22 | add_subdirectory(pixels-common) 23 | add_subdirectory(pixels-core) 24 | add_subdirectory(pixels-cli) 25 | add_subdirectory(tests) 26 | 27 | include_directories(pixels-common/include) 28 | include_directories(pixels-core/include) 29 | include_directories(${CMAKE_CURRENT_BINARY_DIR}) 30 | include_directories(${CMAKE_CURRENT_BINARY_DIR}/pixels-common/liburing/src/include) 31 | 32 | target_link_libraries( 33 | ${EXTENSION_NAME} 34 | pixels-common 35 | pixels-core 36 | ) 37 | 38 | set(PARAMETERS "-warnings") 39 | build_loadable_extension(${TARGET_NAME} ${PARAMETERS} ${EXTENSION_SOURCES}) 40 | 41 | message("duckdb export set: ${DUCKDB_EXPORT_SET}") 42 | 43 | install( 44 | TARGETS ${EXTENSION_NAME} pixels-core pixels-common 45 | EXPORT "${DUCKDB_EXPORT_SET}" 46 | LIBRARY DESTINATION "${INSTALL_LIB_DIR}" 47 | ARCHIVE DESTINATION "${INSTALL_LIB_DIR}") 48 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 PixelsDB 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 
| 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: all clean debug release pull update deps 2 | 3 | all: release 4 | 5 | MKFILE_PATH := $(abspath $(lastword $(MAKEFILE_LIST))) 6 | PROJ_DIR := $(dir $(MKFILE_PATH)) 7 | 8 | OSX_BUILD_UNIVERSAL_FLAG= 9 | ifeq (${OSX_BUILD_UNIVERSAL}, 1) 10 | OSX_BUILD_UNIVERSAL_FLAG=-DOSX_BUILD_UNIVERSAL=1 11 | endif 12 | ifeq (${STATIC_LIBCPP}, 1) 13 | STATIC_LIBCPP=-DSTATIC_LIBCPP=TRUE 14 | endif 15 | 16 | ifeq ($(GEN),ninja) 17 | GENERATOR=-G "Ninja" 18 | FORCE_COLOR=-DFORCE_COLORED_OUTPUT=1 19 | endif 20 | 21 | PROTOBUF_DIR=third-party/protobuf 22 | BUILD_FLAGS=-DEXTENSION_STATIC_BUILD=1 -DBUILD_TPCH_EXTENSION=1 -DBUILD_BENCHMARKS=1 -DBUILD_PARQUET_EXTENSION=1 \ 23 | ${OSX_BUILD_UNIVERSAL_FLAG} ${STATIC_LIBCPP} 24 | 25 | CLIENT_FLAGS := 26 | 27 | # These flags will make DuckDB build the extension 28 | 29 | EXTENSION_FLAGS=-DDUCKDB_EXTENSION_NAMES="pixels" -DDUCKDB_EXTENSION_PIXELS_PATH="$(PROJ_DIR)" \ 30 | -DDUCKDB_EXTENSION_PIXELS_SHOULD_LINK="TRUE" -DDUCKDB_EXTENSION_PIXELS_INCLUDE_PATH="$(PROJ_DIR)include" \ 31 | -DCMAKE_PREFIX_PATH=$(PROJ_DIR)third-party/protobuf/cmake/build -DPIXELS_SRC="$(dirname $(pwd))" 32 | 33 | pull: 34 | git submodule init 35 | git 
submodule update --recursive --init 36 | 37 | update: 38 | git submodule update --remote --merge pixels-duckdb 39 | git -C third-party/protobuf checkout v3.21.6 40 | 41 | deps: 42 | + mkdir -p "${PROTOBUF_DIR}/cmake/build" && cd "third-party/protobuf/cmake/build" && \ 43 | cmake -Dprotobuf_BUILD_TESTS=OFF -DCMAKE_BUILD_TYPE=Release ../.. -DCMAKE_POSITION_INDEPENDENT_CODE=ON \ 44 | -Dprotobuf_BUILD_SHARED_LIBS=ON -DCMAKE_INSTALL_PREFIX=./ && \ 45 | make -j install 46 | 47 | clean: 48 | rm -rf build 49 | cd pixels-duckdb && make clean 50 | 51 | # Main build 52 | debug: deps 53 | + mkdir -p build/debug && \ 54 | cmake $(GENERATOR) $(FORCE_COLOR) $(EXTENSION_FLAGS) ${CLIENT_FLAGS} -DEXTENSION_STATIC_BUILD=1 -DCMAKE_BUILD_TYPE=Debug ${BUILD_FLAGS} -S pixels-duckdb/ -B build/debug && \ 55 | cmake --build build/debug --config Debug 56 | 57 | release: deps 58 | + mkdir -p build/release && \ 59 | cmake $(GENERATOR) $(FORCE_COLOR) $(EXTENSION_FLAGS) ${CLIENT_FLAGS} -DEXTENSION_STATIC_BUILD=1 -DCMAKE_BUILD_TYPE=Release ${BUILD_FLAGS} -S pixels-duckdb/ -B build/release && \ 60 | cmake --build build/release --config Release 61 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # mini-pixels 2 | 3 | mini-pixels 是 [pixels](https://github.com/pixelsdb/pixels) 的简化版,旨在为OLAP数据库内核的教学提供实验框架。mini-pixels 保留了 Pixels 存储和查询的核心功能,基本代码来自于[pixels C++实现](https://github.com/pixelsdb/pixels/tree/master/cpp)。 4 | 5 | ## 代码结构 6 | 7 | mini-pixels 主要包含以下主要目录或sub-module: 8 | 9 | - **experiments**: 实验文档。 10 | - **include**: mini-pixels的duckdb extension所需的头文件。 11 | - **pixels-common**: 通用的工具库和基础组件,提供了整个项目中不同模块间的共享功能和结构定义。 12 | - **pixels-core**: 实现了项目的核心功能,包括数据的存储和查询逻辑。 13 | - **pixels-duckdb**: 修改后的duckdb-1.0,也是pixelsdb项目下维护的duckdb fork。 14 | - **pixels-proto**: protobuf的定义文件,目前主要是pixels文件格式的metadata定义。 15 | - **tests**: 功能测试和单元测试。 16 | - **third-party**: 第三方依赖,如protobuf。 17 
| 18 | ## 课程与实验 19 | 20 | mini-pixels 目前用于中国人民大学 **实用数据库开发** 课程的实验框架。 21 | 课程于 **2024 年秋季学期** 开设。 22 | 课程和实验围绕分析型数据库和大数据系统中常用的**列式存储技术**展开,旨在通过实践帮助学生建立列存储引擎和数据库内核开发的基础。 23 | 24 | 以下是课程的实验设计时间表。 25 | 26 | | 实验编号 | 实验主题 | 预计开始时间 | 预计结束时间 | 说明 | 27 | | -------- |--------------------------| ------------ | ------------ |----------------------------------------| 28 | | 实验1 | 部署mini-pixels环境并进行TPCH测试 | 2024-10-15 | 2024-10-29 | 熟悉如何部署`mini-pixels`开发环境 | 29 | | 实验2 | 实现ColumnWriter | 2024-12-15 | 2025-1-20 | 熟悉列式存储的设计和实现,完善mini-pixels中的ColumnWriter并且正确读取写入的文件 | 30 | 31 | 如果在实验过程中遇到问题可以在 Discussions 中讨论或提交 Issue。 32 | 33 | 同学也可以通过提交 **Issue** 和 **Pull Request** 来贡献代码和提出改进建议,帮助完善 mini-pixels。 34 | 35 | ## 致谢 36 | 37 | [duckdb](https://github.com/duckdb/duckdb): 高效的嵌入式查询引擎,mini-pixels 中将 pixels 作为开放文件格式接入duckdb以执行查询。 38 | -------------------------------------------------------------------------------- /env.sh: -------------------------------------------------------------------------------- 1 | #!/bin/zsh 2 | 3 | # Export current directory to PIXELS_SRC and PIXELS_HOME 4 | export PIXELS_SRC=$(pwd) 5 | export PIXELS_HOME=$(pwd) 6 | 7 | # Optionally print the values to verify 8 | echo "PIXELS_SRC is set to $PIXELS_SRC" 9 | echo "PIXELS_HOME is set to $PIXELS_HOME" 10 | 11 | -------------------------------------------------------------------------------- /experiments/clion_guide.md: -------------------------------------------------------------------------------- 1 | # CLion 开发配置 2 | 3 | 1.拉取项目并且执行make pull 4 | 5 | 2.打开项目mini-pixels目录下面的pixels-duckdb,因为clion默认project目录下的CMakeLists.txt为根cmake 6 | 7 | ![image-20241229121331338](../figure/1.png) 8 | 9 | 3.配置cmake options 10 | 11 | 需要配置如下几项 12 | 13 | ``` 14 | -DDUCKDB_EXTENSION_NAMES="pixels" 15 | -DDUCKDB_EXTENSION_PIXELS_PATH=/home/whz/mini-pixels 16 | -DDUCKDB_EXTENSION_PIXELS_SHOULD_LINK="TRUE" 17 | -DDUCKDB_EXTENSION_PIXELS_INCLUDE_PATH=/home/whz/mini-pixels/include 18 | 
-DCMAKE_PREFIX_PATH=/home/whz/mini-pixels/third-party/protobuf/cmake/build 19 | ``` 20 | 21 | ![image-20241229122515503](../figure/2.png) 22 | 23 | 保存后 clion会自动build 24 | 25 | ![image-20241229121901319](../figure/3.png) 26 | 27 | 此时打开的工作目录是mini-pixels,项目目录是pixels-duckdb 28 | 29 | 4.点击pixels-cli作为目标,设置环境变量进行运行或者调试 30 | 31 | ![image-20241229122830709](../figure/4.png) 32 | 33 | 5.pixels-cli配置中,取消before launch的build并且设置好pixels的环境变量 34 | 35 | ![image-20250113160030103](../figure/5.png) 36 | 37 | 6.在命令行输入`make debug -j`编译debug版本 38 | 39 | -------------------------------------------------------------------------------- /experiments/lab1.md: -------------------------------------------------------------------------------- 1 | # 实验1:部署mini-pixels环境并进行TPCH测试 2 | 3 | --- 4 | 5 | **发布时间**:2024年10月15日 6 | **截止时间**:2024年10月29日下午1点 7 | **提交方式**:OBE平台 8 | **负责人**:王浩哲,尹佳 9 | 10 | --- 11 | 12 | ## 1. 实验概述 13 | 14 | 本实验旨在熟悉如何部署`mini-pixels`开发环境,按照文档编译与运行查询。通过本次实验,你将掌握Linux环境下软件环境的部署、数据集的操作及基本测试方法。 15 | 16 | --- 17 | 18 | ## 2. 实验步骤 19 | 20 | **注:本实验需在Linux环境下进行** 21 | 22 | 1. **fork项目并clone代码** 23 | 项目链接:[https://github.com/pixelsdb/mini-pixels](https://github.com/pixelsdb/mini-pixels) 24 | 在GitHub上fork项目,并将仓库clone至本地: 25 | 26 | ```bash 27 | git clone https://github.com/your_username/mini-pixels.git 28 | cd mini-pixels 29 | ``` 30 | 31 | [若不清楚如何fork,请参考GitHub官方文档](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/working-with-forks/fork-a-repo)。 32 | 33 | 2. **拉取submodule** 34 | 进入`mini-pixels`目录,并拉取项目的submodule(此步骤可能需要几分钟): 35 | 36 | ```bash 37 | make pull 38 | ``` 39 | 40 | 3. **配置环境变量** 41 | 42 | ```bash 43 | export PIXELS_HOME=$(pwd) 44 | export PIXELS_SRC=$(pwd) 45 | ``` 46 | 47 | 4. **编译代码** 48 | 49 | ```bash 50 | make -j$(nproc) 51 | ``` 52 | 53 | 5. **下载测试数据并解压** 54 | 将数据包从服务器下载并解压: 55 | 56 | ```bash 57 | wget http://10.77.110.75/pixels/pixels-tpch-1.zip 58 | unzip pixels-tpch-1.zip 59 | ``` 60 | 61 | 6. 
**修改测试数据路径** 62 | 使用`vim`编辑路径,并进行全局替换: 63 | 64 | ```bash 65 | cd pixels-duckdb/benchmark/tpch/pixels/ 66 | vim pixels_tpch_template.benchmark.in 67 | # 使用以下命令全局替换路径: 68 | :%s#/data/9a3-02/tpch-1#/home/pixels/about-class/mini-pixels#g 69 | ``` 70 | 71 | **提示**:请将`/home/pixels/about-class/mini-pixels`替换为实际路径。 72 | 73 | 7. **运行pixels reader测试并截屏** 74 | 75 | ```bash 76 | cd $PIXELS_SRC 77 | ./build/release/examples/pixels-example/pixels-example 78 | ``` 79 | 80 | 8. **进行TPCH测试并截屏** 81 | 82 | ```bash 83 | cd pixels-duckdb 84 | python run_benchmark_simple.py --dir benchmark/tpch/pixels/tpch_1/ 85 | cat output/pixels_tpch_1.csv 86 | ``` 87 | 88 | --- 89 | 90 | ## 3. 提交要求 91 | 92 | 1. **实验报告** 93 | 请参考实验报告模板撰写本次实验报告,报告应包括但不限于: 94 | - 实验描述 95 | - 实验过程 96 | - 实验结果(包括测试截图) 97 | - 遇到的问题及解决方案 98 | 99 | **篇幅要求**:请将内容控制在**2页内**。 100 | 101 | 2. **提交方式** 102 | 请将实验报告按时上传至OBE平台。 103 | - **截止时间**:2024年10月29日下午1点 104 | - **迟交政策**:迟交一周内标记为超时,一周后不再接收。 105 | 106 | --- 107 | 108 | ## 4. 附件示例 109 | 110 | 实验结果截图参考: 111 | 112 | ![lab1-ref](../figure/lab1-ref.png) 113 | -------------------------------------------------------------------------------- /experiments/lab2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pixelsdb/mini-pixels/7fe8f02e02f0d0a8a44e1adb049a05d89c9ea489/experiments/lab2.pdf -------------------------------------------------------------------------------- /experiments/tips.md: -------------------------------------------------------------------------------- 1 | # 实验相关的tips 2 | Author: [XBsleepy](https://github.com/XBsleepy) 3 | 4 | ## 依赖 5 | pixels-cli 依赖 boost 库 6 | 7 | wsl 或某些特定版本的系统对 liburing 的支持不太好,如果编译完成之后用 duckdb 进行 select 时出现 `DirectRandomAccessFile:initialize io uring fails.` 可以换一个发行版或者 git clone 一个 liburing,重装一下。 8 | 9 | ## 编译 10 | git clone 之后,需要执行 `make pull` 拉取所有的子模块,否则可能会出现 `CMake Error`。 11 | 12 | 随后需要执行 `make -j$(nproc)` 限制并行数(nproc 指当前进程可用的 CPU 数量),未指定时可能会无限使用 CPU 资源进而导致系统崩溃。 13 | 14 | 
`make` 默认编译得到的是 release 版本,建议使用 `make -j$(nproc) debug` 生成 debug 版本的可执行文件,方便找到问题。 15 | 16 | ## 运行 17 | 18 | `make release` (或 `make debug`) 会在 `build/release` (或 `build/debug`) 目录下生成文件。 19 | 20 | 可执行文件 `duckdb` 默认链接了 pixel 扩展,可以读取 *.pxl 的数据。 21 | 22 | 生成 *.pxl 文件的方法是运行可执行文件 `./pixels-cli`,按照pdf给出的语法执行load语句。 23 | 24 | > 在 linux 下可使用 `find . -name pixels-cli` 在当前目录下递归查找可执行文件 `pixels-cli` 25 | > 26 | > 默认位置是 `build/release/extension/pixel/pixels-cli` 或 `build/debug/extension/pixel/pixels-cli` 27 | 28 | ## 任务 29 | 30 | 实验要求实现通过 `pixels-cli` 读取列数据类型为 date, timestamp, decimal 的 *.tbl 文件并生成 *.pxl 文件,可通过 `duckdb` 读取 *.pxl 文件验证其正确性。 31 | 32 | 具体我们需要做的就是,找到对应的比如 datecolumnvector, datecolumnwriter 等 .cpp 或 .h 等未完成的文件,参照已经给出的 integer 类型对应的函数和实现,以及 pixel 主仓库中 java 版本中的实现补完代码。 33 | 34 | 在补完代码之后,运行编译得到的 `pixels-cli`,执行 load 语句去生成 *.pxl 文件。 35 | 36 | 随后可以在 duckdb 中执行 select 语句,如果能正确显示数据,就完成了任务。 37 | 38 | 39 | ## 总结 40 | 1. 为了方便 debug,最好编译 debug 版本,方便 gdb 调试(也可借助 CLion 等工具) 41 | 2. 底层的存储,date 是 int 类型,timestamp 和 decimal 是 long 类型 42 | 3. pixels-cli 并不会写多个文件,当行数超过 -n (load 时指定的最大行数) 时并不会默认开一个新的,而是会生成一个无法读取的文件,并且cli不会报错 43 | 4. writer 默认都是先调用 add(string) 方法,对于 timestamp 和 date,cpp 没有 java 那样自带的 date 类型,所以需要自己完成 string 到 date 类型的解析。(decimal也是,decimal 支持 18 位的精度,如果直接转成 float 或者 double 再变成int,是很有可能有精度损失的) 44 | 5. 在修改代码后,通常可以直接到 mini-pixels 目录下执行 `make -j$(nproc) debug`,并且能正确更新可执行文件。不到万不得已最好不要直接`make clean`,因为会把所有的东西都删掉,重新编译会很慢 45 | 6. 目前版本的 decimal 数据类型在 precision 低于 10 的时候,reader 的显示会有问题,可以设置成比10大的 46 | 7. 如果想要自己生成测试文件请不要在文末添加空行,会导致`segmentation fault`,或者是`runtime error` 47 | 8. 任务三实际是在说使用 duckdb 读取 *.pxl 文件来验证通过 pixels-cli 生成的 *.pxl 是否正确,可以视作测试环节 48 | 9. 建议虚函数全部加上 override,这样编译器会帮你检查是否 override 正确 49 | 10. 
目前的 timestamp 的 precision 没用,duckdb读取的时候默认是 秒 * 1e6 对应的 long 50 | -------------------------------------------------------------------------------- /figure/1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pixelsdb/mini-pixels/7fe8f02e02f0d0a8a44e1adb049a05d89c9ea489/figure/1.png -------------------------------------------------------------------------------- /figure/2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pixelsdb/mini-pixels/7fe8f02e02f0d0a8a44e1adb049a05d89c9ea489/figure/2.png -------------------------------------------------------------------------------- /figure/3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pixelsdb/mini-pixels/7fe8f02e02f0d0a8a44e1adb049a05d89c9ea489/figure/3.png -------------------------------------------------------------------------------- /figure/4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pixelsdb/mini-pixels/7fe8f02e02f0d0a8a44e1adb049a05d89c9ea489/figure/4.png -------------------------------------------------------------------------------- /figure/5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pixelsdb/mini-pixels/7fe8f02e02f0d0a8a44e1adb049a05d89c9ea489/figure/5.png -------------------------------------------------------------------------------- /figure/lab1-ref.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pixelsdb/mini-pixels/7fe8f02e02f0d0a8a44e1adb049a05d89c9ea489/figure/lab1-ref.png -------------------------------------------------------------------------------- /include/PixelsReadBindData.hpp: -------------------------------------------------------------------------------- 1 | // 2 
| // Created by liyu on 3/27/23. 3 | // 4 | 5 | #ifndef EXAMPLE_C_PIXELSREADBINDDATA_HPP 6 | #define EXAMPLE_C_PIXELSREADBINDDATA_HPP 7 | 8 | 9 | #include "duckdb.hpp" 10 | #include "duckdb/common/exception.hpp" 11 | #include "duckdb/common/string_util.hpp" 12 | #include "duckdb/function/scalar_function.hpp" 13 | #include 14 | #include "PixelsReader.h" 15 | 16 | 17 | namespace duckdb { 18 | 19 | struct PixelsReadBindData : public TableFunctionData { 20 | std::shared_ptr initialPixelsReader; 21 | std::shared_ptr fileSchema; 22 | vector files; 23 | atomic curFileId; 24 | }; 25 | 26 | } 27 | #endif // EXAMPLE_C_PIXELSREADBINDDATA_HPP 28 | -------------------------------------------------------------------------------- /include/PixelsReadGlobalState.hpp: -------------------------------------------------------------------------------- 1 | // 2 | // Created by liyu on 3/26/23. 3 | // 4 | 5 | #ifndef EXAMPLE_C_PIXELSREADGLOBALSTATE_HPP 6 | #define EXAMPLE_C_PIXELSREADGLOBALSTATE_HPP 7 | 8 | #include "duckdb.hpp" 9 | #include "duckdb/common/exception.hpp" 10 | #include "duckdb/common/string_util.hpp" 11 | #include "duckdb/function/scalar_function.hpp" 12 | #include 13 | #include "PixelsReader.h" 14 | #include "physical/StorageArrayScheduler.h" 15 | 16 | namespace duckdb { 17 | 18 | struct PixelsReadGlobalState : public GlobalTableFunctionState { 19 | mutex lock; 20 | 21 | //! The initial reader from the bind phase 22 | std::shared_ptr initialPixelsReader; 23 | 24 | //! Mutexes to wait for a file that is currently being opened 25 | unique_ptr file_mutexes; 26 | 27 | //! Signal to other threads that a file failed to open, letting every thread abort. 28 | bool error_opening_file = false; 29 | 30 | std::shared_ptr storageArrayScheduler; 31 | 32 | //! Index of file currently up for scanning 33 | vector file_index; 34 | 35 | //! 
Batch index of the next row group to be scanned 36 | idx_t batch_index; 37 | 38 | idx_t max_threads; 39 | 40 | TableFilterSet * filters; 41 | 42 | idx_t MaxThreads() const override { 43 | return max_threads; 44 | } 45 | }; 46 | 47 | } 48 | 49 | #endif // EXAMPLE_C_PIXELSREADGLOBALSTATE_HPP 50 | -------------------------------------------------------------------------------- /include/PixelsReadLocalState.hpp: -------------------------------------------------------------------------------- 1 | // 2 | // Created by liyu on 3/26/23. 3 | // 4 | 5 | #ifndef EXAMPLE_C_PIXELSREADLOCALSTATE_HPP 6 | #define EXAMPLE_C_PIXELSREADLOCALSTATE_HPP 7 | 8 | #include "duckdb.hpp" 9 | #include "duckdb/common/exception.hpp" 10 | #include "duckdb/common/string_util.hpp" 11 | #include "duckdb/function/scalar_function.hpp" 12 | #include 13 | #include "PixelsReader.h" 14 | #include "reader/PixelsRecordReader.h" 15 | 16 | namespace duckdb { 17 | 18 | struct PixelsReadLocalState : public LocalTableFunctionState { 19 | PixelsReadLocalState() { 20 | curr_file_index = 0; 21 | next_file_index = 0; 22 | curr_batch_index = 0; 23 | next_batch_index = 0; 24 | rowOffset = 0; 25 | currPixelsRecordReader = nullptr; 26 | nextPixelsRecordReader = nullptr; 27 | vectorizedRowBatch = nullptr; 28 | currReader = nullptr; 29 | nextReader = nullptr; 30 | } 31 | std::shared_ptr currPixelsRecordReader; 32 | std::shared_ptr nextPixelsRecordReader; 33 | // this is used for storing row batch results. 
34 | std::shared_ptr vectorizedRowBatch; 35 | int deviceID; 36 | int rowOffset; 37 | vector column_ids; 38 | vector column_names; 39 | std::shared_ptr currReader; 40 | std::shared_ptr nextReader; 41 | idx_t curr_file_index; 42 | idx_t next_file_index; 43 | idx_t curr_batch_index; 44 | idx_t next_batch_index; 45 | std::string next_file_name; 46 | std::string curr_file_name; 47 | }; 48 | 49 | } 50 | 51 | #endif // EXAMPLE_C_PIXELSREADLOCALSTATE_HPP 52 | -------------------------------------------------------------------------------- /include/pixels_extension.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "duckdb.hpp" 4 | 5 | namespace duckdb { 6 | 7 | class PixelsExtension : public Extension { 8 | public: 9 | void Load(DuckDB &db) override; 10 | std::string Name() override; 11 | 12 | }; 13 | 14 | } // namespace duckdb 15 | -------------------------------------------------------------------------------- /pixels-cli/include/executor/CommandExecutor.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 PixelsDB. 3 | * 4 | * This file is part of Pixels. 5 | * 6 | * Pixels is free software: you can redistribute it and/or modify 7 | * it under the terms of the Affero GNU General Public License as 8 | * published by the Free Software Foundation, either version 3 of 9 | * the License, or (at your option) any later version. 10 | * 11 | * Pixels is distributed in the hope that it will be useful, 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | * Affero GNU General Public License for more details. 15 | * 16 | * You should have received a copy of the Affero GNU General Public 17 | * License along with Pixels. If not, see 18 | * . 19 | */ 20 | 21 | // 22 | // Created by gengdy on 24-11-17. 
23 | // 24 | 25 | #ifndef PIXELS_COMMANDEXECUTOR_H 26 | #define PIXELS_COMMANDEXECUTOR_H 27 | 28 | #include 29 | #include 30 | 31 | namespace bpo = boost::program_options; 32 | 33 | class CommandExecutor { 34 | public: 35 | virtual ~CommandExecutor() = default; 36 | virtual void execute(const bpo::variables_map& ns, const std::string& command) = 0; 37 | }; 38 | #endif //PIXELS_COMMANDEXECUTOR_H 39 | -------------------------------------------------------------------------------- /pixels-cli/include/executor/LoadExecutor.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 PixelsDB. 3 | * 4 | * This file is part of Pixels. 5 | * 6 | * Pixels is free software: you can redistribute it and/or modify 7 | * it under the terms of the Affero GNU General Public License as 8 | * published by the Free Software Foundation, either version 3 of 9 | * the License, or (at your option) any later version. 10 | * 11 | * Pixels is distributed in the hope that it will be useful, 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | * Affero GNU General Public License for more details. 15 | * 16 | * You should have received a copy of the Affero GNU General Public 17 | * License along with Pixels. If not, see 18 | * . 19 | */ 20 | 21 | // 22 | // Created by gengdy on 24-11-17. 
23 | // 24 | 25 | #ifndef PIXELS_LOADEXECUTOR_H 26 | #define PIXELS_LOADEXECUTOR_H 27 | 28 | #include 29 | #include 30 | #include 31 | 32 | class LoadExecutor : public CommandExecutor { 33 | public: 34 | void execute(const bpo::variables_map& ns, const std::string& command) override; 35 | private: 36 | bool startConsumers(const std::vector &inputFiles, Parameters parameters, 37 | const std::vector &loadedFiles); 38 | }; 39 | #endif //PIXELS_LOADEXECUTOR_H 40 | -------------------------------------------------------------------------------- /pixels-cli/include/load/Parameters.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 PixelsDB. 3 | * 4 | * This file is part of Pixels. 5 | * 6 | * Pixels is free software: you can redistribute it and/or modify 7 | * it under the terms of the Affero GNU General Public License as 8 | * published by the Free Software Foundation, either version 3 of 9 | * the License, or (at your option) any later version. 10 | * 11 | * Pixels is distributed in the hope that it will be useful, 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | * Affero GNU General Public License for more details. 15 | * 16 | * You should have received a copy of the Affero GNU General Public 17 | * License along with Pixels. If not, see 18 | * . 19 | */ 20 | 21 | // 22 | // Created by gengdy on 24-11-19. 
23 | // 24 | 25 | #ifndef PIXELS_PARAMETERS_H 26 | #define PIXELS_PARAMETERS_H 27 | 28 | #include 29 | #include 30 | 31 | class Parameters { 32 | public: 33 | Parameters(const std::string &schema, int maxRowNum, const std::string &regex, 34 | const std::string &loadingPath, EncodingLevel encodingLevel, bool nullsPadding); 35 | std::string getLoadingPath() const; 36 | std::string getSchema() const; 37 | int getMaxRowNum() const; 38 | std::string getRegex() const; 39 | EncodingLevel getEncodingLevel() const; 40 | bool isNullsPadding() const; 41 | 42 | private: 43 | std::string schema; 44 | int maxRowNum; 45 | std::string regex; 46 | std::string loadingPath; 47 | EncodingLevel encodingLevel; 48 | bool nullsPadding; 49 | }; 50 | #endif //PIXELS_PARAMETERS_H 51 | -------------------------------------------------------------------------------- /pixels-cli/include/load/PixelsConsumer.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 PixelsDB. 3 | * 4 | * This file is part of Pixels. 5 | * 6 | * Pixels is free software: you can redistribute it and/or modify 7 | * it under the terms of the Affero GNU General Public License as 8 | * published by the Free Software Foundation, either version 3 of 9 | * the License, or (at your option) any later version. 10 | * 11 | * Pixels is distributed in the hope that it will be useful, 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | * Affero GNU General Public License for more details. 15 | * 16 | * You should have received a copy of the Affero GNU General Public 17 | * License along with Pixels. If not, see 18 | * . 19 | */ 20 | 21 | // 22 | // Created by gengdy on 24-11-22. 
23 | // 24 | 25 | #ifndef PIXELS_PIXELSCONSUMER_H 26 | #define PIXELS_PIXELSCONSUMER_H 27 | 28 | #include 29 | #include 30 | #include 31 | 32 | class PixelsConsumer { 33 | public: 34 | PixelsConsumer(const std::vector &queue, const Parameters &parameters, const std::vector &loadedFiles); 35 | void run(); 36 | private: 37 | static int GlobalTargetPathId; 38 | std::vector queue; 39 | Parameters parameters; 40 | std::vector loadedFiles; 41 | }; 42 | #endif //PIXELS_PIXELSCONSUMER_H 43 | -------------------------------------------------------------------------------- /pixels-cli/lib/load/Parameters.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 PixelsDB. 3 | * 4 | * This file is part of Pixels. 5 | * 6 | * Pixels is free software: you can redistribute it and/or modify 7 | * it under the terms of the Affero GNU General Public License as 8 | * published by the Free Software Foundation, either version 3 of 9 | * the License, or (at your option) any later version. 10 | * 11 | * Pixels is distributed in the hope that it will be useful, 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | * Affero GNU General Public License for more details. 15 | * 16 | * You should have received a copy of the Affero GNU General Public 17 | * License along with Pixels. If not, see 18 | * . 19 | */ 20 | 21 | // 22 | // Created by gengdy on 24-11-19. 
23 | // 24 | 25 | #include 26 | 27 | Parameters::Parameters(const std::string &schema, int maxRowNum, const std::string &regex, 28 | const std::string &loadingPath, EncodingLevel encodingLevel, bool nullsPadding) 29 | : schema(schema), maxRowNum(maxRowNum), regex(regex), loadingPath(loadingPath), 30 | encodingLevel(encodingLevel), nullsPadding(nullsPadding) {} 31 | 32 | std::string Parameters::getSchema() const { 33 | return this->schema; 34 | } 35 | 36 | int Parameters::getMaxRowNum() const { 37 | return this->maxRowNum; 38 | } 39 | 40 | std::string Parameters::getRegex() const { 41 | return this->regex; 42 | } 43 | 44 | std::string Parameters::getLoadingPath() const { 45 | return this->loadingPath; 46 | } 47 | 48 | EncodingLevel Parameters::getEncodingLevel() const { 49 | return this->encodingLevel; 50 | } 51 | 52 | bool Parameters::isNullsPadding() const { 53 | return this->nullsPadding; 54 | } -------------------------------------------------------------------------------- /pixels-common/include/exception/InvalidArgumentException.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by liyu on 3/13/23. 3 | // 4 | 5 | #ifndef PIXELS_INVALIDARGUMENTEXCEPTION_H 6 | #define PIXELS_INVALIDARGUMENTEXCEPTION_H 7 | 8 | #include 9 | #include 10 | 11 | class InvalidArgumentException: public std::exception { 12 | public: 13 | InvalidArgumentException() = default; 14 | explicit InvalidArgumentException(std::string message); 15 | }; 16 | #endif //PIXELS_INVALIDARGUMENTEXCEPTION_H 17 | -------------------------------------------------------------------------------- /pixels-common/include/physical/BufferPool.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by liyu on 5/25/23. 
3 | // 4 | 5 | #ifndef DUCKDB_BUFFERPOOL_H 6 | #define DUCKDB_BUFFERPOOL_H 7 | 8 | #include 9 | #include 10 | #include "physical/natives/ByteBuffer.h" 11 | #include 12 | #include "physical/natives/DirectIoLib.h" 13 | #include "exception/InvalidArgumentException.h" 14 | #include "utils/ColumnSizeCSVReader.h" 15 | #include 16 | 17 | // When allocating the buffer pool, we use the size of the first pxl file. Since 18 | // the remaining pxl files may be larger than the first file, we allocate some extra 19 | // size (EXTRA_POOL_SIZE bytes, i.e. 3 MiB) to each column. The value is parenthesized so the macro expands safely inside larger expressions. 20 | // TODO: how to evaluate the maximal pool size 21 | #define EXTRA_POOL_SIZE (3*1024*1024) 22 | 23 | class DirectUringRandomAccessFile; 24 | // All state of this class is static. Members declared thread_local exist once per thread; directIoLib is a plain static shared by all threads. 25 | class BufferPool { 26 | public: 27 | static void Initialize(std::vector colIds, std::vector bytes, std::vector columnNames); 28 | static std::shared_ptr GetBuffer(uint32_t colId); 29 | static int64_t GetBufferId(uint32_t index); 30 | static void Switch(); 31 | static void Reset(); 32 | private: 33 | BufferPool() = default; 34 | static thread_local int colCount; 35 | static thread_local std::map nrBytes; 36 | static thread_local bool isInitialized; 37 | static thread_local std::map> buffers[2]; 38 | static std::shared_ptr directIoLib; 39 | static thread_local int currBufferIdx; 40 | static thread_local int nextBufferIdx; 41 | friend class DirectUringRandomAccessFile; 42 | }; 43 | #endif // DUCKDB_BUFFERPOOL_H 44 | -------------------------------------------------------------------------------- /pixels-common/include/physical/FilePath.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 PixelsDB. 3 | * 4 | * This file is part of Pixels. 
5 | * 6 | * Pixels is free software: you can redistribute it and/or modify 7 | * it under the terms of the Affero GNU General Public License as 8 | * published by the Free Software Foundation, either version 3 of 9 | * the License, or (at your option) any later version. 10 | * 11 | * Pixels is distributed in the hope that it will be useful, 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | * Affero GNU General Public License for more details. 15 | * 16 | * You should have received a copy of the Affero GNU General Public 17 | * License along with Pixels. If not, see 18 | * . 19 | */ 20 | 21 | // 22 | // Created by gengdy on 24-11-19. 23 | // 24 | 25 | #ifndef PIXELS_FILEPATH_H 26 | #define PIXELS_FILEPATH_H 27 | 28 | #include 29 | #include 30 | 31 | class FilePath { 32 | public: 33 | std::string realPath; 34 | bool valid; 35 | bool isDir; 36 | 37 | FilePath(); 38 | FilePath(const std::string &path); 39 | std::string toString() const; 40 | std::string toStringWithPrefix(const Storage &storage) const; 41 | }; 42 | #endif //PIXELS_FILEPATH_H 43 | -------------------------------------------------------------------------------- /pixels-common/include/physical/MergedRequest.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by yuly on 01.05.23. 
3 | // 4 | 5 | #ifndef DUCKDB_MERGEDREQUEST_H 6 | #define DUCKDB_MERGEDREQUEST_H 7 | 8 | #include "physical/Request.h" 9 | #include 10 | #include 11 | #include "exception/InvalidArgumentException.h" 12 | #include "utils/ConfigFactory.h" 13 | #include "physical/natives/ByteBuffer.h" 14 | #include 15 | #include 16 | 17 | class MergedRequest: public std::enable_shared_from_this { 18 | public: 19 | MergedRequest(Request first); 20 | std::shared_ptr merge(Request curr); 21 | std::vector> complete(std::shared_ptr buffer); 22 | long getStart(); 23 | int getLength(); 24 | int getSize(); 25 | long getQueryId(); 26 | private: 27 | long queryId; 28 | long start; 29 | long end; 30 | int length; // the length of merged request 31 | int size; // the number of sub-requests 32 | int maxGap; 33 | std::vector offsets; // the starting offset of the sub-requests in the response of the merged request 34 | std::vector lengths; // the length of sub-requests 35 | }; 36 | #endif //DUCKDB_MERGEDREQUEST_H 37 | -------------------------------------------------------------------------------- /pixels-common/include/physical/PhysicalReaderUtil.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by liyu on 3/6/23. 
3 | // 4 | 5 | #ifndef PIXELS_PHYSICALREADERUTIL_H 6 | #define PIXELS_PHYSICALREADERUTIL_H 7 | 8 | #include "io/PhysicalLocalReader.h" 9 | #include "Storage.h" 10 | #include "StorageFactory.h" 11 | #include 12 | /* Factory helpers that build a physical reader for a storage backend. Only Storage::file is implemented; every other scheme throws std::runtime_error naming that scheme. Both overloads also throw std::runtime_error on an empty path, and the storage overload on a null storage. */ 13 | class PhysicalReaderUtil { 14 | public: 15 | static std::shared_ptr newPhysicalReader(std::shared_ptr storage, std::string path) { 16 | if(storage == nullptr) { 17 | throw std::runtime_error("storage should not be nullptr"); 18 | } 19 | if(path.size() == 0) { 20 | throw std::runtime_error("path should not be empty"); 21 | } 22 | std::shared_ptr reader; 23 | switch (storage->getScheme()) { 24 | case Storage::hdfs: 25 | throw std::runtime_error("hdfs not support"); 26 | break; 27 | case Storage::file: 28 | reader = std::make_shared(storage, path); 29 | break; 30 | case Storage::s3: 31 | throw std::runtime_error("s3 not support"); 32 | break; 33 | case Storage::minio: 34 | throw std::runtime_error("minio not support"); 35 | break; 36 | case Storage::redis: 37 | throw std::runtime_error("redis not support"); 38 | break; 39 | case Storage::gcs: 40 | throw std::runtime_error("gcs not support"); 41 | break; 42 | case Storage::mock: 43 | throw std::runtime_error("mock not support"); 44 | break; 45 | default: 46 | throw std::runtime_error("unknown storage scheme"); 47 | } 48 | return reader; 49 | } 50 | 51 | static std::shared_ptr newPhysicalReader(Storage::Scheme scheme, std::string path) { 52 | if(path.size() == 0) { 53 | throw std::runtime_error("path should not be empty"); 54 | } 55 | return newPhysicalReader(StorageFactory::getInstance()->getStorage(scheme), path); 56 | } 57 | }; 58 | #endif //PIXELS_PHYSICALREADERUTIL_H 59 | -------------------------------------------------------------------------------- /pixels-common/include/physical/PhysicalWriter.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 PixelsDB. 3 | * 4 | * This file is part of Pixels. 
5 | * 6 | * Pixels is free software: you can redistribute it and/or modify 7 | * it under the terms of the Affero GNU General Public License as 8 | * published by the Free Software Foundation, either version 3 of 9 | * the License, or (at your option) any later version. 10 | * 11 | * Pixels is distributed in the hope that it will be useful, 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | * Affero GNU General Public License for more details. 15 | * 16 | * You should have received a copy of the Affero GNU General Public 17 | * License along with Pixels. If not, see 18 | * . 19 | */ 20 | 21 | // 22 | // Created by gengdy on 24-11-25. 23 | // 24 | 25 | #ifndef PIXELS_PHYSICALWRITER_H 26 | #define PIXELS_PHYSICALWRITER_H 27 | 28 | #include 29 | #include 30 | #include "physical/natives/ByteBuffer.h" 31 | 32 | /** Abstract interface for writing content to a file: prepare space, append content, flush and close. */ 33 | class PhysicalWriter { 34 | public: 35 | virtual ~PhysicalWriter() = default; 36 | /** 37 | * Prepare the writer to ensure the length can fit into current block. 38 | * 39 | * @param length length of content 40 | * @return starting offset after preparing. If -1, means prepare has failed, 41 | * due to the specified length cannot fit into current block. 42 | */ 43 | virtual std::int64_t prepare(int length) = 0; 44 | /** 45 | * Append content to the file. 46 | * 47 | * @param buffer content buffer container 48 | * @param offset start offset of actual content buffer 49 | * @param length length of actual content buffer 50 | * @return start offset of content in the file. 51 | */ 52 | virtual std::int64_t append(const uint8_t *buffer, int offset, int length) = 0; 53 | /** 54 | * Append content to the file. 55 | * @param byteBuffer content buffer 56 | * @return start offset of content in the file 57 | */ 58 | virtual std::int64_t append(std::shared_ptr byteBuffer) =0 ; 59 | /** 60 | * Close writer. 61 | */ 62 | virtual void close() = 0; 63 | /** 64 | * Flush writer. 
65 | */ 66 | virtual void flush() = 0; 67 | 68 | virtual std::string getPath() const = 0; 69 | 70 | virtual int getBufferSize() const = 0; 71 | }; 72 | #endif //PIXELS_PHYSICALWRITER_H 73 | -------------------------------------------------------------------------------- /pixels-common/include/physical/PhysicalWriterOption.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 PixelsDB. 3 | * 4 | * This file is part of Pixels. 5 | * 6 | * Pixels is free software: you can redistribute it and/or modify 7 | * it under the terms of the Affero GNU General Public License as 8 | * published by the Free Software Foundation, either version 3 of 9 | * the License, or (at your option) any later version. 10 | * 11 | * Pixels is distributed in the hope that it will be useful, 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | * Affero GNU General Public License for more details. 15 | * 16 | * You should have received a copy of the Affero GNU General Public 17 | * License along with Pixels. If not, see 18 | * . 19 | */ 20 | 21 | // 22 | // Created by gegndy on 24-11-25. 
23 | // 24 | 25 | #ifndef PIXELS_PHYSICALWRITEROPTION_H 26 | #define PIXELS_PHYSICALWRITEROPTION_H 27 | 28 | #include 29 | #include 30 | 31 | class PhysicalWriterOption : public std::enable_shared_from_this { 32 | public: 33 | PhysicalWriterOption(std::int64_t blockSize, bool addBlockPadding, bool overwrite); 34 | std::int64_t getBlockSize() const; 35 | std::shared_ptr setBlockSize(std::int64_t blockSize); 36 | bool isAddBlockPadding() const; 37 | std::shared_ptr setAddBlockPadding(bool addBlockPadding); 38 | bool isOverwrite() const; 39 | std::shared_ptr setOverwrite(bool overwrite); 40 | private: 41 | std::int64_t blockSize; 42 | bool addBlockPadding; 43 | bool overwrite; 44 | }; 45 | #endif //PIXELS_PHYSICALWRITEROPTION_H 46 | -------------------------------------------------------------------------------- /pixels-common/include/physical/PhysicalWriterUtil.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 PixelsDB. 3 | * 4 | * This file is part of Pixels. 5 | * 6 | * Pixels is free software: you can redistribute it and/or modify 7 | * it under the terms of the Affero GNU General Public License as 8 | * published by the Free Software Foundation, either version 3 of 9 | * the License, or (at your option) any later version. 10 | * 11 | * Pixels is distributed in the hope that it will be useful, 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | * Affero GNU General Public License for more details. 15 | * 16 | * You should have received a copy of the Affero GNU General Public 17 | * License along with Pixels. If not, see 18 | * . 19 | */ 20 | 21 | // 22 | // Created by gengdy on 24-11-25. 
23 | // 24 | 25 | #ifndef PIXELS_PHYSICALWRITERUTIL_H 26 | #define PIXELS_PHYSICALWRITERUTIL_H 27 | 28 | #include "physical/PhysicalWriter.h" 29 | #include "physical/PhysicalWriterOption.h" 30 | #include "physical/storage/LocalFSProvider.h" 31 | 32 | class PhysicalWriterUtil { 33 | public: 34 | static std::shared_ptr newPhysicalWriter(std::string path, int blockSize, 35 | bool blockPadding, bool overwrite) { 36 | std::shared_ptr option = std::make_shared(blockSize, blockPadding, overwrite); 37 | LocalFSProvider provider; 38 | return provider.createWriter(path, option); 39 | } 40 | }; 41 | #endif //PIXELS_PHYSICALWRITERUTIL_H 42 | -------------------------------------------------------------------------------- /pixels-common/include/physical/Request.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by liyu on 3/7/23. 3 | // 4 | 5 | #ifndef PIXELS_REQUEST_H 6 | #define PIXELS_REQUEST_H 7 | 8 | #include 9 | #include 10 | 11 | class Request { 12 | public: 13 | int64_t bufferId; 14 | uint64_t queryId; 15 | uint64_t start; 16 | uint64_t length; 17 | Request(uint64_t queryId_, uint64_t start_, uint64_t length_, 18 | int64_t bufferId = -1); 19 | int hashCode(); 20 | int comparedTo(Request o); 21 | }; 22 | #endif // PIXELS_REQUEST_H 23 | -------------------------------------------------------------------------------- /pixels-common/include/physical/RequestBatch.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by liyu on 3/7/23. 
3 | // 4 | 5 | #ifndef PIXELS_REQUESTBATCH_H 6 | #define PIXELS_REQUESTBATCH_H 7 | 8 | #include "physical/Request.h" 9 | #include 10 | #include 11 | #include 12 | #include "physical/natives/ByteBuffer.h" 13 | 14 | class RequestBatch { 15 | public: 16 | RequestBatch(); 17 | explicit RequestBatch(int capacity); 18 | void add(uint64_t queryId, uint64_t start, uint64_t length, int64_t bufferId = -1); 19 | void add(Request request); 20 | int getSize(); 21 | std::vector getRequests(); 22 | // std::vector> * getPromises(); 23 | private: 24 | int size; 25 | std::vector requests; 26 | // std::vector> promises; 27 | 28 | }; 29 | 30 | #endif //PIXELS_REQUESTBATCH_H 31 | -------------------------------------------------------------------------------- /pixels-common/include/physical/Scheduler.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by liyu on 3/7/23. 3 | // 4 | 5 | #ifndef PIXELS_SCHEDULER_H 6 | #define PIXELS_SCHEDULER_H 7 | #include "physical/PhysicalReader.h" 8 | #include "physical/RequestBatch.h" 9 | #include "profiler/TimeProfiler.h" 10 | /* Interface for executing a batch of read requests through a reader (see executeBatch). */ 11 | class Scheduler { 12 | public: 13 | /* Virtual destructor so a concrete scheduler is destroyed correctly when deleted through a Scheduler pointer. */ 14 | virtual ~Scheduler() = default; 15 | /** 16 | * Execute a batch of read requests, and return the future of the completion of 17 | * all the requests. 18 | * @param reader 19 | * @param batch 20 | * @param queryId 21 | */ 22 | virtual std::vector> executeBatch(std::shared_ptr reader, RequestBatch batch, long queryId) = 0; 23 | virtual std::vector> executeBatch(std::shared_ptr reader, 24 | RequestBatch batch, std::vector> reuseBuffers, long queryId) = 0; 25 | }; 26 | #endif //PIXELS_SCHEDULER_H 27 | -------------------------------------------------------------------------------- /pixels-common/include/physical/SchedulerFactory.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by liyu on 3/10/23. 
3 | // 4 | 5 | #ifndef PIXELS_SCHEDULERFACTORY_H 6 | #define PIXELS_SCHEDULERFACTORY_H 7 | 8 | #include "physical/Scheduler.h" 9 | #include "physical/scheduler/NoopScheduler.h" 10 | #include "physical/scheduler/SortMergeScheduler.h" 11 | #include "utils/ConfigFactory.h" 12 | #include 13 | #include 14 | #include 15 | 16 | class SchedulerFactory { 17 | public: 18 | static SchedulerFactory * Instance(); 19 | Scheduler * getScheduler(); 20 | ~SchedulerFactory(); 21 | private: 22 | static SchedulerFactory * instance; 23 | Scheduler * scheduler; 24 | SchedulerFactory(); 25 | }; 26 | #endif //PIXELS_SCHEDULERFACTORY_H 27 | -------------------------------------------------------------------------------- /pixels-common/include/physical/Status.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 PixelsDB. 3 | * 4 | * This file is part of Pixels. 5 | * 6 | * Pixels is free software: you can redistribute it and/or modify 7 | * it under the terms of the Affero GNU General Public License as 8 | * published by the Free Software Foundation, either version 3 of 9 | * the License, or (at your option) any later version. 10 | * 11 | * Pixels is distributed in the hope that it will be useful, 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | * Affero GNU General Public License for more details. 15 | * 16 | * You should have received a copy of the Affero GNU General Public 17 | * License along with Pixels. If not, see 18 | * . 19 | */ 20 | 21 | // 22 | // Created by gengdy on 24-11-19. 
23 | // 24 | 25 | #ifndef PIXELS_STATUS_H 26 | #define PIXELS_STATUS_H 27 | 28 | #include 29 | #include 30 | 31 | class Status { 32 | public: 33 | Status(); 34 | Status(const std::string &path, uint64_t length, bool isDir, int replication); 35 | Status(const Status &other); 36 | uint64_t getLength() const; 37 | bool isFile() const; 38 | bool isDirectory() const; 39 | short getReplication() const; 40 | std::string getPath() const; 41 | std::string getName() const; 42 | std::string toString() const; 43 | bool operator<(const Status &other) const; 44 | bool operator==(const Status &other) const; 45 | 46 | private: 47 | std::string path; 48 | uint64_t length; 49 | bool isDir; 50 | short replication; 51 | }; 52 | #endif //PIXELS_STATUS_H 53 | -------------------------------------------------------------------------------- /pixels-common/include/physical/Storage.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by liyu on 2/27/23. 3 | // 4 | 5 | #ifndef PIXELS_READER_STORAGE_H 6 | #define PIXELS_READER_STORAGE_H 7 | 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | 19 | 20 | class Storage { 21 | public: 22 | /** 23 | * If we want to add more storage schemes here, modify this enum. 24 | */ 25 | enum Scheme { 26 | hdfs, // HDFS 27 | file, // local fs 28 | s3, // Amazon S3 29 | minio, // Minio 30 | redis, // Redis 31 | gcs, // google cloud storage 32 | mock, // mock 33 | }; 34 | static std::map schemeMap; 35 | Storage(); 36 | ~Storage(); 37 | /** 38 | * Case-insensitive parsing from String name to enum value. 39 | * @param value the name of storage scheme. 40 | * @return 41 | */ 42 | static Scheme from(std::string value); 43 | 44 | /** 45 | * Parse the scheme from the path which is prefixed with the storage scheme. 
46 | * @param schemedPath 47 | */ 48 | static Scheme fromPath(const std::string& schemedPath); 49 | 50 | /** 51 | * Whether the value is a valid storage scheme. 52 | * @param value 53 | * @return 54 | */ 55 | static bool isValid(const std::string& value); 56 | 57 | // TODO: if we need to implement the function "public boolean equals()" ? 58 | 59 | virtual Scheme getScheme() = 0; 60 | 61 | virtual std::string ensureSchemePrefix(const std::string &path) const = 0; 62 | 63 | virtual std::vector listPaths(const std::string &path) = 0; 64 | 65 | virtual std::ifstream open(const std::string &path) = 0; 66 | 67 | virtual void close() = 0; 68 | // TODO: the remaining function to be implemented 69 | }; 70 | 71 | 72 | 73 | #endif //PIXELS_READER_STORAGE_H 74 | -------------------------------------------------------------------------------- /pixels-common/include/physical/StorageArrayScheduler.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by liyu on 1/21/24. 3 | // 4 | 5 | #ifndef DUCKDB_STORAGEARRAYSCHEDULER_H 6 | #define DUCKDB_STORAGEARRAYSCHEDULER_H 7 | 8 | #include "utils/ConfigFactory.h" 9 | #include 10 | #include 11 | #include 12 | 13 | class StorageArrayScheduler { 14 | public: 15 | StorageArrayScheduler(std::vector& files, int threadNum); 16 | int acquireDeviceId(); 17 | int getDeviceSum(); 18 | 19 | std::string getFileName(int deviceID, int fileID); 20 | uint64_t getFileSum(int deviceID); 21 | int getMaxFileSum(); 22 | int getBatchID(int deviceID, int fileID); 23 | private: 24 | std::mutex m; 25 | int currentDeviceID; 26 | int devicesNum; 27 | std::vector> filesVector; 28 | }; 29 | 30 | #endif //DUCKDB_STORAGEARRAYSCHEDULER_H 31 | -------------------------------------------------------------------------------- /pixels-common/include/physical/StorageFactory.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by liyu on 3/6/23. 
3 | // 4 | 5 | #ifndef PIXELS_STORAGEFACTORY_H 6 | #define PIXELS_STORAGEFACTORY_H 7 | #include 8 | #include "physical/Storage.h" 9 | #include "physical/storage/LocalFS.h" 10 | 11 | class StorageFactory { 12 | public: 13 | static StorageFactory * getInstance(); 14 | 15 | std::vector getEnabledSchemes(); 16 | bool isEnabled(Storage::Scheme scheme); 17 | void closeAll(); 18 | void reloadAll(); 19 | void reload(Storage::Scheme scheme); 20 | std::shared_ptr getStorage(const std::string& schemeOrPath); 21 | std::shared_ptr getStorage(Storage::Scheme scheme); 22 | private: 23 | //TODO: logger 24 | StorageFactory(); 25 | std::unordered_map> storageImpls; 26 | std::set enabledSchemes; 27 | static StorageFactory * instance; 28 | 29 | }; 30 | #endif //PIXELS_STORAGEFACTORY_H 31 | -------------------------------------------------------------------------------- /pixels-common/include/physical/StorageProvider.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 PixelsDB. 3 | * 4 | * This file is part of Pixels. 5 | * 6 | * Pixels is free software: you can redistribute it and/or modify 7 | * it under the terms of the Affero GNU General Public License as 8 | * published by the Free Software Foundation, either version 3 of 9 | * the License, or (at your option) any later version. 10 | * 11 | * Pixels is distributed in the hope that it will be useful, 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | * Affero GNU General Public License for more details. 15 | * 16 | * You should have received a copy of the Affero GNU General Public 17 | * License along with Pixels. If not, see 18 | * . 19 | */ 20 | 21 | // 22 | // Created by gengdy on 24-11-25. 
23 | // 24 | 25 | #ifndef PIXELS_STORAGEPROVIDER_H 26 | #define PIXELS_STORAGEPROVIDER_H 27 | 28 | #include "physical/PhysicalWriter.h" 29 | #include "physical/PhysicalWriterOption.h" 30 | #include 31 | /* Interface for objects that create a writer for a given path and writer option. Members are public so the interface can be used through a StorageProvider reference or pointer. */ 32 | class StorageProvider { 33 | public: 34 | virtual ~StorageProvider() = default; 35 | virtual std::shared_ptr createWriter(const std::string &path, std::shared_ptr option) = 0; 36 | }; 37 | #endif //PIXELS_STORAGEPROVIDER_H 38 | -------------------------------------------------------------------------------- /pixels-common/include/physical/allocator/Allocator.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by liyu on 5/21/23. 3 | // 4 | 5 | #ifndef DUCKDB_ALLOCATOR_H 6 | #define DUCKDB_ALLOCATOR_H 7 | 8 | #include 9 | #include 10 | #include 11 | /* Interface for buffer allocators: allocate(size) hands out a buffer, reset() is implemented by each allocator. */ 12 | class Allocator { 13 | public: 14 | virtual ~Allocator() = default; 15 | virtual void reset() = 0; 16 | virtual std::shared_ptr allocate(int size) = 0; 17 | }; 18 | #endif // DUCKDB_ALLOCATOR_H 19 | -------------------------------------------------------------------------------- /pixels-common/include/physical/allocator/BufferPoolAllocator.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by liyu on 5/21/23. 3 | // 4 | 5 | #ifndef DUCKDB_BUFFERPOOLALLOCATOR_H 6 | #define DUCKDB_BUFFERPOOLALLOCATOR_H 7 | 8 | #include "Allocator.h" 9 | 10 | class BufferPoolAllocator: public Allocator { 11 | public: 12 | BufferPoolAllocator(); 13 | ~BufferPoolAllocator(); 14 | std::shared_ptr allocate(int size) override; 15 | void reset() override; 16 | private: 17 | long maxSize; 18 | std::shared_ptr buffer; 19 | }; 20 | #endif // DUCKDB_BUFFERPOOLALLOCATOR_H 21 | -------------------------------------------------------------------------------- /pixels-common/include/physical/allocator/OrdinaryAllocator.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by liyu on 5/21/23. 
3 | // 4 | 5 | #ifndef DUCKDB_ORDINARYALLOCATOR_H 6 | #define DUCKDB_ORDINARYALLOCATOR_H 7 | 8 | #include "physical/allocator/Allocator.h" 9 | 10 | class OrdinaryAllocator: public Allocator { 11 | public: 12 | OrdinaryAllocator() = default; 13 | std::shared_ptr allocate(int size) override; 14 | void reset() override {}; 15 | }; 16 | #endif // DUCKDB_ORDINARYALLOCATOR_H 17 | -------------------------------------------------------------------------------- /pixels-common/include/physical/io/PhysicalLocalReader.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by liyu on 2/27/23. 3 | // 4 | 5 | #ifndef PIXELS_READER_PHYSICALLOCALREADER_H 6 | #define PIXELS_READER_PHYSICALLOCALREADER_H 7 | 8 | #include "physical/PhysicalReader.h" 9 | #include "physical/storage/LocalFS.h" 10 | #include "physical/natives/DirectRandomAccessFile.h" 11 | #include "physical/natives/DirectUringRandomAccessFile.h" 12 | #include 13 | #include 14 | 15 | 16 | class PhysicalLocalReader: public PhysicalReader { 17 | public: 18 | PhysicalLocalReader(std::shared_ptr storage, std::string path); 19 | std::shared_ptr readFully(int length) override; 20 | std::shared_ptr readFully(int length, std::shared_ptr bb) override; 21 | std::shared_ptr readAsync(int length, std::shared_ptr bb, int index); 22 | void readAsyncSubmit(uint32_t size); 23 | void readAsyncComplete(uint32_t size); 24 | void readAsyncSubmitAndComplete(uint32_t size); 25 | void close() override; 26 | long getFileLength() override; 27 | void seek(long desired) override; 28 | long readLong() override; 29 | int readInt() override; 30 | char readChar() override; 31 | std::string getName() override; 32 | private: 33 | std::shared_ptr local; 34 | std::string path; 35 | long id; 36 | std::atomic numRequests; 37 | std::atomic asyncNumRequests; 38 | std::shared_ptr raf; 39 | 40 | }; 41 | 42 | #endif //PIXELS_READER_PHYSICALLOCALREADER_H 43 | 
-------------------------------------------------------------------------------- /pixels-common/include/physical/natives/ByteOrder.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 PixelsDB. 3 | * 4 | * This file is part of Pixels. 5 | * 6 | * Pixels is free software: you can redistribute it and/or modify 7 | * it under the terms of the Affero GNU General Public License as 8 | * published by the Free Software Foundation, either version 3 of 9 | * the License, or (at your option) any later version. 10 | * 11 | * Pixels is distributed in the hope that it will be useful, 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | * Affero GNU General Public License for more details. 15 | * 16 | * You should have received a copy of the Affero GNU General Public 17 | * License along with Pixels. If not, see 18 | * . 19 | */ 20 | 21 | // 22 | // Created by whz on 11/27/24. 23 | // 24 | 25 | #ifndef DUCKDB_BYTEORDER_H 26 | #define DUCKDB_BYTEORDER_H 27 | enum class ByteOrder { 28 | PIXELS_LITTLE_ENDIAN, 29 | PIXELS_BIG_ENDIAN 30 | }; 31 | #endif //DUCKDB_BYTEORDER_H 32 | -------------------------------------------------------------------------------- /pixels-common/include/physical/natives/DirectIoLib.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by yuly on 19.04.23. 3 | // 4 | 5 | #ifndef DUCKDB_DIRECTIOLIB_H 6 | #define DUCKDB_DIRECTIOLIB_H 7 | 8 | /** 9 | * Mapping Linux I/O functions to native methods. 10 | * Partially referenced the implementation of Jaydio (https://github.com/smacke/jaydio), 11 | * which is implemented by Stephen Macke and licensed under Apache 2.0. 12 | *

13 | * Created at: 02/02/2023 14 | * Author: Liangyong Yu 15 | */ 16 | 17 | #include "utils/ConfigFactory.h" 18 | #include "physical/natives/ByteBuffer.h" 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include "liburing.h" 24 | #include "liburing/io_uring.h" 25 | 26 | 27 | struct uringData { 28 | int idx; 29 | ByteBuffer * bb; 30 | }; 31 | 32 | 33 | class DirectIoLib { 34 | public: 35 | /** 36 | * the start address/size of direct buffer is the multiple of block Size 37 | */ 38 | DirectIoLib(int fsBlockSize); 39 | std::shared_ptr allocateDirectBuffer(long size); 40 | std::shared_ptr read(int fd, long fileOffset, std::shared_ptr directBuffer, long length); 41 | long blockStart(long value); 42 | long blockEnd(long value); 43 | private: 44 | int fsBlockSize; 45 | long fsBlockNotMask; 46 | }; 47 | 48 | #endif // DUCKDB_DIRECTIOLIB_H 49 | -------------------------------------------------------------------------------- /pixels-common/include/physical/natives/DirectRandomAccessFile.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by yuliangyong on 2023-03-02. 
3 | // 4 | 5 | #ifndef PIXELS_DIRECTRANDOMACCESSFILE_H 6 | #define PIXELS_DIRECTRANDOMACCESSFILE_H 7 | 8 | #include "physical/natives/PixelsRandomAccessFile.h" 9 | #include "physical/natives/ByteBuffer.h" 10 | #include "physical/natives/DirectIoLib.h" 11 | #include 12 | #include 13 | #include "profiler/TimeProfiler.h" 14 | #include "physical/allocator/OrdinaryAllocator.h" 15 | // Concrete PixelsRandomAccessFile: overrides seek, length, both readFully overloads, close, readLong, readChar and readInt. 16 | class DirectRandomAccessFile: public PixelsRandomAccessFile { 17 | public: 18 | explicit DirectRandomAccessFile(const std::string& file); 19 | void close() override; 20 | std::shared_ptr readFully(int len) override; 21 | std::shared_ptr readFully(int len, std::shared_ptr bb) override; 22 | long length() override; 23 | void seek(long off) override; 24 | long readLong() override; 25 | char readChar() override; 26 | int readInt() override; 27 | private: 28 | void populatedBuffer(); 29 | std::shared_ptr allocator; 30 | std::vector> largeBuffers; 31 | /* smallDirectBuffer is aligned to blockSize; smallBuffer adds the offset to smallDirectBuffer. */ 32 | std::shared_ptr smallBuffer; 33 | std::shared_ptr smallDirectBuffer; 34 | bool bufferValid; 35 | long len; 36 | protected: // members below are visible to subclasses 37 | int fd; 38 | long offset; 39 | std::shared_ptr directIoLib; 40 | bool enableDirect; 41 | int fsBlockSize; 42 | }; 43 | #endif //PIXELS_DIRECTRANDOMACCESSFILE_H 44 | -------------------------------------------------------------------------------- /pixels-common/include/physical/natives/DirectUringRandomAccessFile.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by liyu on 5/28/23.
3 | // 4 | 5 | #ifndef DUCKDB_DIRECTURINGRANDOMACCESSFILE_H 6 | #define DUCKDB_DIRECTURINGRANDOMACCESSFILE_H 7 | // DirectRandomAccessFile subclass that adds readAsync / readAsyncSubmit / readAsyncComplete; its io_uring state is static thread_local. 8 | #include "liburing.h" 9 | #include "liburing/io_uring.h" 10 | #include "physical/natives/DirectRandomAccessFile.h" 11 | #include "exception/InvalidArgumentException.h" 12 | #include "DirectIoLib.h" 13 | #include "physical/BufferPool.h" 14 | class DirectUringRandomAccessFile: public DirectRandomAccessFile { 15 | public: 16 | explicit DirectUringRandomAccessFile(const std::string& file); 17 | static void RegisterBuffer(std::vector> buffers); 18 | static void RegisterBufferFromPool(std::vector colIds); 19 | static void Initialize(); 20 | static void Reset(); 21 | std::shared_ptr readAsync(int length, std::shared_ptr buffer, int index); 22 | void readAsyncSubmit(int size); 23 | void readAsyncComplete(int size); 24 | ~DirectUringRandomAccessFile(); 25 | private: // every member below is static thread_local, i.e. one copy per thread 26 | static thread_local struct io_uring * ring; 27 | static thread_local bool isRegistered; 28 | static thread_local struct iovec * iovecs; 29 | static thread_local uint32_t iovecSize; 30 | }; 31 | #endif // DUCKDB_DIRECTURINGRANDOMACCESSFILE_H 32 | -------------------------------------------------------------------------------- /pixels-common/include/physical/natives/PixelsRandomAccessFile.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by yuliangyong on 2023-03-02.
3 | // 4 | 5 | #ifndef PIXELS_PIXELSRANDOMACCESSFILE_H 6 | #define PIXELS_PIXELSRANDOMACCESSFILE_H 7 | // Abstract random-access file interface: every operation is pure virtual. 8 | #include 9 | #include "physical/natives/ByteBuffer.h" 10 | class PixelsRandomAccessFile { 11 | public: 12 | virtual void seek(long off) = 0; 13 | virtual long length() = 0; 14 | virtual std::shared_ptr readFully(int len) = 0; 15 | virtual std::shared_ptr readFully(int len, std::shared_ptr bb) = 0; 16 | virtual void close() = 0; 17 | virtual long readLong() = 0; 18 | virtual char readChar() = 0; 19 | virtual int readInt() = 0; 20 | virtual ~PixelsRandomAccessFile() = default; // polymorphic base: lets a derived object be destroyed safely through a base pointer 21 | // virtual std::string readLine(); 22 | // virtual std::string readUTF(); 23 | }; 24 | 25 | #endif //PIXELS_PIXELSRANDOMACCESSFILE_H 26 | -------------------------------------------------------------------------------- /pixels-common/include/physical/scheduler/NoopScheduler.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by liyu on 3/8/23. 3 | // 4 | 5 | #ifndef PIXELS_NOOPSCHEDULER_H 6 | #define PIXELS_NOOPSCHEDULER_H 7 | 8 | #include "physical/Scheduler.h" 9 | // Scheduler that executes the requests of a batch one by one, without sorting or merging them. 10 | class NoopScheduler : public Scheduler { 11 | // TODO: logger 12 | public: 13 | static Scheduler * Instance(); // lazily creates the single shared instance 14 | std::vector> executeBatch(std::shared_ptr reader, RequestBatch batch, long queryId) override; // forwards to the overload below with no reuse buffers 15 | std::vector> executeBatch(std::shared_ptr reader, RequestBatch batch, 16 | std::vector> reuseBuffers, long queryId) override; 17 | ~NoopScheduler(); 18 | private: 19 | static Scheduler * instance; 20 | }; 21 | #endif //PIXELS_NOOPSCHEDULER_H 22 | -------------------------------------------------------------------------------- /pixels-common/include/physical/scheduler/SortMergeScheduler.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by yuly on 01.05.23.
3 | // 4 | 5 | #ifndef DUCKDB_SORTMERGESCHEDULER_H 6 | #define DUCKDB_SORTMERGESCHEDULER_H 7 | 8 | #include "physical/Scheduler.h" 9 | #include "physical/MergedRequest.h" 10 | #include 11 | #include "exception/InvalidArgumentException.h" 12 | // Scheduler with a sortMerge() step in addition to the two executeBatch overloads it overrides. 13 | class SortMergeScheduler : public Scheduler { 14 | // TODO: logger 15 | public: 16 | static Scheduler * Instance(); 17 | std::vector> sortMerge(RequestBatch batch, long queryId); 18 | std::vector> executeBatch(std::shared_ptr reader, 19 | RequestBatch batch, long queryId) override; 20 | std::vector> executeBatch(std::shared_ptr reader, RequestBatch batch, 21 | std::vector> reuseBuffers, long queryId) override; 22 | 23 | 24 | private: 25 | SortMergeScheduler(); // private constructor: not publicly constructible, obtain the object through Instance() 26 | static Scheduler * instance; 27 | 28 | 29 | }; 30 | 31 | #endif //DUCKDB_SORTMERGESCHEDULER_H 32 | -------------------------------------------------------------------------------- /pixels-common/include/physical/storage/LocalFS.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by liyu on 2/28/23. 3 | // 4 | 5 | #ifndef PIXELS_TEST_LOCALFS_H 6 | #define PIXELS_TEST_LOCALFS_H 7 | 8 | #include "physical/Storage.h" 9 | #include "physical/natives/PixelsRandomAccessFile.h" 10 | #include 11 | #include 12 | #include 13 | /** 14 | * This implementation is used to access all kinds of POSIX file systems that are mounted 15 | * on a local directory. The file system does not need to be local physically. For example, 16 | * it could be a network file system mounted on a local point such as /mnt/nfs.
17 | * 18 | * @author liangyong 19 | * Created at: 02/03/2023 20 | */ 21 | 22 | class LocalFS: public Storage { 23 | public: 24 | LocalFS(); 25 | ~LocalFS(); 26 | Scheme getScheme() override; 27 | std::string ensureSchemePrefix(const std::string &path) const override; 28 | std::shared_ptr openRaf(const std::string& path); 29 | std::vector listPaths(const std::string &path) override; 30 | std::ifstream open(const std::string &path) override; 31 | void close() override; 32 | private: 33 | // TODO: read the configuration from pixels.properties for the following two values. 34 | static bool MmapEnabled; 35 | static bool EnableCache; 36 | static std::string SchemePrefix; 37 | // TODO: the remaining functions still need to be implemented. 38 | }; 39 | 40 | 41 | #endif //PIXELS_TEST_LOCALFS_H 42 | -------------------------------------------------------------------------------- /pixels-common/include/physical/storage/LocalFSProvider.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 PixelsDB. 3 | * 4 | * This file is part of Pixels. 5 | * 6 | * Pixels is free software: you can redistribute it and/or modify 7 | * it under the terms of the Affero GNU General Public License as 8 | * published by the Free Software Foundation, either version 3 of 9 | * the License, or (at your option) any later version. 10 | * 11 | * Pixels is distributed in the hope that it will be useful, 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | * Affero GNU General Public License for more details. 15 | * 16 | * You should have received a copy of the Affero GNU General Public 17 | * License along with Pixels. If not, see 18 | * <https://www.gnu.org/licenses/>. 19 | */ 20 | 21 | // 22 | // Created by gengdy on 24-11-25.
23 | // 24 | 25 | #ifndef PIXELS_LOCALFSPROVIDER_H 26 | #define PIXELS_LOCALFSPROVIDER_H 27 | 28 | #include "physical/StorageProvider.h" 29 | #include "physical/PhysicalWriter.h" 30 | #include "physical/PhysicalWriterOption.h" 31 | // StorageProvider implementation; the only member declared here is the createWriter(path, option) override. 32 | class LocalFSProvider : public StorageProvider { 33 | public: 34 | std::shared_ptr createWriter(const std::string &path, std::shared_ptr option) override; 35 | }; 36 | #endif //PIXELS_LOCALFSPROVIDER_H 37 | -------------------------------------------------------------------------------- /pixels-common/include/physical/storage/PhysicalLocalWriter.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 PixelsDB. 3 | * 4 | * This file is part of Pixels. 5 | * 6 | * Pixels is free software: you can redistribute it and/or modify 7 | * it under the terms of the Affero GNU General Public License as 8 | * published by the Free Software Foundation, either version 3 of 9 | * the License, or (at your option) any later version. 10 | * 11 | * Pixels is distributed in the hope that it will be useful, 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | * Affero GNU General Public License for more details. 15 | * 16 | * You should have received a copy of the Affero GNU General Public 17 | * License along with Pixels. If not, see 18 | * <https://www.gnu.org/licenses/>. 19 | */ 20 | 21 | // 22 | // Created by gengdy on 24-11-25.
23 | // 24 | 25 | #ifndef PIXELS_PHYSICALLOCALWRITER_H 26 | #define PIXELS_PHYSICALLOCALWRITER_H 27 | 28 | #include "physical/PhysicalWriter.h" 29 | #include "physical/storage/LocalFS.h" 30 | #include "physical/natives/ByteBuffer.h" 31 | #include 32 | // PhysicalWriter implementation that holds a std::ofstream (rawWriter), the target path and a 64-bit position. 33 | class PhysicalLocalWriter : public PhysicalWriter { 34 | public: 35 | PhysicalLocalWriter(const std::string &path, bool overwrite); 36 | std::int64_t prepare(int length) override; 37 | std::int64_t append(const uint8_t *buffer, int offset, int length) override; 38 | std::int64_t append(std::shared_ptr byteBuffer) override; 39 | void close() override; 40 | void flush() override; 41 | std::string getPath() const override; 42 | int getBufferSize() const override; 43 | private: 44 | std::shared_ptr localFS; 45 | std::string path; 46 | std::int64_t position; 47 | std::ofstream rawWriter; 48 | }; 49 | #endif //PIXELS_PHYSICALLOCALWRITER_H 50 | -------------------------------------------------------------------------------- /pixels-common/include/profiler/AbstractProfiler.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by yuly on 03.05.23. 3 | // 4 | 5 | #ifndef DUCKDB_ABSTRACTPROFILER_H 6 | #define DUCKDB_ABSTRACTPROFILER_H 7 | 8 | constexpr bool enableProfile = true; 9 | // Interface shared by the profilers: each one can print and reset its results. 10 | class AbstractProfiler { 11 | public: 12 | virtual void Print() = 0; 13 | virtual void Reset() = 0; 14 | virtual ~AbstractProfiler() = default; // polymorphic base: lets a derived profiler be destroyed safely through a base pointer 15 | }; 16 | 17 | #endif //DUCKDB_ABSTRACTPROFILER_H 18 | -------------------------------------------------------------------------------- /pixels-common/include/profiler/CountProfiler.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by yuly on 03.05.23.
3 | // 4 | 5 | #ifndef DUCKDB_COUNTPROFILER_H 6 | #define DUCKDB_COUNTPROFILER_H 7 | #include 8 | #include 9 | #include 10 | #include "exception/InvalidArgumentException.h" 11 | #include "profiler/AbstractProfiler.h" 12 | #include 13 | #include 14 | #include 15 | 16 | 17 | // This class is used to show how many times a labelled event (e.g. a function call) has occurred. 18 | 19 | class CountProfiler: public AbstractProfiler { 20 | public: 21 | static CountProfiler & Instance(); 22 | void Count(const std::string& label); 23 | void Count(const std::string& label, int num); 24 | void Print() override; 25 | void Reset() override; 26 | long Get(const std::string& label); 27 | private: 28 | std::mutex lock; // NOTE(review): presumably guards `result` - confirm in CountProfiler.cpp 29 | std::map result; 30 | }; 31 | 32 | 33 | 34 | 35 | #endif //DUCKDB_COUNTPROFILER_H 36 | -------------------------------------------------------------------------------- /pixels-common/include/profiler/TimeProfiler.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by yuly on 03.05.23.
3 | // 4 | 5 | #ifndef DUCKDB_TIMEPROFILER_H 6 | #define DUCKDB_TIMEPROFILER_H 7 | 8 | #include 9 | #include 10 | #include 11 | #include "exception/InvalidArgumentException.h" 12 | #include "profiler/AbstractProfiler.h" 13 | #include 14 | #include 15 | #include 16 | #include 17 | // Convenience macros that forward to TimeProfiler::Instance().Start(X) / End(X). 18 | #define PROFILE_START(X) ::TimeProfiler::Instance().Start(X) 19 | #define PROFILE_END(X) ::TimeProfiler::Instance().End(X) 20 | // Label-keyed profiler: keeps thread_local maps (profiling, localResult) plus a shared globalResult map next to a mutex. 21 | class TimeProfiler: public AbstractProfiler { 22 | public: 23 | static TimeProfiler & Instance(); 24 | void Start(const std::string& label); 25 | void End(const std::string& label); 26 | long Get(const std::string &label); 27 | void Reset() override; 28 | void Print() override; 29 | void Collect(); 30 | int GetResultSize(); 31 | private: 32 | TimeProfiler(); // private constructor: obtain the object through Instance() 33 | static thread_local std::map profiling; 34 | static thread_local std::map localResult; 35 | std::mutex lock; // NOTE(review): presumably guards globalResult - confirm in TimeProfiler.cpp 36 | std::map globalResult; 37 | }; 38 | 39 | #endif //DUCKDB_TIMEPROFILER_H 40 | -------------------------------------------------------------------------------- /pixels-common/include/utils/ColumnSizeCSVReader.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by liyu on 1/24/24.
3 | // 4 | 5 | #ifndef DUCKDB_COLUMNSIZECSVREADER_H 6 | #define DUCKDB_COLUMNSIZECSVREADER_H 7 | 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include "exception/InvalidArgumentException.h" 13 | // Loads "<columnName> <maxSize>" records (one per line, separated by a single space) from a file into colSize. 14 | class ColumnSizeCSVReader { 15 | public: 16 | ColumnSizeCSVReader(std::string csvPath) { 17 | std::ifstream file; 18 | file.open(csvPath); 19 | std::string line; 20 | while (getline(file, line)) { 21 | // split at the first space; a line without one (e.g. a blank trailing line) is skipped instead of being passed whole to std::stoi, which would throw 22 | auto delimiterPos = line.find(' '); 23 | if (delimiterPos == std::string::npos) { continue; } 24 | colSize[line.substr(0, delimiterPos)] = std::stoi(line.substr(delimiterPos + 1)); 25 | } 26 | file.close(); 27 | } 28 | int get(const std::string & columnName); 29 | private: 30 | std::unordered_map colSize; 31 | }; 32 | 33 | #endif //DUCKDB_COLUMNSIZECSVREADER_H 34 | -------------------------------------------------------------------------------- /pixels-common/include/utils/ConfigFactory.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by yuly on 19.04.23. 3 | // 4 | 5 | #ifndef DUCKDB_CONFIGFACTORY_H 6 | #define DUCKDB_CONFIGFACTORY_H 7 | 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include "exception/InvalidArgumentException.h" 14 | #include 15 | #include 16 | // Property store reached through Instance(); the constructor is private. 17 | class ConfigFactory { 18 | public: 19 | static ConfigFactory & Instance(); 20 | void Print(); 21 | std::string getProperty(std::string key); 22 | bool boolCheckProperty(std::string key); 23 | std::string getPixelsDirectory(); 24 | std::string getPixelsSourceDirectory(); 25 | private: 26 | ConfigFactory(); 27 | std::map prop; 28 | std::string pixelsHome; 29 | std::string pixelsSrc; 30 | }; 31 | #endif // DUCKDB_CONFIGFACTORY_H 32 | -------------------------------------------------------------------------------- /pixels-common/include/utils/Constants.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by liyu on 3/14/23.
3 | // 4 | 5 | #ifndef PIXELS_CONSTANTS_H 6 | #define PIXELS_CONSTANTS_H 7 | // Project-wide constants, declared as non-const static members. 8 | #include 9 | class Constants { 10 | public: 11 | static int VERSION; 12 | static std::string MAGIC; 13 | 14 | static int DEFAULT_HDFS_BLOCK_SIZE; 15 | static int HDFS_BUFFER_SIZE; 16 | static int LOCAL_BUFFER_SIZE; 17 | static int S3_BUFFER_SIZE; 18 | static int REDIS_BUFFER_SIZE; 19 | static int GCS_BUFFER_SIZE; 20 | 21 | static int MIN_REPEAT; 22 | static int MAX_SCOPE; 23 | static int MAX_SHORT_REPEAT_LENGTH; 24 | static float DICT_KEY_SIZE_THRESHOLD; 25 | static int INIT_DICT_SIZE; 26 | 27 | static std::string LAYOUT_VERSION_LITERAL; 28 | static std::string CACHE_VERSION_LITERAL; 29 | static std::string CACHE_COORDINATOR_LITERAL; 30 | static std::string CACHE_NODE_STATUS_LITERAL; 31 | static std::string CACHE_LOCATION_LITERAL; 32 | static int MAX_BLOCK_ID_LEN; 33 | 34 | /** 35 | * Issue #108: 36 | * The prefix for the read-write lock used in the etcd auto-increment id. 37 | */ 38 | static std::string AI_LOCK_PATH_PREFIX; 39 | 40 | static std::string LOCAL_FS_ID_KEY; 41 | // the prefix for keys of local fs metadata (i.e. file path -> file id). 42 | static std::string LOCAL_FS_META_PREFIX; 43 | 44 | static std::string S3_ID_KEY; 45 | // the prefix for keys of s3 metadata (i.e. file path -> file id). 46 | static std::string S3_META_PREFIX; 47 | 48 | static std::string MINIO_ID_KEY; 49 | // the prefix for keys of minio metadata (i.e. file path -> file id). 50 | static std::string MINIO_META_PREFIX; 51 | 52 | static std::string REDIS_ID_KEY; 53 | // the prefix for keys of redis metadata (i.e. file path -> file id). 54 | static std::string REDIS_META_PREFIX; 55 | 56 | static std::string GCS_ID_KEY; 57 | // the prefix for keys of gcs metadata (i.e. file path -> file id).
58 | static std::string GCS_META_PREFIX; 59 | }; 60 | #endif //PIXELS_CONSTANTS_H 61 | -------------------------------------------------------------------------------- /pixels-common/include/utils/String.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by liyu on 3/16/23. 3 | // 4 | 5 | #ifndef PIXELS_STRING_H 6 | #define PIXELS_STRING_H 7 | 8 | #include 9 | #include 10 | bool icompare_pred(unsigned char a, unsigned char b); 11 | 12 | bool icompare(std::string const& a, std::string const& b); 13 | 14 | #endif //PIXELS_STRING_H 15 | -------------------------------------------------------------------------------- /pixels-common/lib/MergedRequest.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // Created by yuly on 01.05.23. 3 | // 4 | 5 | #include "physical/MergedRequest.h" 6 | // Appends `curr` to this merged request when its gap to the current end is at most maxGap and the combined length stays within the numeric limit; returns this object in that case, otherwise a new merged request built from `curr`. Throws InvalidArgumentException for a request that starts before the current end or belongs to another query. 7 | std::shared_ptr MergedRequest::merge(Request curr) { 8 | if (curr.start < this->end) 9 | { 10 | throw InvalidArgumentException("MergedRequest: Can not merge backward request."); 11 | } 12 | if (curr.queryId != this->queryId) 13 | { 14 | throw InvalidArgumentException("MergedRequest: Can not merge requests from different queries (transactions)."); 15 | } 16 | long gap = curr.start - this->end; 17 | if(gap <= maxGap && this->length + gap + curr.length <= std::numeric_limits::max()) { 18 | this->offsets.emplace_back(this->length + (int) gap); // offset of `curr` inside the merged range 19 | this->lengths.emplace_back(curr.length); 20 | this->length += gap + curr.length; 21 | this->end = curr.start + curr.length; 22 | this->size++; 23 | return shared_from_this(); 24 | } 25 | return std::make_shared(curr); 26 | } 27 | // Starts a merged request from a single request; maxGap is read from the "read.request.merge.gap" property. 28 | MergedRequest::MergedRequest(Request first) { 29 | this->queryId = first.queryId; 30 | this->start = first.start; 31 | this->end = first.start + first.length; 32 | this->maxGap = std::stoi(ConfigFactory::Instance().getProperty("read.request.merge.gap")); 33 | this->offsets.emplace_back(0); 34 |
this->lengths.emplace_back(first.length); 35 | this->length = first.length; 36 | this->size = 1; 37 | } 38 | 39 | // Once the merged range has been read, split the merged buffer into one buffer per original request, using the recorded offsets and lengths. 40 | std::vector> MergedRequest::complete(std::shared_ptr buffer) { 41 | std::vector> bbs; 42 | for(int i = 0; i < this->size; i++) { 43 | auto bb = std::make_shared(*buffer, 44 | offsets.at(i), 45 | lengths.at(i)); 46 | bbs.emplace_back(bb); 47 | } 48 | return bbs; 49 | } 50 | 51 | long MergedRequest::getStart() { 52 | return start; 53 | } 54 | 55 | int MergedRequest::getLength() { 56 | return length; 57 | } 58 | 59 | int MergedRequest::getSize() { 60 | return size; 61 | } 62 | 63 | long MergedRequest::getQueryId() { 64 | return queryId; 65 | } 66 | 67 | 68 | 69 | 70 | 71 | 72 | -------------------------------------------------------------------------------- /pixels-common/lib/exception/InvalidArgumentException.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // Created by liyu on 3/13/23. 3 | // 4 | 5 | #include "exception/InvalidArgumentException.h" 6 | 7 | InvalidArgumentException::InvalidArgumentException(std::string message) { 8 | std::cout<. 19 | */ 20 | 21 | // 22 | // Created by gengdy on 24-11-19.
23 | // 24 | 25 | #include 26 | #include 27 | #include 28 | 29 | FilePath::FilePath() 30 | : realPath(""), valid(false), isDir(false) {} 31 | // Accepts either a "file:///..." URI or an absolute "/..." path; any other non-empty input leaves the object invalid. An empty input throws std::invalid_argument. 32 | FilePath::FilePath(const std::string &path) 33 | : realPath(""), valid(false), isDir(false) { 34 | if (path.empty()) { 35 | throw std::invalid_argument("path is null"); 36 | } 37 | if (path.rfind("file:///", 0) == 0) { // rfind(prefix, 0) == 0 is a starts-with test 38 | this->valid = true; 39 | this->realPath = path.substr(path.find("://") + 3); // drop "file://" and keep the leading '/' 40 | } else if (path.rfind("/", 0) == 0) { 41 | this->valid = true; 42 | this->realPath = path; 43 | } 44 | 45 | if (this->valid) { 46 | std::filesystem::path file(this->realPath); 47 | this->isDir = std::filesystem::is_directory(file); 48 | } 49 | } 50 | // Returns the real path, or an empty string when the object is invalid. 51 | std::string FilePath::toString() const { 52 | if (!this->valid) { 53 | return ""; 54 | } 55 | return this->realPath; 56 | } 57 | // Same as toString(), but the path is passed through storage.ensureSchemePrefix(). 58 | std::string FilePath::toStringWithPrefix(const Storage &storage) const { 59 | if (!this->valid) { 60 | return ""; 61 | } 62 | return storage.ensureSchemePrefix(this->realPath); 63 | } -------------------------------------------------------------------------------- /pixels-common/lib/physical/PhysicalWriterOption.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 PixelsDB. 3 | * 4 | * This file is part of Pixels. 5 | * 6 | * Pixels is free software: you can redistribute it and/or modify 7 | * it under the terms of the Affero GNU General Public License as 8 | * published by the Free Software Foundation, either version 3 of 9 | * the License, or (at your option) any later version. 10 | * 11 | * Pixels is distributed in the hope that it will be useful, 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | * Affero GNU General Public License for more details. 15 | * 16 | * You should have received a copy of the Affero GNU General Public 17 | * License along with Pixels. If not, see 18 | * <https://www.gnu.org/licenses/>.
19 | */ 20 | 21 | // 22 | // Created by gengdy on 24-11-25. 23 | // 24 | 25 | #include "physical/PhysicalWriterOption.h" 26 | 27 | PhysicalWriterOption::PhysicalWriterOption(std::int64_t blockSize, bool addBlockPadding, bool overwrite) 28 | : blockSize(blockSize), addBlockPadding(addBlockPadding), overwrite(overwrite) {} 29 | 30 | std::int64_t PhysicalWriterOption::getBlockSize() const { 31 | return blockSize; 32 | } 33 | // Each setter stores the value and returns shared_from_this() so calls can be chained; the object must therefore already be owned by a std::shared_ptr. 34 | std::shared_ptr PhysicalWriterOption::setBlockSize(std::int64_t blockSize) { 35 | this->blockSize = blockSize; 36 | return shared_from_this(); 37 | } 38 | 39 | bool PhysicalWriterOption::isAddBlockPadding() const { 40 | return addBlockPadding; 41 | } 42 | 43 | std::shared_ptr PhysicalWriterOption::setAddBlockPadding(bool addBlockPadding) { 44 | this->addBlockPadding = addBlockPadding; 45 | return shared_from_this(); 46 | } 47 | 48 | bool PhysicalWriterOption::isOverwrite() const { 49 | return overwrite; 50 | } 51 | 52 | std::shared_ptr PhysicalWriterOption::setOverwrite(bool overwrite) { 53 | this->overwrite = overwrite; 54 | return shared_from_this(); 55 | } 56 | -------------------------------------------------------------------------------- /pixels-common/lib/physical/Request.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // Created by liyu on 3/7/23.
3 | // 4 | 5 | #include "physical/Request.h" 6 | 7 | // Plain value holder: copies the query id, start offset, length and buffer id into the members. 8 | Request::Request(uint64_t queryId_, uint64_t start_, uint64_t length_, int64_t bufferId) { 9 | queryId = queryId_; 10 | start = start_; 11 | length = length_; 12 | this->bufferId = bufferId; 13 | } 14 | 15 | int Request::hashCode() { 16 | return (int) ((start << 32) >> 32); // shift left then right by 32: keeps the low 32 bits when start is an unsigned 64-bit value 17 | } 18 | // Three-way comparison by start offset: -1, 0 or 1 when this request starts before, at, or after `o` (it used to return the result of `start == o.start`). 19 | int Request::comparedTo(Request o) { 20 | return (start > o.start) - (start < o.start); 21 | } 22 | -------------------------------------------------------------------------------- /pixels-common/lib/physical/RequestBatch.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // Created by liyu on 3/7/23. 3 | // 4 | 5 | #include "physical/RequestBatch.h" 6 | // Creates an empty batch with storage reserved for `capacity` requests; a non-positive capacity throws std::runtime_error. 7 | RequestBatch::RequestBatch(int capacity) { 8 | if(capacity <= 0) { 9 | throw std::runtime_error("Request batch capacity: " + std::to_string(capacity)); 10 | } 11 | requests.reserve(capacity); 12 | size = 0; 13 | } 14 | 15 | RequestBatch::RequestBatch() { 16 | requests = std::vector(); 17 | size = 0; 18 | } 19 | 20 | int RequestBatch::getSize() { 21 | return size; 22 | } 23 | // Returns the request list by value (a copy). 24 | std::vector RequestBatch::getRequests() { 25 | return requests; 26 | } 27 | 28 | //std::vector> * RequestBatch::getPromises() {}() { 29 | // return &pro; 30 | //} 31 | // Both add() overloads append one request and increment size. 32 | void RequestBatch::add(uint64_t queryId, uint64_t start, uint64_t length, int64_t bufferId) { 33 | Request request = Request(queryId, start, length, bufferId); 34 | requests.push_back(request); 35 | size++; 36 | } 37 | 38 | void RequestBatch::add(Request request) { 39 | requests.push_back(request); 40 | size++; 41 | } 42 | -------------------------------------------------------------------------------- /pixels-common/lib/physical/SchedulerFactory.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // Created by liyu on 3/10/23.
3 | // 4 | 5 | #include "physical/SchedulerFactory.h" 6 | 7 | SchedulerFactory * SchedulerFactory::instance = nullptr; 8 | // Lazily creates the single factory object on first use. 9 | SchedulerFactory * SchedulerFactory::Instance() { 10 | if(instance == nullptr) { 11 | instance = new SchedulerFactory(); 12 | } 13 | return instance; 14 | } 15 | 16 | Scheduler *SchedulerFactory::getScheduler() { 17 | return scheduler; 18 | } 19 | // Picks the scheduler named (case-insensitively) by the "read.request.scheduler" property; any name other than "noop" or "sortmerge" throws std::runtime_error. 20 | SchedulerFactory::SchedulerFactory() { 21 | // TODO: here we read name from pixels.properties 22 | std::string name = ConfigFactory::Instance().getProperty("read.request.scheduler"); 23 | std::transform(name.begin(), name.end(), name.begin(), 24 | [](unsigned char c){ return std::tolower(c); }); 25 | if(name == "noop") { 26 | scheduler = NoopScheduler::Instance(); 27 | } else if(name == "sortmerge") { 28 | scheduler = SortMergeScheduler::Instance(); 29 | } else { 30 | throw std::runtime_error("the read request scheduler is not support. "); 31 | } 32 | } 33 | 34 | SchedulerFactory::~SchedulerFactory() { 35 | // Do not `delete instance` here: when the singleton itself is being destroyed that would re-enter this destructor. Only drop the stale pointer. 36 | if (instance == this) { instance = nullptr; } 37 | } 38 | -------------------------------------------------------------------------------- /pixels-common/lib/physical/Status.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 PixelsDB. 3 | * 4 | * This file is part of Pixels. 5 | * 6 | * Pixels is free software: you can redistribute it and/or modify 7 | * it under the terms of the Affero GNU General Public License as 8 | * published by the Free Software Foundation, either version 3 of 9 | * the License, or (at your option) any later version. 10 | * 11 | * Pixels is distributed in the hope that it will be useful, 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | * Affero GNU General Public License for more details. 15 | * 16 | * You should have received a copy of the Affero GNU General Public 17 | * License along with Pixels.
If not, see 18 | * <https://www.gnu.org/licenses/>. 19 | */ 20 | 21 | // 22 | // Created by gengdy on 24-11-19. 23 | // 24 | 25 | #include 26 | #include 27 | #include 28 | 29 | Status::Status() : path(""), length(0), isDir(false), replication(0) {} 30 | 31 | Status::Status(const std::string &path, uint64_t length, bool isDir, int replication) 32 | : path(path), length(length), isDir(isDir), replication(static_cast(replication)) {} 33 | 34 | Status::Status(const Status &other) 35 | : path(other.path), length(other.length), isDir(other.isDir), replication(other.replication) {} 36 | 37 | uint64_t Status::getLength() const { 38 | return this->length; 39 | } 40 | 41 | bool Status::isFile() const { 42 | return !this->isDir; 43 | } 44 | 45 | bool Status::isDirectory() const { 46 | return this->isDir; 47 | } 48 | 49 | short Status::getReplication() const { 50 | return this->replication; 51 | } 52 | 53 | std::string Status::getPath() const { 54 | return this->path; 55 | } 56 | // Returns the text after the last '/', or the whole path when it contains no '/'. 57 | std::string Status::getName() const { 58 | size_t slash = path.find_last_of('/'); 59 | return (slash == std::string::npos) ? path : path.substr(slash + 1); 60 | } 61 | bool Status::operator<(const Status &other) const // ordering and equality look at the path only 62 | { 63 | return this->path < other.path; 64 | } 65 | 66 | bool Status::operator==(const Status &other) const { 67 | return this->path == other.path; 68 | } 69 | // Renders "Status{path=...; isDirectory=...}"; the length is included only for non-directories. 70 | std::string Status::toString() const { 71 | std::ostringstream sb; 72 | sb << "Status{path=" << path << "; isDirectory=" << (isDir ? "true" : "false"); 73 | if (!isDirectory()) { 74 | sb << "; length=" << length; 75 | } 76 | sb << "}"; 77 | return sb.str(); 78 | } -------------------------------------------------------------------------------- /pixels-common/lib/physical/Storage.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // Created by liyu on 3/1/23.
3 | // 4 | #include "physical/Storage.h" 5 | 6 | std::map Storage::schemeMap = { 7 | {"hdfs", Storage::hdfs}, 8 | {"file", Storage::file}, 9 | {"s3", Storage::s3}, 10 | {"minio", Storage::minio}, 11 | {"redis", Storage::redis}, 12 | {"gcs", Storage::gcs}, 13 | {"mock", Storage::mock}, 14 | }; 15 | 16 | Storage::Storage() { 17 | 18 | } 19 | 20 | Storage::Scheme Storage::from(std::string value) { 21 | std::transform(value.begin(), value.end(), value.begin(), 22 | [](unsigned char c){ return std::tolower(c); }); 23 | return Storage::schemeMap[value]; 24 | } 25 | 26 | Storage::Scheme Storage::fromPath(const std::string& schemedPath) { 27 | std::size_t separatorIdx = schemedPath.find("://"); 28 | if (separatorIdx != std::string::npos) { 29 | std::string scheme = schemedPath.substr(0, separatorIdx); 30 | return from(scheme); 31 | } else { 32 | throw std::invalid_argument("Error: schemedPath doesn't contain separator."); 33 | } 34 | } 35 | 36 | bool Storage::isValid(const std::string& value) { 37 | return schemeMap.find(value) != schemeMap.end(); 38 | } 39 | 40 | Storage::~Storage() { 41 | 42 | } 43 | -------------------------------------------------------------------------------- /pixels-common/lib/physical/allocator/BufferPoolAllocator.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // Created by liyu on 5/21/23. 3 | // 4 | #include "physical/allocator/BufferPoolAllocator.h" 5 | 6 | void BufferPoolAllocator::reset() { 7 | buffer->resetPosition(); 8 | } 9 | 10 | BufferPoolAllocator::BufferPoolAllocator() { 11 | // 100M. 
This value is a temporary value 12 | maxSize = 100 * 1024 * 1024; 13 | buffer = std::make_shared(maxSize); 14 | 15 | } 16 | // Builds the returned buffer from the pool buffer, its current read position and `size`, then advances the read position by `size` rounded up to a multiple of 4096. NOTE(review): no check against maxSize is made here - confirm the pool cannot be overrun. 17 | std::shared_ptr BufferPoolAllocator::allocate(int size) { 18 | auto bb = std::make_shared(*buffer, buffer->getReadPos(), size); 19 | int roundSize; 20 | int remainder = size % 4096; 21 | if(remainder != 0) { 22 | roundSize = size + 4096 - remainder; 23 | } else { 24 | roundSize = size; 25 | } 26 | buffer->setReadPos(buffer->getReadPos() + roundSize); 27 | return bb; 28 | } 29 | 30 | BufferPoolAllocator::~BufferPoolAllocator() { 31 | } 32 | -------------------------------------------------------------------------------- /pixels-common/lib/physical/allocator/OrdinaryAllocator.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // Created by liyu on 5/21/23. 3 | // 4 | #include "physical/allocator/OrdinaryAllocator.h" 5 | 6 | // Allocates a fresh uint8_t[size] array and wraps it. NOTE(review): who frees the array depends on the wrapping buffer type - confirm. 7 | std::shared_ptr OrdinaryAllocator::allocate(int size) { 8 | auto * buffer = new uint8_t[size]; 9 | auto bb = std::make_shared(buffer, static_cast(size)); 10 | return bb; 11 | } 12 | -------------------------------------------------------------------------------- /pixels-common/lib/physical/natives/DirectIoLib.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // Created by yuly on 19.04.23. 3 | // 4 | #include "physical/natives/DirectIoLib.h" 5 | 6 | // Stores the block size and precomputes the mask that clears the in-block bits. 7 | DirectIoLib::DirectIoLib(int fsBlockSize) { 8 | this->fsBlockSize = fsBlockSize; 9 | this->fsBlockNotMask = ~((long) fsBlockSize - 1); 10 | } 11 | 12 | std::shared_ptr DirectIoLib::allocateDirectBuffer(long size) { 13 | int toAllocate = blockEnd(size) + (size == 1? 
0: fsBlockSize); 14 | uint8_t * directBufferPointer; 15 | posix_memalign((void **)&directBufferPointer, fsBlockSize, toAllocate); 16 | auto directBuffer = std::make_shared(directBufferPointer, toAllocate, false); 17 | return directBuffer; 18 | } 19 | 20 | std::shared_ptr DirectIoLib::read(int fd, long fileOffset, 21 | std::shared_ptr directBuffer, long length) { 22 | // the file will be read from blockStart(fileOffset), and the first fileDelta bytes should be ignored. 23 | long fileOffsetAligned = blockStart(fileOffset); 24 | long toRead = blockEnd(fileOffset + length) - blockStart(fileOffset); 25 | if(pread(fd, directBuffer->getPointer(), toRead, fileOffsetAligned) == -1) { 26 | throw InvalidArgumentException("DirectIoLib::read: pread fail. "); 27 | } 28 | auto bb = std::make_shared(*directBuffer, 29 | fileOffset - fileOffsetAligned, length); 30 | return bb; 31 | } 32 | 33 | 34 | long DirectIoLib::blockStart(long value) { 35 | return (value & fsBlockNotMask); 36 | } 37 | 38 | long DirectIoLib::blockEnd(long value) { 39 | return (value + fsBlockSize - 1) & fsBlockNotMask; 40 | } 41 | 42 | -------------------------------------------------------------------------------- /pixels-common/lib/physical/natives/PixelsRandomAccessFile.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // Created by yuliangyong on 2023-03-02. 3 | // 4 | #include "physical/natives/PixelsRandomAccessFile.h" 5 | 6 | //std::string PixelsRandomAccessFile::readLine() { 7 | // throw std::runtime_error("read line is not supported"); 8 | //} 9 | // 10 | //std::string PixelsRandomAccessFile::readUTF() { 11 | // throw std::runtime_error("read UTF is not supported"); 12 | //} 13 | 14 | -------------------------------------------------------------------------------- /pixels-common/lib/physical/scheduler/NoopScheduler.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // Created by liyu on 3/8/23. 
3 | // 4 | 5 | #include "physical/scheduler/NoopScheduler.h" 6 | #include "exception/InvalidArgumentException.h" 7 | #include "physical/io/PhysicalLocalReader.h" 8 | 9 | Scheduler * NoopScheduler::instance = nullptr; 10 | 11 | Scheduler * NoopScheduler::Instance() { 12 | if(instance == nullptr) { 13 | instance = new NoopScheduler(); 14 | } 15 | return instance; 16 | } 17 | 18 | std::vector> NoopScheduler::executeBatch(std::shared_ptr reader, 19 | RequestBatch batch, long queryId) { 20 | return executeBatch(reader, batch, {}, queryId); 21 | } 22 | 23 | 24 | std::vector> NoopScheduler::executeBatch(std::shared_ptr reader, RequestBatch batch, 25 | std::vector> reuseBuffers, long queryId) { 26 | auto requests = batch.getRequests(); 27 | std::vector> results; 28 | results.resize(batch.getSize()); 29 | if(ConfigFactory::Instance().boolCheckProperty("localfs.enable.async.io") && reuseBuffers.size() > 0) { 30 | // async read 31 | auto localReader = std::static_pointer_cast(reader); 32 | for(int i = 0; i < batch.getSize(); i++) { 33 | Request request = requests[i]; 34 | localReader->seek(request.start); 35 | results.at(i) = localReader->readAsync(request.length, reuseBuffers.at(i), request.bufferId); 36 | } 37 | localReader->readAsyncSubmit(batch.getSize()); 38 | } else { 39 | // sync read 40 | for(int i = 0; i < batch.getSize(); i++) { 41 | Request request = requests[i]; 42 | reader->seek(request.start); 43 | if(reuseBuffers.size() > 0) { 44 | results.at(i) = reader->readFully(request.length, reuseBuffers.at(i)); 45 | } else { 46 | results.at(i) = reader->readFully(request.length); 47 | } 48 | 49 | } 50 | } 51 | return results; 52 | 53 | } 54 | 55 | 56 | 57 | NoopScheduler::~NoopScheduler() { 58 | delete instance; 59 | instance = nullptr; 60 | } 61 | -------------------------------------------------------------------------------- /pixels-common/lib/physical/scheduler/SortMergeScheduler.cpp: -------------------------------------------------------------------------------- 1 
| // 2 | // Created by yuly on 01.05.23. 3 | // 4 | 5 | #include "physical/scheduler/SortMergeScheduler.h" 6 | #include "utils/ConfigFactory.h" 7 | #include "exception/InvalidArgumentException.h" 8 | 9 | Scheduler * SortMergeScheduler::instance = nullptr; 10 | 11 | Scheduler * SortMergeScheduler::Instance() { 12 | if(instance == nullptr) { 13 | instance = new SortMergeScheduler(); 14 | } 15 | return instance; 16 | } 17 | 18 | 19 | std::vector> SortMergeScheduler::executeBatch(std::shared_ptr reader, 20 | RequestBatch batch, long queryId) { 21 | return executeBatch(reader, batch, {}, queryId); 22 | } 23 | 24 | 25 | std::vector> SortMergeScheduler::executeBatch(std::shared_ptr reader, RequestBatch batch, 26 | std::vector> reuseBuffers, long queryId) { 27 | if(batch.getSize() < 0) { 28 | return std::vector>{}; 29 | } 30 | auto mergeRequests = sortMerge(batch, queryId); 31 | std::vector> bbs; 32 | for(auto merged : mergeRequests) { 33 | reader->seek(merged->getStart()); 34 | auto buffer = reader->readFully(merged->getLength()); 35 | auto separateBuffers = merged->complete(buffer); 36 | bbs.insert(bbs.end(), separateBuffers.begin(), separateBuffers.end()); 37 | } 38 | return bbs; 39 | } 40 | 41 | SortMergeScheduler::SortMergeScheduler() { 42 | 43 | } 44 | 45 | std::vector> SortMergeScheduler::sortMerge(RequestBatch batch, long queryId) { 46 | auto requests = batch.getRequests(); 47 | std::sort(requests.begin(), requests.end(), [](const Request& lhs, const Request& rhs) { 48 | return lhs.start < rhs.start; 49 | }); 50 | 51 | std::vector> mergedRequests; 52 | auto mr1 = std::make_shared(requests.at(0)); 53 | auto mr2 = mr1; 54 | for(int i = 1; i < batch.getSize(); i++) { 55 | mr2 = mr1->merge(requests.at(i)); 56 | if(mr1 == mr2) { 57 | continue; 58 | } 59 | mergedRequests.emplace_back(mr1); 60 | mr1 = mr2; 61 | } 62 | mergedRequests.emplace_back(mr2); 63 | return mergedRequests; 64 | } 65 | -------------------------------------------------------------------------------- 
/pixels-common/lib/physical/storage/LocalFSProvider.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 PixelsDB. 3 | * 4 | * This file is part of Pixels. 5 | * 6 | * Pixels is free software: you can redistribute it and/or modify 7 | * it under the terms of the Affero GNU General Public License as 8 | * published by the Free Software Foundation, either version 3 of 9 | * the License, or (at your option) any later version. 10 | * 11 | * Pixels is distributed in the hope that it will be useful, 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | * Affero GNU General Public License for more details. 15 | * 16 | * You should have received a copy of the Affero GNU General Public 17 | * License along with Pixels. If not, see 18 | * . 19 | */ 20 | 21 | // 22 | // Created by gengdy on 24-11-25. 23 | // 24 | 25 | #include "physical/storage/LocalFSProvider.h" 26 | #include "physical/storage/PhysicalLocalWriter.h" 27 | 28 | std::shared_ptr 29 | LocalFSProvider::createWriter(const std::string &path, std::shared_ptr option) { 30 | return std::static_pointer_cast(std::make_shared(path, option->isOverwrite())); 31 | } -------------------------------------------------------------------------------- /pixels-common/lib/physical/storage/PhysicalLocalWriter.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 PixelsDB. 3 | * 4 | * This file is part of Pixels. 5 | * 6 | * Pixels is free software: you can redistribute it and/or modify 7 | * it under the terms of the Affero GNU General Public License as 8 | * published by the Free Software Foundation, either version 3 of 9 | * the License, or (at your option) any later version. 
10 | * 11 | * Pixels is distributed in the hope that it will be useful, 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | * Affero GNU General Public License for more details. 15 | * 16 | * You should have received a copy of the Affero GNU General Public 17 | * License along with Pixels. If not, see 18 | * . 19 | */ 20 | 21 | // 22 | // Created by gengdy on 24-11-25. 23 | // 24 | 25 | #include "physical/storage/PhysicalLocalWriter.h" 26 | #include "utils/Constants.h" 27 | 28 | PhysicalLocalWriter::PhysicalLocalWriter(const std::string &path, bool overwrite) { 29 | this->position = 0; 30 | this->path = path; 31 | this->rawWriter.open(this->path, overwrite ? std::ios::trunc : std::ios::app); 32 | if (!this->rawWriter.is_open()) { 33 | throw std::runtime_error("Failed to open file: " + this->path); 34 | } 35 | } 36 | 37 | std::int64_t PhysicalLocalWriter::prepare(int length) { 38 | return position; 39 | } 40 | 41 | std::int64_t PhysicalLocalWriter::append(const uint8_t *buffer, int offset, int length) { 42 | std::int64_t start = position; 43 | rawWriter.write(reinterpret_cast(buffer + offset), length); 44 | position += length; 45 | return start; 46 | } 47 | 48 | 49 | 50 | void PhysicalLocalWriter::close() { 51 | rawWriter.close(); 52 | } 53 | 54 | void PhysicalLocalWriter::flush() { 55 | rawWriter.flush(); 56 | } 57 | 58 | std::string PhysicalLocalWriter::getPath() const { 59 | return path; 60 | } 61 | 62 | int PhysicalLocalWriter::getBufferSize() const { 63 | return Constants::LOCAL_BUFFER_SIZE; 64 | } 65 | 66 | std::int64_t PhysicalLocalWriter::append(std::shared_ptr byteBuffer) { 67 | byteBuffer->filp(); 68 | int length=byteBuffer->bytesRemaining(); 69 | 70 | 71 | return append(byteBuffer->getPointer(),byteBuffer->getBufferOffset(),length); 72 | } 73 | -------------------------------------------------------------------------------- 
/pixels-common/lib/profiler/CountProfiler.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // Created by yuly on 03.05.23. 3 | // 4 | 5 | #include "profiler/CountProfiler.h" 6 | 7 | CountProfiler &CountProfiler::Instance() { 8 | static CountProfiler instance; 9 | return instance; 10 | } 11 | 12 | void CountProfiler::Count(const std::string &label) { 13 | if constexpr(enableProfile) { 14 | std::unique_lock parallel_lock(lock); 15 | if (result.find(label) != result.end()) { 16 | result[label] += 1; 17 | } else if (label.size() == 0) { 18 | throw InvalidArgumentException( 19 | "TimeProfiler::Start: Label cannot be the empty string. "); 20 | } else { 21 | result[label] = 1; 22 | } 23 | } 24 | } 25 | 26 | void CountProfiler::Count(const std::string &label, int num) { 27 | if constexpr(enableProfile) { 28 | std::unique_lock parallel_lock(lock); 29 | if (result.find(label) != result.end()) { 30 | result[label] += num; 31 | } else if (label.size() == 0) { 32 | throw InvalidArgumentException( 33 | "TimeProfiler::Start: Label cannot be the empty string. "); 34 | } else { 35 | result[label] = num; 36 | } 37 | } 38 | } 39 | 40 | void CountProfiler::Print() { 41 | if constexpr(enableProfile) { 42 | for(auto iter: result) { 43 | std::cout<< "The count of " < parallel_lock(lock); 54 | if(result.find(label) != result.end()) { 55 | return result[label]; 56 | } else { 57 | throw InvalidArgumentException( 58 | "CountProfiler::Get: The label is not contained in CountProfiler. "); 59 | } 60 | } 61 | 62 | -------------------------------------------------------------------------------- /pixels-common/lib/utils/ColumnSizeCSVReader.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // Created by liyu on 1/24/24. 
3 | // 4 | 5 | #include "utils/ColumnSizeCSVReader.h" 6 | 7 | int ColumnSizeCSVReader::get(const std::string & columnName) { 8 | if (!colSize.count(columnName)) { 9 | throw InvalidArgumentException("ColumnSizeCSVReader::get: wrong column name!"); 10 | } else { 11 | return colSize[columnName]; 12 | } 13 | } -------------------------------------------------------------------------------- /pixels-common/lib/utils/ConfigFactory.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // Created by yuly on 19.04.23. 3 | // 4 | 5 | #include "utils/ConfigFactory.h" 6 | 7 | ConfigFactory & ConfigFactory::Instance() { 8 | static ConfigFactory instance; 9 | return instance; 10 | } 11 | 12 | ConfigFactory::ConfigFactory() { 13 | if(std::getenv("PIXELS_SRC") == nullptr) { 14 | throw InvalidArgumentException("The environment variable 'PIXELS_SRC' is not set. "); 15 | } 16 | pixelsSrc = std::string(std::getenv("PIXELS_SRC")); 17 | std::cout<<"PIXELS_SRC is "< 11 | * A VectorizedRowBatch is a set of rows, organized with each column 12 | * as a vector. It is the unit of query execution, organized to minimize 13 | * the cost per row and achieve high cycles-per-instruction. 14 | * The major fields are public by design to allow fast and convenient 15 | * access by the vectorized query execution code. 16 | */ 17 | 18 | #include 19 | #include 20 | #include "vector/ColumnVector.h" 21 | #include 22 | 23 | class VectorizedRowBatch { 24 | public: 25 | int numCols; // number of columns 26 | std::vector> cols; // a vector for each column 27 | int rowCount; // number of rows that qualify, i.e., haven't been filtered out 28 | static int DEFAULT_SIZE; 29 | int maxSize; // capacity, i.e. 
the maximum number of rows can be stored in this row batch 30 | 31 | explicit VectorizedRowBatch(int nCols, int size = DEFAULT_SIZE); 32 | ~VectorizedRowBatch(); 33 | void close(); 34 | int getMaxSize(); 35 | void reset(); 36 | void resize(int size); 37 | uint64_t position(); 38 | uint64_t remaining(); 39 | void increment(int size); 40 | int count(); 41 | bool isEmpty(); 42 | bool isFull(); 43 | int freeSlots(); 44 | bool isEndOfFile(); 45 | private: 46 | bool closed; 47 | int current; // The current pointer of VectorizedRowBatch. 48 | }; 49 | #endif //PIXELS_VECTORIZEDROWBATCH_H 50 | -------------------------------------------------------------------------------- /pixels-core/include/writer/ByteColumnWriter.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 PixelsDB. 3 | * 4 | * This file is part of Pixels. 5 | * 6 | * Pixels is free software: you can redistribute it and/or modify 7 | * it under the terms of the Affero GNU General Public License as 8 | * published by the Free Software Foundation, either version 3 of 9 | * the License, or (at your option) any later version. 10 | * 11 | * Pixels is distributed in the hope that it will be useful, 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | * Affero GNU General Public License for more details. 15 | * 16 | * You should have received a copy of the Affero GNU General Public 17 | * License along with Pixels. If not, see 18 | * . 19 | */ 20 | 21 | // 22 | // Created by whz on 11/19/24. 
23 | // 24 | 25 | #ifndef DUCKDB_BYTECOLUMNWRITER_H 26 | #define DUCKDB_BYTECOLUMNWRITER_H 27 | 28 | #endif // DUCKDB_BYTECOLUMNWRITER_H 29 | -------------------------------------------------------------------------------- /pixels-core/include/writer/CharColumnWriter.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 PixelsDB. 3 | * 4 | * This file is part of Pixels. 5 | * 6 | * Pixels is free software: you can redistribute it and/or modify 7 | * it under the terms of the Affero GNU General Public License as 8 | * published by the Free Software Foundation, either version 3 of 9 | * the License, or (at your option) any later version. 10 | * 11 | * Pixels is distributed in the hope that it will be useful, 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | * Affero GNU General Public License for more details. 15 | * 16 | * You should have received a copy of the Affero GNU General Public 17 | * License along with Pixels. If not, see 18 | * . 19 | */ 20 | 21 | // 22 | // Created by whz on 11/19/24. 23 | // 24 | 25 | #ifndef DUCKDB_CHARCOLUMNWRITER_H 26 | #define DUCKDB_CHARCOLUMNWRITER_H 27 | 28 | #endif // DUCKDB_CHARCOLUMNWRITER_H 29 | -------------------------------------------------------------------------------- /pixels-core/include/writer/ColumnWriterBuilder.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 PixelsDB. 3 | * 4 | * This file is part of Pixels. 5 | * 6 | * Pixels is free software: you can redistribute it and/or modify 7 | * it under the terms of the Affero GNU General Public License as 8 | * published by the Free Software Foundation, either version 3 of 9 | * the License, or (at your option) any later version. 
10 | * 11 | * Pixels is distributed in the hope that it will be useful, 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | * Affero GNU General Public License for more details. 15 | * 16 | * You should have received a copy of the Affero GNU General Public 17 | * License along with Pixels. If not, see 18 | * . 19 | */ 20 | 21 | // 22 | // Created by whz on 24-11-29. 23 | // 24 | 25 | #ifndef PIXELS_COLUMNWRITERBUILDER_H 26 | #define PIXELS_COLUMNWRITERBUILDER_H 27 | #include "writer/ColumnWriter.h" 28 | #include "writer/PixelsWriterOption.h" 29 | 30 | class ColumnWriterBuilder { 31 | public: 32 | static std::shared_ptr 33 | newColumnWriter(std::shared_ptr type, std::shared_ptr writerOption); 34 | }; 35 | #endif // PIXELS_COLUMNWRITERBUILDER_H 36 | 37 | 38 | -------------------------------------------------------------------------------- /pixels-core/include/writer/DateColumnWriter.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 PixelsDB. 3 | * 4 | * This file is part of Pixels. 5 | * 6 | * Pixels is free software: you can redistribute it and/or modify 7 | * it under the terms of the Affero GNU General Public License as 8 | * published by the Free Software Foundation, either version 3 of 9 | * the License, or (at your option) any later version. 10 | * 11 | * Pixels is distributed in the hope that it will be useful, 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | * Affero GNU General Public License for more details. 15 | * 16 | * You should have received a copy of the Affero GNU General Public 17 | * License along with Pixels. If not, see 18 | * . 19 | */ 20 | 21 | // 22 | // Created by whz on 11/19/24. 
23 | // 24 | 25 | #ifndef DUCKDB_DATECOLUMNWRITER_H 26 | #define DUCKDB_DATECOLUMNWRITER_H 27 | 28 | #include "ColumnWriter.h" 29 | #include "encoding/RunLenIntEncoder.h" 30 | 31 | class DateColumnWriter : public ColumnWriter{ 32 | public: 33 | DateColumnWriter(std::shared_ptr type, std::shared_ptr writerOption); 34 | 35 | int write(std::shared_ptr vector, int length) override; 36 | bool decideNullsPadding(std::shared_ptr writerOption) override; 37 | 38 | private: 39 | bool runlengthEncoding; 40 | std::unique_ptr encoder; 41 | std::vector curPixelVector; // current pixel value vector haven't written out yet 42 | 43 | }; 44 | #endif // DUCKDB_DATECOLUMNWRITER_H 45 | -------------------------------------------------------------------------------- /pixels-core/include/writer/DecimalColumnWriter.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 PixelsDB. 3 | * 4 | * This file is part of Pixels. 5 | * 6 | * Pixels is free software: you can redistribute it and/or modify 7 | * it under the terms of the Affero GNU General Public License as 8 | * published by the Free Software Foundation, either version 3 of 9 | * the License, or (at your option) any later version. 10 | * 11 | * Pixels is distributed in the hope that it will be useful, 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | * Affero GNU General Public License for more details. 15 | * 16 | * You should have received a copy of the Affero GNU General Public 17 | * License along with Pixels. If not, see 18 | * . 19 | */ 20 | 21 | // 22 | // Created by whz on 12/9/24. 
23 | // 24 | 25 | #ifndef DUCKDB_DECIMALCOLUMNWRITER_H 26 | #define DUCKDB_DECIMALCOLUMNWRITER_H 27 | #include "encoding/RunLenIntEncoder.h" 28 | #include "ColumnWriter.h" 29 | #include "utils/EncodingUtils.h" 30 | 31 | class DecimalColumnWriter :public ColumnWriter{ 32 | public: 33 | DecimalColumnWriter(std::shared_ptr type,std::shared_ptr writerOption); 34 | int write(std::shared_ptr vector, int length) override; 35 | bool decideNullsPadding(std::shared_ptr writerOption) override; 36 | 37 | //void close() override; 38 | //void newPixel() override; 39 | //void writeCurPartTime(std::shared_ptr columnVector, long* values, int curPartLength, int curPartOffset); 40 | 41 | //pixels::proto::ColumnEncoding getColumnChunkEncoding() const; 42 | 43 | private: 44 | bool runlengthEncoding; 45 | std::unique_ptr encoder; 46 | std::vector curPixelVector; // current pixel value vector haven't written out yet 47 | }; 48 | 49 | #endif //DUCKDB_DECIMALCOLUMNWRITER_H 50 | -------------------------------------------------------------------------------- /pixels-core/include/writer/DoubleColumnWriter.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 PixelsDB. 3 | * 4 | * This file is part of Pixels. 5 | * 6 | * Pixels is free software: you can redistribute it and/or modify 7 | * it under the terms of the Affero GNU General Public License as 8 | * published by the Free Software Foundation, either version 3 of 9 | * the License, or (at your option) any later version. 10 | * 11 | * Pixels is distributed in the hope that it will be useful, 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | * Affero GNU General Public License for more details. 15 | * 16 | * You should have received a copy of the Affero GNU General Public 17 | * License along with Pixels. If not, see 18 | * . 19 | */ 20 | 21 | // 22 | // Created by whz on 11/19/24. 
23 | // 24 | 25 | #ifndef DUCKDB_DOUBLECOLUMNWRITER_H 26 | #define DUCKDB_DOUBLECOLUMNWRITER_H 27 | 28 | #endif // DUCKDB_DOUBLECOLUMNWRITER_H 29 | -------------------------------------------------------------------------------- /pixels-core/include/writer/FloatColumnWriter.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 PixelsDB. 3 | * 4 | * This file is part of Pixels. 5 | * 6 | * Pixels is free software: you can redistribute it and/or modify 7 | * it under the terms of the Affero GNU General Public License as 8 | * published by the Free Software Foundation, either version 3 of 9 | * the License, or (at your option) any later version. 10 | * 11 | * Pixels is distributed in the hope that it will be useful, 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | * Affero GNU General Public License for more details. 15 | * 16 | * You should have received a copy of the Affero GNU General Public 17 | * License along with Pixels. If not, see 18 | * . 19 | */ 20 | 21 | // 22 | // Created by whz on 11/19/24. 23 | // 24 | 25 | #ifndef DUCKDB_FLOATCOLUMNWRITER_H 26 | #define DUCKDB_FLOATCOLUMNWRITER_H 27 | 28 | #endif // DUCKDB_FLOATCOLUMNWRITER_H 29 | -------------------------------------------------------------------------------- /pixels-core/include/writer/IntegerColumnWriter.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 PixelsDB. 3 | * 4 | * This file is part of Pixels. 5 | * 6 | * Pixels is free software: you can redistribute it and/or modify 7 | * it under the terms of the Affero GNU General Public License as 8 | * published by the Free Software Foundation, either version 3 of 9 | * the License, or (at your option) any later version. 
10 | * 11 | * Pixels is distributed in the hope that it will be useful, 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | * Affero GNU General Public License for more details. 15 | * 16 | * You should have received a copy of the Affero GNU General Public 17 | * License along with Pixels. If not, see 18 | * . 19 | */ 20 | 21 | // 22 | // Created by whz on 11/19/24. 23 | // 24 | 25 | #ifndef DUCKDB_INTEGERCOLUMNWRITER_H 26 | #define DUCKDB_INTEGERCOLUMNWRITER_H 27 | #include "encoding/RunLenIntEncoder.h" 28 | #include "ColumnWriter.h" 29 | 30 | class IntegerColumnWriter : public ColumnWriter{ 31 | public: 32 | 33 | IntegerColumnWriter(std::shared_ptr type, std::shared_ptr writerOption); 34 | 35 | int write(std::shared_ptr vector, int length) override; 36 | void close() override; 37 | void newPixel() override; 38 | void writeCurPartLong(std::shared_ptr columnVector, long* values, int curPartLength, int curPartOffset); 39 | bool decideNullsPadding(std::shared_ptr writerOption) override; 40 | pixels::proto::ColumnEncoding getColumnChunkEncoding() const override; 41 | private: 42 | bool isLong; //current column type is long or int, used for the first pixel 43 | bool runlengthEncoding; 44 | std::unique_ptr encoder; 45 | std::vector curPixelVector; // current pixel value vector haven't written out yet 46 | 47 | }; 48 | #endif // DUCKDB_INTEGERCOLUMNWRITER_H 49 | -------------------------------------------------------------------------------- /pixels-core/include/writer/PixelsWriterOption.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 PixelsDB. 3 | * 4 | * This file is part of Pixels. 
5 | * 6 | * Pixels is free software: you can redistribute it and/or modify 7 | * it under the terms of the Affero GNU General Public License as 8 | * published by the Free Software Foundation, either version 3 of 9 | * the License, or (at your option) any later version. 10 | * 11 | * Pixels is distributed in the hope that it will be useful, 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | * Affero GNU General Public License for more details. 15 | * 16 | * You should have received a copy of the Affero GNU General Public 17 | * License along with Pixels. If not, see 18 | * . 19 | */ 20 | 21 | // 22 | // Created by gengdy on 24-11-25. 23 | // 24 | 25 | #ifndef PIXELS_PIXELSWRITEROPTION_H 26 | #define PIXELS_PIXELSWRITEROPTION_H 27 | 28 | #include "encoding/EncodingLevel.h" 29 | #include 30 | #include "physical/natives/ByteOrder.h" 31 | 32 | class PixelsWriterOption : public std::enable_shared_from_this { 33 | public: 34 | PixelsWriterOption(); 35 | int getPixelsStride() const; 36 | std::shared_ptr setPixelsStride(int pixelsStride); 37 | EncodingLevel getEncodingLevel() const; 38 | std::shared_ptr setEncodingLevel(EncodingLevel encodingLevel); 39 | bool isNullsPadding() const; 40 | std::shared_ptr setNullsPadding(bool nullsPadding); 41 | private: 42 | int pixelsStride; 43 | EncodingLevel encodingLevel; 44 | /** 45 | * Whether nulls positions in column are padded by arbitrary values and occupy storage and memory space. 
46 | */ 47 | bool nullsPadding; 48 | ByteOrder byteOrder{ByteOrder::PIXELS_LITTLE_ENDIAN}; 49 | public: 50 | ByteOrder getByteOrder() const; 51 | void setByteOrder(ByteOrder byte_order); 52 | }; 53 | #endif //PIXELS_PIXELSWRITEROPTION_H 54 | -------------------------------------------------------------------------------- /pixels-core/include/writer/StringColumnWriter.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 PixelsDB. 3 | * 4 | * This file is part of Pixels. 5 | * 6 | * Pixels is free software: you can redistribute it and/or modify 7 | * it under the terms of the Affero GNU General Public License as 8 | * published by the Free Software Foundation, either version 3 of 9 | * the License, or (at your option) any later version. 10 | * 11 | * Pixels is distributed in the hope that it will be useful, 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | * Affero GNU General Public License for more details. 15 | * 16 | * You should have received a copy of the Affero GNU General Public 17 | * License along with Pixels. If not, see 18 | * . 19 | */ 20 | 21 | // 22 | // Created by whz on 11/19/24. 
23 | // 24 | 25 | #ifndef DUCKDB_STRINGCOLUMNWRITER_H 26 | #define DUCKDB_STRINGCOLUMNWRITER_H 27 | 28 | #include "ColumnWriter.h" 29 | #include "utils/DynamicIntArray.h" 30 | #include "utils/EncodingUtils.h" 31 | #include "encoding/RunLenIntEncoder.h" 32 | 33 | class StringColumnWriter : public ColumnWriter { 34 | public: 35 | StringColumnWriter(std::shared_ptr type, std::shared_ptr writerOption); 36 | 37 | // vector should be converted to BinaryColumnVector 38 | int write(std::shared_ptr vector,int length) override; 39 | void close() override; 40 | void newPixels() ; 41 | 42 | bool decideNullsPadding(std::shared_ptr writerOption) override; 43 | 44 | void writeCurPartWithoutDict(std::shared_ptr writerOption, std::vector& values, 45 | int* vLens,int* vOffsets,int curPartLength,int curPartOffset); 46 | 47 | void flush() override; 48 | 49 | //pixels::proto::ColumnEncoding getColumnChunkEncoding(); 50 | 51 | void flushStarts(); 52 | 53 | 54 | private: 55 | std::vector curPixelVector; 56 | bool runlengthEncoding; 57 | bool dictionaryEncoding; 58 | std::shared_ptr startsArray; 59 | std::shared_ptr encodingUtils; 60 | std::unique_ptr encoder; 61 | std::shared_ptr writerOption; 62 | int startOffset=0; 63 | 64 | 65 | }; 66 | #endif // DUCKDB_STRINGCOLUMNWRITER_H 67 | -------------------------------------------------------------------------------- /pixels-core/include/writer/TimestampColumnWriter.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 PixelsDB. 3 | * 4 | * This file is part of Pixels. 5 | * 6 | * Pixels is free software: you can redistribute it and/or modify 7 | * it under the terms of the Affero GNU General Public License as 8 | * published by the Free Software Foundation, either version 3 of 9 | * the License, or (at your option) any later version. 
10 | * 11 | * Pixels is distributed in the hope that it will be useful, 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | * Affero GNU General Public License for more details. 15 | * 16 | * You should have received a copy of the Affero GNU General Public 17 | * License along with Pixels. If not, see 18 | * . 19 | */ 20 | 21 | // 22 | // Created by whz on 12/9/24. 23 | // 24 | 25 | #ifndef DUCKDB_TIMESTAMPCOLUMNWRITER_H 26 | #define DUCKDB_TIMESTAMPCOLUMNWRITER_H 27 | #include "ColumnWriter.h" 28 | #include "encoding/RunLenIntEncoder.h" 29 | 30 | class TimestampColumnWriter : public ColumnWriter{ 31 | public: 32 | TimestampColumnWriter(std::shared_ptr type, std::shared_ptr writerOption); 33 | 34 | int write(std::shared_ptr vector, int length) override; 35 | bool decideNullsPadding(std::shared_ptr writerOption) override; 36 | private: 37 | bool runlengthEncoding; 38 | std::unique_ptr encoder; 39 | std::vector curPixelVector; // current pixel value vector haven't written out yet 40 | 41 | }; 42 | #endif //DUCKDB_TIMESTAMPCOLUMNWRITER_H 43 | -------------------------------------------------------------------------------- /pixels-core/lib/Category.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // Created by liyu on 3/17/23. 3 | // 4 | 5 | #include "Category.h" 6 | 7 | -------------------------------------------------------------------------------- /pixels-core/lib/PixelsFooterCache.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // Created by liyu on 3/14/23. 
3 | // 4 | #include "PixelsFooterCache.h" 5 | #include "exception/InvalidArgumentException.h" 6 | 7 | PixelsFooterCache::PixelsFooterCache() { 8 | } 9 | 10 | void PixelsFooterCache::putFileTail(const std::string& id, std::shared_ptr fileTail) { 11 | fileTailCacheMap[id] = fileTail; 12 | } 13 | 14 | std::shared_ptr PixelsFooterCache::getFileTail(const std::string& id) { 15 | if(fileTailCacheMap.find(id) != fileTailCacheMap.end()) { 16 | return fileTailCacheMap[id]; 17 | } else { 18 | throw InvalidArgumentException("No such a FileTail id."); 19 | } 20 | } 21 | 22 | void PixelsFooterCache::putRGFooter(const std::string& id, std::shared_ptr footer) { 23 | rowGroupFooterCacheMap[id] = footer; 24 | } 25 | 26 | bool PixelsFooterCache::containsFileTail(const std::string &id) { 27 | return fileTailCacheMap.find(id) != fileTailCacheMap.end(); 28 | } 29 | 30 | std::shared_ptr PixelsFooterCache::getRGFooter(const std::string& id) { 31 | if(rowGroupFooterCacheMap.find(id) != rowGroupFooterCacheMap.end()) { 32 | return rowGroupFooterCacheMap[id]; 33 | } else { 34 | throw InvalidArgumentException("No such a RGFooter id."); 35 | } 36 | } 37 | 38 | bool PixelsFooterCache::containsRGFooter(const std::string &id) { 39 | return rowGroupFooterCacheMap.find(id) != rowGroupFooterCacheMap.end(); 40 | } 41 | 42 | 43 | -------------------------------------------------------------------------------- /pixels-core/lib/PixelsVersion.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // Created by liyu on 3/13/23. 3 | // 4 | 5 | #include "PixelsVersion.h" 6 | #include "exception/InvalidArgumentException.h" 7 | 8 | int PixelsVersion::getVersion() { 9 | return version; 10 | } 11 | 12 | PixelsVersion::PixelsVersion(int v) { 13 | version = v; 14 | } 15 | 16 | PixelsVersion::Version PixelsVersion::from(int v) { 17 | if (v == 1) { 18 | return V1; 19 | } else { 20 | throw InvalidArgumentException("Wrong pixels version. 
"); 21 | } 22 | } 23 | 24 | bool PixelsVersion::matchVersion(PixelsVersion::Version otherVersion) { 25 | return otherVersion == V1; 26 | } 27 | 28 | PixelsVersion::Version PixelsVersion::currentVersion() { 29 | return V1; 30 | } 31 | 32 | 33 | 34 | 35 | -------------------------------------------------------------------------------- /pixels-core/lib/encoding/Decoder.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // Created by liyu on 3/20/23. 3 | // 4 | 5 | #include "encoding/Decoder.h" 6 | 7 | 8 | -------------------------------------------------------------------------------- /pixels-core/lib/encoding/Encoder.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // Created by liyu on 3/21/23. 3 | // 4 | 5 | #include "encoding/Encoder.h" 6 | 7 | -------------------------------------------------------------------------------- /pixels-core/lib/encoding/EncodingLevel.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 PixelsDB. 3 | * 4 | * This file is part of Pixels. 5 | * 6 | * Pixels is free software: you can redistribute it and/or modify 7 | * it under the terms of the Affero GNU General Public License as 8 | * published by the Free Software Foundation, either version 3 of 9 | * the License, or (at your option) any later version. 10 | * 11 | * Pixels is distributed in the hope that it will be useful, 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | * Affero GNU General Public License for more details. 15 | * 16 | * You should have received a copy of the Affero GNU General Public 17 | * License along with Pixels. If not, see 18 | * . 19 | */ 20 | 21 | // 22 | // Created by gengdy on 24-11-18. 
23 | // 24 | 25 | #include 26 | #include 27 | 28 | EncodingLevel::EncodingLevel() {} 29 | 30 | EncodingLevel::EncodingLevel(Level level) { 31 | this->level = level; 32 | } 33 | 34 | EncodingLevel::EncodingLevel(int level) { 35 | if (!isValid(level)) { 36 | throw std::invalid_argument("invalid encoding level " + std::to_string(level)); 37 | } 38 | this->level = static_cast(level); 39 | } 40 | 41 | EncodingLevel::EncodingLevel(const std::string &level) { 42 | if (level.empty()) { 43 | throw std::invalid_argument("level is null"); 44 | } 45 | if (!isValid(std::stoi(level))) { 46 | throw std::invalid_argument("invalid encoding level " + level); 47 | } 48 | this->level = static_cast(std::stoi(level)); 49 | } 50 | 51 | EncodingLevel EncodingLevel::from(int level) { 52 | return EncodingLevel(level); 53 | } 54 | 55 | EncodingLevel EncodingLevel::from(const std::string &level) { 56 | return EncodingLevel(level); 57 | } 58 | 59 | bool EncodingLevel::isValid(int level) { 60 | return level >= 0 && level <= 2; 61 | } 62 | 63 | bool EncodingLevel::ge(int level) const { 64 | if (!isValid(level)) { 65 | throw std::invalid_argument("level is invalid"); 66 | } 67 | return static_cast(this->level) >= level; 68 | } 69 | 70 | bool EncodingLevel::ge(const EncodingLevel &encodingLevel) const { 71 | return this->level >= encodingLevel.level; 72 | } 73 | 74 | bool EncodingLevel::equals(int level) const { 75 | if (!isValid(level)) { 76 | throw std::invalid_argument("level is invalid"); 77 | } 78 | return static_cast(this->level) == level; 79 | } 80 | 81 | bool EncodingLevel::equals(const EncodingLevel &encodingLevel) const { 82 | return this->level == encodingLevel.level; 83 | } -------------------------------------------------------------------------------- /pixels-core/lib/exception/PixelsFileMagicInvalidException.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // Created by liyu on 3/14/23. 
3 | // 4 | 5 | #include "exception/PixelsFileMagicInvalidException.h" 6 | 7 | PixelsFileMagicInvalidException::PixelsFileMagicInvalidException(const std::string &magic) { 8 | std::cout<<"The file magic " 9 | << magic 10 | <<" is not valid." 11 | < type) 8 | :StringColumnReader(type) { 9 | // TODO: implement 10 | } 11 | -------------------------------------------------------------------------------- /pixels-core/lib/reader/ColumnReaderBuilder.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // Created by liyu on 3/19/23. 3 | // 4 | 5 | #include "reader/ColumnReaderBuilder.h" 6 | #include "exception/InvalidArgumentException.h" 7 | 8 | std::shared_ptr ColumnReaderBuilder::newColumnReader(std::shared_ptr type) { 9 | switch (type->getCategory()) { 10 | // case TypeDescription::BOOLEAN: 11 | // break; 12 | // case TypeDescription::BYTE: 13 | // break; 14 | case TypeDescription::SHORT: 15 | case TypeDescription::INT: 16 | case TypeDescription::LONG: 17 | return std::make_shared(type); 18 | // case TypeDescription::FLOAT: 19 | // break; 20 | // case TypeDescription::DOUBLE: 21 | // break; 22 | case TypeDescription::DECIMAL: { 23 | if (type->getPrecision() <= TypeDescription::SHORT_DECIMAL_MAX_PRECISION) { 24 | return std::make_shared(type); 25 | } else { 26 | throw InvalidArgumentException("Currently we didn't implement LongDecimalColumnVector."); 27 | } 28 | } 29 | case TypeDescription::STRING: 30 | return std::make_shared(type); 31 | case TypeDescription::DATE: 32 | return std::make_shared(type); 33 | // case TypeDescription::TIME: 34 | // break; 35 | case TypeDescription::TIMESTAMP: 36 | return std::make_shared(type); 37 | // case TypeDescription::VARBINARY: 38 | // break; 39 | // case TypeDescription::BINARY: 40 | // break; 41 | case TypeDescription::VARCHAR: 42 | return std::make_shared(type); 43 | case TypeDescription::CHAR: 44 | return std::make_shared(type); 45 | // case TypeDescription::STRUCT: 46 | // break; 47 | 
default: 48 | throw InvalidArgumentException("bad column type in ColumnReaderBuilder: " + std::to_string(type->getCategory())); 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /pixels-core/lib/reader/DateColumnReader.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // Created by yuly on 06.04.23. 3 | // 4 | 5 | #include "reader/DateColumnReader.h" 6 | 7 | 8 | DateColumnReader::DateColumnReader(std::shared_ptr type) : ColumnReader(type) { 9 | // TODO: implement 10 | } 11 | 12 | void DateColumnReader::close() { 13 | 14 | } 15 | 16 | void DateColumnReader::read(std::shared_ptr input, pixels::proto::ColumnEncoding & encoding, int offset, 17 | int size, int pixelStride, int vectorIndex, std::shared_ptr vector, 18 | pixels::proto::ColumnChunkIndex & chunkIndex, std::shared_ptr filterMask) { 19 | std::shared_ptr columnVector = 20 | std::static_pointer_cast(vector); 21 | if(offset == 0) { 22 | decoder = std::make_shared(input, true); 23 | elementIndex = 0; 24 | isNullOffset = chunkIndex.isnulloffset(); 25 | } 26 | 27 | int pixelId = elementIndex / pixelStride; 28 | bool hasNull = chunkIndex.pixelstatistics(pixelId).statistic().hasnull(); 29 | setValid(input, pixelStride, vector, pixelId, hasNull); 30 | 31 | if(encoding.kind() == pixels::proto::ColumnEncoding_Kind_RUNLENGTH) { 32 | for (int i = 0; i < size; i++) { 33 | if (elementIndex % pixelStride == 0) { 34 | int pixelId = elementIndex / pixelStride; 35 | } 36 | columnVector->set(i + vectorIndex, (int) decoder->next()); 37 | elementIndex++; 38 | } 39 | } else { 40 | columnVector->dates = (int *)(input->getPointer() + input->getReadPos()); 41 | input->setReadPos(input->getReadPos() + size * sizeof(int)); 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /pixels-core/lib/reader/TimestampColumnReader.cpp: 
-------------------------------------------------------------------------------- 1 | // 2 | // Created by liyu on 12/23/23. 3 | // 4 | 5 | #include "reader/TimestampColumnReader.h" 6 | 7 | TimestampColumnReader::TimestampColumnReader(std::shared_ptr type) : ColumnReader(type) { 8 | 9 | } 10 | 11 | void TimestampColumnReader::close() { 12 | 13 | } 14 | 15 | void TimestampColumnReader::read(std::shared_ptr input, pixels::proto::ColumnEncoding &encoding, int offset, 16 | int size, int pixelStride, int vectorIndex, std::shared_ptr vector, 17 | pixels::proto::ColumnChunkIndex &chunkIndex, 18 | std::shared_ptr filterMask) { 19 | std::shared_ptr columnVector = 20 | std::static_pointer_cast(vector); 21 | // if read from start, init the stream and decoder 22 | if(offset == 0) { 23 | decoder = std::make_shared(input, true); 24 | ColumnReader::elementIndex = 0; 25 | isNullOffset = chunkIndex.isnulloffset(); 26 | } 27 | 28 | int pixelId = elementIndex / pixelStride; 29 | bool hasNull = chunkIndex.pixelstatistics(pixelId).statistic().hasnull(); 30 | setValid(input, pixelStride, vector, pixelId, hasNull); 31 | 32 | if(encoding.kind() == pixels::proto::ColumnEncoding_Kind_RUNLENGTH) { 33 | for (int i = 0; i < size; i++) { 34 | if (elementIndex % pixelStride == 0) { 35 | int pixelId = elementIndex / pixelStride; 36 | } 37 | columnVector->set(i + vectorIndex, decoder->next()); 38 | elementIndex++; 39 | } 40 | } else { 41 | columnVector->times = (int64_t *)(input->getPointer() + input->getReadPos()); 42 | input->setReadPos(input->getReadPos() + size * sizeof(int64_t)); 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /pixels-core/lib/reader/VarcharColumnReader.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // Created by liyu on 3/19/23. 
3 | // 4 | 5 | #include "reader/VarcharColumnReader.h" 6 | 7 | VarcharColumnReader::VarcharColumnReader(std::shared_ptr type) : StringColumnReader(type) { 8 | // TODO: implement 9 | } 10 | 11 | -------------------------------------------------------------------------------- /pixels-core/lib/vector/ByteColumnVector.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // Created by liyu on 3/17/23. 3 | // 4 | 5 | #include "vector/ByteColumnVector.h" 6 | 7 | ByteColumnVector::ByteColumnVector(int len, bool encoding): ColumnVector(len, encoding) { 8 | vector = new uint8_t[len]; 9 | memoryUsage += (long) sizeof(uint8_t) * len; 10 | } 11 | 12 | void ByteColumnVector::close() { 13 | if(!closed) { 14 | ColumnVector::close(); 15 | delete[] vector; 16 | vector = nullptr; 17 | } 18 | } -------------------------------------------------------------------------------- /pixels-core/lib/writer/ByteColumnWriter.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 PixelsDB. 3 | * 4 | * This file is part of Pixels. 5 | * 6 | * Pixels is free software: you can redistribute it and/or modify 7 | * it under the terms of the Affero GNU General Public License as 8 | * published by the Free Software Foundation, either version 3 of 9 | * the License, or (at your option) any later version. 10 | * 11 | * Pixels is distributed in the hope that it will be useful, 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | * Affero GNU General Public License for more details. 15 | * 16 | * You should have received a copy of the Affero GNU General Public 17 | * License along with Pixels. If not, see 18 | * . 
19 | */ -------------------------------------------------------------------------------- /pixels-core/lib/writer/CharColumnWriter.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 PixelsDB. 3 | * 4 | * This file is part of Pixels. 5 | * 6 | * Pixels is free software: you can redistribute it and/or modify 7 | * it under the terms of the Affero GNU General Public License as 8 | * published by the Free Software Foundation, either version 3 of 9 | * the License, or (at your option) any later version. 10 | * 11 | * Pixels is distributed in the hope that it will be useful, 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | * Affero GNU General Public License for more details. 15 | * 16 | * You should have received a copy of the Affero GNU General Public 17 | * License along with Pixels. If not, see 18 | * . 19 | */ -------------------------------------------------------------------------------- /pixels-core/lib/writer/DateColumnWriter.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 PixelsDB. 3 | * 4 | * This file is part of Pixels. 5 | * 6 | * Pixels is free software: you can redistribute it and/or modify 7 | * it under the terms of the Affero GNU General Public License as 8 | * published by the Free Software Foundation, either version 3 of 9 | * the License, or (at your option) any later version. 10 | * 11 | * Pixels is distributed in the hope that it will be useful, 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | * Affero GNU General Public License for more details. 15 | * 16 | * You should have received a copy of the Affero GNU General Public 17 | * License along with Pixels. If not, see 18 | * . 
19 | */
20 |
21 | #include "writer/DateColumnWriter.h"
22 | #include "utils/BitUtils.h"
23 |
24 | // Passes the column type and the writer options straight to the ColumnWriter base.
25 | DateColumnWriter::DateColumnWriter(std::shared_ptr type, std::shared_ptr writerOption) :
26 |     ColumnWriter(type, writerOption)
27 | {
28 |
29 | }
30 |
31 | // Appends the first `size` entries of `vector` to the output stream as 32-bit integers.
32 | // Null entries are written as a 0 placeholder; other entries are written little- or big-endian
33 | // according to `byteOrder`. newPixel() is called each time `pixelStride` elements have accumulated.
34 | // Returns the write position of the output stream after the last value.
35 | // Throws std::invalid_argument when the cast vector pointer is null.
36 | int DateColumnWriter::write(std::shared_ptr vector, int size)
37 | {
38 |     auto columnVector = std::static_pointer_cast(vector);
39 |
40 |     // static_pointer_cast performs no runtime type check, so this only rejects a null pointer.
41 |     if (!columnVector)
42 |     {
43 |         throw std::invalid_argument("Invalid vector type");
44 |     }
45 |
46 |     int* values = columnVector->dates;
47 |     EncodingUtils encodingUtils;
48 |
49 |     for (int i = 0; i < size; i++) {
50 |         // Store the null flag in the next free slot so every row of the pixel keeps its own flag.
51 |         // NOTE(review): assumes newPixel() resets curPixelIsNullIndex and that isNull holds at
52 |         // least pixelStride flags — confirm in ColumnWriter.
53 |         isNull[curPixelIsNullIndex++] = columnVector->isNull[i];
54 |         curPixelEleIndex++;
55 |
56 |         if (columnVector->isNull[i]) {
57 |             hasNull = true;
58 |             // Placeholder value for a null row; 0 has the same bytes in either byte order.
59 |             encodingUtils.writeIntLE(outputStream, 0);
60 |         }
61 |         else {
62 |             if (byteOrder == ByteOrder::PIXELS_LITTLE_ENDIAN) {
63 |                 encodingUtils.writeIntLE(outputStream, values[i]);
64 |             }
65 |             else {
66 |                 encodingUtils.writeIntBE(outputStream, values[i]);
67 |             }
68 |         }
69 |
70 |         if (curPixelEleIndex >= pixelStride) {
71 |             newPixel();
72 |         }
73 |     }
74 |     return outputStream->getWritePos();
75 | }
76 |
77 | // Uses the nulls-padding setting from the writer options unchanged.
78 | bool DateColumnWriter::decideNullsPadding(std::shared_ptr writerOption)
79 | {
80 |     return writerOption->isNullsPadding();
81 | }
82 |
83 |
84 |
--------------------------------------------------------------------------------
/pixels-core/lib/writer/DecimalColumnWriter.cpp:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2024 PixelsDB.
3 | *
4 | * This file is part of Pixels.
5 | *
6 | * Pixels is free software: you can redistribute it and/or modify
7 | * it under the terms of the Affero GNU General Public License as
8 | * published by the Free Software Foundation, either version 3 of
9 | * the License, or (at your option) any later version.
10 | * 11 | * Pixels is distributed in the hope that it will be useful, 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | * Affero GNU General Public License for more details. 15 | * 16 | * You should have received a copy of the Affero GNU General Public 17 | * License along with Pixels. If not, see 18 | * . 19 | */ 20 | 21 | #include "writer/DecimalColumnWriter.h" 22 | #include "utils/BitUtils.h" 23 | 24 | DecimalColumnWriter::DecimalColumnWriter(std::shared_ptr type, std::shared_ptr writerOption) : 25 | ColumnWriter(type, writerOption)/*, curPixelVector(pixelStride)*/ 26 | { 27 | 28 | } 29 | 30 | int DecimalColumnWriter::write(std::shared_ptr vector, int size) 31 | { 32 | 33 | auto columnVector = std::static_pointer_cast(vector); 34 | 35 | if (!columnVector) 36 | { 37 | throw std::invalid_argument("Invalid vector type"); 38 | } 39 | 40 | long* values = columnVector->vector; 41 | EncodingUtils encodingUtils; 42 | 43 | for (int i = 0; i < size; i++) { 44 | isNull[curPixelIsNullIndex] = columnVector->isNull[i]; 45 | curPixelEleIndex++; 46 | 47 | if (columnVector->isNull[i]) { 48 | hasNull = true; 49 | encodingUtils.writeLongLE(outputStream, 0L); 50 | } 51 | else { 52 | if (byteOrder == ByteOrder::PIXELS_LITTLE_ENDIAN) { 53 | encodingUtils.writeLongLE(outputStream, values[i]); 54 | } 55 | else { 56 | encodingUtils.writeLongBE(outputStream, values[i]); 57 | } 58 | } 59 | 60 | if (curPixelEleIndex >= pixelStride) { 61 | newPixel(); 62 | } 63 | } 64 | return outputStream->getWritePos(); 65 | } 66 | 67 | bool DecimalColumnWriter::decideNullsPadding(std::shared_ptr writerOption) 68 | { 69 | return writerOption->isNullsPadding(); 70 | } 71 | -------------------------------------------------------------------------------- /pixels-core/lib/writer/DoubleColumnWriter.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 
PixelsDB. 3 | * 4 | * This file is part of Pixels. 5 | * 6 | * Pixels is free software: you can redistribute it and/or modify 7 | * it under the terms of the Affero GNU General Public License as 8 | * published by the Free Software Foundation, either version 3 of 9 | * the License, or (at your option) any later version. 10 | * 11 | * Pixels is distributed in the hope that it will be useful, 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | * Affero GNU General Public License for more details. 15 | * 16 | * You should have received a copy of the Affero GNU General Public 17 | * License along with Pixels. If not, see 18 | * . 19 | */ -------------------------------------------------------------------------------- /pixels-core/lib/writer/FloatColumnWriter.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 PixelsDB. 3 | * 4 | * This file is part of Pixels. 5 | * 6 | * Pixels is free software: you can redistribute it and/or modify 7 | * it under the terms of the Affero GNU General Public License as 8 | * published by the Free Software Foundation, either version 3 of 9 | * the License, or (at your option) any later version. 10 | * 11 | * Pixels is distributed in the hope that it will be useful, 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | * Affero GNU General Public License for more details. 15 | * 16 | * You should have received a copy of the Affero GNU General Public 17 | * License along with Pixels. If not, see 18 | * . 19 | */ -------------------------------------------------------------------------------- /pixels-core/lib/writer/LongDecimalColumnWriter.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 PixelsDB. 3 | * 4 | * This file is part of Pixels. 
5 | * 6 | * Pixels is free software: you can redistribute it and/or modify 7 | * it under the terms of the Affero GNU General Public License as 8 | * published by the Free Software Foundation, either version 3 of 9 | * the License, or (at your option) any later version. 10 | * 11 | * Pixels is distributed in the hope that it will be useful, 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | * Affero GNU General Public License for more details. 15 | * 16 | * You should have received a copy of the Affero GNU General Public 17 | * License along with Pixels. If not, see 18 | * . 19 | */ -------------------------------------------------------------------------------- /pixels-core/lib/writer/PixelsWriterOption.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 PixelsDB. 3 | * 4 | * This file is part of Pixels. 5 | * 6 | * Pixels is free software: you can redistribute it and/or modify 7 | * it under the terms of the Affero GNU General Public License as 8 | * published by the Free Software Foundation, either version 3 of 9 | * the License, or (at your option) any later version. 10 | * 11 | * Pixels is distributed in the hope that it will be useful, 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | * Affero GNU General Public License for more details. 15 | * 16 | * You should have received a copy of the Affero GNU General Public 17 | * License along with Pixels. If not, see 18 | * . 19 | */ 20 | 21 | // 22 | // Created by gengdy on 24-11-25. 
23 | // 24 | 25 | #include "writer/PixelsWriterOption.h" 26 | #include 27 | 28 | PixelsWriterOption::PixelsWriterOption() {} 29 | 30 | int PixelsWriterOption::getPixelsStride() const { 31 | return this->pixelsStride; 32 | } 33 | 34 | std::shared_ptr PixelsWriterOption::setPixelsStride(int pixelsStride) { 35 | this->pixelsStride = pixelsStride; 36 | return shared_from_this(); 37 | } 38 | 39 | EncodingLevel PixelsWriterOption::getEncodingLevel() const { 40 | return this->encodingLevel; 41 | } 42 | 43 | std::shared_ptr PixelsWriterOption::setEncodingLevel(EncodingLevel encodingLevel) { 44 | this->encodingLevel = encodingLevel; 45 | return shared_from_this(); 46 | } 47 | 48 | bool PixelsWriterOption::isNullsPadding() const { 49 | return this->nullsPadding; 50 | } 51 | 52 | std::shared_ptr PixelsWriterOption::setNullsPadding(bool nullsPadding) { 53 | this->nullsPadding = nullsPadding; 54 | return shared_from_this(); 55 | } 56 | 57 | ByteOrder PixelsWriterOption::getByteOrder() const { 58 | return byteOrder; 59 | } 60 | 61 | void PixelsWriterOption::setByteOrder(ByteOrder byte_order) { 62 | byteOrder = byte_order; 63 | } 64 | -------------------------------------------------------------------------------- /pixels-core/lib/writer/TimestampColumnWriter.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 PixelsDB. 3 | * 4 | * This file is part of Pixels. 5 | * 6 | * Pixels is free software: you can redistribute it and/or modify 7 | * it under the terms of the Affero GNU General Public License as 8 | * published by the Free Software Foundation, either version 3 of 9 | * the License, or (at your option) any later version. 10 | * 11 | * Pixels is distributed in the hope that it will be useful, 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | * Affero GNU General Public License for more details. 
15 | *
16 | * You should have received a copy of the Affero GNU General Public
17 | * License along with Pixels. If not, see
18 | * .
19 | */
20 |
21 | #include "writer/TimestampColumnWriter.h"
22 | #include "utils/BitUtils.h"
23 |
24 | // Passes the column type and the writer options straight to the ColumnWriter base.
25 | TimestampColumnWriter::TimestampColumnWriter(std::shared_ptr type, std::shared_ptr writerOption) :
26 |     ColumnWriter(type, writerOption)
27 | {
28 |
29 | }
30 |
31 | // Appends the first `size` entries of `vector` to the output stream as `long` values.
32 | // Null entries are written as a 0 placeholder; other entries are written little- or big-endian
33 | // according to `byteOrder`. newPixel() is called each time `pixelStride` elements have accumulated.
34 | // Returns the write position of the output stream after the last value.
35 | // Throws std::invalid_argument when the cast vector pointer is null.
36 | int TimestampColumnWriter::write(std::shared_ptr vector, int size)
37 | {
38 |     // std::cout<<"In TimestampColumnWriter"<<std::endl;
39 |     // NOTE(review): the declaration below was partly lost in this copy of the file and is
40 |     // reconstructed from the sibling Date/Decimal writers — confirm against the repository.
41 |     auto columnVector = std::static_pointer_cast(vector);
42 |
43 |     // static_pointer_cast performs no runtime type check, so this only rejects a null pointer.
44 |     if (!columnVector)
45 |     {
46 |         throw std::invalid_argument("Invalid vector type");
47 |     }
48 |
49 |     long* values = columnVector->times;
50 |     EncodingUtils encodingUtils;
51 |
52 |     for (int i = 0; i < size; i++) {
53 |         // Store the null flag in the next free slot so every row of the pixel keeps its own flag.
54 |         // NOTE(review): assumes newPixel() resets curPixelIsNullIndex and that isNull holds at
55 |         // least pixelStride flags — confirm in ColumnWriter.
56 |         isNull[curPixelIsNullIndex++] = columnVector->isNull[i];
57 |         curPixelEleIndex++;
58 |
59 |         if (columnVector->isNull[i]) {
60 |             hasNull = true;
61 |             // Placeholder value for a null row; 0 has the same bytes in either byte order.
62 |             encodingUtils.writeLongLE(outputStream, 0L);
63 |         }
64 |         else {
65 |             if (byteOrder == ByteOrder::PIXELS_LITTLE_ENDIAN) {
66 |                 encodingUtils.writeLongLE(outputStream, values[i]);
67 |             }
68 |             else {
69 |                 encodingUtils.writeLongBE(outputStream, values[i]);
70 |             }
71 |         }
72 |
73 |         if (curPixelEleIndex >= pixelStride) {
74 |             newPixel();
75 |         }
76 |     }
77 |     return outputStream->getWritePos();
78 | }
79 |
80 | // Uses the nulls-padding setting from the writer options unchanged.
81 | bool TimestampColumnWriter::decideNullsPadding(std::shared_ptr writerOption)
82 | {
83 |     return writerOption->isNullsPadding();
84 | }
85 |
86 |
--------------------------------------------------------------------------------
/pixels-cxx.properties:
--------------------------------------------------------------------------------
1 | # pixels c++ reader configurations
2 |
3 |
4 | # valid values: noop, sortmerge, ratelimited
5 | read.request.scheduler=noop
6 | read.request.merge.gap=2097152
7 |
8 | # localfs properties
9 | localfs.block.size=4096
10 | localfs.enable.direct.io=true
11 | localfs.enable.async.io=true
12 | # the lib of async is iouring or aio
13 |
localfs.async.lib=iouring 14 | # pixel.stride must be the same as the stride size in pxl data 15 | # pixel.stride=10000 16 | pixel.stride=2 17 | # the work thread to run pixels. -1 means using all CPU cores 18 | pixel.threads=-1 19 | # column size path. It is optional. If no column size path is designated, the 20 | # size of first pixels data is used. For example: 21 | # pixel.column.size.path=/scratch/liyu/opt/pixels/cpp/pixels-duckdb/benchmark/clickbench/clickbench-size.csv 22 | pixel.column.size.path= 23 | 24 | # the work thread to run parquet. -1 means using all CPU cores 25 | parquet.threads=-1 26 | 27 | # storage device identifier directory depth 28 | # this parameter defines the directory depth that determines the storage device. 29 | # for example, we have three SSDs, the path is /data/ssd1, /data/ssd2 and /data/ssd3, so the depth is 2 30 | # another example: we have three SSDs, the path is /ssd1, /ssd2 and /ssd3, so the depth is 1 31 | # this parameter helps us allocate SSD to specific threads 32 | storage.directory.depth=1 33 | 34 | # the row group size in bytes for pixels writer, should not exceed 2GB 35 | # row.group.size=268435456 36 | row.group.size=100 37 | # the block size for block-wise storage systems such as HDFS 38 | block.size=2147483648 39 | # the number of replications of each block for block-wise storage systems such as HDFS 40 | block.replication=1 41 | 42 | # the alignment of the start offset of a column chunk in the file, it is for SIMD and its unit is byte 43 | column.chunk.alignment=32 44 | 45 | # for DuckDB, it is only effective when column.chunk.alignment also meets the alignment of the isNull bitmap 46 | isnull.bitmap.alignment=8 47 | -------------------------------------------------------------------------------- /pixels_extension.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // Created by liyu on 3/26/23. 
3 | // 4 | #define DUCKDB_EXTENSION_MAIN 5 | 6 | #include "pixels_extension.hpp" 7 | #include "PixelsScanFunction.hpp" 8 | #include "PixelsReadBindData.hpp" 9 | #include "duckdb.hpp" 10 | #include "duckdb/common/exception.hpp" 11 | #include "duckdb/common/string_util.hpp" 12 | #include "duckdb/function/scalar_function.hpp" 13 | #include "duckdb/common/optional_ptr.hpp" 14 | #include 15 | 16 | namespace duckdb { 17 | 18 | // Pixels Scan Replacemet for duckdb 1.0 19 | unique_ptr PixelsScanReplacement(ClientContext &context,ReplacementScanInput &input, 20 | optional_ptr data){ 21 | auto table_name=ReplacementScan::GetFullPath(input); 22 | // if(!ReplacementScan::CanReplace(table_name,{"pixels"})){ 23 | // return nullptr; 24 | // } 25 | auto lower_name = StringUtil::Lower(table_name); 26 | if (!StringUtil::EndsWith(lower_name, ".pxl") && !StringUtil::Contains(lower_name, ".pxl?")) { 27 | return nullptr; 28 | } 29 | auto table_function= make_uniq(); 30 | vector> children; 31 | children.push_back(make_uniq(Value(table_name))); 32 | table_function->function = make_uniq("pixels_scan",std::move(children)); 33 | if(!FileSystem::HasGlob(table_name)){ 34 | auto &fs=FileSystem::GetFileSystem(context); 35 | table_function->alias=fs.ExtractBaseName(table_name); 36 | } 37 | return std::move(table_function); 38 | } 39 | 40 | void PixelsExtension::Load(DuckDB &db) { 41 | Connection con(*db.instance); 42 | con.BeginTransaction(); 43 | 44 | auto &context = *con.context; 45 | auto &catalog = Catalog::GetSystemCatalog(*con.context); 46 | 47 | auto scan_fun = PixelsScanFunction::GetFunctionSet(); 48 | CreateTableFunctionInfo cinfo(scan_fun); 49 | cinfo.name = "pixels_scan"; 50 | 51 | catalog.CreateTableFunction(context, &cinfo); 52 | con.Commit(); 53 | 54 | auto &config = DBConfig::GetConfig(*db.instance); 55 | config.replacement_scans.emplace_back(PixelsScanReplacement); 56 | } 57 | 58 | std::string PixelsExtension::Name() { 59 | return "pixels"; 60 | } 61 | 62 | 63 | } // namespace 
duckdb 64 | 65 | #ifndef DUCKDB_EXTENSION_MAIN 66 | #error DUCKDB_EXTENSION_MAIN not defined 67 | #endif 68 | 69 | -------------------------------------------------------------------------------- /sync_files.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | git remote add pixels-origin git@github.com:pixelsdb/pixels.git 4 | 5 | git fetch pixels-origin 6 | 7 | git checkout pixels-origin/master -- cpp 8 | 9 | # Iterate over every file and directory in the current directory 10 | for item in *; do 11 | # Check whether a matching file or directory exists under cpp/ 12 | if [ -e "cpp/$item" ]; then 13 | echo "Found matching item in cpp/: $item" 14 | 15 | # Decide whether the item is a file or a directory 16 | if [ -d "$item" ]; then 17 | # Directory: recursively copy the contents of the cpp/ counterpart into the matching local directory 18 | echo "Syncing directory: $item" 19 | cp -r cpp/"$item"/* "$item"/ 20 | elif [ -f "$item" ]; then 21 | # File: overwrite the local file directly 22 | echo "Syncing file: $item" 23 | cp cpp/"$item" "$item" 24 | fi 25 | else 26 | echo "No matching item in cpp/ for: $item" 27 | fi 28 | done 29 | 30 | rm -rf cpp 31 | 32 | git add . 
33 | 34 | git commit -m "Auto-synced cpp directory from pixels repo" 35 | 36 | 37 | git push origin master 38 | -------------------------------------------------------------------------------- /tests/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | #project(tests) 2 | # 3 | #include(FetchContent) 4 | #FetchContent_Declare( 5 | # googletest 6 | # URL https://github.com/google/googletest/archive/03597a01ee50ed33e9dfd640b249b4be3799d395.zip 7 | #) 8 | # 9 | #set(gtest_force_shared_crt ON CACHE BOOL "" FORCE) 10 | #FetchContent_MakeAvailable(googletest) 11 | # 12 | #enable_testing() 13 | # 14 | # 15 | #add_executable( 16 | # unit_tests 17 | # UnitTests.cpp) 18 | # 19 | #target_link_libraries( 20 | # unit_tests 21 | # GTest::gtest_main 22 | # pixels-common 23 | # pixels-core 24 | #) 25 | # 26 | #include(GoogleTest) 27 | #include_directories(../pixels-core/include) 28 | #include_directories(../pixels-common/include) 29 | #gtest_discover_tests(unit_tests) 30 | 31 | #add_subdirectory(writer) -------------------------------------------------------------------------------- /tests/data/example.pxl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pixelsdb/mini-pixels/7fe8f02e02f0d0a8a44e1adb049a05d89c9ea489/tests/data/example.pxl -------------------------------------------------------------------------------- /tests/test_date.tbl: -------------------------------------------------------------------------------- 1 | 1997-10-03 2 | 2025-09-16 3 | 1997-02-23 4 | 2019-03-28 5 | 2002-09-27 6 | 2019-01-02 7 | 1993-03-08 8 | 2014-04-29 9 | 2006-01-02 10 | 2017-10-06 11 | 1998-09-15 12 | 2012-07-25 13 | 2012-02-16 14 | 2016-04-29 15 | 2013-06-03 16 | 2003-01-19 17 | 2025-12-15 18 | 1998-12-05 19 | 2016-06-28 20 | 1990-09-24 21 | 1995-04-21 22 | 2003-05-14 23 | 2011-07-27 24 | 2008-06-17 25 | 1997-12-21 26 | 2023-12-18 27 | 1995-05-30 28 | 1996-02-11 29 | 1996-11-17 30 
| 2024-10-27 31 | 1993-12-15 32 | 2017-05-01 33 | 2012-11-15 34 | 2024-07-21 35 | 2013-04-14 36 | 2021-09-16 37 | 1990-06-29 38 | 2023-03-10 39 | 2000-09-09 40 | 1998-11-19 41 | 2005-02-07 42 | 2024-12-15 43 | 1995-10-02 44 | 2007-06-25 45 | 2000-05-17 46 | 2018-01-05 47 | 2009-09-10 48 | 1996-09-06 49 | 1994-05-29 50 | 1996-09-21 51 | 2002-11-21 52 | 2012-02-02 53 | 1998-10-12 54 | 2006-10-02 55 | 2016-09-12 56 | 2007-06-19 57 | 2007-10-11 58 | 2013-09-16 59 | 2014-08-06 60 | 2011-03-13 61 | 2001-07-30 62 | 2017-04-22 63 | 2015-09-28 64 | 1995-06-02 65 | 2022-03-30 66 | 2005-04-27 67 | 1995-10-23 68 | 2016-09-27 69 | 1990-06-04 70 | 2000-11-25 71 | 2024-03-30 72 | 2001-05-23 73 | 1999-06-14 74 | 2006-03-15 75 | 2007-09-10 76 | 2018-02-05 77 | 1992-03-05 78 | 2004-10-11 79 | 2023-10-15 80 | 1991-03-21 81 | 2003-08-13 82 | 2021-10-09 83 | 1996-03-05 84 | 1996-06-30 85 | 2019-05-31 86 | 2017-12-12 87 | 2012-09-27 88 | 2021-01-26 89 | 2016-02-01 90 | 2019-07-05 91 | 2014-12-20 92 | 2020-06-25 93 | 2021-04-01 94 | 2021-07-01 95 | 2020-08-07 96 | 2011-03-01 97 | 2003-10-31 98 | 1996-06-14 99 | 1992-10-24 100 | 2007-05-20 101 | -------------------------------------------------------------------------------- /tests/test_decimal.tbl: -------------------------------------------------------------------------------- 1 | 34476661.13 2 | 9581641.68 3 | 97486758.64 4 | 2451781.32 5 | 330310.41 6 | 28381028.71 7 | 36915791.06 8 | 78169018.65 9 | 25024491.12 10 | 64608051.20 11 | 75169058.60 12 | 32558787.76 13 | 59501512.29 14 | 51011771.52 15 | 47060971.03 16 | 24526058.90 17 | 19731170.12 18 | 86239561.90 19 | 74720663.56 20 | 82939319.01 21 | 3723555.99 22 | 76940894.67 23 | 28789248.84 24 | 98465258.11 25 | 79252483.63 26 | 10417530.25 27 | 76540037.64 28 | 43652057.99 29 | 76507214.83 30 | 8092185.58 31 | 35415115.24 32 | 52390671.38 33 | 19917.00 34 | 48031577.24 35 | 16735376.42 36 | 26228942.38 37 | 19242965.91 38 | 80437344.00 39 | 48397869.99 40 | 30626007.53 41 | 
28294296.10 42 | 35001103.82 43 | 86843829.06 44 | 96426960.87 45 | 20436946.46 46 | 87412348.75 47 | 98665602.45 48 | 75141753.47 49 | 5013601.37 50 | 92224164.89 51 | 19264537.06 52 | 25799675.52 53 | 24544958.11 54 | 78084325.03 55 | 40567200.23 56 | 61077440.29 57 | 67106882.96 58 | 89803830.27 59 | 69878894.80 60 | 85721994.21 61 | 34218060.79 62 | 64235797.09 63 | 21136308.68 64 | 92026154.49 65 | 66798318.89 66 | 14738571.12 67 | 55768983.56 68 | 267680.46 69 | 46444417.36 70 | 80465964.08 71 | 53620345.61 72 | 35581623.52 73 | 34261191.23 74 | 82089656.59 75 | 91749328.77 76 | 79165424.99 77 | 58301909.98 78 | 11969536.22 79 | 10703416.63 80 | 26941251.75 81 | 8390533.63 82 | 43073238.69 83 | 54568889.84 84 | 73868611.03 85 | 55567739.59 86 | 29154096.35 87 | 4363114.09 88 | 43498913.33 89 | 79572518.01 90 | 672185.78 91 | 62373227.88 92 | 10531080.74 93 | 93271018.06 94 | 43373363.09 95 | 95753200.88 96 | 73006424.84 97 | 60839564.34 98 | 66999704.67 99 | 78824534.48 100 | 15797914.88 101 | -------------------------------------------------------------------------------- /tests/test_int.tbl: -------------------------------------------------------------------------------- 1 | 1509095563 2 | 1262399562 3 | 1654921478 4 | 185012677 5 | 2042332856 6 | 622819451 7 | 844820549 8 | 666477209 9 | 1738431465 10 | 1973293514 11 | 58512675 12 | 1119844130 13 | 1413458068 14 | 1827906381 15 | 72743998 16 | 173988152 17 | 342279404 18 | 1895102276 19 | 10963606 20 | 1041832117 21 | 876288765 22 | 960203354 23 | 1974859087 24 | 2076338166 25 | 1895060850 26 | 1653255161 27 | 2030559496 28 | 1466414515 29 | 830208275 30 | 1806587984 31 | 750306443 32 | 212755712 33 | 2125657566 34 | 1361492212 35 | 234630558 36 | 1075418082 37 | 2075660678 38 | 1899466084 39 | 166443075 40 | 590397705 41 | 1884201937 42 | 355776279 43 | 1725100354 44 | 1449857551 45 | 1440841598 46 | 1687568029 47 | 1406190062 48 | 78521465 49 | 1927496238 50 | 492426696 51 | 657717426 52 | 1518129483 53 
| 2078816733 54 | 319662435 55 | 1427222806 56 | 350552840 57 | 1687937732 58 | 1628165453 59 | 1026748531 60 | 175244069 61 | 54010848 62 | 490826929 63 | 1589704698 64 | 1464121273 65 | 1553261316 66 | 2016227877 67 | 167252709 68 | 2039583432 69 | 1283455528 70 | 1615687442 71 | 50506035 72 | 825490880 73 | 1313509245 74 | 362361560 75 | 392754401 76 | 2143911432 77 | 1500615463 78 | 650399721 79 | 1896710406 80 | 1648345575 81 | 1339350366 82 | 1748531877 83 | 294699004 84 | 342926526 85 | 184717996 86 | 10026590 87 | 1051887512 88 | 2079844848 89 | 1245023437 90 | 676327388 91 | 1431132372 92 | 335202267 93 | 1184021428 94 | 2127964637 95 | 1310149285 96 | 852108539 97 | 1594212773 98 | 2046390291 99 | 1821885747 100 | 1247616210 101 | -------------------------------------------------------------------------------- /tests/test_string.tbl: -------------------------------------------------------------------------------- 1 | YyZSEQCJwWEurY 2 | 5bjHLM 3 | 3vr5GdTM 4 | BZLvy 5 | w5jXm7XuMi5l0y 6 | CS324znSSw8Nj 7 | 2lBx2 8 | PVh3psxPzZ 9 | 1xIJ5UQoyqdWb 10 | gLr3b 11 | IjjBLO 12 | 9mWM0yO 13 | mAPuJDSZHhXpVS 14 | DYM46EwgRtgjzY 15 | P46SVNRPChM 16 | i1BnfuymR2 17 | sj0YsQeU2VET 18 | ql5JMVnM9 19 | V8cPcw9 20 | sEdoXG8O00 21 | qaiHuzHB 22 | Uf1wvh0J2W 23 | 8cY5nEYy7lC5e 24 | M49YJDvyKG 25 | uxajCxRe0 26 | zsN2tPQ00B 27 | ldTvsf9W3G 28 | xfweVB8PoLu0 29 | ctfJqal2 30 | cnwcWEC2frKl2 31 | pCAcLDzgn 32 | mUAIbfyPrWZyf 33 | QUvSKQqe 34 | 5WECK 35 | LM7bdj 36 | b7frC 37 | gshtn8e9Pgg 38 | JYCUA47Adv 39 | NrmpCcOSco03eq 40 | gtOE7Dp 41 | 3YwCMQ7ML9bMMP 42 | bdxIpK 43 | RqJZ4Aof3KbNA 44 | WLiyTyA8w7 45 | fAI9vrcaU 46 | ZgC0UN9uu1p 47 | big8t 48 | rkSkcyty7 49 | dVv39sDkE14i 50 | NCfNCf 51 | guLEc 52 | 6cKCU0DW 53 | gXQPF 54 | bGphB3upn1 55 | 9sq55J 56 | OBiFA8Ic 57 | of76b93U0PF6o 58 | eVDRGGTMBR 59 | 3KmQOoYT0yl 60 | OlvXA0nBMSd 61 | YtJK0zS6kpqQN 62 | naCBzAaCcE 63 | a7M84neJI 64 | vwYQVhBx 65 | DMcTREjMN 66 | DrAcymN 67 | aMpPyBnAaJ 68 | FKoGe7ArHPc0 69 | ZouHzeRA 70 | 
SV2Vi 71 | Y5myQ2JgrV 72 | N1iMe 73 | 5tehwn 74 | RJFUpgDvqvOJ 75 | bzVeC6 76 | qJyhlz 77 | iiQYK 78 | VZuzgqjkNin7 79 | oPdyQnXGMlNF4 80 | ABAAHXc2 81 | gLYpp7FVW 82 | qAFDOf0 83 | 4LVm5Lts7 84 | S4TFW 85 | Izc9pnflrlmQ 86 | QLX3F3c 87 | qFk0C 88 | H2meQBf3xgtgd6 89 | ptI2zooAfXBoOm 90 | 5Kl9nCkANtE 91 | FQERsikI8y1 92 | 9wLMp21a9L 93 | YIUObKrOvIzW 94 | yqO9lACRZA1x7v 95 | 3v2HSjBkP1Lnnu 96 | 46oHuhYVI7EzPP 97 | uODOcL4AQ4 98 | 59Tes04l 99 | 1IJVTDTnEB6S 100 | aUAip 101 | -------------------------------------------------------------------------------- /tests/test_timestamp.tbl: -------------------------------------------------------------------------------- 1 | 2004-10-14 14:32:47 2 | 2001-02-10 11:07:47 3 | 2004-08-13 18:33:04 4 | 2011-01-11 22:24:51 5 | 2023-06-26 00:54:27 6 | 2014-10-26 23:03:38 7 | 2010-03-09 14:49:53 8 | 2010-09-22 14:39:34 9 | 2011-06-08 14:49:51 10 | 2004-12-29 00:43:17 11 | 2008-01-05 00:06:19 12 | 2015-12-10 21:39:07 13 | 1993-07-21 18:21:24 14 | 1994-05-09 01:20:49 15 | 2024-09-06 14:01:08 16 | 1997-01-28 20:34:20 17 | 1997-07-11 13:46:30 18 | 2013-12-15 14:44:07 19 | 2022-12-10 10:56:04 20 | 1995-05-28 07:54:45 21 | 2005-07-13 12:01:03 22 | 2013-03-06 11:57:07 23 | 2021-10-22 05:35:20 24 | 2008-11-22 21:47:36 25 | 2000-12-12 11:18:49 26 | 2012-12-19 20:03:13 27 | 2000-06-26 03:43:44 28 | 2022-09-03 05:15:55 29 | 2023-10-09 09:32:26 30 | 1996-10-30 16:46:40 31 | 1992-11-15 20:18:25 32 | 2000-10-21 13:34:15 33 | 1995-03-17 09:47:18 34 | 2002-11-23 16:37:26 35 | 2021-03-06 06:41:03 36 | 2008-09-20 08:28:09 37 | 2017-06-10 18:10:41 38 | 2022-05-05 23:15:08 39 | 2022-10-01 12:35:26 40 | 2008-03-17 00:56:13 41 | 2023-12-31 01:46:31 42 | 1991-11-22 09:08:05 43 | 2003-12-08 07:04:10 44 | 1999-06-17 06:43:15 45 | 2023-09-27 20:56:06 46 | 2008-09-05 04:03:02 47 | 1993-05-16 02:28:15 48 | 2014-04-30 03:00:35 49 | 2000-09-24 15:55:06 50 | 2011-09-10 14:33:04 51 | 2005-02-20 14:10:28 52 | 1998-06-07 17:34:03 53 | 1992-11-13 23:39:39 54 | 2025-02-02 
22:18:59 55 | 2017-07-21 19:49:14 56 | 2012-01-26 14:34:53 57 | 1997-10-31 20:51:31 58 | 2009-02-12 17:02:03 59 | 2008-12-21 23:15:25 60 | 2002-06-06 08:41:09 61 | 2018-07-27 07:19:28 62 | 1991-12-03 14:35:03 63 | 1998-11-29 20:51:59 64 | 2018-09-26 09:14:59 65 | 2001-11-22 07:20:10 66 | 2014-02-01 18:09:48 67 | 2021-07-20 12:38:25 68 | 2004-06-30 19:40:47 69 | 2010-03-23 07:57:05 70 | 2018-01-13 00:14:22 71 | 2002-11-22 10:22:47 72 | 1994-08-09 04:12:43 73 | 2016-01-20 02:33:20 74 | 2021-05-19 06:33:27 75 | 2020-04-30 02:26:30 76 | 2006-08-10 17:02:58 77 | 1999-05-23 03:44:25 78 | 1999-05-03 11:10:52 79 | 2010-04-25 18:30:46 80 | 2024-05-07 12:37:35 81 | 2019-03-22 17:46:36 82 | 1993-05-13 10:18:25 83 | 2008-08-04 12:40:46 84 | 1992-07-13 20:59:40 85 | 1992-05-02 22:40:14 86 | 2021-08-07 23:51:37 87 | 2001-03-20 16:26:46 88 | 1993-07-14 13:47:39 89 | 2025-01-29 17:39:17 90 | 2018-09-30 18:55:54 91 | 2020-02-12 10:01:57 92 | 1990-02-25 05:49:25 93 | 2012-03-18 22:04:54 94 | 2005-01-24 23:15:58 95 | 2021-01-07 18:41:50 96 | 2005-02-24 15:16:01 97 | 2003-04-29 16:52:19 98 | 2019-12-26 09:54:39 99 | 2009-03-26 01:19:16 100 | 2005-10-01 23:58:19 101 | -------------------------------------------------------------------------------- /tests/writer/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Use FetchContent to download and integrate GoogleTest 2 | include(FetchContent) 3 | FetchContent_Declare( 4 | googletest 5 | URL https://github.com/google/googletest/archive/b514bdc898e2951020cbdca1304b75f5950d1f59.zip 6 | ) 7 | 8 | set(gtest_force_shared_crt ON CACHE BOOL "" FORCE) # Force Google Test to use shared CRT 9 | FetchContent_MakeAvailable(googletest) # Make Google Test available 10 | 11 | # Enable testing for the project 12 | enable_testing() 13 | 14 | # Create executable targets for the tests 15 | add_executable(IntegerWriterTest IntegerWriterTest.cpp) 16 | add_executable(PixelsWriterTest PixelsWriterTest.cpp) 17 | 
# Configure the writer test executables (IntegerWriterTest, PixelsWriterTest)
# that are declared with add_executable() earlier in this file.
#
# Everything below is attached to the test targets themselves instead of the
# directory scope, so the settings cannot leak into unrelated targets.

# Root of the googletest checkout referenced by the include path below.
# NOTE(review): GTest::gtest_main is linked below and normally propagates its
# own include directories; this extra path is preserved from the original
# configuration -- confirm whether third-party/googletest is still required.
set(GTEST_DIR "${PROJECT_SOURCE_DIR}/third-party/googletest")

# Provides gtest_discover_tests().
include(GoogleTest)

foreach(test_target IN ITEMS IntegerWriterTest PixelsWriterTest)
    # Enable UBSan and ASan for Debug configurations only. A generator
    # expression is used rather than inspecting CMAKE_BUILD_TYPE so the flags
    # also apply with multi-config generators, where CMAKE_BUILD_TYPE is empty
    # at configure time.
    target_compile_options(${test_target} PRIVATE
        "$<$<CONFIG:Debug>:-fsanitize=undefined;-fsanitize=address>"
    )
    # Executables have no consumers, so the link flags are PRIVATE. BEFORE
    # keeps the sanitizer flags ahead of any other link options on the target.
    target_link_options(${test_target} BEFORE PRIVATE
        "$<$<CONFIG:Debug>:-fsanitize=undefined;-fsanitize=address>"
    )

    # Header search paths needed to compile the tests.
    target_include_directories(${test_target} PRIVATE
        "${GTEST_DIR}/googletest/include"
        "${PROJECT_SOURCE_DIR}/pixels-core/include"
        "${PROJECT_SOURCE_DIR}/pixels-common/include"
        "${CMAKE_CURRENT_BINARY_DIR}/../../pixels-common/liburing/src/include"
    )

    # Link Google Test and the project libraries exercised by the tests.
    target_link_libraries(${test_target} PRIVATE
        GTest::gtest_main
        pixels-common
        pixels-core
        duckdb
    )

    # Register every test case in the executable with CTest; without this the
    # enable_testing() call earlier in this file has nothing to run.
    gtest_discover_tests(${test_target})
endforeach()