├── include ├── queue_store_config.h └── queue_store.h ├── src ├── utils │ ├── utils.h │ ├── utils.cpp │ ├── log.h │ ├── serialization.h │ ├── log.c │ └── serialization.cpp ├── io │ ├── ConcurrentSet.hpp │ ├── blockingQueue.hpp │ ├── Barrier.hpp │ └── asyncfileio.hpp ├── dependencies │ └── fast_base64 │ │ ├── fastavxbase64.h │ │ ├── klompavxbase64.h │ │ ├── chromiumbase64.h │ │ ├── fastavxbase64.c │ │ ├── chromiumbase64.c │ │ └── klompavxbase64.c ├── benchmarker.cpp └── queue_store.cpp ├── .gitignore ├── cmake ├── FindSnappy.cmake ├── FindLZ4.cmake ├── FindTcmalloc.cmake ├── GetGitRevisionDescription.cmake.in ├── FindLibuv.cmake ├── GetGitRevisionDescription.cmake ├── FindSSE.cmake └── FindTBB.cmake ├── README.md ├── LICENSE └── CMakeLists.txt /include/queue_store_config.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #define IO_THREAD (4) 4 | #define SEND_THREAD_NUM (10) 5 | #define CHECK_THREAD_NUM (10) 6 | #define TOTAL_QUEUE_NUM (1000000) 7 | #define DATA_FILE_PATH "/alidata1/race2018/data/data" 8 | -------------------------------------------------------------------------------- /src/utils/utils.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | 4 | #ifdef __cplusplus 5 | 6 | #include 7 | #include 8 | 9 | extern "C" { 10 | #else 11 | #include 12 | #include 13 | #endif 14 | 15 | int64_t getCurrentTimeInMS(); 16 | 17 | char *read_binary_file(char *filename, size_t *size); 18 | 19 | void save_to_binary_file(char *filename, char *buf, int size); 20 | 21 | #ifdef __cplusplus 22 | } 23 | #endif -------------------------------------------------------------------------------- /src/io/ConcurrentSet.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | 8 | template> 9 | class ConcurrentSet { 10 | private: 11 | std::set set_; 12 | std::mutex mutex_; 13 | 14 
| public: 15 | typedef typename std::set::iterator iterator; 16 | 17 | std::pair 18 | insert(const T &val) { 19 | std::unique_lock lock(this->mutex_); 20 | return set_.insert(val); 21 | } 22 | 23 | size_t size() { 24 | std::unique_lock lock(this->mutex_); 25 | return set_.size(); 26 | } 27 | 28 | }; -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # jetbrains 2 | .idea/ 3 | cmake-build-* 4 | build/ 5 | 6 | # Prerequisites 7 | *.d 8 | 9 | # Object files 10 | *.o 11 | *.ko 12 | *.obj 13 | *.elf 14 | 15 | # Linker output 16 | *.ilk 17 | *.map 18 | *.exp 19 | 20 | # Precompiled Headers 21 | *.gch 22 | *.pch 23 | 24 | # Libraries 25 | *.lib 26 | *.a 27 | *.la 28 | *.lo 29 | 30 | # Shared objects (inc. Windows DLLs) 31 | *.dll 32 | *.so 33 | *.so.* 34 | *.dylib 35 | 36 | # Executables 37 | *.exe 38 | *.out 39 | *.app 40 | *.i*86 41 | *.x86_64 42 | *.hex 43 | 44 | # Debug files 45 | *.dSYM/ 46 | *.su 47 | *.idb 48 | *.pdb 49 | 50 | # Kernel Module Compile Results 51 | *.mod* 52 | *.cmd 53 | .tmp_versions/ 54 | modules.order 55 | Module.symvers 56 | Mkfile.old 57 | dkms.conf 58 | -------------------------------------------------------------------------------- /src/io/blockingQueue.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | template 9 | class BlockingQueue { 10 | private: 11 | std::mutex d_mutex; 12 | std::condition_variable d_condition; 13 | std::deque d_queue; 14 | public: 15 | void put(T const &value) { 16 | { 17 | std::unique_lock lock(this->d_mutex); 18 | d_queue.push_front(value); 19 | } 20 | this->d_condition.notify_one(); 21 | } 22 | 23 | T take() { 24 | std::unique_lock lock(this->d_mutex); 25 | this->d_condition.wait(lock, [=] { return !this->d_queue.empty(); }); 26 | T rc(std::move(this->d_queue.back())); 27 | 
this->d_queue.pop_back(); 28 | return rc; 29 | } 30 | }; -------------------------------------------------------------------------------- /cmake/FindSnappy.cmake: -------------------------------------------------------------------------------- 1 | # Find the Snappy libraries 2 | # 3 | # The following variables are optionally searched for defaults 4 | # SNAPPY_ROOT_DIR: Base directory where all Snappy components are found 5 | # 6 | # The following are set after configuration is done: 7 | # SNAPPY_FOUND 8 | # Snappy_INCLUDE_DIR 9 | # Snappy_LIBRARIES 10 | 11 | find_path(Snappy_INCLUDE_DIR NAMES snappy.h 12 | PATHS ${SNAPPY_ROOT_DIR} ${SNAPPY_ROOT_DIR}/include) 13 | 14 | find_library(Snappy_LIBRARIES NAMES snappy 15 | PATHS ${SNAPPY_ROOT_DIR} ${SNAPPY_ROOT_DIR}/lib) 16 | 17 | include(FindPackageHandleStandardArgs) 18 | find_package_handle_standard_args(Snappy DEFAULT_MSG Snappy_INCLUDE_DIR Snappy_LIBRARIES) 19 | 20 | if(SNAPPY_FOUND) 21 | message(STATUS "Found Snappy (include: ${Snappy_INCLUDE_DIR}, library: ${Snappy_LIBRARIES})") 22 | mark_as_advanced(Snappy_INCLUDE_DIR Snappy_LIBRARIES) 23 | 24 | set(Snappy_VERSION "${SNAPPY_MAJOR}.${SNAPPY_MINOR}.${SNAPPY_PATCHLEVEL}") 25 | endif() -------------------------------------------------------------------------------- /src/io/Barrier.hpp: -------------------------------------------------------------------------------- 1 | // 2 | // Created by will on 7/6/18.
3 | // 4 | 5 | #pragma once 6 | 7 | #include 8 | #include 9 | #include 10 | 11 | #include 12 | 13 | class Barrier { // Reusable rendezvous point for a fixed number of threads; see Wait(). 14 | public: 15 | explicit Barrier(std::size_t iCount) : // iCount: number of threads that must call Wait() per round 16 | mThreshold(iCount), 17 | mCount(iCount), 18 | mGeneration(0) { 19 | } 20 | 21 | void Wait(std::function func) { // Blocks until mThreshold threads have called Wait(); the last arrival runs func() once, with mMutex held, then wakes the rest. 22 | std::unique_lock lLock{mMutex}; 23 | auto lGen = mGeneration; // round this caller belongs to 24 | if (!--mCount) { // last thread of the round 25 | mGeneration++; 26 | mCount = mThreshold; // re-arm for the next round 27 | func(); 28 | mCond.notify_all(); 29 | } else { 30 | mCond.wait(lLock, [this, lGen] { return lGen != mGeneration; }); // sleep until the round completes 31 | } 32 | } 33 | 34 | private: 35 | std::mutex mMutex; 36 | std::condition_variable mCond; 37 | std::size_t mThreshold; // threads required per round 38 | std::size_t mCount; // arrivals still missing in the current round 39 | std::size_t mGeneration; // incremented each time a round completes 40 | }; -------------------------------------------------------------------------------- /src/dependencies/fast_base64/fastavxbase64.h: -------------------------------------------------------------------------------- 1 | #ifndef EXPAVX_B64 2 | #define EXPAVX_B64 3 | 4 | /** 5 | * Assumes recent x64 hardware with AVX2 instructions. 6 | */ 7 | 8 | #include 9 | #include 10 | #include "chromiumbase64.h" 11 | 12 | #ifdef __cplusplus 13 | extern "C" { 14 | #endif /* __cplusplus */ 15 | 16 | /** 17 | * This code extends Nick Galbreath's high performance base64 decoder (used in Chromium), the API is the 18 | * same effectively, see chromiumbase64.h. 19 | */ 20 | 21 | /* 22 | * AVX2 accelerated version of Galbreath's chromium_base64_decode function 23 | * Usage remains the same, see chromiumbase64.h. 24 | */ 25 | size_t fast_avx2_base64_decode(char *out, const char *src, size_t srclen); 26 | 27 | /* 28 | * AVX2 accelerated version of Galbreath's chromium_base64_encode function 29 | * Usage remains the same, see chromiumbase64.h.
30 | */ 31 | size_t fast_avx2_base64_encode(char* dest, const char* str, size_t len); 32 | 33 | #ifdef __cplusplus 34 | } 35 | #endif /* __cplusplus */ 36 | 37 | #endif 38 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Disclaimer 2 | 此项目仅供学习与交流使用,请遵循MIT协议,如果您在任何项目中使用相关代码,请保留此项目的LICENSE文件。 3 | 4 | 5 | # message-queue 6 | 7 | 1-million message-queue in c++ (使用intrinsics编写向量化代码) --- Created By Blink`团队 ([Pisces](https://github.com/WANG-lp) and [yche](https://github.com/CheYulin))。 8 | 9 | 10 | ## 本地构建代码 11 | 12 | ```zsh 13 | mkdir -p build && cd build 14 | cmake .. -Dtest=ON -DenableLOG=ON -DenableWall=ON -DCMAKE_INSTALL_PREFIX=/alidata1/race2018/work 15 | make -j && make install 16 | ``` 17 | 18 | ## 本仓库的文件 19 | 20 | 文件夹/文件 | 说明 21 | --- | --- 22 | [include](include) | 动态链接库export出来时候的头文件 23 | [src](src) | 源代码文件 24 | [cmake](cmake), [CMakeLists.txt](CMakeLists.txt) | 项目构建文件 25 | 26 | ## 最高跑分统计数据 27 | 28 | ``` 29 | [2018-07-11 23:49:55,890] Send: 478408 ms Num: 2000000009 30 | [2018-07-11 23:49:55,890] Put message per second: 4180532.12 31 | [2018-07-11 23:51:55,199] Index Check: 107483 ms Num:1000010 32 | [2018-07-11 23:55:46,916] phase3 start 33 | [2018-07-11 23:55:46,916] Check: 231732 ms Num: 200000000 34 | [2018-07-11 23:55:46,916] Tps: 2691946.44 35 | ``` -------------------------------------------------------------------------------- /src/utils/utils.cpp: -------------------------------------------------------------------------------- 1 | #include "utils.h" 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | int64_t getCurrentTimeInMS() { 8 | return std::chrono::duration_cast( 9 | std::chrono::system_clock::now().time_since_epoch()).count(); 10 | } 11 | 12 | void save_to_binary_file(char *filename, char *buf, int size) { 13 | assert(size > 0); 14 | printf("save to binary size %d\n", size); 15 | FILE *ptr = fopen(filename, "wb"); 
16 | fwrite(buf, sizeof(char), size, ptr); 17 | fflush(ptr); 18 | fclose(ptr); 19 | } 20 | 21 | /** 22 | * Reads a whole file into a freshly malloc()-ed buffer. 23 | * 24 | * @param filename path of the file to read 25 | * @param size out-parameter: number of bytes actually read (0 on failure) 26 | * @return buffer owned by the caller (release with free()), or nullptr when the 27 | * file cannot be opened, measured or allocated 28 | */ 29 | char *read_binary_file(char *filename, size_t *size) { 30 | *size = 0; 31 | FILE *ptr = fopen(filename, "rb"); 32 | if (ptr == nullptr) { 33 | return nullptr; 34 | } 35 | long file_len = -1; 36 | if (fseek(ptr, 0, SEEK_END) == 0) { // seek to end of file 37 | file_len = ftell(ptr); // get current file pointer 38 | } 39 | if (file_len < 0 || fseek(ptr, 0, SEEK_SET) != 0) { // seek back to beginning of file 40 | fclose(ptr); 41 | return nullptr; 42 | } 43 | // malloc(0) may return NULL, so always request at least one byte 44 | char *buf = (char *) malloc(file_len > 0 ? (size_t) file_len : 1); 45 | if (buf == nullptr) { 46 | fclose(ptr); 47 | return nullptr; 48 | } 49 | size_t ret = fread(buf, sizeof(char), (size_t) file_len, ptr); 50 | fclose(ptr); // release the stream on the success path as well 51 | printf("read binary size %zu\n", ret); 52 | *size = ret; 53 | return buf; 54 | } 55 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Lipeng WANG and Yulin CHE 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE.
22 | -------------------------------------------------------------------------------- /cmake/FindLZ4.cmake: -------------------------------------------------------------------------------- 1 | # Finds liblz4. 2 | # 3 | # This module defines: 4 | # LZ4_FOUND 5 | # LZ4_INCLUDE_DIR 6 | # LZ4_LIBRARY 7 | # 8 | 9 | find_path(LZ4_INCLUDE_DIR NAMES lz4.h) 10 | find_library(LZ4_LIBRARY NAMES lz4) 11 | 12 | # We require LZ4_compress_default() which was added in v1.7.0 13 | if (LZ4_LIBRARY) 14 | include(CheckCSourceRuns) 15 | set(CMAKE_REQUIRED_INCLUDES ${LZ4_INCLUDE_DIR}) 16 | set(CMAKE_REQUIRED_LIBRARIES ${LZ4_LIBRARY}) 17 | check_c_source_runs(" 18 | #include 19 | int main() { 20 | int good = (LZ4_VERSION_MAJOR > 1) || 21 | ((LZ4_VERSION_MAJOR == 1) && (LZ4_VERSION_MINOR >= 7)); 22 | return !good; 23 | }" LZ4_GOOD_VERSION) 24 | set(CMAKE_REQUIRED_INCLUDES) 25 | set(CMAKE_REQUIRED_LIBRARIES) 26 | endif() 27 | 28 | include(FindPackageHandleStandardArgs) 29 | FIND_PACKAGE_HANDLE_STANDARD_ARGS( 30 | LZ4 DEFAULT_MSG 31 | LZ4_LIBRARY LZ4_INCLUDE_DIR LZ4_GOOD_VERSION) 32 | 33 | if (NOT LZ4_FOUND) 34 | message(STATUS "Using third-party bundled LZ4") 35 | else() 36 | message(STATUS "Found LZ4: ${LZ4_LIBRARY}") 37 | endif (NOT LZ4_FOUND) 38 | 39 | mark_as_advanced(LZ4_INCLUDE_DIR LZ4_LIBRARY) -------------------------------------------------------------------------------- /cmake/FindTcmalloc.cmake: -------------------------------------------------------------------------------- 1 | # - Find Tcmalloc 2 | # Find the native Tcmalloc includes and library 3 | # 4 | # Tcmalloc_INCLUDE_DIR - where to find Tcmalloc.h, etc. 5 | # Tcmalloc_LIBRARIES - List of libraries when using Tcmalloc. 6 | # Tcmalloc_FOUND - True if Tcmalloc found. 
7 | 8 | find_path(Tcmalloc_INCLUDE_DIR google/tcmalloc.h NO_DEFAULT_PATH PATHS 9 | ${HT_DEPENDENCY_INCLUDE_DIR} 10 | /usr/include 11 | /opt/local/include 12 | /usr/local/include 13 | ) 14 | 15 | if (USE_TCMALLOC) 16 | set(Tcmalloc_NAMES tcmalloc) 17 | else () 18 | set(Tcmalloc_NAMES tcmalloc_minimal tcmalloc) 19 | endif () 20 | 21 | find_library(Tcmalloc_LIBRARY NO_DEFAULT_PATH 22 | NAMES ${Tcmalloc_NAMES} 23 | PATHS ${HT_DEPENDENCY_LIB_DIR} /lib /usr/lib /usr/local/lib /opt/local/lib 24 | ) 25 | 26 | if (Tcmalloc_INCLUDE_DIR AND Tcmalloc_LIBRARY) 27 | set(Tcmalloc_FOUND TRUE) 28 | set( Tcmalloc_LIBRARIES ${Tcmalloc_LIBRARY} ) 29 | else () 30 | set(Tcmalloc_FOUND FALSE) 31 | set( Tcmalloc_LIBRARIES ) 32 | endif () 33 | 34 | if (Tcmalloc_FOUND) 35 | message(STATUS "Found Tcmalloc: ${Tcmalloc_LIBRARY}") 36 | else () 37 | message(STATUS "Not Found Tcmalloc: ${Tcmalloc_LIBRARY}") 38 | if (Tcmalloc_FIND_REQUIRED) 39 | message(STATUS "Looked for Tcmalloc libraries named ${Tcmalloc_NAMES}.") 40 | message(FATAL_ERROR "Could NOT find Tcmalloc library") 41 | endif () 42 | endif () 43 | 44 | mark_as_advanced( 45 | Tcmalloc_LIBRARY 46 | Tcmalloc_INCLUDE_DIR 47 | ) 48 | -------------------------------------------------------------------------------- /cmake/GetGitRevisionDescription.cmake.in: -------------------------------------------------------------------------------- 1 | # 2 | # Internal file for GetGitRevisionDescription.cmake 3 | # 4 | # Requires CMake 2.6 or newer (uses the 'function' command) 5 | # 6 | # Original Author: 7 | # 2009-2010 Ryan Pavlik 8 | # http://academic.cleardefinition.com 9 | # Iowa State University HCI Graduate Program/VRAC 10 | # 11 | # Copyright Iowa State University 2009-2010. 12 | # Distributed under the Boost Software License, Version 1.0. 
13 | # (See accompanying file LICENSE_1_0.txt or copy at 14 | # http://www.boost.org/LICENSE_1_0.txt) 15 | 16 | set(HEAD_HASH) 17 | 18 | file(READ "@HEAD_FILE@" HEAD_CONTENTS LIMIT 1024) 19 | 20 | string(STRIP "${HEAD_CONTENTS}" HEAD_CONTENTS) 21 | if(HEAD_CONTENTS MATCHES "ref") 22 | # named branch 23 | string(REPLACE "ref: " "" HEAD_REF "${HEAD_CONTENTS}") 24 | if(EXISTS "@GIT_DIR@/${HEAD_REF}") 25 | configure_file("@GIT_DIR@/${HEAD_REF}" "@GIT_DATA@/head-ref" COPYONLY) 26 | else() 27 | configure_file("@GIT_DIR@/packed-refs" "@GIT_DATA@/packed-refs" COPYONLY) 28 | file(READ "@GIT_DATA@/packed-refs" PACKED_REFS) 29 | if(${PACKED_REFS} MATCHES "([0-9a-z]*) ${HEAD_REF}") 30 | set(HEAD_HASH "${CMAKE_MATCH_1}") 31 | endif() 32 | endif() 33 | else() 34 | # detached HEAD 35 | configure_file("@GIT_DIR@/HEAD" "@GIT_DATA@/head-ref" COPYONLY) 36 | endif() 37 | 38 | if(NOT HEAD_HASH) 39 | file(READ "@GIT_DATA@/head-ref" HEAD_HASH LIMIT 1024) 40 | string(STRIP "${HEAD_HASH}" HEAD_HASH) 41 | endif() 42 | -------------------------------------------------------------------------------- /src/utils/log.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2017 rxi 3 | * 4 | * This library is free software; you can redistribute it and/or modify it 5 | * under the terms of the MIT license. See `log.c` for details. 6 | */ 7 | 8 | #ifndef LOG_H 9 | #define LOG_H 10 | 11 | #ifdef USE_LOG 12 | 13 | #include 14 | #include 15 | 16 | #define LOG_VERSION "0.1.0" 17 | 18 | typedef void (*log_LockFn)(void *udata, int lock); 19 | 20 | enum { 21 | LOG_TRACE, LOG_DEBUG, LOG_INFO, LOG_WARN, LOG_ERROR, LOG_FATAL 22 | }; 23 | 24 | #define log_trace(...) log_log(LOG_TRACE, __FILE__, __LINE__, __VA_ARGS__) 25 | #define log_debug(...) log_log(LOG_DEBUG, __FILE__, __LINE__, __VA_ARGS__) 26 | #define log_info(...) log_log(LOG_INFO, __FILE__, __LINE__, __VA_ARGS__) 27 | #define log_warn(...) 
log_log(LOG_WARN, __FILE__, __LINE__, __VA_ARGS__) 28 | #define log_error(...) log_log(LOG_ERROR, __FILE__, __LINE__, __VA_ARGS__) 29 | #define log_fatal(...) log_log(LOG_FATAL, __FILE__, __LINE__, __VA_ARGS__) 30 | 31 | #ifdef __cplusplus 32 | extern "C" { 33 | #endif 34 | 35 | void log_set_udata(void *udata); 36 | 37 | void log_set_lock(log_LockFn fn); 38 | 39 | void log_set_fp(FILE *fp); 40 | 41 | void log_set_level(int level); 42 | 43 | void log_set_quiet(int enable); 44 | 45 | void log_log(int level, const char *file, int line, const char *fmt, ...); 46 | 47 | #ifdef __cplusplus 48 | } 49 | #endif 50 | #else //use log 51 | #define log_trace(...) 52 | #define log_debug(...) 53 | #define log_info(...) 54 | #define log_warn(...) 55 | #define log_error(...) 56 | #define log_fatal(...) 57 | #endif //use log 58 | #endif 59 | -------------------------------------------------------------------------------- /cmake/FindLibuv.cmake: -------------------------------------------------------------------------------- 1 | #============================================================================= 2 | # Copyright 2016 The Luvit Authors. All Rights Reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | #============================================================================= 16 | # Locate libuv library 17 | # This module defines 18 | # LIBUV_FOUND, if false, do not try to link to libuv 19 | # LIBUV_LIBRARIES 20 | # LIBUV_INCLUDE_DIR, where to find uv.h 21 | 22 | FIND_PATH(LIBUV_INCLUDE_DIR NAMES uv.h 23 | HINTS ${CMAKE_CURRENT_SOURCE_DIR}/3rd-deps/libuv/include /usr/local/libuv/include /usr/include) 24 | FIND_LIBRARY(LIBUV_LIBRARIES NAMES uv libuv HINTS ${CMAKE_CURRENT_SOURCE_DIR}/3rd-deps/libuv/lib /usr/local/libuv/lib /usr/lib) 25 | 26 | if (WIN32) 27 | list(APPEND LIBUV_LIBRARIES iphlpapi) 28 | list(APPEND LIBUV_LIBRARIES psapi) 29 | list(APPEND LIBUV_LIBRARIES userenv) 30 | list(APPEND LIBUV_LIBRARIES ws2_32) 31 | endif () 32 | 33 | INCLUDE(FindPackageHandleStandardArgs) 34 | FIND_PACKAGE_HANDLE_STANDARD_ARGS(LIBUV DEFAULT_MSG LIBUV_LIBRARIES LIBUV_INCLUDE_DIR) 35 | -------------------------------------------------------------------------------- /src/utils/serialization.h: -------------------------------------------------------------------------------- 1 | #ifndef QUEUERACE_SERIALIZATION_H 2 | #define QUEUERACE_SERIALIZATION_H 3 | 4 | #define BASE64_INFO_LEN (2u) 5 | #define INDEX_LEN (4u) 6 | #define VARYING_VERIFY_LEN (4u) 7 | #define FIXED_PART_LEN (10u) 8 | 9 | #define MAX_FIVE_BITS_INT ((uint8_t) 0x1f) // 31 10 | 11 | int serialize(uint8_t *message, uint16_t len, uint8_t *serialized); 12 | 13 | uint8_t *deserialize(const uint8_t *serialized, int &len); 14 | 15 | int serialize_base64_decoding(uint8_t *message, uint16_t len, uint8_t *serialized); 16 | 17 | uint8_t *deserialize_base64_encoding(const uint8_t *serialized, uint16_t total_serialized_len, int &len); 18 | 19 | // ================= skip index =================================================================== 20 | int serialize_base64_decoding_skip_index(uint8_t *message, uint16_t len, uint8_t *serialized); 21 | 22 | // attention: new[] inside 23 | uint8_t 
*deserialize_base64_encoding_add_index(const uint8_t *serialized, uint16_t total_serialized_len, 24 | int &deserialized_len, int32_t idx); 25 | 26 | // not able to use in the current testing environment 27 | void deserialize_base64_encoding_add_index_in_place(const uint8_t *serialized, uint16_t total_serialized_len, 28 | uint8_t *deserialized, int &deserialized_len, int32_t idx); 29 | 30 | // ======================================================= end of base64 31 | 32 | // =========================== start of base36 ============================================ 33 | 34 | int serialize_base36_decoding_skip_index(uint8_t *message, uint16_t len, uint8_t *serialized); 35 | 36 | uint8_t *deserialize_base36_encoding_add_index(const uint8_t *serialized, uint16_t total_serialized_len, 37 | int &deserialized_len, int32_t idx); 38 | 39 | // ============================ end of base36 ============================================= 40 | 41 | #endif //QUEUERACE_SERIALIZATION_H 42 | -------------------------------------------------------------------------------- /include/queue_store.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #include "queue_store_config.h" 11 | 12 | class asyncfileio_t; 13 | class Barrier; 14 | 15 | using std::string; 16 | using std::vector; 17 | using std::unordered_map; 18 | using std::mutex; 19 | 20 | void print_version(); 21 | 22 | namespace race2018 { 23 | 24 | struct MemBlock { 25 | /** 26 | * Pointer to the data 27 | */ 28 | void *ptr; 29 | 30 | /** 31 | * Length of the data in bytes 32 | */ 33 | size_t size; 34 | 35 | MemBlock(void* ptr, size_t size): ptr(ptr), size(size){ 36 | } 37 | }; 38 | 39 | class queue_store { 40 | public: 41 | 42 | /** 43 | * Default constructor is REQUIRED and will be used to initialize your implementation. You may modify it but 44 | * please make sure it exists. 
45 | */ 46 | queue_store(); 47 | 48 | /** 49 | * Note: Competitors need to implement this function and it will be called concurrently. 50 | * 51 | * Writes one message into a queue. 52 | * This interface must be thread-safe: the evaluation program calls put concurrently. 53 | * Each queue stores its messages in the order they were sent (think of a Java List); every message has an index, starting from 0. 54 | * Different queues are independent and do not affect each other. 55 | * @param queue_name name of the queue; the queue is created automatically on the first put 56 | * @param message the message content; during evaluation it is randomly generated, most messages are about 64 bytes long and a few are about 1 KB 57 | */ 58 | void put(const std::string &queue_name, const MemBlock &message); 59 | 60 | /** 61 | * Note: Competitors need to implement this function and it will be called concurrently. 62 | * 63 | * Reads a batch of messages from a queue; the messages must be returned in the order they were sent. 64 | * This interface must be thread-safe: the evaluation program calls get concurrently. 65 | * The returned vector is read concurrently but never written, so it only needs to be safe for concurrent reads. 66 | * @param queue_name name of the queue 67 | * @param offset index of the first message to read within this queue 68 | * @param number number of messages to read; returns that many if enough exist, otherwise only the messages available; returns an empty collection when there are none 69 | */ 70 | std::vector get(const std::string &queue_name, long offset, long number); 71 | 72 | std::vector doPhase2(int tid, const std::string &queue_name, long offset, long number); 73 | 74 | std::vector doPhase3(int tid, const std::string &queue_name, long offset, long number); 75 | private: 76 | asyncfileio_t *asyncfileio; 77 | Barrier *barrier1; 78 | }; 79 | } -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.6) 2 | project(queue_race) 3 | 4 | set(CMAKE_CXX_STANDARD 11) 5 | set(CMAKE_C_STANDARD 11) 6 | set(CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake ${CMAKE_MODULE_PATH}) # To allow CMake to locate our Find*.cmake files 7 | set(CMAKE_BUILD_TYPE "Release") 8 | # Options. Turn on with 'cmake -Dmyvarname=ON'. 9 | option(test "Build all tests." OFF) # Makes boolean 'test' available.
10 | option(enableLOG "enable loging" OFF) 11 | set_property(GLOBAL PROPERTY RULE_MESSAGES OFF) 12 | option(enableWall "enable -Wall" OFF) 13 | 14 | add_compile_options(-O3) 15 | 16 | if(enableWall) 17 | add_compile_options(-Wall -g) 18 | endif() 19 | 20 | if (enableLOG) 21 | add_definitions(-DUSE_LOG) 22 | endif () 23 | include(GetGitRevisionDescription) 24 | get_git_head_revision(GIT_REFSPEC GIT_SHA1) 25 | add_definitions("-DGIT_SHA1=\"${GIT_SHA1}\"") 26 | 27 | include(FindSSE) 28 | if (AVX2_FOUND) 29 | #message("found AVX2") 30 | add_compile_options(-mavx2) 31 | elseif (AVX_FOUND) 32 | message("found AVX") 33 | add_compile_options(-mavx) 34 | elseif (SSE4_1_FOUND) 35 | message("found sse4.1") 36 | add_compile_options(-msse4.1) 37 | elseif (SSE2_FOUND) 38 | message("found sse2") 39 | add_compile_options(-msse2) 40 | endif () 41 | 42 | include(CheckCXXCompilerFlag) 43 | CHECK_CXX_COMPILER_FLAG("-mpopcnt" COMPILER_OPT_POPCNT_SUPPORTED) 44 | if (COMPILER_OPT_POPCNT_SUPPORTED) 45 | add_compile_options(-mpopcnt) 46 | endif () 47 | 48 | find_package(Threads) 49 | find_package(Tcmalloc) 50 | if (Tcmalloc_FOUND) 51 | include_directories(${Tcmalloc_INCLUDE_DIR}) 52 | set(TCMALLOC_LIBS ${Tcmalloc_LIBRARIES}) 53 | endif() 54 | 55 | message("-- Found Snappy: TRUE") 56 | 57 | #find_package(TBB REQUIRED) 58 | #include_directories(${TBB_INCLUDE_DIRS}) 59 | 60 | if (AVX2_FOUND) 61 | set(BASE64_AVX2_FILES_LEMIRE src/dependencies/fast_base64/fastavxbase64.c src/dependencies/fast_base64/fastavxbase64.h) 62 | endif () 63 | 64 | set(CPP_SRC 65 | include/queue_store.h 66 | src/utils/utils.cpp src/utils/utils.h 67 | src/utils/log.c src/utils/log.h 68 | src/io/asyncfileio.hpp src/io/blockingQueue.hpp 69 | src/utils/serialization.h src/utils/serialization.cpp 70 | src/dependencies/fast_base64/chromiumbase64.c ${BASE64_AVX2_FILES_LEMIRE} include/queue_store_config.h src/io/Barrier.hpp src/io/ConcurrentSet.hpp) 71 | 72 | add_library(queue_race SHARED include/queue_store.h 
src/queue_store.cpp ${CPP_SRC}) 73 | target_link_libraries(queue_race ${CMAKE_THREAD_LIBS_INIT} ${TCMALLOC_LIBS}) 74 | 75 | install(DIRECTORY ${CMAKE_SOURCE_DIR}/include/ 76 | DESTINATION include) 77 | install(TARGETS queue_race 78 | LIBRARY DESTINATION lib) 79 | -------------------------------------------------------------------------------- /src/dependencies/fast_base64/klompavxbase64.h: -------------------------------------------------------------------------------- 1 | #ifndef AVX_B64 2 | #define AVX_B64 3 | 4 | /** 5 | * Assumes recent x64 hardware with AVX2 instructions. 6 | */ 7 | 8 | #include 9 | #include 10 | 11 | #ifdef __cplusplus 12 | extern "C" { 13 | #endif /* __cplusplus */ 14 | 15 | /** 16 | * This code is based on Alfred Klomp's https://github.com/aklomp/base64 (published under BSD) 17 | * with minor modifications by D. Lemire. 18 | **/ 19 | /* 20 | Copyright (c) 2013-2015, Alfred Klomp 21 | All rights reserved. 22 | 23 | Redistribution and use in source and binary forms, with or without 24 | modification, are permitted provided that the following conditions are 25 | met: 26 | 27 | - Redistributions of source code must retain the above copyright notice, 28 | this list of conditions and the following disclaimer. 29 | 30 | - Redistributions in binary form must reproduce the above copyright 31 | notice, this list of conditions and the following disclaimer in the 32 | documentation and/or other materials provided with the distribution. 33 | 34 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS 35 | IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 36 | TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A 37 | PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 38 | HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 39 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 40 | TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 41 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 42 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 43 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 44 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 45 | */ 46 | 47 | 48 | 49 | /* Wrapper function to decode a plain string of given length. Output is written 50 | * to *out without trailing zero. Output length in bytes is written to *outlen. 51 | * The buffer in `out` has been allocated by the caller and is at least 3/4 the 52 | * size of the input. */ 53 | int klomp_avx2_base64_decode 54 | ( const char *src 55 | , size_t srclen 56 | , char *out 57 | , size_t *outlen 58 | ) ; 59 | 60 | 61 | /* Wrapper function to encode a plain string of given length. Output is written 62 | * to *out without trailing zero. Output length in bytes is written to *outlen. 63 | * The buffer in `out` has been allocated by the caller and is at least 4/3 the 64 | * size of the input. 
*/ 65 | void klomp_avx2_base64_encode 66 | ( const char *src 67 | , size_t srclen 68 | , char *out 69 | , size_t *outlen 70 | ) ; 71 | 72 | #ifdef __cplusplus 73 | } 74 | #endif /* __cplusplus */ 75 | 76 | #endif 77 | -------------------------------------------------------------------------------- /src/utils/log.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2017 rxi 3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a copy 5 | * of this software and associated documentation files (the "Software"), to 6 | * deal in the Software without restriction, including without limitation the 7 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 8 | * sell copies of the Software, and to permit persons to whom the Software is 9 | * furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in 12 | * all copies or substantial portions of the Software. 13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 20 | * IN THE SOFTWARE. 
21 | */ 22 | #ifdef USE_LOG 23 | 24 | #include 25 | #include 26 | #include 27 | #include 28 | 29 | #include "log.h" 30 | 31 | static struct { 32 | void *udata; 33 | log_LockFn lock; 34 | FILE *fp; 35 | int level; 36 | int quiet; 37 | } L; 38 | 39 | 40 | static const char *level_names[] = { 41 | "TRACE", "DEBUG", "INFO", "WARN", "ERROR", "FATAL" 42 | }; 43 | #ifndef LOG_USE_COLOR 44 | #define LOG_USE_COLOR 45 | #endif 46 | #ifdef LOG_USE_COLOR 47 | static const char *level_colors[] = { 48 | "\x1b[94m", "\x1b[36m", "\x1b[32m", "\x1b[33m", "\x1b[31m", "\x1b[35m" 49 | }; 50 | #endif 51 | 52 | 53 | static void lock(void) { 54 | if (L.lock) { 55 | L.lock(L.udata, 1); 56 | } 57 | } 58 | 59 | 60 | static void unlock(void) { 61 | if (L.lock) { 62 | L.lock(L.udata, 0); 63 | } 64 | } 65 | 66 | 67 | void log_set_udata(void *udata) { 68 | L.udata = udata; 69 | } 70 | 71 | 72 | void log_set_lock(log_LockFn fn) { 73 | L.lock = fn; 74 | } 75 | 76 | 77 | void log_set_fp(FILE *fp) { 78 | L.fp = fp; 79 | } 80 | 81 | 82 | void log_set_level(int level) { 83 | L.level = level; 84 | } 85 | 86 | 87 | void log_set_quiet(int enable) { 88 | L.quiet = enable ? 1 : 0; 89 | } 90 | 91 | 92 | void log_log(int level, const char *file, int line, const char *fmt, ...) 
{ 93 | if (level < L.level) { 94 | return; 95 | } 96 | 97 | /* Acquire lock */ 98 | lock(); 99 | 100 | /* Get current time */ 101 | time_t t = time(NULL); 102 | struct tm *lt = localtime(&t); 103 | 104 | /* Log to stdout */ 105 | if (!L.quiet) { 106 | va_list args; 107 | char buf[16]; 108 | buf[strftime(buf, sizeof(buf), "%H:%M:%S", lt)] = '\0'; 109 | #ifdef LOG_USE_COLOR 110 | fprintf( 111 | stdout, "%s %s%-5s\x1b[0m \x1b[90m%s:%d:\x1b[0m ", 112 | buf, level_colors[level], level_names[level], file, line); 113 | #else 114 | fprintf(stdout, "%s %-5s %s:%d: ", buf, level_names[level], file, line); 115 | #endif 116 | va_start(args, fmt); 117 | vfprintf(stdout, fmt, args); 118 | va_end(args); 119 | fprintf(stdout, "\n"); 120 | } 121 | 122 | /* Log to file */ 123 | if (L.fp) { 124 | va_list args; 125 | char buf[32]; 126 | buf[strftime(buf, sizeof(buf), "%Y-%m-%d %H:%M:%S", lt)] = '\0'; 127 | fprintf(L.fp, "%s %-5s %s:%d: ", buf, level_names[level], file, line); 128 | va_start(args, fmt); 129 | vfprintf(L.fp, fmt, args); 130 | va_end(args); 131 | fprintf(L.fp, "\n"); 132 | } 133 | 134 | /* Release lock */ 135 | unlock(); 136 | } 137 | 138 | #endif -------------------------------------------------------------------------------- /src/dependencies/fast_base64/chromiumbase64.h: -------------------------------------------------------------------------------- 1 | /*************** 2 | * Taken more or less as-is from the chromium project 3 | ****************/ 4 | 5 | 6 | 7 | /** 8 | * \file 9 | *
 10 |  * High performance base64 encoder / decoder
 11 |  * Version 1.3 -- 17-Mar-2006
 12 |  *
 13 |  * Copyright © 2005, 2006, Nick Galbreath -- nickg [at] modp [dot] com
 14 |  * All rights reserved.
 15 |  *
 16 |  * http://modp.com/release/base64
 17 |  *
 18 |  * Released under bsd license.  See modp_b64.c for details.
 19 |  * 
20 | * 21 | * The default implementation is the standard b64 encoding with padding. 22 | * It's easy to change this to use "URL safe" characters and to remove 23 | * padding. See the modp_b64.c source code for details. 24 | * 25 | */ 26 | 27 | #ifndef MODP_B64 28 | #define MODP_B64 29 | 30 | #include 31 | #include 32 | 33 | #ifdef __cplusplus 34 | extern "C" { 35 | #endif 36 | 37 | #define MODP_B64_ERROR ((size_t)-1) 38 | /** 39 | * Encode a raw binary string into base 64. 40 | * src contains the bytes 41 | * len contains the number of bytes in the src 42 | * dest should be allocated by the caller to contain 43 | * at least chromium_base64_encode_len(len) bytes (see below) 44 | * This will contain the null-terminated b64 encoded result 45 | * returns length of the destination string plus the ending null byte 46 | * i.e. the result will be equal to strlen(dest) + 1 47 | * 48 | * Example 49 | * 50 | * \code 51 | * char* src = ...; 52 | * int srclen = ...; //the number of bytes in src 53 | * char* dest = (char*) malloc(chromium_base64_encode_len(srclen)); 54 | * size_t len = chromium_base64_encode(dest, src, srclen); 55 | * if (len == MODP_B64_ERROR) { 56 | * printf("Error\n"); 57 | * } else { 58 | * printf("b64 = %s\n", dest); 59 | * } 60 | * \endcode 61 | * 62 | */ 63 | size_t chromium_base64_encode(char* dest, const char* str, size_t len); 64 | 65 | /** 66 | * Decode a base64 encoded string 67 | * 68 | * 69 | * src should contain exactly len bytes of b64 characters. 70 | * if src contains -any- non-base characters (such as white 71 | * space), MODP_B64_ERROR is returned. 72 | * 73 | * dest should be allocated by the caller to contain at least 74 | * len * 3 / 4 bytes. 
75 | * 76 | * Returns the length (strlen) of the output, or MODP_B64_ERROR if unable to 77 | * decode 78 | * 79 | * \code 80 | * char* src = ...; 81 | * int srclen = ...; // or if you don't know use strlen(src) 82 | * char* dest = (char*) malloc(chromium_base64_encode_len(srclen)); 83 | * int len = chromium_base64_decode(dest, src, sourcelen); 84 | * if (len == MODP_B64_ERROR) { error } 85 | * \endcode 86 | */ 87 | size_t chromium_base64_decode(char* dest, const char* src, size_t len); 88 | 89 | /** 90 | * Given a source string of length len, this returns the amount of 91 | * memory the destination string should have. 92 | * 93 | * remember, this is integer math 94 | * 3 bytes turn into 4 chars 95 | * ceiling[len / 3] * 4 + 1 96 | * 97 | * +1 is for any extra null. 98 | */ 99 | #define chromium_base64_encode_len(A) ((A+2)/3 * 4 + 1) 100 | 101 | /** 102 | * Given a base64 string of length len, 103 | * this returns the amount of memory required for output string 104 | * It maybe be more than the actual number of bytes written. 105 | * NOTE: remember this is integer math 106 | * this allocates a bit more memory than traditional versions of b64 107 | * decode 4 chars turn into 3 bytes 108 | * floor[len * 3/4] + 2 109 | */ 110 | #define chromium_base64_decode_len(A) (A / 4 * 3 + 2) 111 | 112 | /** 113 | * Will return the strlen of the output from encoding. 114 | * This may be less than the required number of bytes allocated. 
115 | * 116 | * This allows you to 'deserialize' a struct 117 | * \code 118 | * char* b64encoded = "..."; 119 | * int len = strlen(b64encoded); 120 | * 121 | * struct datastuff foo; 122 | * if (chromium_base64_encode_strlen(sizeof(struct datastuff)) != len) { 123 | * // wrong size 124 | * return false; 125 | * } else { 126 | * // safe to do; 127 | * if (chromium_base64_decode((char*) &foo, b64encoded, len) == MODP_B64_ERROR) { 128 | * // bad characters 129 | * return false; 130 | * } 131 | * } 132 | * // foo is filled out now 133 | * \endcode 134 | */ 135 | #define chromium_base64_encode_strlen(A) (((A) + 2) / 3 * 4) 136 | 137 | 138 | 139 | #ifdef __cplusplus 140 | } 141 | 142 | #include 143 | 144 | 145 | /** 146 | * base 64 encode a string (self-modifying) 147 | * On failure, the string is empty. 148 | * 149 | * This function is for C++ only (duh) 150 | * 151 | * \param[in,out] s the string to be encoded 152 | * \return a reference to the input string 153 | */ 154 | inline std::string& chromium_base64_encode(std::string& s) 155 | { 156 | std::string x(chromium_base64_encode_len(s.size()), '\0'); 157 | size_t d = chromium_base64_encode(&x[0], s.data(), s.size()); // x is never empty (encode_len >= 1), so &x[0] is valid; the size is passed as size_t to avoid truncation through int 158 | if (d == MODP_B64_ERROR) { 159 | x.clear(); 160 | } else { 161 | x.erase(d, std::string::npos); 162 | } 163 | s.swap(x); 164 | return s; 165 | } 166 | 167 | #endif /* __cplusplus */ 168 | #endif 169 | -------------------------------------------------------------------------------- /cmake/GetGitRevisionDescription.cmake: -------------------------------------------------------------------------------- 1 | # - Returns a version string from Git 2 | # 3 | # These functions force a re-configure on each git commit so that you can 4 | # trust the values of the variables in your build system. 
5 | # 6 | # get_git_head_revision( [ ...]) 7 | # 8 | # Returns the refspec and sha hash of the current head revision 9 | # 10 | # git_describe( [ ...]) 11 | # 12 | # Returns the results of git describe on the source tree, and adjusting 13 | # the output so that it tests false if an error occurs. 14 | # 15 | # git_get_exact_tag( [ ...]) 16 | # 17 | # Returns the results of git describe --exact-match on the source tree, 18 | # and adjusting the output so that it tests false if there was no exact 19 | # matching tag. 20 | # 21 | # git_local_changes() 22 | # 23 | # Returns either "CLEAN" or "DIRTY" with respect to uncommitted changes. 24 | # Uses the return code of "git diff-index --quiet HEAD --". 25 | # Does not regard untracked files. 26 | # 27 | # Requires CMake 2.6 or newer (uses the 'function' command) 28 | # 29 | # Original Author: 30 | # 2009-2010 Ryan Pavlik 31 | # http://academic.cleardefinition.com 32 | # Iowa State University HCI Graduate Program/VRAC 33 | # 34 | # Copyright Iowa State University 2009-2010. 35 | # Distributed under the Boost Software License, Version 1.0. 
36 | # (See accompanying file LICENSE_1_0.txt or copy at 37 | # http://www.boost.org/LICENSE_1_0.txt) 38 | 39 | if (__get_git_revision_description) 40 | return() 41 | endif () 42 | set(__get_git_revision_description YES) 43 | 44 | # We must run the following at "include" time, not at function call time, 45 | # to find the path to this module rather than the path to a calling list file 46 | get_filename_component(_gitdescmoddir ${CMAKE_CURRENT_LIST_FILE} PATH) 47 | 48 | function(get_git_head_revision _refspecvar _hashvar) 49 | set(GIT_PARENT_DIR "${CMAKE_CURRENT_SOURCE_DIR}") 50 | set(GIT_DIR "${GIT_PARENT_DIR}/.git") 51 | while (NOT EXISTS "${GIT_DIR}") # .git dir not found, search parent directories 52 | set(GIT_PREVIOUS_PARENT "${GIT_PARENT_DIR}") 53 | get_filename_component(GIT_PARENT_DIR ${GIT_PARENT_DIR} PATH) 54 | if (GIT_PARENT_DIR STREQUAL GIT_PREVIOUS_PARENT) 55 | # We have reached the root directory, we are not in git 56 | set(${_refspecvar} "GITDIR-NOTFOUND" PARENT_SCOPE) 57 | set(${_hashvar} "GITDIR-NOTFOUND" PARENT_SCOPE) 58 | return() 59 | endif () 60 | set(GIT_DIR "${GIT_PARENT_DIR}/.git") 61 | endwhile () 62 | # check if this is a submodule 63 | if (NOT IS_DIRECTORY ${GIT_DIR}) 64 | file(READ ${GIT_DIR} submodule) 65 | string(REGEX REPLACE "gitdir: (.*)\n$" "\\1" GIT_DIR_RELATIVE ${submodule}) 66 | get_filename_component(SUBMODULE_DIR ${GIT_DIR} PATH) 67 | get_filename_component(GIT_DIR ${SUBMODULE_DIR}/${GIT_DIR_RELATIVE} ABSOLUTE) 68 | endif () 69 | set(GIT_DATA "${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/git-data") 70 | if (NOT EXISTS "${GIT_DATA}") 71 | file(MAKE_DIRECTORY "${GIT_DATA}") 72 | endif () 73 | 74 | if (NOT EXISTS "${GIT_DIR}/HEAD") 75 | return() 76 | endif () 77 | set(HEAD_FILE "${GIT_DATA}/HEAD") 78 | configure_file("${GIT_DIR}/HEAD" "${HEAD_FILE}" COPYONLY) 79 | 80 | configure_file("${_gitdescmoddir}/GetGitRevisionDescription.cmake.in" 81 | "${GIT_DATA}/grabRef.cmake" 82 | @ONLY) 83 | include("${GIT_DATA}/grabRef.cmake") 84 | 85 | 
set(${_refspecvar} "${HEAD_REF}" PARENT_SCOPE) 86 | set(${_hashvar} "${HEAD_HASH}" PARENT_SCOPE) 87 | endfunction() 88 | 89 | function(git_describe _var) 90 | if (NOT GIT_FOUND) 91 | find_package(Git QUIET) 92 | endif () 93 | get_git_head_revision(refspec hash) 94 | if (NOT GIT_FOUND) 95 | set(${_var} "GIT-NOTFOUND" PARENT_SCOPE) 96 | return() 97 | endif () 98 | if (NOT hash) 99 | set(${_var} "HEAD-HASH-NOTFOUND" PARENT_SCOPE) 100 | return() 101 | endif () 102 | 103 | # TODO sanitize 104 | #if((${ARGN}" MATCHES "&&") OR 105 | # (ARGN MATCHES "||") OR 106 | # (ARGN MATCHES "\\;")) 107 | # message("Please report the following error to the project!") 108 | # message(FATAL_ERROR "Looks like someone's doing something nefarious with git_describe! Passed arguments ${ARGN}") 109 | #endif() 110 | 111 | #message(STATUS "Arguments to execute_process: ${ARGN}") 112 | 113 | execute_process(COMMAND 114 | "${GIT_EXECUTABLE}" 115 | describe 116 | ${hash} 117 | ${ARGN} 118 | WORKING_DIRECTORY 119 | "${CMAKE_CURRENT_SOURCE_DIR}" 120 | RESULT_VARIABLE 121 | res 122 | OUTPUT_VARIABLE 123 | out 124 | ERROR_QUIET 125 | OUTPUT_STRIP_TRAILING_WHITESPACE) 126 | if (NOT res EQUAL 0) 127 | set(out "${out}-${res}-NOTFOUND") 128 | endif () 129 | 130 | set(${_var} "${out}" PARENT_SCOPE) 131 | endfunction() 132 | 133 | function(git_get_exact_tag _var) 134 | git_describe(out --exact-match ${ARGN}) 135 | set(${_var} "${out}" PARENT_SCOPE) 136 | endfunction() 137 | 138 | function(git_local_changes _var) 139 | if (NOT GIT_FOUND) 140 | find_package(Git QUIET) 141 | endif () 142 | get_git_head_revision(refspec hash) 143 | if (NOT GIT_FOUND) 144 | set(${_var} "GIT-NOTFOUND" PARENT_SCOPE) 145 | return() 146 | endif () 147 | if (NOT hash) 148 | set(${_var} "HEAD-HASH-NOTFOUND" PARENT_SCOPE) 149 | return() 150 | endif () 151 | 152 | execute_process(COMMAND 153 | "${GIT_EXECUTABLE}" 154 | diff-index --quiet HEAD -- 155 | WORKING_DIRECTORY 156 | "${CMAKE_CURRENT_SOURCE_DIR}" 157 | RESULT_VARIABLE 158 | res 
159 | OUTPUT_VARIABLE 160 | out 161 | ERROR_QUIET 162 | OUTPUT_STRIP_TRAILING_WHITESPACE) 163 | if (res EQUAL 0) 164 | set(${_var} "CLEAN" PARENT_SCOPE) 165 | else () 166 | set(${_var} "DIRTY" PARENT_SCOPE) 167 | endif () 168 | endfunction() 169 | -------------------------------------------------------------------------------- /cmake/FindSSE.cmake: -------------------------------------------------------------------------------- 1 | # Check if SSE/AVX instructions are available on the machine where 2 | # the project is compiled. 3 | 4 | IF(CMAKE_SYSTEM_NAME MATCHES "Linux") 5 | EXEC_PROGRAM(cat ARGS "/proc/cpuinfo" OUTPUT_VARIABLE CPUINFO) 6 | 7 | STRING(REGEX REPLACE "^.*(sse2).*$" "\\1" SSE_THERE ${CPUINFO}) 8 | STRING(COMPARE EQUAL "sse2" "${SSE_THERE}" SSE2_TRUE) 9 | IF (SSE2_TRUE) 10 | set(SSE2_FOUND true CACHE BOOL "SSE2 available on host") 11 | ELSE (SSE2_TRUE) 12 | set(SSE2_FOUND false CACHE BOOL "SSE2 available on host") 13 | ENDIF (SSE2_TRUE) 14 | 15 | # /proc/cpuinfo apparently omits sse3 :( 16 | STRING(REGEX REPLACE "^.*[^s](sse3).*$" "\\1" SSE_THERE ${CPUINFO}) 17 | STRING(COMPARE EQUAL "sse3" "${SSE_THERE}" SSE3_TRUE) 18 | IF (NOT SSE3_TRUE) 19 | STRING(REGEX REPLACE "^.*(T2300).*$" "\\1" SSE_THERE ${CPUINFO}) 20 | STRING(COMPARE EQUAL "T2300" "${SSE_THERE}" SSE3_TRUE) 21 | ENDIF (NOT SSE3_TRUE) 22 | 23 | STRING(REGEX REPLACE "^.*(ssse3).*$" "\\1" SSE_THERE ${CPUINFO}) 24 | STRING(COMPARE EQUAL "ssse3" "${SSE_THERE}" SSSE3_TRUE) 25 | IF (SSE3_TRUE OR SSSE3_TRUE) 26 | set(SSE3_FOUND true CACHE BOOL "SSE3 available on host") 27 | ELSE (SSE3_TRUE OR SSSE3_TRUE) 28 | set(SSE3_FOUND false CACHE BOOL "SSE3 available on host") 29 | ENDIF (SSE3_TRUE OR SSSE3_TRUE) 30 | IF (SSSE3_TRUE) 31 | set(SSSE3_FOUND true CACHE BOOL "SSSE3 available on host") 32 | ELSE (SSSE3_TRUE) 33 | set(SSSE3_FOUND false CACHE BOOL "SSSE3 available on host") 34 | ENDIF (SSSE3_TRUE) 35 | 36 | STRING(REGEX REPLACE "^.*(sse4_1).*$" "\\1" SSE_THERE ${CPUINFO}) 37 | STRING(COMPARE EQUAL 
"sse4_1" "${SSE_THERE}" SSE41_TRUE) 38 | IF (SSE41_TRUE) 39 | set(SSE4_1_FOUND true CACHE BOOL "SSE4.1 available on host") 40 | ELSE (SSE41_TRUE) 41 | set(SSE4_1_FOUND false CACHE BOOL "SSE4.1 available on host") 42 | ENDIF (SSE41_TRUE) 43 | 44 | STRING(REGEX REPLACE "^.*(avx).*$" "\\1" SSE_THERE ${CPUINFO}) 45 | STRING(COMPARE EQUAL "avx" "${SSE_THERE}" AVX_TRUE) 46 | IF (AVX_TRUE) 47 | set(AVX_FOUND true CACHE BOOL "AVX available on host") 48 | ELSE (AVX_TRUE) 49 | set(AVX_FOUND false CACHE BOOL "AVX available on host") 50 | ENDIF (AVX_TRUE) 51 | 52 | STRING(REGEX REPLACE "^.*(avx2).*$" "\\1" SSE_THERE ${CPUINFO}) 53 | STRING(COMPARE EQUAL "avx2" "${SSE_THERE}" AVX2_TRUE) 54 | IF (AVX2_TRUE) 55 | set(AVX2_FOUND true CACHE BOOL "AVX2 available on host") 56 | ELSE (AVX2_TRUE) 57 | set(AVX2_FOUND false CACHE BOOL "AVX2 available on host") 58 | ENDIF (AVX2_TRUE) 59 | 60 | ELSEIF(CMAKE_SYSTEM_NAME MATCHES "Darwin") 61 | EXEC_PROGRAM("/usr/sbin/sysctl -n machdep.cpu.features" OUTPUT_VARIABLE 62 | CPUINFO) 63 | 64 | STRING(REGEX REPLACE "^.*[^S](SSE2).*$" "\\1" SSE_THERE ${CPUINFO}) 65 | STRING(COMPARE EQUAL "SSE2" "${SSE_THERE}" SSE2_TRUE) 66 | IF (SSE2_TRUE) 67 | set(SSE2_FOUND true CACHE BOOL "SSE2 available on host") 68 | ELSE (SSE2_TRUE) 69 | set(SSE2_FOUND false CACHE BOOL "SSE2 available on host") 70 | ENDIF (SSE2_TRUE) 71 | 72 | STRING(REGEX REPLACE "^.*[^S](SSE3).*$" "\\1" SSE_THERE ${CPUINFO}) 73 | STRING(COMPARE EQUAL "SSE3" "${SSE_THERE}" SSE3_TRUE) 74 | IF (SSE3_TRUE) 75 | set(SSE3_FOUND true CACHE BOOL "SSE3 available on host") 76 | ELSE (SSE3_TRUE) 77 | set(SSE3_FOUND false CACHE BOOL "SSE3 available on host") 78 | ENDIF (SSE3_TRUE) 79 | 80 | STRING(REGEX REPLACE "^.*(SSSE3).*$" "\\1" SSE_THERE ${CPUINFO}) 81 | STRING(COMPARE EQUAL "SSSE3" "${SSE_THERE}" SSSE3_TRUE) 82 | IF (SSSE3_TRUE) 83 | set(SSSE3_FOUND true CACHE BOOL "SSSE3 available on host") 84 | ELSE (SSSE3_TRUE) 85 | set(SSSE3_FOUND false CACHE BOOL "SSSE3 available on host") 86 | ENDIF 
(SSSE3_TRUE) 87 | 88 | STRING(REGEX REPLACE "^.*(SSE4.1).*$" "\\1" SSE_THERE ${CPUINFO}) 89 | STRING(COMPARE EQUAL "SSE4.1" "${SSE_THERE}" SSE41_TRUE) 90 | IF (SSE41_TRUE) 91 | set(SSE4_1_FOUND true CACHE BOOL "SSE4.1 available on host") 92 | ELSE (SSE41_TRUE) 93 | set(SSE4_1_FOUND false CACHE BOOL "SSE4.1 available on host") 94 | ENDIF (SSE41_TRUE) 95 | 96 | STRING(REGEX REPLACE "^.*(AVX).*$" "\\1" SSE_THERE ${CPUINFO}) 97 | STRING(COMPARE EQUAL "AVX" "${SSE_THERE}" AVX_TRUE) 98 | IF (AVX_TRUE) 99 | set(AVX_FOUND true CACHE BOOL "AVX available on host") 100 | ELSE (AVX_TRUE) 101 | set(AVX_FOUND false CACHE BOOL "AVX available on host") 102 | ENDIF (AVX_TRUE) 103 | 104 | STRING(REGEX REPLACE "^.*(AVX2).*$" "\\1" SSE_THERE ${CPUINFO}) 105 | STRING(COMPARE EQUAL "AVX2" "${SSE_THERE}" AVX2_TRUE) 106 | IF (AVX2_TRUE) 107 | set(AVX2_FOUND true CACHE BOOL "AVX2 available on host") 108 | ELSE (AVX2_TRUE) 109 | set(AVX2_FOUND false CACHE BOOL "AVX2 available on host") 110 | ENDIF (AVX2_TRUE) 111 | 112 | ELSEIF(CMAKE_SYSTEM_NAME MATCHES "Windows") 113 | # TODO 114 | set(SSE2_FOUND true CACHE BOOL "SSE2 available on host") 115 | set(SSE3_FOUND false CACHE BOOL "SSE3 available on host") 116 | set(SSSE3_FOUND false CACHE BOOL "SSSE3 available on host") 117 | set(SSE4_1_FOUND false CACHE BOOL "SSE4.1 available on host") 118 | set(AVX_FOUND false CACHE BOOL "AVX available on host") 119 | set(AVX2_FOUND false CACHE BOOL "AVX2 available on host") 120 | ELSE(CMAKE_SYSTEM_NAME MATCHES "Linux") 121 | set(SSE2_FOUND true CACHE BOOL "SSE2 available on host") 122 | set(SSE3_FOUND false CACHE BOOL "SSE3 available on host") 123 | set(SSSE3_FOUND false CACHE BOOL "SSSE3 available on host") 124 | set(SSE4_1_FOUND false CACHE BOOL "SSE4.1 available on host") 125 | set(AVX_FOUND false CACHE BOOL "AVX available on host") 126 | set(AVX2_FOUND false CACHE BOOL "AVX2 available on host") 127 | ENDIF(CMAKE_SYSTEM_NAME MATCHES "Linux") 128 | 129 | if(NOT SSE2_FOUND) 130 | MESSAGE(STATUS "Could not 
find hardware support for SSE2 on this machine.") 131 | endif(NOT SSE2_FOUND) 132 | if(NOT SSE3_FOUND) 133 | MESSAGE(STATUS "Could not find hardware support for SSE3 on this machine.") 134 | endif(NOT SSE3_FOUND) 135 | if(NOT SSSE3_FOUND) 136 | MESSAGE(STATUS "Could not find hardware support for SSSE3 on this machine.") 137 | endif(NOT SSSE3_FOUND) 138 | if(NOT SSE4_1_FOUND) 139 | MESSAGE(STATUS "Could not find hardware support for SSE4.1 on this machine.") 140 | endif(NOT SSE4_1_FOUND) 141 | if(NOT AVX_FOUND) 142 | MESSAGE(STATUS "Could not find hardware support for AVX on this machine.") 143 | endif(NOT AVX_FOUND) 144 | if(NOT AVX2_FOUND) 145 | MESSAGE(STATUS "Could not find hardware support for AVX2 on this machine.") 146 | endif(NOT AVX2_FOUND) 147 | # CMake arguments are separated by whitespace; a comma would become part of the variable name. 148 | mark_as_advanced(SSE2_FOUND SSE3_FOUND SSSE3_FOUND SSE4_1_FOUND AVX_FOUND AVX2_FOUND) 149 | -------------------------------------------------------------------------------- /src/dependencies/fast_base64/fastavxbase64.c: -------------------------------------------------------------------------------- 1 | #include "fastavxbase64.h" 2 | 3 | #include 4 | #include 5 | 6 | /** 7 | * This code borrows from Wojciech Mula's library at 8 | * https://github.com/WojciechMula/base64simd (published under BSD) 9 | * as well as code from Alfred Klomp's library https://github.com/aklomp/base64 (published under BSD) 10 | * 11 | */ 12 | 13 | 14 | 15 | 16 | /** 17 | * Note : Hardware such as Knights Landing might do poorly with this AVX2 code since it relies on shuffles. Alternatives might be faster. 
18 | */ 19 | 20 | 21 | static inline __m256i enc_reshuffle(const __m256i input) { 22 | 23 | // translation from SSE into AVX2 of procedure 24 | // https://github.com/WojciechMula/base64simd/blob/master/encode/unpack_bigendian.cpp 25 | const __m256i in = _mm256_shuffle_epi8(input, _mm256_set_epi8( 26 | 10, 11, 9, 10, 27 | 7, 8, 6, 7, 28 | 4, 5, 3, 4, 29 | 1, 2, 0, 1, 30 | 31 | 14, 15, 13, 14, 32 | 11, 12, 10, 11, 33 | 8, 9, 7, 8, 34 | 5, 6, 4, 5 35 | )); 36 | 37 | const __m256i t0 = _mm256_and_si256(in, _mm256_set1_epi32(0x0fc0fc00)); 38 | const __m256i t1 = _mm256_mulhi_epu16(t0, _mm256_set1_epi32(0x04000040)); 39 | 40 | const __m256i t2 = _mm256_and_si256(in, _mm256_set1_epi32(0x003f03f0)); 41 | const __m256i t3 = _mm256_mullo_epi16(t2, _mm256_set1_epi32(0x01000010)); 42 | 43 | return _mm256_or_si256(t1, t3); 44 | } 45 | 46 | static inline __m256i enc_translate(const __m256i in) { 47 | const __m256i lut = _mm256_setr_epi8( 48 | 65, 71, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -19, -16, 0, 0, 65, 71, 49 | -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -19, -16, 0, 0); 50 | __m256i indices = _mm256_subs_epu8(in, _mm256_set1_epi8(51)); 51 | __m256i mask = _mm256_cmpgt_epi8((in), _mm256_set1_epi8(25)); 52 | indices = _mm256_sub_epi8(indices, mask); 53 | __m256i out = _mm256_add_epi8(in, _mm256_shuffle_epi8(lut, indices)); 54 | return out; 55 | } 56 | 57 | static inline __m256i dec_reshuffle(__m256i in) { 58 | 59 | // inlined procedure pack_madd from https://github.com/WojciechMula/base64simd/blob/master/decode/pack.avx2.cpp 60 | // The only difference is that elements are reversed, 61 | // only the multiplication constants were changed. 
62 | 63 | const __m256i merge_ab_and_bc = _mm256_maddubs_epi16(in, _mm256_set1_epi32(0x01400140)); //_mm256_maddubs_epi16 is likely expensive 64 | __m256i out = _mm256_madd_epi16(merge_ab_and_bc, _mm256_set1_epi32(0x00011000)); 65 | // end of inlined 66 | 67 | // Pack bytes together within 32-bit words, discarding words 3 and 7: 68 | out = _mm256_shuffle_epi8(out, _mm256_setr_epi8( 69 | 2, 1, 0, 6, 5, 4, 10, 9, 8, 14, 13, 12, -1, -1, -1, -1, 70 | 2, 1, 0, 6, 5, 4, 10, 9, 8, 14, 13, 12, -1, -1, -1, -1 71 | )); 72 | // the call to _mm256_permutevar8x32_epi32 could be replaced by a call to _mm256_storeu2_m128i but it is doubtful that it would help 73 | return _mm256_permutevar8x32_epi32( 74 | out, _mm256_setr_epi32(0, 1, 2, 4, 5, 6, -1, -1)); 75 | } 76 | 77 | 78 | size_t fast_avx2_base64_encode(char* dest, const char* str, size_t len) { 79 | const char* const dest_orig = dest; 80 | if(len >= 32 - 4) { 81 | // first load is masked 82 | __m256i inputvector = _mm256_maskload_epi32((int const*)(str - 4), _mm256_set_epi32( 83 | 0x80000000, 84 | 0x80000000, 85 | 0x80000000, 86 | 0x80000000, 87 | 88 | 0x80000000, 89 | 0x80000000, 90 | 0x80000000, 91 | 0x00000000 // we do not load the first 4 bytes 92 | )); 93 | ////////// 94 | // Intel docs: Faults occur only due to mask-bit required memory accesses that caused the faults. 95 | // Faults will not occur due to referencing any memory location if the corresponding mask bit for 96 | //that memory location is 0. For example, no faults will be detected if the mask bits are all zero. 
97 | //////////// 98 | while(true) { 99 | inputvector = enc_reshuffle(inputvector); 100 | inputvector = enc_translate(inputvector); 101 | _mm256_storeu_si256((__m256i *)dest, inputvector); 102 | str += 24; 103 | dest += 32; 104 | len -= 24; 105 | if(len >= 32) { 106 | inputvector = _mm256_loadu_si256((__m256i *)(str - 4)); // no need for a mask here 107 | // we could do a mask load as long as len >= 24 108 | } else { 109 | break; 110 | } 111 | } 112 | } 113 | size_t scalarret = chromium_base64_encode(dest, str, len); 114 | if(scalarret == MODP_B64_ERROR) return MODP_B64_ERROR; 115 | return (dest - dest_orig) + scalarret; 116 | } 117 | 118 | size_t fast_avx2_base64_decode(char *out, const char *src, size_t srclen) { 119 | char* out_orig = out; 120 | while (srclen >= 45) { 121 | 122 | // The input consists of six character sets in the Base64 alphabet, 123 | // which we need to map back to the 6-bit values they represent. 124 | // There are three ranges, two singles, and then there's the rest. 
125 | // 126 | // # From To Add Characters 127 | // 1 [43] [62] +19 + 128 | // 2 [47] [63] +16 / 129 | // 3 [48..57] [52..61] +4 0..9 130 | // 4 [65..90] [0..25] -65 A..Z 131 | // 5 [97..122] [26..51] -71 a..z 132 | // (6) Everything else => invalid input 133 | 134 | __m256i str = _mm256_loadu_si256((__m256i *)src); 135 | 136 | // code by @aqrit from 137 | // https://github.com/WojciechMula/base64simd/issues/3#issuecomment-271137490 138 | // transated into AVX2 139 | const __m256i lut_lo = _mm256_setr_epi8( 140 | 0x15, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 141 | 0x11, 0x11, 0x13, 0x1A, 0x1B, 0x1B, 0x1B, 0x1A, 142 | 0x15, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 143 | 0x11, 0x11, 0x13, 0x1A, 0x1B, 0x1B, 0x1B, 0x1A 144 | ); 145 | const __m256i lut_hi = _mm256_setr_epi8( 146 | 0x10, 0x10, 0x01, 0x02, 0x04, 0x08, 0x04, 0x08, 147 | 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 148 | 0x10, 0x10, 0x01, 0x02, 0x04, 0x08, 0x04, 0x08, 149 | 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10 150 | ); 151 | const __m256i lut_roll = _mm256_setr_epi8( 152 | 0, 16, 19, 4, -65, -65, -71, -71, 153 | 0, 0, 0, 0, 0, 0, 0, 0, 154 | 0, 16, 19, 4, -65, -65, -71, -71, 155 | 0, 0, 0, 0, 0, 0, 0, 0 156 | ); 157 | 158 | const __m256i mask_2F = _mm256_set1_epi8(0x2f); 159 | 160 | // lookup 161 | __m256i hi_nibbles = _mm256_srli_epi32(str, 4); 162 | __m256i lo_nibbles = _mm256_and_si256(str, mask_2F); 163 | 164 | const __m256i lo = _mm256_shuffle_epi8(lut_lo, lo_nibbles); 165 | const __m256i eq_2F = _mm256_cmpeq_epi8(str, mask_2F); 166 | 167 | hi_nibbles = _mm256_and_si256(hi_nibbles, mask_2F); 168 | const __m256i hi = _mm256_shuffle_epi8(lut_hi, hi_nibbles); 169 | const __m256i roll = _mm256_shuffle_epi8(lut_roll, _mm256_add_epi8(eq_2F, hi_nibbles)); 170 | 171 | if (!_mm256_testz_si256(lo, hi)) { 172 | break; 173 | } 174 | 175 | str = _mm256_add_epi8(str, roll); 176 | // end of copied function 177 | 178 | srclen -= 32; 179 | src += 32; 180 | 181 | // end of inlined function 182 | 183 | // 
Reshuffle the input to packed 12-byte output format: 184 | str = dec_reshuffle(str); 185 | _mm256_storeu_si256((__m256i *)out, str); 186 | out += 24; 187 | } 188 | size_t scalarret = chromium_base64_decode(out, src, srclen); 189 | if(scalarret == MODP_B64_ERROR) return MODP_B64_ERROR; 190 | return (out - out_orig) + scalarret; 191 | } 192 | -------------------------------------------------------------------------------- /src/benchmarker.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | #include 5 | #include // std::shuffle 6 | 7 | #include "../include/queue_store.h" 8 | #include "utils/utils.h" 9 | #include "utils/log.h" 10 | 11 | using namespace std::chrono; 12 | using std::unordered_map; 13 | using std::atomic_int; 14 | using std::atomic; 15 | using std::thread; 16 | using std::mutex; 17 | using std::unique_ptr; 18 | 19 | using namespace race2018; 20 | 21 | vector putLockArr; 22 | 23 | void Producer(queue_store *queueStore, int number, int64_t maxTimeStamp, int maxMsgNum, 24 | std::atomic *counter, unordered_map *queueCounter) { 25 | int64_t count; 26 | while ((count = counter->fetch_add(1, std::memory_order_seq_cst)) < maxMsgNum && 27 | getCurrentTimeInMS() <= maxTimeStamp) { 28 | int32_t qid = static_cast(count % queueCounter->size()); 29 | string queueName = "Queue-" + std::to_string(count % queueCounter->size()); 30 | putLockArr[qid].lock(); 31 | int counter_int = queueCounter->find(queueName)->second->fetch_add(1, std::memory_order_seq_cst); 32 | //log_info("%d", counter_int); 33 | string counter_str = std::to_string(counter_int); 34 | char *buf = new char[counter_str.length() + 1]; 35 | strcpy(buf, counter_str.c_str()); 36 | MemBlock msg = {static_cast(buf), 37 | static_cast(counter_str.length()) 38 | }; 39 | queueStore->put(queueName, msg); 40 | putLockArr[qid].unlock(); 41 | } 42 | } 43 | 44 | void IndexChecker(queue_store *queueStore, int number, int64_t maxTimeStamp, int maxMsgNum, 45 
| std::atomic *counter, unordered_map *queueCounter) { 46 | std::default_random_engine generator; 47 | std::uniform_int_distribution distribution(0, queueCounter->size() - 1); 48 | std::uniform_int_distribution queue_distribution[queueCounter->size()]; 49 | for (unsigned long i = 0; i < queueCounter->size(); i++) { 50 | string queueName = "Queue-" + std::to_string(i); 51 | int counter_int = queueCounter->find(queueName)->second->load(); 52 | queue_distribution[i] = std::uniform_int_distribution(0, counter_int - 1); 53 | } 54 | while (counter->fetch_add(1, std::memory_order_seq_cst) < maxMsgNum && getCurrentTimeInMS() <= maxTimeStamp) { 55 | int queueID = distribution(generator); 56 | string queueName = "Queue-" + std::to_string(queueID); 57 | int index = queue_distribution[queueID](generator) - 10; 58 | if (index < 0) index = 0; 59 | vector msgs = queueStore->get(queueName, index, 10); 60 | for (auto &msg: msgs) { 61 | string got_str = string((char *) msg.ptr, msg.size); 62 | string expected_str = std::to_string(index++); 63 | delete[] (char *) msg.ptr; 64 | if (got_str != expected_str) { 65 | log_error("check failed, got %s, expected: %s", got_str.c_str(), expected_str.c_str()); 66 | exit(-1); 67 | } 68 | } 69 | } 70 | } 71 | 72 | void Consumer(queue_store *queueStore, int number, int64_t maxTimeStamp, std::atomic *counter, 73 | unordered_map *queueCheckNames) { 74 | unordered_map pullOffsets; 75 | for (auto pair: *queueCheckNames) { 76 | int *count = static_cast(malloc(sizeof(int))); 77 | *count = 0; 78 | pullOffsets.insert(std::make_pair(pair.first, count)); 79 | //log_info("%s %d", pair.first.c_str(), pair.second); 80 | } 81 | 82 | while (!pullOffsets.empty() && getCurrentTimeInMS() < maxTimeStamp) { 83 | for (auto pair = pullOffsets.begin(); pair != pullOffsets.end();) { 84 | string queueName = pair->first; 85 | int *count = pair->second; 86 | int index = *count; 87 | vector msgs = queueStore->get(queueName, index, 10); 88 | //log_info("queueName %s msgs size 
%d", queueName.c_str(), msgs.size()); 89 | if (msgs.size() > 0) { 90 | *count += msgs.size(); 91 | for (auto &msg: msgs) { 92 | string got_str = string((char *) msg.ptr, msg.size); 93 | string expected_str = std::to_string(index++); 94 | delete[] (char *) msg.ptr; 95 | if (got_str != expected_str) { 96 | log_error("check failed, got %s, expected: %s", got_str.c_str(), expected_str.c_str()); 97 | exit(-1); 98 | } 99 | } 100 | counter->fetch_add(msgs.size(), std::memory_order_seq_cst); 101 | } 102 | if (msgs.size() < 10) { 103 | int got = *count; 104 | int expected = queueCheckNames->find(queueName)->second; 105 | if (got != expected) { 106 | log_error("Queue Number Error, got %d, expected: %d", got, expected); 107 | exit(-1); 108 | } 109 | pair = pullOffsets.erase(pair); 110 | } else { 111 | pair++; 112 | } 113 | } 114 | } 115 | } 116 | 117 | int main(int argc, char **argv) { 118 | //评测相关配置 119 | //发送阶段的发送数量,也即发送阶段必须要在规定时间内把这些消息发送完毕方可 120 | // int msgNum = 10000; 121 | int msgNum = 100000000; 122 | // int msgNum = 2000000000; 123 | //发送阶段的最大持续时间,也即在该时间内,如果消息依然没有发送完毕,则退出评测 124 | int sendTime = 10 * 60 * 1000; 125 | //消费阶段的最大持续时间,也即在该时间内,如果消息依然没有消费完毕,则退出评测 126 | int checkTime = 10 * 60 * 1000; 127 | //队列的数量 128 | int queueNum = 10000; 129 | //正确性检测的次数 130 | int checkNum = 1000; 131 | //消费阶段的总队列数量 132 | int checkQueueNum = 100; 133 | //发送的线程数量 134 | int sendTsNum = 10; 135 | //消费的线程数量 136 | int checkTsNum = 10; 137 | 138 | std::vector list(queueNum); 139 | putLockArr.swap(list); 140 | 141 | auto *queueNumMap = new unordered_map; 142 | for (int i = 0; i < queueNum; i++) { 143 | queueNumMap->insert(std::make_pair("Queue-" + std::to_string(i), new atomic_int(0))); 144 | } 145 | 146 | 147 | auto *queueStore = new queue_store(); 148 | 149 | //Step1: 发送消息 150 | int64_t sendStart = getCurrentTimeInMS(); 151 | int64_t maxTimeStamp = sendStart + sendTime; 152 | 153 | auto *sendCounter = new std::atomic(0); 154 | 155 | thread sends[sendTsNum]; 156 | for (int i = 0; i < 
sendTsNum; i++) { 157 | sends[i] = std::move(thread(Producer, queueStore, i, maxTimeStamp, msgNum, sendCounter, queueNumMap)); 158 | } 159 | 160 | for (int i = 0; i < sendTsNum; i++) { 161 | sends[i].join(); 162 | } 163 | 164 | int64_t sendSend = getCurrentTimeInMS(); 165 | log_info("Send: %d ms Num:%d\n", sendSend - sendStart, sendCounter->load()); 166 | 167 | //Step2: 索引的正确性校验 168 | int64_t indexCheckStart = getCurrentTimeInMS(); 169 | int64_t maxCheckTime = indexCheckStart + checkTime; 170 | auto *indexCheckCounter = new std::atomic(0); 171 | 172 | thread indexChecks[sendTsNum]; 173 | for (int i = 0; i < sendTsNum; i++) { 174 | indexChecks[i] = std::move( 175 | thread(IndexChecker, queueStore, i, maxCheckTime, checkNum, indexCheckCounter, queueNumMap)); 176 | } 177 | 178 | for (int i = 0; i < sendTsNum; i++) { 179 | indexChecks[i].join(); 180 | } 181 | int64_t indexCheckEnd = getCurrentTimeInMS(); 182 | log_info("Index Check: %d ms Num:%d\n", indexCheckEnd - indexCheckStart, 183 | indexCheckCounter->load()); 184 | 185 | //Step3: 消费消息,并验证顺序性 186 | int64_t checkStart = getCurrentTimeInMS(); 187 | auto *checkCounter = new std::atomic(0); 188 | 189 | thread checks[sendTsNum]; 190 | std::default_random_engine generator; 191 | std::uniform_int_distribution distribution(0, queueNum - 1); 192 | 193 | vector allQueueName; 194 | for (int i = 0; i < queueNum; i++) { 195 | allQueueName.push_back("Queue-" + std::to_string(i)); 196 | } 197 | std::shuffle(allQueueName.begin(), allQueueName.end(), 198 | std::default_random_engine((unsigned long long) getCurrentTimeInMS())); 199 | 200 | for (int i = 0; i < sendTsNum; i++) { 201 | int eachCheckQueueNum = checkQueueNum / checkTsNum; 202 | int myCheckQueueNumEnd = (i + 1) * eachCheckQueueNum; 203 | if (i == sendTsNum - 1) { 204 | myCheckQueueNumEnd = checkQueueNum; 205 | } 206 | auto *myCheckQueueName = new unordered_map; 207 | for (int j = i * eachCheckQueueNum; j < myCheckQueueNumEnd; j++) { 208 | myCheckQueueName->insert( 209 | 
std::make_pair(allQueueName[j], queueNumMap->find(allQueueName[j])->second->load())); 210 | } 211 | checks[i] = std::move(thread(Consumer, queueStore, i, maxCheckTime, checkCounter, myCheckQueueName)); 212 | } 213 | for (int i = 0; i < sendTsNum; i++) { 214 | checks[i].join(); 215 | } 216 | long checkEnd = getCurrentTimeInMS(); 217 | log_info("Check: %d ms Num: %d\n", checkEnd - checkStart, checkCounter->load()); 218 | 219 | //评测结果 220 | log_info("Tps:%f\n", ((sendCounter->load() + checkCounter->load() + indexCheckCounter->load()) + 0.1) * 1000 / 221 | ((sendSend - sendStart) + (checkEnd - checkStart) + (indexCheckEnd - indexCheckStart))); 222 | 223 | return 0; 224 | } -------------------------------------------------------------------------------- /src/io/asyncfileio.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | 17 | 18 | #include "blockingQueue.hpp" 19 | 20 | #include "../../include/queue_store_config.h" 21 | #include "../utils/log.h" 22 | #include "Barrier.hpp" 23 | #include "../utils/utils.h" 24 | 25 | #define CLUSTER_SIZE (77) 26 | #define INDEX_ENTRY_SIZE (42) 27 | #define RAW_NORMAL_MESSAGE_SIZE (58) 28 | #define MAX_CONCURRENT_INDEX_MAPPED_BLOCK_NUM 2 29 | #define FILESYSTEM_BLOCK_SIZE 512 30 | #define QUEUE_NUM_PER_IO_THREAD (TOTAL_QUEUE_NUM/IO_THREAD) 31 | #define INDEX_MAPPED_BLOCK_RAW_SIZE (INDEX_ENTRY_SIZE * QUEUE_NUM_PER_IO_THREAD * CLUSTER_SIZE) 32 | #define INDEX_MAPPED_BLOCK_PADDING_SIZE (FILESYSTEM_BLOCK_SIZE - INDEX_MAPPED_BLOCK_RAW_SIZE % FILESYSTEM_BLOCK_SIZE) 33 | #define INDEX_MAPPED_BLOCK_ALIGNED_SIZE (INDEX_MAPPED_BLOCK_RAW_SIZE + INDEX_MAPPED_BLOCK_PADDING_SIZE) 34 | #define INDEX_BLOCK_WRITE_TIMES_TO_FULL (INDEX_MAPPED_BLOCK_RAW_SIZE/INDEX_ENTRY_SIZE) 35 | #define MAX_MAPED_CHUNK_NUM (120l * 1024 * 
/**
 * Work item passed to an I/O thread through its blocking queue.
 *
 * The only payload is `global_offset`. Elsewhere in this header a value of -1
 * is used as the "flush and stop" sentinel: waitFinishIO() enqueues
 * asyncio_task_t(-1) and ioThreadFunction() exits when it sees it.
 */
class asyncio_task_t {
public:
    ssize_t global_offset;

    /** Creates a task with offset 0. */
    asyncio_task_t() : global_offset(0) {}

    /** Creates a task carrying the given offset (implicit conversion is kept on purpose). */
    asyncio_task_t(ssize_t global_offset) : global_offset(global_offset) {}
};
index_file_memory_block[i] = ;new char[INDEX_MAPPED_BLOCK_SIZE]; 102 | } 103 | 104 | queue_counter = new uint32_t[QUEUE_NUM_PER_IO_THREAD]; 105 | memset(queue_counter, 0, sizeof(uint32_t) * QUEUE_NUM_PER_IO_THREAD); 106 | mapped_block_mtx = new std::mutex[MAX_MAPED_CHUNK_NUM]; 107 | mapped_block_cond = new std::condition_variable[MAX_MAPED_CHUNK_NUM]; 108 | 109 | this->blockingQueue = new BlockingQueue; 110 | 111 | string tmp_str = file_prefix + "_" + std::to_string(tid) + ".data"; 112 | this->data_file_fd = open(tmp_str.c_str(), O_RDWR | O_CREAT, S_IRUSR | S_IWUSR); 113 | ftruncate(data_file_fd, 0); 114 | ftruncate(data_file_fd, DATA_FILE_MAX_SIZE); 115 | 116 | tmp_str = file_prefix + "_" + std::to_string(tid) + ".idx"; 117 | this->index_file_fd = open(tmp_str.c_str(), O_WRONLY | O_CREAT | O_DIRECT | O_SYNC, S_IRUSR | S_IWUSR); 118 | ftruncate(index_file_fd, 0); 119 | 120 | this->current_index_mapped_start_chunk = 0; 121 | this->current_index_mapped_start_offset = 0; 122 | this->current_index_mapped_end_offset = 123 | ((size_t) INDEX_MAPPED_BLOCK_ALIGNED_SIZE) * MAX_CONCURRENT_INDEX_MAPPED_BLOCK_NUM; 124 | this->index_file_size = 0; 125 | } 126 | 127 | void doIO(asyncio_task_t *asyncio_task) { 128 | for (; current_index_mapped_start_chunk < MAX_MAPED_CHUNK_NUM; current_index_mapped_start_chunk++) { 129 | if (index_mapped_block_write_counter[current_index_mapped_start_chunk].load() >= 130 | INDEX_BLOCK_WRITE_TIMES_TO_FULL) { 131 | 132 | index_file_size += INDEX_MAPPED_BLOCK_ALIGNED_SIZE; 133 | ftruncate(index_file_fd, index_file_size); 134 | 135 | pwrite(index_file_fd, index_file_memory_block[current_index_mapped_start_chunk], 136 | INDEX_MAPPED_BLOCK_ALIGNED_SIZE, 137 | INDEX_MAPPED_BLOCK_ALIGNED_SIZE * current_index_mapped_start_chunk); 138 | 139 | int next_chunk = current_index_mapped_start_chunk + MAX_CONCURRENT_INDEX_MAPPED_BLOCK_NUM; 140 | current_index_mapped_start_offset += INDEX_MAPPED_BLOCK_ALIGNED_SIZE; 141 | current_index_mapped_end_offset += 
INDEX_MAPPED_BLOCK_ALIGNED_SIZE; 142 | 143 | { 144 | std::unique_lock lock(mapped_block_mtx[next_chunk]); 145 | index_file_memory_block[next_chunk] = index_file_memory_block[current_index_mapped_start_chunk]; 146 | } 147 | mapped_block_cond[next_chunk].notify_all(); 148 | 149 | index_file_memory_block[current_index_mapped_start_chunk] = nullptr; 150 | 151 | log_info("io thread %d advanced to %d", this->thread_id, next_chunk); 152 | 153 | } else { 154 | break; 155 | } 156 | } 157 | } 158 | }; 159 | 160 | bool allFlushFlag = false; 161 | bool ioFinished = false; 162 | Barrier *barrier; 163 | atomic *finish_thread_counter; 164 | 165 | void ioThreadFunction(asyncfileio_thread_t *args) { 166 | asyncfileio_thread_t *work_thread = args; 167 | work_thread->status = AT_RUNNING; 168 | 169 | for (;;) { 170 | asyncio_task_t *task = work_thread->blockingQueue->take(); 171 | 172 | if (work_thread->status == AT_CLOSING || task->global_offset == -1) { 173 | size_t force_flush_chunk_num = 0; 174 | if (work_thread->thread_id < 1) { 175 | force_flush_chunk_num = 1; 176 | } 177 | work_thread->index_mapped_flush_start_chunkID = 178 | work_thread->current_index_mapped_start_chunk + force_flush_chunk_num; 179 | work_thread->index_mapped_flush_end_chunkID = work_thread->index_mapped_flush_start_chunkID; 180 | for (size_t i = work_thread->current_index_mapped_start_chunk; 181 | i < work_thread->index_mapped_flush_start_chunkID && 182 | work_thread->index_file_memory_block[i] != nullptr; i++) { 183 | work_thread->index_file_size += INDEX_MAPPED_BLOCK_ALIGNED_SIZE; 184 | ftruncate(work_thread->index_file_fd, work_thread->index_file_size); 185 | 186 | pwrite(work_thread->index_file_fd, work_thread->index_file_memory_block[i], 187 | INDEX_MAPPED_BLOCK_ALIGNED_SIZE, INDEX_MAPPED_BLOCK_ALIGNED_SIZE * i); 188 | free(work_thread->index_file_memory_block[i]); 189 | } 190 | fsync(work_thread->index_file_fd); 191 | finish_thread_counter->fetch_add(1); 192 | ftruncate(work_thread->data_file_fd, 
work_thread->data_file_current_size.load()); 193 | break; 194 | } 195 | work_thread->doIO(task); 196 | delete task; 197 | } 198 | } 199 | 200 | 201 | void flush_all_func(void *args); 202 | 203 | class asyncfileio_t { 204 | 205 | public: 206 | asyncfileio_t(std::string file_prefix) { 207 | malloc_stats(); 208 | MallocExtension::instance()->ReleaseFreeMemory(); 209 | malloc_stats(); 210 | this->file_prefix = file_prefix; 211 | finish_thread_counter = new atomic(0); 212 | barrier = new Barrier(SEND_THREAD_NUM); 213 | for (int i = 0; i < IO_THREAD; i++) { 214 | work_threads_object[i] = new asyncfileio_thread_t(i, file_prefix); 215 | } 216 | } 217 | 218 | void startIOThread() { 219 | for (int i = 0; i < IO_THREAD; i++) { 220 | work_threads_handle[i] = std::thread(ioThreadFunction, work_threads_object[i]); 221 | work_threads_handle[i].detach(); 222 | } 223 | } 224 | 225 | 226 | void waitFinishIO(int tid) { 227 | if (!ioFinished) { 228 | if (tid == 0) { 229 | printf("in wait_flush function %ld\n", getCurrentTimeInMS()); 230 | } 231 | barrier->Wait([this] { 232 | printf("start send flush cmd %ld\n", getCurrentTimeInMS()); 233 | for (int i = 0; i < IO_THREAD; i++) { 234 | asyncio_task_t *task = new asyncio_task_t(-1); 235 | work_threads_object[i]->blockingQueue->put(task); 236 | } 237 | printf("after send flush cmd %ld\n", getCurrentTimeInMS()); 238 | }); 239 | if (tid == 0) { 240 | printf("before wait flush finish %ld\n", getCurrentTimeInMS()); 241 | } 242 | while (finish_thread_counter->load() < IO_THREAD) {}; 243 | if (tid == 0) { 244 | printf("after wait flush finish %ld\n", getCurrentTimeInMS()); 245 | } 246 | barrier->Wait([this] { 247 | // malloc_stats(); 248 | MallocExtension::instance()->ReleaseFreeMemory(); 249 | // malloc_stats(); 250 | for (int i = 0; i < IO_THREAD; i++) { 251 | string tmp_str = file_prefix + "_" + std::to_string(i) + ".idx"; 252 | index_fds[i] = open(tmp_str.c_str(), O_RDONLY, S_IRUSR | S_IWUSR); 253 | data_fds[i] = 
work_threads_object[i]->data_file_fd; 254 | mapped_index_files_length[i] = work_threads_object[i]->index_file_size; 255 | 256 | int ret = posix_fadvise(index_fds[i], 0, 257 | work_threads_object[i]->index_file_size, 258 | POSIX_FADV_RANDOM); 259 | printf("ret %d\n", ret); 260 | } 261 | 262 | }); 263 | if (tid == 0) { 264 | printf("finish wait_flush function %ld\n", getCurrentTimeInMS()); 265 | } 266 | ioFinished = true; 267 | } 268 | } 269 | 270 | ~asyncfileio_t() { 271 | printf("f\n"); 272 | std::thread flush_thread(flush_all_func, this); 273 | flush_thread.detach(); 274 | } 275 | 276 | string file_prefix; 277 | 278 | asyncfileio_thread_t *work_threads_object[IO_THREAD]; 279 | std::thread work_threads_handle[IO_THREAD]; 280 | 281 | size_t mapped_index_files_length[IO_THREAD]; 282 | 283 | int index_fds[IO_THREAD]; 284 | int data_fds[IO_THREAD]; 285 | 286 | }; 287 | 288 | void flush_all_func(void *args) { 289 | asyncfileio_t *asyncfileio = (asyncfileio_t *) args; 290 | for (int tid = 0; tid < IO_THREAD; tid++) { 291 | asyncfileio_thread_t *work_thread = asyncfileio->work_threads_object[tid]; 292 | for (size_t i = work_thread->index_mapped_flush_start_chunkID; 293 | i < MAX_MAPED_CHUNK_NUM && work_thread->index_file_memory_block[i] != nullptr; i++) { 294 | work_thread->index_mapped_flush_end_chunkID++; 295 | work_thread->index_file_size += INDEX_MAPPED_BLOCK_ALIGNED_SIZE; 296 | ftruncate(work_thread->index_file_fd, work_thread->index_file_size); 297 | 298 | pwrite(work_thread->index_file_fd, work_thread->index_file_memory_block[i], 299 | INDEX_MAPPED_BLOCK_ALIGNED_SIZE, INDEX_MAPPED_BLOCK_ALIGNED_SIZE * i); 300 | free(work_thread->index_file_memory_block[i]); 301 | } 302 | fsync(work_thread->index_file_fd); 303 | } 304 | } -------------------------------------------------------------------------------- /src/utils/serialization.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 
"serialization.h" 5 | 6 | 7 | #include "../dependencies/fast_base64/fastavxbase64.h" 8 | #include "log.h" 9 | 10 | int serialize_base64_decoding(uint8_t *message, uint16_t len, uint8_t *serialized) { 11 | auto serialize_len = len - FIXED_PART_LEN; 12 | int padding_chars = (4 - serialize_len % 4) % 4; 13 | uint8_t *buf = message; 14 | 15 | size_t estimated_length = 3 * (serialize_len / 4 + (serialize_len % 4 == 0 ? 0 : 1)); 16 | memcpy(serialized + estimated_length, message + serialize_len, FIXED_PART_LEN); 17 | // attention: add padding chars, assume following chars enough >= 3 18 | memcpy(message + serialize_len, "BLINK", padding_chars); 19 | 20 | #ifdef __AVX2__ 21 | fast_avx2_base64_decode(reinterpret_cast(serialized), 22 | reinterpret_cast(buf), 23 | serialize_len + padding_chars); 24 | #else 25 | 26 | chromium_base64_decode(reinterpret_cast(serialized), 27 | reinterpret_cast(buf), 28 | serialize_len + padding_chars); 29 | #endif 30 | serialized[estimated_length + FIXED_PART_LEN] = padding_chars; 31 | return estimated_length + FIXED_PART_LEN + 1; 32 | } 33 | 34 | uint8_t *deserialize_base64_encoding(const uint8_t *serialized, uint16_t total_serialized_len, int &len) { 35 | auto serialize_len = total_serialized_len - FIXED_PART_LEN - 1; 36 | auto *deserialized = new uint8_t[total_serialized_len / 3 * 4 + 16]; 37 | 38 | #ifdef __AVX2__ 39 | size_t length = fast_avx2_base64_encode(reinterpret_cast(deserialized), 40 | reinterpret_cast(serialized), serialize_len); 41 | #else 42 | size_t length = chromium_base64_encode(reinterpret_cast(deserialized), 43 | reinterpret_cast(serialized), serialize_len); 44 | 45 | #endif 46 | memcpy(deserialized + length - serialized[total_serialized_len - 1], serialized + serialize_len, FIXED_PART_LEN); 47 | len = length - serialized[total_serialized_len - 1] + FIXED_PART_LEN; 48 | return deserialized; 49 | } 50 | 51 | // Skip index ================================================================================================= 52 | 
int serialize_base64_decoding_skip_index(uint8_t *message, uint16_t len, uint8_t *serialized) { 53 | auto serialize_len = len - FIXED_PART_LEN; 54 | int padding_chars = (4 - serialize_len % 4) % 4; 55 | uint8_t *buf = message; 56 | 57 | size_t estimated_length = 3 * (serialize_len / 4 + (serialize_len % 4 == 0 ? 0 : 1)); 58 | memcpy(serialized + estimated_length, message + serialize_len, BASE64_INFO_LEN); 59 | memcpy(serialized + estimated_length + BASE64_INFO_LEN, message + serialize_len + BASE64_INFO_LEN + INDEX_LEN, 60 | VARYING_VERIFY_LEN); 61 | // attention: add padding chars, assume following chars enough >= 3 62 | memcpy(message + serialize_len, "BLINK", padding_chars); 63 | 64 | #ifdef __AVX2__ 65 | fast_avx2_base64_decode(reinterpret_cast(serialized), 66 | reinterpret_cast(buf), 67 | serialize_len + padding_chars); 68 | #else 69 | 70 | chromium_base64_decode(reinterpret_cast(serialized), 71 | reinterpret_cast(buf), 72 | serialize_len + padding_chars); 73 | #endif 74 | serialized[estimated_length + FIXED_PART_LEN - INDEX_LEN] = padding_chars; 75 | return estimated_length + FIXED_PART_LEN - INDEX_LEN + 1; 76 | } 77 | 78 | uint8_t *deserialize_base64_encoding_add_index(const uint8_t *serialized, uint16_t total_serialized_len, 79 | int &deserialized_len, int32_t idx) { 80 | auto serialize_len = total_serialized_len - (FIXED_PART_LEN - INDEX_LEN) - 1; 81 | auto *deserialized = new uint8_t[total_serialized_len / 3 * 4 + 16]; 82 | 83 | #ifdef __AVX2__ 84 | size_t length = fast_avx2_base64_encode(reinterpret_cast(deserialized), 85 | reinterpret_cast(serialized), serialize_len); 86 | #else 87 | size_t length = chromium_base64_encode(reinterpret_cast(deserialized), 88 | reinterpret_cast(serialized), serialize_len); 89 | 90 | #endif 91 | size_t offset = length - serialized[total_serialized_len - 1]; 92 | memcpy(deserialized + offset, serialized + serialize_len, BASE64_INFO_LEN); 93 | offset += BASE64_INFO_LEN; 94 | memcpy(deserialized + offset, &idx, 
sizeof(int32_t)); 95 | offset += INDEX_LEN; 96 | memcpy(deserialized + offset, serialized + serialize_len + BASE64_INFO_LEN, VARYING_VERIFY_LEN); 97 | 98 | deserialized_len = length - serialized[total_serialized_len - 1] + FIXED_PART_LEN; 99 | return deserialized; 100 | } 101 | 102 | void deserialize_base64_encoding_add_index_in_place(const uint8_t *serialized, uint16_t total_serialized_len, 103 | uint8_t *deserialized, int &deserialized_len, int32_t idx) { 104 | auto serialize_len = total_serialized_len - (FIXED_PART_LEN - INDEX_LEN) - 1; 105 | 106 | #ifdef __AVX2__ 107 | size_t length = fast_avx2_base64_encode(reinterpret_cast(deserialized), 108 | reinterpret_cast(serialized), serialize_len); 109 | #else 110 | size_t length = chromium_base64_encode(reinterpret_cast(deserialized), 111 | reinterpret_cast(serialized), serialize_len); 112 | 113 | #endif 114 | size_t offset = length - serialized[total_serialized_len - 1]; 115 | memcpy(deserialized + offset, serialized + serialize_len, BASE64_INFO_LEN); 116 | offset += BASE64_INFO_LEN; 117 | memcpy(deserialized + offset, &idx, sizeof(int32_t)); 118 | offset += INDEX_LEN; 119 | memcpy(deserialized + offset, serialized + serialize_len + BASE64_INFO_LEN, VARYING_VERIFY_LEN); 120 | 121 | deserialized_len = length - serialized[total_serialized_len - 1] + FIXED_PART_LEN; 122 | } 123 | // End of Skip index ======================================================================================== 124 | 125 | // ------------------------------- Begin of Base36 ------------------------------------------------------------- 126 | int serialize_base36_decoding_skip_index(uint8_t *message, uint16_t len, uint8_t *serialized) { 127 | auto serialize_len = len - FIXED_PART_LEN; 128 | int padding_chars = (4 - serialize_len % 4) % 4; 129 | uint8_t *buf = message; 130 | 131 | size_t estimated_length = 3 * (serialize_len / 4 + (serialize_len % 4 == 0 ? 
0 : 1)); 132 | memcpy(serialized + estimated_length, message + serialize_len, BASE64_INFO_LEN); 133 | memcpy(serialized + estimated_length + BASE64_INFO_LEN, message + serialize_len + BASE64_INFO_LEN + INDEX_LEN, 134 | VARYING_VERIFY_LEN); 135 | // attention: add padding chars, assume following chars enough >= 3 136 | memcpy(message + serialize_len, "BLINK", padding_chars); 137 | 138 | #ifdef __AVX2__ 139 | fast_avx2_base64_decode(reinterpret_cast(serialized), 140 | reinterpret_cast(buf), 141 | serialize_len + padding_chars); 142 | #else 143 | 144 | chromium_base64_decode(reinterpret_cast(serialized), 145 | reinterpret_cast(buf), 146 | serialize_len + padding_chars); 147 | #endif 148 | return estimated_length + FIXED_PART_LEN - INDEX_LEN; 149 | } 150 | 151 | uint8_t *deserialize_base36_encoding_add_index(const uint8_t *serialized, uint16_t total_serialized_len, 152 | int &deserialized_len, int32_t idx) { 153 | auto serialize_len = total_serialized_len - (FIXED_PART_LEN - INDEX_LEN); 154 | auto *deserialized = new uint8_t[total_serialized_len / 3 * 4 + 16]; 155 | // 1st: deserialize preparation: base64 encoding 156 | #ifdef __AVX2__ 157 | size_t length = fast_avx2_base64_encode(reinterpret_cast(deserialized), 158 | reinterpret_cast(serialized), serialize_len); 159 | #else 160 | size_t length = chromium_base64_encode(reinterpret_cast(deserialized), 161 | reinterpret_cast(serialized), serialize_len); 162 | 163 | #endif 164 | // 2nd: skip padding (padding could be 'A'-'Z', '+', '/', '=') 165 | for (; deserialized[length - 1] >= 'A' && deserialized[length - 1] <= 'Z' && length >= 0; length--) {} 166 | 167 | // 3rd: append other info 168 | size_t offset = length; 169 | memcpy(deserialized + offset, serialized + serialize_len, BASE64_INFO_LEN); 170 | offset += BASE64_INFO_LEN; 171 | memcpy(deserialized + offset, &idx, sizeof(int32_t)); 172 | offset += INDEX_LEN; 173 | memcpy(deserialized + offset, serialized + serialize_len + BASE64_INFO_LEN, VARYING_VERIFY_LEN); 174 | 
175 | // 4th: assign the correct length 176 | deserialized_len = length + FIXED_PART_LEN; 177 | return deserialized; 178 | } 179 | // ------------------------------ End of Base36, do not support A-Z yet -------------------------------------------- 180 | 181 | int serialize(uint8_t *message, uint16_t len, uint8_t *serialized) { 182 | // add the header to indicate raw message varying-length part size 183 | int serialize_len = len - FIXED_PART_LEN; 184 | if (len < 128) { 185 | serialized[0] = static_cast(len - FIXED_PART_LEN); 186 | serialized += 1; 187 | } else { 188 | uint16_t tmp = (len - FIXED_PART_LEN); 189 | serialized[0] = static_cast((tmp >> 7u) | 0x80); // assume < 32767 190 | serialized[1] = static_cast(tmp & (uint8_t) 0x7f); 191 | serialized += 2; 192 | } 193 | uint32_t next_extra_3bits_idx = 5u * serialize_len; 194 | uint32_t next_5bits_idx = 0; 195 | 196 | // attention: message is not usable later 197 | for (int i = 0; i < serialize_len; i++) { 198 | message[i] = message[i] >= 'a' ? message[i] - 'a' : message[i] - '0' + (uint8_t) 26; 199 | } 200 | // attention: must clear to be correct 201 | memset(serialized, 0, (len - FIXED_PART_LEN)); 202 | // 1) construct the compressed part 203 | for (int i = 0; i < serialize_len; i++) { 204 | uint16_t cur_uchar = message[i]; 205 | uint16_t expand_uchar = cur_uchar < MAX_FIVE_BITS_INT ? 
(cur_uchar << 11u) : (MAX_FIVE_BITS_INT << 11u); 206 | 207 | int shift_bits = (next_5bits_idx & 0x7u); 208 | expand_uchar >>= shift_bits; 209 | int idx = (next_5bits_idx >> 3u); 210 | serialized[idx] |= (expand_uchar >> 8u); 211 | serialized[idx + 1] |= (expand_uchar & 0xffu); 212 | next_5bits_idx += 5; 213 | 214 | if (cur_uchar >= MAX_FIVE_BITS_INT) { 215 | // do extra bits operations 216 | expand_uchar = ((cur_uchar - MAX_FIVE_BITS_INT) << 13u); 217 | shift_bits = (next_extra_3bits_idx & 0x7u); 218 | expand_uchar >>= shift_bits; 219 | // assume little-endian 220 | idx = (next_extra_3bits_idx >> 3u); 221 | serialized[idx] |= (expand_uchar >> 8u); 222 | serialized[idx + 1] |= (expand_uchar & 0xffu); 223 | next_extra_3bits_idx += 3; 224 | } 225 | } 226 | 227 | // 2) left FIXED_PART_LEN, should use memcpy 228 | int start_copy_byte_idx = (next_extra_3bits_idx >> 3u) + ((next_extra_3bits_idx & 0x7u) != 0); 229 | memcpy(serialized + start_copy_byte_idx, message + serialize_len, FIXED_PART_LEN); 230 | return start_copy_byte_idx + FIXED_PART_LEN + (len < 128 ? 
1 : 2); 231 | } 232 | 233 | uint8_t *deserialize(const uint8_t *serialized, int &len) { 234 | // get the length of varying part 235 | uint16_t varying_byte_len; 236 | if ((serialized[0] & 0x80u) == 0) { 237 | varying_byte_len = serialized[0]; 238 | serialized += 1; 239 | } else { 240 | varying_byte_len = static_cast(((serialized[0] & 0x7fu) << 7u) + serialized[1]); 241 | serialized += 2; 242 | } 243 | uint32_t next_extra_3bits_idx = 5u * varying_byte_len; 244 | uint32_t next_5bits_idx = 0; 245 | 246 | auto *deserialized = new uint8_t[varying_byte_len + 8]; 247 | len = varying_byte_len + FIXED_PART_LEN; 248 | // deserialize 249 | for (int i = 0; i < varying_byte_len; i++) { 250 | int idx = (next_5bits_idx >> 3u); 251 | uint16_t value = (serialized[idx] << 8u) + serialized[idx + 1]; 252 | value = (value >> (11u - (next_5bits_idx & 07u))) & MAX_FIVE_BITS_INT; 253 | if (value != MAX_FIVE_BITS_INT) { 254 | deserialized[i] = static_cast(value < 26 ? 'a' + value : value - 26 + '0'); 255 | } else { 256 | idx = (next_extra_3bits_idx >> 3u); 257 | value = (serialized[idx] << 8u) + serialized[idx + 1]; 258 | value = (value >> (13u - (next_extra_3bits_idx & 07u))) & (uint8_t) 0x7; 259 | deserialized[i] = value + '5'; 260 | next_extra_3bits_idx += 3; 261 | } 262 | next_5bits_idx += 5; 263 | } 264 | 265 | // 2) copy the fixed part 266 | memcpy(deserialized + varying_byte_len, 267 | serialized + (next_extra_3bits_idx >> 3u) + ((next_extra_3bits_idx & 0x7u) != 0), FIXED_PART_LEN); 268 | return deserialized; 269 | } -------------------------------------------------------------------------------- /cmake/FindTBB.cmake: -------------------------------------------------------------------------------- 1 | # The MIT License (MIT) 2 | # 3 | # Copyright (c) 2015 Justus Calvin 4 | # 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, 
including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | # 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | # 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # 24 | # FindTBB 25 | # ------- 26 | # 27 | # Find TBB include directories and libraries. 28 | # 29 | # Usage: 30 | # 31 | # find_package(TBB [major[.minor]] [EXACT] 32 | # [QUIET] [REQUIRED] 33 | # [[COMPONENTS] [components...]] 34 | # [OPTIONAL_COMPONENTS components...]) 35 | # 36 | # where the allowed components are tbbmalloc and tbb_preview. Users may modify 37 | # the behavior of this module with the following variables: 38 | # 39 | # * TBB_ROOT_DIR - The base directory of the TBB installation. 40 | # * TBB_INCLUDE_DIR - The directory that contains the TBB header files. 41 | # * TBB_LIBRARY - The directory that contains the TBB library files. 42 | # * TBB_<library>_LIBRARY - The path of the corresponding TBB library. 43 | # These libraries, if specified, override the 44 | # corresponding library search results, where 45 | # <library> may be tbb, tbb_debug, tbbmalloc, tbbmalloc_debug, 46 | # tbb_preview, or tbb_preview_debug. 47 | # * TBB_USE_DEBUG_BUILD - The debug version of tbb libraries, if present, will 48 | # be used instead of the release version. 
49 | # 50 | # Users may modify the behavior of this module with the following environment 51 | # variables: 52 | # 53 | # * TBB_INSTALL_DIR 54 | # * TBBROOT 55 | # * LIBRARY_PATH 56 | # 57 | # This module will set the following variables: 58 | # 59 | # * TBB_FOUND - Set to false, or undefined, if we haven’t found, or 60 | # don’t want to use TBB. 61 | # * TBB_<component>_FOUND - If False, optional <component> part of TBB system is 62 | # not available. 63 | # * TBB_VERSION - The full version string 64 | # * TBB_VERSION_MAJOR - The major version 65 | # * TBB_VERSION_MINOR - The minor version 66 | # * TBB_INTERFACE_VERSION - The interface version number defined in 67 | # tbb/tbb_stddef.h. 68 | # * TBB_<library>_LIBRARY_RELEASE - The path of the TBB release version of 69 | # <library>, where <library> may be tbb, tbb_debug, 70 | # tbbmalloc, tbbmalloc_debug, tbb_preview, or 71 | # tbb_preview_debug. 72 | # * TBB_<library>_LIBRARY_DEBUG - The path of the TBB debug version of 73 | # <library>, where <library> may be tbb, tbb_debug, 74 | # tbbmalloc, tbbmalloc_debug, tbb_preview, or 75 | # tbb_preview_debug. 76 | # 77 | # The following variables should be used to build and link with TBB: 78 | # 79 | # * TBB_INCLUDE_DIRS - The include directory for TBB. 80 | # * TBB_LIBRARIES - The libraries to link against to use TBB. 81 | # * TBB_LIBRARIES_RELEASE - The release libraries to link against to use TBB. 82 | # * TBB_LIBRARIES_DEBUG - The debug libraries to link against to use TBB. 83 | # * TBB_DEFINITIONS - Definitions to use when compiling code that uses 84 | # TBB. 85 | # * TBB_DEFINITIONS_RELEASE - Definitions to use when compiling release code that 86 | # uses TBB. 87 | # * TBB_DEFINITIONS_DEBUG - Definitions to use when compiling debug code that 88 | # uses TBB. 89 | # 90 | # This module will also create the "tbb" target that may be used when building 91 | # executables and libraries. 
include(FindPackageHandleStandardArgs)

# Locate Intel TBB (headers, version and component libraries) unless a previous
# invocation already succeeded.
#
# Inputs read by this section:
#   TBB_ROOT_DIR, ENV{TBB_INSTALL_DIR}, ENV{TBBROOT} - user supplied search roots
#   TBB_INCLUDE_DIR, TBB_LIBRARY                     - extra search hints
#   TBB_USE_DEBUG_BUILD                              - force debug/release libraries
#   TBB_FIND_COMPONENTS                              - components requested by find_package()
#
# Outputs set by this section:
#   TBB_FOUND, TBB_INCLUDE_DIRS, TBB_LIBRARIES, TBB_DEFINITIONS,
#   TBB_LIBRARIES_{DEBUG,RELEASE}, TBB_DEFINITIONS_{DEBUG,RELEASE},
#   TBB_VERSION, TBB_VERSION_MAJOR, TBB_VERSION_MINOR, TBB_INTERFACE_VERSION,
#   TBB_<component>_FOUND, and (CMake >= 3.0) the imported target `tbb`.
if (NOT TBB_FOUND)

    ##################################
    # Check the build type
    ##################################

    # An explicit TBB_USE_DEBUG_BUILD wins; otherwise derive the flavour from
    # CMAKE_BUILD_TYPE (Debug and RelWithDebInfo both select the debug libraries).
    if (NOT DEFINED TBB_USE_DEBUG_BUILD)
        if (CMAKE_BUILD_TYPE MATCHES "(Debug|DEBUG|debug|RelWithDebInfo|RELWITHDEBINFO|relwithdebinfo)")
            set(TBB_BUILD_TYPE DEBUG)
        else ()
            set(TBB_BUILD_TYPE RELEASE)
        endif ()
    elseif (TBB_USE_DEBUG_BUILD)
        set(TBB_BUILD_TYPE DEBUG)
    else ()
        set(TBB_BUILD_TYPE RELEASE)
    endif ()

    ##################################
    # Set the TBB search directories
    ##################################

    # Define search paths based on user input and environment variables
    set(TBB_SEARCH_DIR ${TBB_ROOT_DIR} $ENV{TBB_INSTALL_DIR} $ENV{TBBROOT})

    # Define the search directories based on the current platform
    if (CMAKE_SYSTEM_NAME STREQUAL "Windows")
        set(TBB_DEFAULT_SEARCH_DIR "C:/Program Files/Intel/TBB"
                "C:/Program Files (x86)/Intel/TBB")

        # Set the target architecture
        if (CMAKE_SIZEOF_VOID_P EQUAL 8)
            set(TBB_ARCHITECTURE "intel64")
        else ()
            set(TBB_ARCHITECTURE "ia32")
        endif ()

        # Set the TBB search library path search suffix based on the version of VC
        if (WINDOWS_STORE)
            set(TBB_LIB_PATH_SUFFIX "lib/${TBB_ARCHITECTURE}/vc11_ui")
        elseif (MSVC14)
            set(TBB_LIB_PATH_SUFFIX "lib/${TBB_ARCHITECTURE}/vc14")
        elseif (MSVC12)
            set(TBB_LIB_PATH_SUFFIX "lib/${TBB_ARCHITECTURE}/vc12")
        elseif (MSVC11)
            set(TBB_LIB_PATH_SUFFIX "lib/${TBB_ARCHITECTURE}/vc11")
        elseif (MSVC10)
            set(TBB_LIB_PATH_SUFFIX "lib/${TBB_ARCHITECTURE}/vc10")
        endif ()

        # Add the library path search suffix for the VC independent version of TBB
        list(APPEND TBB_LIB_PATH_SUFFIX "lib/${TBB_ARCHITECTURE}/vc_mt")

    elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin")
        # OS X
        set(TBB_DEFAULT_SEARCH_DIR "/opt/intel/tbb")

        # TODO: Check to see which C++ library is being used by the compiler.
        # The variable name (not its expansion) is passed to if() so that an
        # empty CMAKE_SYSTEM_VERSION cannot produce a malformed condition.
        if (NOT CMAKE_SYSTEM_VERSION VERSION_LESS 13.0)
            # The default C++ library on OS X 10.9 and later is libc++
            set(TBB_LIB_PATH_SUFFIX "lib/libc++" "lib")
        else ()
            set(TBB_LIB_PATH_SUFFIX "lib")
        endif ()
    elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux")
        # Linux
        set(TBB_DEFAULT_SEARCH_DIR "/opt/intel/tbb")

        # TODO: Check compiler version to see whether the suffix should be
        #       /gcc4.1 or /gcc4.4. For now, assume that the compiler is
        #       gcc 4.4.x or later.
        if (CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64")
            set(TBB_LIB_PATH_SUFFIX "lib/intel64/gcc4.4")
        elseif (CMAKE_SYSTEM_PROCESSOR MATCHES "^i.86$")
            set(TBB_LIB_PATH_SUFFIX "lib/ia32/gcc4.4")
        endif ()
    endif ()

    ##################################
    # Find the TBB include dir
    ##################################

    find_path(TBB_INCLUDE_DIRS tbb/tbb.h
            HINTS ${TBB_INCLUDE_DIR} ${TBB_SEARCH_DIR}
            PATHS ${TBB_DEFAULT_SEARCH_DIR}
            PATH_SUFFIXES include)

    ##################################
    # Set version strings
    ##################################

    if (TBB_INCLUDE_DIRS)
        # The version macros live in tbb/tbb_stddef.h for classic TBB. Probe for
        # the header before reading it: file(READ) on a missing file is a fatal
        # error, which would abort configuration instead of reporting "not found".
        # NOTE(review): oneapi/tbb/version.h is assumed to be where oneTBB keeps
        # the same macros - confirm against the installed oneTBB headers.
        set(_tbb_version_file "")
        foreach (_tbb_version_header "tbb/tbb_stddef.h" "oneapi/tbb/version.h")
            if (EXISTS "${TBB_INCLUDE_DIRS}/${_tbb_version_header}")
                file(READ "${TBB_INCLUDE_DIRS}/${_tbb_version_header}" _tbb_version_file)
                break()
            endif ()
        endforeach ()

        # Only derive the version when the macros are really present; otherwise
        # REGEX REPLACE would hand back the whole file as the "version".
        if (_tbb_version_file MATCHES "#define TBB_VERSION_MAJOR [0-9]+")
            string(REGEX REPLACE ".*#define TBB_VERSION_MAJOR ([0-9]+).*" "\\1"
                    TBB_VERSION_MAJOR "${_tbb_version_file}")
            string(REGEX REPLACE ".*#define TBB_VERSION_MINOR ([0-9]+).*" "\\1"
                    TBB_VERSION_MINOR "${_tbb_version_file}")
            string(REGEX REPLACE ".*#define TBB_INTERFACE_VERSION ([0-9]+).*" "\\1"
                    TBB_INTERFACE_VERSION "${_tbb_version_file}")
            set(TBB_VERSION "${TBB_VERSION_MAJOR}.${TBB_VERSION_MINOR}")
        endif ()
        unset(_tbb_version_file)
    endif ()

    ##################################
    # Find TBB components
    ##################################

    # tbbmalloc_proxy is only searched for from TBB 4.3 onwards.
    if (TBB_VERSION VERSION_LESS 4.3)
        set(TBB_SEARCH_COMPOMPONENTS tbb_preview tbbmalloc tbb)
    else ()
        set(TBB_SEARCH_COMPOMPONENTS tbb_preview tbbmalloc_proxy tbbmalloc tbb)
    endif ()

    # Find each component
    foreach (_comp ${TBB_SEARCH_COMPOMPONENTS})
        # "tbb" is appended to the requested list, so the core library is
        # always searched for even when no component was requested.
        if (";${TBB_FIND_COMPONENTS};tbb;" MATCHES ";${_comp};")

            # Search for the libraries
            find_library(TBB_${_comp}_LIBRARY_RELEASE ${_comp}
                    HINTS ${TBB_LIBRARY} ${TBB_SEARCH_DIR}
                    PATHS ${TBB_DEFAULT_SEARCH_DIR} ENV LIBRARY_PATH
                    PATH_SUFFIXES ${TBB_LIB_PATH_SUFFIX})

            find_library(TBB_${_comp}_LIBRARY_DEBUG ${_comp}_debug
                    HINTS ${TBB_LIBRARY} ${TBB_SEARCH_DIR}
                    PATHS ${TBB_DEFAULT_SEARCH_DIR} ENV LIBRARY_PATH
                    PATH_SUFFIXES ${TBB_LIB_PATH_SUFFIX})

            if (TBB_${_comp}_LIBRARY_DEBUG)
                list(APPEND TBB_LIBRARIES_DEBUG "${TBB_${_comp}_LIBRARY_DEBUG}")
            endif ()
            if (TBB_${_comp}_LIBRARY_RELEASE)
                list(APPEND TBB_LIBRARIES_RELEASE "${TBB_${_comp}_LIBRARY_RELEASE}")
            endif ()
            # Pick the flavour matching the build type unless the caller
            # already provided TBB_<comp>_LIBRARY.
            if (TBB_${_comp}_LIBRARY_${TBB_BUILD_TYPE} AND NOT TBB_${_comp}_LIBRARY)
                set(TBB_${_comp}_LIBRARY "${TBB_${_comp}_LIBRARY_${TBB_BUILD_TYPE}}")
            endif ()

            if (TBB_${_comp}_LIBRARY AND EXISTS "${TBB_${_comp}_LIBRARY}")
                set(TBB_${_comp}_FOUND TRUE)
            else ()
                set(TBB_${_comp}_FOUND FALSE)
            endif ()

            # Mark internal variables as advanced
            mark_as_advanced(TBB_${_comp}_LIBRARY_RELEASE)
            mark_as_advanced(TBB_${_comp}_LIBRARY_DEBUG)
            mark_as_advanced(TBB_${_comp}_LIBRARY)

        endif ()
    endforeach ()

    ##################################
    # Set compile flags and libraries
    ##################################

    set(TBB_DEFINITIONS_RELEASE "")
    set(TBB_DEFINITIONS_DEBUG "-DTBB_USE_DEBUG=1")

    # Prefer the libraries of the selected build type, then fall back to
    # release, then to debug.
    if (TBB_LIBRARIES_${TBB_BUILD_TYPE})
        set(TBB_DEFINITIONS "${TBB_DEFINITIONS_${TBB_BUILD_TYPE}}")
        set(TBB_LIBRARIES "${TBB_LIBRARIES_${TBB_BUILD_TYPE}}")
    elseif (TBB_LIBRARIES_RELEASE)
        set(TBB_DEFINITIONS "${TBB_DEFINITIONS_RELEASE}")
        set(TBB_LIBRARIES "${TBB_LIBRARIES_RELEASE}")
    elseif (TBB_LIBRARIES_DEBUG)
        set(TBB_DEFINITIONS "${TBB_DEFINITIONS_DEBUG}")
        set(TBB_LIBRARIES "${TBB_LIBRARIES_DEBUG}")
    endif ()

    find_package_handle_standard_args(TBB
            REQUIRED_VARS TBB_INCLUDE_DIRS TBB_LIBRARIES
            HANDLE_COMPONENTS
            VERSION_VAR TBB_VERSION)

    ##################################
    # Create targets
    ##################################

    # The TARGET guard keeps a second inclusion (or a project that already
    # defines `tbb`) from failing in add_library().
    # NOTE(review): the IMPORTED_LOCATION* values below are unquoted lists; they
    # are only well-formed when a single component library was found - confirm
    # before requesting several components.
    if (NOT CMAKE_VERSION VERSION_LESS 3.0 AND TBB_FOUND AND NOT TARGET tbb)
        add_library(tbb SHARED IMPORTED)
        set_target_properties(tbb PROPERTIES
                INTERFACE_INCLUDE_DIRECTORIES ${TBB_INCLUDE_DIRS}
                IMPORTED_LOCATION ${TBB_LIBRARIES})
        if (TBB_LIBRARIES_RELEASE AND TBB_LIBRARIES_DEBUG)
            # Define TBB_USE_DEBUG=1 only for the Debug and RelWithDebInfo
            # configurations of the consuming target.
            set_target_properties(tbb PROPERTIES
                    INTERFACE_COMPILE_DEFINITIONS "$<$<OR:$<CONFIG:Debug>,$<CONFIG:RelWithDebInfo>>:TBB_USE_DEBUG=1>"
                    IMPORTED_LOCATION_DEBUG ${TBB_LIBRARIES_DEBUG}
                    IMPORTED_LOCATION_RELWITHDEBINFO ${TBB_LIBRARIES_DEBUG}
                    IMPORTED_LOCATION_RELEASE ${TBB_LIBRARIES_RELEASE}
                    IMPORTED_LOCATION_MINSIZEREL ${TBB_LIBRARIES_RELEASE}
                    )
        elseif (TBB_LIBRARIES_RELEASE)
            set_target_properties(tbb PROPERTIES IMPORTED_LOCATION ${TBB_LIBRARIES_RELEASE})
        else ()
            set_target_properties(tbb PROPERTIES
                    INTERFACE_COMPILE_DEFINITIONS "${TBB_DEFINITIONS_DEBUG}"
                    IMPORTED_LOCATION ${TBB_LIBRARIES_DEBUG}
                    )
        endif ()
    endif ()

    mark_as_advanced(TBB_INCLUDE_DIRS TBB_LIBRARIES)

    # Drop the helper variables that are only meaningful inside this module.
    unset(TBB_ARCHITECTURE)
    unset(TBB_BUILD_TYPE)
    unset(TBB_LIB_PATH_SUFFIX)
    unset(TBB_DEFAULT_SEARCH_DIR)

endif ()
/**
 * Extracts the numeric id encoded as the trailing run of decimal digits of a
 * queue name (e.g. "Queue-123" -> 123).
 *
 * @param queue_name queue name; only its trailing ASCII digits are examined.
 * @return the value of the trailing digit run, or 0 when the name is empty or
 *         does not end in a digit.
 *
 * The value is accumulated in an unsigned 64-bit integer so that the width does
 * not depend on the platform's `long` and an over-long digit run wraps instead
 * of triggering signed-overflow undefined behaviour.
 */
int64_t getQueueID(const std::string &queue_name) {
    // Walk backwards to find where the trailing digit run starts.
    std::size_t first_digit = queue_name.size();
    while (first_digit > 0 &&
           queue_name[first_digit - 1] >= '0' && queue_name[first_digit - 1] <= '9') {
        --first_digit;
    }

    // Accumulate most-significant digit first; no separate multiplier needed.
    uint64_t id = 0;
    for (std::size_t i = first_digit; i < queue_name.size(); ++i) {
        id = id * 10 + static_cast<uint64_t>(queue_name[i] - '0');
    }
    return static_cast<int64_t>(id);
}
// Reads messages of one queue by dispatching to doPhase2 or doPhase3.
//
// Each calling thread draws an id from the global tidCounter the first time it
// enters this function and keeps it for its lifetime (static thread_local).
// Threads whose id is below CHECK_THREAD_NUM are served by doPhase2; every
// other thread is served by doPhase3. queue_name, offset and number are
// forwarded unchanged.
//
// NOTE(review): the return type reads as a bare `vector` here; its template
// argument appears to have been lost in this copy of the file - confirm against
// the declaration in queue_store.h.
vector queue_store::get(const std::string &queue_name, long offset, long number) {
    // Initialised once per thread: the first call increments the shared counter.
    static thread_local int tid = ++tidCounter;
    if (tid < CHECK_THREAD_NUM) {
        return doPhase2(tid, queue_name, offset, number);
    }
    return doPhase3(tid, queue_name, offset, number);
}
uint64_t chunk_offset = (which_queue_in_this_io_thread * CLUSTER_SIZE); 103 | 104 | static thread_local unsigned char *index_record = (unsigned char *) memalign(FILESYSTEM_BLOCK_SIZE, 105 | (INDEX_ENTRY_SIZE * CLUSTER_SIZE) + 106 | FILESYSTEM_BLOCK_SIZE); 107 | for (auto queue_offset = static_cast(offset); queue_offset < max_offset;) { 108 | uint64_t chunk_id = ((queue_offset / CLUSTER_SIZE) * (CLUSTER_SIZE * QUEUE_NUM_PER_IO_THREAD) + chunk_offset + 109 | queue_offset % CLUSTER_SIZE); 110 | 111 | auto remaining_num = static_cast(CLUSTER_SIZE - queue_offset % CLUSTER_SIZE); // >= 1 112 | if (max_offset - queue_offset < remaining_num) { 113 | remaining_num = static_cast(max_offset - queue_offset); 114 | } 115 | uint64_t idx_file_offset = INDEX_ENTRY_SIZE * chunk_id; 116 | idx_file_offset = (idx_file_offset / INDEX_MAPPED_BLOCK_RAW_SIZE * INDEX_MAPPED_BLOCK_ALIGNED_SIZE) + 117 | (idx_file_offset % INDEX_MAPPED_BLOCK_RAW_SIZE); 118 | 119 | uint64_t idx_file_offset_aligned_start = (idx_file_offset / FILESYSTEM_BLOCK_SIZE * FILESYSTEM_BLOCK_SIZE); 120 | 121 | size_t which_mapped_chunk = idx_file_offset_aligned_start / INDEX_MAPPED_BLOCK_ALIGNED_SIZE; 122 | if (which_mapped_chunk < asyncfileio_thread->index_mapped_flush_start_chunkID) { 123 | pread(asyncfileio->index_fds[queueID % IO_THREAD], index_record, 124 | ((INDEX_ENTRY_SIZE * remaining_num + (idx_file_offset - idx_file_offset_aligned_start)) / 125 | FILESYSTEM_BLOCK_SIZE + 1) * FILESYSTEM_BLOCK_SIZE, 126 | idx_file_offset_aligned_start); 127 | } else { 128 | memcpy(index_record, 129 | asyncfileio_thread->index_file_memory_block[which_mapped_chunk] + 130 | (idx_file_offset_aligned_start % INDEX_MAPPED_BLOCK_ALIGNED_SIZE), 131 | (INDEX_ENTRY_SIZE * remaining_num) + (idx_file_offset - idx_file_offset_aligned_start)); 132 | } 133 | 134 | for (uint32_t i = 0; i < remaining_num; i++) { 135 | char *output_buf = nullptr; 136 | int output_length; 137 | unsigned char *serialized = 138 | index_record + INDEX_ENTRY_SIZE * i + 
idx_file_offset - idx_file_offset_aligned_start; 139 | if ((serialized[0] & 0xff) >> 2 != LARGE_MESSAGE_MAGIC_CHAR) { 140 | output_buf = (char *) deserialize_base36_encoding_add_index(serialized, INDEX_ENTRY_SIZE, 141 | output_length, queue_offset + i); 142 | } else { 143 | log_info("big msg"); 144 | size_t large_msg_size; 145 | size_t large_msg_offset; 146 | memcpy(&large_msg_offset, serialized + 4, 8); 147 | memcpy(&large_msg_size, serialized + 12, 8); 148 | unsigned char large_msg_buf[4096]; 149 | pread(asyncfileio->data_fds[queueID % IO_THREAD], large_msg_buf, large_msg_size, 150 | large_msg_offset); 151 | output_buf = (char *) deserialize_base36_encoding_add_index((uint8_t *) large_msg_buf, large_msg_size, 152 | output_length, queue_offset + i); 153 | } 154 | result.emplace_back(output_buf, (size_t) output_length); 155 | } 156 | queue_offset += remaining_num; 157 | } 158 | 159 | return result; 160 | } 161 | 162 | volatile bool startedReaderThreadFlag = false; 163 | 164 | std::vector queue_store::doPhase3(int tid, const std::string &queue_name, long offset, long number) { 165 | auto queueID = static_cast(getQueueID(queue_name)); 166 | static thread_local vector result; 167 | 168 | result.clear(); 169 | 170 | int threadID = queueID % IO_THREAD; 171 | asyncfileio_thread_t *asyncfileio_thread = asyncfileio->work_threads_object[threadID]; 172 | uint32_t which_queue_in_this_io_thread = queueID / IO_THREAD; 173 | 174 | size_t max_queue_offset = asyncfileio_thread->queue_counter[which_queue_in_this_io_thread]; 175 | size_t max_result_num = 10 < (max_queue_offset - offset) ? 
10 : (max_queue_offset - offset); 176 | 177 | if (offset == 0 && !startedReaderThreadFlag) { 178 | barrier1->Wait([this] { 179 | for (int i = 0; i < IO_THREAD; i++) { 180 | // for (size_t chunkID = asyncfileio->work_threads_object[i]->index_mapped_flush_start_chunkID; 181 | // chunkID < asyncfileio->work_threads_object[i]->index_mapped_flush_end_chunkID; chunkID++) { 182 | // //free(asyncfileio->work_threads_object[i]->index_file_memory_block[chunkID]); 183 | // log_debug("free thread %d chunk id %ld", i, chunkID); 184 | // } 185 | //MallocExtension::instance()->ReleaseFreeMemory(); 186 | close(asyncfileio->index_fds[i]); 187 | string tmp_str = asyncfileio->file_prefix + "_" + std::to_string(i) + ".idx"; 188 | asyncfileio->index_fds[i] = open(tmp_str.c_str(), O_RDONLY | O_DIRECT, S_IRUSR | S_IWUSR); 189 | // posix_fadvise(asyncfileio->index_fds[i], 0, 190 | // asyncfileio->mapped_index_files_length[i], 191 | // POSIX_FADV_NORMAL); 192 | } 193 | printf("phase3 start\n"); 194 | }); 195 | startedReaderThreadFlag = true; 196 | } 197 | 198 | if (max_result_num <= 0) { 199 | return result; 200 | } 201 | 202 | static thread_local unsigned char **reader_hash_buffer = new unsigned char *[TOTAL_QUEUE_NUM](); 203 | static thread_local short *reader_hash_buffer_start_offset = new short[TOTAL_QUEUE_NUM](); 204 | 205 | if (reader_hash_buffer[queueID] == nullptr) { 206 | reader_hash_buffer[queueID] = (unsigned char *) memalign(FILESYSTEM_BLOCK_SIZE, 207 | (INDEX_ENTRY_SIZE * CLUSTER_SIZE) + 208 | FILESYSTEM_BLOCK_SIZE); 209 | } 210 | 211 | size_t read_num_left = max_result_num; 212 | 213 | for (size_t new_offset = offset; new_offset < offset + max_result_num;) { 214 | 215 | size_t this_max_read = std::min(read_num_left, CLUSTER_SIZE - (new_offset % CLUSTER_SIZE)); 216 | 217 | if (new_offset % CLUSTER_SIZE == 0) { 218 | uint64_t chunk_offset = (which_queue_in_this_io_thread * CLUSTER_SIZE); 219 | 220 | uint64_t chunk_id = ((new_offset / CLUSTER_SIZE) * (CLUSTER_SIZE * 
QUEUE_NUM_PER_IO_THREAD) + chunk_offset + 221 | new_offset % CLUSTER_SIZE); 222 | 223 | uint64_t idx_file_offset = INDEX_ENTRY_SIZE * chunk_id; 224 | 225 | idx_file_offset = (idx_file_offset / INDEX_MAPPED_BLOCK_RAW_SIZE * INDEX_MAPPED_BLOCK_ALIGNED_SIZE) + 226 | (idx_file_offset % INDEX_MAPPED_BLOCK_RAW_SIZE); 227 | 228 | uint64_t idx_file_offset_aligned_start = (idx_file_offset / FILESYSTEM_BLOCK_SIZE * FILESYSTEM_BLOCK_SIZE); 229 | reader_hash_buffer_start_offset[queueID] = static_cast(idx_file_offset - 230 | idx_file_offset_aligned_start); 231 | 232 | size_t which_mapped_chunk = idx_file_offset_aligned_start / INDEX_MAPPED_BLOCK_ALIGNED_SIZE; 233 | 234 | if (which_mapped_chunk < asyncfileio_thread->index_mapped_flush_start_chunkID) { 235 | pread(asyncfileio->index_fds[threadID], reader_hash_buffer[queueID], 236 | ((INDEX_ENTRY_SIZE * CLUSTER_SIZE + (idx_file_offset - idx_file_offset_aligned_start)) / 237 | FILESYSTEM_BLOCK_SIZE + 1) * FILESYSTEM_BLOCK_SIZE, 238 | idx_file_offset_aligned_start); 239 | } else { 240 | memcpy(reader_hash_buffer[queueID], 241 | asyncfileio_thread->index_file_memory_block[which_mapped_chunk] + 242 | (idx_file_offset_aligned_start % INDEX_MAPPED_BLOCK_ALIGNED_SIZE), 243 | (INDEX_ENTRY_SIZE * CLUSTER_SIZE) + (idx_file_offset - idx_file_offset_aligned_start)); 244 | } 245 | 246 | } 247 | 248 | long in_cluster_offset = new_offset % CLUSTER_SIZE; 249 | 250 | for (uint32_t i = 0; i < this_max_read; i++) { 251 | char *output_buf = nullptr; 252 | int output_length; 253 | unsigned char *serialized = reader_hash_buffer[queueID] + INDEX_ENTRY_SIZE * (in_cluster_offset + i) + 254 | reader_hash_buffer_start_offset[queueID]; 255 | if ((serialized[0] & 0xff) >> 2 != LARGE_MESSAGE_MAGIC_CHAR) { 256 | output_buf = (char *) deserialize_base36_encoding_add_index(serialized, INDEX_ENTRY_SIZE, 257 | output_length, new_offset + i); 258 | } else { 259 | size_t large_msg_size; 260 | size_t large_msg_offset; 261 | memcpy(&large_msg_offset, serialized + 4, 
8); 262 | memcpy(&large_msg_size, serialized + 12, 8); 263 | unsigned char large_msg_buf[4096]; 264 | pread(asyncfileio->data_fds[queueID % IO_THREAD], large_msg_buf, large_msg_size, 265 | large_msg_offset); 266 | output_buf = (char *) deserialize_base36_encoding_add_index((uint8_t *) large_msg_buf, large_msg_size, 267 | output_length, new_offset + i); 268 | } 269 | result.emplace_back(output_buf, (size_t) output_length); 270 | } 271 | 272 | new_offset += this_max_read; 273 | read_num_left -= this_max_read; 274 | } 275 | 276 | return result; 277 | } 278 | 279 | 280 | queue_store::queue_store() { 281 | print_version(); 282 | barrier1 = new Barrier(CHECK_THREAD_NUM); 283 | 284 | asyncfileio = new asyncfileio_t(DATA_FILE_PATH); 285 | asyncfileio->startIOThread(); 286 | } -------------------------------------------------------------------------------- /src/dependencies/fast_base64/chromiumbase64.c: -------------------------------------------------------------------------------- 1 | #include "chromiumbase64.h" 2 | 3 | #define CHAR62 '+' 4 | #define CHAR63 '/' 5 | #define CHARPAD '=' 6 | static const char e0[256] = { 7 | 'A', 'A', 'A', 'A', 'B', 'B', 'B', 'B', 'C', 'C', 8 | 'C', 'C', 'D', 'D', 'D', 'D', 'E', 'E', 'E', 'E', 9 | 'F', 'F', 'F', 'F', 'G', 'G', 'G', 'G', 'H', 'H', 10 | 'H', 'H', 'I', 'I', 'I', 'I', 'J', 'J', 'J', 'J', 11 | 'K', 'K', 'K', 'K', 'L', 'L', 'L', 'L', 'M', 'M', 12 | 'M', 'M', 'N', 'N', 'N', 'N', 'O', 'O', 'O', 'O', 13 | 'P', 'P', 'P', 'P', 'Q', 'Q', 'Q', 'Q', 'R', 'R', 14 | 'R', 'R', 'S', 'S', 'S', 'S', 'T', 'T', 'T', 'T', 15 | 'U', 'U', 'U', 'U', 'V', 'V', 'V', 'V', 'W', 'W', 16 | 'W', 'W', 'X', 'X', 'X', 'X', 'Y', 'Y', 'Y', 'Y', 17 | 'Z', 'Z', 'Z', 'Z', 'a', 'a', 'a', 'a', 'b', 'b', 18 | 'b', 'b', 'c', 'c', 'c', 'c', 'd', 'd', 'd', 'd', 19 | 'e', 'e', 'e', 'e', 'f', 'f', 'f', 'f', 'g', 'g', 20 | 'g', 'g', 'h', 'h', 'h', 'h', 'i', 'i', 'i', 'i', 21 | 'j', 'j', 'j', 'j', 'k', 'k', 'k', 'k', 'l', 'l', 22 | 'l', 'l', 'm', 'm', 'm', 'm', 'n', 'n', 
'n', 'n', 23 | 'o', 'o', 'o', 'o', 'p', 'p', 'p', 'p', 'q', 'q', 24 | 'q', 'q', 'r', 'r', 'r', 'r', 's', 's', 's', 's', 25 | 't', 't', 't', 't', 'u', 'u', 'u', 'u', 'v', 'v', 26 | 'v', 'v', 'w', 'w', 'w', 'w', 'x', 'x', 'x', 'x', 27 | 'y', 'y', 'y', 'y', 'z', 'z', 'z', 'z', '0', '0', 28 | '0', '0', '1', '1', '1', '1', '2', '2', '2', '2', 29 | '3', '3', '3', '3', '4', '4', '4', '4', '5', '5', 30 | '5', '5', '6', '6', '6', '6', '7', '7', '7', '7', 31 | '8', '8', '8', '8', '9', '9', '9', '9', '+', '+', 32 | '+', '+', '/', '/', '/', '/' 33 | }; 34 | 35 | static const char e1[256] = { 36 | 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 37 | 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 38 | 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 39 | 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 40 | 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 41 | 'y', 'z', '0', '1', '2', '3', '4', '5', '6', '7', 42 | '8', '9', '+', '/', 'A', 'B', 'C', 'D', 'E', 'F', 43 | 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 44 | 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 45 | 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 46 | 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 47 | 'u', 'v', 'w', 'x', 'y', 'z', '0', '1', '2', '3', 48 | '4', '5', '6', '7', '8', '9', '+', '/', 'A', 'B', 49 | 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 50 | 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 51 | 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', 52 | 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 53 | 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 54 | '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 55 | '+', '/', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 56 | 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 57 | 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 58 | 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 59 | 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 60 | 'w', 'x', 'y', 'z', '0', '1', '2', '3', '4', '5', 61 | '6', '7', '8', '9', '+', '/' 62 
| }; 63 | 64 | static const char e2[256] = { 65 | 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 66 | 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 67 | 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 68 | 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 69 | 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 70 | 'y', 'z', '0', '1', '2', '3', '4', '5', '6', '7', 71 | '8', '9', '+', '/', 'A', 'B', 'C', 'D', 'E', 'F', 72 | 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 73 | 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 74 | 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 75 | 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 76 | 'u', 'v', 'w', 'x', 'y', 'z', '0', '1', '2', '3', 77 | '4', '5', '6', '7', '8', '9', '+', '/', 'A', 'B', 78 | 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 79 | 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 80 | 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', 81 | 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 82 | 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 83 | '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 84 | '+', '/', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 85 | 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 86 | 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 87 | 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 88 | 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 89 | 'w', 'x', 'y', 'z', '0', '1', '2', '3', '4', '5', 90 | '6', '7', '8', '9', '+', '/' 91 | }; 92 | 93 | 94 | 95 | /* SPECIAL DECODE TABLES FOR LITTLE ENDIAN (INTEL) CPUS */ 96 | 97 | static const uint32_t d0[256] = { 98 | 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 99 | 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 100 | 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 101 | 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 102 | 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 103 | 0x01ffffff, 
0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 104 | 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 105 | 0x01ffffff, 0x000000f8, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x000000fc, 106 | 0x000000d0, 0x000000d4, 0x000000d8, 0x000000dc, 0x000000e0, 0x000000e4, 107 | 0x000000e8, 0x000000ec, 0x000000f0, 0x000000f4, 0x01ffffff, 0x01ffffff, 108 | 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x00000000, 109 | 0x00000004, 0x00000008, 0x0000000c, 0x00000010, 0x00000014, 0x00000018, 110 | 0x0000001c, 0x00000020, 0x00000024, 0x00000028, 0x0000002c, 0x00000030, 111 | 0x00000034, 0x00000038, 0x0000003c, 0x00000040, 0x00000044, 0x00000048, 112 | 0x0000004c, 0x00000050, 0x00000054, 0x00000058, 0x0000005c, 0x00000060, 113 | 0x00000064, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 114 | 0x01ffffff, 0x00000068, 0x0000006c, 0x00000070, 0x00000074, 0x00000078, 115 | 0x0000007c, 0x00000080, 0x00000084, 0x00000088, 0x0000008c, 0x00000090, 116 | 0x00000094, 0x00000098, 0x0000009c, 0x000000a0, 0x000000a4, 0x000000a8, 117 | 0x000000ac, 0x000000b0, 0x000000b4, 0x000000b8, 0x000000bc, 0x000000c0, 118 | 0x000000c4, 0x000000c8, 0x000000cc, 0x01ffffff, 0x01ffffff, 0x01ffffff, 119 | 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 120 | 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 121 | 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 122 | 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 123 | 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 124 | 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 125 | 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 126 | 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 127 | 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 128 | 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 
0x01ffffff, 129 | 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 130 | 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 131 | 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 132 | 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 133 | 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 134 | 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 135 | 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 136 | 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 137 | 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 138 | 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 139 | 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 140 | 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff 141 | }; 142 | 143 | 144 | static const uint32_t d1[256] = { 145 | 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 146 | 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 147 | 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 148 | 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 149 | 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 150 | 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 151 | 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 152 | 0x01ffffff, 0x0000e003, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x0000f003, 153 | 0x00004003, 0x00005003, 0x00006003, 0x00007003, 0x00008003, 0x00009003, 154 | 0x0000a003, 0x0000b003, 0x0000c003, 0x0000d003, 0x01ffffff, 0x01ffffff, 155 | 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x00000000, 156 | 0x00001000, 0x00002000, 0x00003000, 0x00004000, 0x00005000, 0x00006000, 157 | 0x00007000, 0x00008000, 0x00009000, 0x0000a000, 0x0000b000, 0x0000c000, 
158 | 0x0000d000, 0x0000e000, 0x0000f000, 0x00000001, 0x00001001, 0x00002001, 159 | 0x00003001, 0x00004001, 0x00005001, 0x00006001, 0x00007001, 0x00008001, 160 | 0x00009001, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 161 | 0x01ffffff, 0x0000a001, 0x0000b001, 0x0000c001, 0x0000d001, 0x0000e001, 162 | 0x0000f001, 0x00000002, 0x00001002, 0x00002002, 0x00003002, 0x00004002, 163 | 0x00005002, 0x00006002, 0x00007002, 0x00008002, 0x00009002, 0x0000a002, 164 | 0x0000b002, 0x0000c002, 0x0000d002, 0x0000e002, 0x0000f002, 0x00000003, 165 | 0x00001003, 0x00002003, 0x00003003, 0x01ffffff, 0x01ffffff, 0x01ffffff, 166 | 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 167 | 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 168 | 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 169 | 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 170 | 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 171 | 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 172 | 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 173 | 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 174 | 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 175 | 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 176 | 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 177 | 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 178 | 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 179 | 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 180 | 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 181 | 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 182 | 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 183 | 0x01ffffff, 0x01ffffff, 0x01ffffff, 
0x01ffffff, 0x01ffffff, 0x01ffffff, 184 | 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 185 | 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 186 | 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 187 | 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff 188 | }; 189 | 190 | 191 | static const uint32_t d2[256] = { 192 | 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 193 | 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 194 | 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 195 | 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 196 | 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 197 | 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 198 | 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 199 | 0x01ffffff, 0x00800f00, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x00c00f00, 200 | 0x00000d00, 0x00400d00, 0x00800d00, 0x00c00d00, 0x00000e00, 0x00400e00, 201 | 0x00800e00, 0x00c00e00, 0x00000f00, 0x00400f00, 0x01ffffff, 0x01ffffff, 202 | 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x00000000, 203 | 0x00400000, 0x00800000, 0x00c00000, 0x00000100, 0x00400100, 0x00800100, 204 | 0x00c00100, 0x00000200, 0x00400200, 0x00800200, 0x00c00200, 0x00000300, 205 | 0x00400300, 0x00800300, 0x00c00300, 0x00000400, 0x00400400, 0x00800400, 206 | 0x00c00400, 0x00000500, 0x00400500, 0x00800500, 0x00c00500, 0x00000600, 207 | 0x00400600, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 208 | 0x01ffffff, 0x00800600, 0x00c00600, 0x00000700, 0x00400700, 0x00800700, 209 | 0x00c00700, 0x00000800, 0x00400800, 0x00800800, 0x00c00800, 0x00000900, 210 | 0x00400900, 0x00800900, 0x00c00900, 0x00000a00, 0x00400a00, 0x00800a00, 211 | 0x00c00a00, 0x00000b00, 0x00400b00, 0x00800b00, 0x00c00b00, 0x00000c00, 212 | 0x00400c00, 0x00800c00, 0x00c00c00, 0x01ffffff, 
0x01ffffff, 0x01ffffff, 213 | 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 214 | 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 215 | 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 216 | 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 217 | 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 218 | 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 219 | 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 220 | 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 221 | 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 222 | 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 223 | 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 224 | 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 225 | 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 226 | 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 227 | 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 228 | 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 229 | 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 230 | 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 231 | 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 232 | 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 233 | 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 234 | 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff 235 | }; 236 | 237 | 238 | static const uint32_t d3[256] = { 239 | 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 240 | 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 241 | 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 
0x01ffffff, 242 | 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 243 | 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 244 | 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 245 | 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 246 | 0x01ffffff, 0x003e0000, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x003f0000, 247 | 0x00340000, 0x00350000, 0x00360000, 0x00370000, 0x00380000, 0x00390000, 248 | 0x003a0000, 0x003b0000, 0x003c0000, 0x003d0000, 0x01ffffff, 0x01ffffff, 249 | 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x00000000, 250 | 0x00010000, 0x00020000, 0x00030000, 0x00040000, 0x00050000, 0x00060000, 251 | 0x00070000, 0x00080000, 0x00090000, 0x000a0000, 0x000b0000, 0x000c0000, 252 | 0x000d0000, 0x000e0000, 0x000f0000, 0x00100000, 0x00110000, 0x00120000, 253 | 0x00130000, 0x00140000, 0x00150000, 0x00160000, 0x00170000, 0x00180000, 254 | 0x00190000, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 255 | 0x01ffffff, 0x001a0000, 0x001b0000, 0x001c0000, 0x001d0000, 0x001e0000, 256 | 0x001f0000, 0x00200000, 0x00210000, 0x00220000, 0x00230000, 0x00240000, 257 | 0x00250000, 0x00260000, 0x00270000, 0x00280000, 0x00290000, 0x002a0000, 258 | 0x002b0000, 0x002c0000, 0x002d0000, 0x002e0000, 0x002f0000, 0x00300000, 259 | 0x00310000, 0x00320000, 0x00330000, 0x01ffffff, 0x01ffffff, 0x01ffffff, 260 | 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 261 | 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 262 | 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 263 | 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 264 | 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 265 | 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 266 | 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 267 | 0x01ffffff, 0x01ffffff, 
0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 268 | 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 269 | 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 270 | 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 271 | 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 272 | 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 273 | 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 274 | 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 275 | 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 276 | 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 277 | 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 278 | 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 279 | 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 280 | 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 281 | 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff 282 | }; 283 | 284 | 285 | 286 | #define BADCHAR 0x01FFFFFF 287 | 288 | /** 289 | * you can control if we use padding by commenting out this 290 | * next line. However, I highly recommend you use padding and not 291 | * using it should only be for compatability with a 3rd party. 292 | * Also, 'no padding' is not tested! 293 | */ 294 | #define DOPAD 1 295 | 296 | /* 297 | * if we aren't doing padding 298 | * set the pad character to NULL 299 | */ 300 | #ifndef DOPAD 301 | #undef CHARPAD 302 | #define CHARPAD '\0' 303 | #endif 304 | 305 | size_t chromium_base64_encode(char* dest, const char* str, size_t len) 306 | { 307 | size_t i = 0; 308 | uint8_t* p = (uint8_t*) dest; 309 | 310 | /* unsigned here is important! 
*/ 311 | uint8_t t1, t2, t3; 312 | 313 | if (len > 2) { 314 | for (; i < len - 2; i += 3) { 315 | t1 = str[i]; t2 = str[i+1]; t3 = str[i+2]; 316 | *p++ = e0[t1]; 317 | *p++ = e1[((t1 & 0x03) << 4) | ((t2 >> 4) & 0x0F)]; 318 | *p++ = e1[((t2 & 0x0F) << 2) | ((t3 >> 6) & 0x03)]; 319 | *p++ = e2[t3]; 320 | } 321 | } 322 | 323 | switch (len - i) { 324 | case 0: 325 | break; 326 | case 1: 327 | t1 = str[i]; 328 | *p++ = e0[t1]; 329 | *p++ = e1[(t1 & 0x03) << 4]; 330 | *p++ = CHARPAD; 331 | *p++ = CHARPAD; 332 | break; 333 | default: /* case 2 */ 334 | t1 = str[i]; t2 = str[i+1]; 335 | *p++ = e0[t1]; 336 | *p++ = e1[((t1 & 0x03) << 4) | ((t2 >> 4) & 0x0F)]; 337 | *p++ = e2[(t2 & 0x0F) << 2]; 338 | *p++ = CHARPAD; 339 | } 340 | 341 | *p = '\0'; 342 | return p - (uint8_t*)dest; 343 | } 344 | 345 | 346 | size_t chromium_base64_decode(char* dest, const char* src, size_t len) 347 | { 348 | if (len == 0) return 0; 349 | 350 | #ifdef DOPAD 351 | /* 352 | * if padding is used, then the message must be at least 353 | * 4 chars and be a multiple of 4 354 | */ 355 | if (len < 4 || (len % 4 != 0)) { 356 | return MODP_B64_ERROR; /* error */ 357 | } 358 | /* there can be at most 2 pad chars at the end */ 359 | if (src[len-1] == CHARPAD) { 360 | len--; 361 | if (src[len -1] == CHARPAD) { 362 | len--; 363 | } 364 | } 365 | #endif 366 | 367 | size_t i; 368 | int leftover = len % 4; 369 | size_t chunks = (leftover == 0) ? 
len / 4 - 1 : len /4; 370 | 371 | uint8_t* p = (uint8_t*)dest; 372 | uint32_t x = 0; 373 | const uint8_t* y = (uint8_t*)src; 374 | for (i = 0; i < chunks; ++i, y += 4) { 375 | x = d0[y[0]] | d1[y[1]] | d2[y[2]] | d3[y[3]]; 376 | if (x >= BADCHAR) return MODP_B64_ERROR; 377 | *p++ = ((uint8_t*)(&x))[0]; 378 | *p++ = ((uint8_t*)(&x))[1]; 379 | *p++ = ((uint8_t*)(&x))[2]; 380 | } 381 | 382 | switch (leftover) { 383 | case 0: 384 | x = d0[y[0]] | d1[y[1]] | d2[y[2]] | d3[y[3]]; 385 | 386 | if (x >= BADCHAR) return MODP_B64_ERROR; 387 | *p++ = ((uint8_t*)(&x))[0]; 388 | *p++ = ((uint8_t*)(&x))[1]; 389 | *p = ((uint8_t*)(&x))[2]; 390 | return (chunks+1)*3; 391 | break; 392 | case 1: /* with padding this is an impossible case */ 393 | x = d0[y[0]]; 394 | *p = *((uint8_t*)(&x)); // i.e. first char/byte in int 395 | break; 396 | case 2: // * case 2, 1 output byte */ 397 | x = d0[y[0]] | d1[y[1]]; 398 | *p = *((uint8_t*)(&x)); // i.e. first char 399 | break; 400 | default: /* case 3, 2 output bytes */ 401 | x = d0[y[0]] | d1[y[1]] | d2[y[2]]; /* 0x3c */ 402 | *p++ = ((uint8_t*)(&x))[0]; 403 | *p = ((uint8_t*)(&x))[1]; 404 | break; 405 | } 406 | 407 | if (x >= BADCHAR) return MODP_B64_ERROR; 408 | 409 | return 3*chunks + (6*leftover)/8; 410 | } 411 | -------------------------------------------------------------------------------- /src/dependencies/fast_base64/klompavxbase64.c: -------------------------------------------------------------------------------- 1 | #include "klompavxbase64.h" 2 | 3 | #include 4 | 5 | /** 6 | * This code is based on Alfred Klomp's https://github.com/aklomp/base64 (published under BSD) 7 | * with minor modifications by D. Lemire. 8 | **/ 9 | /* 10 | Copyright (c) 2013-2015, Alfred Klomp 11 | All rights reserved. 
12 | 13 | Redistribution and use in source and binary forms, with or without 14 | modification, are permitted provided that the following conditions are 15 | met: 16 | 17 | - Redistributions of source code must retain the above copyright notice, 18 | this list of conditions and the following disclaimer. 19 | 20 | - Redistributions in binary form must reproduce the above copyright 21 | notice, this list of conditions and the following disclaimer in the 22 | documentation and/or other materials provided with the distribution. 23 | 24 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS 25 | IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 26 | TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A 27 | PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 28 | HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 29 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 30 | TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 31 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 32 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 33 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 34 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 35 | */ 36 | 37 | 38 | /** 39 | * Note : Hardware such as Knights Landing might do poorly with this AVX2 code since it relies on shuffles. Alternatives might be faster. 40 | */ 41 | 42 | static const uint8_t base64_table_enc[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" 43 | "abcdefghijklmnopqrstuvwxyz" 44 | "0123456789+/"; 45 | 46 | 47 | // In the lookup table below, note that the value for '=' (character 61) is 48 | // 254, not 255. This character is used for in-band signaling of the end of 49 | // the datastream, and we will use that later. The characters A-Z, a-z, 0-9 50 | // and + / are mapped to their "decoded" values. 
The other bytes all map to 51 | // the value 255, which flags them as "invalid input". 52 | 53 | 54 | 55 | static 56 | const uint8_t base64_table_dec[] = { 57 | 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 58 | 255, 255, // 0..15 59 | 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 60 | 255, 255, // 16..31 61 | 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 62, 255, 255, 62 | 255, 63, // 32..47 63 | 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 255, 255, 255, 254, 64 | 255, 255, // 48..63 65 | 255, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 66 | 13, 14, // 64..79 67 | 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 255, 255, 255, 68 | 255, 255, // 80..95 69 | 255, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 70 | 39, 40, // 96..111 71 | 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 255, 255, 255, 72 | 255, 255, // 112..127 73 | 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 74 | 255, 255, // 128..143 75 | 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 76 | 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 77 | 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 78 | 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 79 | 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 80 | 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 81 | 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 82 | 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 83 | }; 84 | 85 | 86 | 87 | struct base64_state { 88 | int eof; 89 | int bytes; 90 | //int flags;// unused 91 | unsigned char carry; 92 | }; 93 | // Cast away unused variable, silence compiler: 94 | #define UNUSED(x) ((void)(x)) 95 | 96 | // Stub function when encoder arch unsupported: 97 | #define BASE64_ENC_STUB \ 98 | UNUSED(state); \ 99 | UNUSED(src); \ 100 | UNUSED(srclen); \ 101 | UNUSED(out); \ 102 | \ 
103 | *outlen = 0; 104 | 105 | // Stub function when decoder arch unsupported: 106 | #define BASE64_DEC_STUB \ 107 | UNUSED(state); \ 108 | UNUSED(src); \ 109 | UNUSED(srclen); \ 110 | UNUSED(out); \ 111 | UNUSED(outlen); \ 112 | \ 113 | return -1; 114 | 115 | struct codec { 116 | void (*enc)(struct base64_state *state, const char *src, size_t srclen, 117 | char *out, size_t *outlen); 118 | int (*dec)(struct base64_state *state, const char *src, size_t srclen, 119 | char *out, size_t *outlen); 120 | }; 121 | 122 | // Define machine endianness. This is for GCC: 123 | #if (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) 124 | #define BASE64_LITTLE_ENDIAN 1 125 | #else 126 | #define BASE64_LITTLE_ENDIAN 0 127 | #endif 128 | 129 | // This is for Clang: 130 | #ifdef __LITTLE_ENDIAN__ 131 | #define BASE64_LITTLE_ENDIAN 1 132 | #endif 133 | 134 | #ifdef __BIG_ENDIAN__ 135 | #define BASE64_LITTLE_ENDIAN 0 136 | #endif 137 | 138 | // Endian conversion functions: 139 | #if BASE64_LITTLE_ENDIAN 140 | #if defined(_MSC_VER) 141 | // Microsoft Visual C++: 142 | #define cpu_to_be32(x) _byteswap_ulong(x) 143 | #define cpu_to_be64(x) _byteswap_uint64(x) 144 | #define be32_to_cpu(x) _byteswap_ulong(x) 145 | #define be64_to_cpu(x) _byteswap_uint64(x) 146 | #else 147 | // GCC and Clang: 148 | #define cpu_to_be32(x) __builtin_bswap32(x) 149 | #define cpu_to_be64(x) __builtin_bswap64(x) 150 | #define be32_to_cpu(x) __builtin_bswap32(x) 151 | #define be64_to_cpu(x) __builtin_bswap64(x) 152 | #endif 153 | #else 154 | // No conversion needed: 155 | #define cpu_to_be32(x) (x) 156 | #define cpu_to_be64(x) (x) 157 | #define be32_to_cpu(x) (x) 158 | #define be64_to_cpu(x) (x) 159 | #endif 160 | 161 | // detect word size 162 | #ifdef _INTEGRAL_MAX_BITS 163 | #define BASE64_WORDSIZE _INTEGRAL_MAX_BITS 164 | #else 165 | #define BASE64_WORDSIZE __WORDSIZE 166 | #endif 167 | 168 | // end-of-file definitions 169 | // Almost end-of-file when waiting for the last '=' character: 170 | #define BASE64_AEOF 1 
// End-of-file when stream end has been reached or invalid input provided:
#define BASE64_EOF 2


// Byte-wise helpers over a 256-bit vector `s`; `n`, `a` and `b` are scalar
// byte values that get broadcast to all 32 lanes.
//   CMPGT(s, n)    - 0xFF in every byte lane where s > n (signed compare),
//                    0x00 elsewhere.
//   CMPEQ(s, n)    - 0xFF in every byte lane where s == n, 0x00 elsewhere.
//   REPLACE(s, n)  - s AND n; with `s` a 0x00/0xFF lane mask this yields n in
//                    the selected lanes and 0 in the others.
//   RANGE(s, a, b) - lane mask for a <= s <= b, built as
//                    (s > a-1) AND NOT (s > b).
#define CMPGT(s, n) _mm256_cmpgt_epi8((s), _mm256_set1_epi8(n))
#define CMPEQ(s, n) _mm256_cmpeq_epi8((s), _mm256_set1_epi8(n))
#define REPLACE(s, n) _mm256_and_si256((s), _mm256_set1_epi8(n))
#define RANGE(s, a, b) _mm256_andnot_si256(CMPGT((s), (b)), CMPGT((s), (a)-1))

// Reverses the byte order inside each of the eight 32-bit words of `in`.
static inline __m256i _mm256_bswap_epi32(const __m256i in) {
  // _mm256_shuffle_epi8() works on two 128-bit lanes separately, which is why
  // the same 16-entry pattern is listed twice:
  return _mm256_shuffle_epi8(in, _mm256_setr_epi8(3, 2, 1, 0, 7, 6, 5, 4, 11,
                                                  10, 9, 8, 15, 14, 13, 12, 3,
                                                  2, 1, 0, 7, 6, 5, 4, 11, 10,
                                                  9, 8, 15, 14, 13, 12));
}

// Encoder step 1: takes a register whose first 24 bytes are raw input and
// returns 32 bytes that each hold one 6-bit Base64 index (0..63), in output
// order. The bit-layout diagrams below describe a single 32-bit chunk.
static inline __m256i enc_reshuffle(__m256i in) {
  // Spread out 32-bit words over both halves of the input register
  // (words 0..2 go to the low half, words 3..5 to the high half):
  in = _mm256_permutevar8x32_epi32(in,
                                   _mm256_setr_epi32(0, 1, 2, -1, 3, 4, 5, -1));

  // Slice into 32-bit chunks and operate on all chunks in parallel.
  // All processing is done within the 32-bit chunk. First, shuffle:
  // before: [eeeeeeff|ccdddddd|bbbbcccc|aaaaaabb]
  // after:  [00000000|aaaaaabb|bbbbcccc|ccdddddd]
  in = _mm256_shuffle_epi8(in,
                           _mm256_set_epi8(-1, 9, 10, 11, -1, 6, 7, 8, -1, 3, 4,
                                           5, -1, 0, 1, 2, -1, 9, 10, 11, -1, 6,
                                           7, 8, -1, 3, 4, 5, -1, 0, 1, 2));

  // merged  = [0000aaaa|aabbbbbb|bbbbcccc|ccdddddd]
  const __m256i merged = _mm256_blend_epi16(_mm256_slli_epi32(in, 4), in, 0x55);

  // bd      = [00000000|00bbbbbb|00000000|00dddddd]
  const __m256i bd = _mm256_and_si256(merged, _mm256_set1_epi32(0x003F003F));

  // ac      = [00aaaaaa|00000000|00cccccc|00000000]
  const __m256i ac = _mm256_and_si256(_mm256_slli_epi32(merged, 2),
                                      _mm256_set1_epi32(0x3F003F00));

  // indices = [00aaaaaa|00bbbbbb|00cccccc|00dddddd]
  const __m256i indices = _mm256_or_si256(ac, bd);

  // return  = [00dddddd|00cccccc|00bbbbbb|00aaaaaa]
  return _mm256_bswap_epi32(indices);
}

// Encoder step 2: maps 32 byte lanes holding 6-bit values (0..63) to the
// corresponding ASCII characters of the Base64 alphabet by adding a per-range
// offset looked up from a small in-register table.
static inline __m256i enc_translate(const __m256i in) {
  // LUT contains the absolute offset for all ranges (same 16 entries in both
  // 128-bit lanes, because the byte shuffle below works per lane):
  const __m256i lut = _mm256_setr_epi8(
      65, 71, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -19, -16, 0, 0, 65, 71,
      -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -19, -16, 0, 0);
  // Translate values 0..63 to the Base64 alphabet. There are five sets:
  // #  From      To         Abs  Index    Characters
  // 0  [0..25]   [65..90]   +65  0        ABCDEFGHIJKLMNOPQRSTUVWXYZ
  // 1  [26..51]  [97..122]  +71  1        abcdefghijklmnopqrstuvwxyz
  // 2  [52..61]  [48..57]    -4  [2..11]  0123456789
  // 3  [62]      [43]       -19  12       +
  // 4  [63]      [47]       -16  13       /

  // Create LUT indices from input (saturating subtract, so 0..51 become 0):
  // the index for range #0 is right, others are 1 less than expected:
  __m256i indices = _mm256_subs_epu8(in, _mm256_set1_epi8(51));

  // mask is 0xFF (-1) for range #[1..4] and 0x00 for range #0:
  __m256i mask = CMPGT(in, 25);

  // Subtract -1, i.e. add 1 to the indices for range #[1..4]. All indices are
  // now correct:
  indices = _mm256_sub_epi8(indices, mask);

  // Add offsets to input values:
  __m256i out = _mm256_add_epi8(in, _mm256_shuffle_epi8(lut, indices));

  return out;
}

// Decoder packing step: takes 32 byte lanes that each hold a decoded 6-bit
// value and packs them into 24 contiguous output bytes in the low part of the
// returned register. The two highest 32-bit words are selected with index -1
// in the final permute and carry no payload.
static inline __m256i dec_reshuffle(__m256i in) {
  // Mask in a single byte per shift:
  const __m256i maskB2 = _mm256_set1_epi32(0x003F0000);
  const __m256i maskB1 = _mm256_set1_epi32(0x00003F00);

  // Pack bytes together:
  __m256i out = _mm256_srli_epi32(in, 16);

  out =
      _mm256_or_si256(out, _mm256_srli_epi32(_mm256_and_si256(in, maskB2), 2));

  out =
      _mm256_or_si256(out, _mm256_slli_epi32(_mm256_and_si256(in, maskB1), 12));

  out = _mm256_or_si256(out, _mm256_slli_epi32(in, 26));

  // Pack bytes together within 32-bit words, discarding words 3 and 7:
  out = _mm256_shuffle_epi8(out, _mm256_setr_epi8(3, 2, 1, 7, 6, 5, 11, 10, 9,
                                                  15, 14, 13, -1, -1, -1, -1, 3,
                                                  2, 1, 7, 6, 5, 11, 10, 9, 15,
                                                  14, 13, -1, -1, -1, -1));

  // Pack 32-bit words together, squashing empty words 3 and 7:
  return _mm256_permutevar8x32_epi32(
      out, _mm256_setr_epi32(0, 1, 2, 4, 5, 6, -1, -1));
}

// Streaming Base64 encoder.
//
// Consumes `srclen` bytes from `src`, appends the encoded characters to `out`
// and stores the number of characters written in `*outlen`. No '=' padding is
// produced here; the position inside the current 3-byte group and the
// leftover bits are saved in `state->bytes` / `state->carry` so that a later
// call (or the finalizer) can continue. `state->eof` is neither read nor
// written by this function.
//
// As long as at least 32 input bytes remain, 24 bytes per iteration are
// encoded with AVX2; the rest is handled one byte at a time.
static void base64_stream_encode_avx2(struct base64_state *state, const char *src,
                               size_t srclen, char *out, size_t *outlen) {
  // Assume that *out is large enough to contain the output.
  // Theoretically it should be 4/3 the length of src.
  const uint8_t *c = (const uint8_t *)src;
  uint8_t *o = (uint8_t *)out;

  // Use local temporaries to avoid cache thrashing:
  size_t outl = 0;
  struct base64_state st;
  st.bytes = state->bytes;
  st.carry = state->carry;

  // Turn three bytes into four 6-bit numbers:
  // in[0] = 00111111
  // in[1] = 00112222
  // in[2] = 00222233
  // in[3] = 00333333

  // Duff's device, a for() loop inside a switch() statement. Legal!
  // The switch jumps to the case matching the saved position in the 3-byte
  // group; from there the cases fall through and the for() loops back to
  // case 0. Every `break` below leaves the for() loop (and thereby the
  // switch), which happens when the input is exhausted.
  switch (st.bytes) {
    for (;;) {
    case 0:
      // If we have AVX2 support, pick off 24 bytes at a time for as long as we
      // can.
      // But because we read 32 bytes at a time, ensure we have enough room to
      // do a
      // full 32-byte read without segfaulting:
      while (srclen >= 32) {
        // Load string:
        __m256i str = _mm256_loadu_si256((__m256i *)c);

        // Reshuffle:
        str = enc_reshuffle(str);

        // Translate reshuffled bytes to the Base64 alphabet:
        str = enc_translate(str);

        // Store:
        _mm256_storeu_si256((__m256i *)o, str);

        c += 24;  // 6 * 4 bytes of input
        o += 32;  // 8 * 4 bytes of output
        outl += 32;
        srclen -= 24;
      }
      if (srclen-- == 0) {
        break;
      }
      // First byte of a group: emit its top 6 bits, keep the low 2 bits.
      *o++ = base64_table_enc[*c >> 2];
      st.carry = (*c++ << 4) & 0x30;
      st.bytes++;
      outl += 1;

    case 1:
      if (srclen-- == 0) {
        break;
      }
      // Second byte: carried 2 bits + top 4 bits, keep the low 4 bits.
      *o++ = base64_table_enc[st.carry | (*c >> 4)];
      st.carry = (*c++ << 2) & 0x3C;
      st.bytes++;
      outl += 1;

    case 2:
      if (srclen-- == 0) {
        break;
      }
      // Third byte: carried 4 bits + top 2 bits, then the low 6 bits. The
      // group is complete, so the position resets to 0.
      *o++ = base64_table_enc[st.carry | (*c >> 6)];
      *o++ = base64_table_enc[*c++ & 0x3F];
      st.bytes = 0;
      outl += 2;
    }
  }
  state->bytes = st.bytes;
  state->carry = st.carry;
  *outlen = outl;
}

// Streaming Base64 decoder.
//
// Consumes `srclen` characters from `src`, appends the decoded bytes to `out`
// and stores the number of bytes written in `*outlen`. The position inside
// the current 4-character group, the pending bits and the end-of-stream
// status are kept in `state` across calls.
//
// Returns 1 when the input was consumed without hitting an invalid character
// (including a correctly terminated '=' padding), and 0 otherwise. Once
// `state->eof` is BASE64_EOF, further calls write nothing and return 0; with
// BASE64_AEOF the only accepted continuation is a single '='.
static int base64_stream_decode_avx2(struct base64_state *state, const char *src,
                              size_t srclen, char *out, size_t *outlen) {
  int ret = 0;
  const uint8_t *c = (const uint8_t *)src;
  uint8_t *o = (uint8_t *)out;
  uint8_t q;

  // Use local temporaries to avoid cache thrashing:
  size_t outl = 0;
  struct base64_state st;
  st.eof = state->eof;
  st.bytes = state->bytes;
  st.carry = state->carry;

  // If we previously saw an EOF or an invalid character, bail out:
  if (st.eof) {
    *outlen = 0;
    ret = 0;
    // If there was a trailing '=' to check, check it:
    if (srclen && (st.eof == BASE64_AEOF)) {
      state->bytes = 0;
      state->eof = BASE64_EOF;
      ret = ((base64_table_dec[*c++] == 254) && (srclen == 1)) ? 1 : 0;
    }
    return ret;
  }

  // Turn four 6-bit numbers into three bytes:
  // out[0] = 11111122
  // out[1] = 22223333
  // out[2] = 33444444

  // Duff's device again: the switch resumes at the saved position within the
  // 4-character group, the cases fall through, and every `break` that is not
  // inside the while() leaves the for() loop and ends decoding for this call.
  switch (st.bytes) {
    for (;;) {
    case 0:
      // If we have AVX2 support, pick off 32 bytes at a time for as long as we
      // can,
      // but make sure that we quit before seeing any == markers at the end of
      // the
      // string. Also, because we write 8 zeroes at the end of the output,
      // ensure
      // that there are at least 11 valid bytes of input data remaining to close
      // the
      // gap. 32 + 2 + 11 = 45 bytes:
      while (srclen >= 45) {
        // Load string:
        __m256i str = _mm256_loadu_si256((__m256i *)c);

        // The input consists of six character sets in the Base64 alphabet,
        // which we need to map back to the 6-bit values they represent.
        // There are three ranges, two singles, and then there's the rest.
        //
        // #   From       To        Add  Characters
        // 1   [43]       [62]      +19  +
        // 2   [47]       [63]      +16  /
        // 3   [48..57]   [52..61]   +4  0..9
        // 4   [65..90]   [0..25]   -65  A..Z
        // 5   [97..122]  [26..51]  -71  a..z
        // (6) Everything else => invalid input

        const __m256i set1 = CMPEQ(str, '+');
        const __m256i set2 = CMPEQ(str, '/');
        const __m256i set3 = RANGE(str, '0', '9');
        const __m256i set4 = RANGE(str, 'A', 'Z');
        const __m256i set5 = RANGE(str, 'a', 'z');

        // The sets are disjoint, so OR-ing the masked offsets gives each lane
        // exactly one offset, or 0 when the lane matched no set:
        __m256i delta = REPLACE(set1, 19);
        delta = _mm256_or_si256(delta, REPLACE(set2, 16));
        delta = _mm256_or_si256(delta, REPLACE(set3, 4));
        delta = _mm256_or_si256(delta, REPLACE(set4, -65));
        delta = _mm256_or_si256(delta, REPLACE(set5, -71));

        // Check for invalid input: if any of the delta values are zero,
        // fall back on bytewise code to do error checking and reporting
        // (this `break` only leaves the while() loop):
        if (_mm256_movemask_epi8(CMPEQ(delta, 0))) {
          break;
        }

        // Now simply add the delta values to the input:
        str = _mm256_add_epi8(str, delta);

        // Reshuffle the input to packed 12-byte output format:
        str = dec_reshuffle(str);

        // Store back (a full 32-byte store; only 24 bytes are counted):
        _mm256_storeu_si256((__m256i *)o, str);

        c += 32;
        o += 24;
        outl += 24;
        srclen -= 32;
      }
      if (srclen-- == 0) {
        ret = 1;
        break;
      }
      if ((q = base64_table_dec[*c++]) >= 254) {
        st.eof = BASE64_EOF;
        // Treat character '=' as invalid for byte 0:
        break;
      }
      st.carry = q << 2;
      st.bytes++;

    case 1:
      if (srclen-- == 0) {
        ret = 1;
        break;
      }
      if ((q = base64_table_dec[*c++]) >= 254) {
        st.eof = BASE64_EOF;
        // Treat character '=' as invalid for byte 1:
        break;
      }
      *o++ = st.carry | (q >> 4);
      st.carry = q << 4;
      st.bytes++;
      outl++;

    case 2:
      if (srclen-- == 0) {
        ret = 1;
        break;
      }
      if ((q = base64_table_dec[*c++]) >= 254) {
        st.bytes++;
        // When q == 254, the input char is '='.
        // Check if next byte is also '=':
        if (q == 254) {
          if (srclen-- != 0) {
            st.bytes = 0;
            // EOF:
            st.eof = BASE64_EOF;
            q = base64_table_dec[*c++];
            ret = ((q == 254) && (srclen == 0)) ? 1 : 0;
            break;
          } else {
            // Almost EOF: the second '=' has to arrive in the next call.
            st.eof = BASE64_AEOF;
            ret = 1;
            break;
          }
        }
        // If we get here, there was an error:
        break;
      }
      *o++ = st.carry | (q >> 2);
      st.carry = q << 6;
      st.bytes++;
      outl++;

    case 3:
      if (srclen-- == 0) {
        ret = 1;
        break;
      }
      if ((q = base64_table_dec[*c++]) >= 254) {
        st.bytes = 0;
        st.eof = BASE64_EOF;
        // When q == 254, the input char is '='. Return 1 and EOF (only if it
        // is the last input character).
        // When q == 255, the input char is invalid. Return 0 and EOF.
        ret = ((q == 254) && (srclen == 0)) ? 1 : 0;
        break;
      }
      *o++ = st.carry | q;
      st.carry = 0;
      st.bytes = 0;
      outl++;
    }
  }
  state->eof = st.eof;
  state->bytes = st.bytes;
  state->carry = st.carry;
  *outlen = outl;
  return ret;
}


// NOTE(review): the comment below repeats the text that precedes
// base64_table_dec earlier in this file; no lookup table follows it here, so
// it looks like a leftover from upstream.
// In the lookup table below, note that the value for '=' (character 61) is
// 254, not 255. This character is used for in-band signaling of the end of
// the datastream, and we will use that later. The characters A-Z, a-z, 0-9
// and + / are mapped to their "decoded" values. The other bytes all map to
// the value 255, which flags them as "invalid input".
538 | 539 | static void base64_stream_encode_init(struct base64_state *state) { 540 | state->eof = 0; 541 | state->bytes = 0; 542 | state->carry = 0; 543 | //state->flags = flags; // useless 544 | } 545 | 546 | static void base64_stream_encode(struct base64_state *state, const char *src, 547 | size_t srclen, char *out, size_t *outlen) { 548 | base64_stream_encode_avx2(state, src, srclen, out, outlen); 549 | } 550 | 551 | static void base64_stream_encode_final(struct base64_state *state, char *out, 552 | size_t *outlen) { 553 | uint8_t *o = (uint8_t *)out; 554 | 555 | if (state->bytes == 1) { 556 | *o++ = base64_table_enc[state->carry]; 557 | *o++ = '='; 558 | *o++ = '='; 559 | *outlen = 3; 560 | return; 561 | } 562 | if (state->bytes == 2) { 563 | *o++ = base64_table_enc[state->carry]; 564 | *o++ = '='; 565 | *outlen = 2; 566 | return; 567 | } 568 | *outlen = 0; 569 | } 570 | 571 | static void base64_stream_decode_init(struct base64_state *state) { 572 | 573 | state->eof = 0; 574 | state->bytes = 0; 575 | state->carry = 0; 576 | //state->flags = flags; // useless 577 | } 578 | 579 | static int base64_stream_decode(struct base64_state *state, const char *src, 580 | size_t srclen, char *out, size_t *outlen) { 581 | return base64_stream_decode_avx2(state, src, srclen, out, outlen); 582 | } 583 | 584 | void klomp_avx2_base64_encode(const char *src, size_t srclen, char *out, size_t *outlen) { 585 | size_t s; 586 | size_t t; 587 | struct base64_state state; 588 | 589 | // Init the stream reader: 590 | base64_stream_encode_init(&state); 591 | 592 | // Feed the whole string to the stream reader: 593 | base64_stream_encode(&state, src, srclen, out, &s); 594 | 595 | // Finalize the stream by writing trailer if any: 596 | base64_stream_encode_final(&state, out + s, &t); 597 | 598 | // Final output length is stream length plus tail: 599 | *outlen = s + t; 600 | } 601 | 602 | int klomp_avx2_base64_decode(const char *src, size_t srclen, char *out, size_t *outlen) { 603 | int 
ret; 604 | struct base64_state state; 605 | 606 | // Init the stream reader: 607 | base64_stream_decode_init(&state); 608 | 609 | // Feed the whole string to the stream reader: 610 | ret = base64_stream_decode(&state, src, srclen, out, outlen); 611 | 612 | // If when decoding a whole block, we're still waiting for input then fail: 613 | if (ret && (state.bytes == 0)) { 614 | return ret; 615 | } 616 | return 0; 617 | } 618 | --------------------------------------------------------------------------------