├── .gitignore ├── scripts ├── .python-version ├── aws │ ├── .gitignore │ ├── servers-colocated.yaml │ └── servers.yaml ├── .gitignore ├── gen_capnp_files.sh ├── code_count.sh ├── figure │ ├── 8_micro_ts.sh │ ├── 6_tpcc_scalability.sh │ ├── 5_tpcc_sundial.sh │ ├── 4_tpcc_coco.sh │ ├── 0_motivation.sh │ ├── 10_micro_cache_effect.sh │ ├── 1_tpcc_tput.sh │ ├── 2_tpcc_lat.sh │ ├── 7_micro_contention.sh │ ├── 9_micro_batch_effect.sh │ └── 3_factor_analysis.sh ├── exclude.txt ├── send_code.sh ├── gen_ssh_config.py ├── requirements.txt ├── fine_coarse_occ_cmp.sh ├── parse_log.py ├── get_result_sn.py ├── eval │ ├── 10_micro_cache_effect.py │ ├── 7_micro_contention.py │ ├── 7_micro_contention_sundial.py │ ├── 2_tpcc_lat.py │ ├── 1_tpcc_tput_lat.py │ ├── 0_tput_lat.py │ ├── 8_micro_ts.py │ ├── 3_factor_analysis.py │ ├── 0_motivation_ycsb.py │ ├── 6_tpcc_scalability.py │ ├── 4_tpcc_coco.py │ ├── 5_tpcc_sundial.py │ ├── 9_micro_batch_effect.py │ └── get_result.py └── run_all.sh ├── dataflow_api ├── .gitignore ├── plot.sh ├── src │ ├── main.cpp │ ├── graph │ │ ├── node.cc │ │ └── node.hpp │ ├── benchmark │ │ └── tpcc │ │ │ ├── tpcc.cc │ │ │ ├── txn_order_status.cc │ │ │ ├── txn_stock_level.cc │ │ │ ├── txn_payment.cc │ │ │ ├── txn_delivery.cc │ │ │ └── txn_new_order.cc │ ├── type │ │ ├── row.hpp │ │ ├── input.hpp │ │ └── value.hpp │ ├── api │ │ ├── schema.hpp │ │ └── txn.hpp │ └── util │ │ └── logging.h └── CMakeLists.txt ├── src ├── rpc │ ├── .gitignore │ ├── CMakeLists.txt │ ├── message_buffer.h │ └── SNinterface.capnp ├── servers │ ├── CONFIG_OPTIONS.md │ ├── clients.h │ └── time_server.h ├── txn │ ├── page_snapshot.h │ ├── pipeline_scheduler.h │ ├── log_sequence.h │ ├── batch_manager.h │ └── temp_log.h ├── util │ ├── macros.h │ ├── core_binding.h │ ├── barrier.h │ ├── memory_arena.h │ ├── exceptions.h │ ├── utils.h │ ├── str.h │ ├── statistic.h │ ├── fast_random.h │ ├── dbug_logging.h │ ├── zipf.h │ └── txn_lat.h ├── benchmarks │ └── interface.h ├── index │ ├── interface.h │ └── record_lock.h └── storage │ ├── page.h │ ├── sequence_manager.h │ ├── storage.h │ ├── multi_ver_record.h │ ├── txn_info.h │ └── page_manager.h ├── CMakeLists.txt ├── doc ├── hackwrench │ ├── build.md │ └── run.md └── dataflow_api │ ├── api.md │ ├── graphs │ ├── stock_level.svg │ ├── order_status.svg │ └── payment.svg │ └── tpcc.md └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | *build 2 | .vscode -------------------------------------------------------------------------------- /scripts/.python-version: -------------------------------------------------------------------------------- 1 | 3.7.5 -------------------------------------------------------------------------------- /dataflow_api/.gitignore: -------------------------------------------------------------------------------- 1 | build/ -------------------------------------------------------------------------------- /src/rpc/.gitignore: -------------------------------------------------------------------------------- 1 | *.capnp.c++ 2 | *.capnp.h -------------------------------------------------------------------------------- /scripts/aws/.gitignore: -------------------------------------------------------------------------------- 1 | logs/* 2 | hackwrench* 3 | ssh_config -------------------------------------------------------------------------------- /scripts/.gitignore: -------------------------------------------------------------------------------- 1 | results 2 | __pycache__ 3 | temp.txt 4 | finalV2-results* 
-------------------------------------------------------------------------------- /scripts/gen_capnp_files.sh: -------------------------------------------------------------------------------- 1 | cd src/rpc 2 | capnp compile -oc++ SNinterface.capnp 3 | -------------------------------------------------------------------------------- /scripts/code_count.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | cloc --exclude-list-file=exclude.txt . 3 | 4 | -------------------------------------------------------------------------------- /scripts/figure/8_micro_ts.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ./eval/8_micro_ts.py aws ycsb10 0 hackwrench_ts_bottleneck 3 | -------------------------------------------------------------------------------- /scripts/exclude.txt: -------------------------------------------------------------------------------- 1 | execution_graph 2 | third_party 3 | build 4 | rpc/SNinterface.capnp.c++ 5 | rpc/SNinterface.capnp.h 6 | scripts/data 7 | scripts/localhost -------------------------------------------------------------------------------- /scripts/figure/6_tpcc_scalability.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ./eval/6_tpcc_scalability.py aws tpcc 0 hackwrench_normal 3 | ./eval/6_tpcc_scalability.py aws tpcc 1 hackwrench_fast 4 | -------------------------------------------------------------------------------- /scripts/figure/5_tpcc_sundial.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ./eval/5_tpcc_sundial.py aws tpcc 0 hackwrench 3 | ./eval/5_tpcc_sundial.py aws tpcc 1 hackwrench_normal 4 | ./eval/5_tpcc_sundial.py aws tpcc 2 hackwrench_fast 5 | -------------------------------------------------------------------------------- /scripts/figure/4_tpcc_coco.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ./eval/4_tpcc_coco.py aws tpcc 0 hackwrench_coco 3 | ./eval/4_tpcc_coco.py aws tpcc 1 hackwrench_coco_normal 4 | ./eval/4_tpcc_coco.py aws tpcc 2 hackwrench_coco_fast 5 | -------------------------------------------------------------------------------- /dataflow_api/plot.sh: -------------------------------------------------------------------------------- 1 | dot -Tsvg new_order.dot > new_order.svg 2 | dot -Tsvg payment.dot > payment.svg 3 | dot -Tsvg delivery.dot > delivery.svg 4 | dot -Tsvg order_status.dot > order_status.svg 5 | dot -Tsvg stock_level.dot > stock_level.svg -------------------------------------------------------------------------------- /scripts/figure/0_motivation.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ./eval/0_motivation_ycsb.py aws ycsb10 0 hackwrench_occ_non_caching 3 | ./eval/0_motivation_ycsb.py aws ycsb10 1 hackwrench_occ 4 | ./eval/0_motivation_ycsb.py aws ycsb10 2 hackwrench_batch_abort 5 | 6 | -------------------------------------------------------------------------------- /src/servers/CONFIG_OPTIONS.md: -------------------------------------------------------------------------------- 1 | RPC_CLIENTS: enable a dedicated client server for issuing client requests. 
2 | TEST_LOCAL_COMMIT: Batches commit directly after execution, without sending RPC messages. 3 | INDEPENDENT_GC: Batches assign garbage-collection (end_txn) tasks to workers. 4 | 
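5 | These options appear to be compile-time switches (an assumption, based on how flags such as TOTAL_ORDER_TS and BIND_CORES are tested with #if/#ifdef elsewhere in the tree). A minimal sketch of turning one on, following the add_definitions pattern the project's CMakeLists.txt already uses for LOG_LEVEL; the exact wiring in your build setup may differ: 6 | 7 | ```cmake 8 | # Hypothetical: add to the top-level CMakeLists.txt before add_subdirectory(src), 9 | # then reconfigure and rebuild. 10 | add_definitions(-DRPC_CLIENTS) 11 | ``` 12 | 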
-------------------------------------------------------------------------------- /dataflow_api/src/main.cpp: -------------------------------------------------------------------------------- 1 | #include <cstdint> 2 | 3 | #if defined(BENCH_TPCC) 4 | #include "benchmark/tpcc/tpcc.hpp" 5 | #endif 6 | 7 | uint64_t nthreads = 1; 8 | 9 | int main(int argc, char** argv) { 10 | do_bench(argc, argv); 11 | 12 | return 0; 13 | } -------------------------------------------------------------------------------- /scripts/figure/10_micro_cache_effect.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ./eval/10_micro_cache_effect.py aws ycsb10 6 hackwrench_occ_non_caching 3 | ./eval/10_micro_cache_effect.py aws ycsb10 7 hackwrench_normal_non_caching 4 | ./eval/10_micro_cache_effect.py aws ycsb10 8 hackwrench_fast_non_caching 5 | -------------------------------------------------------------------------------- /src/txn/page_snapshot.h: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "util/types.h" 4 | 5 | class PageSnapshot { 6 | public: 7 | bool is_read = false; 8 | bool is_write = false; 9 | }; 10 | 11 | struct BatchSnapshot { 12 | bool is_read = false; 13 | bool is_write = false; 14 | }; -------------------------------------------------------------------------------- /scripts/figure/1_tpcc_tput.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ./eval/1_tpcc_tput_lat.py aws tpcc 0 hackwrench_occ_non_caching 3 | ./eval/1_tpcc_tput_lat.py aws tpcc 1 hackwrench_occ 4 | ./eval/1_tpcc_tput_lat.py aws tpcc 2 hackwrench 5 | ./eval/1_tpcc_tput_lat.py aws tpcc 3 hackwrench_normal 6 | ./eval/1_tpcc_tput_lat.py aws tpcc 4 hackwrench_fast -------------------------------------------------------------------------------- /src/util/macros.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | #define CACHE_LINE_SIZE 64 7 | #define COMPILER_MEMORY_FENCE() asm volatile("" ::: "memory") 8 | #define CPU_MEMORY_FENCE() __sync_synchronize() 9 | #define CACHE_PADOUT char __padout__COUNTER__[0] __attribute__((aligned(CACHE_LINE_SIZE))) 10 | -------------------------------------------------------------------------------- /scripts/figure/2_tpcc_lat.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ./eval/2_tpcc_lat.py aws tpcc 0 hackwrench_occ_non_caching_lat 3 | ./eval/2_tpcc_lat.py aws tpcc 1 hackwrench_occ_lat 4 | ./eval/2_tpcc_lat.py aws tpcc 2 hackwrench_normal_lat 5 | ./eval/2_tpcc_lat.py aws tpcc 11 hackwrench_normal_lat 6 | ./eval/2_tpcc_lat.py aws tpcc 3 hackwrench_fast_lat 7 | ./eval/2_tpcc_lat.py aws tpcc 12 hackwrench_fast_lat 8 | -------------------------------------------------------------------------------- /scripts/send_code.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | SCRIPTPATH="$( cd "$(dirname "$0")" ; pwd -P )" 3 | CODE_DIR=$(dirname $SCRIPTPATH) 4 | 5 | for token in ${2}; do 6 | # echo ${token} 7 | ssh -F ${1}/ssh_config ${token} 'rm -rf ~/logs/*' 8 | rsync -e "ssh -o StrictHostKeyChecking=no -F ${1}/ssh_config" -aqzP ${1}/* ${token}:~/ --exclude={'logs'} & 9 | done 10 | 11 | # echo "send files to ${i} nodes..." 12 | 13 | wait 14 | -------------------------------------------------------------------------------- /scripts/figure/7_micro_contention.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ./eval/7_micro_contention_sundial.py aws ycsb10 0 hackwrench_occ 3 | ./eval/7_micro_contention_sundial.py aws ycsb10 1 hackwrench_normal 4 | ./eval/7_micro_contention_sundial.py aws ycsb10 2 hackwrench_fast 5 | ./eval/7_micro_contention.py aws ycsb10 3 hackwrench_occ 6 | ./eval/7_micro_contention.py aws ycsb10 4 hackwrench_normal 7 | ./eval/7_micro_contention.py aws ycsb10 5 hackwrench_fast 8 | -------------------------------------------------------------------------------- /dataflow_api/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.2) 2 | project(QueryPlaner) 3 | 4 | ## C++ flags 5 | ADD_DEFINITIONS(-std=c++0x) 6 | set(CMAKE_CXX_FLAGS "-O2 -g -Wno-deprecated-declarations -Wreturn-type") 7 | 8 | ## Source files 9 | include_directories("./src/") 10 | file(GLOB_RECURSE SRC "src/*.cc") 11 | 12 | ## Executable files 13 | add_executable(analyze ${SRC} "src/main.cpp" ) 14 | target_compile_options(analyze PRIVATE "-DBENCH_TPCC") 15 | -------------------------------------------------------------------------------- /src/rpc/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | find_package(CapnProto) 2 | find_package(cppzmq) 3 | 4 | capnp_generate_cpp(CAPNP_SRCS CAPNP_HDRS SNinterface.capnp) 5 | 6 | add_library(rpc STATIC ${CAPNP_SRCS}) 7 | target_link_libraries(rpc PRIVATE CapnProto::capnp-rpc) 8 | target_link_libraries(rpc PRIVATE CapnProto::capnp) 9 | target_link_libraries(rpc PRIVATE cppzmq) 10 | target_link_libraries(rpc PUBLIC jemalloc) 11 | target_include_directories(rpc PRIVATE ${PROJECT_BINARY_DIR}/rpc) -------------------------------------------------------------------------------- /scripts/figure/9_micro_batch_effect.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ./eval/9_micro_batch_effect.py aws ycsb10 0 hackwrench_fast 3 | ./eval/9_micro_batch_effect.py aws ycsb10 1 hackwrench_fast 4 | # ./eval/9_micro_batch_effect.py aws ycsb10 2 hackwrench_normal 5 | # ./eval/9_micro_batch_effect.py aws ycsb10 3 hackwrench_normal 6 | # ./eval/9_micro_batch_effect.py aws ycsb10 4 hackwrench_fast 7 | # ./eval/9_micro_batch_effect.py aws ycsb10 5 hackwrench_fast 8 | # ./eval/9_micro_batch_effect.py aws ycsb10 6 hackwrench_normal 9 | # ./eval/9_micro_batch_effect.py aws ycsb10 7 hackwrench_normal 10 | -------------------------------------------------------------------------------- /scripts/gen_ssh_config.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import yaml 3 | import sys 4 | 5 | data_dir = sys.argv[1] 6 | 7 | with open(f"{data_dir}/servers.yaml", "r") as f: 8 | hosts = yaml.load(f, Loader=yaml.loader.FullLoader) 9 | 10 | ips = {} 11 | 12 | with open(f"{data_dir}/ssh_config", "w") as f: 13 | i = 0 14 | for ip in hosts["ips"]: 15 | # if ips.get(ip) == 1: 16 | # print("redundant ip: " + ip) 17 | # assert False 18 | 19 | f.write(f'Host host{i}\n') 20 | f.write(f' HostName {ip}\n') 21 | i += 1 22 | 23 | ips[ip] = 1 24 | 25 | -------------------------------------------------------------------------------- /scripts/requirements.txt: 
-------------------------------------------------------------------------------- 1 | astroid==2.3.3 2 | autopep8==1.4.4 3 | bcrypt==3.1.7 4 | boto3==1.10.46 5 | botocore==1.13.46 6 | certifi==2019.11.28 7 | cffi==1.13.2 8 | cryptography==2.8 9 | DateTime==4.3 10 | docutils==0.15.2 11 | fabric==2.5.0 12 | invoke==1.3.0 13 | isort==4.3.21 14 | jmespath==0.9.4 15 | lazy-object-proxy==1.4.3 16 | mccabe==0.6.1 17 | paramiko==2.7.1 18 | pycodestyle==2.5.0 19 | pycparser==2.19 20 | pylint==2.4.4 21 | PyNaCl==1.3.0 22 | python-dateutil==2.8.1 23 | pytz==2019.3 24 | PyYAML==5.3 25 | s3transfer==0.2.1 26 | six==1.13.0 27 | typed-ast==1.4.1 28 | urllib3==1.25.7 29 | wrapt==1.11.2 30 | zope.interface==4.7.1 31 | -------------------------------------------------------------------------------- /dataflow_api/src/graph/node.cc: -------------------------------------------------------------------------------- 1 | #include "node.hpp" 2 | 3 | #include "api/schema.hpp" 4 | 5 | void OpNode::printNode(std::ostream &out) { 6 | TableName_t tbl_name = tbl_schema.getTableName(); 7 | std::stringstream ss; 8 | ss << type_name << " " << tbl_name; 9 | // ss << " Scope: " << scope_id; 10 | // ss << " Part: " << part_id; 11 | ss << "\nstaticKey: " << (static_key ? "true" : "false"); 12 | ss << "\nlocalTable: " << (tbl_schema.isLocalTable() ? "true" : "false"); 13 | out << id << " [label=\"" << ss.str() << "\""; 14 | out << ", color=" << (part_id == partition_affinity ? "red" : "blue"); 15 | out << "] # " << ref_count << ", " << tbl_name << std::endl; 16 | } -------------------------------------------------------------------------------- /src/util/core_binding.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include <pthread.h> 4 | 5 | #include <cstdint> 6 | #include <iostream> 7 | #include <thread> 8 | 9 | class CPUBinder { 10 | public: 11 | static void bind(std::thread::native_handle_type thread_handle, uint64_t core) { 12 | #ifndef BIND_CORES 13 | return; 14 | #endif 15 | core = core % std::thread::hardware_concurrency(); 16 | cpu_set_t cpuset; 17 | CPU_ZERO(&cpuset); 18 | CPU_SET(core, &cpuset); 19 | int rc = pthread_setaffinity_np(thread_handle, sizeof(cpu_set_t), &cpuset); 20 | if (rc != 0) { 21 | std::cout << "pthread_setaffinity_np failed" << std::endl; 22 | exit(-1); 23 | } 24 | } 25 | }; -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.2) 2 | project(TransactionRepair) 3 | 4 | if(DEFINED ENV{LOG_LEVEL}) 5 | add_definitions(-DLOG_LEVEL=$ENV{LOG_LEVEL}) 6 | message("setting LOG_LEVEL to $ENV{LOG_LEVEL}") 7 | endif() 8 | 9 | # set C++ flags 10 | ADD_DEFINITIONS(-std=c++14) 11 | set(CMAKE_CXX_FLAGS "-O2 -g -Wall -Wformat-truncation=0") 12 | # following is used for AddressSanitizer 13 | # set(CMAKE_CXX_FLAGS "-fsanitize=address -fno-omit-frame-pointer -fsanitize-recover=address -O2 -g -Wall -Wformat-truncation=0") 14 | 15 | # set the executable binary to build directory 16 | set(EXECUTABLE_OUTPUT_PATH "${CMAKE_BINARY_DIR}/") 17 | 18 | # link_libraries(profiler pthread rt) 19 | link_libraries(pthread rt) 20 | 21 | add_subdirectory(src) 22 | -------------------------------------------------------------------------------- /src/txn/pipeline_scheduler.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "txn/batch_txn.h" 4 | 5 | class PipelineScheduler { 6 | volatile batch_id_t last_batch_id; 7 | // 
std::mutex mutex; 8 | 9 | public: 10 | PipelineScheduler(batch_id_t starting_batch_id) : last_batch_id(starting_batch_id) {} 11 | 12 | inline batch_id_t next_get_timestamp() { 13 | // std::lock_guard<std::mutex> guard(mutex); 14 | batch_id_t ret = last_batch_id + 1; 15 | return ret; 16 | } 17 | 18 | void finish_get_timestamp(batch_id_t batch_id) { 19 | // std::lock_guard<std::mutex> guard(mutex); 20 | ASSERT(last_batch_id + 1 == batch_id) 21 | << "wait_ts_batch is " << last_batch_id << " " << batch_id; 22 | last_batch_id = batch_id; 23 | } 24 | }; -------------------------------------------------------------------------------- /scripts/figure/3_factor_analysis.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ./eval/3_factor_analysis.py aws tpcc 0 hackwrench_pure_occ_non_caching 3 | ./eval/3_factor_analysis.py aws tpcc 1 hackwrench_pure_occ 4 | ./eval/3_factor_analysis.py aws tpcc 2 hackwrench_occ_non_caching 5 | ./eval/3_factor_analysis.py aws tpcc 3 hackwrench_occ 6 | ./eval/3_factor_analysis.py aws tpcc 4 hackwrench_batch_abort_committed 7 | ./eval/3_factor_analysis.py aws tpcc 5 hackwrench_batch_abort 8 | ./eval/3_factor_analysis.py aws tpcc 6 hackwrench_batch_abort_0 9 | ./eval/3_factor_analysis.py aws tpcc 7 hackwrench_ts 10 | ./eval/3_factor_analysis.py aws tpcc 8 hackwrench 11 | ./eval/3_factor_analysis.py aws tpcc 9 hackwrench_normal 12 | ./eval/3_factor_analysis.py aws tpcc 10 hackwrench_fast 13 | ./eval/3_factor_analysis.py aws tpcc 11 hackwrench_normal_to 14 | ./eval/3_factor_analysis.py aws tpcc 12 hackwrench_fast_to 15 | 16 | -------------------------------------------------------------------------------- /src/util/barrier.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include <pthread.h> 4 | #include <atomic> 5 | 6 | /** 7 | * a simple wrapper over pthread 8 | */ 9 | class Barrier { 10 | public: 11 | explicit Barrier(uint32_t num) : wait_num_(num) { 12 | pthread_barrier_init(&barrier_, nullptr, num); 13 | } 14 | 15 | ~Barrier() { pthread_barrier_destroy(&barrier_); } 16 | 17 | void wait() { 18 | wait_num_ -= 1; 19 | pthread_barrier_wait(&barrier_); 20 | } 21 | 22 | void done() { wait_num_ -= 1; } 23 | 24 | bool ready() const { return wait_num_ == 0; } 25 | 26 | uint32_t wait_num() const { return wait_num_; } 27 | 28 | private: 29 | pthread_barrier_t barrier_; 30 | std::atomic<uint32_t> wait_num_; 31 | 32 | private: 33 | Barrier(const Barrier &) = delete; 34 | Barrier &operator=(const Barrier &) = delete; 35 | }; 36 | -------------------------------------------------------------------------------- /scripts/fine_coarse_occ_cmp.sh: -------------------------------------------------------------------------------- 1 | contention_choice=(0) 2 | rl_data=`./eval/1_tpcc_tput_lat.py aws tpcc 0 hackwrench_fine_occ | grep -A 2 parameter | awk '/parameter/{getline a; print a}' ` 3 | sl_data=`./eval/1_tpcc_tput_lat.py aws tpcc 0 hackwrench_occ | grep -A 2 parameter | awk '/parameter/{getline a; print a}' ` 4 | rl_arr=(`echo $rl_data | tr "," "\n"`) 5 | sl_arr=(`echo $sl_data | tr "," "\n"`) 6 | index=1 7 | echo -e 'ContentionFactor \t rl_tput \t sl_tput \t rl_remote_abort_ratio \t sl_remote_abort_ratio \t rl_local_abort_ratio \t sl_local_abort_ratio' > ../plot/data/record-vs-segment-lock-occ.data 8 | for contention in ${contention_choice[@]}; 9 | do 10 | result=$contention" \t "${rl_arr[$index]}" \t "${sl_arr[$index]}" \t "${rl_arr[(($index+1))]}" \t "${sl_arr[(($index+1))]}" \t "${rl_arr[(($index+2))]}" \t 
"${sl_arr[(($index+2))]} 11 | ((index+=11)); 12 | echo -e $result >> ../plot/data/record-vs-segment-lock-occ.data 13 | done 14 | -------------------------------------------------------------------------------- /src/benchmarks/interface.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "txn/db_txn.h" 4 | 5 | class DatabaseNode; 6 | class InMemSegments; 7 | class BenchmarkInterface { 8 | public: 9 | BenchmarkInterface() {} 10 | 11 | virtual ~BenchmarkInterface(){}; 12 | virtual uint32_t get_num_clients() = 0; 13 | 14 | virtual uint64_t get_time_duration() = 0; 15 | 16 | virtual void init_database(DatabaseNode &db_node) = 0; 17 | 18 | virtual void init_storage(InMemSegments &segments) = 0; 19 | 20 | virtual DbTxn::txn_logic_t get_txn_logic(uint32_t txn_seed = 0) = 0; 21 | virtual DbTxn::partition_logic_func get_partition_logic() = 0; 22 | 23 | virtual void *get_input(DbClient *c) = 0; 24 | virtual uint32_t get_input_size(uint32_t txn_seed) = 0; 25 | 26 | virtual uint64_t check_correctness_map(InMemSegments &) = 0; 27 | virtual void check_correctness_reduce(std::vector &) = 0; 28 | 29 | virtual GlobalPageId get_g_page_id(void *input, uint i) = 0; 30 | 31 | // debug only 32 | virtual void init_page_manager(PageManager &page_manager) = 0; 33 | virtual void print_debug_msg(){}; 34 | }; -------------------------------------------------------------------------------- /dataflow_api/src/benchmark/tpcc/tpcc.cc: -------------------------------------------------------------------------------- 1 | #include "tpcc.hpp" 2 | 3 | #include "api/txn.hpp" 4 | #include "type/input.hpp" 5 | 6 | void do_bench(int argc, char **argv) { 7 | DatabaseSchema db_schema = init_schema(); 8 | 9 | Txn new_order_txn(db_schema); 10 | { 11 | new_order_input(new_order_txn); 12 | new_order_graph(new_order_txn); 13 | } 14 | 15 | Txn payment_txn(db_schema); 16 | { 17 | payment_input(payment_txn); 18 | payment_graph(payment_txn); 19 | } 20 | 21 | Txn delivery_txn(db_schema); 22 | { 23 | delivery_input(delivery_txn); 24 | delivery_graph(delivery_txn); 25 | } 26 | 27 | Txn order_status_txn(db_schema); 28 | { 29 | order_status_input(order_status_txn); 30 | order_status_graph(order_status_txn); 31 | } 32 | 33 | Txn stock_level_txn(db_schema); 34 | { 35 | stock_level_input(stock_level_txn); 36 | stock_level_graph(stock_level_txn); 37 | } 38 | 39 | new_order_txn.print_graph("new_order.dot"); 40 | payment_txn.print_graph("payment.dot"); 41 | delivery_txn.print_graph("delivery.dot"); 42 | order_status_txn.print_graph("order_status.dot"); 43 | stock_level_txn.print_graph("stock_level.dot"); 44 | } -------------------------------------------------------------------------------- /src/util/memory_arena.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | 4 | #include "servers/config.h" 5 | #include "util/utils.h" 6 | 7 | template 8 | class MemoryArena { 9 | static const size_t object_size = round_up_mult_of_4(sizeof(T)); 10 | 11 | private: 12 | char arena[ArenaSize * object_size]; 13 | size_t next = 0; 14 | 15 | public: 16 | MemoryArena() {} 17 | inline T *alloc() { 18 | ASSERT(next < ArenaSize); 19 | T *p = (T *)&arena[(next++) * object_size]; 20 | p = new (p) T(); 21 | return p; 22 | } 23 | inline void clear() { next = 0; }; 24 | }; 25 | 26 | template 27 | class DynamicArena { 28 | private: 29 | std::queue arenas[16]; 30 | 31 | public: 32 | T *alloc(thread_id_t thread_id) { 33 | std::queue &arena = 
arenas[thread_id]; 34 | if (arena.empty()) { 35 | T *p = new T; 36 | memset(p, 0, sizeof(*p)); 37 | return p; 38 | } else { 39 | T *p = arena.front(); 40 | arena.pop(); 41 | return p; 42 | } 43 | } 44 | 45 | void free(T *p, thread_id_t thread_id) { 46 | std::queue<T *> &arena = arenas[thread_id]; 47 | arena.push(p); 48 | } 49 | }; -------------------------------------------------------------------------------- /dataflow_api/src/type/row.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "graph/node.hpp" 4 | 5 | class Row { 6 | public: 7 | Row(Node *node, TableSchema &tbl_schema) : node(node), tbl_schema(tbl_schema) {} 8 | 9 | Node *getNode() { return node; } 10 | 11 | Value getColumn(const ColumnName_t &col_name) { 12 | BuiltInType btype = tbl_schema.getBuiltInType(col_name); 13 | return Value(btype, {node}, false); 14 | } 15 | 16 | Value isFound() { 17 | return Value{BuiltInType::BOOL, {node}, false}; 18 | } 19 | 20 | void setColumn(const ColumnName_t &col_name, Value &value) { 21 | std::vector<Node *> &others = value.getDeps(); 22 | std::vector<Node *> &mine = col_deps[col_name]; 23 | mine.clear(); 24 | for (Node *other : others) { 25 | if (node && node == other) { 26 | continue; 27 | } 28 | mine.push_back(other); 29 | } 30 | } 31 | 32 | void assignDepsTo(Node *n) { 33 | n->addDep(node, Node::ValueDep); 34 | for (auto &pair : col_deps) { 35 | n->addDeps(pair.second, Node::ValueDep); 36 | } 37 | } 38 | 39 | private: 40 | Node *node = nullptr; 41 | std::map<ColumnName_t, std::vector<Node *>> col_deps; 42 | TableSchema &tbl_schema; 43 | }; -------------------------------------------------------------------------------- /src/util/exceptions.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include <exception> 3 | 4 | struct TxnRepairException : public std::exception { 5 | const char *what() const throw() { return "Transaction Repair Exception"; } 6 | }; 7 | 8 | struct TxnAbortException : public TxnRepairException { 9 | const char *what() const throw() { return "The txn has aborted"; } 10 | }; 11 | 12 | struct TxnRangeLockAbortException : public TxnAbortException { 13 | const char *what() const throw() { return "The record written is protected by range lock"; } 14 | }; 15 | 16 | struct WorkloadException : public std::exception { 17 | const char *what() const throw() { return "Workload Exception"; } 18 | }; 19 | 20 | struct WorkloadNoClientException : public WorkloadException { 21 | const char *what() const throw() { return "There is no client left"; } 22 | }; 23 | 24 | struct TxnNoTsException : public std::exception { 25 | const char *what() const throw() { return "txn doesn't have timestamp yet"; } 26 | }; 27 | 28 | struct BatchLockException : public TxnRepairException { 29 | BatchLockException(batch_id_t b_id) : b_id(b_id) {} 30 | batch_id_t b_id; 31 | 32 | const char *what() const throw() { return "Batch lock fails"; } 33 | }; 34 | 35 | struct RecordLockException : public TxnRepairException { 36 | const char *what() const throw() { return "Record lock fails"; } 37 | }; -------------------------------------------------------------------------------- /dataflow_api/src/benchmark/tpcc/txn_order_status.cc: -------------------------------------------------------------------------------- 1 | #include "api/txn.hpp" 2 | #include "tpcc.hpp" 3 | #include "type/input.hpp" 4 | 5 | void order_status_input(Txn &txn) { 6 | Input &input = txn.getInput(); 7 | input.add("W_ID", BuiltInType::INT); 8 | input.add("D_ID", BuiltInType::INT); 9 | 
input.add("C_ID", BuiltInType::INT); 10 | 11 | txn.setPartitionAffinity(input["W_ID"]); 12 | } 13 | 14 | void order_status_graph(Txn &txn) { 15 | Input &input = txn.getInput(); 16 | Value &w_id = input["W_ID"]; 17 | Value &d_id = input["D_ID"]; 18 | Value &c_id = input["C_ID"]; 19 | 20 | // Customer 21 | Row cust = txn.get(CUST, {w_id, d_id, c_id}); 22 | // skip some reading operations 23 | 24 | Row cust_index = txn.get(CUST_INDEX, {w_id, d_id, c_id}); 25 | Value c_o_id = cust_index.getColumn(CI_LAST_ORDER); 26 | 27 | // Order 28 | Row order = txn.get(ORDR, {w_id, d_id, c_o_id}); 29 | Value o_ol_count = order.getColumn(O_OL_COUNT); 30 | 31 | auto iter_logic = [&w_id, &d_id, &c_o_id](Txn &txn, Input &loop_input, Value &loop_num) { 32 | // Order Line 33 | Row order_line = txn.get(ORLI, {w_id, d_id, c_o_id, loop_num}); 34 | 35 | Values res; 36 | return res; 37 | }; 38 | 39 | Input empty_input; 40 | Values resArray = txn.map(iter_logic, empty_input, o_ol_count); 41 | txn.commit(); 42 | } 43 | -------------------------------------------------------------------------------- /src/util/utils.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include <execinfo.h> 4 | #include <cstdio> 5 | #include <cstdlib> 6 | 7 | #define likely(x) __builtin_expect(!!(x), 1) 8 | #define unlikely(x) __builtin_expect(!!(x), 0) 9 | 10 | inline bool ptr_aligned_to(void *ptr, uint64_t size) { return ((uint64_t)ptr) % size == 0; } 11 | 12 | inline constexpr size_t round_up_pow_of_2(size_t v) { 13 | v--; 14 | v |= v >> 1; 15 | v |= v >> 2; 16 | v |= v >> 4; 17 | v |= v >> 8; 18 | v |= v >> 16; 19 | v++; 20 | return v; 21 | } 22 | 23 | inline constexpr size_t round_up_mult_of_4(size_t v) { return (v + 3) & ~0x03; } 24 | 25 | template <typename T> 26 | inline T round_up(const T &numToRound, const T &multiple) { 27 | T remainder = numToRound % multiple; 28 | if (remainder == 0) 29 | return numToRound; 30 | return numToRound + multiple - remainder; 31 | } 32 | 33 | template <typename T> 34 | inline T min(T a, T b) { 35 | return a < b ? a : b; 36 | } 37 | 38 | template <typename T> 39 | inline T max(T a, T b) { 40 | return a > 
b ? a : b; 41 | } 42 | 43 | inline void print_trace(void) { // inline: defined in a header, so this avoids duplicate-symbol errors 44 | char **strings; 45 | size_t i, size; 46 | enum Constexpr { MAX_SIZE = 1024 }; 47 | void *array[MAX_SIZE]; 48 | size = backtrace(array, MAX_SIZE); 49 | strings = backtrace_symbols(array, size); 50 | for (i = 0; i < size; i++) 51 | printf("%s\n", strings[i]); 52 | puts(""); 53 | free(strings); 54 | } -------------------------------------------------------------------------------- /dataflow_api/src/type/input.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include <map> 4 | 5 | #include "value.hpp" 6 | 7 | class Values { 8 | public: 9 | Values() {} 10 | Values(const Values &other) { 11 | elems = other.elems; 12 | } 13 | 14 | void add(const std::string &name, BuiltInType btype) { elems[name] = Value(btype, name); } 15 | void add(const std::string &name, Value& value) { elems[name] = value; } 16 | Value &operator[](const std::string &name) { return elems[name]; } 17 | 18 | // For array of values 19 | Values reduce(const std::string &method_name) { 20 | Values res; 21 | for (auto& pair: elems) { 22 | res.add(pair.first, pair.second); 23 | } 24 | return res; 25 | } 26 | 27 | private: 28 | std::map<std::string, Value> elems; 29 | }; 30 | 31 | class Input { 32 | public: 33 | Input() {} 34 | Input(const Input &other) { 35 | values = other.values; 36 | ASSERT(other.array_elems.size() == 0); 37 | } 38 | 39 | void add(const std::string &name, BuiltInType btype) { values.add(name, btype); } 40 | void addArray(const std::string &name, Input &input) { 41 | array_elems[name] = std::move(input); 42 | } 43 | 44 | Value &operator[](const std::string &name) { return values[name]; } 45 | 46 | Input &getArray(const std::string &name) { return array_elems[name]; } 47 | 48 | private: 49 | Values values; 50 | std::map<std::string, Input> array_elems; 51 | }; 52 | -------------------------------------------------------------------------------- /src/index/interface.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "record_lock.h" 4 | #include "storage/multi_ver_record.h" 5 | // #include "index/scanlist.h" 6 | #include "storage/page.h" 7 | 8 | template <typename Key_t, typename Value_t> 9 | class IndexInterface { 10 | typedef Key_t key_type; 11 | typedef Value_t value_type; 12 | typedef MultiVersionRecord record_type; 13 | 14 | protected: 15 | // RecordLockManager record_locks; 16 | 17 | public: 18 | virtual ~IndexInterface() {} 19 | 20 | virtual IndexInterface &get_leaf_node(const key_type &key) = 0; 21 | virtual std::vector<IndexInterface *> get_leaf_nodes(const key_type &lo, 22 | const key_type &hi) = 0; 23 | 24 | virtual PageMeta *get_page_meta() = 0; 25 | 26 | virtual record_type *get(const key_type &key, uint8_t *data = nullptr) = 0; 27 | 28 | virtual record_type *insert(const key_type &key, const value_type &value, txn_id_t writer) = 0; 29 | 30 | virtual std::vector<record_type *> scan_oneshot(const key_type &lo, const key_type &hi, 31 | bool lo_inclusive, bool hi_inclusive, 32 | uint8_t *data = nullptr) = 0; 33 | 34 | uint32_t get_table_id() const { return table_id; } 35 | 36 | uint32_t set_table_id(uint32_t table_id) { return this->table_id = table_id; } 37 | 38 | virtual void print() {} 39 | 40 | uint32_t table_id; 41 | }; -------------------------------------------------------------------------------- /src/util/str.h: -------------------------------------------------------------------------------- 1 | #include <cstring> 2 | #include <string> 3 | 4 | template <size_t N> 5 | class Str { 6 | public: 7 | Str(){}; 8 | Str(const char* src) { 9 | memcpy(data, src, N); 
10 | } 11 | Str(const uint8_t* src) { 12 | memcpy(data, src, N); 13 | } 14 | 15 | Str& operator=(const char* src) { 16 | memcpy(data, src, N); 17 | return *this; 18 | } 19 | 20 | Str& operator=(const uint8_t* src) { 21 | memcpy(data, src, N); 22 | return *this; 23 | } 24 | 25 | bool operator<(const Str& other) const { 26 | return memcmp(data, other.data, N) < 0; 27 | } 28 | 29 | bool operator==(const Str& other) const { 30 | return memcmp(data, other.data, N) == 0; 31 | } 32 | 33 | private: 34 | uint8_t data[N]; 35 | }; 36 | 37 | template <size_t N, char FILL_CHAR> 38 | class varibale_str { 39 | public: 40 | uint8_t buf[N]; 41 | 42 | inline void assign(const char *s) { 43 | size_t n = strlen(s); 44 | memcpy(buf, s, n < N ? n : N); 45 | if (N > n) { 46 | memset(&buf[n], FILL_CHAR, N - n); 47 | } 48 | } 49 | 50 | inline void assign(const char *s, size_t n) { 51 | memcpy(buf, s, n < N ? n : N); 52 | if (N > n) { 53 | memset(&buf[n], FILL_CHAR, N - n); 54 | } 55 | } 56 | 57 | inline void assign(const std::string &s) { assign(s.data()); } 58 | 59 | inline bool operator==(const varibale_str &other) const { 60 | return memcmp(buf, other.buf, N) == 0; 61 | } 62 | 63 | inline bool operator!=(const varibale_str &other) const { 64 | return !operator==(other); 65 | } 66 | 67 | std::string to_string() { return std::string((char*)buf); } 68 | }; -------------------------------------------------------------------------------- /src/txn/log_sequence.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include <map> 4 | #include <utility> 5 | 6 | #include "redo_log.h" 7 | #include "util/types.h" 8 | 9 | class LogSequence { 10 | static const uint64_t APPLY_THRESHOLD = 100; 11 | // cts -> (log, log_size) 12 | using log_seq_t = std::map<ts_t, std::pair<log_t, uint32_t>>; 13 | 14 | private: 15 | log_seq_t logs; 16 | 17 | public: 18 | std::pair<log_seq_t::iterator, log_seq_t::iterator> get_log_range( 19 | ts_t cts) { 20 | return {logs.upper_bound(cts), logs.end()}; 21 | } 22 | 23 | std::pair<log_seq_t::iterator, log_seq_t::iterator> get_dep_log_range( 24 | ts_t begin_cts, ts_t end_cts) { 25 | auto begin_iter = logs.upper_bound(begin_cts); 26 | auto end_iter = logs.upper_bound(end_cts); 27 | return {begin_iter, end_iter}; 28 | } 29 | 30 | log_seq_t::iterator get_applyable_log_range() { 31 | auto iter = logs.begin(); 32 | while (iter != logs.end() && iter->second.first != nullptr) { 33 | iter++; 34 | } 35 | ASSERT(iter == logs.end()); 36 | return iter; 37 | } 38 | 39 | void erase_until(log_seq_t::iterator &end) { logs.erase(logs.begin(), end); } 40 | 41 | std::pair<log_seq_t::iterator, log_seq_t::iterator> get_last_log_range( 42 | ts_t cts) { 43 | return {--logs.end(), logs.end()}; 44 | } 45 | 46 | void put_log(ts_t cts, log_t log, uint32_t log_size) { 47 | logs[cts] = std::make_pair(log, log_size); 48 | } 49 | 50 | uint64_t get_num_logs() { return logs.size(); } 51 | 52 | bool need_apply_bg() { return logs.size() > APPLY_THRESHOLD; } 53 | 54 | log_seq_t &get_logs() { return logs; } 55 | }; -------------------------------------------------------------------------------- /scripts/parse_log.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | import functools 4 | def compareConfig(c1, c2): 5 | client1 = int(c1.split("-")[1].split("client")[0]) 6 | client2 = int(c2.split("-")[1].split("client")[0]) 7 | if client1 <= client2: 8 | return -1 9 | else: 10 | return 1 11 | 12 | if len(sys.argv) > 1: 13 | path = sys.argv[1] 14 | else: 15 | sys.exit("lacks log path!") 16 | 17 | configs = [] 18 | for config in os.listdir(path): 19 | configs.append(config) 20 | configs = sorted(configs, key=functools.cmp_to_key(compareConfig)) 21 
| 22 | hashmap = {} 23 | namemap = {} 24 | files = [] 25 | for config in configs: 26 | files.append(os.path.join(path, config, "db.log")) 27 | 28 | max_i = 0 29 | i = 0 30 | for filename in files: 31 | file = open(filename) 32 | lines = file.readlines() 33 | for line in lines: 34 | if "timer.h:405" in line: 35 | elements = line.split(" ") 36 | if elements[1] in hashmap: 37 | max_i = max(max_i, int(elements[1])) 38 | hashmap[elements[1]][i] = [int(elements[3]), int(elements[4]), int(elements[5])] 39 | else: 40 | dictionary = dict() 41 | dictionary[i] = [int(elements[3]), int(elements[4]), int(elements[5])] 42 | hashmap[elements[1]] = dictionary 43 | namemap[elements[1]] = elements[2] 44 | i += 1 45 | 46 | for i in hashmap.keys(): 47 | key = namemap[i] 48 | dictionary = hashmap[i] 49 | s = "" 50 | for j in range(len(files)): 51 | if j in dictionary.keys(): 52 | s += " " + ' '.join(map(str, dictionary[j])) 53 | else: 54 | s += " 0 0 0" 55 | # for num in hashmap[i]: 56 | # s += " " + str(num) 57 | print(key, s) 58 | -------------------------------------------------------------------------------- /scripts/get_result_sn.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import subprocess 4 | from subprocess import run 5 | import sys 6 | 7 | 8 | def grep_number(log_dir, c, name): 9 | x = run(f'cat {log_dir}/*-{c}clients/sn.log | grep "{name}"', shell=True, stdout=subprocess.PIPE) 10 | if x.stdout.decode('utf-8') == '': 11 | return -1 12 | output = x.stdout.decode('utf-8') 13 | output = output[output.find(']')+1:] 14 | return float(output.split(':')[1].split()[0]) 15 | 16 | def grep_batch_profile(log_dir, c, name): 17 | x = run(f'cat {log_dir}/*-{c}clients/sn.log | grep "{name}"', shell=True, stdout=subprocess.PIPE) 18 | if x.stdout.decode('utf-8') == '': 19 | return -1 20 | output = x.stdout.decode('utf-8') 21 | output = output[output.find(']')+1:] 22 | return float(output.split(f'{name}:')[1].split()[0]) 23 | 24 | 25 | 26 | def get_test_result(log_dir, output_file=None): 27 | output = run(f'ls {log_dir}', shell=True, stdout=subprocess.PIPE) 28 | clients = output.stdout.decode('utf-8').replace('clients', '').split() 29 | clients = sorted([int(i.split('-')[1]) for i in clients]) 30 | output_str = '' 31 | for t in clients: 32 | thpt = grep_number(log_dir, t, "SN throughput") 33 | if thpt == -1: 34 | continue 35 | output_str += (f'{t}, {thpt}\n') 36 | # print(f'{t}, {thpt}, {repair_ratio}, {batch_repair_ratio}, {abort_ratio}') 37 | # print(f'{t}, {thpt}, {abort_ratio}, {batch_repair_ratio}') 38 | # print(f'{t}, {thpt}') 39 | if output_file: 40 | with open(output_file, 'w') as f: 41 | f.write(output_str) 42 | print(output_str) 43 | 44 | if __name__ == '__main__': 45 | log_dir = sys.argv[1] 46 | output_file = None 47 | if len(sys.argv) > 2: 48 | output_file = sys.argv[2] 49 | get_test_result(log_dir, output_file) 50 | -------------------------------------------------------------------------------- /dataflow_api/src/benchmark/tpcc/txn_stock_level.cc: -------------------------------------------------------------------------------- 1 | #include "api/txn.hpp" 2 | #include "tpcc.hpp" 3 | #include "type/input.hpp" 4 | 5 | void stock_level_input(Txn &txn) { 6 | Input &input = txn.getInput(); 7 | input.add("W_ID", BuiltInType::INT); 8 | input.add("D_ID", BuiltInType::INT); 9 | 10 | txn.setPartitionAffinity(input["W_ID"]); 11 | } 12 | 13 | void stock_level_graph(Txn &txn) { 14 | Input &input = txn.getInput(); 15 | Value &w_id = input["W_ID"]; 
16 | Value &d_id = input["D_ID"]; 17 | 18 | Row dist = txn.get(DIST, {w_id, d_id}); 19 | Value d_tax = dist.getColumn(D_TAX); 20 | Value d_next_o_id = dist.getColumn(D_NEXT_O_ID); 21 | 22 | auto iter1_logic = [&w_id, &d_id, &d_next_o_id](Txn &txn, Input &loop_input, Value &loop_num) { 23 | Value o_id = d_next_o_id.apply("Substract", {loop_num}); 24 | auto iter2_logic = [&w_id, &d_id, &o_id](Txn &txn, Input &loop_input, Value &loop_num) { 25 | Row order_line = txn.get(ORLI, {w_id, d_id, o_id, loop_num}); 26 | Value found = order_line.isFound(); 27 | 28 | Value count(BuiltInType::INT, "count"); 29 | txn.beginIf(found); 30 | { 31 | Value ol_i_id = order_line.getColumn(OL_I_ID); 32 | Row stoc = txn.get(STOC, {w_id, ol_i_id}); 33 | count.apply("Add", {stoc.getColumn(S_QUANTITY)}); 34 | } 35 | txn.endIf(); 36 | Values res; 37 | res.add("COUNT", count); 38 | return res; 39 | }; 40 | 41 | Value int_15(BuiltInType::INT, "15"); 42 | Input empty_input; 43 | Values resArray = txn.map(iter2_logic, empty_input, int_15); 44 | Values iter2_res = resArray.reduce("SUM"); 45 | Value count = iter2_res["COUNT"]; 46 | 47 | Values res; 48 | res.add("COUNT", count); 49 | return res; 50 | }; 51 | 52 | Input empty_input; 53 | Value int_20(BuiltInType::INT, "20"); 54 | Values resArray = txn.map(iter1_logic, empty_input, int_20); 55 | Values res = resArray.reduce("SUM"); 56 | Value total_count = res["COUNT"]; 57 | txn.commit(); 58 | } 59 | -------------------------------------------------------------------------------- /scripts/aws/servers-colocated.yaml: -------------------------------------------------------------------------------- 1 | ips: 2 | - 172.31.31.131 3 | - 172.31.21.82 4 | - 172.31.28.83 5 | - 172.31.31.198 6 | - 172.31.20.249 7 | - 172.31.26.205 8 | - 172.31.21.9 9 | - 172.31.21.146 10 | - 172.31.18.169 11 | - 172.31.27.6 12 | - 172.31.22.66 13 | - 172.31.31.131 14 | - 172.31.21.82 15 | - 172.31.28.83 16 | - 172.31.31.198 17 | - 172.31.20.249 18 | - 172.31.26.205 19 | - 172.31.21.9 20 | - 172.31.21.146 21 | - 172.31.18.169 22 | - 172.31.27.6 23 | # - 172.31.27.115 24 | # - 172.31.29.110 25 | # - 172.31.18.46 26 | # - 172.31.27.10 27 | # - 172.31.26.173 28 | # - 172.31.16.152 29 | # - 172.31.18.96 30 | # - 172.31.19.160 31 | # - 172.31.23.191 32 | # - 172.31.26.8 33 | # - 172.31.24.42 34 | # - 172.31.22.102 35 | # - 172.31.17.99 36 | # - 172.31.20.211 37 | # - 172.31.29.79 38 | # - 172.31.17.178 39 | # - 172.31.16.65 40 | # - 172.31.25.34 41 | # - 172.31.21.91 42 | # - 172.31.25.27 43 | # - 172.31.23.88 44 | # - 172.31.30.91 45 | # - 172.31.18.156 46 | # - 172.31.28.132 47 | # - 172.31.26.197 48 | # - 172.31.30.67 49 | # - 172.31.24.67 50 | # - 172.31.17.86 51 | # - 172.31.25.55 52 | # - 172.31.26.240 53 | # - 172.31.25.85 54 | # - 172.31.20.46 55 | # - 172.31.26.15 56 | # - 172.31.28.201 57 | # - 172.31.23.171 58 | type: 59 | - StorageNode 60 | - StorageNode 61 | - StorageNode 62 | - StorageNode 63 | - StorageNode 64 | - StorageNode 65 | - StorageNode 66 | - StorageNode 67 | - StorageNode 68 | - StorageNode 69 | # - StorageNode 70 | # - StorageNode 71 | # - StorageNode 72 | # - StorageNode 73 | # - StorageNode 74 | # - StorageNode 75 | # - StorageNode 76 | # - StorageNode 77 | # - StorageNode 78 | # - StorageNode 79 | # - StorageNode 80 | # - StorageNode 81 | # - StorageNode 82 | # - StorageNode 83 | # - StorageNode 84 | # - StorageNode 85 | # - StorageNode 86 | # - StorageNode 87 | # - StorageNode 88 | # - StorageNode 89 | - TimeServer 90 | - DataBase 91 | - DataBase 92 | - DataBase 93 | - DataBase 94 
| - DataBase 95 | - DataBase 96 | - DataBase 97 | - DataBase 98 | - DataBase 99 | - DataBase 100 | # - DataBase 101 | # - DataBase 102 | # - DataBase 103 | # - DataBase 104 | # - DataBase -------------------------------------------------------------------------------- /dataflow_api/src/benchmark/tpcc/txn_payment.cc: -------------------------------------------------------------------------------- 1 | #include "tpcc.hpp" 2 | #include "api/txn.hpp" 3 | #include "type/input.hpp" 4 | 5 | void payment_input(Txn &txn) { 6 | Input &input = txn.getInput(); 7 | input.add("W_ID", BuiltInType::INT); 8 | input.add("D_ID", BuiltInType::INT); 9 | input.add("C_W_ID", BuiltInType::INT); 10 | input.add("C_D_ID", BuiltInType::INT); 11 | input.add("C_ID", BuiltInType::INT); 12 | input.add("H_AMOUNT", BuiltInType::FLOAT); 13 | 14 | txn.setPartitionAffinity(input["W_ID"]); 15 | } 16 | 17 | void payment_graph(Txn &txn) { 18 | Input &input = txn.getInput(); 19 | Value &w_id = input["W_ID"]; 20 | Value &d_id = input["D_ID"]; 21 | Value &c_w_id = input["C_W_ID"]; 22 | Value &c_d_id = input["C_D_ID"]; 23 | Value &c_id = input["C_ID"]; 24 | Value &h_amount = input["H_AMOUNT"]; 25 | 26 | // Warehouse 27 | Row ware = txn.get(WARE, {w_id}); 28 | Value w_ytd = ware.getColumn(W_YTD).apply("Add", {h_amount}); 29 | ware.setColumn(W_YTD, w_ytd); 30 | txn.put(WARE, {w_id}, ware); 31 | 32 | // District 33 | Row dist = txn.get(DIST, {w_id, d_id}); 34 | Value d_ytd = dist.getColumn(D_YTD).apply("Add", {h_amount}); 35 | dist.setColumn(D_YTD, d_ytd); 36 | txn.put(DIST, {w_id, d_id}, dist); 37 | 38 | // Customer 39 | Row cust = txn.get(CUST, {c_w_id, c_d_id, c_id}); 40 | Value c_balance = cust.getColumn(C_BALANCE).apply("Sub", {h_amount}); 41 | cust.setColumn(C_BALANCE, c_balance); 42 | Value c_ytd_payment = cust.getColumn(C_YTD_PAYMENT).apply("Add", {h_amount}); 43 | cust.setColumn(C_YTD_PAYMENT, c_ytd_payment); 44 | Value c_payment_cnt = cust.getColumn(C_PAYMENT_CNT).apply("Add", {}); 45 | cust.setColumn(C_PAYMENT_CNT, c_payment_cnt); 46 | Value c_credit = cust.getColumn(C_CREDIT).apply("STATIC_UPDATE", {}); 47 | cust.setColumn(C_CREDIT, c_credit); 48 | txn.put(CUST, {c_w_id, c_d_id, c_id}, cust); 49 | 50 | Row history = txn.alloc(HIST); 51 | history.setColumn(H_D_ID, d_id); 52 | history.setColumn(H_W_ID, w_id); 53 | history.setColumn(H_AMOUNT, h_amount); 54 | txn.put(HIST, {c_w_id, c_d_id, c_id, w_id, d_id}, history); 55 | 56 | txn.commit(); 57 | } 58 | -------------------------------------------------------------------------------- /src/util/statistic.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "timer.h" 4 | 5 | #include 6 | 7 | #define STATICS 0 8 | 9 | #if STATICS == 1 10 | // Performance counting stats 11 | // To be more self-contained 12 | inline __attribute__((always_inline)) uint64_t db_rdtsc(void) { 13 | uint32_t hi, lo; 14 | __asm volatile("rdtsc" : "=a"(lo), "=d"(hi)); 15 | return ((uint64_t)lo) | (((uint64_t)hi) << 32); 16 | } 17 | 18 | #define LAT_VARS(X) \ 19 | uint64_t _##X##_cycles_ = 0; \ 20 | uint64_t _pre_##X##_cycles_ = 0; \ 21 | uint64_t _##X##count_ = 0; \ 22 | uint64_t _pre_##X##count_ = 0; \ 23 | uint64_t _##X##start = 0; 24 | 25 | #define INIT_LAT_VARS(X) \ 26 | _##X##_cycles_ = 0, _pre_##X##_cycles_ = 0, _##X##count_ = 0, _pre_##X##count_ = 0; 27 | 28 | #define START(X) _##X##start = db_rdtsc(); 29 | 30 | #define END(X) \ 31 | if (_##X##start != 0) { \ 32 | _##X##_cycles_ += db_rdtsc() - _##X##start; \ 33 | _##X##count_ += 1; \ 34 
| } 35 | 36 | #define REPORT(X) \ 37 | { \ 38 | auto counts = _##X##count_ - _pre_##X##count_; \ 39 | counts = counts == 0 ? 1 : counts; \ 40 | auto temp = _##X##_cycles_; \ 41 | auto total_time = (temp - _pre_##X##_cycles_) / CYCLES_PER_NS; \ 42 | auto latency = total_time / (double)counts; \ 43 | LOG(2) << "(" << #X << "):" << VAR2(latency, "ns,") << VAR2(counts, ",") \ 44 | << VAR2(total_time, "ns"); \ 45 | _pre_##X##count_ = _##X##count_; \ 46 | _pre_##X##_cycles_ = temp; \ 47 | } 48 | 49 | #else 50 | 51 | #define LAT_VARS(X) ; 52 | #define INIT_LAT_VARS(X) ; 53 | #define START(X) ; 54 | #define END(X) ; 55 | #define END_C(C, X) ; 56 | #define REPORT(X) ; 57 | 58 | #endif 59 | -------------------------------------------------------------------------------- /dataflow_api/src/type/value.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | #include "graph/node.hpp" 15 | 16 | enum BuiltInType { INT, FLOAT, STRING, BOOL, NOPE }; 17 | 18 | static uint32_t g_value_id = 0; 19 | class Value { 20 | public: 21 | Value() : btype(BuiltInType::NOPE) {} 22 | Value(BuiltInType btype, const std::string &name) 23 | : value_id(++g_value_id), btype(btype), is_static(true) { 24 | deps.push_back(new InputNode(name)); 25 | } 26 | Value(BuiltInType btype, std::initializer_list<Node *> &&nodes, bool is_static) 27 | : value_id(++g_value_id), btype(btype), deps(std::move(nodes)), is_static(is_static) {} 28 | Value(BuiltInType btype, const std::vector<Node *> &nodes, bool is_static) 29 | : value_id(++g_value_id), btype(btype), deps(nodes), is_static(is_static) {} 30 | Value(const Value &other) : value_id(other.value_id), btype(other.btype), is_static(other.is_static) { 31 | deps.insert(deps.begin(), other.deps.begin(), other.deps.end()); 32 | } 33 | 34 | std::vector<Node *> &getDeps() { return deps; } 35 | 36 | Value apply(const std::string &method_name, std::initializer_list<Value> &&_values) { 37 | std::vector<Value> values = std::move(_values); 38 | std::vector<Node *> temp_deps; 39 | temp_deps.insert(temp_deps.begin(), deps.begin(), deps.end()); 40 | bool op_static = this->is_static; 41 | for (Value &value : values) { 42 | temp_deps.insert(temp_deps.begin(), value.deps.begin(), value.deps.end()); 43 | if (!value.is_static) { 44 | op_static = false; 45 | } 46 | } 47 | return Value(btype, std::move(temp_deps), op_static); // static only if all operands are static 48 | } 49 | 50 | bool isStatic() const { return is_static; } 51 | 52 | uint32_t getId() const {return value_id; } 53 | 54 | private: 55 | uint32_t value_id; 56 | BuiltInType btype; 57 | bool is_static = false; 58 | std::vector<Node *> deps; 59 | }; 60 | 61 | using PKey_t = std::vector<Value>; 62 | using TableName_t = std::string; 63 | using ColumnName_t = std::string; -------------------------------------------------------------------------------- /doc/hackwrench/build.md: -------------------------------------------------------------------------------- 1 | # How to Build Hackwrench 2 | 3 | We build and run Hackwrench on AWS EC2 instances. 4 | For simplicity, we provide a public AWS image with the environment and code pre-installed. 5 | The image is available at `xxx`. 
6 | 7 | ## Step 1: Install the Dependencies 8 | 9 | ### Install [cap'n proto](https://capnproto.org/) 10 | ```bash 11 | curl -O https://capnproto.org/capnproto-c++-0.9.1.tar.gz 12 | tar zxf capnproto-c++-0.9.1.tar.gz 13 | cd capnproto-c++-0.9.1 14 | ./configure 15 | make -j6 check 16 | sudo make install 17 | ``` 18 | 19 | ### Install [cppzmq](https://github.com/zeromq/cppzmq) 20 | ```bash 21 | # install zeromq 22 | git clone https://github.com/zeromq/libzmq.git 23 | cd libzmq 24 | git reset --hard 81a8211e 25 | mkdir build && cd build 26 | cmake .. 27 | sudo make -j4 install 28 | 29 | # install cppzmq 30 | git clone https://github.com/zeromq/cppzmq.git 31 | cd cppzmq 32 | git checkout v4.7.0 33 | mkdir build && cd build 34 | cmake .. -DCPPZMQ_BUILD_TESTS=OFF 35 | sudo make -j4 install 36 | ``` 37 | 38 | ### Install [jemalloc](https://github.com/jemalloc) 39 | ```bash 40 | git clone https://github.com/jemalloc/jemalloc.git 41 | cd jemalloc 42 | ./autogen.sh 43 | make dist 44 | make -j4 45 | sudo make install 46 | ``` 47 | 48 | ## Step 2: Compile Hackwrench 49 | 50 | In the repository root directory, run the following commands to compile the Hackwrench binaries. 51 | 52 | ```bash 53 | ./scripts/gen_capnp_files.sh 54 | mkdir build && cd build 55 | cmake .. 56 | make -j4 57 | ``` 58 | 
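59 | As a quick sanity check, the compiled binaries should land in `build/` (EXECUTABLE_OUTPUT_PATH in the top-level CMakeLists.txt points there). Assuming they follow the `hackwrench*` naming that the deployment scripts expect, you can list them with: 60 | 61 | ```bash 62 | # List the freshly compiled binaries; the hackwrench* name pattern is an 63 | # assumption based on the patterns ignored in scripts/aws/.gitignore. 64 | ls build/ | grep hackwrench 65 | ``` 66 | 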
-------------------------------------------------------------------------------- /scripts/eval/10_micro_cache_effect.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import basic_run 3 | import sys 4 | from datetime import datetime 5 | from get_result import get_test_result 6 | 7 | def config_by_workload(workload): 8 | if workload == "ycsb10": 9 | basic_run.num_of_tables = 12000 10 | basic_run.num_of_clients = 2000 11 | else: 12 | assert False 13 | 14 | def run_one_line(data_dir, workload, option): 15 | trial_name = datetime.now().strftime("%m-%d-%H-%M-%S") 16 | choice = [0, 1, 10, 100, 1000, 10000, 100000, 1000000] 17 | for x in choice: 18 | basic_run.cache_miss_ratio = x 19 | 20 | config_by_workload(workload) 21 | basic_run.run_test( 22 | data_dir, 23 | workload, 24 | basic_run.num_of_tables, 25 | basic_run.num_of_clients, 26 | basic_run.contention_factor, 27 | basic_run.batch_size, 28 | basic_run.split_num, 29 | 8, 30 | basic_run.cache_miss_ratio, 31 | basic_run.sn_replication, 32 | f'{trial_name}/{basic_run.num_of_nodes["DataBase"]}nodes-{x}clients' 33 | ) 34 | 35 | result_of_line = get_test_result( 36 | f"{data_dir}/logs/{trial_name}", 37 | f"results/{choice}-{workload}-{option}.data", 38 | ) 39 | return result_of_line 40 | 41 | if __name__ == "__main__": 42 | data_dir = sys.argv[1] 43 | workload = sys.argv[2] 44 | option = int(sys.argv[3]) 45 | if len(sys.argv) == 5: 46 | basic_run.bin_file_name = sys.argv[4] 47 | 48 | basic_run.prepare_binaries(data_dir) 49 | basic_run.num_of_threads["StorageNode"] = 8 50 | basic_run.num_of_threads["DataBase"] = 8 51 | basic_run.sn_replication = True 52 | basic_run.num_of_nodes["DataBase"] = 8 53 | basic_run.num_of_nodes["StorageNode"] = 18 54 | basic_run.split_num = 160 55 | basic_run.batch_size = 160 56 | basic_run.contention_factor1 = 10 57 | 58 | choice = [10, 99] 59 | results = [] 60 | for x in choice: 61 | basic_run.contention_factor = x 62 | result = str(basic_run.run_one_line_wrapper(run_one_line, data_dir, workload, option)) 63 | results.append(result) 64 | 65 | lines = basic_run.get_res_lines(results) 66 | csv_res = basic_run.get_csv_res(choice, lines) 67 | basic_run.write_to(f'{data_dir}/results/{option}.csv', csv_res) 68 | -------------------------------------------------------------------------------- /src/util/fast_random.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include <cstdint> 4 | 5 | #include <ctime> 6 | #include <string> 7 | 8 | // not thread-safe 9 | // 10 | // taken from java: 11 | // http://developer.classpath.org/doc/java/util/Random-source.html 12 | class fast_random { 13 | public: 14 | fast_random(unsigned long seed) : seed(0) { set_seed0(seed); } 15 | // fast_random() : fast_random(time(NULL)) {} 16 | 17 | inline unsigned long next() { return ((unsigned long)next(32) << 32) + next(32); } 18 | 19 | inline uint32_t next_u32() { return next(32); } 20 | 21 | inline uint16_t next_u16() { return next(16); } 22 | 23 | // a random number from x (inclusive) to y (inclusive) 24 | inline uint32_t randint(uint32_t x, uint32_t y) { return next_u32() % (y - x + 1) + x; } 25 | // random for TPCC 26 | inline uint32_t NURand(int A, int C, int x, int y) { 27 | return (((randint(0, A) | randint(x, y)) + C) % (y - x + 1)) + x; 28 | } 29 | 30 | /** [0.0, 1.0) */ 31 | inline double next_uniform() { 32 | return (((unsigned long)next(26) << 27) + next(27)) / (double)(1L << 53); 33 | } 34 | 35 | inline char next_char() { return next(8) % 256; } 36 | 37 | inline char next_readable_char() { 38 | static const char readables[] = 39 | "0123456789@ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz"; 40 | return readables[next(6)]; 41 | } 42 | 43 | inline std::string next_string(size_t len) { 44 | std::string s(len, 0); 45 | for (size_t i = 0; i < len; i++) s[i] = next_char(); 46 | return s; 47 | } 48 | 49 | inline std::string next_readable_string(size_t len) { 50 | std::string s(len, 0); 51 | for (size_t i = 0; i < len; i++) s[i] = next_readable_char(); 52 | return s; 53 | } 54 | 55 | inline unsigned long get_seed() { return seed; } 56 | 57 | inline void set_seed(unsigned long seed) { this->seed = seed; } 58 | 59 | private: 60 | inline void set_seed0(unsigned long seed) { 61 | this->seed = (seed ^ 0x5DEECE66DL) & ((1L << 48) - 1); 62 | } 63 | 64 | inline unsigned long next(unsigned int bits) { 65 | seed = (seed * 0x5DEECE66DL + 0xBL) & ((1L << 48) - 1); 66 | return (unsigned long)(seed >> (48 - bits)); 67 | } 68 | 69 | unsigned long seed; 70 | }; 71 | 72 | static thread_local fast_random thread_rand(time(NULL)); -------------------------------------------------------------------------------- /src/storage/page.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | /** 3 | * Page structure (Format) 4 | */ 5 | 6 | #include <cstring> 7 | 8 | #include <algorithm> 9 | #include <cstdint> 10 | #include <mutex> 11 | 12 | #include "util/types.h" 13 | #include "txn/page_snapshot.h" 14 | 15 | static const page_id_t INVALID_PAGE_ID = -1; 16 | struct PageMeta { 17 | GlobalPageId gp_id; 18 | page_size_t cur_page_size; 19 | page_size_t max_page_size; 20 | bool need_async_update = false; 21 | 22 | private: 23 | uint8_t *data = nullptr; 24 | 25 | public: 26 | std::mutex mutex; 27 | 28 | // TODO: split DbPageMeta and SnPageMeta 29 | void *reserved_ptr; // usage differs by server type: 30 | // LogSequence for storage node 31 | 32 | public: 33 | void set_data(uint8_t *d) { this->data = d; } 34 | 35 | inline uint8_t *get_data() const { return data; } 36 | 37 | void copy_data(const uint8_t *that, uint64_t len = PAGE_SIZE) { memcpy(this->data, that, len); } 38 | 39 | bool operator==(const PageMeta 
&other) { 40 | return (this->gp_id.g_page_id == other.gp_id.g_page_id) && 41 | (memcmp(this->data, other.data, PAGE_SIZE) == 0); 42 | } 43 | 44 | static uint8_t *new_data() { return new uint8_t[PAGE_SIZE]; } 45 | 46 | // the first 64 bits of data is global timestamp, refer to multi_ver_record.h 47 | inline static ts_t get_data_ts(uint8_t *data) { return *((ts_t *)data); } 48 | inline static void set_data_ts(uint8_t *data, ts_t ts) { *((ts_t *)data) = ts; } 49 | inline static uint32_t get_value_size(uint8_t *data) { return *((ts_t *)(data+sizeof(ts_t))); } 50 | inline static void set_value_size(uint8_t *data, uint32_t value_size) { 51 | *((ts_t *)(data+sizeof(ts_t))) = value_size; 52 | } 53 | inline ts_t get_cts() const { return get_data_ts(data); } 54 | inline ts_t get_value_size() const { return get_value_size(data); } 55 | inline void set_cts(ts_t ts) { 56 | ts = std::max(ts, get_data_ts(data)); 57 | set_data_ts(data, ts); 58 | } 59 | 60 | inline offset_t get_offset(void* ptr) { 61 | return reinterpret_cast(ptr) - reinterpret_cast(data); 62 | } 63 | 64 | void require_async_update() { 65 | need_async_update = true; 66 | } 67 | 68 | ~PageMeta() { 69 | if (data) 70 | delete[] data; 71 | } 72 | }; 73 | -------------------------------------------------------------------------------- /scripts/eval/7_micro_contention.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import basic_run 3 | import sys 4 | from datetime import datetime 5 | from get_result import get_test_result 6 | 7 | def config_by_workload(workload): 8 | if workload == "ycsb10": 9 | basic_run.num_of_tables = 12000 10 | basic_run.num_of_clients = 2000 11 | else: 12 | assert False 13 | 14 | def run_one_line(data_dir, workload, option): 15 | trial_name = datetime.now().strftime("%m-%d-%H-%M-%S") 16 | choice = [0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100] 17 | basic_run.cache_miss_ratio = 0 18 | for x in choice: 19 | # basic_run.cache_miss_ratio = x 20 | basic_run.contention_factor1 = x 21 | config_by_workload(workload) 22 | basic_run.run_test( 23 | data_dir, 24 | workload, 25 | basic_run.num_of_tables, 26 | basic_run.num_of_clients, 27 | basic_run.contention_factor, 28 | basic_run.batch_size, 29 | basic_run.split_num, 30 | 8, 31 | basic_run.cache_miss_ratio, 32 | basic_run.sn_replication, 33 | f'{trial_name}/{basic_run.num_of_nodes["DataBase"]}nodes-{x}clients' 34 | ) 35 | 36 | result_of_line = get_test_result( 37 | f"{data_dir}/logs/{trial_name}", 38 | f"results/{choice}-{workload}-{option}.data", 39 | ) 40 | return result_of_line 41 | 42 | if __name__ == "__main__": 43 | data_dir = sys.argv[1] 44 | workload = sys.argv[2] 45 | option = int(sys.argv[3]) 46 | if len(sys.argv) == 5: 47 | basic_run.bin_file_name = sys.argv[4] 48 | 49 | basic_run.prepare_binaries(data_dir) 50 | 51 | basic_run.num_of_threads["StorageNode"] = 8 52 | basic_run.num_of_threads["DataBase"] = 8 53 | basic_run.sn_replication = True 54 | basic_run.num_of_nodes["DataBase"] = 8 55 | basic_run.num_of_nodes["StorageNode"] = 18 56 | basic_run.split_num = 160 57 | basic_run.batch_size = 160 58 | 59 | choice = [10, 99] 60 | results = [] 61 | for x in choice: 62 | basic_run.contention_factor = x 63 | result = str(basic_run.run_one_line_wrapper(run_one_line, data_dir, workload, option)) 64 | results.append(result) 65 | 66 | lines = basic_run.get_res_lines(results) 67 | csv_res = basic_run.get_csv_res(choice, lines) 68 | basic_run.write_to(f'{data_dir}/results/{option}.csv', csv_res) 69 | 70 | 71 | 
-------------------------------------------------------------------------------- /src/storage/sequence_manager.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #if defined(TOTAL_ORDER_TS) || defined(DETERMINISTIC_VALIDATION) 4 | 5 | #include 6 | #include 7 | #include 8 | 9 | #include "txn_info.h" 10 | 11 | class SequenceManager { 12 | public: 13 | std::atomic seq; 14 | BatchThreadSafeMap waiting_txns; 15 | 16 | SequenceManager(thread_id_t num_threads) : waiting_txns(num_threads) { 17 | seq.store(0); 18 | } 19 | 20 | bool check_for_noblock(TxnInfo* txn_info) { 21 | bool success; 22 | uint64_t batch_seq = txn_info->seq; 23 | uint64_t cur_seq = seq.load(); 24 | // LOG(2) << "ready_to_prepare: " << std::hex << txn_info << " " << txn_info->txn_id << " " << txn_info->seq << " " << batch_seq << " " << cur_seq; 25 | if (cur_seq == batch_seq) { 26 | // LOG(2) << "ready_to_prepare1: " << std::hex << txn_info << " " << txn_info->txn_id << " " << batch_seq << " " << cur_seq; 27 | success = true; 28 | } else { 29 | txn_info->num_blocked_segs.fetch_add(1); 30 | waiting_txns.put(batch_seq, txn_info); 31 | success = false; 32 | if (seq.load() == batch_seq && waiting_txns.try_get_then_erase(batch_seq, txn_info)) { 33 | uint64_t o = txn_info->num_blocked_segs.fetch_sub(1); 34 | ASSERT(o == 1); 35 | success = true; 36 | // LOG(2) << "ready_to_prepare2: " << std::hex << txn_info << " " << txn_info->txn_id << " " << batch_seq << " " << cur_seq; 37 | } 38 | } 39 | return success; 40 | } 41 | 42 | TxnInfo* unblock(TxnInfo* txn_info) { 43 | TxnInfo* ret = nullptr; 44 | uint64_t batch_seq = txn_info->seq; 45 | uint64_t cur_seq = seq.fetch_add(1); 46 | ASSERT(cur_seq == batch_seq) << std::hex << cur_seq << " " << batch_seq << " " << txn_info->seq << " " << txn_info->txn_id << " " << txn_info; 47 | TxnInfo* info = nullptr; 48 | ++batch_seq; 49 | bool get_success = waiting_txns.try_get_then_erase(batch_seq, info); 50 | if (get_success) { 51 | // LOG(2) << "unblock: " << std::hex << info << " " << batch_seq << " " << info->txn_id << " success " << txn_info << " " << txn_info->txn_id; 52 | ret = info; 53 | waiting_txns.erase(batch_seq); 54 | } 55 | return ret; 56 | } 57 | 58 | }; 59 | 60 | #endif -------------------------------------------------------------------------------- /scripts/eval/7_micro_contention_sundial.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import basic_run 3 | import sys 4 | from datetime import datetime 5 | from get_result import get_test_result 6 | 7 | def config_by_workload(workload): 8 | if workload == "ycsb10": 9 | basic_run.num_of_tables = 12000 10 | basic_run.num_of_clients = 2000 11 | if basic_run.bin_file_name == "hackwrench_occ": 12 | basic_run.num_of_clients = 500 13 | else: 14 | assert False 15 | 16 | def run_one_line(data_dir, workload, option): 17 | trial_name = datetime.now().strftime("%m-%d-%H-%M-%S") 18 | choice = [0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100] 19 | for x in choice: 20 | basic_run.contention_factor1 = x 21 | config_by_workload(workload) 22 | basic_run.run_test( 23 | data_dir, 24 | workload, 25 | basic_run.num_of_tables, 26 | basic_run.num_of_clients, 27 | basic_run.contention_factor, 28 | basic_run.batch_size, 29 | basic_run.split_num, 30 | 8, 31 | basic_run.cache_miss_ratio, 32 | basic_run.sn_replication, 33 | f'{trial_name}/{basic_run.num_of_nodes["DataBase"]}nodes-{x}clients' 34 | ) 35 | 36 | result_of_line = get_test_result( 37 | 
f"{data_dir}/logs/{trial_name}", 38 | f"results/{choice}-{workload}-{option}.data", 39 | ) 40 | return result_of_line 41 | 42 | if __name__ == "__main__": 43 | data_dir = sys.argv[1] 44 | workload = sys.argv[2] 45 | option = int(sys.argv[3]) 46 | if len(sys.argv) == 5: 47 | basic_run.bin_file_name = sys.argv[4] 48 | 49 | basic_run.prepare_binaries(data_dir) 50 | 51 | basic_run.num_of_threads["StorageNode"] = 8 52 | basic_run.num_of_threads["DataBase"] = 8 53 | basic_run.sn_replication = False 54 | basic_run.num_of_nodes["DataBase"] = 8 55 | basic_run.num_of_nodes["StorageNode"] = 6 56 | basic_run.split_num = 160 57 | basic_run.batch_size = 160 58 | 59 | choice = [10, 90, 99] 60 | results = [] 61 | for x in choice: 62 | basic_run.contention_factor = x 63 | result = str(basic_run.run_one_line_wrapper(run_one_line, data_dir, workload, option)) 64 | results.append(result) 65 | 66 | lines = basic_run.get_res_lines(results) 67 | csv_res = basic_run.get_csv_res(choice, lines) 68 | basic_run.write_to(f'{data_dir}/results/{option}.csv', csv_res) 69 | 70 | 71 | -------------------------------------------------------------------------------- /scripts/eval/2_tpcc_lat.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import basic_run 3 | import sys 4 | from datetime import datetime 5 | from get_result import get_test_result 6 | 7 | def config_by_workload(workload): 8 | if workload == "tpcc": 9 | basic_run.num_of_warehouses = basic_run.num_of_nodes["DataBase"] * 8 10 | basic_run.num_of_tables = basic_run.num_tables_per_warehouse * basic_run.num_of_warehouses 11 | else: 12 | assert False 13 | 14 | def run_one_line(data_dir, workload, option): 15 | trial_name = datetime.now().strftime("%m-%d-%H-%M-%S") 16 | num_clients = 8 17 | if num_clients < basic_run.batch_size: 18 | num_clients = basic_run.batch_size 19 | choice = [num_clients] 20 | for x in choice: 21 | basic_run.num_of_clients = x 22 | 23 | config_by_workload(workload) 24 | basic_run.run_test( 25 | data_dir, 26 | workload, 27 | basic_run.num_of_tables, 28 | basic_run.num_of_clients, 29 | basic_run.contention_factor, 30 | basic_run.batch_size, 31 | basic_run.split_num, 32 | 8, 33 | basic_run.cache_miss_ratio, 34 | basic_run.sn_replication, 35 | f'{trial_name}/{basic_run.num_of_nodes["DataBase"]}nodes-{x}clients' 36 | ) 37 | 38 | result_of_line = get_test_result( 39 | f"{data_dir}/logs/{trial_name}", 40 | f"results/{choice}-{workload}-{option}.data", 41 | ) 42 | return result_of_line 43 | 44 | if __name__ == "__main__": 45 | data_dir = sys.argv[1] 46 | workload = sys.argv[2] 47 | option = int(sys.argv[3]) 48 | if len(sys.argv) == 5: 49 | basic_run.bin_file_name = sys.argv[4] 50 | 51 | basic_run.prepare_binaries(data_dir) 52 | basic_run.num_of_nodes["DataBase"] = 6 53 | basic_run.num_of_nodes["StorageNode"] = 12 54 | basic_run.num_of_threads["StorageNode"] = 8 55 | basic_run.num_of_threads["DataBase"] = 8 56 | basic_run.contention_factor = 10 57 | basic_run.sn_replication = True 58 | basic_run.split_num = 1 59 | basic_run.batch_size = 1 60 | 61 | if option > 10: 62 | basic_run.split_num = 50 63 | basic_run.batch_size = 50 64 | 65 | choice = [0] 66 | results = [] 67 | for x in choice: 68 | result = str(basic_run.run_one_line_wrapper(run_one_line, data_dir, workload, option)) 69 | results.append(result) 70 | 71 | lines = basic_run.get_res_lines(results) 72 | csv_res = basic_run.get_csv_res(choice, lines) 73 | basic_run.write_to(f'{data_dir}/results/{option}.csv', csv_res) 74 | 75 | 
-------------------------------------------------------------------------------- /scripts/eval/1_tpcc_tput_lat.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import basic_run 3 | import sys 4 | from datetime import datetime 5 | from get_result import get_test_result 6 | 7 | def config_by_workload(workload): 8 | if basic_run.contention_factor == 0: 9 | basic_run.contention_factor1 = 0 10 | else: 11 | basic_run.contention_factor1 = 15 12 | if workload == "tpcc": 13 | if "hackwrench_fast" in basic_run.bin_file_name: 14 | basic_run.num_of_clients = 600 15 | else: 16 | basic_run.num_of_clients = 300 17 | basic_run.num_of_warehouses = basic_run.num_of_nodes["DataBase"] * 8 18 | basic_run.num_of_tables = basic_run.num_tables_per_warehouse * basic_run.num_of_warehouses 19 | else: 20 | assert False 21 | 22 | def run_one_line(data_dir, workload, option): 23 | trial_name = datetime.now().strftime("%m-%d-%H-%M-%S") 24 | choice = [0,10,20,30,50,70,90,100, 150, 200] 25 | for x in choice: 26 | basic_run.contention_factor = x 27 | config_by_workload(workload) 28 | basic_run.run_test( 29 | data_dir, 30 | workload, 31 | basic_run.num_of_tables, 32 | basic_run.num_of_clients, 33 | basic_run.contention_factor, 34 | basic_run.batch_size, 35 | basic_run.split_num, 36 | 8, 37 | basic_run.cache_miss_ratio, 38 | basic_run.sn_replication, 39 | f'{trial_name}/{basic_run.num_of_nodes["DataBase"]}nodes-{x}clients' 40 | ) 41 | 42 | result_of_line = get_test_result( 43 | f"{data_dir}/logs/{trial_name}", 44 | f"results/{choice}-{workload}-{option}.data", 45 | ) 46 | return result_of_line 47 | 48 | if __name__ == "__main__": 49 | data_dir = sys.argv[1] 50 | workload = sys.argv[2] 51 | option = int(sys.argv[3]) 52 | if len(sys.argv) == 5: 53 | basic_run.bin_file_name = sys.argv[4] 54 | 55 | basic_run.prepare_binaries(data_dir) 56 | basic_run.num_of_nodes["DataBase"] = 6 57 | basic_run.num_of_nodes["StorageNode"] = 12 58 | basic_run.num_of_threads["StorageNode"] = 8 59 | basic_run.num_of_threads["DataBase"] = 8 60 | basic_run.sn_replication = True 61 | 62 | choice = [50] 63 | results = [] 64 | for x in choice: 65 | basic_run.split_num = x 66 | basic_run.batch_size = x 67 | result = str(basic_run.run_one_line_wrapper(run_one_line, data_dir, workload, option)) 68 | results.append(result) 69 | 70 | lines = basic_run.get_res_lines(results) 71 | csv_res = basic_run.get_csv_res(choice, lines) 72 | basic_run.write_to(f'{data_dir}/results/{option}.csv', csv_res) 73 | 74 | 75 | -------------------------------------------------------------------------------- /scripts/eval/0_tput_lat.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import basic_run 3 | import sys 4 | from datetime import datetime 5 | from get_result import get_test_result 6 | 7 | def config_by_workload(workload): 8 | if workload == "ycsb10": 9 | basic_run.num_of_tables = 12000 10 | elif workload == "tpcc": 11 | if basic_run.contention_factor == 0: 12 | basic_run.contention_factor1 = 0 13 | else: 14 | basic_run.contention_factor1 = 15 15 | 16 | if basic_run.bin_file_name == "hackwrench_fast": 17 | basic_run.num_of_clients = 600 18 | else: 19 | basic_run.num_of_clients = 100 20 | basic_run.num_of_warehouses = basic_run.num_of_nodes["DataBase"] * 8 21 | basic_run.num_of_tables = basic_run.num_tables_per_warehouse * basic_run.num_of_warehouses 22 | else: 23 | assert False 24 | 25 | def run_one_line(data_dir, workload, option): 26 | 
trial_name = datetime.now().strftime("%m-%d-%H-%M-%S") 27 | choice = [10] 28 | for x in choice: 29 | basic_run.contention_factor = x 30 | config_by_workload(workload) 31 | basic_run.run_test( 32 | data_dir, 33 | workload, 34 | basic_run.num_of_tables, 35 | basic_run.num_of_clients, 36 | basic_run.contention_factor, 37 | basic_run.batch_size, 38 | basic_run.split_num, 39 | 8, 40 | basic_run.cache_miss_ratio, 41 | basic_run.sn_replication, 42 | f'{trial_name}/{basic_run.num_of_nodes["DataBase"]}nodes-{x}clients' 43 | ) 44 | 45 | result_of_line = get_test_result( 46 | f"{data_dir}/logs/{trial_name}", 47 | f"results/{choice}-{workload}-{option}.data", 48 | ) 49 | return result_of_line 50 | 51 | if __name__ == "__main__": 52 | data_dir = sys.argv[1] 53 | workload = sys.argv[2] 54 | option = int(sys.argv[3]) 55 | if len(sys.argv) == 5: 56 | basic_run.bin_file_name = sys.argv[4] 57 | 58 | basic_run.prepare_binaries(data_dir) 59 | basic_run.num_of_nodes["DataBase"] = 2 60 | basic_run.num_of_nodes["StorageNode"] = 2 61 | basic_run.num_of_threads["StorageNode"] = 8 62 | basic_run.num_of_threads["DataBase"] = 8 63 | basic_run.sn_replication = False 64 | 65 | choice = [100] 66 | results = [] 67 | for x in choice: 68 | basic_run.contention_factor1 = x 69 | result = str(basic_run.run_one_line_wrapper(run_one_line, data_dir, workload, option)) 70 | results.append(result) 71 | 72 | lines = basic_run.get_res_lines(results) 73 | csv_res = basic_run.get_csv_res(choice, lines) 74 | basic_run.write_to(f'{data_dir}/results/{option}.csv', csv_res) 75 | 76 | 77 | -------------------------------------------------------------------------------- /scripts/eval/8_micro_ts.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import basic_run 3 | import sys 4 | from datetime import datetime 5 | from get_result import get_test_result 6 | 7 | def config_by_workload(workload): 8 | if workload == "ycsb10": 9 | basic_run.num_of_tables = 12000 10 | basic_run.num_of_clients = 1000 11 | if basic_run.bin_file_name == "hackwrench_occ": 12 | basic_run.num_of_clients = 500 13 | else: 14 | assert False 15 | 16 | def run_one_line(data_dir, workload, option): 17 | trial_name = datetime.now().strftime("%m-%d-%H-%M-%S") 18 | choice = [32] 19 | basic_run.contention_factor1 = 10 20 | basic_run.cache_miss_ratio = 0 21 | for x in choice: 22 | # basic_run.cache_miss_ratio = x 23 | # basic_run.contention_factor1 = x 24 | # basic_run.split_num = x 25 | # basic_run.batch_size = x 26 | basic_run.num_of_nodes["DataBase"] = x 27 | config_by_workload(workload) 28 | basic_run.run_test( 29 | data_dir, 30 | workload, 31 | basic_run.num_of_tables, 32 | basic_run.num_of_clients, 33 | basic_run.contention_factor, 34 | basic_run.batch_size, 35 | basic_run.split_num, 36 | basic_run.optional, 37 | basic_run.cache_miss_ratio, 38 | basic_run.sn_replication, 39 | f'{trial_name}/{basic_run.num_of_nodes["DataBase"]}nodes-{x}clients' 40 | ) 41 | 42 | result_of_line = get_test_result( 43 | f"{data_dir}/logs/{trial_name}", 44 | f"results/{choice}-{workload}-{option}.data", 45 | ) 46 | return result_of_line 47 | 48 | if __name__ == "__main__": 49 | data_dir = sys.argv[1] 50 | workload = sys.argv[2] 51 | option = int(sys.argv[3]) 52 | if len(sys.argv) == 5: 53 | basic_run.bin_file_name = sys.argv[4] 54 | 55 | basic_run.prepare_binaries(data_dir) 56 | 57 | basic_run.num_of_threads["StorageNode"] = 8 58 | basic_run.num_of_threads["DataBase"] = 8 59 | basic_run.num_of_threads["TimeServer"] = 8 60 | 
basic_run.sn_replication = False 61 | basic_run.num_of_nodes["DataBase"] = 48 62 | basic_run.num_of_nodes["StorageNode"] = 2 63 | basic_run.split_num = 1 64 | basic_run.batch_size = 1 65 | basic_run.contention_factor = 10 66 | 67 | choice = [40, 80, 160] 68 | results = [] 69 | for x in choice: 70 | basic_run.optional = x 71 | result = str(basic_run.run_one_line_wrapper(run_one_line, data_dir, workload, option)) 72 | results.append(result) 73 | 74 | lines = basic_run.get_res_lines(results) 75 | csv_res = basic_run.get_csv_res(choice, lines) 76 | basic_run.write_to(f'{data_dir}/results/{option}.csv', csv_res) 77 | 78 | 79 | -------------------------------------------------------------------------------- /src/txn/batch_manager.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | #include "index/interface.h" 7 | #include "servers/config.h" 8 | #include "storage/multi_ver_record.h" 9 | #include "util/exceptions.h" 10 | #include "util/macros.h" 11 | #include "util/types.h" 12 | 13 | struct RedoLogKvKey { 14 | seg_id_t seg_id; // different tables might have same keys. 15 | Key_t key; 16 | }; 17 | 18 | bool operator==(const RedoLogKvKey &one, const RedoLogKvKey &other) { 19 | return (one.seg_id == other.seg_id) && (one.key == other.key); 20 | } 21 | 22 | struct RedoLogKvKeyHasher { 23 | std::size_t operator()(const RedoLogKvKey &k) const { 24 | using std::hash; 25 | using std::size_t; 26 | size_t k1 = hash<seg_id_t>()(k.seg_id); 27 | size_t k2 = hash<Key_t>()(k.key); 28 | // https://en.wikipedia.org/wiki/Pairing_function#Cantor_pairing_function 29 | return (k1 + k2) * (k1 + k2 + 1) / 2 + k2; 30 | } 31 | }; 32 | 33 | class BatchManager { 34 | public: 35 | 36 | inline void set_batch_id(batch_id_t batch_id) { this->batch_id = batch_id; } 37 | 38 | inline batch_id_t get_batch_id() { return batch_id; } 39 | 40 | template <typename IndexInterface> 41 | bool acquire_lock(IndexInterface &table, Key_t key) { 42 | auto *pair = table.get(key); 43 | RecordLock *rl = get_record_lock(pair); 44 | bool success = rl->lock(999999999999, 999999); 45 | if (!success) { 46 | return false; 47 | } 48 | ASSERT(success) << "batch record lock fail"; 49 | hold_locks.insert(rl); 50 | // LOG(3) << batch_id << " acquire " << key; 51 | return true; 52 | } 53 | 54 | bool acquire_lock(void *record) { 55 | RecordLock *rl = get_record_lock(record); 56 | bool success = rl->lock(999999999999, 999999); 57 | // ASSERT(success) << "batch record lock fail"; 58 | if (!success) { 59 | return false; 60 | } 61 | hold_locks.insert(rl); 62 | return true; 63 | } 64 | 65 | void release_locks() { 66 | for (RecordLock *rl : hold_locks) { 67 | // LOG(4) << batch_id << " release " << rl->key; 68 | rl->unlock(999999999999); 69 | #ifdef READ_COMMITTED 70 | rl->unlock_batch(999999); 71 | #endif 72 | } 73 | 74 | hold_locks.clear(); 75 | } 76 | 77 | struct RedoLogKvCache { 78 | uint8_t* val; 79 | version_ts_t version; 80 | }; 81 | 82 | std::mutex redo_log_mu; 83 | 84 | std::unordered_map<RedoLogKvKey, RedoLogKvCache, RedoLogKvKeyHasher> redo_log_kv; 85 | 86 | batch_id_t batch_id; 87 | 88 | private: 89 | std::unordered_set<RecordLock *> hold_locks; 90 | };
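For context, a minimal usage sketch of `BatchManager`'s locking API; this is not taken from the Hackwrench sources, and the `records` vector and retry policy are hypothetical:

```C++
// Hypothetical commit-path sketch: lock every record written by a batch,
// apply the redo log, then release all locks. On any conflict, back off.
bool try_lock_batch(BatchManager &bm, batch_id_t id, const std::vector<void *> &records) {
    bm.set_batch_id(id);
    for (void *rec : records) {
        if (!bm.acquire_lock(rec)) {  // conflict with another batch
            bm.release_locks();       // drop the locks acquired so far
            return false;             // caller may retry the batch later
        }
    }
    // ... apply bm.redo_log_kv to the pages under bm.redo_log_mu, then ...
    bm.release_locks();
    return true;
}
```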
-------------------------------------------------------------------------------- /dataflow_api/src/api/schema.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | #include "type/value.hpp" 8 | 9 | using NameType = std::pair<ColumnName_t, BuiltInType>; 10 | 11 | class DatabaseSchema; 12 | class TableSchema { 13 | public: 14 | TableSchema() {} 15 | TableSchema(TableSchema &&other) { *this = std::move(other); } 16 | TableSchema &operator=(TableSchema &&other) { 17 | tbl_name = other.tbl_name; 18 | columns.swap(other.columns); 19 | pkey.swap(other.pkey); 20 | part_key = other.part_key; part_pkey_index = other.part_pkey_index; // also carry over the cached partition-key index 21 | read_only = other.read_only; 22 | partitionable = other.partitionable; 23 | is_local = other.is_local; 24 | return *this; 25 | } 26 | TableSchema(TableName_t tbl_name, bool read_only, std::initializer_list<NameType> &&cols, 27 | std::initializer_list<ColumnName_t> &&pkey_il, ColumnName_t part_key) 28 | : tbl_name(tbl_name), pkey(std::move(pkey_il)), part_key(part_key), read_only(read_only) { 29 | for (const NameType &pair : cols) { 30 | columns[pair.first] = pair.second; 31 | } 32 | 33 | partitionable = !part_key.empty(); 34 | if (partitionable) { 35 | for (const ColumnName_t &name : pkey) { 36 | if (name == part_key) { 37 | break; 38 | } 39 | part_pkey_index += 1; 40 | } 41 | } 42 | } 43 | 44 | void access(bool local_op) { 45 | if (!local_op) { 46 | is_local = false; 47 | } 48 | } 49 | 50 | bool isPartitionable() { return partitionable; } 51 | 52 | bool isReadOnly() { return read_only; } 53 | 54 | bool isLocalTable() { return is_local; } 55 | 56 | uint32_t getPartPkeyIndex() { return part_pkey_index; } 57 | 58 | BuiltInType getBuiltInType(const ColumnName_t &col_name) { return columns.at(col_name); } 59 | 60 | TableName_t getTableName() const { return tbl_name; } 61 | 62 | private: 63 | TableName_t tbl_name; 64 | std::map<ColumnName_t, BuiltInType> columns; 65 | std::vector<ColumnName_t> pkey; 66 | ColumnName_t part_key; // partition key 67 | uint32_t part_pkey_index = 0; // the index of partition key in pkey 68 | bool read_only = false; 69 | bool partitionable = false; 70 | bool is_local = true; 71 | 72 | friend class DatabaseSchema; 73 | }; 74 | 75 | class DatabaseSchema { 76 | public: 77 | DatabaseSchema() {} 78 | 79 | void addTable(TableSchema &&tbl_schema) { tbls[tbl_schema.tbl_name] = std::move(tbl_schema); } 80 | 81 | TableSchema &getTable(TableName_t tbl_name) { return tbls[tbl_name]; } 82 | 83 | private: 84 | std::map<TableName_t, TableSchema> tbls; 85 | }; -------------------------------------------------------------------------------- /scripts/eval/3_factor_analysis.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import basic_run 3 | import sys 4 | from datetime import datetime 5 | from get_result import get_test_result 6 | 7 | def config_by_workload(workload): 8 | if basic_run.contention_factor == 0: 9 | basic_run.contention_factor1 = 0 10 | else: 11 | basic_run.contention_factor1 = 15 12 | if workload == "tpcc": 13 | if "hackwrench_fast" in basic_run.bin_file_name: 14 | basic_run.num_of_clients = 600 15 | elif "hackwrench_pure_occ" in basic_run.bin_file_name: 16 | basic_run.num_of_clients = 8 17 | else: 18 | basic_run.num_of_clients = 300 19 | basic_run.num_of_warehouses = basic_run.num_of_nodes["DataBase"] * 8 20 | basic_run.num_of_tables = basic_run.num_tables_per_warehouse * basic_run.num_of_warehouses 21 | else: 22 | assert False 23 | 24 | def run_one_line(data_dir, workload, option): 25 | trial_name = datetime.now().strftime("%m-%d-%H-%M-%S") 26 | choice = [0, 10, 50, 100, 200] 27 | for x in choice: 28 | basic_run.contention_factor = x 29 | config_by_workload(workload) 30 | basic_run.run_test( 31 | data_dir, 32 | workload, 33 | basic_run.num_of_tables, 34 | basic_run.num_of_clients, 35 | basic_run.contention_factor, 36 | basic_run.batch_size, 37 | basic_run.split_num, 38 | 8, 39 | basic_run.cache_miss_ratio, 40 | basic_run.sn_replication, 41 |
f'{trial_name}/{basic_run.num_of_nodes["DataBase"]}nodes-{x}clients' 42 | ) 43 | 44 | result_of_line = get_test_result( 45 | f"{data_dir}/logs/{trial_name}", 46 | f"results/{choice}-{workload}-{option}.data", 47 | ) 48 | return result_of_line 49 | 50 | if __name__ == "__main__": 51 | data_dir = sys.argv[1] 52 | workload = sys.argv[2] 53 | option = int(sys.argv[3]) 54 | if len(sys.argv) == 5: 55 | basic_run.bin_file_name = sys.argv[4] 56 | 57 | basic_run.prepare_binaries(data_dir) 58 | basic_run.num_of_nodes["DataBase"] = 6 59 | basic_run.num_of_nodes["StorageNode"] = 12 60 | basic_run.num_of_threads["StorageNode"] = 8 61 | basic_run.num_of_threads["DataBase"] = 8 62 | basic_run.sn_replication = True 63 | 64 | choice = [50] 65 | results = [] 66 | for x in choice: 67 | basic_run.split_num = x 68 | basic_run.batch_size = x 69 | result = str(basic_run.run_one_line_wrapper(run_one_line, data_dir, workload, option)) 70 | results.append(result) 71 | 72 | lines = basic_run.get_res_lines(results) 73 | csv_res = basic_run.get_csv_res(choice, lines) 74 | basic_run.write_to(f'{data_dir}/results/{option}.csv', csv_res) -------------------------------------------------------------------------------- /scripts/eval/0_motivation_ycsb.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import basic_run 3 | import sys 4 | from datetime import datetime 5 | from get_result import get_test_result 6 | 7 | def config_by_workload(workload): 8 | if workload == "ycsb10": 9 | basic_run.num_of_tables = 12000 10 | basic_run.num_of_clients = 2000 11 | if "hackwrench_occ" in basic_run.bin_file_name: 12 | basic_run.num_of_clients = 500 13 | else: 14 | assert False 15 | 16 | def run_one_line(data_dir, workload, option): 17 | trial_name = datetime.now().strftime("%m-%d-%H-%M-%S") 18 | choice = [10,70,80,90,95,99] 19 | # choice = [70, 80, 90, 95, 99] 20 | choice = [0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100] 21 | choice = [160] 22 | basic_run.contention_factor1 = 100 23 | basic_run.cache_miss_ratio = 100 24 | for x in choice: 25 | # basic_run.cache_miss_ratio = x 26 | # basic_run.contention_factor1 = x 27 | basic_run.split_num = x 28 | basic_run.batch_size = x 29 | config_by_workload(workload) 30 | basic_run.run_test( 31 | data_dir, 32 | workload, 33 | basic_run.num_of_tables, 34 | basic_run.num_of_clients, 35 | basic_run.contention_factor, 36 | basic_run.batch_size, 37 | basic_run.split_num, 38 | 8, 39 | basic_run.cache_miss_ratio, 40 | basic_run.sn_replication, 41 | f'{trial_name}/{basic_run.num_of_nodes["DataBase"]}nodes-{x}clients' 42 | ) 43 | 44 | result_of_line = get_test_result( 45 | f"{data_dir}/logs/{trial_name}", 46 | f"results/{choice}-{workload}-{option}.data", 47 | ) 48 | return result_of_line 49 | 50 | if __name__ == "__main__": 51 | data_dir = sys.argv[1] 52 | workload = sys.argv[2] 53 | option = int(sys.argv[3]) 54 | if len(sys.argv) == 5: 55 | basic_run.bin_file_name = sys.argv[4] 56 | 57 | basic_run.prepare_binaries(data_dir) 58 | 59 | basic_run.num_of_threads["StorageNode"] = 8 60 | basic_run.num_of_threads["DataBase"] = 8 61 | basic_run.sn_replication = True 62 | basic_run.num_of_nodes["DataBase"] = 8 63 | basic_run.num_of_nodes["StorageNode"] = 18 64 | basic_run.split_num = 160 65 | basic_run.batch_size = 160 66 | 67 | choice = [1, 5, 10, 20] 68 | choice = [10, 99] 69 | results = [] 70 | for x in choice: 71 | basic_run.contention_factor = x 72 | result = str(basic_run.run_one_line_wrapper(run_one_line, data_dir, workload, option)) 73 | 
results.append(result) 74 | 75 | lines = basic_run.get_res_lines(results) 76 | csv_res = basic_run.get_csv_res(choice, lines) 77 | basic_run.write_to(f'{data_dir}/results/{option}.csv', csv_res) 78 | 79 | 80 | -------------------------------------------------------------------------------- /src/txn/temp_log.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | #include "servers/config.h" 6 | 7 | // An append-only log, with auto-increasing size 8 | // | ----------------------------------------------- alloc_size --------------------------------- | 9 | // start current end | 10 | // | | | | 11 | // | ---- reserved_size ---- | ----------- entry ----------- | | 12 | template 13 | class TempLog { 14 | public: 15 | bool in_use_; 16 | 17 | // start of the memory of temp log 18 | uint8_t *start_, *current_, *end_; 19 | 20 | uint32_t alloced_size_; 21 | 22 | TempLog() 23 | : in_use_(false), start_(nullptr), current_(nullptr), end_(nullptr), alloced_size_(0) { 24 | alloced_size_ = BASIC_MEM_ALLOC_SIZE; 25 | start_ = (uint8_t *)malloc(alloced_size_); 26 | end_ = current_ = get_reserved_ptr(); 27 | 28 | // alignment assert 29 | // ASSERT(RESERVED_SIZE % ROUND_UP_BASE == 0); 30 | } 31 | 32 | ~TempLog() { free(start_); } 33 | 34 | void start() { 35 | ASSERT(in_use_ == false); 36 | 37 | end_ = current_ = get_reserved_ptr(); 38 | in_use_ = true; 39 | } 40 | 41 | void end() { 42 | ASSERT(in_use_ == true); 43 | in_use_ = false; 44 | } 45 | 46 | void restart() { 47 | in_use_ = false; 48 | end_ = current_ = get_reserved_ptr(); 49 | } 50 | 51 | inline uint8_t *append_entry(uint32_t size) { 52 | ASSERT(current_ == end_); 53 | resize(size); 54 | end_ += size; 55 | return current_; 56 | } 57 | 58 | inline void close_entry() { current_ = end_; } 59 | 60 | void resize(uint32_t size) { 61 | bool need_resize = false; 62 | uint32_t log_size = get_log_size(); 63 | while (log_size + size > alloced_size_) { 64 | alloced_size_ = alloced_size_ << 1; 65 | need_resize = true; 66 | } 67 | if (need_resize) { 68 | uint8_t *new_start = (uint8_t *)malloc(alloced_size_); 69 | uint32_t old_size = log_size; 70 | memcpy(new_start, start_, old_size); 71 | free(start_); 72 | start_ = new_start; 73 | end_ = current_ = start_ + old_size; 74 | } 75 | } 76 | 77 | inline uint8_t *get_start_ptr() { return start_; } 78 | 79 | inline uint8_t *get_current_ptr() { return current_; } 80 | 81 | inline uint8_t *get_reserved_ptr() { return start_ + RESERVED_SIZE; } 82 | 83 | inline uint32_t get_log_size() const { return (uint32_t)(end_ - start_); } 84 | }; 85 | -------------------------------------------------------------------------------- /scripts/eval/6_tpcc_scalability.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import basic_run 3 | import sys 4 | from datetime import datetime 5 | from get_result import get_test_result 6 | 7 | def config_by_workload(workload): 8 | if basic_run.contention_factor == 0: 9 | basic_run.contention_factor1 = 0 10 | else: 11 | basic_run.contention_factor1 = 15 12 | if workload == "tpcc": 13 | if "hackwrench_fast" in basic_run.bin_file_name: 14 | basic_run.num_of_clients = 600 15 | else: 16 | basic_run.num_of_clients = 300 17 | basic_run.optional = 8 18 | basic_run.num_of_warehouses = basic_run.num_of_nodes["DataBase"] * basic_run.optional 19 | basic_run.num_of_tables = basic_run.num_tables_per_warehouse * basic_run.num_of_warehouses 20 | else: 21 | assert False 22 | 23 | def 
run_one_line(data_dir, workload, option): 24 | trial_name = datetime.now().strftime("%m-%d-%H-%M-%S") 25 | choice = [15,12,9,6,3,1] 26 | for x in choice: 27 | basic_run.num_of_nodes["DataBase"] = x 28 | basic_run.num_of_nodes["StorageNode"] = x * 2 29 | if x == 1: 30 | basic_run.num_of_nodes["StorageNode"] = 3 31 | 32 | config_by_workload(workload) 33 | basic_run.run_test( 34 | data_dir, 35 | workload, 36 | basic_run.num_of_tables, 37 | basic_run.num_of_clients, 38 | basic_run.contention_factor, 39 | basic_run.batch_size, 40 | basic_run.split_num, 41 | basic_run.optional, 42 | basic_run.cache_miss_ratio, 43 | basic_run.sn_replication, 44 | f'{trial_name}/{basic_run.num_of_nodes["DataBase"]}nodes-{x}clients' 45 | ) 46 | 47 | result_of_line = get_test_result( 48 | f"{data_dir}/logs/{trial_name}", 49 | f"results/{choice}-{workload}-{option}.data", 50 | ) 51 | return result_of_line 52 | 53 | if __name__ == "__main__": 54 | data_dir = sys.argv[1] 55 | workload = sys.argv[2] 56 | option = int(sys.argv[3]) 57 | if len(sys.argv) == 5: 58 | basic_run.bin_file_name = sys.argv[4] 59 | 60 | basic_run.prepare_binaries(data_dir) 61 | basic_run.num_of_threads["StorageNode"] = 8 62 | basic_run.num_of_threads["DataBase"] = 8 63 | basic_run.sn_replication = True 64 | basic_run.split_num = 50 65 | basic_run.batch_size = 50 66 | 67 | choice = [10, 50, 100, 200] 68 | results = [] 69 | for x in choice: 70 | basic_run.contention_factor = x 71 | result = str(basic_run.run_one_line_wrapper(run_one_line, data_dir, workload, option)) 72 | results.append(result) 73 | 74 | lines = basic_run.get_res_lines(results) 75 | csv_res = basic_run.get_csv_res(choice, lines) 76 | basic_run.write_to(f'{data_dir}/results/{option}.csv', csv_res) 77 | -------------------------------------------------------------------------------- /scripts/eval/4_tpcc_coco.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import basic_run 3 | import sys 4 | from datetime import datetime 5 | from get_result import get_test_result 6 | 7 | def config_by_workload(workload): 8 | if basic_run.contention_factor == 0: 9 | basic_run.contention_factor1 = 0 10 | else: 11 | basic_run.contention_factor1 = 15 12 | if workload == "tpcc": 13 | if basic_run.bin_file_name == "hackwrench_coco_fast": 14 | basic_run.num_of_clients = 900 15 | else: 16 | basic_run.num_of_clients = 600 17 | basic_run.optional = 8 18 | basic_run.num_of_warehouses = basic_run.num_of_nodes["DataBase"] * basic_run.optional 19 | basic_run.num_of_tables = basic_run.num_tables_per_warehouse * basic_run.num_of_warehouses 20 | else: 21 | assert False 22 | 23 | def run_one_line(data_dir, workload, option): 24 | trial_name = datetime.now().strftime("%m-%d-%H-%M-%S") 25 | choice = [0, 10, 20, 30, 40, 50, 60,70,80,90, 100, 150, 200, 250] 26 | for x in choice: 27 | # basic_run.split_num = x 28 | # basic_run.batch_size = x 29 | basic_run.contention_factor = x 30 | config_by_workload(workload) 31 | basic_run.run_test( 32 | data_dir, 33 | workload, 34 | basic_run.num_of_tables, 35 | basic_run.num_of_clients, 36 | basic_run.contention_factor, 37 | basic_run.batch_size, 38 | basic_run.split_num, 39 | basic_run.optional, 40 | basic_run.cache_miss_ratio, 41 | basic_run.sn_replication, 42 | f'{trial_name}/{basic_run.num_of_nodes["DataBase"]}nodes-{x}clients' 43 | ) 44 | 45 | result_of_line = get_test_result( 46 | f"{data_dir}/logs/{trial_name}", 47 | f"results/{choice}-{workload}-{option}.data", 48 | ) 49 | return result_of_line 50 | 51 | if 
__name__ == "__main__": 52 | data_dir = sys.argv[1] 53 | workload = sys.argv[2] 54 | option = int(sys.argv[3]) 55 | if len(sys.argv) == 5: 56 | basic_run.bin_file_name = sys.argv[4] 57 | 58 | basic_run.prepare_binaries(data_dir) 59 | basic_run.num_of_nodes["DataBase"] = 6 60 | basic_run.num_of_nodes["StorageNode"] = 12 61 | basic_run.num_of_threads["StorageNode"] = 8 62 | basic_run.num_of_threads["DataBase"] = 8 63 | basic_run.sn_replication = True 64 | 65 | choice = [100] 66 | results = [] 67 | for x in choice: 68 | basic_run.split_num = x 69 | basic_run.batch_size = x 70 | result = str(basic_run.run_one_line_wrapper(run_one_line, data_dir, workload, option)) 71 | results.append(result) 72 | 73 | lines = basic_run.get_res_lines(results) 74 | csv_res = basic_run.get_csv_res(choice, lines) 75 | basic_run.write_to(f'{data_dir}/results/{option}.csv', csv_res) 76 | -------------------------------------------------------------------------------- /scripts/eval/5_tpcc_sundial.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import basic_run 3 | import sys 4 | from datetime import datetime 5 | from get_result import get_test_result 6 | 7 | def config_by_workload(workload): 8 | if basic_run.contention_factor == 0: 9 | basic_run.contention_factor1 = 0 10 | else: 11 | basic_run.contention_factor1 = 15 12 | if workload == "tpcc": 13 | if basic_run.bin_file_name == "hackwrench_fast": 14 | basic_run.num_of_clients = 600 15 | else: 16 | basic_run.num_of_clients = 300 17 | basic_run.optional = 8 18 | basic_run.num_of_warehouses = basic_run.num_of_nodes["DataBase"] * basic_run.optional 19 | basic_run.num_of_tables = basic_run.num_tables_per_warehouse * basic_run.num_of_warehouses 20 | else: 21 | assert False 22 | 23 | def run_one_line(data_dir, workload, option): 24 | trial_name = datetime.now().strftime("%m-%d-%H-%M-%S") 25 | choice = [10,20,30,40,50,60,70,80,90,100, 150, 200, 250] 26 | choice = [10, 50, 100, 200] 27 | for x in choice: 28 | # x = t[0] 29 | basic_run.contention_factor = x 30 | # basic_run.contention_factor1 = 15 31 | config_by_workload(workload) 32 | basic_run.run_test( 33 | data_dir, 34 | workload, 35 | basic_run.num_of_tables, 36 | basic_run.num_of_clients, 37 | basic_run.contention_factor, 38 | basic_run.batch_size, 39 | basic_run.split_num, 40 | basic_run.optional, 41 | basic_run.cache_miss_ratio, 42 | basic_run.sn_replication, 43 | f'{trial_name}/{basic_run.num_of_nodes["DataBase"]}nodes-{x}clients' 44 | ) 45 | 46 | result_of_line = get_test_result( 47 | f"{data_dir}/logs/{trial_name}", 48 | f"results/{choice}-{workload}-{option}.data", 49 | ) 50 | return result_of_line 51 | 52 | if __name__ == "__main__": 53 | data_dir = sys.argv[1] 54 | workload = sys.argv[2] 55 | option = int(sys.argv[3]) 56 | if len(sys.argv) == 5: 57 | basic_run.bin_file_name = sys.argv[4] 58 | 59 | basic_run.prepare_binaries(data_dir) 60 | basic_run.num_of_nodes["DataBase"] = 6 61 | basic_run.num_of_nodes["StorageNode"] = 4 62 | basic_run.num_of_threads["DataBase"] = 8 63 | basic_run.num_of_threads["StorageNode"] = 8 64 | basic_run.sn_replication = False 65 | 66 | choice = [30] 67 | results = [] 68 | for x in choice: 69 | # basic_run.contention_factor = x 70 | basic_run.split_num = x 71 | basic_run.batch_size = x 72 | result = str(basic_run.run_one_line_wrapper(run_one_line, data_dir, workload, option)) 73 | results.append(result) 74 | 75 | lines = basic_run.get_res_lines(results) 76 | csv_res = basic_run.get_csv_res(choice, lines) 77 | 
basic_run.write_to(f'{data_dir}/results/{option}.csv', csv_res) 78 | 79 | 80 | -------------------------------------------------------------------------------- /scripts/aws/servers.yaml: -------------------------------------------------------------------------------- 1 | ips: 2 | - 172.31.31.131 3 | - 172.31.22.66 4 | - 172.31.21.82 5 | - 172.31.28.83 6 | - 172.31.31.198 7 | - 172.31.20.249 8 | - 172.31.26.205 9 | - 172.31.21.9 10 | - 172.31.21.146 11 | - 172.31.18.169 12 | - 172.31.27.6 13 | - 172.31.27.115 14 | - 172.31.29.110 15 | - 172.31.18.46 16 | - 172.31.27.10 17 | - 172.31.26.173 18 | - 172.31.16.152 19 | - 172.31.18.96 20 | - 172.31.19.160 21 | - 172.31.23.191 22 | - 172.31.26.8 23 | - 172.31.24.42 24 | - 172.31.22.102 25 | - 172.31.17.99 26 | - 172.31.20.211 27 | - 172.31.29.79 28 | - 172.31.17.178 29 | # - 172.31.16.65 30 | # - 172.31.25.34 31 | # - 172.31.21.91 32 | # - 172.31.25.27 33 | # - 172.31.23.88 34 | # - 172.31.30.91 35 | # - 172.31.18.156 36 | # - 172.31.28.132 37 | # - 172.31.26.197 38 | # - 172.31.30.67 39 | # - 172.31.24.67 40 | # - 172.31.17.86 41 | # - 172.31.25.55 42 | # - 172.31.26.240 43 | # - 172.31.25.85 44 | # - 172.31.20.46 45 | # - 172.31.26.15 46 | # - 172.31.28.201 47 | # - 172.31.23.171 48 | # - 172.31.23.255 49 | # - 172.31.19.153 50 | # - 172.31.16.87 51 | # - 172.31.18.251 52 | # - 172.31.22.158 53 | # - 172.31.20.187 54 | # - 172.31.24.247 55 | # - 172.31.17.223 56 | # - 172.31.26.141 57 | # - 172.31.21.4 58 | # - 172.31.27.146 59 | # - 172.31.20.134 60 | # - 172.31.25.166 61 | type: 62 | - StorageNode 63 | - StorageNode 64 | - StorageNode 65 | - StorageNode 66 | - StorageNode 67 | - StorageNode 68 | - StorageNode 69 | - StorageNode 70 | - StorageNode 71 | - StorageNode 72 | - StorageNode 73 | - StorageNode 74 | - StorageNode 75 | - StorageNode 76 | - StorageNode 77 | - StorageNode 78 | - StorageNode 79 | - StorageNode 80 | # - StorageNode 81 | # - StorageNode 82 | # - StorageNode 83 | # - StorageNode 84 | # - StorageNode 85 | # - StorageNode 86 | # - StorageNode 87 | # - StorageNode 88 | # - StorageNode 89 | # - StorageNode 90 | # - StorageNode 91 | # - StorageNode 92 | - TimeServer 93 | - DataBase 94 | - DataBase 95 | - DataBase 96 | - DataBase 97 | - DataBase 98 | - DataBase 99 | - DataBase 100 | - DataBase 101 | # - DataBase 102 | # - DataBase 103 | # - DataBase 104 | # - DataBase 105 | # - DataBase 106 | # - DataBase 107 | # - DataBase 108 | # - DataBase 109 | # - DataBase 110 | # - DataBase 111 | # - DataBase 112 | # - DataBase 113 | # - DataBase 114 | # - DataBase 115 | # - DataBase 116 | # - DataBase 117 | # - DataBase 118 | # - DataBase 119 | # - DataBase 120 | # - DataBase 121 | # - DataBase 122 | # - DataBase 123 | # - DataBase 124 | # - DataBase 125 | # - DataBase 126 | # - DataBase 127 | # - DataBase 128 | # - DataBase 129 | # - DataBase 130 | # - DataBase 131 | # - DataBase 132 | # - DataBase 133 | # - DataBase 134 | # - DataBase 135 | # - DataBase 136 | # - DataBase 137 | # - DataBase 138 | # - DataBase 139 | # - DataBase 140 | # - DataBase 141 | # - DataBase 142 | # - DataBase 143 | # - DataBase 144 | # - DataBase 145 | # - DataBase 146 | # - DataBase 147 | # - DataBase 148 | # - DataBase -------------------------------------------------------------------------------- /src/storage/storage.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | #include "servers/config.h" 8 | #include "rpc/SNinterface.capnp.h" 9 | #include "util/macros.h" 
10 | #include "segment.h" 11 | 12 | class InMemSegments { 13 | public: 14 | const Configuration &conf; 15 | InMemSegments(const Configuration &conf) : conf(conf) { 16 | segments = (Segment **)malloc(sizeof(Segment *) * conf.numSegments()); 17 | for (seg_id_t seg_id = 0; seg_id < conf.numSegments(); ++seg_id) { 18 | if (conf.isMySeg(seg_id)) { 19 | segments[seg_id] = new Segment(seg_id); 20 | } else { 21 | segments[seg_id] = nullptr; 22 | } 23 | } 24 | } 25 | 26 | ~InMemSegments() {} 27 | 28 | PageMeta *try_alloc_page(seg_id_t seg_id, page_id_t page_id) { 29 | auto &segment = get_segment(seg_id); 30 | if (segment.page_is_free(page_id)) { 31 | PageMeta *page_meta = segment.acquire_page(); 32 | ASSERT(page_meta->gp_id.page_id == page_id); 33 | 34 | return page_meta; 35 | } else { 36 | ASSERT(false); 37 | return nullptr; 38 | } 39 | } 40 | 41 | PageMeta *alloc_next_page(seg_id_t seg_id) { 42 | auto &segment = get_segment(seg_id); 43 | PageMeta *page_meta = segment.acquire_page(); 44 | return page_meta; 45 | } 46 | 47 | bool setPage(seg_id_t seg_id, page_id_t page_id, ts_t cts, const uint8_t *data) { 48 | auto &segment = get_segment(seg_id); 49 | auto page_meta = segment.get_page_meta(page_id); 50 | 51 | if (page_meta == nullptr) { 52 | // pages[page_id] = std::make_unique(page_id, cts, data); 53 | // TODO: should we use it as insert? 54 | LOG(FATAL) << "No such page (page_id=" << page_id << ")\n"; 55 | return false; 56 | } else { 57 | page_meta->copy_data(data); 58 | page_meta->set_cts(cts); 59 | ASSERT(page_meta->gp_id.page_id == page_id); 60 | return true; 61 | } 62 | } 63 | 64 | PageMeta *get_page_meta(GlobalPageId gp_id) { 65 | auto &segment = get_segment(gp_id.seg_id); 66 | return segment.get_page_meta(gp_id.page_id); 67 | } 68 | 69 | void finalize_value_size() { 70 | for (seg_id_t seg_id = 0; seg_id < conf.numSegments(); ++seg_id) { 71 | Segment* seg = segments[seg_id]; 72 | if (seg != nullptr && seg->cur_page_id != 0) { 73 | seg->value_size = seg->get_page_meta(0)->get_value_size(); 74 | } 75 | } 76 | } 77 | 78 | inline Segment &get_segment(seg_id_t seg_id) { 79 | return *segments[seg_id]; 80 | } 81 | 82 | inline Segment **&get_segments() { return segments; } 83 | 84 | private: 85 | // std::unordered_map> segments; 86 | Segment **segments; 87 | }; -------------------------------------------------------------------------------- /src/rpc/message_buffer.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | template 8 | class blockingQueue { 9 | private: 10 | std::mutex d_mutex; 11 | std::condition_variable d_condition; 12 | std::deque d_queue; 13 | 14 | public: 15 | inline void push(T &value) { 16 | { 17 | std::lock_guard lock(this->d_mutex); 18 | d_queue.emplace_front(); 19 | d_queue.front().move(value); 20 | } 21 | this->d_condition.notify_one(); 22 | } 23 | 24 | inline T pop() { 25 | std::unique_lock lock(this->d_mutex); 26 | this->d_condition.wait(lock, [=] { return !this->d_queue.empty(); }); 27 | T rc(std::move(this->d_queue.back())); 28 | this->d_queue.pop_back(); 29 | return rc; 30 | } 31 | 32 | inline std::vector pop_batch(int &pop_batch_size) { 33 | std::vector ret; 34 | std::unique_lock lock(this->d_mutex); 35 | this->d_condition.wait(lock, [=] { return !this->d_queue.empty(); }); 36 | int i = 0; 37 | for (; i < pop_batch_size && !d_queue.empty(); i++) { 38 | ret.emplace_back(std::move(this->d_queue.back())); 39 | this->d_queue.pop_back(); 40 | } 41 | pop_batch_size = i; 42 
| return ret; 43 | } 44 | 45 | inline bool pop_once(T &rc) { 46 | std::lock_guard lock(this->d_mutex); 47 | if (this->d_queue.empty()) { 48 | return false; 49 | } else { 50 | rc.move(this->d_queue.back()); 51 | this->d_queue.pop_back(); 52 | return true; 53 | } 54 | } 55 | }; 56 | 57 | template 58 | class blockingHash { 59 | private: 60 | std::mutex d_mutex; 61 | std::condition_variable d_condition; 62 | std::unordered_map hash; 63 | 64 | public: 65 | inline void push(uint64_t id, T &value) { 66 | { 67 | std::lock_guard lock(this->d_mutex); 68 | hash.emplace(std::piecewise_construct, std::forward_as_tuple(id), 69 | std::forward_as_tuple()); 70 | hash.at(id).move(value); 71 | } 72 | this->d_condition.notify_all(); 73 | } 74 | 75 | inline T get(uint64_t id) { 76 | std::unique_lock lock(this->d_mutex); 77 | this->d_condition.wait(lock, [=] { return this->hash.find(id) != this->hash.end(); }); 78 | T rc(std::move(this->hash.at(id))); 79 | this->hash.erase(id); 80 | return rc; 81 | } 82 | 83 | inline bool get_once(uint64_t id, T &rc) { 84 | std::lock_guard lock(this->d_mutex); 85 | auto iter = this->hash.find(id); 86 | if (iter == this->hash.end()) { 87 | return false; 88 | } else { 89 | rc.move(iter->second); 90 | this->hash.erase(iter); 91 | return true; 92 | } 93 | } 94 | }; -------------------------------------------------------------------------------- /dataflow_api/src/benchmark/tpcc/txn_delivery.cc: -------------------------------------------------------------------------------- 1 | #include "tpcc.hpp" 2 | #include "api/txn.hpp" 3 | #include "type/input.hpp" 4 | 5 | void delivery_input(Txn &txn) { 6 | Input &input = txn.getInput(); 7 | input.add("W_ID", BuiltInType::INT); 8 | input.add("D_NUM", BuiltInType::INT); // 10 9 | input.add("O_CARRIER_ID", BuiltInType::INT); 10 | 11 | txn.setPartitionAffinity(input["W_ID"]); 12 | } 13 | 14 | void delivery_graph(Txn &txn) { 15 | Input &input = txn.getInput(); 16 | Value &w_id = input["W_ID"]; 17 | Value &o_carrier_id = input["O_CARRIER_ID"]; 18 | Value &ol_delivery_d = input["OL_DELIVERY_D"]; 19 | 20 | auto iter1_logic = [&w_id, &o_carrier_id, &ol_delivery_d](Txn &txn, Input &loop_input, Value &loop_num) { 21 | Value &d_id = loop_input["D_ID"]; 22 | // District 23 | Row dist_deli_index = txn.get(DIST_DELI_INDEX, {w_id, d_id}); 24 | Value ddi_o_id = dist_deli_index.getColumn(DDI_O_ID); 25 | 26 | // New Order 27 | Row new_order = txn.get(NORD, {w_id, d_id, ddi_o_id}); 28 | Value found = new_order.isFound(); 29 | 30 | txn.beginIf(found); 31 | { 32 | Value new_ddi_o_id = ddi_o_id.apply("Add", {}); 33 | dist_deli_index.setColumn(DDI_O_ID, new_ddi_o_id); 34 | txn.put(DIST_DELI_INDEX, {w_id, d_id}, dist_deli_index); 35 | 36 | // delete 37 | txn.put(NORD, {w_id, d_id, ddi_o_id}, new_order); 38 | 39 | Row order = txn.get(ORDR, {w_id, d_id, ddi_o_id}); 40 | Value o_c_id = order.getColumn(O_C_ID); 41 | Value o_ol_cnt = order.getColumn(O_OL_COUNT); 42 | order.setColumn(O_CARRIER_ID, o_carrier_id); 43 | txn.put(ORDR, {w_id, d_id, ddi_o_id}, order); 44 | 45 | auto iter2_logic = [&w_id, &d_id, &ddi_o_id, &ol_delivery_d](Txn &txn, Input &loop_input, Value &loop_num) { 46 | Row order_line = txn.get(ORLI, {w_id, d_id, ddi_o_id, loop_num}); 47 | Value ol_amount = order_line.getColumn(OL_AMOUNT); 48 | order_line.setColumn(OL_DELIVERY_D, ol_delivery_d); 49 | txn.put(ORLI, {w_id, d_id, ddi_o_id, loop_num}, order_line); 50 | 51 | Values res; 52 | res.add("OL_AMOUNT", ol_amount); 53 | return res; 54 | }; 55 | 56 | Input empty_input; 57 | Values resArray = 
txn.map(iter2_logic, empty_input, o_ol_cnt); 58 | Values res = resArray.reduce("SUM"); 59 | Value total_amount = res["OL_AMOUNT"]; 60 | 61 | Row cust = txn.get(CUST, {w_id, d_id, o_c_id}); 62 | Value c_delivery_cnt = cust.getColumn(C_DELIVERY_CNT).apply("Add", {}); 63 | cust.setColumn(C_DELIVERY_CNT, c_delivery_cnt); 64 | Value c_balance = cust.getColumn(C_BALANCE).apply("Add", {total_amount}); 65 | cust.setColumn(C_BALANCE, c_balance); 66 | txn.put(CUST, {w_id, d_id, o_c_id}, cust); 67 | } 68 | txn.endIf(); 69 | 70 | Values res; 71 | return res; 72 | }; 73 | 74 | Input empty_input; 75 | Values resArray = txn.map(iter1_logic, empty_input, input["D_NUM"]); 76 | txn.commit(); 77 | } 78 | -------------------------------------------------------------------------------- /scripts/eval/9_micro_batch_effect.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import basic_run 3 | import sys 4 | from datetime import datetime 5 | from get_result import get_test_result 6 | 7 | def config_by_workload(workload): 8 | if workload == "ycsb10": 9 | basic_run.num_of_tables = 12000 10 | else: 11 | assert False 12 | 13 | def run_one_line(data_dir, workload, option): 14 | trial_name = datetime.now().strftime("%m-%d-%H-%M-%S") 15 | 16 | choice = [] 17 | # if basic_run.contention_factor == 10: 18 | # # choice = [50, 100, 200, 500, 1000, 1500, 2000] 19 | # choice = [2000, 1600, 1200, 800, 600, 400, 200, 160, 120, 80, 40] 20 | # else: 21 | # choice = [2000, 1600, 1200, 800, 600, 400, 200, 160, 120, 80, 40] 22 | # choice = [50, 100, 200, 300, 400, 500] 23 | 24 | choice = [4000, 3200, 2400, 1600, 800, 400, 200, 120, 40] 25 | # choice = [1] 26 | for x in choice: 27 | basic_run.num_of_clients = x 28 | # basic_run.num_of_clients = basic_run.batch_size 29 | # basic_run.batch_size = x 30 | # basic_run.split_num = x 31 | 32 | config_by_workload(workload) 33 | basic_run.run_test( 34 | data_dir, 35 | workload, 36 | basic_run.num_of_tables, 37 | basic_run.num_of_clients, 38 | basic_run.contention_factor, 39 | basic_run.batch_size, 40 | basic_run.split_num, 41 | 8, 42 | basic_run.cache_miss_ratio, 43 | basic_run.sn_replication, 44 | f'{trial_name}/{basic_run.num_of_nodes["DataBase"]}nodes-{x}clients' 45 | ) 46 | 47 | result_of_line = get_test_result( 48 | f"{data_dir}/logs/{trial_name}", 49 | f"results/{choice}-{workload}-{option}.data", 50 | ) 51 | return result_of_line 52 | 53 | 54 | if __name__ == "__main__": 55 | data_dir = sys.argv[1] 56 | workload = sys.argv[2] 57 | option = int(sys.argv[3]) 58 | if len(sys.argv) == 5: 59 | basic_run.bin_file_name = sys.argv[4] 60 | 61 | basic_run.prepare_binaries(data_dir) 62 | 63 | basic_run.num_of_nodes["DataBase"] = 8 64 | basic_run.num_of_nodes["StorageNode"] = 18 65 | basic_run.num_of_threads["StorageNode"] = 8 66 | basic_run.num_of_threads["DataBase"] = 8 67 | basic_run.cache_miss_ratio = 0 68 | basic_run.sn_replication = True 69 | 70 | if option % 2 == 0: 71 | basic_run.contention_factor = 10 72 | else: 73 | basic_run.contention_factor = 99 74 | if option < 4: 75 | basic_run.contention_factor1 = 10 76 | else: 77 | basic_run.contention_factor1 = 100 78 | 79 | # choice = [1, 2, 5, 10, 20, 40, 80, 120, 160, 200] 80 | choice = [1, 5, 10, 20, 40, 80, 160] 81 | results = [] 82 | for x in choice: 83 | # basic_run.num_of_clients = x 84 | basic_run.batch_size = x 85 | basic_run.split_num = x 86 | 87 | result = str(basic_run.run_one_line_wrapper(run_one_line, data_dir, workload, option)) 88 | results.append(result) 89 | 90 | 
lines = basic_run.get_res_lines(results) 91 | csv_res = basic_run.get_csv_res(choice, lines) 92 | basic_run.write_to(f'{data_dir}/results/{option}.csv', csv_res) 93 | 94 | -------------------------------------------------------------------------------- /scripts/run_all.sh: -------------------------------------------------------------------------------- 1 | useBinary(){ 2 | rm -rf ../build 3 | cp -r ../${1} ../build 4 | } 5 | 6 | cleanResult(){ 7 | rm -rf aws/results 8 | mkdir aws/results 9 | } 10 | 11 | mkdir finalV2-results 12 | 13 | # 1. ycsb ratio 14 | cleanResult 15 | useBinary hw-build 16 | python3 finalV2/1_ycsb_ratio.py aws ycsb10 0 17 | mv aws/results finalV2-results/1_ycsb_ratio_results 18 | 19 | # 2-1. ycsb batch effect with contention free 20 | cleanResult 21 | useBinary hw-build 22 | python3 finalV2/2_ycsb_batch_effect.py aws ycsb10 0 23 | mv aws/results finalV2-results/2-1_ycsb_batch_effect_contention_free 24 | 25 | # 2-2. ycsb batch effect with uniform 26 | cleanResult 27 | useBinary hw-build 28 | python3 finalV2/2_ycsb_batch_effect.py aws ycsb10 101 29 | mv aws/results finalV2-results/2-2_ycsb_batch_effect_uniform 30 | 31 | # 3-1. ycsb cache effect with contention free 32 | cleanResult 33 | useBinary hw-build 34 | python3 finalV2/3_ycsb_cache_effect.py aws ycsb10 0 35 | mv aws/results finalV2-results/3-1_ycsb_cache_effect_contention_free 36 | 37 | # 3-2. ycsb cache effect with uniform 38 | cleanResult 39 | useBinary hw-build 40 | python3 finalV2/3_ycsb_cache_effect.py aws ycsb10 101 41 | mv aws/results finalV2-results/3-2_ycsb_cache_effect_uniform 42 | 43 | # 4-1. ycsb contention with hw 44 | cleanResult 45 | useBinary hw-build 46 | python3 finalV2/4_ycsb_contention.py aws ycsb10 0 47 | mv aws/results finalV2-results/4-1_ycsb_contention_hw 48 | 49 | # 4-2. ycsb contention with occ 50 | cleanResult 51 | useBinary occ-build 52 | python3 finalV2/4_ycsb_contention.py aws ycsb10 1 53 | mv aws/results finalV2-results/4-2_ycsb_contention_occ 54 | 55 | # 5. tpcc ratio 56 | cleanResult 57 | useBinary hw-build 58 | python3 finalV2/5_tpcc_ratio.py aws tpcc 0 59 | mv aws/results finalV2-results/5_tpcc_ratio 60 | 61 | # 6. tpcc scalability 62 | cleanResult 63 | useBinary hw-build 64 | python3 finalV2/6_tpcc_scalability.py aws tpcc 0 65 | mv aws/results finalV2-results/6_tpcc_scalability 66 | 67 | # 7-1. tpcc tput-lat hw 68 | cleanResult 69 | useBinary hw-build 70 | python3 finalV2/7_tpcc_tput_lat.py aws tpcc 0 71 | mv aws/results finalV2-results/7_tpcc_tput_lat_hw_batch 72 | 73 | # 7-2. tpcc tput-lat occ 74 | cleanResult 75 | useBinary occ-build 76 | python3 finalV2/7_tpcc_tput_lat.py aws tpcc 1 77 | mv aws/results finalV2-results/7_tpcc_tput_lat_occ 78 | 79 | # 8-1. tpcc abort 80 | cleanResult 81 | useBinary abort-build 82 | python3 finalV2/8_abort_repair.py aws tpcc 0 83 | mv aws/results finalV2-results/8_tpcc_abort 84 | 85 | # 8-2. tpcc nfc 86 | cleanResult 87 | useBinary nfc-build 88 | python3 finalV2/8_abort_repair.py aws tpcc 0 89 | mv aws/results finalV2-results/8_tpcc_nfc 90 | 91 | # 9. tpcc replication effect 92 | cleanResult 93 | useBinary hw-build 94 | python3 finalV2/9_tpcc_replication_effect.py aws tpcc 0 95 | mv aws/results finalV2-results/9_tpcc_replication_effect 96 | 97 | # 10-1. occ ycsb batch effect 98 | cleanResult 99 | useBinary occ-build 100 | python3 finalV2/10_occ_batch_effect.py aws ycsb10 0 101 | mv aws/results finalV2-results/10_occ_batch_effect_ycsb 102 | 103 | # 10-2. 
occ tpcc batch effect 104 | cleanResult 105 | useBinary occ-build 106 | python3 finalV2/10_occ_batch_effect.py aws tpcc 0 107 | mv aws/results finalV2-results/10_occ_batch_effect_tpcc 108 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Hackwrench 2 | 3 | Hackwrench is a cloud-native database that separates computation (transaction execution) from data storage logic. 4 | 5 | # Publication 6 | 7 | * Zhiyuan Dong, Zhaoguo Wang, Xiaodong Zhang, Xian Xu, Changgeng Zhao, Haibo Chen, Aurojit Panda, Jinyang Li. **Fine-Grained Re-Execution for Efficient Batched Commit of Distributed Transactions**. The 49th International Conference on Very Large Data Bases (VLDB '23), Vancouver, Canada, 2023. 8 | 9 | # Source Repo 10 | 11 | The primary repository for this project resides at [https://ipads.se.sjtu.edu.cn:1312/opensource/hackwrench](https://ipads.se.sjtu.edu.cn:1312/opensource/hackwrench) 12 | 13 | ## Code organization 14 | 15 | The following figure shows the code structure of the overall project. 16 | 17 | ``` 18 | root 19 | │ CMakeLists.txt 20 | │ README.md 21 | └─── dataflow_api 22 | └─── doc 23 | └─── scripts 24 | └─── src 25 | ``` 26 | | Folder | description | 27 | |--------|----------| 28 | | `dataflow_api` | A static analysis tool. It provides the dataflow-based programming abstraction for users to write transactions' stored procedures (`Sec 3.2`) | 29 | | `doc` | The documents of Hackwrench. | 30 | | `scripts` | The scripts used to build and run Hackwrench | 31 | | `src` | Hackwrench's source code | 32 | 33 | ## The Dataflow Programming Abstraction 34 | 35 | Hackwrench requires the users to write transactions in the form of dataflow-based stored procedures. We provide a dataflow programming abstraction to track the dependencies inside each transaction and statically analyze whether the transactions can use the fast path optimization. The code organization is shown as follows: 36 | 37 | ``` 38 | dataflow_api 39 | │ main.cpp 40 | └─── src 41 | └─── api 42 | └─── benchmark 43 | └─── graph 44 | └─── type 45 | └─── util 46 | ``` 47 | 48 | 49 | | Folder | description | 50 | |--------|----------| 51 | | `api` | The API used by developers to write stored procedures. | 52 | | `benchmark` | An illustrative example of the dataflow API, using TPC-C. | 53 | | `graph` | The data structures used to represent a dataflow graph. | 54 | | `type` | The internal data types used. | 55 | | `util` | Utilities. | 56 | 57 | 58 | We first describe the programming interfaces and then use TPC-C as an illustrative example, showing how to use the APIs and whether the fast path optimization can be applied to TPC-C. 59 | 60 | [The Dataflow Programming Abstraction](./doc/dataflow_api/api.md) 61 | 62 | [TPC-C as an example](./doc/dataflow_api/tpcc.md) 63 | 64 | ## The Hackwrench System 65 | 66 | Hackwrench's code organization is shown as follows: 67 | 68 | ``` 69 | src 70 | │ main.cpp 71 | └─── src 72 | └─── benchmarks 73 | └─── index 74 | └─── rpc 75 | └─── servers 76 | └─── storage 77 | └─── txn 78 | └─── util 79 | ``` 80 | 81 | | Folder | description | 82 | |--------|----------| 83 | | `benchmarks` | The benchmarks used for evaluation. | 84 | | `index` | The data structure for in-memory data indexing. | 85 | | `rpc` | The RPC library. | 86 | | `servers` | The event loop and RPC handling logic of the `database node`, `time server`, and `storage node`. 
| 87 | | `storage` | The storage-related data structures for organizing `segment` and `page`. | 88 | | `txn` | The code logic related to `transaction local execution` and `batched transactions`. | 89 | | `util` | Utilities. | 90 | 91 | [How to Build Hackwrench](./doc/hackwrench/build.md) 92 | 93 | [How to Run Hackwrench](./doc/hackwrench/run.md) 94 | 95 | 98 | -------------------------------------------------------------------------------- /doc/dataflow_api/api.md: -------------------------------------------------------------------------------- 1 | # The Programming Interface 2 | 3 | The dataflow APIs are shown in `root/dataflow_api/api`. We now describe them in detail. 4 | ## Schema 5 | 6 | Hackwrench requires the users to define the database schema ([schema.hpp](../../dataflow_api/src/api/schema.hpp)): 7 | 8 | ```C++ 9 | class TableSchema { 10 | TableSchema(string tbl_name, 11 | bool read_only, 12 | vector<pair<string, BuiltInType>> columns, 13 | vector<string> pkey_def, 14 | string part_key_def); 15 | }; 16 | ``` 17 | 18 | For each table in the database, the users should define: 19 | 1. `tbl_name`: the table's name. 20 | 2. `read_only`: whether the table is read-only. 21 | 3. `columns` : the list of columns, each of which is a pair of the column name and the column's `BuiltInType` (`Int`, `Float`, or `String`). 22 | 4. `pkey_def` : the names of the columns that constitute the primary key. 23 | 5. `part_key_def` : the name of the column used to partition the table. 24 | 25 | ## Transaction 26 | 27 | After the schemas are defined, the users can write stored procedures with the APIs provided in [txn.hpp](../../dataflow_api/src/api/txn.hpp). 28 | 29 | 30 | ```C++ 31 | using IterationLogic = std::function<Values(Txn &txn, Input &input, Value &loop_num)>; 32 | 33 | class Txn { 34 | // Read & Write Data Flow 35 | Row get(string table_name, vector<Value> pkey); 36 | void put(string table_name, vector<Value> pkey, Row row); 37 | 38 | // If-Branch Control Flow 39 | void beginIf(Value value); 40 | void endIf(); 41 | 42 | // Loop Control Flow 43 | Values map(IterationLogic iter_logic, Input input, Value loop_count); 44 | 45 | // Commit or Abort Transaction 46 | void commit(); 47 | void abort(); 48 | 49 | // Hint given by the user 50 | // Specify the partition (thus the database node) where the transaction should execute 51 | void setPartitionAffinity(Value value); 52 | }; 53 | ``` 54 | 55 | ### Data Flow Operation 56 | 57 | The `Get/Put` APIs are the basic data access operations, which read and write one database row (`Row`) identified by the target table and primary key. Specifically, `Value` is an instance of a given `BuiltInType`, and it is also responsible for tracking the dependencies inside the transaction. The primary key is a list of values, and each value corresponds to one column of the primary key definition. 58 | 59 | ### Control Flow Operation 60 | 61 | The API also captures the control flow. `beginIf` and `endIf` delimit an if-branch; `beginIf` accepts a value as the branch condition. 62 | 63 | `map` handles loops; its parameters are: 64 | 1. `iter_logic` : The loop body of type `IterationLogic`. `IterationLogic`'s `loop_num` parameter identifies the current iteration. 65 | 2. `input` : The input of each iteration, which is a map from names to corresponding values. 66 | 3. `loop_count` : The value indicating the number of iterations executed. 67 | 68 | Since the loop body (`IterationLogic`) is a C++ lambda function, values shared among iterations can be `captured` by the lambda instead of being explicitly passed to the `map` API.
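
To make the pieces above concrete, here is a minimal sketch that defines a one-table schema and a stored procedure over it. The `ACCT` table, its columns, the `A_BALANCE` column index, and the `deposit_graph` function are hypothetical illustrations rather than part of the benchmark code; the `TableSchema` and `Txn` APIs are the ones listed above, used in the same style as the TPC-C procedures under `dataflow_api/src/benchmark`.

```C++
#include "api/schema.hpp"
#include "api/txn.hpp"
#include "type/input.hpp"

// Hypothetical schema: an account table partitioned by the account id.
TableSchema acct_schema("ACCT",
                        /*read_only=*/false,
                        {{"A_ID", BuiltInType::INT}, {"A_BALANCE", BuiltInType::INT}},
                        /*pkey_def=*/{"A_ID"},
                        /*part_key_def=*/"A_ID");

enum AcctColumns { A_ID_COL = 0, A_BALANCE = 1 };  // hypothetical column indexes

// A minimal deposit procedure written against the dataflow API.
void deposit_graph(Txn &txn) {
    Input &input = txn.getInput();
    input.add("A_ID", BuiltInType::INT);
    input.add("AMOUNT", BuiltInType::INT);

    // Hint: this transaction's operations target the partition of A_ID.
    txn.setPartitionAffinity(input["A_ID"]);

    // Data flow: read the row, derive the new balance, write it back.
    Row acct = txn.get("ACCT", {input["A_ID"]});
    Value balance = acct.getColumn(A_BALANCE).apply("Add", {input["AMOUNT"]});
    acct.setColumn(A_BALANCE, balance);
    txn.put("ACCT", {input["A_ID"]}, acct);

    txn.commit();
}
```

Because every `get`, `put`, and `apply` call flows through `Row` and `Value`, simply invoking such a procedure lets the analyzer record the key, value, and control dependencies that make up the dataflow graph.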
69 | 70 | ### Others 71 | 72 | `commit` and `abort` commit or abort the transaction, respectively. 73 | 74 | Finally, users can leverage `setPartitionAffinity` to hint that most data operations of the transaction access the target partition. The transaction is then executed on the database node dedicated to that partition. 75 | 76 | ## Current Limitation 77 | 78 | Due to time constraints, Hackwrench does not currently integrate dataflow-based transaction execution. The `dataflow_api` is a standalone static analysis tool; its analysis results are used to guide the implementation of Hackwrench's hand-written C++ stored procedures. 79 | 80 | -------------------------------------------------------------------------------- /scripts/eval/get_result.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import subprocess 4 | from subprocess import run 5 | import sys 6 | import traceback 7 | from pathlib import Path 8 | 9 | def grep_number(log_dir, c, name): 10 | x = run(f'cat {log_dir}/*-{c}clients/db.log | grep "{name}"', shell=True, stdout=subprocess.PIPE) 11 | if x.stdout.decode('utf-8') == '': 12 | return -1 13 | output = x.stdout.decode('utf-8') 14 | output = output[output.find(']')+1:] 15 | return float(output.split(':')[1].split()[0]) 16 | 17 | def grep_batch_profile(log_dir, c, name): 18 | x = run(f'cat {log_dir}/*-{c}clients/db.log | grep "{name}"', shell=True, stdout=subprocess.PIPE) 19 | if x.stdout.decode('utf-8') == '': 20 | return -1 21 | output = x.stdout.decode('utf-8') 22 | output = output[output.find(']')+1:] 23 | return float(output.split(f'{name}:')[1].split()[0]) 24 | 25 | def grep_batch_profile_non_float(log_dir, c, name): 26 | x = run(f'cat {log_dir}/*-{c}clients/db.log | grep "{name}"', shell=True, stdout=subprocess.PIPE) 27 | if x.stdout.decode('utf-8') == '': 28 | return -1 29 | output = x.stdout.decode('utf-8') 30 | output = output[output.find(']')+1:] 31 | return output.split(f'{name}:')[1].split()[0] 32 | 33 | def get_test_result(log_dir, output_file=None): 34 | output = run(f'ls {log_dir}', shell=True, stdout=subprocess.PIPE) 35 | clients = output.stdout.decode('utf-8').replace('clients', '').split() 36 | clients = sorted([int(i.split('-')[1]) for i in clients]) 37 | output_str = '' 38 | for t in clients: 39 | thpt = grep_number(log_dir, t, "Total throu") 40 | if thpt == -1: 41 | continue 42 | run_times = grep_number(log_dir, t, "Total run_times") 43 | commit_times = grep_number(log_dir, t, "Total commit_times") 44 | repair_times = grep_number(log_dir, t, "Total repair_times") 45 | batch_commit_times = grep_batch_profile(log_dir, t, "batch_commit_times") 46 | if batch_commit_times == 0: 47 | batch_commit_times = 1 48 | batch_repair_ratio = grep_batch_profile(log_dir, t, "batch_repair_ratio") 49 | batch_abort_ratio = grep_batch_profile(log_dir, t, "batch_abort_ratio") 50 | abort_ratio = grep_batch_profile(log_dir, t, "local_abort_ratio") 51 | remote_abort_ratio = grep_batch_profile(log_dir, t, "remote_abort_ratio") 52 | repair_ratio = repair_times / (commit_times + 0.00000000000001) 53 | avg_batch_size = commit_times / batch_commit_times 54 | average_transaction_latency = grep_batch_profile_non_float(log_dir, t, "Average Transaction Latency") 55 | p50_transaction_latency = grep_batch_profile_non_float(log_dir, t, "P50 Transaction Latency") 56 | p90_transaction_latency = grep_batch_profile_non_float(log_dir, t, "P90 Transaction Latency") 57 | p99_transaction_latency =
grep_batch_profile_non_float(log_dir, t, "P99 Transaction Latency") 58 | if average_transaction_latency == -1: 59 | output_str += (f'{t}, {thpt}, {avg_batch_size}, {batch_repair_ratio}\n') 60 | else: 61 | output_str += (f'{t}, {thpt}, {remote_abort_ratio}, {abort_ratio}, {repair_ratio}, {batch_repair_ratio}, {batch_abort_ratio}, {avg_batch_size}, {average_transaction_latency}, {p50_transaction_latency}, {p90_transaction_latency}, {p99_transaction_latency}\n') 62 | 63 | if output_file: 64 | o_file = Path(output_file) 65 | o_file.parent.mkdir(exist_ok=True, parents=True) 66 | o_file.write_text(output_str) 67 | print(output_str) 68 | return output_str 69 | 70 | if __name__ == '__main__': 71 | log_dir = sys.argv[1] 72 | output_file = None 73 | if len(sys.argv) > 2: 74 | output_file = sys.argv[2] 75 | get_test_result(log_dir, output_file) 76 | -------------------------------------------------------------------------------- /dataflow_api/src/benchmark/tpcc/txn_new_order.cc: -------------------------------------------------------------------------------- 1 | #include "tpcc.hpp" 2 | #include "api/txn.hpp" 3 | #include "type/input.hpp" 4 | 5 | void new_order_input(Txn &txn) { 6 | Input &input = txn.getInput(); 7 | input.add("W_ID", BuiltInType::INT); 8 | input.add("D_ID", BuiltInType::INT); 9 | input.add("C_ID", BuiltInType::INT); 10 | input.add("OL_COUNT", BuiltInType::INT); 11 | 12 | Input loop_vars; 13 | loop_vars.add("I_ID", BuiltInType::INT); 14 | loop_vars.add("S_W_ID", BuiltInType::INT); 15 | loop_vars.add("QUANTITY", BuiltInType::INT); 16 | input.addArray("LOOP_ORDERLINE", loop_vars); 17 | 18 | txn.setPartitionAffinity(input["W_ID"]); 19 | } 20 | 21 | void new_order_graph(Txn &txn) { 22 | Input &input = txn.getInput(); 23 | Value &w_id = input["W_ID"]; 24 | Value &d_id = input["D_ID"]; 25 | Value &c_id = input["C_ID"]; 26 | 27 | // Warehouse 28 | Row ware = txn.get(WARE, {w_id}); 29 | Value w_tax = ware.getColumn(W_TAX); 30 | 31 | // District 32 | Row dist = txn.get(DIST, {w_id, d_id}); 33 | Value d_tax = dist.getColumn(D_TAX); 34 | Value d_next_o_id = dist.getColumn(D_NEXT_O_ID).apply("Add", {}); 35 | dist.setColumn(D_NEXT_O_ID, d_next_o_id); 36 | txn.put(DIST, {w_id, d_id}, dist); 37 | 38 | // Customer 39 | Row cust = txn.get(CUST, {w_id, d_id, c_id}); 40 | Value c_discount = cust.getColumn(C_DISCOUNT); 41 | 42 | Row cust_index = txn.alloc(CUST_INDEX); 43 | cust_index.setColumn(CI_LAST_ORDER, d_next_o_id); 44 | txn.put(CUST_INDEX, {w_id, d_id, c_id}, cust_index); 45 | 46 | // Order 47 | Row order = txn.alloc(ORDR); 48 | // skip static modification to o_ol_cnt, o_all_local, o_carrier_id, o_c_id, o_entry_d 49 | txn.put(ORDR, {w_id, d_id, d_next_o_id}, order); 50 | 51 | // New Order 52 | Row new_order = txn.alloc(NORD); 53 | txn.put(NORD, {w_id, d_id, d_next_o_id}, new_order); 54 | 55 | auto iter_logic = [&w_id, &d_id, &d_next_o_id](Txn &txn, Input &loop_input, Value &loop_num) { 56 | Value &i_id = loop_input["I_ID"]; 57 | Value &s_w_id = loop_input["S_W_ID"]; 58 | Value &quantity = loop_input["QUANTITY"]; 59 | 60 | // Item 61 | Row item = txn.get(ITEM, {i_id}); 62 | Value price = item.getColumn(I_PRICE); 63 | 64 | // Stock 65 | Row stoc = txn.get(STOC, {s_w_id, i_id}); 66 | Value s_quantity = stoc.getColumn(S_QUANTITY).apply("StaticUpdates", {quantity}); 67 | stoc.setColumn(S_QUANTITY, s_quantity); 68 | Value s_ytd = stoc.getColumn(S_YTD).apply("Add", {quantity}); 69 | stoc.setColumn(S_YTD, s_ytd); 70 | Value s_order_cnt = stoc.getColumn(S_ORDER_CNT).apply("Add", {}); 71 | 
stoc.setColumn(S_ORDER_CNT, s_order_cnt); 72 | Value s_remote_cnt = stoc.getColumn(S_REMOTE_CNT).apply("StaticUpdates", {w_id, s_w_id}); 73 | stoc.setColumn(S_REMOTE_CNT, s_remote_cnt); 74 | txn.put(STOC, {s_w_id, i_id}, stoc); 75 | 76 | Value ol_amount = quantity.apply("MULTIPLY", {price}); 77 | 78 | // Order Line 79 | Row order_line = txn.alloc(ORLI); 80 | order_line.setColumn(OL_I_ID, i_id); 81 | order_line.setColumn(OL_AMOUNT, ol_amount); 82 | order_line.setColumn(OL_SUPPLY_W_ID, s_w_id); 83 | order_line.setColumn(OL_QUANTITY, quantity); 84 | txn.put(ORLI, {w_id, d_id, d_next_o_id, loop_num}, order_line); 85 | 86 | Values loop_res; 87 | loop_res.add("OL_AMOUNT", ol_amount); 88 | return loop_res; 89 | }; 90 | 91 | 92 | Values loop_res_list = txn.map(iter_logic, input.getArray("LOOP_ORDERLINE"), input["OL_COUNT"]); 93 | Values res = loop_res_list.reduce("SUM"); 94 | Value total_amount = res["OL_AMOUNT"].apply("CALCULATE", {w_tax, d_tax, c_discount}); 95 | 96 | txn.commit(); 97 | } 98 | -------------------------------------------------------------------------------- /src/storage/multi_ver_record.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | #include "index/record_lock.h" 6 | #include "util/types.h" 7 | 8 | template 9 | class Record { 10 | public: 11 | Key_t key; 12 | Value_t value; 13 | txn_id_t writer; // NULL_TXN_ID means it's a global version. 14 | 15 | Record(Key_t key, Value_t value, txn_id_t writer) : key(key), value(value), writer(writer) {} 16 | }; 17 | 18 | struct SimpleRecord { 19 | uint8_t* v; 20 | version_ts_t writer; 21 | offset_t page_offset; 22 | }; 23 | 24 | struct MultiVersionRecord { 25 | RecordLock lock; 26 | txn_id_t writer; // NULL_TXN_ID means initial value 27 | Key_t key; 28 | // Val_t value; 29 | uint8_t value[0]; 30 | 31 | MultiVersionRecord() : writer(NULL_TXN_ID), key(0) {} 32 | 33 | inline void set_value(const void* value_ptr, uint32_t value_size) { 34 | memcpy(value, value_ptr, value_size); 35 | } 36 | }; 37 | 38 | inline RecordLock* get_record_lock(void* ptr) { 39 | return &(reinterpret_cast(ptr)->lock); 40 | } 41 | 42 | inline void set_writer(void* ptr, txn_id_t writer) { 43 | reinterpret_cast(ptr)->writer = writer; 44 | } 45 | 46 | inline void set_key(void* ptr, uint8_t* key_ptr) { 47 | reinterpret_cast(ptr)->key = 48 | *reinterpret_cast(key_ptr); 49 | } 50 | 51 | inline uint8_t* r_get_key_ptr(void* ptr) { 52 | auto* p = &reinterpret_cast(ptr)->key; 53 | return reinterpret_cast(p); 54 | } 55 | 56 | inline Key_t get_key(void* ptr) { 57 | return reinterpret_cast(ptr)->key; 58 | } 59 | 60 | inline Key_t r_get_key(void* ptr) { 61 | return reinterpret_cast(ptr)->key; 62 | } 63 | 64 | inline txn_id_t r_get_writer(void* ptr) { 65 | return reinterpret_cast(ptr)->writer; 66 | } 67 | 68 | inline uint8_t* r_get_value(void* ptr) { 69 | return reinterpret_cast(ptr)->value; 70 | } 71 | 72 | inline void set_value(void* ptr, uint8_t* value_ptr, uint32_t value_size) { 73 | reinterpret_cast(ptr)->set_value(value_ptr, value_size); 74 | } 75 | 76 | inline void r_copy(void* to_ptr, void* from_ptr, uint32_t value_size) { 77 | auto *from = reinterpret_cast(from_ptr); 78 | auto *to = reinterpret_cast(to_ptr); 79 | to->writer = from->writer; 80 | to->key = from->key; 81 | to->set_value(from->value, value_size); 82 | } 83 | 84 | inline constexpr uint64_t get_record_size(uint32_t value_size) { 85 | return sizeof(MultiVersionRecord) + value_size; 86 | } 87 | 88 | // template 89 | // class 
MultiVersionRecord { 90 | // typedef Record record_t; 91 | // public: 92 | // Key_t key; 93 | // txn_id_t writer; // NULL_TXN_ID means initial value 94 | // RecordLock lock; 95 | // Value_t value; 96 | // // std::shared_ptr last_version; 97 | 98 | // MultiVersionRecord() 99 | // : key(0), 100 | // value(0), 101 | // writer(NULL_TXN_ID) {} 102 | 103 | // // last_version(std::make_shared(0, 0, 0)) 104 | // // inline void update_value(std::shared_ptr new_record) { last_version = new_record; } 105 | // // inline std::shared_ptr get_last_version() { return last_version; } 106 | 107 | // // inline static std::shared_ptr make_version(Key_t key, Value_t value, 108 | // // txn_id_t writer) { 109 | // // return std::make_shared(key, value, writer); 110 | // // } 111 | // }; -------------------------------------------------------------------------------- /dataflow_api/src/graph/node.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | #include "util/logging.h" 7 | 8 | static uint32_t g_node_id = 0; 9 | 10 | static std::string depsTostyle[] = { 11 | "solid", 12 | "dashed", 13 | "dotted" 14 | }; 15 | 16 | class TableSchema; 17 | class Node { 18 | public: 19 | enum DepType { KeyDep = 0, ValueDep, CtrlDep }; 20 | Node() : id(++g_node_id) {} 21 | 22 | ~Node() { 23 | for (auto &pair : deps) { 24 | Node *node = pair.first; 25 | if (node && --node->ref_count == 0) { 26 | delete node; 27 | } 28 | } 29 | } 30 | 31 | inline void addDeps(std::vector &nodes, DepType dep_type) { 32 | for (Node *node : nodes) { 33 | addDep(node, dep_type); 34 | } 35 | } 36 | 37 | inline void addDep(Node *dep, DepType dep_type) { 38 | if (dep == nullptr) 39 | return; 40 | if (dep == this) 41 | return; 42 | ++dep->ref_count; 43 | deps.push_back({dep, dep_type}); 44 | } 45 | 46 | // inline std::vector &getDeps() { return deps; } 47 | 48 | uint32_t getRefCount() const { return ref_count; } 49 | 50 | virtual bool isDbOp() { return false; } 51 | 52 | virtual void printNode(std::ostream &out) {} 53 | virtual void printDebugInfo(std::ostream &out) { 54 | if (printed) 55 | return; 56 | printed = true; 57 | 58 | for (auto &pair : deps) { 59 | Node *node = pair.first; 60 | if (!node->isDbOp()) 61 | continue; 62 | node->printDebugInfo(out); 63 | } 64 | 65 | for (auto &pair : deps) { 66 | Node *node = pair.first; 67 | if (!node->isDbOp()) 68 | continue; 69 | out << node->id << "->" << id << " "; 70 | out << "[style=" << depsTostyle[pair.second] << "]"; 71 | out << std::endl; 72 | } 73 | } 74 | 75 | protected: 76 | void printNode(std::ostream &out, const std::string &nodeType, const std::string &comment) { 77 | out << id << " [label=\"" << nodeType << "\"] # " << ref_count << ", " << comment 78 | << std::endl; 79 | } 80 | 81 | uint32_t id = 0; 82 | uint32_t ref_count = 0; 83 | std::vector> deps; 84 | bool printed = false; 85 | }; 86 | 87 | class OpNode : public Node { 88 | public: 89 | OpNode(const std::string &type_name, TableSchema &tbl_schema, uint32_t scope_id, 90 | uint32_t part_id, uint32_t partition_affinity, bool static_key) 91 | : type_name(type_name), 92 | tbl_schema(tbl_schema), 93 | scope_id(scope_id), 94 | part_id(part_id), 95 | partition_affinity(partition_affinity), 96 | static_key(static_key) {} 97 | 98 | virtual void printNode(std::ostream &out); 99 | 100 | virtual bool isDbOp() { return true; } 101 | 102 | private: 103 | TableSchema &tbl_schema; 104 | const std::string type_name; 105 | uint32_t scope_id, part_id, partition_affinity; 106 | 
bool static_key; 107 | }; 108 | class GetNode : public OpNode { 109 | public: 110 | GetNode(TableSchema &tbl_schema, uint32_t scope_id, uint32_t part_id, 111 | uint32_t partition_affinity, bool static_key) 112 | : OpNode("Get", tbl_schema, scope_id, part_id, partition_affinity, static_key) {} 113 | }; 114 | 115 | class PutNode : public OpNode { 116 | public: 117 | PutNode(TableSchema &tbl_schema, uint32_t scope_id, uint32_t part_id, 118 | uint32_t partition_affinity, bool static_key) 119 | : OpNode("Put", tbl_schema, scope_id, part_id, partition_affinity, static_key) {} 120 | }; 121 | class InputNode : public Node { 122 | public: 123 | InputNode(const std::string &name) : Node(), name(name) {} 124 | virtual void printNode(std::ostream &out) { Node::printNode(out, "Input", name); } 125 | 126 | private: 127 | std::string name; 128 | }; 129 | -------------------------------------------------------------------------------- /doc/hackwrench/run.md: -------------------------------------------------------------------------------- 1 | # How to Run Hackwrench 2 | 3 | ## Step 1: Prepare Script Environment and Configuration 4 | 5 | ### Python Environment 6 | 7 | The scripts for running Hackwrench binaries are in the `root/scripts` directory. To use the Python scripts, we need to install the requirements: 8 | 9 | ```bash 10 | cd scripts 11 | pip install -r requirements.txt 12 | ``` 13 | 14 | ### SSH Environment 15 | 16 | The scripts use ssh to control remote machines. For ease of use, set up sshd on each machine and add your public key (`.ssh/id_rsa.pub`) to your own `.ssh/authorized_keys`, so that no ssh password has to be typed during experiments. 17 | 18 | ### Machine Configuration 19 | 20 | The directory `root/scripts/aws` contains the configuration, logs, and results for experiments. 21 | 22 | In this directory, `servers.yaml` is the machine configuration with the IP address and server type of each machine (AWS instance): 23 | 24 | ```yaml 25 | ips: 26 | - 172.31.23.33 27 | - 172.31.22.66 28 | ... 29 | - 172.31.19.160 30 | - 172.31.23.191 31 | - 172.31.26.8 32 | ... 33 | type: 34 | - StorageNode 35 | - StorageNode 36 | ... 37 | - TimeServer 38 | - DataBase 39 | - DataBase 40 | ... 41 | ``` 42 | 43 | Please note that AWS provides both a public and a private IP address for each instance. We recommend using the private IP. 44 | 45 | ## Step 2: Use the Experiment Scripts for Running 46 | 47 | To run the experiments, please go to the `root/scripts` directory. The template command and all commands used in the evaluation are shown as follows: 48 | * `script_name` corresponds to the Python scripts in `root/scripts` whose names start with a number; these scripts automatically run Hackwrench in a distributed manner and collect the execution results. 49 | * `workload` has two options: `tpcc` corresponds to the TPC-C workload and `ycsb10` corresponds to the FDB-micro microbenchmark in the paper. 50 | * `option value` is an optional configuration value for each script, and its meaning varies from script to script. 51 | 52 | ```bash 53 | ./figure/0_motivation.sh 54 | ./figure/1_tpcc_tput.sh 55 | ./figure/2_tpcc_lat.sh 56 | ./figure/3_factor_analysis.sh 57 | ./figure/4_tpcc_coco.sh 58 | ./figure/5_tpcc_sundial.sh 59 | ./figure/6_tpcc_scalability.sh 60 | ./figure/7_micro_contention.sh 61 | ./figure/8_micro_ts.sh 62 | ./figure/9_micro_batch_effect.sh 63 | ./figure/10_micro_cache_effect.sh 64 | ``` 65 | ### Prepare Enough AWS Instances 66 | 67 | In our experiments, we use at most 46 m5.2xlarge AWS instances.
68 | The actual number of instances used for each experiment is shown in the comments. 69 | If you want to reproduce the experiments, please prepare enough AWS instances. 70 | 71 | ## Check the Experiment Results 72 | 73 | The scripts output the CSV-style results after the experiments. The results are also stored in `root/scripts/aws/results/{option}.csv`. The format is shown as follows: 74 | ``` 75 | parameter, 2000-tput, 2000-remote_abort_ratio, 2000-abort_ratio, 2000-repair_ratio, 2000-batch_repair_ratio, 2000-batch_abort_ratio, 2000-average_batch_size, 2000-mean_lat, 2000-p50, 2000-p90, 2000-p99 76 | 0, 270203.0, 0.0, 0.0, 0.016899090214084252, 0.018511, 0.0, 1.1115702445344795, 31745023, 30995657, 43072579, 56000276 77 | 100, 33770.3, 0.0, 0.0, 0.002342003582584494, 0.00251236, 0.0, 1.0740953931676662, 395469873, 383369138, 731584954, 1108964075 78 | ``` 79 | 80 | The results have two dimensions: one is `parameter`, and the other is the `x` in `{x}-{col_name}`. The meanings of `col_name` are: 81 | | `col_name` | Description | 82 | |--------|----------| 83 | | `tput` | Throughput. | 84 | | `remote_abort_ratio` | The ratio of transactions aborted by cross-database-node conflicts. | 85 | | `abort_ratio` | The ratio of transactions aborted by conflicts internal to a database node. | 86 | | `repair_ratio` | The ratio of transactions repaired. | 87 | | `batch_repair_ratio` | The ratio of `logical transactions` (Sec 4) repaired. | 88 | | `batch_abort_ratio` | The ratio of `logical transactions` (Sec 4) aborted by cross-database-node conflicts. | 89 | | `average_batch_size` | The average number of transactions in one `logical transaction`. | 90 | | `mean_lat`/`p50`/`p90`/`p99` | The average, p50, p90, and p99 latency. | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | -------------------------------------------------------------------------------- /dataflow_api/src/util/logging.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include <iostream> 4 | #include <sstream> 5 | 6 | /** 7 | * Different logging level 8 | * 9 | * \def FATAL 10 | * Used for fatal and probably irrecoverable conditions 11 | * \def ERROR 12 | * Used for errors which are recoverable within the scope of the function 13 | * \def WARNING 14 | * Logs interesting conditions which are probably not fatal 15 | * \def EMPH 16 | * Outputs as INFO, but in WARNING colors. Useful for 17 | * outputting information you want to emphasize.
18 | * \def INFO 19 | * Used for providing general useful information 20 | * \def DEBUG 21 | * Debugging purposes only 22 | * \def EVERYTHING 23 | * Log everything 24 | */ 25 | 26 | enum loglevel { 27 | NONE = 7, 28 | FATAL = 6, 29 | ERROR = 5, 30 | WARNING = 4, 31 | EMPH = 3, 32 | INFO = 2, 33 | DEBUG = 1, 34 | EVERYTHING = 0 35 | }; 36 | 37 | #ifndef LOG_LEVEL 38 | #define LOG_LEVEL INFO 39 | #endif 40 | 41 | /** 42 | * Used to determine whether file and line number should be presented 43 | */ 44 | #define LOG_FILE_LINE 45 | 46 | // logging macro definitions 47 | #define LOG(n) \ 48 | if (n >= LOG_LEVEL) \ 49 | MessageLogger((char *)__FILE__, __LINE__, n).stream() 50 | 51 | // log with tag 52 | #define TLOG(n, t) \ 53 | if (n >= LOG_LEVEL) \ 54 | MessageLogger((char *)__FILE__, __LINE__, n).stream() << "[" << (t) << "]" 55 | 56 | #define LOG_IF(n, condition) \ 57 | if (n >= LOG_LEVEL && (condition)) \ 58 | MessageLogger((char *)__FILE__, __LINE__, n).stream() 59 | 60 | #ifdef NO_ASSERTION 61 | #define ASSERT(condition) \ 62 | if (false) \ 63 | MessageLogger((char *)__FILE__, __LINE__, FATAL + 1).stream() << "Assertion! " 64 | #else 65 | #define ASSERT(condition) \ 66 | if (!(condition)) \ 67 | MessageLogger((char *)__FILE__, __LINE__, FATAL + 1).stream() << "Assertion! " 68 | #endif 69 | 70 | #define VERIFY(n, condition) LOG_IF(n, (!(condition))) 71 | 72 | class MessageLogger { 73 | public: 74 | MessageLogger(const char *file, int line, int level) : level_(level) { 75 | if (level_ < LOG_LEVEL) 76 | return; 77 | if (level > FATAL) { 78 | stream_ << "[" << StripBasename(std::string(file)) << ":" << line << "] "; 79 | } else { 80 | #ifdef LOG_FILE_LINE 81 | stream_ << "[" << StripBasename(std::string(file)) << ":" << line << "] "; 82 | #endif 83 | } 84 | } 85 | 86 | ~MessageLogger() { 87 | if (level_ >= LOG_LEVEL) { 88 | stream_ << "\n"; 89 | std::cout << "\033[" << DEBUG_LEVEL_COLOR[std::min(level_, 6)] << "m" << stream_.str() 90 | << EndcolorFlag(); 91 | if (level_ >= FATAL) 92 | abort(); 93 | } 94 | } 95 | 96 | // Return the stream associated with the logger object.
97 | std::stringstream &stream() { return stream_; } 98 | 99 | private: 100 | std::stringstream stream_; 101 | int level_; 102 | 103 | // control flags for color 104 | enum { 105 | R_BLACK = 39, 106 | R_RED = 31, 107 | R_GREEN = 32, 108 | R_YELLOW = 33, 109 | R_BULE = 34, 110 | R_MAGENTA = 35, 111 | R_CYAN = 36, 112 | R_WHITE = 37 113 | }; 114 | 115 | const int DEBUG_LEVEL_COLOR[7] = {R_BLACK, R_YELLOW, R_BLACK, R_GREEN, R_MAGENTA, R_RED, R_RED}; 116 | 117 | static std::string StripBasename(const std::string &full_path) { 118 | const char kSeparator = '/'; 119 | size_t pos = full_path.rfind(kSeparator); 120 | if (pos != std::string::npos) { 121 | return full_path.substr(pos + 1, std::string::npos); 122 | } else { 123 | return full_path; 124 | } 125 | } 126 | 127 | static std::string EndcolorFlag() { 128 | char flag[7]; 129 | snprintf(flag, 7, "%c[0m", 0x1B); 130 | return std::string(flag); 131 | } 132 | }; 133 | 134 | // #define VAR3(begin, n, end) begin << #n << ":" << n << end 135 | #define VAR2(n, end) #n << ":" << n << end 136 | #define VAR(n) #n << ":" << n -------------------------------------------------------------------------------- /src/storage/txn_info.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | 5 | #include "rpc/rpc.h" 6 | #include "servers/config.h" 7 | 8 | 9 | // Forward declaration can't declare a nested class 10 | struct TxnInfo { 11 | enum Status { NORMAL = 0, COUNT_BLOCKED_SEGS }; 12 | TxnInfo(MsgBuffer &&_zmq_msg, const Configuration& conf) 13 | : zmq_msg(std::move(_zmq_msg)), 14 | flat_array(CAST_CAPNP(zmq_msg.data(), zmq_msg.size())), 15 | num_blocked_segs(0), 16 | status(NORMAL), old_txn_info(nullptr) { 17 | #ifdef FINE_VALIDATION 18 | num_blocked_locks.store(0); 19 | #endif 20 | msg = flat_array.getRoot(); 21 | req_db = msg.getSender(); 22 | auto args = msg.getData().getPrepareargs(); 23 | txn_id = args.getTxnId(); 24 | fastCommitEnabled = args.getFastPathEnabled(); 25 | primary_sn_id = args.getPrimarySnId(); 26 | #if defined(TOTAL_ORDER_TS) || defined(DETERMINISTIC_VALIDATION) 27 | seq = args.getSeq(); 28 | #endif 29 | r_no = replica_no_of(conf.get_physical_sn_id(primary_sn_id), 30 | conf.get_my_sn_id(), conf.numSN()); 31 | copied = false; 32 | } 33 | 34 | TxnInfo(PrepareArgs::Reader args, node_id_t sender, 35 | const Configuration& conf) 36 | : zmq_msg(), flat_array(CAST_CAPNP(zmq_msg.data(), zmq_msg.size())), 37 | num_blocked_segs(0), 38 | status(NORMAL), old_txn_info(nullptr) { 39 | #if defined(TOTAL_ORDER_TS) || defined(DETERMINISTIC_VALIDATION) 40 | seq = args.getSeq(); 41 | #endif 42 | #ifdef FINE_VALIDATION 43 | num_blocked_locks.store(0); 44 | #endif 45 | node_id_t receiver = conf.get_my_sn_id(); 46 | txn_id = args.getTxnId(); 47 | primary_sn_id = args.getPrimarySnId(); 48 | fastCommitEnabled = args.getFastPathEnabled(); 49 | req_db = sender; 50 | r_no = replica_no_of(conf.get_physical_sn_id(primary_sn_id), 51 | conf.get_my_sn_id(), conf.numSN()); 52 | 53 | RpcMessage::Builder request = msgBuilder.initRoot(); 54 | request.setReceiver(receiver); 55 | request.setSender(sender); 56 | PrepareArgs::Builder prepareArgs = request.initData().initPrepareargs(); 57 | prepareArgs.setTxnId(txn_id); 58 | prepareArgs.setSegments(args.getSegments()); 59 | prepareArgs.setTxns(args.getTxns()); 60 | prepareArgs.setTxnInputs(args.getTxnInputs()); 61 | prepareArgs.setPrimarySnId(primary_sn_id); 62 | msg = msgBuilder.getRoot(); 63 | copied = true; 64 | } 65 | 66 | ~TxnInfo() { 67 | if 
(old_txn_info) { 68 | delete old_txn_info; 69 | } 70 | } 71 | 72 | static uint32_t replica_no_of(node_id_t primary_sn_id, node_id_t my_sn_id, uint32_t num_sn) { 73 | // node ids are unsigned, so the subtraction can never be negative; handle the wrap-around case explicitly 74 | if (my_sn_id < primary_sn_id) { 75 | return my_sn_id + num_sn - primary_sn_id; 76 | } 77 | return my_sn_id - primary_sn_id; 78 | } 79 | 80 | txn_id_t txn_id; 81 | uint64_t primary_sn_id; 82 | // set to corresponding msg_id, binding the client to certain worker thread... 83 | 84 | ::capnp::MallocMessageBuilder msgBuilder; 85 | MsgBuffer zmq_msg; 86 | RpcMessage::Reader msg; 87 | ::capnp::FlatArrayMessageReader flat_array; 88 | std::atomic num_blocked_segs; 89 | std::atomic msg_count; 90 | MultiThreadTimer timer; 91 | RdtscTimer rdtsc_timer; // TODO 92 | MultiThreadTimer total_timer; 93 | Status status; 94 | node_id_t req_db; 95 | uint32_t r_no; 96 | TxnInfo *old_txn_info; 97 | std::vector ordered_txns; 98 | bool fastCommitEnabled = false; 99 | // std::mutex mtx; 100 | 101 | bool copied; 102 | bool repaired = false; 103 | bool aborted = false; 104 | bool prepare_replicated = false; 105 | std::vector repaired_txns; 106 | uint64_t seq; 107 | #ifdef FINE_VALIDATION 108 | std::atomic num_blocked_locks; 109 | bool lock_acquired = false; 110 | #endif 111 | // std::vector> blocked_segs; 112 | }; -------------------------------------------------------------------------------- /src/util/dbug_logging.h: -------------------------------------------------------------------------------- 1 | /** 2 | * The logging utilities. 3 | */ 4 | 5 | #pragma once 6 | 7 | #include <iostream> 8 | #include <sstream> 9 | 10 | #include "utils.h" 11 | 12 | /** 13 | * Different logging level 14 | * 15 | * \def FATAL 16 | * Used for fatal and probably irrecoverable conditions 17 | * \def ERROR 18 | * Used for errors which are recoverable within the scope of the function 19 | * \def WARNING 20 | * Logs interesting conditions which are probably not fatal 21 | * \def EMPH 22 | * Outputs as INFO, but in WARNING colors. Useful for 23 | * outputting information you want to emphasize. 24 | * \def INFO 25 | * Used for providing general useful information 26 | * \def DEBUG 27 | * Debugging purposes only 28 | * \def EVERYTHING 29 | * Log everything 30 | */ 31 | 32 | enum loglevel { 33 | NONE = 7, 34 | FATAL = 6, 35 | ERROR = 5, 36 | WARNING = 4, 37 | EMPH = 3, 38 | INFO = 2, 39 | DEBUG = 1, 40 | EVERYTHING = 0 41 | }; 42 | 43 | #ifndef LOG_LEVEL 44 | #define LOG_LEVEL INFO 45 | #endif 46 | 47 | /** 48 | * Used to determine whether file and line number should be presented 49 | */ 50 | #define LOG_FILE_LINE 51 | 52 | // logging macro definitions 53 | #define LOG(n) \ 54 | if (n >= LOG_LEVEL) \ 55 | MessageLogger((char *)__FILE__, __LINE__, n).stream() 56 | 57 | // log with tag 58 | #define TLOG(n, t) \ 59 | if (n >= LOG_LEVEL) \ 60 | MessageLogger((char *)__FILE__, __LINE__, n).stream() << "[" << (t) << "]" 61 | 62 | #define LOG_IF(n, condition) \ 63 | if (n >= LOG_LEVEL && (condition)) \ 64 | MessageLogger((char *)__FILE__, __LINE__, n).stream() 65 | 66 | #ifdef NO_ASSERTION 67 | #define ASSERT(condition) \ 68 | if (false) \ 69 | MessageLogger((char *)__FILE__, __LINE__, FATAL + 1).stream() << "Assertion! " 70 | #else 71 | #define ASSERT(condition) \ 72 | if (unlikely(!(condition))) \ 73 | MessageLogger((char *)__FILE__, __LINE__, FATAL + 1).stream() << "Assertion!
" 74 | #endif 75 | 76 | #define VERIFY(n, condition) LOG_IF(n, (!(condition))) 77 | 78 | class MessageLogger { 79 | public: 80 | MessageLogger(const char *file, int line, int level) : level_(level) { 81 | if (level_ < LOG_LEVEL) 82 | return; 83 | if (level > FATAL) { 84 | stream_ << "[" << StripBasename(std::string(file)) << ":" << line << "] "; 85 | } else { 86 | #ifdef LOG_FILE_LINE 87 | stream_ << "[" << StripBasename(std::string(file)) << ":" << line << "] "; 88 | #endif 89 | } 90 | } 91 | 92 | ~MessageLogger() { 93 | if (level_ >= LOG_LEVEL) { 94 | stream_ << "\n"; 95 | std::cout << "\033[" << DEBUG_LEVEL_COLOR[std::min(level_, 6)] << "m" << stream_.str() 96 | << EndcolorFlag(); 97 | if (level_ >= FATAL) 98 | abort(); 99 | } 100 | } 101 | 102 | // Return the stream associated with the logger object. 103 | std::stringstream &stream() { return stream_; } 104 | 105 | private: 106 | std::stringstream stream_; 107 | int level_; 108 | 109 | // control flags for color 110 | enum { 111 | R_BLACK = 39, 112 | R_RED = 31, 113 | R_GREEN = 32, 114 | R_YELLOW = 33, 115 | R_BULE = 34, 116 | R_MAGENTA = 35, 117 | R_CYAN = 36, 118 | R_WHITE = 37 119 | }; 120 | 121 | const int DEBUG_LEVEL_COLOR[7] = {R_BLACK, R_YELLOW, R_BLACK, R_GREEN, R_MAGENTA, R_RED, R_RED}; 122 | 123 | static std::string StripBasename(const std::string &full_path) { 124 | const char kSeparator = '/'; 125 | size_t pos = full_path.rfind(kSeparator); 126 | if (pos != std::string::npos) { 127 | return full_path.substr(pos + 1, std::string::npos); 128 | } else { 129 | return full_path; 130 | } 131 | } 132 | 133 | static std::string EndcolorFlag() { 134 | char flag[7]; 135 | snprintf(flag, 7, "%c[0m", 0x1B); 136 | return std::string(flag); 137 | } 138 | }; 139 | 140 | // #define VAR3(begin, n, end) begin << #n << ":" << n << end 141 | #define VAR2(n, end) #n << ":" << n << end 142 | #define VAR(n) #n << ":" << n -------------------------------------------------------------------------------- /doc/dataflow_api/graphs/stock_level.svg: -------------------------------------------------------------------------------- 1 | 2 | 4 | 6 | 7 | 9 | 10 | G 11 | 12 | stock_level.dot 13 | 14 | cluster_1 15 | 16 | Loop#1 17 | 18 | 19 | cluster_2 20 | 21 | Loop#2 22 | 23 | 24 | cluster_3 25 | 26 | IfBranch#3 27 | 28 | 29 | 30 | 36 31 | 32 | Get DIST 33 | staticKey: true 34 | localTable: true 35 | 36 | 37 | 38 | 37 39 | 40 | Get ORLI 41 | staticKey: false 42 | localTable: true 43 | 44 | 45 | 46 | 36->37 47 | 48 | 49 | 50 | 51 | 52 | 38 53 | 54 | Get STOC 55 | staticKey: false 56 | localTable: false 57 | 58 | 59 | 60 | 37->38 61 | 62 | 63 | 64 | 65 | 66 | 37->38 67 | 68 | 69 | 70 | 71 | 72 | -------------------------------------------------------------------------------- /src/servers/clients.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | #include "config.h" 6 | #include "rpc/rpc.h" 7 | #include "util/macros.h" 8 | #include "util/statistic.h" 9 | #include "util/thread_safe_structures.h" 10 | #include "util/timer.h" 11 | 12 | struct Client { 13 | Key_t key = 0; 14 | Client(Key_t key) : key(key) {} 15 | }; 16 | class ClientServer : public RpcServerBase { 17 | const BenchmarkInterface &bench; 18 | ThreadSafeQueue free_clients; 19 | uint32_t num_clients; 20 | 21 | public: 22 | ClientServer(const Configuration &conf, const BenchmarkInterface &bench, uint32_t num_clients) 23 | : RpcServerBase(conf), bench(bench), num_clients(num_clients) { 24 | // TODO: bench.init_clients(); 25 | for 
(uint i = 0; i < num_clients; ++i) { 26 | free_clients.enqueue(new Client(i)); 27 | } 28 | } 29 | 30 | void send_thread(bool &sending, thread_id_t thread_id) { 31 | socket_t_id = thread_id; 32 | node_id_t count = 0; 33 | node_id_t sum = 0; 34 | while (sending) { 35 | Client *c = free_clients.dequeue(); // this is blocked by recv_thread 36 | node_id_t to_id = rr_db_id(); 37 | // node_id_t to_id = (c->key % conf.numDB()) + conf.numSN() + conf.numTS(); 38 | // node_id_t to_id = 2; 39 | sum += to_id; 40 | count++; 41 | BUILD_REQUEST(to_id); 42 | auto req = request.initData().initClientargs(); 43 | req.setClientId((uint64_t)c); 44 | req.setData(c->key); 45 | req.setTxnType(thread_rand.randint(0, 99)); 46 | send(msgBuilder); 47 | } 48 | LOG(2) << "send thread quit " << (double)sum / (double)count; 49 | } 50 | 51 | void recv_thread() { 52 | bool benchmarking = true; 53 | while (benchmarking || free_clients.size() != num_clients) { 54 | Event event = pop_event(0); 55 | if (event.event_type == Event::NONE) { 56 | benchmarking = false; 57 | // keep running until we receive all replies. 58 | LOG(2) << "recv thread waiting for all clients"; 59 | continue; 60 | } 61 | ASSERT(event.event_type == Event::RPC); 62 | MsgBuffer &msg = *reinterpret_cast(event.ptr); 63 | BUILD_MESSAGE_FROM_BUFFER(msg, reply); 64 | ASSERT(reply.getData().isClientresponse()); 65 | auto clientIds = reply.getData().getClientresponse().getClientIds(); 66 | for (uint i = 0; i < clientIds.size(); ++i) { 67 | Client *c = (Client *)clientIds[i]; 68 | ASSERT(c); 69 | free_clients.enqueue(c); 70 | } 71 | } 72 | LOG(2) << "recv thread quit"; 73 | for (uint i = 0; i < conf.numSN(); ++i) { 74 | BUILD_REQUEST(i); 75 | auto req = request.initData().initSyncargs(); 76 | req.setStatus(ServerStatus::END); 77 | send(msgBuilder); 78 | } 79 | } 80 | 81 | void run() { 82 | bool sending = true; 83 | ServerStatus status = recv_sync(); 84 | ASSERT(status == ServerStatus::START); 85 | std::vector workers; 86 | 87 | for (uint i = 0; i < conf.get_primary_db_node()->num_threads; i++) { 88 | std::thread *t = 89 | new std::thread([this, &sending](auto i) { this->send_thread(sending, i); }, i); 90 | workers.push_back(t); 91 | } 92 | std::thread t2([this]() { this->recv_thread(); }); 93 | 94 | status = recv_sync(); 95 | ASSERT(status == ServerStatus::END); 96 | sending = false; // stop the send thread immediately 97 | // stop the recv thread when all requests are replied 98 | this->get_specific_event_channel(0)->push_null_event(); 99 | for (auto t : workers) { 100 | t->join(); 101 | } 102 | t2.join(); 103 | } 104 | 105 | inline uint64_t random_db_id() { 106 | return conf.numSN() + conf.numTS() + thread_rand.randint(0, conf.numDB() - 1); 107 | } 108 | 109 | // std::atomic rr; 110 | inline uint64_t rr_db_id() { 111 | thread_local uint64_t rr = 0; 112 | uint64_t ret = conf.numSN() + conf.numTS() + (rr % conf.numDB()); 113 | rr++; 114 | return ret; 115 | } 116 | }; -------------------------------------------------------------------------------- /src/servers/time_server.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | #include "rpc/rpc.h" 7 | #include "servers/config.h" 8 | #include "storage/segment.h" 9 | #include "util/event_channel.h" 10 | 11 | class TimestampLayer { 12 | private: 13 | TxnDependencyWithLock *deps; 14 | #ifdef TOTAL_ORDER_TS 15 | std::atomic seq; 16 | #endif 17 | public: 18 | TimestampLayer(Configuration &config) { 19 | deps = new 
TxnDependencyWithLock[config.numSegments()]; 20 | #ifdef TOTAL_ORDER_TS 21 | seq.store(0); 22 | #endif 23 | } 24 | 25 | ~TimestampLayer() { delete[] deps; } 26 | 27 | void get_timestamp(GetTimestampArgs::Reader args, GetTimestampResponse::Builder resp) { 28 | resp.setTxnId(args.getTxnId()); 29 | #ifndef TOTAL_ORDER_TS 30 | auto rw_seg_ids = args.getRwSegIds(); 31 | auto reply_deps = resp.initDeps(rw_seg_ids.size()); 32 | uint32_t i = 0; 33 | std::vector> locks; 34 | for (const seg_id_t &rw_seg_id : rw_seg_ids) { 35 | seg_id_t seg_id = SEG_GET_ID(rw_seg_id); 36 | locks.emplace_back(deps[seg_id].mtx); 37 | TxnDependency &dep = deps[seg_id].dep; 38 | 39 | auto reply_dep = reply_deps[i]; 40 | reply_dep.setTimestamp(dep.ts); 41 | if (SEG_IS_WRITE(rw_seg_id)) { 42 | reply_dep.setNumReads(dep.num_reads); 43 | dep.ts++; 44 | dep.num_reads = 0; 45 | } else { 46 | reply_dep.setNumReads(dep.num_reads); 47 | dep.num_reads += 1; 48 | } 49 | ++i; 50 | } 51 | #else 52 | uint64_t old_seq = seq.fetch_add(1); 53 | resp.setSeq(old_seq); 54 | #endif 55 | resp.setOk(true); 56 | } 57 | }; 58 | 59 | class TimestampRpcServer : public RpcServerBase { 60 | private: 61 | TimestampLayer *timeServer; 62 | std::atomic thpt_counter; 63 | std::vector *> requests_queues; 64 | 65 | public: 66 | TimestampRpcServer(const Configuration &conf, TimestampLayer *timeServer) 67 | : RpcServerBase(conf), timeServer(timeServer) { 68 | for (uint i = 0; i < conf.get_my_node().num_threads; i++) { 69 | requests_queues.emplace_back(new ThreadSafeQueue); 70 | } 71 | } 72 | 73 | ~TimestampRpcServer() { 74 | for (auto p : requests_queues) { 75 | delete p; 76 | } 77 | } 78 | 79 | void serve_one_request(EventChannel* channel, thread_id_t tid) { 80 | Event e = channel->pop(); 81 | MsgBuffer &zmq_msg = *reinterpret_cast(e.ptr); 82 | 83 | BUILD_MESSAGE_FROM_BUFFER(zmq_msg, msg); 84 | BUILD_REPLY_NO_HASH(msg) 85 | if (msg.getData().isGettimestampargs()) { 86 | assert(msg.getReceiver() == conf.get_my_id()); 87 | 88 | auto dat = msg.getData(); 89 | auto args = dat.getGettimestampargs(); 90 | auto data = reply.initData().initGettimestampresponse(); 91 | timeServer->get_timestamp(args, data); 92 | } else { 93 | auto dat = msg.getData(); 94 | auto args = dat.getBatchgettimestampargs().getGettimestampargs(); 95 | auto data = reply.initData().initBatchgettimestampresponse(); 96 | data.setLbatchId(dat.getBatchgettimestampargs().getLbatchId()); 97 | auto resp = data.initGettimestampresponse(args.size()); 98 | for (uint i = 0; i < args.size(); i++) { 99 | timeServer->get_timestamp(args[i], resp[i]); 100 | } 101 | } 102 | send(msgBuilder); 103 | 104 | 105 | // this->thpt_counter++; 106 | } 107 | 108 | void run() { 109 | auto worker_func = [this](thread_id_t tid) { 110 | socket_t_id = tid; 111 | while (1) { 112 | serve_one_request(event_channels[tid], tid); 113 | } 114 | }; 115 | std::vector worker_threads; 116 | for (thread_id_t t_id = 0; t_id < num_threads; ++t_id) { 117 | worker_threads.push_back(std::thread(worker_func, t_id)); 118 | } 119 | 120 | thpt_counter = 0; 121 | while (1) { 122 | thread_sleep(1); 123 | LOG(2) << "realtime thpt: " << thpt_counter << " reqs/s"; 124 | thpt_counter = 0; 125 | 126 | // for (uint32_t i = 0; i < batch_ids.size(); ++i) { 127 | // LOG(2) << "i: " << i << " " << std::hex << batch_ids[i]; 128 | // } 129 | } 130 | 131 | for (auto &thread : worker_threads) { 132 | thread.join(); 133 | } 134 | } 135 | }; 136 | -------------------------------------------------------------------------------- 
/doc/dataflow_api/graphs/order_status.svg: -------------------------------------------------------------------------------- 1 | [SVG figure omitted: graphviz rendering of order_status.dot. Scope: Loop#1. Nodes: Get CUST (staticKey: true, localTable: false); Get CUST_INDEX (staticKey: true, localTable: true) -> Get ORDR (staticKey: false, localTable: true); Get CUST_INDEX -> Get ORLI (staticKey: false, localTable: true); Get ORDR -> Get ORLI.] -------------------------------------------------------------------------------- /doc/dataflow_api/tpcc.md: -------------------------------------------------------------------------------- 1 | # TPC-C as an Example 2 | 3 | The code of TPC-C is shown in [`root/dataflow_api/src/benchmark`](../../dataflow_api/src/benchmark), which contains the definition of TPC-C's [`schema`](../../dataflow_api/src/benchmark/tpcc/tpcc_schema.cc) and five transactions: 4 | * [`new_order`](../../dataflow_api/src/benchmark/tpcc/txn_new_order.cc) 5 | * [`payment`](../../dataflow_api/src/benchmark/tpcc/txn_payment.cc) 6 | * [`delivery`](../../dataflow_api/src/benchmark/tpcc/txn_delivery.cc) 7 | * [`order_status`](../../dataflow_api/src/benchmark/tpcc/txn_order_status.cc) 8 | * [`stock_level`](../../dataflow_api/src/benchmark/tpcc/txn_stock_level.cc) 9 | 10 | ## Build 11 | 12 | In the `root/dataflow_api` directory, run the following commands to compile the binary `analyze`: 13 | 14 | ```bash 15 | mkdir build && cd build 16 | cmake .. 17 | make -j4 18 | ``` 19 | 20 | ## Run 21 | 22 | After compiling the executable, we run it directly to generate the output files describing the dataflow graphs. 23 | 24 | In the `root/dataflow_api/build` directory, run the following command: 25 | 26 | ```bash 27 | ./analyze 28 | ``` 29 | 30 | Then, five `dot` files are generated, one for each type of transaction in TPC-C. 31 | 32 | ```bash 33 | new_order.dot 34 | payment.dot 35 | delivery.dot 36 | order_status.dot 37 | stock_level.dot 38 | ``` 39 | 40 | ## Plot the Dataflow Graph 41 | 42 | To visualize the dataflow graphs, we use the `graphviz` tool. 43 | 44 | First, install `graphviz` with 45 | 46 | ```bash 47 | sudo apt install graphviz 48 | ``` 49 | 50 | Then, in the `root/dataflow_api/build` directory where the output files are generated, run the following command to generate `svg` files: 51 | 52 | ```bash 53 | ../plot.sh 54 | ``` 55 | 56 | The resulting graphs are generated in the same directory and shown below. 57 | ### How to Read the Dataflow Graph 58 | 59 | The dataflow graphs shown below contain two kinds of nodes: `Get` and `Put`, corresponding to the data flow operations. A `red` frame means the operation is analyzed to execute at the preferred partition (specified by `setPartitionAffinity`); otherwise, the frame is `blue`. Inside each node, the text contains: 60 | 1. `Get` or `Put`: The node's type. 61 | 2. `Ware`, ... : The name of the accessed table. 62 | 3. `staticKey` : Whether the primary key used is determined statically. 63 | 4. `localTable` : Whether the table is only accessed by transactions that have the same partition affinity.
64 | 65 | The directed edges represent the `key` (solid edges), `value` (dashed edges), and `control` (dotted edges) dependencies between operations. 66 | 67 | The black rectangle represents a scope of operations generated by loops or if-branches. 68 | 69 | ### Static Analysis for Fast Path Optimization 70 | 71 | A transaction qualifies for the fast path optimization if: 72 | 1. it contains no user-initiated aborts. 73 | 2. it can be divided into multiple pieces, each of which can execute individually on one storage node. 74 | 3. each piece's read and write sets remain unchanged before and after repair (re-execution). 75 | 76 | For condition 1, we can directly analyze whether the `abort` API is invoked. 77 | 78 | For condition 2, we need to ensure that transactions can be divided into individual pieces without dependencies among them. In TPC-C, only `New Order` and `Payment` transactions can access remote data. When checking their dataflow graphs, we can observe that there are no dependencies between the red sub-graph and the blue sub-graph. Therefore, all TPC-C transactions satisfy the second condition. 79 | 80 | For condition 3, we need to determine that the read and write sets of the target transaction (or piece) do not change before and after repair. First, if the primary key of an operation can be determined statically, then the corresponding read and write sets cannot change. Unfortunately, not all operations in TPC-C use static primary keys. Some operations' primary keys depend on the outputs of previous `Get` operations. For example, in the `New Order` transaction, the primary key for inserting into the `Order` table is determined by the output of reading the `District` table. If such `Get` operations' outputs change during repair, then the read or write sets change. 81 | 82 | However, if such `Get` operations' outputs do not change after repair, then the overall read and write sets do not change. 83 | The opportunity is that the tables targeted by such `Get` operations can be partitioned well and only be accessed by transactions with the same partition affinity (the red-framed part in the dataflow graph). Taking the `District` table as the example again, each of its partitions is only accessed by transactions executed on the same database node, which is responsible for the same set of partition affinities. 84 | Therefore, no cross-database-node conflicts happen on these tables, and the outputs are guaranteed not to change after repair. 85 | Finally, we can determine that, for all TPC-C transactions, the read and write sets do not change before or after repair.
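
The three conditions above are mechanical checks over the dataflow graph. As a rough sketch of the condition-2 check, assuming a simplified stand-in for the analyzer's real node type in `dataflow_api/src/graph` (the `Op` struct and `pieces_are_independent` function below are hypothetical):

```C++
#include <vector>

// Condition 2 holds iff no dependency edge connects an operation executed at
// the preferred partition (the red sub-graph) with one executed remotely
// (the blue sub-graph), so the pieces can be dispatched independently.
struct Op {
    bool at_preferred_partition;   // red vs. blue in the figures below
    std::vector<const Op *> deps;  // key/value/control dependencies
};

bool pieces_are_independent(const std::vector<Op> &ops) {
    for (const Op &op : ops)
        for (const Op *dep : op.deps)
            if (dep->at_preferred_partition != op.at_preferred_partition)
                return false;      // a red<->blue edge forbids splitting
    return true;
}
```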
86 | 87 | ### New Order 88 | ![new_order](./graphs/new_order.svg) 89 | ### Payment 90 | ![payment](./graphs/payment.svg) 91 | ### Delivery 92 | ![delivery](./graphs/delivery.svg) 93 | ### Order Status 94 | ![order_status](./graphs/order_status.svg) 95 | ### Stock Level 96 | ![stock_level](./graphs/stock_level.svg) 97 | -------------------------------------------------------------------------------- /src/index/record_lock.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | #include "servers/config.h" 8 | #include "util/thread_safe_structures.h" 9 | #include "util/timer.h" 10 | 11 | #define LOCKS_PER_MANAGER 1024 12 | 13 | #ifdef NON_CACHING 14 | static const uint32_t RETRY_TIMES = 1000; 15 | #else 16 | static const uint32_t RETRY_TIMES = 10000000; 17 | #endif 18 | 19 | enum LockStatus { 20 | SUCCESS = 0, 21 | BATCH_FAIL = 1, 22 | RECORD_FAIL = 2 23 | }; 24 | 25 | class RecordRWLock { 26 | private: 27 | std::atomic content; 28 | #ifdef READ_COMMITTED 29 | std::atomic batch_owner; 30 | #endif 31 | public: 32 | uint64_t key; // for debugging 33 | RecordRWLock(uint64_t key = 0) : key(key) { 34 | init(); 35 | } 36 | void init() { 37 | content = 0; 38 | #ifdef READ_COMMITTED 39 | batch_owner = NULL_BATCH_ID; 40 | #endif 41 | } 42 | 43 | LockStatus read_lock(txn_id_t txn_id, batch_id_t batch_id = NULL_BATCH_ID) { 44 | #ifdef READ_COMMITTED 45 | batch_id_t old_owner = acquire_batch(batch_id); 46 | if (old_owner != NULL_BATCH_ID) { 47 | #ifdef LOCK_FAIL_BLOCK 48 | throw BatchLockException(old_owner); 49 | #endif 50 | return LockStatus::BATCH_FAIL; 51 | } 52 | #endif 53 | 54 | int64_t x; 55 | uint try_times = 0; 56 | while (true) { 57 | x = content.load(); 58 | if (x >= 0 && content.compare_exchange_weak(x, x+1)) { 59 | break; 60 | } 61 | if (++try_times > RETRY_TIMES) { 62 | return LockStatus::RECORD_FAIL; 63 | } 64 | } 65 | return LockStatus::SUCCESS; 66 | } 67 | 68 | union Wrapper { 69 | Wrapper(uint64_t word) : word(word){} 70 | Wrapper(uint64_t b_id, uint16_t count) : b_id(b_id), count(count){} 71 | struct { 72 | uint64_t b_id : 48; 73 | uint64_t count : 16; 74 | }; 75 | uint64_t word; 76 | }; 77 | 78 | LockStatus lock(txn_id_t txn_id, batch_id_t batch_id = NULL_BATCH_ID) { 79 | #ifdef READ_COMMITTED 80 | batch_id_t old_owner = acquire_batch(batch_id); 81 | if (old_owner != NULL_BATCH_ID) { 82 | #ifdef LOCK_FAIL_BLOCK 83 | throw BatchLockException(old_owner); 84 | #endif 85 | return LockStatus::BATCH_FAIL; 86 | } 87 | #endif 88 | 89 | uint try_times = 0; 90 | int64_t x; 91 | while (true) { 92 | x = content.load(); 93 | if (x == 0 && content.compare_exchange_weak(x, -1)) { 94 | break; 95 | } 96 | if (++try_times > RETRY_TIMES) { 97 | // ASSERT(false); 98 | // LOG(2) << "lock record " << this << " failed " << std::hex << x << " " << batch_id; 99 | // try_times = 0; 100 | return LockStatus::RECORD_FAIL; 101 | } 102 | } 103 | return LockStatus::SUCCESS; 104 | } 105 | 106 | void unlock(txn_id_t txn_id) { 107 | int64_t x = content.load(); 108 | content.fetch_add(x < 0 ? 
1 : -1); 109 | } 110 | 111 | #ifdef READ_COMMITTED 112 | batch_id_t acquire_batch(batch_id_t batch_id) { 113 | uint try_times = 0; 114 | batch_id_t tmp_owner = NULL_BATCH_ID; 115 | Wrapper w(batch_id, 1); 116 | Wrapper temp(tmp_owner); 117 | while (true) { 118 | temp.word = batch_owner.load(); 119 | if (temp.word == NULL_BATCH_ID) { 120 | w.count = 1; 121 | if (batch_owner.compare_exchange_weak(temp.word, w.word)) { 122 | break; 123 | } 124 | } else if (temp.b_id == w.b_id) { 125 | w.count = temp.count + 1; 126 | if (batch_owner.compare_exchange_weak(temp.word, w.word)) { 127 | break; 128 | } 129 | } 130 | if (++try_times > 1000) { 131 | // LOG(2) << "lock batch " << this << " failed " << std::hex << temp.word << " " << batch_id; 132 | // ASSERT(!(w.count > temp.count) || w.count - temp.count < 20000) 133 | // << "lock batch " << this << " failed " << std::hex << temp.word << " " << batch_id;; 134 | return temp.b_id; 135 | } 136 | } 137 | // LOG(2) << "lock batch " << this << " success " << std::hex << temp.word << " " << w.word; 138 | ASSERT(temp.count + 1 == w.count); 139 | return NULL_BATCH_ID; 140 | } 141 | 142 | void unlock_batch(batch_id_t batch_id) { 143 | Wrapper temp(batch_owner); 144 | ASSERT(temp.b_id == batch_id) << this << " " << std::hex << temp.word << " " << batch_id; 145 | ASSERT(temp.count > 0); 146 | Wrapper w(temp.word); 147 | if (w.count == 1) { 148 | w.word = NULL_BATCH_ID; 149 | } else { 150 | w.count -= 1; 151 | } 152 | 153 | while (!batch_owner.compare_exchange_weak(temp.word, w.word)) { 154 | w.word = temp.word; 155 | ASSERT(temp.b_id == batch_id); 156 | ASSERT(temp.count > 0); 157 | if (w.count == 1) { 158 | w.word = NULL_BATCH_ID; 159 | } else { 160 | w.count -= 1; 161 | } 162 | } 163 | } 164 | #endif 165 | }; 166 | 167 | using RecordLock = RecordRWLock; 168 | -------------------------------------------------------------------------------- /doc/dataflow_api/graphs/payment.svg: -------------------------------------------------------------------------------- 1 | [SVG figure omitted: graphviz rendering of payment.dot. Nodes: Get WARE -> Put WARE and Get DIST -> Put DIST (staticKey: true, localTable: true); Get CUST -> Put CUST and Put HIST (staticKey: true, localTable: false).] -------------------------------------------------------------------------------- /src/storage/page_manager.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include <map> 4 | 5 | #include "page.h" 6 | #include "rpc/SNinterface.capnp.h" 7 | #include "servers/config.h" 8 | 9 | class PageManager { 10 | static const uint64_t MAX_PAGE_NUM = 1024 * 1024; 11 | 12 | struct Seg { 13 | PageMeta *page_metas[NUM_PAGES_PER_SEGMENT]; 14 | Seg() { 15 | memset(page_metas, 0, sizeof(PageMeta*) * NUM_PAGES_PER_SEGMENT); 16 | } 17 | }; 18 | 19 | public: 20 | PageManager(const Configuration &conf) 21 | : conf(conf), num_segs(conf.numSegments()) { 22 | segs = new Seg[num_segs]; 23 | } 24 | 25 | ~PageManager() { 26 |
for (seg_id_t seg_id = 0; seg_id < num_segs; ++seg_id) { 27 | Seg& seg = segs[seg_id]; 28 | for (PageMeta* page_meta : seg.page_metas) { 29 | if(page_meta) { 30 | delete page_meta; 31 | } 32 | } 33 | } 34 | delete [] segs; 35 | // for (uint i = 0; i < num_pages; ++i) { 36 | // PageMeta *p = page_cache[i]; 37 | // if (p) 38 | // delete p; 39 | // } 40 | 41 | // delete[] page_cache; 42 | } 43 | 44 | PageMeta *optimistic_alloc_new_page(seg_id_t seg_id) { 45 | // This API does not add the data to multi version page, as it does not know the ts 46 | // The caller should do so 47 | auto &seg_info = local_seg_infos[seg_id]; 48 | 49 | PageMeta *new_page_meta = get_new_page_meta(); 50 | new_page_meta->gp_id.seg_id = seg_id; 51 | new_page_meta->gp_id.page_id = seg_info.next_page_id++; 52 | return new_page_meta; 53 | } 54 | 55 | inline PageMeta *get_page_from_cache(g_page_id_t g_page_id) { 56 | GlobalPageId gp_id(g_page_id); 57 | return get_page_from_cache(gp_id.seg_id, gp_id.page_id); 58 | } 59 | 60 | inline PageMeta *get_page_from_cache(seg_id_t seg_id, page_id_t page_id) { 61 | Seg& seg = segs[seg_id]; 62 | PageMeta* ret = seg.page_metas[page_id]; 63 | ASSERT(ret != nullptr) << std::hex << seg_id << " " << page_id; 64 | return ret; 65 | } 66 | 67 | inline PageMeta *try_get_page_from_cache(g_page_id_t g_page_id) { 68 | GlobalPageId gp_id(g_page_id); 69 | return try_get_page_from_cache(gp_id.seg_id, gp_id.page_id); 70 | } 71 | 72 | inline PageMeta *try_get_page_from_cache(seg_id_t seg_id, page_id_t page_id) { 73 | Seg& seg = segs[seg_id]; 74 | return seg.page_metas[page_id]; 75 | } 76 | 77 | PageMeta *init_page_cache(g_page_id_t g_page_id) { 78 | PageMeta *new_page_meta = get_new_page_meta(); 79 | new_page_meta->gp_id.g_page_id = g_page_id; 80 | new_page_meta->cur_page_size = 0; 81 | new_page_meta->set_cts(0); 82 | 83 | GlobalPageId gp_id(g_page_id); 84 | Seg& seg = segs[gp_id.seg_id]; 85 | seg.page_metas[gp_id.page_id] = new_page_meta; 86 | 87 | // page_cache[g_page_id] = new_page_meta; 88 | // LOG(2) << new_page_meta->gp_id.g_page_id << " " << new_page_meta; 89 | 90 | // try_update_max_page_id(new_page_meta->gp_id.seg_id, new_page_meta->gp_id.page_id); 91 | return new_page_meta; 92 | } 93 | 94 | PageMeta *set_page_cache(MultiLogPage::Reader page) { 95 | g_page_id_t g_page_id = page.getGlobalPageId(); 96 | PageMeta *page_meta = get_page_from_cache(g_page_id); 97 | ASSERT(page_meta->gp_id.g_page_id == g_page_id); 98 | 99 | auto array = page.getData(); 100 | auto byte_array = array.asBytes(); 101 | uint8_t *data = const_cast(byte_array.begin()); 102 | page_meta->cur_page_size = byte_array.size(); 103 | 104 | auto* meta = reinterpret_cast(data); 105 | auto* cache_meta = reinterpret_cast(page_meta->get_data()); 106 | ASSERT(meta->value_size == cache_meta->value_size); 107 | for (slot_t i = 0; i < meta->max_leaf_slots; ++i) { 108 | uint8_t* pair = meta->get_record_ptr(meta->get_offset(i)); 109 | uint8_t* cache_pair = cache_meta->get_record_ptr(cache_meta->get_offset(i)); 110 | r_copy(cache_pair, pair, meta->value_size); 111 | } 112 | 113 | // new_page_meta->mutli_version_data->put_data(page.getCts(), new_page_meta->get_data()); 114 | // try_update_max_page_id(page_meta->gp_id.seg_id, page_meta->gp_id.page_id); 115 | return page_meta; 116 | } 117 | 118 | void set_page_cache(PageMeta *page_meta) { 119 | GlobalPageId gp_id(page_meta->gp_id.g_page_id); 120 | Seg& seg = segs[gp_id.seg_id]; 121 | seg.page_metas[gp_id.page_id] = page_meta; 122 | 123 | // page_cache[page_meta->gp_id.g_page_id] = page_meta; 124 | 
125 |         // try_update_max_page_id(page_meta->gp_id.seg_id, page_meta->gp_id.page_id);
126 |     }
127 | 
128 |     const Configuration &get_conf() const { return conf; }
129 | 
130 |   private:
131 |     const Configuration &conf;
132 | 
133 |   private:
134 |     inline PageMeta *get_new_page_meta() {
135 |         PageMeta *new_page_meta = new PageMeta;
136 |         new_page_meta->set_data(PageMeta::new_data());
137 |         memset(new_page_meta->get_data(), 0, PAGE_SIZE);
138 | 
139 |         return new_page_meta;
140 |     }
141 | 
142 |     // inline void try_update_max_page_id(seg_id_t seg_id, page_id_t page_id) {
143 |     //     return;
144 |     //     auto &seg_info = local_seg_infos[seg_id];
145 |     //     if (seg_info.next_page_id <= page_id) {
146 |     //         seg_info.next_page_id = page_id + 1;
147 |     //     }
148 |     // }
149 | 
150 |     struct LocalSegInfo {
151 |         page_id_t next_page_id = 0;
152 |     };
153 |     page_id_t cur_page_id = 1;  // NOTE: page id starts from 1
154 |     std::map<seg_id_t, LocalSegInfo> local_seg_infos;
155 | 
156 |     Seg* segs;
157 |     uint32_t num_segs;
158 | };
--------------------------------------------------------------------------------
/src/rpc/SNinterface.capnp:
--------------------------------------------------------------------------------
1 | @0xcf5c69b17b3dd15f;
2 | 
3 | # ------ interface for storage node ------
4 | 
5 | struct Dependency {
6 |     timestamp @0 : UInt64;
7 |     numReads @1 : UInt32;
8 | }
9 | 
10 | struct Page {
11 |     globalPageId @0 :UInt64;
12 |     redoLog @1 : Data;
13 | }
14 | 
15 | struct PageReq {
16 |     globalPageId @0 :UInt64;
17 |     offsets @1 :List(UInt32);
18 | }
19 | 
20 | struct MultiLogPage {
21 |     globalPageId @0 :UInt64;
22 |     cts @1 :UInt64;
23 |     data @2 :Data;
24 |     redoLogs @3 : List(Data);
25 | }
26 | 
27 | struct Seg {
28 |     segId @0 : UInt32;
29 |     dep @1 : Dependency;
30 | 
31 |     readPages @2 :List(Page);
32 |     writePages @3 :List(Page);
33 | }
34 | 
35 | struct ReadSet {
36 |     offset @0 :UInt32;
37 |     writer @1 :UInt64;
38 | }
39 | 
40 | struct TxnInput {
41 |     txnId @0 :UInt64;
42 |     input @1 :Data;
43 |     txnType @2 :UInt32;
44 | }
45 | 
46 | struct TxnReadSet {
47 |     readSet @0 :List(ReadSet);
48 |     txnI @1 :UInt32;
49 | }
50 | 
51 | struct GetPageArgs {
52 |     globalPageId @0 :UInt64;
53 |     txnId @1 :UInt64;
54 | }
55 | 
56 | struct GetPageResponse {
57 |     ok @0 :Bool;
58 |     page @1 : MultiLogPage;
59 |     txnId @2 :UInt64;
60 | }
61 | 
62 | struct GetPagesArgs {
63 |     pageReqs @0 : List(PageReq);
64 |     txnId @1 :UInt64;
65 | }
66 | 
67 | struct GetPagesResponse {
68 |     pages @0 : List(Page);
69 |     txnId @1 :UInt64;
70 | }
71 | 
72 | struct SetPageArgs {
73 |     page @0 :Page;
74 | }
75 | 
76 | struct SetPageResponse {
77 |     ok @0 :Bool;
78 | }
79 | 
80 | struct PrepareArgs {
81 |     txnId @0 :UInt64;
82 |     segments @1 :List(Seg);
83 |     txns @2 :List(TxnReadSet);
84 |     txnInputs @3 : List(TxnInput);
85 |     timestamps @4 : List(Dependency);
86 |     primarySnId @5 :UInt64;
87 |     seq @6 :UInt64;
88 |     fastPathEnabled @7 : Bool;
89 | }
90 | 
91 | struct PrepareResponse {
92 |     ok @0 :Bool;
93 |     txnId @1 :UInt64;
94 |     primarySnId @2 :UInt64;
95 |     diff @3 :List(MultiLogPage);
96 | }
97 | 
98 | struct CommitArgs {
99 |     txnId @0 :UInt64;
100 |     primarySnId @1 :UInt64;
101 |     commit @2 :Bool;  # false means abort
102 | }
103 | 
104 | struct CommitResponse {
105 |     ok @0 :Bool;
106 |     isBroadCast @1 :Bool;
107 |     txnId @2 :UInt64;
108 |     primarySnId @3 :UInt64;
109 |     diff @4 :List(MultiLogPage);
110 |     repairedTxns @5 :List(UInt32);
111 | }
112 | 
113 | # ------- interface for time server ------
114 | struct GetTimestampArgs {
115 |     rwSegIds @0 :List(UInt32);  # the 31st bit indicates whether this is a write segment
116 |     txnId @1 :UInt64;
117 | }
118 | 
119 | struct GetTimestampResponse {
120 |     ok @0 :Bool;
121 |     deps @1 :List(Dependency);
122 |     txnId @2 :UInt64;
123 |     seq @3 :UInt64;
124 | }
125 | 
126 | # ------- interface for server synchronization ------
127 | 
128 | struct SyncArgs {
129 |     status @0 :UInt32;
130 |     count @1 :UInt64;
131 | }
132 | 
133 | struct ReportArgs {
134 |     runTimes @0 :UInt64;
135 |     commitTimes @1 :UInt64;
136 |     dbRepairTimes @2 :UInt64;
137 |     snRepairTimes @3 :UInt64;
138 |     batchCommitTimes @4 :UInt64;
139 |     dbBatchRepairTimes @5 :UInt64;
140 |     snBatchRepairTimes @6 :UInt64;
141 |     batchAbortTimes @7 :UInt64;
142 |     remoteAbortTimes @8 :UInt64;
143 |     localAbortTimes @9 :UInt64;
144 |     throughput @10 :Float64;
145 | }
146 | 
147 | struct ClientArgs {
148 |     clientId @0 :UInt64;
149 |     txnType @1 :UInt32;
150 |     data @2 :UInt64;
151 | }
152 | 
153 | struct ClientResponse {
154 |     clientIds @0 :List(UInt64);
155 |     success @1 :Bool;
156 |     data @2 :Data;
157 | }
158 | 
159 | struct BatchPrepareArgs {
160 |     prepareargs @0 :List(PrepareArgs);
161 |     lbatchId @1 :UInt64;
162 | }
163 | 
164 | struct BatchCommitArgs {
165 |     commitargs @0 :List(CommitArgs);
166 |     lbatchId @1 :UInt64;
167 | }
168 | 
169 | struct BatchGetTimestampArgs {
170 |     gettimestampargs @0 :List(GetTimestampArgs);
171 |     lbatchId @1 :UInt64;
172 | }
173 | 
174 | struct BatchGetTimestampResponse {
175 |     gettimestampresponse @0 :List(GetTimestampResponse);
176 |     lbatchId @1 :UInt64;
177 | }
178 | 
179 | 
180 | # ------ all messages in one struct ------
181 | struct RpcMessage {
182 |     isReply @0 :Bool;
183 |     receiver @1 :UInt32;
184 |     sender @2 :UInt32;
185 |     epoch @3 :UInt64;  # a unique id identifying a conversation
186 | 
187 |     data :union {
188 |         gettimestampargs @4 :GetTimestampArgs;
189 |         gettimestampresponse @5 :GetTimestampResponse;
190 |         getpageargs @6 :GetPageArgs;
191 |         getpageresponse @7 :GetPageResponse;
192 |         setpageargs @8 :SetPageArgs;
193 |         setpageresponse @9 :SetPageResponse;
194 |         prepareargs @10 :PrepareArgs;
195 |         prepareresponse @11 :PrepareResponse;
196 |         commitargs @12 :CommitArgs;
197 |         commitresponse @13 :CommitResponse;
198 |         syncargs @14 :SyncArgs;
199 |         reportargs @15 :ReportArgs;
200 |         clientargs @16 :ClientArgs;
201 |         clientresponse @17 :ClientResponse;
202 |         batchprepareargs @18 :BatchPrepareArgs;
203 |         batchcommitargs @19 :BatchCommitArgs;
204 |         batchgettimestampargs @20 :BatchGetTimestampArgs;
205 |         batchgettimestampresponse @21 :BatchGetTimestampResponse;
206 |         getpagesargs @22 :GetPagesArgs;
207 |         getpagesresponse @23 :GetPagesResponse;
208 |     }
209 |     # version @16 :Text;
210 | }
211 | 
--------------------------------------------------------------------------------
/src/util/zipf.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 | 
3 | #include <pthread.h>
4 | 
5 | #include <algorithm>
6 | #include <cassert>
7 | #include <cmath>
8 | #include <cstdint>
9 | #include <cstdlib>
10 | #include <cstring>
11 | #include <iostream>
12 | #include <random>
13 | #include <string>
14 | #include <vector>
15 | 
16 | /**********************************************************************
17 |  * zipf distribution
18 |  *********************************************************************/
19 | 
20 | static const uint64_t FNV_OFFSET_BASIS_64 = 0xCBF29CE484222325;
21 | static const uint64_t FNV_PRIME_64 = 1099511628211;
22 | 
23 | class ZipfianGenerator {
24 |   public:
25 |     ZipfianGenerator(uint64_t min, uint64_t max)
26 |         : items(max - min + 1),
27 |           base(min),
28 |           zipfianconstant(0.99),
29 |           theta(0.99),
30 |           gen(rd()),
31 |           dis(0, 1) {
32 |         zetan = zeta(0, max - min + 1, zipfianconstant, 0);
33 |         zeta2theta = zeta(0, 2, theta, 0);
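        // Gray et al.'s closed form: alpha and eta below are constants derived from
        // theta and the two zeta sums computed above.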
34 |         alpha = 1.0 / (1.0 - theta);
35 |         eta = (1 - pow(2.0 / items, 1 - theta)) / (1 - zeta2theta / zetan);
36 |         countforzeta = items;
37 |         nextValue(items);
38 |         pthread_rwlock_init(&lock, NULL);
39 |     }
40 | 
41 |     ZipfianGenerator(uint64_t min, uint64_t max, double thet)
42 |         : items(max - min + 1),
43 |           base(min),
44 |           zipfianconstant(thet),
45 |           theta(thet),
46 |           gen(rd()),
47 |           dis(0, 1) {
48 |         zetan = zeta(0, max - min + 1, zipfianconstant, 0);
49 |         zeta2theta = zeta(0, 2, theta, 0);
50 |         alpha = 1.0 / (1.0 - theta);
51 |         eta = (1 - pow(2.0 / items, 1 - theta)) / (1 - zeta2theta / zetan);
52 |         countforzeta = items;
53 |         nextValue(items);
54 |         pthread_rwlock_init(&lock, NULL);
55 |     }
56 | 
57 |     ZipfianGenerator(uint64_t min, uint64_t max, double thet, double zet)
58 |         : items(max - min + 1),
59 |           base(min),
60 |           zipfianconstant(thet),
61 |           theta(thet),
62 |           zetan(zet),
63 |           gen(rd()),
64 |           dis(0, 1) {
65 |         zeta2theta = zeta(0, 2, theta, 0);
66 |         alpha = 1.0 / (1.0 - theta);
67 |         eta = (1 - pow(2.0 / items, 1 - theta)) / (1 - zeta2theta / zetan);
68 |         countforzeta = items;
69 |         nextValue(items);
70 |         pthread_rwlock_init(&lock, NULL);
71 |     }
72 | 
73 |     ~ZipfianGenerator() { pthread_rwlock_destroy(&lock); }
74 | 
75 |     inline double zeta(uint64_t st, uint64_t n, double theta, double initialsum) {
76 |         countforzeta = n;
77 |         double sum = initialsum;
78 |         for (size_t i = st; i < n; i++) {
79 |             sum += 1 / (pow(i + 1, theta));
80 |         }
81 |         return sum;
82 |     }
83 | 
84 |     inline uint64_t nextValue(uint64_t item_cnt) {
85 |         if (item_cnt != countforzeta) {
86 |             // have to recompute zetan and eta, since they depend on item_cnt
87 |             pthread_rwlock_wrlock(&lock);
88 |             if (item_cnt > countforzeta) {
89 |                 // we have added more items. can compute zetan incrementally, which is
90 |                 // cheaper
91 |                 zetan = zeta(countforzeta, item_cnt, theta, zetan);
92 |                 eta = (1 - pow(2.0 / items, 1 - theta)) / (1 - zeta2theta / zetan);
93 |             } else {
94 |                 std::cout << "WARNING: Recomputing Zipfian distribution. This is slow and "
95 |                              "should be avoided. "
96 |                           << "(item_cnt=" << item_cnt << " countforzeta=" << countforzeta
97 |                           << ")" << std::endl;
98 |                 // zetan = zeta(0, item_cnt, theta, 0);
99 |                 // eta = (1 - pow(2.0 / items, 1 - theta)) / (1 - zeta2theta / zetan);
100 |             }
101 |             pthread_rwlock_unlock(&lock);
102 |         }
103 |         // from "Quickly Generating Billion-Record Synthetic Databases", Jim Gray
104 |         // et al, SIGMOD 1994
105 |         double u = dis(gen);
106 |         double uz = u * zetan;
107 |         if (uz < 1.0)
108 |             return base;
109 |         if (uz < 1.0 + pow(0.5, theta))
110 |             return base + 1;
111 |         uint64_t ret = base + (uint64_t)(item_cnt * pow(eta * u - eta + 1, alpha));
112 |         return ret;
113 |     }
114 |     inline uint64_t nextValue() { return nextValue(items); }
115 | 
116 |   private:
117 |     // Number of items.
118 |     uint64_t items;
119 |     // Min item to generate.
120 |     uint64_t base;
121 |     uint64_t countforzeta;
122 |     // The zipfian constant to use.
123 |     double zipfianconstant;
124 |     // Computed parameters for generating the distribution.
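        // (theta: skew, equal to zipfianconstant; zetan: zeta(n, theta) over all items;
        //  zeta2theta: zeta(2, theta); alpha = 1 / (1 - theta); eta: tail-rescaling factor
        //  from the closed form in Gray et al., SIGMOD 1994.)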
125 |     double theta, zetan, zeta2theta, alpha, eta;
126 |     pthread_rwlock_t lock;
127 |     std::random_device rd;
128 |     std::mt19937 gen;
129 |     std::uniform_real_distribution<> dis;
130 | };
131 | 
132 | /**********************************************************************
133 |  * Scrambled Zipfian Generator
134 |  *********************************************************************/
135 | class ScrambledZipfianGenerator {
136 |   public:
137 |     ScrambledZipfianGenerator(uint64_t mi, uint64_t ma, double zipfian_constant = 0.99)
138 |         : min(mi), max(ma), itemcount(ma - mi + 1), gen(0, itemcount, zipfian_constant, ZETAN) {}
139 | 
140 |     uint64_t next() {
141 |         uint64_t ret = gen.nextValue() % itemcount;
142 |         // LOG(2) << ret;
143 |         return ret;
144 |     }
145 | 
146 |     uint64_t next_hashed() {
147 |         uint64_t ret = gen.nextValue();
148 |         ret = min + fnvhash64(ret) % itemcount;
149 |         // LOG(2) << ret;
150 |         return ret;
151 |     }
152 | 
153 |     uint64_t fnvhash64(uint64_t val) {
154 |         // from http://en.wikipedia.org/wiki/Fowler_Noll_Vo_hash
155 |         uint64_t hashval = FNV_OFFSET_BASIS_64;
156 |         for (size_t i = 0; i < 8; i++) {
157 |             uint64_t octet = val & 0x00ff;
158 |             val = val >> 8;
159 |             hashval = hashval ^ octet;
160 |             hashval = hashval * FNV_PRIME_64;
161 |         }
162 |         return hashval;
163 |     }
164 | 
165 |   private:
166 |     constexpr static double ZETAN = 26.46902820178302;
167 |     constexpr static uint64_t ITEM_COUNT = 1000;
168 | 
169 |     uint64_t min, max, itemcount;
170 |     ZipfianGenerator gen;
171 | };
172 | 
--------------------------------------------------------------------------------
/dataflow_api/src/api/txn.hpp:
--------------------------------------------------------------------------------
1 | #pragma once
2 | 
3 | #include <deque>
4 | #include <fstream>
5 | #include <functional>
6 | #include <map>
7 | 
8 | #include "graph/node.hpp"
9 | #include "schema.hpp"
10 | #include "type/input.hpp"
11 | #include "type/value.hpp"
12 | #include "type/row.hpp"
13 | #include "util/logging.h"
14 | 
15 | class Txn;
16 | class Input;
17 | using IterationLogic = std::function<Values(Txn &, Input &, Value &)>;
18 | 
19 | enum ScopeType { LOOP = 0, IF_BRANCH, ELSE_BRANCH };
20 | 
21 | static const char *ScopeStrings[] = {"Loop", "IfBranch", "ElseBranch"};
22 | class Txn {
23 |   public:
24 |     Txn(DatabaseSchema &db_schema) : db_schema(db_schema) { scopeTrace.push_back(scope_id); }
25 | 
26 |     Row get(const TableName_t &table_name, std::initializer_list<Value> &&pkey_il) {
27 |         TableSchema &tbl_schema = db_schema.getTable(table_name);
28 |         PKey_t pkey = std::move(pkey_il);
29 | 
30 |         uint32_t part_id = 0;
31 |         if (tbl_schema.isPartitionable()) {
32 |             Value &value = pkey[tbl_schema.getPartPkeyIndex()];
33 |             if (value.isStatic()) {
34 |                 part_id = value.getId();
35 |             }
36 |         }
37 |         if (tbl_schema.isReadOnly()) {
38 |             part_id = partition_affinity;
39 |         }
40 | 
41 |         GetNode *get_node =
42 |             new GetNode(tbl_schema, scope_id, part_id, partition_affinity, isStaticKey(pkey));
43 |         for (Value &value : pkey) {
44 |             get_node->addDeps(value.getDeps(), Node::KeyDep);
45 |         }
46 |         for (Value &value : depValues) {
47 |             get_node->addDeps(value.getDeps(), Node::CtrlDep);
48 |         }
49 |         addNode(get_node);
50 |         return Row(get_node, tbl_schema);
51 |     }
52 | 
53 |     void put(const TableName_t &table_name, std::initializer_list<Value> &&pkey_il, Row &row) {
54 |         TableSchema &tbl_schema = db_schema.getTable(table_name);
55 |         PKey_t pkey = std::move(pkey_il);
56 | 
57 |         uint32_t part_id = 0;
58 |         if (tbl_schema.isPartitionable()) {
59 |             Value &value = pkey[tbl_schema.getPartPkeyIndex()];
60 |             if (value.isStatic()) {
61 |                 part_id = value.getId();
62 |             }
63 |         }
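        // access() is passed whether this put stays on the transaction's affinity
        // partition (i.e., part_id == partition_affinity).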
64 |         tbl_schema.access(part_id == partition_affinity);
65 | 
66 |         PutNode *put_node =
67 |             new PutNode(tbl_schema, scope_id, part_id, partition_affinity, isStaticKey(pkey));
68 |         for (Value &value : pkey) {
69 |             put_node->addDeps(value.getDeps(), Node::KeyDep);
70 |         }
71 |         for (Value &value : depValues) {
72 |             put_node->addDeps(value.getDeps(), Node::CtrlDep);
73 |         }
74 | 
75 |         row.assignDepsTo(put_node);
76 |         addNode(put_node);
77 |     }
78 | 
79 |     Values map(IterationLogic iter_logic, Input &input, Value &loop_count) {
80 |         pushScope(ScopeType::LOOP, loop_count);
81 |         Values res = iter_logic(*this, input, loop_count);
82 |         popScope();
83 | 
84 |         return res;
85 |     }
86 | 
87 | 
88 |     template <typename Logic>
89 |     Values map(Logic iter_logic, Input &input, Value &loop_count) {
90 |         pushScope(ScopeType::LOOP, loop_count);
91 |         Values res = iter_logic(*this, input, loop_count);
92 |         popScope();
93 | 
94 |         return res;
95 |     }
96 | 
97 |     void beginIf(Value &value) { pushScope(ScopeType::IF_BRANCH, value); }
98 | 
99 |     void endIf() { popScope(); }
100 | 
101 |     void setPartitionAffinity(Value &value) {
102 |         ASSERT(value.isStatic());
103 |         partition_affinity = value.getId();
104 |     }
105 | 
106 |     Row alloc(const TableName_t &table_name) {
107 |         TableSchema &tbl_schema = db_schema.getTable(table_name);
108 |         return Row(nullptr, tbl_schema);
109 |     }
110 | 
111 |     void commit() {}
112 | 
113 |     void abort() {}
114 | 
115 |     Input &getInput() { return input; }
116 | 
117 | 
118 |     void print_graph(const std::string &fileName) {
119 |         std::ofstream out(fileName.c_str());
120 |         std::set<uint32_t> usedScopes;
121 | 
122 |         out << "digraph G {" << std::endl;
123 |         out << "label=\"" << fileName << "\"" << std::endl;
124 |         print_subgraph(out, 0);
125 |         for (Node *node : nodes) {
126 |             if (node->getRefCount() == 0) {
127 |                 node->printDebugInfo(out);
128 |             }
129 |         }
130 |         out << "}" << std::endl;
131 |     }
132 | 
133 |   private:
134 |     bool isStaticKey(const PKey_t &pkey) {
135 |         bool is_static = true;
136 |         for (const Value &value : pkey) {
137 |             if (!value.isStatic()) {
138 |                 is_static = false;
139 |             }
140 |         }
141 |         return is_static;
142 |     }
143 | 
144 |     void pushScope(ScopeType scopeType, Value &value) {
145 |         uint32_t temp_loop_id = ++scope_id;
146 |         uint32_t cur_scope_id = scopeTrace.back();
147 |         scopeCallGraph[cur_scope_id].push_back(temp_loop_id);
148 |         scopeTrace.push_back(temp_loop_id);
149 |         scopeToValues[temp_loop_id] = scopeType;
150 |         depValues.push_back(value);
151 |     }
152 | 
153 |     void popScope() {
154 |         scopeTrace.pop_back();
155 |         depValues.pop_back();
156 |     }
157 | 
158 |     void print_subgraph(std::ostream &out, uint32_t scope) {
159 |         if (scope != 0) {
160 |             out << "subgraph cluster_" << scope << "{" << std::endl;
161 |             out << "color=black" << std::endl;
162 |             out << "label = \"" << ScopeStrings[scopeToValues[scope]] << "#" << scope << "\""
163 |                 << std::endl;
164 |         }
165 |         for (Node *node : scopeToNodes[scope]) {
166 |             node->printNode(out);
167 |         }
168 |         for (uint32_t sub_scope : scopeCallGraph[scope]) {
169 |             print_subgraph(out, sub_scope);
170 |         }
171 |         if (scope != 0) {
172 |             out << "}" << std::endl;
173 |         }
174 |     }
175 | 
176 |     inline void addNode(Node *node) {
177 |         nodes.push_back(node);
178 |         scopeToNodes[scopeTrace.back()].push_back(node);
179 |     }
180 | 
181 |   private:
182 |     DatabaseSchema &db_schema;
183 |     Input input;
184 |     uint32_t part_key_id = 0;
185 |     std::vector<Node *> nodes;
186 |     std::map<uint32_t, std::vector<Node *>> scopeToNodes;
187 |     std::map<uint32_t, ScopeType> scopeToValues;
188 |     std::map<uint32_t, std::vector<uint32_t>> scopeCallGraph;
189 |     std::deque<uint32_t> scopeTrace;
190 | 
191 |     uint32_t scope_id = 0;
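    // depValues holds the guard values of the currently open scopes (loop counts and
    // branch conditions); get() and put() attach them to each new node as control deps.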
192 |     std::deque<Value> depValues;
193 | 
194 |     uint32_t partition_affinity = 0;
195 | };
--------------------------------------------------------------------------------
/src/util/txn_lat.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 | 
3 | #ifdef TXN_LAT_STAT
4 | 
5 | #include <stdint.h>
6 | #include <stdio.h>
7 | 
8 | #include <algorithm>
9 | #include <chrono>
10 | #include <thread>
11 | #include <vector>
12 | 
13 | #include "servers/config.h"
14 | #include "util/types.h"
15 | 
16 | 
17 | // This file is copied from timer.h; we use it only to collect transaction latency statistics.
18 | #define TXN_LAT_STAT_FREQ 10
19 | static double CPU_CYCLES_PER_NS = -1.0;
20 | static thread_local thread_id_t txn_latency_worker_id;
21 | static thread_local uint64_t worker_records_count;
22 | #ifdef DETAILED_TXN_LAT
23 | static std::vector<std::vector<std::vector<uint64_t>>> txn_latency_records;
24 | #else
25 | static std::vector<std::vector<uint64_t>> txn_latency_records;
26 | #endif
27 | // This class is _RdtscTimer with slight modifications.
28 | class TxnLatencyTimer {
29 |   public:
30 |     TxnLatencyTimer() {
31 |         if (unlikely(CPU_CYCLES_PER_NS < 0)) {
32 |             std::chrono::high_resolution_clock clock;
33 | 
34 |             auto start = clock.now();
35 |             uint64_t cycles_start = read_tsc();
36 |             std::this_thread::sleep_for(std::chrono::microseconds(100000));
37 |             uint64_t cycles_duration = read_tsc() - cycles_start;
38 |             double duration = std::chrono::duration<double, std::nano>(clock.now() - start).count();
39 | 
40 |             CPU_CYCLES_PER_NS = cycles_duration / duration;
41 |             // LOG(4) << "CPU_CYCLES_PER_NS is set to: " << CPU_CYCLES_PER_NS;
42 |         }
43 |     }
44 | 
45 |     void start() { passed_cycles = read_tsc(); }
46 | 
47 |     uint64_t passed_sec() {
48 |         return (uint64_t)((read_tsc() - passed_cycles) / CPU_CYCLES_PER_NS / 1000000000);
49 |     }
50 | 
51 |     uint64_t passed_msec() {
52 |         return (uint64_t)((read_tsc() - passed_cycles) / CPU_CYCLES_PER_NS / 1000000);
53 |     }
54 | 
55 |     uint64_t passed_usec() {
56 |         return (uint64_t)((read_tsc() - passed_cycles) / CPU_CYCLES_PER_NS / 1000);
57 |     }
58 | 
59 |     double double_passed_sec() { return passed_usec() / 1000000.; }
60 | 
61 |     uint64_t passed_nsec() { return (uint64_t)((read_tsc() - passed_cycles) / CPU_CYCLES_PER_NS); }
62 | 
63 | #ifdef DETAILED_TXN_LAT
64 |     void end(int txn_type) {
65 |         ASSERT(txn_type != -1);
66 |         if (worker_records_count % TXN_LAT_STAT_FREQ == 0) {
67 |             txn_latency_records[txn_latency_worker_id][txn_type].emplace_back(passed_nsec());
68 |         }
69 |         worker_records_count++;
70 |     }
71 | #else
72 |     void end(int txn_type) {
73 |         if (worker_records_count % TXN_LAT_STAT_FREQ == 0) {
74 |             txn_latency_records[txn_latency_worker_id].emplace_back(passed_nsec());
75 |         }
76 |         worker_records_count++;
77 |     }
78 | #endif
79 |     void summary(uint64_t num_op) {
80 |         uint64_t nsec = passed_nsec();
81 |         uint64_t msec = nsec / 1000000;
82 |         LOG(0) << "passed_msec: " << msec << "ms. Per op: " << nsec / (double)num_op << "ns.";
83 |     }
84 | 
85 |     static inline unsigned long read_tsc(void) {
86 |         unsigned a, d;
87 |         __asm __volatile("rdtsc" : "=a"(a), "=d"(d));
88 |         return ((unsigned long)a) | (((unsigned long)d) << 32);
89 |     }
90 | 
91 |     static const uint64_t NSEC_PER_SEC = 1000000000;
92 |     static const uint64_t USEC_PER_SEC = 1000000;
93 |     static const uint64_t MSEC_PER_SEC = 1000;
94 | 
95 |   private:
96 |     uint64_t passed_cycles;
97 | };
98 | 
99 | class TxnLatencyReporter {
100 |   public:
101 |     static void worker_register(thread_id_t _worker_id) { txn_latency_worker_id = _worker_id; }
102 | #ifdef DETAILED_TXN_LAT
103 |     static void init(thread_id_t num_threads) {
104 |         txn_latency_records.resize(num_threads, std::vector<std::vector<uint64_t>>());
105 |         for (uint i = 0; i < num_threads; ++i) {
106 |             txn_latency_records[i].resize(5, std::vector<uint64_t>());
107 |         }
108 |     }
109 | 
110 |     static void report() {
111 |         std::vector<uint64_t> ave;
112 |         std::vector<uint64_t> p50;
113 |         std::vector<uint64_t> p90;
114 |         std::vector<uint64_t> p99;
115 |         for (int txn_type = 0; txn_type < 5; txn_type++) {
116 |             // Combine all workers' latencies
117 |             for (uint32_t i = 1; i < txn_latency_records.size(); ++i) {
118 |                 txn_latency_records[0][txn_type].insert(txn_latency_records[0][txn_type].end(),
119 |                     txn_latency_records[i][txn_type].begin(), txn_latency_records[i][txn_type].end());
120 |             }
121 |             auto &lat = txn_latency_records[0][txn_type];
122 |             std::sort(lat.begin(), lat.end());
123 |             uint64_t sum = 0;
124 |             for (uint64_t i = 0; i < lat.size(); ++i) {
125 |                 sum += lat[i];
126 |             }
127 |             ave.emplace_back(sum / lat.size());
128 |             p50.emplace_back(lat[lat.size() * 0.5]);
129 |             p90.emplace_back(lat[lat.size() * 0.9]);
130 |             p99.emplace_back(lat[lat.size() * 0.99]);
131 |         }
132 |         LOG(4) << "Average Transaction Latency:" << ave[0] << "-" << ave[1] << "-" << ave[2] << "-" << ave[3] << "-" << ave[4];
133 |         LOG(4) << "P50 Transaction Latency:" << p50[0] << "-" << p50[1] << "-" << p50[2] << "-" << p50[3] << "-" << p50[4];
134 |         LOG(4) << "P90 Transaction Latency:" << p90[0] << "-" << p90[1] << "-" << p90[2] << "-" << p90[3] << "-" << p90[4];
135 |         LOG(4) << "P99 Transaction Latency:" << p99[0] << "-" << p99[1] << "-" << p99[2] << "-" << p99[3] << "-" << p99[4];
136 |     }
137 | #else
138 |     static void init(thread_id_t num_threads) {
139 |         txn_latency_records.resize(num_threads, std::vector<uint64_t>());
140 |     }
141 | 
142 |     static void report() {
143 |         // Combine all workers' latencies
144 |         for (uint32_t i = 1; i < txn_latency_records.size(); ++i) {
145 |             txn_latency_records[0].insert(txn_latency_records[0].end(),
146 |                 txn_latency_records[i].begin(), txn_latency_records[i].end());
147 |         }
148 |         auto &lat = txn_latency_records[0];
149 |         std::sort(lat.begin(), lat.end());
150 |         uint64_t sum = 0;
151 |         for (uint64_t i = 0; i < lat.size(); ++i) {
152 |             sum += lat[i];
153 |         }
154 |         LOG(4) << "Average Transaction Latency:" << sum / lat.size();
155 |         LOG(4) << "P50 Transaction Latency:" << lat[lat.size() * 0.5];
156 |         LOG(4) << "P90 Transaction Latency:" << lat[lat.size() * 0.9];
157 |         LOG(4) << "P99 Transaction Latency:" << lat[lat.size() * 0.99];
158 |     }
159 | #endif
160 | };
161 | 
162 | #else
163 | 
164 | class TxnLatencyReporter {
165 |   public:
166 |     static void worker_register(thread_id_t _worker_id) { }
167 |     static void init(thread_id_t num_threads) { }
168 |     static void report() { }
169 | };
170 | 
171 | #endif
--------------------------------------------------------------------------------