├── .clang-format ├── .gitignore ├── LICENSE ├── README.md ├── experiments ├── .keep ├── config.py └── p4db_exp.py ├── figures ├── data │ ├── exp_a.csv │ ├── exp_b.csv │ ├── exp_c.csv │ ├── exp_cycles.csv │ ├── exp_cycles_pp.csv │ ├── exp_d.csv │ ├── exp_e.csv │ ├── exp_f.csv │ ├── exp_g.csv │ ├── exp_h.csv │ ├── exp_i.csv │ ├── exp_j.csv │ ├── exp_k.csv │ ├── exp_l.csv │ ├── exp_partcomp_cycles.csv │ ├── exp_partcomp_cycles_pp.csv │ ├── exp_ycsb_optis.csv │ ├── exp_ycsb_optis_fast.csv │ ├── exp_ycsb_optis_slow.csv │ ├── exp_ycsb_overhot.csv │ └── exp_ycsb_skew.csv ├── front_plot.R ├── out │ ├── exp_a.pdf │ ├── exp_a_A3.pdf │ ├── exp_b.pdf │ ├── exp_b_A2.pdf │ ├── exp_c.pdf │ ├── exp_c_A2.pdf │ ├── exp_f.pdf │ ├── exp_f_A2.pdf │ ├── exp_g.pdf │ ├── exp_g_A2.pdf │ ├── exp_j.pdf │ ├── exp_j_A2.pdf │ ├── exp_k.pdf │ ├── exp_k_A2.pdf │ ├── exp_m.pdf │ ├── exp_skew.pdf │ ├── exp_skew_A2.pdf │ ├── frontpage.pdf │ ├── frontpage_A2.pdf │ ├── partitioning_comparison.pdf │ ├── partitioning_comparison_lat.pdf │ └── ycsb_bighotset.pdf ├── overhot.R ├── partitioning_comparison.R ├── skew.R ├── smallbank_exp.R ├── switch_optis.R ├── tpcc_exp.R ├── utils.R └── ycsb_exp.R ├── meson.build ├── src ├── benchmarks │ ├── benchmarks.hpp │ ├── meson.build │ ├── micro_recirc │ │ ├── args.hpp │ │ ├── meson.build │ │ ├── micro_recirc.cpp │ │ ├── micro_recirc.hpp │ │ ├── random.hpp │ │ ├── switch.hpp │ │ ├── table.hpp │ │ ├── transaction.hpp │ │ └── txn │ │ │ ├── arg.cpp │ │ │ └── switch.cpp │ ├── smallbank │ │ ├── args.hpp │ │ ├── meson.build │ │ ├── random.hpp │ │ ├── smallbank.cpp │ │ ├── smallbank.hpp │ │ ├── switch.hpp │ │ ├── table.hpp │ │ ├── transaction.hpp │ │ └── txn │ │ │ ├── amalgamate.cpp │ │ │ ├── balance.cpp │ │ │ ├── deposit_checking.cpp │ │ │ ├── send_payment.cpp │ │ │ ├── switch.cpp │ │ │ ├── transact_saving.cpp │ │ │ └── write_check.cpp │ ├── tpcc │ │ ├── args.hpp │ │ ├── meson.build │ │ ├── random.hpp │ │ ├── switch.hpp │ │ ├── table.hpp │ │ ├── tpcc.cpp │ │ ├── tpcc.hpp 
│ │ ├── transaction.hpp │ │ ├── txn │ │ │ ├── new_order.cpp │ │ │ ├── payment.cpp │ │ │ └── switch.cpp │ │ └── utils.hpp │ └── ycsb │ │ ├── args.hpp │ │ ├── meson.build │ │ ├── random.hpp │ │ ├── switch.hpp │ │ ├── table.hpp │ │ ├── transaction.hpp │ │ ├── txn │ │ ├── multi.cpp │ │ ├── read.cpp │ │ ├── switch.cpp │ │ └── write.cpp │ │ ├── ycsb.cpp │ │ └── ycsb.hpp ├── comm │ ├── bigendian.hpp │ ├── comm.hpp │ ├── dpdk.cpp │ ├── dpdk.hpp │ ├── eth_hdr.hpp │ ├── handlers │ │ ├── barrier.cpp │ │ ├── barrier.hpp │ │ ├── init.cpp │ │ ├── init.hpp │ │ ├── meson.build │ │ ├── tuple_put_res.cpp │ │ └── tuple_put_res.hpp │ ├── meson.build │ ├── msg.hpp │ ├── msg_handler.cpp │ ├── msg_handler.hpp │ ├── server.hpp │ ├── udp.cpp │ └── udp.hpp ├── datastructures │ ├── array_hashmap.hpp │ ├── linked_list.hpp │ ├── meson.build │ └── stupid_hashmap.hpp ├── db │ ├── buffers.hpp │ ├── config.cpp │ ├── config.hpp │ ├── database.hpp │ ├── defs.hpp │ ├── errors.hpp │ ├── future.hpp │ ├── hex_dump.cpp │ ├── hex_dump.hpp │ ├── mempools.hpp │ ├── meson.build │ ├── spinlock.hpp │ ├── transaction.hpp │ ├── ts_factory.hpp │ ├── types.hpp │ ├── undolog.cpp │ ├── undolog.hpp │ ├── undolog_novirtual.hpp │ ├── util.cpp │ └── util.hpp ├── declustered_layout │ ├── .gitignore │ ├── Makefile │ ├── declustered_layout.cpp │ ├── declustered_layout.hpp │ ├── dotwriter.cpp │ ├── dotwriter.hpp │ ├── graph.cpp │ ├── graph.hpp │ ├── graph_maxcut.cpp │ ├── graph_maxcut.hpp │ ├── graph_toposort.cpp │ ├── graph_toposort.hpp │ ├── meson.build │ ├── mqlib.patch │ ├── partitioning.cpp │ ├── partitioning.hpp │ ├── setup.sh │ ├── switch_simulator.cpp │ ├── switch_simulator.hpp │ ├── test.cpp │ ├── transaction.cpp │ ├── transaction.hpp │ └── tuple_location.hpp ├── dpdk_lib │ ├── cpuset.hpp │ ├── device.cpp │ ├── device.hpp │ ├── dpdk.cpp │ ├── dpdk.hpp │ ├── enums.hpp │ ├── mbufrawpacket.cpp │ ├── mbufrawpacket.hpp │ ├── meson.build │ └── worker_thread.hpp ├── main.cpp ├── meson.build ├── stats │ ├── collector.cpp │ 
├── collector.hpp │ ├── context.cpp │ ├── context.hpp │ ├── counter.cpp │ ├── counter.hpp │ ├── cycles.cpp │ ├── cycles.hpp │ ├── meson.build │ ├── moving_avg.hpp │ ├── periodic.cpp │ ├── periodic.hpp │ ├── scheduler.hpp │ └── stats.hpp ├── table │ ├── concurrency_control │ │ ├── meson.build │ │ ├── no_wait.hpp │ │ ├── none.hpp │ │ ├── row.hpp │ │ └── wait_die.hpp │ ├── meson.build │ ├── partition.hpp │ ├── table.cpp │ └── table.hpp └── utils │ ├── dist.hpp │ ├── meson.build │ └── zipf.hpp ├── subprojects ├── cxxopts.wrap ├── dpdk.wrap └── fmt.wrap ├── switch_src ├── 01_control_plane │ ├── Makefile │ └── p4db.cpp ├── 02_codegen │ ├── __init__.py │ ├── blocks.py │ ├── indent.py │ ├── snippet.py │ └── util.py ├── 03_tests │ ├── .gitignore │ ├── Makefile │ ├── lock_manager.cpp │ ├── micro_recirc.cpp │ ├── network_interface.hpp │ ├── smallbank.cpp │ ├── tpcc.cpp │ └── ycsb.cpp ├── lock_manager │ ├── codegen │ ├── codegen.py │ └── p4db_lock_manager.p4 ├── micro_recirc │ └── p4db_micro_recirc.p4 ├── smallbank │ ├── codegen.py │ ├── codegen │ │ ├── __init__.py │ │ ├── blocks.py │ │ ├── indent.py │ │ ├── snippet.py │ │ └── util.py │ └── p4db_smallbank.p4 ├── tpcc │ ├── codegen │ ├── codegen.py │ └── p4db_tpcc.p4 ├── ycsb │ ├── codegen │ ├── codegen.py │ └── p4db_ycsb.p4 └── ycsb_slow │ ├── codegen │ ├── codegen.py │ └── p4db_ycsb_slow.p4 └── tests ├── .gitignore ├── Makefile ├── access_dist.cpp ├── nolock_test.cpp ├── nurand.cpp ├── pcap_timediff.py ├── pcap_timing.py ├── remote_prob.py ├── shared_lock.cpp ├── switch_lock.py ├── tpcc_neworder.cpp ├── wireshark_dissector.lua └── zipf.cpp /.clang-format: -------------------------------------------------------------------------------- 1 | --- 2 | BasedOnStyle: LLVM 3 | AccessModifierOffset: '-4' 4 | AllowShortIfStatementsOnASingleLine: Never 5 | BreakBeforeBraces: Attach 6 | ColumnLimit: '0' 7 | FixNamespaceComments: 'true' 8 | IncludeBlocks: Regroup 9 | IndentCaseLabels: 'true' 10 | IndentWidth: '4' 11 | MaxEmptyLinesToKeep: 
'2' 12 | PointerAlignment: Left 13 | TabWidth: '4' 14 | UseTab: Never 15 | 16 | ... 17 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # P4DB - The Case for In-Network OLTP 2 | 3 | This is the source code for our paper published at SIGMOD’22 (Matthias Jasny, Lasse Thostrup, Tobias Ziegler and Carsten Binnig): P4DB - The Case for In-Network OLTP. The paper can be found here: [Paper Link](https://www.informatik.tu-darmstadt.de/media/datamanagement/pdf_publications/P4DB_preprint.pdf) 4 | 5 | ## Abstract 6 | 7 | In this paper we present a new approach for distributed DBMSs called P4DB that uses a programmable switch to accelerate OLTP workloads. The main idea of P4DB is that it implements a transaction processing engine on top of a P4-programmable switch. The switch can thus act as an accelerator in the network, especially when it is used to store and process hot (contended) tuples on the switch. In our experiments, we show that P4DB hence provides significant benefits compared to traditional DBMS architectures and can achieve a speedup of up to 8x. 
8 | 9 | ## Citation 10 | 11 | ``` 12 | @inproceedings{mjasny22, 13 | author = {Matthias Jasny and Lasse Thostrup and Tobias Ziegler and Carsten Binnig}, 14 | title = {P4DB - The Case for In-Network OLTP}, 15 | booktitle = {SIGMOD}, 16 | year = {2022} 17 | } 18 | 19 | ``` 20 | 21 | ## Directory structure 22 | 23 | - `./src` contains P4DB code for database nodes: 24 | - one instance which spawns a server + N workers 25 | - requires DPDK to access the NIC 26 | 27 | - `./switch_src` contains P4DB code for the switch: 28 | - control plane in C++ 29 | - P4 code-generator 30 | - P4DB firmware for YCSB, SmallBank, TPC-C (+ Microbenchmarks) 31 | 32 | - `./figures` contains R scripts to generate the paper figures 33 | - `data/` contains CSV files with measurements 34 | - `out/` contains rendered PDFs 35 | 36 | - `./experiments` contains scripts for evaluation 37 | - Use [distexprunner](https://github.com/mjasny/distexprunner/) 38 | 39 | 40 | 41 | ### Build & Run 42 | 43 | To build and run the database node server: 44 | 45 | ``` 46 | meson build 47 | ninja -C build 48 | ./build/p4db --help 49 | ``` 50 | -------------------------------------------------------------------------------- /experiments/.keep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataManagementLab/p4db/a221708787e23a1394cd116fbefb31f9a5506f4e/experiments/.keep -------------------------------------------------------------------------------- /experiments/config.py: -------------------------------------------------------------------------------- 1 | from distexprunner import ServerList, Server 2 | 3 | 4 | SERVER_PORT = 3000 5 | 6 | server_list = ServerList( 7 | Server('node01', '', SERVER_PORT, src_mac=''), 8 | Server('node02', '', SERVER_PORT, src_mac=''), 9 | Server('node03', '', SERVER_PORT, src_mac=''), 10 | Server('node04', '', SERVER_PORT, src_mac=''), 11 | Server('node05', '', SERVER_PORT, src_mac=''), 12 | Server('node06', '', SERVER_PORT, 
src_mac=''), 13 | Server('node07', '', SERVER_PORT, src_mac=''), 14 | Server('node08', '', SERVER_PORT, src_mac=''), 15 | 16 | Server('switch', '', SERVER_PORT), 17 | working_directory='/home/mjasny' 18 | ) 19 | -------------------------------------------------------------------------------- /figures/front_plot.R: -------------------------------------------------------------------------------- 1 | library(dplyr) 2 | library(ggplot2) 3 | library(ggsci) 4 | library(tidyr) 5 | 6 | csv_path <- "./data" 7 | source("utils.R") 8 | 9 | df <- rbind( 10 | p4db_read(csv_file="exp_a.csv") %>% 11 | filter(ycsb_write_prob==50 & cc_scheme=="no_wait") %>% 12 | mutate(facet_label="YCSB", facet_id=1) 13 | , 14 | p4db_read(csv_file="exp_e.csv") %>% 15 | filter(smallbank_hot_size==5 & cc_scheme=="no_wait") %>% 16 | mutate(facet_label="SmallBank", facet_id=2) 17 | , 18 | p4db_read(csv_file="exp_i.csv") %>% 19 | filter(tpcc_num_warehouses==8 & cc_scheme=="no_wait") %>% 20 | mutate(facet_label="TPC-C", facet_id=3) 21 | ) %>% 22 | mutate( 23 | color_label=case_when( 24 | (use_switch=="false") ~ "No-Switch", 25 | (use_switch=="true") ~ "P4DB", 26 | TRUE ~ "Unknown" 27 | ), 28 | color_id=case_when( 29 | (use_switch=="false") ~ 0, 30 | (use_switch=="true") ~ 1, 31 | TRUE ~ NaN 32 | ) 33 | ) 34 | 35 | 36 | p <- df %>% 37 | filter(metric == 'total_commits') %>% 38 | ggplot() + 39 | theme( 40 | axis.title.x=element_blank(), 41 | axis.text.x=element_blank(), 42 | axis.ticks.x=element_blank(), 43 | axis.title.y=element_text(hjust=0), 44 | legend.position="top", 45 | legend.margin=margin(0, b=-3, unit='mm'), 46 | legend.text=element_text(size=10), 47 | legend.key.size=unit(0.7, "line"), 48 | legend.spacing.y=unit(0, 'mm') 49 | ) + 50 | geom_col(aes( 51 | x=use_switch, 52 | y=throughput, 53 | fill=reorder(color_label, color_id), 54 | ), show.legend=T, position=position_dodge()) + 55 | scale_x_discrete("") + 56 | scale_y_continuous(name="Throughput [txn/sec]", labels=addUnits) + 57 | 
scale_fill_jco(name="") + 58 | facet_wrap(~reorder(facet_label, facet_id), scales="free_y") 59 | print(p) 60 | ggsave(file="out/frontpage.pdf", plot=p, device=cairo_pdf, width=210*0.5, height=297*0.16, units="mm") 61 | 62 | 63 | 64 | 65 | p <- df %>% 66 | filter(metric == 'total_commits') %>% 67 | ungroup() %>% 68 | group_by(cc_scheme, facet_label) %>% # first x axis then facet variable 69 | arrange(cc_scheme, use_switch, lm_on_switch, .by_group=T) %>% 70 | mutate(speedup=throughput/throughput[1]) %>% 71 | filter(use_switch == 'true' | lm_on_switch=="true") %>% 72 | ggplot() + 73 | theme( 74 | legend.position="top", 75 | legend.margin=margin(0, unit='cm') 76 | ) + 77 | geom_col(aes( 78 | x=reorder(facet_label, facet_id), 79 | y=speedup, 80 | fill=reorder(facet_label, facet_id) 81 | ), show.legend=F, position=position_dodge()) + 82 | geom_hline(yintercept=1, linetype="longdash", color = "red", alpha=0.5) + 83 | scale_x_discrete("") + 84 | scale_y_continuous(name="Speedup", labels=function(x){ 85 | sprintf("%gX", x) 86 | }) + 87 | scale_fill_manual(name="", values=pal_jco()(10)[4:10]) 88 | print(p) 89 | ggsave(file="out/frontpage_A2.pdf", plot=p, device=cairo_pdf, width=210*0.3, height=297*0.19, units="mm") -------------------------------------------------------------------------------- /figures/out/exp_a.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataManagementLab/p4db/a221708787e23a1394cd116fbefb31f9a5506f4e/figures/out/exp_a.pdf -------------------------------------------------------------------------------- /figures/out/exp_a_A3.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataManagementLab/p4db/a221708787e23a1394cd116fbefb31f9a5506f4e/figures/out/exp_a_A3.pdf -------------------------------------------------------------------------------- /figures/out/exp_b.pdf: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataManagementLab/p4db/a221708787e23a1394cd116fbefb31f9a5506f4e/figures/out/exp_b.pdf -------------------------------------------------------------------------------- /figures/out/exp_b_A2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataManagementLab/p4db/a221708787e23a1394cd116fbefb31f9a5506f4e/figures/out/exp_b_A2.pdf -------------------------------------------------------------------------------- /figures/out/exp_c.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataManagementLab/p4db/a221708787e23a1394cd116fbefb31f9a5506f4e/figures/out/exp_c.pdf -------------------------------------------------------------------------------- /figures/out/exp_c_A2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataManagementLab/p4db/a221708787e23a1394cd116fbefb31f9a5506f4e/figures/out/exp_c_A2.pdf -------------------------------------------------------------------------------- /figures/out/exp_f.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataManagementLab/p4db/a221708787e23a1394cd116fbefb31f9a5506f4e/figures/out/exp_f.pdf -------------------------------------------------------------------------------- /figures/out/exp_f_A2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataManagementLab/p4db/a221708787e23a1394cd116fbefb31f9a5506f4e/figures/out/exp_f_A2.pdf -------------------------------------------------------------------------------- /figures/out/exp_g.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/DataManagementLab/p4db/a221708787e23a1394cd116fbefb31f9a5506f4e/figures/out/exp_g.pdf -------------------------------------------------------------------------------- /figures/out/exp_g_A2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataManagementLab/p4db/a221708787e23a1394cd116fbefb31f9a5506f4e/figures/out/exp_g_A2.pdf -------------------------------------------------------------------------------- /figures/out/exp_j.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataManagementLab/p4db/a221708787e23a1394cd116fbefb31f9a5506f4e/figures/out/exp_j.pdf -------------------------------------------------------------------------------- /figures/out/exp_j_A2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataManagementLab/p4db/a221708787e23a1394cd116fbefb31f9a5506f4e/figures/out/exp_j_A2.pdf -------------------------------------------------------------------------------- /figures/out/exp_k.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataManagementLab/p4db/a221708787e23a1394cd116fbefb31f9a5506f4e/figures/out/exp_k.pdf -------------------------------------------------------------------------------- /figures/out/exp_k_A2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataManagementLab/p4db/a221708787e23a1394cd116fbefb31f9a5506f4e/figures/out/exp_k_A2.pdf -------------------------------------------------------------------------------- /figures/out/exp_m.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataManagementLab/p4db/a221708787e23a1394cd116fbefb31f9a5506f4e/figures/out/exp_m.pdf 
-------------------------------------------------------------------------------- /figures/out/exp_skew.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataManagementLab/p4db/a221708787e23a1394cd116fbefb31f9a5506f4e/figures/out/exp_skew.pdf -------------------------------------------------------------------------------- /figures/out/exp_skew_A2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataManagementLab/p4db/a221708787e23a1394cd116fbefb31f9a5506f4e/figures/out/exp_skew_A2.pdf -------------------------------------------------------------------------------- /figures/out/frontpage.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataManagementLab/p4db/a221708787e23a1394cd116fbefb31f9a5506f4e/figures/out/frontpage.pdf -------------------------------------------------------------------------------- /figures/out/frontpage_A2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataManagementLab/p4db/a221708787e23a1394cd116fbefb31f9a5506f4e/figures/out/frontpage_A2.pdf -------------------------------------------------------------------------------- /figures/out/partitioning_comparison.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataManagementLab/p4db/a221708787e23a1394cd116fbefb31f9a5506f4e/figures/out/partitioning_comparison.pdf -------------------------------------------------------------------------------- /figures/out/partitioning_comparison_lat.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataManagementLab/p4db/a221708787e23a1394cd116fbefb31f9a5506f4e/figures/out/partitioning_comparison_lat.pdf 
-------------------------------------------------------------------------------- /figures/out/ycsb_bighotset.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataManagementLab/p4db/a221708787e23a1394cd116fbefb31f9a5506f4e/figures/out/ycsb_bighotset.pdf -------------------------------------------------------------------------------- /figures/skew.R: -------------------------------------------------------------------------------- 1 | library(dplyr) 2 | library(ggplot2) 3 | library(ggsci) 4 | library(tidyr) 5 | 6 | csv_path <- "./data" 7 | source("utils.R") 8 | 9 | 10 | 11 | 12 | df <- p4db_read(csv_file="exp_ycsb_skew.csv") %>% 13 | mutate(facet_label="YCSB", facet_id=1, hot_prob=ycsb_hot_prob) 14 | 15 | 16 | p <- df %>% 17 | filter(metric == 'total_commits') %>% 18 | ggplot(aes( 19 | x=hot_prob, 20 | y=throughput, 21 | color=type_label, 22 | shape=type_label 23 | )) + 24 | theme( 25 | legend.position="top", 26 | legend.margin=margin(0, r=-3, t=-2, b=-3, unit='mm'), 27 | axis.title.y=element_text(margin=margin(0, r=-0.5, unit='mm')), 28 | legend.text=element_text(size=10), 29 | legend.key.size = unit(0.7, "line"), 30 | legend.spacing.y = unit(0, 'mm'), 31 | ) + 32 | guides(color=guide_legend(nrow=2, byrow=TRUE)) + 33 | geom_line() + 34 | geom_point() + 35 | scale_x_continuous("% Hot Txns", labels=function(x) sprintf("%d%%", x)) + 36 | scale_y_continuous(name="Throughput [txn/sec]", labels=addUnits) + 37 | scale_color_manual( 38 | name="", 39 | values=pal_jco()(10)[c(3,4,1,2)], 40 | breaks=c("No-Switch (NO_WAIT)", "No-Switch (WAIT_DIE)") 41 | ) + 42 | scale_shape_manual( 43 | name="", 44 | values=c(15,3,16,17), 45 | breaks=c("No-Switch (NO_WAIT)", "No-Switch (WAIT_DIE)") 46 | ) 47 | 48 | print(p) 49 | ggsave(file="out/exp_skew.pdf", plot=p, device=cairo_pdf, width=210*0.3, height=297*0.21, units="mm") 50 | 51 | 52 | 53 | p <- df %>% 54 | filter(metric == 'total_commits') %>% 55 | ungroup() %>% 56 | 
group_by(cc_scheme, facet_label, hot_prob) %>% # first x axis then facet variable 57 | arrange(cc_scheme, use_switch, lm_on_switch, .by_group=T) %>% 58 | mutate(speedup=throughput/throughput[1]) %>% 59 | filter(use_switch == 'true' | lm_on_switch=="true") %>% 60 | ggplot(aes( 61 | x=hot_prob, 62 | y=speedup, 63 | color=type_label, 64 | shape=type_label 65 | )) + 66 | theme( 67 | plot.title=element_text(hjust = 0.5), 68 | plot.subtitle=element_text(hjust = 0.5), 69 | legend.position="top", 70 | legend.margin=margin(0, l=-14, b=-3, t=-2, unit='mm'), # l=-38 71 | legend.text=element_text(size=10), 72 | legend.key.size = unit(0.7, "line"), 73 | legend.spacing.y = unit(0, 'mm'), 74 | #legend.spacing.x = unit(0.25, 'mm'), 75 | axis.title.y=element_text(margin=margin(0, r=-0.5, unit='mm')), 76 | ) + 77 | guides(color=guide_legend(nrow=2, byrow=TRUE)) + 78 | geom_line() + 79 | geom_point() + 80 | geom_hline(yintercept=1, linetype="longdash", color = "red", alpha=0.5) + 81 | scale_x_continuous("% Hot Txns", labels=function(x) sprintf("%d%%", x)) + 82 | scale_y_continuous(name="Speedup ~ w/o", labels=function(x){ 83 | sprintf("%gX", x) 84 | }) + 85 | scale_color_manual(name="", values=pal_jco()(10)[c(1,2,3,4)])+ 86 | scale_shape_manual(name="", values=c(16,17,15,3)) 87 | print(p) 88 | ggsave(file="out/exp_skew_A2.pdf", plot=p, device=cairo_pdf, width=210*0.3, height=297*0.21, units="mm") 89 | 90 | -------------------------------------------------------------------------------- /figures/switch_optis.R: -------------------------------------------------------------------------------- 1 | library(dplyr) 2 | library(ggplot2) 3 | library(ggsci) 4 | library(tidyr) 5 | 6 | csv_path <- "./data" 7 | source("utils.R") 8 | 9 | 10 | 11 | 12 | no_fine_no_fast = (p4db_read(csv_file="exp_ycsb_optis_slow.csv") %>% 13 | filter(metric=="total_commits"&ycsb_opti_test=="false"&switch_no_conflict=="false"))$throughput 14 | fine_no_fast = (p4db_read(csv_file="exp_ycsb_optis_slow.csv") %>% 15 | 
filter(metric=="total_commits"&ycsb_opti_test=="true"&switch_no_conflict=="false"))$throughput 16 | 17 | 18 | no_fine_fast = (p4db_read(csv_file="exp_ycsb_optis.csv") %>% 19 | filter(metric=="total_commits"&ycsb_opti_test=="false"&switch_no_conflict=="false"))$throughput 20 | fine_fast = (p4db_read(csv_file="exp_ycsb_optis.csv") %>% 21 | filter(metric=="total_commits"&ycsb_opti_test=="true"&switch_no_conflict=="false"))$throughput 22 | 23 | single_pass = (p4db_read(csv_file="exp_ycsb_optis.csv") %>% 24 | filter(metric=="total_commits"&ycsb_opti_test=="false"&switch_no_conflict=="true"))$throughput 25 | 26 | 27 | df <- data.frame( 28 | x=c("Unoptimized", "+Fast-Recirculate", "+Fine-Locking", "+Declustered"), 29 | y=c(no_fine_no_fast, no_fine_fast, fine_fast, single_pass)/no_fine_no_fast 30 | ) 31 | 32 | df$x = factor(df$x, df$x) 33 | 34 | p <- ggplot(df) + 35 | theme( 36 | legend.position="top", 37 | legend.margin=margin(0, l=-10, t=-1, b=-3, unit='mm'), 38 | legend.text=element_text(size=10), 39 | legend.key.size = unit(0.7, "line"), 40 | legend.spacing.y = unit(0, 'cm'), 41 | legend.spacing.x = unit(0.5, 'mm'), 42 | axis.title.x=element_blank(), 43 | axis.text.x=element_blank(), 44 | axis.ticks.x=element_blank(), 45 | axis.title.y=element_text(margin=margin(0, r=-0.5, unit='mm')), 46 | ) + 47 | guides(fill=guide_legend(nrow=2, byrow=TRUE)) + 48 | geom_col(aes(x=x, y=y, fill=x), show.legend=T) + 49 | geom_text(aes(x=x, y=y, label=sprintf("%0.2fX", y)), vjust=1.5, size=2.5) + 50 | scale_x_discrete("") + 51 | scale_y_continuous("Speedup", labels=function(x){ 52 | sprintf("%gX", x) 53 | }) + 54 | scale_fill_jco(name="") 55 | 56 | 57 | print(p) 58 | ggsave(file="out/exp_m.pdf", plot=p, device=cairo_pdf, width=210*0.3, height=297*0.19, units="mm") -------------------------------------------------------------------------------- /src/benchmarks/benchmarks.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | 4 | 
#include "micro_recirc/micro_recirc.hpp" 5 | #include "smallbank/smallbank.hpp" 6 | #include "tpcc/tpcc.hpp" 7 | #include "ycsb/ycsb.hpp" 8 | -------------------------------------------------------------------------------- /src/benchmarks/meson.build: -------------------------------------------------------------------------------- 1 | subdir('ycsb') 2 | subdir('smallbank') 3 | subdir('tpcc') 4 | subdir('micro_recirc') 5 | 6 | 7 | project_headers += files( 8 | 'benchmarks.hpp', 9 | ) 10 | -------------------------------------------------------------------------------- /src/benchmarks/micro_recirc/args.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | 8 | namespace benchmark { 9 | namespace micro_recirc { 10 | 11 | 12 | struct MicroRecircArgs { 13 | 14 | struct Arg { 15 | uint32_t recircs = 0; 16 | bool on_switch = true; 17 | }; 18 | 19 | 20 | using Arg_t = std::variant; 21 | }; 22 | 23 | } // namespace micro_recirc 24 | } // namespace benchmark -------------------------------------------------------------------------------- /src/benchmarks/micro_recirc/meson.build: -------------------------------------------------------------------------------- 1 | 2 | 3 | project_headers += files( 4 | 'micro_recirc.hpp', 5 | 'random.hpp', 6 | 'table.hpp', 7 | 'transaction.hpp', 8 | 'switch.hpp', 9 | ) 10 | 11 | 12 | project_sources += files( 13 | 'micro_recirc.cpp', 14 | 'txn/arg.cpp', 15 | 'txn/switch.cpp', 16 | ) 17 | -------------------------------------------------------------------------------- /src/benchmarks/micro_recirc/micro_recirc.cpp: -------------------------------------------------------------------------------- 1 | #include "micro_recirc.hpp" 2 | 3 | #include "db/config.hpp" 4 | #include "random.hpp" 5 | #include "transaction.hpp" 6 | 7 | #include 8 | #include 9 | #include 10 | 11 | namespace benchmark { 12 | namespace micro_recirc { 13 | 14 | 15 | void 
micro_recirc_worker(int id, Database& db, TxnExecutorStats& stats) { 16 | auto& config = Config::instance(); 17 | 18 | MicroRecircRandom rnd(config.node_id << 16 | id); 19 | 20 | std::vector txns; 21 | txns.reserve(config.num_txns); 22 | for (size_t i = 0; i < config.num_txns; ++i) { 23 | 24 | MicroRecircArgs::Arg txn; 25 | txn.recircs = rnd.is_multipass(); 26 | 27 | txns.push_back(txn); 28 | } 29 | 30 | db.msg_handler->barrier.wait_workers(); 31 | 32 | stats = txn_executor(db, txns); 33 | stats.count_on_switch(txns); 34 | } 35 | 36 | 37 | int micro_recirc() { 38 | auto& config = Config::instance(); 39 | 40 | Database db; 41 | 42 | db.msg_handler->barrier.wait_nodes(); 43 | 44 | std::vector workers; 45 | workers.reserve(config.num_txn_workers); 46 | std::vector stats; 47 | stats.reserve(config.num_txn_workers); 48 | 49 | for (uint32_t i = 0; i < config.num_txn_workers; ++i) { 50 | auto& stat = stats.emplace_back(); 51 | workers.emplace_back(std::thread([&, i]() { 52 | const WorkerContext::guard worker_ctx; 53 | pin_worker(i); 54 | micro_recirc_worker(i, db, stat); 55 | })); 56 | } 57 | 58 | for (auto& w : workers) { 59 | w.join(); 60 | } 61 | std::cout << TxnExecutorStats::accumulate(stats) << '\n'; 62 | db.msg_handler->barrier.wait_nodes(); 63 | 64 | return 0; 65 | } 66 | 67 | 68 | } // namespace micro_recirc 69 | } // namespace benchmark -------------------------------------------------------------------------------- /src/benchmarks/micro_recirc/micro_recirc.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "db/database.hpp" 4 | #include "db/transaction.hpp" 5 | 6 | #include 7 | #include 8 | 9 | 10 | namespace benchmark { 11 | namespace micro_recirc { 12 | 13 | 14 | int micro_recirc(); 15 | void micro_recirc_worker(int id, Database& db, TxnExecutorStats& stats); 16 | 17 | 18 | } // namespace micro_recirc 19 | } // namespace benchmark 
-------------------------------------------------------------------------------- /src/benchmarks/micro_recirc/random.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | 8 | namespace benchmark { 9 | namespace micro_recirc { 10 | 11 | 12 | class MicroRecircRandom { 13 | public: 14 | using RandomDevice = std::mt19937; 15 | 16 | private: 17 | RandomDevice gen; 18 | Config& config; 19 | 20 | public: 21 | MicroRecircRandom(uint32_t seed) 22 | : gen(seed), config(Config::instance()) {} 23 | 24 | template 25 | T random(T lower, T upper) { 26 | std::uniform_int_distribution dist(lower, upper); 27 | return dist(gen); 28 | } 29 | 30 | bool is_multipass() { 31 | return random(1, 100) <= config.micro_recirc.recirc_prob; 32 | } 33 | }; 34 | 35 | 36 | } // namespace micro_recirc 37 | } // namespace benchmark 38 | -------------------------------------------------------------------------------- /src/benchmarks/micro_recirc/switch.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | 4 | #include "args.hpp" 5 | #include "comm/eth_hdr.hpp" 6 | #include "db/buffers.hpp" 7 | 8 | namespace benchmark { 9 | namespace micro_recirc { 10 | 11 | // Switch specific 12 | 13 | struct lock_t { 14 | uint8_t left = 0; 15 | uint8_t right = 0; 16 | } __attribute__((packed)); 17 | 18 | struct info_t { 19 | be_uint8_t multipass = 0; 20 | be_uint32_t recircs = 0; 21 | lock_t locks{}; 22 | 23 | friend std::ostream& operator<<(std::ostream& os, const info_t& self) { 24 | os << "multipass=" << self.multipass << " recircs=" << self.recircs << " locks=" << +self.locks.left << ',' << +self.locks.right; 25 | return os; 26 | } 27 | } __attribute__((packed)); 28 | 29 | 30 | struct InstrType_t { 31 | union { 32 | uint8_t val = 0; 33 | struct { 34 | uint8_t num : 4; 35 | uint8_t id : 2; 36 | uint8_t skip : 1; 37 | uint8_t stop : 1; 38 | } bitwise; // does 
not work to set manually __attribute__ ((reverse_bitfields)); 39 | }; 40 | 41 | 42 | static constexpr auto RECIRC() { 43 | InstrType_t type; 44 | type.bitwise.id = 0b00; 45 | type.bitwise.num = 0b01; 46 | return type; 47 | } 48 | 49 | static constexpr auto SKIP() { 50 | InstrType_t type; 51 | type.bitwise.skip = true; 52 | return type; 53 | } 54 | 55 | static constexpr auto ABORT() { 56 | InstrType_t type; 57 | type.bitwise.stop = true; 58 | return type; 59 | } 60 | 61 | static constexpr auto STOP() { 62 | InstrType_t type; 63 | type.bitwise.stop = true; 64 | return type; 65 | } 66 | 67 | auto unset_stop() { 68 | bitwise.stop = false; 69 | return *this; 70 | } 71 | 72 | auto set_stop(bool stop = true) { 73 | bitwise.stop = stop; 74 | return *this; 75 | } 76 | 77 | bool operator==(const InstrType_t& other) const { 78 | return val == other.val; 79 | } 80 | 81 | friend std::ostream& operator<<(std::ostream& os, const InstrType_t& self) { 82 | os << (self.bitwise.skip ? '*' : '-'); 83 | os << (self.bitwise.stop ? 
'*' : '-'); 84 | 85 | switch (self.bitwise.id) { 86 | case 0b00: 87 | os << "RECIRC[" << +self.bitwise.num << "]"; 88 | break; 89 | } 90 | return os; 91 | } 92 | } __attribute__((packed)); 93 | static_assert(sizeof(InstrType_t) == 1); 94 | 95 | 96 | struct recirc_t { 97 | InstrType_t type = InstrType_t::RECIRC(); 98 | 99 | friend std::ostream& operator<<(std::ostream& os, const recirc_t& self) { 100 | os << "type=" << self.type; 101 | return os; 102 | } 103 | } __attribute__((packed)); 104 | 105 | 106 | struct MicroRecircSwitchInfo { 107 | struct Recirc { 108 | uint32_t recircs; 109 | 110 | be_uint32_t get_recircs() const { 111 | return recircs; 112 | } 113 | }; 114 | 115 | 116 | void make_txn(const Recirc& arg, BufferWriter& bw); 117 | 118 | struct RecircOut {}; 119 | 120 | RecircOut parse_txn(const Recirc& arg, BufferReader& br); 121 | }; 122 | 123 | 124 | } // namespace micro_recirc 125 | } // namespace benchmark -------------------------------------------------------------------------------- /src/benchmarks/micro_recirc/table.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "db/database.hpp" 4 | #include "db/defs.hpp" 5 | #include "db/types.hpp" 6 | #include "switch.hpp" 7 | #include "table/table.hpp" 8 | 9 | 10 | namespace benchmark { 11 | namespace micro_recirc { 12 | 13 | 14 | struct MicroRecircTableInfo { 15 | template 16 | using Table_t = StructTable; 17 | 18 | MicroRecircSwitchInfo p4_switch; 19 | 20 | void link_tables(Database&) {} 21 | }; 22 | 23 | 24 | } // namespace micro_recirc 25 | } // namespace benchmark -------------------------------------------------------------------------------- /src/benchmarks/micro_recirc/transaction.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "args.hpp" 4 | #include "db/transaction.hpp" 5 | #include "table.hpp" 6 | 7 | #include 8 | #include 9 | #include 10 | 11 | 12 | namespace 
benchmark { 13 | namespace micro_recirc { 14 | 15 | 16 | struct MicroRecirc final : public TransactionBase { 17 | MicroRecirc(Database& db) : TransactionBase(db) { 18 | link_tables(db); 19 | } 20 | 21 | RC operator()(MicroRecircArgs::Arg& arg); 22 | }; 23 | 24 | 25 | } // namespace micro_recirc 26 | } // namespace benchmark -------------------------------------------------------------------------------- /src/benchmarks/micro_recirc/txn/arg.cpp: -------------------------------------------------------------------------------- 1 | #include "../transaction.hpp" 2 | 3 | #include 4 | 5 | namespace benchmark { 6 | namespace micro_recirc { 7 | 8 | 9 | MicroRecirc::RC MicroRecirc::operator()(MicroRecircArgs::Arg& arg) { 10 | if (arg.on_switch) { 11 | auto txn_f = atomic(p4_switch, MicroRecircSwitchInfo::Recirc{arg.recircs}); 12 | txn_f->get(); 13 | return commit(); 14 | } 15 | 16 | throw std::runtime_error("not implemented"); 17 | } 18 | 19 | } // namespace micro_recirc 20 | } // namespace benchmark -------------------------------------------------------------------------------- /src/benchmarks/micro_recirc/txn/switch.cpp: -------------------------------------------------------------------------------- 1 | #include "../switch.hpp" 2 | 3 | #include "../table.hpp" 4 | 5 | 6 | namespace benchmark { 7 | namespace micro_recirc { 8 | 9 | 10 | void MicroRecircSwitchInfo::make_txn(const Recirc& arg, BufferWriter& bw) { 11 | constexpr auto NUM_INSTR = 15; 12 | if (arg.recircs > NUM_INSTR - 1) { 13 | throw std::runtime_error("increase NUM_INSTR"); 14 | } 15 | 16 | 17 | auto info = bw.write(info_t{}); 18 | bw.write(recirc_t{}); 19 | 20 | for (uint32_t i = 0; i < arg.recircs; ++i) { 21 | auto instr = bw.write(recirc_t{}); 22 | instr->type.set_stop(true); 23 | // instr->type.bitwise.stop = true; // In case compiler removes call 24 | } 25 | if (arg.recircs > 0) { 26 | info->multipass = 1; 27 | info->locks = lock_t{1, 1}; 28 | } 29 | 30 | bw.write(InstrType_t::STOP()); 31 | } 32 | 33 | 34 
| MicroRecircSwitchInfo::RecircOut MicroRecircSwitchInfo::parse_txn(const Recirc& arg [[maybe_unused]], BufferReader& br [[maybe_unused]]) { 35 | auto info = br.read(); 36 | 37 | WorkerContext::get().cntr.incr(stats::Counter::micro_recirc_recircs, *info->recircs); 38 | 39 | return RecircOut{}; 40 | } 41 | 42 | } // namespace micro_recirc 43 | } // namespace benchmark -------------------------------------------------------------------------------- /src/benchmarks/smallbank/args.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "db/defs.hpp" 4 | #include "db/types.hpp" 5 | 6 | #include 7 | #include 8 | 9 | 10 | namespace benchmark { 11 | namespace smallbank { 12 | 13 | 14 | struct SmallbankArgs { 15 | struct Balance { 16 | uint64_t customer_id; 17 | bool on_switch; 18 | }; 19 | struct DepositChecking { 20 | uint64_t customer_id; 21 | int32_t val; 22 | bool on_switch; 23 | }; 24 | struct TransactSaving { 25 | uint64_t customer_id; 26 | int32_t val; 27 | bool on_switch; 28 | }; 29 | struct Amalgamate { 30 | uint64_t customer_id_1; 31 | uint64_t customer_id_2; 32 | bool on_switch; 33 | }; 34 | struct WriteCheck { 35 | uint64_t customer_id; 36 | int32_t val; 37 | bool on_switch; 38 | }; 39 | struct SendPayment { 40 | uint64_t customer_id_1; 41 | uint64_t customer_id_2; 42 | int32_t val; 43 | bool on_switch; 44 | }; 45 | 46 | 47 | using Arg_t = std::variant; 48 | }; 49 | 50 | 51 | } // namespace smallbank 52 | } // namespace benchmark -------------------------------------------------------------------------------- /src/benchmarks/smallbank/meson.build: -------------------------------------------------------------------------------- 1 | project_headers += files( 2 | 'smallbank.hpp', 3 | 'random.hpp', 4 | 'table.hpp', 5 | 'transaction.hpp', 6 | ) 7 | 8 | 9 | project_sources += files( 10 | 'smallbank.cpp', 11 | 'txn/amalgamate.cpp', 12 | 'txn/balance.cpp', 13 | 'txn/deposit_checking.cpp', 14 | 
'txn/send_payment.cpp', 15 | 'txn/transact_saving.cpp', 16 | 'txn/write_check.cpp', 17 | 'txn/switch.cpp' 18 | ) 19 | -------------------------------------------------------------------------------- /src/benchmarks/smallbank/random.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "db/config.hpp" 4 | #include "db/defs.hpp" 5 | 6 | #include 7 | 8 | 9 | namespace benchmark { 10 | namespace smallbank { 11 | 12 | class SmallbankRandom { 13 | public: 14 | using RandomDevice = std::mt19937; 15 | 16 | private: 17 | RandomDevice gen; 18 | Config& config; 19 | const uint64_t local_part_size; 20 | 21 | public: 22 | SmallbankRandom(uint32_t seed) 23 | : gen(seed), config(Config::instance()), local_part_size(config.smallbank.table_size / config.num_nodes) {} 24 | 25 | template 26 | T random(T lower, T upper) { 27 | std::uniform_int_distribution dist(lower, upper); 28 | return dist(gen); 29 | } 30 | 31 | template 32 | T random_except(T lower, T upper, T except) { 33 | std::uniform_int_distribution dist(lower, upper - 1); 34 | auto num = dist(gen); 35 | if (num == except) { 36 | return upper; 37 | } 38 | return num; 39 | } 40 | 41 | 42 | bool is_hot_txn() { 43 | return random(1, 100) <= config.smallbank.hot_prob; 44 | } 45 | bool is_remote() { 46 | return random(1, 100) <= config.smallbank.remote_prob; 47 | } 48 | 49 | auto hot_cid() { 50 | auto id = random(0, config.smallbank.hot_size - 1); 51 | if (is_remote()) { 52 | id += random_except(0, config.num_nodes - 1, config.node_id) * local_part_size; 53 | } else { 54 | id += config.node_id * local_part_size; 55 | } 56 | return id; 57 | } 58 | auto cold_cid() { 59 | auto id = random(config.smallbank.hot_size, local_part_size - config.smallbank.hot_size); 60 | if (is_remote()) { 61 | id += random_except(0, config.num_nodes - 1, config.node_id) * local_part_size; 62 | } else { 63 | id += config.node_id * local_part_size; 64 | } 65 | return id; 66 | } 67 | 68 | 
template 69 | T balance(T min = MIN_BALANCE, T max = MAX_BALANCE) { 70 | std::normal_distribution dist; 71 | 72 | const T range = max - min; 73 | while (true) { 74 | double gaussian = (dist(gen) + 2.0) / 4.0; 75 | T value = std::round(gaussian * range); 76 | if (value < 0 || value > range) { 77 | continue; 78 | } 79 | return value + min; 80 | } 81 | } 82 | }; 83 | 84 | 85 | } // namespace smallbank 86 | } // namespace benchmark -------------------------------------------------------------------------------- /src/benchmarks/smallbank/smallbank.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "db/database.hpp" 4 | #include "db/transaction.hpp" 5 | 6 | #include 7 | #include 8 | 9 | 10 | namespace benchmark { 11 | namespace smallbank { 12 | 13 | 14 | int smallbank(); 15 | void smallbank_worker(int id, Database& db, TxnExecutorStats& stats); 16 | 17 | 18 | } // namespace smallbank 19 | } // namespace benchmark -------------------------------------------------------------------------------- /src/benchmarks/smallbank/table.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "switch.hpp" 4 | #include "table/table.hpp" 5 | 6 | 7 | namespace benchmark { 8 | namespace smallbank { 9 | 10 | struct SmallbankTableInfo { 11 | template 12 | using Table_t = StructTable; 13 | 14 | struct Customer { 15 | static constexpr auto TABLE_NAME = "customer"; 16 | // using PartitionInfo_t = PartitionInfo; 17 | using PartitionInfo_t = PartitionInfo; 18 | 19 | uint64_t id; 20 | char name[16]; 21 | 22 | static constexpr auto pk(uint64_t id) { 23 | return p4db::key_t{id}; 24 | } 25 | 26 | void print() {} 27 | }; 28 | 29 | struct Saving { 30 | static constexpr auto TABLE_NAME = "saving"; 31 | // using PartitionInfo_t = PartitionInfo; 32 | using PartitionInfo_t = PartitionInfo; 33 | 34 | uint64_t id; 35 | int32_t balance; 36 | 37 | static constexpr auto 
pk(uint64_t id) { 38 | return p4db::key_t{id}; 39 | } 40 | 41 | void print() { 42 | if (balance != 0) { 43 | std::cout << "id=" << id << " bal=" << balance << '\n'; 44 | } 45 | } 46 | }; 47 | 48 | struct Checking { 49 | static constexpr auto TABLE_NAME = "checking"; 50 | // using PartitionInfo_t = PartitionInfo; 51 | using PartitionInfo_t = PartitionInfo; 52 | 53 | uint64_t id; 54 | int32_t balance; 55 | 56 | static constexpr auto pk(uint64_t id) { 57 | return p4db::key_t{id}; 58 | } 59 | 60 | void print() { 61 | if (balance != 0) { 62 | std::cout << "id=" << id << " bal=" << balance << '\n'; 63 | } 64 | } 65 | }; 66 | 67 | Table_t* customer; 68 | Table_t* saving; 69 | Table_t* checking; 70 | 71 | SmallbankSwitchInfo p4_switch; 72 | 73 | using tables = parameter_pack, Table_t, Table_t>; 74 | 75 | void link_tables(Database& db) { 76 | db.get_casted(Customer::TABLE_NAME, customer); 77 | db.get_casted(Saving::TABLE_NAME, saving); 78 | db.get_casted(Checking::TABLE_NAME, checking); 79 | } 80 | }; 81 | 82 | 83 | } // namespace smallbank 84 | } // namespace benchmark -------------------------------------------------------------------------------- /src/benchmarks/smallbank/transaction.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | 4 | #include "args.hpp" 5 | #include "db/transaction.hpp" 6 | #include "table.hpp" 7 | 8 | 9 | namespace benchmark { 10 | namespace smallbank { 11 | 12 | 13 | struct Smallbank final : public TransactionBase { 14 | Smallbank(Database& db) : TransactionBase(db) { 15 | link_tables(db); 16 | } 17 | 18 | RC operator()(SmallbankArgs::Balance& arg); 19 | RC operator()(SmallbankArgs::DepositChecking& arg); 20 | RC operator()(SmallbankArgs::TransactSaving& arg); 21 | RC operator()(SmallbankArgs::Amalgamate& arg); 22 | RC operator()(SmallbankArgs::WriteCheck& arg); 23 | RC operator()(SmallbankArgs::SendPayment& arg); 24 | }; 25 | 26 | 27 | } // namespace smallbank 28 | } // namespace 
benchmark -------------------------------------------------------------------------------- /src/benchmarks/smallbank/txn/amalgamate.cpp: -------------------------------------------------------------------------------- 1 | #include "../transaction.hpp" 2 | 3 | 4 | namespace benchmark { 5 | namespace smallbank { 6 | 7 | Smallbank::RC Smallbank::operator()(SmallbankArgs::Amalgamate& arg) { 8 | if (arg.on_switch) { 9 | auto txn_f = atomic(p4_switch, SmallbankSwitchInfo::Amalgamate{arg.customer_id_1, arg.customer_id_2}); 10 | txn_f->get(); 11 | WorkerContext::get().cntr.incr(stats::Counter::smallbank_amalgamate_commits); 12 | return commit(); 13 | } 14 | 15 | 16 | auto saving_1_f = write(saving, Saving::pk(arg.customer_id_1)); 17 | check(saving_1_f); 18 | auto checking_1_f = write(checking, Checking::pk(arg.customer_id_1)); 19 | check(checking_1_f); 20 | auto checking_2_f = write(checking, Checking::pk(arg.customer_id_2)); 21 | check(checking_2_f); 22 | 23 | auto saving_1 = saving_1_f->get(); 24 | check(saving_1); 25 | auto checking_1 = checking_1_f->get(); 26 | check(checking_1); 27 | auto checking_2 = checking_2_f->get(); 28 | check(checking_2); 29 | 30 | checking_2->balance += (saving_1->balance + checking_1->balance); 31 | 32 | saving_1->balance = 0; 33 | checking_1->balance = 0; 34 | 35 | WorkerContext::get().cntr.incr(stats::Counter::smallbank_amalgamate_commits); 36 | return commit(); 37 | } 38 | 39 | } // namespace smallbank 40 | } // namespace benchmark -------------------------------------------------------------------------------- /src/benchmarks/smallbank/txn/balance.cpp: -------------------------------------------------------------------------------- 1 | #include "../transaction.hpp" 2 | 3 | 4 | namespace benchmark { 5 | namespace smallbank { 6 | 7 | Smallbank::RC Smallbank::operator()(SmallbankArgs::Balance& arg) { 8 | if (arg.on_switch) { 9 | auto txn_f = atomic(p4_switch, SmallbankSwitchInfo::Balance{arg.customer_id}); 10 | txn_f->get(); 11 | 
WorkerContext::get().cntr.incr(stats::Counter::smallbank_balance_commits); 12 | return commit(); 13 | } 14 | 15 | auto saving_f = read(saving, Saving::pk(arg.customer_id)); 16 | check(saving_f); 17 | auto checking_f = read(checking, Checking::pk(arg.customer_id)); 18 | check(checking_f); 19 | 20 | const auto saving = saving_f->get(); 21 | check(saving); 22 | const auto checking = checking_f->get(); 23 | check(checking); 24 | 25 | const auto sum = saving->balance + checking->balance; 26 | do_not_optimize(sum); 27 | 28 | WorkerContext::get().cntr.incr(stats::Counter::smallbank_balance_commits); 29 | return commit(); 30 | } 31 | 32 | } // namespace smallbank 33 | } // namespace benchmark -------------------------------------------------------------------------------- /src/benchmarks/smallbank/txn/deposit_checking.cpp: -------------------------------------------------------------------------------- 1 | #include "../transaction.hpp" 2 | 3 | 4 | namespace benchmark { 5 | namespace smallbank { 6 | 7 | 8 | Smallbank::RC Smallbank::operator()(SmallbankArgs::DepositChecking& arg) { 9 | if (arg.val < 0) { 10 | return rollback(); 11 | } 12 | 13 | if (arg.on_switch) { 14 | auto txn_f = atomic(p4_switch, SmallbankSwitchInfo::DepositChecking{arg.customer_id, arg.val}); 15 | txn_f->get(); 16 | WorkerContext::get().cntr.incr(stats::Counter::smallbank_deposit_checking_commits); 17 | return commit(); 18 | } 19 | 20 | 21 | auto checking_f = write(checking, Checking::pk(arg.customer_id)); 22 | check(checking_f); 23 | auto checking = checking_f->get(); 24 | check(checking); 25 | checking->balance += arg.val; 26 | 27 | WorkerContext::get().cntr.incr(stats::Counter::smallbank_deposit_checking_commits); 28 | return commit(); 29 | } 30 | 31 | } // namespace smallbank 32 | } // namespace benchmark -------------------------------------------------------------------------------- /src/benchmarks/smallbank/txn/send_payment.cpp: 
-------------------------------------------------------------------------------- 1 | #include "../transaction.hpp" 2 | 3 | 4 | namespace benchmark { 5 | namespace smallbank { 6 | 7 | Smallbank::RC Smallbank::operator()(SmallbankArgs::SendPayment& arg) { 8 | if (arg.on_switch) { 9 | auto txn_f = atomic(p4_switch, SmallbankSwitchInfo::SendPayment{arg.customer_id_1, arg.customer_id_2, arg.val}); 10 | auto& _switch_payment = txn_f->get(); 11 | if (_switch_payment.abort) { 12 | return rollback(); 13 | } 14 | WorkerContext::get().cntr.incr(stats::Counter::smallbank_send_payment_commits); 15 | return commit(); 16 | } 17 | 18 | auto checking_1_f = write(checking, Checking::pk(arg.customer_id_1)); // Opportunity for lock upgrade 19 | check(checking_1_f); 20 | auto checking_1 = checking_1_f->get(); 21 | check(checking_1); 22 | 23 | if (checking_1->balance < arg.val) { 24 | return rollback(); 25 | } 26 | 27 | auto checking_2_f = write(checking, Checking::pk(arg.customer_id_2)); 28 | check(checking_2_f); 29 | auto checking_2 = checking_2_f->get(); 30 | check(checking_2); 31 | 32 | checking_1->balance -= arg.val; 33 | checking_2->balance += arg.val; 34 | 35 | WorkerContext::get().cntr.incr(stats::Counter::smallbank_send_payment_commits); 36 | return commit(); 37 | } 38 | 39 | } // namespace smallbank 40 | } // namespace benchmark -------------------------------------------------------------------------------- /src/benchmarks/smallbank/txn/transact_saving.cpp: -------------------------------------------------------------------------------- 1 | #include "../transaction.hpp" 2 | 3 | 4 | namespace benchmark { 5 | namespace smallbank { 6 | 7 | Smallbank::RC Smallbank::operator()(SmallbankArgs::TransactSaving& arg) { 8 | if (arg.on_switch) { 9 | auto txn_f = atomic(p4_switch, SmallbankSwitchInfo::TransactSaving{arg.customer_id, arg.val}); 10 | txn_f->get(); 11 | WorkerContext::get().cntr.incr(stats::Counter::smallbank_transact_saving_commits); 12 | return commit(); 13 | } 14 | 15 | 
auto saving_f = write(saving, Saving::pk(arg.customer_id)); // Opportunity for lock upgrade 16 | check(saving_f); 17 | auto saving = saving_f->get(); 18 | check(saving); 19 | 20 | if ((saving->balance + arg.val) < 0) { 21 | return rollback(); 22 | } 23 | saving->balance += arg.val; 24 | 25 | WorkerContext::get().cntr.incr(stats::Counter::smallbank_transact_saving_commits); 26 | return commit(); 27 | } 28 | 29 | } // namespace smallbank 30 | } // namespace benchmark -------------------------------------------------------------------------------- /src/benchmarks/smallbank/txn/write_check.cpp: -------------------------------------------------------------------------------- 1 | #include "../transaction.hpp" 2 | 3 | 4 | namespace benchmark { 5 | namespace smallbank { 6 | 7 | 8 | Smallbank::RC Smallbank::operator()(SmallbankArgs::WriteCheck& arg) { 9 | if (arg.on_switch) { 10 | auto txn_f = atomic(p4_switch, SmallbankSwitchInfo::WriteCheck{arg.customer_id, arg.val}); 11 | txn_f->get(); 12 | WorkerContext::get().cntr.incr(stats::Counter::smallbank_write_check_commits); 13 | return commit(); 14 | } 15 | 16 | 17 | auto saving_f = read(saving, Saving::pk(arg.customer_id)); 18 | check(saving_f); 19 | auto checking_f = write(checking, Checking::pk(arg.customer_id)); 20 | check(checking_f); 21 | 22 | const auto saving = saving_f->get(); 23 | check(saving); 24 | auto checking = checking_f->get(); 25 | check(checking); 26 | 27 | if ((saving->balance + checking->balance) < arg.val) { 28 | checking->balance -= (arg.val + 1); 29 | } else { 30 | checking->balance -= arg.val; 31 | } 32 | 33 | WorkerContext::get().cntr.incr(stats::Counter::smallbank_write_check_commits); 34 | return commit(); 35 | } 36 | 37 | } // namespace smallbank 38 | } // namespace benchmark -------------------------------------------------------------------------------- /src/benchmarks/tpcc/args.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 
4 | #include 5 | #include 6 | #include 7 | 8 | namespace benchmark { 9 | namespace tpcc { 10 | 11 | 12 | struct TPCCArgs { 13 | struct NewOrder { 14 | uint64_t w_id; 15 | uint64_t d_id; 16 | uint64_t c_id; 17 | 18 | NewOrder(uint64_t w_id, uint64_t d_id, uint64_t c_id) : w_id(w_id), d_id(d_id), c_id(c_id) {} 19 | 20 | static constexpr int MAX_ORDERS = 15; 21 | struct OrderItem { 22 | uint64_t ol_i_id; 23 | uint64_t ol_supply_w_id; 24 | uint32_t ol_quantity; 25 | bool is_hot; 26 | }; 27 | static_assert(sizeof(OrderItem) == 24); 28 | size_t ol_cnt; // 5-15 orders, avg.: 10 29 | std::array orders; // should be sorted to avoid deadlock, maybe use vector?? 30 | 31 | bool on_switch = false; 32 | }; 33 | 34 | struct Payment { 35 | uint64_t w_id; 36 | uint64_t d_id; 37 | uint64_t c_w_id; 38 | uint64_t c_d_id; 39 | uint64_t c_id; 40 | int64_t h_amount; 41 | 42 | bool on_switch = false; 43 | }; 44 | 45 | 46 | using Arg_t = std::variant; 47 | }; 48 | 49 | } // namespace tpcc 50 | } // namespace benchmark -------------------------------------------------------------------------------- /src/benchmarks/tpcc/meson.build: -------------------------------------------------------------------------------- 1 | 2 | 3 | project_headers += files( 4 | 'tpcc.hpp', 5 | 'random.hpp', 6 | 'table.hpp', 7 | 'transaction.hpp', 8 | 'switch.hpp', 9 | 'utils.hpp', 10 | ) 11 | 12 | 13 | project_sources += files( 14 | 'tpcc.cpp', 15 | 'txn/new_order.cpp', 16 | 'txn/payment.cpp', 17 | 'txn/switch.cpp', 18 | ) 19 | -------------------------------------------------------------------------------- /src/benchmarks/tpcc/random.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | 8 | namespace benchmark { 9 | namespace tpcc { 10 | 11 | 12 | // Stuff for generating random input 13 | class TPCCRandom { 14 | public: 15 | using RandomDevice = std::mt19937; 16 | 17 | private: 18 | RandomDevice gen; 19 | 20 | 
public: 21 | TPCCRandom(uint32_t seed) : gen(seed) {} 22 | 23 | void astring(int x, int y, char* s) { 24 | static constexpr const char* alphanum = "0123456789" 25 | "ABCDEFGHIJKLMNOPQRSTUVWXYZ" 26 | "abcdefghijklmnopqrstuvwxyz"; 27 | int len = random(x, y); 28 | for (int i = 0; i < len; ++i) { 29 | s[i] = alphanum[random(0, 61)]; 30 | } 31 | s[len] = '\0'; 32 | } 33 | void nstring(int x, int y, char* s) { 34 | static constexpr const char* numeric = "0123456789"; 35 | int len = random(x, y); 36 | for (int i = 0; i < len; ++i) { 37 | s[i] = numeric[random(0, 9)]; 38 | } 39 | s[len] = '\0'; 40 | } 41 | void cLastName(int num, char* s) { 42 | static constexpr const char* n[] = { 43 | "BAR", "OUGHT", "ABLE", "PRI", "PRES", 44 | "ESE", "ANTI", "CALLY", "ATION", "EING"}; 45 | 46 | strcpy(s, n[num / 100]); 47 | strcat(s, n[(num / 10) % 10]); 48 | strcat(s, n[num % 10]); 49 | } 50 | 51 | template 52 | T random(T lower, T upper) { 53 | std::uniform_int_distribution dist(lower, upper); 54 | return dist(gen); 55 | } 56 | 57 | template 58 | T NURand(T A, T x, T y) { 59 | constexpr T C = 0; 60 | return (((random(0, A) | random(x, y)) + C) % (y - x + 1)) + x; 61 | } 62 | }; 63 | 64 | } // namespace tpcc 65 | } // namespace benchmark -------------------------------------------------------------------------------- /src/benchmarks/tpcc/tpcc.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "db/database.hpp" 4 | #include "db/transaction.hpp" 5 | 6 | #include 7 | #include 8 | 9 | 10 | namespace benchmark { 11 | namespace tpcc { 12 | 13 | int tpcc(); 14 | void tpcc_worker(int id, Database& db, TxnExecutorStats& stats); 15 | 16 | 17 | } // namespace tpcc 18 | } // namespace benchmark -------------------------------------------------------------------------------- /src/benchmarks/tpcc/transaction.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 
"args.hpp" 4 | #include "db/transaction.hpp" 5 | #include "table.hpp" 6 | 7 | #include 8 | #include 9 | #include 10 | 11 | 12 | namespace benchmark { 13 | namespace tpcc { 14 | 15 | struct TPCC final : public TransactionBase { 16 | TPCC(Database& db) : TransactionBase(db) { 17 | link_tables(db); 18 | } 19 | 20 | RC operator()(TPCCArgs::NewOrder& arg); 21 | RC operator()(TPCCArgs::Payment& arg); 22 | }; 23 | 24 | 25 | } // namespace tpcc 26 | } // namespace benchmark -------------------------------------------------------------------------------- /src/benchmarks/tpcc/utils.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | 4 | #include "db/defs.hpp" 5 | #include "random.hpp" 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | 13 | struct TPCCHotInfo { 14 | struct Entry { 15 | uint64_t idx; 16 | uint64_t hits = 0; 17 | 18 | bool operator<(const Entry& other) { 19 | return hits > other.hits; 20 | } 21 | }; 22 | 23 | using info_array_t = std::array; 24 | std::unique_ptr info; 25 | uint64_t num_warehouses; 26 | 27 | static constexpr uint64_t SAMPLES = 5'000'000; 28 | 29 | TPCCHotInfo(uint64_t num_warehouses, uint64_t switch_slots) : num_warehouses(num_warehouses) { 30 | 31 | info = std::make_unique(num_warehouses); 32 | 33 | uint64_t hot_items = switch_slots / num_warehouses; 34 | for (uint64_t w_id = 0; w_id < num_warehouses; ++w_id) { 35 | benchmark::tpcc::TPCCRandom rnd(w_id); 36 | 37 | std::array cntr{}; 38 | for (size_t i = 0; i < cntr.size(); ++i) { 39 | cntr[i].idx = i; 40 | } 41 | 42 | for (size_t i = 0; i < SAMPLES; ++i) { 43 | auto val = rnd.NURand(8191, 1, NUM_ITEMS) - 1; 44 | cntr[val].hits++; 45 | } 46 | 47 | std::sort(cntr.begin(), cntr.end()); 48 | 49 | for (size_t i = 0; i < NUM_ITEMS; i++) { 50 | info[w_id][cntr[i].idx] = (i < hot_items); 51 | } 52 | } 53 | } 54 | 55 | // ~TPCCHotInfo() { 56 | // for (uint64_t w_id = 0; w_id < num_warehouses; ++w_id) { 57 | // uint64_t num_hot = 
0; 58 | // std::array cntr{}; 59 | // for (size_t i_id = 0; i_id < NUM_ITEMS; ++i_id) { 60 | // num_hot += is_hot(w_id, i_id); 61 | // } 62 | // std::cout << "num_hot=" << num_hot << '\n'; 63 | // } 64 | // } 65 | 66 | bool is_hot(uint64_t w_id, uint64_t i_id) { 67 | if (!(w_id < num_warehouses && i_id < NUM_ITEMS)) { 68 | return false; 69 | } 70 | return info[w_id][i_id]; 71 | } 72 | }; -------------------------------------------------------------------------------- /src/benchmarks/ycsb/args.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "db/defs.hpp" 4 | #include "db/types.hpp" 5 | 6 | #include 7 | #include 8 | #include 9 | 10 | 11 | namespace benchmark { 12 | namespace ycsb { 13 | 14 | struct YCSBArgs { 15 | struct Write { 16 | uint64_t id; 17 | uint32_t value; 18 | bool on_switch; 19 | bool is_hot; 20 | }; 21 | 22 | struct Read { 23 | uint64_t id; 24 | bool on_switch; 25 | bool is_hot; 26 | }; 27 | 28 | template 29 | struct Multi { 30 | struct OP { 31 | uint64_t id; 32 | AccessMode mode; 33 | uint32_t value; 34 | }; 35 | std::array ops; 36 | bool on_switch; 37 | bool is_hot; 38 | }; 39 | 40 | 41 | using Arg_t = std::variant>; 42 | }; 43 | 44 | } // namespace ycsb 45 | } // namespace benchmark -------------------------------------------------------------------------------- /src/benchmarks/ycsb/meson.build: -------------------------------------------------------------------------------- 1 | 2 | 3 | project_headers += files( 4 | 'ycsb.hpp', 5 | 'random.hpp', 6 | 'table.hpp', 7 | 'transaction.hpp', 8 | ) 9 | 10 | 11 | project_sources += files( 12 | 'ycsb.cpp', 13 | 'txn/multi.cpp', 14 | 'txn/read.cpp', 15 | 'txn/write.cpp', 16 | 'txn/switch.cpp', 17 | ) 18 | -------------------------------------------------------------------------------- /src/benchmarks/ycsb/random.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 
"db/config.hpp" 4 | #include "db/defs.hpp" 5 | 6 | #include 7 | 8 | 9 | namespace benchmark { 10 | namespace ycsb { 11 | 12 | class YCSBRandom { 13 | public: 14 | using RandomDevice = std::mt19937; 15 | RandomDevice gen; 16 | 17 | private: 18 | Config& config; 19 | const uint64_t local_part_size; 20 | 21 | public: 22 | YCSBRandom(uint32_t seed) 23 | : gen(seed), config(Config::instance()), local_part_size(config.ycsb.table_size / config.num_nodes) {} 24 | 25 | template 26 | T random(T lower, T upper) { 27 | std::uniform_int_distribution dist(lower, upper); 28 | return dist(gen); 29 | } 30 | 31 | template 32 | T random_except(T lower, T upper, T except) { 33 | std::uniform_int_distribution dist(lower, upper - 1); 34 | auto num = dist(gen); 35 | if (num == except) { 36 | return upper; 37 | } 38 | return num; 39 | } 40 | 41 | 42 | bool is_hot_txn() { 43 | return random(1, 100) <= config.ycsb.hot_prob; 44 | } 45 | bool is_write() { 46 | return random(1, 100) <= config.ycsb.write_prob; 47 | } 48 | bool is_multi() { 49 | return random(1, 100) <= MULTI_OP_PERCENTAGE; 50 | } 51 | bool is_remote() { 52 | return random(1, 100) <= config.ycsb.remote_prob; 53 | } 54 | 55 | auto hot_id() { 56 | auto id = random(0, config.ycsb.hot_size - 1); 57 | if (is_remote()) { 58 | id += random_except(0, config.num_nodes - 1, config.node_id) * local_part_size; 59 | } else { 60 | id += config.node_id * local_part_size; 61 | } 62 | return id; 63 | } 64 | auto cold_id() { 65 | auto id = random(config.ycsb.hot_size, local_part_size - config.ycsb.hot_size - 1); 66 | if (is_remote()) { 67 | id += random_except(0, config.num_nodes - 1, config.node_id) * local_part_size; 68 | } else { 69 | id += config.node_id * local_part_size; 70 | } 71 | return id; 72 | } 73 | 74 | template 75 | T value() { 76 | std::uniform_int_distribution dist; // 0 ... 
T_max 77 | return dist(gen); 78 | } 79 | }; 80 | 81 | } // namespace ycsb 82 | } // namespace benchmark -------------------------------------------------------------------------------- /src/benchmarks/ycsb/table.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "db/database.hpp" 4 | #include "switch.hpp" 5 | #include "table/table.hpp" 6 | 7 | 8 | namespace benchmark { 9 | namespace ycsb { 10 | 11 | struct YCSBTableInfo { 12 | template 13 | using Table_t = StructTable; 14 | 15 | struct KV { 16 | static constexpr auto TABLE_NAME = "kvs"; 17 | // using PartitionInfo_t = PartitionInfo; 18 | using PartitionInfo_t = PartitionInfo; 19 | 20 | uint64_t id; 21 | uint32_t value; 22 | 23 | static constexpr p4db::key_t pk(uint64_t id) { 24 | return p4db::key_t{id}; 25 | } 26 | 27 | void print() { 28 | if (value != 0) { 29 | std::cout << "id=" << id << " value=" << value << '\n'; 30 | } 31 | } 32 | }; 33 | 34 | Table_t* kvs; 35 | YCSBSwitchInfo p4_switch; 36 | 37 | using tables = parameter_pack>; 38 | 39 | void link_tables(Database& db) { 40 | db.get_casted(KV::TABLE_NAME, kvs); 41 | } 42 | }; 43 | } // namespace ycsb 44 | } // namespace benchmark -------------------------------------------------------------------------------- /src/benchmarks/ycsb/transaction.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | 4 | #include "args.hpp" 5 | #include "db/transaction.hpp" 6 | #include "table.hpp" 7 | 8 | 9 | namespace benchmark { 10 | namespace ycsb { 11 | 12 | struct YCSB : public TransactionBase { 13 | YCSB(Database& db) : TransactionBase(db) { 14 | link_tables(db); 15 | } 16 | 17 | RC operator()(YCSBArgs::Write& arg); 18 | RC operator()(YCSBArgs::Read& arg); 19 | RC operator()(YCSBArgs::Multi& arg); 20 | }; 21 | 22 | } // namespace ycsb 23 | } // namespace benchmark -------------------------------------------------------------------------------- 
/src/benchmarks/ycsb/txn/multi.cpp: -------------------------------------------------------------------------------- 1 | #include "../transaction.hpp" 2 | 3 | 4 | namespace benchmark { 5 | namespace ycsb { 6 | 7 | YCSB::RC YCSB::operator()(YCSBArgs::Multi& arg) { 8 | if (arg.on_switch) { 9 | WorkerContext::get().cycl.reset(stats::Cycles::switch_txn_latency); 10 | WorkerContext::get().cycl.start(stats::Cycles::switch_txn_latency); 11 | auto multi_f = atomic(p4_switch, YCSBSwitchInfo::MultiOp{arg}); 12 | const auto values = multi_f->get().values; 13 | do_not_optimize(values); 14 | 15 | if constexpr (!YCSB_MULTI_MIX_RW) { 16 | if (arg.ops[0].mode == AccessMode::WRITE) { 17 | WorkerContext::get().cntr.incr(stats::Counter::ycsb_write_commits); 18 | } else { 19 | WorkerContext::get().cntr.incr(stats::Counter::ycsb_read_commits); 20 | } 21 | } 22 | WorkerContext::get().cycl.stop(stats::Cycles::switch_txn_latency); 23 | WorkerContext::get().cycl.save(stats::Cycles::switch_txn_latency); 24 | return commit(); 25 | } 26 | 27 | 28 | // acquire all locks first, ex and shared. 
Can rollback within loop 29 | TupleFuture* ops[NUM_OPS]; 30 | for (size_t i = 0; auto& op : arg.ops) { 31 | if (op.mode == AccessMode::WRITE) { 32 | ops[i] = write(kvs, KV::pk(op.id)); 33 | } else { 34 | ops[i] = read(kvs, KV::pk(op.id)); 35 | } 36 | check(ops[i]); 37 | ++i; 38 | } 39 | 40 | // Use obtained write-locks to write values 41 | for (size_t i = 0; auto& op : arg.ops) { 42 | if (op.mode == AccessMode::WRITE) { 43 | auto x = ops[i]->get(); 44 | check(x); 45 | x->value = op.value; 46 | } else { 47 | const auto x = ops[i]->get(); 48 | check(x); 49 | const auto value = x->value; 50 | do_not_optimize(value); 51 | } 52 | ++i; 53 | } 54 | 55 | if constexpr (!YCSB_MULTI_MIX_RW) { 56 | if (arg.ops[0].mode == AccessMode::WRITE) { 57 | WorkerContext::get().cntr.incr(stats::Counter::ycsb_write_commits); 58 | } else { 59 | WorkerContext::get().cntr.incr(stats::Counter::ycsb_read_commits); 60 | } 61 | } 62 | 63 | // locks automatically released 64 | return commit(); 65 | } 66 | 67 | } // namespace ycsb 68 | } // namespace benchmark -------------------------------------------------------------------------------- /src/benchmarks/ycsb/txn/read.cpp: -------------------------------------------------------------------------------- 1 | #include "../transaction.hpp" 2 | 3 | 4 | namespace benchmark { 5 | namespace ycsb { 6 | 7 | YCSB::RC YCSB::operator()(YCSBArgs::Read& arg) { 8 | if (arg.on_switch) { 9 | auto read_f = atomic(p4_switch, YCSBSwitchInfo::SingleRead{arg.id}); 10 | const auto value = read_f->get(); 11 | do_not_optimize(value); 12 | WorkerContext::get().cntr.incr(stats::Counter::ycsb_read_commits); 13 | return commit(); 14 | } 15 | 16 | auto entry_f = read(kvs, KV::pk(arg.id)); 17 | check(entry_f); 18 | const auto entry = entry_f->get(); 19 | check(entry); 20 | const auto value = entry->value; 21 | do_not_optimize(value); 22 | 23 | WorkerContext::get().cntr.incr(stats::Counter::ycsb_read_commits); 24 | return commit(); 25 | } 26 | 27 | } // namespace ycsb 28 | } // 
namespace benchmark -------------------------------------------------------------------------------- /src/benchmarks/ycsb/txn/write.cpp: -------------------------------------------------------------------------------- 1 | #include "../transaction.hpp" 2 | 3 | namespace benchmark { 4 | namespace ycsb { 5 | 6 | YCSB::RC YCSB::operator()(YCSBArgs::Write& arg) { 7 | if (arg.on_switch) { 8 | auto write_f = atomic(p4_switch, YCSBSwitchInfo::SingleWrite{arg.id, arg.value}); 9 | write_f->get(); 10 | WorkerContext::get().cntr.incr(stats::Counter::ycsb_write_commits); 11 | return commit(); 12 | } 13 | 14 | auto entry_f = write(kvs, KV::pk(arg.id)); 15 | check(entry_f); 16 | auto entry = entry_f->get(); 17 | check(entry); 18 | entry->value = arg.value; 19 | 20 | WorkerContext::get().cntr.incr(stats::Counter::ycsb_write_commits); 21 | return commit(); 22 | } 23 | 24 | } // namespace ycsb 25 | } // namespace benchmark -------------------------------------------------------------------------------- /src/benchmarks/ycsb/ycsb.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "db/database.hpp" 4 | #include "db/defs.hpp" 5 | #include "db/transaction.hpp" 6 | #include "db/types.hpp" 7 | #include "table/table.hpp" 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | 15 | 16 | namespace benchmark { 17 | namespace ycsb { 18 | 19 | int ycsb(); 20 | void ycsb_worker(int id, Database& db, TxnExecutorStats& stats); 21 | 22 | 23 | } // namespace ycsb 24 | } // namespace benchmark -------------------------------------------------------------------------------- /src/comm/bigendian.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | 7 | template 8 | struct be_bytes_t { 9 | private: 10 | T value = 0; // stored in big-endian order 11 | 12 | constexpr T bswap(const T bytes) const { 13 | switch (sizeof(T)) { 14 | case 
1: 15 | return bytes; 16 | case 2: 17 | return __builtin_bswap16(bytes); 18 | case 4: 19 | return __builtin_bswap32(bytes); 20 | case 8: 21 | return __builtin_bswap64(bytes); 22 | default: 23 | throw std::invalid_argument("Unknown bswap for size=" + std::to_string(sizeof(T))); 24 | } 25 | } 26 | 27 | public: 28 | be_bytes_t() = default; 29 | be_bytes_t(const T value) : value(bswap(value)) {} 30 | 31 | void operator=(const T value) { 32 | this->value = bswap(value); 33 | } 34 | 35 | bool operator==(const T& other) { 36 | return this->value == bswap(other); 37 | } 38 | bool operator==(const be_bytes_t& other) { 39 | return this->value == other.value; 40 | } 41 | 42 | T operator*() const { 43 | return bswap(value); 44 | } 45 | 46 | friend std::ostream& operator<<(std::ostream& os, const be_bytes_t& self) { 47 | std::ios_base::fmtflags f(os.flags()); 48 | os << "0x" << std::setfill('0') << std::setw(2 * sizeof(T)) << std::right << std::hex << +self.bswap(self.value); // + is a hack to not thread byte as char 49 | os.flags(f); 50 | return os; 51 | } 52 | } __attribute__((packed)); 53 | 54 | 55 | using be_uint8_t = be_bytes_t; 56 | using be_uint16_t = be_bytes_t; 57 | using be_uint32_t = be_bytes_t; 58 | using be_uint64_t = be_bytes_t; 59 | 60 | static_assert(std::is_trivially_copyable::value); 61 | static_assert(std::is_trivially_copyable::value); 62 | static_assert(std::is_trivially_copyable::value); 63 | static_assert(std::is_trivially_copyable::value); -------------------------------------------------------------------------------- /src/comm/comm.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | 4 | // #include "udp.hpp" 5 | // using Communicator = UDPCommunicator; 6 | 7 | #include "dpdk.hpp" 8 | using Communicator = DPDKCommunicator; 9 | -------------------------------------------------------------------------------- /src/comm/eth_hdr.hpp: 
-------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | // #include 5 | // #include 6 | 7 | 8 | #include "bigendian.hpp" 9 | 10 | 11 | struct eth_addr_t { 12 | uint8_t addr_bytes[6]; 13 | 14 | bool operator==(const eth_addr_t& other) const { 15 | const uint16_t* w1 = (const uint16_t*)addr_bytes; 16 | const uint16_t* w2 = (const uint16_t*)other.addr_bytes; 17 | 18 | return ((w1[0] ^ w2[0]) | (w1[1] ^ w2[1]) | (w1[2] ^ w2[2])) == 0; 19 | } 20 | 21 | friend std::ostream& operator<<(std::ostream& os, const eth_addr_t& a) { 22 | // os << fmt::format(FMT_COMPILE("{:02x}:{:02x}:{:02x}:{:02x}:{:02x}:{:02x}"), 23 | // a.addr_bytes[0], a.addr_bytes[1], a.addr_bytes[2], 24 | // a.addr_bytes[3], a.addr_bytes[4], a.addr_bytes[5] 25 | // ); 26 | (void)a; 27 | return os; 28 | } 29 | } __attribute__((packed)); 30 | static_assert(sizeof(eth_addr_t) == 6); 31 | 32 | 33 | struct eth_hdr_t { 34 | eth_addr_t dst; 35 | eth_addr_t src; 36 | be_uint16_t type; 37 | 38 | friend std::ostream& operator<<(std::ostream& os, const eth_hdr_t& p) { 39 | // os << "Src: " << p.src; 40 | // os << " --> Dst: " << p.dst; 41 | // os << fmt::format(FMT_COMPILE(" (Type: 0x{:04x})"), *p.type); 42 | (void)p; 43 | return os; 44 | } 45 | } __attribute__((packed)); 46 | static_assert(sizeof(eth_hdr_t) == 14); -------------------------------------------------------------------------------- /src/comm/handlers/barrier.cpp: -------------------------------------------------------------------------------- 1 | #include "barrier.hpp" 2 | 3 | #include "db/config.hpp" 4 | 5 | 6 | BarrierHandler::BarrierHandler(Communicator* comm) : comm(comm) { 7 | auto& config = Config::instance(); 8 | pthread_barrier_init(&local_barrier, nullptr, config.num_txn_workers); 9 | num_nodes = comm->num_nodes; 10 | } 11 | 12 | BarrierHandler::~BarrierHandler() { 13 | pthread_barrier_destroy(&local_barrier); 14 | } 15 | 16 | void BarrierHandler::handle() { 17 | 
received.fetch_add(1, std::memory_order_relaxed); 18 | } 19 | 20 | void BarrierHandler::wait_nodes() { 21 | wait(); 22 | } 23 | 24 | void BarrierHandler::wait_workers() { 25 | uint32_t my_wait = local.fetch_add(1); 26 | pthread_barrier_wait(&local_barrier); 27 | if (my_wait == 0) { // execute only by one 28 | wait(); 29 | } 30 | pthread_barrier_wait(&local_barrier); 31 | } 32 | 33 | // private 34 | void BarrierHandler::wait() { 35 | for (uint32_t i = 0; i < num_nodes; ++i) { 36 | auto pkt = comm->make_pkt(); 37 | auto msg = pkt->ctor(); 38 | msg->sender = comm->node_id; 39 | comm->send(msg::node_t{i}, pkt); 40 | } 41 | 42 | while (received.load(std::memory_order_relaxed) < num_nodes) { 43 | __builtin_ia32_pause(); 44 | } 45 | 46 | std::cerr << "Barrier wait done.\n"; 47 | // std::this_thread::sleep_for(std::chrono::milliseconds(10)); 48 | local.store(0, std::memory_order_release); 49 | received.fetch_sub(num_nodes, std::memory_order_release); 50 | } -------------------------------------------------------------------------------- /src/comm/handlers/barrier.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "comm/comm.hpp" 4 | 5 | #include 6 | #include 7 | #include 8 | 9 | struct BarrierHandler { 10 | Communicator* comm; 11 | uint32_t num_nodes; 12 | std::atomic received{0}; 13 | std::atomic local{0}; 14 | 15 | pthread_barrier_t local_barrier; 16 | 17 | BarrierHandler(Communicator* comm); 18 | ~BarrierHandler(); 19 | 20 | void handle(); 21 | void wait_nodes(); 22 | void wait_workers(); 23 | 24 | private: 25 | void wait(); 26 | }; -------------------------------------------------------------------------------- /src/comm/handlers/init.cpp: -------------------------------------------------------------------------------- 1 | #include "init.hpp" 2 | 3 | 4 | InitHandler::InitHandler(Communicator* comm) : comm(comm) { 5 | num_nodes = comm->num_nodes; 6 | nodes.resize(num_nodes); 7 | } 8 | 9 | void 
InitHandler::handle(msg::node_t node) { 10 | nodes.at(node) = true; 11 | } 12 | 13 | void InitHandler::wait() { 14 | while (true) { 15 | bool done = std::all_of(nodes.begin(), nodes.end(), [](const auto& b) { return b; }); 16 | 17 | // send out one more time, even after we received all 18 | for (uint32_t i = 0; i < num_nodes; ++i) { 19 | auto pkt = comm->make_pkt(); 20 | auto msg = pkt->ctor(); 21 | msg->sender = comm->node_id; 22 | comm->send(msg::node_t{i}, pkt); 23 | } 24 | 25 | if (done) { 26 | break; 27 | } 28 | std::this_thread::sleep_for(std::chrono::milliseconds(100)); 29 | } 30 | std::cerr << "Init done.\n"; 31 | } 32 | -------------------------------------------------------------------------------- /src/comm/handlers/init.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "comm/comm.hpp" 4 | 5 | 6 | struct InitHandler { 7 | Communicator* comm; 8 | uint32_t num_nodes; 9 | std::vector nodes; 10 | 11 | InitHandler(Communicator* comm); 12 | 13 | void handle(msg::node_t node); 14 | void wait(); 15 | }; 16 | -------------------------------------------------------------------------------- /src/comm/handlers/meson.build: -------------------------------------------------------------------------------- 1 | 2 | 3 | project_headers += files( 4 | 'init.hpp', 5 | 'barrier.hpp', 6 | 'tuple_put_res.hpp', 7 | ) 8 | 9 | 10 | project_sources += files( 11 | 'init.cpp', 12 | 'barrier.cpp', 13 | 'tuple_put_res.cpp', 14 | ) 15 | -------------------------------------------------------------------------------- /src/comm/handlers/tuple_put_res.cpp: -------------------------------------------------------------------------------- 1 | #include "tuple_put_res.hpp" 2 | 3 | #include "db/config.hpp" 4 | 5 | 6 | TuplePutResHandler::TuplePutResHandler() : counts(Config::instance().num_txn_workers) {} 7 | 8 | 9 | void TuplePutResHandler::add(size_t index) { 10 | counts[index].incr(); 11 | } 12 | 13 | void 
TuplePutResHandler::handle(msg::node_t node) { 14 | counts[node.get_tid()].decr(); 15 | } 16 | 17 | void TuplePutResHandler::wait(size_t index) { 18 | counts[index].wait_zero(); 19 | } 20 | 21 | void TuplePutResHandler::Counter::incr() { 22 | cnt.fetch_add(1); 23 | } 24 | void TuplePutResHandler::Counter::decr() { 25 | cnt.fetch_sub(1); 26 | } 27 | void TuplePutResHandler::Counter::wait_zero() { 28 | while (cnt.load(std::memory_order_relaxed) != 0) { 29 | __builtin_ia32_pause(); 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /src/comm/handlers/tuple_put_res.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | 4 | #include "comm/comm.hpp" 5 | 6 | #include 7 | 8 | 9 | struct TuplePutResHandler { 10 | struct Counter { 11 | alignas(64) std::atomic cnt{}; 12 | void incr(); 13 | void decr(); 14 | void wait_zero(); 15 | }; 16 | std::vector counts; 17 | 18 | TuplePutResHandler(); 19 | 20 | void add(size_t index); 21 | void handle(msg::node_t node); 22 | void wait(size_t index); 23 | }; 24 | -------------------------------------------------------------------------------- /src/comm/meson.build: -------------------------------------------------------------------------------- 1 | subdir('handlers') 2 | 3 | 4 | project_headers += files( 5 | 'msg.hpp', 6 | 'msg_handler.hpp', 7 | 'comm.hpp', 8 | # 'udp.hpp', 9 | 'dpdk.hpp', 10 | ) 11 | 12 | 13 | project_sources += files( 14 | 'msg_handler.cpp', 15 | # 'udp.cpp', 16 | 'dpdk.cpp', 17 | ) 18 | -------------------------------------------------------------------------------- /src/comm/msg_handler.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "comm/comm.hpp" 4 | #include "comm/msg.hpp" 5 | #include "datastructures/array_hashmap.hpp" 6 | #include "datastructures/stupid_hashmap.hpp" 7 | #include "db/config.hpp" 8 | #include "db/defs.hpp" 9 | #include 
"db/errors.hpp" 10 | #include "db/future.hpp" 11 | #include "handlers/barrier.hpp" 12 | #include "handlers/init.hpp" 13 | #include "handlers/tuple_put_res.hpp" 14 | 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | 23 | 24 | struct Database; 25 | 26 | struct MessageHandler { 27 | using Pkt_t = Communicator::Pkt_t; 28 | static constexpr auto NUM_FUTURES = 1024; 29 | 30 | Database& db; 31 | Communicator* comm; 32 | uint32_t tid; 33 | 34 | 35 | InitHandler init; 36 | BarrierHandler barrier; 37 | TuplePutResHandler putresponses; 38 | 39 | 40 | std::atomic next_id{0}; 41 | // ArrayHashMap open_futures; // HINT Not 100% threadsafe 42 | StupidHashMap open_futures; 43 | 44 | 45 | MessageHandler(Database& db, Communicator* comm); 46 | MessageHandler(MessageHandler&&) = default; 47 | MessageHandler(const MessageHandler&) = delete; 48 | 49 | 50 | msg::id_t set_new_id(msg::Header* msg); 51 | 52 | void add_future(msg::id_t msg_id, AbstractFuture* future); 53 | 54 | 55 | void handle(Pkt_t* pkt); 56 | 57 | private: 58 | void handle(Pkt_t* pkt, msg::Init* msg); 59 | void handle(Pkt_t* pkt, msg::Barrier* msg); 60 | 61 | void handle(Pkt_t* pkt, msg::TupleGetReq* req); 62 | void handle(Pkt_t* pkt, msg::TupleGetRes* res); 63 | void handle(Pkt_t* pkt, msg::TuplePutReq* req); 64 | void handle(Pkt_t* pkt, msg::TuplePutRes* res); 65 | 66 | void handle(Pkt_t* pkt, msg::SwitchTxn* txn); 67 | }; -------------------------------------------------------------------------------- /src/comm/server.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | 4 | #include "eth_hdr.hpp" 5 | 6 | #include 7 | #include 8 | 9 | 10 | struct Server { 11 | std::string ip; 12 | uint16_t port; 13 | eth_addr_t mac; 14 | }; 15 | -------------------------------------------------------------------------------- /src/comm/udp.cpp: -------------------------------------------------------------------------------- 1 | 
#include "udp.hpp"

#include "db/config.hpp"
#include "msg_handler.hpp"


// Builds the communicator from the global Config: copies node/switch ids and
// the worker count, resolves every configured server into a socket address,
// and binds the local UDP socket to this node's configured port.
//
// Throws std::runtime_error when a configured server IP is not a valid IPv4
// dotted-quad string.
UDPCommunicator::UDPCommunicator() {
    auto& config = Config::instance();
    node_id = config.node_id;
    num_nodes = config.num_nodes;
    switch_id = config.switch_id;
    mh_tid = config.num_txn_workers;

    // Resolve all peers before opening the socket, so a malformed address
    // aborts construction without leaving an open file descriptor behind.
    addresses.reserve(config.servers.size());
    for (const auto& server : config.servers) {
        auto& client_addr = addresses.emplace_back();

        client_addr.sin_family = AF_INET;
        // inet_pton returns 1 only on success; 0 means "not a valid address".
        // Ignoring it would leave the entry pointing at 0.0.0.0.
        if (inet_pton(AF_INET, server.ip.c_str(), &client_addr.sin_addr) != 1) {
            throw std::runtime_error("invalid IPv4 address '" + server.ip + "'");
        }
        client_addr.sin_port = htons(server.port);
    }

    setup(config.servers.at(node_id).port);
}

// Stops the receiver thread, then releases the socket and the pending
// receive buffer.
UDPCommunicator::~UDPCommunicator() {
    // The receiver thread uses both `sock` and `recv_buffer`. It must be
    // stopped and joined *before* those are torn down; relying on the
    // jthread member's own destructor would join only after this body ran.
    if (thread.joinable()) {
        thread.request_stop();
        thread.join();
    }

    shutdown(sock, SHUT_RDWR);
    close(sock);
    if (recv_buffer) {
        recv_buffer->free();
        recv_buffer = nullptr;
    }
}


// Registers the message handler and starts the receiver thread, which polls
// receive() until a stop is requested and forwards every packet to `handler`.
void UDPCommunicator::set_handler(MessageHandler* handler) {
    this->handler = handler;
    // Capture explicitly: the thread outlives this call, so it must not refer
    // to any local of set_handler.
    thread = std::jthread([this, handler](std::stop_token token) {
        pin_worker(Config::instance().num_txn_workers);
        while (!token.stop_requested()) {
            auto pkt = receive();
            if (!pkt) {
                continue;  // nothing available right now, poll again
            }
            handler->handle(pkt);
        }
    });
}

// Overload taking an extra (ignored) uint32_t argument; forwards to the
// two-argument send().
void UDPCommunicator::send(msg::node_t target, Pkt_t*& pkt, uint32_t) {
    return send(target, pkt);
}

// Sends `pkt` to the node `target`, then frees the packet and nulls the
// caller's pointer.
//
// Throws std::runtime_error if `target` is not a configured node. Terminates
// the process if the datagram could not be sent in full.
void UDPCommunicator::send(msg::node_t target, Pkt_t*& pkt) {
    if (target >= addresses.size()) {
        throw std::runtime_error("target " + std::to_string(target) + " out of bounds");
    }

    int len;
    {
        const std::lock_guard lock(mutex);  // Interestingly locking is faster
        len = sendto(sock, pkt, pkt->size(), 0,
                     reinterpret_cast<const struct sockaddr*>(&addresses[target]),
                     sizeof(struct sockaddr_in));
    }

    if (len != pkt->size()) {
        std::perror("sendto failed");
        std::exit(EXIT_FAILURE);
    }

    pkt->free();
    pkt = nullptr;  // any later use through the caller's pointer faults instead of touching freed memory
}
| 75 | 76 | UDPCommunicator::Pkt_t* UDPCommunicator::make_pkt() { 77 | return UDPPacketBuffer::alloc(); 78 | } 79 | 80 | 81 | /* Private Methods */ 82 | 83 | void UDPCommunicator::setup(uint16_t port) { 84 | struct sockaddr_in servaddr; 85 | memset(&servaddr, 0, sizeof(servaddr)); 86 | 87 | servaddr.sin_family = AF_INET; 88 | servaddr.sin_addr.s_addr = htonl(INADDR_ANY); 89 | servaddr.sin_port = htons(port); 90 | 91 | if ((sock = socket(AF_INET, SOCK_DGRAM, 0)) < 0) { 92 | perror("socket creation failed"); 93 | std::exit(EXIT_FAILURE); 94 | } 95 | 96 | if (bind(sock, (const struct sockaddr*)&servaddr, sizeof(servaddr)) < 0) { 97 | perror("bind failed"); 98 | std::exit(EXIT_FAILURE); 99 | } 100 | } 101 | 102 | 103 | UDPCommunicator::Pkt_t* UDPCommunicator::receive() { 104 | if (!recv_buffer) { 105 | recv_buffer = Pkt_t::alloc(); 106 | } 107 | int len = recv(sock, recv_buffer, Pkt_t::BUF_SIZE, MSG_DONTWAIT); 108 | if (len <= 0 && (errno == EAGAIN || errno == EWOULDBLOCK || errno == EINTR)) { 109 | return nullptr; 110 | } 111 | if (len == 0) { 112 | return nullptr; 113 | } 114 | if (len == -1) { // socket close 115 | return nullptr; 116 | } 117 | recv_buffer->len = len; 118 | auto msg = recv_buffer; 119 | recv_buffer = nullptr; 120 | return msg; 121 | } -------------------------------------------------------------------------------- /src/comm/udp.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | 4 | #include "comm/msg.hpp" 5 | #include "db/defs.hpp" 6 | #include "db/errors.hpp" 7 | #include "server.hpp" 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | 22 | 23 | struct MessageHandler; 24 | 25 | 26 | struct UDPPacketBuffer { 27 | UDPPacketBuffer() = delete; 28 | ~UDPPacketBuffer() = delete; 29 | 30 | UDPPacketBuffer(const UDPPacketBuffer& other) = delete; 31 | 
UDPPacketBuffer(UDPPacketBuffer&& other) = delete; 32 | 33 | UDPPacketBuffer& operator=(const UDPPacketBuffer& other) = delete; 34 | UDPPacketBuffer& operator=(UDPPacketBuffer&& other) = delete; 35 | 36 | 37 | static constexpr std::size_t BUF_SIZE = 1500; 38 | 39 | uint8_t buffer[BUF_SIZE]; // size stored at end 40 | int len = 0; 41 | 42 | static auto alloc() { 43 | void* data = std::malloc(BUF_SIZE + sizeof(int)); // MTU for now 44 | return static_cast(data); 45 | } 46 | 47 | template 48 | auto ctor(Args&&... args) { 49 | len = sizeof(T); 50 | return new (this) T{std::forward(args)...}; 51 | } 52 | 53 | template 54 | auto as() { 55 | return reinterpret_cast(this); 56 | } 57 | 58 | void resize(const std::size_t len) { 59 | if (len > BUF_SIZE) { 60 | throw error::PacketBufferTooSmall(); 61 | } 62 | this->len = len; 63 | } 64 | 65 | auto size() { 66 | return len; 67 | } 68 | 69 | operator uint8_t*() { 70 | return reinterpret_cast(this); 71 | } 72 | 73 | void free() { 74 | std::free(this); 75 | } 76 | 77 | void dump(std::ostream& os) { 78 | auto bytes = as(); 79 | hex_dump(os, bytes, size()); 80 | } 81 | }; 82 | 83 | 84 | class UDPCommunicator { 85 | // using lock_t = std::mutex; 86 | using lock_t = SpinLock; 87 | 88 | lock_t mutex; 89 | 90 | int sock; 91 | UDPPacketBuffer* recv_buffer = nullptr; 92 | 93 | public: 94 | using Pkt_t = UDPPacketBuffer; 95 | 96 | std::vector addresses; 97 | msg::node_t node_id; 98 | msg::node_t switch_id; 99 | uint32_t num_nodes; 100 | MessageHandler* handler = nullptr; 101 | std::jthread thread; 102 | 103 | 104 | // uint16_t num_rx_queues; 105 | // uint16_t num_tx_queues; 106 | uint32_t mh_tid; 107 | // uint16_t spin_tx_queue; 108 | 109 | 110 | public: 111 | UDPCommunicator(); 112 | 113 | ~UDPCommunicator(); 114 | 115 | 116 | void set_handler(MessageHandler* handler); 117 | 118 | void send(msg::node_t target, UDPPacketBuffer*& pkt); 119 | void send(msg::node_t target, Pkt_t*& pkt, uint32_t); 120 | 121 | 122 | UDPPacketBuffer* make_pkt(); 
123 | 124 | private: 125 | UDPPacketBuffer* receive(); 126 | void setup(uint16_t port); 127 | }; -------------------------------------------------------------------------------- /src/datastructures/array_hashmap.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | 9 | template 10 | struct ArrayHashMap { 11 | static_assert(std::is_pointer::value, "T not pointer type"); 12 | static_assert(N && ((N & (N - 1)) == 0), "N not power of 2"); 13 | 14 | ~ArrayHashMap() { 15 | print(); 16 | } 17 | 18 | std::array ht{}; 19 | 20 | void insert(K key, V val) { 21 | auto& entry = ht[key % N]; 22 | if (entry) { 23 | throw std::runtime_error("Entry not empty"); 24 | } 25 | entry = val; 26 | } 27 | 28 | auto erase(K key) { 29 | auto& entry = ht[key % N]; 30 | if (!entry) { 31 | throw std::runtime_error("Entry not set"); 32 | } 33 | return std::exchange(entry, nullptr); 34 | } 35 | 36 | void print() { 37 | for (std::size_t i = 0; i < N; ++i) { 38 | auto& entry = ht[i]; 39 | if (entry) { 40 | std::cout << "i=" << i << " v=" << entry << '\n'; 41 | } 42 | } 43 | } 44 | }; 45 | -------------------------------------------------------------------------------- /src/datastructures/linked_list.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | 4 | #include "db/mempools.hpp" 5 | 6 | #include 7 | 8 | 9 | template 10 | struct LinkedList { 11 | struct Node { 12 | Node* next; 13 | T val; 14 | 15 | bool operator<(const Node& other) { 16 | return val < other.val; 17 | } 18 | }; 19 | 20 | 21 | static inline FixedThreadsafeMempool pool{4096}; 22 | // FixedMempool pool{80}; 23 | Node* head = nullptr; 24 | 25 | ~LinkedList() { 26 | if (!empty()) { 27 | int nodes = 0; 28 | remove_until([&](const auto&) { 29 | ++nodes; 30 | return true; 31 | }); 32 | std::cerr << "linked list still filled with " << nodes << " nodes!\n"; 33 | } 34 | } 35 | 36 | 
void add_sorted(T&& val) { 37 | Node* node = pool.allocate(); 38 | node->val = val; 39 | 40 | Node** pp = &head; 41 | while (*pp && **pp < *node) { 42 | pp = &(*pp)->next; 43 | } 44 | node->next = *pp; 45 | *pp = node; 46 | } 47 | 48 | template 49 | void remove_if(Fn&& fn) { 50 | Node** pp = &head; 51 | while (*pp) { 52 | Node* node = *pp; 53 | if (fn(node->val)) { 54 | *pp = node->next; 55 | pool.deallocate(node); 56 | } else { 57 | pp = &(node->next); 58 | } 59 | } 60 | } 61 | 62 | 63 | template 64 | void remove_if_one(Fn&& fn) { 65 | Node** pp = &head; 66 | while (*pp) { 67 | Node* node = *pp; 68 | if (fn(node->val)) { 69 | *pp = node->next; 70 | pool.deallocate(node); 71 | break; 72 | } else { 73 | pp = &(node->next); 74 | } 75 | } 76 | } 77 | 78 | 79 | template 80 | void remove_until(Fn&& fn) { 81 | Node** pp = &head; 82 | while (*pp) { 83 | Node* node = *pp; 84 | bool rm = fn(node->val); 85 | if (!rm) { 86 | break; 87 | } 88 | *pp = node->next; 89 | pool.deallocate(node); 90 | } 91 | } 92 | 93 | bool empty() { 94 | return head == nullptr; 95 | } 96 | }; 97 | -------------------------------------------------------------------------------- /src/datastructures/meson.build: -------------------------------------------------------------------------------- 1 | 2 | 3 | project_headers += files( 4 | 'array_hashmap.hpp', 5 | 'stupid_hashmap.hpp', 6 | 'linked_list.hpp', 7 | ) 8 | 9 | 10 | project_sources += files( 11 | 12 | ) 13 | -------------------------------------------------------------------------------- /src/datastructures/stupid_hashmap.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | 7 | template 8 | struct StupidHashMap { 9 | static_assert(std::is_pointer::value, "T not pointer type"); 10 | static_assert(N && ((N & (N - 1)) == 0), "N not power of 2"); 11 | 12 | ~StupidHashMap() { 13 | print(); // p db.msg_handler->open_futures.print() 14 | } 15 | 16 | struct Entry { 17 
| K k; 18 | std::atomic v{nullptr}; 19 | }; 20 | 21 | struct Bucket { 22 | static constexpr auto NUM_SLOTS = 64 / sizeof(Entry); // one cacheline 23 | std::array slots{}; 24 | }; 25 | 26 | std::array ht{}; 27 | 28 | void insert(K key, V val) { 29 | auto& bucket = ht[key % N]; 30 | for (auto& slot : bucket.slots) { 31 | V expected = nullptr; 32 | if (slot.v.compare_exchange_strong(expected, val, std::memory_order_relaxed, std::memory_order_relaxed)) { 33 | slot.k = key; 34 | return; 35 | } 36 | } 37 | throw std::runtime_error("Bucket full."); 38 | } 39 | 40 | auto erase(K key) { 41 | auto& bucket = ht[key % N]; 42 | for (auto& slot : bucket.slots) { 43 | if (slot.k == key) { 44 | return slot.v.exchange(nullptr, std::memory_order_release); 45 | } 46 | } 47 | throw std::runtime_error("Key not found."); 48 | } 49 | 50 | void print() { 51 | for (std::size_t i = 0; i < N; ++i) { 52 | auto& bucket = ht[i]; 53 | for (auto& slot : bucket.slots) { 54 | if (slot.v) { 55 | std::cout << "k=" << slot.k << " v=" << slot.v << '\n'; 56 | } 57 | } 58 | } 59 | } 60 | }; -------------------------------------------------------------------------------- /src/db/buffers.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | 7 | struct BufferWriter { 8 | uint8_t* buffer; 9 | std::size_t size = 0; 10 | 11 | BufferWriter(uint8_t* buffer) : buffer(buffer) {} 12 | 13 | template 14 | auto write(const T& data) { 15 | auto dst = reinterpret_cast(buffer + size); 16 | 17 | // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=84212 18 | #pragma GCC diagnostic push 19 | #pragma GCC diagnostic ignored "-Wstringop-overflow" 20 | std::memcpy(dst, &data, sizeof(T)); 21 | #pragma GCC diagnostic pop 22 | 23 | size += sizeof(T); 24 | return dst; 25 | } 26 | }; 27 | 28 | struct BufferReader { 29 | uint8_t* buffer; 30 | 31 | BufferReader(uint8_t* buffer) : buffer(buffer) {} 32 | 33 | template 34 | auto read() { 35 | auto dst = 
reinterpret_cast(buffer); 36 | buffer += sizeof(T); 37 | return dst; 38 | } 39 | 40 | template 41 | auto peek() { 42 | return reinterpret_cast(buffer); 43 | } 44 | }; -------------------------------------------------------------------------------- /src/db/config.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | 4 | #include "comm/comm.hpp" 5 | #include "comm/server.hpp" 6 | #include "db/defs.hpp" 7 | #include "db/util.hpp" 8 | 9 | #include 10 | 11 | 12 | class Config : public HeapSingleton { 13 | friend class HeapSingleton; 14 | 15 | 16 | protected: 17 | Config() = default; 18 | 19 | public: 20 | void parse_cli(int argc, char** argv); 21 | void print(); 22 | 23 | 24 | std::vector servers = {}; 25 | 26 | msg::node_t node_id; 27 | uint32_t num_nodes; 28 | uint32_t num_txn_workers; 29 | msg::node_t switch_id; 30 | uint64_t switch_entries; 31 | 32 | BenchmarkType workload; 33 | uint64_t num_txns; 34 | bool use_switch; 35 | bool verify; 36 | std::string csv_file_cycles{"cycles.csv"}; 37 | 38 | struct YCSB { 39 | uint64_t table_size; 40 | int write_prob; 41 | int remote_prob; 42 | uint64_t hot_size; 43 | int hot_prob; 44 | } ycsb; 45 | 46 | struct Smallbank { 47 | uint64_t table_size; 48 | uint64_t hot_size; 49 | int hot_prob; 50 | int remote_prob; 51 | } smallbank; 52 | 53 | struct TPCC { 54 | uint64_t num_warehouses; 55 | uint64_t num_districts; 56 | uint64_t home_w_id; 57 | int new_order_remote_prob = 1; // default by tpcc-spec ( = 10 for test) 58 | int payment_remote_prob = 15; // default by tpcc-spec 59 | } tpcc; 60 | 61 | struct MicroRecirc { 62 | int recirc_prob; 63 | } micro_recirc; 64 | }; 65 | -------------------------------------------------------------------------------- /src/db/database.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "comm/comm.hpp" 4 | #include "comm/msg_handler.hpp" 5 | #include "table/table.hpp" 6 | 7 | 
#include 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | 14 | class Database { 15 | std::vector table_ids; 16 | std::unordered_map table_names; 17 | 18 | public: 19 | std::unique_ptr msg_handler; 20 | std::unique_ptr comm; 21 | 22 | public: 23 | Database() { 24 | comm = std::make_unique(); 25 | 26 | msg_handler = std::make_unique(*this, comm.get()); 27 | 28 | msg_handler->init.wait(); 29 | } 30 | 31 | Database(Database&&) = default; 32 | Database(const Database&) = delete; 33 | 34 | ~Database() { 35 | for (auto& table : table_ids) { 36 | delete table; 37 | } 38 | } 39 | 40 | template 41 | auto make_table(std::string key, Args&&... args) { 42 | if (has_table(key)) { 43 | throw std::logic_error("Table already present in database"); 44 | } 45 | std::cout << "Allocating Table: " << key << '\n'; 46 | auto table = new T{std::forward(args)..., *comm}; 47 | table->id = p4db::table_t{table_ids.size()}; 48 | table->name = key; 49 | table_ids.emplace_back(table); 50 | table_names[key] = table; 51 | return table; 52 | } 53 | 54 | Table* operator[](std::string key) { 55 | return table_names.at(key); 56 | } 57 | Table* operator[](p4db::table_t id) { 58 | return table_ids[id]; 59 | } 60 | bool has_table(std::string name) { 61 | return table_names.find(name) != table_names.end(); 62 | } 63 | 64 | template 65 | void get_casted(std::string key, T*& dest) { 66 | dest = dynamic_cast((*this)[key]); 67 | if (!dest) { 68 | throw error::TableCastFailed(); 69 | } 70 | } 71 | }; 72 | -------------------------------------------------------------------------------- /src/db/defs.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "db/types.hpp" 4 | 5 | #include 6 | #include 7 | 8 | 9 | using namespace std::chrono_literals; 10 | 11 | 12 | // constexpr auto CC_SCHEME = CC_Scheme::NONE; 13 | constexpr auto CC_SCHEME = CC_Scheme::NO_WAIT; 14 | // constexpr auto CC_SCHEME = CC_Scheme::WAIT_DIE; 15 | 16 | enum class 
StatsBitmask : uint64_t { 17 | NONE = 0x00, 18 | COUNTER = 0x01, 19 | CYCLES = 0x02, 20 | PERIODIC = 0x04, 21 | 22 | ALL = 0xffffffffffffffff, 23 | }; 24 | constexpr StatsBitmask operator|(StatsBitmask lhs, StatsBitmask rhs) { 25 | using T = std::underlying_type::type; 26 | return static_cast(static_cast(lhs) | static_cast(rhs)); 27 | } 28 | constexpr bool operator&(StatsBitmask lhs, StatsBitmask rhs) { 29 | using T = std::underlying_type::type; 30 | return static_cast(lhs) & static_cast(rhs); 31 | } 32 | 33 | // constexpr StatsBitmask ENABLED_STATS = StatsBitmask::COUNTER | StatsBitmask::CYCLES | StatsBitmask::PERIODIC; 34 | 35 | constexpr StatsBitmask ENABLED_STATS = StatsBitmask::CYCLES; 36 | constexpr bool STATS_PER_WORKER = false; 37 | constexpr auto STATS_CYCLE_SAMPLE_TIME = 10ms; //100us; 38 | constexpr auto STATS_PERIODIC_SAMPLE_TIME = 500ms; 39 | constexpr auto PERIODIC_CSV_FILENAME = "periodic.csv"; 40 | constexpr auto SINGLE_NUMA = false; 41 | 42 | 43 | namespace error { 44 | 45 | constexpr bool PRINT_ABORT_CAUSE = false; 46 | constexpr bool LOG_TABLE = false; 47 | constexpr bool DUMP_SWITCH_PKTS = false; 48 | 49 | } // namespace error 50 | 51 | // ALL 52 | constexpr bool SWITCH_NO_CONFLICT = false; 53 | constexpr bool LM_ON_SWITCH = false; 54 | constexpr bool YCSB_OPTI_TEST = false; 55 | 56 | // YCSB 57 | // constexpr uint64_t NUM_KVS = 10'000'000; 58 | constexpr int NUM_OPS = 8; 59 | constexpr int MULTI_OP_PERCENTAGE = 100; 60 | constexpr bool YCSB_SORT_ACCESSES = false; 61 | constexpr bool YCSB_MULTI_MIX_RW = true; // set to false for fairness analysis 62 | 63 | // SMALLBANK 64 | constexpr int FREQUENCY_AMALGAMATE = 15; 65 | constexpr int FREQUENCY_BALANCE = 15; 66 | constexpr int FREQUENCY_DEPOSIT_CHECKING = 15; 67 | constexpr int FREQUENCY_SEND_PAYMENT = 25; 68 | constexpr int FREQUENCY_TRANSACT_SAVINGS = 15; 69 | constexpr int FREQUENCY_WRITE_CHECK = 15; 70 | 71 | // constexpr uint64_t NUM_ACCOUNTS = 1'000'000; 72 | 73 | constexpr int MIN_BALANCE = 
10000 * 100; // fixed-point instead of float 74 | constexpr int MAX_BALANCE = 50000 * 100; 75 | 76 | // TPCC 77 | // constexpr uint64_t NUM_WAREHOUSES = 1; 78 | // constexpr uint64_t HOME_W_ID = 0; 79 | // constexpr uint64_t NUM_DISTRICTS = NUM_WAREHOUSES*DISTRICTS_PER_WAREHOUSE; 80 | 81 | constexpr uint64_t DISTRICTS_PER_WAREHOUSE = 10; 82 | constexpr uint64_t CUSTOMER_PER_DISTRICT = 3000; 83 | constexpr uint64_t NUM_ITEMS = 100'000; 84 | 85 | // How many orders contains each NewOrder Transaction? 86 | constexpr uint64_t ORDER_CNT_MIN = 5; 87 | // constexpr uint64_t ORDER_CNT_MAX = 15; 88 | constexpr uint64_t ORDER_CNT_MAX = 10; 89 | 90 | // access remote warehouse 1% 91 | // constexpr int ORDER_REMOTE_WH_PROB = 1; 92 | // constexpr int ORDER_REMOTE_WH_PROB = 10; 93 | 94 | // whether paying customer is from remote wh and random district 95 | // constexpr int PAYMENT_REMOTE_PROB = 15; 96 | 97 | // NewOrder: 45% Payment: 43% 98 | constexpr int FREQUENCY_NEW_ORDER = 51; 99 | // constexpr int FREQUENCY_NEW_ORDER = 100; -------------------------------------------------------------------------------- /src/db/errors.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | 6 | namespace error { 7 | 8 | 9 | struct TableFull : public std::exception { 10 | const char* what() const noexcept { 11 | return "Table is full, allocate more memory"; 12 | } 13 | }; 14 | struct TableCastFailed : public std::exception { 15 | const char* what() const noexcept { 16 | return "Could not dynamic_cast table to type"; 17 | } 18 | }; 19 | struct InvalidAccessMode : public std::exception { 20 | const char* what() const noexcept { 21 | return "Invalid Access mode requested"; 22 | } 23 | }; 24 | struct FutureException : public std::exception { 25 | const char* what() const noexcept { 26 | return "Exception within Future"; 27 | } 28 | }; 29 | struct UndoException : public std::exception { 30 | const char* what() const noexcept 
{ 31 | return "Exception within Undo"; 32 | } 33 | }; 34 | struct PacketBufferTooSmall : public std::exception { 35 | const char* what() const noexcept { 36 | return "PacketBuffer too small"; 37 | } 38 | }; 39 | struct SerializerReadCmp : public std::exception { 40 | const char* what() const noexcept { 41 | return "SerializerReadCmp got wrong value"; 42 | } 43 | }; 44 | 45 | 46 | } // namespace error 47 | 48 | 49 | enum class [[nodiscard]] ErrorCode{ 50 | SUCCESS = 0, 51 | 52 | READ_LOCK_FAILED, 53 | WRITE_LOCK_FAILED, 54 | INVALID_ROW_ID, 55 | INVALID_ACCESS_MODE, 56 | }; 57 | 58 | inline bool operator!(ErrorCode e) { 59 | return e != ErrorCode::SUCCESS; 60 | } -------------------------------------------------------------------------------- /src/db/future.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | 4 | #include "comm/comm.hpp" 5 | #include "db/errors.hpp" 6 | 7 | #include 8 | #include 9 | 10 | 11 | struct AbstractFuture { 12 | std::atomic pkt{nullptr}; // msg::TupleGetRes 13 | 14 | void set_pkt(Communicator::Pkt_t* pkt) { 15 | this->pkt.store(pkt, std::memory_order_release); 16 | } 17 | 18 | auto get_pkt() { 19 | Communicator::Pkt_t* pkt; 20 | // Wait for pkt without generating cache misses 21 | while (!(pkt = this->pkt.load(std::memory_order_relaxed))) { 22 | __builtin_ia32_pause(); 23 | } 24 | return pkt; 25 | } 26 | }; 27 | 28 | 29 | template 30 | struct TupleFuture final : public AbstractFuture { 31 | static inline Tuple_t* EXCEPTION = reinterpret_cast(0xffffffff'ffffffff); 32 | 33 | // Tuple_t* tuple; 34 | std::atomic tuple{nullptr}; 35 | // char __cache_align[64-16]; 36 | 37 | TupleFuture() : AbstractFuture{}, tuple(nullptr) {} 38 | TupleFuture(Tuple_t* tuple) : AbstractFuture{}, tuple(tuple) {} 39 | 40 | // not threadsafe 41 | Tuple_t* get() { 42 | if (tuple == EXCEPTION) [[unlikely]] { // fast path 43 | return nullptr; 44 | } else if (tuple) [[likely]] { 45 | return tuple; 46 | } 47 | 48 | 
// Writes the ASCII column of a hex dump: the separator " | " followed by one
// character per input byte, where every non-printable byte is shown as '.'.
//
// os       stream to append to
// buffer   bytes to render
// bufsize  number of bytes to read from buffer
//
// Returns os to allow chaining.
std::ostream& render_printable_chars(std::ostream& os, const char* buffer, size_t bufsize) {
    os << " | ";
    for (size_t i = 0; i < bufsize; ++i) {
        // std::isprint requires a value representable as unsigned char (or
        // EOF); passing a negative plain char (any byte >= 0x80 where char is
        // signed) is undefined behaviour.
        const auto byte = static_cast<unsigned char>(buffer[i]);
        if (std::isprint(byte)) {
            os << buffer[i];
        } else {
            os << ".";
        }
    }
    return os;
}
'; 36 | } 37 | os << std::setw(2) << std::right << unsigned(buffer[i]); 38 | if (!printBlank) { 39 | printBlank = true; 40 | } 41 | } 42 | if (i % 8 != 0 && showPrintableChars) { 43 | for (size_t j = 0; j < 8 - (i % 8); ++j) { 44 | os << " "; 45 | } 46 | render_printable_chars(os, reinterpret_cast(&buffer[i] - (i % 8)), (i % 8)); 47 | } else if (i > 0 && showPrintableChars) { 48 | render_printable_chars(os, reinterpret_cast(&buffer[i] - 8), 8); 49 | } 50 | 51 | os << std::endl; 52 | 53 | os.fill(oldFillChar); 54 | os.flags(oldFormat); 55 | 56 | return os; 57 | } -------------------------------------------------------------------------------- /src/db/hex_dump.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | std::ostream& render_printable_chars(std::ostream& os, const char* buffer, 6 | size_t bufsize); 7 | 8 | std::ostream& hex_dump(std::ostream& os, const uint8_t* buffer, size_t bufsize, 9 | bool showPrintableChars = true); -------------------------------------------------------------------------------- /src/db/meson.build: -------------------------------------------------------------------------------- 1 | 2 | 3 | project_headers += files( 4 | 'buffers.hpp', 5 | 'config.hpp', 6 | 'database.hpp', 7 | 'defs.hpp', 8 | 'errors.hpp', 9 | 'future.hpp', 10 | 'hex_dump.hpp', 11 | 'mempools.hpp', 12 | 'spinlock.hpp', 13 | 'transaction.hpp', 14 | 'ts_factory.hpp', 15 | 'types.hpp', 16 | 'undolog.hpp', 17 | 'util.hpp', 18 | ) 19 | 20 | 21 | project_sources += files ( 22 | 'config.cpp', 23 | 'hex_dump.cpp', 24 | 'undolog.cpp', 25 | 'util.cpp', 26 | ) 27 | -------------------------------------------------------------------------------- /src/db/spinlock.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | class SpinLock { 6 | // TODO For the Exclusive-Spinlock, we use a 64-bit atomic, as we saw 2-3x better throughput in 
micro-benchmarks compared to a single byte implementation 7 | alignas(64) std::atomic lock_ = {0}; 8 | 9 | public: 10 | void lock() noexcept { 11 | for (;;) { 12 | // Optimistically assume the lock is free on the first try 13 | if (!lock_.exchange(true, std::memory_order_acquire)) { 14 | return; 15 | } 16 | // Wait for lock to be released without generating cache misses 17 | while (lock_.load(std::memory_order_relaxed)) { 18 | // Issue X86 PAUSE or ARM YIELD instruction to reduce contention 19 | // between hyper-threads 20 | __builtin_ia32_pause(); 21 | } 22 | } 23 | } 24 | 25 | bool try_lock() noexcept { 26 | // First do a relaxed load to check if lock is free in order to prevent 27 | // unnecessary cache misses if someone does while(!try_lock()) 28 | return !lock_.load(std::memory_order_relaxed) && 29 | !lock_.exchange(true, std::memory_order_acquire); 30 | } 31 | 32 | void unlock() noexcept { lock_.store(false, std::memory_order_release); } 33 | }; 34 | -------------------------------------------------------------------------------- /src/db/ts_factory.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "types.hpp" 4 | 5 | #include 6 | #include 7 | #include 8 | 9 | 10 | struct ClockTimestampFactory { 11 | using clock = std::chrono::high_resolution_clock; 12 | 13 | clock::time_point start = clock::now(); 14 | 15 | ClockTimestampFactory() { 16 | // std::stringstream ss; 17 | // ss << "start_ts=" << get() << '\n'; 18 | // std::cout << ss.str(); 19 | } 20 | 21 | timestamp_t get() { 22 | uint64_t ts = std::chrono::duration_cast(clock::now() - start).count(); 23 | return timestamp_t{ts}; 24 | } 25 | }; 26 | 27 | struct UniqueClockTimestampFactory { 28 | using clock = std::chrono::high_resolution_clock; 29 | 30 | clock::time_point start = clock::now(); 31 | uint64_t mask; 32 | 33 | UniqueClockTimestampFactory() { 34 | auto& config = Config::instance(); 35 | mask = (config.node_id << 8) | 
// Access mode of a tuple request, packed into 32 bits.
//
// Layout as used by the members below:
//   bits  0..7   the plain mode (INVALID / READ / WRITE), see get_clean()
//   bits  8..15  marker, non-zero once set_switch_index() has been called
//   bits 16..31  byte-swapped 16-bit switch index
struct AccessMode {
    using value_t = uint32_t;

    static constexpr value_t INVALID = 0x00000000;
    static constexpr value_t READ = 0x00000001;
    static constexpr value_t WRITE = 0x00000002;

    value_t value;

    constexpr AccessMode() : value(AccessMode::INVALID) {}

    constexpr AccessMode(value_t value) : value(value) {}

    // Implicit conversion yields only the plain mode (low byte).
    operator value_t() const {
        return get_clean();
    }

    // Compares the *raw* 32-bit value, i.e. including marker and switch
    // index; a mode tagged via set_switch_index() does not compare equal to
    // READ/WRITE here even though its conversion to value_t does.
    bool operator==(const value_t& rhs) const {
        return value == rhs;
    }

    // bool operator==(const AccessMode& rhs) const {
    //     return value == rhs.value;
    // }

    // Plain mode without marker and switch index.
    value_t get_clean() const {
        return value & 0x000000ff;
    }

    // True when the marker byte (bits 8..15) is non-zero.
    bool by_switch() const {
        return ((value >> 8) & 0xff) != 0;
    }

    // Stores the byte-swapped index in the upper 16 bits and sets the marker
    // byte to 0xaa. Bits are OR-ed in, so this is meant to be called once on
    // a value whose upper 24 bits are still zero.
    void set_switch_index(uint16_t idx) {
        value |= static_cast<value_t>(__builtin_bswap16(idx)) << 16;
        value |= 0x0000aa00;
    }
};
CC_Scheme::WAIT_DIE: 139 | os << "wait_die"; 140 | break; 141 | case CC_Scheme::NONE: 142 | os << "none"; 143 | break; 144 | } 145 | return os; 146 | } -------------------------------------------------------------------------------- /src/db/undolog.cpp: -------------------------------------------------------------------------------- 1 | #include "undolog.hpp" 2 | 3 | #include "comm/msg_handler.hpp" 4 | 5 | 6 | /* Private methods */ 7 | 8 | void Undolog::clear(const timestamp_t ts) { 9 | for (auto& action : actions) { 10 | action->clear(comm, tid, ts); 11 | } 12 | pool.clear(); 13 | actions.clear(); 14 | comm->handler->putresponses.wait(tid); // wait for all remote responses 15 | } 16 | 17 | 18 | void Undolog::clear_last_n(const timestamp_t ts, const size_t n) { 19 | if (actions.size() < n) { 20 | throw std::runtime_error("tried clearing more in undolog than that is there..."); 21 | } 22 | for (size_t i = 0; i < n; i++) { 23 | auto action = actions.back(); 24 | action->clear(comm, tid, ts); 25 | actions.pop_back(); 26 | } 27 | // putresponses += 1 on remote.clear() 28 | comm->handler->putresponses.wait(tid); // wait for all remote responses 29 | } -------------------------------------------------------------------------------- /src/db/undolog_novirtual.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "db/mempools.hpp" 4 | 5 | 6 | // without virtual, is somehow slower 7 | template 8 | struct WriteVal { 9 | Table_t* table; 10 | p4db::key_t index; 11 | 12 | WriteVal(Table_t* table, p4db::key_t index) 13 | : table(table), index(index) {} 14 | 15 | void clear() { 16 | table->template unlock(index); 17 | } 18 | }; 19 | template 20 | struct ReadVal { 21 | Table_t* table; 22 | p4db::key_t index; 23 | 24 | ReadVal(Table_t* table, p4db::key_t index) : table(table), index(index) {} 25 | 26 | void clear() { 27 | table->template unlock(index); 28 | } 29 | }; 30 | 31 | 32 | template 33 | struct Undolog { 34 | 
using Action = typename Tables_t::wrap2::apply; 35 | std::vector actions; 36 | 37 | 38 | template 39 | void add_write(Table_t* table, p4db::key_t index) { 40 | actions.emplace_back(WriteVal{table, index}); 41 | } 42 | 43 | template 44 | void add_read(Table_t* table, p4db::key_t index) { 45 | actions.emplace_back(ReadVal{table, index}); 46 | } 47 | 48 | void commit() { 49 | clear(); 50 | } 51 | 52 | void rollback() { 53 | clear(); 54 | } 55 | 56 | private: 57 | void clear() { 58 | for (auto& action : actions) { 59 | std::visit([](auto& action) { 60 | action.clear(); 61 | }, 62 | action); 63 | } 64 | actions.clear(); 65 | } 66 | }; -------------------------------------------------------------------------------- /src/db/util.cpp: -------------------------------------------------------------------------------- 1 | #include "util.hpp" 2 | 3 | #include "stats/context.hpp" 4 | 5 | #include 6 | 7 | 8 | void pin_worker(uint32_t core, pthread_t pid /*= pthread_self()*/) { 9 | WorkerContext::get().tid = core; 10 | core += 2; // make space for dpdk main and receiver thread 11 | 12 | constexpr auto NUM_SOCKETS = 2; 13 | constexpr auto NUM_HYPERTHREADS = 2; 14 | static const auto cpu_map = []() { 15 | std::vector map; 16 | 17 | // socket 0: real-cores , hyper threads, then socket 1: 18 | auto threads = std::thread::hardware_concurrency(); 19 | if (threads == 0) { 20 | throw std::runtime_error("std::thread::hardware_concurrency() failed."); 21 | } 22 | map.reserve(threads); 23 | 24 | auto per_socket = threads / NUM_SOCKETS; 25 | auto real_cores = per_socket / NUM_HYPERTHREADS; 26 | for (auto socket = 0; socket < NUM_SOCKETS; ++socket) { 27 | for (auto i = socket * real_cores; i < (socket + 1) * real_cores; ++i) { 28 | map.emplace_back(i); 29 | } 30 | for (auto i = (NUM_SOCKETS + socket) * real_cores; i < (NUM_SOCKETS + socket + 1) * real_cores; ++i) { 31 | map.emplace_back(i); 32 | } 33 | } 34 | 35 | if constexpr (SINGLE_NUMA) { 36 | map.resize(map.size() / 2); 37 | } 38 | 39 | 
// Formats num / den as a decimal string with up to `precision` fractional
// digits, computed by integer long division (digits are truncated, not
// rounded).
//
// Returns "inf" when den == 0. When the division is exact or precision == 0
// only the integral part is returned (no decimal point).
std::string stringifyFraction(const uint64_t num, const unsigned den, const unsigned precision) {
    constexpr unsigned base = 10;

    // prevent division by zero
    if (den == 0) {
        return "inf";
    }

    // integral part can be computed using regular division
    std::string result = std::to_string(num / den);

    // First step of the long division. The remainder is kept in 64 bits:
    // it is always < den <= UINT_MAX, so remainder * base cannot overflow a
    // uint64_t, whereas a 32-bit remainder wraps as soon as den > UINT_MAX / 10.
    uint64_t remainder = num % den;
    if (remainder == 0 || precision == 0) {
        return result;
    }

    // reserve characters to avoid unnecessary re-allocation
    result.reserve(result.size() + precision + 1);

    // fractional part, one digit per iteration
    result += '.';
    for (unsigned i = 0; i < precision; ++i) {
        remainder *= base;
        result.push_back(static_cast<char>('0' + remainder / den));
        remainder %= den;
    }

    return result;
}
-------------------------------------------------------------------------------- /src/declustered_layout/Makefile: -------------------------------------------------------------------------------- 1 | CXX=g++ 2 | CFLAGS=-std=c++2a -O0 -g -MD -Wall -Wextra -fmax-errors=1 -I. -I../../src/ -I./MQLib/include/ 3 | # CFLAGS=-std=c++2a -O3 -g -MD -Wall -Wextra -fmax-errors=1 -I. -I../../src 4 | LIBS=-pthread -L./MQLib/bin -l:MQLib.a -fsanitize=address 5 | 6 | 7 | #OBJS = test.o declustered_layout.o transaction.o 8 | SRCS := $(wildcard *.cpp) 9 | OBJS := $(patsubst %.cpp,%.o,$(SRCS)) 10 | 11 | 12 | %.o: %.cpp #$(DEPS) 13 | $(CXX) -c -o $@ $< $(CFLAGS) 14 | 15 | test: $(OBJS) 16 | $(CXX) -o $@ $^ $(CFLAGS) $(LIBS) 17 | 18 | 19 | .PHONY: clean 20 | 21 | clean: 22 | rm -f *.o *.d *~ test *.dot *.png 23 | 24 | -include $(OBJS:.o=.d) 25 | -------------------------------------------------------------------------------- /src/declustered_layout/declustered_layout.cpp: -------------------------------------------------------------------------------- 1 | #include "declustered_layout.hpp" 2 | 3 | #include "dotwriter.hpp" 4 | #include "graph_maxcut.hpp" 5 | #include "graph_toposort.hpp" 6 | 7 | 8 | namespace declustered_layout { 9 | 10 | 11 | void DeclusteredLayout::add_sample(Transaction& txn) { 12 | const std::size_t N = txn.accesses.size(); 13 | for (size_t i = 0; i < N; i++) { 14 | for (size_t j = i; j < N; j++) { 15 | if (i == j) { 16 | continue; 17 | } 18 | g.add_undirected_edge(txn.accesses[i], txn.accesses[j], txn.repeats); 19 | } 20 | } 21 | 22 | for (auto& dep : txn.deps) { 23 | g.add_directed_edge(dep.tid1, dep.tid2, txn.repeats); 24 | } 25 | } 26 | 27 | void DeclusteredLayout::compute_layout(bool topo_sort, bool write_dot) { 28 | g.print_stats(); 29 | auto part = GraphMaxCut::part(g, GraphMaxCut::RMAXCUT, PARTITIONS); 30 | // auto part = GraphMaxCut::part(g, GraphMaxCut::MQLIB, PARTITIONS); 31 | part.print_stats(); 32 | 33 | GraphTopoSort gts; 34 | gts.setup(g, part); 35 | 
if (write_dot) { 36 | DotWriter::write_directed("graph_topo.dot", gts.topo).render(); 37 | } 38 | 39 | // actually only needed when we have dependencies, e.g. SmallBank 40 | if (topo_sort) { 41 | auto ordering = gts.topo_sort(GraphTopoSort::TIGHT); 42 | // auto ordering = gts.topo_sort(GraphTopoSort::FAS); // some bug where topo graph has < 8 nodes 43 | // auto ordering = gts.topo_sort(GraphTopoSort::RANDOM); 44 | 45 | if (write_dot) { 46 | DotWriter::write_directed("graph_topo_dag.dot", gts.topo).render(); // contains DAG 47 | } 48 | 49 | // part.print(); 50 | std::cout << "ordering=" << ordering << '\n'; 51 | for (auto& [tid, pid] : part.map) { 52 | pid = ordering[pid]; 53 | } 54 | // part.print(); 55 | } 56 | 57 | if (write_dot) { 58 | DotWriter::write("graph_1.dot", g, part).render(); 59 | DotWriter::write_clustered("graph_2.dot", g, part).render(); 60 | } 61 | 62 | 63 | std::array reg_fill{}; 64 | for (auto& [tid, pid] : part.map) { 65 | TupleLocation tl; 66 | tl.stage_id = pid; 67 | tl.reg_array_id = 0; // unused 0-4 68 | tl.reg_array_idx = reg_fill[tl.stage_id * REGS_PER_STAGE + tl.reg_array_id]++; 69 | tl.lock_bit = 0; // unsued 0 | 1 70 | 71 | 72 | if (!(tl.stage_id < STAGES)) { 73 | throw std::runtime_error("tl.stage_id >= STAGES"); 74 | } 75 | if (!(tl.reg_array_id < REGS_PER_STAGE)) { 76 | throw std::runtime_error("tl.reg_array_id >= REGS_PER_STAGE"); 77 | } 78 | if (!(tl.reg_array_idx < REG_SIZE)) { 79 | std::cout << "idx=" << tl.reg_array_idx << "\n"; 80 | throw std::runtime_error("tl.reg_array_idx >= REG_SIZE"); 81 | } 82 | 83 | 84 | // We partition the register-array internally by #lock-bits using Maxcut 85 | // then assign each tuple within a register-array 0 or 1 as lock-bit 86 | // this lock-bit distribution does NOT have to be uniform. 
87 | 88 | switch_tuples[tid] = tl; 89 | } 90 | } 91 | 92 | bool DeclusteredLayout::is_hot(uint64_t idx) const { 93 | return switch_tuples.find(idx) != switch_tuples.end(); 94 | } 95 | 96 | const TupleLocation& DeclusteredLayout::get_location(const uint64_t idx) const { 97 | return switch_tuples.at(idx); 98 | } 99 | 100 | void DeclusteredLayout::clear() { 101 | switch_tuples.clear(); 102 | // g.clear(); 103 | } 104 | 105 | void DeclusteredLayout::print() { 106 | for (auto& [tid, tl] : switch_tuples) { 107 | std::cout << "tuple[" << tid << "]=" << tl << '\n'; 108 | } 109 | } 110 | 111 | 112 | } // namespace declustered_layout -------------------------------------------------------------------------------- /src/declustered_layout/declustered_layout.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "graph.hpp" 4 | #include "transaction.hpp" 5 | #include "tuple_location.hpp" 6 | 7 | #include 8 | #include 9 | 10 | 11 | namespace declustered_layout { 12 | 13 | 14 | struct DeclusteredLayout { 15 | // Intel confidential 16 | static constexpr auto STAGES = 42; 17 | static constexpr auto REGS_PER_STAGE = 1; 18 | static constexpr auto REG_SIZE = 42; 19 | static constexpr auto LOCK_BITS = 2; 20 | static constexpr auto PARTITIONS = STAGES * REGS_PER_STAGE; 21 | static constexpr auto MAX_ACCESSES = 10; 22 | 23 | 24 | Graph g; 25 | std::unordered_map switch_tuples; 26 | 27 | void add_sample(Transaction& txn); 28 | 29 | void compute_layout(bool topo_sort, bool write_dot); 30 | 31 | bool is_hot(uint64_t idx) const; 32 | 33 | const TupleLocation& get_location(uint64_t idx) const; 34 | 35 | void clear(); 36 | 37 | void print(); 38 | }; 39 | 40 | 41 | } // namespace declustered_layout 42 | -------------------------------------------------------------------------------- /src/declustered_layout/dotwriter.cpp: -------------------------------------------------------------------------------- 1 | #include "dotwriter.hpp" 2 
| 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | 9 | namespace declustered_layout { 10 | 11 | 12 | void DotWriter::Render::render() { 13 | std::stringstream ss; 14 | ss << "dot -Gnewrank=true -Tpng " << filename << " -o " << filename << ".png"; 15 | int rc = std::system(ss.str().c_str()); 16 | if (rc != 0) { 17 | throw std::runtime_error("DotWriter.Render failed"); 18 | } 19 | } 20 | 21 | DotWriter::Render DotWriter::write(std::string filename, Graph& g, Partitioning& part) { 22 | std::ofstream file; 23 | file.open(filename); 24 | file << "strict graph {\n"; 25 | 26 | std::set nodes; 27 | for (auto& [e, w] : g.undirected_ewgts) { 28 | auto u = g.nid_lut.rev(e.u); 29 | auto v = g.nid_lut.rev(e.v); 30 | nodes.insert(v); 31 | nodes.insert(u); 32 | 33 | file << u << " -- " << v << " [label=\"" << w << "\",weight=" 34 | << w << ",penwidth=" << 1 << "];\n"; 35 | } 36 | 37 | for (auto& node : nodes) { 38 | auto pid = part.get(node); 39 | file << node << " [color=" << colors.at(pid % colors.size()) << "];\n"; 40 | } 41 | 42 | file << "}\n"; 43 | file.close(); 44 | return Render{filename}; 45 | } 46 | 47 | DotWriter::Render DotWriter::write_directed(std::string filename, Graph& g) { 48 | std::ofstream file; 49 | file.open(filename); 50 | file << "digraph {\n"; 51 | 52 | for (auto& [e, w] : g.directed_ewgts) { 53 | auto u = g.nid_lut.rev(e.u); 54 | auto v = g.nid_lut.rev(e.v); 55 | 56 | file << u << " -> " << v << " [label=\"" << w << "\",weight=" << w 57 | << ",penwidth=" << 1 << "];\n"; 58 | } 59 | 60 | file << "}\n"; 61 | file.close(); 62 | return Render{filename}; 63 | } 64 | 65 | DotWriter::Render DotWriter::write_clustered(std::string filename, Graph& g, Partitioning& part) { 66 | std::ofstream file; 67 | file.open(filename); 68 | file << "graph {\n"; 69 | // file << "splines=line;\n"; 70 | 71 | std::set nodes; 72 | for (auto& [e, w] : g.undirected_ewgts) { 73 | auto u = g.nid_lut.rev(e.u); 74 | auto v = g.nid_lut.rev(e.v); 75 | nodes.insert(v); 76 | 
nodes.insert(u); 77 | 78 | if (part.get(u) == part.get(v)) { 79 | file << u << " -- " << v << " [label=\"" << w 80 | << "\",weight=" << w << ",penwidth=2.0,color=orange];\n"; 81 | continue; 82 | } else { 83 | file << u << " -- " << v << " [label=\"" << w 84 | << "\",weight=" << w 85 | << ",style=dashed,color=grey,fontcolor=grey];\n"; 86 | continue; 87 | } 88 | 89 | file << u << " -- " << v << " [label=\"" << w << "\",weight=\"" << w 90 | << "\",penwidth=\"" << 1 << "\"];\n"; 91 | } 92 | 93 | for (size_t i = 0; i < part.parts; ++i) { 94 | file << "subgraph cluster_" << i << "{\n"; 95 | file << "label=<Part " << i << ">\n"; 96 | file << "color=lightgrey\n"; 97 | file << "style=\"rounded,filled\"\n"; 98 | for (auto& node : nodes) { 99 | auto pid = part.get(node); 100 | if (pid != i) { 101 | continue; 102 | } 103 | file << node << " [color=" << colors.at(pid % colors.size()) 104 | << "];\n"; 105 | } 106 | file << "}\n\n"; 107 | } 108 | 109 | file << "}\n"; 110 | file.close(); 111 | return Render{filename}; 112 | } 113 | 114 | 115 | } // namespace declustered_layout -------------------------------------------------------------------------------- /src/declustered_layout/dotwriter.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "graph.hpp" 4 | #include "partitioning.hpp" 5 | 6 | #include 7 | #include 8 | #include 9 | 10 | 11 | namespace declustered_layout { 12 | 13 | 14 | struct DotWriter { 15 | // https://graphs.grevian.org/example 16 | // dot -Tpng example.dot -o example.png 17 | 18 | static constexpr std::array colors = {"red", "blue", "green", 19 | "yellow", "pink", "lightblue", 20 | "forestgreen", "brown"}; 21 | 22 | struct Render { 23 | std::string filename; 24 | 25 | void render(); 26 | }; 27 | 28 | static Render write(std::string filename, Graph& g, Partitioning& part); 29 | 30 | static Render write_directed(std::string filename, Graph& g); 31 | 32 | static Render write_clustered(std::string 
filename, Graph& g, Partitioning& part); 33 | }; 34 | 35 | 36 | } // namespace declustered_layout 37 | -------------------------------------------------------------------------------- /src/declustered_layout/graph.cpp: -------------------------------------------------------------------------------- 1 | #include "graph.hpp" 2 | 3 | 4 | namespace declustered_layout { 5 | 6 | 7 | void Graph::add_undirected_edge(uint64_t t1, uint64_t t2, uint64_t wgt) { 8 | auto edge = Edge::undirected(nid_lut.get(t1), nid_lut.get(t2)); 9 | undirected_ewgts[edge] += wgt; 10 | } 11 | 12 | Edge Graph::add_directed_edge(uint64_t t1, uint64_t t2, uint64_t wgt) { 13 | auto edge = Edge::directed(nid_lut.get(t1), nid_lut.get(t2)); 14 | directed_ewgts[edge] += wgt; 15 | return edge; 16 | } 17 | 18 | void Graph::remove_directed_edge(Edge& e) { 19 | auto it = directed_ewgts.find(e); 20 | if (it == directed_ewgts.end()) { 21 | throw std::runtime_error("Could not find directed edge to delete"); 22 | } 23 | directed_ewgts.erase(it); 24 | } 25 | 26 | void Graph::add_dangling(uint64_t t) { 27 | dangling.insert(nid_lut.get(t)); 28 | } 29 | 30 | void Graph::print_stats() { 31 | uint64_t max_weight = 0; 32 | for (auto& [e, w] : undirected_ewgts) { 33 | max_weight = std::max(max_weight, w); 34 | } 35 | std::cout << "undirected:\n"; 36 | std::cout << "\t#nodes=" << nid_lut.current_node_id << "\n"; 37 | std::cout << "\t#edges=" << undirected_ewgts.size() << "\n"; 38 | std::cout << "\tmax_weight=" << max_weight << "\n"; 39 | } 40 | 41 | void Graph::clear() { 42 | nid_lut.clear(); 43 | undirected_ewgts.clear(); 44 | directed_ewgts.clear(); 45 | dangling.clear(); 46 | } 47 | 48 | } // namespace declustered_layout -------------------------------------------------------------------------------- /src/declustered_layout/graph.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "db/util.hpp" 4 | 5 | #include 6 | #include 7 | #include 8 | #include 
// Bidirectional lookup table that hands out consecutive ids (0, 1, 2, ...)
// to keys in the order they are first requested.
template <typename From, typename To>
struct IncrementalLUT {
    std::unordered_map<From, To> map;   // key -> assigned id
    std::unordered_map<To, From> mapr;  // assigned id -> key
    To current_node_id = 0;             // next id to hand out == number of keys

    // Returns the id of `tuple_id`, assigning the next free id on first use.
    To get(From tuple_id) {
        // One hash lookup instead of contains() + operator[] + at().
        auto [it, inserted] = map.try_emplace(tuple_id, current_node_id);
        if (inserted) {
            mapr.emplace(current_node_id, tuple_id);
            ++current_node_id;
        }
        return it->second;
    }

    // Returns the key that was assigned `node_id`.
    // Throws std::out_of_range if the id was never handed out.
    From rev(To node_id) const { return mapr.at(node_id); }

    // Forgets every mapping; ids restart at 0.
    void clear() {
        current_node_id = 0;
        map.clear();
        mapr.clear();
    }
};
// Static-only facade that turns a Graph into a Partitioning.
// NOTE(review): the name suggests a max-cut based split; the implementation
// lives in graph_maxcut.cpp and is not visible here — confirm there.
struct GraphMaxCut {
    // Selects the backend used by part().
    // NOTE(review): presumably maps to part_rmaxcut() / part_mqlib() below — confirm.
    enum Algo {
        RMAXCUT,
        MQLIB,
    };

    // Partitions `graph` with the chosen `algo`; `npart` is the requested
    // number of partitions (exact semantics not visible from this header).
    static Partitioning part(Graph& graph, const Algo algo, uint32_t npart);


private:
    // Helper taking a graph and an existing partitioning; returns a tuple.
    // NOTE(review): element types of the tuple are not visible here.
    static std::tuple split(Graph& g, Partitioning& part);

    static Partitioning part_rmaxcut(Graph& g);

    static Partitioning part_mqlib(Graph& g);
};
+= files( 4 | 'declustered_layout.hpp', 5 | 'dotwriter.hpp', 6 | 'graph.hpp', 7 | 'graph_maxcut.hpp', 8 | 'graph_toposort.hpp', 9 | 'partitioning.hpp', 10 | 'switch_simulator.hpp', 11 | 'transaction.hpp', 12 | 'tuple_location.hpp', 13 | ) 14 | 15 | 16 | project_sources += files( 17 | 'declustered_layout.cpp', 18 | 'dotwriter.cpp', 19 | 'graph.cpp', 20 | 'graph_maxcut.cpp', 21 | 'graph_toposort.cpp', 22 | 'partitioning.cpp', 23 | 'switch_simulator.cpp', 24 | ) 25 | -------------------------------------------------------------------------------- /src/declustered_layout/mqlib.patch: -------------------------------------------------------------------------------- 1 | diff --git a/src/heuristics/qubo/glover2010.cpp b/src/heuristics/qubo/glover2010.cpp 2 | index e926b6e..522a9cc 100644 3 | --- a/src/heuristics/qubo/glover2010.cpp 4 | +++ b/src/heuristics/qubo/glover2010.cpp 5 | @@ -2,6 +2,7 @@ 6 | #include 7 | #include 8 | #include 9 | +#include 10 | #include 11 | #include "heuristics/qubo/glover2010.h" 12 | #include "util/random.h" 13 | diff --git a/src/heuristics/qubo/katayama2000.cpp b/src/heuristics/qubo/katayama2000.cpp 14 | index 9a3c909..04595d5 100644 15 | --- a/src/heuristics/qubo/katayama2000.cpp 16 | +++ b/src/heuristics/qubo/katayama2000.cpp 17 | @@ -1,5 +1,6 @@ 18 | #include 19 | #include 20 | +#include 21 | #include "heuristics/qubo/katayama2000.h" 22 | #include "util/random.h" 23 | 24 | diff --git a/src/heuristics/qubo/merz2002.cpp b/src/heuristics/qubo/merz2002.cpp 25 | index 2c46d63..052bce0 100644 26 | --- a/src/heuristics/qubo/merz2002.cpp 27 | +++ b/src/heuristics/qubo/merz2002.cpp 28 | @@ -1,5 +1,6 @@ 29 | #include 30 | #include 31 | +#include 32 | #include "heuristics/qubo/merz2002.h" 33 | #include "util/random.h" 34 | 35 | diff --git a/src/heuristics/qubo/merz2004.cpp b/src/heuristics/qubo/merz2004.cpp 36 | index 40f2bb2..0e471b3 100644 37 | --- a/src/heuristics/qubo/merz2004.cpp 38 | +++ b/src/heuristics/qubo/merz2004.cpp 39 | @@ -1,4 +1,5 @@ 40 | 
#include 41 | +#include 42 | #include 43 | #include "heuristics/qubo/merz2004.h" 44 | #include "util/random.h" 45 | -------------------------------------------------------------------------------- /src/declustered_layout/partitioning.cpp: -------------------------------------------------------------------------------- 1 | #include "partitioning.hpp" 2 | 3 | 4 | namespace declustered_layout { 5 | 6 | 7 | Partitioning::Partitioning(uint64_t parts) : parts(parts) {} 8 | 9 | void Partitioning::insert(uint64_t tid, uint64_t pid) { 10 | if (map.contains(tid)) { 11 | throw std::invalid_argument("mapping already contains tid"); 12 | } 13 | if (!(pid < parts)) { 14 | throw std::invalid_argument("pid is not within parts"); 15 | } 16 | map[tid] = pid; 17 | } 18 | 19 | bool Partitioning::has(uint64_t tid) const { 20 | return map.contains(tid); 21 | } 22 | 23 | 24 | const uint64_t& Partitioning::get(const uint64_t tid) const { 25 | return map.at(tid); 26 | } 27 | 28 | Partitioning& Partitioning::operator+=(const Partitioning& rhs) { 29 | uint64_t offset = parts; 30 | parts += rhs.parts; 31 | for (auto& [tid, pid] : rhs.map) { 32 | insert(tid, pid + offset); 33 | } 34 | return *this; 35 | } 36 | 37 | void Partitioning::print() { 38 | struct Pair { 39 | uint64_t tid; 40 | uint64_t pid; 41 | 42 | bool operator<(const Pair& rhs) const { return tid < rhs.tid; } 43 | }; 44 | std::vector part_vec; 45 | part_vec.reserve(map.size()); 46 | for (auto& [tid, reg] : map) { 47 | part_vec.emplace_back(tid, reg); 48 | } 49 | std::sort(part_vec.begin(), part_vec.end()); 50 | for (auto& [tid, reg] : part_vec) { 51 | std::cout << "tuple[" << tid << "] --> " << reg << "\n"; 52 | } 53 | } 54 | 55 | void Partitioning::print_stats() { 56 | std::vector part_sizes(parts); 57 | for (auto& [tid, reg] : map) { 58 | part_sizes[reg]++; 59 | } 60 | std::cout << "#parts=" << parts << " partition_sizes=" << part_sizes 61 | << "\n"; 62 | } 63 | 64 | 65 | } // namespace declustered_layout 66 | 
// Assignment of tuple ids (tid) to partition ids (pid) in the range [0, parts).
struct Partitioning {
    using map_t = std::unordered_map;

    uint64_t parts;  // number of partitions; insert() rejects pid >= parts
    map_t map;       // tid -> pid

    // Creates an empty partitioning with `parts` partitions.
    Partitioning(uint64_t parts);

    // Maps `tid` to `pid`; throws std::invalid_argument if `tid` is already
    // mapped or `pid` is not below `parts`.
    void insert(uint64_t tid, uint64_t pid);

    // True if `tid` has a partition assigned.
    bool has(uint64_t tid) const;

    // Partition of `tid`; throws (via map.at) if `tid` is unmapped.
    const uint64_t& get(const uint64_t tid) const;

    // Merges `rhs` into this object, shifting every pid of `rhs` by the
    // current value of `parts` and growing `parts` by `rhs.parts`.
    Partitioning& operator+=(const Partitioning& rhs);

    // Prints all mappings sorted by tid.
    void print();

    // Prints the partition count and the number of tuples per partition.
    void print_stats();
};
20 | make 21 | popd 22 | 23 | 24 | git clone --depth=1 https://github.com/DRMacIver/Feedback-Arc-Set 25 | pushd Feedback-Arc-Set 26 | make fas 27 | popd 28 | -------------------------------------------------------------------------------- /src/declustered_layout/switch_simulator.cpp: -------------------------------------------------------------------------------- 1 | #include "switch_simulator.hpp" 2 | 3 | 4 | namespace declustered_layout { 5 | 6 | 7 | SwitchSimulator::SwitchSimulator(DeclusteredLayout& dcl) : dcl(dcl) { 8 | // if (part.parts > NUM_REGS) { 9 | // throw std::invalid_argument("partitioning contains more partitions " 10 | // "than available registers"); 11 | // } 12 | } 13 | 14 | void SwitchSimulator::process(std::vector txns) { 15 | std::vector pass_hist(DeclusteredLayout::MAX_ACCESSES); 16 | std::vector reg_hist(DeclusteredLayout::STAGES); 17 | std::vector regs(DeclusteredLayout::MAX_ACCESSES); // trail of accesses 18 | uint64_t total_txns = 0; // inclusive repeats 19 | 20 | for (size_t i = 0; auto& txn : txns) { 21 | if (!(txn.accesses.size() <= DeclusteredLayout::MAX_ACCESSES)) { 22 | std::cout << "txn.accesses.size()=" << txn.accesses.size() << '\n'; 23 | throw std::invalid_argument("transaction needs more accesses than allowed"); 24 | } 25 | 26 | regs.clear(); 27 | std::fill(reg_hist.begin(), reg_hist.end(), 0); 28 | for (auto& access : txn.accesses) { 29 | uint64_t reg = dcl.get_location(access).stage_id; 30 | reg_hist[reg] += 1; 31 | regs.emplace_back(reg); 32 | } 33 | 34 | uint64_t passes = *std::max_element(reg_hist.begin(), reg_hist.end()); 35 | 36 | 37 | uint64_t violated_deps = 0; 38 | if (include_deps) { 39 | for (auto& dep : txn.deps) { 40 | auto p1 = dcl.get_location(dep.tid1).stage_id; 41 | auto p2 = dcl.get_location(dep.tid2).stage_id; 42 | if (p1 >= p2) { 43 | // std::cout << "dep violated: " << p1 << "<" << p2 << '\n'; 44 | ++violated_deps; 45 | } 46 | } 47 | } 48 | 49 | pass_hist[passes - 1] += txn.repeats; 50 | total_txns += 
txn.repeats; 51 | std::cout << "txn[" << (i++) << "] passes=" << passes << " !deps=" << violated_deps 52 | << " --> " << txn.accesses << " regs=" << regs << "\n"; 53 | } 54 | 55 | // crop last zero elements 56 | auto it = std::find_if(pass_hist.rbegin(), pass_hist.rend(), [](auto x) { 57 | return x > 0; 58 | }); 59 | auto max_passes = std::distance(it, pass_hist.rend()); 60 | pass_hist.resize(max_passes); 61 | 62 | std::vector pass_dist; 63 | pass_dist.reserve(pass_hist.size()); 64 | for (auto& x : pass_hist) { 65 | pass_dist.emplace_back(static_cast(x) / total_txns); 66 | } 67 | 68 | // print results 69 | std::cout << "pass_hist={single,multi,...}=" << pass_hist << "\n"; 70 | std::streamsize ss = std::cout.precision(); 71 | std::cout.precision(4); 72 | std::cout << "pass_dist={single,multi,...}=" << pass_dist << "\n"; 73 | std::cout.precision(ss); 74 | } 75 | 76 | 77 | } // namespace declustered_layout -------------------------------------------------------------------------------- /src/declustered_layout/switch_simulator.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "declustered_layout.hpp" 4 | #include "partitioning.hpp" 5 | #include "transaction.hpp" 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | 15 | namespace declustered_layout { 16 | 17 | 18 | struct SwitchSimulator { 19 | DeclusteredLayout& dcl; 20 | bool include_deps = false; 21 | 22 | SwitchSimulator(DeclusteredLayout& dcl); 23 | 24 | void process(std::vector txns); 25 | }; 26 | 27 | 28 | } // namespace declustered_layout 29 | -------------------------------------------------------------------------------- /src/declustered_layout/test.cpp: -------------------------------------------------------------------------------- 1 | #include "declustered_layout.hpp" 2 | #include "switch_simulator.hpp" 3 | #include "transaction.hpp" 4 | 5 | 6 | int main() { 7 | using namespace declustered_layout; 
8 | 9 | 10 | DeclusteredLayout dcl; 11 | std::vector txns; 12 | 13 | Transaction t1; 14 | t1.generate_n(8); 15 | t1.rerun(5); 16 | t1.generate_chain_dep(); 17 | // t1.dependency(t1.accesses.back(), t1.accesses.front()); 18 | dcl.add_sample(t1); 19 | txns.emplace_back(t1); 20 | 21 | 22 | // Transaction t2; 23 | // t2.generate_n(8); 24 | // t2.rerun(5); 25 | // dcl.add_sample(t2); 26 | // txns.emplace_back(t2); 27 | 28 | Transaction t5; 29 | t5.generate_n(8); 30 | t5.rerun(5); 31 | t5.generate_chain_dep(); 32 | dcl.add_sample(t5); 33 | txns.emplace_back(t5); 34 | 35 | // add some cross-partition 36 | Transaction t6; 37 | for (auto& a : t1.accesses) { 38 | t6.access(a); 39 | } 40 | t6.access(t5.accesses[0]); 41 | t6.access(t5.accesses[1]); 42 | dcl.add_sample(t6); 43 | txns.emplace_back(t6); 44 | 45 | // for (int i = 0; i < 100000; ++i) { 46 | // Transaction t; 47 | // t.generate_n(8); 48 | // t.rerun(rand() % 100 + 1); 49 | // dcl.add_sample(t); 50 | // txns.emplace_back(t); 51 | // } 52 | 53 | dcl.compute_layout(true, true); 54 | dcl.print(); 55 | 56 | 57 | SwitchSimulator sim{dcl}; 58 | sim.include_deps = true; 59 | sim.process(txns); 60 | 61 | 62 | return 0; 63 | } -------------------------------------------------------------------------------- /src/declustered_layout/transaction.cpp: -------------------------------------------------------------------------------- 1 | #include "transaction.hpp" 2 | 3 | 4 | namespace declustered_layout { 5 | 6 | 7 | void Transaction::generate_n(std::size_t n) { 8 | accesses.reserve(accesses.size() + n); 9 | for (size_t i = 0; i < n; ++i) { 10 | auto access = gen(); 11 | if (std::find(accesses.begin(), accesses.end(), access) != accesses.end()) { 12 | --i; 13 | continue; // prevent duplicates 14 | } 15 | accesses.emplace_back(access); 16 | } 17 | } 18 | 19 | void Transaction::generate_chain_dep() { 20 | if (accesses.size() <= 2) { 21 | throw std::runtime_error("transaction must at least have 2 accesses"); 22 | } 23 | for (size_t i = 
0; i < accesses.size() - 1; ++i) { 24 | dependency(accesses[i], accesses[i + 1]); 25 | } 26 | } 27 | 28 | void Transaction::dependency(uint64_t tid1, uint64_t tid2) { 29 | deps.emplace_back(tid1, tid2); 30 | } 31 | 32 | void Transaction::rerun(uint64_t times) { 33 | repeats = times; 34 | }; 35 | 36 | void Transaction::access(uint64_t tid) { 37 | accesses.emplace_back(tid); 38 | } 39 | 40 | 41 | } // namespace declustered_layout -------------------------------------------------------------------------------- /src/declustered_layout/transaction.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "graph.hpp" 4 | 5 | #include 6 | #include 7 | #include 8 | 9 | 10 | namespace declustered_layout { 11 | 12 | 13 | struct Generator { 14 | std::mt19937 gen; 15 | std::uniform_int_distribution dist{0, 1000}; 16 | Generator() { gen.seed(0); } 17 | uint64_t operator()() { return dist(gen); } 18 | }; 19 | 20 | 21 | struct Transaction { 22 | struct Dependency { 23 | uint64_t tid1; 24 | uint64_t tid2; 25 | }; 26 | 27 | inline static Generator gen; 28 | std::vector accesses; 29 | std::vector deps; 30 | uint64_t repeats = 1; 31 | 32 | Transaction() = default; 33 | 34 | void generate_n(std::size_t n); 35 | 36 | void generate_chain_dep(); 37 | 38 | void dependency(uint64_t tid1, uint64_t tid2); 39 | 40 | void rerun(uint64_t times); 41 | 42 | void access(uint64_t tid); 43 | }; 44 | 45 | 46 | } // namespace declustered_layout 47 | -------------------------------------------------------------------------------- /src/declustered_layout/tuple_location.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | 7 | namespace declustered_layout { 8 | 9 | 10 | struct TupleLocation { 11 | uint8_t stage_id; 12 | uint8_t reg_array_id; 13 | uint16_t reg_array_idx; 14 | uint8_t lock_bit; 15 | 16 | friend std::ostream& operator<<(std::ostream& os, const 
TupleLocation& self) { 17 | os << "stage=" << self.stage_id << " reg=" << self.reg_array_id 18 | << " idx=" << self.reg_array_idx << " lock=" << self.lock_bit; 19 | return os; 20 | } 21 | }; 22 | 23 | 24 | } // namespace declustered_layout -------------------------------------------------------------------------------- /src/dpdk_lib/cpuset.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | 10 | struct Utils { 11 | template 12 | static std::string to_hex(T i) { 13 | std::stringstream stream; 14 | stream << "0x" 15 | << std::setfill('0') << std::setw(sizeof(T) * 2) 16 | << std::hex << i; 17 | return stream.str(); 18 | } 19 | }; 20 | 21 | 22 | #define EXIT_WITH_ERROR(reason, ...) \ 23 | do { \ 24 | printf("Terminated in error: " reason "\n", ##__VA_ARGS__); \ 25 | exit(1); \ 26 | } while (0) 27 | 28 | 29 | struct CPUSet { 30 | std::set available; 31 | std::set in_use; 32 | 33 | CPUSet() = default; 34 | 35 | CPUSet(std::initializer_list cpus) : available(cpus) { 36 | if (available.size() != cpus.size()) { 37 | EXIT_WITH_ERROR("Elements in CPUSet not unique"); 38 | } 39 | } 40 | 41 | bool is_available(uint32_t core) { 42 | return available.contains(core); 43 | } 44 | 45 | void mark_in_use(uint32_t core) { 46 | size_t r = available.erase(core); 47 | if (r != 1) { 48 | EXIT_WITH_ERROR("mark_in_use(): Core %d not found", core); 49 | } 50 | in_use.insert(core); 51 | } 52 | 53 | void mark_unused(uint32_t core) { 54 | size_t r = in_use.erase(core); 55 | if (r != 1) { 56 | EXIT_WITH_ERROR("mark_unused(): Core %d not found", core); 57 | } 58 | available.insert(core); 59 | } 60 | 61 | uint32_t get_free_core() { 62 | auto it = available.begin(); 63 | if (it == available.end()) { 64 | EXIT_WITH_ERROR("No cores available"); 65 | } 66 | uint32_t core = *it; 67 | available.erase(it); 68 | in_use.insert(core); 69 | return core; 70 | } 71 | 72 | std::string 
hex_str() const { 73 | uint32_t max_core = *available.rbegin(); 74 | if (max_core >= 32) { 75 | EXIT_WITH_ERROR("rte does not support more that 32 core for now"); 76 | } 77 | uint32_t mask = 0; 78 | for (auto& core : available) { 79 | mask |= (1UL << (32 - core)); 80 | } 81 | return Utils::to_hex(mask); 82 | } 83 | 84 | std::string join(std::string delimiter) { 85 | std::stringstream stream; 86 | for (auto& core : available) { 87 | stream << core; 88 | if (core != *available.rbegin()) { 89 | stream << delimiter; 90 | } 91 | } 92 | return stream.str(); 93 | } 94 | }; -------------------------------------------------------------------------------- /src/dpdk_lib/enums.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | 4 | /** 5 | * An enum describing all PMD (poll mode driver) types supported by Dpdk. For more info about these PMDs please visit the Dpdk web-site 6 | */ 7 | enum DpdkPMDType { 8 | /** Unknown PMD type */ 9 | PMD_UNKNOWN, 10 | /** Link Bonding for 1GbE and 10GbE ports to allow the aggregation of multiple (slave) NICs into a single logical interface*/ 11 | PMD_BOND, 12 | /** Intel E1000 PMD */ 13 | PMD_E1000EM, 14 | /** Intel 1GbE PMD */ 15 | PMD_IGB, 16 | /** Intel 1GbE virtual function PMD */ 17 | PMD_IGBVF, 18 | /** Cisco enic (UCS Virtual Interface Card) PMD */ 19 | PMD_ENIC, 20 | /** Intel fm10k PMD */ 21 | PMD_FM10K, 22 | /** Intel 40GbE PMD */ 23 | PMD_I40E, 24 | /** Intel 40GbE virtual function PMD */ 25 | PMD_I40EVF, 26 | /** Intel 10GbE PMD */ 27 | PMD_IXGBE, 28 | /** Intel 10GbE virtual function PMD */ 29 | PMD_IXGBEVF, 30 | /** Mellanox ConnectX-3, ConnectX-3 Pro PMD */ 31 | PMD_MLX4, 32 | /** Null PMD */ 33 | PMD_NULL, 34 | /** pcap file PMD */ 35 | PMD_PCAP, 36 | /** ring-based (memory) PMD */ 37 | PMD_RING, 38 | /** VirtIO PMD */ 39 | PMD_VIRTIO, 40 | /** VMWare VMXNET3 PMD */ 41 | PMD_VMXNET3, 42 | /** Xen Project PMD */ 43 | PMD_XENVIRT, 44 | /** AF_PACKET PMD */ 45 | 
PMD_AF_PACKET 46 | }; -------------------------------------------------------------------------------- /src/dpdk_lib/mbufrawpacket.cpp: -------------------------------------------------------------------------------- 1 | #include "mbufrawpacket.hpp" 2 | 3 | #include "device.hpp" 4 | 5 | 6 | DPDKPacket* DPDKPacket::wrap(struct rte_mbuf* mbuf) { 7 | return static_cast(mbuf); 8 | } 9 | 10 | 11 | struct rte_mbuf* DPDKPacket::raw() { 12 | return static_cast(this); 13 | } 14 | 15 | 16 | uint8_t* DPDKPacket::reserve(uint16_t len) { 17 | if (unlikely(len > RTE_MBUF_DEFAULT_DATAROOM)) { // 2048 18 | throw std::invalid_argument("len > RTE_MBUF_DEFAULT_DATAROOM, currently only one segment mbufs supported"); 19 | } 20 | 21 | uint16_t current_len = this->size(); 22 | 23 | uint8_t* data; 24 | if (current_len < len) { 25 | data = (uint8_t*)rte_pktmbuf_append(this, len - current_len); 26 | } else if (current_len > len) { 27 | if (unlikely(rte_pktmbuf_trim(this, current_len - len) != 0)) { 28 | throw std::bad_alloc(); 29 | } 30 | data = rte_pktmbuf_mtod(this, uint8_t*); 31 | } else { 32 | data = rte_pktmbuf_mtod(this, uint8_t*); 33 | } 34 | 35 | if (unlikely(!data)) { 36 | throw std::bad_alloc(); 37 | } 38 | 39 | return data; 40 | } 41 | 42 | // void DPDKPacket::trim(uint16_t len) { 43 | // struct rte_mbuf *m_last; 44 | 45 | // if (unlikely(len > this->data_len)) { 46 | // throw std::invalid_argument("pkt trim failed because requested size is larger"); 47 | // } 48 | 49 | // this->data_len = (uint16_t)(this->data_len - len); 50 | // this->pkt_len = (this->pkt_len - len); 51 | // } 52 | 53 | uint8_t* DPDKPacket::data() { 54 | return rte_pktmbuf_mtod(this, uint8_t*); 55 | } 56 | 57 | uint16_t DPDKPacket::size() { 58 | return rte_pktmbuf_pkt_len(this); 59 | } 60 | 61 | void DPDKPacket::free() { 62 | rte_pktmbuf_free(this); 63 | } 64 | -------------------------------------------------------------------------------- /src/dpdk_lib/mbufrawpacket.hpp: 
// Typed view over a DPDK `rte_mbuf`. The struct adds no data members; an
// existing mbuf is reinterpreted through wrap() and handed back through raw().
struct DPDKPacket : private rte_mbuf {
    // struct EmptyDeleter {
    //     void operator()(DPDKPacket*) const {}
    // };
    // using dpdk_pkt_ptr = std::unique_ptr;

    // Never constructed, copied or moved directly; instances come from wrap().
    DPDKPacket() = delete;
    ~DPDKPacket() = default;

    DPDKPacket(const DPDKPacket& other) = delete;
    DPDKPacket(DPDKPacket&& other) = delete;

    DPDKPacket& operator=(const DPDKPacket& other) = delete;
    DPDKPacket& operator=(DPDKPacket&& other) = delete;

    // Casts `mbuf` to a DPDKPacket pointer; no allocation or copy is made.
    static DPDKPacket* wrap(struct rte_mbuf* mbuf);

    // Returns this packet as the underlying rte_mbuf pointer.
    struct rte_mbuf* raw();

    // Grows (rte_pktmbuf_append) or shrinks (rte_pktmbuf_trim) the packet to
    // `len` bytes and returns a pointer into its data. Throws
    // std::invalid_argument if `len` > RTE_MBUF_DEFAULT_DATAROOM and
    // std::bad_alloc if resizing fails.
    uint8_t* reserve(uint16_t len);

    // void trim(uint16_t len);

    // Pointer to the start of the packet data (rte_pktmbuf_mtod).
    uint8_t* data();

    // Packet length from rte_pktmbuf_pkt_len, narrowed to 16 bits.
    uint16_t size();

    // Releases the mbuf via rte_pktmbuf_free; the object must not be used afterwards.
    void free();
};
// wrap()/raw() treat both types as the same object, so the sizes must match.
static_assert(sizeof(DPDKPacket) == sizeof(struct rte_mbuf));
Can be overridden by child class if needed 11 | */ 12 | virtual ~DpdkWorkerThread() {} 13 | 14 | /** 15 | * An abstract method that must be implemented by child class. It's the indication for the worker to start running 16 | * @param[in] coreId The core ID the worker is running on (should be returned in getCoreId() ) 17 | * @return True if all went well or false otherwise 18 | */ 19 | virtual bool run(uint32_t coreId) = 0; 20 | 21 | /** 22 | * An abstract method that must be implemented by child class. It's the indication for the worker to stop running. After 23 | * this method is called the caller expects the worker to stop running as fast as possible 24 | */ 25 | virtual void stop() = 0; 26 | 27 | /** 28 | * An abstract method that must be implemented by child class. Get the core ID the worker is running on (as sent to the run() method 29 | * as a parameter) 30 | * @return The core ID the worker is running on 31 | */ 32 | virtual uint32_t getCoreId() const = 0; 33 | }; -------------------------------------------------------------------------------- /src/main.cpp: -------------------------------------------------------------------------------- 1 | #include "benchmarks/benchmarks.hpp" 2 | #include "db/config.hpp" 3 | 4 | 5 | // #define USE_VTUNE 6 | #ifdef USE_VTUNE 7 | #include 8 | #endif 9 | 10 | #ifdef USE_VTUNE 11 | __itt_pause(); 12 | #endif // USE_VTUNE 13 | 14 | #ifdef USE_VTUNE 15 | __itt_resume(); 16 | #endif // USE_VTUNE 17 | 18 | 19 | int main(int argc, char** argv) { 20 | auto& config = Config::instance(); 21 | config.parse_cli(argc, argv); 22 | config.print(); 23 | 24 | switch (config.workload) { 25 | case BenchmarkType::YCSB: { 26 | using namespace benchmark::ycsb; 27 | return ycsb(); 28 | } 29 | case BenchmarkType::SMALLBANK: { 30 | using namespace benchmark::smallbank; 31 | return smallbank(); 32 | } 33 | case BenchmarkType::TPCC: { 34 | using namespace benchmark::tpcc; 35 | return tpcc(); 36 | } 37 | case BenchmarkType::MICRO_RECIRC: { 38 | using 
namespace benchmark::micro_recirc; 39 | return micro_recirc(); 40 | } 41 | } 42 | } -------------------------------------------------------------------------------- /src/meson.build: -------------------------------------------------------------------------------- 1 | subdir('db') 2 | subdir('dpdk_lib') 3 | subdir('benchmarks') 4 | subdir('stats') 5 | subdir('comm') 6 | subdir('table') 7 | subdir('datastructures') 8 | subdir('declustered_layout') 9 | 10 | 11 | project_headers += files( 12 | 13 | ) 14 | 15 | project_sources += files( 16 | 17 | ) 18 | 19 | 20 | main_source = files('main.cpp') 21 | -------------------------------------------------------------------------------- /src/stats/collector.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "counter.hpp" 4 | #include "cycles.hpp" 5 | #include "moving_avg.hpp" 6 | #include "periodic.hpp" 7 | 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | 15 | 16 | namespace stats { 17 | 18 | struct StatsCollector { 19 | std::mutex mutex; 20 | std::jthread thread; 21 | 22 | StatsCollector(); 23 | ~StatsCollector(); 24 | 25 | static StatsCollector& get(); 26 | 27 | std::vector cntrs; 28 | std::array sum_counters{}; 29 | void reg(Counter* cntr); 30 | void dereg(Counter* cntr); 31 | 32 | template 33 | struct TotalAverage { 34 | void add(T sample) { 35 | total += sample; 36 | ++n; 37 | } 38 | double avg() const { 39 | if (n == 0) { 40 | return 0.0; 41 | } 42 | return static_cast(total) / n; 43 | } 44 | 45 | private: 46 | T total = 0; 47 | uint64_t n = 0; 48 | }; 49 | 50 | struct CyclesInfo { 51 | using avg_t = MovingAverage; 52 | // using avg_t = TotalAverage; 53 | Cycles* cycles; 54 | std::array avgs{}; 55 | }; 56 | std::vector cycles; 57 | std::array, Cycles::__MAX> avg_cycles{}; 58 | 59 | void reg(Cycles* c); 60 | void dereg(Cycles* c); 61 | 62 | std::vector pcntrs; 63 | std::array last_pcntrs{}; 64 | void reg(Periodic* 
cntr); 65 | void dereg(Periodic* cntr); 66 | }; 67 | 68 | } // namespace stats 69 | -------------------------------------------------------------------------------- /src/stats/context.cpp: -------------------------------------------------------------------------------- 1 | #include "context.hpp" 2 | 3 | 4 | void WorkerContext::init() { 5 | context = new WorkerContext(); 6 | } 7 | 8 | void WorkerContext::deinit() { 9 | if (context) { 10 | delete context; 11 | } 12 | } 13 | 14 | WorkerContext::guard::guard() { 15 | WorkerContext::init(); 16 | } 17 | WorkerContext::guard::~guard() { 18 | WorkerContext::deinit(); 19 | } 20 | 21 | WorkerContext& WorkerContext::get() { // requires constructed object in thread_local 22 | return *context; 23 | } 24 | -------------------------------------------------------------------------------- /src/stats/context.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "counter.hpp" 4 | #include "cycles.hpp" 5 | #include "periodic.hpp" 6 | 7 | 8 | struct WorkerContext { 9 | inline static thread_local WorkerContext* context; 10 | 11 | static void init(); 12 | 13 | static void deinit(); 14 | 15 | struct guard { 16 | guard(); 17 | ~guard(); 18 | }; 19 | 20 | static WorkerContext& get(); // requires constructed object in thread_local 21 | 22 | 23 | uint32_t tid; 24 | stats::Counter cntr; 25 | stats::Cycles cycl; 26 | stats::Periodic pcntr; 27 | }; 28 | -------------------------------------------------------------------------------- /src/stats/counter.cpp: -------------------------------------------------------------------------------- 1 | #include "counter.hpp" 2 | 3 | #include "collector.hpp" 4 | #include "stats/context.hpp" 5 | 6 | #include 7 | 8 | namespace stats { 9 | 10 | Counter::Counter() { 11 | if constexpr (!(ENABLED_STATS & StatsBitmask::COUNTER)) { 12 | return; 13 | } 14 | StatsCollector::get().reg(this); 15 | } 16 | 17 | 18 | Counter::~Counter() { 19 | if constexpr 
(!(ENABLED_STATS & StatsBitmask::COUNTER)) { 20 | return; 21 | } 22 | StatsCollector::get().dereg(this); 23 | 24 | if constexpr (STATS_PER_WORKER) { 25 | std::stringstream ss; 26 | ss << "*** Stats Worker " << WorkerContext::get().tid << " ***\n"; 27 | for (size_t i = 0; auto& name : enum2str) { 28 | ss << name << '=' << counters[i++] << '\n'; 29 | } 30 | std::cout << ss.str(); 31 | } 32 | } 33 | 34 | } // namespace stats -------------------------------------------------------------------------------- /src/stats/cycles.cpp: -------------------------------------------------------------------------------- 1 | #include "cycles.hpp" 2 | 3 | #include "collector.hpp" 4 | 5 | #include 6 | 7 | 8 | namespace stats { 9 | 10 | Cycles::Cycles() { 11 | if constexpr (!(ENABLED_STATS & StatsBitmask::CYCLES)) { 12 | return; 13 | } 14 | StatsCollector::get().reg(this); 15 | } 16 | 17 | Cycles::~Cycles() { 18 | if constexpr (!(ENABLED_STATS & StatsBitmask::CYCLES)) { 19 | return; 20 | } 21 | StatsCollector::get().dereg(this); 22 | 23 | if constexpr (STATS_PER_WORKER) { 24 | std::stringstream ss; 25 | ss << "*** Cycles Worker ***\n"; 26 | for (size_t i = 0; auto& name : enum2str) { 27 | ss << name << '=' << cycles[i++].cycles << '\n'; 28 | } 29 | std::cout << ss.str(); 30 | } 31 | } 32 | 33 | } // namespace stats -------------------------------------------------------------------------------- /src/stats/cycles.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "db/defs.hpp" 4 | #include "db/util.hpp" 5 | 6 | #include 7 | #include 8 | #include 9 | #if defined(__x86_64__) 10 | #include 11 | #endif 12 | 13 | namespace stats { 14 | 15 | struct Cycles { 16 | Cycles(); 17 | ~Cycles(); 18 | 19 | 20 | enum Name { 21 | commit_latency, 22 | latch_contention, 23 | remote_latency, 24 | local_latency, 25 | switch_txn_latency, 26 | __MAX 27 | }; 28 | 29 | static constexpr std::array enum2str{ 30 | "commit_latency", 31 | 
"latch_contention", 32 | "remote_latency", 33 | "local_latency", 34 | "switch_txn_latency", 35 | }; 36 | static_assert(enum2str.size() == __MAX); 37 | 38 | struct Entry { 39 | uint64_t sum = 0; 40 | uint64_t start = 0; 41 | std::atomic cycles{}; 42 | }; 43 | std::array cycles{}; 44 | 45 | 46 | #define __forceinline inline __attribute__((always_inline)) 47 | 48 | 49 | __forceinline void start(const Name name [[maybe_unused]]) { 50 | if constexpr (!(ENABLED_STATS & StatsBitmask::CYCLES)) { 51 | return; 52 | } 53 | #if defined(__x86_64__) 54 | auto& lat = cycles[name]; 55 | // _mm_lfence(); // optionally wait for earlier insns to retire before reading the clock 56 | lat.start = __builtin_ia32_rdtsc(); 57 | // _mm_lfence(); // optionally block later instructions until rdtsc retires 58 | #endif 59 | } 60 | 61 | __forceinline void stop(const Name name [[maybe_unused]]) { 62 | if constexpr (!(ENABLED_STATS & StatsBitmask::CYCLES)) { 63 | return; 64 | } 65 | #if defined(__x86_64__) 66 | auto& lat = cycles[name]; 67 | auto cycles = __builtin_ia32_rdtsc() - lat.start; 68 | lat.sum += cycles; 69 | #endif 70 | } 71 | 72 | __forceinline void save(const Name name [[maybe_unused]]) { 73 | if constexpr (!(ENABLED_STATS & StatsBitmask::CYCLES)) { 74 | return; 75 | } 76 | #if defined(__x86_64__) 77 | auto& lat = cycles[name]; 78 | // lat.cycles.store(lat.sum, std::memory_order_relaxed); 79 | lat.cycles.fetch_add(lat.sum, std::memory_order_relaxed); 80 | #endif 81 | } 82 | 83 | __forceinline void reset(const Name name [[maybe_unused]]) { 84 | if constexpr (!(ENABLED_STATS & StatsBitmask::CYCLES)) { 85 | return; 86 | } 87 | auto& lat = cycles[name]; 88 | lat.sum = 0; 89 | } 90 | }; 91 | 92 | } // namespace stats -------------------------------------------------------------------------------- /src/stats/meson.build: -------------------------------------------------------------------------------- 1 | 2 | 3 | project_headers += files( 4 | 'stats.hpp', 5 | 'scheduler.hpp', 6 | 
'collector.hpp', 7 | 'counter.hpp', 8 | 'cycles.hpp', 9 | 'periodic.hpp', 10 | 'context.hpp', 11 | ) 12 | 13 | 14 | project_sources += files( 15 | 'collector.cpp', 16 | 'counter.cpp', 17 | 'cycles.cpp', 18 | 'periodic.cpp', 19 | 'context.cpp', 20 | ) 21 | -------------------------------------------------------------------------------- /src/stats/moving_avg.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | 7 | template 8 | struct MovingAverage { 9 | static_assert(N && ((N & (N - 1)) == 0), "N not power of 2"); 10 | 11 | void add(T sample) { 12 | if (size < N) { 13 | samples[size++] = sample; 14 | total_sum += sample; 15 | } else { 16 | T& oldest = samples[size++ % N]; 17 | total_sum += sample - oldest; 18 | oldest = sample; 19 | } 20 | } 21 | 22 | double avg() const { 23 | if (size == 0) { 24 | return 0.0; 25 | } 26 | return total_sum / std::min(size, N); 27 | } 28 | 29 | size_t size{0}; 30 | Total total_sum{0}; 31 | T samples[N]; 32 | }; -------------------------------------------------------------------------------- /src/stats/periodic.cpp: -------------------------------------------------------------------------------- 1 | #include "periodic.hpp" 2 | 3 | #include "collector.hpp" 4 | #include "stats/context.hpp" 5 | 6 | #include 7 | 8 | 9 | namespace stats { 10 | 11 | Periodic::Periodic() { 12 | if constexpr (!(ENABLED_STATS & StatsBitmask::PERIODIC)) { 13 | return; 14 | } 15 | StatsCollector::get().reg(this); 16 | } 17 | 18 | Periodic::~Periodic() { 19 | if constexpr (!(ENABLED_STATS & StatsBitmask::PERIODIC)) { 20 | return; 21 | } 22 | StatsCollector::get().dereg(this); 23 | 24 | if constexpr (STATS_PER_WORKER) { 25 | std::stringstream ss; 26 | ss << "*** Periodic Counter Worker " << WorkerContext::get().tid << " ***\n"; 27 | for (size_t i = 0; auto& name : enum2str) { 28 | ss << name << '=' << counters[i++] << '\n'; 29 | } 30 | std::cout << ss.str(); 31 | } 32 | } 33 | 34 
| } // namespace stats 35 | -------------------------------------------------------------------------------- /src/stats/periodic.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | 4 | #include "db/defs.hpp" 5 | #include "db/util.hpp" 6 | 7 | #include 8 | #include 9 | #include 10 | 11 | 12 | namespace stats { 13 | 14 | 15 | struct Periodic { 16 | Periodic(); 17 | ~Periodic(); 18 | 19 | enum Name { 20 | commits, 21 | __MAX 22 | }; 23 | 24 | static constexpr std::array enum2str{ 25 | "commits", 26 | }; 27 | 28 | std::atomic counters[__MAX]{}; 29 | 30 | 31 | #define __forceinline inline __attribute__((always_inline)) 32 | 33 | __forceinline void incr(const Name name) { 34 | if constexpr (!(ENABLED_STATS & StatsBitmask::PERIODIC)) { 35 | return; 36 | } 37 | auto& cntr = counters[name]; 38 | auto local = cntr.load(std::memory_order_release); 39 | ++local; 40 | cntr.store(local, std::memory_order_relaxed); 41 | } 42 | }; 43 | 44 | } // namespace stats 45 | -------------------------------------------------------------------------------- /src/stats/scheduler.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | 10 | template 11 | struct Scheduler { 12 | struct Entry { 13 | std::function function; 14 | std::chrono::microseconds interval; 15 | std::chrono::time_point next{}; 16 | 17 | bool operator<(const Entry& other) { 18 | return next < other.next; 19 | } 20 | }; 21 | 22 | std::array entries; 23 | 24 | Scheduler(std::array&& entries) 25 | : entries(std::move(entries)) {} 26 | 27 | void run_next() { 28 | auto now = std::chrono::steady_clock::now(); 29 | auto e = std::min_element(entries.begin(), entries.end()); 30 | if (e->next > now) { 31 | std::this_thread::sleep_until(e->next); 32 | } 33 | 34 | now = std::chrono::steady_clock::now(); 35 | e->function(); 36 | e->next = now + e->interval; 37 | } 38 
| }; 39 | -------------------------------------------------------------------------------- /src/stats/stats.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | 4 | #include "context.hpp" -------------------------------------------------------------------------------- /src/table/concurrency_control/meson.build: -------------------------------------------------------------------------------- 1 | 2 | 3 | project_headers += files( 4 | 'row.hpp', 5 | 'no_wait.hpp', 6 | 'wait_die.hpp', 7 | ) 8 | 9 | 10 | project_sources += files( 11 | ) 12 | -------------------------------------------------------------------------------- /src/table/concurrency_control/none.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | 4 | #include "db/future.hpp" 5 | #include "db/types.hpp" 6 | #include "row.hpp" 7 | #include "stats/stats.hpp" 8 | 9 | #include 10 | 11 | 12 | template 13 | struct Row { 14 | Tuple_t tuple; 15 | 16 | using Future_t = TupleFuture; 17 | 18 | 19 | ErrorCode local_lock(const AccessMode, timestamp_t, Future_t* future) { 20 | future->tuple.store(&tuple); 21 | 22 | WorkerContext::get().cntr.incr(stats::Counter::local_lock_success); 23 | return ErrorCode::SUCCESS; 24 | } 25 | 26 | void remote_lock(Communicator& comm, Communicator::Pkt_t* pkt, msg::TupleGetReq* req) { 27 | auto res = req->convert(); 28 | auto size = msg::TupleGetRes::size(sizeof(tuple)); 29 | pkt->resize(size); 30 | std::memcpy(res->tuple, &tuple, sizeof(tuple)); 31 | 32 | WorkerContext::get().cntr.incr(stats::Counter::remote_lock_success); 33 | comm.send(res->sender, pkt, comm.mh_tid); // always called from msg-handler 34 | } 35 | 36 | void remote_unlock(msg::TuplePutReq* req, Communicator& comm) { 37 | if (req->mode == AccessMode::WRITE) { 38 | std::memcpy(&tuple, req->tuple, sizeof(tuple)); 39 | } 40 | auto rc = local_unlock(req->mode, req->ts, comm); 41 | (void)rc; 42 | } 43 | 44 | 
ErrorCode local_unlock(const AccessMode, const timestamp_t, Communicator&) { 45 | return ErrorCode::SUCCESS; 46 | } 47 | 48 | bool check() { 49 | return true; 50 | } 51 | }; -------------------------------------------------------------------------------- /src/table/concurrency_control/row.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | 4 | template 5 | struct Row; -------------------------------------------------------------------------------- /src/table/meson.build: -------------------------------------------------------------------------------- 1 | subdir('concurrency_control') 2 | 3 | 4 | project_headers += files( 5 | 'table.hpp', 6 | 'partition.hpp', 7 | ) 8 | 9 | 10 | project_sources += files( 11 | 'table.cpp', 12 | ) 13 | -------------------------------------------------------------------------------- /src/table/partition.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | 4 | #include "db/config.hpp" 5 | #include "db/types.hpp" 6 | 7 | #include 8 | #include 9 | #include 10 | 11 | 12 | enum class PartitionType { 13 | REPLICATED, 14 | RANGE, 15 | ROUND_ROBIN, // not implemented 16 | HASHED, // not implemented 17 | }; 18 | 19 | struct LocationInfo { 20 | bool is_local; 21 | msg::node_t target; 22 | bool is_hot; 23 | uint16_t abs_hot_index; 24 | }; 25 | 26 | 27 | template 28 | struct PartitionInfo; 29 | 30 | template <> 31 | struct PartitionInfo { 32 | PartitionInfo(const uint64_t) {} 33 | 34 | constexpr auto location(p4db::key_t index [[maybe_unused]]) { 35 | LocationInfo loc_info; 36 | loc_info.is_local = true; 37 | loc_info.target = msg::node_t{0}; // not used 38 | loc_info.is_hot = false; // replicated, never on switch 39 | 40 | return loc_info; 41 | } 42 | 43 | // translate key directly to data-array index 44 | constexpr p4db::key_t translate(p4db::key_t index) { 45 | return index; 46 | } 47 | }; 48 | 49 | 50 | template <> 51 | struct 
PartitionInfo { 52 | const uint64_t total_size; 53 | uint64_t partition_size; 54 | uint64_t offset; 55 | uint64_t hot_size; 56 | msg::node_t my_id; 57 | msg::node_t switch_id; 58 | 59 | PartitionInfo(const uint64_t total_size) 60 | : total_size(total_size) { 61 | auto& config = Config::instance(); 62 | 63 | if (total_size % config.num_nodes != 0) { 64 | throw std::runtime_error("total_size % num_nodes != 0"); 65 | } 66 | 67 | my_id = config.node_id; 68 | partition_size = total_size / config.num_nodes; 69 | offset = partition_size * config.node_id; 70 | switch_id = config.switch_id; 71 | 72 | 73 | if constexpr (LM_ON_SWITCH) { 74 | // refactor 75 | switch (config.workload) { 76 | case BenchmarkType::YCSB: 77 | hot_size = config.ycsb.hot_size; 78 | break; 79 | // case BenchmarkType::SMALLBANK: 80 | // hot_size = config.smallbank.hot_size; 81 | // break; 82 | // case BenchmarkType::TPCC: 83 | // hot_size = config.tpcc.hot_size; 84 | // break; 85 | default: 86 | throw std::runtime_error("workload not supported in partInfo for LM_ON_SWITCH"); 87 | } 88 | } 89 | 90 | std::stringstream ss; 91 | ss << "partinfo_total_size=" << total_size << '\n'; 92 | ss << "partinfo_partition_size=" << partition_size << '\n'; 93 | ss << "partinfo_offset=" << offset << '\n'; 94 | ss << "partinfo_my_id=" << my_id << '\n'; 95 | std::cout << ss.str(); 96 | } 97 | 98 | auto location(p4db::key_t index) { 99 | LocationInfo loc_info; 100 | loc_info.target = msg::node_t{static_cast(index / partition_size)}; 101 | loc_info.is_local = loc_info.target == my_id; 102 | 103 | if constexpr (LM_ON_SWITCH) { 104 | uint64_t local_idx = index - (loc_info.target * partition_size); 105 | loc_info.is_hot = local_idx < hot_size; // might be faster than % partition_size 106 | if (loc_info.is_hot) { 107 | loc_info.abs_hot_index = local_idx + (hot_size * loc_info.target); 108 | loc_info.target = switch_id; 109 | loc_info.is_local = false; 110 | } // enforce hot requests go to the switch 111 | } 112 | 113 | return 
loc_info; 114 | } 115 | 116 | p4db::key_t translate(p4db::key_t index) { 117 | // return p4db::key_t{index - offset}; 118 | return index; 119 | } 120 | }; -------------------------------------------------------------------------------- /src/table/table.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataManagementLab/p4db/a221708787e23a1394cd116fbefb31f9a5506f4e/src/table/table.cpp -------------------------------------------------------------------------------- /src/utils/dist.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | 7 | template 8 | struct UniformRemoteRank { 9 | T max; 10 | T except; 11 | std::uniform_int_distribution dist; 12 | 13 | explicit UniformRemoteRank(T max, T except) : max(max), except(except), dist(0, max - 2) { 14 | if (except > max - 1) { 15 | throw std::invalid_argument("except > max-1"); 16 | } 17 | } 18 | 19 | template 20 | T operator()(U& rng) { 21 | T rank = dist(rng); 22 | if (rank == except) { 23 | return max - 1; 24 | } 25 | return rank; 26 | } 27 | }; 28 | 29 | template 30 | struct PercentDist { 31 | T threshold; 32 | std::uniform_int_distribution dist{0, 99}; 33 | 34 | PercentDist(T threshhold) : threshold(threshhold) {} 35 | 36 | template 37 | bool operator()(U& rng) { 38 | return dist(rng) < threshold; 39 | } 40 | }; 41 | -------------------------------------------------------------------------------- /src/utils/meson.build: -------------------------------------------------------------------------------- 1 | 2 | 3 | project_headers += files( 4 | 'zipf.hpp', 5 | 'dist.hpp', 6 | ) 7 | 8 | 9 | project_sources += files( 10 | ) 11 | -------------------------------------------------------------------------------- /src/utils/zipf.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | 7 | /** 
Zipf-like random distribution. 8 | * 9 | * "Rejection-inversion to generate variates from monotone discrete 10 | * distributions", Wolfgang Hörmann and Gerhard Derflinger 11 | * ACM TOMACS 6.3 (1996): 169-184 12 | */ 13 | template 14 | class zipf_distribution { 15 | public: 16 | typedef RealType input_type; 17 | typedef IntType result_type; 18 | 19 | static_assert(std::numeric_limits::is_integer, ""); 20 | static_assert(!std::numeric_limits::is_integer, ""); 21 | 22 | zipf_distribution(const IntType n = std::numeric_limits::max(), const RealType q = 1.0) 23 | : n(n), q(q), H_x1(H(1.5) - 1.0), H_n(H(n + 0.5)), dist(H_x1, H_n) {} 24 | 25 | IntType operator()(std::mt19937& rng) { 26 | while (true) { 27 | const RealType u = dist(rng); 28 | const RealType x = H_inv(u); 29 | const IntType k = clamp(std::round(x), 1, n); 30 | if (u >= H(k + 0.5) - h(k)) { 31 | return k; 32 | } 33 | } 34 | } 35 | 36 | private: 37 | /** Clamp x to [min, max]. */ 38 | template 39 | static constexpr T clamp(const T x, const T min, const T max) { 40 | return std::max(min, std::min(max, x)); 41 | } 42 | 43 | /** exp(x) - 1 / x */ 44 | static double expxm1bx(const double x) { 45 | return (std::abs(x) > epsilon) 46 | ? std::expm1(x) / x 47 | : (1.0 + x / 2.0 * (1.0 + x / 3.0 * (1.0 + x / 4.0))); 48 | } 49 | 50 | /** H(x) = log(x) if q == 1, (x^(1-q) - 1)/(1 - q) otherwise. 51 | * H(x) is an integral of h(x). 52 | * 53 | * Note the numerator is one less than in the paper order to work with all 54 | * positive q. 55 | */ 56 | const RealType H(const RealType x) { 57 | const RealType log_x = std::log(x); 58 | return expxm1bx((1.0 - q) * log_x) * log_x; 59 | } 60 | 61 | /** log(1 + x) / x */ 62 | static RealType log1pxbx(const RealType x) { 63 | return (std::abs(x) > epsilon) 64 | ? 
std::log1p(x) / x 65 | : 1.0 - x * ((1 / 2.0) - x * ((1 / 3.0) - x * (1 / 4.0))); 66 | } 67 | 68 | /** The inverse function of H(x) */ 69 | const RealType H_inv(const RealType x) { 70 | const RealType t = std::max(-1.0, x * (1.0 - q)); 71 | return std::exp(log1pxbx(t) * x); 72 | } 73 | 74 | /** That hat function h(x) = 1 / (x ^ q) */ 75 | const RealType h(const RealType x) { 76 | return std::exp(-q * std::log(x)); 77 | } 78 | 79 | static constexpr RealType epsilon = 1e-8; 80 | 81 | IntType n; ///< Number of elements 82 | RealType q; ///< Exponent 83 | RealType H_x1; ///< H(x_1) 84 | RealType H_n; ///< H(n) 85 | std::uniform_real_distribution dist; ///< [H(x_1), H(n)] 86 | }; -------------------------------------------------------------------------------- /subprojects/cxxopts.wrap: -------------------------------------------------------------------------------- 1 | [wrap-file] 2 | directory = cxxopts-2.2.1 3 | source_url = https://github.com/jarro2783/cxxopts/archive/v2.2.1.tar.gz 4 | source_filename = cxxopts-2.2.1.tar.gz 5 | source_hash = 984aa3c8917d649b14d7f6277104ce38dd142ce378a9198ec926f03302399681 6 | patch_url = https://wrapdb.mesonbuild.com/v1/projects/cxxopts/2.2.1/1/get_zip 7 | patch_filename = cxxopts-2.2.1-1-wrap.zip 8 | patch_hash = 7b5b00456496a05769eef1b194d06d6784ba1695548c9af8e02a36e9f9c908e1 9 | 10 | -------------------------------------------------------------------------------- /subprojects/dpdk.wrap: -------------------------------------------------------------------------------- 1 | [wrap-file] 2 | directory = dpdk-20.08 3 | 4 | source_url = http://fast.dpdk.org/rel/dpdk-20.08.tar.xz 5 | source_filename = dpdk-20.08.0.tar.xz 6 | source_hash = 1a33ff04651b5c5ba3da212324bd93bce3e3976fe899d9420ac832d9a459b047 7 | 8 | 9 | [provide] 10 | dependency_names = libdpdk 11 | -------------------------------------------------------------------------------- /subprojects/fmt.wrap: 
-------------------------------------------------------------------------------- 1 | [wrap-file] 2 | directory = fmt-7.0.1 3 | 4 | source_url = https://github.com/fmtlib/fmt/archive/7.0.1.tar.gz 5 | source_filename = fmt-7.0.1.tar.gz 6 | source_hash = ac335a4ca6beaebec4ddb2bc35b9ae960b576f3b64a410ff2c379780f0cd4948 7 | 8 | patch_url = https://wrapdb.mesonbuild.com/v1/projects/fmt/7.0.1/1/get_zip 9 | patch_filename = fmt-7.0.1-1-wrap.zip 10 | patch_hash = 4fd7dc515c62ee2de1c8d7a3badce431174f29c02f36a06e257be5aa2d223592 11 | -------------------------------------------------------------------------------- /switch_src/01_control_plane/Makefile: -------------------------------------------------------------------------------- 1 | # 2 | # A simple Makefile for a program and its BfRt Control Plane 3 | # 4 | ifndef SDE_INSTALL 5 | $(error SDE_INSTALL is not set) 6 | endif 7 | 8 | PROG=p4db 9 | 10 | # 11 | # Final targets 12 | # 13 | all: $(PROG) 14 | 15 | # 16 | # Simple P4 Compilation rules 17 | # 18 | P4C=bf-p4c 19 | P4PPFLAGS= 20 | P4FLAGS=-g --verbose 2 --target=$(P4TARGET) 21 | 22 | P4TARGET=tofino 23 | P4DIR=%.$(P4TARGET) 24 | 25 | $(P4DIR) $(P4DIR)/bfrt.json $(P4DIR)/%.conf $(P4DIR)/pipe/context.json: %.p4 26 | $(P4C) $(P4PPFLAGS) $(P4FLAGS) $< 27 | 28 | p4: $(PROG).$(P4TARGET) 29 | 30 | # 31 | # C++ Compilation stuff 32 | # 33 | CPPFLAGS = -I$(SDE_INSTALL)/include -DSDE_INSTALL=\"$(SDE_INSTALL)\" \ 34 | #-DPROG_NAME=\"$(PROG)\" 35 | CXXFLAGS = -g -O3 -std=c++17 -Wall -Wextra -Werror -MMD -MF $@.d 36 | BF_LIBS = -L$(SDE_INSTALL)/lib -lbf_switchd_lib -ldriver -lbfutils -lbfsys 37 | LDLIBS = $(BF_LIBS) -lm -ldl -lpthread 38 | LDFLAGS = -Wl,-rpath,$(SDE_INSTALL)/lib 39 | 40 | DEPS := $(OBJS:.o=.o.d) 41 | 42 | .PHONY: p4 all clean 43 | 44 | clean: 45 | -@rm -rf $(PROG) *~ *.o *.d *.tofino *.tofino2 zlog-cfg-cur bf_drivers.log -------------------------------------------------------------------------------- /switch_src/02_codegen/__init__.py: 
-------------------------------------------------------------------------------- 1 | from .snippet import Snippet 2 | from .indent import indent 3 | from .util import * 4 | from .blocks import * -------------------------------------------------------------------------------- /switch_src/02_codegen/indent.py: -------------------------------------------------------------------------------- 1 | import re 2 | from collections.abc import Iterable 3 | 4 | 5 | def indent(snippet): 6 | if isinstance(snippet, Iterable): 7 | lines = list(itertools.chain.from_iterable(x.__str__().split('\n') for x in snippet)) 8 | else: 9 | lines = snippet.__str__().split('\n') 10 | 11 | indentation = 0 12 | in_params = False 13 | for i, line in enumerate(lines): 14 | line = line.strip() 15 | nc_line = line.split('//', 1)[0].strip() 16 | 17 | if re.search(r'{\s*}$', nc_line): 18 | pass 19 | elif re.search(r'^}', nc_line): 20 | indentation -= 1 21 | elif re.search(r'^#(?:else|endif)', nc_line): 22 | indentation -= 1 23 | elif re.search(r';$', nc_line) and in_params: 24 | in_params = False 25 | indentation -= 2 26 | 27 | newline = f'{" "*indentation}{line}' 28 | #print(f'{indentation:2} {newline}') 29 | 30 | if re.search(r'\($', nc_line): 31 | in_params = True 32 | indentation += 2 33 | elif re.search(r'\)\s*{$', nc_line) and in_params: 34 | in_params = False 35 | indentation -= 1 36 | elif re.search(r'{$', nc_line): 37 | indentation += 1 38 | elif re.search(r'^#(?:if|else)', nc_line): 39 | indentation += 1 40 | 41 | 42 | lines[i] = newline 43 | 44 | lines.append('') 45 | return '\n'.join(lines) -------------------------------------------------------------------------------- /switch_src/02_codegen/snippet.py: -------------------------------------------------------------------------------- 1 | import re 2 | import textwrap 3 | import types 4 | 5 | 6 | RE_FORMAT = re.compile(r'\${(?P\w+)(?:=(?P.+?)(?=}))?}') 7 | 8 | 9 | class Snippet: 10 | def __init__(self, **kwargs): 11 | code = 
self.__doc__.rstrip() 12 | self.__code = textwrap.dedent(code) 13 | for k, v in kwargs.items(): 14 | setattr(self, k, v) 15 | 16 | def __str__(self): 17 | start = 0 18 | end = len(self.__code) 19 | parts = [] 20 | 21 | for m in RE_FORMAT.finditer(self.__code): 22 | groups = m.groupdict() 23 | name = groups['name'] 24 | end = m.start() 25 | parts.append(self.__code[start:end]) 26 | 27 | if groups['default']: 28 | default = eval(groups['default']) 29 | attr = getattr(self, name, default) 30 | else: 31 | attr = getattr(self, name) 32 | 33 | if isinstance(attr, types.GeneratorType): 34 | variable = '\n'.join(map(str, attr)) 35 | else: 36 | variable = str(attr) 37 | 38 | parts.append(variable) 39 | start = m.end() 40 | end = len(self.__code) 41 | 42 | if start != len(self.__code): 43 | parts.append(self.__code[start:end]) 44 | 45 | return ''.join(parts) 46 | -------------------------------------------------------------------------------- /switch_src/02_codegen/util.py: -------------------------------------------------------------------------------- 1 | from .snippet import Snippet 2 | from .indent import indent 3 | 4 | 5 | class C(Snippet): 6 | def __init__(self, code, **kwargs): 7 | self.__doc__ = code 8 | super().__init__(**kwargs) 9 | 10 | 11 | def C_for(code, range, **kwargs): 12 | return '\n'.join(map(str, (C(code, i=i, **kwargs) for i in range))) 13 | 14 | 15 | 16 | def write(output, code): 17 | code = indent(code) 18 | output.write(code) 19 | return code.count('\n') + 1 20 | -------------------------------------------------------------------------------- /switch_src/03_tests/.gitignore: -------------------------------------------------------------------------------- 1 | # Ignore all 2 | * 3 | 4 | # Unignore all with extensions 5 | !*.* 6 | 7 | # Unignore all dirs 8 | !*/ 9 | 10 | 11 | *.o 12 | *.d 13 | -------------------------------------------------------------------------------- /switch_src/03_tests/Makefile: 
-------------------------------------------------------------------------------- 1 | CXX=g++ 2 | CFLAGS=-std=c++2a -O0 -g -MD -Wall -Wextra -fmax-errors=1 -I. -I../../src/ -fsanitize=address 3 | # CFLAGS=-std=c++2a -O3 -g -MD -Wall -Wextra -fmax-errors=1 -I. -I../../src/ 4 | LIBS=-pthread 5 | 6 | 7 | BINS = micro_recirc lock_manager ycsb smallbank tpcc 8 | 9 | SRCS = ../../src/db/hex_dump.cpp 10 | OBJS := $(notdir $(SRCS:.cpp=.o)) 11 | 12 | 13 | 14 | all: $(BINS) 15 | 16 | 17 | $(OBJS): $(SRCS) #$(DEPS) 18 | $(CXX) -c -o $@ $< $(CFLAGS) 19 | 20 | 21 | %.o: %.cpp #$(DEPS) 22 | $(CXX) -c -o $@ $< $(CFLAGS) 23 | 24 | 25 | 26 | $(BINS): %: %.o $(OBJS) 27 | $(CXX) -o $@ $^ $(CFLAGS) $(LIBS) 28 | 29 | 30 | .PHONY: clean 31 | clean: 32 | rm -f *.o *.d *~ $(BINS) 33 | 34 | -include $(OBJS:.o=.d) 35 | -------------------------------------------------------------------------------- /switch_src/03_tests/micro_recirc.cpp: -------------------------------------------------------------------------------- 1 | #include "benchmarks/micro_recirc/switch.hpp" 2 | #include "comm/bigendian.hpp" 3 | #include "comm/msg.hpp" 4 | #include "db/buffers.hpp" 5 | #include "db/hex_dump.hpp" 6 | #include "network_interface.hpp" 7 | 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | 15 | struct pkt_t { 16 | eth_hdr_t eth; 17 | #pragma GCC diagnostic push 18 | #pragma GCC diagnostic ignored "-Wpedantic" 19 | msg::Header msg[0]; 20 | #pragma GCC diagnostic pop 21 | } /*__attribute__((packed))*/; 22 | 23 | 24 | NetworkInterface net{"enp1s0f1"}; 25 | 26 | template 27 | void send_recv(BW_CB&& bw_cb, BR_CB&& br_cb) { 28 | auto pkt = reinterpret_cast(new uint8_t[1500]{}); 29 | pkt->eth.type = ETHER_TYPE; 30 | net.set_src_mac(*pkt); 31 | pkt->eth.dst = {0x1B, 0xAD, 0xC0, 0xDE, 0xBA, 0xBE}; 32 | // std::memcpy(&pkt->eth.dst, &pkt->eth.src, ETH_ALEN); 33 | 34 | auto txn = new (pkt->msg) msg::SwitchTxn(); 35 | BufferWriter bw{txn->data}; 36 | 37 | bw_cb(bw); // fill pkt 38 | 39 | 
uint16_t len = static_cast(sizeof(pkt_t) + msg::SwitchTxn::size(bw.size)); 40 | std::cout << "sent packet: " << len << '\n'; 41 | hex_dump(std::cerr, reinterpret_cast(pkt), len); 42 | net.send_pkt(*pkt, len); 43 | delete[] pkt; 44 | 45 | net.recv_pkt([&](const auto& pkt, int len) { 46 | std::cout << "received: " << len << '\n'; 47 | hex_dump(std::cerr, reinterpret_cast(&pkt), len); 48 | 49 | auto txn = reinterpret_cast(pkt.msg); 50 | BufferReader br{const_cast(txn->data)}; 51 | 52 | br_cb(br); 53 | }); 54 | } 55 | 56 | 57 | int main(int argc, char** argv) { 58 | (void)argc; 59 | (void)argv; 60 | 61 | if (argc < 2) { 62 | throw std::invalid_argument("./main <#recircs>"); 63 | } 64 | 65 | 66 | using namespace benchmark::micro_recirc; 67 | 68 | 69 | const uint32_t recircs = std::stoi(argv[1]); 70 | 71 | send_recv([&](auto& bw) { 72 | auto info = bw.write(info_t{}); 73 | bw.write(recirc_t{}); 74 | 75 | for (uint32_t i = 0; i < recircs; ++i) { 76 | auto instr = bw.write(recirc_t{}); 77 | instr->type.set_stop(true); 78 | } 79 | if (recircs > 0) { 80 | info->multipass = 1; 81 | info->locks = lock_t{1, 1}; 82 | } 83 | 84 | bw.write(InstrType_t::STOP()); }, [&](auto& br) { 85 | auto info = br.template read(); 86 | std::cout << *info << '\n'; 87 | for (uint32_t i = 0; i == 0 || i < recircs; ++i) { 88 | auto recirc = br.template read(); 89 | std::cout << *recirc << '\n'; 90 | } }); 91 | 92 | 93 | return 0; 94 | } -------------------------------------------------------------------------------- /switch_src/lock_manager/codegen: -------------------------------------------------------------------------------- 1 | ../02_codegen/ -------------------------------------------------------------------------------- /switch_src/smallbank/codegen/__init__.py: -------------------------------------------------------------------------------- 1 | from .snippet import Snippet 2 | from .indent import indent 3 | from .util import * 4 | from .blocks import * 
# P4 source template that declares a `RegisterAction` on the register named
# by `${reg_name}`. The class docstring is the template text itself (Snippet
# renders `__doc__`), so it is runtime data and must not be reworded.
#
# Placeholders:
#   in_type, idx_type, out_type -- each defaults to 'bit<32>'
#   reg_name, name, body        -- no default; must be supplied as attributes
class RegisterAction(Snippet):
    '''\
    RegisterAction<${in_type='bit<32>'}, ${idx_type='bit<32>'}, ${out_type='bit<32>'}>(${reg_name}) ${name} = {
        void apply(inout ${in_type='bit<32>'} value, out ${out_type='bit<32>'} rv) {
            ${body}
        }
    };
    '''
# Top-level P4 program template. The class docstring is the template text
# itself (it is rendered through Snippet), so it is runtime data.
#
# Placeholders, none of which has a default: ingress_headers, utils,
# ingress_parser, ingress, egress_headers, egress_parser, egress.
# The rendered program ends by instantiating `Pipeline(...) pipe` and
# `Switch(pipe) main`.
#
# NOTE(review): the three `#include` directives below carry no operand in
# this copy of the file. Presumably they name the core library and the
# Tofino 2 / Tofino 1 architecture headers -- confirm against the original.
class P4Program(Snippet):
    '''\
    #include

    #if __TARGET_TOFINO__ == 2
    #include
    #else
    #include
    #endif

    /************************************************************/
    // HEADERS
    /************************************************************/
    ${ingress_headers}

    /************************************************************/
    // UTILS
    /************************************************************/
    ${utils}


    /************************************************************/
    // PARSER
    /************************************************************/
    ${ingress_parser}


    /************************************************************/
    // INGRESS
    /************************************************************/
    ${ingress}


    /************************************************************/
    // EGRESS HEADERS
    /************************************************************/
    ${egress_headers}

    /************************************************************/
    // EGRESS PARSER
    /************************************************************/
    ${egress_parser}

    /************************************************************/
    // EGRESS
    /************************************************************/
    ${egress}


    Pipeline(
        IngressParser(),
        Ingress(),
        IngressDeparser(),
        EgressParser(),
        Egress(),
        EgressDeparser()
    ) pipe;

    Switch(pipe) main;

    '''
def indent(snippet, unit=' '):
    """Re-indent brace-delimited (P4/C-like) source text.

    Every line is stripped and re-emitted with ``unit`` repeated once per
    nesting level. Structure is detected on the part of the line before any
    ``//`` comment:

    * a line ending in ``{`` (or starting with ``#if``/``#else``) indents the
      lines that follow by one level;
    * a line starting with ``}`` (or ``#else``/``#endif``) is itself moved
      back one level; a line ending in ``{}`` changes nothing;
    * a line ending in ``(`` opens a parameter list indented two levels; it
      is closed either by a line ending in ``) {`` (the body then sits one
      level deeper than the opening line) or by a line ending in ``;``.

    Args:
        snippet: a single snippet (anything with ``__str__``, including a
            plain string) or an iterable of such snippets.
        unit: the string used for one indentation level.
            NOTE(review): the default mirrors the literal found in this copy
            of the file; confirm the intended width.

    Returns:
        The re-indented text, always terminated by a newline.
    """
    # A plain string is iterable, but it is one snippet, not a collection of
    # snippets, so it must not be walked character by character.
    if isinstance(snippet, Iterable) and not isinstance(snippet, str):
        lines = [ln for part in snippet for ln in str(part).split('\n')]
    else:
        lines = str(snippet).split('\n')

    indentation = 0
    in_params = False
    for i, line in enumerate(lines):
        line = line.strip()
        # Ignore trailing `//` comments when looking for structure.
        nc_line = line.split('//', 1)[0].strip()

        # Adjustments that apply to the current line itself.
        if re.search(r'{\s*}$', nc_line):
            pass
        elif re.search(r'^}', nc_line):
            indentation -= 1
        elif re.search(r'^#(?:else|endif)', nc_line):
            indentation -= 1
        elif re.search(r';$', nc_line) and in_params:
            in_params = False
            indentation -= 2

        newline = f'{unit * indentation}{line}'

        # Adjustments that apply to the lines after the current one.
        if re.search(r'\($', nc_line):
            in_params = True
            indentation += 2
        elif re.search(r'\)\s*{$', nc_line) and in_params:
            in_params = False
            indentation -= 1
        elif re.search(r'{$', nc_line):
            indentation += 1
        elif re.search(r'^#(?:if|else)', nc_line):
            indentation += 1

        lines[i] = newline

    lines.append('')
    return '\n'.join(lines)
def write(output, code):
    """Indent *code*, write it to *output* and return its line count.

    The returned count is the number of newline characters in the indented
    text plus one.
    """
    formatted = indent(code)
    output.write(formatted)
    # Splitting on '\n' yields exactly one more piece than there are newlines.
    return len(formatted.split('\n'))
# Builds the stand-alone test programs. Every name in BINS is a binary built
# from the same-named .cpp in this directory.
CXX=g++
CFLAGS=-std=c++2a -O0 -g -MD -Wall -Wextra -fmax-errors=1 -I. -I../src/ -fsanitize=address
# CFLAGS=-std=c++2a -O3 -g -MD -Wall -Wextra -fmax-errors=1 -I. -I../src
LIBS=-pthread


OBJS = #main.o


BINS = access_dist nolock_test nurand shared_lock tpcc_neworder zipf


.PHONY: all clean
all: $(BINS)


%.o: %.cpp #$(DEPS)
	$(CXX) -c -o $@ $< $(CFLAGS)

# main: $(OBJS)
# 	$(CXX) -o $@ $^ $(CFLAGS) $(LIBS)


$(BINS): %: %.o $(OBJS)
	$(CXX) -o $@ $^ $(CFLAGS) $(LIBS)


clean:
	rm -f *.o *.d *~ $(BINS)

# OBJS is empty, so the per-binary .d files written by -MD must be included
# explicitly; otherwise header edits never trigger a rebuild.
-include $(OBJS:.o=.d) $(addsuffix .d,$(BINS))
def avg_rtt(filename):
    """Return ``(min, avg, max)`` round-trip time for a capture file.

    The capture is consumed as consecutive (request, response) frame pairs.
    Each pair's timestamp difference is scaled by 1,000,000 (microseconds,
    given timestamps in seconds).

    Args:
        filename: path of the pcap file to read.

    Returns:
        A tuple ``(min_us, avg_us, max_us)``.

    Raises:
        ValueError: if the capture holds no complete request/response pair.
        AssertionError: if a pair fails one of the consistency checks below.
    """
    rtts_us = []

    # Context manager so the capture is closed even when a check fails.
    with open(filename, 'rb') as f:
        pcap = dpkt.pcap.Reader(f)

        for (req_ts, req_buf), (res_ts, res_buf) in pairs(pcap):
            req_eth = dpkt.ethernet.Ethernet(req_buf)
            res_eth = dpkt.ethernet.Ethernet(res_buf)

            # Both frames must carry EtherType 0x1000 and mirror each
            # other's source/destination addresses.
            assert req_eth.type == 0x1000 and res_eth.type == 0x1000
            assert req_eth.src == res_eth.dst
            assert req_eth.dst == res_eth.src

            # The trailing 8 bytes must be identical in both frames.
            assert req_buf[-8:] == res_buf[-8:]

            rtts_us.append((res_ts - req_ts) * 1000000)

    if not rtts_us:
        # Previously surfaced as a bare ZeroDivisionError.
        raise ValueError(f'{filename}: no request/response pairs found')

    avg_us = sum(rtts_us) / len(rtts_us)
    return min(rtts_us), avg_us, max(rtts_us)
class P4DBPacket:
    """One dissected P4DB packet.

    Wraps a timestamp and a dict of dissected fields. Reading an attribute
    ``pkt.<name>`` returns the dict entry ``'p4db.<name>'`` or ``None`` when
    it is absent. ``pkt.msgtype`` is additionally translated to its symbolic
    name through the module-level ``msg_type`` table.
    """

    def __init__(self, ts, data):
        """Store the timestamp as a float and keep the field dict."""
        self.timestamp = float(ts)
        self.__data = data

    def __getattr__(self, key):
        """Resolve ``key`` against the wrapped field dict.

        Only called when normal attribute lookup fails.

        Raises:
            AttributeError: for names starting with an underscore.
        """
        # Private and dunder names are never packet fields. Refusing them
        # also stops unbounded recursion when `__data` itself is missing
        # (for example on an instance created without running __init__).
        if key.startswith('_'):
            raise AttributeError(key)

        val = self.__data.get(f'p4db.{key}')
        if key == 'msgtype':
            # A packet without a type field is reported as unknown rather
            # than failing on int(None).
            if val is None:
                return 'UNKOWN_TYPE'
            return msg_type.get(int(val), 'UNKOWN_TYPE')
        return val
class Analyzer:
    """Count processed packets per message type and print the totals once.

    The totals are printed by ``report()``. ``__del__`` still prints them as a
    fallback when ``report()`` was never called, but Python does not guarantee
    that finalizers run at interpreter exit, so prefer calling ``report()``.
    """

    def __init__(self):
        # State used by the LockReq<->LockGrant RTT measurement that is
        # currently commented out in process_pkt.
        self.pairs = {}
        self.avg = []
        self.min = 0xffffffff
        self.max = 0
        # Message-type name -> number of packets seen.
        self.types = defaultdict(int)
        # Set last: its presence tells __del__ that __init__ completed.
        self._reported = False

    def process_pkt(self, packet):
        """Account for one packet; only ``packet.msgtype`` is read."""
        # if packet.msgtype == 'LOCK_REQUEST':
        #     key = (packet.transactionNumber, packet.lockId)
        #     self.pairs[key] = packet.timestamp
        # elif packet.msgtype == 'LOCK_GRANT':
        #     key = (packet.transactionNumber, packet.lockId)
        #     rtt = packet.timestamp - self.pairs[key]
        #     del self.pairs[key]

        #     rtt *= 1e6
        #     # print(f'rtt={rtt:8.2f} µs')
        #     self.min = min(self.min, rtt)
        #     self.max = max(self.max, rtt)
        #     self.avg.append(rtt)

        #     if len(self.avg) == 1024:
        #         avg_rtt = sum(self.avg) / len(self.avg)
        #         print(f'LockReq<->LockGrant RTT min={self.min:8.2f}µs max={self.max:8.2f}µs avg={avg_rtt:8.2f}µs')
        #         self.avg = []
        #         self.min = 0xffffffff
        #         self.max = 0

        self.types[packet.msgtype] += 1

    def report(self):
        """Print one ``<type> --> <count> times`` line per message type."""
        for k, v in self.types.items():
            print(f'{k} --> {v} times')
        self._reported = True

    def __del__(self):
        # Fallback for callers that rely on the summary appearing on
        # destruction. Skipped if report() already ran or __init__ never
        # finished (attribute missing).
        if not getattr(self, '_reported', True):
            self.report()
// Spinning reader/writer lock packed into a single 32-bit word:
//   state == 0                  free
//   state == EX                 held exclusively
//   EX_PENDING_MASK bit set     a writer is waiting; lock_shared() backs off
//   remaining low bits          incremented/decremented once per shared holder
struct SharedLock {
    static constexpr uint32_t EX = 0xffffffff;
    static constexpr uint32_t EX_PENDING_MASK = 0x80000000;

    // Start unlocked explicitly rather than relying on how the standard
    // library default-constructs std::atomic.
    std::atomic<uint32_t> state{0};

    // Spin-wait hint. Emits nothing on architectures without a known hint,
    // so the lock still compiles there.
    static void cpu_relax() {
#if defined(__x86_64__) || defined(__i386__)
        __asm volatile ("pause" ::: );
#elif defined(__aarch64__)
        __asm volatile ("yield" ::: );
#endif
    }

    // Acquire exclusively; spins until there is no holder.
    void lock() {
        uint32_t expected = 0; // try fast-path with 0, then mark as write-pending
        while (!state.compare_exchange_weak(expected, EX, std::memory_order_acquire)) {
            // Publish the pending bit so lock_shared() stops admitting new
            // readers, then retry expecting "pending, no holders".
            state.fetch_or(EX_PENDING_MASK, std::memory_order_relaxed);
            cpu_relax();
            expected = EX_PENDING_MASK;
        }
    }

    // Release an exclusive hold; also clears any pending bit.
    void unlock() {
        state.store(0, std::memory_order_release);
    }

    // Acquire shared; spins while the pending bit is set.
    void lock_shared() {
        uint32_t val;
        do {
            // wait for no pending writes and retrieve current reference count
            while ((val = state.load(std::memory_order_relaxed)) & EX_PENDING_MASK) {
                cpu_relax();
            }
        } while (!state.compare_exchange_weak(val, val + 1, std::memory_order_acquire));
    }

    // Release one shared hold.
    void unlock_shared() {
        state.fetch_sub(1, std::memory_order_release);
    }
};
54 | 55 | SharedLock lock; 56 | // std::shared_mutex lock; 57 | bool stop = false; 58 | pthread_barrier_t barrier; 59 | pthread_barrier_init(&barrier, NULL, num_readers+num_writers+1); 60 | 61 | std::atomic reads{0}; 62 | std::thread readers[num_readers]; 63 | for (size_t i = 0; i < num_readers; i++) { 64 | readers[i] = std::thread([&]() { 65 | pthread_barrier_wait(&barrier); 66 | while (!stop) { 67 | lock.lock_shared(); 68 | // if (reads % 100000 == 0) 69 | // std::cout << "reader lock " + std::to_string(lock.state.load() & ~lock.EX_PENDING_MASK) + '\n'; 70 | ++reads; 71 | lock.unlock_shared(); 72 | } 73 | }); 74 | } 75 | 76 | std::atomic writes{0}; 77 | std::thread writers[num_writers]; 78 | for (size_t i = 0; i < num_writers; i++) { 79 | writers[i] = std::thread([&]() { 80 | pthread_barrier_wait(&barrier); 81 | while(!stop) { 82 | lock.lock(); 83 | // std::cout << "writer lock\n"; 84 | ++writes; 85 | lock.unlock(); 86 | } 87 | }); 88 | } 89 | 90 | pthread_barrier_wait(&barrier); 91 | std::cout << "Press [ENTER] to stop.\n"; 92 | std::cin.get(); 93 | stop = true; 94 | 95 | for (size_t i = 0; i < num_readers; i++) { 96 | readers[i].join(); 97 | } 98 | for (size_t i = 0; i < num_writers; i++) { 99 | writers[i].join(); 100 | } 101 | 102 | std::cout << "writes=" << writes << " reads=" << reads << " of that fraction " << ((double) writes) / (writes+reads) << " writes" << '\n'; 103 | 104 | return 0; 105 | } 106 | -------------------------------------------------------------------------------- /tests/switch_lock.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | ''' 4 | I need to implement something equivalent on the P4-Switch using only 5 | 6 | (register ± header/metadata ± constant) >/ 0 or LOCK[1] > 0: 41 | return True 42 | return False 43 | 44 | 45 | LOCK = [0, 0] 46 | assert(not is_locked()) 47 | LOCK = [0, 0] 48 | assert(try_lock(0, 1)) 49 | assert(not try_lock(0, 1)) 50 | assert(not try_lock(1, 1)) 51 | 
assert(is_locked()) 52 | 53 | LOCK = [0, 0] 54 | assert(try_lock(1, 0)) 55 | assert(not try_lock(1, 0)) 56 | assert(not try_lock(1, 1)) 57 | assert(is_locked()) 58 | 59 | LOCK = [0, 0] 60 | assert(try_lock(1, 1)) 61 | assert(not try_lock(0, 1)) 62 | assert(not try_lock(1, 0)) 63 | assert(not try_lock(1, 1)) 64 | 65 | assert(is_locked()) 66 | 67 | 68 | ''' 69 | struct pair { 70 | bit<32> left; 71 | bit<32> right; 72 | } 73 | 74 | Register>(1) switch_lock; 75 | RegisterAction, bit<1>>(switch_lock) try_lock = { 76 | void apply(inout pair value, out bit<1> rv) { 77 | if ((hdr.info.left + value.left) == 2) { 78 | rv = 0; 79 | } else if ((hdr.info.right + value.right) == 2) { 80 | rv = 0; 81 | } else { 82 | rv = 1; 83 | value.left = value.left + hdr.info.left; 84 | value.right = value.right + hdr.info.right; 85 | } 86 | } 87 | }; 88 | RegisterAction, bit<1>>(switch_lock) unlock = { 89 | void apply(inout pair value, out bit<1> rv) { 90 | value.left = value.left - hdr.info.left; 91 | value.right = value.right - hdr.info.right; 92 | } 93 | }; 94 | RegisterAction, bit<1>>(switch_lock) is_locked = { 95 | void apply(inout pair value, out bit<1> rv) { 96 | if (value.left > 0 || value.right > 0) { 97 | rv = 1; 98 | } else { 99 | rv = 0; 100 | } 101 | } 102 | }; 103 | ''' 104 | -------------------------------------------------------------------------------- /tests/tpcc_neworder.cpp: -------------------------------------------------------------------------------- 1 | #include "benchmarks/tpcc/random.hpp" 2 | #include "benchmarks/tpcc/utils.hpp" 3 | #include "db/defs.hpp" 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | // g++ -std=c++20 -g -O3 -march=native -pthread -I../src tpcc_neworder.cpp -o tpcc_neworder && ./tpcc_neworder 11 | 12 | 13 | int main(void) { 14 | using namespace benchmark::tpcc; 15 | 16 | 17 | constexpr uint64_t SAMPLES = 5'000'000; 18 | 19 | 20 | uint64_t num_warehouses = 8; 21 | TPCCHotInfo hot_info{num_warehouses, 10 * (65536 / 2 + 32768 / 4)}; 
-- Dissects one P4DB message into a "P4DB Protocol Data" subtree.
--
-- Field offsets (all little-endian):
--   0..3   message type      4..7   size
--   8..11  clientGlobalRank  16..23 transactionNumber
--   LOCK_GRANT / LOCK_REQUEST add lockId (24..31) and lockMode (32..35);
--   LOCK_GRANT additionally adds granted (36..39).
function p4db_protocol.dissector(buffer, pinfo, tree)
    -- `local` keeps the length out of the global environment.
    local length = buffer:len()
    -- The message type occupies the first 4 bytes; nothing can be decoded
    -- without it.
    if length < 4 then return end

    pinfo.cols.protocol = p4db_protocol.name

    local subtree = tree:add(p4db_protocol, buffer(), "P4DB Protocol Data")

    -- Adds a little-endian field only when the captured bytes cover it, so a
    -- truncated frame yields a partial tree instead of a range error.
    local function add_le(field, offset, size)
        if offset + size <= length then
            return subtree:add_le(field, buffer(offset, size))
        end
    end

    -- Named type_id so the builtin `type` function is not shadowed.
    local type_id = buffer(0,4):le_int()
    local type_name = get_msg_type(type_id)
    subtree:add_le(msg_type, buffer(0,4)):append_text(" (" .. type_name .. ")")

    add_le(msg_size, 4, 4)
    add_le(clientGlobalRank, 8, 4)
    add_le(transactionNumber, 16, 8)

    if type_name == 'LOCK_GRANT' then
        add_le(lockId, 24, 8)
        add_le(mode, 32, 4)
        add_le(granted, 36, 4)
    elseif type_name == 'LOCK_REQUEST' then
        add_le(lockId, 24, 8)
        add_le(mode, 32, 4)
    end
end