├── .ci └── check-format.sh ├── .clang-format ├── .github └── workflows │ └── main.yml ├── .gitignore ├── .gitmodules ├── LICENSE ├── Makefile ├── README.md ├── assets ├── demo.gif ├── light-demo.gif ├── lwan-demo.gif └── syscall-flow.png ├── configs ├── lighttpd.conf ├── lwan.conf └── nginx.conf ├── downloads └── .placeholder ├── include └── linux │ └── esca.h ├── lkm ├── Makefile └── esca.c ├── patches ├── lighttpd.patch ├── lwan_main.patch ├── lwan_thread.patch ├── nginx_module.patch └── nginx_process.patch ├── scripts ├── lighttpd.sh ├── lwan.sh └── ngx.sh └── wrapper ├── Makefile ├── lighttpd-preload.c ├── lwan-preload.c ├── nginx-preload.c ├── preload.h ├── shim.c ├── shim.h └── wrapper.c /.ci/check-format.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | SOURCES=$(find $(git rev-parse --show-toplevel) | egrep "\.(cpp|h)\$") 4 | 5 | set -x 6 | 7 | for file in ${SOURCES}; 8 | do 9 | clang-format-12 ${file} > expected-format 10 | diff -u -p --label="${file}" --label="expected coding style" ${file} expected-format 11 | done 12 | exit $(clang-format-12 --output-replacements-xml ${SOURCES} | egrep -c "") 13 | -------------------------------------------------------------------------------- /.clang-format: -------------------------------------------------------------------------------- 1 | BasedOnStyle: Chromium 2 | Language: Cpp 3 | MaxEmptyLinesToKeep: 3 4 | IndentCaseLabels: false 5 | AllowShortIfStatementsOnASingleLine: false 6 | AllowShortCaseLabelsOnASingleLine: false 7 | AllowShortLoopsOnASingleLine: false 8 | DerivePointerAlignment: false 9 | PointerAlignment: Right 10 | SpaceAfterCStyleCast: true 11 | TabWidth: 4 12 | UseTab: Never 13 | IndentWidth: 4 14 | BreakBeforeBraces: Linux 15 | AccessModifierOffset: -4 16 | ForEachMacros: 17 | - foreach 18 | - Q_FOREACH 19 | - BOOST_FOREACH 20 | - list_for_each 21 | - list_for_each_safe 22 | - list_for_each_entry 23 | - list_for_each_entry_safe 24 | 
- hlist_for_each_entry 25 | - rb_list_foreach 26 | - rb_list_foreach_safe 27 | - EV_FOREACH 28 | - LIST_FOREACH 29 | -------------------------------------------------------------------------------- /.github/workflows/main.yml: -------------------------------------------------------------------------------- 1 | name: Automated Tests 2 | 3 | on: [push, pull_request] 4 | 5 | jobs: 6 | ESCA: 7 | runs-on: ubuntu-20.04 8 | steps: 9 | - uses: actions/checkout@v3.0.2 10 | - name: make-nginx 11 | run: make TARGET=nginx 12 | - name: make-lighttpd 13 | run: make TARGET=lighttpd 14 | 15 | Style: 16 | runs-on: ubuntu-20.04 17 | steps: 18 | - name: checkout code 19 | uses: actions/checkout@v3.0.2 20 | - name: style check 21 | run: | 22 | sudo apt-get install -q -y clang-format-12 23 | sh .ci/check-format.sh 24 | shell: bash 25 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Prerequisites 2 | *.d 3 | 4 | # Object files 5 | *.o 6 | *.ko 7 | *.obj 8 | *.elf 9 | 10 | # Linker output 11 | *.ilk 12 | *.map 13 | *.exp 14 | 15 | # Precompiled Headers 16 | *.gch 17 | *.pch 18 | 19 | # Libraries 20 | *.lib 21 | *.a 22 | *.la 23 | *.lo 24 | 25 | # Shared objects (inc. 
Windows DLLs) 26 | *.dll 27 | *.so 28 | *.so.* 29 | *.dylib 30 | 31 | # Executables 32 | *.exe 33 | *.out 34 | *.app 35 | *.i*86 36 | *.x86_64 37 | *.hex 38 | 39 | # Debug files 40 | *.dSYM/ 41 | *.su 42 | *.idb 43 | *.pdb 44 | 45 | # Kernel Module Compile Results 46 | .cache.mk 47 | *.mod* 48 | *.cmd 49 | .tmp_versions/ 50 | modules.order 51 | Module.symvers 52 | Mkfile.old 53 | dkms.conf 54 | 55 | # Generated files 56 | downloads/ 57 | wrapper/preload.c 58 | lkm/syscall-table.h 59 | config 60 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "web"] 2 | path = web 3 | url = https://github.com/eecheng87/web.git 4 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021-2022 National Cheng Kung University, Taiwan. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | WRK_SOURCE := https://github.com/wg/wrk/archive/refs/heads/master.zip 2 | WRK_NAME := wrk-master 3 | WRK_PATH := downloads/$(WRK_NAME) 4 | WRK := wrk 5 | 6 | NGX_SOURCE := http://nginx.org/download/nginx-1.22.0.tar.gz 7 | NGX_NAME := nginx-1.22.0 8 | NGX_PATH := downloads/$(NGX_NAME) 9 | NGX := nginx 10 | 11 | LIGHTY_SOURCE := https://github.com/lighttpd/lighttpd1.4/archive/refs/tags/lighttpd-1.4.58.tar.gz 12 | LIGHTY_ZIP_NAME := lighttpd-1.4.58 13 | LIGHTY_NAME := lighttpd1.4-lighttpd-1.4.58 14 | LIGHTY_PATH := downloads/$(LIGHTY_NAME) 15 | LIGHTY := lighttpd 16 | 17 | LWAN_SOURCE := https://github.com/lpereira/lwan/archive/master.zip 18 | LWAN_NAME := lwan-master 19 | LWAN_PATH := downloads/$(LWAN_NAME) 20 | LWAN := lwan 21 | 22 | LIBDUMMY_PATH := $(shell find $(shell pwd) -type f -name "libdummy.so") | sed 's_/_\\/_g' 23 | PWD := $(shell pwd) 24 | 25 | OUT := downloads 26 | 27 | all: lkm wrapper 28 | 29 | .PHONY: $(WRK) $(NGX) $(LIGHTY) $(LWAN) config lkm wrapper clean 30 | 31 | ifeq ($(strip $(TARGET)),nginx) 32 | TARGET = nginx 33 | else ifeq ($(strip $(TARGET)),lighttpd) 34 | TARGET = lighttpd 35 | else 36 | TARGET = lwan 37 | endif 38 | 39 | config: 40 | -$(RM) wrapper/preload.c 41 | (cd wrapper ; ln -s $(TARGET)-preload.c preload.c) 42 | touch $@ 43 | 44 | $(WRK): 45 | @echo "download wrk..." 46 | wget $(WRK_SOURCE) 47 | unzip -d $(OUT) master.zip 48 | $(RM) master.zip 49 | sudo $(MAKE) -j4 -C $(OUT)/$(WRK_NAME) all 50 | 51 | $(NGX): 52 | @echo "download nginx..." 
53 | wget $(NGX_SOURCE) 54 | mkdir $(NGX_PATH) 55 | tar -zxvf $(NGX_NAME).tar.gz -C $(OUT) 56 | $(RM) $(NGX_NAME).tar.gz 57 | mkdir local 58 | cd $(NGX_PATH) && ./configure --prefix=$(PWD)/local 59 | scripts/ngx.sh $(NGX_PATH) 60 | cd $(OUT) && patch -p1 < ../patches/nginx_module.patch && patch -p1 < ../patches/nginx_process.patch 61 | cd $(NGX_PATH) && make && \ 62 | make install 63 | cp -f configs/nginx.conf local/conf/nginx.conf 64 | 65 | $(LIGHTY): 66 | @echo "download lighttpd..." 67 | wget $(LIGHTY_SOURCE) 68 | tar -zxvf $(LIGHTY_ZIP_NAME).tar.gz -C $(OUT) 69 | $(RM) $(LIGHTY_ZIP_NAME).tar.gz 70 | cd $(LIGHTY_PATH) && ./autogen.sh && ./configure --without-pcre 71 | scripts/lighttpd.sh $(LIGHTY_PATH) 72 | cd $(OUT) && patch -p1 < ../patches/lighttpd.patch 73 | cd $(LIGHTY_PATH) && sudo make install 74 | cp -f configs/lighttpd.conf $(LIGHTY_PATH)/src/lighttpd.conf 75 | 76 | $(LWAN): 77 | @echo "download lwan..." 78 | wget $(LWAN_SOURCE) 79 | unzip -d $(OUT) master.zip 80 | $(RM) master.zip 81 | scripts/lwan.sh $(LWAN_PATH) 82 | cd $(OUT) && patch -p1 < ../patches/lwan_thread.patch && patch -p1 < ../patches/lwan_main.patch 83 | cd $(LWAN_PATH) && mkdir build && cd build && \ 84 | cmake .. 
-DCMAKE_BUILD_TYPE=Release && make 85 | cp -f configs/lwan.conf $(LWAN_PATH)/lwan.conf 86 | 87 | nginx-launch: 88 | ./downloads/$(NGX_NAME)/objs/nginx 89 | 90 | nginx-esca-launch: 91 | LD_PRELOAD=wrapper/wrapper.so ./downloads/$(NGX_NAME)/objs/nginx 92 | 93 | lighttpd-launch: 94 | ./$(LIGHTY_PATH)/src/lighttpd -D -f $(LIGHTY_PATH)/src/lighttpd.conf 95 | 96 | lighttpd-esca-launch: 97 | LD_PRELOAD=wrapper/wrapper.so ./$(LIGHTY_PATH)/src/lighttpd -D -f $(LIGHTY_PATH)/src/lighttpd.conf 98 | 99 | lwan-launch: 100 | ./downloads/$(LWAN_NAME)/build/src/bin/lwan/lwan -c $(LWAN_PATH)/lwan.conf 101 | 102 | lwan-esca-launch: 103 | LD_PRELOAD=wrapper/wrapper.so ./downloads/$(LWAN_NAME)/build/src/bin/lwan/lwan -c $(LWAN_PATH)/lwan.conf 104 | 105 | lkm: config 106 | sudo $(MAKE) -C $@ $(MAKECMDGOALS) 107 | 108 | wrapper: config 109 | $(MAKE) -C $@ $(MAKECMDGOALS) 110 | 111 | load-lkm: 112 | sudo insmod lkm/esca.ko 113 | 114 | unload-lkm: 115 | sudo rmmod esca 116 | 117 | clean: 118 | rm -rf $(WRK_PATH) 119 | rm -rf $(NGX_PATH) 120 | rm -rf $(LIGHTY_PATH) 121 | rm -rf $(LWAN_PATH) 122 | rm -rf local 123 | $(MAKE) -C lkm clean 124 | $(MAKE) -C wrapper clean 125 | 126 | distclean: clean 127 | $(RM) wrapper/preload.c 128 | -$(RM) config 129 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Effective System Call Aggregation (ESCA) 2 | 3 | The main objective of this work was to reduce the per-syscall overhead through the use of effective syscall aggregation. 4 | For that purpose, ESCA takes advantages of system call batching and exploits the parallelism of event-driven applications by leveraging Linux I/O model to overcome the disadvantages of previous solutions. 5 | 6 | ESCA is capable of reducing the per-syscall overhead by up to 62% for embedded web servers. 
7 | Real-world highly concurrent event-driven applications such as Nginx and Redis are known to benefit from ESCA, along with full compatibility with Linux syscall semantics and functionalities. 8 | 9 | ## Prerequisite 10 | For Nginx and wrk: 11 | ```shell 12 | sudo apt install build-essential libpcre3 libpcre3-dev zlib1g zlib1g-dev 13 | sudo apt install libssl-dev libgd-dev libxml2 libxml2-dev uuid-dev 14 | sudo apt install autoconf automake libtool 15 | ``` 16 | 17 | ## Download project 18 | ```shell 19 | git clone https://github.com/eecheng87/ESCA 20 | cd ESCA 21 | ``` 22 | 23 | ## Build from source 24 | Compile files under directory `lkm/` and `wrapper/` (The default target is lwan) 25 | ```shell 26 | make TARGET= 27 | ``` 28 | 29 | ### Build adaptation target 30 | Build `wrk` 31 | ```shell 32 | make wrk 33 | ``` 34 | 35 | Download and build nginx 36 | ```shell 37 | make nginx 38 | ``` 39 | 40 | Download and build lighttpd 41 | ```shell 42 | make lighttpd 43 | ``` 44 | 45 | Download and build lwan 46 | ```shell 47 | make lwan 48 | ``` 49 | 50 | ## Testing 51 | 52 | ### Launch Nginx 53 | Choose either 54 | ```shell 55 | make nginx-launch # origin nginx 56 | ``` 57 | or 58 | 59 | ```shell 60 | make load-lkm 61 | make nginx-esca-launch # nginx-esca 62 | ``` 63 | 64 | ### Launch lighttpd 65 | Choose either 66 | ```shell 67 | make lighttpd-launch # origin lighttpd 68 | ``` 69 | or 70 | 71 | ```shell 72 | make load-lkm 73 | make lighttpd-esca-launch # lighttpd-esca 74 | ``` 75 | 76 | ### Launch lwan 77 | Choose either 78 | ```shell 79 | make lwan-launch # origin lwan 80 | ``` 81 | or 82 | 83 | ```shell 84 | make load-lkm 85 | make lwan-esca-launch # lwan-esca 86 | ``` 87 | 88 | ### Download workloads 89 | ```shell 90 | git submodule init 91 | git submodule update 92 | ``` 93 | 94 | ### Benchmarking 95 | ```shell 96 | # nginx is at port 8081; lighttpd is at port 3000; lwan is at port 8080 97 | downloads/wrk-master/wrk -c 50 -d 5s -t 4 http://localhost:8081/a20.html 98 
| ``` 99 | 100 | ### Demo 101 | ![image](assets/demo.gif) 102 | 103 | Nginx-ESCA outperforms vanilla Nginx by about 11%. 104 | 105 | ![image](assets/light-demo.gif) 106 | 107 | lighttpd-ESCA outperforms vanilla lighttpd by about 13%. 108 | 109 | ![image](assets/lwan-demo.gif) 110 | 111 | lwan-ESCA outperforms vanilla lwan by about 30%. 112 | 113 | ## Technical Description 114 | The code section enclosed by `batch_start()` and `batch_flush()` is called a batching segment. 115 | A single application may contain more than one batching segment. 116 | Compared with typical syscalls, ESCA eliminates mode switches in batching segments by decoupling syscalls. 117 | Instead of switching to the kernel and executing the corresponding service routine, syscalls in a batching segment only record their syscall ID and arguments in the shared table. 118 | After `batch_flush()` is invoked, ESCA finally switches to kernel mode, executes all syscalls in the shared table, and then switches back to user mode. 119 | 120 | ### Typical system call flow 121 | ![image](assets/syscall-flow.png) 122 | 123 | 1. The user application invokes a system call. 124 | 2. The CPU switches from user mode to kernel mode via an interrupt. 125 | 3. The kernel searches the interrupt vector table and calls the interrupt service routine (ISR). 126 | 4. The interrupt service routine looks up the system call table. 127 | 5. The system call service routine is called. 128 | 6. After the system call service routine finishes, control switches back to user mode. 129 | 130 | ### System Call wrappers 131 | Because system calls are invoked by assembly routines, wrapping the assembly code in wrapper functions increases the readability of the program. 
132 | e.g., 133 | ```c 134 | #define SYSCALL(name, a1, a2, a3, a4, a5, a6) \ 135 | ({ \ 136 | long result; \ 137 | long __a1 = (long) (a1), __a2 = (long) (a2), __a3 = (long) (a3); \ 138 | long __a4 = (long) (a4), __a5 = (long) (a5), __a6 = (long) (a6); \ 139 | register long _a1 asm("rdi") = __a1; \ 140 | register long _a2 asm("rsi") = __a2; \ 141 | register long _a3 asm("rdx") = __a3; \ 142 | register long _a4 asm("r10") = __a4; \ 143 | register long _a5 asm("r8") = __a5; \ 144 | register long _a6 asm("r9") = __a6; \ 145 | asm volatile("syscall\n\t" \ 146 | : "=a"(result) \ 147 | : "0"(name), "r"(_a1), "r"(_a2), "r"(_a3), "r"(_a4), \ 148 | "r"(_a5), "r"(_a6) \ 149 | : "memory", "cc", "r11", "cx"); \ 150 | (long) result; \ 151 | }) 152 | 153 | #define SYSCALL1(name, a1) SYSCALL(name, a1, 0, 0, 0, 0, 0) 154 | #define SYSCALL2(name, a1, a2) SYSCALL(name, a1, a2, 0, 0, 0, 0) 155 | #define SYSCALL3(name, a1, a2, a3) SYSCALL(name, a1, a2, a3, 0, 0, 0) 156 | #define SYSCALL4(name, a1, a2, a3, a4) SYSCALL(name, a1, a2, a3, a4, 0, 0) 157 | #define SYSCALL5(name, a1, a2, a3, a4, a5) SYSCALL(name, a1, a2, a3, a4, a5, 0) 158 | #define SYSCALL6(name, a1, a2, a3, a4, a5, a6) \ 159 | SYSCALL(name, a1, a2, a3, a4, a5, a6) 160 | 161 | /* wrapper function */ 162 | static inline void *brk(void *addr) 163 | { 164 | return (void *) SYSCALL1(__NR_brk, addr); 165 | } 166 | ``` 167 | 168 | ### System Call Hooks 169 | ESCA locates the address of the syscall table through the kernel symbol table and replace the syscall table entry with our customized handler. 170 | Also, it is necessary to clear the write protection bit of the control register if modifying the syscall table is required. 171 | 172 | Two system calls ESCA intercepts 173 | * `sys_batch`: iterates shared table and execute all syscalls recorded in the shared table, and then switches back to user mode. 174 | * `sys_register`: maps userspace shared table to kernel space memory and initialization. 
175 | 176 | Replace system call handlers: 177 | ```c 178 | // find out syscall table address 179 | scTab = (void **) (smSCTab + ((char *) &system_wq - smSysWQ)); 180 | // clear write protection bit 181 | allow_writes(); 182 | 183 | /* backup original system call service routine */ 184 | sys_oldcall0 = scTab[__NR_batch_flush]; 185 | sys_oldcall1 = scTab[__NR_register]; 186 | 187 | /* hooking */ 188 | scTab[__NR_batch_flush] = sys_batch; 189 | scTab[__NR_register] = sys_register; 190 | 191 | // set write protection bit 192 | disallow_writes(); 193 | ``` 194 | 195 | ### Share the same physical address space between kernel and user space 196 | ESCA uses `get_user_pages` to obtain the physical pages that back the userspace memory, and uses `kmap` to map those physical pages into the kernel address space. 197 | In this way, data is shared without being copied, and the mapping is set up only once. 198 | 199 | * The `batch_register` syscall maps the userspace shared table into kernel-space memory and initializes it. 200 | ```c 201 | asmlinkage long sys_register(const struct pt_regs *regs) 202 | { 203 | int n_page, i, j; 204 | unsigned long p1 = regs->di; 205 | 206 | /* map batch table from user-space to kernel */ 207 | n_page = get_user_pages( 208 | (p1), /* Start address to map */ 209 | MAX_THREAD_NUM, /* Number of pinned pages. 
4096 bytes in this machine */ 210 | FOLL_FORCE | FOLL_WRITE, /* Force flag */ 211 | pinned_pages, /* struct page ** pointer to pinned pages */ 212 | NULL); 213 | 214 | for (i = 0; i < MAX_THREAD_NUM; i++) 215 | batch_table[i] = (struct batch_entry *) kmap(pinned_pages[i]); 216 | 217 | /* initial table status */ 218 | for (j = 0; j < MAX_THREAD_NUM; j++) 219 | for (i = 0; i < MAX_ENTRY_NUM; i++) 220 | batch_table[j][i].rstatus = BENTRY_EMPTY; 221 | 222 | global_i = global_j = 0; 223 | 224 | main_pid = current->pid; 225 | 226 | return 0; 227 | } 228 | ``` 229 | 230 | ### Change the typical system call behavior 231 | To change the behavior of the syscall, when the application is executed, the syscall wrapper of glibc is replaced with our shared library through `LD_PRELOAD`. 232 | The customized syscall wrapper determines whether the system call is inside a `batch segment`. 233 | 1. Out of `batch segment`: call the original glibc syscall wrapper that we backed up. 234 | 2. In `batch segment`: record the syscall ID and arguments in the shared table. 235 | 236 | The function `dlsym()` takes a "handle" of a dynamic library returned by `dlopen()` and the null-terminated symbol name, returning the address where that symbol is loaded into memory. 237 | 238 | Because the system call handler is dynamically linked to the customized system call handler during execution, we should back up the original glibc system call handler with `dlsym()`. 239 | ```c 240 | __attribute__((constructor)) static void setup(void) 241 | { 242 | pgsize = getpagesize(); 243 | in_segment = 0; 244 | batch_num = 0; 245 | 246 | /* store glibc function */ 247 | real_writev = real_writev ? real_writev : dlsym(RTLD_NEXT, "writev"); 248 | real_shutdown = 249 | real_shutdown ? real_shutdown : dlsym(RTLD_NEXT, "shutdown"); 250 | real_sendfile = 251 | real_sendfile ? real_sendfile : dlsym(RTLD_NEXT, "sendfile"); 252 | real_send = 253 | real_send ? 
real_send : dlsym(RTLD_NEXT, "send"); 254 | 255 | global_i = global_j = 0; 256 | } 257 | ``` 258 | 259 | ## Citation 260 | 261 | Please see our [PDP 2022](https://pdp2022.infor.uva.es/) paper, available in the [IEEE Xplore](https://ieeexplore.ieee.org/abstract/document/9756707) digital library, and you can get a [preprint copy](https://eecheng87.github.io/ESCA/main.pdf). 262 | 263 | If you find this work useful in your research, please cite: 264 | ``` 265 | @inproceedings{cheng2022esca, 266 | author={Cheng, Yu-Cheng and Huang, Ching-Chun (Jim) and Tu, Chia-Heng}, 267 | booktitle={2022 30th Euromicro International Conference on Parallel, Distributed and Network-based Processing (PDP)}, 268 | title={ESCA: Effective System Call Aggregation for Event-Driven Servers}, 269 | year={2022}, 270 | pages={18-25}, 271 | doi={10.1109/PDP55904.2022.00012} 272 | } 273 | ``` 274 | 275 | ## License 276 | 277 | `ESCA` is released under the MIT license. Use of this source code is governed by 278 | a MIT-style license that can be found in the LICENSE file. 279 | 280 | ## Reference 281 | * B. M. Michelson, "Event-driven architecture overview," Patricia Seybold Group, vol. 2, no. 12, pp. 10–1571, 2006. 282 | * A. S. Rahul Jadhav, Zhen Cao, "Improved system call batching for network I/O," 2019. 283 | * A. Purohit, J. Spadavecchia, C. Wright, and E. Zadok, "Improving application performance through system call composition," 2003. 284 | * M. Rajagopalan, S. K. Debray, M. A. Hiltunen, and R. D. Schlichting, "System call clustering: A profile-directed optimization technique," 2002. 285 | * D. Hansen, [KAISER: unmap most of the kernel from userspace page tables](https://lwn.net/Articles/738997/), 2017. 
286 | -------------------------------------------------------------------------------- /assets/demo.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eecheng87/ESCA/5260fc44249f816f12d0d93df5f1d32417bb3317/assets/demo.gif -------------------------------------------------------------------------------- /assets/light-demo.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eecheng87/ESCA/5260fc44249f816f12d0d93df5f1d32417bb3317/assets/light-demo.gif -------------------------------------------------------------------------------- /assets/lwan-demo.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eecheng87/ESCA/5260fc44249f816f12d0d93df5f1d32417bb3317/assets/lwan-demo.gif -------------------------------------------------------------------------------- /assets/syscall-flow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eecheng87/ESCA/5260fc44249f816f12d0d93df5f1d32417bb3317/assets/syscall-flow.png -------------------------------------------------------------------------------- /configs/lighttpd.conf: -------------------------------------------------------------------------------- 1 | # correct path will be replaced by our script 2 | server.document-root = "/tmp/web" 3 | 4 | server.port = 3000 5 | 6 | mimetype.assign = ( 7 | ".html" => "text/html", 8 | ".txt" => "text/plain", 9 | ".jpg" => "image/jpeg", 10 | ".png" => "image/png" 11 | ) 12 | 13 | static-file.exclude-extensions = ( ".fcgi", ".php", ".rb", "~", ".inc" ) 14 | index-file.names = ( "index.html" ) 15 | 16 | $HTTP["host"] == "www2.example.org" { 17 | # correct path will be replaced by our script 18 | server.document-root = "/tmp/web" 19 | $HTTP["url"] =~ "^/download/" { 20 | dir-listing.activate = "enable" 21 | } 22 | } 23 | 
-------------------------------------------------------------------------------- /configs/lwan.conf: -------------------------------------------------------------------------------- 1 | # Timeout in seconds to keep a connection alive. 2 | keep_alive_timeout = 15 3 | 4 | # Set to true to not print any debugging messages. (Only effective in 5 | # release builds.) 6 | quiet = false 7 | 8 | # Value of "Expires" header. Default is 1 month and 1 week. 9 | expires = 1M 1w 10 | 11 | # Number of I/O threads. Default (0) is number of online CPUs. 12 | threads = 1 13 | 14 | # Disable HAProxy's PROXY protocol by default. Only enable if needed. 15 | proxy_protocol = false 16 | 17 | # Enable straitjacket by default. The `drop_capabilities` option is `true` 18 | # by default. Other options may require more privileges. 19 | straitjacket 20 | 21 | listener *:8080 22 | 23 | site { 24 | serve_files / { 25 | # correct path will be replaced by our script 26 | path = /tmp/web 27 | # When requesting for file.ext, look for a smaller/newer file.ext.gz, 28 | # and serve that instead if `Accept-Encoding: gzip` is in the 29 | # request headers. 
30 | serve precompressed files = true 31 | } 32 | } 33 | 34 | -------------------------------------------------------------------------------- /configs/nginx.conf: -------------------------------------------------------------------------------- 1 | worker_processes 1; 2 | 3 | events { 4 | worker_connections 1024; 5 | } 6 | 7 | daemon off; 8 | 9 | http { 10 | include mime.types; 11 | default_type application/octet-stream; 12 | sendfile on; 13 | keepalive_timeout 65; 14 | server { 15 | listen 8081; 16 | server_name localhost; 17 | location / { 18 | # correct path will be replaced by our script 19 | root /tmp/web; 20 | index index.html index.htm; 21 | } 22 | 23 | error_page 500 502 503 504 /50x.html; 24 | location = /50x.html { 25 | root html; 26 | } 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /downloads/.placeholder: -------------------------------------------------------------------------------- 1 | . -------------------------------------------------------------------------------- /include/linux/esca.h: -------------------------------------------------------------------------------- 1 | #ifndef LINUX_ESCA_H 2 | #define LINUX_ESCA_H /* and syscall_t __NR_batch for the kernel module. 
*/ 3 | 4 | #include /* __NR_* */ 5 | #define __NR_batch_flush 183 /* Hijack Andrew FS call slot for now */ 6 | #define __NR_register 184 /* Do register routine before using batch */ 7 | #define __NR_fpreg 403 8 | #define __NR_fpexit 404 9 | 10 | /* batch table entry info */ 11 | #define BENTRY_EMPTY 0 12 | #define BENTRY_BUSY 1 13 | 14 | #define MAX_THREAD_NUM 10 15 | #define MAX_ENTRY_NUM 64 16 | 17 | struct batch_entry { 18 | unsigned pid; /* or thread id */ 19 | short nargs; 20 | short rstatus; 21 | unsigned sysnum; 22 | unsigned sysret; 23 | long args[6]; 24 | }; 25 | 26 | #ifndef __KERNEL__ 27 | #include /* needed by syscall macro */ 28 | #ifndef syscall 29 | #include /* syscall() */ 30 | #endif 31 | /* Register the user-space batch table with the ESCA module through the __NR_register syscall and return syscall()'s result. */ 32 | static inline long batch_register(struct batch_entry *table) 33 | { 34 | return syscall(__NR_register, table); /* previously the result was dropped and the non-void function fell off its end */ 35 | } 36 | 37 | #define scall0(N, FL, Z, P) ((syscall_t){__NR_##N, FL, Z, P, 0}) 38 | #define scall1(N, FL, Z, P, A) ((syscall_t){__NR_##N, FL, Z, P, 1, (long) (A)}) 39 | #define scall2(N, FL, Z, P, A, B) \ 40 | ((syscall_t){__NR_##N, FL, Z, P, 2, (long) (A), (long) (B)}) 41 | #define scall3(N, FL, Z, P, A, B, C) \ 42 | ((syscall_t){__NR_##N, FL, Z, P, 3, (long) (A), (long) (B), (long) (C)}) 43 | #define scall4(N, FL, Z, P, A, B, C, D) \ 44 | ((syscall_t){__NR_##N, FL, Z, P, 4, (long) (A), (long) (B), (long) (C), \ 45 | (long) (D)}) 46 | #define scall5(N, FL, Z, P, A, B, C, D, E) \ 47 | ((syscall_t){__NR_##N, FL, Z, P, 5, (long) (A), (long) (B), (long) (C), \ 48 | (long) (D), (long) (E)}) 49 | #define scall6(N, FL, Z, P, A, B, C, D, E, F) \ 50 | ((syscall_t){__NR_##N, FL, Z, P, 6, (long) (A), (long) (B), (long) (C), \ 51 | (long) (D), (long) (E), (long) (F)}) 52 | #endif /* __KERNEL__ */ 53 | 54 | #endif /* LINUX_ESCA_H */ 55 | -------------------------------------------------------------------------------- /lkm/Makefile: -------------------------------------------------------------------------------- 1 | obj-m := esca.o 2 | EXTRA_CFLAGS := -I$M/../include 
3 | EXTRA_CFLAGS += -D'DEBUG=0' 4 | 5 | all: syscall-table.h esca.o 6 | esca.o : syscall-table.h 7 | # $(shell pwd) is for zsh, $(PWD) seems good for bash 8 | make -C /lib/modules/`uname -r`/build M=$(shell pwd) modules 9 | 10 | syscall-table.h: 11 | (grep -w sys_call_table /boot/System.map-`uname -r` | \ 12 | sed 's/\([^ ]*\) .*/#define smSCTab ((char *)0x\1UL)/'; \ 13 | grep -w system_wq /boot/System.map-`uname -r` | \ 14 | sed 's/\([^ ]*\) .*/#define smSysWQ ((char *)0x\1UL)/') > syscall-table.h 15 | 16 | clean: 17 | rm -f syscall-table.h 18 | make -C /lib/modules/`uname -r`/build M=$(shell pwd) clean 19 | 20 | .PHONY: syscall-table.h clean 21 | -------------------------------------------------------------------------------- /lkm/esca.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Linux kernel module for Effective System Call Aggregation (ESCA). 3 | * 4 | * Copyright (c) 2021-2022 National Cheng Kung University, Taiwan. 5 | * Authored by Steven Cheng 6 | */ 7 | 8 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 9 | 10 | #include /* __NR_syscall_max */ 11 | #include 12 | #include /* kallsyms_lookup_name, __NR_* */ 13 | #include /* Basic Linux module headers */ 14 | #include 15 | #include 16 | #include 17 | #include /* copy_from_user put_user */ 18 | #include 19 | #include 20 | 21 | #include "syscall-table.h" 22 | 23 | MODULE_DESCRIPTION("Generic batch system call API"); 24 | MODULE_LICENSE("Dual MIT/GPL"); 25 | MODULE_AUTHOR("National Cheng Kung University, Taiwan"); 26 | MODULE_VERSION("0.1"); 27 | 28 | struct page *pinned_pages[MAX_THREAD_NUM]; 29 | static void **scTab = 0; 30 | 31 | typedef asmlinkage long (*F0_t)(void); 32 | typedef asmlinkage long (*F1_t)(long); 33 | typedef asmlinkage long (*F2_t)(long, long); 34 | typedef asmlinkage long (*F3_t)(long, long, long); 35 | typedef asmlinkage long (*F4_t)(long, long, long, long); 36 | typedef asmlinkage long (*F5_t)(long, long, long, long, long); 37 | typedef 
asmlinkage long (*F6_t)(long, long, long, long, long, long); 38 | 39 | static inline long indirect_call(void *f, int argc, long *a) 40 | { 41 | /* x64 syscall calling convention changed in v4.17 to use struct pt_regs */ 42 | struct pt_regs regs; 43 | memset(®s, 0, sizeof regs); 44 | switch (argc) { 45 | case 6: 46 | regs.r9 = a[5]; /* Falls through. */ 47 | case 5: 48 | regs.r8 = a[4]; /* Falls through. */ 49 | case 4: 50 | regs.r10 = a[3]; /* Falls through. */ 51 | case 3: 52 | regs.dx = a[2]; /* Falls through. */ 53 | case 2: 54 | regs.si = a[1]; /* Falls through. */ 55 | case 1: 56 | regs.di = a[0]; /* Falls through. */ 57 | } 58 | return ((F1_t) f)((long) ®s); 59 | } 60 | 61 | #if defined(__x86_64__) 62 | extern unsigned long __force_order __weak; 63 | #define store_cr0(x) asm volatile("mov %0,%%cr0" : "+r"(x), "+m"(__force_order)) 64 | static void allow_writes(void) 65 | { 66 | unsigned long cr0 = read_cr0(); 67 | clear_bit(16, &cr0); 68 | store_cr0(cr0); 69 | } 70 | static void disallow_writes(void) 71 | { 72 | unsigned long cr0 = read_cr0(); 73 | set_bit(16, &cr0); 74 | store_cr0(cr0); 75 | } 76 | #elif defined(__aarch64__) 77 | // FIXME: port to ARM64 78 | #endif 79 | 80 | struct batch_entry *batch_table[MAX_THREAD_NUM]; 81 | int table_size = 64; 82 | int start_index[MAX_THREAD_NUM]; 83 | int global_i, global_j; 84 | int main_pid; /* PID of main thread */ 85 | 86 | asmlinkage long sys_register(const struct pt_regs *regs) 87 | { 88 | int n_page, i, j; 89 | unsigned long p1 = regs->di; 90 | 91 | /* map batch table from user-space to kernel */ 92 | n_page = get_user_pages( 93 | (p1), /* Start address to map */ 94 | MAX_THREAD_NUM, /* Number of pinned pages. 
4096 bytes in this machine */ 95 | FOLL_FORCE | FOLL_WRITE, /* Force flag */ 96 | pinned_pages, /* struct page ** pointer to pinned pages */ 97 | NULL); /* NOTE(review): the count returned in n_page is never checked */ 98 | 99 | for (i = 0; i < MAX_THREAD_NUM; i++) 100 | batch_table[i] = (struct batch_entry *) kmap(pinned_pages[i]); 101 | 102 | /* initialize table status: mark every entry of every table empty */ 103 | for (j = 0; j < MAX_THREAD_NUM; j++) 104 | for (i = 0; i < MAX_ENTRY_NUM; i++) 105 | batch_table[j][i].rstatus = BENTRY_EMPTY; 106 | 107 | global_i = global_j = 0; 108 | 109 | main_pid = current->pid; 110 | 111 | return 0; 112 | } 113 | /* Execute consecutive BENTRY_BUSY entries of the shared tables, storing each result in sysret. */ 114 | asmlinkage long sys_batch(void) 115 | { 116 | int j = global_j, i = global_i, cnt = 0; /* resume at the position saved by the previous flush */ 117 | 118 | #if DEBUG 119 | pr_info("Start flushing, started from index: %d\n", i); 120 | #endif 121 | while (batch_table[j][i].rstatus == BENTRY_BUSY) { /* stop at the first entry that is not busy */ 122 | #if DEBUG 123 | cnt++; 124 | pr_info("Index %d do syscall %d (%d %d)\n", i, batch_table[j][i].sysnum, 125 | j, i); 126 | #endif 127 | batch_table[j][i].sysret = 128 | indirect_call(scTab[batch_table[j][i].sysnum], 129 | batch_table[j][i].nargs, batch_table[j][i].args); /* NOTE(review): sysnum is used as an index without a range check */ 130 | batch_table[j][i].rstatus = BENTRY_EMPTY; /* mark the slot free again */ 131 | 132 | if (i == MAX_ENTRY_NUM - 1) { /* last entry of this table: move to the next table, wrapping to table 0 */ 133 | if (j == MAX_THREAD_NUM - 1) { 134 | j = 0; 135 | } else { 136 | j++; 137 | } 138 | i = 0; 139 | } else { 140 | i++; 141 | } 142 | } 143 | #if DEBUG 144 | pr_info("batch %d syscalls\n", cnt); 145 | #endif 146 | global_i = i; /* remember where the next flush starts */ 147 | global_j = j; 148 | return 0; 149 | } 150 | 151 | void *sys_oldcall0; 152 | void *sys_oldcall1; 153 | /* Module entry: locate the syscall table, save the two original entries and install the ESCA handlers. */ 154 | static int __init mod_init(void) 155 | { 156 | int rc; /* NOTE(review): rc is never used */ 157 | scTab = (void **) (smSCTab + ((char *) &system_wq - smSysWQ)); /* smSCTab shifted by the distance between the live system_wq address and smSysWQ */ 158 | allow_writes(); 159 | 160 | /* backup */ 161 | sys_oldcall0 = scTab[__NR_batch_flush]; 162 | sys_oldcall1 = scTab[__NR_register]; 163 | 164 | /* hooking */ 165 | scTab[__NR_batch_flush] = sys_batch; 166 | scTab[__NR_register] = sys_register; 167 | 168 | disallow_writes(); 169 | 170 | pr_info("installed as %d\n", __NR_batch_flush); 171 | return 0; 172 | } 173 
| static void __exit mod_cleanup(void) 174 | { 175 | pr_info("removed\n"); 176 | allow_writes(); 177 | 178 | /* restore */ 179 | scTab[__NR_batch_flush] = sys_oldcall0; 180 | scTab[__NR_register] = sys_oldcall1; 181 | 182 | disallow_writes(); 183 | /* correspond cleanup for kmap */ 184 | kunmap(pinned_pages[0]); 185 | } 186 | module_init(mod_init); 187 | module_exit(mod_cleanup); 188 | -------------------------------------------------------------------------------- /patches/lighttpd.patch: -------------------------------------------------------------------------------- 1 | --- old/lighttpd1.4-lighttpd-1.4.58/src/server.c 2020-12-28 11:42:56.000000000 +0800 2 | +++ new/lighttpd1.4-lighttpd-1.4.58/src/server.c 2022-04-27 16:35:03.727871000 +0800 3 | @@ -1871,3 +1871,3 @@ 4 | time_t last_active_ts = time(NULL); 5 | - 6 | +esca_init(); 7 | while (!srv_shutdown) { 8 | @@ -1916,3 +1916,3 @@ 9 | connections * const joblist = connection_joblist; 10 | - 11 | +batch_start(); 12 | if (fdevent_poll(srv->ev, joblist->used ? 
0 : 1000) > 0) { 13 | @@ -1926,2 +1926,3 @@ 14 | server_run_con_queue(joblist); 15 | +batch_flush(); 16 | } 17 | -------------------------------------------------------------------------------- /patches/lwan_main.patch: -------------------------------------------------------------------------------- 1 | --- old/lwan-master/src/bin/lwan/main.c 2022-06-05 08:53:47.000000000 +0800 2 | +++ new/lwan-master/src/bin/lwan/main.c 2022-06-09 21:13:59.257682000 +0800 3 | @@ -224,7 +224,7 @@ 4 | 5 | c = *lwan_get_default_config(); 6 | c.listener = strdup("*:8080"); 7 | - 8 | + esca_init(); 9 | switch (parse_args(argc, argv, &c, root, &sj)) { 10 | case ARGS_SERVE_FILES: 11 | lwan_status_info("Serving files from %s", root); 12 | -------------------------------------------------------------------------------- /patches/lwan_thread.patch: -------------------------------------------------------------------------------- 1 | --- old/lwan-master/src/lib/lwan-thread.c 2022-06-05 08:53:47.000000000 +0800 2 | +++ new/lwan-master/src/lib/lwan-thread.c 2022-06-09 21:18:31.790748000 +0800 3 | @@ -924,7 +924,7 @@ 4 | break; 5 | continue; 6 | } 7 | - 8 | + batch_start(); 9 | for (struct epoll_event *event = events; n_fds--; event++) { 10 | struct lwan_connection *conn = event->data.ptr; 11 | 12 | @@ -959,6 +959,7 @@ 13 | 14 | if (created_coros) 15 | timeouts_add(t->wheel, &tq.timeout, 1000); 16 | + batch_flush(); 17 | } 18 | 19 | pthread_barrier_wait(&lwan->thread.barrier); 20 | -------------------------------------------------------------------------------- /patches/nginx_module.patch: -------------------------------------------------------------------------------- 1 | --- old/nginx-1.22.0/src/event/modules/ngx_epoll_module.c 2022-05-24 07:59:19.000000000 +0800 2 | +++ new/nginx-1.22.0/src/event/modules/ngx_epoll_module.c 2022-06-05 16:04:53.918118766 +0800 3 | @@ -832,7 +832,7 @@ 4 | "epoll_wait() returned no events without timeout"); 5 | return NGX_ERROR; 6 | } 7 | - 8 | + batch_start(); 9 
| for (i = 0; i < events; i++) { 10 | c = event_list[i].data.ptr; 11 | 12 | @@ -931,7 +931,7 @@ 13 | } 14 | } 15 | } 16 | - 17 | + batch_flush(); 18 | return NGX_OK; 19 | } 20 | -------------------------------------------------------------------------------- /patches/nginx_process.patch: -------------------------------------------------------------------------------- 1 | --- old/nginx-1.22.0/src/os/unix/ngx_process.c 2022-05-24 07:59:19.000000000 +0800 2 | +++ new/nginx-1.22.0/src/os/unix/ngx_process.c 2022-06-05 16:03:34.503449915 +0800 3 | @@ -194,6 +194,7 @@ 4 | return NGX_INVALID_PID; 5 | 6 | case 0: 7 | + esca_init(); 8 | ngx_parent = ngx_pid; 9 | ngx_pid = ngx_getpid(); 10 | proc(cycle, data); 11 |
--------------------------------------------------------------------------------
/scripts/lighttpd.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
# Link lighttpd against libshim.so and point configs/lighttpd.conf at ./web.
# Usage: scripts/lighttpd.sh LIGHTTPD_SRC_DIR

# every '/' is escaped so the paths can be spliced into the sed expressions below
libpath=$(find "$(pwd)" -type f -name "libshim.so" | sed 's_/_\\/_g')
webpath=$(readlink --canonicalize web | sed 's_/_\\/_g')
# abort with a usage message when the source directory is not given
lightypath=${1:?usage: $0 LIGHTTPD_SRC_DIR}

# modify lighttpd
sed -i "/^DL_LIB =/ s/$/ -Wl,-E ${libpath}/" "${lightypath}/src/Makefile"
sed -i "s/server.document-root = .*/server.document-root = \"${webpath}\"/g" configs/lighttpd.conf
--------------------------------------------------------------------------------
/scripts/lwan.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
# Add libshim.so to the lwan CMake build and point configs/lwan.conf at ./web.
# Usage: scripts/lwan.sh LWAN_SRC_DIR

# every '/' is escaped so the paths can be spliced into the sed expressions below
libpath=$(find "$(pwd)" -type f -name "libshim.so" | sed 's_/_\\/_g')
webpath=$(readlink --canonicalize web | sed 's_/_\\/_g')
# abort with a usage message when the source directory is not given
lwanpath=${1:?usage: $0 LWAN_SRC_DIR}
# modify lwan
sed -i "26s/path.*/path = ${webpath}/" configs/lwan.conf
sed -i "37a add_library(libshim SHARED IMPORTED GLOBAL)\nset_target_properties(libshim PROPERTIES IMPORTED_LOCATION ${libpath})\nlist(APPEND ADDITIONAL_LIBRARIES libshim)" "${lwanpath}/CMakeLists.txt"
-------------------------------------------------------------------------------- /scripts/ngx.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | libpath=$(find $(pwd) -type f -name "libshim.so" | sed 's_/_\\/_g') 4 | webpath=$(readlink --canonicalize web | sed 's_/_\\/_g') 5 | ngxpath=$1 6 | 7 | # modify nginx 8 | sed -i 's/-Werror//' ${ngxpath}/objs/Makefile 9 | sed -i "s/-Wl,-E/-Wl,-E ${libpath}/" ${ngxpath}/objs/Makefile 10 | sed -i "19s/root.*;/root ${webpath};/" configs/nginx.conf 11 | -------------------------------------------------------------------------------- /wrapper/Makefile: -------------------------------------------------------------------------------- 1 | CFLAGS := -Wall -O2 2 | CFLAGS += -shared -fPIC 3 | LDFLAGS = -ldl -lpthread 4 | 5 | DLL = libshim.so wrapper.so 6 | all: $(DLL) 7 | 8 | libshim.so: shim.c 9 | $(CC) $(CFLAGS) -o $@ $< 10 | 11 | wrapper.so: wrapper.c 12 | $(CC) $(CFLAGS) -o $@ $< $(LDFLAGS) 13 | 14 | .PHONY: clean 15 | clean: 16 | $(RM) *.o $(DLL) 17 | -------------------------------------------------------------------------------- /wrapper/lighttpd-preload.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Syscall wrapper for Effective System Call Aggregation (ESCA). 3 | * 4 | * Copyright (c) 2021-2022 National Cheng Kung University, Taiwan. 
5 | * Authored by Steven Cheng 6 | */ 7 | 8 | ssize_t shutdown(int fd, int how) 9 | { 10 | if (!in_segment) 11 | return real_shutdown(fd, how); 12 | 13 | batch_num++; 14 | int off = global_j << 6; 15 | 16 | btable[off + global_i].sysnum = 48; 17 | btable[off + global_i].rstatus = BENTRY_BUSY; 18 | btable[off + global_i].nargs = 2; 19 | btable[off + global_i].args[0] = fd; 20 | btable[off + global_i].args[1] = how; 21 | 22 | if (global_i == MAX_TABLE_SIZE - 1) { 23 | if (global_j == MAX_THREAD_NUM - 1) { 24 | global_j = 0; 25 | } else { 26 | global_j++; 27 | } 28 | global_i = 0; 29 | } else { 30 | global_i++; 31 | } 32 | 33 | /* assume success */ 34 | return 0; 35 | } 36 | 37 | ssize_t writev(int fd, const struct iovec *iov, int iovcnt) 38 | { 39 | if (!in_segment) 40 | return real_writev(fd, iov, iovcnt); 41 | 42 | batch_num++; 43 | 44 | int off, len = 0, i; 45 | off = global_j << 6; 46 | 47 | for (i = 0; i < iovcnt; i++) { 48 | int ll = iov[i].iov_len; 49 | len += iov[i].iov_len; 50 | } 51 | 52 | btable[off + global_i].sysnum = 20; 53 | btable[off + global_i].rstatus = BENTRY_BUSY; 54 | btable[off + global_i].nargs = 3; 55 | btable[off + global_i].args[0] = fd; 56 | btable[off + global_i].args[1] = (long) (iov); 57 | btable[off + global_i].args[2] = iovcnt; 58 | 59 | if (global_i == MAX_TABLE_SIZE - 1) { 60 | if (global_j == MAX_THREAD_NUM - 1) { 61 | global_j = 0; 62 | } else { 63 | global_j++; 64 | } 65 | global_i = 0; 66 | } else { 67 | global_i++; 68 | } 69 | 70 | /* assume always success */ 71 | return len; 72 | } 73 | -------------------------------------------------------------------------------- /wrapper/lwan-preload.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Syscall wrapper for Effective System Call Aggregation (ESCA). 3 | * 4 | * Copyright (c) 2021-2022 National Cheng Kung University, Taiwan. 
5 | * Authored by Yao Hwang 6 | */ 7 | 8 | int close(int fd) 9 | { 10 | if (!in_segment) { 11 | real_close = real_close ? real_close : dlsym(RTLD_NEXT, "close"); 12 | return real_close(fd); 13 | } 14 | 15 | batch_num++; 16 | int off = global_j << 6; 17 | 18 | btable[off + global_i].sysnum = 3; 19 | btable[off + global_i].rstatus = BENTRY_BUSY; 20 | btable[off + global_i].nargs = 1; 21 | btable[off + global_i].args[0] = fd; 22 | 23 | if (global_i == MAX_TABLE_SIZE - 1) { 24 | if (global_j == MAX_THREAD_NUM - 1) { 25 | global_j = 0; 26 | } else { 27 | global_j++; 28 | } 29 | global_i = 0; 30 | } else { 31 | global_i++; 32 | } 33 | 34 | return 0; 35 | } 36 | 37 | ssize_t sendfile64(int outfd, int infd, off_t *offset, size_t count) 38 | { 39 | if (!in_segment) 40 | return real_sendfile(outfd, infd, offset, count); 41 | 42 | batch_num++; 43 | int off = global_j << 6; 44 | 45 | btable[off + global_i].sysnum = 40; 46 | btable[off + global_i].rstatus = BENTRY_BUSY; 47 | btable[off + global_i].nargs = 4; 48 | btable[off + global_i].args[0] = outfd; 49 | btable[off + global_i].args[1] = infd; 50 | btable[off + global_i].args[2] = offset; 51 | btable[off + global_i].args[3] = count; 52 | 53 | if (global_i == MAX_TABLE_SIZE - 1) { 54 | if (global_j == MAX_THREAD_NUM - 1) { 55 | global_j = 0; 56 | } else { 57 | global_j++; 58 | } 59 | global_i = 0; 60 | } else { 61 | global_i++; 62 | } 63 | 64 | /* assume always success */ 65 | return count; 66 | } 67 | 68 | ssize_t send(int sockfd, const void *buf, size_t len, int flags) 69 | { 70 | if (!in_segment) 71 | return real_send(sockfd, buf, len, flags); 72 | 73 | batch_num++; 74 | int off = global_j << 6; 75 | 76 | btable[off + global_i].sysnum = 44; 77 | btable[off + global_i].rstatus = BENTRY_BUSY; 78 | btable[off + global_i].nargs = 6; 79 | btable[off + global_i].args[0] = sockfd; 80 | btable[off + global_i].args[1] = buf; 81 | btable[off + global_i].args[2] = len; 82 | btable[off + global_i].args[3] = flags; 83 | btable[off + 
global_i].args[4] = NULL; 84 | btable[off + global_i].args[5] = 0; 85 | 86 | if (global_i == MAX_TABLE_SIZE - 1) { 87 | if (global_j == MAX_THREAD_NUM - 1) { 88 | global_j = 0; 89 | } else { 90 | global_j++; 91 | } 92 | global_i = 0; 93 | } else { 94 | global_i++; 95 | } 96 | 97 | /* assume always success */ 98 | return len; 99 | } 100 | -------------------------------------------------------------------------------- /wrapper/nginx-preload.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Syscall wrapper for Effective System Call Aggregation (ESCA). 3 | * 4 | * Copyright (c) 2021-2022 National Cheng Kung University, Taiwan. 5 | * Authored by Steven Cheng 6 | */ 7 | 8 | int close(int fd) 9 | { 10 | if (!in_segment) { 11 | real_close = real_close ? real_close : dlsym(RTLD_NEXT, "close"); 12 | return real_close(fd); 13 | } 14 | 15 | batch_num++; 16 | int off = global_j << 6; 17 | 18 | btable[off + global_i].sysnum = 3; 19 | btable[off + global_i].rstatus = BENTRY_BUSY; 20 | btable[off + global_i].nargs = 1; 21 | btable[off + global_i].args[0] = fd; 22 | 23 | if (global_i == MAX_TABLE_SIZE - 1) { 24 | if (global_j == MAX_THREAD_NUM - 1) { 25 | global_j = 0; 26 | } else { 27 | global_j++; 28 | } 29 | global_i = 0; 30 | } else { 31 | global_i++; 32 | } 33 | 34 | return 0; 35 | } 36 | 37 | ssize_t sendfile64(int outfd, int infd, off_t *offset, size_t count) 38 | { 39 | if (!in_segment) 40 | return real_sendfile(outfd, infd, offset, count); 41 | 42 | batch_num++; 43 | int off = global_j << 6; 44 | 45 | btable[off + global_i].sysnum = 40; 46 | btable[off + global_i].rstatus = BENTRY_BUSY; 47 | btable[off + global_i].nargs = 4; 48 | btable[off + global_i].args[0] = outfd; 49 | btable[off + global_i].args[1] = infd; 50 | btable[off + global_i].args[2] = 0; 51 | btable[off + global_i].args[3] = count; 52 | 53 | if (global_i == MAX_TABLE_SIZE - 1) { 54 | if (global_j == MAX_THREAD_NUM - 1) { 55 | global_j = 0; 56 | } else { 57 | 
global_j++; 58 | } 59 | global_i = 0; 60 | } else { 61 | global_i++; 62 | } 63 | 64 | /* assume always success */ 65 | return count; 66 | } 67 | -------------------------------------------------------------------------------- /wrapper/preload.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #define _GNU_SOURCE 4 | 5 | #define MAX_TABLE_SIZE 64 6 | #define MAX_THREAD_NUM 10 7 | #define MAX_POOL_SIZE 130172 8 | #define POOL_UNIT 8 9 | #define BATCH_NUM 50 10 | 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | 19 | #include "../include/linux/esca.h" 20 | 21 | struct pthread_fake { 22 | /* offset to find tid */ 23 | void *nothing[90]; 24 | pid_t tid; 25 | }; 26 | 27 | extern struct batch_entry *btable; 28 | 29 | typedef long (*close_t)(int fd); 30 | close_t real_close; 31 | typedef long (*sendfile_t)(int outfd, int infd, off_t *offset, size_t count); 32 | sendfile_t real_sendfile; 33 | typedef long (*shutdown_t)(int fd, int how); 34 | shutdown_t real_shutdown; 35 | typedef long (*writev_t)(int fd, const struct iovec *iov, int iovcnt); 36 | writev_t real_writev; 37 | typedef long (*send_t)(int sockfd, const void *buf, size_t len, int flags); 38 | send_t real_send; 39 | -------------------------------------------------------------------------------- /wrapper/shim.c: -------------------------------------------------------------------------------- 1 | #include "shim.h" 2 | 3 | long esca_init() 4 | { 5 | return 0; 6 | } 7 | 8 | long batch_start(int events) 9 | { 10 | return 0; 11 | } 12 | 13 | long batch_flush() 14 | { 15 | return 0; 16 | } 17 | -------------------------------------------------------------------------------- /wrapper/shim.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | /* Dummy shared lib */ 4 | long esca_init(); 5 | long batch_start(int); 6 | long batch_flush(); 7 | 
-------------------------------------------------------------------------------- /wrapper/wrapper.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Syscall wrapper for Effective System Call Aggregation (ESCA). 3 | * 4 | * Copyright (c) 2021-2022 National Cheng Kung University, Taiwan. 5 | * Authored by Steven Cheng 6 | */ 7 | 8 | #ifndef _GNU_SOURCE 9 | #define _GNU_SOURCE 10 | #endif 11 | 12 | #include 13 | 14 | #include "preload.h" 15 | 16 | int table_size = 64; 17 | int in_segment; 18 | void *mpool; /* memory pool */ 19 | int pool_offset; 20 | struct iovec *iovpool; /* pool for iovector */ 21 | int iov_offset; 22 | int batch_num; /* number of busy entry */ 23 | struct batch_entry *btable; 24 | int global_i, global_j; 25 | size_t pgsize; 26 | 27 | long esca_init() 28 | { 29 | btable = aligned_alloc(pgsize, pgsize * MAX_THREAD_NUM); 30 | syscall(__NR_register, btable); 31 | return 0; 32 | } 33 | 34 | long batch_start(int exp) 35 | { 36 | in_segment = 1; 37 | batch_num = 0; 38 | 39 | return 0; 40 | } 41 | 42 | long batch_flush() 43 | { 44 | in_segment = 0; 45 | 46 | /* avoid useless batch_flush */ 47 | if (batch_num == 0) 48 | return 0; 49 | batch_num = 0; 50 | return syscall(__NR_batch_flush); 51 | } 52 | 53 | #include "preload.c" 54 | 55 | __attribute__((constructor)) static void setup(void) 56 | { 57 | pgsize = getpagesize(); 58 | in_segment = 0; 59 | batch_num = 0; 60 | 61 | /* store glibc function */ 62 | real_writev = real_writev ? real_writev : dlsym(RTLD_NEXT, "writev"); 63 | real_shutdown = 64 | real_shutdown ? real_shutdown : dlsym(RTLD_NEXT, "shutdown"); 65 | real_sendfile = 66 | real_sendfile ? real_sendfile : dlsym(RTLD_NEXT, "sendfile"); 67 | real_send = 68 | real_send ? real_send : dlsym(RTLD_NEXT, "send"); 69 | 70 | global_i = global_j = 0; 71 | } 72 | --------------------------------------------------------------------------------