├── .gitignore ├── README.md ├── cluster-manager ├── Makefile ├── README.md ├── client.h ├── fifo.c ├── fifo.h ├── init.c ├── lite-cd-base.c ├── lite-cd-base.h ├── lite-cd.c ├── mgmt_server.h ├── network_handler.c └── uthash.h ├── core ├── .gitignore ├── Makefile ├── README.md ├── doxygen │ ├── LITE-doxygen │ └── doxygen.sh ├── lite.h ├── lite_api.c ├── lite_api.h ├── lite_core.c ├── lite_core.h ├── lite_distribution.h ├── lite_insmod.sh ├── lite_internal_tool.c ├── lite_internal_tool.h ├── lite_rmmod.sh ├── lite_syscall.h ├── lite_test.c └── lite_test.h ├── lite-syscall ├── .gitignore ├── Makefile ├── lite.h ├── lite_syscall.c └── lite_syscall.h ├── lite-userspace ├── Makefile ├── README.md ├── lite-lib.c ├── lite-lib.h ├── lite_join.c ├── lite_key.h ├── lite_rpc.c └── lite_write.c ├── lite_kernel_patch_3.10.108 └── lite_kernel_patch_3.11.1 /.gitignore: -------------------------------------------------------------------------------- 1 | # Prerequisites 2 | *.d 3 | 4 | # Object files 5 | *.o 6 | *.ko 7 | *.obj 8 | *.elf 9 | 10 | # Linker output 11 | *.ilk 12 | *.map 13 | *.exp 14 | 15 | # Precompiled Headers 16 | *.gch 17 | *.pch 18 | 19 | # Libraries 20 | *.lib 21 | *.a 22 | *.la 23 | *.lo 24 | 25 | # Shared objects (inc. 
Windows DLLs) 26 | *.dll 27 | *.so 28 | *.so.* 29 | *.dylib 30 | 31 | # Executables 32 | *.exe 33 | *.out 34 | *.app 35 | *.i*86 36 | *.x86_64 37 | *.hex 38 | 39 | # Debug files 40 | *.dSYM/ 41 | *.su 42 | *.idb 43 | *.pdb 44 | 45 | # Kernel Module Compile Results 46 | *.mod* 47 | *.cmd 48 | .tmp_versions/ 49 | modules.order 50 | Module.symvers 51 | Mkfile.old 52 | dkms.conf 53 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # LITE Local Indirection TiEr (SOSP'17) 2 | 3 | [[SOSP 2017 Paper]](https://dl.acm.org/doi/10.1145/3132747.3132762) 4 | [[Slide]](https://www.sigops.org/s/conferences/sosp/2017/slides/lite-sosp17-slides.pdf) 5 | 6 | __LITE - Kernel RDMA Support for Datacenter Applications v0.2__ 7 | 8 | LITE stands for Local Indirection TiEr for RDMA in the Linux kernel. 9 | LITE virtualizes native RDMA into a flexible, high-level, easy-to-use abstraction and allows applications to safely share resources. 10 | Despite the widely-held belief that kernel bypassing is essential to RDMA's low-latency performance, LITE shows that using a kernel-level indirection can achieve both flexibility and low-latency, scalable performance at the same time. 11 | 12 | This version of LITE has been tested for the following configuration: 13 | 14 | 1. Software 15 | * OS: CentOS 7.1 (kernel `3.11.1` - kernel `3.10.108` is also supported, but use kernel `3.11.1` if possible) 16 | * A port of LITE by [Yizhou Shan](http://lastweek.io) for kernel `4.9` can be found [here](https://github.com/lastweek/LITE) (Check Section Caution for more details). 17 | * RDMA drivers: `mlx4` from official libibverbs and verbs. 18 | 2. Hardware 19 | * RNICs: 20 | * ConnectX-3 354A (InfiniBand, RoCE) 21 | 3. 
Package (on CentOS7) 22 | * required packages: `libmthca infiniband-diags perftest qperf opensm libibverbs librdmacm librdmacm-devel libmlx4 libibverbs-utils` 23 | * add the following two lines to the end of /etc/security/limits.conf 24 | * `* soft memlock unlimited` 25 | * `* hard memlock unlimited` 26 | 4. LITE is currently not compatible with MLNX_OFED 27 | 28 | We built LITE as a Linux kernel module for the Linux `3.11.1` (also `3.10.108`) kernel (a patch for the syscall is provided). 29 | The LITE kernel module is in `core/`. 30 | The folder `lite-userspace` contains simple examples of using LITE in user space. 31 | The code `core/lite_test.c` contains simple examples of using LITE in kernel space. 32 | 33 | # Caution: 34 | This is a BETA version. We will have our stable version ready soon. 35 | For more information, please check the [LITE Paper](https://dl.acm.org/citation.cfm?id=3132762), which appeared in *SOSP'17*. 36 | 37 | LITE can also be used from Python by importing ctypes. There is example code in [pyLITE](https://github.com/wilso467/pyLITE). 38 | This has not been verified by the LITE team. Use it at your own risk. 39 | 40 | LITE has also been ported to kernel `4.9` by [Yizhou Shan](mailto:ys@purdue.edu) at [LITE-4.9](https://github.com/lastweek/LITE). This port is tested with Ubuntu kernel `4.9.103` and `mlx5`. There are several known differences between kernel `3.x` and kernel `4.x`, and also between the `mlx4` and `mlx5` drivers. Some features and performance numbers could be different. Use it at your own risk. 41 | 42 | ## How To Run LITE 43 | 44 | ### Prerequisites 45 | 1. More than two machines connected via InfiniBand. 46 | 2. One of the machines (serving as the cluster manager) has the InfiniBand OFED user-level library installed. The rest of the machines serve as LITE clients and need to compile the kernel (see below). 47 | 48 | ### S1: Compile cluster manager 49 | LITE's cluster manager source code is located in `cluster-manager/`; it runs in user space. 
Assuming this machine has all IB user libraries installed, you can go to this directory and simply run `make`. After that, you will have a `mgmt-server`, which is LITE's cluster manager. Also, get the IP address of this CD server, which will be used by all other LITE clients to establish a connection. 50 | 51 | ### S2: Install and boot LITE kernel on LITE clients (root privilege required for steps 4, 5, and 6) 52 | 1. First, get the Linux tarball (we used `3.11.1` from `wget https://www.kernel.org/pub/linux/kernel/v3.x/linux-3.11.1.tar.gz` and `3.10.108` from `wget https://cdn.kernel.org/pub/linux/kernel/v3.x/linux-3.10.108.tar.xz`) 53 | 2. Extract the tarball and cd into the kernel source code (e.g., `cd linux-[version]`) 54 | 3. Apply the LITE patch (mainly for the syscall): `patch -p1 < ../lite_kernel_patch_[version]` 55 | 4. Compile the kernel with your machine's old config: 56 | `cp /boot/config-your-default-kernel-version lite-kernel/.config` 57 | `make oldconfig` (Recommended to have a special _CONFIG_LOCALVERSION="LITE"_) 58 | `make all [-j ncpus] ; make modules_install [-j ncpus] ; make install` 59 | 5. Change the boot order if needed [grub2-set-default 0] 60 | 6. Reboot the machine and use `uname` to check if the kernel version matches. 61 | 64 | ### S3: Config LITE 65 | 66 | LITE has several options that can be configured at compile time in lite.h in `core/`. The default configurations have been tested to work well for our applications. We will provide documentation of these configurations soon. 67 | 68 | Please enable `LITE_ROCE` in lite.h in `core/` and in client.h in `cluster-manager` if you want to run LITE with RoCE. 69 | 70 | ### S4: Compile Modules 71 | After booting into `lite-kernel` successfully (S2), go to the `core/` directory and type `make` to compile LITE's three modules. If the kernel is right, you will have 3 modules compiled: `lite_internal.ko`, `lite_api.ko`, and `lite_test.ko`. 
`lite_internal.ko` is the LITE core module and `lite_api.ko` is a module that includes all LITE APIs. `lite_test.ko` is a module which shows how to use LITE in kernel space. 72 | 73 | ### S5: Run 74 | In general, to run LITE, you need to start the cluster manager first, which will listen on port 18500 by default. After that, start LITE clients one by one to establish the connection with the cluster manager. 75 | 76 | #### S5.1 Run cluster manager 77 | You can start the cluster manager like this: 78 | > `./mgmt-server` 79 | > `./mgmt-server -p [eth_port] -i [ib_port]` 80 | 81 | #### S5.2: Run LITE 82 | Start LITE clients one by one to establish the connection with the cluster manager, assuming the IP address of the cluster manager is `192.168.1.1`. Each client needs to install `lite_internal.ko` first and then `lite_api.ko`, in that order. There is a simple script, `lite_insmod.sh`, which helps you install these two modules. 83 | 84 | ##### S5.2.1: Run LITE in userspace 85 | Call `userspace_liteapi_join("192.168.1.1", 18500, 1)` if you want to use port 18500 and IB port 1 to build the LITE cluster. 86 | 87 | ##### S5.2.2: Run LITE in kernel space 88 | Call `liteapi_establish_conn("192.168.1.1", 18500, 1)` if you want to use port 18500 and IB port 1 to build the LITE cluster. 89 | 90 | ##### S5.3: establish_conn 91 | Even after the program that called liteapi_establish_conn has terminated, the node is still in the LITE cluster. 92 | Therefore, we suggest writing a separate program (such as lite_join.c) to join the cluster instead of joining inside your testing program. 93 | How to join a node is illustrated in the LITE example code. 94 | 95 | In detail: 96 | 1. **insmod lite_internal.ko** 97 | This will insmod the lite_internal module 98 | 2. **insmod lite_api.ko** 99 | This will insmod the lite_api module 100 | 3. 
**userspace_liteapi_join("192.168.1.1", 18500, 1)** or **liteapi_establish_conn("192.168.1.1", 18500, 1)** 101 | This will connect to the cluster manager and connect the client to the LITE cluster 102 | 103 | ### S6: Run User Programs 104 | There are several code samples under `lite-userspace/`. Basically, we join LITE with `userspace_liteapi_join()` and call malloc/send/receive/read/write based on `lite-userspace/lite-lib.c`. 105 | 106 | ### S7: Leave LITE cluster 107 | Currently, LITE doesn't provide complete instructions for leaving the LITE cluster. 108 | If a node leaves, all nodes have to leave LITE and rebuild the whole cluster. Running `rmmod` on `lite_api.ko` and `lite_internal.ko`, in that order, terminates the LITE module (or run `lite_rmmod.sh`). 109 | The node can then re-connect to the LITE cluster manager to rebuild the whole LITE cluster again by following Step S5 (also press ctrl+c and re-run the cluster manager). 110 | 111 | ## History: 112 | `LITE v0.1`: first open-source LITE 113 | 114 | `LITE v0.2`: beta version of LITE-RoCE 115 | 116 | Please check core/README.md to see current limitations 117 | 118 | ## Cite 119 | 120 | To cite LITE, please use: 121 | 122 | >\@inproceedings{SOSP17-LITE\, 123 | > author = {Shin-Yeh Tsai and Yiying Zhang}, 124 | > title = {LITE Kernel RDMA Support for Datacenter Applications}, 125 | > booktitle = {Proceedings of the 26th Symposium on Operating Systems Principles (SOSP '17)}, 126 | > year = {2017}, 127 | > address = {Shanghai, China}, 128 | > month = {October} 129 | >} 130 | -------------------------------------------------------------------------------- /cluster-manager/Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: clean 2 | 3 | CFLAGS := -Wall -g 4 | LD := gcc 5 | LDLIBS := ${LDLIBS} -lrdmacm -libverbs -lpthread -ldl 6 | DEPS = client.h fifo.h 7 | 8 | APPS := mgmt-server 9 | 10 | all: ${APPS} 11 | 12 | 13 | mgmt-server: init.o network_handler.o lite-cd.o lite-cd-base.o fifo.o 14 | 
${LD} -o $@ $^ ${LDLIBS} $(DEPS) 15 | 16 | clean: 17 | rm -f *.o ${APPS} 18 | 19 | -------------------------------------------------------------------------------- /cluster-manager/README.md: -------------------------------------------------------------------------------- 1 | LITE's cluster manager source code is located in cluster-manager/ and runs in user space. Assuming this machine has all IB user libraries installed, you can go to this directory and simply run make. After that, you will have a mgmt-server, which is LITE's cluster manager. Also, get the IP address of this CD server, which will be used by all other LITE clients to establish a connection. 2 | 3 | 4 | You can start the cluster manager like this: 5 | 6 | ./mgmt-server 7 | ./mgmt-server -p [eth_port] -i [ib_port] 8 | 9 | -------------------------------------------------------------------------------- /cluster-manager/client.h: -------------------------------------------------------------------------------- 1 | #define _GNU_SOURCE 2 | 3 | #ifndef HAVE_SERVER_H 4 | #define HAVE_SERVER_H 5 | 6 | 7 | //This is the version modified from 000be840c215d5da3011a2c7b486d5ae122540c4 8 | //It adds LOCKS, sge, and other things into the system 9 | //Client.h is also modified. 
10 | //Server is also modified to match this patch 11 | //Patch SERIAL_VERSION_ID: 04202300 12 | //Please make sure that this version is not fully tested inside dsnvm (interactions are not fully tested) 13 | 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | 29 | #include "mgmt_server.h" 30 | #include "lite-cd-base.h" 31 | #include /* POSIX Threads */ 32 | #include 33 | 34 | 35 | struct list_head { 36 | struct list_head *next, *prev; 37 | }; 38 | 39 | struct atomic_struct{ 40 | void *vaddr; 41 | size_t len; 42 | }; 43 | 44 | #include "fifo.h" 45 | 46 | 47 | //#define LITE_ROCE 48 | #ifdef LITE_ROCE 49 | #define SGID_INDEX 0 50 | #else 51 | #define SGID_INDEX -1 52 | #endif 53 | 54 | #define MAX_NODE 16 55 | #define FIRST_ASK_MR_SET 16 56 | #define LID_SEND_RECV_FORMAT "0000:0000:000000:000000:00000000000000000000000000000000" 57 | #define LISTEN_BACKLOG 10 58 | #define LISTEN_PORT 18500 59 | #define SEND_BUF_LENGTH 4096 60 | #define RECV_DEPTH 2048 61 | #define NODE_ID 0 62 | #define MESSAGE_SIZE 4096 63 | #define MAX_LOCK 1024 64 | 65 | #define NUM_PARALLEL_CONNECTION 4 66 | #define MAX_ATOMIC_SEND_NUM 4096 67 | #define MAX_CONNECTION (MAX_NODE*NUM_PARALLEL_CONNECTION) //parenthesized so the product stays intact when the macro is used inside a larger expression 68 | #define MAX_PARALLEL_THREAD 32 //Assume that MAX_NODE * NUM_PARALLEL_CONNECT is smaller than 256 69 | #define WRAP_UP_NUM_FOR_WRID 256 //since there are 64 bits in wr_id, we are going to use 9-12 bits to do thread id waiting passing 70 | #define WRAP_UP_NUM_FOR_WAITING_INBOX 256 71 | #define WRAP_UP_NUM_FOR_CIRCULAR_ID 256 72 | #define WRAP_UP_NUM_FOR_TYPE 65536 //since there are 64 bits in wr_id, we are going to use 9-12 bits to do thread id waiting passing 73 | #define CIRCULAR_BUFFER_LENGTH 1024 74 | 75 | #ifdef LITE_ROCE 76 | #define LITE_MTU IBV_MTU_1024 77 | #else 78 | #define LITE_MTU IBV_MTU_4096 79 | #endif 80 | 81 | pthread_mutex_t 
atomic_accessing_lock[MAX_NODE]; 82 | sem_t get_thread_waiting_number_semaphore; 83 | pthread_mutex_t get_thread_waiting_number_mutex; 84 | //pthread_mutex_t connection_lock[MAX_CONNECTION]; 85 | pthread_mutex_t connection_lock; 86 | sem_t send_reply_wait_semaphore; 87 | pthread_mutex_t send_reply_wait_mutex; 88 | pthread_mutex_t num_lock_mutex; 89 | pthread_mutex_t fifo_lock_mutex; 90 | 91 | #define HIGH_PRIORITY 8 92 | #define LOW_PRIORITY 0 93 | #define CONGESTION_ALERT 2 94 | #define CONGESTION_WARNING 1 95 | #define CONGESTION_FREE 0 96 | 97 | volatile unsigned long long int shared_locks[MAX_LOCK] __attribute__((aligned(0x1000))); 98 | struct liteapi_two_ports{ 99 | int ib_port; 100 | int ethernet_port; 101 | int options; 102 | }; 103 | 104 | 105 | struct liteapi_post_receive_intermediate_struct 106 | { 107 | uintptr_t header; 108 | uintptr_t msg; 109 | }; 110 | struct liteapi_header{ 111 | uint32_t src_id; 112 | uint64_t store_addr; 113 | uint64_t store_semaphore; 114 | uint32_t length; 115 | int priority; 116 | int type; 117 | }; 118 | struct send_and_reply_format 119 | { 120 | uint32_t src_id; 121 | uint64_t store_addr; 122 | uint64_t store_semaphore; 123 | uint32_t length; 124 | int type; 125 | char *msg; 126 | int bridge_destination; 127 | int bridge_source; 128 | int bridge_remain_hops; 129 | struct list_head list; 130 | }; 131 | enum { 132 | MSG_MR, 133 | MSG_DONE, 134 | MSG_NODE_JOIN, 135 | MSG_NODE_JOIN_UD, 136 | MSG_SERVER_SEND, 137 | MSG_CLIENT_SEND, 138 | MSG_CREATE_LOCK, 139 | MSG_CREATE_LOCK_REPLY, 140 | MSG_RESERVE_LOCK, 141 | MSG_ASSIGN_LOCK, 142 | MSG_UNLOCK, 143 | MSG_ASK_LOCK, 144 | MSG_ASK_LOCK_REPLY, 145 | MSG_GET_REMOTEMR, 146 | MSG_GET_REMOTE_ATOMIC_OPERATION, 147 | MSG_GET_REMOTEMR_REPLY, 148 | MSG_GET_SEND_AND_REPLY_1, 149 | MSG_GET_SEND_AND_REPLY_2, 150 | MSG_GET_ATOMIC_START, 151 | MSG_GET_ATOMIC_MID, 152 | MSG_GET_ATOMIC_REPLY, 153 | MSG_GET_ATOMIC_SINGLE_START, 154 | MSG_GET_ATOMIC_SINGLE_MID, 155 | MSG_ASK_MR_1, 156 | 
MSG_ASK_MR_2, 157 | MSG_MR_REQUEST, 158 | MSG_GET_SEND_AND_REPLY_OPT_1, 159 | MSG_GET_SEND_AND_REPLY_OPT_2, 160 | MSG_GET_INTERNAL_EXCHANGE, 161 | MSG_DIST_BARRIER, 162 | MSG_GET_FINISH, 163 | MSG_QUERY_PORT_1, 164 | MSG_QUERY_PORT_2, 165 | MSG_PASS_LOCAL_IMM, 166 | MSG_DO_RC_POST_RECEIVE, 167 | MSG_DO_UD_POST_RECEIVE, 168 | MSG_DO_ACK 169 | }; 170 | enum lock_state{ 171 | LOCK_USED, 172 | LOCK_AVAILABLE, 173 | LOCK_LOCK, 174 | LOCK_ASSIGNED 175 | }; 176 | struct buf_message 177 | { 178 | char buf[MESSAGE_SIZE]; 179 | }; 180 | 181 | struct lmr_info { 182 | //struct ib_device *context; 183 | //struct ib_pd *pd; 184 | void *addr; 185 | size_t length; 186 | //uint32_t handle; 187 | uint32_t lkey; 188 | uint32_t rkey; 189 | uint32_t node_id; 190 | }; 191 | 192 | typedef struct lmr_info remote_spinlock_t; 193 | 194 | struct client_ah_combined 195 | { 196 | int qpn; 197 | int node_id; 198 | int qkey; 199 | int dlid; 200 | union ibv_gid gid; 201 | }; 202 | 203 | struct lite_context { 204 | struct ibv_context *context; 205 | struct ibv_comp_channel *channel; 206 | struct ibv_pd *pd; 207 | struct ibv_cq *cq; // one completion queue for all qps 208 | struct ibv_qp *qp; // multiple queue pair for multiple connections 209 | struct client_ah_combined *ah_attrUD; 210 | struct ibv_ah **ah; 211 | struct ibv_qp *loopback_in; 212 | struct ibv_qp *loopback_out; 213 | struct ibv_cq *loopback_cq; 214 | int size; 215 | int send_flags; 216 | int rx_depth; 217 | // int pending; 218 | struct ibv_port_attr portinfo; 219 | int num_connections; 220 | int num_node; 221 | int num_parallel_connection; 222 | int *num_alive_connection; 223 | 224 | int recv_num; 225 | unsigned int *atomic_request_num; 226 | int parallel_thread_num; 227 | 228 | enum s_state { 229 | SS_INIT, 230 | SS_MR_SENT, 231 | SS_RDMA_WAIT, 232 | SS_RDMA_SENT, 233 | SS_DONE_SENT, 234 | SS_MSG_WAIT, 235 | SS_MSG_SENT 236 | } *send_state; 237 | 238 | enum r_state { 239 | RS_INIT, 240 | RS_MR_RECV, 241 | RS_RDMA_WAIT, 242 | 
RS_RDMA_RECV, 243 | RS_DONE_RECV 244 | } *recv_state; 245 | enum t_state { 246 | TS_WAIT, 247 | TS_DONE 248 | } *thread_state; 249 | 250 | 251 | int send_reply_wait_num; 252 | 253 | 254 | struct atomic_struct **atomic_buffer; 255 | int *atomic_buffer_total_length; 256 | int *atomic_buffer_cur_length; 257 | 258 | int (*send_handler)(char *addr, uint32_t size); 259 | int (*send_reply_handler)(char *input_addr, uint32_t input_size, char *output_addr, uint32_t *output_size, int sender_id); 260 | int (*atomic_send_handler)(struct atomic_struct *input_list, uint32_t length, char *output_buf, uint32_t *output_size, int sender_id); 261 | int (*atomic_single_send_handler)(struct atomic_struct *input_list, uint32_t length, int sender_id); 262 | int num_used_lock; 263 | struct lmr_info shared_locks_mr[MAX_LOCK]; 264 | fifo_t **shared_locks_fifo_queue; 265 | union ibv_gid gid; 266 | }; 267 | struct lite_dest { 268 | int node_id; 269 | int lid; 270 | int qpn; 271 | int psn; 272 | union ibv_gid gid; 273 | }; 274 | 275 | struct client_data{ 276 | char server_name[INET6_ADDRSTRLEN]; 277 | char server_information_buffer[MAX_CONNECTION][sizeof(LID_SEND_RECV_FORMAT)]; 278 | }; 279 | struct server_reply_format{ 280 | int number_of_nodes; //This specifies the number of nodes excluded the last one(which initializes the latest connection to the server) 281 | struct client_data client_list[MAX_NODE]; 282 | }; 283 | 284 | 285 | struct ibv_mr *server_register_memory_api(int connection_id, void *addr, int size, int flag); 286 | int liteapi_reg_send_handler(int (*input_funptr)(char *addr, uint32_t length)); 287 | int liteapi_reg_send_reply_handler(int (*input_funptr)(char *input_buf, uint32_t size, char *output_buf, uint32_t *output_size, int sender_id)); 288 | int liteapi_reg_atomic_send_handler(int (*input_funptr)(struct atomic_struct *input_list, uint32_t length, char *output_buf, uint32_t *output_size, int sender_id)); 289 | int liteapi_reg_atomic_single_send_handler(int 
(*input_funptr)(struct atomic_struct *input_list, uint32_t length, int sender_id)); 290 | int server_get_waiting_id_by_semaphore(void); 291 | int server_send_request(int connection_id, enum mode s_mode, struct lmr_info *remote_mr, void *addr, int size); 292 | int liteapi_send_message(int target_node, char *msg, int size); 293 | int liteapi_send_reply(int target_node, char *msg, int size, char *output_msg); 294 | int server_rdma_read(int target_node, struct lmr_info *mr_addr, void *local_addr, int size); 295 | int server_rdma_write(int target_node, struct lmr_info *mr_addr, void *local_addr, int size); 296 | int server_atomic_send_reply(int target_node, struct atomic_struct *input_atomic, int length, char *output_msg, int *output_length); 297 | int server_get_remotemr(int target_node, void *addr, int size, struct lmr_info *ret_mr); 298 | 299 | int server_loopback_read(struct lmr_info *remote_mr, struct lmr_info *local_mr); 300 | int server_loopback_compare_swp(struct lmr_info *remote_mr, struct lmr_info *local_mr, unsigned long long guess_value, unsigned long long swp_value); 301 | 302 | #endif 303 | -------------------------------------------------------------------------------- /cluster-manager/fifo.c: -------------------------------------------------------------------------------- 1 | /* 2 | * CDDL HEADER START 3 | * 4 | * The contents of this file are subject to the terms of the 5 | * Common Development and Distribution License, Version 1.0 only 6 | * (the "License"). You may not use this file except in compliance 7 | * with the License. 8 | * 9 | * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 | * or http://www.opensolaris.org/os/licensing. 11 | * See the License for the specific language governing permissions 12 | * and limitations under the License. 13 | * 14 | * When distributing Covered Code, include this CDDL HEADER in each 15 | * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
16 | * If applicable, add the following below this CDDL HEADER, with the 17 | * fields enclosed by brackets "[]" replaced with your own identifying 18 | * information: Portions Copyright [yyyy] [name of copyright owner] 19 | * 20 | * CDDL HEADER END 21 | */ 22 | /* 23 | * Copyright 2002 Sun Microsystems, Inc. All rights reserved. 24 | * Use is subject to license terms. 25 | */ 26 | 27 | //#pragma ident "@(#)fifo.c 1.2 05/06/08 SMI" 28 | 29 | /* 30 | * Routines for manipulating a FIFO queue 31 | */ 32 | 33 | #include 34 | 35 | #include "fifo.h" 36 | #include "memory.h" 37 | 38 | typedef struct fifonode { 39 | void *fn_data; 40 | struct fifonode *fn_next; 41 | } fifonode_t; 42 | 43 | struct fifo { 44 | fifonode_t *f_head; 45 | fifonode_t *f_tail; 46 | }; 47 | 48 | fifo_t * 49 | fifo_new(void) 50 | { 51 | fifo_t *f; 52 | 53 | f = calloc(1, sizeof (fifo_t)); 54 | 55 | return (f); 56 | } 57 | 58 | /* Add to the end of the fifo */ 59 | void 60 | fifo_add(fifo_t *f, void *data) 61 | { 62 | fifonode_t *fn = malloc(sizeof (fifonode_t)); 63 | 64 | fn->fn_data = data; 65 | fn->fn_next = NULL; 66 | 67 | if (f->f_tail == NULL) 68 | f->f_head = f->f_tail = fn; 69 | else { 70 | f->f_tail->fn_next = fn; 71 | f->f_tail = fn; 72 | } 73 | } 74 | 75 | /* Remove from the front of the fifo */ 76 | void * 77 | fifo_remove(fifo_t *f) 78 | { 79 | fifonode_t *fn; 80 | void *data; 81 | 82 | if ((fn = f->f_head) == NULL) 83 | return (NULL); 84 | 85 | data = fn->fn_data; 86 | if ((f->f_head = fn->fn_next) == NULL) 87 | f->f_tail = NULL; 88 | 89 | free(fn); 90 | 91 | return (data); 92 | } 93 | 94 | /*ARGSUSED*/ 95 | static void 96 | fifo_nullfree(void *arg) 97 | { 98 | /* this function intentionally left blank */ 99 | } 100 | 101 | /* Free an entire fifo */ 102 | void 103 | fifo_free(fifo_t *f, void (*freefn)(void *)) 104 | { 105 | fifonode_t *fn = f->f_head; 106 | fifonode_t *tmp; 107 | 108 | if (freefn == NULL) 109 | freefn = fifo_nullfree; 110 | 111 | while (fn) { 112 | 
(*freefn)(fn->fn_data); 113 | 114 | tmp = fn; 115 | fn = fn->fn_next; 116 | free(tmp); 117 | } 118 | 119 | free(f); 120 | } 121 | 122 | int 123 | fifo_len(fifo_t *f) 124 | { 125 | fifonode_t *fn; 126 | int i; 127 | 128 | for (i = 0, fn = f->f_head; fn; fn = fn->fn_next, i++); 129 | 130 | return (i); 131 | } 132 | 133 | int 134 | fifo_empty(fifo_t *f) 135 | { 136 | return (f->f_head == NULL); 137 | } 138 | 139 | int 140 | fifo_iter(fifo_t *f, int (*iter)(void *data, void *arg), void *arg) 141 | { 142 | fifonode_t *fn; 143 | int rc; 144 | int ret = 0; 145 | 146 | for (fn = f->f_head; fn; fn = fn->fn_next) { 147 | if ((rc = iter(fn->fn_data, arg)) < 0) 148 | return (-1); 149 | ret += rc; 150 | } 151 | 152 | return (ret); 153 | } 154 | -------------------------------------------------------------------------------- /cluster-manager/fifo.h: -------------------------------------------------------------------------------- 1 | /* 2 | * CDDL HEADER START 3 | * 4 | * The contents of this file are subject to the terms of the 5 | * Common Development and Distribution License, Version 1.0 only 6 | * (the "License"). You may not use this file except in compliance 7 | * with the License. 8 | * 9 | * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 | * or http://www.opensolaris.org/os/licensing. 11 | * See the License for the specific language governing permissions 12 | * and limitations under the License. 13 | * 14 | * When distributing Covered Code, include this CDDL HEADER in each 15 | * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 | * If applicable, add the following below this CDDL HEADER, with the 17 | * fields enclosed by brackets "[]" replaced with your own identifying 18 | * information: Portions Copyright [yyyy] [name of copyright owner] 19 | * 20 | * CDDL HEADER END 21 | */ 22 | /* 23 | * Copyright 2002 Sun Microsystems, Inc. All rights reserved. 24 | * Use is subject to license terms. 
25 | */ 26 | 27 | #ifndef _FIFO_H 28 | #define _FIFO_H 29 | 30 | //#pragma ident "@(#)fifo.h 1.2 05/06/08 SMI" 31 | 32 | /* 33 | * Routines for manipulating a FIFO queue 34 | */ 35 | 36 | #ifdef __cplusplus 37 | extern "C" { 38 | #endif 39 | 40 | typedef struct fifo fifo_t; 41 | 42 | extern fifo_t *fifo_new(void); 43 | extern void fifo_add(fifo_t *, void *); 44 | extern void *fifo_remove(fifo_t *); 45 | extern void fifo_free(fifo_t *, void (*)(void *)); 46 | extern int fifo_len(fifo_t *); 47 | extern int fifo_empty(fifo_t *); 48 | extern int fifo_iter(fifo_t *, int (*)(void *, void *), void *); 49 | 50 | #ifdef __cplusplus 51 | } 52 | #endif 53 | 54 | #endif /* _FIFO_H */ 55 | -------------------------------------------------------------------------------- /cluster-manager/init.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #include 9 | #include "mgmt_server.h" 10 | #include "client.h" 11 | 12 | #include 13 | 14 | 15 | int establish_cluster(int ib_port, int eth_port) 16 | { 17 | pid_t child_pid; 18 | network_init(ib_port, eth_port, 0); 19 | return 0; 20 | } 21 | 22 | int application_init(int app_id) 23 | { 24 | return 0; 25 | } 26 | static void usage(const char *argv0) 27 | { 28 | printf("Usage:\n"); 29 | printf(" %s start a server and wait for connection\n", argv0); 30 | printf("\n"); 31 | printf("Options:\n"); 32 | printf(" -i, --ib-port= use port of IB device (default 1)\n"); 33 | printf(" -p, use port of eth (default 18500)\n"); 34 | } 35 | /* Entry point: parses -i/--ib-port and -p/--eth-port (defaults 1 and LISTEN_PORT), prints usage and returns 1 on a bad option, otherwise calls establish_cluster() and returns 0. argv is non-const because getopt_long() takes char * const argv[]. */ int main(int argc, char *argv[]) 36 | { 37 | 38 | // if (pass_argument(argc, argv)) 39 | // return 1; 40 | int ib_port = 1; 41 | int eth_port = LISTEN_PORT; 42 | while (1) { 43 | int c; 44 | 45 | static struct option long_options[] = { 46 | { .name = "ib-port", .has_arg = 1, .val = 'i' }, 47 | { .name = "eth-port", .has_arg = 1, .val = 'p' }, 48 | { 0 } 49 | }; 50 | 51 | c = getopt_long(argc, argv, 
"i:p:", 52 | long_options, NULL); 53 | if (c == -1) 54 | break; 55 | 56 | switch (c) { 57 | case 'i': 58 | ib_port = strtol(optarg, NULL, 0); 59 | if (ib_port < 0) { 60 | usage(argv[0]); 61 | return 1; 62 | } 63 | break; 64 | case 'p': 65 | eth_port = strtol(optarg, NULL, 0); 66 | if (eth_port < 0 || eth_port > 65535) { /* validate the Ethernet port itself; reject values outside the TCP port range */ 67 | usage(argv[0]); 68 | return 1; 69 | } 70 | break; 71 | default: 72 | usage(argv[0]); 73 | return 1; 74 | } 75 | } 76 | //int num_node = argc - 1; 77 | 78 | // ======================================================= 79 | // Establish cluster as a main API 80 | // Which could be found in main.c 81 | // Old form: establish_cluster(num_node, argv); 82 | establish_cluster(ib_port, eth_port); 83 | // ======================================================= 84 | return 0; 85 | } 86 | 87 | 88 | 89 | -------------------------------------------------------------------------------- /cluster-manager/lite-cd-base.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2006 Cisco Systems. All rights reserved. 3 | * 4 | * This software is available to you under a choice of one of two 5 | * licenses. You may choose to be licensed under the terms of the GNU 6 | * General Public License (GPL) Version 2, available from the file 7 | * COPYING in the main directory of this source tree, or the 8 | * OpenIB.org BSD license below: 9 | * 10 | * Redistribution and use in source and binary forms, with or 11 | * without modification, are permitted provided that the following 12 | * conditions are met: 13 | * 14 | * - Redistributions of source code must retain the above 15 | * copyright notice, this list of conditions and the following 16 | * disclaimer. 17 | * 18 | * - Redistributions in binary form must reproduce the above 19 | * copyright notice, this list of conditions and the following 20 | * disclaimer in the documentation and/or other materials 21 | * provided with the distribution. 
22 | * 23 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 24 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 25 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 26 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 27 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 28 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 29 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 30 | * SOFTWARE. 31 | */ 32 | 33 | #include "lite-cd-base.h" 34 | #include 35 | #include 36 | #include 37 | #include 38 | 39 | enum ibv_mtu pp_mtu_to_enum(int mtu) 40 | { 41 | switch (mtu) { 42 | case 256: return IBV_MTU_256; 43 | case 512: return IBV_MTU_512; 44 | case 1024: return IBV_MTU_1024; 45 | case 2048: return IBV_MTU_2048; 46 | case 4096: return IBV_MTU_4096; 47 | default: return -1; 48 | } 49 | } 50 | 51 | uint16_t pp_get_local_lid(struct ibv_context *context, int port) 52 | { 53 | struct ibv_port_attr attr; 54 | 55 | if (ibv_query_port(context, port, &attr)) 56 | return 0; 57 | 58 | return attr.lid; 59 | } 60 | 61 | int pp_get_port_info(struct ibv_context *context, int port, 62 | struct ibv_port_attr *attr) 63 | { 64 | return ibv_query_port(context, port, attr); 65 | } 66 | 67 | void wire_gid_to_gid(const char *wgid, union ibv_gid *gid) 68 | { 69 | char tmp[9]; 70 | uint32_t v32; 71 | int i; 72 | 73 | for (tmp[8] = 0, i = 0; i < 4; ++i) { 74 | memcpy(tmp, wgid + i * 8, 8); 75 | sscanf(tmp, "%x", &v32); 76 | *(uint32_t *)(&gid->raw[i * 4]) = ntohl(v32); 77 | } 78 | } 79 | 80 | void gid_to_wire_gid(const union ibv_gid *gid, char wgid[]) 81 | { 82 | int i; 83 | 84 | for (i = 0; i < 4; ++i) 85 | { 86 | sprintf(&wgid[i * 8], "%08x", htonl(*(uint32_t *)(gid->raw + i * 4))); 87 | printf("%08x", htonl(*(uint32_t *)(gid->raw + i * 4))); 88 | } 89 | } 90 | -------------------------------------------------------------------------------- 
/cluster-manager/lite-cd-base.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2006 Cisco Systems. All rights reserved. 3 | * 4 | * This software is available to you under a choice of one of two 5 | * licenses. You may choose to be licensed under the terms of the GNU 6 | * General Public License (GPL) Version 2, available from the file 7 | * COPYING in the main directory of this source tree, or the 8 | * OpenIB.org BSD license below: 9 | * 10 | * Redistribution and use in source and binary forms, with or 11 | * without modification, are permitted provided that the following 12 | * conditions are met: 13 | * 14 | * - Redistributions of source code must retain the above 15 | * copyright notice, this list of conditions and the following 16 | * disclaimer. 17 | * 18 | * - Redistributions in binary form must reproduce the above 19 | * copyright notice, this list of conditions and the following 20 | * disclaimer in the documentation and/or other materials 21 | * provided with the distribution. 22 | * 23 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 24 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 25 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 26 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 27 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 28 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 29 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 30 | * SOFTWARE. 
 */

#ifndef IBV_PINGPONG_H
#define IBV_PINGPONG_H

/* NOTE(review): the header name of this directive is missing in this copy of the file. */
#include

//#define _GNU_SOURCE

/*
 * Map an MTU in bytes (256, 512, 1024, 2048 or 4096) to the matching
 * IBV_MTU_* enumerator; any other value yields -1 converted to enum ibv_mtu.
 */
enum ibv_mtu pp_mtu_to_enum(int mtu);
/* Return the LID that ibv_query_port() reports for the port, or 0 if the query fails. */
uint16_t pp_get_local_lid(struct ibv_context *context, int port);
/* Fill *attr through ibv_query_port() and return that call's result unchanged. */
int pp_get_port_info(struct ibv_context *context, int port,
		     struct ibv_port_attr *attr);
/*
 * Decode 32 hex digits (four groups of eight) from wgid into the 16 raw
 * bytes of *gid.
 */
void wire_gid_to_gid(const char *wgid, union ibv_gid *gid);
/*
 * Encode the 16 raw bytes of *gid as 32 hex digits plus a terminating NUL
 * in wgid (33 bytes needed). The implementation also prints the digits to
 * stdout.
 */
void gid_to_wire_gid(const union ibv_gid *gid, char wgid[]);

/*
 * Operation selector. In this header it is only referenced by the
 * commented-out prototypes below.
 * NOTE(review): presumably RDMA write vs. RDMA read - confirm against users.
 */
enum mode {
	M_WRITE,
	M_READ
};

/*
 * LITE entry points, implemented outside this header.
 * NOTE(review): return conventions and buffer ownership are not visible
 * here - confirm against the implementation before relying on them.
 */
int liteapi_init(int ib_port, int ethernet_port, int option);
int liteapi_send_msg(int nodeid, char *msg, int size);
int liteapi_send_msg_async(int nodeid, char *msg, int size);
//int liteapi_recv_msg();
//int liteapi_exchange_mr(int connection_id, enum mode s_mode);
//int liteapi_receive_request(int connection_id, enum mode s_mode);
//int liteapi_accept_request();

#endif /* IBV_PINGPONG_H */

--------------------------------------------------------------------------------
/cluster-manager/mgmt_server.h:
--------------------------------------------------------------------------------
/* NOTE(review): the header names of the five bare directives below are missing in this copy of the file. */
#include
#include
#include
#include
#include
#include "uthash.h"
#include "lite-cd-base.h"
//#include "buddy.h"

/*
 * NOTE(review): this is defined after the #include directives above, so it
 * cannot influence what those headers expose; feature-test macros only take
 * effect when defined before the first system header is included.
 */
#define _GNU_SOURCE

/*
 * Tunables of the management server.
 * NOTE(review): units and consumers are not visible in this header.
 */
#define ATOMIC_MAX_SIZE 4096
#define CHECKPOINT_THRESH 10000 //1000000
#define CHECKPOINT_THRESH_COUNT 100000 //100 //1000000
#define CHECKPOINT_SIZE 100

/*
 * Include guard. It only protects the declarations below; the includes and
 * macros above are processed again on every inclusion.
 */
#ifndef LITE_CD_SETUP
#define LITE_CD_SETUP

/*
 * NOTE(review): this defines (not merely declares) a global in a header, so
 * every translation unit including it carries its own tentative definition.
 * Consider "extern" here plus a single definition in one .c file.
 */
struct sockaddr_in *node_addr;

//int network_init(int num_node, const char *server_list[]);
/*
 * Print the start-up configuration, call liteapi_init() with the same three
 * arguments and then loop forever; in practice it does not return.
 */
int network_init(int ib_port, int ethernet_port, int option);
//int network_reply(int node_id, char *content);
/*
 * NOTE(review): implemented outside the visible sources; presumably handles
 * a size-byte message received from node_id - confirm.
 */
int handle_remote_request(int node_id, char *msg, int size);


#endif

--------------------------------------------------------------------------------
/cluster-manager/network_handler.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | 18 | 19 | 20 | #include 21 | 22 | #include "client.h" 23 | #include "mgmt_server.h" 24 | 25 | #define LISTEN_BACKLOG 10 26 | //#define LISTEN_PORT 18515 27 | //#define SEND_BUF_LENGTH 2048 28 | static const int RDMA_BUFFER_SIZE = 2048; 29 | 30 | 31 | char *strdupa1 (const char *s) { 32 | char *d = malloc (strlen (s) + 1); // Space for length plus nul 33 | if (d == NULL) return NULL; // No memory 34 | strcpy (d,s); // Copy the characters 35 | return d; // Return the new string 36 | } 37 | 38 | //Please be aware that below structs and definitions are also used in server side network_handler.c 39 | //#define LID_SEND_RECV_FORMAT "0000:000000:000000:00000000000000000000000000000000" 40 | //#define MAX_NODE 32 41 | #define SERVER_INFORMATION_BUFFER_SIZE 256 42 | void *get_in_addr(struct sockaddr *sa) { 43 | return sa->sa_family == AF_INET 44 | ? (void *) &(((struct sockaddr_in*)sa)->sin_addr) 45 | : (void *) &(((struct sockaddr_in6*)sa)->sin6_addr); 46 | } 47 | 48 | int network_reply(int node_id, char *content) 49 | { 50 | return 0; 51 | } 52 | int server_test_function() 53 | { 54 | 55 | int target; 56 | char *testtest; 57 | testtest=calloc(RDMA_BUFFER_SIZE, sizeof(char)); 58 | int choice; 59 | struct lmr_info *testmr; 60 | int *mr_flag; 61 | mr_flag = calloc(MAX_NODE,sizeof(int)); 62 | do 63 | { 64 | printf("Interact with ?\n"); 65 | scanf("%d", &target); 66 | }while(target==0); 67 | testmr = calloc(MAX_NODE,sizeof(struct lmr_info)); 68 | server_get_remotemr(target,testtest,RDMA_BUFFER_SIZE,&testmr[target]); 69 | mr_flag[target]=1; 70 | int i; 71 | char *input_ato; 72 | struct atomic_struct *temp_ato; 73 | while(1) 74 | { 75 | printf("1. 
RDMA WRITE \n2. RDMA READ \n3. SEND MESSAGE\n4. SEND-REPLY PAIR\n5. ATOMIC SEND\n6. CHANGE TARGET\n"); 76 | scanf("%d", &choice); 77 | switch(choice) 78 | { 79 | case 1: 80 | printf("With ?\n"); 81 | scanf("%s", testtest); 82 | server_rdma_write(target, &testmr[target], testtest, RDMA_BUFFER_SIZE); 83 | //client_send_request(target, M_WRITE, testtest, RDMA_BUFFER_SIZE); 84 | //liteapi_rdma_write(target, &ctx->peer_mr[target], testtest, RDMA_BUFFER_SIZE); 85 | break; 86 | case 2: 87 | server_rdma_read(target, &testmr[target], testtest, RDMA_BUFFER_SIZE); 88 | //liteapi_rdma_read(target, &ctx->peer_mr[target], testtest, RDMA_BUFFER_SIZE); 89 | printf("%d: %s\n", target, testtest); 90 | break; 91 | case 3: 92 | printf("with ?\n"); 93 | scanf("%s", testtest); 94 | liteapi_send_message(target, testtest, RDMA_BUFFER_SIZE); 95 | break; 96 | case 4: 97 | printf("with ?\n"); 98 | scanf("%s", testtest); 99 | char *abc; 100 | abc = calloc(4096, sizeof(char)); 101 | liteapi_send_reply(target, testtest, strlen(testtest), abc); 102 | printf("%s\n", abc); 103 | break; 104 | /*case 5: 105 | temp_ato = malloc(sizeof(struct atomic_struct)*16); 106 | char *reply = malloc(4096); 107 | int ret_size; 108 | for(i=0;i<16;i++) 109 | { 110 | input_ato = malloc(32); 111 | scanf("%s", input_ato); 112 | temp_ato[i].vaddr = input_ato; 113 | temp_ato[i].len = strlen(input_ato); 114 | if(!strcmp(input_ato, "exit")) 115 | break; 116 | } 117 | i=i+1; 118 | server_atomic_send_reply(target, temp_ato, i, reply, &ret_size); 119 | break;*/ 120 | case 6: 121 | printf("change to ?\n"); 122 | scanf("%d", &target); 123 | { 124 | if(mr_flag[target]==1) 125 | break; 126 | server_get_remotemr(target,testtest,RDMA_BUFFER_SIZE,&testmr[target]); 127 | mr_flag[target]=1; 128 | } 129 | break; 130 | /*case 4: 131 | printf("send to ?\n"); 132 | scanf("%d", &target); 133 | printf("with size ?\n"); 134 | scanf("%d", &size); 135 | //strcpy(ctx->send_msg[target]->data.newnode_msg, testtest); 136 | 
//client_send_message(target, MSG_CLIENT_SEND); 137 | 138 | testmr = malloc(sizeof(struct ibv_mr)); 139 | liteapi_get_remotemr(target,testtest,size,testmr); 140 | printf("%lu.%lu\n", (long unsigned int)testmr->addr, (long unsigned int)testmr->lkey); 141 | printf("With ?\n"); 142 | scanf("%s", testtest); 143 | liteapi_rdma_write(target, testmr, testtest, size); 144 | strcpy(testtest, "!!"); 145 | liteapi_rdma_read(target, testmr, testtest, size); 146 | printf("%d: %s\n", target, testtest); 147 | break;*/ 148 | default: 149 | printf("Error input\n"); 150 | } 151 | memset(testtest, 0, RDMA_BUFFER_SIZE); 152 | 153 | } 154 | } 155 | // ======================================================= 156 | // Old form: int network_init(int num_node, const char *server_list[]) 157 | // ======================================================= 158 | int network_init(int ib_port, int ethernet_port, int option) 159 | { 160 | char hostname[128]; 161 | gethostname(hostname, 128);//Get own host name 162 | printf("Initialize Server\n"); 163 | printf("Hostname:\t%s\n", hostname); 164 | printf("IB-port:\t%d\n", ib_port); 165 | printf("Eth-port:\t%d\n", ethernet_port); 166 | printf("Option:\t%d\n", option); 167 | liteapi_init(ib_port, ethernet_port, option); 168 | while(1); 169 | return 0; 170 | } 171 | -------------------------------------------------------------------------------- /cluster-manager/uthash.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2003-2014, Troy D. Hanson http://troydhanson.github.com/uthash/ 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are met: 7 | 8 | * Redistributions of source code must retain the above copyright 9 | notice, this list of conditions and the following disclaimer. 
10 | 11 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS 12 | IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 13 | TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A 14 | PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER 15 | OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 16 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 17 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 18 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 19 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 20 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 21 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 22 | */ 23 | 24 | #ifndef UTHASH_H 25 | #define UTHASH_H 26 | 27 | #include /* memcmp,strlen */ 28 | #include /* ptrdiff_t */ 29 | #include /* exit() */ 30 | 31 | /* These macros use decltype or the earlier __typeof GNU extension. 32 | As decltype is only available in newer compilers (VS2010 or gcc 4.3+ 33 | when compiling c++ source) this code uses whatever method is needed 34 | or, for VS2008 where neither is available, uses casting workarounds. 
*/ 35 | #if defined(_MSC_VER) /* MS compiler */ 36 | #if _MSC_VER >= 1600 && defined(__cplusplus) /* VS2010 or newer in C++ mode */ 37 | #define DECLTYPE(x) (decltype(x)) 38 | #else /* VS2008 or older (or VS2010 in C mode) */ 39 | #define NO_DECLTYPE 40 | #define DECLTYPE(x) 41 | #endif 42 | #elif defined(__BORLANDC__) || defined(__LCC__) || defined(__WATCOMC__) 43 | #define NO_DECLTYPE 44 | #define DECLTYPE(x) 45 | #else /* GNU, Sun and other compilers */ 46 | #define DECLTYPE(x) (__typeof(x)) 47 | #endif 48 | 49 | #ifdef NO_DECLTYPE 50 | #define DECLTYPE_ASSIGN(dst,src) \ 51 | do { \ 52 | char **_da_dst = (char**)(&(dst)); \ 53 | *_da_dst = (char*)(src); \ 54 | } while(0) 55 | #else 56 | #define DECLTYPE_ASSIGN(dst,src) \ 57 | do { \ 58 | (dst) = DECLTYPE(dst)(src); \ 59 | } while(0) 60 | #endif 61 | 62 | /* a number of the hash function use uint32_t which isn't defined on Pre VS2010 */ 63 | #if defined(_WIN32) 64 | #if defined(_MSC_VER) && _MSC_VER >= 1600 65 | #include 66 | #elif defined(__WATCOMC__) || defined(__MINGW32__) || defined(__CYGWIN__) 67 | #include 68 | #else 69 | typedef unsigned int uint32_t; 70 | typedef unsigned char uint8_t; 71 | #endif 72 | #elif defined(__GNUC__) && !defined(__VXWORKS__) 73 | #include 74 | #else 75 | typedef unsigned int uint32_t; 76 | typedef unsigned char uint8_t; 77 | #endif 78 | 79 | #define UTHASH_VERSION 1.9.9 80 | 81 | #ifndef uthash_fatal 82 | #define uthash_fatal(msg) exit(-1) /* fatal error (out of memory,etc) */ 83 | #endif 84 | #ifndef uthash_malloc 85 | #define uthash_malloc(sz) malloc(sz) /* malloc fcn */ 86 | #endif 87 | #ifndef uthash_free 88 | #define uthash_free(ptr,sz) free(ptr) /* free fcn */ 89 | #endif 90 | 91 | #ifndef uthash_noexpand_fyi 92 | #define uthash_noexpand_fyi(tbl) /* can be defined to log noexpand */ 93 | #endif 94 | #ifndef uthash_expand_fyi 95 | #define uthash_expand_fyi(tbl) /* can be defined to log expands */ 96 | #endif 97 | 98 | /* initial number of buckets */ 99 | #define 
HASH_INITIAL_NUM_BUCKETS 32U /* initial number of buckets */ 100 | #define HASH_INITIAL_NUM_BUCKETS_LOG2 5U /* lg2 of initial number of buckets */ 101 | #define HASH_BKT_CAPACITY_THRESH 10U /* expand when bucket count reaches */ 102 | 103 | /* calculate the element whose hash handle address is hhe */ 104 | #define ELMT_FROM_HH(tbl,hhp) ((void*)(((char*)(hhp)) - ((tbl)->hho))) 105 | 106 | #define HASH_FIND(hh,head,keyptr,keylen,out) \ 107 | do { \ 108 | out=NULL; \ 109 | if (head != NULL) { \ 110 | unsigned _hf_bkt,_hf_hashv; \ 111 | HASH_FCN(keyptr,keylen, (head)->hh.tbl->num_buckets, _hf_hashv, _hf_bkt); \ 112 | if (HASH_BLOOM_TEST((head)->hh.tbl, _hf_hashv) != 0) { \ 113 | HASH_FIND_IN_BKT((head)->hh.tbl, hh, (head)->hh.tbl->buckets[ _hf_bkt ], \ 114 | keyptr,keylen,out); \ 115 | } \ 116 | } \ 117 | } while (0) 118 | 119 | #ifdef HASH_BLOOM 120 | #define HASH_BLOOM_BITLEN (1UL << HASH_BLOOM) 121 | #define HASH_BLOOM_BYTELEN (HASH_BLOOM_BITLEN/8UL) + (((HASH_BLOOM_BITLEN%8UL)!=0UL) ? 1UL : 0UL) 122 | #define HASH_BLOOM_MAKE(tbl) \ 123 | do { \ 124 | (tbl)->bloom_nbits = HASH_BLOOM; \ 125 | (tbl)->bloom_bv = (uint8_t*)uthash_malloc(HASH_BLOOM_BYTELEN); \ 126 | if (!((tbl)->bloom_bv)) { uthash_fatal( "out of memory"); } \ 127 | memset((tbl)->bloom_bv, 0, HASH_BLOOM_BYTELEN); \ 128 | (tbl)->bloom_sig = HASH_BLOOM_SIGNATURE; \ 129 | } while (0) 130 | 131 | #define HASH_BLOOM_FREE(tbl) \ 132 | do { \ 133 | uthash_free((tbl)->bloom_bv, HASH_BLOOM_BYTELEN); \ 134 | } while (0) 135 | 136 | #define HASH_BLOOM_BITSET(bv,idx) (bv[(idx)/8U] |= (1U << ((idx)%8U))) 137 | #define HASH_BLOOM_BITTEST(bv,idx) (bv[(idx)/8U] & (1U << ((idx)%8U))) 138 | 139 | #define HASH_BLOOM_ADD(tbl,hashv) \ 140 | HASH_BLOOM_BITSET((tbl)->bloom_bv, (hashv & (uint32_t)((1ULL << (tbl)->bloom_nbits) - 1U))) 141 | 142 | #define HASH_BLOOM_TEST(tbl,hashv) \ 143 | HASH_BLOOM_BITTEST((tbl)->bloom_bv, (hashv & (uint32_t)((1ULL << (tbl)->bloom_nbits) - 1U))) 144 | 145 | #else 146 | #define 
HASH_BLOOM_MAKE(tbl) 147 | #define HASH_BLOOM_FREE(tbl) 148 | #define HASH_BLOOM_ADD(tbl,hashv) 149 | #define HASH_BLOOM_TEST(tbl,hashv) (1) 150 | #define HASH_BLOOM_BYTELEN 0U 151 | #endif 152 | 153 | #define HASH_MAKE_TABLE(hh,head) \ 154 | do { \ 155 | (head)->hh.tbl = (UT_hash_table*)uthash_malloc( \ 156 | sizeof(UT_hash_table)); \ 157 | if (!((head)->hh.tbl)) { uthash_fatal( "out of memory"); } \ 158 | memset((head)->hh.tbl, 0, sizeof(UT_hash_table)); \ 159 | (head)->hh.tbl->tail = &((head)->hh); \ 160 | (head)->hh.tbl->num_buckets = HASH_INITIAL_NUM_BUCKETS; \ 161 | (head)->hh.tbl->log2_num_buckets = HASH_INITIAL_NUM_BUCKETS_LOG2; \ 162 | (head)->hh.tbl->hho = (char*)(&(head)->hh) - (char*)(head); \ 163 | (head)->hh.tbl->buckets = (UT_hash_bucket*)uthash_malloc( \ 164 | HASH_INITIAL_NUM_BUCKETS*sizeof(struct UT_hash_bucket)); \ 165 | if (! (head)->hh.tbl->buckets) { uthash_fatal( "out of memory"); } \ 166 | memset((head)->hh.tbl->buckets, 0, \ 167 | HASH_INITIAL_NUM_BUCKETS*sizeof(struct UT_hash_bucket)); \ 168 | HASH_BLOOM_MAKE((head)->hh.tbl); \ 169 | (head)->hh.tbl->signature = HASH_SIGNATURE; \ 170 | } while(0) 171 | 172 | #define HASH_ADD(hh,head,fieldname,keylen_in,add) \ 173 | HASH_ADD_KEYPTR(hh,head,&((add)->fieldname),keylen_in,add) 174 | 175 | #define HASH_REPLACE(hh,head,fieldname,keylen_in,add,replaced) \ 176 | do { \ 177 | replaced=NULL; \ 178 | HASH_FIND(hh,head,&((add)->fieldname),keylen_in,replaced); \ 179 | if (replaced!=NULL) { \ 180 | HASH_DELETE(hh,head,replaced); \ 181 | } \ 182 | HASH_ADD(hh,head,fieldname,keylen_in,add); \ 183 | } while(0) 184 | 185 | #define HASH_ADD_KEYPTR(hh,head,keyptr,keylen_in,add) \ 186 | do { \ 187 | unsigned _ha_bkt; \ 188 | (add)->hh.next = NULL; \ 189 | (add)->hh.key = (char*)(keyptr); \ 190 | (add)->hh.keylen = (unsigned)(keylen_in); \ 191 | if (!(head)) { \ 192 | head = (add); \ 193 | (head)->hh.prev = NULL; \ 194 | HASH_MAKE_TABLE(hh,head); \ 195 | } else { \ 196 | (head)->hh.tbl->tail->next = (add); \ 197 
| (add)->hh.prev = ELMT_FROM_HH((head)->hh.tbl, (head)->hh.tbl->tail); \ 198 | (head)->hh.tbl->tail = &((add)->hh); \ 199 | } \ 200 | (head)->hh.tbl->num_items++; \ 201 | (add)->hh.tbl = (head)->hh.tbl; \ 202 | HASH_FCN(keyptr,keylen_in, (head)->hh.tbl->num_buckets, \ 203 | (add)->hh.hashv, _ha_bkt); \ 204 | HASH_ADD_TO_BKT((head)->hh.tbl->buckets[_ha_bkt],&(add)->hh); \ 205 | HASH_BLOOM_ADD((head)->hh.tbl,(add)->hh.hashv); \ 206 | HASH_EMIT_KEY(hh,head,keyptr,keylen_in); \ 207 | HASH_FSCK(hh,head); \ 208 | } while(0) 209 | 210 | #define HASH_TO_BKT( hashv, num_bkts, bkt ) \ 211 | do { \ 212 | bkt = ((hashv) & ((num_bkts) - 1U)); \ 213 | } while(0) 214 | 215 | /* delete "delptr" from the hash table. 216 | * "the usual" patch-up process for the app-order doubly-linked-list. 217 | * The use of _hd_hh_del below deserves special explanation. 218 | * These used to be expressed using (delptr) but that led to a bug 219 | * if someone used the same symbol for the head and deletee, like 220 | * HASH_DELETE(hh,users,users); 221 | * We want that to work, but by changing the head (users) below 222 | * we were forfeiting our ability to further refer to the deletee (users) 223 | * in the patch-up process. Solution: use scratch space to 224 | * copy the deletee pointer, then the latter references are via that 225 | * scratch pointer rather than through the repointed (users) symbol. 
226 | */ 227 | #define HASH_DELETE(hh,head,delptr) \ 228 | do { \ 229 | struct UT_hash_handle *_hd_hh_del; \ 230 | if ( ((delptr)->hh.prev == NULL) && ((delptr)->hh.next == NULL) ) { \ 231 | uthash_free((head)->hh.tbl->buckets, \ 232 | (head)->hh.tbl->num_buckets*sizeof(struct UT_hash_bucket) ); \ 233 | HASH_BLOOM_FREE((head)->hh.tbl); \ 234 | uthash_free((head)->hh.tbl, sizeof(UT_hash_table)); \ 235 | head = NULL; \ 236 | } else { \ 237 | unsigned _hd_bkt; \ 238 | _hd_hh_del = &((delptr)->hh); \ 239 | if ((delptr) == ELMT_FROM_HH((head)->hh.tbl,(head)->hh.tbl->tail)) { \ 240 | (head)->hh.tbl->tail = \ 241 | (UT_hash_handle*)((ptrdiff_t)((delptr)->hh.prev) + \ 242 | (head)->hh.tbl->hho); \ 243 | } \ 244 | if ((delptr)->hh.prev != NULL) { \ 245 | ((UT_hash_handle*)((ptrdiff_t)((delptr)->hh.prev) + \ 246 | (head)->hh.tbl->hho))->next = (delptr)->hh.next; \ 247 | } else { \ 248 | DECLTYPE_ASSIGN(head,(delptr)->hh.next); \ 249 | } \ 250 | if (_hd_hh_del->next != NULL) { \ 251 | ((UT_hash_handle*)((ptrdiff_t)_hd_hh_del->next + \ 252 | (head)->hh.tbl->hho))->prev = \ 253 | _hd_hh_del->prev; \ 254 | } \ 255 | HASH_TO_BKT( _hd_hh_del->hashv, (head)->hh.tbl->num_buckets, _hd_bkt); \ 256 | HASH_DEL_IN_BKT(hh,(head)->hh.tbl->buckets[_hd_bkt], _hd_hh_del); \ 257 | (head)->hh.tbl->num_items--; \ 258 | } \ 259 | HASH_FSCK(hh,head); \ 260 | } while (0) 261 | 262 | 263 | /* convenience forms of HASH_FIND/HASH_ADD/HASH_DEL */ 264 | #define HASH_FIND_STR(head,findstr,out) \ 265 | HASH_FIND(hh,head,findstr,(unsigned)strlen(findstr),out) 266 | #define HASH_ADD_STR(head,strfield,add) \ 267 | HASH_ADD(hh,head,strfield[0],(unsigned int)strlen(add->strfield),add) 268 | #define HASH_REPLACE_STR(head,strfield,add,replaced) \ 269 | HASH_REPLACE(hh,head,strfield[0],(unsigned)strlen(add->strfield),add,replaced) 270 | #define HASH_FIND_INT(head,findint,out) \ 271 | HASH_FIND(hh,head,findint,sizeof(int),out) 272 | #define HASH_ADD_INT(head,intfield,add) \ 273 | 
HASH_ADD(hh,head,intfield,sizeof(int),add) 274 | #define HASH_REPLACE_INT(head,intfield,add,replaced) \ 275 | HASH_REPLACE(hh,head,intfield,sizeof(int),add,replaced) 276 | #define HASH_FIND_PTR(head,findptr,out) \ 277 | HASH_FIND(hh,head,findptr,sizeof(void *),out) 278 | #define HASH_ADD_PTR(head,ptrfield,add) \ 279 | HASH_ADD(hh,head,ptrfield,sizeof(void *),add) 280 | #define HASH_REPLACE_PTR(head,ptrfield,add,replaced) \ 281 | HASH_REPLACE(hh,head,ptrfield,sizeof(void *),add,replaced) 282 | #define HASH_DEL(head,delptr) \ 283 | HASH_DELETE(hh,head,delptr) 284 | 285 | /* HASH_FSCK checks hash integrity on every add/delete when HASH_DEBUG is defined. 286 | * This is for uthash developer only; it compiles away if HASH_DEBUG isn't defined. 287 | */ 288 | #ifdef HASH_DEBUG 289 | #define HASH_OOPS(...) do { fprintf(stderr,__VA_ARGS__); exit(-1); } while (0) 290 | #define HASH_FSCK(hh,head) \ 291 | do { \ 292 | struct UT_hash_handle *_thh; \ 293 | if (head) { \ 294 | unsigned _bkt_i; \ 295 | unsigned _count; \ 296 | char *_prev; \ 297 | _count = 0; \ 298 | for( _bkt_i = 0; _bkt_i < (head)->hh.tbl->num_buckets; _bkt_i++) { \ 299 | unsigned _bkt_count = 0; \ 300 | _thh = (head)->hh.tbl->buckets[_bkt_i].hh_head; \ 301 | _prev = NULL; \ 302 | while (_thh) { \ 303 | if (_prev != (char*)(_thh->hh_prev)) { \ 304 | HASH_OOPS("invalid hh_prev %p, actual %p\n", \ 305 | _thh->hh_prev, _prev ); \ 306 | } \ 307 | _bkt_count++; \ 308 | _prev = (char*)(_thh); \ 309 | _thh = _thh->hh_next; \ 310 | } \ 311 | _count += _bkt_count; \ 312 | if ((head)->hh.tbl->buckets[_bkt_i].count != _bkt_count) { \ 313 | HASH_OOPS("invalid bucket count %u, actual %u\n", \ 314 | (head)->hh.tbl->buckets[_bkt_i].count, _bkt_count); \ 315 | } \ 316 | } \ 317 | if (_count != (head)->hh.tbl->num_items) { \ 318 | HASH_OOPS("invalid hh item count %u, actual %u\n", \ 319 | (head)->hh.tbl->num_items, _count ); \ 320 | } \ 321 | /* traverse hh in app order; check next/prev integrity, count */ \ 322 | _count = 0; \ 
323 | _prev = NULL; \ 324 | _thh = &(head)->hh; \ 325 | while (_thh) { \ 326 | _count++; \ 327 | if (_prev !=(char*)(_thh->prev)) { \ 328 | HASH_OOPS("invalid prev %p, actual %p\n", \ 329 | _thh->prev, _prev ); \ 330 | } \ 331 | _prev = (char*)ELMT_FROM_HH((head)->hh.tbl, _thh); \ 332 | _thh = ( _thh->next ? (UT_hash_handle*)((char*)(_thh->next) + \ 333 | (head)->hh.tbl->hho) : NULL ); \ 334 | } \ 335 | if (_count != (head)->hh.tbl->num_items) { \ 336 | HASH_OOPS("invalid app item count %u, actual %u\n", \ 337 | (head)->hh.tbl->num_items, _count ); \ 338 | } \ 339 | } \ 340 | } while (0) 341 | #else 342 | #define HASH_FSCK(hh,head) 343 | #endif 344 | 345 | /* When compiled with -DHASH_EMIT_KEYS, length-prefixed keys are emitted to 346 | * the descriptor to which this macro is defined for tuning the hash function. 347 | * The app can #include to get the prototype for write(2). */ 348 | #ifdef HASH_EMIT_KEYS 349 | #define HASH_EMIT_KEY(hh,head,keyptr,fieldlen) \ 350 | do { \ 351 | unsigned _klen = fieldlen; \ 352 | write(HASH_EMIT_KEYS, &_klen, sizeof(_klen)); \ 353 | write(HASH_EMIT_KEYS, keyptr, (unsigned long)fieldlen); \ 354 | } while (0) 355 | #else 356 | #define HASH_EMIT_KEY(hh,head,keyptr,fieldlen) 357 | #endif 358 | 359 | /* default to Jenkin's hash unless overridden e.g. DHASH_FUNCTION=HASH_SAX */ 360 | #ifdef HASH_FUNCTION 361 | #define HASH_FCN HASH_FUNCTION 362 | #else 363 | #define HASH_FCN HASH_JEN 364 | #endif 365 | 366 | /* The Bernstein hash function, used in Perl prior to v5.6. Note (x<<5+x)=x*33. 
*/ 367 | #define HASH_BER(key,keylen,num_bkts,hashv,bkt) \ 368 | do { \ 369 | unsigned _hb_keylen=(unsigned)keylen; \ 370 | const unsigned char *_hb_key=(const unsigned char*)(key); \ 371 | (hashv) = 0; \ 372 | while (_hb_keylen-- != 0U) { \ 373 | (hashv) = (((hashv) << 5) + (hashv)) + *_hb_key++; \ 374 | } \ 375 | bkt = (hashv) & (num_bkts-1U); \ 376 | } while (0) 377 | 378 | 379 | /* SAX/FNV/OAT/JEN hash functions are macro variants of those listed at 380 | * http://eternallyconfuzzled.com/tuts/algorithms/jsw_tut_hashing.aspx */ 381 | #define HASH_SAX(key,keylen,num_bkts,hashv,bkt) \ 382 | do { \ 383 | unsigned _sx_i; \ 384 | const unsigned char *_hs_key=(const unsigned char*)(key); \ 385 | hashv = 0; \ 386 | for(_sx_i=0; _sx_i < keylen; _sx_i++) { \ 387 | hashv ^= (hashv << 5) + (hashv >> 2) + _hs_key[_sx_i]; \ 388 | } \ 389 | bkt = hashv & (num_bkts-1U); \ 390 | } while (0) 391 | /* FNV-1a variation */ 392 | #define HASH_FNV(key,keylen,num_bkts,hashv,bkt) \ 393 | do { \ 394 | unsigned _fn_i; \ 395 | const unsigned char *_hf_key=(const unsigned char*)(key); \ 396 | hashv = 2166136261U; \ 397 | for(_fn_i=0; _fn_i < keylen; _fn_i++) { \ 398 | hashv = hashv ^ _hf_key[_fn_i]; \ 399 | hashv = hashv * 16777619U; \ 400 | } \ 401 | bkt = hashv & (num_bkts-1U); \ 402 | } while(0) 403 | 404 | #define HASH_OAT(key,keylen,num_bkts,hashv,bkt) \ 405 | do { \ 406 | unsigned _ho_i; \ 407 | const unsigned char *_ho_key=(const unsigned char*)(key); \ 408 | hashv = 0; \ 409 | for(_ho_i=0; _ho_i < keylen; _ho_i++) { \ 410 | hashv += _ho_key[_ho_i]; \ 411 | hashv += (hashv << 10); \ 412 | hashv ^= (hashv >> 6); \ 413 | } \ 414 | hashv += (hashv << 3); \ 415 | hashv ^= (hashv >> 11); \ 416 | hashv += (hashv << 15); \ 417 | bkt = hashv & (num_bkts-1U); \ 418 | } while(0) 419 | 420 | #define HASH_JEN_MIX(a,b,c) \ 421 | do { \ 422 | a -= b; a -= c; a ^= ( c >> 13 ); \ 423 | b -= c; b -= a; b ^= ( a << 8 ); \ 424 | c -= a; c -= b; c ^= ( b >> 13 ); \ 425 | a -= b; a -= c; a ^= ( c >> 12 
); \ 426 | b -= c; b -= a; b ^= ( a << 16 ); \ 427 | c -= a; c -= b; c ^= ( b >> 5 ); \ 428 | a -= b; a -= c; a ^= ( c >> 3 ); \ 429 | b -= c; b -= a; b ^= ( a << 10 ); \ 430 | c -= a; c -= b; c ^= ( b >> 15 ); \ 431 | } while (0) 432 | 433 | #define HASH_JEN(key,keylen,num_bkts,hashv,bkt) \ 434 | do { \ 435 | unsigned _hj_i,_hj_j,_hj_k; \ 436 | unsigned const char *_hj_key=(unsigned const char*)(key); \ 437 | hashv = 0xfeedbeefu; \ 438 | _hj_i = _hj_j = 0x9e3779b9u; \ 439 | _hj_k = (unsigned)(keylen); \ 440 | while (_hj_k >= 12U) { \ 441 | _hj_i += (_hj_key[0] + ( (unsigned)_hj_key[1] << 8 ) \ 442 | + ( (unsigned)_hj_key[2] << 16 ) \ 443 | + ( (unsigned)_hj_key[3] << 24 ) ); \ 444 | _hj_j += (_hj_key[4] + ( (unsigned)_hj_key[5] << 8 ) \ 445 | + ( (unsigned)_hj_key[6] << 16 ) \ 446 | + ( (unsigned)_hj_key[7] << 24 ) ); \ 447 | hashv += (_hj_key[8] + ( (unsigned)_hj_key[9] << 8 ) \ 448 | + ( (unsigned)_hj_key[10] << 16 ) \ 449 | + ( (unsigned)_hj_key[11] << 24 ) ); \ 450 | \ 451 | HASH_JEN_MIX(_hj_i, _hj_j, hashv); \ 452 | \ 453 | _hj_key += 12; \ 454 | _hj_k -= 12U; \ 455 | } \ 456 | hashv += (unsigned)(keylen); \ 457 | switch ( _hj_k ) { \ 458 | case 11: hashv += ( (unsigned)_hj_key[10] << 24 ); /* FALLTHROUGH */ \ 459 | case 10: hashv += ( (unsigned)_hj_key[9] << 16 ); /* FALLTHROUGH */ \ 460 | case 9: hashv += ( (unsigned)_hj_key[8] << 8 ); /* FALLTHROUGH */ \ 461 | case 8: _hj_j += ( (unsigned)_hj_key[7] << 24 ); /* FALLTHROUGH */ \ 462 | case 7: _hj_j += ( (unsigned)_hj_key[6] << 16 ); /* FALLTHROUGH */ \ 463 | case 6: _hj_j += ( (unsigned)_hj_key[5] << 8 ); /* FALLTHROUGH */ \ 464 | case 5: _hj_j += _hj_key[4]; /* FALLTHROUGH */ \ 465 | case 4: _hj_i += ( (unsigned)_hj_key[3] << 24 ); /* FALLTHROUGH */ \ 466 | case 3: _hj_i += ( (unsigned)_hj_key[2] << 16 ); /* FALLTHROUGH */ \ 467 | case 2: _hj_i += ( (unsigned)_hj_key[1] << 8 ); /* FALLTHROUGH */ \ 468 | case 1: _hj_i += _hj_key[0]; \ 469 | } \ 470 | HASH_JEN_MIX(_hj_i, _hj_j, hashv); \ 471 | bkt = hashv & 
(num_bkts-1U); \ 472 | } while(0) 473 | 474 | /* The Paul Hsieh hash function */ 475 | #undef get16bits 476 | #if (defined(__GNUC__) && defined(__i386__)) || defined(__WATCOMC__) \ 477 | || defined(_MSC_VER) || defined (__BORLANDC__) || defined (__TURBOC__) 478 | #define get16bits(d) (*((const uint16_t *) (d))) 479 | #endif 480 | 481 | #if !defined (get16bits) 482 | #define get16bits(d) ((((uint32_t)(((const uint8_t *)(d))[1])) << 8) \ 483 | +(uint32_t)(((const uint8_t *)(d))[0]) ) 484 | #endif 485 | #define HASH_SFH(key,keylen,num_bkts,hashv,bkt) \ 486 | do { \ 487 | unsigned const char *_sfh_key=(unsigned const char*)(key); \ 488 | uint32_t _sfh_tmp, _sfh_len = (uint32_t)keylen; \ 489 | \ 490 | unsigned _sfh_rem = _sfh_len & 3U; \ 491 | _sfh_len >>= 2; \ 492 | hashv = 0xcafebabeu; \ 493 | \ 494 | /* Main loop */ \ 495 | for (;_sfh_len > 0U; _sfh_len--) { \ 496 | hashv += get16bits (_sfh_key); \ 497 | _sfh_tmp = ((uint32_t)(get16bits (_sfh_key+2)) << 11) ^ hashv; \ 498 | hashv = (hashv << 16) ^ _sfh_tmp; \ 499 | _sfh_key += 2U*sizeof (uint16_t); \ 500 | hashv += hashv >> 11; \ 501 | } \ 502 | \ 503 | /* Handle end cases */ \ 504 | switch (_sfh_rem) { \ 505 | case 3: hashv += get16bits (_sfh_key); \ 506 | hashv ^= hashv << 16; \ 507 | hashv ^= (uint32_t)(_sfh_key[sizeof (uint16_t)]) << 18; \ 508 | hashv += hashv >> 11; \ 509 | break; \ 510 | case 2: hashv += get16bits (_sfh_key); \ 511 | hashv ^= hashv << 11; \ 512 | hashv += hashv >> 17; \ 513 | break; \ 514 | case 1: hashv += *_sfh_key; \ 515 | hashv ^= hashv << 10; \ 516 | hashv += hashv >> 1; \ 517 | } \ 518 | \ 519 | /* Force "avalanching" of final 127 bits */ \ 520 | hashv ^= hashv << 3; \ 521 | hashv += hashv >> 5; \ 522 | hashv ^= hashv << 4; \ 523 | hashv += hashv >> 17; \ 524 | hashv ^= hashv << 25; \ 525 | hashv += hashv >> 6; \ 526 | bkt = hashv & (num_bkts-1U); \ 527 | } while(0) 528 | 529 | #ifdef HASH_USING_NO_STRICT_ALIASING 530 | /* The MurmurHash exploits some CPU's (x86,x86_64) tolerance for 
unaligned reads. 531 | * For other types of CPU's (e.g. Sparc) an unaligned read causes a bus error. 532 | * MurmurHash uses the faster approach only on CPU's where we know it's safe. 533 | * 534 | * Note the preprocessor built-in defines can be emitted using: 535 | * 536 | * gcc -m64 -dM -E - < /dev/null (on gcc) 537 | * cc -## a.c (where a.c is a simple test file) (Sun Studio) 538 | */ 539 | #if (defined(__i386__) || defined(__x86_64__) || defined(_M_IX86)) 540 | #define MUR_GETBLOCK(p,i) p[i] 541 | #else /* non intel */ 542 | #define MUR_PLUS0_ALIGNED(p) (((unsigned long)p & 3UL) == 0UL) 543 | #define MUR_PLUS1_ALIGNED(p) (((unsigned long)p & 3UL) == 1UL) 544 | #define MUR_PLUS2_ALIGNED(p) (((unsigned long)p & 3UL) == 2UL) 545 | #define MUR_PLUS3_ALIGNED(p) (((unsigned long)p & 3UL) == 3UL) 546 | #define WP(p) ((uint32_t*)((unsigned long)(p) & ~3UL)) 547 | #if (defined(__BIG_ENDIAN__) || defined(SPARC) || defined(__ppc__) || defined(__ppc64__)) 548 | #define MUR_THREE_ONE(p) ((((*WP(p))&0x00ffffff) << 8) | (((*(WP(p)+1))&0xff000000) >> 24)) 549 | #define MUR_TWO_TWO(p) ((((*WP(p))&0x0000ffff) <<16) | (((*(WP(p)+1))&0xffff0000) >> 16)) 550 | #define MUR_ONE_THREE(p) ((((*WP(p))&0x000000ff) <<24) | (((*(WP(p)+1))&0xffffff00) >> 8)) 551 | #else /* assume little endian non-intel */ 552 | #define MUR_THREE_ONE(p) ((((*WP(p))&0xffffff00) >> 8) | (((*(WP(p)+1))&0x000000ff) << 24)) 553 | #define MUR_TWO_TWO(p) ((((*WP(p))&0xffff0000) >>16) | (((*(WP(p)+1))&0x0000ffff) << 16)) 554 | #define MUR_ONE_THREE(p) ((((*WP(p))&0xff000000) >>24) | (((*(WP(p)+1))&0x00ffffff) << 8)) 555 | #endif 556 | #define MUR_GETBLOCK(p,i) (MUR_PLUS0_ALIGNED(p) ? ((p)[i]) : \ 557 | (MUR_PLUS1_ALIGNED(p) ? MUR_THREE_ONE(p) : \ 558 | (MUR_PLUS2_ALIGNED(p) ? 
MUR_TWO_TWO(p) : \ 559 | MUR_ONE_THREE(p)))) 560 | #endif 561 | #define MUR_ROTL32(x,r) (((x) << (r)) | ((x) >> (32 - (r)))) 562 | #define MUR_FMIX(_h) \ 563 | do { \ 564 | _h ^= _h >> 16; \ 565 | _h *= 0x85ebca6bu; \ 566 | _h ^= _h >> 13; \ 567 | _h *= 0xc2b2ae35u; \ 568 | _h ^= _h >> 16; \ 569 | } while(0) 570 | 571 | #define HASH_MUR(key,keylen,num_bkts,hashv,bkt) \ 572 | do { \ 573 | const uint8_t *_mur_data = (const uint8_t*)(key); \ 574 | const int _mur_nblocks = (int)(keylen) / 4; \ 575 | uint32_t _mur_h1 = 0xf88D5353u; \ 576 | uint32_t _mur_c1 = 0xcc9e2d51u; \ 577 | uint32_t _mur_c2 = 0x1b873593u; \ 578 | uint32_t _mur_k1 = 0; \ 579 | const uint8_t *_mur_tail; \ 580 | const uint32_t *_mur_blocks = (const uint32_t*)(_mur_data+(_mur_nblocks*4)); \ 581 | int _mur_i; \ 582 | for(_mur_i = -_mur_nblocks; _mur_i!=0; _mur_i++) { \ 583 | _mur_k1 = MUR_GETBLOCK(_mur_blocks,_mur_i); \ 584 | _mur_k1 *= _mur_c1; \ 585 | _mur_k1 = MUR_ROTL32(_mur_k1,15); \ 586 | _mur_k1 *= _mur_c2; \ 587 | \ 588 | _mur_h1 ^= _mur_k1; \ 589 | _mur_h1 = MUR_ROTL32(_mur_h1,13); \ 590 | _mur_h1 = (_mur_h1*5U) + 0xe6546b64u; \ 591 | } \ 592 | _mur_tail = (const uint8_t*)(_mur_data + (_mur_nblocks*4)); \ 593 | _mur_k1=0; \ 594 | switch((keylen) & 3U) { \ 595 | case 3: _mur_k1 ^= (uint32_t)_mur_tail[2] << 16; /* FALLTHROUGH */ \ 596 | case 2: _mur_k1 ^= (uint32_t)_mur_tail[1] << 8; /* FALLTHROUGH */ \ 597 | case 1: _mur_k1 ^= (uint32_t)_mur_tail[0]; \ 598 | _mur_k1 *= _mur_c1; \ 599 | _mur_k1 = MUR_ROTL32(_mur_k1,15); \ 600 | _mur_k1 *= _mur_c2; \ 601 | _mur_h1 ^= _mur_k1; \ 602 | } \ 603 | _mur_h1 ^= (uint32_t)(keylen); \ 604 | MUR_FMIX(_mur_h1); \ 605 | hashv = _mur_h1; \ 606 | bkt = hashv & (num_bkts-1U); \ 607 | } while(0) 608 | #endif /* HASH_USING_NO_STRICT_ALIASING */ 609 | 610 | /* key comparison function; return 0 if keys equal */ 611 | #define HASH_KEYCMP(a,b,len) memcmp(a,b,(unsigned long)(len)) 612 | 613 | /* iterate over items in a known bucket to find desired item */ 614 | #define 
HASH_FIND_IN_BKT(tbl,hh,head,keyptr,keylen_in,out) \ 615 | do { \ 616 | if (head.hh_head != NULL) { DECLTYPE_ASSIGN(out,ELMT_FROM_HH(tbl,head.hh_head)); } \ 617 | else { out=NULL; } \ 618 | while (out != NULL) { \ 619 | if ((out)->hh.keylen == (keylen_in)) { \ 620 | if ((HASH_KEYCMP((out)->hh.key,keyptr,keylen_in)) == 0) { break; } \ 621 | } \ 622 | if ((out)->hh.hh_next != NULL) { DECLTYPE_ASSIGN(out,ELMT_FROM_HH(tbl,(out)->hh.hh_next)); } \ 623 | else { out = NULL; } \ 624 | } \ 625 | } while(0) 626 | 627 | /* add an item to a bucket */ 628 | #define HASH_ADD_TO_BKT(head,addhh) \ 629 | do { \ 630 | head.count++; \ 631 | (addhh)->hh_next = head.hh_head; \ 632 | (addhh)->hh_prev = NULL; \ 633 | if (head.hh_head != NULL) { (head).hh_head->hh_prev = (addhh); } \ 634 | (head).hh_head=addhh; \ 635 | if ((head.count >= ((head.expand_mult+1U) * HASH_BKT_CAPACITY_THRESH)) \ 636 | && ((addhh)->tbl->noexpand != 1U)) { \ 637 | HASH_EXPAND_BUCKETS((addhh)->tbl); \ 638 | } \ 639 | } while(0) 640 | 641 | /* remove an item from a given bucket */ 642 | #define HASH_DEL_IN_BKT(hh,head,hh_del) \ 643 | (head).count--; \ 644 | if ((head).hh_head == hh_del) { \ 645 | (head).hh_head = hh_del->hh_next; \ 646 | } \ 647 | if (hh_del->hh_prev) { \ 648 | hh_del->hh_prev->hh_next = hh_del->hh_next; \ 649 | } \ 650 | if (hh_del->hh_next) { \ 651 | hh_del->hh_next->hh_prev = hh_del->hh_prev; \ 652 | } 653 | 654 | /* Bucket expansion has the effect of doubling the number of buckets 655 | * and redistributing the items into the new buckets. Ideally the 656 | * items will distribute more or less evenly into the new buckets 657 | * (the extent to which this is true is a measure of the quality of 658 | * the hash function as it applies to the key domain). 659 | * 660 | * With the items distributed into more buckets, the chain length 661 | * (item count) in each bucket is reduced. Thus by expanding buckets 662 | * the hash keeps a bound on the chain length. 
This bounded chain 663 | * length is the essence of how a hash provides constant time lookup. 664 | * 665 | * The calculation of tbl->ideal_chain_maxlen below deserves some 666 | * explanation. First, keep in mind that we're calculating the ideal 667 | * maximum chain length based on the *new* (doubled) bucket count. 668 | * In fractions this is just n/b (n=number of items,b=new num buckets). 669 | * Since the ideal chain length is an integer, we want to calculate 670 | * ceil(n/b). We don't depend on floating point arithmetic in this 671 | * hash, so to calculate ceil(n/b) with integers we could write 672 | * 673 | * ceil(n/b) = (n/b) + ((n%b)?1:0) 674 | * 675 | * and in fact a previous version of this hash did just that. 676 | * But now we have improved things a bit by recognizing that b is 677 | * always a power of two. We keep its base 2 log handy (call it lb), 678 | * so now we can write this with a bit shift and logical AND: 679 | * 680 | * ceil(n/b) = (n>>lb) + ( (n & (b-1)) ? 1:0) 681 | * 682 | */ 683 | #define HASH_EXPAND_BUCKETS(tbl) \ 684 | do { \ 685 | unsigned _he_bkt; \ 686 | unsigned _he_bkt_i; \ 687 | struct UT_hash_handle *_he_thh, *_he_hh_nxt; \ 688 | UT_hash_bucket *_he_new_buckets, *_he_newbkt; \ 689 | _he_new_buckets = (UT_hash_bucket*)uthash_malloc( \ 690 | 2UL * tbl->num_buckets * sizeof(struct UT_hash_bucket)); \ 691 | if (!_he_new_buckets) { uthash_fatal( "out of memory"); } \ 692 | memset(_he_new_buckets, 0, \ 693 | 2UL * tbl->num_buckets * sizeof(struct UT_hash_bucket)); \ 694 | tbl->ideal_chain_maxlen = \ 695 | (tbl->num_items >> (tbl->log2_num_buckets+1U)) + \ 696 | (((tbl->num_items & ((tbl->num_buckets*2U)-1U)) != 0U) ? 
1U : 0U); \ 697 | tbl->nonideal_items = 0; \ 698 | for(_he_bkt_i = 0; _he_bkt_i < tbl->num_buckets; _he_bkt_i++) \ 699 | { \ 700 | _he_thh = tbl->buckets[ _he_bkt_i ].hh_head; \ 701 | while (_he_thh != NULL) { \ 702 | _he_hh_nxt = _he_thh->hh_next; \ 703 | HASH_TO_BKT( _he_thh->hashv, tbl->num_buckets*2U, _he_bkt); \ 704 | _he_newbkt = &(_he_new_buckets[ _he_bkt ]); \ 705 | if (++(_he_newbkt->count) > tbl->ideal_chain_maxlen) { \ 706 | tbl->nonideal_items++; \ 707 | _he_newbkt->expand_mult = _he_newbkt->count / \ 708 | tbl->ideal_chain_maxlen; \ 709 | } \ 710 | _he_thh->hh_prev = NULL; \ 711 | _he_thh->hh_next = _he_newbkt->hh_head; \ 712 | if (_he_newbkt->hh_head != NULL) { _he_newbkt->hh_head->hh_prev = \ 713 | _he_thh; } \ 714 | _he_newbkt->hh_head = _he_thh; \ 715 | _he_thh = _he_hh_nxt; \ 716 | } \ 717 | } \ 718 | uthash_free( tbl->buckets, tbl->num_buckets*sizeof(struct UT_hash_bucket) ); \ 719 | tbl->num_buckets *= 2U; \ 720 | tbl->log2_num_buckets++; \ 721 | tbl->buckets = _he_new_buckets; \ 722 | tbl->ineff_expands = (tbl->nonideal_items > (tbl->num_items >> 1)) ? \ 723 | (tbl->ineff_expands+1U) : 0U; \ 724 | if (tbl->ineff_expands > 1U) { \ 725 | tbl->noexpand=1; \ 726 | uthash_noexpand_fyi(tbl); \ 727 | } \ 728 | uthash_expand_fyi(tbl); \ 729 | } while(0) 730 | 731 | 732 | /* This is an adaptation of Simon Tatham's O(n log(n)) mergesort */ 733 | /* Note that HASH_SORT assumes the hash handle name to be hh. 734 | * HASH_SRT was added to allow the hash handle name to be passed in. 
*/ 735 | #define HASH_SORT(head,cmpfcn) HASH_SRT(hh,head,cmpfcn) 736 | #define HASH_SRT(hh,head,cmpfcn) \ 737 | do { \ 738 | unsigned _hs_i; \ 739 | unsigned _hs_looping,_hs_nmerges,_hs_insize,_hs_psize,_hs_qsize; \ 740 | struct UT_hash_handle *_hs_p, *_hs_q, *_hs_e, *_hs_list, *_hs_tail; \ 741 | if (head != NULL) { \ 742 | _hs_insize = 1; \ 743 | _hs_looping = 1; \ 744 | _hs_list = &((head)->hh); \ 745 | while (_hs_looping != 0U) { \ 746 | _hs_p = _hs_list; \ 747 | _hs_list = NULL; \ 748 | _hs_tail = NULL; \ 749 | _hs_nmerges = 0; \ 750 | while (_hs_p != NULL) { \ 751 | _hs_nmerges++; \ 752 | _hs_q = _hs_p; \ 753 | _hs_psize = 0; \ 754 | for ( _hs_i = 0; _hs_i < _hs_insize; _hs_i++ ) { \ 755 | _hs_psize++; \ 756 | _hs_q = (UT_hash_handle*)((_hs_q->next != NULL) ? \ 757 | ((void*)((char*)(_hs_q->next) + \ 758 | (head)->hh.tbl->hho)) : NULL); \ 759 | if (! (_hs_q) ) { break; } \ 760 | } \ 761 | _hs_qsize = _hs_insize; \ 762 | while ((_hs_psize > 0U) || ((_hs_qsize > 0U) && (_hs_q != NULL))) {\ 763 | if (_hs_psize == 0U) { \ 764 | _hs_e = _hs_q; \ 765 | _hs_q = (UT_hash_handle*)((_hs_q->next != NULL) ? \ 766 | ((void*)((char*)(_hs_q->next) + \ 767 | (head)->hh.tbl->hho)) : NULL); \ 768 | _hs_qsize--; \ 769 | } else if ( (_hs_qsize == 0U) || (_hs_q == NULL) ) { \ 770 | _hs_e = _hs_p; \ 771 | if (_hs_p != NULL){ \ 772 | _hs_p = (UT_hash_handle*)((_hs_p->next != NULL) ? \ 773 | ((void*)((char*)(_hs_p->next) + \ 774 | (head)->hh.tbl->hho)) : NULL); \ 775 | } \ 776 | _hs_psize--; \ 777 | } else if (( \ 778 | cmpfcn(DECLTYPE(head)(ELMT_FROM_HH((head)->hh.tbl,_hs_p)), \ 779 | DECLTYPE(head)(ELMT_FROM_HH((head)->hh.tbl,_hs_q))) \ 780 | ) <= 0) { \ 781 | _hs_e = _hs_p; \ 782 | if (_hs_p != NULL){ \ 783 | _hs_p = (UT_hash_handle*)((_hs_p->next != NULL) ? \ 784 | ((void*)((char*)(_hs_p->next) + \ 785 | (head)->hh.tbl->hho)) : NULL); \ 786 | } \ 787 | _hs_psize--; \ 788 | } else { \ 789 | _hs_e = _hs_q; \ 790 | _hs_q = (UT_hash_handle*)((_hs_q->next != NULL) ? 
\ 791 | ((void*)((char*)(_hs_q->next) + \ 792 | (head)->hh.tbl->hho)) : NULL); \ 793 | _hs_qsize--; \ 794 | } \ 795 | if ( _hs_tail != NULL ) { \ 796 | _hs_tail->next = ((_hs_e != NULL) ? \ 797 | ELMT_FROM_HH((head)->hh.tbl,_hs_e) : NULL); \ 798 | } else { \ 799 | _hs_list = _hs_e; \ 800 | } \ 801 | if (_hs_e != NULL) { \ 802 | _hs_e->prev = ((_hs_tail != NULL) ? \ 803 | ELMT_FROM_HH((head)->hh.tbl,_hs_tail) : NULL); \ 804 | } \ 805 | _hs_tail = _hs_e; \ 806 | } \ 807 | _hs_p = _hs_q; \ 808 | } \ 809 | if (_hs_tail != NULL){ \ 810 | _hs_tail->next = NULL; \ 811 | } \ 812 | if ( _hs_nmerges <= 1U ) { \ 813 | _hs_looping=0; \ 814 | (head)->hh.tbl->tail = _hs_tail; \ 815 | DECLTYPE_ASSIGN(head,ELMT_FROM_HH((head)->hh.tbl, _hs_list)); \ 816 | } \ 817 | _hs_insize *= 2U; \ 818 | } \ 819 | HASH_FSCK(hh,head); \ 820 | } \ 821 | } while (0) 822 | 823 | /* This function selects items from one hash into another hash. 824 | * The end result is that the selected items have dual presence 825 | * in both hashes. There is no copy of the items made; rather 826 | * they are added into the new hash through a secondary hash 827 | * hash handle that must be present in the structure. 
*/ 828 | #define HASH_SELECT(hh_dst, dst, hh_src, src, cond) \ 829 | do { \ 830 | unsigned _src_bkt, _dst_bkt; \ 831 | void *_last_elt=NULL, *_elt; \ 832 | UT_hash_handle *_src_hh, *_dst_hh, *_last_elt_hh=NULL; \ 833 | ptrdiff_t _dst_hho = ((char*)(&(dst)->hh_dst) - (char*)(dst)); \ 834 | if (src != NULL) { \ 835 | for(_src_bkt=0; _src_bkt < (src)->hh_src.tbl->num_buckets; _src_bkt++) { \ 836 | for(_src_hh = (src)->hh_src.tbl->buckets[_src_bkt].hh_head; \ 837 | _src_hh != NULL; \ 838 | _src_hh = _src_hh->hh_next) { \ 839 | _elt = ELMT_FROM_HH((src)->hh_src.tbl, _src_hh); \ 840 | if (cond(_elt)) { \ 841 | _dst_hh = (UT_hash_handle*)(((char*)_elt) + _dst_hho); \ 842 | _dst_hh->key = _src_hh->key; \ 843 | _dst_hh->keylen = _src_hh->keylen; \ 844 | _dst_hh->hashv = _src_hh->hashv; \ 845 | _dst_hh->prev = _last_elt; \ 846 | _dst_hh->next = NULL; \ 847 | if (_last_elt_hh != NULL) { _last_elt_hh->next = _elt; } \ 848 | if (dst == NULL) { \ 849 | DECLTYPE_ASSIGN(dst,_elt); \ 850 | HASH_MAKE_TABLE(hh_dst,dst); \ 851 | } else { \ 852 | _dst_hh->tbl = (dst)->hh_dst.tbl; \ 853 | } \ 854 | HASH_TO_BKT(_dst_hh->hashv, _dst_hh->tbl->num_buckets, _dst_bkt); \ 855 | HASH_ADD_TO_BKT(_dst_hh->tbl->buckets[_dst_bkt],_dst_hh); \ 856 | (dst)->hh_dst.tbl->num_items++; \ 857 | _last_elt = _elt; \ 858 | _last_elt_hh = _dst_hh; \ 859 | } \ 860 | } \ 861 | } \ 862 | } \ 863 | HASH_FSCK(hh_dst,dst); \ 864 | } while (0) 865 | 866 | #define HASH_CLEAR(hh,head) \ 867 | do { \ 868 | if (head != NULL) { \ 869 | uthash_free((head)->hh.tbl->buckets, \ 870 | (head)->hh.tbl->num_buckets*sizeof(struct UT_hash_bucket)); \ 871 | HASH_BLOOM_FREE((head)->hh.tbl); \ 872 | uthash_free((head)->hh.tbl, sizeof(UT_hash_table)); \ 873 | (head)=NULL; \ 874 | } \ 875 | } while(0) 876 | 877 | #define HASH_OVERHEAD(hh,head) \ 878 | ((head != NULL) ? 
( \ 879 | (size_t)(((head)->hh.tbl->num_items * sizeof(UT_hash_handle)) + \ 880 | ((head)->hh.tbl->num_buckets * sizeof(UT_hash_bucket)) + \ 881 | sizeof(UT_hash_table) + \ 882 | (HASH_BLOOM_BYTELEN))) : 0U) 883 | 884 | #ifdef NO_DECLTYPE 885 | #define HASH_ITER(hh,head,el,tmp) \ 886 | for(((el)=(head)), ((*(char**)(&(tmp)))=(char*)((head!=NULL)?(head)->hh.next:NULL)); \ 887 | (el) != NULL; ((el)=(tmp)), ((*(char**)(&(tmp)))=(char*)((tmp!=NULL)?(tmp)->hh.next:NULL))) 888 | #else 889 | #define HASH_ITER(hh,head,el,tmp) \ 890 | for(((el)=(head)), ((tmp)=DECLTYPE(el)((head!=NULL)?(head)->hh.next:NULL)); \ 891 | (el) != NULL; ((el)=(tmp)), ((tmp)=DECLTYPE(el)((tmp!=NULL)?(tmp)->hh.next:NULL))) 892 | #endif 893 | 894 | /* obtain a count of items in the hash */ 895 | #define HASH_COUNT(head) HASH_CNT(hh,head) 896 | #define HASH_CNT(hh,head) ((head != NULL)?((head)->hh.tbl->num_items):0U) 897 | 898 | typedef struct UT_hash_bucket { 899 | struct UT_hash_handle *hh_head; 900 | unsigned count; 901 | 902 | /* expand_mult is normally set to 0. In this situation, the max chain length 903 | * threshold is enforced at its default value, HASH_BKT_CAPACITY_THRESH. (If 904 | * the bucket's chain exceeds this length, bucket expansion is triggered). 905 | * However, setting expand_mult to a non-zero value delays bucket expansion 906 | * (that would be triggered by additions to this particular bucket) 907 | * until its chain length reaches a *multiple* of HASH_BKT_CAPACITY_THRESH. 908 | * (The multiplier is simply expand_mult+1). The whole idea of this 909 | * multiplier is to reduce bucket expansions, since they are expensive, in 910 | * situations where we know that a particular bucket tends to be overused. 911 | * It is better to let its chain length grow to a longer yet-still-bounded 912 | * value, than to do an O(n) bucket expansion too often. 
913 | */ 914 | unsigned expand_mult; 915 | 916 | } UT_hash_bucket; 917 | 918 | /* random signature used only to find hash tables in external analysis */ 919 | #define HASH_SIGNATURE 0xa0111fe1u 920 | #define HASH_BLOOM_SIGNATURE 0xb12220f2u 921 | 922 | typedef struct UT_hash_table { 923 | UT_hash_bucket *buckets; 924 | unsigned num_buckets, log2_num_buckets; 925 | unsigned num_items; 926 | struct UT_hash_handle *tail; /* tail hh in app order, for fast append */ 927 | ptrdiff_t hho; /* hash handle offset (byte pos of hash handle in element */ 928 | 929 | /* in an ideal situation (all buckets used equally), no bucket would have 930 | * more than ceil(#items/#buckets) items. that's the ideal chain length. */ 931 | unsigned ideal_chain_maxlen; 932 | 933 | /* nonideal_items is the number of items in the hash whose chain position 934 | * exceeds the ideal chain maxlen. these items pay the penalty for an uneven 935 | * hash distribution; reaching them in a chain traversal takes >ideal steps */ 936 | unsigned nonideal_items; 937 | 938 | /* ineffective expands occur when a bucket doubling was performed, but 939 | * afterward, more than half the items in the hash had nonideal chain 940 | * positions. If this happens on two consecutive expansions we inhibit any 941 | * further expansion, as it's not helping; this happens when the hash 942 | * function isn't a good fit for the key domain. When expansion is inhibited 943 | * the hash will still work, albeit no longer in constant time. 
*/ 944 | unsigned ineff_expands, noexpand; 945 | 946 | uint32_t signature; /* used only to find hash tables in external analysis */ 947 | #ifdef HASH_BLOOM 948 | uint32_t bloom_sig; /* used only to test bloom exists in external analysis */ 949 | uint8_t *bloom_bv; 950 | uint8_t bloom_nbits; 951 | #endif 952 | 953 | } UT_hash_table; 954 | /* Hash handle: the per-element bookkeeping record. It names the table the element belongs to, links the element to its neighbours both in application order (prev/next) and in its bucket chain (hh_prev/hh_next), and stores the key pointer, key length and hash value. */ 955 | typedef struct UT_hash_handle { 956 | struct UT_hash_table *tbl; /* table this handle belongs to */ 957 | void *prev; /* prev element in app order */ 958 | void *next; /* next element in app order */ 959 | struct UT_hash_handle *hh_prev; /* previous hh in bucket order */ 960 | struct UT_hash_handle *hh_next; /* next hh in bucket order */ 961 | void *key; /* ptr to enclosing struct's key */ 962 | unsigned keylen; /* enclosing struct's key len */ 963 | unsigned hashv; /* result of hash-fcn(key) */ 964 | } UT_hash_handle; 965 | 966 | #endif /* UTHASH_H */ 967 | -------------------------------------------------------------------------------- /core/.gitignore: -------------------------------------------------------------------------------- 1 | # 2 | # NOTE! Don't add files that are generated in specific 3 | # subdirectories here. Add them in the ".gitignore" file 4 | # in that subdirectory instead. 5 | # 6 | # NOTE! Please use 'git ls-files -i --exclude-standard' 7 | # command after changing this file, to see if there are 8 | # any tracked files which get ignored after the change.
9 | # 10 | # Normal rules 11 | # 12 | .* 13 | *.o 14 | *.o.* 15 | *.a 16 | *.s 17 | *.ko 18 | *.so 19 | *.so.dbg 20 | *.mod.c 21 | *.i 22 | *.lst 23 | *.symtypes 24 | *.order 25 | modules.builtin 26 | *.elf 27 | *.bin 28 | *.gz 29 | *.bz2 30 | *.lzma 31 | *.xz 32 | *.lz4 33 | *.lzo 34 | *.patch 35 | *.gcno 36 | 37 | # 38 | # Top-level generic files 39 | # 40 | /tags 41 | /TAGS 42 | /System.map 43 | /Module.markers 44 | /Module.symvers 45 | 46 | # 47 | # git files that we don't want to ignore even if they are dot-files 48 | # 49 | !.gitignore 50 | !.mailmap 51 | 52 | # cscope files 53 | cscope.* 54 | ncscope.* 55 | 56 | # gnu global files 57 | GPATH 58 | GRTAGS 59 | GSYMS 60 | GTAGS 61 | 62 | *.orig 63 | *~ 64 | \#*# 65 | -------------------------------------------------------------------------------- /core/Makefile: -------------------------------------------------------------------------------- 1 | obj-m := lite_internal.o lite_api.o lite_test.o 2 | lite_internal-objs := lite_core.o lite_internal_tool.o 3 | 4 | all: 5 | make -C /lib/modules/$(shell uname -r)/build M=$(PWD) modules 6 | clean: 7 | make -C /lib/modules/$(shell uname -r)/build M=$(PWD) clean 8 | rm lite*.o -f 9 | -------------------------------------------------------------------------------- /core/README.md: -------------------------------------------------------------------------------- 1 | This is the LITE module. More details are in /README.md 2 | For a userspace example, please check `lite-userspace/`. 3 | 4 | *current limitations* 5 | 1. remote memset doesn't support multiple MRs under one LMR yet; it can only interact with the first LMR 6 | 2. remote memset only zero-fills the memory space; currently it doesn't take a fill character as input 7 | 3. because the system is optimized for send-reply, we have limitations in processing send-only (if the receiver lags behind the sender by 4096 packets, an error will occur) 8 | 4. send-reply doesn't support a local channel yet 9 | 5.
ibapi_send_reply_imm_multisge (multicast send-reply api) is only available to kernel-space applications. It is a wrapper around our send-reply function and is not significantly optimized. Most send-reply features are not supported by this API, including multiple MRs under one LMR, local_send-reply, and send-only requests 10 | 6. one LMR can only support up to 128MB. This limit is defined in lite.h 11 | 7. a port is reserved by one thread only. Different threads need to synchronize with each other when calling the receive function. 12 | -------------------------------------------------------------------------------- /core/doxygen/doxygen.sh: -------------------------------------------------------------------------------- 1 | doxygen LITE-doxygen 2 | -------------------------------------------------------------------------------- /core/lite.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2005 Topspin Communications. All rights reserved. 3 | * 4 | * This software is available to you under a choice of one of two 5 | * licenses. You may choose to be licensed under the terms of the GNU 6 | * General Public License (GPL) Version 2, available from the file 7 | * COPYING in the main directory of this source tree, or the 8 | * OpenIB.org BSD license below: 9 | * 10 | * Redistribution and use in source and binary forms, with or 11 | * without modification, are permitted provided that the following 12 | * conditions are met: 13 | * 14 | * - Redistributions of source code must retain the above 15 | * copyright notice, this list of conditions and the following 16 | * disclaimer. 17 | * 18 | * - Redistributions in binary form must reproduce the above 19 | * copyright notice, this list of conditions and the following 20 | * disclaimer in the documentation and/or other materials 21 | * provided with the distribution.
22 | * 23 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 24 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 25 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 26 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 27 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 28 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 29 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 30 | * SOFTWARE. 31 | */ 32 | 33 | 34 | #ifndef HAVE_CLIENT_H 35 | #define HAVE_CLIENT_H 36 | 37 | 38 | //This is the version modified from 000be840c215d5da3011a2c7b486d5ae122540c4 39 | //It adds LOCKS, sge, and other things into the system 40 | //Client.h is also modified. 41 | //Server is also modified to match this patch 42 | //Patch SERIAL_VERSION_ID: 04202300 43 | //Please make sure that this version is not fully tested inside dsnvm (interactions are not fully tested) 44 | 45 | 46 | #include 47 | #include 48 | #include 49 | #include 50 | #include 51 | #include 52 | #include 53 | #include 54 | #include 55 | #include 56 | #include 57 | #include 58 | #include 59 | #include 60 | #include 61 | #include 62 | #include 63 | #include 64 | //#include 65 | //#include 66 | #include 67 | #include 68 | #include 69 | #include 70 | #include 71 | #include 72 | #include 73 | #include 74 | #include 75 | #include 76 | #include 77 | #include 78 | #include 79 | #include 80 | #include 81 | #include 82 | #include 83 | 84 | #include 85 | #include 86 | #include 87 | #include 88 | #include 89 | #include 90 | #include 91 | #include 92 | //#include 93 | 94 | //#include "dsnvm-common.h" 95 | 96 | #include 97 | #include 98 | #include 99 | #include 100 | // 101 | #include "lite_syscall.h" 102 | //#include "lite_test.h" 103 | #define ROUND_UP(N, S) ((((N) + (S) - 1) / (S)) * (S)) 104 | #define MIN(a,b) (((a)<(b))?(a):(b)) 105 | #define MAX(a,b) (((a)>(b))?(a):(b)) 106 | 107 | 108 | #define 
DEBUG_SHINYEH 109 | 110 | //#define LITE_ROCE 111 | 112 | #ifdef LITE_ROCE 113 | #define SGID_INDEX 0 114 | #else 115 | #define SGID_INDEX -1 116 | #endif 117 | 118 | #define MAX_LITE_NUM 4 119 | #define MESSAGE_SIZE 4096 120 | 121 | #define LITE_USERSPACE_FLAG 1 122 | #define LITE_KERNELSPACE_FLAG 0 123 | #define LITE_LINUX_PAGE_OFFSET 0x00000fff 124 | #define LITE_LINUX_PAGE_SIZE 4096 125 | 126 | #define CIRCULAR_BUFFER_LENGTH 256 127 | 128 | #define MAX_NODE 32 129 | #define MAX_NODE_BIT 5 130 | 131 | #define LISTEN_PORT 18500 132 | 133 | #define RECV_DEPTH 256 134 | #define CONNECTION_ID_PUSH_BITS_BASED_ON_RECV_DEPTH 8 135 | #define NUM_PARALLEL_CONNECTION 4 136 | #define GET_NODE_ID_FROM_POST_RECEIVE_ID(id) (id>>8)/NUM_PARALLEL_CONNECTION 137 | #define GET_POST_RECEIVE_DEPTH_FROM_POST_RECEIVE_ID(id) (id&0x000000ff) 138 | 139 | #ifdef LITE_ROCE 140 | #define LITE_MTU IB_MTU_1024 141 | #else 142 | #define LITE_MTU IB_MTU_4096 143 | #endif 144 | 145 | //#define LITE_GET_TIME 146 | //#define LITE_GET_TIME_MULTISGE 147 | 148 | #define UD_QP_SL 1 149 | 150 | #define LID_SEND_RECV_FORMAT "0000:0000:000000:000000:00000000000000000000000000000000" 151 | #define NUM_POLLING_THREADS 1 152 | #define NUM_POLLING_WC 32 153 | #define MAX_CONNECTION MAX_NODE * NUM_PARALLEL_CONNECTION //Assume that MAX_CONNECTION is smaller than 256 154 | #define MAX_PARALLEL_THREAD 64 155 | #define WRAP_UP_NUM_FOR_WRID 256 //since there are 64 bits in wr_id, we are going to use 9-12 bits to do thread id waiting passing 156 | #define WRAP_UP_NUM_FOR_CIRCULAR_ID 256 157 | #define WRAP_UP_NUM_FOR_WAITING_INBOX 256 158 | #define WRAP_UP_NUM_FOR_TYPE 65536 //since there are 64 bits in wr_id, we are going to use 9-12 bits to do thread id waiting passing 159 | //const int MESSAGE_SIZE = 4096; 160 | //const int CIRCULAR_BUFFER_LENGTH = 256; 161 | //const int MAX_NODE = 4; 162 | #define POST_RECEIVE_CACHE_SIZE 2048 163 | #define SERVER_ID 0 164 | 165 | 166 | #define HIGH_PRIORITY 4 167 | #define 
LOW_PRIORITY 0 168 | #define KEY_PRIORITY 8 169 | #define USERSPACE_LOW_PRIORITY_DELAY 64 170 | #define USERSPACE_LOW_PRIORITY_THRESHOLD 2 171 | #define USERSPACE_HIGH_PRIORITY 16 172 | #define USERSPACE_LOW_PRIORITY 17 173 | #define CONGESTION_ALERT 2 174 | #define CONGESTION_WARNING 1 175 | #define CONGESTION_FREE 0 176 | 177 | #define PRIORITY_START 1 178 | #define PRIORITY_END 2 179 | 180 | #define PRIORITY_CHECKING_PERIOD_US 100 181 | #define PRIORITY_CHECKING_THRESHOLD_US 32 182 | #define PRIORITY_CHECKING_THRESHOLD_COUNTER 250 183 | 184 | #define PRIORITY_WRITE 1 185 | #define PRIORITY_READ 2 186 | #define PRIORITY_SR 3 187 | 188 | //MULTICAST RELATED 189 | #define MAX_MULTICAST_HOP 16 190 | #define MAX_LENGTH_OF_ATOMIC 256 191 | 192 | //ASYIO RELATED 193 | #define RING_BUFFER_LENGTH 1024 194 | #define RING_BUFFER_MAXSIZE 4096 195 | #define REMOTE_MEMORY_PAGE_SIZE RING_BUFFER_MAXSIZE 196 | #define INTERARRIVAL_UNLESS_FENCE 1 197 | #define ASY_SETUP_COMPLETE true 198 | 199 | //alloc continuous memory related 200 | #define LITE_MEM_OFFSET 0x100000000 201 | 202 | // IMM_ related things 203 | #define NUM_OF_CORES 2 204 | //Model 2 --> 2-6-24 (Send-recv-opcode, port, offset) 205 | #define IMM_SEND_REPLY_SEND 0x80000000 206 | #define IMM_SEND_REPLY_RECV 0x40000000 207 | #define IMM_SEND_ONLY_FLAG 0xffffffffffffffff 208 | #define IMM_PORT_PUSH_BIT 24 209 | #define IMM_GET_PORT_NUMBER(imm) (imm<<2)>>26 210 | #define IMM_GET_OFFSET 0x00ffffff 211 | //#define IMM_GET_SEMAPHORE 0x3fffffff 212 | #define IMM_GET_SEMAPHORE 0x00ffffff 213 | #define IMM_GET_OPCODE 0x0f000000 214 | #define IMM_GET_OPCODE_NUMBER(imm) (imm<<4)>>28 215 | #define IMM_DATA_BIT 32 216 | #define IMM_NUM_OF_SEMAPHORE 64 217 | #define IMM_MAX_PORT 64 218 | #define IMM_MAX_PORT_BIT 6 219 | #define IMM_MAX_PORT_BITMASK 0x3F 220 | #define IMM_MAX_PRIORITY 64 221 | #define IMM_MAX_PRIORITY_BIT 6 222 | #define IMM_MAX_PRIORITY_BITMASK 0x3F 223 | 224 | #define IMM_MAX_SGE_LENGTH 31 225 | 226 | #define 
IMM_MAX_SIZE IMM_PORT_CACHE_SIZE/NUM_OF_CORES 227 | #define IMM_SEND_SLEEP_SIZE_THRESHOLD 40960 228 | #define IMM_SEND_SLEEP_TIME_THRESHOLD 10 229 | #define IMM_ROUND_UP 4096 230 | //#define IMM_PORT_CACHE_SIZE 1024*1024*4 231 | #define IMM_PORT_CACHE_SIZE 4194304 // 1024*1024*4 232 | #define IMM_ACK_PORTION 4 233 | 234 | //Lock related 235 | #define LITE_MAX_LOCK_NUM 1024 236 | #define LITE_MAX_WAIT_QUEUE 64 237 | 238 | //Memory Related 239 | #define LITE_MEMORY_BLOCK 4194304 //1024*1024*4 240 | #ifdef LITE_ROCE 241 | #define LITE_MAX_MEMORY_BLOCK 16 //4MB * 16 = 64MB 242 | #else 243 | #define LITE_MAX_MEMORY_BLOCK 32 //4MB * 32 = 128MB 244 | #endif 245 | 246 | 247 | 248 | //struct semaphore atomic_accessing_lock[MAX_NODE]; 249 | //struct semaphore mr_mutex; 250 | //struct semaphore get_thread_waiting_number_semaphore; 251 | //struct semaphore get_thread_waiting_number_mutex; 252 | //struct semaphore send_reply_wait_semaphore; 253 | //struct semaphore send_reply_wait_mutex; 254 | 255 | inline void get_time_start(void); 256 | void get_time_end(void); 257 | inline void get_cycle_start(void); 258 | void get_cycle_end(void); 259 | 260 | #define SEND_REPLY_WAIT -101 261 | #define SEND_REPLY_EMPTY -102 262 | #define SEND_REPLY_PORT_NOT_OPENED -103 263 | #define SEND_REPLY_PORT_IS_FULL -104 264 | #define SEND_REPLY_SIZE_TOO_BIG -105 265 | #define SEND_REPLY_FAIL -106 266 | #define SEND_REPLY_ACK 0 267 | 268 | enum mode { 269 | M_WRITE, 270 | M_READ, 271 | LITE_SEND_MESSAGE_IMM_ONLY, 272 | LITE_SEND_MESSAGE_HEADER_AND_IMM, 273 | LITE_SEND_MESSAGE_HEADER_ONLY 274 | }; 275 | 276 | enum lock_state{ 277 | LOCK_AVAILABLE, 278 | LOCK_GET_LOCK, 279 | UNLOCK_ALREADY_ARRIVED, 280 | WAIT_FOR_UNLOCK 281 | // LOCK_USED, 282 | // LOCK_AVAILABLE, 283 | // LOCK_LOCK, 284 | // LOCK_ASSIGNED 285 | }; 286 | 287 | struct liteapi_post_receive_intermediate_struct 288 | { 289 | uintptr_t header; 290 | uintptr_t msg; 291 | }; 292 | 293 | struct liteapi_header{ 294 | uint32_t src_id; 295 | 
uint64_t store_addr; 296 | uint64_t store_semaphore; 297 | uint32_t length; 298 | int priority; 299 | int type; 300 | }; 301 | 302 | struct lmr_info { 303 | //struct ib_device *context; 304 | //struct ib_pd *pd; 305 | void *addr; 306 | size_t length; 307 | //uint32_t handle; 308 | uint32_t lkey; 309 | uint32_t rkey; 310 | uint32_t node_id; 311 | }; 312 | 313 | #define LITE_PAGE_SHIFT 12 314 | #define LITE_PAGE_SIZE (1UL << LITE_PAGE_SHIFT) 315 | 316 | struct max_reply_msg { 317 | char msg[LITE_PAGE_SIZE]; 318 | int length; 319 | }; 320 | 321 | struct atomic_struct{ 322 | void *vaddr; 323 | size_t len; 324 | }; 325 | 326 | /*struct hash_lmr_info{ 327 | uint32_t node_id; 328 | int size; 329 | struct lmr_info *data; 330 | uint64_t hash_key; 331 | struct hlist_node hlist; 332 | };*/ 333 | 334 | struct hash_asyio_key{ 335 | uint32_t node_id; 336 | int size;//total length 337 | struct lmr_info **datalist; 338 | int list_length; 339 | 340 | uint64_t permission; 341 | 342 | int initialized_flag; 343 | int count; 344 | int mr_local_index;//For hash usage 345 | unsigned long *bitmap; 346 | unsigned long bitmap_size; 347 | unsigned long *askmr_bitmap; 348 | int link_flag; 349 | 350 | struct hlist_node hlist; 351 | struct list_head list; 352 | 353 | uint64_t lite_handler; 354 | int priority; 355 | 356 | int password; 357 | }; 358 | 359 | struct hash_page_key{ 360 | char *addr; 361 | int dirty_flag; 362 | int link_flag; 363 | 364 | int target_node; 365 | uint64_t lite_handler; 366 | uint64_t hash_key; 367 | int offset; 368 | int priority; 369 | uint32_t page_num; 370 | struct hash_asyio_key *mother_addr; 371 | 372 | struct hlist_node hlist; 373 | }; 374 | 375 | struct hash_mraddr_to_lmr_metadata{ 376 | struct hash_asyio_key *mother_addr; 377 | uint64_t hash_key;//actually it's mr.addr 378 | uint64_t lmr; 379 | struct hlist_node hlist; 380 | }; 381 | 382 | struct ask_mr_form{ 383 | uint64_t identifier; 384 | //int identifier_length; 385 | uint64_t permission; 386 | unsigned int 
designed_port; 387 | }; 388 | 389 | struct ask_mr_table{ 390 | uint64_t lmr; 391 | uint64_t identifier; 392 | uint64_t permission; 393 | uint64_t hash_key; 394 | struct hlist_node hlist; 395 | }; 396 | 397 | struct ask_mr_reply_form{ 398 | uint64_t op_code; 399 | int total_length; 400 | int node_id; 401 | uint64_t permission; 402 | uint64_t list_length; 403 | struct lmr_info reply_mr[LITE_MAX_MEMORY_BLOCK]; 404 | }; 405 | 406 | struct mr_request_form{ 407 | struct lmr_info request_mr; 408 | struct lmr_info copyto_mr; 409 | uint64_t offset; 410 | uint64_t copyto_offset; 411 | uint64_t size; 412 | uint64_t op_code; 413 | }; 414 | 415 | enum register_application_port_ret{ 416 | REG_FAIL = -1, 417 | REG_PORT_TOO_LARGE = -2, 418 | REG_SIZE_TOO_LARGE = -3, 419 | REG_NAME_TOO_LONG = -4, 420 | REG_PORT_OCCUPIED = -5, 421 | REG_DO_QUERY_FIRST = -6, 422 | REG_DO_LOCAL_SEND = -7 423 | }; 424 | 425 | struct app_reg_port{ 426 | struct lmr_info ring_mr; 427 | unsigned int port; 428 | unsigned int node; 429 | uint64_t hash_key; 430 | uint64_t port_node_key; 431 | void *addr; 432 | char name[32]; 433 | struct hlist_node hlist; 434 | int remote_imm_ring_index; 435 | spinlock_t remote_imm_offset_lock; 436 | uint64_t last_ack_index; 437 | spinlock_t last_ack_index_lock; 438 | }; 439 | 440 | struct imm_ack_form{ 441 | int node_id; 442 | unsigned int designed_port; 443 | int ack_offset; 444 | }; 445 | 446 | struct lite_lock_form{ 447 | int lock_num; 448 | struct lmr_info lock_mr; 449 | uint64_t ticket_num; 450 | }; 451 | 452 | typedef struct lite_lock_form remote_spinlock_t; 453 | 454 | struct lite_lock_reserve_form{ 455 | int lock_num; 456 | uint64_t ticket_num; 457 | }; 458 | 459 | struct lite_lock_queue_element{ 460 | uint64_t store_addr; 461 | uint64_t store_semaphore; 462 | uint32_t src_id; 463 | unsigned int ticket_num; 464 | int lock_num; 465 | int state; 466 | int tar_lock_index; 467 | struct hlist_node hlist; 468 | }; 469 | 470 | enum mr_request_op_code{ 471 | 
OP_REMOTE_MEMSET=0, 472 | OP_REMOTE_MEMCPY=1, 473 | OP_REMOTE_REREGISTER=2, 474 | OP_REMOTE_DEREGISTER=3, 475 | OP_REMOTE_FREE=4, 476 | OP_REMOTE_MEMMOV=5 477 | }; 478 | 479 | enum permission_mode{ 480 | MR_READ_FLAG=0x01, 481 | MR_WRITE_FLAG=0x02, 482 | MR_SHARE_FLAG=0x04, 483 | MR_ADMIN_FLAG=0x08, 484 | MR_ATOMIC_FLAG=0x10, 485 | MR_ASK_SUCCESS=0, 486 | MR_ASK_REFUSE=1, 487 | MR_ASK_UNPERMITTED=2, 488 | MR_ASK_HANDLER_ERROR=3, 489 | MR_ASK_UNKNOWN=4 490 | }; 491 | 492 | 493 | enum asy_page_dirty_status{ 494 | ASY_PAGE_DIRTY=1, 495 | ASY_PAGE_CLEAN=0 496 | }; 497 | 498 | enum asy_page_link_status{ 499 | ASY_PAGE_LINK=1, 500 | ASY_PAGE_UNLINK=0 501 | }; 502 | 503 | struct asy_page_fence_linked_list_entry{ 504 | struct hash_page_key *pag_addr; 505 | struct list_head list; 506 | }; 507 | 508 | struct send_and_reply_format 509 | { 510 | uint32_t src_id; 511 | uint64_t store_addr; 512 | uint64_t store_semaphore; 513 | uint32_t length; 514 | int type; 515 | char *msg; 516 | int priority; 517 | struct list_head list; 518 | }; 519 | 520 | 521 | 522 | #define QUEUE_ACK 0 523 | #define QUEUE_POST_RECV 1 524 | #define QUEUE_HIGH 2 525 | #define QUEUE_MEDIUM 3 526 | #define QUEUE_LOW 4 527 | #define QUEUE_NUM_OF_QUEUE 5 528 | 529 | enum { 530 | MSG_MR, 531 | MSG_DONE, 532 | MSG_NODE_JOIN, 533 | MSG_NODE_JOIN_UD, 534 | MSG_SERVER_SEND, 535 | MSG_CLIENT_SEND, 536 | MSG_CREATE_LOCK, 537 | MSG_CREATE_LOCK_REPLY, 538 | MSG_RESERVE_LOCK, 539 | MSG_ASSIGN_LOCK, 540 | MSG_UNLOCK, 541 | MSG_ASK_LOCK, 542 | MSG_ASK_LOCK_REPLY, 543 | MSG_GET_REMOTEMR, 544 | MSG_GET_REMOTE_ATOMIC_OPERATION, 545 | MSG_GET_REMOTEMR_REPLY, 546 | MSG_GET_SEND_AND_REPLY_1, 547 | MSG_GET_SEND_AND_REPLY_1_UD, 548 | MSG_GET_SEND_AND_REPLY_2, 549 | MSG_GET_ATOMIC_START, 550 | MSG_GET_ATOMIC_MID, 551 | MSG_GET_ATOMIC_REPLY, 552 | MSG_GET_ATOMIC_SINGLE_START, 553 | MSG_GET_ATOMIC_SINGLE_MID, 554 | MSG_ASK_MR_1, 555 | MSG_ASK_MR_2, 556 | MSG_MR_REQUEST, 557 | MSG_GET_SEND_AND_REPLY_OPT_1, 558 | 
MSG_GET_SEND_AND_REPLY_OPT_2, 559 | MSG_GET_INTERNAL_EXCHANGE, 560 | MSG_DIST_BARRIER, 561 | MSG_GET_FINISH, 562 | MSG_QUERY_PORT_1, 563 | MSG_QUERY_PORT_2, 564 | MSG_PASS_LOCAL_IMM, 565 | MSG_DO_RC_POST_RECEIVE, 566 | MSG_DO_UD_POST_RECEIVE, 567 | MSG_DO_ACK_INTERNAL, 568 | MSG_DO_ACK_REMOTE 569 | }; 570 | 571 | struct buf_message 572 | { 573 | char buf[MESSAGE_SIZE]; 574 | }; 575 | 576 | 577 | enum { 578 | PINGPONG_RECV_WRID = 1, 579 | PINGPONG_SEND_WRID = 2, 580 | }; 581 | 582 | 583 | struct asy_IO_header 584 | { 585 | int target_node; 586 | uint64_t lite_handler; 587 | int size; 588 | int priority; 589 | uint64_t offset; 590 | int complete; 591 | int type; 592 | uint32_t page_num; 593 | char *addr; 594 | int* wait_id_addr; 595 | }; 596 | 597 | enum asy_IO_event_type { 598 | ASY_READ=1, 599 | ASY_WRITE=2, 600 | ASY_FENCE=3, 601 | ASY_INIT=4, 602 | SYN_WRITE=5, 603 | REMOTE_MEMSET=6, 604 | ASY_READ_PREFETCH=7, 605 | ASY_WAIT=8 606 | }; 607 | 608 | struct client_ah_combined 609 | { 610 | int qpn; 611 | int node_id; 612 | int qkey; 613 | int dlid; 614 | union ib_gid gid; 615 | }; 616 | 617 | //Related to remote imm-write 618 | 619 | struct imm_message_metadata 620 | { 621 | //uint32_t size; 622 | uint32_t designed_port; 623 | uint32_t source_node_id; 624 | uintptr_t store_addr; 625 | uint32_t store_rkey; 626 | uint32_t store_semaphore; 627 | uint32_t size; 628 | }; 629 | 630 | struct imm_header_from_cq_to_port 631 | { 632 | uint32_t source_node_id; 633 | uint64_t offset; 634 | }; 635 | 636 | 637 | struct imm_header_from_cq_to_userspace 638 | { 639 | void *ret_addr; 640 | int receive_size; 641 | void *reply_descriptor; 642 | void *ret_length; 643 | struct list_head list; 644 | }; 645 | 646 | struct lite_context { 647 | struct ib_context *context; 648 | struct ib_comp_channel *channel; 649 | struct ib_pd *pd; 650 | struct ib_cq **cq; // one completion queue for all qps 651 | atomic_t *cq_block; 652 | wait_queue_head_t *cq_block_queue; 653 | struct ib_cq **send_cq; 
654 | struct ib_qp **qp; // multiple queue pair for multiple connections 655 | 656 | struct ib_qp *qpUD;// one UD qp for all the send-reply connections 657 | struct ib_cq *cqUD; 658 | struct ib_cq *send_cqUD; 659 | struct ib_ah **ah; 660 | struct client_ah_combined *ah_attrUD; 661 | struct ib_qp *loopback_in; 662 | struct ib_qp *loopback_out; 663 | struct ib_cq *loopback_cq; 664 | spinlock_t loopback_lock; 665 | 666 | 667 | int recv_numUD; 668 | spinlock_t connection_lockUD; 669 | 670 | int size; 671 | int send_flags; 672 | int rx_depth; 673 | // int pending; 674 | struct ib_port_attr portinfo; 675 | int ib_port; 676 | int num_connections; 677 | int num_node; 678 | int num_parallel_connection; 679 | atomic_t *num_alive_connection; 680 | atomic_t num_alive_nodes; 681 | struct ib_mr *proc; 682 | int node_id; 683 | 684 | 685 | int *recv_num; 686 | atomic_t *atomic_request_num; 687 | //unsigned long *atomic_request_num; 688 | atomic_t *atomic_request_num_high; 689 | atomic_t parallel_thread_num; 690 | 691 | 692 | enum s_state { 693 | SS_INIT, 694 | SS_MR_SENT, 695 | SS_RDMA_WAIT, 696 | SS_RDMA_SENT, 697 | SS_DONE_SENT, 698 | SS_MSG_WAIT, 699 | SS_MSG_SENT, 700 | SS_GET_REMOTE_WAIT, 701 | SS_GET_REMOTE_DONE, 702 | MSG_GET_SEND_AND_REPLY 703 | } *send_state; 704 | 705 | enum r_state { 706 | RS_INIT, 707 | RS_MR_RECV, 708 | RS_RDMA_WAIT, 709 | RS_RDMA_RECV, 710 | RS_DONE_RECV 711 | } *recv_state; 712 | 713 | 714 | atomic_t send_reply_wait_num; 715 | 716 | struct atomic_struct **atomic_buffer; 717 | int *atomic_buffer_total_length; 718 | int *atomic_buffer_cur_length; 719 | 720 | 721 | int (*send_handler)(char *addr, uint32_t size, int sender_id); 722 | int (*send_reply_handler)(char *input_addr, uint32_t input_size, char *output_addr, uint32_t *output_size, int sender_id); 723 | int (*atomic_send_handler)(struct atomic_struct *input_list, uint32_t length, char *output_buf, uint32_t *output_size, int sender_id); 724 | int (*atomic_single_send_handler)(struct atomic_struct 
*input_list, uint32_t length, int sender_id); 725 | int (*send_reply_opt_handler)(char *input_buf, uint32_t size, void **output_buf, uint32_t *output_size, int sender_id); 726 | int (*ask_mr_handler)(struct ask_mr_form *ask_form, uint32_t source_id, uint64_t *litekey_addr, uint64_t *permission); 727 | 728 | atomic_t* connection_congestion_status; 729 | ktime_t* connection_timer_start; 730 | ktime_t* connection_timer_end; 731 | 732 | struct liteapi_header *first_packet_header, *other_packet_header; 733 | int *connection_id_array; 734 | uintptr_t *length_addr_array; 735 | void **output_header_addr; 736 | void **first_header_addr; 737 | void **mid_addr; 738 | 739 | //Needed for cross-nodes-implementation 740 | atomic_t alive_connection; 741 | atomic_t num_completed_threads; 742 | 743 | //Related to AsyIO 744 | atomic_t asy_current_job; 745 | atomic_t asy_latest_job; 746 | 747 | char **asy_tmp_buffer; 748 | struct asy_IO_header *asy_tmp_header; 749 | 750 | atomic_t asy_fence_counter; 751 | 752 | atomic_t mr_index_counter; 753 | //struct list_head asy_fence_list; 754 | //struct list_head asy_fence_list_ms; 755 | 756 | //Related to Emulator 757 | int *bridge_tar; 758 | int bridge_num_nodes; 759 | 760 | //Related to barrier 761 | atomic_t dist_barrier_counter; 762 | int dist_barrier_idx; 763 | int last_barrier_idx[MAX_NODE]; 764 | 765 | //Related to lmr 766 | atomic_t lmr_inc; 767 | 768 | //This is contradict to each ring for each process (use EREP to search inside the code) 769 | //Related to imm 770 | //void **local_imm_ring_buffer; 771 | //struct imm_metadata *remote_imm_metadata; 772 | //struct lmr_info **local_imm_ring_mr; 773 | 774 | //void **imm_cache_perport; 775 | struct imm_header_from_cq_to_port **imm_waitqueue_perport; 776 | unsigned long long imm_waitqueue_perport_count_poll[IMM_MAX_PORT]; 777 | unsigned long long imm_waitqueue_perport_count_recv[IMM_MAX_PORT]; 778 | wait_queue_head_t imm_receive_block_queue[IMM_MAX_PORT]; 779 | int 
imm_perport_reg_num[IMM_MAX_PORT];//-1 no registeration, 0 up --> how many 780 | spinlock_t imm_perport_lock[IMM_MAX_PORT]; 781 | spinlock_t imm_waitqueue_perport_lock[IMM_MAX_PORT]; 782 | spinlock_t imm_readyqueue_perport_lock[IMM_MAX_PORT]; 783 | struct imm_header_from_cq_to_userspace imm_wait_userspace_perport[IMM_MAX_PORT]; 784 | int imm_cq_is_available[NUM_POLLING_THREADS]; 785 | //local semaphore related 786 | void **imm_store_semaphore; 787 | struct imm_message_metadata *imm_store_header; 788 | unsigned long *imm_store_semaphore_bitmap; 789 | spinlock_t *imm_store_semaphore_lock; 790 | atomic_t imm_store_semaphore_count; 791 | wait_queue_head_t *imm_store_block_queue; 792 | struct task_struct **imm_store_semaphore_task; 793 | 794 | atomic_t imm_cache_perport_work_head[IMM_MAX_PORT]; 795 | atomic_t imm_cache_perport_work_tail[IMM_MAX_PORT]; 796 | struct app_reg_port *last_port_node_key_hash_ptr; 797 | 798 | 799 | //#define IMM_MAX_PORT 64 800 | // 801 | atomic_t *connection_count; 802 | 803 | //Lock related 804 | atomic_t lock_num; 805 | struct lite_lock_form *lock_data; 806 | 807 | struct lite_lock_queue_element *lock_queue; 808 | 809 | //memory allocated 810 | atomic_t current_alloc_size; 811 | //priority related 812 | atomic_t high_cur_num_write; 813 | atomic_t high_cur_num_read; 814 | atomic_t high_cur_num_sr; 815 | atomic_t low_cur_num_write; 816 | atomic_t low_cur_num_read; 817 | atomic_t low_cur_num_sr; 818 | atomic_t slow_counter; 819 | atomic_t low_total_num_write; 820 | atomic_t low_total_num_read; 821 | atomic_t low_total_num_sr; 822 | wait_queue_head_t priority_block_queue; 823 | 824 | union ib_gid gid; 825 | }; 826 | 827 | typedef struct lite_context ltc; 828 | 829 | struct lite_dest { 830 | int node_id; 831 | int lid; 832 | int qpn; 833 | int psn; 834 | union ib_gid gid; 835 | }; 836 | 837 | struct client_data{ 838 | char server_information_buffer[sizeof(LID_SEND_RECV_FORMAT)]; 839 | }; 840 | 841 | struct thread_pass_struct{ 842 | ltc *ctx; 843 
| struct ib_cq *target_cq; 844 | char *msg; 845 | struct send_and_reply_format *sr_request; 846 | }; 847 | 848 | 849 | struct reply_struct{ 850 | void *addr; 851 | int size; 852 | uintptr_t descriptor; 853 | }; 854 | 855 | struct receive_struct{ 856 | unsigned int designed_port; 857 | void *ret_addr; 858 | int receive_size; 859 | void *descriptor; 860 | int block_call; 861 | }; 862 | 863 | #endif 864 | -------------------------------------------------------------------------------- /core/lite_api.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef _INCLUDE_LITE_API_H 3 | #define _INCLUDE_LITE_API_H 4 | #include "lite.h" 5 | #include "lite_core.h" 6 | //static void ibv_add_one(struct ib_device *device); 7 | //static void ibv_release_dev(struct device *dev); 8 | //static void ibv_remove_one(struct ib_device *device); 9 | 10 | int liteapi_reg_send_handler(int (*input_funptr)(char *addr, uint32_t length, int sender_id)); 11 | int liteapi_reg_send_reply_handler(int (*input_funptr)(char *input_buf, uint32_t size, char *output_buf, uint32_t *output_size, int sender_id)); 12 | int liteapi_reg_atomic_send_handler(int (*input_funptr)(struct atomic_struct *input_list, uint32_t length, char *output_buf, uint32_t *output_size, int sender_id)); 13 | int liteapi_reg_atomic_single_send_handler(int (*input_funptr)(struct atomic_struct *input_list, uint32_t length, int sender_id)); 14 | int liteapi_reg_send_reply_opt_handler(int (*input_funptr)(char *input_buf, uint32_t size, void **output_buf, uint32_t *output_size, int sender_id)); 15 | int liteapi_reg_ask_mr_handler(int (*input_funptr)(struct ask_mr_form *ask_form, uint32_t node_id, uint64_t *litekey_addr, uint64_t *permission)); 16 | int liteapi_establish_conn(char *servername, int eth_port, int ib_port); 17 | //Do atomic_send_reply. 
Returned value is the length of output_msg (similar to socket programming) 18 | //int liteapi_atomic_send(int target_node, struct atomic_struct *input_atomic, int length, char *output_msg); 19 | //Do send. 20 | int liteapi_send_message(int node_id, void *local_addr, int size); 21 | //Do send_reply. Returned value is the length of output_msg (similar to socket programming) 22 | int liteapi_send_reply(int node_id, char *send_msg, int send_size, char *ack_msg); 23 | int liteapi_send_reply_type(int target_node, char *msg, int size, char *output_msg, int type); 24 | //Haven't implemented teardown_conn 25 | int liteapi_teardown_conn(void); 26 | int liteapi_rdma_write(uint64_t lite_handler, void *local_addr, int size, int priority); 27 | int liteapi_rdma_read(uint64_t lite_handler, void *local_addr, int size, int priority); 28 | inline void liteapi_free_recv_buf(void *input_buf); 29 | int liteapi_send_reply_opt(int target_node, char *msg, int size, void **output_msg, int priority); 30 | int liteapi_send_message_type(int target_node, void *addr, int size, int type); 31 | int liteapi_send_message_priority(int target_node, void *addr, int size, int priority); 32 | int liteapi_send_message_UD(int target_node, void *addr, int size, int type); 33 | int liteapi_send_reply_UD(int target_node, char *msg, int size, char *output_msg); 34 | 35 | //int liteapi_multi_send(int number_of_target, int *target_array, struct atomic_struct *input_atomic); 36 | int liteapi_create_lock(int target_node, void *output_lock); 37 | int liteapi_ask_lock(int target_node, int target_num, void *output_mr); 38 | int liteapi_lock(void *input_void_key); 39 | int liteapi_unlock(void *input_key); 40 | 41 | int liteapi_rdma_asywrite_offset(uint64_t lite_handler, void *local_addr, int size, int priority, int offset); 42 | int liteapi_rdma_asyread_offset(uint64_t lite_handler, void *local_addr, int size, int priority, int offset); 43 | int liteapi_rdma_synwrite_offset(uint64_t lite_handler, void *local_addr, 
int size, int priority, int offset); 44 | int liteapi_rdma_asyfence(void); 45 | int liteapi_rdma_fetch_and_add(uint64_t lite_handler, void *local_addr, unsigned long long input_value, int priority); 46 | 47 | inline int liteapi_remote_memset(uint64_t lite_handler, int offset, int size); 48 | //RDMA RELATED 49 | uint64_t liteapi_alloc_remote_mem(unsigned int target_node, unsigned int size, unsigned atomic_flag, int password); 50 | uint64_t liteapi_ask_mr(int memory_space_owner_node, uint64_t identifier, uint64_t permission, int password); 51 | uint64_t liteapi_register_lmr_with_virt_addr(void *addr, int size, bool atomic_flag, int password); 52 | 53 | int liteapi_rdma_read_offset_mr(struct lmr_info *mr_addr, void *local_addr, int size, int priority, int offset); 54 | int liteapi_rdma_write_offset_mr(struct lmr_info *mr_addr, void *local_addr, int size, int priority, int offset); 55 | int liteapi_rdma_write_offset(uint64_t lite_handler, void *local_addr, int size, int priority, int offset, int password); 56 | int liteapi_rdma_write_offset_userspace(uint64_t lite_handler, void *local_addr, int size, int priority, int offset, int password); 57 | int liteapi_rdma_read_offset(uint64_t lite_handler, void *local_addr, int size, int priority, int offset, int password); 58 | int liteapi_rdma_read_offset_userspace(uint64_t lite_handler, void *local_addr, int size, int priority, int offset, int password); 59 | int liteapi_rdma_write_offset_multiplesge(uint64_t lite_handler, void *local_addr, int size, int priority, int offset, int sge_num, struct ib_sge *input_sge); 60 | //int liteapi_multi_send_reply(int number_of_target, int *target_array, struct atomic_struct *input_atomic, struct max_reply_msg* reply); 61 | long long get_time_difference(int tid, ktime_t inputtime); 62 | void get_time_difference_str(char *input_str, ktime_t inputtime); 63 | uint64_t liteapi_dist_barrier(unsigned int checknum); 64 | int liteapi_rdma_asywait(void); 65 | int liteapi_add_askmr_table(uint64_t 
identifier, uint64_t lmr, uint64_t permission, int password); 66 | int liteapi_rdma_compare_and_swp(uint64_t lite_handler, void *local_addr, unsigned long long guess_value, unsigned long long set_value, int priority); 67 | int liteapi_rdma_swp(int target_node, struct lmr_info *mr_addr, void *local_addr, unsigned long long guess, unsigned long long swp_value, int priority); 68 | //int liteapi_multi_send_reply_type(int number_of_target, int *target_array, struct atomic_struct *input_atomic, struct max_reply_msg* reply, int type); 69 | int liteapi_umap_lmr(uint64_t lmr); 70 | //int atomic_send_reply_thread_helper(struct thread_pass_struct *input); 71 | 72 | int liteapi_rdma_write_offset_imm(uint64_t lite_handler, void *local_addr, int size, int priority, int offset, int imm); 73 | inline int liteapi_query_port(int target_node, int designed_port, int requery_flag); 74 | //IMM related 75 | inline int liteapi_register_application(unsigned int designed_port, unsigned int max_size_per_message, unsigned int max_user_per_node, char *name, uint64_t name_len); 76 | int liteapi_unregister_application(unsigned int designed_port); 77 | inline int liteapi_receive_message(unsigned int designed_port, void *ret_addr, int receive_size, uintptr_t *descriptor); 78 | inline int liteapi_receive_message_userspace(int size_port, void *ret_addr, void *descriptor, void *ret_length, int block_call, unsigned int priority); 79 | inline int liteapi_reply_message(void *addr, int size, uintptr_t descriptor); 80 | 81 | inline int liteapi_reply_message_userspace(void *addr, int size, uintptr_t descriptor, unsigned int priority); 82 | 83 | inline int liteapi_reply_and_receive_message(void *addr, int size, uintptr_t descriptor, int port, void *ret_addr, int receive_size, void *receive_descriptor); 84 | inline int liteapi_reply_and_receive_message_userspace(void *addr, int size_port, uintptr_t descriptor, void *ret_addr, int receive_size, void *receive_descriptor); 85 | 86 | inline int 
liteapi_send_reply_imm(int target_node, unsigned int port, void *addr, int size, void *ret_addr, int max_ret_size); 87 | inline int liteapi_send_reply_imm_userspace(int target_node, int size_port, void *addr, void *ret_addr, void *ret_length, unsigned int max_ret_size_and_priority); 88 | 89 | int liteapi_rdma_mr_request(uint64_t src_key, int src_offset, uint64_t tar_key, int tar_offset, int size, int op_code); 90 | int liteapi_rdma_mr_memcpy(uint64_t src_key, int src_offset, uint64_t tar_key, int tar_offset, int size); 91 | int liteapi_rdma_mr_memmov(uint64_t src_key, int src_offset, uint64_t tar_key, int tar_offset, int size); 92 | uint64_t liteapi_deregister_mr(uint64_t lmr); 93 | 94 | inline int liteapi_get_node_id(void); 95 | inline int liteapi_get_total_node(void); 96 | inline int liteapi_num_connected_nodes(void); 97 | inline int liteapi_alloc_continuous_memory(unsigned long long vaddr, unsigned long size); 98 | uint64_t liteapi_wrapup_alloc_for_remote_access(void *data, unsigned int size, uint64_t identifier, int password); 99 | 100 | int liteapi_send_reply_imm_multisge(int number_of_node, int *target_node, int port, struct atomic_struct **input_atomic, int *length, struct max_reply_msg *output_msg); 101 | 102 | //For FARM test 103 | inline ltc *liteapi_get_ctx(void); 104 | inline int liteapi_rdma_write_offset_withmr_without_polling(struct lmr_info *mr_addr, void *local_addr, int size, int priority, int offset, int wr_id); 105 | 106 | 107 | inline int liteapi_priority_hadling(int priority, int flag, unsigned long *priority_jiffies); 108 | #endif 109 | -------------------------------------------------------------------------------- /core/lite_core.h: -------------------------------------------------------------------------------- 1 | #ifndef _INCLUDE_LITE_INTERNAL_H 2 | #define _INCLUDE_LITE_INTERNAL_H 3 | 4 | #include "lite_internal_tool.h" 5 | #include "lite.h" 6 | 7 | #define COUNT_TIME_START tt_start= ktime_get(); 8 | #define COUNT_TIME_END tt_end = 
ktime_get();\ 9 | client_internal_stat(client_get_time_difference(tt_start, tt_end), LITE_STAT_ADD);\ 10 | if(Internal_Stat_Count==1000) printk(KERN_CRIT "%s: %lld ns\n", __func__, client_internal_stat(0, LITE_STAT_CLEAR)); 11 | 12 | /* PRIORITY_IMPLEMENT_OR_NOT */ 13 | //#define PRIORITY_IMPLEMENTATION_RESOURCE //If not RESOURCE, it would be traffic prioritization directly 14 | //#define PRIORITY_IMPLEMENTATION_TRAFFIC_PRIORITIZATION 15 | 16 | /* THREAD_HANDLER_MODEL - CHOOSE ONE*/ 17 | #define WAITING_QUEUE_IMPLEMENTATION 18 | //#define IMPLEMENTATION_THREAD_SPAWN 19 | //#define POLLING_THREAD_HANDLING_IMPLEMENTATION 20 | 21 | #define ASK_MR_TABLE_HANDLING 22 | 23 | /* POLLING OPTIONS - CHOOSE ONE*/ 24 | #define BUSY_POLL_MODEL 25 | //#define NOTIFY_MODEL 26 | 27 | #define BUSY_POLL_MODEL_UD 28 | //#define NOTIFY_MODEL_UD 29 | 30 | // 31 | /* sendreply-send model*/ 32 | //#define CPURELAX_MODEL 33 | //#define SCHEDULE_MODEL 34 | #define ADAPTIVE_MODEL 35 | 36 | /* sendreply-recv model*/ 37 | //#define RECV_WAITQUEUE_MODEL 38 | #define RECV_SCHEDULE_MODEL 39 | //#define RECV_CPURELAX_MODEL 40 | // 41 | 42 | //#define SHARE_POLL_CQ_MODEL 43 | #define NON_SHARE_POLL_CQ_MODEL 44 | 45 | 46 | int client_connect_ctx(ltc *ctx, int connection_id, int port, int my_psn, enum ib_mtu mtu, int sl, struct lite_dest *dest); 47 | 48 | ltc *client_init_ctx(int size,int rx_depth, int port, struct ib_device *ib_dev); 49 | 50 | ltc *client_init_interface(int ib_port, struct ib_device *ib_dev); 51 | 52 | int client_send_message_sge_UD(ltc *ctx, int target_node, int type, void *addr, int size, uint64_t store_addr, uint64_t store_semaphore, int priority); 53 | int client_send_request(ltc *ctx, int connection_id, enum mode s_mode, struct lmr_info *input_mr, void *addr, int size, int offset, int userspace_flag, int *poll_addr); 54 | 55 | int client_msg_to_lite_dest(char *msg, struct lite_dest *rem_dest); 56 | int client_gen_msg(ltc *ctx, char *msg, int connection_id); 57 | int 
client_post_receives_message(ltc *ctx, int connection_id, int n); 58 | 59 | int client_close_ctx(struct lite_context *ctx); 60 | 61 | 62 | struct lmr_info *client_ib_reg_mr(ltc *ctx, void *addr, size_t length, enum ib_access_flags access); 63 | 64 | int client_get_mr_id_by_semaphore(void); 65 | int client_get_port_info(struct ib_context *context, int port, struct ib_port_attr *attr); 66 | void client_wire_gid_to_gid(const char *wgid, union ib_gid *gid); 67 | void client_gid_to_wire_gid(const union ib_gid *gid, char wgid[]); 68 | 69 | struct hash_asyio_key *lmr_to_mr_metadata(uint64_t input_key); 70 | uint64_t client_hash_mr(struct lmr_info *input_mr); 71 | inline void client_free_recv_buf(void *input_buf); 72 | int client_get_random_number(void); 73 | int client_create_metadata_by_lmr(ltc *ctx, uint64_t ret_key, struct lmr_info **ret_mr_list, int ret_mr_list_length, int target_node, int roundup_size, uint64_t permission, bool local_flag, int password); 74 | inline int client_get_connection_by_atomic_number(ltc *ctx, int target_node, int priority); 75 | void client_setup_liteapi_header(uint32_t src_id, uint64_t store_addr, uint64_t store_semaphore, uint32_t length, int priority, int type, struct liteapi_header *output_header); 76 | int client_send_request_multiplesge(ltc *ctx, int connection_id, enum mode s_mode, struct lmr_info *input_mr, void *addr, int size, int sge_num, struct ib_sge *input_sge); 77 | 78 | struct lmr_info *client_alloc_lmr_info_buf(void); 79 | void client_free_lmr_info_buf(void *input_buf); 80 | 81 | void poll_cq(struct ib_cq *cq, void *cq_context); 82 | //struct lmr_info *lmr_to_mr(uint64_t input_key); 83 | struct lmr_info **lmr_to_mr(uint64_t input_key, int *length); 84 | int client_check_askmr_table(ltc *ctx, struct ask_mr_form *ask_form, uint32_t source_id, uint64_t *litekey_addr, uint64_t *permission); 85 | uintptr_t client_ib_reg_mr_phys_addr(ltc *ctx, void *addr, size_t length); 86 | inline uintptr_t client_ib_reg_mr_addr(ltc *ctx, void 
*addr, size_t length); 87 | int client_spawn_send_reply_handler(struct thread_pass_struct *input); 88 | int client_add_newnode(ltc *ctx, char *msg); 89 | int client_add_newnode_pass(struct thread_pass_struct *input); 90 | int client_poll_cq(ltc *ctx, struct ib_cq *target_cq); 91 | int client_poll_cq_pass(struct thread_pass_struct *input); 92 | int client_asy_latest_job_add(ltc *ctx, int type, uint64_t key, int offset, int size); 93 | ltc *client_establish_conn(struct ib_device *ib_dev, char *servername, int eth_port, int ib_port); 94 | int client_send_request_without_polling(ltc *ctx, int connection_id, enum mode s_mode, struct lmr_info *input_mr, void *addr, int size, int offset, int wr_id); 95 | int lmr_permission_check(uint64_t input_key, int input_flag, struct hash_asyio_key **ret_ptr); 96 | int client_compare_swp(ltc *ctx, int connection_id, struct lmr_info *remote_mr, void *addr, uint64_t guess_value, uint64_t swp_value); 97 | int client_compare_swp_loopback(ltc *ctx, struct lmr_info *remote_mr, void *addr, uint64_t guess_value, uint64_t swp_value); 98 | int client_fetch_and_add(ltc *ctx, int connection_id, struct lmr_info *input_mr, void *addr, unsigned long long input_value); 99 | int client_fetch_and_add_loopback(ltc *ctx, struct lmr_info *input_mr, void *addr, unsigned long long input_value); 100 | int client_send_request_polling_only(ltc *ctx, int connection_id, int polling_num, struct ib_wc *wc); 101 | int client_cleanup_module(void); 102 | 103 | 104 | //The below functions in liteapi are required to modify based on these four 105 | int client_rdma_read_offset(ltc *ctx, uint64_t lite_handler, void *local_addr, int size, int priority, int offset); 106 | int client_rdma_write_offset(ltc *ctx, uint64_t lite_handler, void *local_addr, int size, int priority, int offset); 107 | int client_rdma_write_offset_multiplesge(ltc *ctx, uint64_t lite_handler, void *local_addr, int size, int priority, int offset, int sge_num, struct ib_sge *input_sge); 108 | int 
client_send_reply_type(ltc *ctx, int target_node, char *msg, int size, char *output_msg, int type); 109 | 110 | int client_rdma_write_with_imm(ltc *ctx, int connection_id, struct lmr_info *input_mr, void *addr, int size, int offset, uint32_t imm); 111 | int client_poll_cq_UD(ltc *ctx, struct ib_cq *target_cq); 112 | void *client_alloc_memory_for_mr(unsigned int length); 113 | int client_register_application(ltc *ctx, unsigned int designed_port, unsigned int max_size_per_message, unsigned int max_user_per_node, char *name, uint64_t name_len); 114 | int client_unregister_application(ltc *ctx, unsigned int designed_port); 115 | 116 | int client_receive_message(ltc *ctx, unsigned int port, void *ret_addr, int receive_size, uintptr_t *reply_descriptor, void *ret_length, int userspace_flag, int block_call); 117 | int client_reply_message(ltc *ctx, void *addr, int size, uintptr_t descriptor, int userspace_flag, int priority); 118 | int client_query_port(ltc *ctx, int target_node, int desigend_port, int requery_flag); 119 | int client_send_reply_with_rdma_write_with_imm(ltc *ctx, int target_node, unsigned int port, void *addr, int size, void *ret_addr, int max_ret_size, void *ret_length, int userspace_flag, int priority); 120 | int client_send_message_with_rdma_write_with_imm_request(ltc *ctx, int connection_id, uint32_t input_mr_rkey, uintptr_t input_mr_addr, void *addr, int size, int offset, uint32_t imm, enum mode s_mode, struct imm_message_metadata *header, int userspace_flag, int sge_length, struct atomic_struct *input_atomic, int force_poll_flag); 121 | inline int client_get_offset_by_length(ltc *ctx, int target_node, int port, int size); 122 | inline int client_find_qp_id_by_qpnum(ltc *ctx, uint32_t qp_num); 123 | inline int client_find_node_id_by_qpnum(ltc *ctx, uint32_t qp_num); 124 | int client_setup_loopback_connections(ltc *ctx, int size, int rx_depth, int ib_port); 125 | int client_connect_loopback(struct ib_qp *src_qp, int port, int src_psn, enum ib_mtu mtu, 
int sl, struct lite_dest *dest); 126 | int lite_check_page_continuous(void *local_addr, int size, unsigned long *answer); 127 | int client_send_message_local(ltc *ctx, int target_node, int type, void *addr, int size, uint64_t store_addr, uint64_t store_semaphore, int priority); 128 | int client_send_message_local_reply(ltc *ctx, int target_node, int type, void *addr, int size, uint64_t store_addr, uint64_t store_semaphore, int priority); 129 | int client_internal_poll_sendcq(struct ib_cq *tar_cq, int connection_id, int *check); 130 | int client_alloc_continuous_memory(ltc *ctx, unsigned long long addr, unsigned long size); 131 | int client_add_askmr_table(ltc *ctx, uint64_t identifier, uint64_t lmr, uint64_t permission); 132 | int client_internal_poll_sendcq(struct ib_cq *tar_cq, int connection_id, int *check); 133 | 134 | int client_send_reply_with_rdma_write_with_imm_sge(ltc *ctx, int number_of_node, int *target_node, unsigned int port, struct atomic_struct **input_atomic, int *length, struct max_reply_msg *output_msg); 135 | 136 | 137 | int client_send_message_with_rdma_emulated_for_local(ltc *ctx, int port, void *addr, int size, struct imm_message_metadata *header, int userspace_flag); 138 | #endif 139 | -------------------------------------------------------------------------------- /core/lite_insmod.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | cd ../lite-userspace ; make clean ; make all -j 24; cd ../core 3 | #cd test ; make ; cd .. 
4 | make
5 | insmod lite_internal.ko
6 | insmod lite_api.ko
7 | #insmod lite_test.ko
8 |
--------------------------------------------------------------------------------
/core/lite_internal_tool.c:
--------------------------------------------------------------------------------
1 | #include "lite_internal_tool.h"
2 |
3 | /* Running sum and sample count behind client_internal_stat(). */
4 | long long int Internal_Stat_Sum=0;
5 | int Internal_Stat_Count=0;
6 | EXPORT_SYMBOL(Internal_Stat_Count);
7 |
8 | /*
9 |  * Mean of the recorded samples, or 0 when nothing has been recorded yet.
10 |  * The guard keeps LITE_STAT_CLEAR/LITE_STAT_TEMP from dividing by zero.
11 |  */
12 | static long long int client_internal_stat_mean(void)
13 | {
14 | 	if (Internal_Stat_Count == 0)
15 | 		return 0;
16 | 	return Internal_Stat_Sum / Internal_Stat_Count;
17 | }
18 |
19 | /*
20 |  * client_internal_stat - accumulate samples and report their mean
21 |  * @input: sample to record; only used with LITE_STAT_ADD
22 |  * @flag:  LITE_STAT_ADD, LITE_STAT_CLEAR or LITE_STAT_TEMP
23 |  *
24 |  * LITE_STAT_ADD   adds @input to the sum, bumps the count, returns 0.
25 |  * LITE_STAT_CLEAR logs "sum / count", resets both, returns the old mean.
26 |  * LITE_STAT_TEMP  returns the current mean and leaves the counters alone.
27 |  * Any other @flag logs an error and returns -1.
28 |  *
29 |  * A mean requested before any sample was added is reported as 0.
30 |  */
31 | long long int client_internal_stat(long long input, int flag)
32 | {
33 | 	long long int ret;
34 |
35 | 	switch (flag) {
36 | 	case LITE_STAT_ADD:
37 | 		Internal_Stat_Sum += input;
38 | 		Internal_Stat_Count ++;
39 | 		return 0;
40 | 	case LITE_STAT_CLEAR:
41 | 		ret = client_internal_stat_mean();
42 | 		printk(KERN_CRIT "%lld / %d \n", Internal_Stat_Sum, Internal_Stat_Count);
43 | 		Internal_Stat_Sum = 0;
44 | 		Internal_Stat_Count = 0;
45 | 		return ret;
46 | 	case LITE_STAT_TEMP:
47 | 		return client_internal_stat_mean();
48 | 	default:
49 | 		break;
50 | 	}
51 | 	printk(KERN_CRIT "%s Error: flag undefined - %d\n", __func__, flag);
52 | 	return -1;
53 | }
54 | EXPORT_SYMBOL(client_internal_stat);
55 |
56 | /* Nanoseconds elapsed from @inputtime to @endtime (endtime - inputtime). */
57 | inline long long client_get_time_difference(ktime_t inputtime, ktime_t endtime)
58 | {
59 | 	return (long long) ktime_to_ns(ktime_sub(endtime, inputtime));
60 | }
61 | EXPORT_SYMBOL(client_get_time_difference);
62 |
--------------------------------------------------------------------------------
/core/lite_internal_tool.h:
--------------------------------------------------------------------------------
1 | #ifndef _INCLUDE_LITE_INTERNAL_TOOL_H
2 | #define _INCLUDE_LITE_INTERNAL_TOOL_H
3 |
4 | #include "lite.h"
5 | /* Operation selector for client_internal_stat(). */
6 | enum LITE_STAT {
7 | 	LITE_STAT_ADD,		/* record one sample */
8 | 	LITE_STAT_CLEAR,	/* report the mean, then reset sum and count */
9 | 	LITE_STAT_TEMP		/* report the mean, keep sum and count */
10 | };
11 |
12 | long long int client_internal_stat(long long input, int flag);
13 | inline long long client_get_time_difference(ktime_t inputtime, ktime_t endtime);
14 | #endif
15 |
-------------------------------------------------------------------------------- /core/lite_rmmod.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | #rmmod lite_test.ko 4 | rmmod lite_api.ko 5 | rmmod lite_internal.ko 6 | -------------------------------------------------------------------------------- /core/lite_syscall.h: -------------------------------------------------------------------------------- 1 | #ifndef _INCLUDE_FIT_SYS_H 2 | #define _INCLUDE_FIT_SYS_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include "lite.h" 9 | 10 | struct lite_hooks 11 | { 12 | 13 | uint64_t (*lite_alloc_remote)(unsigned int, unsigned int, unsigned int, int); 14 | int (*lite_remote_memset)(uint64_t, int, int); 15 | int (*lite_fetch_add)(uint64_t, void*, unsigned long long, int); 16 | int (*lite_rdma_synwrite)(uint64_t, void*, int, int, int, int); 17 | 18 | int (*lite_rdma_asywrite)(uint64_t, void*, int, int, int); 19 | int (*lite_rdma_read)(uint64_t, void*, int, int, int, int); 20 | uint64_t (*lite_ask_lmr)(int, uint64_t, uint64_t, int); 21 | uint64_t (*lite_dist_barrier)(unsigned int); 22 | int (*lite_add_ask_mr_table)(uint64_t, uint64_t, uint64_t, int); 23 | int (*lite_compare_swp)(uint64_t, void*, unsigned long long, unsigned long long, int); 24 | int (*lite_umap_lmr)(uint64_t); 25 | 26 | int (*lite_register_application)(unsigned int, unsigned int, unsigned int, char*, uint64_t); 27 | int (*lite_unregister_application)(unsigned int); 28 | int (*lite_receive_message)(int, void*, void*, void*, int, unsigned int); 29 | int (*lite_send_reply_imm)(int, int, void*, void *, void*, unsigned int); 30 | int (*lite_reply_message)(void *, int, uintptr_t, unsigned int); 31 | int (*lite_get_node_id)(void); 32 | int (*lite_get_total_node)(void); 33 | int (*lite_query_port)(int, int, int); 34 | int (*lite_alloc_continuous_memory)(unsigned long long, unsigned long); 35 | uint64_t 
(*lite_wrap_alloc_for_remote_access)(void*, unsigned int, uint64_t, int); 36 | int (*lite_create_lock)(int, void*); 37 | int (*lite_ask_lock)(int, int, void*); 38 | int (*lite_lock)(void*); 39 | int (*lite_unlock)(void*); 40 | int (*lite_reply_and_receive_message)(void *, int, uintptr_t, void *, int, void *); 41 | 42 | int (*lite_join)(char *, int, int); 43 | }; 44 | int register_lite_hooks(const struct lite_hooks *hooks); 45 | void unregister_lite_hooks(void); 46 | #endif 47 | -------------------------------------------------------------------------------- /core/lite_test.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "lite_test.h" 5 | 6 | 7 | MODULE_AUTHOR("yiying, shinyeh"); 8 | MODULE_LICENSE("GPL"); 9 | static int __init lite_test_init_module(void) 10 | { 11 | int node_id; 12 | node_id = liteapi_establish_conn("192.168.0.1", LISTEN_PORT, 1); 13 | 14 | return node_id; 15 | } 16 | 17 | static void __exit lite_test_cleanup_module(void) 18 | { 19 | printk(KERN_INFO "Ready to remove test module\n"); 20 | } 21 | 22 | module_init(lite_test_init_module); 23 | module_exit(lite_test_cleanup_module); 24 | -------------------------------------------------------------------------------- /core/lite_test.h: -------------------------------------------------------------------------------- 1 | #ifndef _INCLUDE_LITE_TEST_H 2 | #define _INCLUDE_LITE_TEST_H 3 | 4 | #include "lite.h" 5 | #include "lite_distribution.h" 6 | #include "lite_api.h" 7 | #include "lite_core.h" 8 | #endif 9 | -------------------------------------------------------------------------------- /lite-syscall/.gitignore: -------------------------------------------------------------------------------- 1 | # 2 | # NOTE! Don't add files that are generated in specific 3 | # subdirectories here. Add them in the ".gitignore" file 4 | # in that subdirectory instead. 5 | # 6 | # NOTE! 
Please use 'git ls-files -i --exclude-standard' 7 | # command after changing this file, to see if there are 8 | # any tracked files which get ignored after the change. 9 | # 10 | # Normal rules 11 | # 12 | .* 13 | *.o 14 | *.o.* 15 | *.a 16 | *.s 17 | *.ko 18 | *.so 19 | *.so.dbg 20 | *.mod.c 21 | *.i 22 | *.lst 23 | *.symtypes 24 | *.order 25 | modules.builtin 26 | *.elf 27 | *.bin 28 | *.gz 29 | *.bz2 30 | *.lzma 31 | *.xz 32 | *.lz4 33 | *.lzo 34 | *.patch 35 | *.gcno 36 | 37 | # 38 | # Top-level generic files 39 | # 40 | /tags 41 | /TAGS 42 | /System.map 43 | /Module.markers 44 | /Module.symvers 45 | 46 | # 47 | # git files that we don't want to ignore even it they are dot-files 48 | # 49 | !.gitignore 50 | !.mailmap 51 | 52 | # cscope files 53 | cscope.* 54 | ncscope.* 55 | 56 | # gnu global files 57 | GPATH 58 | GRTAGS 59 | GSYMS 60 | GTAGS 61 | 62 | *.orig 63 | *~ 64 | \#*# 65 | -------------------------------------------------------------------------------- /lite-syscall/Makefile: -------------------------------------------------------------------------------- 1 | #obj-y := dsm_syscall.o 2 | obj-y += lite_syscall.o 3 | 4 | all: 5 | make -C /lib/modules/$(shell uname -r)/build M=$(PWD) modules 6 | clean: 7 | make -C /lib/modules/$(shell uname -r)/build M=$(PWD) clean 8 | rm lite*.o dsm*.o -f 9 | -------------------------------------------------------------------------------- /lite-syscall/lite.h: -------------------------------------------------------------------------------- 1 | ../core/lite.h -------------------------------------------------------------------------------- /lite-syscall/lite_syscall.c: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | #include 4 | #include 5 | 6 | #include "lite_syscall.h" 7 | //#include "client.h" 8 | 9 | static uint64_t (*lite_alloc_remote_hook)(unsigned int, unsigned int, unsigned int, int); 10 | static int (*lite_remote_memset_hook)(uint64_t, int, int); 11 | static int 
(*lite_fetch_add_hook)(uint64_t, void*, unsigned long long, int); 12 | static int (*lite_rdma_synwrite_hook)(uint64_t, void*, int, int, int, int); 13 | 14 | static int (*lite_rdma_asywrite_hook)(uint64_t, void*, int, int, int); 15 | static int (*lite_rdma_read_hook)(uint64_t, void*, int, int, int, int); 16 | static uint64_t (*lite_ask_lmr_hook)(int, uint64_t, uint64_t, int); 17 | static uint64_t (*lite_dist_barrier_hook)(unsigned int); 18 | static int (*lite_add_ask_mr_table_hook)(uint64_t, uint64_t, uint64_t, int); 19 | static int (*lite_compare_swp_hook)(uint64_t, void*, unsigned long long, unsigned long long, int); 20 | static int (*lite_umap_lmr_hook)(uint64_t); 21 | 22 | static int (*lite_register_application_hook)(unsigned int, unsigned int, unsigned int, char*, uint64_t); 23 | static int (*lite_unregister_application_hook)(unsigned int); 24 | static int (*lite_receive_message_hook)(int, void*, void*, void*, int, unsigned int); 25 | static int (*lite_send_reply_imm_hook)(int, int, void*, void *, void*, unsigned int); 26 | static int (*lite_reply_message_hook)(void *, int, uintptr_t, unsigned int); 27 | static int (*lite_get_node_id_hook)(void); 28 | static int (*lite_get_total_node_hook)(void); 29 | static int (*lite_query_port_hook)(int, int, int); 30 | static int (*lite_alloc_continuous_memory_hook)(unsigned long long, unsigned long); 31 | static uint64_t (*lite_wrap_alloc_for_remote_access_hook)(void *, unsigned int, uint64_t, int); 32 | static int (*lite_create_lock_hook)(int, void*); 33 | static int (*lite_ask_lock_hook)(int, int, void*); 34 | static int (*lite_lock_hook)(void*); 35 | static int (*lite_unlock_hook)(void*); 36 | static int (*lite_reply_and_receive_message_hook)(void *, int, uintptr_t, void *, int, void *); 37 | 38 | static int (*lite_join_hook)(char *, int, int); 39 | 40 | int register_lite_hooks(const struct lite_hooks *hooks) 41 | { 42 | if(unlikely(!hooks)) 43 | return -EINVAL; 44 | if(unlikely(!hooks->lite_alloc_remote || 45 | 
!hooks->lite_remote_memset || 46 | !hooks->lite_fetch_add || 47 | !hooks->lite_rdma_synwrite || 48 | !hooks->lite_rdma_asywrite || 49 | !hooks->lite_rdma_read || 50 | !hooks->lite_ask_lmr || 51 | !hooks->lite_add_ask_mr_table || 52 | !hooks->lite_compare_swp || 53 | !hooks->lite_umap_lmr || 54 | !hooks->lite_register_application || 55 | !hooks->lite_unregister_application || 56 | !hooks->lite_receive_message || 57 | !hooks->lite_send_reply_imm || 58 | !hooks->lite_reply_message || 59 | !hooks->lite_get_node_id || 60 | !hooks->lite_get_total_node || 61 | !hooks->lite_query_port || 62 | !hooks->lite_alloc_continuous_memory || 63 | !hooks->lite_wrap_alloc_for_remote_access || 64 | !hooks->lite_create_lock || 65 | !hooks->lite_ask_lock || 66 | !hooks->lite_lock || 67 | !hooks->lite_unlock || 68 | !hooks->lite_reply_and_receive_message || 69 | !hooks->lite_join)) 70 | { 71 | return -EINVAL; 72 | } 73 | lite_alloc_remote_hook = hooks->lite_alloc_remote; 74 | lite_remote_memset_hook = hooks->lite_remote_memset; 75 | lite_fetch_add_hook = hooks->lite_fetch_add; 76 | lite_rdma_synwrite_hook = hooks->lite_rdma_synwrite; 77 | lite_rdma_asywrite_hook = hooks->lite_rdma_asywrite; 78 | lite_rdma_read_hook = hooks->lite_rdma_read; 79 | lite_ask_lmr_hook = hooks->lite_ask_lmr; 80 | lite_dist_barrier_hook = hooks->lite_dist_barrier; 81 | lite_add_ask_mr_table_hook = hooks->lite_add_ask_mr_table; 82 | lite_compare_swp_hook = hooks->lite_compare_swp; 83 | lite_umap_lmr_hook = hooks->lite_umap_lmr; 84 | lite_register_application_hook = hooks->lite_register_application; 85 | lite_unregister_application_hook = hooks->lite_unregister_application; 86 | lite_receive_message_hook = hooks->lite_receive_message; 87 | lite_send_reply_imm_hook = hooks->lite_send_reply_imm; 88 | lite_reply_message_hook = hooks->lite_reply_message; 89 | lite_get_node_id_hook = hooks->lite_get_node_id; 90 | lite_get_total_node_hook = hooks->lite_get_total_node; 91 | lite_query_port_hook = hooks->lite_query_port; 
92 | lite_alloc_continuous_memory_hook = hooks->lite_alloc_continuous_memory; 93 | lite_wrap_alloc_for_remote_access_hook = hooks->lite_wrap_alloc_for_remote_access; 94 | lite_create_lock_hook = hooks->lite_create_lock; 95 | lite_ask_lock_hook = hooks->lite_ask_lock; 96 | lite_lock_hook = hooks->lite_lock; 97 | lite_unlock_hook = hooks->lite_unlock; 98 | lite_reply_and_receive_message_hook = hooks->lite_reply_and_receive_message; 99 | lite_join_hook = hooks->lite_join; 100 | return 0; 101 | } 102 | EXPORT_SYMBOL(register_lite_hooks); 103 | void unregister_lite_hooks(void) 104 | { 105 | 106 | lite_alloc_remote_hook = NULL; 107 | lite_remote_memset_hook = NULL; 108 | lite_fetch_add_hook = NULL; 109 | lite_rdma_synwrite_hook = NULL; 110 | lite_rdma_asywrite_hook = NULL; 111 | lite_rdma_read_hook = NULL; 112 | lite_ask_lmr_hook = NULL; 113 | lite_dist_barrier_hook = NULL; 114 | lite_add_ask_mr_table_hook = NULL; 115 | lite_compare_swp_hook = NULL; 116 | lite_umap_lmr_hook = NULL; 117 | lite_register_application_hook = NULL; 118 | lite_unregister_application_hook = NULL; 119 | lite_receive_message_hook = NULL; 120 | lite_send_reply_imm_hook = NULL; 121 | lite_reply_message_hook = NULL; 122 | lite_get_node_id_hook = NULL; 123 | lite_get_total_node_hook = NULL; 124 | lite_query_port_hook = NULL; 125 | lite_alloc_continuous_memory_hook = NULL; 126 | lite_wrap_alloc_for_remote_access_hook = NULL; 127 | lite_create_lock_hook = NULL; 128 | lite_ask_lock_hook = NULL; 129 | lite_lock_hook = NULL; 130 | lite_unlock_hook = NULL; 131 | lite_reply_and_receive_message_hook = NULL; 132 | lite_join_hook = NULL; 133 | } 134 | EXPORT_SYMBOL(unregister_lite_hooks); 135 | //lite 136 | 137 | SYSCALL_DEFINE4(lite_alloc_remote, unsigned int, node_id, 138 | unsigned int, size, 139 | unsigned int, atomic_flag, 140 | int, password) 141 | { 142 | if(likely(lite_alloc_remote_hook)) 143 | { 144 | uint64_t lmr; 145 | lmr = lite_alloc_remote_hook(node_id, size, atomic_flag, password); 146 | return 
(long)lmr; 147 | } 148 | return -EFAULT; 149 | } 150 | 151 | SYSCALL_DEFINE4(lite_wrap_alloc_for_remote_access, void __user *, data, 152 | unsigned int, size, 153 | uint64_t, identifier, 154 | int, password) 155 | { 156 | if(likely(lite_wrap_alloc_for_remote_access_hook)) 157 | { 158 | uint64_t lmr; 159 | lmr = lite_wrap_alloc_for_remote_access_hook(data, size, identifier, password); 160 | return (long)lmr; 161 | } 162 | return -EFAULT; 163 | } 164 | 165 | SYSCALL_DEFINE3(lite_remote_memset, unsigned long, lmr, 166 | int, offset, 167 | int, size) 168 | { 169 | if(likely(lite_remote_memset_hook)) 170 | { 171 | lite_remote_memset_hook(lmr, offset, size); 172 | return 0; 173 | } 174 | return -EFAULT; 175 | } 176 | 177 | SYSCALL_DEFINE4(lite_fetch_add, unsigned long, lite_handler, 178 | void __user *, local_addr, 179 | unsigned long long, input_value, 180 | unsigned int, priority) 181 | { 182 | if(likely(lite_fetch_add_hook)) 183 | { 184 | int ret; 185 | uint64_t output; 186 | ret = lite_fetch_add_hook(lite_handler, &output, input_value, priority); 187 | if(ret) 188 | { 189 | return -EFAULT; 190 | } 191 | if(copy_to_user(local_addr, &output, sizeof(uint64_t))) 192 | { 193 | return -EFAULT; 194 | } 195 | return 0; 196 | 197 | } 198 | return -EFAULT; 199 | } 200 | 201 | 202 | SYSCALL_DEFINE6(lite_rdma_synwrite,unsigned long, lite_handler, 203 | void __user *, local_addr, 204 | unsigned int, size, 205 | unsigned int, priority, 206 | unsigned int, offset, 207 | int, password) 208 | { 209 | if(likely(lite_rdma_synwrite_hook)) 210 | { 211 | //void *output; 212 | int ret; 213 | //output = kmalloc(size, GFP_KERNEL); 214 | /*if(copy_from_user(output, local_addr, size)) 215 | { 216 | kfree(output); 217 | return -EFAULT; 218 | }*/ 219 | //ret = lite_rdma_synwrite_hook(lite_handler, output, size, priority, offset, password); 220 | ret = lite_rdma_synwrite_hook(lite_handler, local_addr, size, priority, offset, password); 221 | if(ret) 222 | { 223 | //kfree(output); 224 | return 
-EFAULT; 225 | } 226 | //kfree(output); 227 | return 0; 228 | 229 | } 230 | return -EFAULT; 231 | } 232 | SYSCALL_DEFINE5(lite_rdma_asywrite,unsigned long, lite_handler, 233 | void __user *, local_addr, 234 | unsigned int, size, 235 | unsigned int, priority, 236 | unsigned int, offset) 237 | { 238 | if(likely(lite_rdma_asywrite_hook)) 239 | { 240 | void *output; 241 | int ret; 242 | output = kmalloc(size, GFP_KERNEL); 243 | if(copy_from_user(output, local_addr, size)) 244 | { 245 | kfree(output); 246 | return -EFAULT; 247 | } 248 | ret = lite_rdma_asywrite_hook(lite_handler, output, size, priority, offset); 249 | if(ret) 250 | { 251 | kfree(output); 252 | return -EFAULT; 253 | } 254 | kfree(output); 255 | return 0; 256 | 257 | } 258 | return -EFAULT; 259 | } 260 | SYSCALL_DEFINE6(lite_rdma_read, unsigned long, lite_handler, 261 | void __user *, local_addr, 262 | unsigned int, size, 263 | unsigned int, priority, 264 | unsigned int, offset, 265 | int, password) 266 | { 267 | if(likely(lite_rdma_read_hook)) 268 | { 269 | /*void *output; 270 | int ret; 271 | output = kmalloc(size, GFP_KERNEL); 272 | ret = lite_rdma_read_hook(lite_handler, output, size, priority, offset, password); 273 | if(ret) 274 | { 275 | kfree(output); 276 | return -EFAULT; 277 | } 278 | if(copy_to_user(local_addr, output, size)) 279 | { 280 | kfree(output); 281 | return -EFAULT; 282 | } 283 | kfree(output);*/ 284 | int ret; 285 | ret = lite_rdma_read_hook(lite_handler, local_addr, size, priority, offset, password); 286 | if(ret) 287 | { 288 | return -EFAULT; 289 | } 290 | return 0; 291 | 292 | } 293 | return -EFAULT; 294 | } 295 | SYSCALL_DEFINE4(lite_ask_lmr, int, memory_space_owner_node, 296 | uint64_t, identifier, 297 | uint64_t, permission, 298 | int, password) 299 | { 300 | if(likely(lite_ask_lmr_hook)) 301 | { 302 | uint64_t ret; 303 | ret = lite_ask_lmr_hook(memory_space_owner_node, identifier, permission, password); 304 | return (long)ret; 305 | } 306 | return -EFAULT; 307 | } 308 | 
SYSCALL_DEFINE1(lite_dist_barrier, unsigned int, check_num) 309 | { 310 | if(likely(lite_dist_barrier_hook)) 311 | { 312 | lite_dist_barrier_hook(check_num); 313 | return 0; 314 | } 315 | return -EFAULT; 316 | } 317 | SYSCALL_DEFINE4(lite_add_ask_mr_table, uint64_t, identifier, 318 | uint64_t, lmr, 319 | uint64_t, permission, 320 | int, password) 321 | { 322 | if(likely(lite_add_ask_mr_table_hook)) 323 | { 324 | lite_add_ask_mr_table_hook(identifier, lmr, permission, password); 325 | return 0; 326 | } 327 | return -EFAULT; 328 | } 329 | SYSCALL_DEFINE5(lite_compare_swp, unsigned long, lite_handler, 330 | void __user *, local_addr, 331 | unsigned long long, guess_value, 332 | unsigned long long, set_value, 333 | unsigned int, priority) 334 | { 335 | if(likely(lite_compare_swp_hook)) 336 | { 337 | int ret; 338 | uint64_t output; 339 | ret = lite_compare_swp_hook(lite_handler, &output, guess_value, set_value, priority); 340 | if(ret) 341 | { 342 | return ret; 343 | } 344 | if(copy_to_user(local_addr, &output, sizeof(uint64_t))) 345 | { 346 | return -EFAULT; 347 | } 348 | return 0; 349 | 350 | } 351 | return -EFAULT; 352 | } 353 | SYSCALL_DEFINE1(lite_umap_lmr, unsigned long, lite_handler) 354 | { 355 | if(likely(lite_umap_lmr_hook)) 356 | { 357 | int ret; 358 | ret = lite_umap_lmr_hook(lite_handler); 359 | return ret; 360 | } 361 | return -EFAULT; 362 | } 363 | 364 | SYSCALL_DEFINE5(lite_register_application, unsigned int, designed_port, 365 | unsigned int, max_size_per_message, 366 | unsigned int, max_user_per_node, 367 | void __user*, input_name, 368 | unsigned int, name_len) 369 | { 370 | if(likely(lite_register_application_hook)) 371 | { 372 | int ret; 373 | char *name = kmalloc(name_len * sizeof(char), GFP_KERNEL); 374 | ret = copy_from_user(name, input_name, name_len); 375 | if(ret) 376 | { 377 | kfree(name); 378 | return -EFAULT; 379 | } 380 | ret = lite_register_application_hook(designed_port, max_size_per_message, max_user_per_node, name, name_len); 381 | 
return ret; 382 | } 383 | return -EFAULT; 384 | } 385 | 386 | SYSCALL_DEFINE1(lite_unregister_application, unsigned int, port) 387 | { 388 | if(likely(lite_unregister_application_hook)) 389 | { 390 | int ret; 391 | ret = lite_unregister_application_hook(port); 392 | return ret; 393 | } 394 | return -EFAULT; 395 | } 396 | 397 | SYSCALL_DEFINE3(lite_query_port, int, target_node, 398 | int, designed_port, 399 | int, requery_flag) 400 | { 401 | if(likely(lite_query_port_hook)) 402 | { 403 | int ret; 404 | ret = lite_query_port_hook(target_node, designed_port, requery_flag); 405 | return ret; 406 | } 407 | return -EFAULT; 408 | } 409 | 410 | SYSCALL_DEFINE6(lite_send_reply_imm, int, node, 411 | int, size_port, 412 | void __user *, local_addr, 413 | void __user *, ret_addr, 414 | void __user *, ret_length, 415 | unsigned int, max_ret_size_and_priority) 416 | { 417 | if(likely(lite_send_reply_imm_hook)) 418 | { 419 | int ret; 420 | ret = lite_send_reply_imm_hook(node, size_port, local_addr, ret_addr, ret_length, max_ret_size_and_priority); 421 | return ret; 422 | } 423 | return -EFAULT; 424 | } 425 | 426 | SYSCALL_DEFINE6(lite_receive_message, int, size_port, 427 | void __user *, local_addr, 428 | void __user *, descriptor, 429 | void __user *, ret_length, 430 | int, block_call, 431 | unsigned int, priority) 432 | { 433 | if(likely(lite_receive_message_hook)) 434 | { 435 | int ret; 436 | ret = lite_receive_message_hook(size_port, local_addr, descriptor, ret_length, block_call, priority); 437 | return ret; 438 | } 439 | return -EFAULT; 440 | } 441 | 442 | SYSCALL_DEFINE4(lite_reply_message, void __user *, local_addr, 443 | int, size, 444 | unsigned long, descriptor, 445 | unsigned int, priority) 446 | { 447 | if(likely(lite_reply_message_hook)) 448 | { 449 | int ret; 450 | ret = lite_reply_message_hook(local_addr, size, descriptor, priority); 451 | return ret; 452 | } 453 | return -EFAULT; 454 | } 455 | 456 | SYSCALL_DEFINE6(lite_reply_and_receive_message, void __user *, 
local_addr, 457 | int, size_port, 458 | unsigned long, descriptor, 459 | void __user *, ret_addr, 460 | int, receive_size, 461 | void __user *, receive_descriptor) 462 | { 463 | if(likely(lite_reply_and_receive_message_hook)) 464 | { 465 | int ret; 466 | ret = lite_reply_and_receive_message_hook(local_addr, size_port, descriptor, ret_addr, receive_size, receive_descriptor); 467 | return ret; 468 | } 469 | return -EFAULT; 470 | } 471 | 472 | 473 | SYSCALL_DEFINE0(lite_get_node_id) 474 | { 475 | if(likely(lite_get_node_id_hook)) 476 | { 477 | int ret; 478 | ret = lite_get_node_id_hook(); 479 | return ret; 480 | } 481 | return -EFAULT; 482 | } 483 | 484 | 485 | SYSCALL_DEFINE0(lite_get_total_node) 486 | { 487 | if(likely(lite_get_total_node_hook)) 488 | { 489 | int ret; 490 | ret = lite_get_total_node_hook(); 491 | return ret; 492 | } 493 | return -EFAULT; 494 | } 495 | 496 | SYSCALL_DEFINE2(lite_alloc_continuous_memory, unsigned long long, vaddr, 497 | unsigned long, size) 498 | { 499 | if(likely(lite_alloc_continuous_memory_hook)) 500 | { 501 | int ret; 502 | ret = lite_alloc_continuous_memory_hook(vaddr, size); 503 | return ret; 504 | } 505 | return -EFAULT; 506 | } 507 | 508 | SYSCALL_DEFINE2(lite_create_lock, int, target_node, 509 | void __user *, input_addr) 510 | { 511 | if(likely(lite_create_lock_hook)) 512 | { 513 | int ret; 514 | remote_spinlock_t temp_lock; 515 | ret = lite_create_lock_hook(target_node, (void *)&temp_lock); 516 | if(copy_to_user(input_addr, &temp_lock, sizeof(remote_spinlock_t))) 517 | return -EFAULT; 518 | return ret; 519 | } 520 | return -EFAULT; 521 | } 522 | 523 | SYSCALL_DEFINE3(lite_ask_lock, int, target_node, 524 | int, target_num, 525 | void __user *, input_addr) 526 | { 527 | if(likely(lite_ask_lock_hook)) 528 | { 529 | int ret; 530 | remote_spinlock_t temp_lock; 531 | ret = lite_ask_lock_hook(target_node, target_num, (void *)&temp_lock); 532 | if(copy_to_user(input_addr, &temp_lock, sizeof(remote_spinlock_t))) 533 | return 
-EFAULT; 534 | return ret; 535 | } 536 | return -EFAULT; 537 | } 538 | 539 | SYSCALL_DEFINE1(lite_lock, void __user *, input_addr) 540 | { 541 | if(likely(lite_lock_hook)) 542 | { 543 | int ret; 544 | remote_spinlock_t temp_lock; 545 | if(copy_from_user(&temp_lock, input_addr, sizeof(remote_spinlock_t))) 546 | return -EFAULT; 547 | ret = lite_lock_hook((void *)&temp_lock); 548 | if(copy_to_user(input_addr, &temp_lock, sizeof(remote_spinlock_t))) 549 | return -EFAULT; 550 | return ret; 551 | } 552 | return -EFAULT; 553 | } 554 | 555 | SYSCALL_DEFINE1(lite_unlock, void __user *, input_addr) 556 | { 557 | if(likely(lite_lock_hook)) 558 | { 559 | int ret; 560 | remote_spinlock_t temp_lock; 561 | if(copy_from_user(&temp_lock, input_addr, sizeof(remote_spinlock_t))) 562 | return -EFAULT; 563 | ret = lite_unlock_hook((void *)&temp_lock); 564 | if(copy_to_user(input_addr, &temp_lock, sizeof(remote_spinlock_t))) 565 | return -EFAULT; 566 | return ret; 567 | } 568 | return -EFAULT; 569 | } 570 | 571 | SYSCALL_DEFINE3(lite_join, void __user *, input_addr, 572 | int, eth_port, 573 | int, ib_port) 574 | { 575 | if(likely(lite_lock_hook)) 576 | { 577 | int ret; 578 | char ip_str[32]; 579 | if(copy_from_user(ip_str, input_addr, 32)) 580 | return -EFAULT; 581 | ret = lite_join_hook(ip_str, eth_port, ib_port); 582 | return ret; 583 | } 584 | return -EFAULT; 585 | } 586 | -------------------------------------------------------------------------------- /lite-syscall/lite_syscall.h: -------------------------------------------------------------------------------- 1 | ../core/lite_syscall.h -------------------------------------------------------------------------------- /lite-userspace/Makefile: -------------------------------------------------------------------------------- 1 | CFLAGS := -std=gnu11 -Wall -Wmissing-prototypes -Wstrict-prototypes\ 2 | -fomit-frame-pointer -freg-struct-return -O2 3 | 4 | CFLAGS := -fomit-frame-pointer -freg-struct-return 5 | 6 | SRCS := $(wildcard 
example*.c) 7 | SRCS += $(wildcard lite_*.c) 8 | OBJS := $(SRCS:.c=.o) 9 | DEPS := lite-lib.h 10 | 11 | all: $(OBJS) 12 | 13 | clean: 14 | rm -f *.o 15 | 16 | %.o: %.c 17 | gcc lite-lib.c -lpthread -o $@ $(CFLAGS) $< 18 | -------------------------------------------------------------------------------- /lite-userspace/README.md: -------------------------------------------------------------------------------- 1 | LITE Local Indirection TiEr - userspace example 2 | ==== 3 | 4 | `lite-lib.c` and `lite-lib.h` contain the code of lite-userspace library call. 5 | `lite-lib.c` mainly interacts with LITE-kernel with syscall. 6 | Syscall definition should match 7 | 1. `lite-syscall/lite_syscall.c` 8 | 2. `kernel_src/arch/x86/syscalls/syscall_64.tbl`. 9 | Regular example call for send-reply and send are in `lite_example.c` and `lite_send.c` respectively. 10 | 11 | ## How to Run LITE example 12 | 13 | ### S1: run cluster manager 14 | ./mgmt_server 15 | ### S2: follow step 5.2.2 in README.md to initial userspace_ibapi_join 16 | ``` 17 | userspace_ibapi_join(IP, eth-port, IB-port) --> change to correct IP and port 18 | ``` 19 | or you could study `lite_join.c` to find the way to join cluster 20 | ### S3: compile by Makefile 21 | make all 22 | ### S4: execute RPC example example 23 | on node 1, execute `./lite_rpc.o 0` 24 | on node 2, execute `./lite_rpc.o 1` 25 | ### S5: execute LITE-Write example 26 | on node 2, execute ./lite_write.o 1 27 | Remember to rebuild the whole cluster after running examples. 
28 | -------------------------------------------------------------------------------- /lite-userspace/lite-lib.c: -------------------------------------------------------------------------------- 1 | #include "lite-lib.h" 2 | 3 | inline int userspace_liteapi_dist_barrier(unsigned int num) 4 | { 5 | return syscall(__NR_lite_dist_barrier, num); 6 | } 7 | 8 | inline int userspace_liteapi_alloc_remote_mem(unsigned int node_id, unsigned int size, bool atomic_flag, int password) 9 | { 10 | return syscall(__NR_lite_alloc_remote, node_id, size, atomic_flag, password); 11 | } 12 | 13 | inline int userspace_liteapi_register_application(unsigned int destined_port, unsigned int max_size_per_message, unsigned int max_user_per_node, char *name, uint64_t name_len) 14 | { 15 | return syscall(__NR_lite_register_application, destined_port, max_size_per_message, max_user_per_node, name, name_len); 16 | } 17 | 18 | inline int userspace_liteapi_receive_message(unsigned int port, void *ret_addr, int receive_size, uintptr_t *descriptor, int block_call) 19 | { 20 | int ret; 21 | //ret = syscall(__NR_lite_receive_message, receive_size*IMM_MAX_PORT+port, ret_addr, descriptor, 0, block_call, NULL_PRIORITY); 22 | ret = syscall(__NR_lite_receive_message, (receive_size<= LIMITATION) 72 | { 73 | printf("%s: size %d too big\n", __func__, size); 74 | return -1; 75 | } 76 | return syscall(__NR_lite_send_reply_imm, target_node, (size<= LIMITATION || max_ret_size >= LIMITATION) 82 | { 83 | printf("%s: size %d max_ret_size %d too big\n", __func__, size, max_ret_size); 84 | return -1; 85 | } 86 | //return syscall(__NR_lite_send_reply_imm, target_node, size*IMM_MAX_PORT+port, addr, ret_addr, 0, max_ret_size); 87 | return syscall(__NR_lite_send_reply_imm, target_node, (size<= LIMITATION || max_ret_size >= LIMITATION) 93 | { 94 | printf("%s: size %d max_ret_size %d too big\n", __func__, size, max_ret_size); 95 | return -1; 96 | } 97 | //return syscall(__NR_lite_send_reply_imm, target_node, 
size*IMM_MAX_PORT+port, addr, ret_addr, 0, max_ret_size*IMM_MAX_PRIORITY+USERSPACE_HIGH_PRIORITY); 98 | return syscall(__NR_lite_send_reply_imm, target_node, (size<= LIMITATION || max_ret_size >= LIMITATION) 104 | { 105 | printf("%s: size %d max_ret_size %d too big\n", __func__, size, max_ret_size); 106 | return -1; 107 | } 108 | //return syscall(__NR_lite_send_reply_imm, target_node, size*IMM_MAX_PORT+port, addr, ret_addr, 0, max_ret_size*IMM_MAX_PRIORITY+USERSPACE_LOW_PRIORITY); 109 | return syscall(__NR_lite_send_reply_imm, target_node, (size<= LIMITATION || max_ret_size >= LIMITATION) 117 | { 118 | printf("%s: size %d max_ret_size %d too big\n", __func__, size, max_ret_size); 119 | return -1; 120 | } 121 | //ret = syscall(__NR_lite_send_reply_imm, target_node, size*IMM_MAX_PORT+port, addr, ret_addr, ret_length, max_ret_size*IMM_MAX_PRIORITY+NULL_PRIORITY); 122 | ret = syscall(__NR_lite_send_reply_imm, target_node, (size<= LIMITATION) 136 | { 137 | printf("%s: size %d too big\n", __func__, size); 138 | return -1; 139 | } 140 | return syscall(__NR_lite_reply_message, addr, size, descriptor, NULL_PRIORITY); 141 | } 142 | inline int userspace_liteapi_reply_message_high(void *addr, int size, uintptr_t descriptor) 143 | { 144 | if(size >= LIMITATION) 145 | { 146 | printf("%s: size %d too big\n", __func__, size); 147 | return -1; 148 | } 149 | return syscall(__NR_lite_reply_message, addr, size, descriptor, USERSPACE_HIGH_PRIORITY); 150 | } 151 | inline int userspace_liteapi_reply_message_low(void *addr, int size, uintptr_t descriptor) 152 | { 153 | if(size >= LIMITATION) 154 | { 155 | printf("%s: size %d too big\n", __func__, size); 156 | return -1; 157 | } 158 | return syscall(__NR_lite_reply_message, addr, size, descriptor, USERSPACE_LOW_PRIORITY); 159 | } 160 | 161 | inline int userspace_liteapi_query_port(int target_node, int designed_port) 162 | { 163 | return syscall(__NR_lite_query_port, target_node, designed_port, 0); 164 | } 165 | 166 | inline int 
userspace_liteapi_wrap_alloc(void *data, int size, uint64_t identifier, int password) 167 | { 168 | return syscall(__NR_lite_wrap_alloc, data, size, identifier, password); 169 | } 170 | 171 | inline int userspace_liteapi_ask_lmr(int memory_node, uint64_t identifier, uint64_t permission, int password) 172 | { 173 | return syscall(__NR_lite_ask_lmr, memory_node, identifier, permission, password); 174 | } 175 | 176 | inline int userspace_liteapi_get_node_id(void) 177 | { 178 | return syscall(__NR_lite_get_node_id); 179 | } 180 | 181 | inline int userspace_liteapi_get_total_node(void) 182 | { 183 | return syscall(__NR_lite_get_total_node); 184 | } 185 | 186 | inline int userspace_liteapi_rdma_write(unsigned lite_handler, void *local_addr, unsigned int size, unsigned int offset, int password) 187 | { 188 | return syscall(__NR_lite_rdma_synwrite, lite_handler, local_addr, size, NULL_PRIORITY, offset, password); 189 | } 190 | 191 | inline int userspace_liteapi_rdma_write_high(unsigned lite_handler, void *local_addr, unsigned int size, unsigned int offset, int password) 192 | { 193 | return syscall(__NR_lite_rdma_synwrite, lite_handler, local_addr, size, USERSPACE_HIGH_PRIORITY, offset, password); 194 | } 195 | 196 | inline int userspace_liteapi_rdma_write_low(unsigned lite_handler, void *local_addr, unsigned int size, unsigned int offset, int password) 197 | { 198 | return syscall(__NR_lite_rdma_synwrite, lite_handler, local_addr, size, USERSPACE_LOW_PRIORITY, offset, password); 199 | } 200 | 201 | inline int userspace_liteapi_rdma_read(unsigned lite_handler, void *local_addr, unsigned int size, unsigned int offset, int password) 202 | { 203 | return syscall(__NR_lite_rdma_read, lite_handler, local_addr, size, NULL_PRIORITY, offset, password); 204 | } 205 | 206 | inline int userspace_liteapi_rdma_read_high(unsigned lite_handler, void *local_addr, unsigned int size, unsigned int offset, int password) 207 | { 208 | return syscall(__NR_lite_rdma_read, lite_handler, 
local_addr, size, USERSPACE_HIGH_PRIORITY, offset, password); 209 | } 210 | 211 | inline int userspace_liteapi_rdma_read_low(unsigned lite_handler, void *local_addr, unsigned int size, unsigned int offset, int password) 212 | { 213 | return syscall(__NR_lite_rdma_read, lite_handler, local_addr, size, USERSPACE_LOW_PRIORITY, offset, password); 214 | } 215 | 216 | void* userspace_liteapi_alloc_memory(unsigned long size) 217 | { 218 | unsigned long roundup_size = (((1<>PAGE_SHIFT)<=0) 240 | return ret; 241 | else 242 | printf("create lock error\n"); 243 | return 0; 244 | } 245 | 246 | inline int userspace_liteapi_ask_lock(int target_node, int target_idx, remote_spinlock_t *input) 247 | { 248 | int ret; 249 | ret = syscall(__NR_lite_ask_lock, target_node, target_idx, input); 250 | if(ret>=0) 251 | return ret; 252 | else 253 | printf("ask lock error\n"); 254 | return 0; 255 | } 256 | 257 | inline int userspace_liteapi_lock(remote_spinlock_t *input) 258 | { 259 | return syscall(__NR_lite_lock, input); 260 | } 261 | 262 | inline int userspace_liteapi_unlock(remote_spinlock_t *input) 263 | { 264 | return syscall(__NR_lite_unlock, input); 265 | } 266 | 267 | inline int userspace_liteapi_remote_memset(unsigned lite_handler, int offset, int size) 268 | { 269 | return syscall(__NR_lite_remote_memset, lite_handler, offset, size); 270 | } 271 | 272 | inline int userspace_liteapi_add_ask_mr_table(uint64_t identifier, uint64_t lmr, uint64_t permission, int password) 273 | { 274 | return syscall(__NR_lite_add_ask_mr_table, identifier, lmr, permission, password); 275 | } 276 | 277 | inline int userspace_liteapi_compare_swp(unsigned long lite_handler, void *local_addr, unsigned long long guess_value, unsigned long long set_value) 278 | { 279 | return syscall(__NR_lite_compare_swp, lite_handler, local_addr, guess_value, set_value, 0); 280 | } 281 | 282 | inline int userspace_liteapi_fetch_add(unsigned long lite_handler, void *local_addr, unsigned long long input_value) 283 | { 284 | 
return syscall(__NR_lite_fetch_add, lite_handler, local_addr, input_value, 0); 285 | } 286 | 287 | //int userspace_liteapi_reply_and_receive_message(void *addr, int size, uintptr_t descriptor, unsigned int port, void *ret_addr, int receive_size, uintptr_t *receive_descriptor, int block_call) 288 | inline int userspace_liteapi_reply_and_receive_message(void *addr, int size, uintptr_t descriptor, unsigned int port, void *ret_addr, int receive_size, uintptr_t *receive_descriptor) 289 | { 290 | return syscall(__NR_lite_reply_and_receive_message, addr, size*IMM_MAX_PORT+port, descriptor, ret_addr, receive_size, receive_descriptor); 291 | } 292 | 293 | inline int userspace_syscall_test(void) 294 | { 295 | return syscall(__NR_lite_umap_testsyscall, 0); 296 | } 297 | 298 | inline int userspace_liteapi_join(char *input_str, int eth_port, int ib_port) 299 | { 300 | char ipstr[32]; 301 | memset(ipstr, 0, 32); 302 | strcpy(ipstr, input_str); 303 | return syscall(__NR_lite_join, ipstr, eth_port, ib_port); 304 | } 305 | 306 | int stick_this_thread_to_core(int core_id) 307 | { 308 | int num_cores = sysconf(_SC_NPROCESSORS_ONLN); 309 | if (core_id < 0 || core_id >= num_cores) 310 | return EINVAL; 311 | 312 | cpu_set_t cpuset; 313 | CPU_ZERO(&cpuset); 314 | CPU_SET(core_id, &cpuset); 315 | 316 | pthread_t current_thread = pthread_self(); 317 | return pthread_setaffinity_np(current_thread, sizeof(cpu_set_t), &cpuset); 318 | } 319 | 320 | -------------------------------------------------------------------------------- /lite-userspace/lite-lib.h: -------------------------------------------------------------------------------- 1 | #ifndef FIT_TEST 2 | #define FIT_TEST 3 | 4 | #define _GNU_SOURCE 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | 24 | #define max(x, y) \ 25 | ({ \ 26 | x > y ? 
x : y; \ 27 | }) 28 | 29 | #define IMM_SEND_ONLY_FLAG 0xffffffffffffffff 30 | 31 | struct lmr_info { 32 | //struct ib_device *context; 33 | //struct ib_pd *pd; 34 | void *addr; 35 | size_t length; 36 | //uint32_t handle; 37 | uint32_t lkey; 38 | uint32_t rkey; 39 | uint32_t node_id; 40 | }; 41 | struct lite_lock_form{ 42 | int lock_num; 43 | struct lmr_info lock_mr; 44 | uint64_t ticket_num; 45 | }; 46 | typedef struct lite_lock_form remote_spinlock_t; 47 | 48 | struct reply_struct{ 49 | void *addr; 50 | int size; 51 | uintptr_t descriptor; 52 | }; 53 | 54 | struct receive_struct{ 55 | unsigned int designed_port; 56 | void *ret_addr; 57 | int receive_size; 58 | void *descriptor; 59 | int block_call; 60 | }; 61 | 62 | #define __NR_lite_remote_memset 319 63 | #define __NR_lite_fetch_add 320 64 | #define __NR_lite_rdma_synwrite 321 65 | #define __NR_lite_rdma_read 322 66 | #define __NR_lite_ask_lmr 323 67 | #define __NR_lite_dist_barrier 327 68 | #define __NR_lite_add_ask_mr_table 328 69 | #define __NR_lite_compare_swp 329 70 | #define __NR_lite_alloc_remote 330 71 | 72 | #define __NR_lite_register_application 332 73 | #define __NR_lite_receive_message 334 74 | #define __NR_lite_send_reply_imm 335 75 | #define __NR_lite_reply_message 336 76 | #define __NR_lite_get_node_id 337 77 | #define __NR_lite_query_port 338 78 | #define __NR_lite_alloc_memory 339 79 | 80 | #define __NR_lite_umap_testsyscall 331 81 | 82 | #define __NR_lite_wrap_alloc 340 83 | 84 | #define __NR_lite_create_lock 341 85 | #define __NR_lite_ask_lock 342 86 | #define __NR_lite_lock 343 87 | #define __NR_lite_unlock 344 88 | #define __NR_lite_get_total_node 345 89 | #define __NR_lite_reply_and_receive_message 346 90 | #define __NR_lite_join 347 91 | 92 | #define __ACTIVE_NODES 3 93 | #define LIMITATION 1024*1024*4 94 | #define PAGE_SHIFT 12 95 | 96 | #define IMM_MAX_PORT 64 97 | #define IMM_MAX_PORT_BIT 6 98 | #define IMM_MAX_PRIORITY 64 99 | #define IMM_MAX_PRIORITY_BIT 6 100 | 101 | #define 
SEND_REPLY_WAIT -101 102 | 103 | #define CHECK_LENGTH 100000 104 | 105 | #define USERSPACE_HIGH_PRIORITY 16 106 | #define USERSPACE_LOW_PRIORITY 17 107 | #define NULL_PRIORITY 0 108 | 109 | enum permission_mode{ 110 | MR_READ_FLAG=0x01, 111 | MR_WRITE_FLAG=0x02, 112 | MR_SHARE_FLAG=0x04, 113 | MR_ADMIN_FLAG=0x08, 114 | MR_ATOMIC_FLAG=0x10, 115 | MR_ASK_SUCCESS=0, 116 | MR_ASK_REFUSE=1, 117 | MR_ASK_UNPERMITTED=2, 118 | MR_ASK_HANDLER_ERROR=3, 119 | MR_ASK_UNKNOWN=4 120 | }; 121 | #define BLOCK_CALL 1 122 | inline int userspace_liteapi_get_node_id(void); 123 | inline int userspace_liteapi_get_total_node(void); 124 | inline int userspace_liteapi_dist_barrier(unsigned int num); 125 | inline int userspace_liteapi_register_application(unsigned int destined_port, unsigned int max_size_per_message, unsigned int max_user_per_node, char *name, uint64_t name_len); 126 | inline int userspace_liteapi_receive_message(unsigned int port, void *ret_addr, int receive_size, uintptr_t *descriptor, int block_call); 127 | inline int userspace_liteapi_receive_message_high(unsigned int port, void *ret_addr, int receive_size, uintptr_t *descriptor, int block_call); 128 | inline int userspace_liteapi_receive_message_low(unsigned int port, void *ret_addr, int receive_size, uintptr_t *descriptor, int block_call); 129 | inline int userspace_liteapi_receive_message_fast(unsigned int port, void *ret_addr, int receive_size, uintptr_t *descriptor, int *ret_length, int block_call); 130 | inline double userspace_liteapi_receive_message_fast_record(unsigned int port, void *ret_addr, int receive_size, uintptr_t *descriptor, int *ret_length, int block_call); 131 | inline int userspace_liteapi_send_reply_imm(int target_node, unsigned int port, void *addr, int size, void *ret_addr, int max_ret_size); 132 | inline int userspace_liteapi_send_reply_imm_high(int target_node, unsigned int port, void *addr, int size, void *ret_addr, int max_ret_size); 133 | inline int userspace_liteapi_send_reply_imm_low(int 
target_node, unsigned int port, void *addr, int size, void *ret_addr, int max_ret_size); 134 | inline int userspace_liteapi_send_reply_imm_fast(int target_node, unsigned int port, void *addr, int size, void *ret_addr, int *ret_length, int max_ret_size); 135 | inline int userspace_liteapi_reply_message(void *addr, int size, uintptr_t descriptor); 136 | inline int userspace_liteapi_reply_message_high(void *addr, int size, uintptr_t descriptor); 137 | inline int userspace_liteapi_reply_message_low(void *addr, int size, uintptr_t descriptor); 138 | inline int userspace_liteapi_reply_and_receive_message(void *addr, int size, uintptr_t descriptor, unsigned int port, void *ret_addr, int receive_size, uintptr_t *receive_descriptor); 139 | inline int userspace_liteapi_query_port(int target_node, int designed_port); 140 | inline int userspace_liteapi_wrap_alloc(void *data, int size, uint64_t identifier, int password); 141 | inline int userspace_liteapi_ask_lmr(int memory_node, uint64_t identifier, uint64_t permission, int password); 142 | inline int userspace_liteapi_rdma_read(unsigned lite_handler, void *local_addr, unsigned int size, unsigned int offset, int password); 143 | inline int userspace_liteapi_rdma_read_high(unsigned lite_handler, void *local_addr, unsigned int size, unsigned int offset, int password); 144 | inline int userspace_liteapi_rdma_read_low(unsigned lite_handler, void *local_addr, unsigned int size, unsigned int offset, int password); 145 | inline int userspace_liteapi_rdma_write(unsigned lite_handler, void *local_addr, unsigned int size, unsigned int offset, int password); 146 | inline int userspace_liteapi_rdma_write_high(unsigned lite_handler, void *local_addr, unsigned int size, unsigned int offset, int password); 147 | inline int userspace_liteapi_rdma_write_low(unsigned lite_handler, void *local_addr, unsigned int size, unsigned int offset, int password); 148 | void* userspace_liteapi_alloc_memory(unsigned long size); 149 | inline int 
userspace_liteapi_create_lock(int target_node, remote_spinlock_t *input); 150 | inline int userspace_liteapi_ask_lock(int target_node, int target_idx, remote_spinlock_t *input); 151 | inline int userspace_liteapi_lock(remote_spinlock_t *input); 152 | inline int userspace_liteapi_unlock(remote_spinlock_t *input); 153 | inline int userspace_liteapi_memset(unsigned lite_handler, int offset, int size); 154 | inline int userspace_liteapi_alloc_remote_mem(unsigned int node_id, unsigned int size, bool atomic_flag, int password); 155 | inline int userspace_liteapi_compare_swp(unsigned long lite_handler, void *local_addr, unsigned long long guess_value, unsigned long long set_value); 156 | inline int userspace_liteapi_add_ask_mr_table(uint64_t identifier, uint64_t lmr, uint64_t permission, int password); 157 | inline int userspace_liteapi_remote_memset(unsigned lite_handler, int offset, int size); 158 | inline int userspace_liteapi_fetch_add(unsigned long lite_handler, void *local_addr, unsigned long long input_value); 159 | inline int userspace_syscall_test(void); 160 | inline int userspace_liteapi_join(char *input_str, int eth_port, int ib_port); 161 | int stick_this_thread_to_core(int core_id); 162 | inline int userspace_liteapi_send(int target_node, unsigned int port, void *addr, int size); 163 | #endif 164 | -------------------------------------------------------------------------------- /lite-userspace/lite_join.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include "lite-lib.h" 16 | 17 | int main(int argc, char *argv[]) 18 | { 19 | printf("Ready to join a LITE cluster\n"); 20 | userspace_liteapi_join("192.168.1.1", 18500, 1); 21 | printf("after join cluster as %d\n", userspace_liteapi_get_node_id()); 22 | return 0; 23 | } 24 | 
-------------------------------------------------------------------------------- /lite-userspace/lite_rpc.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include "lite-lib.h" 17 | 18 | 19 | 20 | const int run_times = 50000; 21 | 22 | int testsize[7]={8,8,64,512,1024,2048,4096}; 23 | 24 | int test_MB_size; 25 | int write_mode = 0; 26 | int thread_node; 27 | int thread_send_num=1; 28 | int thread_recv_num=1; 29 | pthread_mutex_t count_mutex; 30 | int count = 0; 31 | int go = 0; 32 | pthread_mutex_t end_count_mutex; 33 | int end_count = 0; 34 | 35 | 36 | void *thread_send_lat(void *tmp) 37 | { 38 | int ret; 39 | int remote_node = thread_node; 40 | int port = *(int *)tmp; 41 | char *read = memalign(sysconf(_SC_PAGESIZE),4096*2); 42 | char *write = memalign(sysconf(_SC_PAGESIZE),4096*2); 43 | int ret_length; 44 | int i,j; 45 | struct timespec start, end; 46 | double total_lat; 47 | double *record=calloc(run_times, sizeof(double)); 48 | memset(write, 0x36, 4096); 49 | memset(read, 0, 4096); 50 | mlock(read, 4096); 51 | mlock(write, 4096); 52 | mlock(&ret_length, sizeof(int)); 53 | for(j=0;j<7;j++) 54 | { 55 | memset(read, 0, 4096); 56 | pthread_mutex_lock(&count_mutex); 57 | count++; 58 | pthread_mutex_unlock(&count_mutex); 59 | while(count<(thread_send_num+1)*(j+1)); 60 | for(i=0;i 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include "lite-lib.h" 16 | 17 | 18 | int write_mode = 0; 19 | 20 | int run_times = 10000; 21 | 22 | 23 | int init_log(int remote_node) 24 | { 25 | uint64_t xact_ID; 26 | uint64_t test_key; 27 | int i, j; 28 | int *random_idx; 29 | struct timespec start, end; 30 | double 
*total_lat; 31 | char *write = memalign(sysconf(_SC_PAGESIZE),1024*1024*4); 32 | memset(write, 0x36, 1024*64); 33 | //=========================RDMA syscall experiments======================= 34 | int n=run_times; 35 | int testsize[12]={8,8,64,128,512,1024,1024*2,1024*4,1024*8, 1024*16, 1024*32, 1024*64}; 36 | int password=100; 37 | 38 | test_key = userspace_liteapi_alloc_remote_mem(remote_node, 1024*1024*4, 0, password); 39 | printf("finish allocation get key %d\n", test_key); 40 | for(j=0;j<12;j++) 41 | { 42 | for(i=0;ianon_vma, pmd); 105 | return 0; 106 | } 107 | +EXPORT_SYMBOL(__pte_alloc); 108 | 109 | int __pte_alloc_kernel(pmd_t *pmd, unsigned long address) 110 | { 111 | @@ -3877,6 +3878,7 @@ 112 | spin_unlock(&mm->page_table_lock); 113 | return 0; 114 | } 115 | +EXPORT_SYMBOL(__pud_alloc); 116 | #endif /* __PAGETABLE_PUD_FOLDED */ 117 | 118 | #ifndef __PAGETABLE_PMD_FOLDED 119 | @@ -3907,6 +3909,7 @@ 120 | spin_unlock(&mm->page_table_lock); 121 | return 0; 122 | } 123 | +EXPORT_SYMBOL(__pmd_alloc); 124 | #endif /* __PAGETABLE_PMD_FOLDED */ 125 | 126 | #if !defined(__HAVE_ARCH_GATE_AREA) 127 | -------------------------------------------------------------------------------- /lite_kernel_patch_3.11.1: -------------------------------------------------------------------------------- 1 | diff -uNr linux-3.11.1/arch/x86/syscalls/syscall_64.tbl lite-kernel/arch/x86/syscalls/syscall_64.tbl 2 | --- linux-3.11.1/arch/x86/syscalls/syscall_64.tbl 2017-10-24 17:27:28.402482943 -0400 3 | +++ lite-kernel/arch/x86/syscalls/syscall_64.tbl 2017-10-24 17:28:23.245636614 -0400 4 | @@ -321,6 +321,38 @@ 5 | 312 common kcmp sys_kcmp 6 | 313 common finit_module sys_finit_module 7 | 8 | +319 common lite_remote_memset sys_lite_remote_memset 9 | +320 common lite_fetch_add sys_lite_fetch_add 10 | +321 common lite_rdma_synwrite sys_lite_rdma_synwrite 11 | +322 common lite_rdma_read sys_lite_rdma_read 12 | +323 common lite_ask_lmr sys_lite_ask_lmr 13 | + 14 | + 15 | +326 common 
lite_rdma_asywrite sys_lite_rdma_asywrite 16 | +327 common lite_dist_barrier sys_lite_dist_barrier 17 | +328 common lite_add_ask_mr_table sys_lite_add_ask_mr_table 18 | +329 common lite_compare_swp sys_lite_compare_swp 19 | + 20 | +330 common lite_alloc_remote sys_lite_alloc_remote 21 | +331 common lite_umap_lmr sys_lite_umap_lmr 22 | + 23 | +332 common lite_register_application sys_lite_register_application 24 | +333 common lite_unregister_application sys_lite_unregister_application 25 | +334 common lite_receive_message sys_lite_receive_message 26 | +335 common lite_send_reply_imm sys_lite_send_reply_imm 27 | +336 common lite_reply_message sys_lite_reply_message 28 | +337 common lite_get_node_id sys_lite_get_node_id 29 | +338 common lite_query_port sys_lite_query_port 30 | +339 common lite_alloc_continuous_memory sys_lite_alloc_continuous_memory 31 | +340 common lite_wrap_alloc_for_remote_access sys_lite_wrap_alloc_for_remote_access 32 | +341 common lite_create_lock sys_lite_create_lock 33 | +342 common lite_ask_lock sys_lite_ask_lock 34 | +343 common lite_lock sys_lite_lock 35 | +344 common lite_unlock sys_lite_unlock 36 | +345 common lite_get_total_node sys_lite_get_total_node 37 | +346 common lite_reply_and_receive_message sys_lite_reply_and_receive_message 38 | +347 common lite_join sys_lite_join 39 | + 40 | # 41 | # x32-specific system call numbers start at 512 to avoid cache impact 42 | # for native 64-bit operation. 
43 | diff -uNr linux-3.11.1/include/linux/syscalls.h lite-kernel/include/linux/syscalls.h 44 | --- linux-3.11.1/include/linux/syscalls.h 2017-10-24 17:27:28.612483534 -0400 45 | +++ lite-kernel/include/linux/syscalls.h 2017-10-24 17:28:38.863682487 -0400 46 | @@ -846,4 +846,43 @@ 47 | asmlinkage long sys_kcmp(pid_t pid1, pid_t pid2, int type, 48 | unsigned long idx1, unsigned long idx2); 49 | asmlinkage long sys_finit_module(int fd, const char __user *uargs, int flags); 50 | + 51 | + 52 | +/* Fit design */ 53 | + 54 | +asmlinkage long sys_lite_remote_memset(unsigned long, int, int); 55 | +asmlinkage long sys_lite_fetch_add(unsigned long, void __user *, 56 | + unsigned long long, unsigned int); 57 | +asmlinkage long sys_lite_rdma_synwrite(unsigned long, void __user*, 58 | + unsigned int, unsigned int, unsigned int, int); 59 | +asmlinkage long sys_lite_rdma_asywrite(unsigned long, void __user*, 60 | + unsigned int, unsigned int, unsigned int); 61 | +asmlinkage long sys_lite_rdma_read(unsigned long, void __user*, 62 | + unsigned int, unsigned int, unsigned int, int); 63 | +asmlinkage long sys_lite_ask_lmr(int, uint64_t, uint64_t, int); 64 | + 65 | +asmlinkage long sys_lite_dist_barrier(unsigned int); 66 | + 67 | +asmlinkage long sys_lite_add_ask_mr_table(uint64_t, uint64_t, uint64_t, int); 68 | +asmlinkage long sys_lite_compare_swp(unsigned long, void*, unsigned long long, unsigned long long, unsigned int); 69 | +asmlinkage long sys_lite_alloc_remote(unsigned int, unsigned int, unsigned int, int); 70 | +asmlinkage long sys_lite_umap_lmr(unsigned long); 71 | + 72 | +asmlinkage long sys_lite_register_application(unsigned int, unsigned int, unsigned int, void*, unsigned int); 73 | +asmlinkage long sys_lite_unregister_application(unsigned int); 74 | +asmlinkage long sys_lite_receive_message(int, void*, void *, void*, int, unsigned int); 75 | +asmlinkage long sys_lite_send_reply_imm(int, int, void*, void*, void*, unsigned int); 76 | +asmlinkage long 
sys_lite_reply_message(void*, int, unsigned long, unsigned int); 77 | +asmlinkage long sys_lite_get_node_id(void); 78 | +asmlinkage long sys_lite_get_total_node(void); 79 | +asmlinkage long sys_lite_query_port(int, int, int); 80 | +asmlinkage long sys_lite_alloc_continuous_memory(unsigned long long, unsigned long); 81 | +asmlinkage long sys_lite_wrap_alloc_for_remote_access(void*, unsigned int, uint64_t, int); 82 | +asmlinkage long sys_lite_create_lock(int, void*); 83 | +asmlinkage long sys_lite_ask_lock(int, int, void*); 84 | +asmlinkage long sys_lite_lock(void*); 85 | +asmlinkage long sys_lite_unlock(void*); 86 | +asmlinkage long sys_lite_reply_and_receive_message(void *, int, unsigned long, void *, int, void *); 87 | +asmlinkage long sys_lite_join(void *, int, int); 88 | + 89 | #endif 90 | diff -uNr linux-3.11.1/Makefile lite-kernel/Makefile 91 | --- linux-3.11.1/Makefile 2017-10-24 17:27:28.013481847 -0400 92 | +++ lite-kernel/Makefile 2017-10-24 17:27:56.429561876 -0400 93 | @@ -733,7 +733,7 @@ 94 | 95 | 96 | ifeq ($(KBUILD_EXTMOD),) 97 | -core-y += kernel/ mm/ fs/ ipc/ security/ crypto/ block/ 98 | +core-y += kernel/ mm/ fs/ ipc/ security/ crypto/ block/ ../lite-syscall/ 99 | 100 | vmlinux-dirs := $(patsubst %/,%,$(filter %/, $(init-y) $(init-m) \ 101 | $(core-y) $(core-m) $(drivers-y) $(drivers-m) \ 102 | diff -uNr linux-3.11.1/mm/memory.c lite-kernel/mm/memory.c 103 | --- linux-3.11.1/mm/memory.c 2017-10-24 17:27:28.534483315 -0400 104 | +++ lite-kernel/mm/memory.c 2017-10-24 17:28:09.398598400 -0400 105 | @@ -611,6 +611,7 @@ 106 | wait_split_huge_page(vma->anon_vma, pmd); 107 | return 0; 108 | } 109 | +EXPORT_SYMBOL(__pte_alloc); 110 | 111 | int __pte_alloc_kernel(pmd_t *pmd, unsigned long address) 112 | { 113 | @@ -3882,6 +3883,7 @@ 114 | spin_unlock(&mm->page_table_lock); 115 | return 0; 116 | } 117 | +EXPORT_SYMBOL(__pud_alloc); 118 | #endif /* __PAGETABLE_PUD_FOLDED */ 119 | 120 | #ifndef __PAGETABLE_PMD_FOLDED 121 | @@ -3912,6 +3914,7 @@ 122 | 
spin_unlock(&mm->page_table_lock); 123 | return 0; 124 | } 125 | +EXPORT_SYMBOL(__pmd_alloc); 126 | #endif /* __PAGETABLE_PMD_FOLDED */ 127 | 128 | #if !defined(__HAVE_ARCH_GATE_AREA) 129 | --------------------------------------------------------------------------------