├── Makefile ├── context.c ├── direct.c └── switch.c /Makefile: -------------------------------------------------------------------------------- 1 | CC=gcc 2 | CFLAGS=-std=gnu99 3 | LDFLAGS=-lrt 4 | EXECS=context direct switch 5 | 6 | all: $(EXECS) 7 | ./switch 8 | ./direct 9 | ./context 10 | 11 | clean: 12 | rm -f $(EXECS) 13 | 14 | .PHONY: all clean 15 | -------------------------------------------------------------------------------- /context.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #define LEN (1024 * 1024 * 8) 7 | #define LOOP 50 8 | 9 | struct timespec start; 10 | 11 | static int vec[16] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; 12 | 13 | static void loop(char *inst) { 14 | void *table[] = { &&L0, &&L1, &&L2, &&L3, &&L4, &&L5, &&L6, &&L7, &&L8, &&L9, &&L10, &&L11, &&L12, &&L13, &&L14, &&L15 }; 15 | void *sp; 16 | 17 | // Allocate a chunk of executable memory 18 | void *machinecode = mmap(NULL, 12 * LEN, PROT_WRITE | PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); 19 | if (mprotect(machinecode, 12 * LEN, PROT_WRITE | PROT_READ | PROT_EXEC)) 20 | perror("mprotect"); 21 | 22 | // Construct machine code to jump to the middle of this function using CALL 23 | // instruction. Because x86-64 does not have a CALL instruction with 8-byte 24 | // immediate operand, the destination address is first set to %RAX and then 25 | // CALL is called with %RAX. 26 | unsigned char *p = machinecode; 27 | for (int i = 0; i < LEN; i++) { 28 | // 0x48b8 is MOV <8-byte immediate operand>, %rax 29 | *(p++) = 0x48; *(p++) = 0xb8; 30 | *((long*)p) = (long)table[inst[i]]; 31 | p += 8; 32 | // 0xffd0 is CALL %rax 33 | *(p++) = 0xff; *(p++) = 0xd0; 34 | } 35 | 36 | clock_gettime(CLOCK_REALTIME, &start); 37 | 38 | // CALL instruction put a return address to the machine stack. This may cause 39 | // SEGV because C compiler does not expect the stack will be manipulated by 40 | // someone else. To maintain the stack the same, we pop the return address 41 | // immediately after coming back to this function with CALL, so that the depth 42 | // of the stack remains the same while compiler-generated code is being 43 | // executed. 44 | #define ENTER \ 45 | asm volatile ("pop %0;" : "=r"(sp)) 46 | 47 | // Restores the previous return address to the stack and jump to that address, 48 | // which effectively jumping to the next CALL instruction in machinecode. 49 | #define RET \ 50 | asm volatile ("push %0; ret;" : : "r"(sp) : "%rax") 51 | 52 | // Jumps to the begining of constructed machine code. 53 | goto *machinecode; 54 | 55 | L0: ENTER; return; 56 | L1: ENTER; for (int i = 0; i < LOOP; i++) vec[1]++; RET; 57 | L2: ENTER; for (int i = 0; i < LOOP; i++) vec[2]++; RET; 58 | L3: ENTER; for (int i = 0; i < LOOP; i++) vec[3]++; RET; 59 | L4: ENTER; for (int i = 0; i < LOOP; i++) vec[4]++; RET; 60 | L5: ENTER; for (int i = 0; i < LOOP; i++) vec[5]++; RET; 61 | L6: ENTER; for (int i = 0; i < LOOP; i++) vec[6]++; RET; 62 | L7: ENTER; for (int i = 0; i < LOOP; i++) vec[7]++; RET; 63 | L8: ENTER; for (int i = 0; i < LOOP; i++) vec[8]++; RET; 64 | L9: ENTER; for (int i = 0; i < LOOP; i++) vec[9]++; RET; 65 | L10: ENTER; for (int i = 0; i < LOOP; i++) vec[10]++; RET; 66 | L11: ENTER; for (int i = 0; i < LOOP; i++) vec[11]++; RET; 67 | L12: ENTER; for (int i = 0; i < LOOP; i++) vec[12]++; RET; 68 | L13: ENTER; for (int i = 0; i < LOOP; i++) vec[13]++; RET; 69 | L14: ENTER; for (int i = 0; i < LOOP; i++) vec[14]++; RET; 70 | L15: ENTER; for (int i = 0; i < LOOP; i++) vec[15]++; RET; 71 | } 72 | 73 | static void printvec() { 74 | for (int i = 0; i < 16; i++) 75 | printf("%d ", vec[i]); 76 | printf("\n"); 77 | } 78 | 79 | static void printtime() { 80 | struct timespec end; 81 | clock_gettime(CLOCK_REALTIME, &end); 82 | long t1 = start.tv_sec * 1000 * 1000 * 1000 + start.tv_nsec; 83 | long t2 = end.tv_sec * 1000 * 1000 * 1000 + end.tv_nsec; 84 | printf("time: %f\n", (t2 - t1) / 1e9); 85 | } 86 | 87 | int main(int argc, char **argv) { 88 | char *inst = malloc(LEN); 89 | for (int i = 0; i < LEN - 1;) { 90 | int op = rand() & 15; 91 | if (op == 0) continue; 92 | inst[i++] = op; 93 | } 94 | inst[LEN - 1] = 0; 95 | loop(inst); 96 | printvec(); 97 | printtime(); 98 | return 0; 99 | } 100 | -------------------------------------------------------------------------------- /direct.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #define LEN (1024 * 1024 * 8) 6 | #define LOOP 50 7 | 8 | struct timespec start; 9 | 10 | static int vec[16] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; 11 | 12 | static void loop(char *inst) { 13 | void *table[] = { &&L0, &&L1, &&L2, &&L3, &&L4, &&L5, &&L6, &&L7, &&L8, &&L9, &&L10, &&L11, &&L12, &&L13, &&L14, &&L15 }; 14 | 15 | void **addrs = malloc(LEN * sizeof(void *)); 16 | for (int i = 0; i < LEN; i++) 17 | addrs[i] = table[inst[i]]; 18 | 19 | clock_gettime(CLOCK_REALTIME, &start); 20 | #define NEXT goto *(*addrs++) 21 | NEXT; 22 | L0: return; 23 | L1: for (int i = 0; i < LOOP; i++) vec[1]++; NEXT; 24 | L2: for (int i = 0; i < LOOP; i++) vec[2]++; NEXT; 25 | L3: for (int i = 0; i < LOOP; i++) vec[3]++; NEXT; 26 | L4: for (int i = 0; i < LOOP; i++) vec[4]++; NEXT; 27 | L5: for (int i = 0; i < LOOP; i++) vec[5]++; NEXT; 28 | L6: for (int i = 0; i < LOOP; i++) vec[6]++; NEXT; 29 | L7: for (int i = 0; i < LOOP; i++) vec[7]++; NEXT; 30 | L8: for (int i = 0; i < LOOP; i++) vec[8]++; NEXT; 31 | L9: for (int i = 0; i < LOOP; i++) vec[9]++; NEXT; 32 | L10: for (int i = 0; i < LOOP; i++) vec[10]++; NEXT; 33 | L11: for (int i = 0; i < LOOP; i++) vec[11]++; NEXT; 34 | L12: for (int i = 0; i < LOOP; i++) vec[12]++; NEXT; 35 | L13: for (int i = 0; i < LOOP; i++) vec[13]++; NEXT; 36 | L14: for (int i = 0; i < LOOP; i++) vec[14]++; NEXT; 37 | L15: for (int i = 0; i < LOOP; i++) vec[15]++; NEXT; 38 | } 39 | 40 | void printvec() { 41 | for (int i = 0; i < 16; i++) 42 | printf("%d ", vec[i]); 43 | printf("\n"); 44 | } 45 | 46 | void printtime() { 47 | struct timespec end; 48 | clock_gettime(CLOCK_REALTIME, &end); 49 | long t1 = start.tv_sec * 1000 * 1000 * 1000 + start.tv_nsec; 50 | long t2 = end.tv_sec * 1000 * 1000 * 1000 + end.tv_nsec; 51 | printf("time: %f\n", (t2 - t1) / 1e9); 52 | } 53 | 54 | int main(int argc, char **argv) { 55 | char *inst = malloc(LEN); 56 | for (int i = 0; i < LEN - 1;) { 57 | int op = rand() & 15; 58 | if (op == 0) continue; 59 | inst[i++] = op; 60 | } 61 | inst[LEN - 1] = 0; 62 | loop(inst); 63 | printvec(); 64 | printtime(); 65 | return 0; 66 | } 67 | -------------------------------------------------------------------------------- /switch.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #define LEN (1024 * 1024 * 8) 6 | #define LOOP 50 7 | 8 | struct timespec start; 9 | 10 | static int vec[16] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; 11 | 12 | static void loop(char *inst) { 13 | clock_gettime(CLOCK_REALTIME, &start); 14 | for (;;) { 15 | switch (*inst) { 16 | case 0: return; 17 | case 1: for (int i = 0; i < LOOP; i++) vec[1]++; break; 18 | case 2: for (int i = 0; i < LOOP; i++) vec[2]++; break; 19 | case 3: for (int i = 0; i < LOOP; i++) vec[3]++; break; 20 | case 4: for (int i = 0; i < LOOP; i++) vec[4]++; break; 21 | case 5: for (int i = 0; i < LOOP; i++) vec[5]++; break; 22 | case 6: for (int i = 0; i < LOOP; i++) vec[6]++; break; 23 | case 7: for (int i = 0; i < LOOP; i++) vec[7]++; break; 24 | case 8: for (int i = 0; i < LOOP; i++) vec[8]++; break; 25 | case 9: for (int i = 0; i < LOOP; i++) vec[9]++; break; 26 | case 10: for (int i = 0; i < LOOP; i++) vec[10]++; break; 27 | case 11: for (int i = 0; i < LOOP; i++) vec[11]++; break; 28 | case 12: for (int i = 0; i < LOOP; i++) vec[12]++; break; 29 | case 13: for (int i = 0; i < LOOP; i++) vec[13]++; break; 30 | case 14: for (int i = 0; i < LOOP; i++) vec[14]++; break; 31 | case 15: for (int i = 0; i < LOOP; i++) vec[15]++; break; 32 | } 33 | inst++; 34 | } 35 | } 36 | 37 | static void printvec() { 38 | for (int i = 0; i < 16; i++) 39 | printf("%d ", vec[i]); 40 | printf("\n"); 41 | } 42 | 43 | static void printtime() { 44 | struct timespec end; 45 | clock_gettime(CLOCK_REALTIME, &end); 46 | long t1 = start.tv_sec * 1000 * 1000 * 1000 + start.tv_nsec; 47 | long t2 = end.tv_sec * 1000 * 1000 * 1000 + end.tv_nsec; 48 | printf("time: %f\n", (t2 - t1) / 1e9); 49 | } 50 | 51 | int main(int argc, char **argv) { 52 | // create random "VM" instructions 53 | char *inst = malloc(LEN); 54 | for (int i = 0; i < LEN - 1;) { 55 | int op = rand() & 15; 56 | if (op == 0) continue; 57 | inst[i++] = op; 58 | } 59 | inst[LEN - 1] = 0; 60 | 61 | loop(inst); 62 | printvec(); 63 | printtime(); 64 | return 0; 65 | } 66 | --------------------------------------------------------------------------------