├── Exceptional_Control_Flow ├── README.md ├── concurrency.c ├── concurrency_buggy.c ├── counter.c ├── csapp.c ├── csapp.h ├── fork1.c ├── fork2.c ├── restart.c ├── setjmp.c ├── signal.c ├── sigsuspend.c ├── waitforsignal.c └── waitpid.c ├── Linking ├── duplicate_symbol_names_example │ ├── Makefile │ ├── bar1.c │ ├── bar2.c │ ├── bar3.c │ ├── bar4.c │ ├── bar5.c │ ├── bar6.c │ ├── foo1.c │ ├── foo2.c │ ├── foo3.c │ ├── foo4.c │ ├── foo5.c │ ├── foo6.c │ ├── readme.md │ ├── show_bytes_b.c │ └── test.sh ├── elfstructs.c ├── example_static_and_dynamic_linking │ ├── Makefile │ ├── addvec.c │ ├── dll.c │ ├── main2.c │ ├── multvec.c │ ├── readme.md │ └── vector.h ├── library_interpositioning_demo │ ├── Makefile │ ├── int.c │ ├── malloc.h │ ├── mymalloc.c │ └── readme.md ├── readme.md ├── simple_linking_example │ ├── ELF_Format │ ├── Makefile │ ├── main.c │ ├── readme.md │ └── sum.c ├── symbol_resolution_exercise │ ├── Makefile │ ├── m.c │ ├── readme.md │ └── swap.c └── 引导习题参考答案.pdf ├── Optimizing_Program_Performancess ├── README.md ├── benchmark │ ├── README.md │ ├── benchmark.c │ ├── combine.c │ ├── combine.h │ ├── include │ │ ├── clock.h │ │ ├── cpe.h │ │ ├── fcyc.h │ │ └── lsquare.h │ ├── run.sh │ ├── src │ │ ├── Makefile │ │ ├── clock.c │ │ ├── cpe.c │ │ ├── fcyc.c │ │ └── lsquare.c │ ├── vec.c │ └── vec.h └── matrix_multiplication │ ├── ReadMe.md │ ├── double_matrix_multiplication_SIMD.c │ ├── float_matrix_multiplication_SIMD.c │ ├── matrix_multiplication_Strassen.c │ └── matrix_multiplication_optimization.c ├── README.md ├── System_Level_IO ├── Makefile ├── README.md ├── abcde.txt ├── baz.txt ├── cpfile.c ├── cpstdin.c ├── csapp.c ├── csapp.h ├── fdprob1.c ├── ffiles1.c ├── ffiles2.c ├── ffiles3.c ├── ffiles3.txt ├── foo.txt ├── foobar.txt ├── hello.c ├── readdir.c ├── sharing1.c ├── sharing2.c ├── statcheck.c └── stdout.c └── Virtual_Memory ├── Makefile ├── README.md ├── Rust ├── README.md ├── Rust_guarantee_memory_safety_examples │ ├── README.md │ ├── heapref.rs 
│ ├── no_leak │ ├── no_leak.rs │ ├── off_by_one │ ├── off_by_one.rs │ └── stackref.rs └── experiment │ ├── experiment1.rs │ ├── experiment2.rs │ └── experiment3.rs ├── mallocex.c └── malloclab ├── .DS_Store ├── Makefile ├── README.md ├── clock.c ├── clock.h ├── clock.o ├── config.h ├── fcyc.c ├── fcyc.h ├── fcyc.o ├── fsecs.c ├── fsecs.h ├── fsecs.o ├── ftimer.c ├── ftimer.h ├── ftimer.o ├── malloclab.md ├── mdriver ├── mdriver.c ├── mdriver.o ├── mdriver_bp2.c ├── memlib.c ├── memlib.h ├── memlib.o ├── mm.c ├── mm.h ├── mm.o ├── mm_explicit.c ├── mm_implicit.c ├── mm_segerated.c ├── short1-bal.rep ├── short2-bal.rep └── traces ├── amptjp-bal.rep ├── binary-bal.rep ├── binary2-bal.rep ├── cccp-bal.rep ├── coalescing-bal.rep ├── cp-decl-bal.rep ├── expr-bal.rep ├── random-bal.rep ├── random2-bal.rep ├── realloc-bal.rep └── realloc2-bal.rep /Exceptional_Control_Flow/README.md: -------------------------------------------------------------------------------- 1 | # 异常控制流 2 | > 本页面中包含异常控制流PPT中所涉及的代码,PPT中已经标注好文件名,可根据文件名进行查找 3 | 4 | fork1.c:一个fork实例 5 | > 1. 编译:gcc -o fork1 fork1.c -lpthread 6 | > 2. 运行:./fork1 7 | 8 | fork2.c:另一个fork实例,并且用到atexit函数(添加一个函数到函数列表,在调用exit后调用该函数。 9 | > 1. 编译:gcc -o fork2 fork2.c -lpthread 10 | > 2. 运行:./fork2 11 | 12 | waitpid.c:一个waitpid的实例 13 | > 1. 编译:gcc -o waitpid waitpid.c -lpthread 14 | > 2. 运行:./waitpid 15 | 16 | signal.c:包含了fork、waitpid、signal、sigprocmask的一个实例 17 | > 1. 编译:gcc -o signal signal.c -lpthread 18 | > 2. 运行:./signal 19 | 20 | counter.c:展示signal不会排队的一个实例 21 | > 1. 编译:gcc -o counter counter.c -lpthread 22 | > 2. 运行:./counter 23 | 24 | concurrency_buggy.c:有并发错误的一个实例 25 | > 1. 编译:gcc -o concurrency_buggy concurrency_buggy.c -lpthread 26 | > 2. 运行:./concurrency_buggy 27 | 28 | concurrency.c:同步流解决了上一个程序并发错误 29 | > 1. 编译:gcc -o concurrency concurrency.c -lpthread 30 | > 2. 运行:./concurrency 31 | 32 | waitforsignal.c:利用循环显示地去等待一个信号 33 | > 1. 编译:gcc -o waitforsignal waitforsignal.c -lpthread 34 | > 2. 
运行:./waitforsignal 35 | 36 | sigsuspend.c:上一个循环太浪费,此程序利用sigsuspend来显式地等待信号 37 | > 1. 编译:gcc -o sigsuspend sigsuspend.c -lpthread 38 | > 2. 运行:./sigsuspend 39 | 40 | setjmp.c:利用非本地跳转从一个深层嵌套的函数调用中立即返回 41 | > 1. 编译:gcc -o setjmp setjmp.c -lpthread 42 | > 2. 运行:./setjmp 43 | 44 | restart.c:利用非本地跳转使信号处理程序分支到一个特殊的代码位置 45 | > 1. 编译:gcc -o restart restart.c -lpthread 46 | > 2. 运行:./restart 47 | 48 | 注: 49 | 如果刚装好的gcc不能编译,则是因为缺少一些必需的头文件,这时需要安装build-essential;安装这个包会同时装上g++、libc6-dev、linux-libc-dev等许多必需的软件和头文件。 50 | 更新资源地址:sudo apt-get update 51 | 安装build-essential:sudo apt-get install build-essential 52 | -------------------------------------------------------------------------------- /Exceptional_Control_Flow/concurrency.c: -------------------------------------------------------------------------------- 1 | #include "csapp.h" 2 | 3 | void initjobs() 4 | { 5 | } 6 | 7 | void addjob(int pid) 8 | { 9 | } 10 | 11 | void deletejob(int pid) 12 | { 13 | } 14 | 15 | /* $begin procmask2 */ 16 | void handler(int sig) 17 | { 18 | int olderrno = errno; 19 | sigset_t mask_all, prev_all; 20 | pid_t pid; 21 | 22 | Sigfillset(&mask_all); 23 | while ((pid = waitpid(-1, NULL, 0)) > 0) { /* Reap a zombie child */ 24 | Sigprocmask(SIG_BLOCK, &mask_all, &prev_all); 25 | deletejob(pid); /* Delete the child from the job list */ 26 | Sigprocmask(SIG_SETMASK, &prev_all, NULL); 27 | } 28 | if (errno != ECHILD) 29 | Sio_error("waitpid error"); 30 | errno = olderrno; 31 | } 32 | 33 | int main(int argc, char **argv) 34 | { 35 | int pid; 36 | sigset_t mask_all, mask_one, prev_one; 37 | 38 | Sigfillset(&mask_all); 39 | Sigemptyset(&mask_one); 40 | Sigaddset(&mask_one, SIGCHLD); 41 | Signal(SIGCHLD, handler); 42 | initjobs(); /* Initialize the job list */ 43 | 44 | while (1) { 45 | Sigprocmask(SIG_BLOCK, &mask_one, &prev_one); /* Block SIGCHLD */ 46 | if ((pid = Fork()) == 0) { /* Child process */ 47 | Sigprocmask(SIG_SETMASK, &prev_one, NULL); /* Unblock SIGCHLD */ 48 | Execve("/bin/date", argv, NULL); 49 
| } 50 | Sigprocmask(SIG_BLOCK, &mask_all, NULL); /* Parent process */ 51 | addjob(pid); /* Add the child to the job list */ 52 | Sigprocmask(SIG_SETMASK, &prev_one, NULL); /* Unblock SIGCHLD */ 53 | } 54 | exit(0); 55 | } 56 | /* $end procmask2 */ 57 | -------------------------------------------------------------------------------- /Exceptional_Control_Flow/concurrency_buggy.c: -------------------------------------------------------------------------------- 1 | #include "csapp.h" 2 | 3 | void initjobs() 4 | { 5 | } 6 | 7 | void addjob(int pid) 8 | { 9 | } 10 | 11 | void deletejob(int pid) 12 | { 13 | } 14 | 15 | /* WARNING: This code is buggy! */ 16 | void handler(int sig) 17 | { 18 | int olderrno = errno; 19 | sigset_t mask_all, prev_all; 20 | pid_t pid; 21 | 22 | Sigfillset(&mask_all); 23 | while ((pid = waitpid(-1, NULL, 0)) > 0) { /* Reap a zombie child */ 24 | Sigprocmask(SIG_BLOCK, &mask_all, &prev_all); 25 | deletejob(pid); /* Delete the child from the job list */ 26 | Sigprocmask(SIG_SETMASK, &prev_all, NULL); 27 | } 28 | if (errno != ECHILD) 29 | Sio_error("waitpid error"); 30 | errno = olderrno; 31 | } 32 | 33 | int main(int argc, char **argv) 34 | { 35 | int pid; 36 | sigset_t mask_all, prev_all; 37 | 38 | Sigfillset(&mask_all); 39 | Signal(SIGCHLD, handler); 40 | initjobs(); /* Initialize the job list */ 41 | 42 | while (1) { 43 | if ((pid = Fork()) == 0) { /* Child process */ 44 | Execve("/bin/date", argv, NULL); 45 | } 46 | Sigprocmask(SIG_BLOCK, &mask_all, &prev_all); /* Parent process */ 47 | addjob(pid); /* Add the child to the job list */ 48 | Sigprocmask(SIG_SETMASK, &prev_all, NULL); 49 | } 50 | exit(0); 51 | } 52 | 53 | -------------------------------------------------------------------------------- /Exceptional_Control_Flow/counter.c: -------------------------------------------------------------------------------- 1 | #include "csapp.h" 2 | 3 | int counter = 0; 4 | 5 | void handler(int sig) 6 | { 7 | counter++; 8 | sleep(1); /* Do some work 
in the handler */ 9 | return; 10 | } 11 | 12 | int main() 13 | { 14 | int i; 15 | 16 | Signal(SIGUSR2, handler); 17 | 18 | if (Fork() == 0) { /* Child */ 19 | for (i = 0; i < 5; i++) { 20 | Kill(getppid(), SIGUSR2); 21 | printf("sent SIGUSR2 to parent\n"); 22 | } 23 | exit(0); 24 | } 25 | 26 | Wait(NULL); 27 | printf("counter=%d\n", counter); 28 | exit(0); 29 | } 30 | 31 | -------------------------------------------------------------------------------- /Exceptional_Control_Flow/csapp.h: -------------------------------------------------------------------------------- 1 | /* 2 | * csapp.h - prototypes and definitions for the CS:APP3e book 3 | */ 4 | /* $begin csapp.h */ 5 | #ifndef __CSAPP_H__ 6 | #define __CSAPP_H__ 7 | 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | 32 | /* Default file permissions are DEF_MODE & ~DEF_UMASK */ 33 | /* $begin createmasks */ 34 | #define DEF_MODE S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH 35 | #define DEF_UMASK S_IWGRP|S_IWOTH 36 | /* $end createmasks */ 37 | 38 | /* Simplifies calls to bind(), connect(), and accept() */ 39 | /* $begin sockaddrdef */ 40 | typedef struct sockaddr SA; 41 | /* $end sockaddrdef */ 42 | 43 | /* Persistent state for the robust I/O (Rio) package */ 44 | /* $begin rio_t */ 45 | #define RIO_BUFSIZE 8192 46 | typedef struct { 47 | int rio_fd; /* Descriptor for this internal buf */ 48 | int rio_cnt; /* Unread bytes in internal buf */ 49 | char *rio_bufptr; /* Next unread byte in internal buf */ 50 | char rio_buf[RIO_BUFSIZE]; /* Internal buffer */ 51 | } rio_t; 52 | /* $end rio_t */ 53 | 54 | /* External variables */ 55 | extern int h_errno; /* Defined by BIND for DNS errors */ 56 | extern char **environ; /* Defined by libc */ 57 | 58 
| /* Misc constants */ 59 | #define MAXLINE 8192 /* Max text line length */ 60 | #define MAXBUF 8192 /* Max I/O buffer size */ 61 | #define LISTENQ 1024 /* Second argument to listen() */ 62 | 63 | /* Our own error-handling functions */ 64 | void unix_error(char *msg); 65 | void posix_error(int code, char *msg); 66 | void dns_error(char *msg); 67 | void gai_error(int code, char *msg); 68 | void app_error(char *msg); 69 | 70 | /* Process control wrappers */ 71 | pid_t Fork(void); 72 | void Execve(const char *filename, char *const argv[], char *const envp[]); 73 | pid_t Wait(int *status); 74 | pid_t Waitpid(pid_t pid, int *iptr, int options); 75 | void Kill(pid_t pid, int signum); 76 | unsigned int Sleep(unsigned int secs); 77 | void Pause(void); 78 | unsigned int Alarm(unsigned int seconds); 79 | void Setpgid(pid_t pid, pid_t pgid); 80 | pid_t Getpgrp(); 81 | 82 | /* Signal wrappers */ 83 | typedef void handler_t(int); 84 | handler_t *Signal(int signum, handler_t *handler); 85 | void Sigprocmask(int how, const sigset_t *set, sigset_t *oldset); 86 | void Sigemptyset(sigset_t *set); 87 | void Sigfillset(sigset_t *set); 88 | void Sigaddset(sigset_t *set, int signum); 89 | void Sigdelset(sigset_t *set, int signum); 90 | int Sigismember(const sigset_t *set, int signum); 91 | int Sigsuspend(const sigset_t *set); 92 | 93 | /* Sio (Signal-safe I/O) routines */ 94 | ssize_t sio_puts(char s[]); 95 | ssize_t sio_putl(long v); 96 | void sio_error(char s[]); 97 | 98 | /* Sio wrappers */ 99 | ssize_t Sio_puts(char s[]); 100 | ssize_t Sio_putl(long v); 101 | void Sio_error(char s[]); 102 | 103 | /* Unix I/O wrappers */ 104 | int Open(const char *pathname, int flags, mode_t mode); 105 | ssize_t Read(int fd, void *buf, size_t count); 106 | ssize_t Write(int fd, const void *buf, size_t count); 107 | off_t Lseek(int fildes, off_t offset, int whence); 108 | void Close(int fd); 109 | int Select(int n, fd_set *readfds, fd_set *writefds, fd_set *exceptfds, 110 | struct timeval *timeout); 
111 | int Dup2(int fd1, int fd2); 112 | void Stat(const char *filename, struct stat *buf); 113 | void Fstat(int fd, struct stat *buf) ; 114 | 115 | /* Directory wrappers */ 116 | DIR *Opendir(const char *name); 117 | struct dirent *Readdir(DIR *dirp); 118 | int Closedir(DIR *dirp); 119 | 120 | /* Memory mapping wrappers */ 121 | void *Mmap(void *addr, size_t len, int prot, int flags, int fd, off_t offset); 122 | void Munmap(void *start, size_t length); 123 | 124 | /* Standard I/O wrappers */ 125 | void Fclose(FILE *fp); 126 | FILE *Fdopen(int fd, const char *type); 127 | char *Fgets(char *ptr, int n, FILE *stream); 128 | FILE *Fopen(const char *filename, const char *mode); 129 | void Fputs(const char *ptr, FILE *stream); 130 | size_t Fread(void *ptr, size_t size, size_t nmemb, FILE *stream); 131 | void Fwrite(const void *ptr, size_t size, size_t nmemb, FILE *stream); 132 | 133 | /* Dynamic storage allocation wrappers */ 134 | void *Malloc(size_t size); 135 | void *Realloc(void *ptr, size_t size); 136 | void *Calloc(size_t nmemb, size_t size); 137 | void Free(void *ptr); 138 | 139 | /* Sockets interface wrappers */ 140 | int Socket(int domain, int type, int protocol); 141 | void Setsockopt(int s, int level, int optname, const void *optval, int optlen); 142 | void Bind(int sockfd, struct sockaddr *my_addr, int addrlen); 143 | void Listen(int s, int backlog); 144 | int Accept(int s, struct sockaddr *addr, socklen_t *addrlen); 145 | void Connect(int sockfd, struct sockaddr *serv_addr, int addrlen); 146 | 147 | /* Protocol independent wrappers */ 148 | void Getaddrinfo(const char *node, const char *service, 149 | const struct addrinfo *hints, struct addrinfo **res); 150 | void Getnameinfo(const struct sockaddr *sa, socklen_t salen, char *host, 151 | size_t hostlen, char *serv, size_t servlen, int flags); 152 | void Freeaddrinfo(struct addrinfo *res); 153 | void Inet_ntop(int af, const void *src, char *dst, socklen_t size); 154 | void Inet_pton(int af, const char *src, 
void *dst); 155 | 156 | /* DNS wrappers */ 157 | struct hostent *Gethostbyname(const char *name); 158 | struct hostent *Gethostbyaddr(const char *addr, int len, int type); 159 | 160 | /* Pthreads thread control wrappers */ 161 | void Pthread_create(pthread_t *tidp, pthread_attr_t *attrp, 162 | void * (*routine)(void *), void *argp); 163 | void Pthread_join(pthread_t tid, void **thread_return); 164 | void Pthread_cancel(pthread_t tid); 165 | void Pthread_detach(pthread_t tid); 166 | void Pthread_exit(void *retval); 167 | pthread_t Pthread_self(void); 168 | void Pthread_once(pthread_once_t *once_control, void (*init_function)()); 169 | 170 | /* POSIX semaphore wrappers */ 171 | void Sem_init(sem_t *sem, int pshared, unsigned int value); 172 | void P(sem_t *sem); 173 | void V(sem_t *sem); 174 | 175 | /* Rio (Robust I/O) package */ 176 | ssize_t rio_readn(int fd, void *usrbuf, size_t n); 177 | ssize_t rio_writen(int fd, void *usrbuf, size_t n); 178 | void rio_readinitb(rio_t *rp, int fd); 179 | ssize_t rio_readnb(rio_t *rp, void *usrbuf, size_t n); 180 | ssize_t rio_readlineb(rio_t *rp, void *usrbuf, size_t maxlen); 181 | 182 | /* Wrappers for Rio package */ 183 | ssize_t Rio_readn(int fd, void *usrbuf, size_t n); 184 | void Rio_writen(int fd, void *usrbuf, size_t n); 185 | void Rio_readinitb(rio_t *rp, int fd); 186 | ssize_t Rio_readnb(rio_t *rp, void *usrbuf, size_t n); 187 | ssize_t Rio_readlineb(rio_t *rp, void *usrbuf, size_t maxlen); 188 | 189 | /* Reentrant protocol-independent client/server helpers */ 190 | int open_clientfd(char *hostname, char *port); 191 | int open_listenfd(char *port); 192 | 193 | /* Wrappers for reentrant protocol-independent client/server helpers */ 194 | int Open_clientfd(char *hostname, char *port); 195 | int Open_listenfd(char *port); 196 | 197 | 198 | #endif /* __CSAPP_H__ */ 199 | /* $end csapp.h */ 200 | -------------------------------------------------------------------------------- /Exceptional_Control_Flow/fork1.c: 
-------------------------------------------------------------------------------- 1 | #include "csapp.h" 2 | 3 | /* $begin fork1 */ 4 | int main() 5 | { 6 | int x = 1; 7 | 8 | if (Fork() == 0) 9 | printf("p1: x=%d\n", ++x); 10 | printf("p2: x=%d\n", --x); 11 | exit(0); 12 | } 13 | /* $end fork1 */ 14 | 15 | -------------------------------------------------------------------------------- /Exceptional_Control_Flow/fork2.c: -------------------------------------------------------------------------------- 1 | #include "csapp.h" 2 | 3 | void end(void) 4 | { 5 | printf("2"); fflush(stdout); 6 | } 7 | 8 | int main() 9 | { 10 | if (Fork() == 0) 11 | atexit(end); 12 | if (Fork() == 0) { 13 | printf("0"); fflush(stdout); 14 | } 15 | else { 16 | printf("1"); fflush(stdout); 17 | } 18 | exit(0); 19 | } 20 | 21 | 22 | -------------------------------------------------------------------------------- /Exceptional_Control_Flow/restart.c: -------------------------------------------------------------------------------- 1 | #include "csapp.h" 2 | 3 | sigjmp_buf buf; 4 | 5 | void handler(int sig) 6 | { 7 | siglongjmp(buf, 1); 8 | } 9 | 10 | int main() 11 | { 12 | if (!sigsetjmp(buf, 1)) { 13 | Signal(SIGINT, handler); 14 | Sio_puts("starting\n"); 15 | } 16 | else 17 | Sio_puts("restarting\n"); 18 | 19 | while(1) { 20 | Sleep(1); 21 | Sio_puts("processing...\n"); 22 | } 23 | exit(0); /* Control never reaches here */ 24 | } 25 | 26 | -------------------------------------------------------------------------------- /Exceptional_Control_Flow/setjmp.c: -------------------------------------------------------------------------------- 1 | #include "csapp.h" 2 | 3 | jmp_buf buf; 4 | 5 | int error1 = 0; 6 | int error2 = 1; 7 | 8 | void foo(void), bar(void); 9 | 10 | int main() 11 | { 12 | switch(setjmp(buf)) { 13 | case 0: 14 | foo(); 15 | break; 16 | case 1: 17 | printf("Detected an error1 condition in foo\n"); 18 | break; 19 | case 2: 20 | printf("Detected an error2 condition in foo\n"); 21 | 
break; 22 | default: 23 | printf("Unknown error condition in foo\n"); 24 | } 25 | exit(0); 26 | } 27 | 28 | /* Deeply nested function foo */ 29 | void foo(void) 30 | { 31 | if (error1) 32 | longjmp(buf, 1); 33 | bar(); 34 | } 35 | 36 | void bar(void) 37 | { 38 | if (error2) 39 | longjmp(buf, 2); 40 | } 41 | 42 | -------------------------------------------------------------------------------- /Exceptional_Control_Flow/signal.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include "csapp.h" 7 | 8 | volatile long counter = 2; 9 | 10 | void handler1(int sig) 11 | { 12 | sigset_t mask, prev_mask; 13 | 14 | Sigfillset(&mask); 15 | Sigprocmask(SIG_BLOCK, &mask, &prev_mask); /* Block sigs */ 16 | Sio_putl(--counter); 17 | Sigprocmask(SIG_SETMASK, &prev_mask, NULL); /* Restore sigs */ 18 | 19 | _exit(0); 20 | } 21 | 22 | int main() 23 | { 24 | pid_t pid; 25 | sigset_t mask, prev_mask; 26 | 27 | printf("%ld", counter); 28 | fflush(stdout); 29 | 30 | signal(SIGUSR1, handler1); 31 | if ((pid = Fork()) == 0) { 32 | while(1) {}; 33 | } 34 | Kill(pid, SIGUSR1); 35 | Waitpid(-1, NULL, 0); 36 | 37 | Sigfillset(&mask); 38 | Sigprocmask(SIG_BLOCK, &mask, &prev_mask); /* Block sigs */ 39 | printf("%ld", ++counter); 40 | Sigprocmask(SIG_SETMASK, &prev_mask, NULL); /* Restore sigs */ 41 | 42 | exit(0); 43 | } 44 | 45 | -------------------------------------------------------------------------------- /Exceptional_Control_Flow/sigsuspend.c: -------------------------------------------------------------------------------- 1 | #include "csapp.h" 2 | 3 | volatile sig_atomic_t pid; 4 | 5 | void sigchld_handler(int s) 6 | { 7 | int olderrno = errno; 8 | pid = Waitpid(-1, NULL, 0); 9 | errno = olderrno; 10 | } 11 | 12 | void sigint_handler(int s) 13 | { 14 | } 15 | 16 | int main(int argc, char **argv) 17 | { 18 | sigset_t mask, prev; 19 | 20 | Signal(SIGCHLD, sigchld_handler); 21 | 
Signal(SIGINT, sigint_handler); 22 | Sigemptyset(&mask); 23 | Sigaddset(&mask, SIGCHLD); 24 | 25 | while (1) { 26 | Sigprocmask(SIG_BLOCK, &mask, &prev); /* Block SIGCHLD */ 27 | if (Fork() == 0) /* Child */ 28 | exit(0); 29 | 30 | /* Wait for SIGCHLD to be received */ 31 | pid = 0; 32 | while (!pid) 33 | Sigsuspend(&prev); 34 | 35 | /* Optionally unblock SIGCHLD */ 36 | Sigprocmask(SIG_SETMASK, &prev, NULL); 37 | 38 | /* Do some work after receiving SIGCHLD */ 39 | printf("."); 40 | } 41 | exit(0); 42 | } 43 | 44 | -------------------------------------------------------------------------------- /Exceptional_Control_Flow/waitforsignal.c: -------------------------------------------------------------------------------- 1 | #include "csapp.h" 2 | 3 | volatile sig_atomic_t pid; 4 | 5 | void sigchld_handler(int s) 6 | { 7 | int olderrno = errno; 8 | pid = Waitpid(-1, NULL, 0); 9 | errno = olderrno; 10 | } 11 | 12 | void sigint_handler(int s) 13 | { 14 | } 15 | 16 | int main(int argc, char **argv) 17 | { 18 | sigset_t mask, prev; 19 | 20 | Signal(SIGCHLD, sigchld_handler); 21 | Signal(SIGINT, sigint_handler); 22 | Sigemptyset(&mask); 23 | Sigaddset(&mask, SIGCHLD); 24 | 25 | while (1) { 26 | Sigprocmask(SIG_BLOCK, &mask, &prev); /* Block SIGCHLD */ 27 | if (Fork() == 0) /* Child */ 28 | exit(0); 29 | 30 | /* Parent */ 31 | pid = 0; 32 | Sigprocmask(SIG_SETMASK, &prev, NULL); /* Unblock SIGCHLD */ 33 | 34 | /* Wait for SIGCHLD to be received (wasteful) */ 35 | while (!pid) 36 | ; 37 | 38 | /* Do some work after receiving SIGCHLD */ 39 | printf("."); 40 | } 41 | exit(0); 42 | } 43 | -------------------------------------------------------------------------------- /Exceptional_Control_Flow/waitpid.c: -------------------------------------------------------------------------------- 1 | #include "csapp.h" 2 | 3 | int main() 4 | { 5 | if (fork() == 0) { 6 | printf("a"); fflush(stdout); 7 | exit(0); 8 | } 9 | else { 10 | printf("b"); fflush(stdout); 11 | waitpid(-1, NULL, 0); 12 
| } 13 | printf("c"); fflush(stdout); 14 | exit(0); 15 | } 16 | -------------------------------------------------------------------------------- /Linking/duplicate_symbol_names_example/Makefile: -------------------------------------------------------------------------------- 1 | CC = gcc 2 | CFLAGS = -Wall -Og 3 | FOOBARS = foobar2\ 4 | foobar3\ 5 | foobar4\ 6 | foobar5\ 7 | foobar6\ 8 | 9 | all: $(FOOBARS) 10 | 11 | # linker puzzles 12 | foobar1: foo1.o bar1.o 13 | $(CC) $(CFLAGS) -o foobar1 foo1.o bar1.o 14 | foobar2: foo2.o bar2.o 15 | $(CC) $(CFLAGS) -o foobar2 foo2.o bar2.o 16 | foobar3: foo3.o bar3.o 17 | $(CC) $(CFLAGS) -o foobar3 foo3.o bar3.o show_bytes_b.c 18 | foobar4: foo4.o bar4.o 19 | $(CC) $(CFLAGS) -o foobar4 foo4.o bar4.o show_bytes_b.c 20 | foobar5: foo5.o bar5.o 21 | $(CC) $(CFLAGS) -o foobar5 foo5.o bar5.o 22 | foobar6: foo6.o bar6.o 23 | $(CC) $(CFLAGS) -o foobar6 foo6.o bar6.o show_bytes_b.c 24 | 25 | clean: 26 | rm -f $(FOOBARS) *.o *~ *.d *.so *.a *.s 27 | rm -f tmp/* 28 | 29 | -------------------------------------------------------------------------------- /Linking/duplicate_symbol_names_example/bar1.c: -------------------------------------------------------------------------------- 1 | /* $begin bar1 */ 2 | /* bar1.c */ 3 | int main() 4 | { 5 | return 0; 6 | } 7 | /* $end bar1 */ 8 | 9 | -------------------------------------------------------------------------------- /Linking/duplicate_symbol_names_example/bar2.c: -------------------------------------------------------------------------------- 1 | /* $begin bar2 */ 2 | /* bar2.c */ 3 | int x; 4 | 5 | void f() 6 | { 7 | x = 15212; 8 | } 9 | /* $end bar2 */ 10 | -------------------------------------------------------------------------------- /Linking/duplicate_symbol_names_example/bar3.c: -------------------------------------------------------------------------------- 1 | /* $begin bar3 */ 2 | /* bar3.c */ 3 | #include 4 | void show_double(double x); 5 | double x; 6 | 7 | void f() 8 | { 9 | 
x = 2.0; 10 | printf("In bar3.c , double x: "); 11 | show_double(x); 12 | } 13 | /* $end bar3 */ 14 | 15 | -------------------------------------------------------------------------------- /Linking/duplicate_symbol_names_example/bar4.c: -------------------------------------------------------------------------------- 1 | /* $begin bar4 */ 2 | /* bar4.c */ 3 | #include 4 | void show_bytes(unsigned char* start, size_t len); 5 | double x; 6 | 7 | void f() 8 | { 9 | x = 2.0; 10 | double y = 2.0; 11 | printf("doulbe 2.0's binary representation: \n"); 12 | show_bytes((unsigned char*)&y, sizeof(y)); 13 | } 14 | /* $end bar4 */ 15 | 16 | -------------------------------------------------------------------------------- /Linking/duplicate_symbol_names_example/bar5.c: -------------------------------------------------------------------------------- 1 | /* $begin bar5 */ 2 | /* bar5.c */ 3 | int x; 4 | void f() 5 | { 6 | } 7 | /* $end bar5 */ 8 | 9 | -------------------------------------------------------------------------------- /Linking/duplicate_symbol_names_example/bar6.c: -------------------------------------------------------------------------------- 1 | /* $begin bar6 */ 2 | /* bar6.c */ 3 | typedef struct 4 | { 5 | double x; 6 | int y; 7 | }s; 8 | 9 | s s1; 10 | void p2() 11 | { 12 | s1.x = -2.0; 13 | s1.y = -1; 14 | } 15 | /* $end bar6 */ 16 | -------------------------------------------------------------------------------- /Linking/duplicate_symbol_names_example/foo1.c: -------------------------------------------------------------------------------- 1 | /* $begin foo1 */ 2 | /* foo1.c */ 3 | int x; 4 | int main() 5 | { 6 | return 0; 7 | } 8 | /* $end foo1 */ 9 | -------------------------------------------------------------------------------- /Linking/duplicate_symbol_names_example/foo2.c: -------------------------------------------------------------------------------- 1 | /* $begin foo2 */ 2 | /* foo2.c */ 3 | #include 4 | void f(void); 5 | 6 | int x; 7 | 8 | int main() 9 
| { 10 | x = 15213; 11 | f(); 12 | printf("x = %d\n", x); 13 | return 0; 14 | } 15 | /* $end foo2 */ 16 | 17 | -------------------------------------------------------------------------------- /Linking/duplicate_symbol_names_example/foo3.c: -------------------------------------------------------------------------------- 1 | /* $begin foo3 */ 2 | /* foo3.c */ 3 | #include 4 | void show_pointer(void *x); 5 | void f(void); 6 | 7 | 8 | int x; 9 | int y; 10 | 11 | int main() 12 | { 13 | x = 1; 14 | y = -1; 15 | printf("Previously, x = 0x%x y = 0x%x \n", x, y); 16 | printf("The address of x and y are %p, %p, and &x+1 = %p\n", &x, &y, &x+1); 17 | show_pointer((void *)&x); 18 | f(); 19 | printf("Now, x = 0x%x y = 0x%x *(&x+1) = 0x%x\n", 20 | x, y, *(&x+1)); 21 | return 0; 22 | } 23 | /* $end foo3 */ 24 | 25 | -------------------------------------------------------------------------------- /Linking/duplicate_symbol_names_example/foo4.c: -------------------------------------------------------------------------------- 1 | /* $begin foo4 */ 2 | /* foo4.c */ 3 | #include 4 | typedef unsigned char *byte_pointer; 5 | void show_bytes(byte_pointer start, size_t len); 6 | void f(void); 7 | 8 | int x = 1; 9 | int y = -1; 10 | 11 | int main() 12 | { 13 | printf("Previouly, the address of x and y are %p, %p \n", &x, &y); 14 | printf("x = 0x%x y = 0x%x \n", x, y); 15 | show_bytes((byte_pointer)&x, sizeof(int)); 16 | f(); 17 | printf("x = 0x%x y = 0x%x \n", x, y); 18 | show_bytes((byte_pointer)&x, sizeof(int)); 19 | show_bytes((byte_pointer)&y, sizeof(int)); 20 | return 0; 21 | } 22 | /* $end foo4 */ 23 | 24 | -------------------------------------------------------------------------------- /Linking/duplicate_symbol_names_example/foo5.c: -------------------------------------------------------------------------------- 1 | /* $begin foo5 */ 2 | /* foo5.c */ 3 | int x = 15213; 4 | int main() 5 | { 6 | return 0; 7 | } 8 | /* $end foo5 */ 9 | 
-------------------------------------------------------------------------------- /Linking/duplicate_symbol_names_example/foo6.c: -------------------------------------------------------------------------------- 1 | /* $begin foo6 */ 2 | /* foo6.c */ 3 | #include 4 | void p2(void); 5 | void show_bytes(unsigned char *start, size_t len); 6 | 7 | typedef struct 8 | { 9 | int x; 10 | double y; 11 | }s; 12 | 13 | s s1; 14 | int main() 15 | { 16 | s1.x = 1; 17 | s1.y = 2.0; 18 | printf("Previously, struct s1's element x = %d, y = %lf\n", s1.x, s1.y); 19 | printf("The binary of struct is:\n"); 20 | show_bytes((unsigned char *)&s1, sizeof(s)); 21 | 22 | p2(); 23 | printf("Now, struct s1's element x = %d, y = %lf\n", s1.x, s1.y); 24 | printf("The binary of struct is:\n"); 25 | show_bytes((unsigned char *)&s1, sizeof(s)); 26 | return 0; 27 | } 28 | /* $end foo6 */ 29 | -------------------------------------------------------------------------------- /Linking/duplicate_symbol_names_example/readme.md: -------------------------------------------------------------------------------- 1 | # 演示实验介绍 2 | 本目录主要是利用6个扩展的例子来解释说明链接器处理重复符号定义的三条规则可能带来的一些困惑 3 | foo{1...6}.c是包含main的主文件,bar{1...6}.c是定义同名全局符号的辅助文件。 4 | foo1.c和bar1.c:定义两个同名强符号main,造成链接时错误 5 | foo2.c和bar2.c:定义两个同名弱符号x,根据规则三,链接器会随机选择某一个定义,使得这两个文件对符号x的引用都解析到这个定义上去。 6 | foo3.c和bar3.c:同样是两个弱符号,但是由于类型不同,如果链接器符号解析时确定的定义是int x;则,在bar3.c中对double x的赋值操作会覆盖其他内存区域,在这里就是x之后的4个字节。但是,在多台机器上试验,发现链接器往往会选择double x的定义(可以查看可执行文件foobar3的符号表中x的条目信息,若大小为8,即选择了double x的定义)。 7 | foo4.c和bar4.c:同理,只是这里的int x的定义被初始化了。因此上述情况一定发生 8 | foo5.c和bar5.c:定义了一个强符号int x = 15213;和一个弱符号,利用规则二,在bar5.c中所有对x的引用都会解析到强符号上去。 9 | foo6.c和bar6.c:定义了两个弱符号——结构体类型s的变量s1,只是两个文件中结构体定义的成员变量不同。 10 | ```c 11 | /* foo6.c */ 12 | typedef struct 13 | { 14 | int x; 15 | double y; 16 | }s; 17 | /* bar6.c */ 18 | typedef struct 19 | { 20 | double x; 21 | int y; 22 | }s; 23 | ``` 24 | ### 编译/运行 25 | ```shell 26 | linux > gcc -Wall -Og -o foobar1 foo1.c bar1.c 27 | linux > gcc -Wall -Og -o foobar2 
foo2.c bar2.c 28 | linux > gcc -Wall -Og -o foobar3 foo3.c bar3.c show_bytes_b.c 29 | linux > gcc -Wall -Og -o foobar4 foo4.c bar4.c show_bytes_b.c 30 | linux > gcc -Wall -Og -o foobar5 foo5.c bar5.c 31 | linux > gcc -Wall -Og -o foobar6 foo6.c bar6.c show_bytes_b.c 32 | ``` 33 | 34 | ### Files 35 | show_bytes_b.c 辅助打印模块,按照正常阅读顺序(即高位字节在左,低位字节在右)打印任意字节序列,任意类型的变量,目前支持打印int, float, double, 指针,有需要可以扩展。 36 | test.sh 多次执行编译、运行某个foo.c和bar.c文件,输入第一个参数为想要重复执行的文件名数字,第二个参数为想要循环的次数,如想要循环编译并测试结果10次foo3.c和bar3.c,输入`linux > ./test.sh 3 10`。编译的文件保存在tmp目录下,结果输出和错误都保存在tmp/results.txt中 37 | 38 | ### Command 39 | 本目录下的命令使用Makefile管理 40 | 如需生成所有的文件,直接执行`linux > make all`,但是这里不建议直接生成所有,因为本目录下的所有示例都是为了演示一些puzzles,比如foobar1是设计成会报链接时错误 41 | 因此,尽量用`linux > make foobar1`来分别执行 42 | 清理所有中间文件,执行`linux > make clean` 43 | GCC 命令可参考Makefile 44 | -------------------------------------------------------------------------------- /Linking/duplicate_symbol_names_example/show_bytes_b.c: -------------------------------------------------------------------------------- 1 | /* $begin show-bytes */ 2 | #include 3 | /* $end show-bytes */ 4 | #include 5 | #include 6 | /* $begin show-bytes */ 7 | 8 | typedef unsigned char *byte_pointer; 9 | 10 | void show_bytes(byte_pointer start, size_t len) { 11 | size_t i; 12 | for (i =0; i < len; i++) 13 | printf(" %.2x", start[len-i-1]); 14 | printf("\n"); 15 | } 16 | 17 | void show_int(int x) { 18 | show_bytes((byte_pointer) &x, sizeof(int)); 19 | } 20 | 21 | void show_float(float x) { 22 | show_bytes((byte_pointer) &x, sizeof(float)); 23 | } 24 | 25 | void show_double(double x) { 26 | show_bytes((byte_pointer) &x, sizeof(double)); 27 | } 28 | 29 | void show_pointer(void *x) { 30 | show_bytes((byte_pointer) &x, sizeof(void *)); 31 | } 32 | /* $end show-bytes */ 33 | 34 | /* 35 | int main(int argc, char *argv[]) 36 | { 37 | int val = 12345; 38 | show_int(val); 39 | //show_int(54321); 40 | //show_float(1.0); 41 | //show_double(0.0); 42 | //show_double(1.0); 43 | 
#!/bin/bash
# Repeatedly compile and run one fooN.c / barN.c pair, appending all compiler
# and program output (stdout and stderr) to tmp/results.txt.
#
# $1: the number x in source file foox.c and barx.c
# $2: how many times to compile and run the pair
#
# bash is required: the C-style for ((...)) loop below is not POSIX sh.
if [ -z "$1" ]; then
    echo "Please input file number you want to test"
elif [ -z "$2" ]; then
    echo "Please input the times you want to test"
else
    # The binaries and the log both live in tmp/; create it on first use so
    # the redirections below do not fail on a fresh checkout.
    mkdir -p tmp
    for ((i = 0; i < $2; i++)); do
        out="foobar${1}_${i}"
        gcc -o "tmp/$out" "foo$1.c" "bar$1.c" show_bytes_b.c >> tmp/results.txt 2>&1
        "tmp/$out" >> tmp/results.txt 2>&1
    done
fi
/* $begin dll */
#include <stdio.h>
#include <stdlib.h>
#include <dlfcn.h>

int x[2] = {1, 2};
int y[2] = {3, 4};
int z[2];

/*
 * main - run-time dynamic linking demo.
 *
 * Loads ./libvector.so with dlopen, looks up addvec with dlsym, calls it on
 * the global arrays x and y (result in z), prints z and unloads the library.
 * Returns 0 on success; prints the dlerror() text and exits with status 1 if
 * loading, symbol lookup or unloading fails.
 */
int main()
{
    void *handle;
    void (*addvec)(int *, int *, int *, int);
    char *error;

    /* Dynamically load the shared library that contains addvec() */
    handle = dlopen("./libvector.so", RTLD_LAZY);
    if (!handle) {
        fprintf(stderr, "%s\n", dlerror());
        exit(1);
    }

    /* Get a pointer to the addvec() function we just loaded.
     * Clear any stale error first so the dlerror() check below can only
     * report a failure of this dlsym call. */
    dlerror();
    addvec = dlsym(handle, "addvec");
    if ((error = dlerror()) != NULL) {
        fprintf(stderr, "%s\n", error);
        exit(1);
    }

    /* Now we can call addvec() just like any other function */
    addvec(x, y, z, 2);
    printf("z = [%d %d]\n", z[0], z[1]);

    /* Unload the shared library.  dlclose reports failure with any nonzero
     * value, not necessarily a negative one. */
    if (dlclose(handle) != 0) {
        fprintf(stderr, "%s\n", dlerror());
        exit(1);
    }
    return 0;
}
/* $end dll */
链接静态链接库 5 | ```shell 6 | linux > gcc -Wall -Og -c main2.c addvec.c multvec.c 7 | linux > ar rcs libvector.a addvec.o multvec.o 8 | linux > gcc -Wall -Og -static -o prog2c main2.o libvector.a 9 | ##或者是使用下面这条语句 10 | linux > gcc -Wall -Og -static -o prog2c main2.o -L. -lvector 11 | ``` 12 | 2. 加载时动态链接 13 | ```shell 14 | ## -shared 指示链接器生成一个共享目标文件, -fpic 告诉编译器生成位置无关代码 15 | linux > gcc -Wall -Og -shared -fpic -o libvector.so addvec.c multvec.c 16 | linux > gcc -Wall -Og -o prog2l main2.o ./libvector.so 17 | ``` 18 | 3. 运行时动态链接 19 | ```shell 20 | ## -rdynamic 通知链接器将所有符号添加到动态符号表中,使得之后可以使用类似于dlopen这类接口查询到符号,-ldl 指定程序需要链接动态函数库,在编译使用动态链接库的程序都需要加入这两个编译选项 21 | linux > gcc -Wall -Og -rdynamic -o prog2r dll.c -ldl 22 | ``` 23 | 24 | ### Files 25 | addvec.c 包含向量的加法操作 26 | multvec.c 包含向量的对应元素乘法 27 | main2.c 初始化数组x和y,并使用addvec函数做加法运算 28 | vector.h 包含addvec和multvec的函数声明的头文件 29 | dll.c 运行时动态链接的演示源程序, 使用dlopen加载链接需要的动态链接库,使用dlsym接口找到需要的符号位置 30 | Makefile 31 | 32 | ### Command 33 | 本目录下的命令使用Makefile管理 34 | 如需生成所有的文件,直接执行`linux > make all` 35 | 清理所有中间文件,执行`linux > make clean` 36 | GCC 命令可参考Makefile -------------------------------------------------------------------------------- /Linking/example_static_and_dynamic_linking/vector.h: -------------------------------------------------------------------------------- 1 | /* prototypes for libvector */ 2 | void addvec(int *x, int *y, int *z, int n); 3 | void multvec(int *x, int *y, int *z, int n); 4 | int getcount(); 5 | -------------------------------------------------------------------------------- /Linking/library_interpositioning_demo/Makefile: -------------------------------------------------------------------------------- 1 | CC = gcc 2 | CFLAGS = -Wall 3 | 4 | all: intr intl intc 5 | 6 | ##### 7 | # Run-time interpositioning 8 | # 9 | intr: int.c mymalloc.c 10 | $(CC) $(CFLAGS) -DRUNTIME -shared -fpic -o mymalloc.so mymalloc.c -ldl 11 | $(CC) $(CFLAGS) -g -o intr int.c 12 | 13 | runr: 14 | (LD_PRELOAD="./mymalloc.so" ./intr) 15 | 16 |
/*
 * int.c - Example program to demonstrate different ways to
 *         interpose on the malloc and free functions.
 *
 * Note: be sure to compile unoptimized (-O0) so that gcc won't
 * optimize away the calls to malloc and free.
 */
/* $begin interposemain */
#include <stdio.h>
/* Must be <malloc.h>: the compile-time demo is built with -I. so that this
 * directive picks up the local malloc.h, whose macros redirect malloc/free
 * to mymalloc/myfree. */
#include <malloc.h>

/*
 * main - allocate 32 bytes and free them again; the single malloc/free pair
 * is the call sequence that each interposition mechanism intercepts.
 */
int main()
{
    int *p = malloc(32);
    free(p);
    return(0);
}
/* $end interposemain */
/* $begin interposer */
#ifdef RUNTIME
#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <dlfcn.h>

/*
 * Nonzero while one of the wrappers below is inside printf.  stdio may call
 * malloc/free itself (e.g. to set up its output buffer); those nested calls
 * land in these same wrappers, and tracing them too would call printf again
 * and recurse without bound.  While the flag is set the wrappers still
 * forward to libc but print nothing.
 * A plain static is used on purpose; NOTE(review): this assumes the traced
 * program is single-threaded, as int.c is.
 */
static int tracing = 0;

/*
 * malloc - wrapper that forwards to the next "malloc" in library search
 * order (libc's) and prints the requested size and the returned address.
 * Exits with status 1 if the real malloc cannot be looked up.
 */
void *malloc(size_t size)
{
    void *(*mallocp)(size_t size);
    char *error;
    void *ptr;

    mallocp = dlsym(RTLD_NEXT, "malloc"); /* Get address of libc malloc */
    if ((error = dlerror()) != NULL) {
        fputs(error, stderr);
        exit(1);
    }
    ptr = mallocp(size); /* Call libc malloc */
    if (!tracing) {
        tracing = 1;
        /* %zu prints the full size_t; the old (int) cast truncated it */
        printf("malloc(%zu) = %p\n", size, ptr);
        tracing = 0;
    }
    return ptr;
}

/*
 * free - wrapper that forwards to the next "free" in library search order
 * and prints the released address.  free(NULL) returns silently.
 * Exits with status 1 if the real free cannot be looked up.
 */
void free(void *ptr)
{
    void (*freep)(void *) = NULL;
    char *error;

    if (!ptr)
        return;

    freep = dlsym(RTLD_NEXT, "free"); /* Get address of libc free */
    if ((error = dlerror()) != NULL) {
        fputs(error, stderr);
        exit(1);
    }
    freep(ptr); /* Call libc free */
    if (!tracing) {
        tracing = 1;
        printf("free(%p)\n", ptr);
        tracing = 0;
    }
}
#endif
/* $end interposer */
/* $begin interposel */
#ifdef LINKTIME
#include <stdio.h>

/* Resolved by the linker to libc's malloc/free when the program is linked
 * with -Wl,--wrap,malloc -Wl,--wrap,free. */
void *__real_malloc(size_t size);
void __real_free(void *ptr);

/*
 * __wrap_malloc - receives every call the program makes to malloc; forwards
 * to the real allocator and prints the requested size and returned address.
 */
void *__wrap_malloc(size_t size)
{
    void *ptr = __real_malloc(size); /* Call libc malloc */
    /* %zu prints the full size_t; the old (int) cast truncated it */
    printf("malloc(%zu) = %p\n", size, ptr);
    return ptr;
}

/*
 * __wrap_free - receives every call the program makes to free; forwards to
 * the real free and prints the released address.
 */
void __wrap_free(void *ptr)
{
    __real_free(ptr); /* Call libc free */
    printf("free(%p)\n", ptr);
}
#endif
/* $end interposel */
编译时打桩 5 | 主要是利用预处理器的宏展开技术,我们定义了一个malloc.h利用宏将malloc展开成我们的包装函数mymalloc,并且保持它们的原型一致,然后我们利用-I.编译参数指示编译驱动程序从当前目录include头文件,从而达到截获调用的目的。 6 | ```c 7 | /* malloc.h */ 8 | #define malloc(size) mymalloc(size) 9 | #define free(ptr) myfree(ptr) 10 | 11 | void *mymalloc(size_t size); 12 | void myfree(void *ptr); 13 | 14 | 15 | /* mymalloc.c compile time wrapper function */ 16 | void *mymalloc(size_t size) 17 | { 18 | void *ptr = malloc(size); 19 | printf("malloc(%d)=%p\n", 20 | (int)size, ptr); 21 | return ptr; 22 | } 23 | ``` 24 | 25 | 2. 链接时打桩 26 | 利用链接器的符号解析功能,传入参数`-Wl,--wrap,malloc`使得链接器在解析符号时,将符号malloc解析成__wrap_malloc,而将符号__real_malloc解析到malloc上。然后定义我们的包装函数为__wrap_malloc,并且在包装函数内部需要调用malloc时, 使用__real_malloc,同样可以截获调用 27 | 28 | ```c 29 | /* mymalloc.c linktime wrapper function */ 30 | void *__real_malloc(size_t size); 31 | 32 | /* malloc wrapper function */ 33 | void *__wrap_malloc(size_t size) 34 | { 35 | void *ptr = __real_malloc(size); /* Call libc malloc */ 36 | printf("malloc(%d) = %p\n", (int)size, ptr); 37 | return ptr; 38 | } 39 | ``` 40 | 41 | 3. 加载/运行时打桩 42 | 加载/运行时打桩技术主要使用动态链接器的环境变量技术,在运行时设置环境变量LD_PRELOAD为你实现已经编译好的包含包装程序的动态链接库所在的路径,使得在加载运行时如果动态链接器遇到没有解析的符号会首先去LD_PRELOAD所指向的路径下查找。这样也达到了截获库函数调用的目的。 43 | ```c 44 | void *malloc(size_t size) 45 | { 46 | void *(*mallocp)(size_t size); 47 | char *error; 48 | 49 | mallocp = dlsym(RTLD_NEXT, "malloc"); /* Get address of libc malloc */ 50 | if ((error = dlerror()) != NULL) { 51 | fputs(error, stderr); 52 | exit(1); 53 | } 54 | char *ptr = mallocp(size); /* Call libc malloc */ 55 | printf("malloc(%d) = %p\n", (int)size, ptr); 56 | return ptr; 57 | } 58 | ``` 59 | 60 | ### File 61 | int.c 想要追踪的源程序 62 | malloc.h 编译时打桩使用的头文件 63 | mymalloc.c 三次打桩共用的源文件,通过编译时传入变量`-Dxx`的形式来控制编译的内容 64 | 65 | intc 编译时打桩生成的可执行目标文件 66 | 67 | intl 链接时打桩生成的可执行目标文件 68 | 69 | intr 运行时打桩生成的可执行目标文件 70 | 71 | ### Command 72 | 1. 
编译时打桩 73 | 74 | make命令 75 | 76 | 编译: make intc 77 | 运行: make runc 78 | 79 | 也可以使用gcc命令 80 | 81 | ```shell 82 | linux > gcc -Wall -Og -DCOMPILETIME -c mymalloc.c 83 | linux > gcc -Wall -Og -I. -o intc int.c mymalloc.o 84 | 85 | 运行 86 | ./intc 87 | ``` 88 | 89 | 90 | 91 | 2. 链接时打桩 92 | 93 | make命令: 94 | 95 | 编译: make intl 96 | 运行: make runl 97 | 98 | gcc命令: 99 | 100 | ```shell 101 | linux > gcc -Wall -Og -DLINKTIME -c mymalloc.c 102 | linux > gcc -Wall -Og -c int.c 103 | linux > gcc -Wall -Og -Wl,--wrap,malloc -Wl,--wrap,free -o intl int.o mymalloc.o 104 | 105 | 运行: 106 | ./intl 107 | ``` 108 | 109 | 110 | 111 | 3. 加载/运行时打桩 112 | 113 | make命令: 114 | 115 | 编译: make intr 116 | 运行: make runr 117 | 118 | gcc命令: 119 | 120 | ```shell 121 | linux > gcc -Wall -Og -DRUNTIME -shared -fpic -o mymalloc.so mymalloc.c -ldl 122 | linux > gcc -Wall -Og -g -o intr int.c 123 | 124 | 运行 125 | bash: 126 | (LD_PRELOAD="./mymalloc.so" ./intr) 127 | ``` 128 | 129 | 130 | 131 | 如需生成所有文件直接执行`linux > make all` 132 | 清理所有中间文件,执行`linux > make clean` 133 | 具体的GCC命令可以参考Makefile 134 | -------------------------------------------------------------------------------- /Linking/readme.md: -------------------------------------------------------------------------------- 1 | # 链接 2 | ## 目录说明 3 | linking目录为计算机体系结构课程**链接**部分的演示实验目录,包含simple_linking_example, symbol_resolution_exercise, duplicate_symbol_names_example, example_static_and_dynamic_linking,library_interpositioning_demo五个演示实验。 4 | 每个实验目录下包含所有用到的源代码,以及相应的readme介绍和Makefile文件(包含所有的编译、运行命令) 5 | 6 | ## 文件格式说明 7 | .c c源文件 8 | .h c头文件 9 | .s 编译器生成的汇编语言文件 10 | .o 编译器生成的目标文件 11 | 可执行目标文件 一般无后缀,且名字易理解 12 | .a 库文件/存档文件 13 | .so 共享目标文件/动态链接库 14 | .d 反汇编生成的文件 15 | 16 | ## Make工具介绍 17 | 18 | 这里为了方便后面对Makefile文件的阅读,简单介绍make工具及Makefile文件: 19 | Linux开发常使用make工具构建大型程序,在命令行中输入`make`,将调用make工具在当前目录下查找Makefile文件并执行,其中Makefile主要记录构建过程中的依赖以及构建命令。 20 | Makefile 的命令主要采用如下形式 21 | 22 | ``` 23 | target: pre 24 | command1 25 | command2 26 | ``` 27 |
目标target依赖于pre文件,生成target文件需要执行之后的command1,2... 28 | 如果使用`make target`,make工具将会去查找当前目录下是否有target, pre这些文件,以及这些文件是否是最新的,如果发现依赖的文件中有更新或修改,则会重新生成文件target。另外,Makefile也可以定义伪目标,即它不是一个真正的我们需要的文件,而可以是某种操作,比如清除所有中间生成的文件等,详细信息,自行搜索。 29 | 30 | ### Makefile 基本语法 31 | ```make 32 | CC = gcc # 定义将要使用的编译工具是gcc 33 | CFLAGS = -Wall -Og #定义将采用的gcc命令行标志有-Wall -Og 34 | 35 | prog: main.o sum.o #说明prog文件的生成依赖于main.o和sum.o,如果没有这两个文件,make会先去找是否有这两个目标的依赖,而在这里我们可以省略.o目标的定义如`main.o: main.c`,make工具会自动完成main.o的编译 36 | $(CC) $(CFLAGS) -o prog main.o sum.o #这就是一条gcc编译指令,只是使用了在之前定义的变量 CC和CFLAGS 37 | 38 | clean: 39 | rm -rf *.o *.s *.a *.so #定义了一个伪目标,这个伪目标没有任何依赖,需要执行rm操作 40 | 41 | ``` 42 | -------------------------------------------------------------------------------- /Linking/simple_linking_example/Makefile: -------------------------------------------------------------------------------- 1 | CC = gcc 2 | CFLAGS = -Wall -Og 3 | 4 | prog: main.o sum.o 5 | $(CC) $(CFLAGS) -o prog main.o sum.o 6 | $(CC) $(CFLAGS) -S main.c 7 | objdump -dx main.o > main.d 8 | objdump -dr main.o > main-relo.d 9 | objdump -dx -j .data main.o > maindata-relo.d 10 | readelf -s main.o > mainsym.d 11 | objdump -dx sum.o > sum.d 12 | objdump -dr sum.o > sum-relo.d 13 | objdump -dx -j .data sum.o > sumdata-relo.d 14 | objdump -dx prog > prog-exe.d 15 | objdump -dx -j .data prog > progdata-exe.d 16 | 17 | clean: 18 | rm -f prog *.o *~ *.d *.so *.a *.s 19 | 20 | 21 | -------------------------------------------------------------------------------- /Linking/simple_linking_example/main.c: -------------------------------------------------------------------------------- 1 | /* main.c */ 2 | /* $begin main */ 3 | int sum(int *a, int n); 4 | 5 | int array[2] = {1, 2}; 6 | 7 | int main() 8 | { 9 | int val = sum(array, 2); 10 | return val; 11 | } 12 | /* $end main */ 13 | -------------------------------------------------------------------------------- /Linking/simple_linking_example/readme.md: 
/*
 * sum - add up the first n ints stored at a.
 *
 * a: first element to add
 * n: number of elements; nothing is read when n <= 0
 * Returns the total (0 when n <= 0).
 */
int sum(int *a, int n)
{
    int total = 0;
    int k = 0;

    while (k < n) {
        total += a[k];
        k++;
    }
    return total;
}
/Linking/symbol_resolution_exercise/m.c: -------------------------------------------------------------------------------- 1 | /* m.c */ 2 | /* $begin symprobmain */ 3 | void swap(); 4 | 5 | int buf[2] = {1, 2}; 6 | 7 | int main() 8 | { 9 | swap(); 10 | return 0; 11 | } 12 | /* $end symprobmain */ 13 | 14 | -------------------------------------------------------------------------------- /Linking/symbol_resolution_exercise/readme.md: -------------------------------------------------------------------------------- 1 | # 演示实验介绍 2 | 3 | 本目录为符号解析练习的验证演示(对应PPT 20页),请大家首先完成PPT上的练习(完成该表格)。然后可以将swap.c和m.c编译之后,反汇编查看它的符号表验证你的想法是否正确。 4 | 5 | 具体步骤: 6 | 7 | ```shell 8 | linux > gcc -Wall -Og -c swap.c 9 | linux > gcc -Wall -Og -c m.c 10 | linux > objdump -t swap.o > swapsym.d 11 | linux > objdump -t m.o > msym.d 12 | ``` 13 | 14 | ### shell instruction 15 | objdump 使用说明 16 | -d disassemble 反汇编所有可以反汇编的部分 17 | -x 所有头部信息 18 | -j 指定需要的节 19 | -t symboltable 符号表信息 20 | -T 和 -t 选项的区别在于 -T 只能查看动态符号,如库导出的函数和引用其他库的函数,而 -t 可以查看所有的符号,包括数据段的符号 21 | -r relocation 重定位信息, 可以查看.o文件的重定位信息 22 | -h obj 输出目标文件的所有段概括 23 | -j 输出指定段的信息 24 | -S 混合显示源代码与反汇编信息 25 | 26 | 关于objdump的用法详情可查看objdump --help 27 | 28 | ### File 29 | swap.c 30 | m.c PPT上作为练习的示例程序 31 | Makefile 32 | 以下两个.d文件为包含符号表信息的swap.o及m.o反汇编文件: 33 | swapsym.d 34 | msym.d 35 | 36 | ### Command 37 | 本目录下的命令使用Makefile管理 38 | 如需生成所有的文件,直接执行`linux > make swap` 39 | 清理所有中间文件,执行`linux > make clean` 40 | 其他GCC命令可参考Makefile 41 | -------------------------------------------------------------------------------- /Linking/symbol_resolution_exercise/swap.c: -------------------------------------------------------------------------------- 1 | /* swap.c */ 2 | /* $begin swap */ 3 | extern int buf[]; 4 | 5 | int *bufp0 = &buf[0]; 6 | int *bufp1; 7 | 8 | void swap() 9 | { 10 | int temp; 11 | 12 | bufp1 = &buf[1]; 13 | temp = *bufp0; 14 | *bufp0 = *bufp1; 15 | *bufp1 = temp; 16 | } 17 | /* $end swap */ 18 | 19 |
-------------------------------------------------------------------------------- /Linking/引导习题参考答案.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hitsz-ids2021/Architecture_Course/a9c20e2107d41805ade34f3d559b307a24f3abca/Linking/引导习题参考答案.pdf -------------------------------------------------------------------------------- /Optimizing_Program_Performancess/README.md: -------------------------------------------------------------------------------- 1 | # 程序性能优化 2 | ``` 3 | 本目录包含程序性能优化专题相关源程序 4 | ``` 5 | ### 1. 合并函数优化及其评测:benchmark; 6 | ### 2. 矩阵乘法示例:matrix_multiplication; -------------------------------------------------------------------------------- /Optimizing_Program_Performancess/benchmark/README.md: -------------------------------------------------------------------------------- 1 | # 合并函数优化及其测试 2 | 3 | ### 1. 运行脚本sh run.sh执行程序示例 4 | ### 2. 修改run.sh的-D参数可以更改测试内容 5 | ``` 6 | 测试数据类型: 7 | 整型 -DINT 8 | 长整型 -DLONG 9 | 单精度浮点型 -DFLOAT 10 | 双精度浮点型 -DDOUBLE 11 | 长双精度浮点型 -DEXTEND 12 | 字符型 -DCHAR 13 | 测试操作: 14 | 默认加法 15 | 乘法 -DPROD 16 | 除法 -DDIV 17 | ``` -------------------------------------------------------------------------------- /Optimizing_Program_Performancess/benchmark/benchmark.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "include/cpe.h" 5 | #include "combine.h" 6 | 7 | 8 | #define SHORT 0 9 | #if SHORT 10 | #define ASIZE 31 11 | #else 12 | #define ASIZE 973 13 | #endif 14 | 15 | /* Keep track of a number of different combining programs */ 16 | #define MAX_BENCHMARKS 100 17 | 18 | static struct { 19 | combiner cfunct; 20 | combiner checkfunct; 21 | char *description; 22 | double cpe; 23 | } benchmarks[MAX_BENCHMARKS]; 24 | 25 | static long benchmark_count = 0; 26 | 27 | static long current_benchmark = 0; 28 | 29 | static vec_ptr data; 30 | static data_t combine_result; 31 | 32 | /* Used to make sure code 
doesn't get optimized away */ 33 | volatile data_t sink; 34 | 35 | /* Log especially fast or slow cases */ 36 | combiner log_combiner_fun = NULL; 37 | double log_slow_cpe = 0.0; 38 | double log_fast_cpe = 1000.0; 39 | char *log_name = "benchmark-log.txt"; 40 | char *log_fast_name = "benchmark-log-fast.txt"; 41 | char *log_slow_name = "benchmark-log-slow.txt"; 42 | 43 | static void setup() 44 | { 45 | long i; 46 | data = new_vec(ASIZE); 47 | /* Initialize array */ 48 | for (i = 0; i < ASIZE; i++) 49 | #if 0 50 | /* This runs into overflow inefficiencies with FLOAT PROD */ 51 | set_vec_element(data, i, (data_t) (i+1)); 52 | #else 53 | set_vec_element(data, i, (data_t) (random() & 0x1) ? -1 : 1); 54 | #endif 55 | sink = (data_t) 0; 56 | } 57 | 58 | void run(long cnt) { 59 | set_vec_length(data, cnt); 60 | benchmarks[current_benchmark].cfunct(data, &combine_result); 61 | } 62 | 63 | /* Perform test of combination function */ 64 | static void run_test(long bench_index) { 65 | double cpe; 66 | char *description = benchmarks[bench_index].description; 67 | data_t good_result; 68 | FILE *logfile = NULL; 69 | current_benchmark = bench_index; 70 | setup(); 71 | 72 | if (benchmarks[bench_index].cfunct == log_combiner_fun) { 73 | logfile = fopen(log_name, "w"); 74 | if (!logfile) { 75 | fprintf(stderr, "Failed to open log file\n"); 76 | exit(1); 77 | } 78 | } 79 | cpe = find_cpe_full(run, ASIZE, 200, logfile, RAN_SAMPLE, 0.3, 2); 80 | if (logfile) { 81 | fclose(logfile); 82 | if (cpe <= log_fast_cpe) { 83 | if (rename(log_name, log_fast_name)) { 84 | fprintf(stderr, "Couldn't rename fast cpe file\n"); 85 | exit(1); 86 | } 87 | } 88 | 89 | if (cpe >= log_slow_cpe) { 90 | if (rename(log_name, log_slow_name)) { 91 | fprintf(stderr, "Couldn't rename slow cpe file\n"); 92 | exit(1); 93 | } 94 | } 95 | } 96 | benchmarks[bench_index].cfunct(data, &combine_result); 97 | benchmarks[bench_index].checkfunct(data, &good_result); 98 | if (combine_result != good_result) { 99 | 
printf("Function %s, Should be %ld, Got %ld\n", 100 | description, (long) good_result, (long) combine_result); 101 | } 102 | benchmarks[current_benchmark].cpe = cpe; 103 | /* print results */ 104 | /* Column Heading */ 105 | printf("%s %s %s:\n", DATA_NAME, OP_NAME, description); 106 | printf("%.2f cycles/element\n", cpe); 107 | } 108 | 109 | void add_combiner(combiner f, combiner fc, char *description) { 110 | benchmarks[benchmark_count].cfunct = f; 111 | benchmarks[benchmark_count].checkfunct = fc; 112 | benchmarks[benchmark_count].description = description; 113 | benchmark_count++; 114 | } 115 | 116 | void log_combiner(combiner f, double fast_cpe, double slow_cpe) { 117 | log_combiner_fun = f; 118 | log_fast_cpe = fast_cpe; 119 | log_slow_cpe = slow_cpe; 120 | } 121 | 122 | int main() 123 | { 124 | long i; 125 | register_combiners(); 126 | for (i = 0; i < benchmark_count; i++) { 127 | run_test(i); 128 | } 129 | printf("\n"); 130 | return 0; 131 | } 132 | 133 | -------------------------------------------------------------------------------- /Optimizing_Program_Performancess/benchmark/combine.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include "combine.h" 3 | 4 | /* Combining functions */ 5 | 6 | char combine1_descr[] = "combine1: Maximum use of data abstraction"; 7 | /* $begin combine1 */ 8 | /* Implementation with maximum use of data abstraction */ 9 | void combine1(vec_ptr v, data_t *dest) 10 | { 11 | long i; 12 | 13 | *dest = IDENT; 14 | for (i = 0; i < vec_length(v); i++) { 15 | data_t val; 16 | get_vec_element(v, i, &val); 17 | /* $begin combineline */ 18 | *dest = *dest OP val; 19 | /* $end combineline */ 20 | } 21 | } 22 | /* $end combine1 */ 23 | 24 | char combine2_descr[] = "combine2: Take vec_length() out of loop"; 25 | /* $begin combine2 */ 26 | /* Move call to vec_length out of loop */ 27 | void combine2(vec_ptr v, data_t *dest) 28 | { 29 | long i; 30 | long length = vec_length(v); 31 | 32 | 
/* Label passed to add_combiner() for this routine.
 * NOTE(review): the text says "unroll2a_combine" while the function is named
 * unroll2a_combin -- confirm which spelling is intended. */
char unroll2a_combin_descr[] = "unroll2a_combine: Array code, unrolled by 2";
/* $begin combine5 */
/* 2 x 1 loop unrolling */
/*
 * unroll2a_combin - combine all elements of v with OP, starting from IDENT,
 * and store the result in *dest.
 *
 * The main loop consumes two elements per iteration but keeps a single
 * accumulator x, applying OP left to right, so every operation uses the
 * result of the one before it.
 */
void unroll2a_combin(vec_ptr v, data_t *dest)
{

    long length = vec_length(v);
    /* Stop one short of the end so that data[i+1] below stays in bounds */
    long limit = length-1;
    data_t *data = get_vec_start(v);
    data_t x = IDENT;
    long i;
    /* Combine 2 elements at a time */
    for (i = 0; i < limit; i+=2) {
	/* $begin combine5-update */
	x = (x OP data[i]) OP data[i+1];
	/* $end combine5-update */
    }

    /* Finish any remaining elements */
    /* (at most one: the last element when length is odd) */
    for (; i < length; i++) {
	x = x OP data[i];
    }
    *dest = x;
}
/* $end combine5 */
/* Label passed to add_combiner() for this routine */
char unroll2x2_combine_descr[] = "unroll2x2_combine: Array code, unrolled by 2, Superscalar x2";
/* $begin combine6 */
/* 2 x 2 loop unrolling */
/*
 * unroll2x2_combine - combine all elements of v with OP and store the result
 * in *dest, using two independent accumulators.
 *
 * x0 collects the even-indexed elements and x1 the odd-indexed ones; the two
 * partial results are merged with one final OP.
 * NOTE(review): this changes the order in which OP is applied compared with
 * combine1, so for a non-associative OP (the DIV build) or floating-point
 * data the result is not guaranteed to match in general -- confirm this is
 * acceptable for the data produced by setup().
 */
void unroll2x2_combine(vec_ptr v, data_t *dest)
{
    long length = vec_length(v);
    /* Stop one short of the end so that data[i+1] below stays in bounds */
    long limit = length-1;
    data_t *data = get_vec_start(v);
    data_t x0 = IDENT;
    data_t x1 = IDENT;
    long i;
    /* Combine 2 elements at a time */
    for (i = 0; i < limit; i+=2) {
	x0 = x0 OP data[i];
	x1 = x1 OP data[i+1];
    }
    /* Finish any remaining elements */
    /* (at most one, folded into x0) */
    for (; i < length; i++) {
	x0 = x0 OP data[i];
    }
    *dest = x0 OP x1;
}
/* $end combine6 */
double data_t; 8 | #define DATA_NAME "Double" 9 | #endif 10 | 11 | 12 | #ifdef EXTEND 13 | typedef long double data_t; 14 | #define DATA_NAME "Extended" 15 | #endif 16 | 17 | #ifdef INT 18 | typedef int data_t; 19 | #define DATA_NAME "Integer" 20 | #endif 21 | 22 | #ifdef LONG 23 | /* $begin typedefint */ 24 | typedef long data_t; 25 | /* $end typedefint */ 26 | #define DATA_NAME "Long" 27 | #endif 28 | 29 | #ifdef CHAR 30 | typedef char data_t; 31 | #define DATA_NAME "Char" 32 | #endif 33 | 34 | #ifdef PROD 35 | /* $begin operprod */ 36 | #define IDENT 1 37 | #define OP * 38 | /* $end operprod */ 39 | #define OP_NAME "Product" 40 | #else 41 | #ifdef DIV 42 | #define OP / 43 | #define IDENT 1 44 | #define OP_NAME "Divide" 45 | #else 46 | /* $begin operplus */ 47 | #define IDENT 0 48 | #define OP + 49 | /* $end operplus */ 50 | #define OP_NAME "Sum" 51 | #endif /* DIV */ 52 | #endif /* PROD */ 53 | 54 | #include "vec.h" 55 | 56 | /* Declaration of a combining routine */ 57 | /* Source vector, destination location */ 58 | typedef void (*combiner)(vec_ptr, data_t *); 59 | 60 | /* Add combining routine to list of programs to measure */ 61 | void add_combiner(combiner f, combiner fc, char *description); 62 | 63 | /* Flag combiner for logging, giving bounds for fast and slow cases */ 64 | /* Can only log one combiner at a time */ 65 | void log_combiner(combiner f, double fast_cpe, double slow_cpe); 66 | 67 | /* Called by main to register the set of transposition routines to benchmark */ 68 | void register_combiners(void); 69 | 70 | 71 | -------------------------------------------------------------------------------- /Optimizing_Program_Performancess/benchmark/include/clock.h: -------------------------------------------------------------------------------- 1 | /* Routines for using cycle counter */ 2 | 3 | /* Start the counter */ 4 | void start_counter(); 5 | 6 | /* Get # cycles since counter started. 
Returns 1e20 if detect timing anomaly */
/* NOTE(review): neither get_counter() implementation in src/clock.c returns
   1e20; the sentence above looks inherited from an older version. */
double get_counter();

/* Determine clock rate of processor (using a default sleeptime) */
double mhz(int verbose);

/* Determine clock rate of processor, having more control over accuracy */
/* NOTE(review): src/clock.c defines mhz() but no mhz_full(); calling this
   will fail at link time unless it is defined elsewhere. */
double mhz_full(int verbose, int sleeptime);

--------------------------------------------------------------------------------
/Optimizing_Program_Performancess/benchmark/include/cpe.h:
--------------------------------------------------------------------------------
/* Compute CPE for function */
/* This header uses FILE but does not include <stdio.h> itself, so includers
   must include <stdio.h> first (src/cpe.c does). */

/* Compute for function that is linear in some parameter cnt */
typedef void (*elem_fun_t)(long int);

/* Different ways of finding samples
   UNI_SAMPLE: samples uniformly spaced between bias*maxcnt and maxcnt
   RAN_SAMPLE: samples randomly selected between bias*maxcnt and maxcnt
*/

typedef enum {UNI_SAMPLE, RAN_SAMPLE}
sample_t;

/* Find cpe for function f, which allows cnt up to maxcnt.
   Uses default parameters
*/
double find_cpe(elem_fun_t f, long int maxcnt);

/* Find cpe for function f, which allows cnt up to maxcnt, using
   specified number of sample points.
   If data_file, then print data so that can plot points with Excel
   smethod determines method for generating samples
   The return value is the slope of the least squares fit of measured cycles
   against cnt.
*/
double find_cpe_full(elem_fun_t f, long int maxcnt, long int samples, FILE *data_file,
                     sample_t smethod, double bias, long int verbose);

/* Find number of cycles taken by function.
   Do this by running number of trials until best two within TOL (2%) of
   each other
*/
/* NOTE(review): the defaults in src/fcyc.c are K = 3 and epsilon = 0.01,
   i.e. best three within 1%; the "best two within 2%" wording looks stale. */
double measure_function(elem_fun_t f, long int cnt);

--------------------------------------------------------------------------------
/Optimizing_Program_Performancess/benchmark/include/fcyc.h:
--------------------------------------------------------------------------------

/* Fcyc measures the speed of any "test function."
Such a function
   is passed a list of integer parameters, which it may interpret
   in any way it chooses.
*/

typedef void (*test_funct)(long int *);

/* Compute number of cycles used by function f on given set of parameters */
/* The value returned is the smallest of the timing samples that were kept. */
double fcyc(test_funct f, long int* params);

/***********************************************************/
/* Set the various parameters used by measurement routines */
/* Each setter stores its argument in file-scope state in src/fcyc.c; the new
   value is used by later calls to fcyc(). */


/* When set, will run code to clear cache before each measurement
   Default = 0
*/
void set_fcyc_clear_cache(long int clear);

/* Set size of cache to use when clearing cache
   Default = 1<<19 (512KB)
*/
void set_fcyc_cache_size(long int bytes);

/* Set size of cache block
   Default = 32
*/
void set_fcyc_cache_block(long int bytes);

/* When set, will attempt to compensate for timer interrupt overhead
   Default = 0
*/
void set_fcyc_compensate(long int compensate);

/* Value of K in K-best
   Default = 3
*/
void set_fcyc_k(long int k);

/* Maximum number of samples attempting to find K-best within some tolerance.
   When exceeded, just return best sample found.
   Default = 20
*/
void set_fcyc_maxsamples(long int maxsamples);

/* Tolerance required for K-best
   Default = 0.01
*/
void set_fcyc_epsilon(double epsilon);



--------------------------------------------------------------------------------
/Optimizing_Program_Performancess/benchmark/include/lsquare.h:
--------------------------------------------------------------------------------
/* Compute least squares fit of set of data points */

/* Fit is of form y = mx + b.
m is slope, b is intercept */
double ls_slope(double *xval, double *yval, int cnt);
double ls_intercept(double *xval, double *yval, int cnt);

typedef enum {LS_AVG, LS_MAX} ls_err_t;

/* Determine error of least squares fit: the average (LS_AVG) or the maximum
   (LS_MAX) of the per-point relative errors */
double ls_error(double *xval, double *yval, int cnt, ls_err_t etype);







--------------------------------------------------------------------------------
/Optimizing_Program_Performancess/benchmark/run.sh:
--------------------------------------------------------------------------------
gcc -o benchmark benchmark.c vec.c combine.c src/cpe.c src/lsquare.c src/fcyc.c src/clock.c -DINT
./benchmark
gcc -o benchmark benchmark.c vec.c combine.c src/cpe.c src/lsquare.c src/fcyc.c src/clock.c -DINT -DPROD
./benchmark
gcc -o benchmark benchmark.c vec.c combine.c src/cpe.c src/lsquare.c src/fcyc.c src/clock.c -DFLOAT
./benchmark
gcc -o benchmark benchmark.c vec.c combine.c src/cpe.c src/lsquare.c src/fcyc.c src/clock.c -DFLOAT -DPROD
./benchmark

--------------------------------------------------------------------------------
/Optimizing_Program_Performancess/benchmark/src/Makefile:
--------------------------------------------------------------------------------
# Builds the timing support code (clock, fcyc, lsquare, cpe, csapp) into a
# static library under $(LIB).
# NOTE(review): the csapp32.o/csapp64.o rules need csapp.c and
# $(INC)/csapp.h; confirm those files exist next to this Makefile.
CC = gcc
CFLAGS = -O1 -Wall
#CFLAGS = -O1 -Wall -DUSE_TSC
INC = ../include
LIB = ../lib

F64 =-m64

F32 =-m32

OBJS32 = clock32.o fcyc32.o csapp32.o lsquare32.o cpe32.o
OBJS64 = clock64.o fcyc64.o csapp64.o lsquare64.o cpe64.o

OBJS = $(OBJS64)

# Neither target names a file
.PHONY: all clean

all: $(LIB)/libcsapp64.a
#all: $(LIB)/libcsapp32.a $(LIB)/libcsapp64.a

### 32 Bit Code
# This file must be compiled with gcc due to embedded assembly
clock32.o: clock.c $(INC)/clock.h
	gcc $(CFLAGS) $(F32) -c -o clock32.o clock.c -I$(INC)

fcyc32.o: fcyc.c $(INC)/fcyc.h $(INC)/clock.h
	$(CC) $(CFLAGS) $(F32) -c -o fcyc32.o fcyc.c -I$(INC)

csapp32.o: $(INC)/csapp.h csapp.c
	$(CC) $(CFLAGS) $(F32) -c -o csapp32.o csapp.c -I$(INC)

lsquare32.o: $(INC)/lsquare.h lsquare.c
	$(CC) $(CFLAGS) $(F32) -c -o lsquare32.o lsquare.c -I$(INC)

# cpe.c includes fcyc.h, cpe.h, lsquare.h and clock.h
cpe32.o: $(INC)/clock.h $(INC)/fcyc.h $(INC)/lsquare.h $(INC)/cpe.h cpe.c
	$(CC) $(CFLAGS) $(F32) -c -o cpe32.o cpe.c -I$(INC)

$(LIB)/libcsapp32.a: $(OBJS32)
	mkdir -p $(LIB)
	ar rcs $(LIB)/libcsapp32.a $(OBJS32)

### 64 Bit Code
# This file must be compiled with gcc due to embedded assembly
clock64.o: clock.c $(INC)/clock.h
	gcc $(CFLAGS) $(F64) -c -o clock64.o clock.c -I$(INC)

fcyc64.o: fcyc.c $(INC)/fcyc.h $(INC)/clock.h
	$(CC) $(CFLAGS) $(F64) -c -o fcyc64.o fcyc.c -I$(INC)

csapp64.o: $(INC)/csapp.h csapp.c
	$(CC) $(CFLAGS) $(F64) -c -o csapp64.o csapp.c -I$(INC)

lsquare64.o: $(INC)/lsquare.h lsquare.c
	$(CC) $(CFLAGS) $(F64) -c -o lsquare64.o lsquare.c -I$(INC)

# cpe.c includes fcyc.h, cpe.h, lsquare.h and clock.h
cpe64.o: $(INC)/clock.h $(INC)/fcyc.h $(INC)/lsquare.h $(INC)/cpe.h cpe.c
	$(CC) $(CFLAGS) $(F64) -c -o cpe64.o cpe.c -I$(INC)

$(LIB)/libcsapp64.a: $(OBJS64)
	mkdir -p $(LIB)
	ar rcs $(LIB)/libcsapp64.a $(OBJS64)


#
# Sparc cycle counter examples (for future reference)
#
#sparc_tick.o: sparc_tick.s
#	as -xarch=v8plus sparc_tick.s
#
# This tests the get_tick routine in sparc_tick.s
#sparc_tick_driver: sparc_tick_driver.c sparc_tick.o
#	$(CC) $(CFLAGS) -o sparc_tick_driver sparc_tick_driver.c sparc_tick.o


clean:
	rm -f sparc_tick_driver *.o *~

--------------------------------------------------------------------------------
/Optimizing_Program_Performancess/benchmark/src/clock.c:
--------------------------------------------------------------------------------
/* clock.c
 * Retrofitted to use thread-specific timers
 * and to get clock information from /proc/cpuinfo
 * (C) R. E.
Bryant, 2010 5 | * 6 | */ 7 | 8 | /* When this constant is not defined, uses time stamp counter */ 9 | #define USE_POSIX 0 10 | 11 | /* Choice to use cpu_gettime call or Intel time stamp counter directly */ 12 | 13 | #include 14 | #include 15 | #include 16 | #ifdef USE_POSIX 17 | #include 18 | #endif 19 | #include "../include/clock.h" 20 | 21 | int gverbose = 1; 22 | 23 | /* Keep track of clock speed */ 24 | double cpu_ghz = 0.0; 25 | 26 | /* Get megahertz from /etc/proc */ 27 | #define MAXBUF 512 28 | 29 | 30 | double core_mhz(int verbose) { 31 | static char buf[MAXBUF]; 32 | FILE *fp = fopen("/proc/cpuinfo", "r"); 33 | cpu_ghz = 0.0; 34 | 35 | if (!fp) { 36 | fprintf(stderr, "Can't open /proc/cpuinfo to get clock information\n"); 37 | cpu_ghz = 1.0; 38 | return cpu_ghz * 1000.0; 39 | } 40 | while (fgets(buf, MAXBUF, fp)) { 41 | if (strstr(buf, "cpu MHz")) { 42 | double cpu_mhz = 0.0; 43 | sscanf(buf, "cpu MHz\t: %lf", &cpu_mhz); 44 | cpu_ghz = cpu_mhz / 1000.0; 45 | break; 46 | } 47 | } 48 | fclose(fp); 49 | if (cpu_ghz == 0.0) { 50 | fprintf(stderr, "Can't open /proc/cpuinfo to get clock information\n"); 51 | cpu_ghz = 1.0; 52 | return cpu_ghz * 1000.0; 53 | } 54 | if (verbose) { 55 | printf("Processor Clock Rate ~= %.4f GHz (extracted from file)\n", cpu_ghz); 56 | } 57 | return cpu_ghz * 1000; 58 | } 59 | 60 | double mhz(int verbose) { 61 | double val = core_mhz(verbose); 62 | return val; 63 | } 64 | 65 | #ifdef USE_POSIX 66 | /* Simulate counters by using nanosecond timers and then converting to clock cycles */ 67 | struct timespec last_time; 68 | 69 | /* Use thread clock */ 70 | #define CLKT CLOCK_THREAD_CPUTIME_ID 71 | 72 | void start_counter() 73 | { 74 | if (cpu_ghz == 0.0) 75 | mhz(gverbose); 76 | if (clock_gettime(CLKT, &last_time) != 0) { 77 | fprintf(stderr, "Couldn't get time\n"); 78 | exit(1); 79 | } 80 | } 81 | 82 | double get_counter() 83 | { 84 | struct timespec new_time; 85 | double delta_nsecs = 0.0; 86 | if (clock_gettime(CLKT, &new_time) != 0) 
{ 87 | fprintf(stderr, "Couldn't get time\n"); 88 | exit(1); 89 | } 90 | delta_nsecs = 1e9 * (new_time.tv_sec - last_time.tv_sec) + (new_time.tv_nsec - last_time.tv_nsec); 91 | return delta_nsecs * cpu_ghz; 92 | } 93 | 94 | #else /* !USE_POSIX */ 95 | 96 | /* Use x86 cycle counter */ 97 | 98 | /* Initialize the cycle counter */ 99 | static unsigned cyc_hi = 0; 100 | static unsigned cyc_lo = 0; 101 | 102 | /* Set *hi and *lo to the high and low order bits of the cycle counter. 103 | Implementation requires assembly code to use the rdtsc instruction. */ 104 | void access_counter(unsigned *hi, unsigned *lo) 105 | { 106 | asm("rdtsc; movl %%edx,%0; movl %%eax,%1" /* Read cycle counter */ 107 | : "=r" (*hi), "=r" (*lo) /* and move results to */ 108 | : /* No input */ /* the two outputs */ 109 | : "%edx", "%eax"); 110 | } 111 | 112 | 113 | /* Record the current value of the cycle counter. */ 114 | void start_counter() 115 | { 116 | access_counter(&cyc_hi, &cyc_lo); 117 | } 118 | 119 | /* Return the number of cycles since the last call to start_counter. 
*/ 120 | double get_counter() 121 | { 122 | unsigned ncyc_hi, ncyc_lo; 123 | unsigned hi, lo, borrow; 124 | double result; 125 | 126 | /* Get cycle counter */ 127 | access_counter(&ncyc_hi, &ncyc_lo); 128 | 129 | /* Do double precision subtraction */ 130 | lo = ncyc_lo - cyc_lo; 131 | borrow = cyc_lo > ncyc_lo; 132 | hi = ncyc_hi - cyc_hi - borrow; 133 | result = (double) hi * (1 << 30) * 4 + lo; 134 | return result; 135 | } 136 | #endif /* USE_POSIX */ 137 | 138 | -------------------------------------------------------------------------------- /Optimizing_Program_Performancess/benchmark/src/cpe.c: -------------------------------------------------------------------------------- 1 | /* Compute CPE for function */ 2 | #include 3 | #include 4 | #include "../include/fcyc.h" 5 | #include "../include/cpe.h" 6 | #include "../include/lsquare.h" 7 | #include "../include/clock.h" 8 | 9 | 10 | /* Find number of cycles taken by function. 11 | Do this by running number of trials until best two within TOL of 12 | each other 13 | */ 14 | double measure_function(elem_fun_t f, long int cnt) 15 | { 16 | /* Need to fudge fact that fcyc wants a function taking an 17 | long int *, while our function takes an long int */ 18 | test_funct tf = (test_funct) f; 19 | return fcyc(tf, (long int *) (long int) cnt); 20 | } 21 | 22 | #define MAXCNT 100 23 | 24 | #define LIM (1<<30) 25 | 26 | /* LCM of unrolling degree */ 27 | #ifdef USE_UNI 28 | #define UNROLL 32 29 | #else /* USE_UNI */ 30 | #define UNROLL 1 31 | #endif 32 | 33 | static long int get_cnt(long int index, long int samples, 34 | long int maxcnt, sample_t smethod, double bias) 35 | { 36 | long int mincnt = (long int) (bias*maxcnt); 37 | double weight; 38 | long int val; 39 | switch (smethod) { 40 | case UNI_SAMPLE: 41 | weight = (double) index/(samples - 1); 42 | break; 43 | case RAN_SAMPLE: 44 | weight = (double) (random() % LIM) / (double) (LIM-1); 45 | break; 46 | default: 47 | fprintf(stderr, "Undefined sampling method %d\n", 
smethod); 48 | exit(1); 49 | } 50 | val = mincnt + weight*(maxcnt-mincnt); 51 | return UNROLL * (val/UNROLL); 52 | } 53 | 54 | #define SEED 31415 55 | 56 | /* Find cpe for function f, which allows cnt up to maxcnt, using 57 | specified number of sample points. 58 | If data_file, then print data so that can plot points with Excel 59 | smethod determines method for generating samples 60 | */ 61 | double find_cpe_full(elem_fun_t f, long int maxcnt, long int samples, FILE *data_file, 62 | sample_t smethod, double bias, long int verbose) 63 | { 64 | long int i; 65 | long int cnt; 66 | double cpe; 67 | double overhead = 0; 68 | double *cnt_val = calloc(samples, sizeof(double)); 69 | double *cycle_val = calloc(samples, sizeof(double)); 70 | /* Do the samples */ 71 | 72 | srandom(SEED); 73 | for (i = 0; i < samples; i++) { 74 | cnt = get_cnt(i, samples, maxcnt, smethod, bias); 75 | cnt_val[i] = cnt; 76 | cycle_val[i] = measure_function(f, cnt); 77 | if (cycle_val[i] < 1.0) { 78 | fprintf(stderr, "Got %.2f cycles for count %ld\n", cycle_val[i], cnt); 79 | } 80 | } 81 | /* Fit data */ 82 | cpe = ls_slope(cnt_val, cycle_val, samples); 83 | if (data_file) 84 | overhead = ls_intercept(cnt_val, cycle_val, samples); 85 | if (data_file && verbose > 1) { 86 | /* Print x values */ 87 | fprintf(data_file, "Cnt\t0"); 88 | for (i = 0; i < samples; i++) 89 | fprintf(data_file, "\t%.0f",cnt_val[i]); 90 | fprintf(data_file, "\n"); 91 | /* Print y values */ 92 | fprintf(data_file, "Cycs.\t"); 93 | for (i = 0; i < samples; i++) 94 | fprintf(data_file, "\t%.2f", cycle_val[i]); 95 | fprintf(data_file, "\n"); 96 | /* Print ax*b values */ 97 | fprintf(data_file, "Interp.\t%.2f", overhead); 98 | for (i = 0; i < samples; i++) 99 | fprintf(data_file, "\t%.2f", cpe*cnt_val[i]+overhead); 100 | fprintf(data_file, "\n"); 101 | } 102 | if (data_file && verbose) { 103 | /* Print results */ 104 | fprintf(data_file, "cpe\t%.2f\tovhd\t%.2f\tavgerr\t\%.3f\tmaxerr\t\%.3f\n", 105 | cpe, overhead, 106 | 
ls_error(cnt_val, cycle_val, samples, LS_AVG), 107 | ls_error(cnt_val, cycle_val, samples, LS_MAX)); 108 | } 109 | free(cnt_val); 110 | free(cycle_val); 111 | return cpe; 112 | } 113 | 114 | /* Use default parameters */ 115 | double find_cpe(elem_fun_t f, long int maxcnt) 116 | { 117 | return find_cpe_full(f, maxcnt, 100, stdout, RAN_SAMPLE, 0.3, 0); 118 | } 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | -------------------------------------------------------------------------------- /Optimizing_Program_Performancess/benchmark/src/fcyc.c: -------------------------------------------------------------------------------- 1 | /* Compute time used by function f */ 2 | #include 3 | #include 4 | #include 5 | 6 | #include "../include/clock.h" 7 | #include "../include/fcyc.h" 8 | 9 | #define K 3 10 | #define MAXSAMPLES 20 11 | #define EPSILON 0.01 12 | #define COMPENSATE 0 13 | #define CLEAR_CACHE 0 14 | #define CACHE_BYTES (1<<19) 15 | #define CACHE_BLOCK 32 16 | 17 | static long int kbest = K; 18 | static long int compensate = COMPENSATE; 19 | static long int clear_cache = CLEAR_CACHE; 20 | static long int maxsamples = MAXSAMPLES; 21 | static double epsilon = EPSILON; 22 | static long int cache_bytes = CACHE_BYTES; 23 | static long int cache_block = CACHE_BLOCK; 24 | 25 | static long int *cache_buf = NULL; 26 | 27 | static double *values = NULL; 28 | static long int samplecount = 0; 29 | 30 | #define KEEP_VALS 0 31 | #define KEEP_SAMPLES 0 32 | 33 | #if KEEP_SAMPLES 34 | static double *samples = NULL; 35 | #endif 36 | 37 | /* Start new sampling process */ 38 | static void init_sampler() 39 | { 40 | if (values) 41 | free(values); 42 | values = calloc(kbest, sizeof(double)); 43 | #if KEEP_SAMPLES 44 | if (samples) 45 | free(samples); 46 | /* Allocate extra for wraparound analysis */ 47 | samples = calloc(maxsamples+kbest, sizeof(double)); 48 | #endif 49 | samplecount = 0; 50 | } 51 | 52 | /* Add new sample. 
*/ 53 | static void add_sample(double val) 54 | { 55 | long int pos = 0; 56 | if (samplecount < kbest) { 57 | pos = samplecount; 58 | values[pos] = val; 59 | } else if (val < values[kbest-1]) { 60 | pos = kbest-1; 61 | values[pos] = val; 62 | } 63 | #if KEEP_SAMPLES 64 | samples[samplecount] = val; 65 | #endif 66 | samplecount++; 67 | /* Insertion sort */ 68 | while (pos > 0 && values[pos-1] > values[pos]) { 69 | double temp = values[pos-1]; 70 | values[pos-1] = values[pos]; 71 | values[pos] = temp; 72 | pos--; 73 | } 74 | } 75 | 76 | /* Have kbest minimum measurements converged within epsilon? */ 77 | static long int has_converged() 78 | { 79 | return 80 | (samplecount >= kbest) && 81 | ((1 + epsilon)*values[0] >= values[kbest-1]); 82 | } 83 | 84 | /* Code to clear cache */ 85 | 86 | 87 | static volatile long int sink = 0; 88 | 89 | static void clear() 90 | { 91 | long int x = sink; 92 | long int *cptr, *cend; 93 | long int incr = cache_block/sizeof(long int); 94 | if (!cache_buf) { 95 | cache_buf = malloc(cache_bytes); 96 | if (!cache_buf) { 97 | fprintf(stderr, "Fatal error. 
Malloc returned null when trying to clear cache\n"); 98 | exit(1); 99 | } 100 | } 101 | cptr = (long int *) cache_buf; 102 | cend = cptr + cache_bytes/sizeof(long int); 103 | while (cptr < cend) { 104 | x += *cptr; 105 | cptr += incr; 106 | } 107 | sink = x; 108 | } 109 | 110 | double fcyc(test_funct f, long int *params) 111 | { 112 | double result; 113 | init_sampler(); 114 | if (compensate) { 115 | do { 116 | double cyc; 117 | if (clear_cache) 118 | clear(); 119 | start_counter(); 120 | f(params); 121 | cyc = get_counter(); 122 | if (cyc > 0.0) 123 | add_sample(cyc); 124 | } while (!has_converged() && samplecount < maxsamples); 125 | } else { 126 | do { 127 | double cyc; 128 | if (clear_cache) 129 | clear(); 130 | start_counter(); 131 | f(params); 132 | cyc = get_counter(); 133 | if (cyc > 0.0) 134 | add_sample(cyc); 135 | } while (!has_converged() && samplecount < maxsamples); 136 | } 137 | #ifdef DEBUG 138 | { 139 | long int i; 140 | printf(" %ld smallest values: [", kbest); 141 | for (i = 0; i < kbest; i++) 142 | printf("%.0f%s", values[i], i==kbest-1 ? 
"]\n" : ", "); 143 | } 144 | #endif 145 | result = values[0]; 146 | #if !KEEP_VALS 147 | free(values); 148 | values = NULL; 149 | #endif 150 | return result; 151 | } 152 | 153 | 154 | /***********************************************************/ 155 | /* Set the various parameters used by measurement routines */ 156 | 157 | 158 | /* When set, will run code to clear cache before each measurement 159 | Default = 0 160 | */ 161 | void set_fcyc_clear_cache(long int clear) 162 | { 163 | clear_cache = clear; 164 | } 165 | 166 | /* Set size of cache to use when clearing cache 167 | Default = 1<<19 (512KB) 168 | */ 169 | void set_fcyc_cache_sizee(long int bytes) 170 | { 171 | if (bytes != cache_bytes) { 172 | cache_bytes = bytes; 173 | if (cache_buf) { 174 | free(cache_buf); 175 | cache_buf = NULL; 176 | } 177 | } 178 | } 179 | 180 | /* Set size of cache block 181 | Default = 32 182 | */ 183 | void set_fcyc_cache_block(long int bytes) { 184 | cache_block = bytes; 185 | } 186 | 187 | 188 | /* When set, will attempt to compensate for timer interrupt overhead 189 | Default = 0 190 | */ 191 | void set_fcyc_compensate(long int compensate_arg) 192 | { 193 | compensate = compensate_arg; 194 | } 195 | 196 | /* Value of K in K-best 197 | Default = 3 198 | */ 199 | void set_fcyc_k(long int k) 200 | { 201 | kbest = k; 202 | } 203 | 204 | /* Maximum number of samples attempting to find K-best within some tolerance. 205 | When exceeded, just return best sample found. 
206 | Default = 20 207 | */ 208 | void set_fcyc_maxsamples(long int maxsamples_arg) 209 | { 210 | maxsamples = maxsamples_arg; 211 | } 212 | 213 | /* Tolerance required for K-best 214 | Default = 0.01 215 | */ 216 | void set_fcyc_epsilon(double epsilon_arg) 217 | { 218 | epsilon = epsilon_arg; 219 | } 220 | 221 | 222 | 223 | 224 | 225 | -------------------------------------------------------------------------------- /Optimizing_Program_Performancess/benchmark/src/lsquare.c: -------------------------------------------------------------------------------- 1 | /* Compute least squares fit of set of data points */ 2 | #include 3 | #include 4 | #include "../include/lsquare.h" 5 | 6 | typedef struct { 7 | double sum_x; 8 | double sum_y; 9 | double sum_xx; 10 | double sum_xy; 11 | } ls_stat_t; 12 | 13 | /* Accumulate various sums of the data */ 14 | static void ls_stats(double *xval, double *yval, int cnt, ls_stat_t *statp) 15 | { 16 | int i; 17 | statp->sum_x = 0.0; 18 | statp->sum_y = 0.0; 19 | statp->sum_xx = 0.0; 20 | statp->sum_xy = 0.0; 21 | for (i = 0; i < cnt; i++) { 22 | double x = xval[i]; 23 | double y = yval[i]; 24 | statp->sum_x += x; 25 | statp->sum_y += y; 26 | statp->sum_xx += x * x; 27 | statp->sum_xy += x * y; 28 | } 29 | } 30 | 31 | double ls_slope(double *xval, double *yval, int cnt) 32 | { 33 | double slope; 34 | ls_stat_t stat; 35 | ls_stats(xval, yval, cnt, &stat); 36 | slope = (cnt * stat.sum_xy - stat.sum_x * stat.sum_y)/ 37 | (cnt * stat.sum_xx - stat.sum_x*stat.sum_x); 38 | return slope; 39 | } 40 | 41 | double ls_intercept(double *xval, double *yval, int cnt) 42 | { 43 | double intercept; 44 | ls_stat_t stat; 45 | ls_stats(xval, yval, cnt, &stat); 46 | intercept = (stat.sum_xx * stat.sum_y - stat.sum_xy * stat.sum_x)/ 47 | (cnt * stat.sum_xx - stat.sum_x*stat.sum_x); 48 | return intercept; 49 | } 50 | 51 | static double rel_err(double x, double y, double slope, double intercept) 52 | { 53 | double pred_y = slope*x + intercept; 54 | double 
offset = y - pred_y; 55 | if (offset < 0) 56 | offset = -offset; 57 | if (pred_y == 0) 58 | return offset; 59 | return offset/pred_y; 60 | } 61 | 62 | double ls_error(double *xval, double *yval, int cnt, ls_err_t etype) 63 | { 64 | double slope; 65 | double intercept; 66 | ls_stat_t stat; 67 | int i; 68 | double num, denom; 69 | ls_stats(xval, yval, cnt, &stat); 70 | slope = (cnt * stat.sum_xy - stat.sum_x * stat.sum_y)/ 71 | (cnt * stat.sum_xx - stat.sum_x*stat.sum_x); 72 | intercept = (stat.sum_xx * stat.sum_y - stat.sum_xy * stat.sum_x)/ 73 | (cnt * stat.sum_xx - stat.sum_x*stat.sum_x); 74 | num = denom = 0; 75 | for (i = 0; i < cnt; i++) { 76 | double e = rel_err(xval[i], yval[i], slope, intercept); 77 | switch (etype) { 78 | case LS_AVG: 79 | num += e; 80 | denom++; 81 | break; 82 | case LS_MAX: 83 | if (num < e) 84 | num = e; 85 | denom = 1; 86 | break; 87 | default: 88 | fprintf(stderr, "Invalid error type: %d\n", etype); 89 | exit(1); 90 | break; 91 | } 92 | } 93 | return num/denom; 94 | } 95 | 96 | -------------------------------------------------------------------------------- /Optimizing_Program_Performancess/benchmark/vec.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include "combine.h" 3 | 4 | /* $begin vec */ 5 | /* Create vector of specified length */ 6 | vec_ptr new_vec(long len) 7 | { 8 | /* Allocate header structure */ 9 | vec_ptr result = (vec_ptr) malloc(sizeof(vec_rec)); 10 | data_t *data = NULL; 11 | if (!result) 12 | return NULL; /* Couldn't allocate storage */ 13 | result->len = len; 14 | /* $end vec */ 15 | /* We don't show this in the book */ 16 | result->allocated_len = len; 17 | /* $begin vec */ 18 | /* Allocate array */ 19 | if (len > 0) { 20 | data = (data_t *)calloc(len, sizeof(data_t)); 21 | if (!data) { 22 | free((void *) result); 23 | return NULL; /* Couldn't allocate storage */ 24 | } 25 | } 26 | /* data will either be NULL or allocated array */ 27 | result->data = data; 28 | 
return result; 29 | } 30 | 31 | /* Free storage used by vector */ 32 | void free_vec(vec_ptr v) { 33 | if (v->data) 34 | free(v->data); 35 | free(v); 36 | } 37 | 38 | /* 39 | * Retrieve vector element and store at dest. 40 | * Return 0 (out of bounds) or 1 (successful) 41 | */ 42 | int get_vec_element(vec_ptr v, long index, data_t *dest) 43 | { 44 | if (index < 0 || index >= v->len) 45 | return 0; 46 | *dest = v->data[index]; 47 | return 1; 48 | } 49 | 50 | /* Return length of vector */ 51 | long vec_length(vec_ptr v) 52 | { 53 | return v->len; 54 | } 55 | /* $end vec */ 56 | 57 | 58 | /* $begin get_vec_start */ 59 | data_t *get_vec_start(vec_ptr v) 60 | { 61 | return v->data; 62 | } 63 | /* $end get_vec_start */ 64 | 65 | 66 | /* 67 | * Set vector element. 68 | * Return 0 (out of bounds) or 1 (successful) 69 | */ 70 | int set_vec_element(vec_ptr v, long index, data_t val) 71 | { 72 | if (index < 0 || index >= v->len) 73 | return 0; 74 | v->data[index] = val; 75 | return 1; 76 | } 77 | 78 | 79 | /* Set vector length. 
If >= allocated length, will reallocate */ 80 | void set_vec_length(vec_ptr v, long newlen) 81 | { 82 | if (newlen > v->allocated_len) { 83 | free(v->data); 84 | v->data = calloc(newlen, sizeof(data_t)); 85 | v->allocated_len = newlen; 86 | } 87 | v->len = newlen; 88 | } 89 | -------------------------------------------------------------------------------- /Optimizing_Program_Performancess/benchmark/vec.h: -------------------------------------------------------------------------------- 1 | /* $begin adt */ 2 | /* Create abstract data type for vector */ 3 | typedef struct { 4 | long len; 5 | data_t *data; 6 | /* $end adt */ 7 | long allocated_len; /* NOTE: we don't use this field in the book */ 8 | /* $begin adt */ 9 | } vec_rec, *vec_ptr; 10 | /* $end adt */ 11 | 12 | /* Create vector */ 13 | vec_ptr new_vec(long len); 14 | 15 | /* Free storage used by vector */ 16 | void free_vec(vec_ptr v); 17 | 18 | /* 19 | * Retrieve vector element and store in dest. 20 | * Return 0 (out of bounds) or 1 (successful) 21 | */ 22 | int get_vec_element(vec_ptr v, long index, data_t *dest); 23 | 24 | /* Macro version */ 25 | #define GET_VEC_ELEMENT(v,index,dest) \ 26 | !((index) < 0 || (index) >= (v)->len) && \ 27 | *(dest) = (v)->data[(index)], 1; 28 | 29 | 30 | data_t *get_vec_start(vec_ptr v); 31 | 32 | /* 33 | * Set vector element. 34 | * Return 0 (out of bounds) or 1 (successful) 35 | */ 36 | 37 | int set_vec_element(vec_ptr v, long index, data_t val); 38 | 39 | /* Get vector length */ 40 | long vec_length(vec_ptr v); 41 | 42 | /* Set length of vector. If > allocated length, will reallocate */ 43 | void set_vec_length(vec_ptr v, long newlen); 44 | -------------------------------------------------------------------------------- /Optimizing_Program_Performancess/matrix_multiplication/ReadMe.md: -------------------------------------------------------------------------------- 1 | # 矩阵乘法示例 2 | 3 | ### 1. 
flost_matrix_multiplication_SIMD.c是使用SIMD指令对flost类型的矩阵进行矩阵乘法的代码 4 | > 使用gcc -mavx -o flost_matrix_multiplication_SIMD.out flost_matrix_multiplication_SIMD.c进行编译; 5 | ### 2. double_matrix_multiplication_SIMD.c是使用SIMD指令对double类型的矩阵进行矩阵乘法的代码 6 | > 使用gcc -mavx -o double_matrix_multiplication_SIMD.out double_matrix_multiplication_SIMD.c进行编译; 7 | ### 3. matrix_multiplication_Strassen.c是使用Strassen算法进行矩阵乘法的代码 8 | > 使用gcc -o matrix_multiplication_Strassen.out matrix_multiplication_Strassen.c进行编译; 9 | ### 4. matrix_multiplication_optimization.c是case study中对矩阵乘法进行优化的代码 10 | > 使用gcc -mavx -o matrix_multiplication_optimization.out matrix_multiplication_optimization.c进行编译; -------------------------------------------------------------------------------- /Optimizing_Program_Performancess/matrix_multiplication/double_matrix_multiplication_SIMD.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #define M 400 6 | #define N 600 7 | #define K 800 8 | 9 | //使用SIMD指令执行矩阵乘法,一条SIMD指令完成4个double类型数据的运算 10 | void matrixMul_SIMD(double a[][K], double b[][N], double c[][N]) { 11 | __m256d va, vb, sum, prod ;//__m256d存储256位,将其分为4部分,每部分为一个double类型的数 12 | for (int i = 0; i < M; i++) { 13 | for (int j = 0; j < N; j += 4) { 14 | sum = _mm256_loadu_pd(&c[i][j]);//读入从c[i][j]开始的4个double类型数 15 | for (int k = 0; k < K; k ++) { 16 | va = _mm256_set1_pd(a[i][k]);//读入a[i][k]并复制3个扩展成4个double类型数 17 | vb = _mm256_loadu_pd(&b[k][j]);//读入从b[k][j]开始的4个double类型数 18 | prod = _mm256_mul_pd(va, vb);//将va和vb中对应位置的double类型数相乘 19 | sum = _mm256_add_pd(sum, prod);//将sum和prod中对应位置的double类型数相加 20 | } 21 | _mm256_storeu_pd(&c[i][j], sum);//将sum中的4个double类型数存到从c[i][j]开始的内存空间中 22 | } 23 | } 24 | } 25 | 26 | //未优化的矩阵乘法 27 | void matrixMul_base(double a[][K], double b[][N], double c[][N]) { 28 | for (int i = 0; i < M; ++i) { 29 | for (int j = 0; j < N; ++j) { 30 | for (int k = 0; k < K; ++k) 31 | c[i][j] += a[i][k] * b[k][j]; 32 | } 33 | } 34 | } 35 
| 36 | int main() { 37 | double a[M][K], b[K][N], c[M][N]; 38 | int i, j, k; 39 | clock_t start, stop; 40 | //初始化矩阵a和c 41 | for (i = 0; i < M; i++) { 42 | for (k = 0; k < K; k++) 43 | { 44 | a[i][k] = i * 0.02 + k * 0.01; 45 | } 46 | for (j = 0; j < N; j++) 47 | { 48 | c[i][j] = 0.0; 49 | } 50 | } 51 | //初始化矩阵b 52 | for (k = 0; k < K; k++) { 53 | for (j = 0; j < N; j++) 54 | { 55 | b[k][j] = k * 0.01 + j * 0.02; 56 | } 57 | } 58 | start = clock(); 59 | matrixMul_base(a, b, c); 60 | stop = clock(); 61 | printf("The time of bsae matrix multiplication(double):%f\n", (double)(stop - start) / CLOCKS_PER_SEC); 62 | for (i = 0; i < M; i++) 63 | for (j = 0; j < N; j++) 64 | c[i][j] = 0.0; 65 | start = clock(); 66 | matrixMul_SIMD(a, b, c); 67 | stop = clock(); 68 | printf("The time of matrix multiplication with SIMD(double):%f\n", (double)(stop - start) / CLOCKS_PER_SEC); 69 | return 0; 70 | } 71 | -------------------------------------------------------------------------------- /Optimizing_Program_Performancess/matrix_multiplication/float_matrix_multiplication_SIMD.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #define M 400 6 | #define N 600 7 | #define K 800 8 | 9 | //使用SIMD指令执行矩阵乘法,一条SIMD指令完成8个float类型数据的运算 10 | void matrixMul_SIMD(float a[][K], float b[][N], float c[][N]) { 11 | __m256 va, vb, sum, prod ;//__m256存储256位,将其分为8部分,每部分为一个float类型的数 12 | for (int i = 0; i < M; i++) { 13 | for (int j = 0; j < N; j += 8) { 14 | sum = _mm256_loadu_ps(&c[i][j]);//读入从c[i][j]开始的8个float类型数 15 | for (int k = 0; k < K; k ++) { 16 | va = _mm256_set1_ps(a[i][k]);//读入a[i][k]并复制7个扩展成8个float类型数 17 | vb = _mm256_loadu_ps(&b[k][j]);//读入从b[k][j]开始的8个float类型数 18 | prod = _mm256_mul_ps(va, vb);//将va和vb中对应位置的float类型数相乘 19 | sum = _mm256_add_ps(sum, prod);//将sum和prod中对应位置的float类型数相加 20 | } 21 | _mm256_storeu_ps(&c[i][j], sum);//将sum中的8个float类型数存到从c[i][j]开始的内存空间中 22 | } 23 | } 24 | } 25 | 26 | //未优化的矩阵乘法 27 | void 
matrixMul_base(float a[][K], float b[][N], float c[][N]) { 28 | for (int i = 0; i < M; ++i) { 29 | for (int j = 0; j < N; ++j) { 30 | for (int k = 0; k < K; ++k) 31 | c[i][j] += a[i][k] * b[k][j]; 32 | } 33 | } 34 | } 35 | 36 | int main() { 37 | float a[M][K], b[K][N], c[M][N]; 38 | int i, j, k; 39 | clock_t start, stop; 40 | //初始化矩阵a和c 41 | for (i = 0; i < M; i++) { 42 | for (k = 0; k < K; k++) 43 | { 44 | a[i][k] = i * 0.02 + k * 0.01; 45 | } 46 | for (j = 0; j < N; j++) 47 | { 48 | c[i][j] = 0.0; 49 | } 50 | } 51 | //初始化矩阵b 52 | for (k = 0; k < K; k++) { 53 | for (j = 0; j < N; j++) 54 | { 55 | b[k][j] = k * 0.01 + j * 0.02; 56 | } 57 | } 58 | start = clock(); 59 | matrixMul_base(a, b, c); 60 | stop = clock(); 61 | printf("The time of bsae matrix multiplication(float):%f\n", (double)(stop - start) / CLOCKS_PER_SEC); 62 | for (i = 0; i < M; i++) 63 | for (j = 0; j < N; j++) 64 | c[i][j] = 0.0; 65 | start = clock(); 66 | matrixMul_SIMD(a, b, c); 67 | stop = clock(); 68 | printf("The time of matrix multiplication with SIMD(float):%f\n", (double)(stop - start) / CLOCKS_PER_SEC); 69 | return 0; 70 | } 71 | -------------------------------------------------------------------------------- /Optimizing_Program_Performancess/matrix_multiplication/matrix_multiplication_Strassen.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #define N 1024 6 | 7 | //未优化的矩阵乘法 8 | void matrixMul_base(float **a, float **b, float **c, int n) { 9 | for (int i = 0; i < n; ++i) { 10 | for (int j = 0; j < n; ++j) { 11 | for (int k = 0; k < n; ++k) 12 | c[i][j] += a[i][k] * b[k][j]; 13 | } 14 | } 15 | } 16 | 17 | //矩阵加法 18 | void matrixAdd(float **a, float **b, float **c, int n) { 19 | for (int i = 0; i < n; ++i) { 20 | for (int j = 0; j < n; ++j) { 21 | c[i][j] = a[i][j] + b[i][j]; 22 | } 23 | } 24 | } 25 | 26 | //矩阵减法 27 | void matrixSub(float **a, float **b, float **c, int n) { 28 | for (int i = 0; i < n; ++i) { 29 
| for (int j = 0; j < n; ++j) { 30 | c[i][j] = a[i][j] - b[i][j]; 31 | } 32 | } 33 | } 34 | 35 | //Strassen算法 36 | void matrixMul_Strassen(float **a, float **b, float **c, int size) { 37 | if (size < 16) //当矩阵大小小于16时,使用普通矩阵乘法(递归退出的条件) 38 | matrixMul_base(a, b, c, size); 39 | else { 40 | int n = size / 2; 41 | //初始化临时变量 42 | float **A11 = (float **)malloc(sizeof(float *) * n); 43 | float **A12 = (float **)malloc(sizeof(float *) * n); 44 | float **A21 = (float **)malloc(sizeof(float *) * n); 45 | float **A22 = (float **)malloc(sizeof(float *) * n); 46 | float **B11 = (float **)malloc(sizeof(float *) * n); 47 | float **B12 = (float **)malloc(sizeof(float *) * n); 48 | float **B21 = (float **)malloc(sizeof(float *) * n); 49 | float **B22 = (float **)malloc(sizeof(float *) * n); 50 | float **C11 = (float **)malloc(sizeof(float *) * n); 51 | float **C12 = (float **)malloc(sizeof(float *) * n); 52 | float **C21 = (float **)malloc(sizeof(float *) * n); 53 | float **C22 = (float **)malloc(sizeof(float *) * n); 54 | float **P1 = (float **)malloc(sizeof(float *) * n); 55 | float **P2 = (float **)malloc(sizeof(float *) * n); 56 | float **P3 = (float **)malloc(sizeof(float *) * n); 57 | float **P4 = (float **)malloc(sizeof(float *) * n); 58 | float **P5 = (float **)malloc(sizeof(float *) * n); 59 | float **P6 = (float **)malloc(sizeof(float *) * n); 60 | float **P7 = (float **)malloc(sizeof(float *) * n); 61 | float **AResult = (float **)malloc(sizeof(float *) * n); 62 | float **BResult = (float **)malloc(sizeof(float *) * n); 63 | for (int i = 0; i < n; i++) { 64 | A11[i] = (float *)malloc(sizeof(float) * n); 65 | A12[i] = (float *)malloc(sizeof(float) * n); 66 | A21[i] = (float *)malloc(sizeof(float) * n); 67 | A22[i] = (float *)malloc(sizeof(float) * n); 68 | B11[i] = (float *)malloc(sizeof(float) * n); 69 | B12[i] = (float *)malloc(sizeof(float) * n); 70 | B21[i] = (float *)malloc(sizeof(float) * n); 71 | B22[i] = (float *)malloc(sizeof(float) * n); 72 | C11[i] = (float 
*)malloc(sizeof(float) * n); 73 | C12[i] = (float *)malloc(sizeof(float) * n); 74 | C21[i] = (float *)malloc(sizeof(float) * n); 75 | C22[i] = (float *)malloc(sizeof(float) * n); 76 | P1[i] = (float *)malloc(sizeof(float) * n); 77 | P2[i] = (float *)malloc(sizeof(float) * n); 78 | P3[i] = (float *)malloc(sizeof(float) * n); 79 | P4[i] = (float *)malloc(sizeof(float) * n); 80 | P5[i] = (float *)malloc(sizeof(float) * n); 81 | P6[i] = (float *)malloc(sizeof(float) * n); 82 | P7[i] = (float *)malloc(sizeof(float) * n); 83 | AResult[i] = (float *)malloc(sizeof(float) * n); 84 | BResult[i] = (float *)malloc(sizeof(float) * n); 85 | //将矩阵拆分成大小相等的4部分 86 | for (int j = 0; j < n; j++) 87 | { 88 | A11[i][j] = a[i][j]; 89 | A12[i][j] = a[i][j + n]; 90 | A21[i][j] = a[i + n][j]; 91 | A22[i][j] = a[i + n][j + n]; 92 | B11[i][j] = b[i][j]; 93 | B12[i][j] = b[i][j + n]; 94 | B21[i][j] = b[i + n][j]; 95 | B22[i][j] = b[i + n][j + n]; 96 | 97 | } 98 | } 99 | //P1=(A11+A22)(B11+B22) 100 | matrixAdd(A11, A22, AResult, n); 101 | matrixAdd(B11, B22, BResult, n); 102 | matrixMul_Strassen(AResult, BResult, P1, n); 103 | //P2=(A21+A22)B11 104 | matrixAdd(A21, A22, AResult, n); 105 | matrixMul_Strassen(AResult, B11, P2, n); 106 | //P3=A11(B12-B22) 107 | matrixSub(B12, B22, BResult, n); 108 | matrixMul_Strassen(A11, BResult, P3, n); 109 | //P4=A22(B21-B11) 110 | matrixSub(B21, B11, BResult, n); 111 | matrixMul_Strassen(A22, BResult, P4, n); 112 | //P5=(A11+A12)B22 113 | matrixAdd(A11, A12, AResult, n); 114 | matrixMul_Strassen(AResult, B22, P5, n); 115 | //P6=(A21-A11)(B11+B12) 116 | matrixSub(A21, A11, AResult, n); 117 | matrixAdd(B11, B12, BResult, n); 118 | matrixMul_Strassen(AResult, BResult, P6, n); 119 | //P7=(A12-A22)(B21+B22) 120 | matrixSub(A12, A22, AResult, n); 121 | matrixAdd(B21, B22, BResult, n); 122 | matrixMul_Strassen(AResult, BResult, P7, n); 123 | //C11=P1+P4-P5+P7; 124 | matrixAdd(P1, P4, AResult, n); 125 | matrixSub(AResult, P5, BResult, n); 126 | matrixAdd(BResult, P7, 
C11, n); 127 | //C12=P3+P5; 128 | matrixAdd(P3, P5, C12, n); 129 | //C21=P2+P4; 130 | matrixAdd(P2, P4, C21, n); 131 | //C22=P1-P2+P3+P6; 132 | matrixSub(P1, P2, AResult, n); 133 | matrixAdd(AResult, P3, BResult, n); 134 | matrixAdd(BResult, P6, C22, n); 135 | //将结果写会矩阵c 136 | for (int i = 0; i < n; i++) { 137 | for (int j = 0; j < n; j++) { 138 | c[i][j] = C11[i][j]; 139 | c[i][j + n] = C12[i][j]; 140 | c[i + n][j] = C21[i][j]; 141 | c[i + n][j + n] = C22[i][j]; 142 | } 143 | } 144 | //释放临时变量 145 | for (int i = 0; i < n; i++) { 146 | free(A11[i]); 147 | free(A12[i]); 148 | free(A21[i]); 149 | free(A22[i]); 150 | free(B11[i]); 151 | free(B12[i]); 152 | free(B21[i]); 153 | free(B22[i]); 154 | free(C11[i]); 155 | free(C12[i]); 156 | free(C21[i]); 157 | free(C22[i]); 158 | free(P1[i]); 159 | free(P2[i]); 160 | free(P3[i]); 161 | free(P4[i]); 162 | free(P5[i]); 163 | free(P6[i]); 164 | free(P7[i]); 165 | free(AResult[i]); 166 | free(BResult[i]); 167 | } 168 | free(A11); 169 | free(A12); 170 | free(A21); 171 | free(A22); 172 | free(B11); 173 | free(B12); 174 | free(B21); 175 | free(B22); 176 | free(C11); 177 | free(C12); 178 | free(C21); 179 | free(C22); 180 | free(P1); 181 | free(P2); 182 | free(P3); 183 | free(P4); 184 | free(P5); 185 | free(P6); 186 | free(P7); 187 | free(AResult); 188 | free(BResult); 189 | } 190 | } 191 | 192 | int main() { 193 | float **a = (float **)malloc(sizeof(float *) * N); 194 | float **b = (float **)malloc(sizeof(float *) * N); 195 | float **c = (float **)malloc(sizeof(float *) * N); 196 | int i, j; 197 | clock_t start, stop; 198 | //初始化矩阵a,b和c 199 | for (i = 0; i < N; i++) { 200 | a[i] = (float *)malloc(sizeof(float) * N); 201 | b[i] = (float *)malloc(sizeof(float) * N); 202 | c[i] = (float *)malloc(sizeof(float) * N); 203 | for (j = 0; j < N; j++) 204 | { 205 | a[i][j] = i * 0.02 + j * 0.01; 206 | b[i][j] = i * 0.01 + j * 0.02; 207 | c[i][j] = 0.0; 208 | } 209 | } 210 | start = clock(); 211 | matrixMul_base(a, b, c, N); 212 | stop = 
clock(); 213 | printf("The time of bsae matrix multiplication:%f\n", (double)(stop - start) / CLOCKS_PER_SEC); 214 | for (i = 0; i < N; i++) 215 | for (j = 0; j < N; j++) 216 | c[i][j] = 0.0; 217 | start = clock(); 218 | matrixMul_Strassen(a, b, c, N); 219 | stop = clock(); 220 | printf("The time of matrix multiplication with Strassen:%f\n", (double)(stop - start) / CLOCKS_PER_SEC); 221 | return 0; 222 | } 223 | -------------------------------------------------------------------------------- /Optimizing_Program_Performancess/matrix_multiplication/matrix_multiplication_optimization.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #define M 700 6 | #define N 600 7 | #define K 800 8 | 9 | //两个向量逐位相乘后相加 10 | void hardware_dot(float *accumulator, const float *a_slice, const float *b_slice) { 11 | float total = 0.0; 12 | for (int k = 0; k < 8; ++k) { 13 | total += a_slice[k] * b_slice[k]; 14 | } 15 | *accumulator += total; 16 | } 17 | 18 | //标量与向量相乘 19 | void hardware_saxpy(float *accumulator, float a, const float *input) { 20 | for (int k = 0; k < 8; ++k) { 21 | accumulator[k] += a * input[k]; 22 | } 23 | } 24 | 25 | //未优化的矩阵乘法 26 | void matrixMul_base(float a[][K], float b[][N], float c[][N]) { 27 | for (int i = 0; i < M; ++i) { 28 | for (int j = 0; j < N; ++j) { 29 | for (int k = 0; k < K; ++k) 30 | c[i][j] += a[i][k] * b[k][j]; 31 | } 32 | } 33 | } 34 | 35 | //转置矩阵B后的矩阵乘法 36 | void matrixMul_transpose(float a[][K], float b[][K], float c[][N]) { 37 | for (int i = 0; i < M; ++i) { 38 | for (int j = 0; j < N; ++j) { 39 | for (int k = 0; k < K; ++k) 40 | c[i][j] += a[i][k] * b[j][k]; 41 | } 42 | } 43 | } 44 | 45 | //转置矩阵B后8个元素一起相乘的矩阵乘法 46 | void matrixMul_transpose_8_element_product(float a[][K], float b[][K], float c[][N]) { 47 | for (int i = 0; i < M; ++i) { 48 | for (int j = 0; j < N; ++j) { 49 | for (int k = 0; k < K; k+=8) 50 | hardware_dot(&c[i][j], &a[i][k], &b[j][k]); 51 | } 52 | } 
53 | } 54 | 55 | //Matrix multiply on the untransposed B, 8 columns at a time: for each row i and each group of columns j..j+7, hardware_saxpy adds a[i][k] * b[k][j..j+7] into c[i][j..j+7] for every k. j advances by 8, so N is assumed to be a multiple of 8. 56 | void matrixMul_8_element_product(float a[][K], float b[][N], float c[][N]) { 57 | for (int i = 0; i < M; ++i) { 58 | for (int j = 0; j < N; j+=8) { 59 | for (int k = 0; k < K; ++k) 60 | hardware_saxpy(&c[i][j], a[i][k], &b[k][j]); 61 | } 62 | } 63 | } 64 | 65 | //Matrix multiply with AVX intrinsics on a pre-transposed B: adds the dot product of a[i][0..K-1] and b[j][0..K-1] into c[i][j]. k advances by 8, so K is assumed to be a multiple of 8. 66 | void matrixMul_transpose_SIMD(float a[][K], float b[][K], float c[][N]) { 67 | __m256 va, vb, sum, prod, prod_sum;//__m256 holds 256 bits, viewed as 8 float lanes 68 | __m128 total, sum_high, sum_low, result;//__m128 holds 128 bits, viewed as 4 float lanes 69 | float d[4];//scratch buffer: total is stored here as 4 floats and only d[0] is copied into c. Author's rationale: a 128-bit store cannot target a single element of c. NOTE(review): _mm_store_ss / _mm_cvtss_f32 appear able to write or extract lane 0 alone - confirm before simplifying 70 | for (int i = 0; i < M; i++) { 71 | for (int j = 0; j < N; j++) { 72 | total = _mm_broadcast_ss(&c[i][j]);//load c[i][j] and replicate it into all 4 float lanes 73 | for (int k = 0; k < K; k += 8) { 74 | va = _mm256_loadu_ps(&a[i][k]);//load 8 floats starting at a[i][k] 75 | vb = _mm256_loadu_ps(&b[j][k]);//load 8 floats starting at b[j][k] 76 | prod = _mm256_mul_ps(va, vb);//lane-wise product of va and vb 77 | //v3 = _mm256_hadd_ps(v1, v2) computes, within each 128-bit half: 78 | //v3[0]=v1[0]+v1[1],v3[1]=v1[2]+v1[3],v3[2]=v2[0]+v2[1],v3[3]=v2[2]+v2[3], 79 | //v3[4]=v1[4]+v1[5],v3[5]=v1[6]+v1[7],v3[6]=v2[4]+v2[5],v3[7]=v2[6]+v2[7] 80 | prod_sum = _mm256_hadd_ps(prod, prod);//prod_sum[0]=prod[0]+prod[1], 81 | //prod_sum[1]=prod[2]+prod[3], 82 | //prod_sum[4]=prod[4]+prod[5], 83 | //prod_sum[5]=prod[6]+prod[7], 84 | sum = _mm256_hadd_ps(prod_sum, prod_sum);//sum[0]=prod[0]+prod[1]+prod[2]+prod[3] 85 | //sum[4]=prod[4]+prod[5]+prod[6]+prod[7] 86 | sum_low = _mm256_extractf128_ps(sum, 0);//low 128 bits of sum 87 | sum_high = _mm256_extractf128_ps(sum, 1);//high 128 bits of sum 88 | result = _mm_add_ps(sum_high, sum_low);//result[0]=sum[0]+sum[4], i.e. the sum of all 8 products 89 | total = _mm_add_ps(total, result);//accumulate onto the running value that started as c[i][j] 90 | } 91 | _mm_storeu_ps(&d[0], total);//store the 4 floats of total into d[0..3] 92 | c[i][j] = d[0];//write lane 0 back to c[i][j] 93 | } 94 | } 95 | } 96 | 97 | int main() { 98 | float a[M][K], b[K][N], c[M][N], transpose_b[N][K]; //NOTE(review): four automatic arrays, roughly 7.8 MB of stack with M=700, N=600, K=800 and 4-byte float - check against the platform stack limit 99 | int i, j, k; 100 | clock_t 
start, stop; 101 | //初始化矩阵a和c 102 | for (i = 0; i < M; i++) { 103 | for (k = 0; k < K; k++) 104 | { 105 | a[i][k] = i * 0.02 + k * 0.01; 106 | } 107 | for (j = 0; j < N; j++) 108 | { 109 | c[i][j] = 0.0; 110 | } 111 | } 112 | //初始化矩阵b 113 | for (k = 0; k < K; k++) { 114 | for (j = 0; j < N; j++) 115 | { 116 | b[k][j] = k * 0.01 + j * 0.02; 117 | } 118 | } 119 | //转制矩阵b 120 | for (k = 0; k < K; k++) { 121 | for (j = 0; j < N; j++) 122 | { 123 | transpose_b[j][k] = b[k][j]; 124 | } 125 | } 126 | start = clock(); 127 | matrixMul_base(a, b, c); 128 | stop = clock(); 129 | printf("The time of bsae matrix multiplication:%f\n", (double)(stop - start) / CLOCKS_PER_SEC); 130 | for (i = 0; i < M; i++) 131 | for (j = 0; j < N; j++) 132 | c[i][j] = 0.0; 133 | start = clock(); 134 | matrixMul_transpose(a, transpose_b, c); 135 | stop = clock(); 136 | printf("The time of matrix multiplication with transpose:%f\n", (double)(stop - start) / CLOCKS_PER_SEC); 137 | for (i = 0; i < M; i++) 138 | for (j = 0; j < N; j++) 139 | c[i][j] = 0.0; 140 | start = clock(); 141 | matrixMul_transpose_8_element_product(a, transpose_b, c); 142 | stop = clock(); 143 | printf("The time of matrix multiplication with transpose and 8_element product:%f\n", (double)(stop - start) / CLOCKS_PER_SEC); 144 | for (i = 0; i < M; i++) 145 | for (j = 0; j < N; j++) 146 | c[i][j] = 0.0; 147 | start = clock(); 148 | matrixMul_8_element_product(a, b, c); 149 | stop = clock(); 150 | printf("The time of matrix multiplication with 8_element product:%f\n", (double)(stop - start) / CLOCKS_PER_SEC); 151 | for (i = 0; i < M; i++) 152 | for (j = 0; j < N; j++) 153 | c[i][j] = 0.0; 154 | start = clock(); 155 | matrixMul_transpose_SIMD(a, transpose_b, c); 156 | stop = clock(); 157 | printf("The time of matrix multiplication with transpose and SIMD:%f\n", (double)(stop - start) / CLOCKS_PER_SEC); 158 | return 0; 159 | } 160 | -------------------------------------------------------------------------------- /README.md: 
-------------------------------------------------------------------------------- 1 | # HITSZ-SYSTEMS 2 | 2020年春季学期哈工大(深圳)体系结构课程 3 | -------------------------------------------------------------------------------- /System_Level_IO/Makefile: -------------------------------------------------------------------------------- 1 | CC = gcc 2 | 3 | CSAPP_INC = . 4 | CSAPP_SRC = . 5 | 6 | CFLAGS = -Wall -g -Og -I $(CSAPP_INC) -I . 7 | LDLIBS = -lpthread 8 | 9 | PROGS = statcheck\ 10 | cpstdin\ 11 | cpfile\ 12 | hello\ 13 | ffiles1\ 14 | ffiles2\ 15 | ffiles3\ 16 | stdout\ 17 | readdir\ 18 | sharing1\ 19 | sharing2\ 20 | fdprob1 21 | 22 | all: $(CSAPP_SRC)/csapp.o $(PROGS) 23 | $(PROGS): $(CSAPP_SRC)/csapp.o 24 | $(CSAPP_SRC)/csapp.o: $(CSAPP_SRC)/csapp.c $(CSAPP_INC)/csapp.h 25 | 26 | clean: 27 | rm -f $(PROGS) *.o *~ 28 | -------------------------------------------------------------------------------- /System_Level_IO/README.md: -------------------------------------------------------------------------------- 1 | # 系统级I/O 2 | > 本目录包含I/O专题涉及到的代码,通过makefile文件可以编译 3 | 4 | 5 | 1. cpfile.c 使用RIO复制stdin到stdout 6 | 7 | 2. hello.c 证明libc函数的缓存机制 8 | 9 | 3. statcheck.c 获取文件元数据的示例 10 | 11 | 4. fdprob1.c 关于文件打开关闭和文件描述符的练习 12 | > foo.txt 13 | > baz.txt 14 | 15 | 5. sharing1.c 文件共享的练习 16 | > foobar.txt 17 | 18 | 6. sharing2.c 关于fork的文件共享练习 19 | > foobar.txt 20 | 21 | 7. readdir.c 读取目录 22 | 23 | 8. cpstdin.c 使用read和write从标准输入一次一个字节复制到标准输出 24 | 25 | 9. stdout.c 输出字符到标准输出文件 26 | 27 | 10. ffiles1.c 关于文件读写以及重定向的练习 28 | > abcde.txt 29 | 30 | 11. ffiles2.c 关于fork的文件共享练习 31 | > abcde.txt 32 | 33 | 12. 
ffiles3.c 关于文件读写模式和重定向的练习 34 | > ffiles3.txt 35 | -------------------------------------------------------------------------------- /System_Level_IO/abcde.txt: -------------------------------------------------------------------------------- 1 | abcde -------------------------------------------------------------------------------- /System_Level_IO/baz.txt: -------------------------------------------------------------------------------- 1 | baz -------------------------------------------------------------------------------- /System_Level_IO/cpfile.c: -------------------------------------------------------------------------------- 1 | /* $begin cpfile */ 2 | #include "csapp.h" 3 | 4 | int main(int argc, char **argv) 5 | { 6 | int n; 7 | rio_t rio; 8 | char buf[MAXLINE]; 9 | 10 | Rio_readinitb(&rio, STDIN_FILENO); 11 | while((n = Rio_readlineb(&rio, buf, MAXLINE)) != 0) 12 | Rio_writen(STDOUT_FILENO, buf, n); 13 | /* $end cpfile */ 14 | exit(0); 15 | /* $begin cpfile */ 16 | } 17 | /* $end cpfile */ 18 | 19 | 20 | 21 | -------------------------------------------------------------------------------- /System_Level_IO/cpstdin.c: -------------------------------------------------------------------------------- 1 | #include "csapp.h" 2 | 3 | int main(void) 4 | { 5 | char c; 6 | 7 | while(Read(STDIN_FILENO, &c, 1) != 0) 8 | Write(STDOUT_FILENO, &c, 1); 9 | exit(0); 10 | } 11 | -------------------------------------------------------------------------------- /System_Level_IO/csapp.h: -------------------------------------------------------------------------------- 1 | /* 2 | * csapp.h - prototypes and definitions for the CS:APP3e book 3 | */ 4 | /* $begin csapp.h */ 5 | #ifndef __CSAPP_H__ 6 | #define __CSAPP_H__ 7 | 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 
| #include 28 | #include 29 | #include 30 | #include 31 | 32 | /* Default file permissions are DEF_MODE & ~DEF_UMASK */ 33 | /* $begin createmasks */ 34 | #define DEF_MODE S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH 35 | #define DEF_UMASK S_IWGRP|S_IWOTH 36 | /* $end createmasks */ 37 | 38 | /* Simplifies calls to bind(), connect(), and accept() */ 39 | /* $begin sockaddrdef */ 40 | typedef struct sockaddr SA; 41 | /* $end sockaddrdef */ 42 | 43 | /* Persistent state for the robust I/O (Rio) package */ 44 | /* $begin rio_t */ 45 | #define RIO_BUFSIZE 8192 46 | typedef struct { 47 | int rio_fd; /* Descriptor for this internal buf */ 48 | int rio_cnt; /* Unread bytes in internal buf */ 49 | char *rio_bufptr; /* Next unread byte in internal buf */ 50 | char rio_buf[RIO_BUFSIZE]; /* Internal buffer */ 51 | } rio_t; 52 | /* $end rio_t */ 53 | 54 | /* External variables */ 55 | extern int h_errno; /* Defined by BIND for DNS errors */ 56 | extern char **environ; /* Defined by libc */ 57 | 58 | /* Misc constants */ 59 | #define MAXLINE 8192 /* Max text line length */ 60 | #define MAXBUF 8192 /* Max I/O buffer size */ 61 | #define LISTENQ 1024 /* Second argument to listen() */ 62 | 63 | /* Our own error-handling functions */ 64 | void unix_error(char *msg); 65 | void posix_error(int code, char *msg); 66 | void dns_error(char *msg); 67 | void gai_error(int code, char *msg); 68 | void app_error(char *msg); 69 | 70 | /* Process control wrappers */ 71 | pid_t Fork(void); 72 | void Execve(const char *filename, char *const argv[], char *const envp[]); 73 | pid_t Wait(int *status); 74 | pid_t Waitpid(pid_t pid, int *iptr, int options); 75 | void Kill(pid_t pid, int signum); 76 | unsigned int Sleep(unsigned int secs); 77 | void Pause(void); 78 | unsigned int Alarm(unsigned int seconds); 79 | void Setpgid(pid_t pid, pid_t pgid); 80 | pid_t Getpgrp(); 81 | 82 | /* Signal wrappers */ 83 | typedef void handler_t(int); 84 | handler_t *Signal(int signum, handler_t *handler); 85 | 
void Sigprocmask(int how, const sigset_t *set, sigset_t *oldset); 86 | void Sigemptyset(sigset_t *set); 87 | void Sigfillset(sigset_t *set); 88 | void Sigaddset(sigset_t *set, int signum); 89 | void Sigdelset(sigset_t *set, int signum); 90 | int Sigismember(const sigset_t *set, int signum); 91 | int Sigsuspend(const sigset_t *set); 92 | 93 | /* Sio (Signal-safe I/O) routines */ 94 | ssize_t sio_puts(char s[]); 95 | ssize_t sio_putl(long v); 96 | void sio_error(char s[]); 97 | 98 | /* Sio wrappers */ 99 | ssize_t Sio_puts(char s[]); 100 | ssize_t Sio_putl(long v); 101 | void Sio_error(char s[]); 102 | 103 | /* Unix I/O wrappers */ 104 | int Open(const char *pathname, int flags, mode_t mode); 105 | ssize_t Read(int fd, void *buf, size_t count); 106 | ssize_t Write(int fd, const void *buf, size_t count); 107 | off_t Lseek(int fildes, off_t offset, int whence); 108 | void Close(int fd); 109 | int Select(int n, fd_set *readfds, fd_set *writefds, fd_set *exceptfds, 110 | struct timeval *timeout); 111 | int Dup2(int fd1, int fd2); 112 | void Stat(const char *filename, struct stat *buf); 113 | void Fstat(int fd, struct stat *buf) ; 114 | 115 | /* Directory wrappers */ 116 | DIR *Opendir(const char *name); 117 | struct dirent *Readdir(DIR *dirp); 118 | int Closedir(DIR *dirp); 119 | 120 | /* Memory mapping wrappers */ 121 | void *Mmap(void *addr, size_t len, int prot, int flags, int fd, off_t offset); 122 | void Munmap(void *start, size_t length); 123 | 124 | /* Standard I/O wrappers */ 125 | void Fclose(FILE *fp); 126 | FILE *Fdopen(int fd, const char *type); 127 | char *Fgets(char *ptr, int n, FILE *stream); 128 | FILE *Fopen(const char *filename, const char *mode); 129 | void Fputs(const char *ptr, FILE *stream); 130 | size_t Fread(void *ptr, size_t size, size_t nmemb, FILE *stream); 131 | void Fwrite(const void *ptr, size_t size, size_t nmemb, FILE *stream); 132 | 133 | /* Dynamic storage allocation wrappers */ 134 | void *Malloc(size_t size); 135 | void *Realloc(void 
*ptr, size_t size); 136 | void *Calloc(size_t nmemb, size_t size); 137 | void Free(void *ptr); 138 | 139 | /* Sockets interface wrappers */ 140 | int Socket(int domain, int type, int protocol); 141 | void Setsockopt(int s, int level, int optname, const void *optval, int optlen); 142 | void Bind(int sockfd, struct sockaddr *my_addr, int addrlen); 143 | void Listen(int s, int backlog); 144 | int Accept(int s, struct sockaddr *addr, socklen_t *addrlen); 145 | void Connect(int sockfd, struct sockaddr *serv_addr, int addrlen); 146 | 147 | /* Protocol independent wrappers */ 148 | void Getaddrinfo(const char *node, const char *service, 149 | const struct addrinfo *hints, struct addrinfo **res); 150 | void Getnameinfo(const struct sockaddr *sa, socklen_t salen, char *host, 151 | size_t hostlen, char *serv, size_t servlen, int flags); 152 | void Freeaddrinfo(struct addrinfo *res); 153 | void Inet_ntop(int af, const void *src, char *dst, socklen_t size); 154 | void Inet_pton(int af, const char *src, void *dst); 155 | 156 | /* DNS wrappers */ 157 | struct hostent *Gethostbyname(const char *name); 158 | struct hostent *Gethostbyaddr(const char *addr, int len, int type); 159 | 160 | /* Pthreads thread control wrappers */ 161 | void Pthread_create(pthread_t *tidp, pthread_attr_t *attrp, 162 | void * (*routine)(void *), void *argp); 163 | void Pthread_join(pthread_t tid, void **thread_return); 164 | void Pthread_cancel(pthread_t tid); 165 | void Pthread_detach(pthread_t tid); 166 | void Pthread_exit(void *retval); 167 | pthread_t Pthread_self(void); 168 | void Pthread_once(pthread_once_t *once_control, void (*init_function)()); 169 | 170 | /* POSIX semaphore wrappers */ 171 | void Sem_init(sem_t *sem, int pshared, unsigned int value); 172 | void P(sem_t *sem); 173 | void V(sem_t *sem); 174 | 175 | /* Rio (Robust I/O) package */ 176 | ssize_t rio_readn(int fd, void *usrbuf, size_t n); 177 | ssize_t rio_writen(int fd, void *usrbuf, size_t n); 178 | void rio_readinitb(rio_t *rp, 
int fd); 179 | ssize_t rio_readnb(rio_t *rp, void *usrbuf, size_t n); 180 | ssize_t rio_readlineb(rio_t *rp, void *usrbuf, size_t maxlen); 181 | 182 | /* Wrappers for Rio package */ 183 | ssize_t Rio_readn(int fd, void *usrbuf, size_t n); 184 | void Rio_writen(int fd, void *usrbuf, size_t n); 185 | void Rio_readinitb(rio_t *rp, int fd); 186 | ssize_t Rio_readnb(rio_t *rp, void *usrbuf, size_t n); 187 | ssize_t Rio_readlineb(rio_t *rp, void *usrbuf, size_t maxlen); 188 | 189 | /* Reentrant protocol-independent client/server helpers */ 190 | int open_clientfd(char *hostname, char *port); 191 | int open_listenfd(char *port); 192 | 193 | /* Wrappers for reentrant protocol-independent client/server helpers */ 194 | int Open_clientfd(char *hostname, char *port); 195 | int Open_listenfd(char *port); 196 | 197 | 198 | #endif /* __CSAPP_H__ */ 199 | /* $end csapp.h */ 200 | -------------------------------------------------------------------------------- /System_Level_IO/fdprob1.c: -------------------------------------------------------------------------------- 1 | /* $begin fdprob1 */ 2 | #include "csapp.h" 3 | 4 | int main() 5 | { 6 | int fd1, fd2; 7 | 8 | fd1 = Open("foo.txt", O_RDONLY, 0); 9 | Close(fd1); 10 | fd2 = Open("baz.txt", O_RDONLY, 0); 11 | printf("fd2 = %d\n", fd2); 12 | exit(0); 13 | } 14 | /* $end fdprob1 */ 15 | -------------------------------------------------------------------------------- /System_Level_IO/ffiles1.c: -------------------------------------------------------------------------------- 1 | #include "csapp.h" 2 | int main(int argc, char *argv[]) 3 | { 4 | int fd1, fd2, fd3; 5 | char c1, c2, c3; 6 | char *fname = argv[1]; 7 | fd1 = Open(fname, O_RDONLY, 0); 8 | fd2 = Open(fname, O_RDONLY, 0); 9 | fd3 = Open(fname, O_RDONLY, 0); 10 | dup2(fd2, fd3); 11 | Read(fd1, &c1, 1); 12 | Read(fd2, &c2, 1); 13 | Read(fd3, &c3, 1); 14 | printf("c1 = %c, c2 = %c, c3 = %c\n", c1, c2, c3); 15 | Close(fd1); 16 | Close(fd2); 17 | Close(fd3); 18 | return 0; 19 | } 
-------------------------------------------------------------------------------- /System_Level_IO/ffiles2.c: -------------------------------------------------------------------------------- 1 | #include "csapp.h" 2 | int main(int argc, char *argv[]) 3 | { 4 | int fd1; 5 | int s = getpid() & 0x1; /* low bit of the pid: 0 or 1, decides which process sleeps below */ 6 | char c1, c2; 7 | char *fname = argv[1]; /* NOTE(review): argc is never checked, so fname is NULL when no file argument is given */ 8 | fd1 = Open(fname, O_RDONLY, 0); 9 | Read(fd1, &c1, 1); /* read before the fork, so both processes print the same c1 */ 10 | if (fork()) {/* Parent */ /* NOTE(review): a failed fork() returns -1, which also takes this branch */ 11 | sleep(s); 12 | Read(fd1, &c2, 1); /* fd1 is shared with the child after fork; per POSIX both use one file offset, so c2 depends on which process reads first */ 13 | printf("Parent: c1 = %c, c2 = %c\n", c1, c2); 14 | } else {/* Child */ 15 | sleep(1-s); /* complements the parent's sleep(s): exactly one of the two processes sleeps for 1 second */ 16 | Read(fd1, &c2, 1); 17 | printf("Child: c1 = %c, c2 = %c\n", c1, c2); 18 | } 19 | return 0; 20 | } 21 | -------------------------------------------------------------------------------- /System_Level_IO/ffiles3.c: -------------------------------------------------------------------------------- 1 | #include "csapp.h" 2 | int main(int argc, char *argv[]) 3 | { 4 | int fd1, fd2, fd3; 5 | char *fname = argv[1]; /* NOTE(review): argc is never checked, so fname is NULL when no file argument is given */ 6 | fd1 = Open(fname, O_CREAT|O_TRUNC|O_RDWR, S_IRUSR|S_IWUSR); 7 | Write(fd1, "pqrs", 4); /* file: "pqrs"; fd1's offset is now 4 */ 8 | fd3 = Open(fname, O_APPEND|O_WRONLY, 0); /* separate open of the same file; O_APPEND makes every write through fd3 go to the current end of file */ 9 | Write(fd3, "jklmn", 5); /* file: "pqrsjklmn" */ 10 | fd2 = dup(fd1); /* Allocates new descriptor */ /* fd2 shares fd1's file offset (still 4) */ 11 | Write(fd2, "wxyz", 4); /* overwrites "jklm" at offset 4; file: "pqrswxyzn" */ 12 | Write(fd3, "ef", 2); /* appended at end of file; file: "pqrswxyznef" */ 13 | Close(fd1); 14 | Close(fd2); 15 | Close(fd3); 16 | return 0; 17 | } 18 | 19 | -------------------------------------------------------------------------------- /System_Level_IO/ffiles3.txt: -------------------------------------------------------------------------------- 1 | pqrswxyznef -------------------------------------------------------------------------------- /System_Level_IO/foo.txt: -------------------------------------------------------------------------------- 1 | foo -------------------------------------------------------------------------------- /System_Level_IO/foobar.txt: -------------------------------------------------------------------------------- 1 | foobar 
-------------------------------------------------------------------------------- /System_Level_IO/hello.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | int main() 4 | { 5 | printf("h"); /* NOTE(review): README describes this file as a demo of libc output buffering - presumably the six printf calls are coalesced into fewer write calls; confirm with strace */ 6 | printf("e"); 7 | printf("l"); 8 | printf("l"); 9 | printf("o"); 10 | printf("\n"); 11 | fflush(stdout); /* flush whatever stdio still holds for stdout */ 12 | exit(0); 13 | } 14 | -------------------------------------------------------------------------------- /System_Level_IO/readdir.c: -------------------------------------------------------------------------------- 1 | #include "csapp.h" 2 | int main(int argc, char **argv) 3 | { 4 | DIR *streamp; 5 | struct dirent *dep; 6 | 7 | streamp = Opendir(argv[1]); /* NOTE(review): argc is never checked, so argv[1] is NULL when no directory argument is given */ 8 | errno = 0; /* readdir returns NULL both at end of directory and on error; clearing errno first lets the check after the loop tell the two apart */ 9 | while ((dep = readdir(streamp)) != NULL) { 10 | printf("Found file: %s\n", dep->d_name); 11 | } 12 | if (errno != 0){ /* nonzero errno after the loop is reported as a readdir failure */ 13 | unix_error("readdir error"); 14 | } 15 | Closedir(streamp); 16 | exit(0); 17 | } 18 | 19 | -------------------------------------------------------------------------------- /System_Level_IO/sharing1.c: -------------------------------------------------------------------------------- 1 | /* $begin sharing1 */ 2 | #include "csapp.h" 3 | 4 | int main() 5 | { 6 | int fd1, fd2; 7 | char c; 8 | 9 | fd1 = Open("foobar.txt", O_RDONLY, 0); 10 | fd2 = Open("foobar.txt", O_RDONLY, 0); /* second, independent open of the same file: per POSIX each open() gets its own file offset */ 11 | Read(fd1, &c, 1); 12 | Read(fd2, &c, 1); /* overwrites c with the byte at fd2's own offset */ 13 | printf("c = %c\n", c); 14 | exit(0); 15 | } 16 | /* $end sharing1 */ 17 | -------------------------------------------------------------------------------- /System_Level_IO/sharing2.c: -------------------------------------------------------------------------------- 1 | /* $begin sharing2 */ 2 | #include "csapp.h" 3 | int main() 4 | { 5 | int fd; 6 | char c; 7 | 8 | fd = Open("foobar.txt", O_RDONLY, 0); 9 | if (Fork() == 0) { /* child: reads one byte through the descriptor inherited from the parent, then exits */ 10 | Read(fd, &c, 1); 11 | exit(0); 12 | } 13 | Wait(NULL); /* parent waits for the child before doing its own Read */ 14 | Read(fd, &c, 1); /* descriptors inherited across fork share one file offset (POSIX), so this read continues after the child's byte */ 15 | printf("c = %c\n", c); 16 | exit(0); 17 | } 18 | /* $end sharing2 */ 19 | 
-------------------------------------------------------------------------------- /System_Level_IO/statcheck.c: -------------------------------------------------------------------------------- 1 | /* $begin statcheck */ 2 | #include "csapp.h" 3 | 4 | int main (int argc, char **argv) 5 | { 6 | struct stat stat; 7 | char *type, *readok; 8 | 9 | /* $end statcheck */ 10 | if (argc != 2) { 11 | fprintf(stderr, "usage: %s \n", argv[0]); 12 | exit(0); 13 | } 14 | /* $begin statcheck */ 15 | Stat(argv[1], &stat); 16 | if (S_ISREG(stat.st_mode)) /* Determine file type */ 17 | type = "regular"; 18 | else if (S_ISDIR(stat.st_mode)) 19 | type = "directory"; 20 | else 21 | type = "other"; 22 | if ((stat.st_mode & S_IRUSR)) /* Check the owner-read permission bit (S_IRUSR) only, not whether the caller can actually read the file */ 23 | readok = "yes"; 24 | else 25 | readok = "no"; 26 | 27 | printf("type: %s, read: %s\n", type, readok); 28 | exit(0); 29 | } 30 | /* $end statcheck */ 31 | -------------------------------------------------------------------------------- /System_Level_IO/stdout.c: -------------------------------------------------------------------------------- 1 | #include 2 | extern FILE *stdin; /* standard input (descriptor 0) */ 3 | extern FILE *stdout; /* standard output (descriptor 1) */ 4 | extern FILE *stderr; /* standard error (descriptor 2) */ 5 | 6 | int main(){ 7 | fprintf(stdout, "Hello, world\n"); 8 | } 9 | -------------------------------------------------------------------------------- /Virtual_Memory/Makefile: -------------------------------------------------------------------------------- 1 | CC = gcc 2 | 3 | CSAPP_INC = . 4 | CSAPP_SRC = . 5 | 6 | CFLAGS = -Wall -O2 -I $(CSAPP_INC) -I . 
7 | LDLIBS = -lpthread 8 | 9 | PROGS = mallocex.o 10 | 11 | all: $(PROGS) 12 | 13 | clean: 14 | rm -f $(PROGS) *.o *~ 15 | 16 | 17 | 18 | -------------------------------------------------------------------------------- /Virtual_Memory/README.md: -------------------------------------------------------------------------------- 1 | # 虚拟内存 2 | > 本页面中包含一个malloc的使用示例代码和一个动态内存分配实验 3 | 4 | 1. mallocex.c:malloc的一个使用示例 5 | > 编译:通过使用Makefile,执行编译,终端输入make 6 | > 注:因为只是malloc的使用示例,没有编写main函数,所以只会编译生成目标文件mallocex.o,无法直接执行 7 | 8 | 2. malloclab:一个动态内存分配实验 9 | > 具体实验步骤可以参考该文件夹中的malloclab的md 10 | > 实验中的代码介绍可以参考该文件夹中的README 11 | 12 | 3. Rust:有关Rust的实验和几个例子 -------------------------------------------------------------------------------- /Virtual_Memory/Rust/README.md: -------------------------------------------------------------------------------- 1 | ## Rust环境搭建 2 | ### 1. 安装Rust 3 | ```bash 4 | $ curl https://sh.rustup.rs -sSf | sh 5 | ``` 6 | ### 2. 将Rust手动添加到系统PATH中: 7 | ```bash 8 | $ source $HOME/.cargo/env 9 | ``` 10 | ### 3. 
编写和运行Rust程序 11 | ``` 12 | #创建main.rs文件写入以下内容: 13 | fn main() { 14 | println!("Hello, world!"); 15 | } 16 | ``` 17 | ```bash 18 | #编译 19 | $ rustc main.rs 20 | #运行 21 | $ ./main 22 | ``` 23 | ## Rust实验 24 | 要求:在experiment文件夹中有三个存在编译错误的Rust程序experiment1.rs,experiment2.rs,experiment3.rs,在限制条件下修改程序,使它们可以成功编译,并且要保持程序的原有功能: 25 | 26 | > experiment1.rs限制条件:不添加任何一行代码,只对原有代码做修改。 27 | experiment2.rs限制条件:fill_vec函数不能有参数。 28 | experiment3.rs限制条件:不能删除第7行代码:println!("{} has length {} content `{:?}`", "vec0", vec0.len(), vec0);。 29 | 30 | 参考: 31 | https://doc.rust-lang.org/book/ch04-01-what-is-ownership.html 32 | https://doc.rust-lang.org/book/ch04-02-references-and-borrowing.html 33 | 34 | ## Rust_guarantee_memory_safety_examples文件夹 35 | 一些rust的保护内存安全、发现错误的例子。 -------------------------------------------------------------------------------- /Virtual_Memory/Rust/Rust_guarantee_memory_safety_examples/README.md: -------------------------------------------------------------------------------- 1 | > no_leak.rs使用Rust语言编写,不存在内存泄露。使用rustc no_leak.rs命令编译,编译成功后会生成no_leak可执行文件,使用./no_leak命令可运行。 2 | 3 | > stackref.rs使用Rust语言编写,存在引用不存在的变量问题。使用rustc stackref.rs命令编译,在编译过程中会检测出该问题,导致编译失败。 4 | 5 | > heapref.rs使用Rust语言编写,存在引用空闲堆块中的数据问题。使用rustc heapref.rs命令编译,在编译过程中会检测出该问题,导致编译失败。 6 | 7 | > off_by_one.rs使用Rust语言编写,存在差一(off-by-one)错误。使用rustc off_by_one.rs命令编译,编译成功后会生成off_by_one可执行文件,使用./off_by_one命令可运行。在运行时会发现该错误导致程序退出。 8 | -------------------------------------------------------------------------------- /Virtual_Memory/Rust/Rust_guarantee_memory_safety_examples/heapref.rs: -------------------------------------------------------------------------------- 1 | fn main() { 2 | { 3 | let mut a = Box::new(5); 4 | println!("{}", *a); 5 | } 6 | println!("{}", *a); 7 | } 8 | -------------------------------------------------------------------------------- /Virtual_Memory/Rust/Rust_guarantee_memory_safety_examples/no_leak: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/hitsz-ids2021/Architecture_Course/a9c20e2107d41805ade34f3d559b307a24f3abca/Virtual_Memory/Rust/Rust_guarantee_memory_safety_examples/no_leak -------------------------------------------------------------------------------- /Virtual_Memory/Rust/Rust_guarantee_memory_safety_examples/no_leak.rs: -------------------------------------------------------------------------------- 1 | fn func() { 2 | let mut a = Box::new(5); 3 | *a = 10; 4 | } 5 | 6 | fn main() { 7 | func(); 8 | } 9 | -------------------------------------------------------------------------------- /Virtual_Memory/Rust/Rust_guarantee_memory_safety_examples/off_by_one: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hitsz-ids2021/Architecture_Course/a9c20e2107d41805ade34f3d559b307a24f3abca/Virtual_Memory/Rust/Rust_guarantee_memory_safety_examples/off_by_one -------------------------------------------------------------------------------- /Virtual_Memory/Rust/Rust_guarantee_memory_safety_examples/off_by_one.rs: -------------------------------------------------------------------------------- 1 | fn main() { 2 | let mut a: [i32; 3] = [0;3]; 3 | for i in 0..=3 { 4 | a[i] = 1; 5 | } 6 | } 7 | -------------------------------------------------------------------------------- /Virtual_Memory/Rust/Rust_guarantee_memory_safety_examples/stackref.rs: -------------------------------------------------------------------------------- 1 | fn main() { 2 | let reference_to_nothing = stackref (); 3 | } 4 | 5 | fn stackref () -> &i32 { 6 | let val:i32 = 10; 7 | &val 8 | } 9 | -------------------------------------------------------------------------------- /Virtual_Memory/Rust/experiment/experiment1.rs: -------------------------------------------------------------------------------- 1 | fn main() { 2 | let vec0 = Vec::new(); 3 | 4 | let mut vec1 = fill_vec(vec0); 5 | 6 | println!("{} has length {} content `{:?}`", "vec1", vec1.len(), 
vec1); 7 | 8 | vec1.push(88); 9 | 10 | println!("{} has length {} content `{:?}`", "vec1", vec1.len(), vec1); 11 | } 12 | 13 | fn fill_vec(vec: Vec) -> Vec { 14 | vec.push(22); 15 | vec.push(44); 16 | vec.push(66); 17 | 18 | vec 19 | } 20 | -------------------------------------------------------------------------------- /Virtual_Memory/Rust/experiment/experiment2.rs: -------------------------------------------------------------------------------- 1 | fn main() { 2 | let vec0 = Vec::new(); 3 | 4 | let mut vec1 = fill_vec(vec0); 5 | 6 | println!("{} has length {} content `{:?}`", "vec1", vec1.len(), vec1); 7 | 8 | vec1.push(88); 9 | 10 | println!("{} has length {} content `{:?}`", "vec1", vec1.len(), vec1); 11 | } 12 | 13 | // `fill_vec()` no longer take `vec: Vec` as argument 14 | fn fill_vec() -> Vec { 15 | let mut vec = vec; 16 | 17 | vec.push(22); 18 | vec.push(44); 19 | vec.push(66); 20 | 21 | vec 22 | } 23 | -------------------------------------------------------------------------------- /Virtual_Memory/Rust/experiment/experiment3.rs: -------------------------------------------------------------------------------- 1 | fn main() { 2 | let vec0 = Vec::new(); 3 | 4 | let mut vec1 = fill_vec(vec0); 5 | 6 | // Do not change the following line! 
7 | println!("{} has length {} content `{:?}`", "vec0", vec0.len(), vec0); 8 | 9 | vec1.push(88); 10 | 11 | println!("{} has length {} content `{:?}`", "vec1", vec1.len(), vec1); 12 | } 13 | 14 | fn fill_vec(vec: Vec) -> Vec { 15 | let mut vec = vec; 16 | 17 | vec.push(22); 18 | vec.push(44); 19 | vec.push(66); 20 | 21 | vec 22 | } 23 | -------------------------------------------------------------------------------- /Virtual_Memory/mallocex.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | void foo(int n) { 5 | int i, *p; 6 | 7 | /* Allocate a block of n ints */ 8 | p = (int *) malloc(n * sizeof(int)); 9 | if (p == NULL) { 10 | perror("malloc"); 11 | exit(0); 12 | } 13 | 14 | /* Initialize allocated block */ 15 | for (i=0; i mdriver -V -f short1-bal.rep 46 | 47 | The -V option prints out helpful tracing and summary information. 48 | 49 | To get a list of the driver flags: 50 | 51 | unix> mdriver -h 52 | 53 | -------------------------------------------------------------------------------- /Virtual_Memory/malloclab/clock.c: -------------------------------------------------------------------------------- 1 | /* 2 | * clock.c - Routines for using the cycle counters on x86, 3 | * Alpha, and Sparc boxes. 4 | * 5 | * Copyright (c) 2002, R. Bryant and D. O'Hallaron, All rights reserved. 6 | * May not be used, modified, or copied without permission. 7 | */ 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include "clock.h" 14 | 15 | 16 | /******************************************************* 17 | * Machine dependent functions 18 | * 19 | * Note: the constants __i386__ and __alpha 20 | * are set by GCC when it calls the C preprocessor 21 | * You can verify this for yourself using gcc -v. 
22 | *******************************************************/ 23 | 24 | #if defined(__i386__) 25 | /******************************************************* 26 | * Pentium versions of start_counter() and get_counter() 27 | *******************************************************/ 28 | 29 | 30 | /* $begin x86cyclecounter */ 31 | /* Initialize the cycle counter */ 32 | static unsigned cyc_hi = 0; 33 | static unsigned cyc_lo = 0; 34 | 35 | 36 | /* Set *hi and *lo to the high and low order bits of the cycle counter. 37 | Implementation requires assembly code to use the rdtsc instruction. */ 38 | void access_counter(unsigned *hi, unsigned *lo) 39 | { 40 | asm("rdtsc; movl %%edx,%0; movl %%eax,%1" /* Read cycle counter */ 41 | : "=r" (*hi), "=r" (*lo) /* and move results to */ 42 | : /* No input */ /* the two outputs */ 43 | : "%edx", "%eax"); 44 | } 45 | 46 | /* Record the current value of the cycle counter. */ 47 | void start_counter() 48 | { 49 | access_counter(&cyc_hi, &cyc_lo); 50 | } 51 | 52 | /* Return the number of cycles since the last call to start_counter. 
*/ 53 | double get_counter() 54 | { 55 | unsigned ncyc_hi, ncyc_lo; 56 | unsigned hi, lo, borrow; 57 | double result; 58 | 59 | /* Get cycle counter */ 60 | access_counter(&ncyc_hi, &ncyc_lo); 61 | 62 | /* Do double precision subtraction */ 63 | lo = ncyc_lo - cyc_lo; 64 | borrow = lo > ncyc_lo; 65 | hi = ncyc_hi - cyc_hi - borrow; 66 | result = (double) hi * (1 << 30) * 4 + lo; 67 | if (result < 0) { 68 | fprintf(stderr, "Error: counter returns neg value: %.0f\n", result); 69 | } 70 | return result; 71 | } 72 | /* $end x86cyclecounter */ 73 | 74 | #elif defined(__alpha) 75 | 76 | /**************************************************** 77 | * Alpha versions of start_counter() and get_counter() 78 | ***************************************************/ 79 | 80 | /* Initialize the cycle counter */ 81 | static unsigned cyc_hi = 0; 82 | static unsigned cyc_lo = 0; 83 | 84 | 85 | /* Use Alpha cycle timer to compute cycles. Then use 86 | measured clock speed to compute seconds 87 | */ 88 | 89 | /* 90 | * counterRoutine is an array of Alpha instructions to access 91 | * the Alpha's processor cycle counter. It uses the rpcc 92 | * instruction to access the counter. This 64 bit register is 93 | * divided into two parts. The lower 32 bits are the cycles 94 | * used by the current process. The upper 32 bits are wall 95 | * clock cycles. These instructions read the counter, and 96 | * convert the lower 32 bits into an unsigned int - this is the 97 | * user space counter value. 98 | * NOTE: The counter has a very limited time span. With a 99 | * 450MhZ clock the counter can time things for about 9 100 | * seconds. */ 101 | static unsigned int counterRoutine[] = 102 | { 103 | 0x601fc000u, 104 | 0x401f0000u, 105 | 0x6bfa8001u 106 | }; 107 | 108 | /* Cast the above instructions into a function. 
*/ 109 | static unsigned int (*counter)(void)= (void *)counterRoutine; 110 | 111 | 112 | void start_counter() 113 | { 114 | /* Get cycle counter */ 115 | cyc_hi = 0; 116 | cyc_lo = counter(); 117 | } 118 | 119 | double get_counter() 120 | { 121 | unsigned ncyc_hi, ncyc_lo; 122 | unsigned hi, lo, borrow; 123 | double result; 124 | ncyc_lo = counter(); 125 | ncyc_hi = 0; 126 | lo = ncyc_lo - cyc_lo; 127 | borrow = lo > ncyc_lo; 128 | hi = ncyc_hi - cyc_hi - borrow; 129 | result = (double) hi * (1 << 30) * 4 + lo; 130 | if (result < 0) { 131 | fprintf(stderr, "Error: Cycle counter returning negative value: %.0f\n", result); 132 | } 133 | return result; 134 | } 135 | 136 | #else 137 | 138 | /**************************************************************** 139 | * All the other platforms for which we haven't implemented cycle 140 | * counter routines. Newer models of sparcs (v8plus) have cycle 141 | * counters that can be accessed from user programs, but since there 142 | * are still many sparc boxes out there that don't support this, we 143 | * haven't provided a Sparc version here. 
/* $begin mhz */
/*
 * mhz_full - Estimate the clock rate in MHz.
 *
 * Starts the cycle counter, sleeps for sleeptime seconds, then divides
 * the cycles counted by the sleep duration expressed in microseconds.
 * When verbose is nonzero the estimate is also printed.
 */
double mhz_full(int verbose, int sleeptime)
{
    double rate;

    start_counter();
    sleep(sleeptime);

    /* cycles / (seconds * 1e6) == millions of cycles per second */
    rate = get_counter() / (1e6*sleeptime);

    if (verbose) {
        printf("Processor clock rate ~= %.1f MHz\n", rate);
    }
    return rate;
}
/* $end mhz */
t; 217 | clock_t oldc; 218 | int e = 0; 219 | 220 | times(&t); 221 | oldc = t.tms_utime; 222 | start_counter(); 223 | oldt = get_counter(); 224 | while (e = THRESHOLD) { 228 | clock_t newc; 229 | times(&t); 230 | newc = t.tms_utime; 231 | if (newc > oldc) { 232 | double cpt = (newt-oldt)/(newc-oldc); 233 | if ((cyc_per_tick == 0.0 || cyc_per_tick > cpt) && cpt > RECORDTHRESH) 234 | cyc_per_tick = cpt; 235 | /* 236 | if (verbose) 237 | printf("Saw event lasting %.0f cycles and %d ticks. Ratio = %f\n", 238 | newt-oldt, (int) (newc-oldc), cpt); 239 | */ 240 | e++; 241 | oldc = newc; 242 | } 243 | oldt = newt; 244 | } 245 | } 246 | if (verbose) 247 | printf("Setting cyc_per_tick to %f\n", cyc_per_tick); 248 | } 249 | 250 | static clock_t start_tick = 0; 251 | 252 | void start_comp_counter() 253 | { 254 | struct tms t; 255 | 256 | if (cyc_per_tick == 0.0) 257 | callibrate(0); 258 | times(&t); 259 | start_tick = t.tms_utime; 260 | start_counter(); 261 | } 262 | 263 | double get_comp_counter() 264 | { 265 | double time = get_counter(); 266 | double ctime; 267 | struct tms t; 268 | clock_t ticks; 269 | 270 | times(&t); 271 | ticks = t.tms_utime - start_tick; 272 | ctime = time - ticks*cyc_per_tick; 273 | /* 274 | printf("Measured %.0f cycles. Ticks = %d. 
Corrected %.0f cycles\n", 275 | time, (int) ticks, ctime); 276 | */ 277 | return ctime; 278 | } 279 | 280 | -------------------------------------------------------------------------------- /Virtual_Memory/malloclab/clock.h: -------------------------------------------------------------------------------- 1 | /* Routines for using cycle counter */ 2 | 3 | /* Start the counter */ 4 | void start_counter(); 5 | 6 | /* Get # cycles since counter started */ 7 | double get_counter(); 8 | 9 | /* Measure overhead for counter */ 10 | double ovhd(); 11 | 12 | /* Determine clock rate of processor (using a default sleeptime) */ 13 | double mhz(int verbose); 14 | 15 | /* Determine clock rate of processor, having more control over accuracy */ 16 | double mhz_full(int verbose, int sleeptime); 17 | 18 | /** Special counters that compensate for timer interrupt overhead */ 19 | 20 | void start_comp_counter(); 21 | 22 | double get_comp_counter(); 23 | -------------------------------------------------------------------------------- /Virtual_Memory/malloclab/clock.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hitsz-ids2021/Architecture_Course/a9c20e2107d41805ade34f3d559b307a24f3abca/Virtual_Memory/malloclab/clock.o -------------------------------------------------------------------------------- /Virtual_Memory/malloclab/config.h: -------------------------------------------------------------------------------- 1 | #ifndef __CONFIG_H_ 2 | #define __CONFIG_H_ 3 | 4 | /* 5 | * config.h - malloc lab configuration file 6 | * 7 | * Copyright (c) 2002, R. Bryant and D. O'Hallaron, All rights reserved. 8 | * May not be used, modified, or copied without permission. 9 | */ 10 | 11 | /* 12 | * This is the default path where the driver will look for the 13 | * default tracefiles. You can override it at runtime with the -t flag. 
14 | */ 15 | #define TRACEDIR "/afs/cs/project/ics2/im/labs/malloclab/traces/" 16 | 17 | /* 18 | * This is the list of default tracefiles in TRACEDIR that the driver 19 | * will use for testing. Modify this if you want to add or delete 20 | * traces from the driver's test suite. For example, if you don't want 21 | * your students to implement realloc, you can delete the last two 22 | * traces. 23 | */ 24 | #define DEFAULT_TRACEFILES \ 25 | "amptjp-bal.rep",\ 26 | "cccp-bal.rep",\ 27 | "cp-decl-bal.rep",\ 28 | "expr-bal.rep",\ 29 | "coalescing-bal.rep",\ 30 | "random-bal.rep",\ 31 | "random2-bal.rep",\ 32 | "binary-bal.rep",\ 33 | "binary2-bal.rep",\ 34 | "realloc-bal.rep",\ 35 | "realloc2-bal.rep" 36 | 37 | /* 38 | * This constant gives the estimated performance of the libc malloc 39 | * package using our traces on some reference system, typically the 40 | * same kind of system the students use. Its purpose is to cap the 41 | * contribution of throughput to the performance index. Once the 42 | * students surpass the AVG_LIBC_THRUPUT, they get no further benefit 43 | * to their score. This deters students from building extremely fast, 44 | * but extremely stupid malloc packages. 45 | */ 46 | #define AVG_LIBC_THRUPUT 600E3 /* 600 Kops/sec */ 47 | 48 | /* 49 | * This constant determines the contributions of space utilization 50 | * (UTIL_WEIGHT) and throughput (1 - UTIL_WEIGHT) to the performance 51 | * index. 
52 | */ 53 | #define UTIL_WEIGHT .60 54 | 55 | /* 56 | * Alignment requirement in bytes (either 4 or 8) 57 | */ 58 | #define ALIGNMENT 8 59 | 60 | /* 61 | * Maximum heap size in bytes 62 | */ 63 | #define MAX_HEAP (20*(1<<20)) /* 20 MB */ 64 | 65 | /***************************************************************************** 66 | * Set exactly one of these USE_xxx constants to "1" to select a timing method 67 | *****************************************************************************/ 68 | #define USE_FCYC 0 /* cycle counter w/K-best scheme (x86 & Alpha only) */ 69 | #define USE_ITIMER 0 /* interval timer (any Unix box) */ 70 | #define USE_GETTOD 1 /* gettimeofday (any Unix box) */ 71 | 72 | #endif /* __CONFIG_H */ 73 | -------------------------------------------------------------------------------- /Virtual_Memory/malloclab/fcyc.c: -------------------------------------------------------------------------------- 1 | /* 2 | * fcyc.c - Estimate the time (in CPU cycles) used by a function f 3 | * 4 | * Copyright (c) 2002, R. Bryant and D. O'Hallaron, All rights reserved. 5 | * May not be used, modified, or copied without permission. 6 | * 7 | * Uses the cycle timer routines in clock.c to estimate the 8 | * the time in CPU cycles for a function f. 
9 | */ 10 | #include 11 | #include 12 | #include 13 | 14 | #include "fcyc.h" 15 | #include "clock.h" 16 | 17 | /* Default values */ 18 | #define K 3 /* Value of K in K-best scheme */ 19 | #define MAXSAMPLES 20 /* Give up after MAXSAMPLES */ 20 | #define EPSILON 0.01 /* K samples should be EPSILON of each other*/ 21 | #define COMPENSATE 0 /* 1-> try to compensate for clock ticks */ 22 | #define CLEAR_CACHE 0 /* Clear cache before running test function */ 23 | #define CACHE_BYTES (1<<19) /* Max cache size in bytes */ 24 | #define CACHE_BLOCK 32 /* Cache block size in bytes */ 25 | 26 | static int kbest = K; 27 | static int maxsamples = MAXSAMPLES; 28 | static double epsilon = EPSILON; 29 | static int compensate = COMPENSATE; 30 | static int clear_cache = CLEAR_CACHE; 31 | static int cache_bytes = CACHE_BYTES; 32 | static int cache_block = CACHE_BLOCK; 33 | 34 | static int *cache_buf = NULL; 35 | 36 | static double *values = NULL; 37 | static int samplecount = 0; 38 | 39 | /* for debugging only */ 40 | #define KEEP_VALS 0 41 | #define KEEP_SAMPLES 0 42 | 43 | #if KEEP_SAMPLES 44 | static double *samples = NULL; 45 | #endif 46 | 47 | /* 48 | * init_sampler - Start new sampling process 49 | */ 50 | static void init_sampler() 51 | { 52 | if (values) 53 | free(values); 54 | values = calloc(kbest, sizeof(double)); 55 | #if KEEP_SAMPLES 56 | if (samples) 57 | free(samples); 58 | /* Allocate extra for wraparound analysis */ 59 | samples = calloc(maxsamples+kbest, sizeof(double)); 60 | #endif 61 | samplecount = 0; 62 | } 63 | 64 | /* 65 | * add_sample - Add new sample 66 | */ 67 | static void add_sample(double val) 68 | { 69 | int pos = 0; 70 | if (samplecount < kbest) { 71 | pos = samplecount; 72 | values[pos] = val; 73 | } else if (val < values[kbest-1]) { 74 | pos = kbest-1; 75 | values[pos] = val; 76 | } 77 | #if KEEP_SAMPLES 78 | samples[samplecount] = val; 79 | #endif 80 | samplecount++; 81 | /* Insertion sort */ 82 | while (pos > 0 && values[pos-1] > values[pos]) { 83 
| double temp = values[pos-1]; 84 | values[pos-1] = values[pos]; 85 | values[pos] = temp; 86 | pos--; 87 | } 88 | } 89 | 90 | /* 91 | * has_converged- Have kbest minimum measurements converged within epsilon? 92 | */ 93 | static int has_converged() 94 | { 95 | return 96 | (samplecount >= kbest) && 97 | ((1 + epsilon)*values[0] >= values[kbest-1]); 98 | } 99 | 100 | /* 101 | * clear - Code to clear cache 102 | */ 103 | static volatile int sink = 0; 104 | 105 | static void clear() 106 | { 107 | int x = sink; 108 | int *cptr, *cend; 109 | int incr = cache_block/sizeof(int); 110 | if (!cache_buf) { 111 | cache_buf = malloc(cache_bytes); 112 | if (!cache_buf) { 113 | fprintf(stderr, "Fatal error. Malloc returned null when trying to clear cache\n"); 114 | exit(1); 115 | } 116 | } 117 | cptr = (int *) cache_buf; 118 | cend = cptr + cache_bytes/sizeof(int); 119 | while (cptr < cend) { 120 | x += *cptr; 121 | cptr += incr; 122 | } 123 | sink = x; 124 | } 125 | 126 | /* 127 | * fcyc - Use K-best scheme to estimate the running time of function f 128 | */ 129 | double fcyc(test_funct f, void *argp) 130 | { 131 | double result; 132 | init_sampler(); 133 | if (compensate) { 134 | do { 135 | double cyc; 136 | if (clear_cache) 137 | clear(); 138 | start_comp_counter(); 139 | f(argp); 140 | cyc = get_comp_counter(); 141 | add_sample(cyc); 142 | } while (!has_converged() && samplecount < maxsamples); 143 | } else { 144 | do { 145 | double cyc; 146 | if (clear_cache) 147 | clear(); 148 | start_counter(); 149 | f(argp); 150 | cyc = get_counter(); 151 | add_sample(cyc); 152 | } while (!has_converged() && samplecount < maxsamples); 153 | } 154 | #ifdef DEBUG 155 | { 156 | int i; 157 | printf(" %d smallest values: [", kbest); 158 | for (i = 0; i < kbest; i++) 159 | printf("%.0f%s", values[i], i==kbest-1 ? 
"]\n" : ", "); 160 | } 161 | #endif 162 | result = values[0]; 163 | #if !KEEP_VALS 164 | free(values); 165 | values = NULL; 166 | #endif 167 | return result; 168 | } 169 | 170 | 171 | /************************************************************* 172 | * Set the various parameters used by the measurement routines 173 | ************************************************************/ 174 | 175 | /* 176 | * set_fcyc_clear_cache - When set, will run code to clear cache 177 | * before each measurement. 178 | * Default = 0 179 | */ 180 | void set_fcyc_clear_cache(int clear) 181 | { 182 | clear_cache = clear; 183 | } 184 | 185 | /* 186 | * set_fcyc_cache_size - Set size of cache to use when clearing cache 187 | * Default = 1<<19 (512KB) 188 | */ 189 | void set_fcyc_cache_size(int bytes) 190 | { 191 | if (bytes != cache_bytes) { 192 | cache_bytes = bytes; 193 | if (cache_buf) { 194 | free(cache_buf); 195 | cache_buf = NULL; 196 | } 197 | } 198 | } 199 | 200 | /* 201 | * set_fcyc_cache_block - Set size of cache block 202 | * Default = 32 203 | */ 204 | void set_fcyc_cache_block(int bytes) { 205 | cache_block = bytes; 206 | } 207 | 208 | 209 | /* 210 | * set_fcyc_compensate- When set, will attempt to compensate for 211 | * timer interrupt overhead 212 | * Default = 0 213 | */ 214 | void set_fcyc_compensate(int compensate_arg) 215 | { 216 | compensate = compensate_arg; 217 | } 218 | 219 | /* 220 | * set_fcyc_k - Value of K in K-best measurement scheme 221 | * Default = 3 222 | */ 223 | void set_fcyc_k(int k) 224 | { 225 | kbest = k; 226 | } 227 | 228 | /* 229 | * set_fcyc_maxsamples - Maximum number of samples attempting to find 230 | * K-best within some tolerance. 231 | * When exceeded, just return best sample found. 
232 | * Default = 20 233 | */ 234 | void set_fcyc_maxsamples(int maxsamples_arg) 235 | { 236 | maxsamples = maxsamples_arg; 237 | } 238 | 239 | /* 240 | * set_fcyc_epsilon - Tolerance required for K-best 241 | * Default = 0.01 242 | */ 243 | void set_fcyc_epsilon(double epsilon_arg) 244 | { 245 | epsilon = epsilon_arg; 246 | } 247 | 248 | 249 | 250 | 251 | 252 | -------------------------------------------------------------------------------- /Virtual_Memory/malloclab/fcyc.h: -------------------------------------------------------------------------------- 1 | /* 2 | * fcyc.h - prototypes for the routines in fcyc.c that estimate the 3 | * time in CPU cycles used by a test function f 4 | * 5 | * Copyright (c) 2002, R. Bryant and D. O'Hallaron, All rights reserved. 6 | * May not be used, modified, or copied without permission. 7 | * 8 | */ 9 | 10 | /* The test function takes a generic pointer as input */ 11 | typedef void (*test_funct)(void *); 12 | 13 | /* Compute number of cycles used by test function f */ 14 | double fcyc(test_funct f, void* argp); 15 | 16 | /********************************************************* 17 | * Set the various parameters used by measurement routines 18 | *********************************************************/ 19 | 20 | /* 21 | * set_fcyc_clear_cache - When set, will run code to clear cache 22 | * before each measurement. 
23 | * Default = 0 24 | */ 25 | void set_fcyc_clear_cache(int clear); 26 | 27 | /* 28 | * set_fcyc_cache_size - Set size of cache to use when clearing cache 29 | * Default = 1<<19 (512KB) 30 | */ 31 | void set_fcyc_cache_size(int bytes); 32 | 33 | /* 34 | * set_fcyc_cache_block - Set size of cache block 35 | * Default = 32 36 | */ 37 | void set_fcyc_cache_block(int bytes); 38 | 39 | /* 40 | * set_fcyc_compensate- When set, will attempt to compensate for 41 | * timer interrupt overhead 42 | * Default = 0 43 | */ 44 | void set_fcyc_compensate(int compensate_arg); 45 | 46 | /* 47 | * set_fcyc_k - Value of K in K-best measurement scheme 48 | * Default = 3 49 | */ 50 | void set_fcyc_k(int k); 51 | 52 | /* 53 | * set_fcyc_maxsamples - Maximum number of samples attempting to find 54 | * K-best within some tolerance. 55 | * When exceeded, just return best sample found. 56 | * Default = 20 57 | */ 58 | void set_fcyc_maxsamples(int maxsamples_arg); 59 | 60 | /* 61 | * set_fcyc_epsilon - Tolerance required for K-best 62 | * Default = 0.01 63 | */ 64 | void set_fcyc_epsilon(double epsilon_arg); 65 | 66 | 67 | 68 | 69 | -------------------------------------------------------------------------------- /Virtual_Memory/malloclab/fcyc.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hitsz-ids2021/Architecture_Course/a9c20e2107d41805ade34f3d559b307a24f3abca/Virtual_Memory/malloclab/fcyc.o -------------------------------------------------------------------------------- /Virtual_Memory/malloclab/fsecs.c: -------------------------------------------------------------------------------- 1 | /**************************** 2 | * High-level timing wrappers 3 | ****************************/ 4 | #include 5 | #include "fsecs.h" 6 | #include "fcyc.h" 7 | #include "clock.h" 8 | #include "ftimer.h" 9 | #include "config.h" 10 | 11 | static double Mhz; /* estimated CPU clock frequency */ 12 | 13 | extern int verbose; /* -v option in 
mdriver.c */ 14 | 15 | /* 16 | * init_fsecs - initialize the timing package 17 | */ 18 | void init_fsecs(void) 19 | { 20 | Mhz = 0; /* keep gcc -Wall happy */ 21 | 22 | #if USE_FCYC 23 | if (verbose) 24 | printf("Measuring performance with a cycle counter.\n"); 25 | 26 | /* set key parameters for the fcyc package */ 27 | set_fcyc_maxsamples(20); 28 | set_fcyc_clear_cache(1); 29 | set_fcyc_compensate(1); 30 | set_fcyc_epsilon(0.01); 31 | set_fcyc_k(3); 32 | Mhz = mhz(verbose > 0); 33 | #elif USE_ITIMER 34 | if (verbose) 35 | printf("Measuring performance with the interval timer.\n"); 36 | #elif USE_GETTOD 37 | if (verbose) 38 | printf("Measuring performance with gettimeofday().\n"); 39 | #endif 40 | } 41 | 42 | /* 43 | * fsecs - Return the running time of a function f (in seconds) 44 | */ 45 | double fsecs(fsecs_test_funct f, void *argp) 46 | { 47 | #if USE_FCYC 48 | double cycles = fcyc(f, argp); 49 | return cycles/(Mhz*1e6); 50 | #elif USE_ITIMER 51 | return ftimer_itimer(f, argp, 10); 52 | #elif USE_GETTOD 53 | return ftimer_gettod(f, argp, 10); 54 | #endif 55 | } 56 | 57 | 58 | -------------------------------------------------------------------------------- /Virtual_Memory/malloclab/fsecs.h: -------------------------------------------------------------------------------- 1 | typedef void (*fsecs_test_funct)(void *); 2 | 3 | void init_fsecs(void); 4 | double fsecs(fsecs_test_funct f, void *argp); 5 | -------------------------------------------------------------------------------- /Virtual_Memory/malloclab/fsecs.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hitsz-ids2021/Architecture_Course/a9c20e2107d41805ade34f3d559b307a24f3abca/Virtual_Memory/malloclab/fsecs.o -------------------------------------------------------------------------------- /Virtual_Memory/malloclab/ftimer.c: -------------------------------------------------------------------------------- 1 | /* 2 | * ftimer.c - Estimate the 
time (in seconds) used by a function f 3 | * 4 | * Copyright (c) 2002, R. Bryant and D. O'Hallaron, All rights reserved. 5 | * May not be used, modified, or copied without permission. 6 | * 7 | * Function timers that estimate the running time (in seconds) of a function f. 8 | * ftimer_itimer: version that uses the interval timer 9 | * ftimer_gettod: version that uses gettimeofday 10 | */ 11 | #include 12 | #include 13 | #include "ftimer.h" 14 | 15 | /* function prototypes */ 16 | static void init_etime(void); 17 | static double get_etime(void); 18 | 19 | /* 20 | * ftimer_itimer - Use the interval timer to estimate the running time 21 | * of f(argp). Return the average of n runs. 22 | */ 23 | double ftimer_itimer(ftimer_test_funct f, void *argp, int n) 24 | { 25 | double start, tmeas; 26 | int i; 27 | 28 | init_etime(); 29 | start = get_etime(); 30 | for (i = 0; i < n; i++) 31 | f(argp); 32 | tmeas = get_etime() - start; 33 | return tmeas / n; 34 | } 35 | 36 | /* 37 | * ftimer_gettod - Use gettimeofday to estimate the running time of 38 | * f(argp). Return the average of n runs. 
39 | */ 40 | double ftimer_gettod(ftimer_test_funct f, void *argp, int n) 41 | { 42 | int i; 43 | struct timeval stv, etv; 44 | double diff; 45 | 46 | gettimeofday(&stv, NULL); 47 | for (i = 0; i < n; i++) 48 | f(argp); 49 | gettimeofday(&etv,NULL); 50 | diff = 1E3*(etv.tv_sec - stv.tv_sec) + 1E-3*(etv.tv_usec-stv.tv_usec); 51 | diff /= n; 52 | return (1E-3*diff); 53 | } 54 | 55 | 56 | /* 57 | * Routines for manipulating the Unix interval timer 58 | */ 59 | 60 | /* The initial value of the interval timer */ 61 | #define MAX_ETIME 86400 62 | 63 | /* static variables that hold the initial value of the interval timer */ 64 | static struct itimerval first_u; /* user time */ 65 | static struct itimerval first_r; /* real time */ 66 | static struct itimerval first_p; /* prof time*/ 67 | 68 | /* init the timer */ 69 | static void init_etime(void) 70 | { 71 | first_u.it_interval.tv_sec = 0; 72 | first_u.it_interval.tv_usec = 0; 73 | first_u.it_value.tv_sec = MAX_ETIME; 74 | first_u.it_value.tv_usec = 0; 75 | setitimer(ITIMER_VIRTUAL, &first_u, NULL); 76 | 77 | first_r.it_interval.tv_sec = 0; 78 | first_r.it_interval.tv_usec = 0; 79 | first_r.it_value.tv_sec = MAX_ETIME; 80 | first_r.it_value.tv_usec = 0; 81 | setitimer(ITIMER_REAL, &first_r, NULL); 82 | 83 | first_p.it_interval.tv_sec = 0; 84 | first_p.it_interval.tv_usec = 0; 85 | first_p.it_value.tv_sec = MAX_ETIME; 86 | first_p.it_value.tv_usec = 0; 87 | setitimer(ITIMER_PROF, &first_p, NULL); 88 | } 89 | 90 | /* return elapsed real seconds since call to init_etime */ 91 | static double get_etime(void) { 92 | struct itimerval v_curr; 93 | struct itimerval r_curr; 94 | struct itimerval p_curr; 95 | 96 | getitimer(ITIMER_VIRTUAL, &v_curr); 97 | getitimer(ITIMER_REAL,&r_curr); 98 | getitimer(ITIMER_PROF,&p_curr); 99 | 100 | return (double) ((first_p.it_value.tv_sec - r_curr.it_value.tv_sec) + 101 | (first_p.it_value.tv_usec - r_curr.it_value.tv_usec)*1e-6); 102 | } 103 | 104 | 105 | 106 | 107 | 
-------------------------------------------------------------------------------- /Virtual_Memory/malloclab/ftimer.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Function timers 3 | */ 4 | typedef void (*ftimer_test_funct)(void *); 5 | 6 | /* Estimate the running time of f(argp) using the Unix interval timer. 7 | Return the average of n runs */ 8 | double ftimer_itimer(ftimer_test_funct f, void *argp, int n); 9 | 10 | 11 | /* Estimate the running time of f(argp) using gettimeofday 12 | Return the average of n runs */ 13 | double ftimer_gettod(ftimer_test_funct f, void *argp, int n); 14 | 15 | -------------------------------------------------------------------------------- /Virtual_Memory/malloclab/ftimer.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hitsz-ids2021/Architecture_Course/a9c20e2107d41805ade34f3d559b307a24f3abca/Virtual_Memory/malloclab/ftimer.o -------------------------------------------------------------------------------- /Virtual_Memory/malloclab/malloclab.md: -------------------------------------------------------------------------------- 1 | # 实现一个简单的动态内存分配器 2 | ## 一、简介 3 | 在这个实验中,将用C程序编写一个动态内存分配器,即您自己版本的malloc、free和realloc函数。鼓励有创新性地实现正确、高效和快速的分配器。 4 | 5 | ## 二、代码结构 6 | > config.h 7 | 定义了一些参数常量,例如本次实验模型化的空间大小 8 | 实现动态内存分配器: 9 | > mm.h , mm.c 10 | 需要实现的函数均在mm.c中完成 11 | > memlib.h , memlib.c 12 | 提供一个允许我们在不干涉已存在的系统层malloc包的情况下的内存系统模型 13 | 性能评估: 14 | > mdriver.c 15 | 使用trace文件(即*.rep)测试我们实现的mm.c的正确性,空间利用率和吞吐量。 16 | trace文件包括了一系列分配、重新分配和释放指示,mdriver.c根据这些指示调用 mm.c 中的mm_malloc,mm_realloc 和mm_free函数。 17 | 18 | ## 三、实验说明 19 | 动态内存分配器将由以下四个函数组成,它们在mm.h中声明,并在mm.c定义实现。 20 | int mm_init(void); 21 | void *mm_malloc(size_t size); 22 | void mm_free(void *ptr); 23 | void *mm_realloc(void *ptr, size_t size); 24 | 提供的mm.c文件实现了最简单的malloc包。以此为起点,修改这些函数(可能定义其他私有静态函数),使它们完成如下功能: 25 | > mm_init:在调用mm_malloc, 
mm_realloc或mm_free之前,应用程序(即用于评估实现的跟踪驱动程序)调用mm_init来执行任何必要的初始化,例如分配初始堆区域。如果在执行初始化时出现问题,则返回值应为-1,否则为0。 26 | 27 | > mm_malloc: 返回一个指针,指向至少size字节的已分配块。整个分配的块应位于堆区域内,并且不应与任何其他分配的块重叠。我们将把您的实现与标准C库(libc)中提供的malloc版本进行比较。由于libc malloc总是返回与8字节对齐的有效负载指针,因此您的malloc实现也应该这样做,并且总是返回与8字节对齐的指针。 28 | 29 | > mm_free:释放ptr指向的块。它什么也不返回。只有当传入的指针(ptr)是由先前对mm_malloc或mm_realloc的调用返回且尚未释放时,此功能才能保证工作。 30 | 31 | > mm_realloc:返回一个指针,该指针指向具有以下约束的至少大小为size字节的已分配区域。 32 | –如果指针为空,则等价于mm_malloc(size); 33 | –如果size为0,则等价于mm_free(ptr); 34 | –如果指针不为空,它必须是通过先前对mm_malloc或mm_realloc的调用返回的。对mm_realloc的调用将ptr(旧块)指向的内存块的大小更改为size字节,并返回新块的地址。请注意,新块的地址可能与旧块相同,也可能不同,具体取决于:您的实现、旧块中的内部碎片数量以及重新分配请求的大小。 35 | 新块的内容与旧块的内容相同,大小为新块和旧块大小的最小值。其他均未初始化。例如,如果旧块是8字节,而新块是12字节,则新块的前8字节与旧块的前8字节相同,新块最后4个字节未初始化。类似地,如果旧块是8字节,而新块是4字节,则新块的内容与旧块的前4字节相同。 36 | 这些功能与libc中对应的malloc、realloc和free功能相匹配。在shell中输入man malloc以获取完整的文档介绍。 37 | 38 | ## 四、性能验证驱动程序mdriver.c 39 | 接受以下命令行参数: 40 | `-t <tracedir>`:在目录tracedir中查找默认跟踪文件,而不是在config.h中定义的默认目录。 41 | `-f <tracefile>`:使用一个特定的tracefile进行测试,而不是使用默认的跟踪文件。 42 | -h:打印命令行参数的摘要。 43 | -l:除了编写的malloc包外,还运行并测试libc的malloc。 44 | -v: 详细输出。打印每个跟踪文件的性能得分。 45 | -V:更详细的输出。在处理每个跟踪文件时打印其他诊断信息。可在调试期间用于确定哪个跟踪文件导致失败。 46 | 47 | ## 五、评分规则 48 | 将使用两个性能指标来评估解决方案: 49 | –空间利用率:驱动程序使用的内存总量(通过mm_malloc或mm_realloc分配,但尚未通过mm_free释放)与分配器使用的堆大小之间的峰值比率。最佳比率等于1。应该找到好的策略来最小化碎片,以便使此比率尽可能接近最佳值。 50 | –吞吐量:平均每秒完成的操作数。 51 | 驱动程序通过计算性能索引(performance index)P来总结分配器的性能,P是空间利用率和吞吐量的加权和: 52 | $P = wU + (1-w)\min\left(1, \frac{T}{T_{libc}}\right)$ 53 | 其中U是空间利用率,T是你的分配器的吞吐量,$T_{libc}$是libc的malloc包的吞吐量。默认w=0.6。 54 | 鉴于内存和CPU都是昂贵的系统资源,采用此公式鼓励平衡优化内存利用率和吞吐量。理想情况下,性能指标将达到P=w+(1-w)=1或100%。由于每个度量对性能索引的贡献分别最多为w和1-w,因此不应为了仅优化内存利用率或仅优化吞吐量而走极端。要获得好的分数,必须在利用率和吞吐量之间取得平衡。 55 | -------------------------------------------------------------------------------- /Virtual_Memory/malloclab/mdriver: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hitsz-ids2021/Architecture_Course/a9c20e2107d41805ade34f3d559b307a24f3abca/Virtual_Memory/malloclab/mdriver 
-------------------------------------------------------------------------------- /Virtual_Memory/malloclab/mdriver.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hitsz-ids2021/Architecture_Course/a9c20e2107d41805ade34f3d559b307a24f3abca/Virtual_Memory/malloclab/mdriver.o -------------------------------------------------------------------------------- /Virtual_Memory/malloclab/memlib.c: -------------------------------------------------------------------------------- 1 | /* 2 | * memlib.c - a module that simulates the memory system. Needed because it 3 | * allows us to interleave calls from the student's malloc package 4 | * with the system's malloc package in libc. 5 | */ 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | #include "memlib.h" 15 | #include "config.h" 16 | 17 | /* private variables */ 18 | static char *mem_start_brk; /* points to first byte of heap */ 19 | static char *mem_brk; /* points to last byte of heap */ 20 | static char *mem_max_addr; /* largest legal heap address */ 21 | 22 | /* 23 | * mem_init - initialize the memory system model 24 | */ 25 | void mem_init(void) 26 | { 27 | /* allocate the storage we will use to model the available VM */ 28 | if ((mem_start_brk = (char *)malloc(MAX_HEAP)) == NULL) { 29 | fprintf(stderr, "mem_init_vm: malloc error\n"); 30 | exit(1); 31 | } 32 | mem_max_addr = mem_start_brk + MAX_HEAP; /* max legal heap address */ 33 | mem_brk = mem_start_brk; /* heap is empty initially */ 34 | } 35 | 36 | /* 37 | * mem_deinit - free the storage used by the memory system model 38 | */ 39 | void mem_deinit(void) 40 | { 41 | free(mem_start_brk); 42 | } 43 | 44 | /* 45 | * mem_reset_brk - reset the simulated brk pointer to make an empty heap 46 | */ 47 | void mem_reset_brk() 48 | { 49 | mem_brk = mem_start_brk; 50 | } 51 | 52 | /* 53 | * mem_sbrk - simple model of the sbrk function. 
Extends the heap 54 | * by incr bytes and returns the start address of the new area. In 55 | * this model, the heap cannot be shrunk. 56 | */ 57 | void *mem_sbrk(int incr) 58 | { 59 | char *old_brk = mem_brk; 60 | 61 | if ( (incr < 0) || ((mem_brk + incr) > mem_max_addr)) { 62 | errno = ENOMEM; 63 | fprintf(stderr, "ERROR: mem_sbrk failed. Ran out of memory...\n"); 64 | return (void *)-1; 65 | } 66 | mem_brk += incr; 67 | return (void *)old_brk; 68 | } 69 | 70 | /* 71 | * mem_heap_lo - return address of the first heap byte 72 | */ 73 | void *mem_heap_lo() 74 | { 75 | return (void *)mem_start_brk; 76 | } 77 | 78 | /* 79 | * mem_heap_hi - return address of last heap byte 80 | */ 81 | void *mem_heap_hi() 82 | { 83 | return (void *)(mem_brk - 1); 84 | } 85 | 86 | /* 87 | * mem_heapsize() - returns the heap size in bytes 88 | */ 89 | size_t mem_heapsize() 90 | { 91 | return (size_t)(mem_brk - mem_start_brk); 92 | } 93 | 94 | /* 95 | * mem_pagesize() - returns the page size of the system 96 | */ 97 | size_t mem_pagesize() 98 | { 99 | return (size_t)getpagesize(); 100 | } 101 | -------------------------------------------------------------------------------- /Virtual_Memory/malloclab/memlib.h: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | void mem_init(void); 4 | void mem_deinit(void); 5 | void *mem_sbrk(int incr); 6 | void mem_reset_brk(void); 7 | void *mem_heap_lo(void); 8 | void *mem_heap_hi(void); 9 | size_t mem_heapsize(void); 10 | size_t mem_pagesize(void); 11 | 12 | -------------------------------------------------------------------------------- /Virtual_Memory/malloclab/memlib.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hitsz-ids2021/Architecture_Course/a9c20e2107d41805ade34f3d559b307a24f3abca/Virtual_Memory/malloclab/memlib.o -------------------------------------------------------------------------------- /Virtual_Memory/malloclab/mm.c: 
-------------------------------------------------------------------------------- 1 | /* 2 | * mm-naive.c - The fastest, least memory-efficient malloc package. 3 | * 4 | * In this naive approach, a block is allocated by simply incrementing 5 | * the brk pointer. A block is pure payload. There are no headers or 6 | * footers. Blocks are never coalesced or reused. Realloc is 7 | * implemented directly using mm_malloc and mm_free. 8 | * 9 | * NOTE TO STUDENTS: Replace this header comment with your own header 10 | * comment that gives a high level description of your solution. 11 | */ 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | 18 | #include "mm.h" 19 | #include "memlib.h" 20 | 21 | /********************************************************* 22 | * NOTE TO STUDENTS: Before you do anything else, please 23 | * provide your team information in the following struct. 24 | ********************************************************/ 25 | team_t team = 26 | { 27 | /* Team name */ 28 | "XXXXXX", 29 | /* First member's full name */ 30 | "1", 31 | /* First member's email address */ 32 | "@hit", 33 | /* Second member's full name (leave blank if none) */ 34 | "", 35 | /* Second member's email address (leave blank if none) */ 36 | "" 37 | }; 38 | 39 | /* single word (4) or double word (8) alignment */ 40 | #define ALIGNMENT 8 41 | 42 | /* rounds up to the nearest multiple of ALIGNMENT */ 43 | #define ALIGN(size) (((size) + (ALIGNMENT-1)) & ~0x7) 44 | 45 | 46 | #define SIZE_T_SIZE (ALIGN(sizeof(size_t))) 47 | 48 | /* 49 | * mm_init - initialize the malloc package. 50 | */ 51 | int mm_init(void) 52 | { 53 | return 0; 54 | } 55 | 56 | /* 57 | * mm_malloc - Allocate a block by incrementing the brk pointer. 58 | * Always allocate a block whose size is a multiple of the alignment. 
59 | */ 60 | void *mm_malloc(size_t size) 61 | { 62 | int newsize = ALIGN(size + SIZE_T_SIZE); 63 | void *p = mem_sbrk(newsize); 64 | if (p == (void *)-1) 65 | return NULL; 66 | else { 67 | *(size_t *)p = size; 68 | return (void *)((char *)p + SIZE_T_SIZE); 69 | } 70 | } 71 | 72 | /* 73 | * mm_free - Freeing a block does nothing. 74 | */ 75 | void mm_free(void *ptr) 76 | { 77 | } 78 | 79 | /* 80 | * mm_realloc - Implemented simply in terms of mm_malloc and mm_free 81 | */ 82 | void *mm_realloc(void *ptr, size_t size) 83 | { 84 | void *oldptr = ptr; 85 | void *newptr; 86 | size_t copySize; 87 | 88 | newptr = mm_malloc(size); 89 | if (newptr == NULL) 90 | return NULL; 91 | copySize = *(size_t *)((char *)oldptr - SIZE_T_SIZE); 92 | if (size < copySize) 93 | copySize = size; 94 | memcpy(newptr, oldptr, copySize); 95 | mm_free(oldptr); 96 | return newptr; 97 | } 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | -------------------------------------------------------------------------------- /Virtual_Memory/malloclab/mm.h: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | extern int mm_init (void); 4 | extern void *mm_malloc (size_t size); 5 | extern void mm_free (void *ptr); 6 | extern void *mm_realloc(void *ptr, size_t size); 7 | 8 | 9 | /* 10 | * Students work in teams of one or two. Teams enter their team name, 11 | * personal names and login IDs in a struct of this 12 | * type in their bits.c file. 
13 | */ 14 | typedef struct { 15 | char *teamname; /* ID1+ID2 or ID1 */ 16 | char *name1; /* full name of first member */ 17 | char *id1; /* login ID of first member */ 18 | char *name2; /* full name of second member (if any) */ 19 | char *id2; /* login ID of second member */ 20 | } team_t; 21 | 22 | extern team_t team; 23 | 24 | -------------------------------------------------------------------------------- /Virtual_Memory/malloclab/mm.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hitsz-ids2021/Architecture_Course/a9c20e2107d41805ade34f3d559b307a24f3abca/Virtual_Memory/malloclab/mm.o -------------------------------------------------------------------------------- /Virtual_Memory/malloclab/mm_explicit.c: -------------------------------------------------------------------------------- 1 | /* 2 | * mm-naive.c - The fastest, least memory-efficient malloc package. 3 | * 4 | * In this naive approach, a block is allocated by simply incrementing 5 | * the brk pointer. A block is pure payload. There are no headers or 6 | * footers. Blocks are never coalesced or reused. Realloc is 7 | * implemented directly using mm_malloc and mm_free. 8 | * 9 | * NOTE TO STUDENTS: Replace this header comment with your own header 10 | * comment that gives a high level description of your solution. 11 | */ 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | 18 | #include "mm.h" 19 | #include "memlib.h" 20 | 21 | /********************************************************* 22 | * NOTE TO STUDENTS: Before you do anything else, please 23 | * provide your team information in the following struct. 
24 | ********************************************************/ 25 | team_t team = 26 | { 27 | /* Team name */ 28 | "XXXXXX", 29 | /* First member's full name */ 30 | "1", 31 | /* First member's email address */ 32 | "@hit", 33 | /* Second member's full name (leave blank if none) */ 34 | "", 35 | /* Second member's email address (leave blank if none) */ 36 | "" 37 | }; 38 | 39 | /* single word (4) or double word (8) alignment */ 40 | #define ALIGNMENT 8 41 | 42 | /* rounds up to the nearest multiple of ALIGNMENT */ 43 | #define ALIGN(size) (((size) + (ALIGNMENT-1)) & ~0x7) 44 | 45 | 46 | #define SIZE_T_SIZE (ALIGN(sizeof(size_t))) 47 | 48 | #define WSIZE 4 49 | #define DSIZE 8 /*Double word size*/ 50 | #define CHUNKSIZE (1<<12) /*the page size in bytes is 4K*/ 51 | 52 | #define MAX(x,y) ((x)>(y)?(x):(y)) 53 | 54 | #define PACK(size,alloc) ((size) | (alloc)) 55 | 56 | #define GET(p) (*(unsigned int *)(p)) 57 | #define PUT(p,val) (*(unsigned int *)(p) = (val)) 58 | 59 | #define GET_SIZE(p) (GET(p) & ~0x7) 60 | #define GET_ALLOC(p) (GET(p) & 0x1) 61 | 62 | #define HDRP(bp) ((char *)(bp)-WSIZE) 63 | #define FTRP(bp) ((char *)(bp)+GET_SIZE(HDRP(bp))-DSIZE) 64 | 65 | #define PREV_LINKNODE_RP(bp) ((char*)(bp)) 66 | #define NEXT_LINKNODE_RP(bp) ((char*)(bp)+WSIZE) 67 | 68 | #define NEXT_BLKP(bp) ((char *)(bp)+GET_SIZE(((char *)(bp)-WSIZE))) 69 | #define PREV_BLKP(bp) ((char *)(bp)-GET_SIZE(((char *)(bp)-DSIZE))) 70 | 71 | 72 | int mm_check(char *function); 73 | static void *extend_heap(size_t dwords); 74 | static void *coalesce(void *bp); 75 | static void *find_fit(size_t size); 76 | static void place(void *bp,size_t asize); 77 | void insert_to_Emptylist(char *p); 78 | void fix_linklist(char *p); 79 | static char *heap_listp = NULL; 80 | static char *root = NULL; 81 | 82 | /* 83 | * mm_init - initialize the malloc package. 
84 | * The return value should be -1 if there was a problem in performing the initialization, 0 otherwise 85 | */ 86 | int mm_init(void) 87 | { 88 | if((heap_listp = mem_sbrk(6*WSIZE))==(void *)-1) return -1; 89 | 90 | PUT(heap_listp,0); 91 | PUT(heap_listp+(1*WSIZE),0); 92 | PUT(heap_listp+(2*WSIZE),0); 93 | PUT(heap_listp+(3*WSIZE),PACK(DSIZE,1)); 94 | PUT(heap_listp+(4*WSIZE),PACK(DSIZE,1)); 95 | PUT(heap_listp+(5*WSIZE),PACK(0,1)); 96 | root = heap_listp + (1*WSIZE); 97 | 98 | heap_listp += (4*WSIZE); 99 | 100 | if((extend_heap(CHUNKSIZE/DSIZE))==NULL) return -1; 101 | #ifdef DEBUG 102 | mm_check(__FUNCTION__); 103 | #endif // DEBUG 104 | return 0; 105 | } 106 | /*最小Block4字(16字节)*/ 107 | static void *extend_heap(size_t dwords) 108 | { 109 | char *bp; 110 | size_t size; 111 | 112 | size = (dwords % 2) ? (dwords+1) * DSIZE : dwords * DSIZE; 113 | 114 | if((long)(bp = mem_sbrk(size))==(void *)-1) 115 | return NULL; 116 | 117 | PUT(HDRP(bp),PACK(size,0)); 118 | PUT(FTRP(bp),PACK(size,0)); 119 | PUT(NEXT_LINKNODE_RP(bp),0); 120 | PUT(PREV_LINKNODE_RP(bp),0); 121 | 122 | PUT(HDRP(NEXT_BLKP(bp)),PACK(0,1)); 123 | 124 | return coalesce(bp); 125 | } 126 | /* 127 | * mm_malloc - Allocate a block by incrementing the brk pointer. 128 | * Always allocate a block whose size is a multiple of the alignment. 
129 | */ 130 | void *mm_malloc(size_t size) 131 | { 132 | size_t asize; 133 | size_t extendsize; 134 | char *bp; 135 | if(size ==0) return NULL; 136 | 137 | if(size <= DSIZE) 138 | { 139 | asize = 2*(DSIZE); 140 | } 141 | else 142 | { 143 | asize = (DSIZE)*((size+(DSIZE)+(DSIZE-1)) / (DSIZE)); 144 | } 145 | if((bp = find_fit(asize))!= NULL) 146 | { 147 | place(bp,asize); 148 | #ifdef DEBUG 149 | mm_check(__FUNCTION__); 150 | #endif // DEBUG 151 | return bp; 152 | } 153 | 154 | /*apply new block*/ 155 | extendsize = MAX(asize,CHUNKSIZE); 156 | if((bp = extend_heap(extendsize/DSIZE))==NULL) 157 | { 158 | return NULL; 159 | } 160 | place(bp,asize); 161 | #ifdef DEBUG 162 | mm_check(__FUNCTION__); 163 | #endif // DEBUG 164 | return bp; 165 | } 166 | 167 | /* 168 | * mm_free - Freeing a block does nothing. 169 | */ 170 | void mm_free(void *bp) 171 | { 172 | /* $end mmfree */ 173 | if(bp == 0) 174 | return; 175 | 176 | /* $begin mmfree */ 177 | size_t size = GET_SIZE(HDRP(bp)); 178 | /* $end mmfree */ 179 | 180 | /* $begin mmfree */ 181 | 182 | PUT(HDRP(bp), PACK(size, 0)); 183 | PUT(FTRP(bp), PACK(size, 0)); 184 | PUT(NEXT_LINKNODE_RP(bp),0); 185 | PUT(PREV_LINKNODE_RP(bp),0); 186 | coalesce(bp); 187 | #ifdef DEBUG 188 | mm_check(__FUNCTION__); 189 | #endif // DEBUG 190 | } 191 | /* 192 | * mm_realloc - Implemented simply in terms of mm_malloc and mm_free 193 | */ 194 | void *mm_realloc(void *ptr, size_t size) 195 | { 196 | size_t oldsize; 197 | void *newptr; 198 | 199 | /* If size == 0 then this is just free, and we return NULL. */ 200 | if(size == 0) 201 | { 202 | mm_free(ptr); 203 | return 0; 204 | } 205 | 206 | /* If oldptr is NULL, then this is just malloc. */ 207 | if(ptr == NULL) 208 | { 209 | return mm_malloc(size); 210 | } 211 | oldsize = GET_SIZE(HDRP(ptr)); 212 | 213 | newptr = mm_malloc(size); 214 | 215 | /* If realloc() fails the original block is left untouched */ 216 | if(!newptr) 217 | { 218 | return 0; 219 | } 220 | 221 | /* Copy the old data. 
*/ 222 | oldsize = GET_SIZE(HDRP(ptr)); 223 | if(size < oldsize) oldsize = size; 224 | memcpy(newptr, ptr, oldsize); 225 | 226 | /* Free the old block. */ 227 | mm_free(ptr); 228 | 229 | return newptr; 230 | } 231 | 232 | /*coalesce the empty block*/ 233 | static void *coalesce(void *bp) 234 | { 235 | size_t prev_alloc = GET_ALLOC(FTRP(PREV_BLKP(bp))); 236 | size_t next_alloc = GET_ALLOC(HDRP(NEXT_BLKP(bp))); 237 | size_t size = GET_SIZE(HDRP(bp)); 238 | 239 | /*coalesce the block and change the point*/ 240 | if(prev_alloc && next_alloc) 241 | { 242 | 243 | } 244 | else if(prev_alloc && !next_alloc) 245 | { 246 | size += GET_SIZE(HDRP(NEXT_BLKP(bp))); 247 | fix_linklist(NEXT_BLKP(bp));/*remove from empty list*/ 248 | PUT(HDRP(bp), PACK(size,0)); 249 | PUT(FTRP(bp), PACK(size,0)); 250 | } 251 | else if(!prev_alloc && next_alloc) 252 | { 253 | size += GET_SIZE(HDRP(PREV_BLKP(bp))); 254 | fix_linklist(PREV_BLKP(bp)); 255 | PUT(FTRP(bp),PACK(size,0)); 256 | PUT(HDRP(PREV_BLKP(bp)),PACK(size,0)); 257 | bp = PREV_BLKP(bp); 258 | } 259 | else 260 | { 261 | size +=GET_SIZE(FTRP(NEXT_BLKP(bp)))+ GET_SIZE(HDRP(PREV_BLKP(bp))); 262 | fix_linklist(PREV_BLKP(bp)); 263 | fix_linklist(NEXT_BLKP(bp)); 264 | PUT(FTRP(NEXT_BLKP(bp)),PACK(size,0)); 265 | PUT(HDRP(PREV_BLKP(bp)),PACK(size,0)); 266 | bp = PREV_BLKP(bp); 267 | } 268 | insert_to_Emptylist(bp); 269 | return bp; 270 | } 271 | inline void insert_to_Emptylist(char *p) 272 | { 273 | /*p will be insert into the linklist ,LIFO*/ 274 | char *nextp = GET(root); 275 | if(nextp != NULL) 276 | PUT(PREV_LINKNODE_RP(nextp),p); 277 | 278 | PUT(NEXT_LINKNODE_RP(p),nextp); 279 | // PUT(PREV_LINKNODE_RP(p),root); 280 | PUT(root,p); 281 | } 282 | inline void fix_linklist(char *p) 283 | { 284 | char *prevp = GET(PREV_LINKNODE_RP(p)); 285 | char *nextp = GET(NEXT_LINKNODE_RP(p)); 286 | if(prevp == NULL) 287 | { 288 | if(nextp != NULL)PUT(PREV_LINKNODE_RP(nextp),0); 289 | PUT(root,nextp); 290 | } 291 | else 292 | { 293 | if(nextp != 
NULL)PUT(PREV_LINKNODE_RP(nextp),prevp); 294 | PUT(NEXT_LINKNODE_RP(prevp),nextp); 295 | } 296 | PUT(NEXT_LINKNODE_RP(p),0); 297 | PUT(PREV_LINKNODE_RP(p),0); 298 | } 299 | static void *find_fit(size_t size) 300 | { 301 | /*first fit*/ 302 | char *tmpP = GET(root); 303 | while(tmpP != NULL) 304 | { 305 | if(GET_SIZE(HDRP(tmpP))>=size) return tmpP; 306 | tmpP = GET(NEXT_LINKNODE_RP(tmpP)); 307 | } 308 | return NULL;\ 309 | 310 | } 311 | 312 | static void place(void *bp,size_t asize) 313 | { 314 | size_t csize = GET_SIZE(HDRP(bp)); 315 | fix_linklist(bp);/*remove from empty_list*/ 316 | if((csize-asize)>=(2*DSIZE)) 317 | { 318 | PUT(HDRP(bp),PACK(asize,1)); 319 | PUT(FTRP(bp),PACK(asize,1)); 320 | bp = NEXT_BLKP(bp); 321 | 322 | PUT(HDRP(bp),PACK(csize-asize,0)); 323 | PUT(FTRP(bp),PACK(csize-asize,0)); 324 | PUT(NEXT_LINKNODE_RP(bp),0); 325 | PUT(PREV_LINKNODE_RP(bp),0); 326 | coalesce(bp); 327 | } 328 | else 329 | { 330 | PUT(HDRP(bp),PACK(csize,1)); 331 | PUT(FTRP(bp),PACK(csize,1)); 332 | } 333 | } 334 | int mm_check(char *function) 335 | { 336 | printf("---cur function:%s empty blocks:\n",function); 337 | char *tmpP = GET(root); 338 | int count_empty_block = 0; 339 | while(tmpP != NULL) 340 | { 341 | count_empty_block++; 342 | printf("address:%x size:%d \n",tmpP,GET_SIZE(HDRP(tmpP))); 343 | tmpP = GET(NEXT_LINKNODE_RP(tmpP)); 344 | } 345 | printf("empty_block num: %d\n",count_empty_block); 346 | } 347 | -------------------------------------------------------------------------------- /Virtual_Memory/malloclab/mm_implicit.c: -------------------------------------------------------------------------------- 1 | /* 2 | * mm-naive.c - The fastest, least memory-efficient malloc package. 3 | * 4 | * In this naive approach, a block is allocated by simply incrementing 5 | * the brk pointer. A block is pure payload. There are no headers or 6 | * footers. Blocks are never coalesced or reused. Realloc is 7 | * implemented directly using mm_malloc and mm_free. 
8 | * 9 | * NOTE TO STUDENTS: Replace this header comment with your own header 10 | * comment that gives a high level description of your solution. 11 | */ 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | 18 | #include "mm.h" 19 | #include "memlib.h" 20 | 21 | /********************************************************* 22 | * NOTE TO STUDENTS: Before you do anything else, please 23 | * provide your team information in the following struct. 24 | ********************************************************/ 25 | team_t team = { 26 | /* Team name */ 27 | "1", 28 | /* First member's full name */ 29 | "csapp", 30 | /* First member's email address */ 31 | "@hit.edu", 32 | /* Second member's full name (leave blank if none) */ 33 | "", 34 | /* Second member's email address (leave blank if none) */ 35 | "" 36 | }; 37 | 38 | //字大小和双字大小 39 | #define WSIZE 4 40 | #define DSIZE 8 41 | //当堆内存不够时,向内核申请的堆空间 42 | #define CHUNKSIZE (1<<12) 43 | //将val放入p开始的4字节中 44 | #define PUT(p,val) (*(unsigned int*)(p) = (val)) 45 | //获得头部和脚部的编码 46 | #define PACK(size, alloc) ((size) | (alloc)) 47 | //从头部或脚部获得块大小和已分配位 48 | #define GET_SIZE(p) (*(unsigned int*)(p) & ~0x7) 49 | #define GET_ALLO(p) (*(unsigned int*)(p) & 0x1) 50 | //获得块的头部和脚部 51 | #define HDRP(bp) ((char*)(bp) - WSIZE) 52 | #define FTRP(bp) ((char*)(bp) + GET_SIZE(HDRP(bp)) - DSIZE) 53 | //获得上一个块和下一个块 54 | #define NEXT_BLKP(bp) ((char*)(bp) + GET_SIZE(HDRP(bp))) 55 | #define PREV_BLKP(bp) ((char*)(bp) - GET_SIZE((char*)(bp) - DSIZE)) 56 | 57 | #define MAX(x,y) ((x)>(y)?(x):(y)) 58 | static char *heap_listp = 0; 59 | 60 | static void *extend_heap(size_t words); 61 | static void *coalesce(void *bp); 62 | static void *find_fit(size_t size); 63 | static void place(void *bp,size_t asize); 64 | 65 | /* 66 | * mm_init - initialize the malloc package. 
67 | */ 68 | int mm_init(void) 69 | { 70 | if((heap_listp = mem_sbrk(4*WSIZE)) == (void*)-1) //申请4字空间 71 | return -1; 72 | PUT(heap_listp, 0); //填充块 73 | PUT(heap_listp+1*WSIZE, PACK(DSIZE, 1)); //序言块头部 74 | PUT(heap_listp+2*WSIZE, PACK(DSIZE, 1)); //序言块脚部 75 | PUT(heap_listp+3*WSIZE, PACK(0, 1)); //结尾块 76 | 77 | heap_listp += DSIZE; //指向序言块有效载荷的指针 78 | 79 | if(extend_heap(CHUNKSIZE/WSIZE) == NULL) //申请更多的堆空间 80 | return -1; 81 | return 0; 82 | } 83 | 84 | static void *extend_heap(size_t words){ 85 | char *bp; 86 | size_t size; 87 | size = words%2 ? (words+1)*WSIZE : words*WSIZE; //对大小双字对对齐 88 | if((bp = mem_sbrk(size)) == (void*)-1) //申请空间 89 | return NULL; 90 | PUT(HDRP(bp), PACK(size, 0)); //设置头部 91 | PUT(FTRP(bp), PACK(size, 0)); //设置脚部 92 | PUT(HDRP(NEXT_BLKP(bp)), PACK(0, 1)); //设置新的结尾块 93 | //合并旧堆的最后一个可能出现的空闲块   94 | return coalesce(bp); 95 | } 96 | 97 | static void *coalesce(void *bp){ 98 | size_t prev_alloc = GET_ALLO(FTRP(PREV_BLKP(bp))); //获得前面块的已分配位 99 | size_t next_alloc = GET_ALLO(HDRP(NEXT_BLKP(bp))); //获得后面块的已分配位 100 | size_t size = GET_SIZE(HDRP(bp)); //获得当前块的大小 101 | 102 | if(prev_alloc && next_alloc){ 103 | return bp; 104 | }else if(prev_alloc && !next_alloc){ 105 | size += GET_SIZE(HDRP(NEXT_BLKP(bp))); 106 | PUT(HDRP(bp), PACK(size, 0)); 107 | PUT(FTRP(bp), PACK(size, 0)); 108 | }else if(!prev_alloc && next_alloc){ 109 | size += GET_SIZE(FTRP(PREV_BLKP(bp))); 110 | PUT(HDRP(PREV_BLKP(bp)), PACK(size, 0)); 111 | PUT(FTRP(bp), PACK(size, 0)); 112 | bp = PREV_BLKP(bp); 113 | }else{ 114 | size += GET_SIZE(HDRP(NEXT_BLKP(bp))) + 115 | GET_SIZE(FTRP(PREV_BLKP(bp))); 116 | PUT(HDRP(PREV_BLKP(bp)), PACK(size, 0)); 117 | PUT(FTRP(NEXT_BLKP(bp)), PACK(size, 0)); 118 | bp = PREV_BLKP(bp); 119 | } 120 | return bp; 121 | } 122 | 123 | /* 124 | * mm_malloc - Allocate a block by incrementing the brk pointer. 125 | * Always allocate a block whose size is a multiple of the alignment. 
126 | */ 127 | void *mm_malloc(size_t size) 128 | { 129 | size_t asize; 130 | void *bp; 131 | 132 | if(size == 0) 133 | return NULL; 134 | //满足最小块要求和对齐要求,size是有效负载大小 135 | asize = size<=DSIZE ? 2*DSIZE : DSIZE * ((size + (DSIZE) + (DSIZE-1)) / DSIZE); 136 | //匹配 137 | if((bp = find_fit(asize)) != NULL){ 138 | place(bp, asize); 139 | return bp; 140 | } 141 | if((bp = extend_heap(MAX(CHUNKSIZE, asize)/WSIZE)) == NULL) 142 | return NULL; 143 | place(bp, asize); 144 | return bp; 145 | } 146 | 147 | static void *find_fit(size_t asize) 148 | /* $end mmfirstfit-proto */ 149 | { 150 | /* $begin mmfirstfit */ 151 | /* First-fit search */ 152 | void *bp; 153 | 154 | for (bp = heap_listp; GET_SIZE(HDRP(bp)) > 0; bp = NEXT_BLKP(bp)) { 155 | if (!GET_ALLO(HDRP(bp)) && (asize <= GET_SIZE(HDRP(bp)))) { 156 | return bp; 157 | } 158 | } 159 | return NULL; /* No fit */ 160 | } 161 | 162 | static void place(void *bp,size_t asize){ 163 | size_t csize = GET_SIZE(HDRP(bp)); 164 | if((csize-asize)>=(2*DSIZE)){ 165 | PUT(HDRP(bp),PACK(asize,1)); 166 | PUT(FTRP(bp),PACK(asize,1)); 167 | bp = NEXT_BLKP(bp); 168 | PUT(HDRP(bp),PACK(csize-asize,0)); 169 | PUT(FTRP(bp),PACK(csize-asize,0)); 170 | }else{ 171 | PUT(HDRP(bp),PACK(csize,1)); 172 | PUT(FTRP(bp),PACK(csize,1)); 173 | } 174 | } 175 | 176 | /* 177 | * mm_free - Freeing a block does nothing. 178 | */ 179 | void mm_free(void *ptr){ 180 | size_t size = GET_SIZE(HDRP(ptr)); 181 | PUT(HDRP(ptr), PACK(size, 0)); 182 | PUT(FTRP(ptr), PACK(size, 0)); 183 | 184 | coalesce(ptr); 185 | } 186 | 187 | void *mm_realloc(void *ptr, size_t size) 188 | { 189 | size_t oldsize; 190 | void *newptr; 191 | 192 | /* If size == 0 then this is just free, and we return NULL. */ 193 | if(size == 0) { 194 | mm_free(ptr); 195 | return 0; 196 | } 197 | 198 | /* If oldptr is NULL, then this is just malloc. 
*/ 199 | if(ptr == NULL) { 200 | return mm_malloc(size); 201 | } 202 | 203 | newptr = mm_malloc(size); 204 | 205 | /* If realloc() fails the original block is left untouched */ 206 | 207 | if(!newptr) { 208 | return 0; 209 | } 210 | 211 | /* Copy the old data. */ 212 | oldsize = GET_SIZE(HDRP(ptr)); 213 | if(size < oldsize) oldsize = size; 214 | memcpy(newptr, ptr, oldsize); 215 | 216 | /* Free the old block. */ 217 | mm_free(ptr); 218 | 219 | return newptr; 220 | } 221 | 222 | 223 | 224 | 225 | 226 | 227 | 228 | 229 | 230 | 231 | 232 | -------------------------------------------------------------------------------- /Virtual_Memory/malloclab/mm_segerated.c: -------------------------------------------------------------------------------- 1 | /* 2 | * mm.c - malloc using segregated list 3 | * KAIST 4 | * Tony Kim 5 | * 6 | * In this approach, 7 | * Every block has a header and a footer 8 | * in which header contains reallocation information, size, and allocation info 9 | * and footer contains size and allocation info. 10 | * Free list are tagged to the segregated list. 11 | * Therefore all free block contains pointer to the predecessor and successor. 12 | * The segregated list headers are organized by 2^k size. 13 | * 14 | */ 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | 21 | #include "mm.h" 22 | #include "memlib.h" 23 | 24 | /********************************************************* 25 | * NOTE TO STUDENTS: Before you do anything else, please 26 | * provide your team information in the following struct. 
27 | ********************************************************/ 28 | team_t team = 29 | { 30 | /* Team name */ 31 | "XXXXXX", 32 | /* First member's full name */ 33 | "1", 34 | /* First member's email address */ 35 | "@hit", 36 | /* Second member's full name (leave blank if none) */ 37 | "", 38 | /* Second member's email address (leave blank if none) */ 39 | "" 40 | }; 41 | 42 | /* single word (4) or double word (8) alignment */ 43 | #define ALIGNMENT 8 44 | /* rounds up to the nearest multiple of ALIGNMENT */ 45 | #define ALIGN(size) (((size) + (ALIGNMENT-1)) & ~0x7) 46 | 47 | 48 | // My additional Macros 49 | #define WSIZE 4 // word and header/footer size (bytes) 50 | #define DSIZE 8 // double word size (bytes) 51 | #define INITCHUNKSIZE (1<<6) 52 | #define CHUNKSIZE (1<<12)//+(1<<7) 53 | 54 | #define LISTLIMIT 20 55 | #define REALLOC_BUFFER (1<<7) 56 | 57 | #define MAX(x, y) ((x) > (y) ? (x) : (y)) 58 | #define MIN(x, y) ((x) < (y) ? (x) : (y)) 59 | 60 | // Pack a size and allocated bit into a word 61 | #define PACK(size, alloc) ((size) | (alloc)) 62 | 63 | // Read and write a word at address p 64 | #define GET(p) (*(unsigned int *)(p)) 65 | #define PUT(p, val) (*(unsigned int *)(p) = (val) | GET_TAG(p)) 66 | #define PUT_NOTAG(p, val) (*(unsigned int *)(p) = (val)) 67 | 68 | // Store predecessor or successor pointer for free blocks 69 | #define SET_PTR(p, ptr) (*(unsigned int *)(p) = (unsigned int)(ptr)) 70 | 71 | // Read the size and allocation bit from address p 72 | #define GET_SIZE(p) (GET(p) & ~0x7) 73 | #define GET_ALLOC(p) (GET(p) & 0x1) 74 | #define GET_TAG(p) (GET(p) & 0x2) 75 | #define SET_RATAG(p) (GET(p) |= 0x2) 76 | #define REMOVE_RATAG(p) (GET(p) &= ~0x2) 77 | 78 | // Address of block's header and footer 79 | #define HDRP(ptr) ((char *)(ptr) - WSIZE) 80 | #define FTRP(ptr) ((char *)(ptr) + GET_SIZE(HDRP(ptr)) - DSIZE) 81 | 82 | // Address of (physically) next and previous blocks 83 | #define NEXT_BLKP(ptr) ((char *)(ptr) + GET_SIZE((char *)(ptr) - 
WSIZE)) 84 | #define PREV_BLKP(ptr) ((char *)(ptr) - GET_SIZE((char *)(ptr) - DSIZE)) 85 | 86 | // Address of free block's predecessor and successor entries 87 | #define PRED_PTR(ptr) ((char *)(ptr)) 88 | #define SUCC_PTR(ptr) ((char *)(ptr) + WSIZE) 89 | 90 | // Address of free block's predecessor and successor on the segregated list 91 | #define PRED(ptr) (*(char **)(ptr)) 92 | #define SUCC(ptr) (*(char **)(SUCC_PTR(ptr))) 93 | 94 | 95 | // End of my additional macros 96 | 97 | 98 | // Global var 99 | void *segregated_free_lists[LISTLIMIT]; 100 | 101 | 102 | // Functions 103 | static void *extend_heap(size_t size); 104 | static void *coalesce(void *ptr); 105 | static void *place(void *ptr, size_t asize); 106 | static void insert_node(void *ptr, size_t size); 107 | static void delete_node(void *ptr); 108 | 109 | //static void checkheap(int verbose); 110 | 111 | 112 | ///////////////////////////////// Block information ///////////////////////////////////////////////////////// 113 | /* 114 | 115 | A : Allocated? (1: true, 0:false) 116 | RA : Reallocation tag (1: true, 0:false) 117 | 118 | < Allocated Block > 119 | 120 | 121 | 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0 122 | +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ 123 | Header : | size of the block | | | A| 124 | bp ---> +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ 125 | | | 126 | | | 127 | . Payload and padding . 128 | . . 129 | . . 
130 | +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ 131 | Footer : | size of the block | | A| 132 | +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ 133 | 134 | 135 | < Free block > 136 | 137 | 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0 138 | +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ 139 | Header : | size of the block | |RA| A| 140 | bp ---> +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ 141 | | pointer to its predecessor in Segregated list | 142 | bp+WSIZE--> +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ 143 | | pointer to its successor in Segregated list | 144 | +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ 145 | . . 146 | . . 147 | . . 
148 | +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ 149 | Footer : | size of the block | | A| 150 | +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ 151 | 152 | 153 | */ 154 | ///////////////////////////////// End of Block information ///////////////////////////////////////////////////////// 155 | 156 | //////////////////////////////////////// Helper functions ////////////////////////////////////////////////////////// 157 | static void *extend_heap(size_t size) 158 | { 159 | void *ptr; 160 | size_t asize; // Adjusted size 161 | 162 | asize = ALIGN(size); 163 | 164 | if ((ptr = mem_sbrk(asize)) == (void *)-1) 165 | return NULL; 166 | 167 | // Set headers and footer 168 | PUT_NOTAG(HDRP(ptr), PACK(asize, 0)); 169 | PUT_NOTAG(FTRP(ptr), PACK(asize, 0)); 170 | PUT_NOTAG(HDRP(NEXT_BLKP(ptr)), PACK(0, 1)); 171 | insert_node(ptr, asize); 172 | 173 | return coalesce(ptr); 174 | } 175 | 176 | static void insert_node(void *ptr, size_t size) { 177 | int list = 0; 178 | void *search_ptr = ptr; 179 | void *insert_ptr = NULL; 180 | 181 | // Select segregated list 182 | while ((list < LISTLIMIT - 1) && (size > 1)) { 183 | size >>= 1; 184 | list++; 185 | } 186 | 187 | // Keep size ascending order and search 188 | search_ptr = segregated_free_lists[list]; 189 | while ((search_ptr != NULL) && (size > GET_SIZE(HDRP(search_ptr)))) { 190 | insert_ptr = search_ptr; 191 | search_ptr = PRED(search_ptr); 192 | } 193 | 194 | // Set predecessor and successor 195 | if (search_ptr != NULL) { 196 | if (insert_ptr != NULL) { 197 | SET_PTR(PRED_PTR(ptr), search_ptr); 198 | SET_PTR(SUCC_PTR(search_ptr), ptr); 199 | SET_PTR(SUCC_PTR(ptr), insert_ptr); 200 | SET_PTR(PRED_PTR(insert_ptr), ptr); 201 | } else { 202 | SET_PTR(PRED_PTR(ptr), search_ptr); 203 | SET_PTR(SUCC_PTR(search_ptr), ptr); 204 | SET_PTR(SUCC_PTR(ptr), NULL); 205 | segregated_free_lists[list] = ptr; 206 | } 207 | } else { 208 | if 
(insert_ptr != NULL) { 209 | SET_PTR(PRED_PTR(ptr), NULL); 210 | SET_PTR(SUCC_PTR(ptr), insert_ptr); 211 | SET_PTR(PRED_PTR(insert_ptr), ptr); 212 | } else { 213 | SET_PTR(PRED_PTR(ptr), NULL); 214 | SET_PTR(SUCC_PTR(ptr), NULL); 215 | segregated_free_lists[list] = ptr; 216 | } 217 | } 218 | 219 | return; 220 | } 221 | 222 | 223 | static void delete_node(void *ptr) { 224 | int list = 0; 225 | size_t size = GET_SIZE(HDRP(ptr)); 226 | 227 | // Select segregated list 228 | while ((list < LISTLIMIT - 1) && (size > 1)) { 229 | size >>= 1; 230 | list++; 231 | } 232 | 233 | if (PRED(ptr) != NULL) { 234 | if (SUCC(ptr) != NULL) { 235 | SET_PTR(SUCC_PTR(PRED(ptr)), SUCC(ptr)); 236 | SET_PTR(PRED_PTR(SUCC(ptr)), PRED(ptr)); 237 | } else { 238 | SET_PTR(SUCC_PTR(PRED(ptr)), NULL); 239 | segregated_free_lists[list] = PRED(ptr); 240 | } 241 | } else { 242 | if (SUCC(ptr) != NULL) { 243 | SET_PTR(PRED_PTR(SUCC(ptr)), NULL); 244 | } else { 245 | segregated_free_lists[list] = NULL; 246 | } 247 | } 248 | 249 | return; 250 | } 251 | 252 | 253 | static void *coalesce(void *ptr) 254 | { 255 | size_t prev_alloc = GET_ALLOC(HDRP(PREV_BLKP(ptr))); 256 | size_t next_alloc = GET_ALLOC(HDRP(NEXT_BLKP(ptr))); 257 | size_t size = GET_SIZE(HDRP(ptr)); 258 | 259 | 260 | // Do not coalesce with previous block if the previous block is tagged with Reallocation tag 261 | if (GET_TAG(HDRP(PREV_BLKP(ptr)))) 262 | prev_alloc = 1; 263 | 264 | if (prev_alloc && next_alloc) { // Case 1 265 | return ptr; 266 | } 267 | else if (prev_alloc && !next_alloc) { // Case 2 268 | delete_node(ptr); 269 | delete_node(NEXT_BLKP(ptr)); 270 | size += GET_SIZE(HDRP(NEXT_BLKP(ptr))); 271 | PUT(HDRP(ptr), PACK(size, 0)); 272 | PUT(FTRP(ptr), PACK(size, 0)); 273 | } else if (!prev_alloc && next_alloc) { // Case 3 274 | delete_node(ptr); 275 | delete_node(PREV_BLKP(ptr)); 276 | size += GET_SIZE(HDRP(PREV_BLKP(ptr))); 277 | PUT(FTRP(ptr), PACK(size, 0)); 278 | PUT(HDRP(PREV_BLKP(ptr)), PACK(size, 0)); 279 | ptr = 
PREV_BLKP(ptr); 280 | } else { // Case 4 281 | delete_node(ptr); 282 | delete_node(PREV_BLKP(ptr)); 283 | delete_node(NEXT_BLKP(ptr)); 284 | size += GET_SIZE(HDRP(PREV_BLKP(ptr))) + GET_SIZE(HDRP(NEXT_BLKP(ptr))); 285 | PUT(HDRP(PREV_BLKP(ptr)), PACK(size, 0)); 286 | PUT(FTRP(NEXT_BLKP(ptr)), PACK(size, 0)); 287 | ptr = PREV_BLKP(ptr); 288 | } 289 | 290 | insert_node(ptr, size); 291 | 292 | return ptr; 293 | } 294 | 295 | static void *place(void *ptr, size_t asize) 296 | { 297 | size_t ptr_size = GET_SIZE(HDRP(ptr)); 298 | size_t remainder = ptr_size - asize; 299 | 300 | delete_node(ptr); 301 | 302 | 303 | if (remainder <= DSIZE * 2) { 304 | // Do not split block 305 | PUT(HDRP(ptr), PACK(ptr_size, 1)); 306 | PUT(FTRP(ptr), PACK(ptr_size, 1)); 307 | } 308 | 309 | else if (asize >= 100) { 310 | // Split block 311 | PUT(HDRP(ptr), PACK(remainder, 0)); 312 | PUT(FTRP(ptr), PACK(remainder, 0)); 313 | PUT_NOTAG(HDRP(NEXT_BLKP(ptr)), PACK(asize, 1)); 314 | PUT_NOTAG(FTRP(NEXT_BLKP(ptr)), PACK(asize, 1)); 315 | insert_node(ptr, remainder); 316 | return NEXT_BLKP(ptr); 317 | 318 | } 319 | 320 | else { 321 | // Split block 322 | PUT(HDRP(ptr), PACK(asize, 1)); 323 | PUT(FTRP(ptr), PACK(asize, 1)); 324 | PUT_NOTAG(HDRP(NEXT_BLKP(ptr)), PACK(remainder, 0)); 325 | PUT_NOTAG(FTRP(NEXT_BLKP(ptr)), PACK(remainder, 0)); 326 | insert_node(NEXT_BLKP(ptr), remainder); 327 | } 328 | return ptr; 329 | } 330 | 331 | 332 | 333 | //////////////////////////////////////// End of Helper functions //////////////////////////////////////// 334 | 335 | 336 | 337 | 338 | 339 | 340 | /* 341 | * mm_init - initialize the malloc package. 342 | * Before calling mm_malloc, mm_realloc, or mm_free, 343 | * the application program calls mm_init to perform any necessary initializations, 344 | * such as allocating the initial heap area. 345 | * 346 | * Return value : -1 if there was a problem, 0 otherwise. 
347 | */ 348 | int mm_init(void) 349 | { 350 | int list; 351 | char *heap_start; // Pointer to beginning of heap 352 | 353 | // Initialize segregated free lists 354 | for (list = 0; list < LISTLIMIT; list++) { 355 | segregated_free_lists[list] = NULL; 356 | } 357 | 358 | // Allocate memory for the initial empty heap 359 | if ((long)(heap_start = mem_sbrk(4 * WSIZE)) == -1) 360 | return -1; 361 | 362 | PUT_NOTAG(heap_start, 0); /* Alignment padding */ 363 | PUT_NOTAG(heap_start + (1 * WSIZE), PACK(DSIZE, 1)); /* Prologue header */ 364 | PUT_NOTAG(heap_start + (2 * WSIZE), PACK(DSIZE, 1)); /* Prologue footer */ 365 | PUT_NOTAG(heap_start + (3 * WSIZE), PACK(0, 1)); /* Epilogue header */ 366 | 367 | if (extend_heap(INITCHUNKSIZE) == NULL) 368 | return -1; 369 | 370 | return 0; 371 | } 372 | 373 | /* 374 | * mm_malloc - Allocate a block by incrementing the brk pointer. 375 | * Always allocate a block whose size is a multiple of the alignment. 376 | * 377 | * Role : 378 | * 1. The mm_malloc routine returns a pointer to an allocated block payload. 379 | * 2. The entire allocated block should lie within the heap region. 380 | * 3. The entire allocated block should overlap with any other chunk. 381 | * 382 | * Return value : Always return the payload pointers that are alligned to 8 bytes. 
383 | */ 384 | void *mm_malloc(size_t size) 385 | { 386 | size_t asize; /* Adjusted block size */ 387 | size_t extendsize; /* Amount to extend heap if no fit */ 388 | void *ptr = NULL; /* Pointer */ 389 | 390 | // Ignore size 0 cases 391 | if (size == 0) 392 | return NULL; 393 | 394 | // Align block size 395 | if (size <= DSIZE) { 396 | asize = 2 * DSIZE; 397 | } else { 398 | asize = ALIGN(size+DSIZE); 399 | } 400 | 401 | int list = 0; 402 | size_t searchsize = asize; 403 | // Search for free block in segregated list 404 | while (list < LISTLIMIT) { 405 | if ((list == LISTLIMIT - 1) || ((searchsize <= 1) && (segregated_free_lists[list] != NULL))) { 406 | ptr = segregated_free_lists[list]; 407 | // Ignore blocks that are too small or marked with the reallocation bit 408 | while ((ptr != NULL) && ((asize > GET_SIZE(HDRP(ptr))) || (GET_TAG(HDRP(ptr))))) 409 | { 410 | ptr = PRED(ptr); 411 | } 412 | if (ptr != NULL) 413 | break; 414 | } 415 | 416 | searchsize >>= 1; 417 | list++; 418 | } 419 | 420 | // if free block is not found, extend the heap 421 | if (ptr == NULL) { 422 | extendsize = MAX(asize, CHUNKSIZE); 423 | 424 | if ((ptr = extend_heap(extendsize)) == NULL) 425 | return NULL; 426 | } 427 | 428 | // Place and divide block 429 | ptr = place(ptr, asize); 430 | 431 | 432 | // Return pointer to newly allocated block 433 | return ptr; 434 | } 435 | 436 | /* 437 | * mm_free - Freeing a block does nothing. 
438 | * 439 | * Role : The mm_free routine frees the block pointed to by ptr 440 | * 441 | * Return value : returns nothing 442 | */ 443 | void mm_free(void *ptr) 444 | { 445 | size_t size = GET_SIZE(HDRP(ptr)); 446 | 447 | REMOVE_RATAG(HDRP(NEXT_BLKP(ptr))); 448 | PUT(HDRP(ptr), PACK(size, 0)); 449 | PUT(FTRP(ptr), PACK(size, 0)); 450 | 451 | insert_node(ptr, size); 452 | coalesce(ptr); 453 | 454 | return; 455 | } 456 | 457 | /* 458 | * mm_realloc - Implemented simply in terms of mm_malloc and mm_free 459 | * 460 | * Role : The mm_realloc routine returns a pointer to an allocated 461 | * region of at least size bytes with constraints. 462 | * 463 | * I used https://github.com/htian/malloc-lab/blob/master/mm.c source idea to maximize utilization 464 | * by using reallocation tags 465 | * in reallocation cases (realloc-bal.rep, realloc2-bal.rep) 466 | */ 467 | void *mm_realloc(void *ptr, size_t size) 468 | { 469 | void *new_ptr = ptr; /* Pointer to be returned */ 470 | size_t new_size = size; /* Size of new block */ 471 | int remainder; /* Adequacy of block sizes */ 472 | int extendsize; /* Size of heap extension */ 473 | int block_buffer; /* Size of block buffer */ 474 | 475 | // Ignore size 0 cases 476 | if (size == 0) 477 | return NULL; 478 | 479 | // Align block size 480 | if (new_size <= DSIZE) { 481 | new_size = 2 * DSIZE; 482 | } else { 483 | new_size = ALIGN(size+DSIZE); 484 | } 485 | 486 | /* Add overhead requirements to block size */ 487 | new_size += REALLOC_BUFFER; 488 | 489 | /* Calculate block buffer */ 490 | block_buffer = GET_SIZE(HDRP(ptr)) - new_size; 491 | 492 | /* Allocate more space if overhead falls below the minimum */ 493 | if (block_buffer < 0) { 494 | /* Check if next block is a free block or the epilogue block */ 495 | if (!GET_ALLOC(HDRP(NEXT_BLKP(ptr))) || !GET_SIZE(HDRP(NEXT_BLKP(ptr)))) { 496 | remainder = GET_SIZE(HDRP(ptr)) + GET_SIZE(HDRP(NEXT_BLKP(ptr))) - new_size; 497 | if (remainder < 0) { 498 | extendsize = MAX(-remainder, 
CHUNKSIZE); 499 | if (extend_heap(extendsize) == NULL) 500 | return NULL; 501 | remainder += extendsize; 502 | } 503 | 504 | delete_node(NEXT_BLKP(ptr)); 505 | 506 | // Do not split block 507 | PUT_NOTAG(HDRP(ptr), PACK(new_size + remainder, 1)); 508 | PUT_NOTAG(FTRP(ptr), PACK(new_size + remainder, 1)); 509 | } else { 510 | new_ptr = mm_malloc(new_size - DSIZE); 511 | memcpy(new_ptr, ptr, MIN(size, new_size)); 512 | mm_free(ptr); 513 | } 514 | block_buffer = GET_SIZE(HDRP(new_ptr)) - new_size; 515 | } 516 | 517 | // Tag the next block if block overhead drops below twice the overhead 518 | if (block_buffer < 2 * REALLOC_BUFFER) 519 | SET_RATAG(HDRP(NEXT_BLKP(new_ptr))); 520 | 521 | // Return the reallocated block 522 | return new_ptr; 523 | } 524 | -------------------------------------------------------------------------------- /Virtual_Memory/malloclab/short1-bal.rep: -------------------------------------------------------------------------------- 1 | 20000 2 | 6 3 | 12 4 | 1 5 | a 0 2040 6 | a 1 2040 7 | f 1 8 | a 2 48 9 | a 3 4072 10 | f 3 11 | a 4 4072 12 | f 0 13 | f 2 14 | a 5 4072 15 | f 4 16 | f 5 17 | -------------------------------------------------------------------------------- /Virtual_Memory/malloclab/short2-bal.rep: -------------------------------------------------------------------------------- 1 | 20000 2 | 6 3 | 12 4 | 1 5 | a 0 2040 6 | a 1 4010 7 | a 2 48 8 | a 3 4072 9 | a 4 4072 10 | a 5 4072 11 | f 0 12 | f 1 13 | f 2 14 | f 3 15 | f 4 16 | f 5 17 | --------------------------------------------------------------------------------