├── my_types.h ├── mem_api.h ├── bitset.h ├── cache.h ├── Makefile ├── vm.h ├── bitset.c ├── mem_api.c ├── phys_mem.c ├── tlb.c ├── cache.c ├── vm.c ├── cpu.c └── README.md /my_types.h: -------------------------------------------------------------------------------- 1 | #ifndef include_types_h 2 | #define include_types_h 3 | 4 | #define pa_t uint32 5 | #define va_t uint32 6 | 7 | typedef unsigned int uint32; 8 | typedef unsigned char uint8; 9 | typedef unsigned long uint64; 10 | 11 | #endif 12 | -------------------------------------------------------------------------------- /mem_api.h: -------------------------------------------------------------------------------- 1 | #ifndef _incl_mem_api_h 2 | #define _incl_mem_api_h 3 | 4 | #include "my_types.h" 5 | 6 | extern int mem_read_32 (pa_t phys_addr, uint32 *data); 7 | extern int mem_write_32 (pa_t phys_addr, uint32 data); 8 | extern int init_mem_lib (char *ip_addr); 9 | 10 | 11 | #endif 12 | -------------------------------------------------------------------------------- /bitset.h: -------------------------------------------------------------------------------- 1 | #ifndef include_snd_buffer_h 2 | #define include_snd_buffer_h 3 | 4 | #include "my_types.h" 5 | 6 | extern uint8 *create_bitset (uint32 number_of_bits); 7 | extern int is_set (int bit, uint8 *array_of_bits); 8 | extern void set_bit (int bit, uint8 *array_of_bits); 9 | extern void clr_bit (int bit, uint8 *array_of_bits); 10 | 11 | #endif 12 | -------------------------------------------------------------------------------- /cache.h: -------------------------------------------------------------------------------- 1 | #ifndef incl_cache_h 2 | #define incl_cache_h 3 | 4 | #include "my_types.h" 5 | 6 | extern int cache_add_data (va_t va, uint32 tag, uint8 *data, int len); 7 | extern int cache_get_data (va_t va, uint32 tag, uint8 *buffer, int len); 8 | extern int cache_line_read_from_mem (va_t v_addr, pa_t p_addr); 9 | extern int cache_line_write_to_mem (uint8 *cache_line, pa_t p_addr); 10 | extern void invalidate (); 11 | extern int flush(); 12 | extern void init_cache (); 13 | 14 | #endif 15 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | CC = gcc 2 | CFLAGS = -g -I. -Wall -Wfatal-errors 3 | DEPS = tlb.h cache.h vm.h bitset.h my_types.h mem_api.h 4 | CPU_OBJ = tlb.o cache.o vm.o bitset.o cpu.o mem_api.o 5 | PHYS_MEM_OBJ = phys_mem.o 6 | 7 | %.o: %.c $(DEPS) 8 | $(CC) -c -o $@ $< $(CFLAGS) 9 | 10 | cpu: $(CPU_OBJ) 11 | $(CC) -o $@ $^ $(CFLAGS) 12 | phys_mem: $(PHYS_MEM_OBJ) 13 | $(CC) -o $@ $^ $(CFLAGS) 14 | 15 | .PHONY: all clean 16 | 17 | all: cpu phys_mem 18 | 19 | clean: 20 | rm -f *.o 21 | rm -f cpu 22 | rm -f phys_mem 23 | -------------------------------------------------------------------------------- /vm.h: -------------------------------------------------------------------------------- 1 | #ifndef incl_vm_h 2 | #define incl_vm_h 3 | 4 | #include "my_types.h" 5 | 6 | #define FIRST_LEVEL_SIZE 256 7 | #define SECOND_LEVEL_SIZE 4096 8 | #define PAGE_SIZE 4096 9 | #define PROCESS_NAME_SIZE 32 10 | #define PAGE_VALID 0x1 11 | 12 | 13 | typedef struct vma { 14 | va_t start_vm_addr; 15 | va_t end_vm_addr; 16 | uint32 flags; 17 | struct vma *next; 18 | } vma_t; 19 | 20 | typedef struct pte { 21 | pa_t physical_addr; 22 | uint8 flags; 23 | } pte_t; 24 | 25 | typedef struct pmd { 26 | pte_t pte [SECOND_LEVEL_SIZE]; 27 | } pmd_t; 28 | 29 | typedef struct page_d { 30 | pmd_t *pmd [FIRST_LEVEL_SIZE]; 31 | } pgd_t; 32 | 33 | typedef struct mm_struct { 34 | vma_t *v_mem_space; 35 | va_t code_start; 36 | va_t code_end; 37 | va_t data_start; 38 | va_t data_end; 39 | va_t stack_start; 40 | va_t stack_end; 41 | pgd_t *pgd; 42 | } mm_t; 43 | 44 | 45 | typedef struct process_ctx { 46 | uint32 cpu_registers [16]; 47 | } process_ctx_t; 48 | 49 | typedef struct task { 50 | uint32 pid; 51 | uint8 name [PROCESS_NAME_SIZE]; 52 | uint32 running_ticks; 53 | uint32 state; 54 | mm_t *mem_descriptor; 55 | process_ctx_t ctx; 56 | } task_t; 57 | 58 | 59 | extern int lookup_page_table (va_t v_addr, pgd_t *page_table, pa_t *p_addr); 60 | extern mm_t *create_addr_space (va_t code_start, uint32 code_size, va_t data_start, 61 | uint32 data_size, va_t stack_start, uint32 stack_size); 62 | extern void free_addr_space (mm_t *mm); 63 | extern int create_free_pool (pa_t start_addr, uint32 number_of_pages); 64 | 65 | #endif 66 | -------------------------------------------------------------------------------- /bitset.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "my_types.h" 4 | 5 | uint8 *create_bitset (uint32 number_of_bits) 6 | { 7 | int bytes_to_alloc; 8 | uint8 *array_of_bits; 9 | 10 | bytes_to_alloc = (number_of_bits >> 3) + 1; 11 | array_of_bits = malloc (bytes_to_alloc); 12 | if (array_of_bits == NULL) { 13 | printf("Malloc failed. \n"); 14 | return NULL; 15 | } 16 | else { 17 | return array_of_bits; 18 | } 19 | } 20 | 21 | static inline int trickery (int bit) 22 | { 23 | return (1 << (8 - ((bit + 1) % 8))); 24 | } 25 | 26 | int is_set (int bit, uint8 *array_of_bits) 27 | { 28 | int index; 29 | uint8 num; 30 | 31 | index = bit >> 3; 32 | num = array_of_bits[index]; 33 | 34 | //Have to do this trickery because we're doing LSB first 35 | if ((num & trickery (bit)) == trickery (bit)) { 36 | return 1; 37 | } 38 | return 0; 39 | } 40 | 41 | void set_bit (int bit, uint8 *array_of_bits) 42 | { 43 | int index; 44 | uint8 num; 45 | 46 | index = bit / 8; 47 | num = array_of_bits[index]; 48 | array_of_bits[index] = num | trickery (bit); 49 | } 50 | 51 | void clr_bit (int bit, uint8 *array_of_bits) 52 | { 53 | int index; 54 | uint8 num; 55 | 56 | index = bit >> 3; 57 | num = array_of_bits[index]; 58 | array_of_bits[index] = num & ~(trickery (bit)); 59 | } 60 | 61 | /* 62 | static void print_bitset () 63 | { 64 | int i; 65 | 66 | for (i = 0; i < bytes; i++) { 67 | printf ("%u ", array_of_bits[i]); 68 | } 69 | printf("\n"); 70 | } 71 | 72 | int main () 73 | { 74 | create_bitset (36); 75 | set_bit (1); 76 | set_bit (35); 77 | print_bitset(); 78 | 79 | clr_bit (1); 80 | clr_bit (35); 81 | print_bitset (); 82 | set_bit (0); 83 | print_bitset (); 84 | return 0; 85 | } 86 | */ 87 | -------------------------------------------------------------------------------- /mem_api.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #include "my_types.h" 11 | 12 | 13 | extern int errno; 14 | 15 | enum {MEM_RD_REQ, MEM_WR_REQ, MEM_RD_RESP, MEM_WR_RESP}; 16 | 17 | typedef struct mem_op { 18 | uint32 addr; 19 | uint32 data; 20 | uint8 cmd; 21 | uint8 status; 22 | } mem_op_t; 23 | 24 | 25 | static int mem_sd; 26 | static struct sockaddr_in servaddr; 27 | static struct sockaddr_in tmpaddr; 28 | static socklen_t tmpaddrlen; 29 | 30 | 31 | void bus_error (uint32 addr) 32 | { 33 | printf ("bus error at addr %u\n", addr); 34 | } 35 | 36 | 37 | int mem_read_32 (pa_t phys_addr, uint32 *data) 38 | { 39 | mem_op_t req, resp; 40 | int n; 41 | 42 | req.cmd = MEM_RD_REQ; 43 | req.addr = phys_addr; 44 | 45 | sendto (mem_sd, &req, sizeof(req), 0, (struct sockaddr *)&servaddr, sizeof(servaddr)); 46 | n = recvfrom (mem_sd, &resp, sizeof(resp), 0, (struct sockaddr *)&tmpaddr, &tmpaddrlen); 47 | if (n == sizeof(resp) && resp.cmd == MEM_RD_RESP) { 48 | if (resp.status == 0) { 49 | *data = resp.data; 50 | } 51 | else { 52 | printf ("resp.status = %d\n", resp.status); 53 | bus_error (phys_addr); 54 | } 55 | return 0; 56 | } 57 | return -1; 58 | } 59 | 60 | 61 | int mem_write_32 (pa_t phys_addr, uint32 data) 62 | { 63 | mem_op_t req, resp; 64 | int n; 65 | 66 | req.cmd = MEM_WR_REQ; 67 | req.addr = phys_addr; 68 | req.data = data; 69 | 70 | sendto (mem_sd, &req, sizeof(req), 0, (struct sockaddr *)&servaddr, sizeof(servaddr)); 71 | n = recvfrom (mem_sd, &resp, sizeof(resp), 0, (struct sockaddr *)&tmpaddr, &tmpaddrlen); 72 | if (n == sizeof(resp) && resp.cmd == MEM_WR_RESP) { 73 | if (resp.status != 0) { 74 | printf ("resp.status = %d\n", resp.status); 75 | bus_error(phys_addr); 76 | } 77 | return 0; 78 | } 79 | return -1; 80 | } 81 | 82 | 83 | int init_mem_lib (char *ip_addr) 84 | { 85 | mem_sd = socket (AF_INET, SOCK_DGRAM, 0); 86 | if (mem_sd < 0) { 87 | printf ("socket failed..errno = %d\n", errno); 88 | return -1; 89 | } 90 | 91 | memset (&servaddr, 0, sizeof(servaddr)); 92 | servaddr.sin_family = AF_INET; 93 | servaddr.sin_addr.s_addr = inet_addr (ip_addr); 94 | servaddr.sin_port = htons (32001); 95 | 96 | return 0; 97 | } 98 | -------------------------------------------------------------------------------- /phys_mem.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | typedef unsigned char uint8; 10 | typedef unsigned int uint32; 11 | typedef unsigned long uint64; 12 | 13 | #define PHYS_MEM_START 0x00000000 14 | 15 | static uint8 *memory; 16 | extern int errno; 17 | 18 | enum {MEM_RD_REQ, MEM_WR_REQ, MEM_RD_RESP, MEM_WR_RESP}; 19 | 20 | typedef struct mem_op { 21 | uint32 addr; 22 | uint32 data; 23 | uint8 cmd; 24 | uint8 status; 25 | } mem_op_t; 26 | 27 | 28 | int main (int argc, char *argv[]) 29 | { 30 | int sd, n; 31 | uint32 mem_sz; 32 | struct sockaddr_in servaddr, clientaddr; 33 | mem_op_t mesg, resp; 34 | socklen_t addrlen; 35 | uint32 offset; 36 | 37 | if (argc < 2) { 38 | printf ("usage: mem_sim_d \n"); 39 | exit (1); 40 | } 41 | 42 | mem_sz = atoi (argv[1]); 43 | 44 | memory = malloc (mem_sz); 45 | if (!memory) { 46 | printf ("malloc failed sz = %u\n", mem_sz); 47 | exit (2); 48 | } 49 | 50 | sd = socket (AF_INET, SOCK_DGRAM, 0); 51 | if (sd < 0) { 52 | printf ("socket failed..errno = %d\n", errno); 53 | exit (3); 54 | } 55 | 56 | memset (&servaddr, 0, sizeof(servaddr)); 57 | servaddr.sin_family = AF_INET; 58 | servaddr.sin_addr.s_addr = htonl(INADDR_ANY); 59 | servaddr.sin_port = htons (32001); 60 | 61 | if (bind (sd, (struct sockaddr *)&servaddr, sizeof(servaddr)) < 0) { 62 | printf ("bind failed..errno = %d\n", errno); 63 | exit (4); 64 | } 65 | 66 | while (1) { 67 | n = recvfrom (sd, (void *) &mesg, sizeof(mesg), 0, (struct sockaddr *)&clientaddr, &addrlen); 68 | if (n == sizeof(mesg)) { 69 | if (mesg.addr < PHYS_MEM_START || mesg.addr > (PHYS_MEM_START + mem_sz - 4)) { 70 | printf ("invalid mesg.addr = %u %u\n", mesg.addr, PHYS_MEM_START + mem_sz - 4); 71 | resp.status = -1; 72 | } 73 | else { 74 | resp.status = 0; 75 | } 76 | 77 | offset = mesg.addr - PHYS_MEM_START; 78 | switch (mesg.cmd) { 79 | case MEM_RD_REQ: 80 | resp.cmd = MEM_RD_RESP; 81 | memcpy (&resp.data, &memory[offset], 4); /**/ 82 | printf ("READ at address %u : %x\n", offset, 83 | *((uint32 *)(memory+offset))); 84 | break; 85 | 86 | case MEM_WR_REQ: 87 | memcpy (&memory[offset], &mesg.data, 4); /**/ 88 | resp.cmd = MEM_WR_RESP; 89 | printf ("WRITE at address %u : %x %x\n", offset, 90 | *((uint32 *)(memory+offset)), mesg.data); 91 | break; 92 | 93 | default: 94 | break; 95 | } 96 | sendto (sd, &resp, sizeof(resp), 0, (struct sockaddr *)&clientaddr, addrlen); 97 | } 98 | } 99 | return 0; 100 | } 101 | -------------------------------------------------------------------------------- /tlb.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "my_types.h" 4 | 5 | #define MAX_HASH_LENGTH 16 6 | 7 | 8 | typedef struct tlb_entry_s { 9 | uint32 va; 10 | uint32 pa; 11 | uint8 perms; 12 | struct tlb_entry_s *next; 13 | struct tlb_entry_s *global_next; 14 | } tlb_entry; 15 | 16 | 17 | static tlb_entry *tlb [MAX_HASH_LENGTH]; 18 | 19 | void init_tlb () 20 | { 21 | int i; 22 | for (i = 0; i < MAX_HASH_LENGTH; i++) { 23 | tlb [i] = NULL; 24 | } 25 | } 26 | 27 | int size_of_hash_list (int h) 28 | { 29 | tlb_entry *tmp = tlb[h]; 30 | int count = 0; 31 | while (tmp != NULL) { 32 | tmp = tmp->next; 33 | count++; 34 | } 35 | return (count); 36 | } 37 | 38 | void add_to_hash_list (tlb_entry **list, tlb_entry *entry) 39 | { 40 | entry->next = *list; 41 | *list = entry; 42 | } 43 | 44 | 45 | static uint32 hash (uint32 va) 46 | { 47 | uint32 v = va & 0xFFFFF000; 48 | /* uint32 v = va & ~0x3FF; */ 49 | v = v >> 12; 50 | return (v % MAX_HASH_LENGTH); 51 | } 52 | 53 | 54 | int translate (uint32 va, uint32 *pa, uint8 *perms) 55 | { 56 | uint32 h; 57 | tlb_entry *tmp; 58 | 59 | h = hash (va); 60 | tmp = tlb [h]; 61 | while (tmp != NULL) { 62 | if (tmp->va == (va & 0xFFFFF000)) { 63 | *pa = tmp->pa + (va & 0x00000FFF); 64 | *perms = tmp->perms; 65 | return 0; 66 | } 67 | tmp = tmp->next; 68 | } 69 | return -1; 70 | } 71 | 72 | void naive_invalidate () 73 | { 74 | int i; 75 | tlb_entry *t_next; 76 | tlb_entry *tmp; 77 | 78 | 79 | for (i = 0; i < MAX_HASH_LENGTH; i++) { 80 | tmp = tlb [i]; 81 | while (tmp != NULL) { 82 | t_next = tmp->next; 83 | free (tmp); 84 | tmp = t_next; 85 | } 86 | tlb [i] = NULL; 87 | } 88 | 89 | return; 90 | } 91 | 92 | int add_tlb_entry (uint32 va, uint32 pa, uint8 perms) 93 | { 94 | uint32 h; 95 | tlb_entry *t; 96 | 97 | h = hash (va); 98 | 99 | if (size_of_hash_list (h) < 8) { 100 | t = malloc (sizeof (tlb_entry)); 101 | if (t == NULL) { 102 | return (-1); 103 | } 104 | t->va = va & 0xFFFFF000; 105 | t->pa = pa; 106 | t->perms = perms; 107 | add_to_hash_list (&tlb[h], t); 108 | return 0; 109 | } 110 | else { 111 | /*Right now, just replacing the first one*/ 112 | /*TODO:decide which entry to replace*/ 113 | tlb_entry *entry_to_modify = tlb[h]; 114 | entry_to_modify->va = va; 115 | entry_to_modify->pa = pa; 116 | entry_to_modify->perms = perms; 117 | return 0; 118 | } 119 | } 120 | 121 | void print_tlb () 122 | { 123 | int i; 124 | tlb_entry *tmp; 125 | 126 | for (i = 0; i < MAX_HASH_LENGTH; i++) { 127 | tmp = tlb[i]; 128 | printf("Hash bucket %d\n", i); 129 | while (tmp != NULL) { 130 | printf("VA %u; PA %u; PERMS %u\n", tmp->va, tmp->pa, tmp->perms); 131 | tmp = tmp->next; 132 | } 133 | } 134 | } 135 | #if DEBUG 136 | int main() 137 | { 138 | return 0; 139 | } 140 | #endif 141 | -------------------------------------------------------------------------------- /cache.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "my_types.h" 6 | #include "mem_api.h" 7 | #include "cache.h" 8 | 9 | #define VALID_BIT 0x01 10 | #define DIRTY_BIT 0x02 11 | #define NUM_SET 4 12 | #define NUM_INDICES 16 13 | #define BLOCK_SIZE 256 14 | 15 | 16 | typedef struct { 17 | uint32 tag; /*Max Value is 2^20-1 */ 18 | uint8 flags; 19 | uint8 data [BLOCK_SIZE]; 20 | } cache_entry; 21 | 22 | cache_entry *cache [NUM_INDICES] [NUM_SET]; 23 | 24 | long cache_hits = 0; 25 | long cache_misses = 0; 26 | 27 | void init_cache () 28 | { 29 | int i, j; 30 | for (i = 0; i < NUM_INDICES; i++) { 31 | for (j = 0; j < NUM_SET; j++) { 32 | cache[i][j] = NULL; 33 | } 34 | } 35 | } 36 | 37 | int cache_line_write_to_mem (uint8 *data, pa_t p_addr) 38 | { 39 | int i, rc; 40 | 41 | for (i = 0; i < 64; i++) { 42 | rc = mem_write_32 (p_addr, ((uint32 *)data)[i]); 43 | if (rc < 0) { 44 | printf ("ERROR: mem_write_32 failed for p_addr %x\n", p_addr); 45 | return -1; 46 | } 47 | } 48 | return 0; 49 | } 50 | 51 | int cache_line_read_from_mem (va_t v_addr, pa_t p_addr) 52 | { 53 | uint32 buffer[64]; 54 | int i, rc; 55 | 56 | for (i = 0; i < 64; i++) { 57 | //printf ("reading mem at addr %u\n", p_addr + (i * 4)); 58 | rc = mem_read_32 (p_addr + (i * 4), &buffer[i]); 59 | if (rc < 0) { 60 | printf ("ERROR: mem_read_32 failed for p_addr %x\n", p_addr); 61 | return -1; 62 | } 63 | //printf ("read %u : %x\n", i, buffer[i]); 64 | } 65 | 66 | rc = cache_add_data (v_addr, (p_addr & 0xFFFFF000), (uint8 *)buffer, 256); 67 | return rc; 68 | } 69 | 70 | //4-way set associative cache 71 | 72 | int cache_add_data (uint32 va, uint32 tag, uint8 *data, int len) 73 | { 74 | int i; 75 | uint32 index; 76 | uint32 offset; 77 | cache_entry *t; 78 | 79 | //printf ("cache_add_data va = %x tag = %x\n", va, tag); 80 | for (i = 0; i < len; i++) { 81 | //printf ("adding cache data [%u] = %x\n", i, data[i]); 82 | } 83 | 84 | index = (va & 0x00000F00) >> 8; 85 | offset = va & 0x000000FF; 86 | 87 | cache_entry **c = cache [index]; 88 | 89 | for (i = 0; i < NUM_SET; i++) { 90 | if (c[i] && (c[i]->tag == tag && (c[i]->flags & VALID_BIT))) { 91 | //printf ("found va %x in cache\n", va); 92 | cache_hits++; 93 | //printf ("updating index %u offset %u\n", index, offset); 94 | memcpy ((uint8 *)(c[i]->data) + offset, data, len); 95 | c[i]->flags |= DIRTY_BIT; 96 | return 0; 97 | } 98 | } 99 | 100 | printf ("did not find va/tag in cache!\n"); 101 | 102 | for (i = 0; i < NUM_SET; i++) { 103 | if (!c[i] || ((c[i]->flags & VALID_BIT) != VALID_BIT)) { 104 | t = malloc (sizeof (cache_entry)); 105 | if (!t) { 106 | return -1; 107 | } 108 | t->tag = tag; 109 | memcpy ((uint8 *)(t->data) + offset, data, len); 110 | t->flags = t->flags | VALID_BIT; 111 | if (c[i]) { 112 | free (c[i]); 113 | } 114 | c[i] = t; 115 | c[i]->flags |= DIRTY_BIT; 116 | cache_misses++; 117 | return 0; 118 | } 119 | } 120 | 121 | // IF ALL BLOCKS ARE VALID BUT NONE OF THE TAGS MATCH, NEED TO KICK ONE OUT 122 | t = malloc (sizeof (cache_entry)); 123 | if (!t) { 124 | return -1; 125 | } 126 | t->tag = tag; 127 | memcpy ((t->data) + offset, data, len); 128 | t->flags = t->flags | VALID_BIT; 129 | free (c[0]); 130 | c[0] = t; 131 | c[0]->flags |= DIRTY_BIT; 132 | cache_misses++; 133 | return 0; 134 | } 135 | 136 | 137 | int cache_get_data (uint32 va, uint32 tag, uint8 *buffer, int len) 138 | { 139 | int i; 140 | uint32 index = (va & 0x00000F00) >> 8; 141 | uint32 offset = va & 0x000000FF; 142 | 143 | //printf ("cache get data va %x tag %x\n", va, tag); 144 | 145 | cache_entry **c = cache[index]; /*Why is this pointer to pointer */ 146 | 147 | for (i = 0; i < NUM_SET; i++) { 148 | if (c[i] != NULL && ((c[i]->flags & VALID_BIT) == VALID_BIT) && c[i]->tag == tag) { 149 | memcpy (buffer, &(c[i]->data[offset]), len); 150 | //printf ("found data in cache index %u offset %u\n", index, offset); 151 | cache_hits++; 152 | return 0; 153 | } 154 | } 155 | cache_misses++; 156 | return -1; 157 | } 158 | 159 | /*writes the contents of the cache to physical memory*/ 160 | int flush () 161 | { 162 | int i,j; 163 | pa_t phys_addr; 164 | 165 | for (i = 0; i < NUM_INDICES; i++) { 166 | for (j = 0; j < NUM_SET; j++) { 167 | if ((cache [i][j]->flags & DIRTY_BIT) == DIRTY_BIT) { 168 | phys_addr = cache[i][j]->tag + (i << 8); 169 | if (cache_line_write_to_mem (cache[i][j]->data, phys_addr)) { 170 | printf ("ERROR: cache_line_write_mem failed\n"); 171 | return -1; 172 | } 173 | cache[i][j]->flags &= ~DIRTY_BIT; 174 | } 175 | } 176 | } 177 | return 0; 178 | } 179 | 180 | /*Sets valid bit to 0*/ 181 | void invalidate () 182 | { 183 | int i; 184 | int j; 185 | 186 | for (i = 0; i < NUM_INDICES; i++) { 187 | for (j = 0; j < NUM_SET; j++) { 188 | cache[i][j]->flags &= ~VALID_BIT; 189 | } 190 | } 191 | } 192 | #if DEBUG 193 | int main() 194 | { 195 | return 0; 196 | } 197 | #endif 198 | -------------------------------------------------------------------------------- /vm.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "my_types.h" 5 | #include "bitset.h" 6 | #include "vm.h" 7 | 8 | 9 | 10 | static pa_t start_phys_address; 11 | static uint32 num_pages; 12 | static uint8 *user_page_bitset; 13 | 14 | int lookup_page_table (va_t v_addr, pgd_t *page_table, pa_t *p_addr) 15 | { 16 | pmd_t *pmd; 17 | pte_t pte; 18 | 19 | pmd = page_table->pmd[v_addr >> 24]; 20 | if (!pmd) { 21 | return -1; 22 | } 23 | pte = pmd->pte[(v_addr & 0x00FFF000) >> 12]; 24 | if ((pte.flags | PAGE_VALID) != PAGE_VALID) { 25 | return -1; 26 | } 27 | *p_addr = pte.physical_addr + (v_addr & 0x00000FFF); 28 | return 0; 29 | } 30 | 31 | int create_free_pool (pa_t start_addr, uint32 number_of_pages) 32 | { 33 | user_page_bitset = create_bitset (number_of_pages); 34 | if (!user_page_bitset) { 35 | return -1; 36 | } 37 | num_pages = number_of_pages; 38 | start_phys_address = start_addr; 39 | return 0; 40 | } 41 | 42 | static uint32 allocate_page () 43 | { 44 | int i; 45 | 46 | for (i = 0; i < num_pages; i++) { 47 | if (is_set (i, user_page_bitset) == 0) { 48 | set_bit (i, user_page_bitset); 49 | return (start_phys_address + (i<<12)); 50 | } 51 | } 52 | return -1; 53 | } 54 | 55 | static int free_page (pa_t page_addr) 56 | { 57 | uint32 bit; 58 | 59 | if ((page_addr & (PAGE_SIZE - 1)) != 0) { 60 | //printf("Page addr = %8x\n", page_addr); 61 | printf ("ERR: NOt aligned on page boundary\n"); 62 | return -1; 63 | } 64 | 65 | bit = (page_addr - start_phys_address) >> 12; 66 | clr_bit (bit, user_page_bitset); 67 | return 0; 68 | } 69 | 70 | static pgd_t *create_page_table () 71 | { 72 | int i; 73 | pgd_t *page_table; 74 | 75 | page_table = malloc (sizeof (pgd_t)); 76 | if (!page_table) { 77 | return NULL; 78 | } 79 | for (i = 0; i < FIRST_LEVEL_SIZE; i++) { 80 | page_table->pmd[i] = NULL; 81 | } 82 | return page_table; 83 | } 84 | 85 | static int allocate_memory (va_t start_addr, va_t end_addr, pgd_t *page_table) 86 | { 87 | uint32 tmp_addr; 88 | pmd_t *tmp_pmd; 89 | 90 | if ((start_addr & 4095) != 0) { 91 | printf ("Start Addr not aligned on page boundary\n"); 92 | return -1; 93 | } 94 | /* 95 | if ((end_addr & 4095) != 0) { 96 | printf ("End Addr not aligned on page boundary\n"); 97 | return -1; 98 | } 99 | */ 100 | tmp_addr = start_addr; 101 | 102 | while (tmp_addr <= end_addr) { 103 | if (!page_table->pmd[tmp_addr >> 24]) { 104 | tmp_pmd = malloc (sizeof (pmd_t)); 105 | if (!tmp_pmd) { 106 | printf("Unable to allocate memory for pmd\n"); 107 | return -1; 108 | } 109 | page_table->pmd[tmp_addr >> 24] = tmp_pmd; 110 | } 111 | 112 | page_table->pmd[tmp_addr >> 24]->pte[(tmp_addr & 0x00FFF000) >> 12].physical_addr = allocate_page (); 113 | page_table->pmd[tmp_addr >> 24]->pte[(tmp_addr & 0x00FFF000) >> 12].flags |= PAGE_VALID; 114 | /*TODO:Do something with flags */ 115 | tmp_addr = tmp_addr + 4096; 116 | } 117 | return 0; 118 | } 119 | 120 | 121 | mm_t *create_addr_space (va_t code_start, uint32 code_size, va_t data_start, uint32 data_size, va_t stack_start, uint32 stack_size) 122 | { 123 | mm_t *mm; 124 | int rc; 125 | pgd_t *page_table; 126 | 127 | mm = malloc (sizeof (mm_t)); 128 | if (!mm) { 129 | return NULL; 130 | } 131 | 132 | page_table = create_page_table (); 133 | if (!page_table) { 134 | printf ("malloc failed for page table\n"); 135 | return NULL; 136 | } 137 | 138 | mm->pgd = page_table; 139 | mm->code_start = code_start; 140 | mm->code_end = code_start + code_size; 141 | rc = allocate_memory (mm->code_start, mm->code_end, mm->pgd); 142 | if (rc < 0) { 143 | return NULL; 144 | } 145 | 146 | //printf ("allocated code\n"); 147 | 148 | if (data_size != 0) { 149 | mm->data_start = data_start; 150 | mm->data_end = data_start + data_size; 151 | rc = allocate_memory (mm->data_start, mm->data_end, mm->pgd); 152 | if (rc < 0) { 153 | return NULL; 154 | } 155 | } 156 | 157 | //printf ("### allocated data\n"); 158 | 159 | if (stack_size != 0) { 160 | mm->stack_start = stack_start; 161 | mm->stack_end = stack_start + stack_size; 162 | rc = allocate_memory (mm->stack_start, mm->stack_end, mm->pgd); 163 | if (rc < 0) { 164 | return NULL; 165 | } 166 | } 167 | 168 | //printf ("### allocated stack\n"); 169 | mm->v_mem_space = NULL; 170 | 171 | return mm; 172 | } 173 | 174 | 175 | static void free_memory (va_t start, va_t end, pgd_t *page_table) 176 | { 177 | va_t tmp_addr; 178 | uint32 prev; 179 | tmp_addr = start; 180 | pmd_t *page_md; 181 | 182 | tmp_addr = start; 183 | prev = tmp_addr >> 24; 184 | 185 | while (tmp_addr <= end) { 186 | page_md = page_table->pmd[prev]; 187 | if (!page_md) { 188 | printf ("ERROR: tmp_addr 0x%x page_md not set at index 0x%x\n", tmp_addr, prev); 189 | tmp_addr += 4096; 190 | continue; 191 | } 192 | if ((page_md->pte[(tmp_addr & 0x00FFF000) >> 12].flags & PAGE_VALID) != PAGE_VALID) { 193 | tmp_addr += 4096; 194 | continue; 195 | } 196 | free_page (page_md->pte[(tmp_addr & 0x00FFF000) >> 12].physical_addr); 197 | 198 | if (tmp_addr >> 24 != prev) { 199 | free (page_md); 200 | prev = tmp_addr >> 24; 201 | } 202 | tmp_addr += 4096; 203 | } 204 | } 205 | 206 | void free_addr_space (mm_t *mm) 207 | { 208 | pgd_t *page_table; 209 | 210 | page_table = mm->pgd; 211 | free_memory (mm->code_start, mm->code_end, page_table); 212 | free_memory (mm->data_start, mm->data_end, page_table); 213 | free_memory (mm->stack_start,mm->stack_end, page_table); 214 | free (mm->pgd); 215 | free (mm); 216 | } 217 | 218 | #if DEBUG 219 | int main () 220 | { 221 | mm_t *mm; 222 | 223 | create_free_pool (0, 4096); 224 | mm = create_addr_space (0xA0001000, 0xA0010000, 4096, 8192, 12288, 16384); 225 | free_addr_space (mm); 226 | } 227 | #endif 228 | -------------------------------------------------------------------------------- /cpu.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #include "my_types.h" 11 | #include "tlb.h" 12 | #include "vm.h" 13 | #include "cache.h" 14 | #include "mem_api.h" 15 | 16 | extern int errno; 17 | 18 | #define R0 cpu_registers[0] 19 | #define R1 cpu_registers[1] 20 | #define R2 cpu_registers[2] 21 | #define R3 cpu_registers[3] 22 | #define R4 cpu_registers[4] 23 | #define R5 cpu_registers[5] 24 | #define R6 cpu_registers[6] 25 | #define R7 cpu_registers[7] 26 | #define R8 cpu_registers[8] 27 | #define R9 cpu_registers[9] 28 | #define R10 cpu_registers[10] 29 | #define R11 cpu_registers[11] 30 | #define R12 cpu_registers[12] 31 | #define R13 cpu_registers[13] 32 | #define R14 cpu_registers[14] 33 | #define PC cpu_registers[15] 34 | 35 | 36 | enum {NOP, LDIMM, LD, SD, JMP, PRINTC, PRINTI, BNE, BEQ, BGT, BLT, BGE, BLE, ADD, SUB}; 37 | char *instructions[] = {"NOP", "LDIMM", "LD", "SD", "JMP", "PRINTC", "PRINTI", 38 | "BNE", "BEQ", "BGT", "BLT", "BGE", "BLE", "ADD", "SUB"}; 39 | 40 | static uint32 cpu_registers [16]; 41 | static task_t *current; 42 | 43 | 44 | uint32 program1 [] = { 45 | 0x01030000, 46 | 0x01020001, 47 | 0x01000041, 48 | 0x0101001a, 49 | 0x05000000, 50 | 0x0d002000, 51 | 0x0e112000, 52 | 0x07103010, 53 | 0x00000000, 54 | 0x00000000, 55 | 0x00000000, 56 | 0x00000000, 57 | }; 58 | 59 | 60 | typedef struct task_queue { 61 | task_t task; 62 | struct task_queue *next; 63 | } task_queue_t; 64 | 65 | void save_ctx (task_t *task) 66 | { 67 | memcpy (task->ctx.cpu_registers, cpu_registers, sizeof (cpu_registers)); 68 | } 69 | 70 | void restore_ctx (task_t *task) 71 | { 72 | memcpy (cpu_registers, task->ctx.cpu_registers, sizeof (cpu_registers)); 73 | } 74 | 75 | 76 | int load (va_t v_addr, pgd_t *page_table, uint32 *dst) 77 | { 78 | pa_t p_addr; 79 | uint8 perms; 80 | int rc; 81 | 82 | rc = translate (v_addr, &p_addr, &perms); 83 | if (rc < 0) { 84 | //printf ("translate failed for v_addr %x\n", v_addr); 85 | rc = lookup_page_table (v_addr, page_table, &p_addr); /*TODO*/ 86 | if (rc < 0) { 87 | printf("Segfault\n"); 88 | return -1; 89 | } 90 | //printf ("adding tlb entry for v_addr %x p_addr %x\n", 91 | // v_addr, p_addr); 92 | add_tlb_entry (v_addr, p_addr, 0); 93 | } 94 | 95 | //printf ("getting data from cache v_addr %x\n", v_addr); 96 | rc = cache_get_data (v_addr, (p_addr & 0xFFFFF000), (uint8 *)dst, 4); 97 | if (rc < 0) { 98 | //printf ("reading cache line from memory v_addr %x\n", v_addr); 99 | cache_line_read_from_mem (v_addr, p_addr); 100 | rc = cache_get_data (v_addr, (p_addr & 0xFFFFF000), (uint8 *)dst, 4); 101 | if (rc < 0) { 102 | printf ("ERROR: something is wrong at line num %d!\n", __LINE__); 103 | return -1; 104 | } 105 | } 106 | return 0; 107 | } 108 | 109 | /*TODO: NEED TO DO SOMETHING WITH DIRTY BIT */ 110 | int store (va_t v_addr, pgd_t *page_table, uint32 *data) 111 | { 112 | pa_t *p_addr; 113 | uint8 perms; 114 | int rc; 115 | 116 | p_addr = malloc (sizeof (pa_t)); 117 | rc = translate (v_addr, p_addr, &perms); 118 | if (rc < 0) { 119 | rc = lookup_page_table (v_addr, page_table, p_addr); 120 | if (rc < 0) { 121 | printf("Segfault\n"); 122 | return -1; 123 | } 124 | add_tlb_entry (v_addr, *p_addr, 0); 125 | } 126 | rc = cache_add_data (v_addr, (*p_addr & 0xFFFFF000), (uint8 *)data, 4); 127 | return rc; 128 | } 129 | 130 | static int 131 | decode_instr (uint32 instr, uint32 *opcode, uint32 *sreg, uint32 *dreg, 132 | uint32 *treg, uint32 *val) 133 | { 134 | *opcode = (instr & 0xFF000000) >> 24; 135 | if (*opcode > SUB) { 136 | printf("Invalid instruction\n"); 137 | return -1; 138 | } 139 | //printf("Instruction = %s\n", instructions[*opcode]); 140 | *sreg = (instr & 0x00F00000) >> 20; 141 | *dreg = (instr & 0x000F0000) >> 16; 142 | *treg = (instr & 0x0000F000) >> 12; 143 | *val = instr & 0x00000FFF; 144 | return 0; 145 | } 146 | 147 | static uint32 fetch_instruction (uint32 pc) 148 | { 149 | uint32 instr; 150 | 151 | if (load (pc, current->mem_descriptor->pgd, &instr) < 0) { 152 | printf("unable to get instruction\n"); 153 | return 0; 154 | } 155 | return instr; 156 | } 157 | 158 | static void execute_instruction (uint32 instr, uint32 *pc) 159 | { 160 | uint32 opcode, sreg, dreg, treg, imm_val; 161 | uint32 vaddr; 162 | 163 | decode_instr (instr, &opcode, &sreg, &dreg, &treg, &imm_val); 164 | 165 | switch (opcode) { 166 | case LDIMM: 167 | cpu_registers [dreg] = imm_val; 168 | *pc = *pc + 4; 169 | break; 170 | 171 | case LD: 172 | vaddr = sreg + imm_val; 173 | if (load (vaddr, current->mem_descriptor->pgd, &cpu_registers[dreg]) < 0) { 174 | printf ("segmentation violation? %x\n", vaddr); 175 | exit (0); 176 | } 177 | *pc = *pc + 4; 178 | break; 179 | 180 | case SD: 181 | vaddr = dreg + imm_val; 182 | if (store (vaddr, current->mem_descriptor->pgd, &cpu_registers[sreg]) < 0) { 183 | printf ("segmentation violation? %x\n", vaddr); 184 | exit (0); 185 | } 186 | *pc = *pc + 4; 187 | break; 188 | 189 | case JMP: 190 | *pc = imm_val; 191 | break; 192 | 193 | case ADD: 194 | cpu_registers[dreg] = cpu_registers[sreg] + cpu_registers[treg]; 195 | *pc = *pc + 4; 196 | break; 197 | 198 | case SUB: 199 | cpu_registers[dreg] = cpu_registers[sreg] - cpu_registers[treg]; 200 | *pc = *pc + 4; 201 | break; 202 | 203 | case PRINTC: 204 | printf ("%c", cpu_registers[sreg]); 205 | *pc = *pc + 4; 206 | break; 207 | 208 | case PRINTI: 209 | printf ("%u", cpu_registers[sreg]); 210 | *pc = *pc + 4; 211 | break; 212 | 213 | case BEQ: 214 | if (cpu_registers[sreg] == cpu_registers[treg]) { 215 | *pc = imm_val; 216 | } 217 | else { 218 | *pc = *pc + 4; 219 | } 220 | break; 221 | 222 | case BNE: 223 | if (cpu_registers[sreg] != cpu_registers[treg]) { 224 | *pc = imm_val; 225 | } 226 | else { 227 | *pc = *pc + 4; 228 | } 229 | break; 230 | 231 | case BGE: 232 | if (cpu_registers[sreg] >= cpu_registers[treg]) { 233 | *pc = imm_val; 234 | } 235 | else { 236 | *pc = *pc + 4; 237 | } 238 | break; 239 | 240 | case BLE: 241 | if (cpu_registers[sreg] <= cpu_registers[treg]) { 242 | *pc = imm_val; 243 | } 244 | else { 245 | *pc = *pc + 4; 246 | } 247 | break; 248 | 249 | case BLT: 250 | if (cpu_registers[sreg] < cpu_registers[treg]) { 251 | *pc = imm_val; 252 | } 253 | else { 254 | *pc = *pc + 4; 255 | } 256 | break; 257 | 258 | case BGT: 259 | if (cpu_registers[sreg] > cpu_registers[treg]) { 260 | *pc = imm_val; 261 | } 262 | else { 263 | *pc = *pc + 4; 264 | } 265 | break; 266 | 267 | case NOP: 268 | *pc = *pc + 4; 269 | break; 270 | 271 | default: 272 | printf("TRAP: invalid instruction \n"); 273 | exit(-1); 274 | } 275 | } 276 | 277 | 278 | 279 | int main (int argc, char *argv[]) 280 | { 281 | mm_t *mem_desc1, *mem_desc2; 282 | task_t *task1, *task2; 283 | uint32 instr, i; 284 | pa_t paddr; 285 | 286 | 287 | if (argc != 2) { 288 | printf ("usage: cpu \n"); 289 | exit (1); 290 | } 291 | 292 | init_mem_lib (argv[1]); 293 | init_tlb (); 294 | init_cache (); 295 | 296 | if (create_free_pool (0, 4096) < 0) { 297 | printf ("Unable to allocate bitset\n"); 298 | exit (-1); 299 | } 300 | 301 | //printf ("creating addr space...\n"); 302 | mem_desc1 = create_addr_space (0, 64, 0, 0, 0, 0); 303 | if (!mem_desc1) { 304 | printf ("ERROR: failed to create mem space\n"); 305 | exit (-2); 306 | } 307 | 308 | task1 = malloc (sizeof (task_t)); 309 | if (!task1) { 310 | printf ("ERROR: failed to create task\n"); 311 | exit (-2); 312 | } 313 | 314 | task1->mem_descriptor = mem_desc1; 315 | task1->ctx.cpu_registers[15] = 0; 316 | 317 | current = task1; 318 | 319 | /* KLUGE ALERT */ 320 | for (i = 0; i < sizeof(program1)/sizeof(uint32); i++) { 321 | //printf ("### lookup addr %x\n", i*4); 322 | if (lookup_page_table (i<<2, current->mem_descriptor->pgd, &paddr) < 0) { 323 | printf("ERROR:unable to map va to pa in lookup\n"); 324 | exit (-3); 325 | } 326 | //printf ("writing %x at addr %x\n", program1[i], paddr); 327 | if (mem_write_32 (paddr, program1[i]) < 0) { 328 | printf("ERROR: could not write to main mem\n"); 329 | exit (-4); 330 | } 331 | } 332 | 333 | 334 | PC = task1->ctx.cpu_registers[15]; 335 | 336 | while (1) { 337 | instr = fetch_instruction (PC); 338 | //printf ("fetched instr %x\n", instr); 339 | if (instr == 0) { 340 | exit (0); 341 | } 342 | execute_instruction (instr, &PC); 343 | } 344 | return 0; 345 | } 346 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # A simulator for a simple RISC CPU in C 2 | 3 | Before discussing the cpu module, we’ll first discuss a couple of the critical components. 4 | 5 | ## Cache 6 | 7 | ### Background: 8 | The cache is one of the fundamental building blocks of the modern CPU. To speed up performance, the processor will typically load information that is accessed from main memory into the cache. 9 | 10 | When the processor wants to get some data from main memory, it checks first whether or not that data is in the cache (if it is found in the cache, it’s called a cache hit). 11 | If it isn’t (called a cache miss), it will read from main memory, and load a certain block of fixed size around that data into the cache . This block is called a cache line. This is based on the idea of spatial locality, or the idea that if the data at a certain memory location is requested, data at surrounding memory locations will also be requested soon. 12 | 13 | Also copied into the cache is the physical memory address of the data (so the processor can find the data in the cache). This is called the tag. There is also the index, which is calculated from the virtual address. This is used to address into the cache itself. 14 | 15 | There are three different ways to implement a cache. Two of them are direct-hit and fully associative. In direct hit, for each unique index, there is only one location in the cache. In a fully associative cache, any index can be put in any location in the cache. 16 | Each of these has their own benefits and drawbacks. In direct hit, it is easy to locate data, but problems arise when writing to the cache, because there may be more conflicts (data is already in the cache, but you have to write to that location). 17 | In fully-associative, writing to the cache is easy, but locating a particular index is hard because the entire cache may have to be crawled. 18 | 19 | ### My implementation: 20 | In my implementation, I combine these two into a 4-way set associative cache. 21 | What that essentially means is that for each tag, there are four possible sets that the data could be in. 22 | This is the best compromise between a direct-hit cache and fully associative. 23 | Each cache line is represented using a structure called cache_entry. The cache itself is a 2-dimensional array. 24 | The first dimension represents the number of possible indices (the index is calculated based on the virtual address), and the second dimension represents the 4-way associativity of the cache (see the cache_add_data method of cache.c). 25 | 26 | Here's a really concise description of all of the parts of the cache program (ping me if you have any more questions about any of these modules): 27 | 28 | • The two methods cache_line_read_from_mem and cache_line_write_to_mem are helper methods that read and write from/to main memory. 29 | 30 | • cache_add_data is where most of the work is done. 31 | - It first calculates the index and offset of the data from its virtual address. 32 | - It then walks through the four possible lines where the data could be (b/c of 4-way associativity), and compares the tag of each to the data it is searching for. If it is found in a valid line, then we have a cache hit, and it copies the data at that particular offset into the cache line. The dirty bit, which represents whether or not the cache and main memory are in sync, is set to 1, which means they are out of sync. 33 | - If there is a cache miss (that particular tag was not found), then it first searches to see if any of the cache lines at that index is invalid (the valid bit is 0). If it is, then it is overwritten. 34 | 35 | - If no invalid line is found, one of the old entries at that index is booted out, and the data is added to the cache. 36 | 37 | - cache_get_data operates in the same way as cache_add_data, but reads instead of writes. 38 | 39 | - It searches at a particular index for the tag; if it is not found, then it is a cache miss, and the method returns -1. This means that, later, we will have to read from main memory. 40 | 41 | - If it is found, then the correct data is copied into the buffer that is passed in, and the method returns 0. 42 | 43 | • flush is a method that syncs the cache and main memory. It will copy all changes made to data in the cache to the appropriate data in main memory. 44 | 45 | • invalidate is a method meant to simulate what happens when the CPU first boots. Since the cache has to be empty on startup (but it may not be; the memory could be set to any random sequence), the processor will just invalidate all lines in the cache. 46 | 47 | ## Virtual Memory 48 | 49 | ### Background: 50 | The only memory that is actually available to the processor is the main memory, different segments of which will be used by various processes, or user programs. However, the OS wants to give the illusion to all processes that they have the entire main memory available to them. It does this by providing each process with virtual address space. 51 | Virtual memory is divided into contiguous segments, called pages. A structure called the page table maps virtual memory page addresses into physical addresses. When a process makes a call, say to store some data in some virtual address, the processor looks up the physical address and stores the data in the corresponding page in physical memory. 52 | Each process is given its own memory space. This memory space is usually divided into a couple of different sections. One section is reserved for the code, or the actual instructions that the processor has to execute for the program. Some is for the stack, or the local variables stored in each function in the program. The rest is for the data of the program, such as global variables. 53 | 54 | ### My implementation: 55 | • The lookup_page_table() function indexes into a page table (that is passed in) and locates the corresponding pmd and pte structures. From this, it can find the corresponding physical address. If the address is not found, then it returns -1. This is called a page fault. 56 | 57 | • The function create_free_pool() takes in a physical address and the number of pages to allocate, and allocates a new virtual address space. It also initializes a bitset, which is just a string of bits, used in this case to keep track of which pages are valid (have valid data stored in them). 58 | 59 | • allocate_page() simulates the process in which the OS allocates memory to processes. This function simply selects the first available page and allocates it. free_page() does the opposite, with a given page address. 60 | 61 | • Create_page_table() serves as a helper method that initializes the page table. 62 | 63 | • allocate_memory() simulates the allocation of an address space and populates the page table. It walks through, page by page, and adds entries to the page table and calls allocate_page(). It does this by creating a temporary pmd, which represents this chunk of memory (if it doesn’t exist already). 64 | 65 | • create_addr_space() simulates the creation of a new process in memory. It takes in the virtual addresses of the code, data, and stack sections, as well as their sizes. It then initializes a structure that represents a process’s memory space (see vm.h -> mm_struct). It then goes through and calls allocate_memory() for the code, data, and stack sections, building the page table along the way. 66 | 67 | ## TLB (Translation Lookaside Buffer) 68 | 69 | ### Background: 70 | To actually obtain useful data, the processor must look up information in main memory, which is addressed using physical addresses. However, the processor assigns each process a virtual address space to use. Therefore, to load or store data into physical memory, the processor has to translate a process’s virtual address into a physical address. There are two locations in which these translations are stored. 71 | 72 | The first is the page table. This is a ‘large’ section of memory that contains the mappings of all virtual addresses into physical addresses. Because it is stored in main memory, accessing the page table is quite slow. Therefore, the processor caches certain translations in the translation lookaside buffer. It functions similarly to the cache described above; it is a smaller piece of SRAM memory that the processor checks before translating any virtual address into a physical address. 73 | 74 | ### My implementation: 75 | I implemented the TLB as a chained linked-list hash table. 76 | 77 | • The methods init_tlb(), size_of_hash_list(), add_to_hash_list(), and hash() are just helper methods. 78 | 79 | • When the processor wants to obtain a physical address from a virtual address, it calls translate(). translate() finds the appropriate bucket in the TLB, and walks the list to find the entry with the corresponding physical address. It returns 0 on success, and -1 on failure. If this method fails, the processor will have to make a call to the page table, and then add the entry to the TLB later. 80 | 81 | • The method add_tlb_entry() adds an entry to the tlb. It functions similarly to translate() in that it first calculates the hash to find the appropriate bucket, and then if there are fewer than 8 entries in the linked-list (an arbitrary length), then it will simply add the entry to its head. If there are 8 entries, then it replaces the first entry (again, an arbitrary decision) with this translation. 82 | 83 | • Similar to the cache, we need a way to invalidate entries at startup; naïve_invalidate() does this. 84 | 85 | ## Physical Memory 86 | 87 | ### Background 88 | There isn’t much to explain for the background of physical memory. It is divided up into pages, much like virtual memory. However, this is memory that is “shared” amongst the OS and all the other processes running on the computer. 89 | 90 | ### My implementation 91 | 92 | There are two pertinent programs here: mem_api.c and phys_mem.c 93 | • My implementation uses sockets to simulate the memory bus. This way, the processor program can make requests to the physical memory program, which is running simultaneously. 94 | 95 | • The structure mem_op represents a memory operation. It contains the address, data, and the request. The memory itself is represented as an array of bytes. 96 | 97 | • The main loop of phys_mem listens to the socket, and when it receives a request, it writes it into a mem_op structure. Then, based on the value of cmd, the program either writes to or reads from the main memory array. It then sends a response back to the processor. 98 | 99 | • mem_api acts as the memory bus, providing simple methods for cpu.c to interact with phys_mem. 100 | 101 | • mem_read_32 takes in a physical address and a data buffer, and sends a request over the bound socket to the physical memory. If the request is successful, it copies the data into the buffer. 102 | 103 | • mem_write_32 functions in a similar manner; it takes in a physical address and some data, and sends a write request to the physical memory. If either method gets a failure, this program will throw a bus error. 104 | 105 | ## CPU 106 | 107 | ### Background 108 | 109 | This is where everything gets put together. The CPU itself is actually quite simple. It simply follows a constant loop of fetching instructions and executing them. 110 | The CPU typically has a set of instructions that it can execute, called an instruction set. For this project, I created my own instruction set, which follows a RISC paradigm. Typically, most instructions are either 32-bit of 64-bit quantities. MIPS has three different types of instructions: R, I, and J. Each starts, with a 6-bit opcode, and the differences in each refers to the number of registers the instruction operates on. R type instructions operate on 3 registers; I on 2, and J just on an address. 111 | 112 | The very basic pipeline for what goes on in the CPU is this; it will first fetch the next instruction from the address stored in the program counter. 113 | It will then decode this instruction (get the opcode, registers, and immediate values if there are any), and then execute the instruction. 114 | What happens at this step depends on the instruction. If it is an instruction where something needs to be computed (ie. add, sub, bne), it will send the appropriate data to the ALU. 115 | If it is a jump, it will simply modify the address at the program counter. 116 | 117 | When executing processes, a CPU typically also has to do context switches, where it will save the register values and other data for one process, and then switch to executing another process. 118 | It does many of these switches per second, to give the illusion that it is executing all of these processes simultaneously. 119 | 120 | ### My implementation 121 | • On “startup”, my simulation resets the cache, tlb, loads a process into main memory, and loads the address for that process (which I’ve hard coded into the program) into the program counter. It then goes into a loop of fetching and executing instructions (I have the functions for context switching written, but haven’t implemented them into the CPU functionality) 122 | 123 | • My instruction set is structured as such: the first 8 bits are reserved for the opcode, or the actual command. Then, there are 4 bits for the src register, dst register, and target register each. The final 12 bits are for any immediate values (take a look at program1 and see if you can tell what it does). The possible opcodes themselves are listed in the “instructions” array. 124 | 125 | • The function load() is called for the load instruction. It first checks the tlb to translate the virtual address into a physical address. If the entry is not in the tlb, it looks in the page table, and adds the entry to the tlb. It then checks the cache for the data, and if the data is not in the cache, it loads the data into the cache from physical memory, and then reads from the cache again. 126 | 127 | • The function store() operates similarly. It will try to translate, and then calls cache_add_data() to store the data (when the cache is flushed, that data will be written back to physical memory). 128 | 129 | • decode_instr() is what determines the opcode, registers, and value from the instruction itself. It accomplishes this by masking out the irrelevant parts of the instruction. 130 | 131 | • fetch_instruction() takes the address in the pc, and calls load() to load the instruction from the cache or main memory. 132 | 133 | • execute_instr() is where the brunt of the work is done. It first calls decode_instr() to determine the registers, opcode, and value. Then, using a switch/case statement on the opcode, it makes the appropriate calls to execute the instruction. For example, for a load immediate instruction, it will write the immediate value to the specified register. For jump, it will modify the pc by the appropriate amount. 134 | 135 | ###A Minor Note 136 | 137 | My simulation (well, it walks the line between a simulation and emulation) actually incorporates a little bit the work the Operating System does as well. So it's not strictly all "CPU". For example, the allocation of memory for processes (the free pool, bitset, virtual memory, etc.) aren't really CPU-level tasks (for a better idea for what goes on in the CPU itself, check out my verilog-cpu repo). These are things the OS takes care of on a typical computer, so technically I've also written parts of an OS into this emulation/simulation. 138 | --------------------------------------------------------------------------------