├── test ├── test_define.h ├── test_link2.s ├── test_link2.c ├── test_link.s ├── system.s ├── test_link.c ├── testutil.c ├── test.sh ├── Makefile ├── test_define.c └── stdlib.c ├── .clang-format ├── as ├── system.s ├── Makefile ├── main.c ├── string_builder.c ├── map.c ├── vector.c ├── test.inc ├── object.c ├── stdlib.c ├── utility.c ├── as.h ├── lex.c └── code.c ├── cc ├── system.s ├── Makefile ├── string_builder.c ├── main.c ├── map.c ├── vector.c ├── test.inc ├── type.c ├── token.c ├── env.c ├── ast.c ├── utility.c ├── cpp.c ├── stdlib.c ├── cc.h └── lex.c ├── ld ├── system.s ├── Makefile ├── main.c ├── string_builder.c ├── map.c ├── vector.c ├── test.inc ├── object.c ├── utility.c ├── ld.h ├── stdlib.c └── link.c ├── Makefile ├── .gitignore ├── LICENSE ├── README.md └── aqcc /test/test_define.h: -------------------------------------------------------------------------------- 1 | #define test001header 42 2 | 3 | int test001iret(int i) { return i; } 4 | -------------------------------------------------------------------------------- /test/test_link2.s: -------------------------------------------------------------------------------- 1 | non_global_func: 2 | mov $20, %eax 3 | ret 4 | 5 | .global test002 6 | test002: 7 | call non_global_func 8 | ret 9 | -------------------------------------------------------------------------------- /test/test_link2.c: -------------------------------------------------------------------------------- 1 | static int test003004var = 20; 2 | 3 | int test004() { return test003004var; } 4 | 5 | static int test005() { return 20; } 6 | 7 | int test007() { return test005(); } 8 | -------------------------------------------------------------------------------- /.clang-format: -------------------------------------------------------------------------------- 1 | BasedOnStyle: google 2 | AccessModifierOffset : -4 3 | AllowShortIfStatementsOnASingleLine : true 4 | AlwaysBreakTemplateDeclarations : true 5 | Standard : C++11 6 | BreakBeforeBraces : 
Stroustrup 7 | IndentWidth : 4 8 | -------------------------------------------------------------------------------- /test/test_link.s: -------------------------------------------------------------------------------- 1 | .global _start 2 | _start: 3 | call main 4 | mov %rax, %rdi 5 | mov $60, %eax 6 | syscall 7 | 8 | non_global_func: 9 | mov $10, %eax 10 | ret 11 | 12 | .global test001 13 | test001: 14 | call non_global_func 15 | ret 16 | -------------------------------------------------------------------------------- /as/system.s: -------------------------------------------------------------------------------- 1 | .global syscall 2 | syscall: 3 | mov %rdi, %rax 4 | mov %rsi, %rdi 5 | mov %rdx, %rsi 6 | mov %rcx, %rdx 7 | mov %r8, %rcx 8 | mov %r9, %r8 9 | mov 8(%rsp), %r9 10 | syscall 11 | ret 12 | 13 | .global _start 14 | _start: 15 | mov (%rsp), %rdi 16 | lea 8(%rsp), %rsi 17 | call main 18 | mov %rax, %rdi 19 | mov $60, %eax 20 | syscall 21 | -------------------------------------------------------------------------------- /cc/system.s: -------------------------------------------------------------------------------- 1 | .global syscall 2 | syscall: 3 | mov %rdi, %rax 4 | mov %rsi, %rdi 5 | mov %rdx, %rsi 6 | mov %rcx, %rdx 7 | mov %r8, %rcx 8 | mov %r9, %r8 9 | mov 8(%rsp), %r9 10 | syscall 11 | ret 12 | 13 | .global _start 14 | _start: 15 | mov (%rsp), %rdi 16 | lea 8(%rsp), %rsi 17 | call main 18 | mov %rax, %rdi 19 | mov $60, %eax 20 | syscall 21 | -------------------------------------------------------------------------------- /ld/system.s: -------------------------------------------------------------------------------- 1 | .global syscall 2 | syscall: 3 | mov %rdi, %rax 4 | mov %rsi, %rdi 5 | mov %rdx, %rsi 6 | mov %rcx, %rdx 7 | mov %r8, %rcx 8 | mov %r9, %r8 9 | mov 8(%rsp), %r9 10 | syscall 11 | ret 12 | 13 | .global _start 14 | _start: 15 | mov (%rsp), %rdi 16 | lea 8(%rsp), %rsi 17 | call main 18 | mov %rax, %rdi 19 | mov $60, %eax 20 | syscall 21 | 
-------------------------------------------------------------------------------- /test/system.s: -------------------------------------------------------------------------------- 1 | .global syscall 2 | syscall: 3 | mov %rdi, %rax 4 | mov %rsi, %rdi 5 | mov %rdx, %rsi 6 | mov %rcx, %rdx 7 | mov %r8, %rcx 8 | mov %r9, %r8 9 | mov 8(%rsp), %r9 10 | syscall 11 | ret 12 | 13 | .global _start 14 | _start: 15 | mov (%rsp), %rdi 16 | lea 8(%rsp), %rsi 17 | call main 18 | mov %rax, %rdi 19 | mov $60, %eax 20 | syscall 21 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | all: 2 | cd cc && make 3 | cd as && make 4 | cd ld && make 5 | 6 | test: 7 | cd test && make test 8 | 9 | self_test: 10 | cd test && make self_test 11 | 12 | selfself_test: 13 | cd test && make selfself_test 14 | 15 | clean: 16 | cd cc && make clean 17 | cd as && make clean 18 | cd ld && make clean 19 | cd test && make clean 20 | 21 | .PHONY: all test clean 22 | -------------------------------------------------------------------------------- /ld/Makefile: -------------------------------------------------------------------------------- 1 | TARGET=ld 2 | SRC=main.c vector.c utility.c map.c link.c object.c stdlib.c string_builder.c 3 | SRC_ASM=system.s 4 | CC=gcc 5 | FLAGS=-O0 -g3 -Wall -std=c11 -fno-builtin -fno-stack-protector -static -nostdlib 6 | 7 | $(TARGET): $(SRC) $(SRC_ASM) test.inc ld.h 8 | $(CC) -o $@ $(SRC) $(SRC_ASM) $(FLAGS) 9 | 10 | clean: 11 | rm -f $(TARGET) 12 | 13 | .PHONY: clean 14 | -------------------------------------------------------------------------------- /as/Makefile: -------------------------------------------------------------------------------- 1 | TARGET=as 2 | SRC=main.c vector.c utility.c map.c lex.c assemble.c code.c object.c stdlib.c string_builder.c 3 | SRC_ASM=system.s 4 | CC=gcc 5 | FLAGS=-O0 -g3 -Wall -std=c11 -fno-builtin -fno-stack-protector 
-static -nostdlib 6 | 7 | $(TARGET): $(SRC) $(SRC_ASM) test.inc as.h 8 | $(CC) -o $@ $(SRC) $(SRC_ASM) $(FLAGS) 9 | 10 | clean: 11 | rm -f $(TARGET) 12 | 13 | .PHONY: clean 14 | -------------------------------------------------------------------------------- /cc/Makefile: -------------------------------------------------------------------------------- 1 | TARGET=cc 2 | SRC=main.c vector.c utility.c map.c lex.c parse.c x86_64_gen.c type.c env.c ast.c analyze.c string_builder.c cpp.c token.c stdlib.c 3 | SRC_ASM=system.s 4 | CC=gcc 5 | FLAGS=-O0 -g3 -Wall -std=c11 -fno-builtin -fno-stack-protector -static -nostdlib 6 | 7 | $(TARGET): $(SRC) $(SRC_ASM) test.inc cc.h 8 | $(CC) -o $@ $(SRC) $(SRC_ASM) $(FLAGS) 9 | 10 | clean: 11 | rm -f $(TARGET) 12 | 13 | .PHONY: clean 14 | -------------------------------------------------------------------------------- /test/test_link.c: -------------------------------------------------------------------------------- 1 | int fib(int n) { return n <= 1 ? n : fib(n - 1) + fib(n - 2); } 2 | 3 | int test001(); 4 | int test002(); 5 | int test004(); 6 | int test007(); 7 | 8 | static int test003004var = 10; 9 | 10 | int test003() { return test003004var; } 11 | 12 | static int test005() { return 10; } 13 | 14 | int test006() { return test005(); } 15 | 16 | int main() 17 | { 18 | return fib(5) == 5 && test001() == 10 && test002() == 20 && 19 | test003() == 10 && test004() == 20 && test006() == 10 && 20 | test007() == 20; 21 | } 22 | -------------------------------------------------------------------------------- /as/main.c: -------------------------------------------------------------------------------- 1 | #include "as.h" 2 | 3 | #include "test.inc" 4 | 5 | int main(int argc, char **argv) 6 | { 7 | if (argc == 2 && strcmp(argv[1], "test") == 0) { 8 | execute_test(); 9 | return 0; 10 | } 11 | 12 | if (argc != 3) goto usage; 13 | 14 | char *infile = argv[1], *outfile = argv[2]; 15 | 16 | Vector *code = read_asm_from_filepath(infile); 17 | 
ObjectImage *obj = assemble_code(code); 18 | 19 | FILE *fh = fopen(outfile, "wb"); 20 | dump_object_image(obj, fh); 21 | fclose(fh); 22 | 23 | return 0; 24 | 25 | usage: 26 | error("Usage: as input-asm-file-path output-obj-file-path"); 27 | } 28 | -------------------------------------------------------------------------------- /ld/main.c: -------------------------------------------------------------------------------- 1 | #include "ld.h" 2 | 3 | #include "test.inc" 4 | 5 | int main(int argc, char **argv) 6 | { 7 | if (argc == 2 && strcmp(argv[1], "test") == 0) { 8 | execute_test(); 9 | return 0; 10 | } 11 | 12 | if (argc < 3) goto usage; 13 | 14 | Vector *objs = new_vector(); 15 | for (int i = 1; i < argc - 1; i++) vector_push_back(objs, argv[i]); 16 | 17 | ExeImage *exe = link_objs(objs); 18 | 19 | FILE *fh = fopen(argv[argc - 1], "wb"); 20 | dump_exe_image(exe, fh); 21 | fclose(fh); 22 | 23 | return 0; 24 | 25 | usage: 26 | error("Usage: ld input-obj-file-path... output-exe-file-path"); 27 | } 28 | -------------------------------------------------------------------------------- /test/testutil.c: -------------------------------------------------------------------------------- 1 | int ret0() { return 0; } 2 | int ret1() { return 1; } 3 | int add1(int s) { return s + 1; } 4 | int add_all(int a, int b, int c, int d, int e, int f, int g, int h) 5 | { 6 | return h - (a + b + c + d + e + f + g); 7 | } 8 | int add_two(int a, int b) { return a + b; } 9 | 10 | //#include 11 | void *malloc(int size); 12 | int *alloc4(int **p) 13 | { 14 | *p = malloc(sizeof(int) * 4); 15 | (*p)[0] = 10; 16 | (*p)[1] = 11; 17 | (*p)[2] = 12; 18 | (*p)[3] = 13; 19 | 20 | return *p; 21 | } 22 | 23 | typedef int Number; 24 | Number test343inc_var = 343; 25 | void test343inc() { test343inc_var++; } 26 | 27 | Number test344inc_var = 344; 28 | void test344inc() { test344inc_var++; } 29 | -------------------------------------------------------------------------------- /as/string_builder.c: 
-------------------------------------------------------------------------------- 1 | #include "as.h" 2 | 3 | struct StringBuilder { 4 | Vector *data; 5 | }; 6 | 7 | StringBuilder *new_string_builder() 8 | { 9 | StringBuilder *sb = safe_malloc(sizeof(StringBuilder)); 10 | sb->data = new_vector(); 11 | return sb; 12 | } 13 | 14 | char string_builder_append(StringBuilder *sb, char ch) 15 | { 16 | vector_push_back(sb->data, (void *)ch); 17 | return ch; 18 | } 19 | 20 | char *string_builder_get(StringBuilder *sb) 21 | { 22 | int size = vector_size(sb->data); 23 | char *ret = safe_malloc(size + 1); 24 | for (int i = 0; i < size; i++) ret[i] = (char)vector_get(sb->data, i); 25 | ret[size] = '\0'; 26 | return ret; 27 | } 28 | 29 | int string_builder_size(StringBuilder *sb) { return vector_size(sb->data) + 1; } 30 | -------------------------------------------------------------------------------- /cc/string_builder.c: -------------------------------------------------------------------------------- 1 | #include "cc.h" 2 | 3 | struct StringBuilder { 4 | Vector *data; 5 | }; 6 | 7 | StringBuilder *new_string_builder() 8 | { 9 | StringBuilder *sb = safe_malloc(sizeof(StringBuilder)); 10 | sb->data = new_vector(); 11 | return sb; 12 | } 13 | 14 | char string_builder_append(StringBuilder *sb, char ch) 15 | { 16 | vector_push_back(sb->data, (void *)ch); 17 | return ch; 18 | } 19 | 20 | char *string_builder_get(StringBuilder *sb) 21 | { 22 | int size = vector_size(sb->data); 23 | char *ret = safe_malloc(size + 1); 24 | for (int i = 0; i < size; i++) ret[i] = (char)vector_get(sb->data, i); 25 | ret[size] = '\0'; 26 | return ret; 27 | } 28 | 29 | int string_builder_size(StringBuilder *sb) { return vector_size(sb->data) + 1; } 30 | -------------------------------------------------------------------------------- /ld/string_builder.c: -------------------------------------------------------------------------------- 1 | #include "ld.h" 2 | 3 | struct StringBuilder { 4 | Vector *data; 5 | 
}; 6 | 7 | StringBuilder *new_string_builder() 8 | { 9 | StringBuilder *sb = safe_malloc(sizeof(StringBuilder)); 10 | sb->data = new_vector(); 11 | return sb; 12 | } 13 | 14 | char string_builder_append(StringBuilder *sb, char ch) 15 | { 16 | vector_push_back(sb->data, (void *)ch); 17 | return ch; 18 | } 19 | 20 | char *string_builder_get(StringBuilder *sb) 21 | { 22 | int size = vector_size(sb->data); 23 | char *ret = safe_malloc(size + 1); 24 | for (int i = 0; i < size; i++) ret[i] = (char)vector_get(sb->data, i); 25 | ret[size] = '\0'; 26 | return ret; 27 | } 28 | 29 | int string_builder_size(StringBuilder *sb) { return vector_size(sb->data) + 1; } 30 | -------------------------------------------------------------------------------- /test/test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | function fail(){ 4 | echo -ne "\e[1;31m[ERROR]\e[0m " 5 | echo "$1" 6 | exit 1 7 | } 8 | 9 | $AQCC_CC test 10 | [ $? -eq 0 ] || fail "$AQCC_CC test" 11 | 12 | $AQCC_AS test 13 | [ $? -eq 0 ] || fail "$AQCC_AS test" 14 | 15 | $AQCC_LD test 16 | [ $? -eq 0 ] || fail "$AQCC_LD test" 17 | 18 | AQCC=../aqcc 19 | 20 | $AQCC test_define.c testutil.c stdlib.c system.s -o _test_define_exe.o -v 21 | [ $? -eq 0 ] || fail "$AQCC" 22 | ./_test_define_exe.o 23 | [ $? -eq 0 ] || fail "./_test_define_exe.o" 24 | 25 | gcc -E -P test.c -o _test.c 26 | $AQCC _test.c testutil.c stdlib.c system.s -o _test_exe.o -v 27 | [ $? -eq 0 ] || fail "$AQCC" 28 | ./_test_exe.o 29 | [ $? -eq 0 ] || fail "./_test_exe.o" 30 | 31 | $AQCC test_link.c test_link2.c test_link.s test_link2.s -o _test_exe.o -v 32 | [ $? -eq 0 ] || fail "$AQCC" 33 | ./_test_exe.o 34 | [ $? 
-eq 1 ] || fail "./_test_exe.o (link)" 35 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Prerequisites 2 | *.d 3 | 4 | # Object files 5 | *.o 6 | *.ko 7 | *.obj 8 | *.elf 9 | 10 | # Linker output 11 | *.ilk 12 | *.map 13 | *.exp 14 | 15 | # Precompiled Headers 16 | *.gch 17 | *.pch 18 | 19 | # Libraries 20 | *.lib 21 | *.a 22 | *.la 23 | *.lo 24 | 25 | # Shared objects (inc. Windows DLLs) 26 | *.dll 27 | *.so 28 | *.so.* 29 | *.dylib 30 | 31 | # Executables 32 | *.exe 33 | *.out 34 | *.app 35 | *.i*86 36 | *.x86_64 37 | *.hex 38 | 39 | # Debug files 40 | *.dSYM/ 41 | *.su 42 | *.idb 43 | *.pdb 44 | 45 | # Kernel Module Compile Results 46 | *.mod* 47 | *.cmd 48 | .tmp_versions/ 49 | modules.order 50 | Module.symvers 51 | Mkfile.old 52 | dkms.conf 53 | 54 | # written for aqcc 55 | _test* 56 | *.~*~ 57 | main 58 | aqcc_detail 59 | *.self.s 60 | *.selfself.s 61 | tags 62 | aqcc_self_detail 63 | aqcc_selfself_detail 64 | examples/nqueen/main.s 65 | examples/nqueen/nqueen 66 | __self_sort.in 67 | __selfself_sort.in 68 | cc/cc 69 | as/as 70 | ld/ld 71 | test/bin 72 | -------------------------------------------------------------------------------- /cc/main.c: -------------------------------------------------------------------------------- 1 | #include "cc.h" 2 | 3 | #include "test.inc" 4 | 5 | int main(int argc, char **argv) 6 | { 7 | if (argc == 2 && strcmp(argv[1], "test") == 0) { 8 | execute_test(); 9 | return 0; 10 | } 11 | 12 | if (argc != 3) goto usage; 13 | 14 | char *infile = argv[1], *outfile = argv[2]; 15 | 16 | Vector *tokens = read_tokens_from_filepath(infile); 17 | tokens = preprocess_tokens(tokens); 18 | tokens = concatenate_string_literal_tokens(tokens); 19 | 20 | Vector *asts = parse_prog(tokens); 21 | 22 | Env *env = analyze_ast(asts); 23 | x86_64_optimize_asts_constant(asts, env); 24 | 25 | Vector *code = 
x86_64_generate_code(asts); 26 | code = x86_64_optimize_code(code); 27 | 28 | FILE *fh = fopen(outfile, "wb"); 29 | for (int i = 0; i < vector_size(code); i++) 30 | dump_code((Code *)vector_get(code, i), fh); 31 | fclose(fh); 32 | 33 | return 0; 34 | 35 | usage: 36 | error("Usage: cc input-c-file-path output-asm-file-path"); 37 | } 38 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Ushitora Anqou 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /as/map.c: -------------------------------------------------------------------------------- 1 | #include "as.h" 2 | /* One map entry: a string key and the opaque value pointer stored under it. */ 3 | struct KeyValue { 4 | const char *key; 5 | void *value; 6 | }; 7 | /* A map whose KeyValue entries are kept in a Vector. */ 8 | struct Map { 9 | Vector *data; 10 | }; 11 | /* Allocates a Map with safe_malloc and gives it a new vector for its entries. */ 12 | Map *new_map() 13 | { 14 | Map *map = safe_malloc(sizeof(Map)); 15 | map->data = new_vector(); 16 | return map; 17 | } 18 | /* Returns the number of entries, i.e. the size of the backing vector. */ 19 | int map_size(Map *map) { return vector_size(map->data); } 20 | /* Appends a new (key, item) entry and returns it. The key pointer is stored as-is (not copied) and no check is made for an existing entry with the same key. */ 21 | KeyValue *map_insert(Map *map, const char *key, void *item) 22 | { 23 | KeyValue *kv = safe_malloc(sizeof(KeyValue)); 24 | kv->key = key; 25 | kv->value = item; 26 | vector_push_back(map->data, kv); 27 | return kv; 28 | } 29 | /* Scans the entries from index 0 upward and returns the first one whose key compares equal (strcmp) to `key`, or NULL when there is none. */ 30 | KeyValue *map_lookup(Map *map, const char *key) 31 | { 32 | int i; 33 | 34 | for (i = 0; i < vector_size(map->data); i++) { 35 | KeyValue *kv = (KeyValue *)vector_get(map->data, i); 36 | if (strcmp(kv->key, key) == 0) return kv; 37 | } 38 | 39 | return NULL; 40 | } 41 | /* NULL-tolerant accessor: returns the entry's key, or NULL when kv is NULL. */ 42 | const char *kv_key(KeyValue *kv) 43 | { 44 | if (kv == NULL) return NULL; 45 | return kv->key; 46 | } 47 | /* NULL-tolerant accessor: returns the entry's value, or NULL when kv is NULL. */ 48 | void *kv_value(KeyValue *kv) 49 | { 50 | if (kv == NULL) return NULL; 51 | return kv->value; 52 | } 53 | -------------------------------------------------------------------------------- /cc/map.c: -------------------------------------------------------------------------------- 1 | #include "cc.h" 2 | /* One map entry: a string key and the opaque value pointer stored under it. */ 3 | struct KeyValue { 4 | const char *key; 5 | void *value; 6 | }; 7 | /* A map whose KeyValue entries are kept in a Vector. */ 8 | struct Map { 9 | Vector *data; 10 | }; 11 | /* Allocates a Map with safe_malloc and gives it a new vector for its entries. */ 12 | Map *new_map() 13 | { 14 | Map *map = safe_malloc(sizeof(Map)); 15 | map->data = new_vector(); 16 | return map; 17 | } 18 | /* Returns the number of entries, i.e. the size of the backing vector. */ 19 | int map_size(Map *map) { return vector_size(map->data); } 20 | /* Appends a new (key, item) entry and returns it. The key pointer is stored as-is (not copied) and no check is made for an existing entry with the same key. */ 21 | KeyValue *map_insert(Map *map, const char *key, void *item) 22 | { 23 | KeyValue *kv = safe_malloc(sizeof(KeyValue)); 24 | kv->key = key; 25 | kv->value = item; 26 | vector_push_back(map->data, kv); 27 | return kv; 28 | } 29 | 30 | 
KeyValue *map_lookup(Map *map, const char *key) 31 | { 32 | int i; 33 | 34 | for (i = 0; i < vector_size(map->data); i++) { 35 | KeyValue *kv = (KeyValue *)vector_get(map->data, i); 36 | if (strcmp(kv->key, key) == 0) return kv; 37 | } 38 | 39 | return NULL; 40 | } 41 | 42 | const char *kv_key(KeyValue *kv) 43 | { 44 | if (kv == NULL) return NULL; 45 | return kv->key; 46 | } 47 | 48 | void *kv_value(KeyValue *kv) 49 | { 50 | if (kv == NULL) return NULL; 51 | return kv->value; 52 | } 53 | -------------------------------------------------------------------------------- /ld/map.c: -------------------------------------------------------------------------------- 1 | #include "ld.h" 2 | 3 | struct KeyValue { 4 | const char *key; 5 | void *value; 6 | }; 7 | 8 | struct Map { 9 | Vector *data; 10 | }; 11 | 12 | Map *new_map() 13 | { 14 | Map *map = safe_malloc(sizeof(Map)); 15 | map->data = new_vector(); 16 | return map; 17 | } 18 | 19 | int map_size(Map *map) { return vector_size(map->data); } 20 | 21 | KeyValue *map_insert(Map *map, const char *key, void *item) 22 | { 23 | KeyValue *kv = safe_malloc(sizeof(KeyValue)); 24 | kv->key = key; 25 | kv->value = item; 26 | vector_push_back(map->data, kv); 27 | return kv; 28 | } 29 | 30 | KeyValue *map_lookup(Map *map, const char *key) 31 | { 32 | int i; 33 | 34 | for (i = 0; i < vector_size(map->data); i++) { 35 | KeyValue *kv = (KeyValue *)vector_get(map->data, i); 36 | if (strcmp(kv->key, key) == 0) return kv; 37 | } 38 | 39 | return NULL; 40 | } 41 | 42 | const char *kv_key(KeyValue *kv) 43 | { 44 | if (kv == NULL) return NULL; 45 | return kv->key; 46 | } 47 | 48 | void *kv_value(KeyValue *kv) 49 | { 50 | if (kv == NULL) return NULL; 51 | return kv->value; 52 | } 53 | -------------------------------------------------------------------------------- /as/vector.c: -------------------------------------------------------------------------------- 1 | #include "as.h" 2 | 3 | struct Vector { 4 | int size, rsved_size; 5 | void **data; 
6 | }; 7 | 8 | Vector *new_vector() 9 | { 10 | Vector *ret; 11 | 12 | ret = safe_malloc(sizeof(Vector)); 13 | ret->size = 0; 14 | ret->rsved_size = 1; 15 | ret->data = NULL; 16 | return ret; 17 | } 18 | 19 | Vector *new_vector_from_scalar(void *scalar) 20 | { 21 | Vector *vec = new_vector(); 22 | vector_push_back(vec, scalar); 23 | return vec; 24 | } 25 | 26 | int vector_size(Vector *vec) { return vec->size; } 27 | 28 | void vector_push_back(Vector *vec, void *item) 29 | { 30 | if (vec->data == NULL || vec->size == vec->rsved_size) { 31 | vec->rsved_size *= 2; 32 | void **ndata = (void **)safe_malloc(sizeof(void *) * vec->rsved_size); 33 | memcpy(ndata, vec->data, vec->size * sizeof(void *)); 34 | vec->data = ndata; 35 | } 36 | 37 | vec->data[vec->size++] = item; 38 | } 39 | 40 | void *vector_get(Vector *vec, int i) 41 | { 42 | if (i >= vec->size) return NULL; 43 | return vec->data[i]; 44 | } 45 | 46 | void *vector_set(Vector *vec, int i, void *item) 47 | { 48 | assert(vec != NULL && i < vector_size(vec)); 49 | vec->data[i] = item; 50 | return item; 51 | } 52 | 53 | void vector_push_back_vector(Vector *vec, Vector *src) 54 | { 55 | for (int i = 0; i < vector_size(src); i++) 56 | vector_push_back(vec, vector_get(src, i)); 57 | } 58 | 59 | Vector *clone_vector(Vector *src) 60 | { 61 | Vector *vec = new_vector(); 62 | vector_push_back_vector(vec, src); 63 | return vec; 64 | } 65 | -------------------------------------------------------------------------------- /cc/vector.c: -------------------------------------------------------------------------------- 1 | #include "cc.h" 2 | 3 | struct Vector { 4 | int size, rsved_size; 5 | void **data; 6 | }; 7 | 8 | Vector *new_vector() 9 | { 10 | Vector *ret; 11 | 12 | ret = safe_malloc(sizeof(Vector)); 13 | ret->size = 0; 14 | ret->rsved_size = 1; 15 | ret->data = NULL; 16 | return ret; 17 | } 18 | 19 | Vector *new_vector_from_scalar(void *scalar) 20 | { 21 | Vector *vec = new_vector(); 22 | vector_push_back(vec, scalar); 23 | 
return vec; 24 | } 25 | 26 | int vector_size(Vector *vec) { return vec->size; } 27 | 28 | void vector_push_back(Vector *vec, void *item) 29 | { 30 | if (vec->data == NULL || vec->size == vec->rsved_size) { 31 | vec->rsved_size *= 2; 32 | void **ndata = (void **)safe_malloc(sizeof(void *) * vec->rsved_size); 33 | memcpy(ndata, vec->data, vec->size * sizeof(void *)); 34 | vec->data = ndata; 35 | } 36 | 37 | vec->data[vec->size++] = item; 38 | } 39 | 40 | void *vector_get(Vector *vec, int i) 41 | { 42 | if (i >= vec->size) return NULL; 43 | return vec->data[i]; 44 | } 45 | 46 | void *vector_set(Vector *vec, int i, void *item) 47 | { 48 | assert(vec != NULL && i < vector_size(vec)); 49 | vec->data[i] = item; 50 | return item; 51 | } 52 | 53 | void vector_push_back_vector(Vector *vec, Vector *src) 54 | { 55 | for (int i = 0; i < vector_size(src); i++) 56 | vector_push_back(vec, vector_get(src, i)); 57 | } 58 | 59 | Vector *clone_vector(Vector *src) 60 | { 61 | Vector *vec = new_vector(); 62 | vector_push_back_vector(vec, src); 63 | return vec; 64 | } 65 | -------------------------------------------------------------------------------- /ld/vector.c: -------------------------------------------------------------------------------- 1 | #include "ld.h" 2 | 3 | struct Vector { 4 | int size, rsved_size; 5 | void **data; 6 | }; 7 | 8 | Vector *new_vector() 9 | { 10 | Vector *ret; 11 | 12 | ret = safe_malloc(sizeof(Vector)); 13 | ret->size = 0; 14 | ret->rsved_size = 1; 15 | ret->data = NULL; 16 | return ret; 17 | } 18 | 19 | Vector *new_vector_from_scalar(void *scalar) 20 | { 21 | Vector *vec = new_vector(); 22 | vector_push_back(vec, scalar); 23 | return vec; 24 | } 25 | 26 | int vector_size(Vector *vec) { return vec->size; } 27 | 28 | void vector_push_back(Vector *vec, void *item) 29 | { 30 | if (vec->data == NULL || vec->size == vec->rsved_size) { 31 | vec->rsved_size *= 2; 32 | void **ndata = (void **)safe_malloc(sizeof(void *) * vec->rsved_size); 33 | memcpy(ndata, 
vec->data, vec->size * sizeof(void *)); 34 | vec->data = ndata; 35 | } 36 | 37 | vec->data[vec->size++] = item; 38 | } 39 | 40 | void *vector_get(Vector *vec, int i) 41 | { 42 | if (i >= vec->size) return NULL; 43 | return vec->data[i]; 44 | } 45 | 46 | void *vector_set(Vector *vec, int i, void *item) 47 | { 48 | assert(vec != NULL && i < vector_size(vec)); 49 | vec->data[i] = item; 50 | return item; 51 | } 52 | 53 | void vector_push_back_vector(Vector *vec, Vector *src) 54 | { 55 | for (int i = 0; i < vector_size(src); i++) 56 | vector_push_back(vec, vector_get(src, i)); 57 | } 58 | 59 | Vector *clone_vector(Vector *src) 60 | { 61 | Vector *vec = new_vector(); 62 | vector_push_back_vector(vec, src); 63 | return vec; 64 | } 65 | -------------------------------------------------------------------------------- /test/Makefile: -------------------------------------------------------------------------------- 1 | AQCC_CC=../cc/cc 2 | AQCC_AS=../as/as 3 | AQCC_LD=../ld/ld 4 | AQCC_CC_SELF=bin/cc_self 5 | AQCC_AS_SELF=bin/as_self 6 | AQCC_LD_SELF=bin/ld_self 7 | AQCC_CC_SELFSELF=bin/cc_selfself 8 | AQCC_AS_SELFSELF=bin/as_selfself 9 | AQCC_LD_SELFSELF=bin/ld_selfself 10 | AQCC_ENV=\ 11 | AQCC_CC=$(realpath $(AQCC_CC))\ 12 | AQCC_AS=$(realpath $(AQCC_AS))\ 13 | AQCC_LD=$(realpath $(AQCC_LD)) 14 | AQCC_SELF_ENV=\ 15 | AQCC_CC=$(realpath $(AQCC_CC_SELF))\ 16 | AQCC_AS=$(realpath $(AQCC_AS_SELF))\ 17 | AQCC_LD=$(realpath $(AQCC_LD_SELF)) 18 | AQCC_SELFSELF_ENV=\ 19 | AQCC_CC=$(realpath $(AQCC_CC_SELFSELF))\ 20 | AQCC_AS=$(realpath $(AQCC_AS_SELFSELF))\ 21 | AQCC_LD=$(realpath $(AQCC_LD_SELFSELF)) 22 | 23 | test: $(AQCC_CC) $(AQCC_AS) $(AQCC_LD) 24 | $(AQCC_ENV) ./test.sh 25 | 26 | self_test: $(AQCC_CC_SELF) $(AQCC_AS_SELF) $(AQCC_LD_SELF) 27 | $(AQCC_SELF_ENV) ./test.sh 28 | 29 | selfself_test: $(AQCC_CC_SELFSELF) $(AQCC_AS_SELFSELF) $(AQCC_LD_SELFSELF) 30 | $(AQCC_SELFSELF_ENV) ./test.sh 31 | cmp $(AQCC_CC_SELF) $(AQCC_CC_SELFSELF) 32 | cmp $(AQCC_AS_SELF) $(AQCC_AS_SELFSELF) 
33 | cmp $(AQCC_LD_SELF) $(AQCC_LD_SELFSELF) 34 | 35 | $(AQCC_CC): 36 | cd ../cc && make 37 | 38 | $(AQCC_AS): 39 | cd ../as && make 40 | 41 | $(AQCC_LD): 42 | cd ../ld && make 43 | 44 | $(AQCC_CC_SELF): $(AQCC_CC) $(AQCC_AS) $(AQCC_LD) 45 | mkdir -p bin 46 | cd ../cc && make CC=../aqcc FLAGS=-v TARGET=../test/$@ $(AQCC_ENV) 47 | 48 | $(AQCC_AS_SELF): $(AQCC_CC) $(AQCC_AS) $(AQCC_LD) 49 | mkdir -p bin 50 | cd ../as && make CC=../aqcc FLAGS=-v TARGET=../test/$@ $(AQCC_ENV) 51 | 52 | $(AQCC_LD_SELF): $(AQCC_CC) $(AQCC_AS) $(AQCC_LD) 53 | mkdir -p bin 54 | cd ../ld && make CC=../aqcc FLAGS=-v TARGET=../test/$@ $(AQCC_ENV) 55 | 56 | $(AQCC_CC_SELFSELF): $(AQCC_CC_SELF) $(AQCC_AS_SELF) $(AQCC_LD_SELF) 57 | mkdir -p bin 58 | cd ../cc && make CC=../aqcc FLAGS=-v TARGET=../test/$@ $(AQCC_SELF_ENV) 59 | 60 | $(AQCC_AS_SELFSELF): $(AQCC_CC_SELF) $(AQCC_AS_SELF) $(AQCC_LD_SELF) 61 | mkdir -p bin 62 | cd ../as && make CC=../aqcc FLAGS=-v TARGET=../test/$@ $(AQCC_SELF_ENV) 63 | 64 | $(AQCC_LD_SELFSELF): $(AQCC_CC_SELF) $(AQCC_AS_SELF) $(AQCC_LD_SELF) 65 | mkdir -p bin 66 | cd ../ld && make CC=../aqcc FLAGS=-v TARGET=../test/$@ $(AQCC_SELF_ENV) 67 | 68 | clean: 69 | rm -rf bin/ 70 | rm -rf _test.c _test_define_exe.o _test_exe.o 71 | 72 | .PHONY: test self_test selfself_test $(AQCC_CC) $(AQCC_AS) $(AQCC_LD) 73 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # AnQou C Compiler: aqcc 2 | 3 | aqcc is yet another tiny self-hosted C compiler with an also tiny assembler, 4 | linker and standard C library. 5 | 6 | ## To Build 7 | 8 | Just `make`. 9 | 10 | ## Usage 11 | 12 | `./aqcc [options] file...` 13 | 14 | `options` are: 15 | 16 | - `-S`: output an assembly file. 17 | - `-c`: output an object file. 18 | - `-o`: set the output file name. 19 | 20 | To find the detail, try `make selfself_test`, which tells you all the things. 
21 | 22 | ## Note 23 | 24 | - aqcc is a product of [Security Camp 2018](https://www.ipa.go.jp/jinzai/camp/2018/zenkoku2018_index.html). 25 | Special thanks to @rui314 san and @hikalium san. 26 | - Many features and functions that a C compiler generally has have not yet 27 | been implemented in aqcc. 28 | Feel free to implement missing features and to send pull requests :) 29 | 30 | 31 | --------- 32 | 33 | AnQou C Compiler の使い方 34 | 35 | ## aqcc をビルド 36 | 37 | `make` としてください。生成物を削除したいときは、 `make clean` などとしてください。 38 | 39 | ## aqcc の挙動をテスト 40 | 41 | - `make test` 42 | - 第一世代(gccなどによりコンパイルされたaqcc)をテスト 43 | - `make self_test` 44 | - 第二世代(第一世代によりコンパイルされたaqcc)をテスト 45 | - `make selfself_test` 46 | - 第三世代(第二世代によりコンパイルされたaqcc)をテスト 47 | - 第二世代と第三世代に違いがないことも確認されます。 48 | 49 | ## 一般のCファイルをコンパイル 50 | 51 | `./aqcc [options] file...` 52 | 53 | `options` には以下のようなものを使用できます。 54 | 55 | - `-S` 56 | アセンブリファイルを出力します。 57 | - `-c` 58 | オブジェクトファイルを出力します。 59 | - `-o out` 60 | 出力ファイル名を指定できます。 61 | 62 | `program.c` を以下のようにしてコンパイルし、実行できます。 63 | `aqcc` や `program.c` などは適宜読みかえてください。 64 | 65 | ``` 66 | $ ./aqcc program.c -o program 67 | $ ./program 68 | ``` 69 | 70 | なお、`#include <stdio.h>` などとはできません。 71 | `program.c` の中にこのような構文が含まれている場合は、取り除いて下さい。 72 | その代わりに、自前で `puts()` 関数などのプロトタイプ宣言を `program.c` の冒頭に加えてください。 73 | なお、カレントディレクトリ内のファイルはインクルードできますので、 `#include "aqcc.h"` などとインクルードして、 74 | `aqcc.h` に記されているプロトタイプ宣言を流用できます。 75 | 76 | また、標準ライブラリのうち提供されている機能はごく僅かです。 77 | `stdlib.c` などを参照して下さい。aqccが自動でこれらをリンクすることはありません。明示的に指定してください。 78 | 79 | ## 謝辞 80 | 81 | aqccは[セキュリティキャンプ全国大会2018](https://www.ipa.go.jp/jinzai/camp/2018/zenkoku2018_index.html)の成果物です。 82 | 講師の@rui314さんと@hikaliumさんに深く感謝申し上げます。 83 | 84 | ## 資料 85 | 86 | - [セルフホストCコンパイラaqcc 開発記](https://anqou.net/poc/2018/08/21/post-1853/) 87 | - セキュリティキャンプの前日にセルフホストを達成するまでのaqcc開発日記。 88 | - [ seccamp2018でセルフホストCコンパイラをつくった](https://speakerdeck.com/anqou/seccamp2018deseruhuhosutockonpairawotukututa) 89 | - Kernel/VM関西9回目での発表資料。 90 | - 
[ベストオブ頭おかしい発表に選ばれました](https://twitter.com/kernelvm/status/1044153390060625920)。ありがとうございます。 91 | 92 | -------------------------------------------------------------------------------- /as/test.inc: -------------------------------------------------------------------------------- 1 | //#include 2 | //#include 3 | 4 | void test_vector(int n) 5 | { 6 | Vector *vec = new_vector(); 7 | int i = 0; 8 | 9 | for (i = 0; i < n; i++) { 10 | int *d = safe_malloc(sizeof(int)); 11 | *d = i; 12 | vector_push_back(vec, d); 13 | } 14 | 15 | assert(vector_size(vec) == n); 16 | 17 | for (i = 0; i < n; i++) { 18 | int *d = (int *)vector_get(vec, i); 19 | vector_push_back(vec, d); 20 | assert(*d == i); 21 | } 22 | 23 | vector_set(vec, n - 1, new_int(1)); 24 | assert(*(int*)vector_get(vec, n - 1) == 1); 25 | 26 | // out-of-range access should return NULL. 27 | assert(vector_get(vec, vector_size(vec)) == NULL); 28 | 29 | Vector *vec2 = new_vector_from_scalar(new_int(100)); 30 | assert(*(int*)vector_get(vec2, 0) == 100); 31 | 32 | vector_push_back_vector(vec, vec2); 33 | assert(*(int*)vector_get(vec, vector_size(vec) - 1) == 100); 34 | 35 | Vector *vec3 = clone_vector(vec2); 36 | assert(*(int*)vector_get(vec3, 0) == 100); 37 | } 38 | 39 | void test_map() 40 | { 41 | Map *map = new_map(); 42 | const char *key[4]; 43 | key[0] = "key0"; 44 | key[1] = "key1"; 45 | key[2] = "key2"; 46 | key[3] = "key3"; 47 | int data[4]; 48 | data[0] = 0; 49 | data[1] = 1; 50 | data[2] = 2; 51 | data[3] = 3; 52 | 53 | for (int i = 0; i < 4; i++) { 54 | KeyValue *kv = map_insert(map, key[i], &data[i]); 55 | assert(strcmp(kv_key(kv), key[i]) == 0); 56 | assert(*(int *)(kv_value(kv)) == data[i]); 57 | } 58 | 59 | assert(map_size(map) == sizeof(data) / sizeof(int)); 60 | 61 | for (int i = 0; i < 4; i++) { 62 | KeyValue *kv = map_lookup(map, key[i]); 63 | assert(strcmp(kv_key(kv), key[i]) == 0); 64 | assert(*(int *)kv_value(kv) == data[i]); 65 | } 66 | } 67 | 68 | void test_string_builder() 69 | { 70 | StringBuilder 
// Checks unescape_char(): letters with an escape meaning map to their
// control character, any other letter comes back unchanged.
void test_escape_char()
{
    int bell = unescape_char('a');
    assert(bell == '\a');

    int newline = unescape_char('n');
    assert(newline == '\n');

    // 's' has no escape meaning, so it is passed through as is.
    int plain = unescape_char('s');
    assert(plain == 's');
}
27 | assert(vector_get(vec, vector_size(vec)) == NULL); 28 | 29 | Vector *vec2 = new_vector_from_scalar(new_int(100)); 30 | assert(*(int*)vector_get(vec2, 0) == 100); 31 | 32 | vector_push_back_vector(vec, vec2); 33 | assert(*(int*)vector_get(vec, vector_size(vec) - 1) == 100); 34 | 35 | Vector *vec3 = clone_vector(vec2); 36 | assert(*(int*)vector_get(vec3, 0) == 100); 37 | } 38 | 39 | void test_map() 40 | { 41 | Map *map = new_map(); 42 | const char *key[4]; 43 | key[0] = "key0"; 44 | key[1] = "key1"; 45 | key[2] = "key2"; 46 | key[3] = "key3"; 47 | int data[4]; 48 | data[0] = 0; 49 | data[1] = 1; 50 | data[2] = 2; 51 | data[3] = 3; 52 | 53 | for (int i = 0; i < 4; i++) { 54 | KeyValue *kv = map_insert(map, key[i], &data[i]); 55 | assert(strcmp(kv_key(kv), key[i]) == 0); 56 | assert(*(int *)(kv_value(kv)) == data[i]); 57 | } 58 | 59 | assert(map_size(map) == sizeof(data) / sizeof(int)); 60 | 61 | for (int i = 0; i < 4; i++) { 62 | KeyValue *kv = map_lookup(map, key[i]); 63 | assert(strcmp(kv_key(kv), key[i]) == 0); 64 | assert(*(int *)kv_value(kv) == data[i]); 65 | } 66 | } 67 | 68 | void test_string_builder() 69 | { 70 | StringBuilder *sb; 71 | char *str; 72 | 73 | sb = new_string_builder(); 74 | string_builder_append(sb, 'a'); 75 | string_builder_append(sb, 'b'); 76 | string_builder_append(sb, 'c'); 77 | 78 | str = string_builder_get(sb); 79 | assert(str[0] == 'a'); 80 | assert(str[1] == 'b'); 81 | assert(str[2] == 'c'); 82 | assert(str[3] == '\0'); 83 | } 84 | 85 | void test_escape_char() 86 | { 87 | assert(unescape_char('a') == '\a'); 88 | assert(unescape_char('n') == '\n'); 89 | assert(unescape_char('s') == 's'); 90 | } 91 | 92 | void execute_test() 93 | { 94 | test_vector(10); 95 | test_map(); 96 | test_string_builder(); 97 | test_escape_char(); 98 | } 99 | -------------------------------------------------------------------------------- /ld/test.inc: -------------------------------------------------------------------------------- 1 | //#include 2 | 
//#include 3 | 4 | void test_vector(int n) 5 | { 6 | Vector *vec = new_vector(); 7 | int i = 0; 8 | 9 | for (i = 0; i < n; i++) { 10 | int *d = safe_malloc(sizeof(int)); 11 | *d = i; 12 | vector_push_back(vec, d); 13 | } 14 | 15 | assert(vector_size(vec) == n); 16 | 17 | for (i = 0; i < n; i++) { 18 | int *d = (int *)vector_get(vec, i); 19 | vector_push_back(vec, d); 20 | assert(*d == i); 21 | } 22 | 23 | vector_set(vec, n - 1, new_int(1)); 24 | assert(*(int*)vector_get(vec, n - 1) == 1); 25 | 26 | // out-of-range access should return NULL. 27 | assert(vector_get(vec, vector_size(vec)) == NULL); 28 | 29 | Vector *vec2 = new_vector_from_scalar(new_int(100)); 30 | assert(*(int*)vector_get(vec2, 0) == 100); 31 | 32 | vector_push_back_vector(vec, vec2); 33 | assert(*(int*)vector_get(vec, vector_size(vec) - 1) == 100); 34 | 35 | Vector *vec3 = clone_vector(vec2); 36 | assert(*(int*)vector_get(vec3, 0) == 100); 37 | } 38 | 39 | void test_map() 40 | { 41 | Map *map = new_map(); 42 | const char *key[4]; 43 | key[0] = "key0"; 44 | key[1] = "key1"; 45 | key[2] = "key2"; 46 | key[3] = "key3"; 47 | int data[4]; 48 | data[0] = 0; 49 | data[1] = 1; 50 | data[2] = 2; 51 | data[3] = 3; 52 | 53 | for (int i = 0; i < 4; i++) { 54 | KeyValue *kv = map_insert(map, key[i], &data[i]); 55 | assert(strcmp(kv_key(kv), key[i]) == 0); 56 | assert(*(int *)(kv_value(kv)) == data[i]); 57 | } 58 | 59 | assert(map_size(map) == sizeof(data) / sizeof(int)); 60 | 61 | for (int i = 0; i < 4; i++) { 62 | KeyValue *kv = map_lookup(map, key[i]); 63 | assert(strcmp(kv_key(kv), key[i]) == 0); 64 | assert(*(int *)kv_value(kv) == data[i]); 65 | } 66 | } 67 | 68 | void test_string_builder() 69 | { 70 | StringBuilder *sb; 71 | char *str; 72 | 73 | sb = new_string_builder(); 74 | string_builder_append(sb, 'a'); 75 | string_builder_append(sb, 'b'); 76 | string_builder_append(sb, 'c'); 77 | 78 | str = string_builder_get(sb); 79 | assert(str[0] == 'a'); 80 | assert(str[1] == 'b'); 81 | assert(str[2] == 'c'); 82 | 
assert(str[3] == '\0'); 83 | } 84 | 85 | void test_escape_char() 86 | { 87 | assert(unescape_char('a') == '\a'); 88 | assert(unescape_char('n') == '\n'); 89 | assert(unescape_char('s') == 's'); 90 | } 91 | 92 | void execute_test() 93 | { 94 | test_vector(10); 95 | test_map(); 96 | test_string_builder(); 97 | test_escape_char(); 98 | } 99 | -------------------------------------------------------------------------------- /cc/type.c: -------------------------------------------------------------------------------- 1 | #include "cc.h" 2 | 3 | enum { 4 | // Relative size of types. Only the order is important. 5 | SIZE_VOID = -100, 6 | SIZE_CHAR = -10, 7 | SIZE_INT, 8 | SIZE_PTR, 9 | SIZE_UNK = -1, 10 | }; 11 | 12 | Type *new_type(int kind, int nbytes) 13 | { 14 | Type *type = safe_malloc(sizeof(Type)); 15 | type->kind = kind; 16 | type->nbytes = nbytes; 17 | type->is_static = type->is_extern = 0; 18 | return type; 19 | } 20 | 21 | void move_static_extern_specifier(Type *src, Type *dst) 22 | { 23 | if (src->is_static) dst->is_static = 1; 24 | if (src->is_extern) dst->is_extern = 1; 25 | src->is_static = src->is_extern = 0; 26 | } 27 | 28 | Type *type_int() 29 | { 30 | static Type *type = NULL; 31 | if (type == NULL) type = new_type(TY_INT, SIZE_INT); 32 | 33 | return type; 34 | } 35 | 36 | Type *type_char() 37 | { 38 | static Type *type = NULL; 39 | if (type == NULL) type = new_type(TY_CHAR, SIZE_CHAR); 40 | 41 | return type; 42 | } 43 | 44 | Type *type_void() 45 | { 46 | static Type *type = NULL; 47 | if (type == NULL) type = new_type(TY_VOID, SIZE_VOID); 48 | return type; 49 | } 50 | 51 | Type *new_pointer_type(Type *src) 52 | { 53 | Type *type = new_type(TY_PTR, SIZE_PTR); 54 | type->ptr_of = src; 55 | return type; 56 | } 57 | 58 | Type *new_array_type(Type *src, int len) 59 | { 60 | Type *type = new_type(TY_ARY, src->nbytes * len); 61 | type->ary_of = src; 62 | type->len = len; 63 | return type; 64 | } 65 | 66 | Type *new_struct_or_union_type(int kind, char *stname, 
Vector *decls) 67 | { 68 | Type *type = new_type(kind, SIZE_UNK); 69 | type->stname = stname; 70 | type->members = NULL; 71 | type->decls = decls; 72 | return type; 73 | } 74 | 75 | Type *new_typedef_type(char *typedef_name) 76 | { 77 | Type *type = new_type(TY_TYPEDEF, SIZE_UNK); 78 | type->typedef_name = typedef_name; 79 | return type; 80 | } 81 | 82 | Type *new_enum_type(char *name, Vector *list) 83 | { 84 | Type *type = new_type(TY_ENUM, SIZE_UNK); 85 | type->enname = name; 86 | type->enum_list = list; 87 | return type; 88 | } 89 | 90 | Type *new_static_type(Type *src) 91 | { 92 | Type *type = safe_malloc(sizeof(Type)); 93 | memcpy(type, src, sizeof(Type)); 94 | type->is_static = 1; 95 | return type; 96 | } 97 | 98 | Type *new_extern_type(Type *src) 99 | { 100 | Type *type = safe_malloc(sizeof(Type)); 101 | memcpy(type, src, sizeof(Type)); 102 | type->is_extern = 1; 103 | return type; 104 | } 105 | -------------------------------------------------------------------------------- /cc/token.c: -------------------------------------------------------------------------------- 1 | #include "cc.h" 2 | 3 | Token *new_token(int kind, Source *source) 4 | { 5 | Token *token = (Token *)safe_malloc(sizeof(Token)); 6 | token->kind = kind; 7 | token->source = source; 8 | return token; 9 | } 10 | 11 | Token *clone_token(Token *src) 12 | { 13 | Token *dst = (Token *)safe_malloc(sizeof(Token)); 14 | memcpy(dst, src, sizeof(Token)); 15 | return dst; 16 | } 17 | 18 | TokenSeq *new_token_seq(Vector *tokens) 19 | { 20 | TokenSeq *tokseq = safe_malloc(sizeof(TokenSeq)); 21 | tokseq->tokens = tokens; 22 | tokseq->idx = 0; 23 | return tokseq; 24 | } 25 | 26 | TokenSeq *tokenseq; 27 | 28 | void init_tokenseq(Vector *tokens) { tokenseq = new_token_seq(tokens); } 29 | 30 | void insert_tokens(Vector *tokens) 31 | { 32 | Vector *tmp = new_vector(); 33 | for (int i = 0; i < tokenseq->idx; i++) 34 | vector_push_back(tmp, vector_get(tokenseq->tokens, i)); 35 | for (int i = 0; i < 
vector_size(tokens); i++) { 36 | Token *token = vector_get(tokens, i); 37 | if (token->kind == tEOF) break; 38 | vector_push_back(tmp, token); 39 | } 40 | for (int i = tokenseq->idx; i < vector_size(tokenseq->tokens); i++) 41 | vector_push_back(tmp, vector_get(tokenseq->tokens, i)); 42 | tokenseq->tokens = tmp; 43 | } 44 | 45 | Token *peek_token() 46 | { 47 | Token *token = vector_get(tokenseq->tokens, tokenseq->idx); 48 | if (token == NULL) error("no next token."); 49 | return token; 50 | } 51 | 52 | Token *pop_token() 53 | { 54 | Token *token = vector_get(tokenseq->tokens, tokenseq->idx++); 55 | if (token == NULL) error("no next token."); 56 | return token; 57 | } 58 | 59 | Token *expect_token(int kind) 60 | { 61 | Token *token = pop_token(tokenseq); 62 | if (token->kind != kind) error_unexpected_token_kind(kind, token); 63 | return token; 64 | } 65 | 66 | int match_token(int kind) 67 | { 68 | Token *token = peek_token(tokenseq); 69 | return token->kind == kind; 70 | } 71 | 72 | Token *pop_token_if(int kind) 73 | { 74 | if (match_token(kind)) return pop_token(); 75 | return NULL; 76 | } 77 | 78 | int match_token2(int kind0, int kind1) 79 | { 80 | Token *token = peek_token(tokenseq); 81 | if (token->kind != kind0) return 0; 82 | tokenseq->idx++; 83 | token = peek_token(tokenseq); 84 | tokenseq->idx--; 85 | if (token->kind != kind1) return 0; 86 | return 1; 87 | } 88 | 89 | TokenSeqSaved *new_token_seq_saved() 90 | { 91 | TokenSeqSaved *tokseqsav; 92 | tokseqsav = (TokenSeqSaved *)safe_malloc(sizeof(TokenSeqSaved)); 93 | tokseqsav->idx = tokenseq->idx; 94 | return tokseqsav; 95 | } 96 | 97 | void restore_token_seq_saved(TokenSeqSaved *saved) 98 | { 99 | tokenseq->idx = saved->idx; 100 | } 101 | -------------------------------------------------------------------------------- /as/object.c: -------------------------------------------------------------------------------- 1 | #include "as.h" 2 | 3 | void add_byte(Vector *vec, int val) 4 | { 5 | vector_push_back(vec, 
(void *)(val & 0xff)); 6 | } 7 | 8 | void set_byte(Vector *vec, int index, int val) 9 | { 10 | vector_set(vec, index, (void *)val); 11 | } 12 | 13 | void add_word(Vector *vec, int val0, int val1) 14 | { 15 | add_byte(vec, val0); 16 | add_byte(vec, val1); 17 | } 18 | 19 | void add_word_int(Vector *vec, int ival) 20 | { 21 | add_word(vec, ival & 0xff, (ival >> 8) & 0xff); 22 | } 23 | 24 | void add_dword(Vector *vec, int val0, int val1, int val2, int val3) 25 | { 26 | add_word(vec, val0, val1); 27 | add_word(vec, val2, val3); 28 | } 29 | 30 | void add_dword_int(Vector *vec, int ival) 31 | { 32 | add_dword(vec, ival & 0xff, (ival >> 8) & 0xff, (ival >> 16) & 0xff, 33 | (ival >> 24) & 0xff); 34 | } 35 | 36 | void add_qword_int(Vector *vec, int low, int high) 37 | { 38 | add_dword_int(vec, low); 39 | add_dword_int(vec, high); 40 | } 41 | 42 | void add_string(Vector *vec, char *src, int len) 43 | { 44 | if (len == -1) len = strlen(src); 45 | for (int i = 0; i < len; i++) add_byte(vec, src[i]); 46 | } 47 | 48 | void add_qword(Vector *vec, int val0, int val1, int val2, int val3, int val4, 49 | int val5, int val6, int val7) 50 | { 51 | add_dword(vec, val0, val1, val2, val3); 52 | add_dword(vec, val4, val5, val6, val7); 53 | } 54 | 55 | void write_byte(FILE *fh, int val0) { fputc(val0, fh); } 56 | 57 | Vector *buffer_to_emit = NULL; 58 | 59 | Vector *get_buffer_to_emit() { return buffer_to_emit; } 60 | 61 | int emitted_size() { return vector_size(buffer_to_emit); } 62 | 63 | void set_buffer_to_emit(Vector *buffer) { buffer_to_emit = buffer; } 64 | 65 | void reemit_byte(int index, int val0) 66 | { 67 | vector_set(buffer_to_emit, index, (void *)(val0 & 0xff)); 68 | } 69 | 70 | void emit_byte(int val0) { add_byte(buffer_to_emit, val0); } 71 | 72 | void emit_word(int val0, int val1) 73 | { 74 | emit_byte(val0); 75 | emit_byte(val1); 76 | } 77 | 78 | void emit_word_int(int ival) { emit_word(ival & 0xff, (ival >> 8) & 0xff); } 79 | 80 | void emit_dword(int val0, int val1, int val2, 
// Emits `val` as an `nbytes`-byte integer, least significant byte first.
// Bytes that lie inside the width of `int` are taken from `val` by
// shifting. Bytes beyond it are filled with the sign extension of `val`
// (0xff for negative values, 0x00 otherwise), because shifting an int by
// its full width or more is undefined behaviour.
void emit_nbytes(int nbytes, int val)
{
    int int_bytes = sizeof(int);
    int fill = val < 0 ? 0xff : 0x00;

    for (int i = 0; i < nbytes; i++) {
        if (i < int_bytes)
            emit_byte((val >> (i << 3)) & 0xff);
        else
            emit_byte(fill);
    }
}
// Emits `val` as an `nbytes`-byte integer, least significant byte first.
// Bytes that lie inside the width of `int` are taken from `val` by
// shifting. Bytes beyond it are filled with the sign extension of `val`
// (0xff for negative values, 0x00 otherwise), because shifting an int by
// its full width or more is undefined behaviour.
void emit_nbytes(int nbytes, int val)
{
    int int_bytes = sizeof(int);
    int fill = val < 0 ? 0xff : 0x00;

    for (int i = 0; i < nbytes; i++) {
        if (i < int_bytes)
            emit_byte((val >> (i << 3)) & 0xff);
        else
            emit_byte(fill);
    }
}
#!/bin/bash

# Print "[ERROR] <message>" (tag in bold red) and abort with status 1.
function fail(){
    echo -ne "\e[1;31m[ERROR]\e[0m "
    echo "$1"
    exit 1
}

# Tool locations default to the binaries built next to this script; each
# one can be overridden through the environment.
script_dir=$(dirname "${0}")
[ -z "$AQCC_CC" ] && AQCC_CC="$script_dir/cc/cc"
[ -z "$AQCC_AS" ] && AQCC_AS="$script_dir/as/as"
[ -z "$AQCC_LD" ] && AQCC_LD="$script_dir/ld/ld"

# Stop early if ANY of the tools has not been built yet. Otherwise a
# partially built tree only fails later with a less helpful message.
if [ ! -f "$AQCC_CC" ] || [ ! -f "$AQCC_AS" ] || [ ! -f "$AQCC_LD" ]; then
    fail "Please 'make' first."
fi

function print_usage_to_fail() {
    fail "Usage: aqcc [-c, -S] input-files... -o output-file"
}

# Command-line state:
#   outft   - kind of output: 'e' executable (default), 'o' object, 's' assembly
#   outfile - output path (default a.out)
#   infiles - every argument that is not a recognised option
#   verbose - 1 to echo each tool invocation before running it
outft='e'
outfile='a.out'
infiles=()
verbose=0
while (( $# > 0 ))
do
    case "$1" in
        '-o')
            # -o needs a value, and that value must not look like an option.
            if [[ -z "$2" ]] || [[ "$2" =~ ^-+ ]]; then
                print_usage_to_fail
            fi
            outfile=$2
            shift 2
            ;;
        '-c')
            outft='o'
            shift
            ;;
        '-S')
            outft='s'
            shift
            ;;
        '-v')
            verbose=1
            shift
            ;;
        *)
            infiles+=("$1")
            shift
            ;;
    esac
done

# Thin wrappers around the three tools; with -v the command line is
# echoed first. The wrapper's status is the status of the tool itself.
function aqcc_cc() {
    [ $verbose = 1 ] && echo "$AQCC_CC" "$@"
    "$AQCC_CC" "$@"
}

function aqcc_as() {
    [ $verbose = 1 ] && echo "$AQCC_AS" "$@"
    "$AQCC_AS" "$@"
}

function aqcc_ld() {
    [ $verbose = 1 ] && echo "$AQCC_LD" "$@"
    "$AQCC_LD" "$@"
}
103 | aqcc_cc "$fname" $sfile 104 | aqcc_as $sfile $ofile 105 | insrc+=($ofile) 106 | rm $sfile 107 | tempfiles+=($ofile) 108 | ;; 109 | 110 | *s) 111 | ofile=$(mktemp) 112 | aqcc_as "$fname" $ofile 113 | insrc+=($ofile) 114 | tempfiles+=($ofile) 115 | ;; 116 | 117 | *o) 118 | insrc+=("$fname") 119 | ;; 120 | esac 121 | done 122 | 123 | aqcc_ld "${insrc[@]}" "$outfile" 124 | chmod +x "$outfile" 125 | 126 | for ((i = 0; i < ${#tempfiles[@]}; i++)) 127 | do 128 | rm "${tempfiles[$i]}" 129 | done 130 | ;; 131 | esac 132 | -------------------------------------------------------------------------------- /cc/env.c: -------------------------------------------------------------------------------- 1 | #include "cc.h" 2 | 3 | Env *new_env(Env *parent) 4 | { 5 | Env *env = safe_malloc(sizeof(Env)); 6 | env->parent = parent; 7 | env->symbols = new_map(); 8 | env->scoped_vars = parent == NULL ? new_vector() : parent->scoped_vars; 9 | env->types = new_map(); 10 | env->enum_values = new_map(); 11 | return env; 12 | } 13 | 14 | AST *add_symbol(Env *env, const char *name, AST *ast) 15 | { 16 | KeyValue *kv = map_lookup(env->symbols, name); 17 | if (kv != NULL) error("same symbol already exists: '%s'", name); 18 | map_insert(env->symbols, name, ast); 19 | return ast; 20 | } 21 | 22 | AST *lookup_symbol(Env *env, const char *name) 23 | { 24 | KeyValue *kv = map_lookup(env->symbols, name); 25 | if (kv == NULL) { 26 | if (env->parent == NULL) return NULL; 27 | return lookup_symbol(env->parent, name); 28 | } 29 | 30 | return (AST *)kv_value(kv); 31 | } 32 | 33 | AST *add_var(Env *env, AST *ast) 34 | { 35 | assert(ast->kind == AST_LVAR_DECL || ast->kind == AST_GVAR_DECL); 36 | assert(ast->kind != AST_GVAR_DECL || env->parent == NULL); 37 | 38 | // Create a local/global variable instance. 39 | // All AST_VAR that point the same variable will be replaced with 40 | // the pointer to this AST_LVAR/AST_GVAR instance when analyzing. 41 | AST *var = new_lgvar_ast(ast->kind == AST_LVAR_DECL ? 
AST_LVAR : AST_GVAR, 42 | ast->type, ast->varname, -1); 43 | 44 | add_symbol(env, ast->varname, var); 45 | vector_push_back(env->scoped_vars, var); 46 | 47 | return var; 48 | } 49 | 50 | AST *lookup_var(Env *env, const char *name) 51 | { 52 | AST *ast; 53 | 54 | ast = lookup_symbol(env, name); 55 | if (ast && ast->kind != AST_LVAR && ast->kind != AST_GVAR) 56 | error("found but not var"); 57 | return ast; 58 | } 59 | 60 | AST *add_func(Env *env, const char *name, AST *ast) 61 | { 62 | assert(ast->kind == AST_FUNCDEF || ast->kind == AST_FUNC_DECL); 63 | add_symbol(env, name, ast); 64 | 65 | return ast; 66 | } 67 | 68 | AST *lookup_func(Env *env, const char *name) 69 | { 70 | AST *ast; 71 | 72 | ast = lookup_symbol(env, name); 73 | if (ast && ast->kind != AST_FUNCDEF && ast->kind != AST_FUNC_DECL) 74 | error("found but not func"); 75 | return ast; 76 | } 77 | 78 | Type *add_type(Env *env, Type *type, char *name) 79 | { 80 | KeyValue *kv = map_lookup(env->types, name); 81 | if (kv != NULL) error("same type already exists: '%s'", name); 82 | map_insert(env->types, name, type); 83 | return type; 84 | } 85 | 86 | Type *lookup_type(Env *env, const char *name) 87 | { 88 | KeyValue *kv = map_lookup(env->types, name); 89 | if (kv == NULL) { 90 | if (env->parent == NULL) return NULL; 91 | return lookup_type(env->parent, name); 92 | } 93 | return (Type *)kv_value(kv); 94 | } 95 | 96 | Type *add_struct_or_union_or_enum_type(Env *env, Type *type) 97 | { 98 | assert(type->kind == TY_STRUCT || type->kind == TY_UNION || 99 | type->kind == TY_ENUM); 100 | return add_type(env, type, format("struct/union %s", type->stname)); 101 | } 102 | 103 | Type *lookup_struct_or_union_or_enum_type(Env *env, const char *name) 104 | { 105 | Type *type = lookup_type(env, format("struct/union %s", name)); 106 | assert(type == NULL || type->kind == TY_STRUCT || type->kind == TY_UNION || 107 | type->kind == TY_ENUM); 108 | return type; 109 | } 110 | 111 | void add_enum_value(Env *env, char *name, AST 
// printf-style formatting into a newly allocated string.
// The text is first rendered into a fixed 512-byte stack buffer with
// vsprintf(), which performs no bounds checking, so the formatted result
// (including the terminating NUL) has to fit into those 512 bytes.
// The result is then duplicated onto the heap via new_str().
char *vformat(const char *src, va_list ap)
{
    char scratch[512];  // TODO: enough length?

    vsprintf(scratch, src, ap);
    return new_str(scratch);
}
62 | { 63 | va_list args; 64 | va_start(args, src); 65 | char *ret = vformat(src, args); 66 | va_end(args); 67 | return ret; 68 | } 69 | 70 | int unescape_char(int src) 71 | { 72 | static int table[128]; 73 | if (table[0] == 0) { 74 | memset(table, 255, sizeof(table)); 75 | 76 | table['n'] = '\n'; 77 | table['r'] = '\r'; 78 | table['t'] = '\t'; 79 | table['0'] = '\0'; 80 | table['a'] = '\a'; 81 | table['b'] = '\b'; 82 | table['v'] = '\v'; 83 | table['f'] = '\f'; 84 | } 85 | 86 | int ch = table[src]; 87 | return ch == -1 ? src : ch; 88 | } 89 | 90 | char *escape_string(char *str, int size) 91 | { 92 | StringBuilder *sb = new_string_builder(); 93 | for (int i = 0; i < size; i++) { 94 | char ch = str[i]; 95 | 96 | switch (ch) { 97 | case '\n': 98 | string_builder_append(sb, '\\'); 99 | string_builder_append(sb, 'n'); 100 | break; 101 | 102 | case '\r': 103 | string_builder_append(sb, '\\'); 104 | string_builder_append(sb, 'n'); 105 | break; 106 | 107 | case '\t': 108 | string_builder_append(sb, '\\'); 109 | string_builder_append(sb, 't'); 110 | break; 111 | 112 | case '\0': 113 | string_builder_append(sb, '\\'); 114 | string_builder_append(sb, '0'); 115 | break; 116 | 117 | case '\a': 118 | string_builder_append(sb, '\\'); 119 | string_builder_append(sb, 'a'); 120 | break; 121 | 122 | case '\b': 123 | string_builder_append(sb, '\\'); 124 | string_builder_append(sb, 'b'); 125 | break; 126 | 127 | case '\v': 128 | string_builder_append(sb, '\\'); 129 | string_builder_append(sb, 'v'); 130 | break; 131 | 132 | case '\f': 133 | string_builder_append(sb, '\\'); 134 | string_builder_append(sb, 'f'); 135 | break; 136 | 137 | case '"': 138 | string_builder_append(sb, '\\'); 139 | string_builder_append(sb, '"'); 140 | break; 141 | 142 | default: 143 | string_builder_append(sb, ch); 144 | break; 145 | } 146 | } 147 | 148 | return string_builder_get(sb); 149 | } 150 | 151 | int min(int a, int b) { return a < b ? a : b; } 152 | 153 | int max(int a, int b) { return a < b ? 
// Rounds `n` up to the next multiple of `b` using a bit mask.
// The mask trick clears the low bits of (n + b - 1), so the result is a
// multiple of `b` only when `b` is a power of two.
int roundup(int n, int b)
{
    int mask = b - 1;
    int bumped = n + mask;
    return bumped & ~mask;
}
84 | { 85 | if (!match_type(ary, TY_ARY)) return ary; 86 | return new_ary2ptr_ast(ary); 87 | } 88 | 89 | AST *char2int(AST *ch) 90 | { 91 | AST *ast; 92 | 93 | if (!match_type(ch, TY_CHAR)) return ch; 94 | ast = new_ast(AST_CHAR2INT); 95 | ast->lhs = ch; 96 | ast->type = type_int(); 97 | return ast; 98 | } 99 | 100 | AST *new_var_ast(char *varname) 101 | { 102 | AST *ast; 103 | 104 | ast = new_ast(AST_VAR); 105 | ast->varname = varname; 106 | return ast; 107 | } 108 | 109 | AST *new_unary_ast(int kind, AST *that) 110 | { 111 | AST *ast; 112 | 113 | ast = new_ast(kind); 114 | ast->lhs = that; 115 | return ast; 116 | } 117 | 118 | // variable reference 119 | AST *new_lgvar_ast(int kind, Type *type, char *varname, int stack_idx) 120 | { 121 | assert(kind == AST_LVAR || kind == AST_GVAR); 122 | AST *ast = new_ast(kind); 123 | ast->type = type; 124 | ast->varname = ast->gen_varname = varname; 125 | ast->stack_idx = stack_idx; 126 | return ast; 127 | } 128 | 129 | // variable declaration 130 | AST *new_var_decl_ast(int kind, Type *type, char *varname) 131 | { 132 | AST *ast; 133 | 134 | ast = new_ast(kind); 135 | ast->type = type; 136 | ast->varname = varname; 137 | return ast; 138 | } 139 | 140 | // variable declaration with initializer 141 | AST *new_var_decl_init_ast(AST *var_decl, AST *initer) 142 | { 143 | int kind = -1; 144 | switch (var_decl->kind) { 145 | case AST_LVAR_DECL: 146 | kind = AST_LVAR_DECL_INIT; 147 | break; 148 | case AST_GVAR_DECL: 149 | kind = AST_GVAR_DECL_INIT; 150 | break; 151 | case AST_ENUM_VAR_DECL: 152 | kind = AST_ENUM_VAR_DECL_INIT; 153 | break; 154 | default: 155 | error("only local/global variable can have initializer."); 156 | } 157 | 158 | AST *ast = new_ast(kind); 159 | ast->lhs = var_decl; 160 | ast->rhs = 161 | new_binop_ast(AST_ASSIGN, new_var_ast(var_decl->varname), initer); 162 | return ast; 163 | } 164 | 165 | AST *new_label_ast(char *name, AST *stmt) 166 | { 167 | AST *label = new_ast(AST_LABEL); 168 | label->label_name = name; 
169 | label->label_stmt = stmt; 170 | return label; 171 | } 172 | 173 | AST *new_lvalue2rvalue_ast(AST *lvalue) 174 | { 175 | AST *ast = new_ast(AST_LVALUE2RVALUE); 176 | ast->lhs = lvalue; 177 | ast->type = lvalue->type; 178 | return ast; 179 | } 180 | 181 | AST *new_int_ast(int ival) 182 | { 183 | AST *ast = new_ast(AST_INT); 184 | ast->ival = ival; 185 | ast->type = type_int(); 186 | return ast; 187 | } 188 | -------------------------------------------------------------------------------- /ld/ld.h: -------------------------------------------------------------------------------- 1 | #ifndef AQCC_AQCC_H 2 | #define AQCC_AQCC_H 3 | 4 | //#include 5 | //#include 6 | //#include 7 | //#include 8 | //#include 9 | //#include 10 | 11 | #ifdef __GNUC__ 12 | typedef __builtin_va_list va_list; 13 | #else 14 | #endif 15 | #ifndef __GNUC__ 16 | typedef struct { 17 | int gp_offset; 18 | int fp_offset; 19 | void *overflow_arg_area; 20 | void *reg_save_area; 21 | } va_list[1]; 22 | #endif 23 | #define va_start __builtin_va_start 24 | #define va_end __builtin_va_end 25 | #define va_arg __builtin_va_arg 26 | 27 | typedef struct _IO_FILE FILE; 28 | // extern FILE *stdin; /* Standard input stream. */ 29 | // extern FILE *stdout; /* Standard output stream. */ 30 | // extern FILE *stderr; /* Standard error output stream. */ 31 | #define NULL 0 32 | #define EOF (-1) 33 | FILE *fopen(const char *pathname, const char *mode); 34 | int fclose(FILE *stream); 35 | int fputc(int c, FILE *stream); 36 | int fgetc(FILE *stream); 37 | int fprintf(FILE *stream, const char *format, ...); 38 | int printf(const char *format, ...); 39 | int vsprintf(char *str, const char *format, va_list ap); 40 | #define EXIT_FAILURE 1 /* Failing exit status. */ 41 | #define EXIT_SUCCESS 0 /* Successful exit status. 
*/ 42 | _Noreturn void exit(int status); 43 | void *malloc(int size); 44 | int strlen(const char *s); 45 | int strcmp(const char *s1, const char *s2); 46 | char *strcpy(char *dest, const char *src); 47 | int isalpha(int c); 48 | int isalnum(int c); 49 | int isdigit(int c); 50 | int isspace(int c); 51 | void *memcpy(void *dest, const void *src, int n); 52 | void *memset(void *s, int c, int n); 53 | void assert(int cond); 54 | 55 | // vector.c 56 | typedef struct Vector Vector; 57 | Vector *new_vector(); 58 | Vector *new_vector_from_scalar(void *scalar); 59 | void vector_push_back(Vector *vec, void *item); 60 | void *vector_get(Vector *vec, int i); 61 | int vector_size(Vector *vec); 62 | void *vector_set(Vector *vec, int i, void *item); 63 | void vector_push_back_vector(Vector *vec, Vector *src); 64 | Vector *clone_vector(Vector *src); 65 | 66 | // map.c 67 | typedef struct KeyValue KeyValue; 68 | typedef struct Map Map; 69 | Map *new_map(); 70 | int map_size(Map *map); 71 | KeyValue *map_insert(Map *map, const char *key, void *item); 72 | KeyValue *map_lookup(Map *map, const char *key); 73 | const char *kv_key(KeyValue *kv); 74 | void *kv_value(KeyValue *kv); 75 | 76 | // string_builder.c 77 | typedef struct StringBuilder StringBuilder; 78 | StringBuilder *new_string_builder(); 79 | char string_builder_append(StringBuilder *sb, char ch); 80 | char *string_builder_get(StringBuilder *sb); 81 | int string_builder_size(StringBuilder *sb); 82 | 83 | // utility.c 84 | _Noreturn void error(const char *msg, ...); 85 | void warn(const char *msg, ...); 86 | void *safe_malloc(int size); 87 | char *new_str(const char *src); 88 | int *new_int(int src); 89 | char *format(const char *src, ...); 90 | char *vformat(const char *src, va_list ap); 91 | int unescape_char(int src); 92 | char *escape_string(char *str, int size); 93 | int min(int a, int b); 94 | int max(int a, int b); 95 | int roundup(int n, int b); 96 | 97 | // link.c 98 | typedef struct ExeImage ExeImage; 99 | ExeImage 
*link_objs(Vector *obj_paths); 100 | void dump_exe_image(ExeImage *exeimg, FILE *fh); 101 | 102 | // object.c 103 | void add_byte(Vector *vec, int val); 104 | void set_byte(Vector *vec, int index, int val); 105 | void add_word(Vector *vec, int val0, int val1); 106 | void add_word_int(Vector *vec, int ival); 107 | void add_dword(Vector *vec, int val0, int val1, int val2, int val3); 108 | void add_dword_int(Vector *vec, int ival); 109 | void add_qword_int(Vector *vec, int low, int high); 110 | void add_string(Vector *vec, char *src, int len); 111 | void add_qword(Vector *vec, int val0, int val1, int val2, int val3, int val4, 112 | int val5, int val6, int val7); 113 | void write_byte(FILE *fh, int val0); 114 | Vector *get_buffer_to_emit(); 115 | int emitted_size(); 116 | void set_buffer_to_emit(Vector *buffer); 117 | void reemit_byte(int index, int val0); 118 | void emit_byte(int val0); 119 | void emit_word(int val0, int val1); 120 | void emit_word_int(int ival); 121 | void emit_dword(int val0, int val1, int val2, int val3); 122 | void emit_dword_int(int ival); 123 | void emit_qword(int val0, int val1, int val2, int val3, int val4, int val5, 124 | int val6, int val7); 125 | void emit_qword_int(int low, int high); 126 | void emit_string(char *src, int len); 127 | void emit_nbytes(int nbytes, int val); 128 | 129 | #endif 130 | -------------------------------------------------------------------------------- /cc/utility.c: -------------------------------------------------------------------------------- 1 | #include "cc.h" 2 | 3 | _Noreturn void error(const char *msg, ...) 
4 | { 5 | va_list args; 6 | va_start(args, msg); 7 | char *str = vformat(msg, args); 8 | va_end(args); 9 | 10 | // fprintf(stderr, "[ERROR] %s\n", str); 11 | printf("[ERROR] %s\n", str); 12 | // fprintf(stderr, "[DEBUG] %s, %d\n", __FILE__, __LINE__); 13 | exit(EXIT_FAILURE); 14 | } 15 | 16 | _Noreturn void error_unexpected_token_kind(int expect_kind, Token *got) 17 | { 18 | error("%s:%d:%d: unexpected token: expect %s, got %s", 19 | got->source->filepath, got->source->line, got->source->column, 20 | token_kind2str(expect_kind), token_kind2str(got->kind)); 21 | } 22 | 23 | _Noreturn void error_unexpected_token_str(char *expect_str, Token *got) 24 | { 25 | error("%s:%d:%d: unexpected token: expect %s, got %s", 26 | got->source->filepath, got->source->line, got->source->column, 27 | expect_str, token_kind2str(got->kind)); 28 | } 29 | 30 | void warn(const char *msg, ...) 31 | { 32 | va_list args; 33 | va_start(args, msg); 34 | char *str = vformat(msg, args); 35 | va_end(args); 36 | 37 | // fprintf(stderr, "[WARN] %s\n", str); 38 | printf("[WARN] %s\n", str); 39 | // fprintf(stderr, "[DEBUG] %s, %d\n", __FILE__, __LINE__); 40 | } 41 | 42 | void *safe_malloc(int size) 43 | { 44 | void *ptr; 45 | 46 | ptr = malloc(size); 47 | if (ptr == NULL) error("malloc failed."); 48 | return ptr; 49 | } 50 | 51 | char *new_str(const char *src) 52 | { 53 | char *ret = safe_malloc(strlen(src) + 1); 54 | strcpy(ret, src); 55 | return ret; 56 | } 57 | 58 | int *new_int(int src) 59 | { 60 | int *ret = safe_malloc(sizeof(int)); 61 | *ret = src; 62 | return ret; 63 | } 64 | 65 | char *vformat(const char *src, va_list ap) 66 | { 67 | char buf[512]; // TODO: enough length? 68 | vsprintf(buf, src, ap); 69 | 70 | char *ret = safe_malloc(strlen(buf) + 1); 71 | strcpy(ret, buf); 72 | return ret; 73 | } 74 | 75 | char *format(const char *src, ...) 
76 | { 77 | va_list args; 78 | va_start(args, src); 79 | char *ret = vformat(src, args); 80 | va_end(args); 81 | return ret; 82 | } 83 | 84 | int unescape_char(int src) 85 | { 86 | static int table[128]; 87 | if (table[0] == 0) { 88 | memset(table, 255, sizeof(table)); 89 | 90 | table['n'] = '\n'; 91 | table['r'] = '\r'; 92 | table['t'] = '\t'; 93 | table['0'] = '\0'; 94 | table['a'] = '\a'; 95 | table['b'] = '\b'; 96 | table['v'] = '\v'; 97 | table['f'] = '\f'; 98 | } 99 | 100 | int ch = table[src]; 101 | return ch == -1 ? src : ch; 102 | } 103 | 104 | char *escape_string(char *str, int size) 105 | { 106 | StringBuilder *sb = new_string_builder(); 107 | for (int i = 0; i < size; i++) { 108 | char ch = str[i]; 109 | 110 | switch (ch) { 111 | case '\n': 112 | string_builder_append(sb, '\\'); 113 | string_builder_append(sb, 'n'); 114 | break; 115 | 116 | case '\r': 117 | string_builder_append(sb, '\\'); 118 | string_builder_append(sb, 'n'); 119 | break; 120 | 121 | case '\t': 122 | string_builder_append(sb, '\\'); 123 | string_builder_append(sb, 't'); 124 | break; 125 | 126 | case '\0': 127 | string_builder_append(sb, '\\'); 128 | string_builder_append(sb, '0'); 129 | break; 130 | 131 | case '\a': 132 | string_builder_append(sb, '\\'); 133 | string_builder_append(sb, 'a'); 134 | break; 135 | 136 | case '\b': 137 | string_builder_append(sb, '\\'); 138 | string_builder_append(sb, 'b'); 139 | break; 140 | 141 | case '\v': 142 | string_builder_append(sb, '\\'); 143 | string_builder_append(sb, 'v'); 144 | break; 145 | 146 | case '\f': 147 | string_builder_append(sb, '\\'); 148 | string_builder_append(sb, 'f'); 149 | break; 150 | 151 | case '"': 152 | string_builder_append(sb, '\\'); 153 | string_builder_append(sb, '"'); 154 | break; 155 | 156 | default: 157 | string_builder_append(sb, ch); 158 | break; 159 | } 160 | } 161 | 162 | return string_builder_get(sb); 163 | } 164 | 165 | char *make_label_string() 166 | { 167 | static int count; 168 | return format(".L%d", 
count++); 169 | } 170 | 171 | int min(int a, int b) { return a < b ? a : b; } 172 | 173 | int max(int a, int b) { return a < b ? b : a; } 174 | 175 | int roundup(int n, int b) { return (n + b - 1) & ~(b - 1); } 176 | -------------------------------------------------------------------------------- /cc/cpp.c: -------------------------------------------------------------------------------- 1 | #include "cc.h" 2 | 3 | void skip_newline() 4 | { 5 | while (pop_token_if(tNEWLINE)) 6 | ; 7 | } 8 | 9 | Map *define_table; 10 | 11 | void init_preprocess() { define_table = new_map(); } 12 | 13 | Vector *add_define(char *name, Vector *tokens) 14 | { 15 | if (map_lookup(define_table, name)) 16 | error("duplicate define's name: '%s'", name); 17 | map_insert(define_table, name, tokens); 18 | return tokens; 19 | } 20 | 21 | Vector *lookup_define(char *name) 22 | { 23 | return (Vector *)kv_value(map_lookup(define_table, name)); 24 | } 25 | 26 | void preprocess_skip_until_else_or_endif() 27 | { 28 | // search corresponding #endif or #else 29 | int cnt = 1; 30 | while (1) { 31 | Token *token = pop_token(); 32 | 33 | if (token->kind == tEOF) 34 | error_unexpected_token_str("#endif or #else", token); 35 | 36 | if (token->kind == tNUMBER) { 37 | if (token = pop_token_if(tIDENT)) { 38 | char *ident = token->sval; 39 | if (strcmp("ifdef", ident) == 0 || strcmp("ifndef", ident) == 0) 40 | cnt++; 41 | else if (strcmp("endif", ident) == 0 && --cnt == 0) { 42 | expect_token(tNEWLINE); 43 | break; 44 | } 45 | } 46 | else if ((token = pop_token_if(kELSE)) && cnt - 1 == 0) 47 | break; 48 | } 49 | } 50 | } 51 | 52 | void preprocess_tokens_detail_define() 53 | { 54 | char *name = expect_token(tIDENT)->sval; 55 | Vector *tokens = new_vector(); 56 | while (!match_token(tNEWLINE)) vector_push_back(tokens, pop_token()); 57 | expect_token(tNEWLINE); 58 | add_define(name, tokens); 59 | } 60 | 61 | void preprocess_tokens_detail_include() 62 | { 63 | Token *token = expect_token(tSTRING_LITERAL); 64 | char 
*include_filepath = format("%s%s", token->source->cwd, token->sval); 65 | expect_token(tNEWLINE); 66 | insert_tokens(read_tokens_from_filepath(include_filepath)); 67 | } 68 | 69 | void preprocess_tokens_detail_ifdef_ifndef(const char *keyword) 70 | { 71 | char *name = expect_token(tIDENT)->sval; 72 | expect_token(tNEWLINE); 73 | if (strcmp("ifdef", keyword) == 0 && lookup_define(name) || 74 | strcmp("ifndef", keyword) == 0 && !lookup_define(name)) { 75 | return; 76 | } 77 | 78 | preprocess_skip_until_else_or_endif(); 79 | } 80 | 81 | void preprocess_tokens_detail_number() 82 | { 83 | if (match_token(tIDENT) || match_token(kELSE)) { 84 | Token *token = pop_token(); 85 | char *keyword = token->sval; 86 | // TODO: other preprocess token 87 | if (!keyword && token->kind == kELSE) 88 | preprocess_skip_until_else_or_endif(); 89 | else if (strcmp(keyword, "define") == 0) 90 | preprocess_tokens_detail_define(); 91 | else if (strcmp(keyword, "include") == 0) 92 | preprocess_tokens_detail_include(); 93 | else if ((strcmp(keyword, "ifdef") == 0) || 94 | strcmp(keyword, "ifndef") == 0) 95 | preprocess_tokens_detail_ifdef_ifndef(keyword); 96 | else if (strcmp(keyword, "endif") == 0) 97 | return; // skip endif 98 | else 99 | error("invalid preprocess token"); 100 | } 101 | } 102 | 103 | Vector *preprocess_tokens(Vector *tokens) 104 | { 105 | init_tokenseq(tokens); 106 | init_preprocess(tokens); 107 | 108 | Vector *ntokens = new_vector(); 109 | while (!match_token(tEOF)) { 110 | Token *token = pop_token(); 111 | if (token->kind == tNUMBER) { 112 | preprocess_tokens_detail_number(); 113 | continue; 114 | } 115 | 116 | if (token->kind == tNEWLINE) continue; 117 | 118 | if (token->kind == tIDENT) { 119 | // TODO: should be implemented by function macro? 
120 | // TODO: it can handle only `va_arg(args_var_name, int|char *)` 121 | if (strcmp(token->sval, "__builtin_va_arg") == 0) { 122 | Token *ntoken = clone_token(token); 123 | ntoken->sval = "__builtin_va_arg_int"; 124 | vector_push_back(ntokens, ntoken); 125 | 126 | skip_newline(); 127 | vector_push_back(ntokens, expect_token(tLPAREN)); 128 | skip_newline(); 129 | while (!match_token(tCOMMA)) 130 | vector_push_back(ntokens, pop_token()); 131 | expect_token(tCOMMA); 132 | skip_newline(); 133 | if (pop_token_if(kCHAR)) { 134 | skip_newline(); 135 | expect_token(tSTAR); 136 | ntoken->sval = "__builtin_va_arg_charp"; 137 | } 138 | else { 139 | expect_token(kINT); 140 | } 141 | skip_newline(); 142 | vector_push_back(ntokens, expect_token(tRPAREN)); 143 | continue; 144 | } 145 | 146 | Vector *deftokens = lookup_define(token->sval); 147 | if (deftokens != NULL) { // found: replace tokens 148 | insert_tokens(deftokens); 149 | continue; 150 | } 151 | } 152 | 153 | vector_push_back(ntokens, token); 154 | } 155 | vector_push_back(ntokens, expect_token(tEOF)); 156 | 157 | return ntokens; 158 | } 159 | -------------------------------------------------------------------------------- /test/test_define.c: -------------------------------------------------------------------------------- 1 | int printf(char *str, ...); 2 | 3 | #include "test_define.h" 4 | 5 | #ifndef test001nop 6 | #define test001nop 7 | #define test001int int 8 | int test001() 9 | { 10 | test001int a = 0; 11 | if (a != 0) printf("[ERROR] test001:1: a != 0\n"); 12 | 13 | #define test001test \ 14 | if (a != 0) printf("[ERROR] test001:2: a != 0\n"); 15 | test001test; 16 | 17 | if (test001header != 42) printf("[ERROR] test001:3: test001header != 42\n"); 18 | if (test001iret(334) != 334) 19 | printf("[ERROR] test001:4: test001iret(334) != 334\n"); 20 | } 21 | #ifndef test001nop 22 | #define test001nop 23 | #define test001int int 24 | int test001() 25 | { 26 | test001int a = 0; 27 | if (a != 0) printf("[ERROR] test001:1: 
a != 0\n"); 28 | 29 | #define test001test \ 30 | if (a != 0) printf("[ERROR] test001:2: a != 0\n"); 31 | test001test; 32 | 33 | if (test001header != 42) printf("[ERROR] test001:3: test001header != 42\n"); 34 | if (test001iret(334) != 334) 35 | printf("[ERROR] test001:4: test001iret(334) != 334\n"); 36 | } 37 | #endif 38 | #endif 39 | 40 | #ifndef test001nop 41 | #define test001nop 42 | #define test001int int 43 | int test001() 44 | { 45 | test001int a = 0; 46 | if (a != 0) printf("[ERROR] test001:1: a != 0\n"); 47 | 48 | #define test001test \ 49 | if (a != 0) printf("[ERROR] test001:2: a != 0\n"); 50 | test001test; 51 | 52 | if (test001header != 42) printf("[ERROR] test001:3: test001header != 42\n"); 53 | if (test001iret(334) != 334) 54 | printf("[ERROR] test001:4: test001iret(334) != 334\n"); 55 | } 56 | #endif 57 | 58 | #define test002value 59 | int test002() 60 | { 61 | int a = 0; 62 | #ifndef test002value 63 | printf("[ERROR] test002:1: #ifndef guard is out of order\n"); 64 | #else 65 | a = 1; 66 | #ifdef test002value 67 | // Test nested #ifdef 68 | a = 2; 69 | #else 70 | printf("[ERROR] test002:2: nested #ifdef guard is out of order\n"); 71 | #endif // #ifdef test002value 72 | 73 | #ifndef test002value 74 | printf( 75 | "[ERROR] test002:3: nested and multiple #else guard is out of order\n"); 76 | #else 77 | a = 3; 78 | #endif 79 | if (a != 3) { 80 | printf( 81 | "[ERROR] test002:4: neither #ifdef nor #else is called. Expected " 82 | "'a': 3, got: %d\n", 83 | a); 84 | } 85 | #endif 86 | // #ifndef test002value 87 | 88 | // DO NOT define test002unknown 89 | #ifdef test002unknown 90 | printf("[ERROR] test002:5: #ifdef guard is out of order\n"); 91 | #else 92 | // ok 93 | a = 5; 94 | #endif 95 | if (a != 5) { 96 | printf( 97 | "[ERROR] test002:6: neither #ifdef nor #else is called. 
Expected " 98 | "'a': 5, got: %d\n", 99 | a); 100 | } 101 | 102 | #ifndef test002unknown 103 | #ifdef test002unknown 104 | printf("[ERROR] test002:7: nested #ifdef guard is out of order\n"); 105 | #else 106 | a = 6; 107 | #endif // #ifdef test002unknown 108 | #else 109 | printf("[ERROR] test002:8: nested #ifndef guard is out of order\n"); 110 | #endif 111 | if (a != 6) { 112 | printf( 113 | "[ERROR] test002:9: neither #ifdef nor #else is called. Expected " 114 | "'a': 6, got: %d\n", 115 | a); 116 | } 117 | } 118 | 119 | typedef struct { 120 | int gp_offset; 121 | int fp_offset; 122 | void *overflow_arg_area; 123 | void *reg_save_area; 124 | } va_list[1]; 125 | 126 | #define va_start __builtin_va_start 127 | #define va_end __builtin_va_end 128 | #define va_arg __builtin_va_arg 129 | 130 | void test003allcorrect_va_arg(int a, int b, int c, int d, int e, int f, int g, 131 | int h, ...) 132 | { 133 | va_list args; 134 | va_start(args, h); 135 | if (va_arg(args, int) != a) 136 | printf("[ERROR] test003:1: va_arg(args, int) != a\n"); 137 | if (va_arg(args, int) != b) 138 | printf("[ERROR] test003:2: va_arg(args, int) != b\n"); 139 | if (va_arg(args, int) != c) 140 | printf("[ERROR] test003:3: va_arg(args, int) != c\n"); 141 | if (va_arg(args, int) != d) 142 | printf("[ERROR] test003:4: va_arg(args, int) != d\n"); 143 | if (va_arg(args, int) != e) 144 | printf("[ERROR] test003:5: va_arg(args, int) != e\n"); 145 | if (va_arg(args, int) != f) 146 | printf("[ERROR] test003:6: va_arg(args, int) != f\n"); 147 | if (va_arg(args, int) != g) 148 | printf("[ERROR] test003:7: va_arg(args, int) != g\n"); 149 | if (va_arg(args, int) != h) 150 | printf("[ERROR] test003:8: va_arg(args, int) != h\n"); 151 | va_end(args); 152 | } 153 | 154 | void test003charp(int a, ...) 
155 | { 156 | va_list args; 157 | va_start(args, a); 158 | if (va_arg(args, char *)[0] != a) 159 | printf("[ERROR] test003:9: va_arg(args, char *)[0] != a\n"); 160 | va_end(args); 161 | } 162 | 163 | void test003vaarg_valist(char *a, char *b, va_list ap) 164 | { 165 | if (va_arg(ap, char *)[0] != a[0]) 166 | printf("[ERROR] test003:10: va_arg(ap, char *)[0] != a[0]\n"); 167 | } 168 | 169 | void test003vaarg(char *a, char *b, ...) 170 | { 171 | va_list args; 172 | va_start(args, b); 173 | test003vaarg_valist(a, b, args); 174 | va_end(args); 175 | } 176 | 177 | int test003() 178 | { 179 | test003allcorrect_va_arg(0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7); 180 | test003charp('a', "a"); 181 | test003vaarg("a", "b", "a"); 182 | } 183 | 184 | int main() 185 | { 186 | test001(); 187 | test002(); 188 | test003(); 189 | } 190 | -------------------------------------------------------------------------------- /as/stdlib.c: -------------------------------------------------------------------------------- 1 | #include "as.h" 2 | 3 | int isdigit(int c) { return '0' <= c && c <= '9'; } 4 | 5 | int isalpha(int c) { return ('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z'); } 6 | 7 | int isalnum(int c) { return isdigit(c) || isalpha(c); } 8 | 9 | int isspace(int c) 10 | { 11 | switch (c) { 12 | case ' ': 13 | case '\f': 14 | case '\n': 15 | case '\r': 16 | case '\t': 17 | case '\v': 18 | return 1; 19 | } 20 | return 0; 21 | } 22 | 23 | void *memcpy(void *dst, const void *src, int n) 24 | { 25 | for (int i = 0; i < n; i++) *((char *)dst + i) = *((char *)src + i); 26 | return dst; 27 | } 28 | 29 | char *strcpy(char *dst, const char *src) 30 | { 31 | char *ret = dst; 32 | while (*src != '\0') *dst++ = *src++; 33 | *dst = '\0'; 34 | return ret; 35 | } 36 | 37 | int strcmp(const char *s1, const char *s2) 38 | { 39 | while (*s1 != '\0' && *s1 == *s2) s1++, s2++; 40 | return (*s1 & 0xff) - (*s2 & 0xff); 41 | } 42 | 43 | int strlen(const char *s) 44 | { 45 | int cnt = 0; 46 | while 
(*s++ != '\0') cnt++; 47 | return cnt; 48 | } 49 | 50 | void *memset(void *s, int c, int n) 51 | { 52 | for (int i = 0; i < n; i++) *((char *)s + i) = c; 53 | return s; 54 | } 55 | 56 | int vsprintf(char *str, const char *format, va_list ap) 57 | { 58 | const char *p = format, *org_str = str; 59 | while (*p != '\0') { 60 | if (*p != '%') { 61 | *str++ = *p++; 62 | continue; 63 | } 64 | 65 | p++; 66 | switch (*p++) { 67 | case '\0': 68 | goto end; 69 | 70 | case 'c': 71 | *str++ = va_arg(ap, int); 72 | break; 73 | 74 | case 's': { 75 | char *src = va_arg(ap, char *); 76 | while (*src != '\0') *str++ = *src++; 77 | } break; 78 | 79 | case 'd': { 80 | int ival = va_arg(ap, int); 81 | 82 | if (ival == 0) { 83 | *str++ = '0'; 84 | break; 85 | } 86 | 87 | if (ival < 0) { 88 | *str++ = '-'; 89 | ival *= -1; 90 | } 91 | 92 | int i = 0, buf[256]; // TODO: enough length? 93 | for (; ival != 0; ival /= 10) buf[i++] = ival % 10; 94 | while (--i >= 0) *str++ = '0' + buf[i]; 95 | } break; 96 | 97 | default: 98 | assert(0); 99 | } 100 | } 101 | 102 | end: 103 | *str = '\0'; 104 | 105 | return str - org_str; 106 | } 107 | 108 | void *syscall(int number, ...); 109 | 110 | _Noreturn void exit(int status) 111 | { 112 | // __NR_exit 113 | syscall(60, status); 114 | } 115 | 116 | void *brk(void *addr) 117 | { 118 | // __NR_brk 119 | // printf("initbrk %d\n", addr); 120 | return syscall(12, addr); 121 | } 122 | 123 | void *malloc(int size) 124 | { 125 | static char *malloc_pointer_head = 0; 126 | static int malloc_remaining_size = 0; 127 | 128 | if (malloc_pointer_head == 0) { 129 | char *p = brk(0); 130 | int size = 0x32000000; 131 | brk(p + size); 132 | malloc_pointer_head = p; 133 | malloc_remaining_size = size; 134 | } 135 | 136 | if (malloc_remaining_size < size) { 137 | printf("BUG%d\n", malloc_remaining_size); 138 | printf("BUG%d\n", size); 139 | return NULL; 140 | } 141 | 142 | char *ret = malloc_pointer_head + 4; 143 | malloc_pointer_head += size + 4; 144 | 
malloc_remaining_size -= size + 4; 145 | return ret; 146 | } 147 | 148 | int open(const char *path, int oflag, int mode) 149 | { 150 | return (int)syscall(2, path, oflag, mode); 151 | } 152 | 153 | int close(int fd) { return (int)syscall(3, fd); } 154 | 155 | struct _IO_FILE { 156 | int fd; 157 | }; 158 | 159 | int write(int fd, const void *buf, int count) 160 | { 161 | return (int)syscall(1, fd, buf, count); 162 | } 163 | 164 | int read(int fd, const void *buf, int count) 165 | { 166 | return (int)syscall(0, fd, buf, count); 167 | } 168 |
/*
 * Open `pathname` and wrap the resulting descriptor in a newly allocated
 * FILE.
 *
 * Only the first character of `mode` is examined: 'w' creates or truncates
 * the file for writing, 'r' opens it read-only, anything else trips
 * assert(0).  Returns NULL when the file cannot be opened or when no memory
 * is left for the FILE object.
 */
FILE *fopen(const char *pathname, const char *mode)
{
    int fd;

    if (mode[0] == 'w') {
        // O_CREAT | O_WRONLY | O_TRUNC
        fd = open(pathname, 64 | 1 | 512, 0644);
    }
    else if (mode[0] == 'r') {
        // O_RDONLY
        fd = open(pathname, 0, 0);
    }
    else {
        assert(0);
        return NULL;
    }

    // open() forwards the raw system call result, so a failure is reported
    // as a negative errno value (-2 for a missing file), not only as -1.
    if (fd < 0) return NULL;

    // Allocate only after the open succeeded: this malloc has no free(), so
    // a failed open must not consume arena space.
    FILE *file = (FILE *)malloc(sizeof(FILE));
    if (file == NULL) {
        close(fd);
        return NULL;
    }
    file->fd = fd;
    return file;
}

190 | int fclose(FILE *stream) { return close(stream->fd); } 191 | 192 | int fputc(int c, FILE *stream) 193 | { 194 | char buf[1]; 195 | buf[0] = c & 0xff; 196 | return write(stream->fd, buf, 1); 197 | } 198 | 199 | int fgetc(FILE *stream) 200 | { 201 | char buf[1]; 202 | int res = read(stream->fd, buf, 1); 203 | if (res <= 0) return EOF; 204 | return buf[0] & 0xff; 205 | } 206 | 207 | int fprintf(FILE *stream, const char *format, ...) 208 | { 209 | char buf[512]; // TODO: enough length? 210 | va_list args; 211 | 212 | va_start(args, format); 213 | int cnt = vsprintf(buf, format, args); 214 | va_end(args); 215 | 216 | write(stream->fd, buf, cnt); 217 | return cnt; 218 | } 219 | 220 | int printf(const char *format, ...) 221 | { 222 | char buf[512]; // TODO: enough length? 
223 | va_list args; 224 | 225 | va_start(args, format); 226 | int cnt = vsprintf(buf, format, args); 227 | va_end(args); 228 | 229 | write(1, buf, cnt); 230 | return cnt; 231 | } 232 | 233 | void assert(int cond) 234 | { 235 | if (cond) return; 236 | // fprintf(stderr, "[ASSERT] %d\n", cond); 237 | printf("[ASSERT] %d\n", cond); 238 | exit(EXIT_FAILURE); 239 | } 240 | -------------------------------------------------------------------------------- /cc/stdlib.c: -------------------------------------------------------------------------------- 1 | #include "cc.h" 2 | 3 | int isdigit(int c) { return '0' <= c && c <= '9'; } 4 | 5 | int isalpha(int c) { return ('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z'); } 6 | 7 | int isalnum(int c) { return isdigit(c) || isalpha(c); } 8 | 9 | int isspace(int c) 10 | { 11 | switch (c) { 12 | case ' ': 13 | case '\f': 14 | case '\n': 15 | case '\r': 16 | case '\t': 17 | case '\v': 18 | return 1; 19 | } 20 | return 0; 21 | } 22 | 23 | void *memcpy(void *dst, const void *src, int n) 24 | { 25 | for (int i = 0; i < n; i++) *((char *)dst + i) = *((char *)src + i); 26 | return dst; 27 | } 28 | 29 | char *strcpy(char *dst, const char *src) 30 | { 31 | char *ret = dst; 32 | while (*src != '\0') *dst++ = *src++; 33 | *dst = '\0'; 34 | return ret; 35 | } 36 | 37 | int strcmp(const char *s1, const char *s2) 38 | { 39 | while (*s1 != '\0' && *s1 == *s2) s1++, s2++; 40 | return (*s1 & 0xff) - (*s2 & 0xff); 41 | } 42 | 43 | int strlen(const char *s) 44 | { 45 | int cnt = 0; 46 | while (*s++ != '\0') cnt++; 47 | return cnt; 48 | } 49 | 50 | void *memset(void *s, int c, int n) 51 | { 52 | for (int i = 0; i < n; i++) *((char *)s + i) = c; 53 | return s; 54 | } 55 | 56 | int vsprintf(char *str, const char *format, va_list ap) 57 | { 58 | const char *p = format, *org_str = str; 59 | while (*p != '\0') { 60 | if (*p != '%') { 61 | *str++ = *p++; 62 | continue; 63 | } 64 | 65 | p++; 66 | switch (*p++) { 67 | case '\0': 68 | goto end; 69 | 70 | case 'c': 
71 | *str++ = va_arg(ap, int); 72 | break; 73 | 74 | case 's': { 75 | char *src = va_arg(ap, char *); 76 | while (*src != '\0') *str++ = *src++; 77 | } break; 78 | 79 | case 'd': { 80 | int ival = va_arg(ap, int); 81 | 82 | if (ival == 0) { 83 | *str++ = '0'; 84 | break; 85 | } 86 | 87 | if (ival < 0) { 88 | *str++ = '-'; 89 | ival *= -1; 90 | } 91 | 92 | int i = 0, buf[256]; // TODO: enough length? 93 | for (; ival != 0; ival /= 10) buf[i++] = ival % 10; 94 | while (--i >= 0) *str++ = '0' + buf[i]; 95 | } break; 96 | 97 | default: 98 | assert(0); 99 | } 100 | } 101 | 102 | end: 103 | *str = '\0'; 104 | 105 | return str - org_str; 106 | } 107 | 108 | void *syscall(int number, ...); 109 | 110 | _Noreturn void exit(int status) 111 | { 112 | // __NR_exit 113 | syscall(60, status); 114 | } 115 | 116 | void *brk(void *addr) 117 | { 118 | // __NR_brk 119 | // printf("initbrk %d\n", addr); 120 | return syscall(12, addr); 121 | } 122 | 123 | void *malloc(int size) 124 | { 125 | static char *malloc_pointer_head = 0; 126 | static int malloc_remaining_size = 0; 127 | 128 | if (malloc_pointer_head == 0) { 129 | char *p = brk(0); 130 | int size = 0x32000000; 131 | brk(p + size); 132 | malloc_pointer_head = p; 133 | malloc_remaining_size = size; 134 | } 135 | 136 | if (malloc_remaining_size < size) { 137 | printf("BUG%d\n", malloc_remaining_size); 138 | printf("BUG%d\n", size); 139 | return NULL; 140 | } 141 | 142 | char *ret = malloc_pointer_head + 4; 143 | malloc_pointer_head += size + 4; 144 | malloc_remaining_size -= size + 4; 145 | return ret; 146 | } 147 | 148 | int open(const char *path, int oflag, int mode) 149 | { 150 | return (int)syscall(2, path, oflag, mode); 151 | } 152 | 153 | int close(int fd) { return (int)syscall(3, fd); } 154 | 155 | struct _IO_FILE { 156 | int fd; 157 | }; 158 | 159 | int write(int fd, const void *buf, int count) 160 | { 161 | return (int)syscall(1, fd, buf, count); 162 | } 163 | 164 | int read(int fd, const void *buf, int count) 165 | { 166 | 
return (int)syscall(0, fd, buf, count); 167 | } 168 |
/*
 * Open `pathname` and wrap the resulting descriptor in a newly allocated
 * FILE.
 *
 * Only the first character of `mode` is examined: 'w' creates or truncates
 * the file for writing, 'r' opens it read-only, anything else trips
 * assert(0).  Returns NULL when the file cannot be opened or when no memory
 * is left for the FILE object.
 */
FILE *fopen(const char *pathname, const char *mode)
{
    int fd;

    if (mode[0] == 'w') {
        // O_CREAT | O_WRONLY | O_TRUNC
        fd = open(pathname, 64 | 1 | 512, 0644);
    }
    else if (mode[0] == 'r') {
        // O_RDONLY
        fd = open(pathname, 0, 0);
    }
    else {
        assert(0);
        return NULL;
    }

    // open() forwards the raw system call result, so a failure is reported
    // as a negative errno value (-2 for a missing file), not only as -1.
    if (fd < 0) return NULL;

    // Allocate only after the open succeeded: this malloc has no free(), so
    // a failed open must not consume arena space.
    FILE *file = (FILE *)malloc(sizeof(FILE));
    if (file == NULL) {
        close(fd);
        return NULL;
    }
    file->fd = fd;
    return file;
}

190 | int fclose(FILE *stream) { return close(stream->fd); } 191 | 192 | int fputc(int c, FILE *stream) 193 | { 194 | char buf[1]; 195 | buf[0] = c & 0xff; 196 | return write(stream->fd, buf, 1); 197 | } 198 | 199 | int fgetc(FILE *stream) 200 | { 201 | char buf[1]; 202 | int res = read(stream->fd, buf, 1); 203 | if (res <= 0) return EOF; 204 | return buf[0] & 0xff; 205 | } 206 | 207 | int fprintf(FILE *stream, const char *format, ...) 208 | { 209 | char buf[512]; // TODO: enough length? 210 | va_list args; 211 | 212 | va_start(args, format); 213 | int cnt = vsprintf(buf, format, args); 214 | va_end(args); 215 | 216 | write(stream->fd, buf, cnt); 217 | return cnt; 218 | } 219 | 220 | int printf(const char *format, ...) 221 | { 222 | char buf[512]; // TODO: enough length? 
223 | va_list args; 224 | 225 | va_start(args, format); 226 | int cnt = vsprintf(buf, format, args); 227 | va_end(args); 228 | 229 | write(1, buf, cnt); 230 | return cnt; 231 | } 232 | 233 | void assert(int cond) 234 | { 235 | if (cond) return; 236 | // fprintf(stderr, "[ASSERT] %d\n", cond); 237 | printf("[ASSERT] %d\n", cond); 238 | exit(EXIT_FAILURE); 239 | } 240 | -------------------------------------------------------------------------------- /ld/stdlib.c: -------------------------------------------------------------------------------- 1 | #include "ld.h" 2 | 3 | int isdigit(int c) { return '0' <= c && c <= '9'; } 4 | 5 | int isalpha(int c) { return ('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z'); } 6 | 7 | int isalnum(int c) { return isdigit(c) || isalpha(c); } 8 | 9 | int isspace(int c) 10 | { 11 | switch (c) { 12 | case ' ': 13 | case '\f': 14 | case '\n': 15 | case '\r': 16 | case '\t': 17 | case '\v': 18 | return 1; 19 | } 20 | return 0; 21 | } 22 | 23 | void *memcpy(void *dst, const void *src, int n) 24 | { 25 | for (int i = 0; i < n; i++) *((char *)dst + i) = *((char *)src + i); 26 | return dst; 27 | } 28 | 29 | char *strcpy(char *dst, const char *src) 30 | { 31 | char *ret = dst; 32 | while (*src != '\0') *dst++ = *src++; 33 | *dst = '\0'; 34 | return ret; 35 | } 36 | 37 | int strcmp(const char *s1, const char *s2) 38 | { 39 | while (*s1 != '\0' && *s1 == *s2) s1++, s2++; 40 | return (*s1 & 0xff) - (*s2 & 0xff); 41 | } 42 | 43 | int strlen(const char *s) 44 | { 45 | int cnt = 0; 46 | while (*s++ != '\0') cnt++; 47 | return cnt; 48 | } 49 | 50 | void *memset(void *s, int c, int n) 51 | { 52 | for (int i = 0; i < n; i++) *((char *)s + i) = c; 53 | return s; 54 | } 55 | 56 | int vsprintf(char *str, const char *format, va_list ap) 57 | { 58 | const char *p = format, *org_str = str; 59 | while (*p != '\0') { 60 | if (*p != '%') { 61 | *str++ = *p++; 62 | continue; 63 | } 64 | 65 | p++; 66 | switch (*p++) { 67 | case '\0': 68 | goto end; 69 | 70 | case 'c': 
71 | *str++ = va_arg(ap, int); 72 | break; 73 | 74 | case 's': { 75 | char *src = va_arg(ap, char *); 76 | while (*src != '\0') *str++ = *src++; 77 | } break; 78 | 79 | case 'd': { 80 | int ival = va_arg(ap, int); 81 | 82 | if (ival == 0) { 83 | *str++ = '0'; 84 | break; 85 | } 86 | 87 | if (ival < 0) { 88 | *str++ = '-'; 89 | ival *= -1; 90 | } 91 | 92 | int i = 0, buf[256]; // TODO: enough length? 93 | for (; ival != 0; ival /= 10) buf[i++] = ival % 10; 94 | while (--i >= 0) *str++ = '0' + buf[i]; 95 | } break; 96 | 97 | default: 98 | assert(0); 99 | } 100 | } 101 | 102 | end: 103 | *str = '\0'; 104 | 105 | return str - org_str; 106 | } 107 | 108 | void *syscall(int number, ...); 109 | 110 | _Noreturn void exit(int status) 111 | { 112 | // __NR_exit 113 | syscall(60, status); 114 | } 115 | 116 | void *brk(void *addr) 117 | { 118 | // __NR_brk 119 | // printf("initbrk %d\n", addr); 120 | return syscall(12, addr); 121 | } 122 | 123 | void *malloc(int size) 124 | { 125 | static char *malloc_pointer_head = 0; 126 | static int malloc_remaining_size = 0; 127 | 128 | if (malloc_pointer_head == 0) { 129 | char *p = brk(0); 130 | int size = 0x32000000; 131 | brk(p + size); 132 | malloc_pointer_head = p; 133 | malloc_remaining_size = size; 134 | } 135 | 136 | if (malloc_remaining_size < size) { 137 | printf("BUG%d\n", malloc_remaining_size); 138 | printf("BUG%d\n", size); 139 | return NULL; 140 | } 141 | 142 | char *ret = malloc_pointer_head + 4; 143 | malloc_pointer_head += size + 4; 144 | malloc_remaining_size -= size + 4; 145 | return ret; 146 | } 147 | 148 | int open(const char *path, int oflag, int mode) 149 | { 150 | return (int)syscall(2, path, oflag, mode); 151 | } 152 | 153 | int close(int fd) { return (int)syscall(3, fd); } 154 | 155 | struct _IO_FILE { 156 | int fd; 157 | }; 158 | 159 | int write(int fd, const void *buf, int count) 160 | { 161 | return (int)syscall(1, fd, buf, count); 162 | } 163 | 164 | int read(int fd, const void *buf, int count) 165 | { 166 | 
return (int)syscall(0, fd, buf, count); 167 | } 168 | // open() is a raw syscall here: failure is any negative value (-errno). 169 | FILE *fopen(const char *pathname, const char *mode) 170 | { 171 | if (mode[0] == 'w') { 172 | FILE *file = (FILE *)malloc(sizeof(FILE)); 173 | // O_CREAT | O_WRONLY | O_TRUNC 174 | file->fd = open(pathname, 64 | 1 | 512, 0644); 175 | if (file->fd < 0) return NULL; 176 | return file; 177 | } 178 | 179 | if (mode[0] == 'r') { 180 | FILE *file = (FILE *)malloc(sizeof(FILE)); 181 | // O_RDONLY 182 | file->fd = open(pathname, 0, 0); 183 | if (file->fd < 0) return NULL; 184 | return file; 185 | } 186 | 187 | assert(0); 188 | } 189 | 190 | int fclose(FILE *stream) { return close(stream->fd); } 191 | 192 | int fputc(int c, FILE *stream) 193 | { 194 | char buf[1]; 195 | buf[0] = c & 0xff; 196 | return write(stream->fd, buf, 1); 197 | } 198 | 199 | int fgetc(FILE *stream) 200 | { 201 | char buf[1]; 202 | int res = read(stream->fd, buf, 1); 203 | if (res <= 0) return EOF; 204 | return buf[0] & 0xff; 205 | } 206 | 207 | int fprintf(FILE *stream, const char *format, ...) 208 | { 209 | char buf[512]; // TODO: enough length? 210 | va_list args; 211 | 212 | va_start(args, format); 213 | int cnt = vsprintf(buf, format, args); 214 | va_end(args); 215 | 216 | write(stream->fd, buf, cnt); 217 | return cnt; 218 | } 219 | 220 | int printf(const char *format, ...) 221 | { 222 | char buf[512]; // TODO: enough length? 
223 | va_list args; 224 | 225 | va_start(args, format); 226 | int cnt = vsprintf(buf, format, args); 227 | va_end(args); 228 | 229 | write(1, buf, cnt); 230 | return cnt; 231 | } 232 | 233 | void assert(int cond) 234 | { 235 | if (cond) return; 236 | // fprintf(stderr, "[ASSERT] %d\n", cond); 237 | printf("[ASSERT] %d\n", cond); 238 | exit(EXIT_FAILURE); 239 | } 240 | -------------------------------------------------------------------------------- /test/stdlib.c: -------------------------------------------------------------------------------- 1 | #ifdef __GNUC__ 2 | typedef __builtin_va_list va_list; 3 | #else 4 | #endif 5 | #ifndef __GNUC__ 6 | typedef struct { 7 | int gp_offset; 8 | int fp_offset; 9 | void *overflow_arg_area; 10 | void *reg_save_area; 11 | } va_list[1]; 12 | #endif 13 | #define va_start __builtin_va_start 14 | #define va_end __builtin_va_end 15 | #define va_arg __builtin_va_arg 16 | 17 | typedef struct _IO_FILE FILE; 18 | // extern FILE *stdin; /* Standard input stream. */ 19 | // extern FILE *stdout; /* Standard output stream. */ 20 | // extern FILE *stderr; /* Standard error output stream. */ 21 | #define NULL 0 22 | #define EOF (-1) 23 | FILE *fopen(const char *pathname, const char *mode); 24 | int fclose(FILE *stream); 25 | int fputc(int c, FILE *stream); 26 | int fgetc(FILE *stream); 27 | int fprintf(FILE *stream, const char *format, ...); 28 | int printf(const char *format, ...); 29 | int vsprintf(char *str, const char *format, va_list ap); 30 | #define EXIT_FAILURE 1 /* Failing exit status. */ 31 | #define EXIT_SUCCESS 0 /* Successful exit status. 
*/ 32 | _Noreturn void exit(int status); 33 | void *malloc(int size); 34 | int strlen(const char *s); 35 | int strcmp(const char *s1, const char *s2); 36 | char *strcpy(char *dest, const char *src); 37 | int isalpha(int c); 38 | int isalnum(int c); 39 | int isdigit(int c); 40 | int isspace(int c); 41 | void *memcpy(void *dest, const void *src, int n); 42 | void *memset(void *s, int c, int n); 43 | void assert(int cond); 44 | 45 | int isdigit(int c) { return '0' <= c && c <= '9'; } 46 | 47 | int isalpha(int c) { return ('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z'); } 48 | 49 | int isalnum(int c) { return isdigit(c) || isalpha(c); } 50 | 51 | int isspace(int c) 52 | { 53 | switch (c) { 54 | case ' ': 55 | case '\f': 56 | case '\n': 57 | case '\r': 58 | case '\t': 59 | case '\v': 60 | return 1; 61 | } 62 | return 0; 63 | } 64 | 65 | void *memcpy(void *dst, const void *src, int n) 66 | { 67 | for (int i = 0; i < n; i++) *((char *)dst + i) = *((char *)src + i); 68 | return dst; 69 | } 70 | 71 | char *strcpy(char *dst, const char *src) 72 | { 73 | char *ret = dst; 74 | while (*src != '\0') *dst++ = *src++; 75 | *dst = '\0'; 76 | return ret; 77 | } 78 | 79 | int strcmp(const char *s1, const char *s2) 80 | { 81 | while (*s1 != '\0' && *s1 == *s2) s1++, s2++; 82 | return (*s1 & 0xff) - (*s2 & 0xff); 83 | } 84 | 85 | int strlen(const char *s) 86 | { 87 | int cnt = 0; 88 | while (*s++ != '\0') cnt++; 89 | return cnt; 90 | } 91 | 92 | void *memset(void *s, int c, int n) 93 | { 94 | for (int i = 0; i < n; i++) *((char *)s + i) = c; 95 | return s; 96 | } 97 | 98 | int vsprintf(char *str, const char *format, va_list ap) 99 | { 100 | const char *p = format, *org_str = str; 101 | while (*p != '\0') { 102 | if (*p != '%') { 103 | *str++ = *p++; 104 | continue; 105 | } 106 | 107 | p++; 108 | switch (*p++) { 109 | case '\0': 110 | goto end; 111 | 112 | case 'c': 113 | *str++ = va_arg(ap, int); 114 | break; 115 | 116 | case 's': { 117 | char *src = va_arg(ap, char *); 118 | while 
(*src != '\0') *str++ = *src++; 119 | } break; 120 | 121 | case 'd': { 122 | int ival = va_arg(ap, int); 123 | 124 | if (ival == 0) { 125 | *str++ = '0'; 126 | break; 127 | } 128 | 129 | if (ival < 0) { 130 | *str++ = '-'; 131 | ival *= -1; 132 | } 133 | 134 | int i = 0, buf[256]; // TODO: enough length? 135 | for (; ival != 0; ival /= 10) buf[i++] = ival % 10; 136 | while (--i >= 0) *str++ = '0' + buf[i]; 137 | } break; 138 | 139 | default: 140 | assert(0); 141 | } 142 | } 143 | 144 | end: 145 | *str = '\0'; 146 | 147 | return str - org_str; 148 | } 149 | 150 | void *syscall(int number, ...); 151 | 152 | _Noreturn void exit(int status) 153 | { 154 | // __NR_exit 155 | syscall(60, status); 156 | } 157 | 158 | void *brk(void *addr) 159 | { 160 | // __NR_brk 161 | // printf("initbrk %d\n", addr); 162 | return syscall(12, addr); 163 | } 164 | // Bump allocator: hands out blocks from one arena reserved with brk(). 165 | void *malloc(int size) 166 | { 167 | static char *malloc_pointer_head = 0; 168 | static int malloc_remaining_size = 0; 169 | 170 | if (malloc_pointer_head == 0) { 171 | char *p = brk(0); 172 | int size = 0x32000000; 173 | char *q = brk(p + size); 174 | // printf("init %d\n", p); 175 | // printf("init %d\n", q); 176 | malloc_pointer_head = p; 177 | malloc_remaining_size = size; 178 | } 179 | // Every block also consumes a 4-byte pad, so require size + 4 bytes. 180 | if (size < 0 || malloc_remaining_size - 4 < size) { 181 | printf("BUG%d\n", malloc_remaining_size); 182 | printf("BUG%d\n", size); 183 | return NULL; 184 | } 185 | 186 | char *ret = malloc_pointer_head + 4; 187 | malloc_pointer_head += size + 4; 188 | malloc_remaining_size -= size + 4; 189 | 190 | // printf("%d\n", malloc_remaining_size); 191 | // printf("%d\n", size); 192 | // printf("%d\n", ret); 193 | // printf("%d\n", ret + size); 194 | return ret; 195 | } 196 | 197 | int open(const char *path, int oflag, int mode) 198 | { 199 | return (int)syscall(2, path, oflag, mode); 200 | } 201 | 202 | int close(int fd) { return (int)syscall(3, fd); } 203 | 204 | struct _IO_FILE { 205 | int fd; 206 | }; 207 | 208 | int write(int fd, const void *buf, int 
count) 209 | { 210 | return (int)syscall(1, fd, buf, count); 211 | } 212 | 213 | int read(int fd, const void *buf, int count) 214 | { 215 | return (int)syscall(0, fd, buf, count); 216 | } 217 | // open() forwards the syscall result, so treat any negative fd as failure. 218 | FILE *fopen(const char *pathname, const char *mode) 219 | { 220 | if (mode[0] == 'w') { 221 | FILE *file = (FILE *)malloc(sizeof(FILE)); 222 | // O_CREAT | O_WRONLY | O_TRUNC 223 | file->fd = open(pathname, 64 | 1 | 512, 0644); 224 | if (file->fd < 0) return NULL; 225 | return file; 226 | } 227 | 228 | if (mode[0] == 'r') { 229 | FILE *file = (FILE *)malloc(sizeof(FILE)); 230 | // O_RDONLY 231 | file->fd = open(pathname, 0, 0); 232 | if (file->fd < 0) return NULL; 233 | return file; 234 | } 235 | 236 | assert(0); 237 | } 238 | 239 | int fclose(FILE *stream) { return close(stream->fd); } 240 | 241 | int fputc(int c, FILE *stream) 242 | { 243 | char buf[1]; 244 | buf[0] = c & 0xff; 245 | return write(stream->fd, buf, 1); 246 | } 247 | 248 | int fgetc(FILE *stream) 249 | { 250 | char buf[1]; 251 | int res = read(stream->fd, buf, 1); 252 | if (res <= 0) return EOF; 253 | return buf[0] & 0xff; 254 | } 255 | 256 | int fprintf(FILE *stream, const char *format, ...) 257 | { 258 | char buf[512]; // TODO: enough length? 259 | va_list args; 260 | 261 | va_start(args, format); 262 | int cnt = vsprintf(buf, format, args); 263 | va_end(args); 264 | 265 | write(stream->fd, buf, cnt); 266 | return cnt; 267 | } 268 | 269 | int printf(const char *format, ...) 270 | { 271 | char buf[512]; // TODO: enough length? 
272 | va_list args; 273 | 274 | va_start(args, format); 275 | int cnt = vsprintf(buf, format, args); 276 | va_end(args); 277 | 278 | write(1, buf, cnt); 279 | return cnt; 280 | } 281 | 282 | void assert(int cond) 283 | { 284 | if (cond) return; 285 | // fprintf(stderr, "[ASSERT] %d\n", cond); 286 | printf("[ASSERT] %d\n", cond); 287 | exit(EXIT_FAILURE); 288 | } 289 | -------------------------------------------------------------------------------- /as/utility.c: -------------------------------------------------------------------------------- 1 | #include "as.h" 2 | 3 | _Noreturn void error(const char *msg, ...) 4 | { 5 | va_list args; 6 | va_start(args, msg); 7 | char *str = vformat(msg, args); 8 | va_end(args); 9 | 10 | // fprintf(stderr, "[ERROR] %s\n", str); 11 | printf("[ERROR] %s\n", str); 12 | // fprintf(stderr, "[DEBUG] %s, %d\n", __FILE__, __LINE__); 13 | exit(EXIT_FAILURE); 14 | } 15 | 16 | void warn(const char *msg, ...) 17 | { 18 | va_list args; 19 | va_start(args, msg); 20 | char *str = vformat(msg, args); 21 | va_end(args); 22 | 23 | // fprintf(stderr, "[WARN] %s\n", str); 24 | printf("[WARN] %s\n", str); 25 | // fprintf(stderr, "[DEBUG] %s, %d\n", __FILE__, __LINE__); 26 | } 27 | 28 | void *safe_malloc(int size) 29 | { 30 | void *ptr; 31 | 32 | ptr = malloc(size); 33 | if (ptr == NULL) error("malloc failed."); 34 | return ptr; 35 | } 36 | 37 | char *new_str(const char *src) 38 | { 39 | char *ret = safe_malloc(strlen(src) + 1); 40 | strcpy(ret, src); 41 | return ret; 42 | } 43 | 44 | int *new_int(int src) 45 | { 46 | int *ret = safe_malloc(sizeof(int)); 47 | *ret = src; 48 | return ret; 49 | } 50 | 51 | char *vformat(const char *src, va_list ap) 52 | { 53 | char buf[512]; // TODO: enough length? 54 | vsprintf(buf, src, ap); 55 | 56 | char *ret = safe_malloc(strlen(buf) + 1); 57 | strcpy(ret, buf); 58 | return ret; 59 | } 60 | 61 | char *format(const char *src, ...) 
62 | { 63 | va_list args; 64 | va_start(args, src); 65 | char *ret = vformat(src, args); 66 | va_end(args); 67 | return ret; 68 | } 69 | 70 | int unescape_char(int src) 71 | { 72 | static int table[128]; 73 | if (table[0] == 0) { 74 | memset(table, 255, sizeof(table)); 75 | 76 | table['n'] = '\n'; 77 | table['r'] = '\r'; 78 | table['t'] = '\t'; 79 | table['0'] = '\0'; 80 | table['a'] = '\a'; 81 | table['b'] = '\b'; 82 | table['v'] = '\v'; 83 | table['f'] = '\f'; 84 | } 85 | if (src < 0 || src >= 128) return src; // outside the lookup table 86 | int ch = table[src]; 87 | return ch == -1 ? src : ch; 88 | } 89 | // Backslash-escapes control characters and double quotes in str[0..size). 90 | char *escape_string(char *str, int size) 91 | { 92 | StringBuilder *sb = new_string_builder(); 93 | for (int i = 0; i < size; i++) { 94 | char ch = str[i]; 95 | 96 | switch (ch) { 97 | case '\n': 98 | string_builder_append(sb, '\\'); 99 | string_builder_append(sb, 'n'); 100 | break; 101 | 102 | case '\r': 103 | string_builder_append(sb, '\\'); 104 | string_builder_append(sb, 'r'); 105 | break; 106 | 107 | case '\t': 108 | string_builder_append(sb, '\\'); 109 | string_builder_append(sb, 't'); 110 | break; 111 | 112 | case '\0': 113 | string_builder_append(sb, '\\'); 114 | string_builder_append(sb, '0'); 115 | break; 116 | 117 | case '\a': 118 | string_builder_append(sb, '\\'); 119 | string_builder_append(sb, 'a'); 120 | break; 121 | 122 | case '\b': 123 | string_builder_append(sb, '\\'); 124 | string_builder_append(sb, 'b'); 125 | break; 126 | 127 | case '\v': 128 | string_builder_append(sb, '\\'); 129 | string_builder_append(sb, 'v'); 130 | break; 131 | 132 | case '\f': 133 | string_builder_append(sb, '\\'); 134 | string_builder_append(sb, 'f'); 135 | break; 136 | 137 | case '"': 138 | string_builder_append(sb, '\\'); 139 | string_builder_append(sb, '"'); 140 | break; 141 | 142 | default: 143 | string_builder_append(sb, ch); 144 | break; 145 | } 146 | } 147 | 148 | return string_builder_get(sb); 149 | } 150 | 151 | int min(int a, int b) { return a < b ? a : b; } 152 | 153 | int max(int a, int b) { return a < b ? 
b : a; } 154 | 155 | int roundup(int n, int b) { return (n + b - 1) & ~(b - 1); } 156 | 157 | char *read_entire_file(char *filepath) 158 | { 159 | FILE *fh = fopen(filepath, "r"); 160 | if (fh == NULL) error("no such file: '%s'", filepath); 161 | 162 | // read the file all 163 | StringBuilder *sb = new_string_builder(); 164 | int ch; 165 | while ((ch = fgetc(fh)) != EOF) string_builder_append(sb, ch); 166 | return string_builder_get(sb); 167 | 168 | fclose(fh); 169 | } 170 | 171 | int is_register_code(Code *code) 172 | { 173 | if (code == NULL) return 0; 174 | return !(code->kind & INST_) && 175 | code->kind & (REG_8 | REG_16 | REG_32 | REG_64); 176 | } 177 | 178 | int reg_of_nbyte(int nbyte, int reg) 179 | { 180 | switch (nbyte) { 181 | case 1: 182 | return (reg & 31) | REG_8; 183 | case 2: 184 | return (reg & 31) | REG_16; 185 | case 4: 186 | return (reg & 31) | REG_32; 187 | case 8: 188 | return (reg & 31) | REG_64; 189 | } 190 | assert(0); 191 | } 192 | 193 | Code *nbyte_reg(int nbyte, int reg) 194 | { 195 | return new_code(reg_of_nbyte(nbyte, reg)); 196 | } 197 | 198 | Code *str2reg(char *src) 199 | { 200 | Map *map = new_map(); 201 | 202 | map_insert(map, "%al", nbyte_reg(1, 0)); 203 | map_insert(map, "%dil", nbyte_reg(1, 1)); 204 | map_insert(map, "%sil", nbyte_reg(1, 2)); 205 | map_insert(map, "%dl", nbyte_reg(1, 3)); 206 | map_insert(map, "%cl", nbyte_reg(1, 4)); 207 | map_insert(map, "%r8b", nbyte_reg(1, 5)); 208 | map_insert(map, "%r9b", nbyte_reg(1, 6)); 209 | map_insert(map, "%r10b", nbyte_reg(1, 7)); 210 | map_insert(map, "%r11b", nbyte_reg(1, 8)); 211 | map_insert(map, "%r12b", nbyte_reg(1, 9)); 212 | map_insert(map, "%r13b", nbyte_reg(1, 10)); 213 | map_insert(map, "%r14b", nbyte_reg(1, 11)); 214 | map_insert(map, "%r15b", nbyte_reg(1, 12)); 215 | 216 | map_insert(map, "%ax", nbyte_reg(2, 0)); 217 | map_insert(map, "%di", nbyte_reg(2, 1)); 218 | map_insert(map, "%si", nbyte_reg(2, 2)); 219 | map_insert(map, "%dx", nbyte_reg(2, 3)); 220 | 
map_insert(map, "%cx", nbyte_reg(2, 4)); 221 | map_insert(map, "%r8w", nbyte_reg(2, 5)); 222 | map_insert(map, "%r9w", nbyte_reg(2, 6)); 223 | map_insert(map, "%r10w", nbyte_reg(2, 7)); 224 | map_insert(map, "%r11w", nbyte_reg(2, 8)); 225 | map_insert(map, "%r12w", nbyte_reg(2, 9)); 226 | map_insert(map, "%r13w", nbyte_reg(2, 10)); 227 | map_insert(map, "%r14w", nbyte_reg(2, 11)); 228 | map_insert(map, "%r15w", nbyte_reg(2, 12)); 229 | 230 | map_insert(map, "%eax", nbyte_reg(4, 0)); 231 | map_insert(map, "%edi", nbyte_reg(4, 1)); 232 | map_insert(map, "%esi", nbyte_reg(4, 2)); 233 | map_insert(map, "%edx", nbyte_reg(4, 3)); 234 | map_insert(map, "%ecx", nbyte_reg(4, 4)); 235 | map_insert(map, "%r8d", nbyte_reg(4, 5)); 236 | map_insert(map, "%r9d", nbyte_reg(4, 6)); 237 | map_insert(map, "%r10d", nbyte_reg(4, 7)); 238 | map_insert(map, "%r11d", nbyte_reg(4, 8)); 239 | map_insert(map, "%r12d", nbyte_reg(4, 9)); 240 | map_insert(map, "%r13d", nbyte_reg(4, 10)); 241 | map_insert(map, "%r14d", nbyte_reg(4, 11)); 242 | map_insert(map, "%r15d", nbyte_reg(4, 12)); 243 | 244 | map_insert(map, "%rax", nbyte_reg(8, 0)); 245 | map_insert(map, "%rdi", nbyte_reg(8, 1)); 246 | map_insert(map, "%rsi", nbyte_reg(8, 2)); 247 | map_insert(map, "%rdx", nbyte_reg(8, 3)); 248 | map_insert(map, "%rcx", nbyte_reg(8, 4)); 249 | map_insert(map, "%r8", nbyte_reg(8, 5)); 250 | map_insert(map, "%r9", nbyte_reg(8, 6)); 251 | map_insert(map, "%r10", nbyte_reg(8, 7)); 252 | map_insert(map, "%r11", nbyte_reg(8, 8)); 253 | map_insert(map, "%r12", nbyte_reg(8, 9)); 254 | map_insert(map, "%r13", nbyte_reg(8, 10)); 255 | map_insert(map, "%r14", nbyte_reg(8, 11)); 256 | map_insert(map, "%r15", nbyte_reg(8, 12)); 257 | 258 | map_insert(map, "%rip", RIP()); 259 | map_insert(map, "%rbp", RBP()); 260 | map_insert(map, "%rsp", RSP()); 261 | 262 | KeyValue *kv = map_lookup(map, src); 263 | assert(kv != NULL); 264 | return kv_value(kv); 265 | } 266 | 
-------------------------------------------------------------------------------- /as/as.h: -------------------------------------------------------------------------------- 1 | #ifndef AQCC_AQCC_H 2 | #define AQCC_AQCC_H 3 | 4 | //#include 5 | //#include 6 | //#include 7 | //#include 8 | //#include 9 | //#include 10 | 11 | #ifdef __GNUC__ 12 | typedef __builtin_va_list va_list; 13 | #else 14 | #endif 15 | #ifndef __GNUC__ 16 | typedef struct { 17 | int gp_offset; 18 | int fp_offset; 19 | void *overflow_arg_area; 20 | void *reg_save_area; 21 | } va_list[1]; 22 | #endif 23 | #define va_start __builtin_va_start 24 | #define va_end __builtin_va_end 25 | #define va_arg __builtin_va_arg 26 | 27 | typedef struct _IO_FILE FILE; 28 | // extern FILE *stdin; /* Standard input stream. */ 29 | // extern FILE *stdout; /* Standard output stream. */ 30 | // extern FILE *stderr; /* Standard error output stream. */ 31 | #define NULL 0 32 | #define EOF (-1) 33 | FILE *fopen(const char *pathname, const char *mode); 34 | int fclose(FILE *stream); 35 | int fputc(int c, FILE *stream); 36 | int fgetc(FILE *stream); 37 | int fprintf(FILE *stream, const char *format, ...); 38 | int printf(const char *format, ...); 39 | int vsprintf(char *str, const char *format, va_list ap); 40 | #define EXIT_FAILURE 1 /* Failing exit status. */ 41 | #define EXIT_SUCCESS 0 /* Successful exit status. 
*/ 42 | _Noreturn void exit(int status); 43 | void *malloc(int size); 44 | int strlen(const char *s); 45 | int strcmp(const char *s1, const char *s2); 46 | char *strcpy(char *dest, const char *src); 47 | int isalpha(int c); 48 | int isalnum(int c); 49 | int isdigit(int c); 50 | int isspace(int c); 51 | void *memcpy(void *dest, const void *src, int n); 52 | void *memset(void *s, int c, int n); 53 | void assert(int cond); 54 | 55 | // vector.c 56 | typedef struct Vector Vector; 57 | Vector *new_vector(); 58 | Vector *new_vector_from_scalar(void *scalar); 59 | void vector_push_back(Vector *vec, void *item); 60 | void *vector_get(Vector *vec, int i); 61 | int vector_size(Vector *vec); 62 | void *vector_set(Vector *vec, int i, void *item); 63 | void vector_push_back_vector(Vector *vec, Vector *src); 64 | Vector *clone_vector(Vector *src); 65 | 66 | // map.c 67 | typedef struct KeyValue KeyValue; 68 | typedef struct Map Map; 69 | Map *new_map(); 70 | int map_size(Map *map); 71 | KeyValue *map_insert(Map *map, const char *key, void *item); 72 | KeyValue *map_lookup(Map *map, const char *key); 73 | const char *kv_key(KeyValue *kv); 74 | void *kv_value(KeyValue *kv); 75 | 76 | // string_builder.c 77 | typedef struct StringBuilder StringBuilder; 78 | StringBuilder *new_string_builder(); 79 | char string_builder_append(StringBuilder *sb, char ch); 80 | char *string_builder_get(StringBuilder *sb); 81 | int string_builder_size(StringBuilder *sb); 82 | 83 | typedef struct { 84 | int line, column; 85 | Vector *line2length; 86 | char *src; 87 | // example: "/tmp/1.c" -> cwd: "/tmp/" 88 | char *cwd; // current working directory with '/' 89 | char *filepath; 90 | } Source; 91 | 92 | enum { 93 | REG_8 = 1 << 5, 94 | REG_AL = 0 | REG_8, 95 | REG_DIL, 96 | REG_SIL, 97 | REG_DL, 98 | REG_CL, 99 | REG_R8B, 100 | REG_R9B, 101 | REG_R10B, 102 | REG_R11B, 103 | REG_R12B, 104 | REG_R13B, 105 | REG_R14B, 106 | REG_R15B, 107 | REG_BPL, 108 | REG_SPL, 109 | 110 | REG_16 = 1 << 6, 111 | REG_AX = 
0 | REG_16, 112 | REG_DI, 113 | REG_SI, 114 | REG_DX, 115 | REG_CX, 116 | REG_R8W, 117 | REG_R9W, 118 | REG_R10W, 119 | REG_R11W, 120 | REG_R12W, 121 | REG_R13W, 122 | REG_R14W, 123 | REG_R15W, 124 | REG_BP, 125 | REG_SP, 126 | 127 | REG_32 = 1 << 7, 128 | REG_EAX = 0 | REG_32, 129 | REG_EDI, 130 | REG_ESI, 131 | REG_EDX, 132 | REG_ECX, 133 | REG_R8D, 134 | REG_R9D, 135 | REG_R10D, 136 | REG_R11D, 137 | REG_R12D, 138 | REG_R13D, 139 | REG_R14D, 140 | REG_R15D, 141 | REG_EBP, 142 | REG_ESP, 143 | 144 | REG_64 = 1 << 8, 145 | REG_RAX = 0 | REG_64, 146 | REG_RDI, 147 | REG_RSI, 148 | REG_RDX, 149 | REG_RCX, 150 | REG_R8, 151 | REG_R9, 152 | REG_R10, 153 | REG_R11, 154 | REG_R12, 155 | REG_R13, 156 | REG_R14, 157 | REG_R15, 158 | REG_RBP, 159 | REG_RSP, 160 | 161 | REG_RIP, 162 | 163 | INST_ = 1 << 9, 164 | INST_MOV = 0 | INST_, 165 | INST_MOVL, 166 | INST_MOVSBL, 167 | INST_MOVSLQ, 168 | INST_MOVZB, 169 | INST_LEA, 170 | INST_PUSH, 171 | INST_POP, 172 | INST_ADD, 173 | INST_ADDQ, 174 | INST_SUB, 175 | INST_IMUL, 176 | INST_IDIV, 177 | INST_SAR, 178 | INST_SAL, 179 | INST_NEG, 180 | INST_NOT, 181 | INST_CMP, 182 | INST_SETL, 183 | INST_SETLE, 184 | INST_SETE, 185 | INST_AND, 186 | INST_XOR, 187 | INST_OR, 188 | INST_RET, 189 | INST_CLTD, 190 | INST_CLTQ, 191 | INST_JMP, 192 | INST_JE, 193 | INST_JNE, 194 | INST_JAE, 195 | INST_LABEL, 196 | INST_INCL, 197 | INST_INCQ, 198 | INST_DECL, 199 | INST_DECQ, 200 | INST_CALL, 201 | INST_NOP, 202 | INST_SYSCALL, 203 | 204 | CD_VALUE, 205 | CD_ADDR_OF, 206 | CD_ADDR_OF_LABEL, 207 | 208 | CD_GLOBAL, 209 | CD_TEXT, 210 | CD_DATA, 211 | CD_ZERO, 212 | CD_LONG, 213 | CD_BYTE, 214 | CD_QUAD, 215 | CD_ASCII, 216 | 217 | CD_COMMENT, 218 | 219 | MRK_BASIC_BLOCK_START, 220 | MRK_BASIC_BLOCK_END, 221 | MRK_FUNCDEF_START, 222 | MRK_FUNCDEF_END, 223 | MRK_FUNCDEF_RETURN, 224 | }; 225 | 226 | typedef struct Code Code; 227 | struct Code { 228 | int kind; 229 | 230 | Code *lhs, *rhs; 231 | int ival; 232 | char *sval; // size is ival 233 | char 
*label; 234 | Vector *read_dep; 235 | int can_be_eliminated; 236 | }; 237 | 238 | typedef struct ObjectImage ObjectImage; 239 | 240 | // utility.c 241 | _Noreturn void error(const char *msg, ...); 242 | void warn(const char *msg, ...); 243 | void *safe_malloc(int size); 244 | char *new_str(const char *src); 245 | int *new_int(int src); 246 | char *format(const char *src, ...); 247 | char *vformat(const char *src, va_list ap); 248 | int unescape_char(int src); 249 | char *escape_string(char *str, int size); 250 | int min(int a, int b); 251 | int max(int a, int b); 252 | int roundup(int n, int b); 253 | char *read_entire_file(char *filepath); 254 | int is_register_code(Code *code); 255 | int reg_of_nbyte(int nbyte, int reg); 256 | Code *nbyte_reg(int nbyte, int reg); 257 | Code *str2reg(char *src); 258 | void erase_backslash_newline(char *src); 259 | 260 | // lex.c 261 | Vector *read_all_asm(char *src, char *filepath); 262 | Vector *read_asm_from_filepath(char *filepath); 263 | 264 | // code.c 265 | Code *ADD(Code *lhs, Code *rhs); 266 | Code *ADDQ(Code *lhs, Code *rhs); 267 | Code *AL(); 268 | Code *AND(Code *lhs, Code *rhs); 269 | Code *CL(); 270 | Code *CLTD(); 271 | Code *CLTQ(); 272 | Code *CMP(Code *lhs, Code *rhs); 273 | Code *DECL(Code *lhs); 274 | Code *DECQ(Code *lhs); 275 | Code *IDIV(Code *lhs); 276 | Code *IMUL(Code *lhs, Code *rhs); 277 | Code *INCL(Code *lhs); 278 | Code *INCQ(Code *lhs); 279 | Code *JMP(char *label); 280 | Code *JE(char *label); 281 | Code *JNE(char *label); 282 | Code *JAE(char *label); 283 | Code *LABEL(char *label); 284 | Code *LEA(Code *lhs, Code *rhs); 285 | Code *MOV(Code *lhs, Code *rhs); 286 | Code *MOVL(Code *lhs, Code *rhs); 287 | Code *MOVSBL(Code *lhs, Code *rhs); 288 | Code *MOVSLQ(Code *lhs, Code *rhs); 289 | Code *MOVZB(Code *lhs, Code *rhs); 290 | Code *NEG(Code *lhs); 291 | Code *NOT(Code *lhs); 292 | Code *OR(Code *lhs, Code *rhs); 293 | Code *POP(Code *lhs); 294 | Code *PUSH(Code *lhs); 295 | Code *R10(); 296 | Code 
*R11(); 297 | Code *R12(); 298 | Code *R13(); 299 | Code *R14(); 300 | Code *R15(); 301 | Code *EAX(); 302 | Code *EDX(); 303 | Code *RAX(); 304 | Code *RBP(); 305 | Code *RDI(); 306 | Code *RDX(); 307 | Code *RET(); 308 | Code *RIP(); 309 | Code *RSP(); 310 | Code *SAL(Code *lhs, Code *rhs); 311 | Code *SAR(Code *lhs, Code *rhs); 312 | Code *SETE(Code *lhs); 313 | Code *SETL(Code *lhs); 314 | Code *SETLE(Code *lhs); 315 | Code *SUB(Code *lhs, Code *rhs); 316 | Code *XOR(Code *lhs, Code *rhs); 317 | Code *GLOBAL(char *label); 318 | Code *new_addrof_code(Code *reg, int offset); 319 | Code *new_addrof_label_code(Code *reg, char *label); 320 | Code *new_value_code(int value); 321 | Code *new_code(int kind); 322 | char *code2str(Code *code); 323 | void dump_code(Code *code, FILE *fh); 324 | Code *new_binop_code(int kind, Code *lhs, Code *rhs); 325 | Code *new_unary_code(int kind, Code *lhs); 326 | 327 | // assemble.c 328 | ObjectImage *assemble_code(Vector *code); 329 | void dump_object_image(ObjectImage *objimg, FILE *fh); 330 | 331 | // object.c 332 | void add_byte(Vector *vec, int val); 333 | void set_byte(Vector *vec, int index, int val); 334 | void add_word(Vector *vec, int val0, int val1); 335 | void add_word_int(Vector *vec, int ival); 336 | void add_dword(Vector *vec, int val0, int val1, int val2, int val3); 337 | void add_dword_int(Vector *vec, int ival); 338 | void add_qword_int(Vector *vec, int low, int high); 339 | void add_string(Vector *vec, char *src, int len); 340 | void add_qword(Vector *vec, int val0, int val1, int val2, int val3, int val4, 341 | int val5, int val6, int val7); 342 | void write_byte(FILE *fh, int val0); 343 | Vector *get_buffer_to_emit(); 344 | int emitted_size(); 345 | void set_buffer_to_emit(Vector *buffer); 346 | void reemit_byte(int index, int val0); 347 | void emit_byte(int val0); 348 | void emit_word(int val0, int val1); 349 | void emit_word_int(int ival); 350 | void emit_dword(int val0, int val1, int val2, int val3); 351 | void 
emit_dword_int(int ival); 352 | void emit_qword(int val0, int val1, int val2, int val3, int val4, int val5, 353 | int val6, int val7); 354 | void emit_qword_int(int low, int high); 355 | void emit_string(char *src, int len); 356 | void emit_nbytes(int nbytes, int val); 357 | 358 | #endif 359 | -------------------------------------------------------------------------------- /ld/link.c: -------------------------------------------------------------------------------- 1 | #include "ld.h" 2 | 3 | struct ExeImage { 4 | int vaddr_offset, header_size; 5 | Vector *objs; // vector 6 | }; 7 | 8 | typedef struct ObjectData ObjectData; 9 | struct ObjectData { 10 | char *data; 11 | int data_size, entire_size; 12 | 13 | char *shdr, *symtab, *strtab, *rela_text; 14 | int nshdr, nsymtab, nrela_text; 15 | }; 16 | 17 | int read_byte(char *data) { return data[0] & 0xff; } 18 | 19 | int read_word(char *data) 20 | { 21 | return read_byte(data) | (read_byte(data + 1) << 8); 22 | } 23 | 24 | int read_dword(char *data) 25 | { 26 | return read_word(data) | (read_word(data + 2) << 16); 27 | } 28 | 29 | // assume that obj->shdr, obj->nshdr and obj->data are already filled. 
30 | int get_section_offset(ObjectData *obj, char *name) 31 | { 32 | char *section_strtab = 33 | obj->data + 34 | read_dword(obj->shdr + 0x40 * read_word(obj->data + 62) + 24); 35 | 36 | for (int i = 1; i < obj->nshdr; i++) { 37 | char *entry = obj->shdr + 0x40 * i; 38 | int offset = read_dword(entry + 24); 39 | if (strcmp(name, section_strtab + read_dword(entry)) == 0) 40 | return offset; 41 | } 42 | 43 | assert(0); 44 | } 45 | 46 | ObjectData *new_object_data(char *data, int data_size) 47 | { 48 | ObjectData *obj = (ObjectData *)safe_malloc(sizeof(ObjectData)); 49 | obj->data = data; 50 | obj->data_size = data_size; 51 | obj->entire_size = roundup(data_size, 16); 52 | obj->shdr = obj->symtab = obj->strtab = NULL; 53 | 54 | // parse data 55 | obj->shdr = data + read_dword(data + 40); 56 | char *section_strtab = 57 | data + read_dword(obj->shdr + 0x40 * read_word(data + 62) + 24); 58 | obj->nshdr = read_word(data + 60); 59 | 60 | for (int i = 1; i < obj->nshdr; i++) { 61 | char *entry = obj->shdr + 0x40 * i; 62 | char *offset = data + read_dword(entry + 24); 63 | int size = read_dword(entry + 32); 64 | char *name = section_strtab + read_dword(entry); 65 | if (obj->symtab == NULL && strcmp(name, ".symtab") == 0) { 66 | obj->symtab = offset; 67 | obj->nsymtab = size / 24; 68 | } 69 | if (obj->strtab == NULL && strcmp(name, ".strtab") == 0) 70 | obj->strtab = offset; 71 | if (obj->rela_text == NULL && strcmp(name, ".rela.text") == 0) { 72 | obj->rela_text = offset; 73 | obj->nrela_text = size / 24; 74 | } 75 | } 76 | assert(obj->shdr != NULL && obj->symtab != NULL && obj->strtab != NULL); 77 | 78 | return obj; 79 | } 80 | 81 | ObjectData *read_entire_binary(char *filepath) 82 | { 83 | FILE *fh = fopen(filepath, "rb"); 84 | if (fh == NULL) error("no such binary file: '%s'", filepath); 85 | 86 | // read the file all 87 | StringBuilder *sb = new_string_builder(); 88 | int ch; 89 | while ((ch = fgetc(fh)) != EOF) string_builder_append(sb, ch); 90 | 91 | fclose(fh); 92 | 
93 | // string_builder_size() returns the size including a null character. 94 | return new_object_data(string_builder_get(sb), string_builder_size(sb) - 1); 95 | } 96 | 97 | int *search_symbol_maybe(Vector *objs, const char *name, int header_offset) 98 | { 99 | int prev_offset = header_offset; 100 | 101 | for (int i = 0; i < vector_size(objs); i++) { 102 | ObjectData *obj = (ObjectData *)vector_get(objs, i); 103 | for (int j = 0; j < obj->nsymtab; j++) { 104 | char *entry = obj->symtab + 24 * j; 105 | if (strcmp(obj->strtab + read_dword(entry), name) != 0) continue; 106 | int st_info = read_byte(entry + 4), st_shndx = read_word(entry + 6), 107 | st_value = read_dword(entry + 8); 108 | if (st_shndx == 0 || !(st_info & 0x10)) continue; 109 | return new_int(prev_offset + 110 | read_dword(obj->shdr + 0x40 * st_shndx + 24) + 111 | st_value); 112 | } 113 | prev_offset += obj->entire_size; 114 | } 115 | 116 | return NULL; 117 | } 118 | 119 | int search_symbol(Vector *objs, const char *name, int header_offset) 120 | { 121 | int *offset = search_symbol_maybe(objs, name, header_offset); 122 | if (offset != NULL) return *offset; 123 | error("undefined symbol: %s", name); 124 | } 125 | 126 | void link_objs_detail(Vector *objs, int header_offset) 127 | { 128 | int prev_offset = header_offset; 129 | 130 | for (int i = 0; i < vector_size(objs); i++) { 131 | ObjectData *obj = (ObjectData *)vector_get(objs, i); 132 | for (int j = 0; j < obj->nrela_text; j++) { 133 | char *entry = obj->rela_text + j * 24; 134 | int r_offset = read_dword(entry), 135 | r_info_type = read_dword(entry + 8), 136 | r_info_symtabidx = read_dword(entry + 12), 137 | r_addend = read_dword(entry + 16); 138 | char *symtab_entry = obj->symtab + 24 * r_info_symtabidx; 139 | int st_shndx = read_word(symtab_entry + 6); 140 | 141 | // search new address 142 | int reled_addr = -1; 143 | char *name = obj->strtab + read_dword(symtab_entry); 144 | int *reled_addr_maybe = 145 | search_symbol_maybe(objs, name, 
header_offset); 146 | if (reled_addr_maybe != NULL) { 147 | reled_addr = *reled_addr_maybe + r_addend; 148 | } 149 | else { 150 | if (st_shndx == 0) error("undefined symbol: %s", name); 151 | reled_addr = prev_offset + 152 | read_dword(obj->shdr + 0x40 * st_shndx + 24) + 153 | read_dword(symtab_entry + 8) + r_addend; 154 | } 155 | assert(reled_addr != -1); 156 | 157 | int offset = r_offset + get_section_offset(obj, ".text"); 158 | switch (r_info_type) { 159 | case 2: { // R_X86_64_PC32 160 | int addr = reled_addr - (prev_offset + offset); 161 | obj->data[offset] = addr & 0xff; 162 | obj->data[offset + 1] = (addr >> 8) & 0xff; 163 | obj->data[offset + 2] = (addr >> 16) & 0xff; 164 | obj->data[offset + 3] = (addr >> 24) & 0xff; 165 | } break; 166 | 167 | default: 168 | assert(0); 169 | } 170 | } 171 | 172 | prev_offset += obj->entire_size; 173 | } 174 | } 175 | 176 | ExeImage *link_objs(Vector *obj_paths) 177 | { 178 | int vaddr_offset = 0x400000, header_size = 4096, // 64 + 56 + 8, 179 | header_offset = vaddr_offset + header_size; 180 | 181 | Vector *objs = new_vector(); 182 | 183 | for (int i = 0; i < vector_size(obj_paths); i++) 184 | vector_push_back(objs, 185 | read_entire_binary((char *)vector_get(obj_paths, i))); 186 | 187 | link_objs_detail(objs, header_offset); 188 | 189 | ExeImage *exe = (ExeImage *)safe_malloc(sizeof(ExeImage)); 190 | exe->objs = objs; 191 | exe->vaddr_offset = vaddr_offset; 192 | exe->header_size = header_size; 193 | return exe; 194 | } 195 | 196 | void dump_exe_image(ExeImage *exeimg, FILE *fh) 197 | { 198 | Vector *dumped = new_vector(); 199 | set_buffer_to_emit(dumped); 200 | 201 | // 202 | // *** ELF HEADER *** 203 | // 204 | 205 | int header_offset = 0; 206 | 207 | // ELF magic number 208 | emit_dword(0x7f, 0x45, 0x4c, 0x46); 209 | // 64bit 210 | emit_byte(0x02); 211 | // little endian 212 | emit_byte(0x01); 213 | // original version of ELF 214 | emit_byte(0x01); 215 | // System V 216 | emit_byte(0x00); // 0x03 GNU 217 | // padding 
218 | emit_qword(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); 219 | // ET_REL 220 | emit_word(0x02, 0x00); 221 | // x86-64 222 | emit_word(0x3e, 0x00); 223 | // original version of ELF 224 | emit_dword(0x01, 0x00, 0x00, 0x00); 225 | 226 | // addr of entry point (placeholder) 227 | int ep_addr = emitted_size(); 228 | emit_qword(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); 229 | // addr of program header table 230 | emit_qword(0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); 231 | // addr of section header table 232 | emit_qword(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); 233 | 234 | // flag 235 | emit_dword(0x00, 0x00, 0x00, 0x00); 236 | 237 | // size of this header 238 | emit_word(0x40, 0x00); 239 | 240 | // size of program header table entry 241 | emit_word(0x38, 0x00); 242 | // number of entries in program header table 243 | emit_word(0x01, 0x00); 244 | 245 | // size of section header table entry 246 | emit_word(0x00, 0x00); 247 | // number of entries in section header table 248 | emit_word(0x00, 0x00); 249 | // index of section header entry containing section names 250 | emit_word(0x00, 0x00); 251 | 252 | int header_size = emitted_size() - header_offset; 253 | 254 | // 255 | // *** PROGRAM HEADER *** 256 | // 257 | 258 | // PT_LOAD 259 | emit_dword_int(1); 260 | // PF_X | PF_W | PF_R 261 | emit_dword_int(1 | 2 | 4); 262 | // offset 263 | emit_qword_int(0, 0); 264 | // virtual address in memory 265 | emit_qword_int(exeimg->vaddr_offset, 0); 266 | // reserved (phisical address in memory ?) 
267 | emit_qword_int(exeimg->vaddr_offset, 0); 268 | // size of segment in file (placeholder) 269 | int filesz_addr = emitted_size(); 270 | emit_qword_int(0, 0); 271 | // size of segment in memory (placeholder) 272 | int memsz_addr = emitted_size(); 273 | emit_qword_int(0, 0); 274 | // alignment 275 | emit_qword_int(0x1000, 0); 276 | 277 | // padding 278 | for (int i = emitted_size(); i < exeimg->header_size; i++) emit_byte(0); 279 | assert(emitted_size() == exeimg->header_size); 280 | 281 | // 282 | // *** BODY *** 283 | // 284 | 285 | int body_offset = emitted_size(); 286 | 287 | for (int i = 0; i < vector_size(exeimg->objs); i++) { 288 | ObjectData *obj = (ObjectData *)vector_get(exeimg->objs, i); 289 | for (int j = 0; j < obj->data_size; j++) emit_byte(obj->data[j]); 290 | for (int j = 0; j < obj->entire_size - obj->data_size; j++) 291 | emit_byte(0); 292 | } 293 | 294 | // rewrite placeholders 295 | int ep_offset = search_symbol(exeimg->objs, "_start", 296 | exeimg->vaddr_offset + exeimg->header_size); 297 | reemit_byte(ep_addr + 0, (ep_offset >> 0) & 0xff); 298 | reemit_byte(ep_addr + 1, (ep_offset >> 8) & 0xff); 299 | reemit_byte(ep_addr + 2, (ep_offset >> 16) & 0xff); 300 | reemit_byte(ep_addr + 3, (ep_offset >> 24) & 0xff); 301 | 302 | int filesize = emitted_size(); 303 | reemit_byte(filesz_addr + 0, (filesize >> 0) & 0xff); 304 | reemit_byte(filesz_addr + 1, (filesize >> 8) & 0xff); 305 | reemit_byte(filesz_addr + 2, (filesize >> 16) & 0xff); 306 | reemit_byte(filesz_addr + 3, (filesize >> 24) & 0xff); 307 | reemit_byte(memsz_addr + 0, (filesize >> 0) & 0xff); 308 | reemit_byte(memsz_addr + 1, (filesize >> 8) & 0xff); 309 | reemit_byte(memsz_addr + 2, (filesize >> 16) & 0xff); 310 | reemit_byte(memsz_addr + 3, (filesize >> 24) & 0xff); 311 | 312 | // write dumped to file 313 | for (int i = 0; i < vector_size(dumped); i++) 314 | write_byte(fh, (int)vector_get(dumped, i)); 315 | 316 | return; 317 | } 318 | 
-------------------------------------------------------------------------------- /as/lex.c: -------------------------------------------------------------------------------- 1 | #include "as.h" 2 | 3 | Source source; 4 | 5 | void init_source(char *src, char *filepath) 6 | { 7 | // caluculate current working directory 8 | int i, j, len = strlen(filepath); 9 | source.cwd = safe_malloc(sizeof(char *) * len); 10 | // TODO: ad-hoc 11 | for (i = len - 1; i >= 0; i--) { 12 | if (filepath[i] == '/') { 13 | // detect last '/' 14 | for (j = 0; j <= i; j++) { 15 | source.cwd[j] = filepath[j]; 16 | } 17 | source.cwd[i + 1] = '\0'; 18 | break; 19 | } 20 | } 21 | // When this source code and aqcc are located in the same directory 22 | if (source.cwd[0] == '\0') { 23 | strcpy(source.cwd, "./"); 24 | } 25 | 26 | source.filepath = filepath; 27 | source.src = src; 28 | source.line = source.column = 1; 29 | source.line2length = new_vector(); 30 | 31 | // fill file.line2length 32 | vector_push_back(source.line2length, NULL); // line is 1-based index. 33 | for (int i = 0, column = 1; src[i] == '\0'; i++, column++) { 34 | if (src[i] == '\n') { 35 | vector_push_back(source.line2length, (void *)column); 36 | column = 0; 37 | } 38 | } 39 | } 40 | 41 | void ungetch() 42 | { 43 | source.src--; 44 | if (*source.src == '\n') { 45 | source.line--; 46 | source.column = (int)vector_get(source.line2length, source.line); 47 | } 48 | else { 49 | source.column--; 50 | } 51 | } 52 | 53 | char peekch() { return *source.src; } 54 | 55 | char getch() 56 | { 57 | char ch = peekch(); 58 | if (ch == '\0') error("unexpected EOF"); 59 | 60 | if (ch == '\n') { 61 | source.line++; 62 | source.column = 0; 63 | } 64 | else { 65 | source.column++; 66 | } 67 | return *source.src++; 68 | } 69 | 70 | int read_next_hex_int() 71 | { 72 | // assume that prefix '0x' is already read. 
73 | int acc = 0; 74 | while (1) { 75 | int ch = peekch(); 76 | if (isdigit(ch)) 77 | acc = 16 * acc + ch - '0'; 78 | else if ('a' <= ch && ch <= 'f') 79 | acc = 16 * acc + 10 + ch - 'a'; 80 | else if ('A' <= ch && ch <= 'F') 81 | acc = 16 * acc + 10 + ch - 'A'; 82 | else 83 | break; 84 | getch(); 85 | } 86 | 87 | return acc; 88 | } 89 | 90 | int read_next_dec_int() 91 | { 92 | int acc = 0; 93 | while (isdigit(peekch())) acc = 10 * acc + getch() - '0'; 94 | return acc; 95 | } 96 | 97 | int read_next_oct_int() 98 | { 99 | // assume that prefix '0' is already read. 100 | int acc = 0; 101 | while (1) { 102 | int ch = peekch(); 103 | if ('0' <= ch && ch <= '7') 104 | acc = 8 * acc + ch - '0'; 105 | else 106 | break; 107 | getch(); 108 | } 109 | 110 | return acc; 111 | } 112 | 113 | int read_next_int() 114 | { 115 | int ch = peekch(), ival; 116 | if (ch == '0') { 117 | getch(); 118 | if (peekch() == 'x') { 119 | getch(); 120 | ival = read_next_hex_int(); 121 | } 122 | else 123 | ival = read_next_oct_int(); 124 | } 125 | else 126 | ival = read_next_dec_int(); 127 | 128 | return ival; 129 | } 130 | 131 | // assume that the first doublequote has been already read. 
132 | void read_next_string_literal(char **sval, int *ssize) 133 | { 134 | StringBuilder *sb = new_string_builder(); 135 | while (1) { 136 | char ch = getch(); 137 | 138 | switch (ch) { 139 | case '\\': 140 | ch = getch(); 141 | ch = unescape_char(ch); 142 | break; 143 | 144 | case '"': 145 | goto end; 146 | 147 | case '\n': 148 | error("unexpected new-line character"); 149 | } 150 | 151 | string_builder_append(sb, ch); 152 | } 153 | 154 | end: 155 | *ssize = string_builder_size(sb); 156 | *sval = string_builder_get(sb); 157 | } 158 | 159 | void skip_space() 160 | { 161 | while (isspace(peekch())) getch(); 162 | } 163 | 164 | char speekch() 165 | { 166 | skip_space(); 167 | return peekch(); 168 | } 169 | 170 | char sgetch() 171 | { 172 | skip_space(); 173 | return getch(); 174 | } 175 | 176 | _Noreturn void unexpected_char_error(char expect, char got) 177 | { 178 | error("%s:%d:%d: unexpected character: expect %c, got %c", source.filepath, 179 | source.line, source.column, expect, got); 180 | } 181 | 182 | char sexpect_ch(char expect) 183 | { 184 | int ch = sgetch(); 185 | if (ch != expect) unexpected_char_error(expect, ch); 186 | return ch; 187 | } 188 | 189 | int read_asm_ival() 190 | { 191 | int mul = 1; 192 | if (speekch() == '-') { 193 | getch(); 194 | mul = -1; 195 | } 196 | return read_next_int() * mul; 197 | } 198 | 199 | char *read_asm_token() 200 | { 201 | StringBuilder *sb = new_string_builder(); 202 | string_builder_append(sb, sgetch()); 203 | while (1) { 204 | int ch = peekch(); 205 | if (!isalnum(ch) && ch != '_' && ch != '.' 
&& ch != ':') break; 206 | string_builder_append(sb, getch()); 207 | } 208 | char *str = string_builder_get(sb); 209 | 210 | return str; 211 | } 212 | 213 | Code *read_asm_memory() 214 | { 215 | sexpect_ch('('); 216 | char *reg = read_asm_token(); 217 | sexpect_ch(')'); 218 | return str2reg(reg); 219 | } 220 | 221 | Code *read_asm_param() 222 | { 223 | char ch = speekch(); 224 | switch (ch) { 225 | case '%': 226 | return str2reg(read_asm_token()); 227 | 228 | case '$': { 229 | getch(); // already skipped space 230 | return new_value_code(read_asm_ival()); 231 | } 232 | 233 | case '(': 234 | return new_addrof_code(read_asm_memory(), 0); 235 | } 236 | 237 | if (isdigit(ch) || ch == '-') { 238 | int offset = read_asm_ival(); 239 | return new_addrof_code(read_asm_memory(), offset); 240 | } 241 | 242 | char *label = read_asm_token(); 243 | return new_addrof_label_code(read_asm_memory(), label); 244 | } 245 | 246 | Vector *read_all_asm(char *src, char *filepath) 247 | { 248 | init_source(src, filepath); 249 | 250 | Vector *code = new_vector(); 251 | 252 | while (speekch() != '\0') { 253 | // comment 254 | if (peekch() == '/') { 255 | getch(); 256 | if (getch() == '*') { 257 | // begin comment 258 | while (1) { 259 | if (getch() != '*') continue; 260 | if (getch() == '/') break; // end comment 261 | ungetch(); 262 | } 263 | continue; 264 | } 265 | else 266 | ungetch(); 267 | } 268 | 269 | char *str = read_asm_token(); 270 | int len = strlen(str); 271 | 272 | if (str[len - 1] == ':') { // label 273 | str[len - 1] = '\0'; 274 | vector_push_back(code, LABEL(str)); 275 | continue; 276 | } 277 | 278 | if (strcmp(str, ".ascii") == 0) { 279 | sexpect_ch('"'); 280 | char *sval; 281 | int ssize; 282 | read_next_string_literal(&sval, &ssize); 283 | 284 | Code *c = new_code(CD_ASCII); 285 | c->sval = sval; 286 | c->ival = ssize - 1; 287 | vector_push_back(code, c); 288 | continue; 289 | } 290 | 291 | KeyValue *kv; 292 | 293 | Map *binop_table = new_map(); 294 | 
map_insert(binop_table, "mov", (void *)INST_MOV); 295 | map_insert(binop_table, "movl", (void *)INST_MOVL); 296 | map_insert(binop_table, "movsbl", (void *)INST_MOVSBL); 297 | map_insert(binop_table, "movslq", (void *)INST_MOVSLQ); 298 | map_insert(binop_table, "movzb", (void *)INST_MOVZB); 299 | map_insert(binop_table, "lea", (void *)INST_LEA); 300 | map_insert(binop_table, "add", (void *)INST_ADD); 301 | map_insert(binop_table, "add", (void *)INST_ADD); 302 | map_insert(binop_table, "addq", (void *)INST_ADDQ); 303 | map_insert(binop_table, "sub", (void *)INST_SUB); 304 | map_insert(binop_table, "imul", (void *)INST_IMUL); 305 | map_insert(binop_table, "sar", (void *)INST_SAR); 306 | map_insert(binop_table, "sal", (void *)INST_SAL); 307 | map_insert(binop_table, "cmp", (void *)INST_CMP); 308 | map_insert(binop_table, "and", (void *)INST_AND); 309 | map_insert(binop_table, "xor", (void *)INST_XOR); 310 | map_insert(binop_table, "or", (void *)INST_OR); 311 | if (kv = map_lookup(binop_table, str)) { 312 | Code *lhs = read_asm_param(); 313 | sexpect_ch(','); 314 | Code *rhs = read_asm_param(); 315 | vector_push_back(code, new_binop_code((int)kv_value(kv), lhs, rhs)); 316 | continue; 317 | } 318 | 319 | Map *unary_table = new_map(); 320 | map_insert(unary_table, "push", (void *)INST_PUSH); 321 | map_insert(unary_table, "pop", (void *)INST_POP); 322 | map_insert(unary_table, "idiv", (void *)INST_IDIV); 323 | map_insert(unary_table, "neg", (void *)INST_NEG); 324 | map_insert(unary_table, "not", (void *)INST_NOT); 325 | map_insert(unary_table, "setl", (void *)INST_SETL); 326 | map_insert(unary_table, "setle", (void *)INST_SETLE); 327 | map_insert(unary_table, "sete", (void *)INST_SETE); 328 | map_insert(unary_table, "incl", (void *)INST_INCL); 329 | map_insert(unary_table, "incq", (void *)INST_INCQ); 330 | map_insert(unary_table, "decl", (void *)INST_DECL); 331 | map_insert(unary_table, "decq", (void *)INST_DECQ); 332 | if (kv = map_lookup(unary_table, str)) { 333 | Code 
*lhs = read_asm_param(); 334 | vector_push_back(code, new_unary_code((int)kv_value(kv), lhs)); 335 | continue; 336 | } 337 | 338 | Map *simple_table = new_map(); 339 | map_insert(simple_table, "ret", (void *)INST_RET); 340 | map_insert(simple_table, "nop", (void *)INST_NOP); 341 | map_insert(simple_table, "syscall", (void *)INST_SYSCALL); 342 | map_insert(simple_table, "cltd", (void *)INST_CLTD); 343 | map_insert(simple_table, "cltq", (void *)INST_CLTQ); 344 | map_insert(simple_table, ".text", (void *)CD_TEXT); 345 | map_insert(simple_table, ".data", (void *)CD_DATA); 346 | if (kv = map_lookup(simple_table, str)) { 347 | vector_push_back(code, new_code((int)kv_value(kv))); 348 | continue; 349 | } 350 | 351 | Map *label_table = new_map(); 352 | map_insert(label_table, "call", (void *)INST_CALL); 353 | map_insert(label_table, "jmp", (void *)INST_JMP); 354 | map_insert(label_table, "je", (void *)INST_JE); 355 | map_insert(label_table, "jne", (void *)INST_JNE); 356 | map_insert(label_table, "jae", (void *)INST_JAE); 357 | map_insert(label_table, ".global", (void *)CD_GLOBAL); 358 | if (kv = map_lookup(label_table, str)) { 359 | char *label = read_asm_token(); 360 | Code *c = new_code((int)kv_value(kv)); 361 | c->label = label; 362 | vector_push_back(code, c); 363 | continue; 364 | } 365 | 366 | Map *ival_table = new_map(); 367 | map_insert(ival_table, ".zero", (void *)CD_ZERO); 368 | map_insert(ival_table, ".long", (void *)CD_LONG); 369 | map_insert(ival_table, ".byte", (void *)CD_BYTE); 370 | map_insert(ival_table, ".quad", (void *)CD_QUAD); 371 | if (kv = map_lookup(ival_table, str)) { 372 | skip_space(); 373 | int ival = read_asm_ival(); 374 | Code *c = new_code((int)kv_value(kv)); 375 | c->ival = ival; 376 | vector_push_back(code, c); 377 | continue; 378 | } 379 | 380 | error("%s:%d:%d: not implemented assembly: %s", source.filepath, 381 | source.line, source.column, str); 382 | } 383 | 384 | return code; 385 | } 386 | 387 | Vector *read_asm_from_filepath(char 
*filepath)
{
    char *src = read_entire_file(filepath);
    return read_all_asm(src, filepath);
}
--------------------------------------------------------------------------------
/cc/cc.h:
--------------------------------------------------------------------------------
// Shared declarations for the cc sources: hand-written prototypes for the
// libc functions in use, the container helpers (vector/map/string builder),
// and the token, type and AST data structures.
#ifndef AQCC_AQCC_H
#define AQCC_AQCC_H

//#include
//#include
//#include
//#include
//#include
//#include

// va_list: use the builtin type under GNU-compatible compilers; otherwise
// declare the four-field structure below by hand.
#ifdef __GNUC__
typedef __builtin_va_list va_list;
#else
#endif
#ifndef __GNUC__
typedef struct {
    int gp_offset;
    int fp_offset;
    void *overflow_arg_area;
    void *reg_save_area;
} va_list[1];
#endif
#define va_start __builtin_va_start
#define va_end __builtin_va_end
#define va_arg __builtin_va_arg

// Declarations of the libc names used by the sources, written out here
// because the system headers above are not included.  Sizes are plain int.
typedef struct _IO_FILE FILE;
// extern FILE *stdin;  /* Standard input stream.  */
// extern FILE *stdout; /* Standard output stream.  */
// extern FILE *stderr; /* Standard error output stream.  */
#define NULL 0
#define EOF (-1)
FILE *fopen(const char *pathname, const char *mode);
int fclose(FILE *stream);
int fputc(int c, FILE *stream);
int fgetc(FILE *stream);
int fprintf(FILE *stream, const char *format, ...);
int printf(const char *format, ...);
int vsprintf(char *str, const char *format, va_list ap);
#define EXIT_FAILURE 1 /* Failing exit status.  */
#define EXIT_SUCCESS 0 /* Successful exit status.  */
_Noreturn void exit(int status);
void *malloc(int size);
int strlen(const char *s);
int strcmp(const char *s1, const char *s2);
char *strcpy(char *dest, const char *src);
int isalpha(int c);
int isalnum(int c);
int isdigit(int c);
int isspace(int c);
void *memcpy(void *dest, const void *src, int n);
void *memset(void *s, int c, int n);
void assert(int cond);

// vector.c
typedef struct Vector Vector;
Vector *new_vector();
Vector *new_vector_from_scalar(void *scalar);
void vector_push_back(Vector *vec, void *item);
void *vector_get(Vector *vec, int i);
int vector_size(Vector *vec);
void *vector_set(Vector *vec, int i, void *item);
void vector_push_back_vector(Vector *vec, Vector *src);
Vector *clone_vector(Vector *src);

// map.c
typedef struct KeyValue KeyValue;
typedef struct Map Map;
Map *new_map();
int map_size(Map *map);
KeyValue *map_insert(Map *map, const char *key, void *item);
KeyValue *map_lookup(Map *map, const char *key);
const char *kv_key(KeyValue *kv);
void *kv_value(KeyValue *kv);

// string_builder.c
typedef struct StringBuilder StringBuilder;
StringBuilder *new_string_builder();
char string_builder_append(StringBuilder *sb, char ch);
char *string_builder_get(StringBuilder *sb);
int string_builder_size(StringBuilder *sb);

// Position and bookkeeping for one source text being read.
typedef struct {
    int line, column;
    Vector *line2length;
    char *src;
    // example: "/tmp/1.c" -> cwd: "/tmp/"
    char *cwd;  // current working directory with '/'
    char *filepath;
} Source;

// Token kinds (Token.kind).  The k-prefixed members are named after C
// keywords; the numeric values follow declaration order.
enum {
    tINT,
    tSTRING_LITERAL,
    tPLUS,
    tMINUS,
    tSTAR,
    tSLASH,
    tPERCENT,
    tLPAREN,
    tRPAREN,
    tLSHIFT,
    tRSHIFT,
    tLT,
    tGT,
    tLTE,
    tGTE,
    tEQEQ,
    tNEQ,
    tAND,
    tHAT,
    tEXCL,
    tBAR,
    tANDAND,
    tBARBAR,
    tIDENT,
    tEQ,
    tPLUSEQ,
    tMINUSEQ,
    tSTAREQ,
    tSLASHEQ,
    tPERCENTEQ,
    tANDEQ,
    tHATEQ,
    tBAREQ,
    tLSHIFTEQ,
    tRSHIFTEQ,
    tSEMICOLON,
    tCOMMA,
    tDOT,
    tARROW,
    tLBRACE,
    tRBRACE,
    kRETURN,
    tCOLON,
    tQUESTION,
    tLBRACKET,
    tRBRACKET,
    tINC,
    tDEC,
    tDOTS,
    tNUMBER,
    tNEWLINE,
    tTILDE,
    tEOF,
    kIF,
    kELSE,
    kWHILE,
    kBREAK,
    kCONTINUE,
    kFOR,
    kINT,
    kCHAR,
    kSIZEOF,
    kSWITCH,
    kCASE,
    kDEFAULT,
    kGOTO,
    kSTRUCT,
    kUNION,
    kTYPEDEF,
    kDO,
    kVOID,
    kCONST,
    kENUM,
    kNORETURN,
    kSTATIC,
    kEXTERN,
};

// One token: its kind, the Source it is associated with, and a payload that
// is either an integer or a string with its size.
typedef struct {
    int kind;
    Source *source;

    union {
        int ival;

        struct {
            char *sval;
            int ssize;  // only when string literal. including terminating null
                        // character.
        };
    };
} Token;

// A token list together with an index into it.
typedef struct {
    Vector *tokens;
    int idx;
} TokenSeq;

// Saved TokenSeq index (see SAVE_TOKENSEQ / RESTORE_TOKENSEQ below).
typedef struct {
    int idx;
} TokenSeqSaved;

// A scope; scopes are chained through `parent`.
typedef struct Env Env;
struct Env {
    Env *parent;
    Map *symbols;
    Map *types;
    Vector *scoped_vars;
    Map *enum_values;
};

typedef struct AST AST;

// Type kinds (Type.kind).
enum {
    TY_INT,
    TY_CHAR,
    TY_PTR,
    TY_ARY,
    TY_STRUCT,
    TY_UNION,
    TY_TYPEDEF,
    TY_VOID,
    TY_ENUM,
};

// A type.  Which union member is meaningful presumably depends on `kind`
// (ptr_of for TY_PTR, ary_of/len for TY_ARY, ...) -- TODO confirm against
// type.c.
typedef struct Type Type;
struct Type {
    int kind, nbytes, is_static, is_extern;

    union {
        Type *ptr_of;

        struct {
            Type *ary_of;
            int len;
        };

        // struct/union
        struct {
            char *stname;
            Vector *members;  // for analyzer, generator
            Vector *decls;    // for parser
        };

        char *typedef_name;

        struct {
            char *enname;
            Vector *enum_list;
        };
    };
};

// A global variable and its optional initial value.
typedef struct GVar GVar;
struct GVar {
    char *name;
    Type *type;

    int is_global;
    AST *value;  // AST_CONSTANT or AST_STRING_LITERAL
};

// One case of a switch: its condition and the label name paired with it.
typedef struct {
    AST *cond;
    char *label_name;
} SwitchCase;

typedef struct {
    Type *type;
    char *name;
    int offset;  // when union, offset=0
} StructMember;  // also UnionMember

// AST node kinds (AST.kind).
enum {
    AST_ADD,
    AST_SUB,
    AST_MUL,
    AST_DIV,
    AST_REM,
    AST_INT,
    AST_STRING_LITERAL,
    AST_UNARY_MINUS,
    AST_NOT,
    AST_LSHIFT,
    AST_RSHIFT,
    AST_LT,
    AST_GT,
    AST_LTE,
    AST_GTE,
    AST_EQ,
    AST_NEQ,
    AST_AND,
    AST_XOR,
    AST_OR,
    AST_COMPL,
    AST_LAND,
    AST_LOR,
    AST_COND,
    AST_ASSIGN,
    AST_VAR,
    AST_LVAR,
    AST_GVAR,
    AST_LVAR_DECL,
    AST_GVAR_DECL,
    AST_LVAR_DECL_INIT,
    AST_GVAR_DECL_INIT,
    AST_STRUCT_VAR_DECL,
    AST_ENUM_VAR_DECL,
    AST_ENUM_VAR_DECL_INIT,
    AST_FUNCCALL,
    AST_FUNCDEF,
    AST_FUNC_DECL,
    AST_NOP,
    AST_RETURN,
    AST_EXPR_STMT,
    AST_COMPOUND,
    AST_IF,
    AST_SWITCH,
    AST_LABEL,
    AST_CASE,
    AST_DEFAULT,
    AST_WHILE,
    AST_DOWHILE,
    AST_BREAK,
    AST_CONTINUE,
    AST_FOR,
    AST_PREINC,
    AST_POSTINC,
    AST_PREDEC,
    AST_POSTDEC,
    AST_ADDR,
    AST_INDIR,
    AST_ARY2PTR,
    AST_CHAR2INT,
    AST_SIZEOF,
    AST_GOTO,
    AST_LVALUE2RVALUE,
    AST_MEMBER_REF,
    AST_MEMBER_REF_PTR,
    AST_EXPR_LIST,
    AST_DECL_LIST,
    AST_TYPEDEF_VAR_DECL,
    AST_CAST,
    AST_VA_START,
    AST_VA_END,
    AST_VA_ARG_INT,
    AST_VA_ARG_CHARP,
    AST_CONSTANT,
};

// An AST node.  All payloads share one anonymous union, so only the members
// belonging to the node's `kind` may be used at a time.
struct AST {
    int kind;
    Type *type;

    union {
        int ival;

        struct {
            char *sval;
            int ssize;  // only when string literal. including null character.
        };

        struct {
            char *varname, *gen_varname;
            int stack_idx;
        };

        struct {
            AST *lhs, *rhs;
        };

        // AST_IF
        // AST_COND
        // AST_WHILE
        struct {
            AST *cond, *then, *els;
        };

        // AST_FOR
        struct {
            AST *initer, *midcond, *iterer, *for_body;
        };

        struct {
            char *fname;
            Vector *args;    // actual arguments
            Vector *params;  // formal parameters
            AST *body;       // If NULL then only declaration exists.
            Env *env;
            int is_variadic;
        };

        // AST_ARY2PTR
        struct {
            AST *ary;
        };

        struct {
            char *label_name;
            AST *label_stmt;
        };

        struct {
            AST *target, *switch_body;
            Vector *cases;
            char *default_label;
        };

        struct {
            AST *stsrc;
            char *member;
        };

        Vector *stmts;
        Vector *exprs;
        Vector *decls;
    };
};

// utility.c
_Noreturn void error(const char *msg, ...);
_Noreturn void error_unexpected_token_kind(int expect_kind, Token *got);
_Noreturn void error_unexpected_token_str(char *expect_str, Token *got);
void warn(const char *msg, ...);
void *safe_malloc(int size);
char *new_str(const char *src);
int *new_int(int src);
char *format(const char *src, ...);
char *vformat(const char *src, va_list ap);
int unescape_char(int src);
char *escape_string(char *str, int size);
char *make_label_string();
int min(int a, int b);
int max(int a, int b);
int roundup(int n, int b);

// lex.c
Vector *read_all_tokens(char *src, char *filepath);
const char *token_kind2str(int kind);
Vector *concatenate_string_literal_tokens(Vector *tokens);
Vector *read_tokens_from_filepath(char *filepath);

// parse.c
Vector *parse_prog(Vector *tokens);

// type.c
Type *type_int();
Type *type_char();
Type *type_void();
Type *new_pointer_type(Type *src);
Type *new_array_type(Type *src, int len);
Env *new_env(Env *parent);
Type *new_struct_or_union_type(int kind, char *stname, Vector *members);
Type *type_unknown();
Type *new_typedef_type(char *typedef_name);
Type *new_enum_type(char *name, Vector *list);
Type *new_static_type(Type *type);
Type *new_extern_type(Type *type);
void move_static_extern_specifier(Type *src, Type *dst);

// env.c
AST *add_var(Env *env, AST *ast);
AST *lookup_var(Env *env, const char *name);
AST *add_func(Env *env, const char *name, AST *ast);
AST *lookup_func(Env *env, const char *name);
Type *add_type(Env *env, Type *type, char *name);
Type *lookup_type(Env *env, const char *name);
Type *add_struct_or_union_or_enum_type(Env *env, Type *type);
Type *lookup_struct_or_union_or_enum_type(Env *env, const char *name);
void add_enum_value(Env *env, char *name, AST *value);
AST *lookup_enum_value(Env *env, char *name);

// ast.c
int match_type(AST *ast, int kind);
int match_type2(AST *lhs, AST *rhs, int lkind, int rkind);
AST *new_ast(int kind);
AST *new_binop_ast(int kind, AST *lhs, AST *rhs);
AST *new_while_stmt(AST *cond, AST *body);
AST *new_compound_stmt2(AST *first, AST *second);
AST *new_ary2ptr_ast(AST *ary);
AST *ary2ptr(AST *ary);
AST *char2int(AST *ch);
AST *new_var_ast(char *varname);
AST *new_lgvar_ast(int kind, Type *type, char *varname, int stack_idx);
AST *new_var_decl_ast(int kind, Type *type, char *varname);
AST *new_var_decl_init_ast(AST *var_decl, AST *initer);
AST *new_unary_ast(int kind, AST *that);
AST *new_func_ast(int kind, char *fname, Vector *args, Vector *params,
                  Type *ret_type);
AST *new_label_ast(char *name, AST *stmt);
AST *new_lvalue2rvalue_ast(AST *lvalue);
AST *new_int_ast(int ival);

// analyze.c
Env *analyze_ast(Vector *asts);
Vector *get_gvar_list();

// cpp.c
Vector *preprocess_tokens(Vector *tokens);

// token.c
Token *new_token(int kind, Source *source);
Token *clone_token(Token *src);
TokenSeq *new_token_seq(Vector *tokens);
void init_tokenseq(Vector *tokens);
void insert_tokens(Vector *tokens);
Token *peek_token();
Token *pop_token();
Token *expect_token(int kind);
int match_token(int kind);
Token *pop_token_if(int kind);
int match_token2(int kind0, int kind1);
TokenSeqSaved *new_token_seq_saved();
void restore_token_seq_saved(TokenSeqSaved *saved);

// SAVE_TOKENSEQ declares a local holding the saved state, so it can appear
// only once per scope; RESTORE_TOKENSEQ must be used where that local is
// visible.  Both expand to a complete statement including the ';'.
#define SAVE_TOKENSEQ \
    TokenSeqSaved *token_seq_saved__dummy = new_token_seq_saved();
#define RESTORE_TOKENSEQ restore_token_seq_saved(token_seq_saved__dummy);

// x86_64_gen.c
typedef struct Code Code;
Vector *x86_64_generate_code(Vector *asts);
void x86_64_optimize_asts_constant(Vector *asts, Env *env);
Vector *x86_64_optimize_code(Vector *code);
void dump_code(Code *code, FILE *fh);

#endif
--------------------------------------------------------------------------------
/as/code.c:
--------------------------------------------------------------------------------
#include "as.h"

// Allocate a Code of the given kind with no operands, ival 0, no string or
// label, an empty read_dep vector and can_be_eliminated set to 1.
Code *new_code(int kind)
{
    Code *code = safe_malloc(sizeof(Code));
    code->kind = kind;
    code->lhs = code->rhs = NULL;
    code->ival = 0;
    code->sval = NULL;
    code->label = NULL;
    code->read_dep = new_vector();
    code->can_be_eliminated = 1;
    return code;
}

// Two-operand Code; both operands are recorded in read_dep.
Code *new_binop_code(int kind, Code *lhs, Code *rhs)
{
    Code *code = new_code(kind);
    code->lhs = lhs;
    code->rhs = rhs;
    vector_push_back(code->read_dep, lhs);
    vector_push_back(code->read_dep, rhs);
    return code;
}

// One-operand Code; the operand is recorded in read_dep.
Code *new_unary_code(int kind, Code *lhs)
{
    Code *code = new_code(kind);
    code->lhs = lhs;
    code->rhs = NULL;
    vector_push_back(code->read_dep, lhs);
    return code;
}

// CD_VALUE Code carrying `value` in ival.
Code *new_value_code(int value)
{
    Code *code = new_code(CD_VALUE);
    code->ival = value;
    return code;
}

Code *new_addrof_label_code(Code *reg, char *label)
{
    Code *code = new_code(CD_ADDR_OF_LABEL);
    code->lhs = reg;
code->label = label; 47 | return code; 48 | } 49 | 50 | Code *new_addrof_code(Code *reg, int offset) 51 | { 52 | Code *code = new_code(CD_ADDR_OF); 53 | code->lhs = reg; 54 | code->ival = offset; 55 | return code; 56 | } 57 | 58 | Code *MOV(Code *lhs, Code *rhs) 59 | { 60 | Code *code = new_code(INST_MOV); 61 | code->lhs = lhs; 62 | code->rhs = rhs; 63 | vector_push_back(code->read_dep, lhs); 64 | if (!is_register_code(rhs)) vector_push_back(code->read_dep, rhs); 65 | return code; 66 | } 67 | 68 | Code *MOVL(Code *lhs, Code *rhs) 69 | { 70 | Code *code = new_code(INST_MOVL); 71 | code->lhs = lhs; 72 | code->rhs = rhs; 73 | vector_push_back(code->read_dep, lhs); 74 | if (!is_register_code(rhs)) vector_push_back(code->read_dep, rhs); 75 | return code; 76 | } 77 | 78 | Code *MOVSBL(Code *lhs, Code *rhs) 79 | { 80 | Code *code = new_code(INST_MOVSBL); 81 | code->lhs = lhs; 82 | code->rhs = rhs; 83 | vector_push_back(code->read_dep, lhs); 84 | if (!is_register_code(rhs)) vector_push_back(code->read_dep, rhs); 85 | return code; 86 | } 87 | 88 | Code *MOVSLQ(Code *lhs, Code *rhs) 89 | { 90 | Code *code = new_code(INST_MOVSLQ); 91 | code->lhs = lhs; 92 | code->rhs = rhs; 93 | vector_push_back(code->read_dep, lhs); 94 | if (!is_register_code(rhs)) vector_push_back(code->read_dep, rhs); 95 | return code; 96 | } 97 | 98 | Code *MOVZB(Code *lhs, Code *rhs) 99 | { 100 | Code *code = new_code(INST_MOVZB); 101 | code->lhs = lhs; 102 | code->rhs = rhs; 103 | vector_push_back(code->read_dep, lhs); 104 | if (!is_register_code(rhs)) vector_push_back(code->read_dep, rhs); 105 | return code; 106 | } 107 | 108 | Code *LEA(Code *lhs, Code *rhs) 109 | { 110 | Code *code = new_code(INST_LEA); 111 | code->lhs = lhs; 112 | code->rhs = rhs; 113 | vector_push_back(code->read_dep, lhs); 114 | if (!is_register_code(rhs)) vector_push_back(code->read_dep, rhs); 115 | return code; 116 | } 117 | 118 | Code *PUSH(Code *lhs) { return new_unary_code(INST_PUSH, lhs); } 119 | 120 | Code *POP(Code *lhs) { 
return new_unary_code(INST_POP, lhs); } 121 | 122 | Code *ADD(Code *lhs, Code *rhs) { return new_binop_code(INST_ADD, lhs, rhs); } 123 | 124 | Code *ADDQ(Code *lhs, Code *rhs) { return new_binop_code(INST_ADDQ, lhs, rhs); } 125 | 126 | Code *SUB(Code *lhs, Code *rhs) { return new_binop_code(INST_SUB, lhs, rhs); } 127 | 128 | Code *IMUL(Code *lhs, Code *rhs) { return new_binop_code(INST_IMUL, lhs, rhs); } 129 | 130 | Code *IDIV(Code *lhs) { return new_unary_code(INST_IDIV, lhs); } 131 | 132 | Code *SAR(Code *lhs, Code *rhs) { return new_binop_code(INST_SAR, lhs, rhs); } 133 | 134 | Code *SAL(Code *lhs, Code *rhs) { return new_binop_code(INST_SAL, lhs, rhs); } 135 | 136 | Code *NEG(Code *lhs) { return new_unary_code(INST_NEG, lhs); } 137 | 138 | Code *NOT(Code *lhs) { return new_unary_code(INST_NOT, lhs); } 139 | 140 | Code *CMP(Code *lhs, Code *rhs) { return new_binop_code(INST_CMP, lhs, rhs); } 141 | 142 | Code *SETL(Code *lhs) { return new_unary_code(INST_SETL, lhs); } 143 | 144 | Code *SETLE(Code *lhs) { return new_unary_code(INST_SETLE, lhs); } 145 | 146 | Code *SETE(Code *lhs) { return new_unary_code(INST_SETE, lhs); } 147 | 148 | Code *AND(Code *lhs, Code *rhs) { return new_binop_code(INST_AND, lhs, rhs); } 149 | 150 | Code *XOR(Code *lhs, Code *rhs) { return new_binop_code(INST_XOR, lhs, rhs); } 151 | 152 | Code *OR(Code *lhs, Code *rhs) { return new_binop_code(INST_OR, lhs, rhs); } 153 | 154 | Code *RET() { return new_code(INST_RET); } 155 | 156 | Code *CLTD() { return new_code(INST_CLTD); } 157 | 158 | Code *CLTQ() { return new_code(INST_CLTQ); } 159 | 160 | Code *JMP(char *label) 161 | { 162 | Code *code = new_code(INST_JMP); 163 | code->label = label; 164 | return code; 165 | } 166 | 167 | Code *JE(char *label) 168 | { 169 | Code *code = new_code(INST_JE); 170 | code->label = label; 171 | return code; 172 | } 173 | 174 | Code *JNE(char *label) 175 | { 176 | Code *code = new_code(INST_JNE); 177 | code->label = label; 178 | return code; 179 | } 180 | 181 | 
Code *JAE(char *label) 182 | { 183 | Code *code = new_code(INST_JAE); 184 | code->label = label; 185 | return code; 186 | } 187 | 188 | Code *LABEL(char *label) 189 | { 190 | Code *code = new_code(INST_LABEL); 191 | code->label = label; 192 | return code; 193 | } 194 | 195 | Code *INCL(Code *lhs) { return new_unary_code(INST_INCL, lhs); } 196 | 197 | Code *INCQ(Code *lhs) { return new_unary_code(INST_INCQ, lhs); } 198 | 199 | Code *DECL(Code *lhs) { return new_unary_code(INST_DECL, lhs); } 200 | 201 | Code *DECQ(Code *lhs) { return new_unary_code(INST_DECQ, lhs); } 202 | 203 | Code *EAX() { return new_code(REG_EAX); } 204 | 205 | Code *EDX() { return new_code(REG_EDX); } 206 | 207 | Code *RAX() { return new_code(REG_RAX); } 208 | 209 | Code *RBP() { return new_code(REG_RBP); } 210 | 211 | Code *RSP() { return new_code(REG_RSP); } 212 | 213 | Code *RIP() { return new_code(REG_RIP); } 214 | 215 | Code *RDI() { return new_code(REG_RDI); } 216 | 217 | Code *RDX() { return new_code(REG_RDX); } 218 | 219 | Code *R10() { return new_code(REG_R10); } 220 | 221 | Code *R11() { return new_code(REG_R11); } 222 | 223 | Code *R12() { return new_code(REG_R12); } 224 | 225 | Code *R13() { return new_code(REG_R13); } 226 | 227 | Code *R14() { return new_code(REG_R14); } 228 | 229 | Code *R15() { return new_code(REG_R15); } 230 | 231 | Code *AL() { return new_code(REG_AL); } 232 | 233 | Code *CL() { return new_code(REG_CL); } 234 | 235 | Code *GLOBAL(char *label) 236 | { 237 | Code *code = new_code(CD_GLOBAL); 238 | code->label = label; 239 | return code; 240 | } 241 | 242 | char *code2str(Code *code) 243 | { 244 | if (code == NULL) return NULL; 245 | 246 | switch (code->kind) { 247 | case REG_AL: 248 | return "%al"; 249 | case REG_DIL: 250 | return "%dil"; 251 | case REG_SIL: 252 | return "%sil"; 253 | case REG_DL: 254 | return "%dl"; 255 | case REG_CL: 256 | return "%cl"; 257 | case REG_R8B: 258 | return "%r8b"; 259 | case REG_R9B: 260 | return "%r9b"; 261 | case REG_R10B: 262 | 
return "%r10b"; 263 | case REG_R11B: 264 | return "%r11b"; 265 | case REG_R12B: 266 | return "%r12b"; 267 | case REG_R13B: 268 | return "%r13b"; 269 | case REG_R14B: 270 | return "%r14b"; 271 | case REG_R15B: 272 | return "%r15b"; 273 | 274 | case REG_AX: 275 | return "%ax"; 276 | case REG_DI: 277 | return "%di"; 278 | case REG_SI: 279 | return "%si"; 280 | case REG_DX: 281 | return "%dx"; 282 | case REG_CX: 283 | return "%cx"; 284 | case REG_R8W: 285 | return "%r8w"; 286 | case REG_R9W: 287 | return "%r9w"; 288 | case REG_R10W: 289 | return "%r10w"; 290 | case REG_R11W: 291 | return "%r11w"; 292 | case REG_R12W: 293 | return "%r12w"; 294 | case REG_R13W: 295 | return "%r13w"; 296 | case REG_R14W: 297 | return "%r14w"; 298 | case REG_R15W: 299 | return "%r15w"; 300 | 301 | case REG_EAX: 302 | return "%eax"; 303 | case REG_EDI: 304 | return "%edi"; 305 | case REG_ESI: 306 | return "%esi"; 307 | case REG_EDX: 308 | return "%edx"; 309 | case REG_ECX: 310 | return "%ecx"; 311 | case REG_R8D: 312 | return "%r8d"; 313 | case REG_R9D: 314 | return "%r9d"; 315 | case REG_R10D: 316 | return "%r10d"; 317 | case REG_R11D: 318 | return "%r11d"; 319 | case REG_R12D: 320 | return "%r12d"; 321 | case REG_R13D: 322 | return "%r13d"; 323 | case REG_R14D: 324 | return "%r14d"; 325 | case REG_R15D: 326 | return "%r15d"; 327 | 328 | case REG_RAX: 329 | return "%rax"; 330 | case REG_RDI: 331 | return "%rdi"; 332 | case REG_RSI: 333 | return "%rsi"; 334 | case REG_RDX: 335 | return "%rdx"; 336 | case REG_RCX: 337 | return "%rcx"; 338 | case REG_R8: 339 | return "%r8"; 340 | case REG_R9: 341 | return "%r9"; 342 | case REG_R10: 343 | return "%r10"; 344 | case REG_R11: 345 | return "%r11"; 346 | case REG_R12: 347 | return "%r12"; 348 | case REG_R13: 349 | return "%r13"; 350 | case REG_R14: 351 | return "%r14"; 352 | case REG_R15: 353 | return "%r15"; 354 | case REG_RBP: 355 | return "%rbp"; 356 | case REG_RSP: 357 | return "%rsp"; 358 | 359 | case REG_RIP: 360 | return "%rip"; 361 | 362 | 
case INST_MOV: 363 | return format("mov %s, %s", code2str(code->lhs), 364 | code2str(code->rhs)); 365 | 366 | case INST_MOVL: 367 | return format("movl %s, %s", code2str(code->lhs), 368 | code2str(code->rhs)); 369 | 370 | case INST_MOVSBL: 371 | return format("movsbl %s, %s", code2str(code->lhs), 372 | code2str(code->rhs)); 373 | 374 | case INST_MOVSLQ: 375 | return format("movslq %s, %s", code2str(code->lhs), 376 | code2str(code->rhs)); 377 | 378 | case INST_MOVZB: 379 | return format("movzb %s, %s", code2str(code->lhs), 380 | code2str(code->rhs)); 381 | 382 | case INST_LEA: 383 | return format("lea %s, %s", code2str(code->lhs), 384 | code2str(code->rhs)); 385 | 386 | case INST_POP: 387 | return format("pop %s", code2str(code->lhs)); 388 | 389 | case INST_PUSH: 390 | return format("push %s", code2str(code->lhs)); 391 | 392 | case INST_ADD: 393 | return format("add %s, %s", code2str(code->lhs), 394 | code2str(code->rhs)); 395 | 396 | case INST_ADDQ: 397 | return format("addq %s, %s", code2str(code->lhs), 398 | code2str(code->rhs)); 399 | 400 | case INST_SUB: 401 | return format("sub %s, %s", code2str(code->lhs), 402 | code2str(code->rhs)); 403 | 404 | case INST_IMUL: 405 | return format("imul %s, %s", code2str(code->lhs), 406 | code2str(code->rhs)); 407 | 408 | case INST_IDIV: 409 | return format("idiv %s", code2str(code->lhs)); 410 | 411 | case INST_SAR: 412 | return format("sar %s, %s", code2str(code->lhs), 413 | code2str(code->rhs)); 414 | 415 | case INST_SAL: 416 | return format("sal %s, %s", code2str(code->lhs), 417 | code2str(code->rhs)); 418 | 419 | case INST_NEG: 420 | return format("neg %s", code2str(code->lhs)); 421 | 422 | case INST_NOT: 423 | return format("not %s", code2str(code->lhs)); 424 | 425 | case INST_CMP: 426 | return format("cmp %s, %s", code2str(code->lhs), 427 | code2str(code->rhs)); 428 | 429 | case INST_SETL: 430 | return format("setl %s", code2str(code->lhs)); 431 | 432 | case INST_SETLE: 433 | return format("setle %s", 
code2str(code->lhs)); 434 | 435 | case INST_SETE: 436 | return format("sete %s", code2str(code->lhs)); 437 | 438 | case INST_AND: 439 | return format("and %s, %s", code2str(code->lhs), 440 | code2str(code->rhs)); 441 | 442 | case INST_XOR: 443 | return format("xor %s, %s", code2str(code->lhs), 444 | code2str(code->rhs)); 445 | 446 | case INST_OR: 447 | return format("or %s, %s", code2str(code->lhs), 448 | code2str(code->rhs)); 449 | 450 | case INST_RET: 451 | return "ret"; 452 | 453 | case INST_CLTD: 454 | return "cltd"; 455 | 456 | case INST_CLTQ: 457 | return "cltq"; 458 | 459 | case INST_JMP: 460 | return format("jmp %s", code->label); 461 | 462 | case INST_JE: 463 | return format("je %s", code->label); 464 | 465 | case INST_JNE: 466 | return format("jne %s", code->label); 467 | 468 | case INST_JAE: 469 | return format("jae %s", code->label); 470 | 471 | case INST_LABEL: 472 | return format("%s:", code->label); 473 | 474 | case INST_INCL: 475 | return format("incl %s", code2str(code->lhs)); 476 | 477 | case INST_INCQ: 478 | return format("incq %s", code2str(code->lhs)); 479 | 480 | case INST_DECL: 481 | return format("decl %s", code2str(code->lhs)); 482 | 483 | case INST_DECQ: 484 | return format("decq %s", code2str(code->lhs)); 485 | 486 | case INST_CALL: 487 | return format("call %s", code->label); 488 | 489 | case INST_NOP: 490 | return "nop"; 491 | 492 | case INST_SYSCALL: 493 | return "syscall"; 494 | 495 | case CD_COMMENT: 496 | return format("/* %s */", code->sval); 497 | 498 | case CD_VALUE: 499 | return format("$%d", code->ival); 500 | 501 | case CD_ADDR_OF: 502 | if (code->ival == 0) return format("(%s)", code2str(code->lhs)); 503 | return format("%d(%s)", code->ival, code2str(code->lhs)); 504 | 505 | case CD_ADDR_OF_LABEL: 506 | return format("%s(%s)", code->label, code2str(code->lhs)); 507 | 508 | case CD_GLOBAL: 509 | return format(".global %s", code->label); 510 | 511 | case CD_TEXT: 512 | return ".text"; 513 | 514 | case CD_DATA: 515 | return 
".data"; 516 | 517 | case CD_ZERO: 518 | return format(".zero %d", code->ival); 519 | 520 | case CD_LONG: 521 | return format(".long %d", code->ival); 522 | 523 | case CD_BYTE: 524 | return format(".byte %d", code->ival); 525 | 526 | case CD_QUAD: 527 | return format(".quad %d", code->ival); 528 | 529 | case CD_ASCII: 530 | return format(".ascii \"%s\"", 531 | escape_string(code->sval, code->ival)); 532 | } 533 | warn(format("code.c %d", code->kind)); 534 | assert(0); 535 | } 536 | 537 | void dump_code(Code *code, FILE *fh) 538 | { 539 | char *str = code2str(code); 540 | if (str != NULL) fprintf(fh, "%s\n", str); 541 | } 542 | -------------------------------------------------------------------------------- /cc/lex.c: -------------------------------------------------------------------------------- 1 | #include "cc.h" 2 | 3 | char *read_entire_file(char *filepath); 4 | void erase_backslash_newline(char *src); 5 | 6 | Source source; 7 | 8 | void init_source(char *src, char *filepath) 9 | { 10 | // caluculate current working directory 11 | int i, j, len = strlen(filepath); 12 | source.cwd = safe_malloc(sizeof(char *) * len); 13 | // TODO: ad-hoc 14 | for (i = len - 1; i >= 0; i--) { 15 | if (filepath[i] == '/') { 16 | // detect last '/' 17 | for (j = 0; j <= i; j++) { 18 | source.cwd[j] = filepath[j]; 19 | } 20 | source.cwd[i + 1] = '\0'; 21 | break; 22 | } 23 | } 24 | // When this source code and aqcc are located in the same directory 25 | if (source.cwd[0] == '\0') { 26 | strcpy(source.cwd, "./"); 27 | } 28 | 29 | source.filepath = filepath; 30 | source.src = src; 31 | source.line = source.column = 1; 32 | source.line2length = new_vector(); 33 | 34 | // fill file.line2length 35 | vector_push_back(source.line2length, NULL); // line is 1-based index. 
36 | for (int i = 0, column = 1; src[i] == '\0'; i++, column++) { 37 | if (src[i] == '\n') { 38 | vector_push_back(source.line2length, (void *)column); 39 | column = 0; 40 | } 41 | } 42 | } 43 | 44 | Token *make_token(int kind) 45 | { 46 | Source *src = (Source *)safe_malloc(sizeof(Source)); 47 | memcpy(src, &source, sizeof(Source)); 48 | return new_token(kind, src); 49 | } 50 | 51 | void ungetch() 52 | { 53 | source.src--; 54 | if (*source.src == '\n') { 55 | source.line--; 56 | source.column = (int)vector_get(source.line2length, source.line); 57 | } 58 | else { 59 | source.column--; 60 | } 61 | } 62 | 63 | char peekch() { return *source.src; } 64 | 65 | char getch() 66 | { 67 | char ch = peekch(); 68 | if (ch == '\0') error("unexpected EOF"); 69 | 70 | if (ch == '\n') { 71 | source.line++; 72 | source.column = 0; 73 | } 74 | else { 75 | source.column++; 76 | } 77 | return *source.src++; 78 | } 79 | 80 | int read_next_hex_int() 81 | { 82 | // assume that prefix '0x' is already read. 83 | int acc = 0; 84 | while (1) { 85 | int ch = peekch(); 86 | if (isdigit(ch)) 87 | acc = 16 * acc + ch - '0'; 88 | else if ('a' <= ch && ch <= 'f') 89 | acc = 16 * acc + 10 + ch - 'a'; 90 | else if ('A' <= ch && ch <= 'F') 91 | acc = 16 * acc + 10 + ch - 'A'; 92 | else 93 | break; 94 | getch(); 95 | } 96 | 97 | return acc; 98 | } 99 | 100 | int read_next_dec_int() 101 | { 102 | int acc = 0; 103 | while (isdigit(peekch())) acc = 10 * acc + getch() - '0'; 104 | return acc; 105 | } 106 | 107 | int read_next_oct_int() 108 | { 109 | // assume that prefix '0' is already read. 
110 | int acc = 0; 111 | while (1) { 112 | int ch = peekch(); 113 | if ('0' <= ch && ch <= '7') 114 | acc = 8 * acc + ch - '0'; 115 | else 116 | break; 117 | getch(); 118 | } 119 | 120 | return acc; 121 | } 122 | 123 | Token *read_next_int_token() 124 | { 125 | int ch = peekch(), ival; 126 | if (ch == '0') { 127 | getch(); 128 | if (peekch() == 'x') { 129 | getch(); 130 | ival = read_next_hex_int(); 131 | } 132 | else 133 | ival = read_next_oct_int(); 134 | } 135 | else 136 | ival = read_next_dec_int(); 137 | 138 | Token *token = make_token(tINT); 139 | token->ival = ival; 140 | return token; 141 | } 142 | 143 | Token *read_next_ident_token() 144 | { 145 | StringBuilder *sb = new_string_builder(); 146 | while (1) { 147 | int ch = getch(); 148 | 149 | if (!isalnum(ch) && ch != '_') { 150 | ungetch(); 151 | break; 152 | } 153 | 154 | string_builder_append(sb, ch); 155 | } 156 | 157 | static Map *str2keyword = NULL; 158 | if (str2keyword == NULL) { 159 | str2keyword = new_map(); 160 | 161 | map_insert(str2keyword, "return", (void *)kRETURN); 162 | map_insert(str2keyword, "if", (void *)kIF); 163 | map_insert(str2keyword, "else", (void *)kELSE); 164 | map_insert(str2keyword, "while", (void *)kWHILE); 165 | map_insert(str2keyword, "break", (void *)kBREAK); 166 | map_insert(str2keyword, "continue", (void *)kCONTINUE); 167 | map_insert(str2keyword, "for", (void *)kFOR); 168 | map_insert(str2keyword, "int", (void *)kINT); 169 | map_insert(str2keyword, "char", (void *)kCHAR); 170 | map_insert(str2keyword, "sizeof", (void *)kSIZEOF); 171 | map_insert(str2keyword, "switch", (void *)kSWITCH); 172 | map_insert(str2keyword, "default", (void *)kDEFAULT); 173 | map_insert(str2keyword, "case", (void *)kCASE); 174 | map_insert(str2keyword, "goto", (void *)kGOTO); 175 | map_insert(str2keyword, "struct", (void *)kSTRUCT); 176 | map_insert(str2keyword, "typedef", (void *)kTYPEDEF); 177 | map_insert(str2keyword, "do", (void *)kDO); 178 | map_insert(str2keyword, "void", (void *)kVOID); 
179 | map_insert(str2keyword, "union", (void *)kUNION); 180 | map_insert(str2keyword, "const", (void *)kCONST); 181 | map_insert(str2keyword, "enum", (void *)kENUM); 182 | map_insert(str2keyword, "_Noreturn", (void *)kNORETURN); 183 | map_insert(str2keyword, "static", (void *)kSTATIC); 184 | map_insert(str2keyword, "extern", (void *)kEXTERN); 185 | } 186 | 187 | char *str; 188 | str = string_builder_get(sb); 189 | KeyValue *kv = map_lookup(str2keyword, str); 190 | if (kv) return make_token((int)kv_value(kv)); 191 | 192 | Token *token = make_token(tIDENT); 193 | token->sval = str; 194 | return token; 195 | } 196 | 197 | // assume that the first doublequote has been already read. 198 | Token *read_next_string_literal_token() 199 | { 200 | StringBuilder *sb = new_string_builder(); 201 | while (1) { 202 | char ch = getch(); 203 | 204 | switch (ch) { 205 | case '\\': 206 | ch = getch(); 207 | ch = unescape_char(ch); 208 | break; 209 | 210 | case '"': 211 | goto end; 212 | 213 | case '\n': 214 | error("unexpected new-line character"); 215 | } 216 | 217 | string_builder_append(sb, ch); 218 | } 219 | 220 | Token *token; 221 | end: 222 | token = make_token(tSTRING_LITERAL); 223 | token->sval = string_builder_get(sb); 224 | token->ssize = string_builder_size(sb); 225 | return token; 226 | } 227 | 228 | // assume that the first singlequote has been already read. 229 | Token *read_next_character_constant_token() 230 | { 231 | char ch = getch(); 232 | if (ch == '\'') error("unexpected singlequote."); 233 | if (ch == '\\') ch = unescape_char(getch()); 234 | while (getch() != '\'') 235 | ; 236 | 237 | Token *token = make_token(tINT); 238 | token->ival = ch; 239 | return token; 240 | } 241 | 242 | Token *read_next_token() 243 | { 244 | while (peekch() != '\0') { 245 | char ch = getch(); 246 | 247 | // \n should be a token because \n has some meaning in preprocessing. 
248 | if (isspace(ch) && ch != '\n') continue; 249 | 250 | if (isdigit(ch)) { 251 | ungetch(); 252 | return read_next_int_token(); 253 | } 254 | 255 | if (isalpha(ch) || ch == '_') { 256 | ungetch(); 257 | Token *token = read_next_ident_token(); 258 | // TODO: for now, const is the same as comments. 259 | if (token->kind == kCONST) continue; 260 | // TODO: for now, _Noreturn is the same as comments. 261 | if (token->kind == kNORETURN) continue; 262 | return token; 263 | } 264 | 265 | switch (ch) { 266 | case '"': 267 | return read_next_string_literal_token(); 268 | 269 | case '\'': 270 | return read_next_character_constant_token(); 271 | 272 | case '+': 273 | ch = getch(); 274 | if (ch == '+') return make_token(tINC); 275 | if (ch == '=') return make_token(tPLUSEQ); 276 | ungetch(); 277 | return make_token(tPLUS); 278 | 279 | case '-': 280 | ch = getch(); 281 | if (ch == '=') return make_token(tMINUSEQ); 282 | if (ch == '>') return make_token(tARROW); 283 | if (ch == '-') return make_token(tDEC); 284 | ungetch(); 285 | return make_token(tMINUS); 286 | 287 | case '*': 288 | ch = getch(); 289 | if (ch == '=') return make_token(tSTAREQ); 290 | ungetch(); 291 | return make_token(tSTAR); 292 | 293 | case '/': 294 | ch = getch(); 295 | if (ch == '=') return make_token(tSLASHEQ); 296 | if (ch == '*') { // old comment 297 | while (1) { 298 | if (getch() != '*') continue; 299 | if (getch() == '/') break; 300 | ungetch(); 301 | } 302 | continue; 303 | } 304 | if (ch == '/') { // new comment 305 | while (getch() != '\n') 306 | ; 307 | continue; 308 | } 309 | ungetch(); 310 | return make_token(tSLASH); 311 | 312 | case '%': 313 | ch = getch(); 314 | if (ch == '=') return make_token(tPERCENTEQ); 315 | ungetch(); 316 | return make_token(tPERCENT); 317 | 318 | case '(': 319 | return make_token(tLPAREN); 320 | 321 | case ')': 322 | return make_token(tRPAREN); 323 | 324 | case '<': 325 | ch = getch(); 326 | switch (ch) { 327 | case '<': 328 | ch = getch(); 329 | if (ch == '=') 
return make_token(tLSHIFTEQ); 330 | ungetch(); 331 | return make_token(tLSHIFT); 332 | case '=': 333 | return make_token(tLTE); 334 | } 335 | ungetch(); 336 | return make_token(tLT); 337 | 338 | case '>': 339 | ch = getch(); 340 | switch (ch) { 341 | case '>': 342 | ch = getch(); 343 | if (ch == '=') return make_token(tRSHIFTEQ); 344 | ungetch(); 345 | return make_token(tRSHIFT); 346 | case '=': 347 | return make_token(tGTE); 348 | } 349 | ungetch(); 350 | return make_token(tGT); 351 | 352 | case '=': 353 | ch = getch(); 354 | if (ch == '=') return make_token(tEQEQ); 355 | ungetch(); 356 | return make_token(tEQ); 357 | 358 | case '!': 359 | ch = getch(); 360 | if (ch == '=') return make_token(tNEQ); 361 | ungetch(); 362 | return make_token(tEXCL); 363 | 364 | case '&': 365 | ch = getch(); 366 | if (ch == '&') return make_token(tANDAND); 367 | if (ch == '=') return make_token(tANDEQ); 368 | ungetch(); 369 | return make_token(tAND); 370 | 371 | case '^': 372 | ch = getch(); 373 | if (ch == '=') return make_token(tHATEQ); 374 | ungetch(); 375 | return make_token(tHAT); 376 | 377 | case '|': 378 | ch = getch(); 379 | if (ch == '|') return make_token(tBARBAR); 380 | if (ch == '=') return make_token(tBAREQ); 381 | ungetch(); 382 | return make_token(tBAR); 383 | 384 | case ';': 385 | return make_token(tSEMICOLON); 386 | 387 | case ',': 388 | return make_token(tCOMMA); 389 | 390 | case '.': 391 | ch = getch(); 392 | if (ch != '.') { 393 | ungetch(); 394 | return make_token(tDOT); 395 | } 396 | if (getch() != '.') 397 | error("%s:%d:%d: unexpected dot", source.filepath, 398 | source.line, source.column); 399 | return make_token(tDOTS); // ... 
400 | 401 | case '{': 402 | return make_token(tLBRACE); 403 | 404 | case '}': 405 | return make_token(tRBRACE); 406 | 407 | case ':': 408 | return make_token(tCOLON); 409 | 410 | case '?': 411 | return make_token(tQUESTION); 412 | 413 | case '[': 414 | return make_token(tLBRACKET); 415 | 416 | case ']': 417 | return make_token(tRBRACKET); 418 | 419 | case '#': 420 | return make_token(tNUMBER); 421 | 422 | case '~': 423 | return make_token(tTILDE); 424 | 425 | case '\n': 426 | return make_token(tNEWLINE); 427 | } 428 | 429 | error(format("%s:%d:%d:unexpected character", source.filepath, 430 | source.line, source.column)); 431 | } 432 | 433 | return make_token(tEOF); 434 | } 435 | 436 | Vector *read_all_tokens(char *src, char *filepath) 437 | { 438 | erase_backslash_newline(src); 439 | 440 | init_source(src, filepath); 441 | 442 | Vector *tokens = new_vector(); 443 | while (1) { 444 | Token *token = read_next_token(); 445 | vector_push_back(tokens, token); 446 | if (token->kind == tEOF) break; 447 | } 448 | 449 | return tokens; 450 | } 451 | 452 | const char *token_kind2str(int kind) 453 | { 454 | switch (kind) { 455 | case tINT: 456 | return "tINT"; 457 | case tSTRING_LITERAL: 458 | return "tSTRING_LITERAL"; 459 | case tPLUS: 460 | return "tPLUS"; 461 | case tMINUS: 462 | return "tMINUS"; 463 | case tSTAR: 464 | return "tSTAR"; 465 | case tSLASH: 466 | return "tSLASH"; 467 | case tPERCENT: 468 | return "tPERCENT"; 469 | case tLPAREN: 470 | return "tLPAREN"; 471 | case tRPAREN: 472 | return "tRPAREN"; 473 | case tLSHIFT: 474 | return "tLSHIFT"; 475 | case tRSHIFT: 476 | return "tRSHIFT"; 477 | case tLT: 478 | return "tLT"; 479 | case tGT: 480 | return "tGT"; 481 | case tLTE: 482 | return "tLTE"; 483 | case tGTE: 484 | return "tGTE"; 485 | case tEQEQ: 486 | return "tEQEQ"; 487 | case tNEQ: 488 | return "tNEQ"; 489 | case tAND: 490 | return "tAND"; 491 | case tHAT: 492 | return "tHAT"; 493 | case tEXCL: 494 | return "tEXCL"; 495 | case tBAR: 496 | return "tBAR"; 497 | 
case tANDAND: 498 | return "tANDAND"; 499 | case tBARBAR: 500 | return "tBARBAR"; 501 | case tIDENT: 502 | return "tIDENT"; 503 | case tEQ: 504 | return "tEQ"; 505 | case tPLUSEQ: 506 | return "tPLUSEQ"; 507 | case tMINUSEQ: 508 | return "tMINUSEQ"; 509 | case tSTAREQ: 510 | return "tSTAREQ"; 511 | case tSLASHEQ: 512 | return "tSLASHEQ"; 513 | case tPERCENTEQ: 514 | return "tPERCENTEQ"; 515 | case tANDEQ: 516 | return "tANDEQ"; 517 | case tHATEQ: 518 | return "tHATEQ"; 519 | case tBAREQ: 520 | return "tBAREQ"; 521 | case tLSHIFTEQ: 522 | return "tLSHIFTEQ"; 523 | case tRSHIFTEQ: 524 | return "tRSHIFTEQ"; 525 | case tSEMICOLON: 526 | return "tSEMICOLON"; 527 | case tCOMMA: 528 | return "tCOMMA"; 529 | case tDOT: 530 | return "tDOT"; 531 | case tARROW: 532 | return "tARROW"; 533 | case tLBRACE: 534 | return "tLBRACE"; 535 | case tRBRACE: 536 | return "tRBRACE"; 537 | case kRETURN: 538 | return "kRETURN"; 539 | case tCOLON: 540 | return "tCOLON"; 541 | case tQUESTION: 542 | return "tQUESTION"; 543 | case tLBRACKET: 544 | return "tLBRACKET"; 545 | case tRBRACKET: 546 | return "tRBRACKET"; 547 | case tINC: 548 | return "tINC"; 549 | case tDEC: 550 | return "tDEC"; 551 | case tDOTS: 552 | return "tDOTS"; 553 | case tNUMBER: 554 | return "tNUMBER"; 555 | case tNEWLINE: 556 | return "tNEWLINE"; 557 | case tTILDE: 558 | return "tTILDE"; 559 | case tEOF: 560 | return "tEOF"; 561 | case kIF: 562 | return "kIF"; 563 | case kELSE: 564 | return "kELSE"; 565 | case kWHILE: 566 | return "kWHILE"; 567 | case kBREAK: 568 | return "kBREAK"; 569 | case kCONTINUE: 570 | return "kCONTINUE"; 571 | case kFOR: 572 | return "kFOR"; 573 | case kINT: 574 | return "kINT"; 575 | case kCHAR: 576 | return "kCHAR"; 577 | case kSIZEOF: 578 | return "kSIZEOF"; 579 | case kSWITCH: 580 | return "kSWITCH"; 581 | case kCASE: 582 | return "kCASE"; 583 | case kDEFAULT: 584 | return "kDEFAULT"; 585 | case kGOTO: 586 | return "kGOTO"; 587 | case kSTRUCT: 588 | return "kSTRUCT"; 589 | case kUNION: 590 | 
return "kUNION"; 591 | case kTYPEDEF: 592 | return "kTYPEDEF"; 593 | case kDO: 594 | return "kDO"; 595 | case kVOID: 596 | return "kVOID"; 597 | case kCONST: 598 | return "kCONST"; 599 | case kENUM: 600 | return "kENUM"; 601 | case kNORETURN: 602 | return "kNORETURN"; 603 | case kSTATIC: 604 | return "kSTATIC"; 605 | case kEXTERN: 606 | return "kEXTERN"; 607 | default: 608 | return "***unknown token***"; 609 | } 610 | } 611 | 612 | Vector *concatenate_string_literal_tokens(Vector *tokens) 613 | { 614 | init_tokenseq(tokens); 615 | 616 | Vector *ntokens = new_vector(); 617 | while (!match_token(tEOF)) { 618 | if (!match_token(tSTRING_LITERAL)) { 619 | vector_push_back(ntokens, pop_token()); 620 | continue; 621 | } 622 | 623 | Token *token = pop_token(); 624 | Vector *strs = new_vector(); 625 | vector_push_back(strs, token); 626 | while (match_token(tSTRING_LITERAL)) 627 | vector_push_back(strs, pop_token()); 628 | 629 | // calc size 630 | int size = 0; 631 | for (int i = 0; i < vector_size(strs); i++) 632 | size += ((Token *)vector_get(strs, i))->ssize - 1; 633 | size++; // '\0' 634 | 635 | // concatenate strings 636 | char *buf = (char *)safe_malloc(size); 637 | int offset = 0; 638 | for (int i = 0; i < vector_size(strs); i++) { 639 | Token *token = (Token *)vector_get(strs, i); 640 | memcpy(buf + offset, token->sval, token->ssize - 1); 641 | offset += token->ssize - 1; 642 | } 643 | 644 | assert(offset == size - 1); 645 | buf[offset] = '\0'; 646 | 647 | token->sval = buf; 648 | token->ssize = size; 649 | vector_push_back(ntokens, token); 650 | } 651 | vector_push_back(ntokens, pop_token()); // tEOF 652 | 653 | return ntokens; 654 | } 655 | 656 | Vector *read_tokens_from_filepath(char *filepath) 657 | { 658 | char *src = read_entire_file(filepath); 659 | return read_all_tokens(src, filepath); 660 | } 661 | 662 | void erase_backslash_newline(char *src) 663 | { 664 | char *r = src, *w = src; 665 | while (*r != '\0') { 666 | if (*r == '\\' && *(r + 1) == '\n') 667 | r 
+= 2; 668 | else 669 | *w++ = *r++; 670 | } 671 | *w = '\0'; 672 | } 673 | 674 | char *read_entire_file(char *filepath) 675 | { 676 | FILE *fh = fopen(filepath, "r"); 677 | if (fh == NULL) error("no such file: '%s'", filepath); 678 | 679 | // read the file all 680 | StringBuilder *sb = new_string_builder(); 681 | int ch; 682 | while ((ch = fgetc(fh)) != EOF) string_builder_append(sb, ch); 683 | return string_builder_get(sb); 684 | 685 | fclose(fh); 686 | } 687 | --------------------------------------------------------------------------------