├── .gitignore ├── Cargo.lock ├── Cargo.toml ├── LICENSE ├── README.md ├── example ├── ary.c ├── bigints.c ├── break_continue.c ├── calc.c ├── do_while.c ├── enum.c ├── escape-seq.c ├── fact.c ├── fizzbuzz.c ├── funccall.c ├── goto.c ├── hello.c ├── mandelbrot.c ├── op.c ├── prime_list.c ├── sizeof.c ├── struct.c ├── struct_init.c ├── switch.c ├── ternaryop.c └── while_fact.c ├── include ├── float.h ├── iso646.h ├── rucc.h ├── stdalign.h ├── stdarg.h ├── stdbool.h ├── stddef.h └── stdnoreturn.h ├── rucc.sh └── src ├── codegen.rs ├── common.rs ├── error.rs ├── lexer.rs ├── lib.rs ├── main.rs ├── node.rs ├── parser.rs └── types.rs /.gitignore: -------------------------------------------------------------------------------- 1 | target 2 | -------------------------------------------------------------------------------- /Cargo.lock: -------------------------------------------------------------------------------- 1 | # This file is automatically @generated by Cargo. 2 | # It is not intended for manual editing. 3 | version = 3 4 | 5 | [[package]] 6 | name = "aho-corasick" 7 | version = "0.6.10" 8 | source = "registry+https://github.com/rust-lang/crates.io-index" 9 | checksum = "81ce3d38065e618af2d7b77e10c5ad9a069859b4be3c2250f674af3840d9c8a5" 10 | dependencies = [ 11 | "memchr", 12 | ] 13 | 14 | [[package]] 15 | name = "aho-corasick" 16 | version = "1.0.5" 17 | source = "registry+https://github.com/rust-lang/crates.io-index" 18 | checksum = "0c378d78423fdad8089616f827526ee33c19f2fddbd5de1629152c9593ba4783" 19 | dependencies = [ 20 | "memchr", 21 | ] 22 | 23 | [[package]] 24 | name = "ansi_term" 25 | version = "0.9.0" 26 | source = "registry+https://github.com/rust-lang/crates.io-index" 27 | checksum = "23ac7c30002a5accbf7e8987d0632fa6de155b7c3d39d0067317a391e00a2ef6" 28 | 29 | [[package]] 30 | name = "ansi_term" 31 | version = "0.12.1" 32 | source = "registry+https://github.com/rust-lang/crates.io-index" 33 | checksum = "d52a9bb7ec0cf484c551830a7ce27bd20d67eac647e1befb56b0be4ee39a55d2" 34 | dependencies = [ 35 | "winapi", 36 | ] 37 | 38 | [[package]] 39 | name = "atty" 40 | version = "0.2.14" 41 | source = "registry+https://github.com/rust-lang/crates.io-index" 42 | checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" 43 | dependencies = [ 44 | "hermit-abi", 45 | "libc", 46 | "winapi", 47 | ] 48 | 49 | [[package]] 50 | name = "bitflags" 51 | version = "1.3.2" 52 | source = "registry+https://github.com/rust-lang/crates.io-index" 53 | checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" 54 | 55 | [[package]] 56 | name = "cc" 57 | version = "1.0.83" 58 | source = "registry+https://github.com/rust-lang/crates.io-index" 59 | checksum = "f1174fb0b6ec23863f8b971027804a42614e347eafb0a95bf0b12cdae21fc4d0" 60 | dependencies = [ 61 | "libc", 62 | ] 63 | 64 | [[package]] 65 | name = "clap" 66 | version = "2.34.0" 67 | source = "registry+https://github.com/rust-lang/crates.io-index" 68 | checksum = "a0610544180c38b88101fecf2dd634b174a62eef6946f84dfc6a7127512b381c" 69 | dependencies = [ 70 | "ansi_term 0.12.1", 71 | "atty", 72 | "bitflags", 73 | "strsim", 74 | "textwrap", 75 | "unicode-width", 76 | "vec_map", 77 | ] 78 | 79 | [[package]] 80 | name = "fuchsia-cprng" 81 | version = "0.1.1" 82 | source = "registry+https://github.com/rust-lang/crates.io-index" 83 | checksum = "a06f77d526c1a601b7c4cdd98f54b5eaabffc14d5f2f0296febdc7f357c6d3ba" 84 | 85 | [[package]] 86 | name = "hermit-abi" 87 | version = "0.1.19" 88 | source = "registry+https://github.com/rust-lang/crates.io-index" 89 | checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33" 90 | dependencies = [ 91 | "libc", 92 | ] 93 | 94 | [[package]] 95 | name = "lazy_static" 96 | version = "1.4.0" 97 | source = "registry+https://github.com/rust-lang/crates.io-index" 98 | checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" 99 | 100 | [[package]] 101 | name = "libc" 102 | version = "0.2.148" 103 | source = "registry+https://github.com/rust-lang/crates.io-index" 104 | checksum = "9cdc71e17332e86d2e1d38c1f99edcb6288ee11b815fb1a4b049eaa2114d369b" 105 | 106 | [[package]] 107 | name = "llvm-sys" 108 | version = "100.2.4" 109 | source = "registry+https://github.com/rust-lang/crates.io-index" 110 | checksum = "72251a917884f75079ba3bd20fe41aa0c89d40392ac3de6a8b1063ee92475b34" 111 | dependencies = [ 112 | "cc", 113 | "lazy_static", 114 | "libc", 115 | "regex 1.9.5", 116 | "semver", 117 | ] 118 | 119 | [[package]] 120 | name = "memchr" 121 | version = "2.6.3" 122 | source = "registry+https://github.com/rust-lang/crates.io-index" 123 | checksum = "8f232d6ef707e1956a43342693d2a31e72989554d58299d7a88738cc95b0d35c" 124 | 125 | [[package]] 126 | name = "rand" 127 | version = "0.3.23" 128 | source = "registry+https://github.com/rust-lang/crates.io-index" 129 | checksum = "64ac302d8f83c0c1974bf758f6b041c6c8ada916fbb44a609158ca8b064cc76c" 130 | dependencies = [ 131 | "libc", 132 | "rand 0.4.6", 133 | ] 134 | 135 | [[package]] 136 | name = "rand" 137 | version = "0.4.6" 138 | source = "registry+https://github.com/rust-lang/crates.io-index" 139 | checksum = "552840b97013b1a26992c11eac34bdd778e464601a4c2054b5f0bff7c6761293" 140 | dependencies = [ 141 | "fuchsia-cprng", 142 | "libc", 143 | "rand_core 0.3.1", 144 | "rdrand", 145 | "winapi", 146 | ] 147 | 148 | [[package]] 149 | name = "rand_core" 150 | version = "0.3.1" 151 | source = "registry+https://github.com/rust-lang/crates.io-index" 152 | checksum = "7a6fdeb83b075e8266dcc8762c22776f6877a63111121f5f8c7411e5be7eed4b" 153 | dependencies = [ 154 | "rand_core 0.4.2", 155 | ] 156 | 157 | [[package]] 158 | name = "rand_core" 159 | version = "0.4.2" 160 | source = "registry+https://github.com/rust-lang/crates.io-index" 161 | checksum = "9c33a3c44ca05fa6f1807d8e6743f3824e8509beca625669633be0acbdf509dc" 162 | 163 | [[package]] 164 | name = "rdrand" 165 | version = "0.4.0" 166 | source = "registry+https://github.com/rust-lang/crates.io-index" 167 | checksum = "678054eb77286b51581ba43620cc911abf02758c91f93f479767aed0f90458b2" 168 | dependencies = [ 169 | "rand_core 0.3.1", 170 | ] 171 | 172 | [[package]] 173 | name = "regex" 174 | version = "0.2.11" 175 | source = "registry+https://github.com/rust-lang/crates.io-index" 176 | checksum = "9329abc99e39129fcceabd24cf5d85b4671ef7c29c50e972bc5afe32438ec384" 177 | dependencies = [ 178 | "aho-corasick 0.6.10", 179 | "memchr", 180 | "regex-syntax 0.5.6", 181 | "thread_local", 182 | "utf8-ranges", 183 | ] 184 | 185 | [[package]] 186 | name = "regex" 187 | version = "1.9.5" 188 | source = "registry+https://github.com/rust-lang/crates.io-index" 189 | checksum = "697061221ea1b4a94a624f67d0ae2bfe4e22b8a17b6a192afb11046542cc8c47" 190 | dependencies = [ 191 | "aho-corasick 1.0.5", 192 | "memchr", 193 | "regex-automata", 194 | "regex-syntax 0.7.5", 195 | ] 196 | 197 | [[package]] 198 | name = "regex-automata" 199 | version = "0.3.8" 200 | source = "registry+https://github.com/rust-lang/crates.io-index" 201 | checksum = "c2f401f4955220693b56f8ec66ee9c78abffd8d1c4f23dc41a23839eb88f0795" 202 | dependencies = [ 203 | "aho-corasick 1.0.5", 204 | "memchr", 205 | "regex-syntax 0.7.5", 206 | ] 207 | 208 | [[package]] 209 | name = "regex-syntax" 210 | version = "0.5.6" 211 | source = "registry+https://github.com/rust-lang/crates.io-index" 212 | checksum = "7d707a4fa2637f2dca2ef9fd02225ec7661fe01a53623c1e6515b6916511f7a7" 213 | dependencies = [ 214 | "ucd-util", 215 | ] 216 | 217 | [[package]] 218 | name = "regex-syntax" 219 | version = "0.7.5" 220 | source = "registry+https://github.com/rust-lang/crates.io-index" 221 | checksum = "dbb5fb1acd8a1a18b3dd5be62d25485eb770e05afb408a9627d14d451bae12da" 222 | 223 | [[package]] 224 | name = "rucc" 225 | version = "0.2.1" 226 | dependencies = [ 227 | "ansi_term 0.9.0", 228 | "clap", 229 | "lazy_static", 230 | "libc", 231 | "llvm-sys", 232 | "rand 0.3.23", 233 | "regex 0.2.11", 234 | ] 235 | 236 | [[package]] 237 | name = "semver" 238 | version = "0.9.0" 239 | source = "registry+https://github.com/rust-lang/crates.io-index" 240 | checksum = "1d7eb9ef2c18661902cc47e535f9bc51b78acd254da71d375c2f6720d9a40403" 241 | dependencies = [ 242 | "semver-parser", 243 | ] 244 | 245 | [[package]] 246 | name = "semver-parser" 247 | version = "0.7.0" 248 | source = "registry+https://github.com/rust-lang/crates.io-index" 249 | checksum = "388a1df253eca08550bef6c72392cfe7c30914bf41df5269b68cbd6ff8f570a3" 250 | 251 | [[package]] 252 | name = "strsim" 253 | version = "0.8.0" 254 | source = "registry+https://github.com/rust-lang/crates.io-index" 255 | checksum = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a" 256 | 257 | [[package]] 258 | name = "textwrap" 259 | version = "0.11.0" 260 | source = "registry+https://github.com/rust-lang/crates.io-index" 261 | checksum = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060" 262 | dependencies = [ 263 | "unicode-width", 264 | ] 265 | 266 | [[package]] 267 | name = "thread_local" 268 | version = "0.3.6" 269 | source = "registry+https://github.com/rust-lang/crates.io-index" 270 | checksum = "c6b53e329000edc2b34dbe8545fd20e55a333362d0a321909685a19bd28c3f1b" 271 | dependencies = [ 272 | "lazy_static", 273 | ] 274 | 275 | [[package]] 276 | name = "ucd-util" 277 | version = "0.1.10" 278 | source = "registry+https://github.com/rust-lang/crates.io-index" 279 | checksum = "abd2fc5d32b590614af8b0a20d837f32eca055edd0bbead59a9cfe80858be003" 280 | 281 | [[package]] 282 | name = "unicode-width" 283 | version = "0.1.10" 284 | source = "registry+https://github.com/rust-lang/crates.io-index" 285 | checksum = "c0edd1e5b14653f783770bce4a4dabb4a5108a5370a5f5d8cfe8710c361f6c8b" 286 | 287 | [[package]] 288 | name = "utf8-ranges" 289 | version = "1.0.5" 290 | source = "registry+https://github.com/rust-lang/crates.io-index" 291 | checksum = "7fcfc827f90e53a02eaef5e535ee14266c1d569214c6aa70133a624d8a3164ba" 292 | 293 | [[package]] 294 | name = "vec_map" 295 | version = "0.8.2" 296 | source = "registry+https://github.com/rust-lang/crates.io-index" 297 | checksum = "f1bddf1187be692e79c5ffeab891132dfb0f236ed36a43c7ed39f1165ee20191" 298 | 299 | [[package]] 300 | name = "winapi" 301 | version = "0.3.9" 302 | source = "registry+https://github.com/rust-lang/crates.io-index" 303 | checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" 304 | dependencies = [ 305 | "winapi-i686-pc-windows-gnu", 306 | "winapi-x86_64-pc-windows-gnu", 307 | ] 308 | 309 | [[package]] 310 | name = "winapi-i686-pc-windows-gnu" 311 | version = "0.4.0" 312 | source = "registry+https://github.com/rust-lang/crates.io-index" 313 | checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" 314 | 315 | [[package]] 316 | name = "winapi-x86_64-pc-windows-gnu" 317 | version = "0.4.0" 318 | source = "registry+https://github.com/rust-lang/crates.io-index" 319 | checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" 320 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "rucc" 3 | version = "0.2.1" 4 | edition = "2015" 5 | 6 | [dependencies] 7 | llvm-sys = "100.0.0" 8 | lazy_static = "1.4.0" 9 | regex = "0.2.1" 10 | rand = "0.3" 11 | clap = "2.29" 12 | ansi_term = "0.9.0" 13 | libc = "*" 14 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 uint256_t 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # RUCC 2 | 3 | 4 | [![](https://circleci.com/gh/maekawatoshiki/rucc/tree/master.svg?style=shield&circle-token=12276a02aa21f18324f9be74cbb922227b7c8551)](https://circleci.com/gh/maekawatoshiki/rucc) 5 | [![codecov](https://codecov.io/gh/maekawatoshiki/rucc/branch/master/graph/badge.svg)](https://codecov.io/gh/maekawatoshiki/rucc) 6 | [![](http://img.shields.io/badge/license-MIT-blue.svg)](./LICENSE) 7 | 8 | rucc is a small toy C compiler implemented in Rust. 9 | 10 | # REQUIREMENTS 11 | 12 | - latest Rust (recommend [rustup](https://www.rustup.rs/)) 13 | - LLVM 10.0 14 | ```sh 15 | # ubuntu, or debian... 16 | $ apt-get install llvm-10 llvm-10-dev 17 | ``` 18 | 19 | # RUN 20 | 21 | First, do test 22 | 23 | ```sh 24 | $ cargo test 25 | ``` 26 | 27 | After the test exits successfully, you can try rucc easily with ``./rucc.sh``! 28 | 29 | ```sh 30 | $ # slow (use binary created by `cargo build`) 31 | $ ./rucc.sh [filename (*.c)] 32 | 33 | $ # fast (use binary created by `cargo build --release`) 34 | $ ./rucc.sh [filename (*.c)] --release 35 | ``` 36 | 37 | # FORK AND PULL REQUEST LIFECYCLE 38 | 39 | 1. fork https://github.com/maekawatoshiki/rucc repository 40 | 2. clone your repository on local pc 41 | 42 | ```sh 43 | $ git clone git@github.com:youraccount/rucc.git 44 | $ cd rucc 45 | ``` 46 | 47 | 3. add maekawatoshiki upstream repository & fetch & confirm 48 | 49 | ```sh 50 | $ git remote add upstream git@github.com:maekawatoshiki/rucc.git 51 | $ git fetch upstream 52 | $ git branch -a 53 | 54 | * master 55 | remotes/origin/HEAD -> origin/master 56 | remotes/origin/master 57 | remotes/upstream/master 58 | ``` 59 | 60 | 4. fetch & marge upstream 61 | 62 | ```sh 63 | $ git fetch upstream 64 | $ git merge upstream/master 65 | ``` 66 | 67 | 5. pullrequest on GitHub 68 | 69 | # REFERENCES 70 | 71 | I'm using [8cc](https://github.com/rui314/8cc) as reference. 72 | -------------------------------------------------------------------------------- /example/ary.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | int main() { 4 | int q[] = {123, 456, 789}; 5 | int *has_ptr_to_local_var[] = {q}; 6 | char s[][8] = {"hello", "rucc", "world"}; 7 | int a[2]; 8 | a[0] = 12; 9 | a[1] = 23; 10 | a[0] = a[0] + a[1]; 11 | printf("%d %d\n", a[0], 0[a]); 12 | for(int i = 0; i < 3; i++) 13 | puts(s[i]); 14 | } 15 | -------------------------------------------------------------------------------- /example/bigints.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | int main() { 6 | printf("%llu %llu %llu\n", INT_MAX, UINT_MAX, LONG_MAX); 7 | printf("%llu\n", 52437589423594); 8 | return 0; 9 | } 10 | -------------------------------------------------------------------------------- /example/break_continue.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | int main() { 4 | for(int i = 0; i < 10; i++) { 5 | if(i == 8) break; 6 | if(i & 1) continue; 7 | printf("%d ", i); 8 | } 9 | puts(""); 10 | for(int i = 0; i < 10; i++) { 11 | for(int k = 0; k < 10; k++) { 12 | printf("%d\n", k); 13 | if(k == 2) break; 14 | } 15 | printf("%d\n", i); 16 | if(i == 3) break; 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /example/calc.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Calculator 3 | * - this code is written in C that rucc can process. 4 | */ 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | enum NODE_KIND { 12 | KIND_NUM, 13 | KIND_OP 14 | }; 15 | 16 | char *input; 17 | 18 | typedef struct node_t { 19 | enum NODE_KIND kind; 20 | double num; 21 | struct { 22 | char op; 23 | struct node_t *left, *right; 24 | } op; 25 | } node_t; 26 | 27 | node_t *make_number(double n) { 28 | node_t *num = malloc(sizeof(node_t)); 29 | num->kind = KIND_NUM; 30 | num->num = n; 31 | return num; 32 | } 33 | node_t *make_op(char op, node_t *left, node_t *right) { 34 | node_t *num = malloc(sizeof(node_t)); 35 | num->kind = KIND_OP; 36 | num->op.op = op; 37 | num->op.left = left; 38 | num->op.right = right; 39 | return num; 40 | } 41 | 42 | node_t *expr_addsub(); 43 | 44 | node_t *expr_number() { 45 | if(*input == '(') { 46 | input++; 47 | node_t *n = expr_addsub(); 48 | input++; 49 | return n; 50 | } 51 | if(!isdigit(*input)) return NULL; 52 | char buf[16] = {0}; 53 | for(int i = 0; isdigit(*input) || *input == '.'; i++) 54 | buf[i] = *input++; 55 | return make_number(atof(buf)); 56 | } 57 | 58 | node_t *expr_muldiv() { 59 | node_t *left = expr_number(); 60 | while(*input == '*' || *input == '/') { 61 | char op = *input++; 62 | node_t *right = expr_number(); 63 | left = make_op(op, left, right); 64 | } 65 | return left; 66 | } 67 | 68 | node_t *expr_addsub() { 69 | node_t *left = expr_muldiv(); 70 | while(*input == '+' || *input == '-') { 71 | char op = *input++; 72 | node_t *right = expr_muldiv(); 73 | left = make_op(op, left, right); 74 | } 75 | return left; 76 | } 77 | 78 | double calc(node_t *node) { 79 | if(node->kind == KIND_OP) { 80 | double l = calc(node->op.left), r = calc(node->op.right); 81 | // TODO: 'switch' not supported 82 | if(node->op.op == '+') return l + r; 83 | if(node->op.op == '-') return l - r; 84 | if(node->op.op == '*') return l * r; 85 | if(node->op.op == '/') return l / r; 86 | } else return node->num; 87 | } 88 | 89 | void show(node_t *node) { 90 | if(node->kind == KIND_OP) { 91 | printf("(%c ", node->op.op); 92 | show(node->op.left); 93 | show(node->op.right); 94 | printf(") "); 95 | } else printf("%.10g ", node->num); 96 | } 97 | 98 | int main(int argc, char *argv[]) { 99 | if(argc == 1) { 100 | puts("./calc [ an expression without spaces; e.g. 3/2+1.2*(2.1-3) ]"); 101 | return 0; 102 | } 103 | 104 | input = argv[1]; 105 | 106 | node_t *node = expr_addsub(); 107 | show(node); puts(""); 108 | 109 | printf("%.10g%c", calc(node), 10); 110 | return 0; 111 | } 112 | -------------------------------------------------------------------------------- /example/do_while.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | int main() { 4 | int i = 0; 5 | do { 6 | printf("%d\n", i++); 7 | } while(i < 10); 8 | } 9 | -------------------------------------------------------------------------------- /example/enum.c: -------------------------------------------------------------------------------- 1 | int printf(char *, ...); 2 | 3 | enum Animal { 4 | Dog, 5 | Cat, 6 | Bird, 7 | Lion 8 | }; 9 | 10 | int main() { 11 | enum Animal a; 12 | a = Dog; 13 | } 14 | -------------------------------------------------------------------------------- /example/escape-seq.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | int main() { 4 | putchar('\\'); 5 | putchar('\"'); 6 | putchar('\''); 7 | putchar('\?'); 8 | putchar('\a'); 9 | putchar('\b'); 10 | putchar('\t'); 11 | putchar('\n'); 12 | putchar('\f'); 13 | putchar('\r'); 14 | putchar('\v'); 15 | putchar('\x17'); 16 | putchar('\123'); 17 | putchar('\0'); 18 | } 19 | -------------------------------------------------------------------------------- /example/fact.c: -------------------------------------------------------------------------------- 1 | int printf(char *, ...); 2 | int puts(char *); 3 | int atoi(const char *); 4 | 5 | int fact(int n) { 6 | if(n == 1) 7 | return 1; 8 | else 9 | return fact(n - 1) * n; 10 | } 11 | 12 | int main(int argc, char *argv[]) { 13 | if(argc < 2) { 14 | puts("./fact [NUMBER]"); 15 | return 0; 16 | } 17 | 18 | int n = atoi(argv[1]); 19 | printf("fact(%d) = %d%c", n, fact(n), 0xA); 20 | } 21 | -------------------------------------------------------------------------------- /example/fizzbuzz.c: -------------------------------------------------------------------------------- 1 | int printf(char *, ...); 2 | int puts(char *); 3 | int atoi(char *); 4 | 5 | int main(int argc, char *argv[]) { 6 | if(argc == 1) { 7 | puts("./fizzbuzz [max_num]"); 8 | return 0; 9 | } 10 | 11 | const int max_num = atoi(argv[1]); 12 | for(int i = 1; i <= max_num; i = i + 1) { 13 | if(i % 15 == 0) { 14 | puts("fizzbuzz"); 15 | } else if(i % 5 == 0) { 16 | puts("buzz"); 17 | } else if(i % 3 == 0) { 18 | puts("fizz"); 19 | } else { 20 | printf("%d%c", i, 0xa); 21 | } 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /example/funccall.c: -------------------------------------------------------------------------------- 1 | int printf(char *, ...); 2 | int puts(char *); 3 | 4 | int add1(int n) { 5 | return n + 1; 6 | } 7 | 8 | // all qualifiers (for coverage) 9 | typedef int I; 10 | static I i1; 11 | I f1() { auto I i2; register I i3; return 0; } 12 | const I i4 = 1; 13 | volatile I i5; 14 | inline I f2() { I * restrict i6 = 0; return 0; } 15 | 16 | int main(int argc, char *argv[]) { 17 | printf("add1(2) = %d%c", add1(2), 0xa); 18 | printf("hello" 19 | " " 20 | "world%s", 21 | "!!"); 22 | return 0; 23 | } 24 | -------------------------------------------------------------------------------- /example/goto.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | int main() { 4 | int i = 0; 5 | loop: 6 | printf("%d\n", i++); 7 | if(i == 5) { 8 | goto break_loop; 9 | } 10 | goto loop; 11 | break_loop: 12 | return 0; 13 | } 14 | -------------------------------------------------------------------------------- /example/hello.c: -------------------------------------------------------------------------------- 1 | // #include 2 | // #include 3 | 4 | int printf(char *, ...); 5 | 6 | int main(int argc, char *argv[]) { 7 | printf("hello world"); 8 | return 0; 9 | } 10 | -------------------------------------------------------------------------------- /example/mandelbrot.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | // show mandelbrot set. 5 | 6 | int mandelbrot(double c_x, double c_y, int n) { 7 | double x_n = 0, y_n = 0, 8 | x_n_1, y_n_1; 9 | for(int i = 0; i < n; i++) { 10 | x_n_1 = pow(x_n, 2) - pow(y_n, 2) + c_x; 11 | y_n_1 = 2.0 * x_n * y_n + c_y; 12 | if(pow(x_n_1, 2) + pow(y_n_1, 2) > 4.0) { 13 | return n; 14 | } else { 15 | x_n = x_n_1; 16 | y_n = y_n_1; 17 | } 18 | } 19 | return 0; 20 | } 21 | 22 | int main() { 23 | double x_max = 1, x_min = -2, 24 | y_max = 1, y_min = -1, 25 | dx = 0.05, dy = 0.05; 26 | int x_length = (x_max - x_min) / dx; 27 | for(double y = y_max; y > y_min; y -= dy) { 28 | for(double x = x_min; x < x_max; x += dx) 29 | putchar(mandelbrot(x, y, 300) == 0 ? '*' : ' '); 30 | puts(""); 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /example/op.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | int main() { 4 | // TODO: this is for coverage. many other operators will be checked in the future. 5 | printf("%d\n", 1 + 2); 6 | printf("%d\n", 2 - 1); 7 | printf("%d\n", 2 * 3); 8 | printf("%d\n", 4 / 2); 9 | printf("%d\n", 4 % 3); 10 | printf("%d\n", 1 & 2); 11 | printf("%d\n", 1 | 2); 12 | printf("%d\n", 1 ^ 2); 13 | printf("%d\n", 1 && 0); 14 | printf("%d\n", 1 || 0); 15 | printf("%d\n", 5 == 5); 16 | printf("%d\n", 1 != 1); 17 | printf("%d\n", 3 < 4); 18 | printf("%d\n", 3 > 4); 19 | printf("%d\n", 3 <= 4); 20 | printf("%d\n", 3 >= 4); 21 | printf("%d\n", 1 << 4); 22 | printf("%d\n", 16 >> 4); 23 | printf("%d\n", !0); 24 | printf("%d\n", ~123); 25 | printf("%d\n", +12); 26 | printf("%d\n", -34); 27 | int i = 0, *a = &i; 28 | printf("%d\n", ++i); 29 | printf("%d\n", --i); 30 | printf("%d\n", i++); 31 | printf("%d\n", i--); 32 | printf("%d\n", *a); 33 | i += 10; 34 | i -= 10; 35 | i *= 10; 36 | i /= 10; 37 | i %= 10; 38 | i <<= 10; 39 | i >>= 10; 40 | i &= 10; 41 | i |= 10; 42 | return 0; 43 | } 44 | -------------------------------------------------------------------------------- /example/prime_list.c: -------------------------------------------------------------------------------- 1 | int printf(char *, ...); 2 | int puts(char *); 3 | int atoi(char *); 4 | 5 | int is_prime(int n) { 6 | if(n == 2) return 1; 7 | if(n % 2 == 0) return 0; 8 | for(int i = 3; i * i <= n; i = i + 2) { 9 | if(n % i == 0) return 0; 10 | } 11 | return 1; 12 | } 13 | 14 | int main(int argc, char *argv[]) { 15 | if(argc == 1) { 16 | puts("./prime_list [max_num(>2)]"); 17 | return 0; 18 | } 19 | 20 | int max_num = atoi(argv[1]); 21 | if(max_num <= 2) { puts("max_num must be greater than 2"); return 0; } 22 | 23 | puts("2"); 24 | for(int i = 3; i <= max_num; i = i + 2) { 25 | if(is_prime(i)) { printf("%d%c", i, 0xa); } 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /example/sizeof.c: -------------------------------------------------------------------------------- 1 | int printf(char *, ...); 2 | int puts(char *); 3 | 4 | struct S { 5 | char a, b, c; 6 | // int a, b, c; 7 | double f; 8 | }; 9 | 10 | union U { 11 | struct S s; 12 | char hello[6]; 13 | }; 14 | 15 | enum { ZERO, ONE }; 16 | 17 | #define PRINT_SIZEOF(type) printf("sizeof(%s) = %d%c", #type, sizeof(type), 0xa) 18 | 19 | int main() { 20 | char str[][8] = {"hello", "rucc", "world"}; 21 | int i; double f; struct S s; union U u; 22 | PRINT_SIZEOF( char ); 23 | PRINT_SIZEOF( short ); 24 | PRINT_SIZEOF( int ); 25 | PRINT_SIZEOF( long ); 26 | PRINT_SIZEOF( long long ); 27 | PRINT_SIZEOF( float ); 28 | PRINT_SIZEOF( double ); 29 | PRINT_SIZEOF( int [5] ); 30 | PRINT_SIZEOF( struct S ); 31 | PRINT_SIZEOF( union U ); 32 | PRINT_SIZEOF( u ); 33 | PRINT_SIZEOF( s.f ); 34 | PRINT_SIZEOF( "a" ); 35 | PRINT_SIZEOF( 1 + 2 ); 36 | PRINT_SIZEOF( 1.2 + 3.4 ); 37 | PRINT_SIZEOF( 100000000000 + 200000000000 ); 38 | PRINT_SIZEOF( ZERO + ONE ); 39 | PRINT_SIZEOF( i + f ); 40 | PRINT_SIZEOF( str ); 41 | PRINT_SIZEOF( str + i ); 42 | PRINT_SIZEOF( main ); 43 | PRINT_SIZEOF( main + 8 ); 44 | PRINT_SIZEOF( puts("hello") ); 45 | PRINT_SIZEOF( !1 ); 46 | PRINT_SIZEOF( ~1 ); 47 | PRINT_SIZEOF( &i ); 48 | PRINT_SIZEOF( i++ ); 49 | PRINT_SIZEOF( i-- ); 50 | PRINT_SIZEOF( *&f ); 51 | PRINT_SIZEOF( 1 ? 100000000000 : 200000000000 ); 52 | } 53 | -------------------------------------------------------------------------------- /example/struct.c: -------------------------------------------------------------------------------- 1 | int printf(char *, ...); 2 | int puts(char *); 3 | 4 | struct A { 5 | int a, b, c; 6 | }; 7 | 8 | typedef struct { 9 | char *name; 10 | int age; 11 | } User; 12 | 13 | int main() { 14 | struct A a; 15 | a.a = a.b = 1; 16 | 17 | User u; 18 | u.name = "uint256_t"; 19 | u.age = 16; 20 | printf("%s %d%c", u.name, u.age, 0xa); 21 | } 22 | -------------------------------------------------------------------------------- /example/struct_init.c: -------------------------------------------------------------------------------- 1 | #include 2 | #define SHOW_INT(x) do { printf("%s = %d\n", #x, x); } while(0); 3 | #define SHOW_STR(x) do { printf("%s = %s\n", #x, x); } while(0); 4 | 5 | struct user_t { 6 | char *name; 7 | size_t age; 8 | }; 9 | 10 | struct A { 11 | struct user_t u; 12 | int i, k; 13 | } a = {{"name", 12}, 12, 23}; 14 | 15 | int main() { 16 | SHOW_INT(a.i); 17 | SHOW_INT(a.k); 18 | SHOW_INT(a.u.age); 19 | SHOW_STR(a.u.name); 20 | return 0; 21 | } 22 | -------------------------------------------------------------------------------- /example/switch.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | int main() { 4 | for(int i = 0; i < 3; i++) { 5 | switch(i) { 6 | case 0: 7 | printf("i is 0\n"); 8 | break; 9 | case 1: 10 | case 2: 11 | puts("i is 1 or 2"); 12 | default: 13 | puts("this is default block"); 14 | } 15 | } 16 | puts("finish"); 17 | return 0; 18 | } 19 | -------------------------------------------------------------------------------- /example/ternaryop.c: -------------------------------------------------------------------------------- 1 | int printf(char *, ...); 2 | 3 | int main() { 4 | printf("%d%c", 1 ? 2 : 3, 10); 5 | } 6 | -------------------------------------------------------------------------------- /example/while_fact.c: -------------------------------------------------------------------------------- 1 | int printf(char *, ...); 2 | 3 | int fact(int n) { 4 | if(n < 2) return 1; 5 | else return fact(n - 1) * n; 6 | } 7 | 8 | int main(int argc, char *argv[]) { 9 | int i = 1; 10 | while(i < 10) { 11 | printf("%d! = %d%c", i, fact(i), 0xa); 12 | i = i + 1; 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /include/float.h: -------------------------------------------------------------------------------- 1 | #ifndef __STDFLOAT_H 2 | #define __STDFLOAT_H 3 | 4 | #define DECIMAL_DIG 21 5 | #define FLT_EVAL_METHOD 0 6 | #define FLT_RADIX 2 7 | #define FLT_ROUNDS 1 8 | 9 | #define FLT_DIG 6 10 | #define FLT_EPSILON 0x1p-23 11 | #define FLT_MANT_DIG 24 12 | #define FLT_MAX 0x1.fffffep+127 13 | #define FLT_MAX_10_EXP 38 14 | #define FLT_MAX_EXP 128 15 | #define FLT_MIN 0x1p-126 16 | #define FLT_MIN_10_EXP -37 17 | #define FLT_MIN_EXP -125 18 | #define FLT_TRUE_MIN 0x1p-149 19 | 20 | #define DBL_DIG 15 21 | #define DBL_EPSILON 0x1p-52 22 | #define DBL_MANT_DIG 53 23 | #define DBL_MAX 0x1.fffffffffffffp+1023 24 | #define DBL_MAX_10_EXP 308 25 | #define DBL_MAX_EXP 1024 26 | #define DBL_MIN 0x1p-1022 27 | #define DBL_MIN_10_EXP -307 28 | #define DBL_MIN_EXP -1021 29 | #define DBL_TRUE_MIN 0x0.0000000000001p-1022 30 | 31 | #define LDBL_DIG 15 32 | #define LDBL_EPSILON 0x1p-52 33 | #define LDBL_MANT_DIG 53 34 | #define LDBL_MAX 0x1.fffffffffffffp+1023 35 | #define LDBL_MAX_10_EXP 308 36 | #define LDBL_MAX_EXP 1024 37 | #define LDBL_MIN 0x1p-1022 38 | #define LDBL_MIN_10_EXP -307 39 | #define LDBL_MIN_EXP -1021 40 | #define LDBL_TRUE_MIN 0x0.0000000000001p-1022 41 | 42 | #endif 43 | -------------------------------------------------------------------------------- /include/iso646.h: -------------------------------------------------------------------------------- 1 | #ifndef __ISO646_H 2 | #define __ISO646_H 3 | 4 | #define and && 5 | #define and_eq &= 6 | #define bitand & 7 | #define bitor | 8 | #define compl ~ 9 | #define not ! 10 | #define not_eq != 11 | #define or || 12 | #define or_eq |= 13 | #define xor ^ 14 | #define xor_eq ^= 15 | 16 | #endif 17 | -------------------------------------------------------------------------------- /include/rucc.h: -------------------------------------------------------------------------------- 1 | #define _LP64 1 2 | #define __RUCC__ 1 3 | #define __ELF__ 1 4 | #define __LP64__ 1 5 | #define __SIZEOF_DOUBLE__ 8 6 | #define __SIZEOF_FLOAT__ 4 7 | #define __SIZEOF_INT__ 4 8 | #define __SIZEOF_LONG_DOUBLE__ 8 9 | #define __SIZEOF_LONG_LONG__ 8 10 | #define __SIZEOF_LONG__ 8 11 | #define __SIZEOF_POINTER__ 8 12 | #define __SIZEOF_PTRDIFF_T__ 8 13 | #define __SIZEOF_SHORT__ 2 14 | #define __SIZEOF_SIZE_T__ 8 15 | #define __STDC_HOSTED__ 1 16 | #define __STDC_ISO_10646__ 201103L 17 | #define __STDC_NO_ATOMICS__ 1 18 | #define __STDC_NO_COMPLEX__ 1 19 | #define __STDC_NO_THREADS__ 1 20 | #define __STDC_NO_VLA__ 1 21 | #define __STDC_UTF_16__ 1 22 | #define __STDC_UTF_32__ 1 23 | #define __STDC_VERSION__ 201112L 24 | #define __STDC__ 1 25 | #define __amd64 1 26 | #define __amd64__ 1 27 | #define __gnu_linux__ 1 28 | #define __linux 1 29 | #define __linux__ 1 30 | #define __unix 1 31 | #define __unix__ 1 32 | #define __x86_64 1 33 | #define __x86_64__ 1 34 | #define linux 1 35 | #define NO_ANSI_KEYWORDS 1 36 | 37 | #define __alignof__ alignof 38 | #define __const__ const 39 | #define __inline__ inline 40 | #define __restrict restrict 41 | #define __restrict__ restrict 42 | #define __signed__ signed 43 | #define __typeof__ typeof 44 | #define __volatile__ volatile 45 | 46 | typedef unsigned short char16_t; 47 | typedef unsigned int char32_t; 48 | 49 | -------------------------------------------------------------------------------- /include/stdalign.h: -------------------------------------------------------------------------------- 1 | #ifndef __STDALIGN_H 2 | #define __STDALIGN_H 3 | 4 | #define alignas _Alignas 5 | #define alignof _Alignof 6 | #define __alignas_is_defined 1 7 | #define __alignof_is_defined 1 8 | 9 | #endif 10 | -------------------------------------------------------------------------------- /include/stdarg.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef __STDARG_H 3 | #define __STDARG_H 4 | 5 | typedef struct { 6 | unsigned int gp_offset; 7 | unsigned int fp_offset; 8 | void *overflow_arg_area; 9 | void *reg_save_area; 10 | } __va_elem; 11 | 12 | typedef __va_elem va_list[1]; 13 | 14 | static void *__va_arg_gp(__va_elem *ap) { 15 | void *r = (char *)ap->reg_save_area + ap->gp_offset; 16 | ap->gp_offset += 8; 17 | return r; 18 | } 19 | 20 | static void *__va_arg_fp(__va_elem *ap) { 21 | void *r = (char *)ap->reg_save_area + ap->fp_offset; 22 | ap->fp_offset += 16; 23 | return r; 24 | } 25 | 26 | static void *__va_arg_mem(__va_elem *ap) { 27 | 1 / 0; // unimplemented 28 | } 29 | 30 | #define va_start(ap, last) __builtin_va_start(ap) 31 | #define va_arg(ap, type) \ 32 | ({ \ 33 | int klass = __builtin_reg_class((type *)0); \ 34 | *(type *)(klass == 0 ? __va_arg_gp(ap) : \ 35 | klass == 1 ? __va_arg_fp(ap) : \ 36 | __va_arg_mem(ap)); \ 37 | }) 38 | 39 | #define va_end(ap) 1 40 | #define va_copy(dest, src) ((dest)[0] = (src)[0]) 41 | 42 | // Workaround to load stdio.h properly 43 | #define __GNUC_VA_LIST 1 44 | typedef va_list __gnuc_va_list; 45 | 46 | #endif 47 | -------------------------------------------------------------------------------- /include/stdbool.h: -------------------------------------------------------------------------------- 1 | #ifndef __STDBOOL_H 2 | #define __STDBOOL_H 3 | 4 | #define bool _Bool 5 | #define true 1 6 | #define false 0 7 | #define __bool_true_false_are_defined 1 8 | 9 | #endif 10 | -------------------------------------------------------------------------------- /include/stddef.h: -------------------------------------------------------------------------------- 1 | #ifndef __STDDEF_H 2 | #define __STDDEF_H 3 | 4 | #define NULL ((void *)0) 5 | 6 | typedef unsigned long size_t; 7 | typedef long ptrdiff_t; 8 | typedef unsigned int wchar_t; 9 | typedef long double max_align_t; 10 | 11 | #define offsetof(type, member) ((size_t)&(((type *)0)->member)) 12 | 13 | #endif 14 | -------------------------------------------------------------------------------- /include/stdnoreturn.h: -------------------------------------------------------------------------------- 1 | #ifndef __STDNORETURN_H 2 | #define __STDNORETURN_H 3 | 4 | #define noreturn _Noreturn 5 | 6 | #endif 7 | -------------------------------------------------------------------------------- /rucc.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | bc=$(echo $1 | sed -e s/\.c$/\.bc/) 4 | s=$(echo $1 | sed -e s/\.c$/\.s/) 5 | 6 | if [[ $2 == "--release" ]]; then 7 | if [[ ! -e './target/release/rucc' ]]; then 8 | cargo build --release 9 | fi 10 | ./target/release/rucc $1 11 | else 12 | cargo run $1 13 | fi 14 | 15 | if [[ $? == 0 ]]; then 16 | # opt-4.0 -std-link-opts $bc -o $bc 17 | llc-10 $bc 18 | clang $s -lm 19 | rm -f $bc $s 20 | fi 21 | -------------------------------------------------------------------------------- /src/common.rs: -------------------------------------------------------------------------------- 1 | use codegen; 2 | use lexer; 3 | use parser; 4 | use std::io::{stderr, Write}; 5 | use std::path::Path; 6 | 7 | extern crate regex; 8 | 9 | extern crate ansi_term; 10 | use self::ansi_term::Colour; 11 | 12 | use CODEGEN; 13 | 14 | // parse -> codegen -> write llvm bitcode to output file 15 | pub fn run_file<'a>(filename: &'a str) { 16 | // parser::Parser::new(&mut lexer).run(&mut nodes); 17 | 18 | // DEBUG: for node in &ast { 19 | // DEBUG: node.show(); 20 | // DEBUG: } 21 | 22 | // DEBUG: println!("\nllvm-ir test output:"); 23 | unsafe { 24 | let mut nodes = Vec::new(); 25 | let mut lexer = lexer::Lexer::new(filename.to_string()); 26 | let mut parser = parser::Parser::new(&mut lexer); 27 | 28 | loop { 29 | match parser.read_toplevel(&mut nodes) { 30 | Err(parser::Error::EOF) => break, 31 | Err(_) => continue, 32 | _ => {} 33 | } 34 | match CODEGEN.lock().unwrap().run(&nodes) { 35 | Ok(_) => {} 36 | // TODO: implement err handler for codegen 37 | Err(codegen::Error::MsgWithPos(msg, pos)) => { 38 | writeln!( 39 | &mut stderr(), 40 | "{}: {} {}: {}", 41 | parser.lexer.get_filename(), 42 | Colour::Red.bold().paint("error:"), 43 | pos.line, 44 | msg 45 | ) 46 | .unwrap(); 47 | writeln!( 48 | &mut stderr(), 49 | "{}", 50 | parser.lexer.get_surrounding_code_with_err_point(pos.pos) 51 | ) 52 | .unwrap(); 53 | println!( 54 | "{} error{} generated.", 55 | parser.err_counts + 1, 56 | if parser.err_counts + 1 > 1 { "s" } else { "" } 57 | ); 58 | ::std::process::exit(-1); 59 | } 60 | _ => panic!("this is a bug. fix soon"), 61 | } 62 | nodes.clear(); 63 | } 64 | parser.show_total_errors(); 65 | 66 | let output_file_name = Path::new(filename) 67 | .with_extension("bc") 68 | .into_os_string() 69 | .into_string() 70 | .unwrap(); 71 | CODEGEN 72 | .lock() 73 | .unwrap() 74 | .write_llvm_bitcode_to_file(output_file_name.as_str()); 75 | } 76 | } 77 | -------------------------------------------------------------------------------- /src/error.rs: -------------------------------------------------------------------------------- 1 | pub fn error_exit(line: i32, msg: &str) -> ! { 2 | println!("error: {}: {}", line, msg); 3 | panic!(); 4 | } 5 | -------------------------------------------------------------------------------- /src/lexer.rs: -------------------------------------------------------------------------------- 1 | use error; 2 | use node::Bits; 3 | use parser; 4 | use parser::{Error, ParseR}; 5 | use std::collections::VecDeque; 6 | use std::collections::{HashMap, HashSet}; 7 | use std::fs::OpenOptions; 8 | use std::io::prelude::*; 9 | use std::path; 10 | use std::process; 11 | use std::str; 12 | 13 | extern crate ansi_term; 14 | use self::ansi_term::{Colour, Style}; 15 | 16 | #[derive(Debug, Clone)] 17 | pub enum Macro { 18 | // Vec -> macro body 19 | Object(Vec), 20 | FuncLike(Vec), 21 | } 22 | 23 | #[derive(PartialEq, Debug, Clone)] 24 | pub enum Keyword { 25 | Typedef, 26 | Extern, 27 | Static, 28 | Auto, 29 | Restrict, 30 | Register, 31 | Const, 32 | ConstExpr, 33 | Volatile, 34 | Void, 35 | Signed, 36 | Unsigned, 37 | Char, 38 | Int, 39 | Short, 40 | Long, 41 | Float, 42 | Double, 43 | Struct, 44 | Enum, 45 | Union, 46 | Noreturn, 47 | Inline, 48 | If, 49 | Else, 50 | For, 51 | Do, 52 | While, 53 | Switch, 54 | Case, 55 | Default, 56 | Goto, 57 | Break, 58 | Continue, 59 | Return, 60 | } 61 | 62 | #[derive(PartialEq, Debug, Clone)] 63 | pub enum Symbol { 64 | OpeningParen, 65 | ClosingParen, 66 | OpeningBrace, 67 | ClosingBrace, 68 | OpeningBoxBracket, 69 | ClosingBoxBracket, 70 | Comma, 71 | Semicolon, 72 | Colon, 73 | Point, 74 | Arrow, 75 | Inc, 76 | Dec, 77 | Add, 78 | Sub, 79 | Asterisk, 80 | Div, 81 | Mod, 82 | Not, 83 | BitwiseNot, 84 | Ampersand, 85 | Shl, 86 | Shr, 87 | Lt, 88 | Le, 89 | Gt, 90 | Ge, 91 | Eq, 92 | Ne, 93 | Xor, 94 | Or, 95 | LAnd, 96 | LOr, 97 | Question, 98 | Assign, 99 | AssignAdd, 100 | AssignSub, 101 | AssignMul, 102 | AssignDiv, 103 | AssignMod, 104 | AssignShl, 105 | AssignShr, 106 | AssignAnd, 107 | AssignXor, 108 | AssignOr, 109 | Hash, 110 | Vararg, 111 | Sizeof, 112 | } 113 | 114 | #[derive(PartialEq, Debug, Clone)] 115 | pub struct Pos { 116 | pub line: usize, 117 | pub pos: usize, 118 | } 119 | 120 | impl Pos { 121 | pub fn new(line: usize, pos: usize) -> Pos { 122 | Pos { 123 | line: line, 124 | pos: pos, 125 | } 126 | } 127 | } 128 | 129 | #[derive(PartialEq, Debug, Clone)] 130 | pub enum TokenKind { 131 | MacroParam, 132 | Keyword(Keyword), 133 | Identifier(String), 134 | IntNumber(i64, Bits), 135 | FloatNumber(f64), 136 | String(String), 137 | Char(char), 138 | Symbol(Symbol), 139 | Newline, 140 | } 141 | 142 | macro_rules! ident_val { 143 | ($e:expr) => { 144 | match &$e.kind { 145 | &TokenKind::Identifier(ref ident) => ident.to_string(), 146 | _ => "".to_string(), 147 | } 148 | }; 149 | } 150 | macro_rules! ident_mut_val { 151 | ($e:expr) => { 152 | match &mut $e.kind { 153 | &mut TokenKind::Identifier(ref mut ident) => ident, 154 | _ => panic!(), 155 | } 156 | }; 157 | } 158 | macro_rules! retrieve_str { 159 | ($e:expr) => { 160 | match &$e.kind { 161 | &TokenKind::String(ref s) => s.to_string(), 162 | _ => panic!(), 163 | } 164 | }; 165 | } 166 | macro_rules! matches { 167 | ($e:expr, $p:pat) => { 168 | match $e { 169 | $p => true, 170 | _ => false, 171 | } 172 | }; 173 | } 174 | 175 | #[derive(PartialEq, Debug, Clone)] 176 | pub struct Token { 177 | pub kind: TokenKind, 178 | pub space: bool, // leading space 179 | pub macro_position: usize, 180 | pub hideset: HashSet, 181 | pub pos: Pos, 182 | } 183 | 184 | impl Token { 185 | pub fn new(kind: TokenKind, macro_position: usize, pos: usize, line: usize) -> Token { 186 | Token { 187 | kind: kind, 188 | space: false, 189 | macro_position: macro_position, 190 | hideset: HashSet::new(), 191 | pos: Pos::new(line, pos), 192 | } 193 | } 194 | pub fn add_hideset(&mut self, s: String) { 195 | self.hideset.insert(s); 196 | } 197 | } 198 | 199 | #[derive(Clone)] 200 | pub struct Lexer { 201 | pub cur_line: VecDeque, 202 | filename: VecDeque, 203 | macro_map: HashMap, 204 | pub peek: VecDeque>, 205 | pub peek_pos: VecDeque, 206 | buf: VecDeque>, 207 | cond_stack: Vec, 208 | } 209 | 210 | impl Lexer { 211 | pub fn new(filename: String) -> Lexer { 212 | let mut buf = VecDeque::new(); 213 | buf.push_back(VecDeque::new()); 214 | 215 | let mut file = if let Ok(ok) = OpenOptions::new().read(true).open(filename.to_string()) { 216 | ok 217 | } else { 218 | println!( 219 | "{} not found such file '{}'", 220 | Colour::Red.bold().paint("error:"), 221 | Style::new().underline().paint(filename) 222 | ); 223 | ::std::process::exit(0) 224 | }; 225 | 226 | let mut file_body = String::new(); 227 | match file.read_to_string(&mut file_body) { 228 | Ok(_) => (), 229 | Err(e) => { 230 | println!( 231 | "an error occurred while reading file '{}'\n{} {}", 232 | Style::new().underline().paint(filename), 233 | Colour::Red.bold().paint("error:"), 234 | e 235 | ); 236 | ::std::process::exit(0) 237 | } 238 | }; 239 | 240 | let mut rucc_header = OpenOptions::new() 241 | .read(true) 242 | .open("./include/rucc.h") 243 | .unwrap(); 244 | let mut rucc_header_body = String::new(); 245 | rucc_header 246 | .read_to_string(&mut rucc_header_body) 247 | .ok() 248 | .expect("cannot read file"); 249 | let mut peek = VecDeque::new(); 250 | unsafe { 251 | peek.push_back(file_body.as_mut_vec().clone()); 252 | peek.push_back(rucc_header_body.as_mut_vec().clone()); 253 | } 254 | 255 | let mut peek_pos = VecDeque::new(); 256 | peek_pos.push_back(0); 257 | peek_pos.push_back(0); 258 | 259 | let mut filenames = VecDeque::new(); 260 | filenames.push_back(filename); 261 | filenames.push_back("rucc.h".to_string()); 262 | 263 | let mut cur_line = VecDeque::new(); 264 | cur_line.push_back(1); 265 | cur_line.push_back(1); 266 | 267 | Lexer { 268 | cur_line: cur_line, 269 | filename: filenames, 270 | macro_map: HashMap::new(), 271 | peek: peek, 272 | peek_pos: peek_pos, 273 | buf: buf, 274 | cond_stack: Vec::new(), 275 | } 276 | } 277 | pub fn get_filename(&self) -> String { 278 | self.filename.back().unwrap().to_owned() 279 | } 280 | pub fn get_cur_pos(&self) -> Pos { 281 | Pos::new(*self.get_cur_line(), *self.peek_pos.back().unwrap()) 282 | } 283 | pub fn get_cur_line(&self) -> &usize { 284 | self.cur_line.back().unwrap() 285 | } 286 | fn peek_get(&mut self) -> ParseR { 287 | let peek = self.peek.back_mut().unwrap(); 288 | let peek_pos = *self.peek_pos.back_mut().unwrap(); 289 | if peek_pos >= peek.len() { 290 | Err(Error::EOF) 291 | } else { 292 | Ok(peek[peek_pos] as char) 293 | } 294 | } 295 | fn peek_next(&mut self) -> ParseR { 296 | let peek = self.peek.back().unwrap(); 297 | let peek_pos = self.peek_pos.back_mut().unwrap(); 298 | if *peek_pos >= peek.len() { 299 | return Err(Error::EOF); 300 | } 301 | let c = peek[*peek_pos] as char; 302 | *peek_pos += 1; 303 | if c == '\n' { 304 | *self.cur_line.back_mut().unwrap() += 1; 305 | } 306 | Ok(c) 307 | } 308 | fn peek_next_char_is(&mut self, ch: char) -> ParseR { 309 | let peek = self.peek.back_mut().unwrap(); 310 | let peek_pos = self.peek_pos.back_mut().unwrap(); 311 | if *peek_pos >= peek.len() { 312 | Err(Error::EOF) 313 | } else { 314 | let nextc = peek[*peek_pos + 1] as char; 315 | Ok(nextc == ch) 316 | } 317 | } 318 | fn peek_char_is(&mut self, ch: char) -> ParseR { 319 | let peekc = self.peek_get()?; 320 | Ok(peekc == ch) 321 | } 322 | 323 | pub fn peek_keyword_token_is(&mut self, expect: Keyword) -> ParseR { 324 | let peek = self.peek()?; 325 | Ok(peek.kind == TokenKind::Keyword(expect)) 326 | } 327 | pub fn peek_symbol_token_is(&mut self, expect: Symbol) -> ParseR { 328 | let peek = self.peek()?; 329 | Ok(peek.kind == TokenKind::Symbol(expect)) 330 | } 331 | pub fn next_symbol_token_is(&mut self, expect: Symbol) -> ParseR { 332 | let peek = self.get()?; 333 | let next = self.get()?; 334 | let next_token_is_expected = next.kind == TokenKind::Symbol(expect); 335 | self.unget(next); 336 | self.unget(peek); 337 | Ok(next_token_is_expected) 338 | } 339 | pub fn skip_keyword(&mut self, keyword: Keyword) -> ParseR { 340 | let tok = self.get()?; 341 | if tok.kind == TokenKind::Keyword(keyword) { 342 | return Ok(true); 343 | } 344 | self.unget(tok); 345 | Ok(false) 346 | } 347 | pub fn skip_symbol(&mut self, sym: Symbol) -> ParseR { 348 | let tok = self.get()?; 349 | if tok.kind == TokenKind::Symbol(sym) { 350 | return Ok(true); 351 | } 352 | self.unget(tok); 353 | Ok(false) 354 | } 355 | pub fn expect_skip_keyword(&mut self, expect: Keyword) -> ParseR { 356 | self.skip_keyword(expect) 357 | } 358 | pub fn expect_skip_symbol(&mut self, expect: Symbol) -> ParseR { 359 | self.skip_symbol(expect) 360 | } 361 | pub fn unget(&mut self, t: Token) { 362 | self.buf.back_mut().unwrap().push_back(t); 363 | } 364 | pub fn unget_all(&mut self, tv: &Vec) { 365 | let buf = self.buf.back_mut().unwrap(); 366 | buf.extend(tv.iter().rev().map(|tok| tok.clone())); 367 | } 368 | 369 | pub fn read_identifier(&mut self, c: char) -> ParseR { 370 | let mut ident = "".to_string(); 371 | ident.push(c); 372 | let pos = *self.peek_pos.back().unwrap(); 373 | loop { 374 | let c = self.peek_next()?; 375 | if c.is_alphanumeric() || c == '_' { 376 | ident.push(c); 377 | } else { 378 | *self.peek_pos.back_mut().unwrap() -= 1; 379 | break; 380 | } 381 | } 382 | Ok(Token::new( 383 | TokenKind::Identifier(ident), 384 | 0, 385 | pos, 386 | *self.get_cur_line(), 387 | )) 388 | } 389 | fn read_number_literal(&mut self, c: char) -> ParseR { 390 | let mut num = "".to_string(); 391 | num.push(c); 392 | let mut is_float = false; 393 | let mut last = self.peek_get()?; 394 | let pos = *self.peek_pos.back().unwrap(); 395 | loop { 396 | let c = self.peek_next()?; 397 | num.push(c); 398 | is_float = is_float || c == '.'; 399 | let is_f = "eEpP".contains(last) && "+-".contains(c); 400 | if !c.is_alphanumeric() && c != '.' && !is_f { 401 | is_float = is_float || is_f; 402 | num.pop(); 403 | *self.peek_pos.back_mut().unwrap() -= 1; 404 | break; 405 | } 406 | last = c; 407 | } 408 | 409 | if is_float { 410 | // TODO: now rucc ignores suffix 411 | num = num 412 | .trim_end_matches(|c| match c { 413 | 'a'..='z' | 'A'..='Z' | '+' | '-' => true, 414 | _ => false, 415 | }) 416 | .to_string(); 417 | let f: f64 = num.parse().unwrap(); 418 | Ok(Token::new( 419 | TokenKind::FloatNumber(f), 420 | 0, 421 | pos, 422 | *self.get_cur_line(), 423 | )) 424 | } else { 425 | // TODO: suffix supporting 426 | let i = if num.len() > 2 && num.chars().nth(1).unwrap() == 'x' { 427 | self.read_hex_num(&num[2..]).0 428 | } else if num.chars().nth(0).unwrap() == '0' { 429 | self.read_oct_num(&num[1..]).0 430 | } else { 431 | self.read_dec_num(num.as_str()).0 432 | }; 433 | 434 | let max_32bits = 0xffffffff; 435 | let bits = if 0 == (i & !max_32bits) { 436 | Bits::Bits32 437 | } else { 438 | Bits::Bits64 439 | }; 440 | Ok(Token::new( 441 | TokenKind::IntNumber(i, bits), 442 | 0, 443 | pos, 444 | *self.get_cur_line(), 445 | )) 446 | } 447 | } 448 | fn read_dec_num(&mut self, num_literal: &str) -> (i64, String) { 449 | let mut suffix = "".to_string(); 450 | let n = num_literal.chars().fold(0, |n, c| match c { 451 | '0'..='9' => n * 10 + c.to_digit(10).unwrap() as u64, 452 | _ => { 453 | suffix.push(c); 454 | n 455 | } 456 | }); 457 | (n as i64, suffix) 458 | } 459 | fn read_oct_num(&mut self, num_literal: &str) -> (i64, String) { 460 | let mut suffix = "".to_string(); 461 | let n = num_literal.chars().fold(0, |n, c| match c { 462 | '0'..='7' => n * 8 + c.to_digit(8).unwrap() as u64, 463 | _ => { 464 | suffix.push(c); 465 | n 466 | } 467 | }); 468 | (n as i64, suffix) 469 | } 470 | fn read_hex_num(&mut self, num_literal: &str) -> (i64, String) { 471 | let mut suffix = "".to_string(); 472 | let n = num_literal.chars().fold(0, |n, c| match c { 473 | '0'..='9' | 'A'..='F' | 'a'..='f' => n * 16 + c.to_digit(16).unwrap() as u64, 474 | _ => { 475 | suffix.push(c); 476 | n 477 | } 478 | }); 479 | (n as i64, suffix) 480 | } 481 | pub fn read_newline(&mut self) -> ParseR { 482 | Ok(Token::new( 483 | TokenKind::Newline, 484 | 0, 485 | *self.peek_pos.back().unwrap(), 486 | *self.get_cur_line(), 487 | )) 488 | } 489 | pub fn read_symbol(&mut self, c: char) -> ParseR { 490 | let pos = *self.peek_pos.back().unwrap(); 491 | let mut sym = "".to_string(); 492 | sym.push(c); 493 | match c { 494 | '+' | '-' => { 495 | if self.peek_char_is('=')? 496 | || self.peek_char_is('>')? 497 | || self.peek_char_is('+')? 498 | || self.peek_char_is('-')? 499 | { 500 | sym.push(self.peek_next()?); 501 | } 502 | } 503 | '*' | '/' | '%' | '=' | '^' | '!' => { 504 | if self.peek_char_is('=')? { 505 | sym.push(self.peek_next()?); 506 | } 507 | } 508 | '<' | '>' | '&' | '|' => { 509 | if self.peek_char_is(c)? { 510 | sym.push(self.peek_next()?); 511 | } 512 | if self.peek_char_is('=')? { 513 | sym.push(self.peek_next()?); 514 | } 515 | } 516 | '.' => { 517 | if self.peek_char_is('.')? && self.peek_next_char_is('.')? { 518 | sym.push(self.peek_next()?); 519 | sym.push(self.peek_next()?); 520 | } 521 | } 522 | _ => {} 523 | }; 524 | Ok(Token::new( 525 | TokenKind::Identifier(sym), 526 | 0, 527 | pos, 528 | *self.get_cur_line(), 529 | )) 530 | } 531 | fn read_escaped_char(&mut self) -> ParseR { 532 | let c = self.peek_next()?; 533 | match c { 534 | '\'' | '"' | '?' | '\\' => Ok(c), 535 | 'a' => Ok('\x07'), 536 | 'b' => Ok('\x08'), 537 | 'f' => Ok('\x0c'), 538 | 'n' => Ok('\x0a'), 539 | 'r' => Ok('\x0d'), 540 | 't' => Ok('\x09'), 541 | 'v' => Ok('\x0b'), 542 | 'x' => { 543 | let mut hex = "".to_string(); 544 | loop { 545 | let c = self.peek_get()?; 546 | if c.is_alphanumeric() { 547 | hex.push(c); 548 | } else { 549 | break; 550 | } 551 | self.peek_next()?; 552 | } 553 | Ok(self.read_hex_num(hex.as_str()).0 as i32 as u8 as char) 554 | } 555 | '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' => { 556 | // if '0', check whether octal number \nnn or null \0 557 | if self.peek_get()?.is_numeric() { 558 | let mut oct = "".to_string(); 559 | oct.push(c); 560 | loop { 561 | let c = self.peek_next()?; 562 | oct.push(c); 563 | if !c.is_numeric() { 564 | oct.pop(); 565 | break; 566 | } 567 | } 568 | *self.peek_pos.back_mut().unwrap() -= 1; 569 | Ok(self.read_oct_num(oct.as_str()).0 as i32 as u8 as char) 570 | } else { 571 | assert!(c == '0'); 572 | Ok('\x00') 573 | } 574 | } 575 | _ => Ok(c), 576 | } 577 | } 578 | fn read_string_literal(&mut self) -> ParseR { 579 | let pos = *self.peek_pos.back().unwrap(); 580 | let mut s = "".to_string(); 581 | loop { 582 | match self.peek_next()? { 583 | '"' => break, 584 | '\\' => s.push(self.read_escaped_char()?), 585 | c => s.push(c), 586 | } 587 | } 588 | Ok(Token::new( 589 | TokenKind::String(s), 590 | 0, 591 | pos, 592 | *self.get_cur_line(), 593 | )) 594 | } 595 | fn read_char_literal(&mut self) -> ParseR { 596 | let pos = *self.peek_pos.back().unwrap(); 597 | let c = { 598 | let c = self.peek_next()?; 599 | if c == '\\' { 600 | self.read_escaped_char()? 601 | } else { 602 | c 603 | } 604 | }; 605 | if self.peek_next()? != '\'' { 606 | error::error_exit( 607 | *self.cur_line.back().unwrap() as i32, 608 | "missing terminating \' char", 609 | ); 610 | } 611 | Ok(Token::new(TokenKind::Char(c), 0, pos, *self.get_cur_line())) 612 | } 613 | 614 | pub fn do_read_token(&mut self) -> ParseR { 615 | if let Some(tok) = self.buf.back_mut().unwrap().pop_back() { 616 | return Ok(tok); 617 | } 618 | if self.buf.len() > 1 { 619 | return Err(Error::EOF); 620 | } 621 | 622 | match self.peek_next() { 623 | Ok(c) => { 624 | match c { 625 | 'a'..='z' | 'A'..='Z' | '_' => self.read_identifier(c), 626 | ' ' | '\t' => { 627 | self.do_read_token() 628 | // set a leading space 629 | .and_then(|tok| { 630 | let mut t = tok; 631 | t.space = true; 632 | Ok(t) 633 | }) 634 | } 635 | '0'..='9' => self.read_number_literal(c), 636 | '\"' => self.read_string_literal(), 637 | '\'' => self.read_char_literal(), 638 | '\n' => self.read_newline(), 639 | '\\' => { 640 | while self.peek_next()? != '\n' {} 641 | self.do_read_token() 642 | } 643 | '/' => { 644 | if self.peek_char_is('*')? { 645 | self.peek_next()?; // * 646 | let mut last = ' '; 647 | while !(last == '*' && self.peek_char_is('/')?) { 648 | last = self.peek_next()?; 649 | } 650 | self.peek_next()?; // / 651 | self.do_read_token() 652 | } else if self.peek_char_is('/')? { 653 | self.peek_next()?; // / 654 | while !self.peek_char_is('\n')? { 655 | self.peek_next()?; 656 | } 657 | // self.peek_next()?; // \n 658 | self.do_read_token() 659 | } else { 660 | self.read_symbol(c) 661 | } 662 | } 663 | _ => self.read_symbol(c), 664 | } 665 | } 666 | _ => { 667 | if self.peek.len() > 1 { 668 | self.peek.pop_back(); 669 | self.peek_pos.pop_back(); 670 | self.filename.pop_back(); 671 | self.cur_line.pop_back(); 672 | self.do_read_token() 673 | } else { 674 | Err(Error::EOF) 675 | } 676 | } 677 | } 678 | } 679 | pub fn read_token(&mut self) -> ParseR { 680 | let token = self.do_read_token(); 681 | token.and_then(|tok| match tok.kind { 682 | TokenKind::Newline => self.read_token(), 683 | TokenKind::Identifier(_) => Ok(self.convert_to_symbol(tok)), 684 | _ => Ok(tok), 685 | }) 686 | } 687 | fn convert_to_symbol(&mut self, token: Token) -> Token { 688 | let pos = token.pos.pos; 689 | let line = token.pos.line; 690 | let val = ident_val!(token); 691 | 692 | if val == "sizeof" { 693 | return Token::new(TokenKind::Symbol(Symbol::Sizeof), 0, pos, line); 694 | } 695 | 696 | let symbol = match val.as_str() { 697 | "++" => TokenKind::Symbol(Symbol::Inc), 698 | "--" => TokenKind::Symbol(Symbol::Dec), 699 | "(" => TokenKind::Symbol(Symbol::OpeningParen), 700 | ")" => TokenKind::Symbol(Symbol::ClosingParen), 701 | "[" => TokenKind::Symbol(Symbol::OpeningBoxBracket), 702 | "]" => TokenKind::Symbol(Symbol::ClosingBoxBracket), 703 | "{" => TokenKind::Symbol(Symbol::OpeningBrace), 704 | "}" => TokenKind::Symbol(Symbol::ClosingBrace), 705 | "." => TokenKind::Symbol(Symbol::Point), 706 | "," => TokenKind::Symbol(Symbol::Comma), 707 | ";" => TokenKind::Symbol(Symbol::Semicolon), 708 | ":" => TokenKind::Symbol(Symbol::Colon), 709 | "->" => TokenKind::Symbol(Symbol::Arrow), 710 | "+" => TokenKind::Symbol(Symbol::Add), 711 | "-" => TokenKind::Symbol(Symbol::Sub), 712 | "!" => TokenKind::Symbol(Symbol::Not), 713 | "~" => TokenKind::Symbol(Symbol::BitwiseNot), 714 | "*" => TokenKind::Symbol(Symbol::Asterisk), 715 | "&" => TokenKind::Symbol(Symbol::Ampersand), 716 | "/" => TokenKind::Symbol(Symbol::Div), 717 | "%" => TokenKind::Symbol(Symbol::Mod), 718 | "<<" => TokenKind::Symbol(Symbol::Shl), 719 | ">>" => TokenKind::Symbol(Symbol::Shr), 720 | "<" => TokenKind::Symbol(Symbol::Lt), 721 | "<=" => TokenKind::Symbol(Symbol::Le), 722 | ">" => TokenKind::Symbol(Symbol::Gt), 723 | ">=" => TokenKind::Symbol(Symbol::Ge), 724 | "==" => TokenKind::Symbol(Symbol::Eq), 725 | "!=" => TokenKind::Symbol(Symbol::Ne), 726 | "^" => TokenKind::Symbol(Symbol::Xor), 727 | "|" => TokenKind::Symbol(Symbol::Or), 728 | "&&" => TokenKind::Symbol(Symbol::LAnd), 729 | "||" => TokenKind::Symbol(Symbol::LOr), 730 | "?" => TokenKind::Symbol(Symbol::Question), 731 | "=" => TokenKind::Symbol(Symbol::Assign), 732 | "+=" => TokenKind::Symbol(Symbol::AssignAdd), 733 | "-=" => TokenKind::Symbol(Symbol::AssignSub), 734 | "*=" => TokenKind::Symbol(Symbol::AssignMul), 735 | "/=" => TokenKind::Symbol(Symbol::AssignDiv), 736 | "%=" => TokenKind::Symbol(Symbol::AssignMod), 737 | "<<=" => TokenKind::Symbol(Symbol::AssignShl), 738 | ">>=" => TokenKind::Symbol(Symbol::AssignShr), 739 | "&=" => TokenKind::Symbol(Symbol::AssignAnd), 740 | "^=" => TokenKind::Symbol(Symbol::AssignXor), 741 | "|=" => TokenKind::Symbol(Symbol::AssignOr), 742 | "#" => TokenKind::Symbol(Symbol::Hash), 743 | "..." => TokenKind::Symbol(Symbol::Vararg), 744 | _ => return token, 745 | }; 746 | 747 | Token::new(symbol, 0, pos, line) 748 | } 749 | fn maybe_convert_to_keyword(&mut self, token: Token) -> Token { 750 | let pos = token.pos.pos; 751 | let line = token.pos.line; 752 | let val = ident_val!(token); 753 | 754 | if val.len() > 0 && val.chars().nth(0).unwrap().is_alphanumeric() { 755 | let keyw = match val.as_str() { 756 | "typedef" => TokenKind::Keyword(Keyword::Typedef), 757 | "extern" => TokenKind::Keyword(Keyword::Extern), 758 | "auto" => TokenKind::Keyword(Keyword::Auto), 759 | "register" => TokenKind::Keyword(Keyword::Register), 760 | "static" => TokenKind::Keyword(Keyword::Static), 761 | "restrict" => TokenKind::Keyword(Keyword::Restrict), 762 | "const" => TokenKind::Keyword(Keyword::Const), 763 | "constexpr" => TokenKind::Keyword(Keyword::ConstExpr), 764 | "volatile" => TokenKind::Keyword(Keyword::Volatile), 765 | "void" => TokenKind::Keyword(Keyword::Void), 766 | "signed" => TokenKind::Keyword(Keyword::Signed), 767 | "unsigned" => TokenKind::Keyword(Keyword::Unsigned), 768 | "char" => TokenKind::Keyword(Keyword::Char), 769 | "int" => TokenKind::Keyword(Keyword::Int), 770 | "bool" => TokenKind::Keyword(Keyword::Int), 771 | "short" => TokenKind::Keyword(Keyword::Short), 772 | "long" => TokenKind::Keyword(Keyword::Long), 773 | "float" => TokenKind::Keyword(Keyword::Float), 774 | "double" => TokenKind::Keyword(Keyword::Double), 775 | "struct" => TokenKind::Keyword(Keyword::Struct), 776 | "union" => TokenKind::Keyword(Keyword::Union), 777 | "enum" => TokenKind::Keyword(Keyword::Enum), 778 | "inline" => TokenKind::Keyword(Keyword::Inline), 779 | "noreturn" => TokenKind::Keyword(Keyword::Noreturn), 780 | "if" => TokenKind::Keyword(Keyword::If), 781 | "else" => TokenKind::Keyword(Keyword::Else), 782 | "for" => TokenKind::Keyword(Keyword::For), 783 | "while" => TokenKind::Keyword(Keyword::While), 784 | "do" => TokenKind::Keyword(Keyword::Do), 785 | "switch" => TokenKind::Keyword(Keyword::Switch), 786 | "case" => TokenKind::Keyword(Keyword::Case), 787 | "default" => TokenKind::Keyword(Keyword::Default), 788 | "goto" => TokenKind::Keyword(Keyword::Goto), 789 | "break" => TokenKind::Keyword(Keyword::Break), 790 | "continue" => TokenKind::Keyword(Keyword::Continue), 791 | "return" => TokenKind::Keyword(Keyword::Return), 792 | _ => return token, 793 | }; 794 | return Token::new(keyw, 0, pos, line); 795 | } 796 | token 797 | } 798 | 799 | fn expand_obj_macro( 800 | &mut self, 801 | token: Token, 802 | name: String, 803 | macro_body: &Vec, 804 | ) -> ParseR<()> { 805 | let body = macro_body 806 | .iter() 807 | .map(|tok| { 808 | let mut t = tok.clone(); 809 | t.add_hideset(name.to_string()); 810 | t.pos = token.pos.clone(); 811 | t 812 | }) 813 | .collect(); 814 | self.unget_all(&body); 815 | Ok(()) 816 | } 817 | fn read_one_arg(&mut self, end: &mut bool) -> ParseR> { 818 | let mut nest = 0; 819 | let mut arg = Vec::new(); 820 | loop { 821 | let tok = self.do_read_token()?; 822 | let val = ident_val!(tok); 823 | if nest == 0 { 824 | match val.as_str() { 825 | ")" => { 826 | *end = true; 827 | break; 828 | } 829 | "," => break, 830 | _ => {} 831 | } 832 | } 833 | match val.as_str() { 834 | "(" => nest += 1, 835 | ")" => nest -= 1, 836 | _ => {} 837 | } 838 | arg.push(tok); 839 | } 840 | Ok(arg) 841 | } 842 | fn stringize(&mut self, pos: &Pos, tokens: &Vec) -> Token { 843 | let string = tokens 844 | .iter() 845 | .map(|token| { 846 | format!( 847 | "{}{}", 848 | if token.space { " " } else { "" }, 849 | match token.kind { 850 | TokenKind::String(ref s) => format!("\"{}\"", s.as_str()), 851 | TokenKind::IntNumber(ref i, _) => format!("{}", *i), 852 | TokenKind::FloatNumber(ref f) => format!("{}", *f), 853 | TokenKind::Identifier(ref i) => format!("{}", *i), 854 | TokenKind::Char(ref c) => format!("\'{}\'", *c), 855 | _ => "".to_string(), 856 | } 857 | ) 858 | }) 859 | .fold("".to_string(), |a, s| a + s.as_str()) 860 | .trim_start() // remove leading spaces 861 | .to_string(); 862 | Token::new(TokenKind::String(string), 0, pos.pos, pos.line) 863 | } 864 | fn expand_func_macro( 865 | &mut self, 866 | token: Token, 867 | name: String, 868 | macro_body: &Vec, 869 | ) -> ParseR<()> { 870 | // expect '(', self.skip can't be used because self.skip uses 'self.get' that uses MACRO_MAP with Mutex 871 | let expect_bracket = self.read_token()?; 872 | if expect_bracket.kind != TokenKind::Symbol(Symbol::OpeningParen) { 873 | error::error_exit(*self.get_cur_line() as i32, "expected '('"); 874 | } 875 | 876 | let mut args = Vec::new(); 877 | let mut end = false; 878 | while !end { 879 | args.push(self.read_one_arg(&mut end)?); 880 | } 881 | 882 | let mut expanded = Vec::new(); 883 | let mut is_stringize = false; 884 | let mut is_combine = false; 885 | // TODO: refine code 886 | for macro_tok in macro_body { 887 | if ident_val!(macro_tok) == "#" { 888 | if is_stringize { 889 | // means ## 890 | is_stringize = false; 891 | is_combine = true; 892 | } else { 893 | is_stringize = true; 894 | } 895 | continue; 896 | } 897 | 898 | if macro_tok.kind == TokenKind::MacroParam { 899 | let position = macro_tok.macro_position; 900 | 901 | if is_stringize { 902 | let stringized = self.stringize(&token.pos, &args[position]); 903 | expanded.push(stringized); 904 | is_stringize = false; 905 | } else if is_combine { 906 | let mut last = expanded.pop().unwrap(); 907 | for t in &args[position] { 908 | *ident_mut_val!(last) += ident_val!(t).as_str(); 909 | } 910 | expanded.push(last); 911 | is_combine = false; 912 | } else { 913 | self.buf.push_back(VecDeque::new()); 914 | self.unget_all(&args[position]); 915 | loop { 916 | match self.get_token() { 917 | Ok(ok) => expanded.push(ok), 918 | Err(Error::EOF) => break, 919 | Err(e) => return Err(e), 920 | } 921 | } 922 | self.buf.pop_back(); 923 | } 924 | } else { 925 | if is_combine { 926 | let mut last = expanded.pop().unwrap(); 927 | *ident_mut_val!(last) += ident_val!(macro_tok).as_str(); 928 | expanded.push(last); 929 | } else { 930 | expanded.push(macro_tok.clone()); 931 | } 932 | } 933 | } 934 | 935 | for tok in &mut expanded { 936 | tok.add_hideset(name.to_string()); 937 | tok.pos = token.pos.clone(); 938 | } 939 | 940 | self.unget_all(&expanded); 941 | Ok(()) 942 | } 943 | fn expand(&mut self, token: ParseR) -> ParseR { 944 | token.and_then(|tok| { 945 | let name = ident_val!(tok); 946 | match name.as_str() { 947 | "__LINE__" => { 948 | return Ok(Token::new( 949 | TokenKind::IntNumber(*self.get_cur_line() as i64, Bits::Bits32), 950 | 0, 951 | tok.pos.pos, 952 | tok.pos.line, 953 | )) 954 | } 955 | "__FILE__" => { 956 | return Ok(Token::new( 957 | TokenKind::String(self.get_filename()), 958 | 0, 959 | tok.pos.pos, 960 | tok.pos.line, 961 | )) 962 | } 963 | _ => {} 964 | } 965 | if tok.hideset.contains(name.as_str()) || !self.macro_map.contains_key(name.as_str()) { 966 | Ok(tok) 967 | } else { 968 | // if cur token is macro: 969 | match self.macro_map.get(name.as_str()).unwrap().clone() { 970 | Macro::Object(ref body) => self.expand_obj_macro(tok, name, body), 971 | Macro::FuncLike(ref body) => self.expand_func_macro(tok, name, body), 972 | }?; 973 | self.get_token() 974 | } 975 | }) 976 | } 977 | 978 | fn get_token(&mut self) -> ParseR { 979 | let tok = self.read_token().and_then(|tok| match &tok.kind { 980 | &TokenKind::Symbol(Symbol::Hash) => { 981 | self.read_cpp_directive()?; 982 | self.get_token() 983 | } 984 | _ => Ok(tok), 985 | }); 986 | self.expand(tok) 987 | } 988 | 989 | pub fn get(&mut self) -> ParseR { 990 | self.get_token().and_then(|tok| { 991 | if matches!(tok.kind, TokenKind::String(_)) 992 | && matches!(self.peek()?.kind, TokenKind::String(_)) 993 | { 994 | let s1 = retrieve_str!(tok); 995 | let s2 = retrieve_str!(self.get()?); 996 | let mut new_tok = tok; 997 | let mut concat_str = s1; 998 | concat_str.push_str(s2.as_str()); 999 | new_tok.kind = TokenKind::String(concat_str); 1000 | Ok(new_tok) 1001 | } else { 1002 | Ok(self.maybe_convert_to_keyword(tok)) 1003 | } 1004 | }) 1005 | } 1006 | 1007 | pub fn peek(&mut self) -> ParseR { 1008 | self.get_token().and_then(|tok| { 1009 | let conv = self.maybe_convert_to_keyword(tok); 1010 | self.unget(conv.clone()); 1011 | Ok(conv) 1012 | }) 1013 | } 1014 | 1015 | // for c preprocessor 1016 | 1017 | fn read_cpp_directive(&mut self) -> ParseR<()> { 1018 | let tok = self.do_read_token(); // cpp directive 1019 | tok.and_then(|t| match ident_val!(t).as_str() { 1020 | "include" => self.read_include(), 1021 | "define" => self.read_define(), 1022 | "undef" => self.read_undef(), 1023 | "if" => self.read_if(), 1024 | "ifdef" => self.read_ifdef(), 1025 | "ifndef" => self.read_ifndef(), 1026 | "elif" => self.read_elif(), 1027 | "else" => self.read_else(), 1028 | _ => Ok(()), 1029 | }) 1030 | } 1031 | 1032 | fn try_include(&mut self, filename: &str) -> Option { 1033 | let header_paths = vec![ 1034 | "./include/", 1035 | "/include/", 1036 | "/usr/include/", 1037 | "/usr/include/linux/", 1038 | "/usr/include/x86_64-linux-gnu/", 1039 | "./include/", 1040 | "", 1041 | ]; 1042 | header_paths 1043 | .iter() 1044 | .find(|header_path| { 1045 | let abs_filename = format!("{}{}", header_path, filename); 1046 | path::Path::new(abs_filename.as_str()).exists() 1047 | }) 1048 | .and_then(|a| Some(a.to_string() + filename)) 1049 | } 1050 | fn read_headerfile_name(&mut self) -> ParseR { 1051 | let mut name = "".to_string(); 1052 | // Lt = '<' 1053 | if self.skip_symbol(Symbol::Lt)? { 1054 | while !self.peek_char_is('>')? { 1055 | name.push(self.peek_next()?); 1056 | } 1057 | self.peek_next()?; // > 1058 | } else { 1059 | let tok = self.do_read_token()?; 1060 | if let TokenKind::String(s) = tok.kind { 1061 | println!("sorry, using \"double quote\" in #include is currently not supported."); 1062 | name = s; 1063 | } else { 1064 | error::error_exit(*self.get_cur_line() as i32, "expected '<' or '\"'"); 1065 | } 1066 | } 1067 | Ok(name) 1068 | } 1069 | fn read_include(&mut self) -> ParseR<()> { 1070 | // this will be a function 1071 | let filename = self.read_headerfile_name()?; 1072 | let abs_filename = self 1073 | .try_include(filename.as_str()) 1074 | .or_else(|| { 1075 | println!("error: {}: not found '{}'", *self.get_cur_line(), filename); 1076 | process::exit(-1); 1077 | }) 1078 | .unwrap(); 1079 | // DEBUG: println!("include filename: {}", abs_filename); 1080 | 1081 | let mut include_file = OpenOptions::new() 1082 | .read(true) 1083 | .open(abs_filename.to_string()) 1084 | .unwrap(); 1085 | let mut body = String::with_capacity(512); 1086 | include_file 1087 | .read_to_string(&mut body) 1088 | .ok() 1089 | .expect("not found file"); 1090 | self.filename.push_back(abs_filename); 1091 | unsafe { 1092 | self.peek.push_back(body.as_mut_vec().clone()); 1093 | } 1094 | self.peek_pos.push_back(0); 1095 | self.cur_line.push_back(1); 1096 | Ok(()) 1097 | } 1098 | 1099 | fn read_define_obj_macro(&mut self, name: String) -> ParseR<()> { 1100 | // DEBUG: println!("\tmacro: {}", name); 1101 | 1102 | let mut body = Vec::new(); 1103 | // DEBUG: print!("\tmacro body: "); 1104 | loop { 1105 | let c = self.do_read_token()?; 1106 | if c.kind == TokenKind::Newline { 1107 | break; 1108 | } 1109 | // DEBUG: print!("{}{}", if c.space { " " } else { "" }, c.val); 1110 | body.push(c); 1111 | } 1112 | // DEBUG: println!(); 1113 | self.register_obj_macro(name, body); 1114 | Ok(()) 1115 | } 1116 | fn read_define_func_macro(&mut self, name: String) -> ParseR<()> { 1117 | // read macro arguments 1118 | let mut params = HashMap::new(); 1119 | let mut count = 0usize; 1120 | loop { 1121 | let mut arg = ident_val!(self.do_read_token()?); 1122 | if arg == ")" { 1123 | break; 1124 | } 1125 | if count > 0 { 1126 | if arg != "," { 1127 | error::error_exit(*self.get_cur_line() as i32, "expected comma"); 1128 | } 1129 | arg = ident_val!(self.do_read_token()?); 1130 | } 1131 | params.insert(arg, count); 1132 | count += 1; 1133 | } 1134 | 1135 | let mut body = Vec::new(); 1136 | // print!("\tmacro body: "); 1137 | loop { 1138 | let tok = self.do_read_token()?; 1139 | if tok.kind == TokenKind::Newline { 1140 | break; 1141 | } 1142 | 1143 | // if tok is a parameter of funclike macro, 1144 | // the kind of tok will be changed to MacroParam 1145 | // and set macro_position 1146 | let maybe_macro_name = ident_val!(tok); 1147 | // print!("{}{}", if tok.space { " " } else { "" }, tok.val); 1148 | if params.contains_key(maybe_macro_name.as_str()) { 1149 | let mut macro_param = tok; 1150 | macro_param.kind = TokenKind::MacroParam; 1151 | macro_param.macro_position = *params.get(maybe_macro_name.as_str()).unwrap(); 1152 | body.push(macro_param); 1153 | } else { 1154 | body.push(tok); 1155 | } 1156 | } 1157 | self.register_funclike_macro(name, body); 1158 | Ok(()) 1159 | } 1160 | fn read_define(&mut self) -> ParseR<()> { 1161 | let mcro = self.do_read_token()?; 1162 | assert!(matches!(mcro.kind, TokenKind::Identifier(_))); 1163 | // println!("define: {}", mcro.val); 1164 | 1165 | let t = self.do_read_token()?; 1166 | if !t.space && ident_val!(t).as_str() == "(" { 1167 | self.read_define_func_macro(ident_val!(mcro)) 1168 | } else { 1169 | self.unget(t); 1170 | self.read_define_obj_macro(ident_val!(mcro)) 1171 | } 1172 | } 1173 | fn read_undef(&mut self) -> ParseR<()> { 1174 | let mcro = self.do_read_token()?; 1175 | assert!(matches!(mcro.kind, TokenKind::Identifier(_))); 1176 | self.macro_map.remove(ident_val!(mcro).as_str()); 1177 | Ok(()) 1178 | } 1179 | 1180 | fn register_obj_macro(&mut self, name: String, body: Vec) { 1181 | self.macro_map.insert(name, Macro::Object(body)); 1182 | } 1183 | fn register_funclike_macro(&mut self, name: String, body: Vec) { 1184 | self.macro_map.insert(name, Macro::FuncLike(body)); 1185 | } 1186 | 1187 | fn read_defined_op(&mut self) -> ParseR { 1188 | let mut tok = self.do_read_token()?; 1189 | if ident_val!(tok) == "(" { 1190 | tok = self.do_read_token()?; 1191 | self.expect_skip_symbol(Symbol::ClosingParen)?; 1192 | } 1193 | if self.macro_map.contains_key(ident_val!(tok).as_str()) { 1194 | Ok(Token::new( 1195 | TokenKind::IntNumber(1, Bits::Bits32), 1196 | 0, 1197 | 0, 1198 | *self.get_cur_line(), 1199 | )) 1200 | } else { 1201 | Ok(Token::new( 1202 | TokenKind::IntNumber(0, Bits::Bits32), 1203 | 0, 1204 | 0, 1205 | *self.get_cur_line(), 1206 | )) 1207 | } 1208 | } 1209 | fn read_intexpr_line(&mut self) -> ParseR> { 1210 | let mut v = Vec::new(); 1211 | loop { 1212 | let mut tok = self.do_read_token()?; 1213 | tok = self.expand(Ok(tok))?; 1214 | if tok.kind == TokenKind::Newline { 1215 | break; 1216 | } 1217 | 1218 | tok = self.convert_to_symbol(tok); 1219 | match tok.kind { 1220 | TokenKind::Identifier(ident) => { 1221 | if ident == "defined" { 1222 | v.push(self.read_defined_op()?); 1223 | } else { 1224 | // identifier in expr line is replaced with 0i 1225 | v.push(Token::new( 1226 | TokenKind::IntNumber(0, Bits::Bits32), 1227 | 0, 1228 | 0, 1229 | *self.get_cur_line(), 1230 | )); 1231 | } 1232 | } 1233 | _ => v.push(tok), 1234 | } 1235 | } 1236 | Ok(v) 1237 | } 1238 | fn read_constexpr(&mut self) -> ParseR { 1239 | let expr_line = self.read_intexpr_line()?; 1240 | self.buf.push_back(VecDeque::new()); 1241 | 1242 | self.unget(Token::new(TokenKind::Symbol(Symbol::Semicolon), 0, 0, 0)); 1243 | self.unget_all(&expr_line); 1244 | 1245 | let node = parser::Parser::new(self).run_as_expr().ok().unwrap(); 1246 | 1247 | self.buf.pop_back(); 1248 | 1249 | if let Ok(e) = node.eval_constexpr() { 1250 | Ok(e != 0) 1251 | } else { 1252 | println!("error: lexer constexpr"); 1253 | Err(Error::Something) 1254 | } 1255 | } 1256 | 1257 | fn do_read_if(&mut self, cond: bool) -> ParseR<()> { 1258 | self.cond_stack.push(cond); 1259 | if !cond { 1260 | self.skip_cond_include()?; 1261 | } 1262 | Ok(()) 1263 | } 1264 | fn read_if(&mut self) -> ParseR<()> { 1265 | let cond = self.read_constexpr()?; 1266 | self.do_read_if(cond) 1267 | } 1268 | fn read_ifdef(&mut self) -> ParseR<()> { 1269 | let macro_name = ident_val!(self.do_read_token()?); 1270 | let macro_is_defined = self.macro_map.contains_key(macro_name.as_str()); 1271 | self.do_read_if(macro_is_defined) 1272 | } 1273 | fn read_ifndef(&mut self) -> ParseR<()> { 1274 | let macro_name = ident_val!(self.do_read_token()?); 1275 | let macro_is_undefined = !self.macro_map.contains_key(macro_name.as_str()); 1276 | self.do_read_if(macro_is_undefined) 1277 | } 1278 | fn read_elif(&mut self) -> ParseR<()> { 1279 | if *self.cond_stack.last().unwrap() || !self.read_constexpr()? { 1280 | self.skip_cond_include()?; 1281 | } else { 1282 | self.cond_stack.pop(); 1283 | self.cond_stack.push(true); 1284 | } 1285 | Ok(()) 1286 | } 1287 | fn read_else(&mut self) -> ParseR<()> { 1288 | if *self.cond_stack.last().unwrap() { 1289 | self.skip_cond_include()?; 1290 | } 1291 | Ok(()) 1292 | } 1293 | 1294 | fn skip_cond_include(&mut self) -> ParseR<()> { 1295 | let mut nest = 0; 1296 | loop { 1297 | if self.peek_next()? != '#' { 1298 | continue; 1299 | } 1300 | 1301 | let tok = self.do_read_token()?; 1302 | let val = ident_val!(tok); 1303 | if nest == 0 { 1304 | match val.as_str() { 1305 | "else" | "elif" | "endif" => { 1306 | let line = *self.get_cur_line(); 1307 | self.unget(tok); 1308 | self.unget(Token::new( 1309 | TokenKind::Identifier("#".to_string()), 1310 | 0, 1311 | 0, 1312 | line, 1313 | )); 1314 | return Ok(()); 1315 | } 1316 | _ => {} 1317 | } 1318 | } 1319 | 1320 | match val.as_str() { 1321 | "if" | "ifdef" | "ifndef" => nest += 1, 1322 | "endif" => nest -= 1, 1323 | _ => {} 1324 | } 1325 | } 1326 | } 1327 | 1328 | pub fn get_surrounding_code_with_err_point(&mut self, pos: usize) -> String { 1329 | let code = self.peek.back().unwrap(); 1330 | let peek_pos = pos; 1331 | let start_pos = { 1332 | let mut p = peek_pos as i32; 1333 | while p >= 0 && code[p as usize] as char != '\n' { 1334 | p -= 1; 1335 | } 1336 | p += 1; // '\n' 1337 | p as usize 1338 | }; 1339 | let end_pos = { 1340 | let mut p = peek_pos as i32; 1341 | while p < code.len() as i32 && code[p as usize] as char != '\n' { 1342 | p += 1; 1343 | } 1344 | p as usize 1345 | }; 1346 | let surrounding_code = String::from_utf8(code[start_pos..end_pos].to_vec()) 1347 | .unwrap() 1348 | .to_string(); 1349 | let mut err_point = String::new(); 1350 | for _ in 0..(peek_pos - start_pos) { 1351 | err_point.push(' '); 1352 | } 1353 | err_point.push('^'); 1354 | surrounding_code + "\n" + err_point.as_str() 1355 | } 1356 | } 1357 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | pub mod common; 2 | pub mod error; 3 | pub mod lexer; 4 | pub mod node; 5 | pub mod parser; 6 | pub mod codegen; 7 | pub mod types; 8 | 9 | // for LLVMLinkInInterpreter 10 | #[link(name = "ffi")] 11 | extern "C" {} 12 | 13 | #[macro_use] 14 | extern crate lazy_static; 15 | 16 | use std::sync::Mutex; 17 | use std::marker::Send; 18 | 19 | unsafe impl Send for codegen::Codegen {} 20 | 21 | lazy_static! { 22 | static ref CODEGEN: Mutex = { 23 | unsafe { 24 | Mutex::new(codegen::Codegen::new("rucc")) 25 | } 26 | }; 27 | } 28 | -------------------------------------------------------------------------------- /src/main.rs: -------------------------------------------------------------------------------- 1 | extern crate rucc; 2 | use rucc::common; 3 | 4 | extern crate ansi_term; 5 | use self::ansi_term::Colour; 6 | 7 | extern crate clap; 8 | use clap::{App, Arg}; 9 | 10 | const VERSION_STR: &'static str = env!("CARGO_PKG_VERSION"); 11 | 12 | fn main() { 13 | let mut app = App::new("rucc") 14 | .version(VERSION_STR) 15 | .author("uint256_t") 16 | .about("rucc is a small toy C compiler in Rust") 17 | .arg( 18 | Arg::with_name("version") 19 | .short("v") 20 | .long("version") 21 | .help("Show version info"), 22 | ) 23 | .arg(Arg::with_name("FILE").help("Input file").index(1)); 24 | let app_matches = app.clone().get_matches(); 25 | 26 | if let Some(filename) = app_matches.value_of("FILE") { 27 | common::run_file(filename); 28 | println!("{}", Colour::Green.paint("Compiling exited successfully.")); 29 | } else { 30 | app.print_help().unwrap(); 31 | println!(); 32 | } 33 | } 34 | 35 | #[test] 36 | fn compare_with_clang_output() { 37 | use std::fs; 38 | use std::process::Command; 39 | use std::io::{stderr, Write}; 40 | 41 | let examples_paths = match fs::read_dir("example") { 42 | Ok(paths) => paths, 43 | Err(e) => panic!(format!("error: {:?}", e.kind())), 44 | }; 45 | for path in examples_paths { 46 | let name = path.unwrap().path().to_str().unwrap().to_string(); 47 | writeln!(&mut stderr(), "comparing {}...", name).unwrap(); 48 | 49 | Command::new("./rucc.sh") 50 | .arg(name.to_string()) 51 | .spawn() 52 | .expect("failed to run") 53 | .wait() 54 | .expect("failed to run"); 55 | let output1 = Command::new("./a.out").output().expect("failed to run"); 56 | Command::new("clang") 57 | .arg(name) 58 | .arg("-lm") 59 | .arg("-w") 60 | .spawn() 61 | .expect("failed to run") 62 | .wait() 63 | .expect("failed to run"); 64 | let output2 = Command::new("./a.out").output().expect("failed to run"); 65 | assert!(output1 == output2); 66 | } 67 | } 68 | 69 | #[test] 70 | fn compile_all_examples() { 71 | use std::fs; 72 | use rucc::{codegen, lexer, parser}; 73 | use std::io::{stderr, Write}; 74 | 75 | let examples_paths = match fs::read_dir("example") { 76 | Ok(paths) => paths, 77 | Err(e) => panic!(format!("error: {:?}", e.kind())), 78 | }; 79 | for path in examples_paths { 80 | let name = path.unwrap().path().to_str().unwrap().to_string(); 81 | writeln!(&mut stderr(), "compiling {}...", name).unwrap(); 82 | 83 | // for coverage... 84 | unsafe { 85 | let mut nodes = Vec::new(); 86 | let mut lexer = lexer::Lexer::new(name.to_string()); 87 | let mut parser = parser::Parser::new(&mut lexer); 88 | let mut codegen = codegen::Codegen::new("test"); 89 | loop { 90 | match parser.read_toplevel(&mut nodes) { 91 | Err(parser::Error::EOF) => break, 92 | Err(_) => continue, 93 | _ => {} 94 | } 95 | for node in &nodes { 96 | node.show(); 97 | } 98 | match codegen.run(&nodes) { 99 | Ok(_) => {} 100 | Err(e) => panic!(format!("err in codegen: {:?}", e)), 101 | } 102 | nodes.clear(); 103 | } 104 | } 105 | } 106 | } 107 | -------------------------------------------------------------------------------- /src/node.rs: -------------------------------------------------------------------------------- 1 | use lexer::Pos; 2 | use parser::{Error, ParseR}; 3 | use std::boxed::Box; 4 | use std::marker::Send; 5 | use types::{StorageClass, Type}; 6 | 7 | #[derive(Debug, Clone)] 8 | pub struct AST { 9 | pub kind: ASTKind, 10 | pub pos: Pos, 11 | } 12 | 13 | impl AST { 14 | pub fn new(kind: ASTKind, pos: Pos) -> AST { 15 | AST { 16 | kind: kind, 17 | pos: pos, 18 | } 19 | } 20 | } 21 | 22 | #[derive(Debug, Clone, PartialEq)] 23 | pub enum Bits { 24 | Bits8, 25 | Bits16, 26 | Bits32, 27 | Bits64, 28 | } 29 | 30 | #[derive(Debug, Clone)] 31 | pub enum ASTKind { 32 | Int(i64, Bits), 33 | Float(f64), 34 | Char(i32), 35 | String(String), 36 | Typedef(Type, String), // from, to ( typedef from to; ) 37 | TypeCast(Box, Type), 38 | Load(Box), 39 | Variable(Type, String), 40 | VariableDecl(Type, String, StorageClass, Option>), // type, name, init val 41 | ConstArray(Vec), 42 | ConstStruct(Vec), 43 | UnaryOp(Box, CUnaryOps), 44 | BinaryOp(Box, Box, CBinOps), 45 | TernaryOp(Box, Box, Box), // cond then else 46 | FuncDef(Type, Vec, String, Box), // functype, param names, func name, body 47 | Block(Vec), 48 | Compound(Vec), 49 | If(Box, Box, Box), // cond, then stmt, else stmt 50 | For(Box, Box, Box, Box), // init, cond, step, body 51 | While(Box, Box), // cond, body 52 | DoWhile(Box, Box), // cond, body 53 | Switch(Box, Box), // cond, stmt 54 | Case(Box), 55 | DefaultL, 56 | Goto(String), // label name 57 | Label(String), // label name 58 | FuncCall(Box, Vec), 59 | StructRef(Box, String), // String is name of struct field 60 | Break, 61 | Continue, 62 | Return(Option>), 63 | } 64 | 65 | unsafe impl Send for AST {} 66 | 67 | #[derive(Debug, Clone)] 68 | pub enum CBinOps { 69 | Add, 70 | Sub, 71 | Mul, 72 | Div, 73 | Rem, 74 | And, 75 | Or, 76 | Xor, 77 | LAnd, 78 | LOr, 79 | Eq, 80 | Ne, 81 | Lt, 82 | Gt, 83 | Le, 84 | Ge, 85 | Shl, 86 | Shr, 87 | Comma, 88 | Assign, 89 | } 90 | 91 | #[derive(Debug, Clone)] 92 | pub enum CUnaryOps { 93 | LNot, 94 | BNot, 95 | Minus, 96 | // TODO: Inc and Dec is actually POSTFIX. 97 | Inc, 98 | Dec, 99 | Deref, 100 | Addr, 101 | Sizeof, 102 | // TODO: add Cast, Sizeof 103 | } 104 | 105 | impl AST { 106 | pub fn eval_constexpr(&self) -> ParseR { 107 | self.eval() 108 | } 109 | 110 | fn eval(&self) -> ParseR { 111 | Ok(match self.kind { 112 | ASTKind::Int(n, _) => n, 113 | ASTKind::TypeCast(ref e, _) => e.eval()?, 114 | ASTKind::UnaryOp(ref e, CUnaryOps::LNot) => (e.eval()? == 0) as i64, 115 | ASTKind::UnaryOp(ref e, CUnaryOps::BNot) => !e.eval()?, 116 | ASTKind::UnaryOp(ref e, CUnaryOps::Minus) => -e.eval()?, 117 | ASTKind::UnaryOp(ref e, CUnaryOps::Inc) => e.eval()? + 1, 118 | ASTKind::UnaryOp(ref e, CUnaryOps::Dec) => e.eval()? - 1, 119 | ASTKind::UnaryOp(ref e, CUnaryOps::Deref) => e.eval()?, 120 | ASTKind::UnaryOp(ref e, CUnaryOps::Addr) => e.eval()?, 121 | ASTKind::BinaryOp(ref l, ref r, CBinOps::Add) => l.eval()? + r.eval()?, 122 | ASTKind::BinaryOp(ref l, ref r, CBinOps::Sub) => l.eval()? - r.eval()?, 123 | ASTKind::BinaryOp(ref l, ref r, CBinOps::Mul) => l.eval()? * r.eval()?, 124 | ASTKind::BinaryOp(ref l, ref r, CBinOps::Div) => l.eval()? / r.eval()?, 125 | ASTKind::BinaryOp(ref l, ref r, CBinOps::Rem) => l.eval()? % r.eval()?, 126 | ASTKind::BinaryOp(ref l, ref r, CBinOps::And) => l.eval()? & r.eval()?, 127 | ASTKind::BinaryOp(ref l, ref r, CBinOps::Or) => l.eval()? | r.eval()?, 128 | ASTKind::BinaryOp(ref l, ref r, CBinOps::Xor) => l.eval()? ^ r.eval()?, 129 | ASTKind::BinaryOp(ref l, ref r, CBinOps::LAnd) => l.eval()? & r.eval()?, 130 | ASTKind::BinaryOp(ref l, ref r, CBinOps::LOr) => l.eval()? | r.eval()?, 131 | ASTKind::BinaryOp(ref l, ref r, CBinOps::Eq) => (l.eval()? == r.eval()?) as i64, 132 | ASTKind::BinaryOp(ref l, ref r, CBinOps::Ne) => (l.eval()? != r.eval()?) as i64, 133 | ASTKind::BinaryOp(ref l, ref r, CBinOps::Lt) => (l.eval()? < r.eval()?) as i64, 134 | ASTKind::BinaryOp(ref l, ref r, CBinOps::Gt) => (l.eval()? > r.eval()?) as i64, 135 | ASTKind::BinaryOp(ref l, ref r, CBinOps::Le) => (l.eval()? <= r.eval()?) as i64, 136 | ASTKind::BinaryOp(ref l, ref r, CBinOps::Ge) => (l.eval()? >= r.eval()?) as i64, 137 | ASTKind::BinaryOp(ref l, ref r, CBinOps::Shl) => l.eval()? << r.eval()?, 138 | ASTKind::BinaryOp(ref l, ref r, CBinOps::Shr) => l.eval()? >> r.eval()?, 139 | ASTKind::BinaryOp(ref l, ref r, CBinOps::Comma) => { 140 | l.eval()?; 141 | r.eval()? 142 | } 143 | ASTKind::BinaryOp(ref l, ref r, _) => { 144 | l.eval()?; 145 | r.eval()?; 146 | 0 147 | } 148 | ASTKind::TernaryOp(ref cond, ref l, ref r) => { 149 | if cond.eval()? != 0 { 150 | l.eval()? 151 | } else { 152 | r.eval()? 153 | } 154 | } 155 | _ => return Err(Error::Something), 156 | }) 157 | } 158 | 159 | pub fn is_const(&self) -> bool { 160 | match self.kind { 161 | ASTKind::Int(_, _) | ASTKind::Float(_) | ASTKind::String(_) | ASTKind::Char(_) => true, 162 | // String(String), 163 | _ => false, 164 | } 165 | } 166 | 167 | pub fn get_variable_name<'a>(&'a self) -> Option<&'a str> { 168 | if let ASTKind::Variable(_, ref name) = self.kind { 169 | return Some(name.as_str()); 170 | } 171 | None 172 | } 173 | 174 | pub fn show(&self) { 175 | match self.kind { 176 | ASTKind::Int(n, _) => print!("{} ", n), 177 | ASTKind::Float(n) => print!("{} ", n), 178 | ASTKind::Char(c) => print!("'{}' ", c), 179 | ASTKind::String(ref s) => print!("\"{}\" ", s), 180 | ASTKind::Typedef(ref a, ref b) => print!("(typedef {:?} {})", a, b), 181 | ASTKind::TypeCast(ref e, ref t) => { 182 | print!("(typecast {:?} ", t); 183 | e.show(); 184 | print!(")"); 185 | } 186 | ASTKind::Load(ref expr) => { 187 | print!("(load "); 188 | expr.show(); 189 | print!(")"); 190 | } 191 | ASTKind::Variable(ref ty, ref name) => print!("({:?} {}) ", ty, name), 192 | ASTKind::VariableDecl(ref ty, ref name, ref sclass, ref init) => { 193 | print!("(var-decl {:?} {:?} {}", ty, sclass, name); 194 | if init.is_some() { 195 | print!(" (init "); 196 | init.clone().unwrap().show(); 197 | print!(")"); 198 | } 199 | print!(")"); 200 | } 201 | ASTKind::ConstArray(ref elems) => { 202 | print!("(const-array "); 203 | for elem in elems { 204 | elem.show(); 205 | } 206 | print!(")"); 207 | } 208 | ASTKind::ConstStruct(ref elems) => { 209 | print!("(const-struct "); 210 | for elem in elems { 211 | elem.show(); 212 | } 213 | print!(")"); 214 | } 215 | ASTKind::UnaryOp(ref expr, ref op) => { 216 | print!("({:?} ", op); 217 | expr.show(); 218 | print!(")"); 219 | } 220 | ASTKind::BinaryOp(ref lhs, ref rhs, ref op) => { 221 | print!("({:?} ", op); 222 | lhs.show(); 223 | rhs.show(); 224 | print!(")"); 225 | } 226 | ASTKind::TernaryOp(ref cond, ref lhs, ref rhs) => { 227 | print!("(?: "); 228 | cond.show(); 229 | print!(" "); 230 | lhs.show(); 231 | print!(" "); 232 | rhs.show(); 233 | print!(")"); 234 | } 235 | ASTKind::FuncDef(ref functy, ref param_names, ref name, ref body) => { 236 | print!("(def-func {} {:?} {:?}", name, functy, param_names); 237 | body.show(); 238 | print!(")"); 239 | } 240 | ASTKind::Block(ref body) => { 241 | for stmt in body { 242 | stmt.show(); 243 | } 244 | } 245 | ASTKind::Compound(ref body) => { 246 | for stmt in body { 247 | stmt.show(); 248 | } 249 | } 250 | ASTKind::If(ref cond, ref then_b, ref else_b) => { 251 | print!("(if "); 252 | cond.show(); 253 | print!("("); 254 | then_b.clone().show(); 255 | print!(")("); 256 | else_b.clone().show(); 257 | print!("))"); 258 | } 259 | ASTKind::For(ref init, ref cond, ref step, ref body) => { 260 | print!("(for "); 261 | init.show(); 262 | print!("; "); 263 | cond.show(); 264 | print!("; "); 265 | step.show(); 266 | print!(" ("); 267 | body.show(); 268 | print!(")"); 269 | } 270 | ASTKind::DoWhile(ref cond, ref body) => { 271 | print!("(do-while "); 272 | cond.show(); 273 | print!("("); 274 | body.show(); 275 | print!("))"); 276 | } 277 | ASTKind::While(ref cond, ref body) => { 278 | print!("(while "); 279 | cond.show(); 280 | print!("("); 281 | body.show(); 282 | print!("))"); 283 | } 284 | ASTKind::Switch(ref cond, ref body) => { 285 | print!("(switch "); 286 | cond.show(); 287 | print!("("); 288 | body.show(); 289 | print!("))"); 290 | } 291 | ASTKind::Case(ref expr) => { 292 | print!("(case "); 293 | expr.show(); 294 | print!(")"); 295 | } 296 | ASTKind::DefaultL => { 297 | print!("(default)"); 298 | } 299 | ASTKind::Goto(ref label_name) => { 300 | print!("(goto {})", label_name); 301 | } 302 | ASTKind::Label(ref name) => { 303 | print!("(label {})", name); 304 | } 305 | ASTKind::FuncCall(ref f, ref args) => { 306 | print!("(func-call "); 307 | f.show(); 308 | print!(" "); 309 | for arg in args { 310 | arg.show(); 311 | } 312 | print!(")"); 313 | } 314 | ASTKind::StructRef(ref s, ref field) => { 315 | print!("(struct-ref "); 316 | s.show(); 317 | print!(" {})", field); 318 | } 319 | ASTKind::Continue => { 320 | print!("(continue)"); 321 | } 322 | ASTKind::Break => { 323 | print!("(break)"); 324 | } 325 | ASTKind::Return(ref retval) => { 326 | print!("(return "); 327 | if retval.is_some() { 328 | retval.clone().unwrap().show(); 329 | } 330 | print!(")"); 331 | } 332 | }; 333 | } 334 | } 335 | -------------------------------------------------------------------------------- /src/parser.rs: -------------------------------------------------------------------------------- 1 | use lexer::{Keyword, Lexer, Pos, Symbol, Token, TokenKind}; 2 | use node; 3 | use node::{ASTKind, Bits, AST}; 4 | use types::{Sign, StorageClass, Type}; 5 | 6 | use std::boxed::Box; 7 | use std::collections::{hash_map, HashMap, VecDeque}; 8 | use std::io::{stderr, Write}; 9 | use std::str; 10 | 11 | // use CODEGEN; 12 | 13 | extern crate llvm_sys as llvm; 14 | 15 | extern crate rand; 16 | use self::rand::Rng; 17 | 18 | extern crate ansi_term; 19 | use self::ansi_term::Colour; 20 | 21 | // TODO: add more error kinds 22 | pub enum Error { 23 | Something, 24 | EOF, 25 | } 26 | 27 | pub struct Qualifiers { 28 | pub q_restrict: bool, 29 | pub q_const: bool, 30 | pub q_constexpr: bool, 31 | pub q_volatile: bool, 32 | pub q_inline: bool, 33 | pub q_noreturn: bool, 34 | } 35 | 36 | impl Qualifiers { 37 | pub fn new() -> Qualifiers { 38 | Qualifiers { 39 | q_restrict: false, 40 | q_const: false, 41 | q_constexpr: false, 42 | q_volatile: false, 43 | q_inline: false, 44 | q_noreturn: false, 45 | } 46 | } 47 | } 48 | 49 | pub type ParseR = Result; 50 | 51 | pub struct Parser<'a> { 52 | pub lexer: &'a mut Lexer, 53 | pub err_counts: usize, 54 | env: Env, 55 | tags: Env, 56 | // constexpr_func_map: HashSet, 57 | } 58 | 59 | pub struct Env(pub VecDeque>); 60 | 61 | impl Env { 62 | fn new() -> Env { 63 | let mut env = VecDeque::new(); 64 | env.push_back(HashMap::new()); 65 | Env(env) 66 | } 67 | fn push(&mut self) { 68 | let localenv = (*self.0.back().unwrap()).clone(); 69 | self.0.push_back(localenv); 70 | } 71 | fn pop(&mut self) { 72 | self.0.pop_back(); 73 | } 74 | fn add(&mut self, name: String, val: T) { 75 | self.0.back_mut().unwrap().insert(name, val); 76 | } 77 | fn add_globally(&mut self, name: String, val: T) { 78 | self.0[0].insert(name.clone(), val.clone()); 79 | self.0.back_mut().unwrap().insert(name, val); 80 | } 81 | fn is_local(&self) -> bool { 82 | self.0.len() > 1 83 | } 84 | fn back_mut(&mut self) -> Option<&mut HashMap> { 85 | self.0.back_mut() 86 | } 87 | fn get(&mut self, name: &str) -> Option<&T> { 88 | self.0.back_mut().unwrap().get(name) 89 | } 90 | fn contains(&mut self, name: &str) -> bool { 91 | self.0.back_mut().unwrap().contains_key(name) 92 | } 93 | } 94 | 95 | macro_rules! matches { 96 | ($e:expr, $p:pat) => { 97 | match $e { 98 | $p => true, 99 | _ => false, 100 | } 101 | }; 102 | } 103 | macro_rules! ident_val { 104 | ($e:expr) => { 105 | match &$e.kind { 106 | &TokenKind::Identifier(ref ident) => ident.to_string(), 107 | _ => "".to_string(), 108 | } 109 | }; 110 | } 111 | macro_rules! expect_symbol_error { 112 | ($slf:expr, $sym:expr, $msg:expr) => {{ 113 | if !$slf.lexer.skip_symbol($sym)? { 114 | let peek = $slf.lexer.peek(); 115 | $slf.show_error_token(&peek?, $msg); 116 | } 117 | }}; 118 | } 119 | 120 | impl<'a> Parser<'a> { 121 | pub fn new(lexer: &'a mut Lexer) -> Parser<'a> { 122 | Parser { 123 | lexer: lexer, 124 | err_counts: 0, 125 | env: Env::new(), 126 | tags: Env::new(), 127 | // constexpr_func_map: HashSet::new(), 128 | } 129 | } 130 | fn show_error(&mut self, msg: &str) { 131 | self.err_counts += 1; 132 | writeln!( 133 | &mut stderr(), 134 | "{}: {} {}: {}", 135 | self.lexer.get_filename(), 136 | Colour::Red.bold().paint("error:"), 137 | self.lexer.get_cur_line(), 138 | msg 139 | ) 140 | .unwrap(); 141 | } 142 | fn show_error_token(&mut self, token: &Token, msg: &str) { 143 | self.err_counts += 1; 144 | writeln!( 145 | &mut stderr(), 146 | "{}: {} {}: {}", 147 | self.lexer.get_filename(), 148 | Colour::Red.bold().paint("error:"), 149 | token.pos.line, 150 | msg 151 | ) 152 | .unwrap(); 153 | writeln!( 154 | &mut stderr(), 155 | "{}", 156 | self.lexer 157 | .get_surrounding_code_with_err_point(token.pos.pos,) 158 | ) 159 | .unwrap(); 160 | } 161 | pub fn run_file(filename: String) -> Vec { 162 | let mut nodes: Vec = Vec::new(); 163 | let mut lexer = Lexer::new(filename.to_string()); 164 | // TODO: for debugging 165 | // loop { 166 | // let tok = lexer.get(); 167 | // match tok { 168 | // Some(t) => { 169 | // println!("t:{}{:?} {}", if t.space { " " } else { "" }, t.kind, t.val); 170 | // } 171 | // None => break, 172 | // } 173 | // } 174 | // 175 | // // Debug: (parsing again is big cost?) 176 | // lexer = Lexer::new(filename.to_string(), s.as_str()); 177 | Parser::new(&mut lexer).run(&mut nodes); 178 | nodes 179 | } 180 | pub fn run(&mut self, node: &mut Vec) { 181 | while matches!(self.read_toplevel(node), Ok(_)) {} 182 | self.show_total_errors(); 183 | } 184 | pub fn run_as_expr(&mut self) -> ParseR { 185 | self.read_expr() 186 | } 187 | pub fn show_total_errors(&mut self) { 188 | if self.err_counts > 0 { 189 | println!( 190 | "{} error{} generated.", 191 | self.err_counts, 192 | if self.err_counts > 1 { "s" } else { "" } 193 | ); 194 | ::std::process::exit(-1); 195 | } 196 | } 197 | pub fn read_toplevel(&mut self, ast: &mut Vec) -> ParseR<()> { 198 | // TODO: refine 199 | if self.is_function_def()? { 200 | match self.read_func_def() { 201 | Ok(ok) => ast.push(ok), 202 | Err(Error::EOF) => self.show_error("expected a token, but reached EOF"), 203 | Err(e) => return Err(e), 204 | } 205 | } else { 206 | match self.read_decl(ast) { 207 | Err(Error::EOF) => self.show_error("expected a token, but reached EOF"), 208 | Err(e) => return Err(e), 209 | _ => {} 210 | } 211 | } 212 | Ok(()) 213 | } 214 | fn read_func_def(&mut self) -> ParseR { 215 | self.env.push(); 216 | self.tags.push(); 217 | 218 | let (ret_ty, _, _qualifiers) = self.read_type_spec()?; 219 | let (functy, name, param_names) = self.read_declarator(ret_ty)?; 220 | 221 | // if qualifiers.q_constexpr { 222 | // self.constexpr_func_map.insert(name.clone()); 223 | // } 224 | 225 | self.env.add_globally( 226 | name.clone(), 227 | AST::new( 228 | ASTKind::Variable(functy.clone(), name.clone()), 229 | Pos::new(0, 0), 230 | ), 231 | ); 232 | self.env.add( 233 | "__func__".to_string(), 234 | AST::new(ASTKind::String(name.clone()), Pos::new(0, 0)), 235 | ); 236 | 237 | expect_symbol_error!(self, Symbol::OpeningBrace, "expected '('"); 238 | let body = self.read_func_body(&functy)?; 239 | 240 | self.env.pop(); 241 | self.tags.pop(); 242 | 243 | Ok(AST::new( 244 | ASTKind::FuncDef( 245 | functy, 246 | if param_names.is_none() { 247 | Vec::new() 248 | } else { 249 | param_names.unwrap() 250 | }, 251 | name, 252 | Box::new(body), 253 | ), 254 | Pos::new(0, 0), 255 | )) 256 | } 257 | fn read_func_body(&mut self, _functy: &Type) -> ParseR { 258 | self.read_compound_stmt() 259 | } 260 | fn read_compound_stmt(&mut self) -> ParseR { 261 | let mut stmts: Vec = Vec::new(); 262 | loop { 263 | if self 264 | .lexer 265 | .skip_symbol(Symbol::ClosingBrace) 266 | .or_else(|eof| { 267 | self.show_error("expected '}'"); 268 | Err(eof) 269 | })? 270 | { 271 | break; 272 | } 273 | 274 | let peek_tok = self.lexer.peek()?; 275 | if self.is_type(&peek_tok) { 276 | // variable declaration 277 | self.read_decl(&mut stmts)?; 278 | } else { 279 | match self.read_stmt() { 280 | Ok(stmt) => stmts.push(stmt), 281 | Err(_) => {} 282 | } 283 | } 284 | } 285 | Ok(AST::new(ASTKind::Block(stmts), Pos::new(0, 0))) 286 | } 287 | fn read_stmt(&mut self) -> ParseR { 288 | let tok = self.lexer.get()?; 289 | if let &TokenKind::Keyword(ref keyw) = &tok.kind { 290 | match *keyw { 291 | Keyword::If => return self.read_if_stmt(), 292 | Keyword::For => return self.read_for_stmt(), 293 | Keyword::While => return self.read_while_stmt(), 294 | Keyword::Do => return self.read_do_while_stmt(), 295 | Keyword::Switch => return self.read_switch_stmt(), 296 | Keyword::Case => return self.read_case_label(), 297 | Keyword::Default => return self.read_default_label(), 298 | Keyword::Goto => return self.read_goto_stmt(), 299 | Keyword::Continue => return self.read_continue_stmt(), 300 | Keyword::Break => return self.read_break_stmt(), 301 | Keyword::Return => return self.read_return_stmt(), 302 | _ => {} 303 | } 304 | } else if let &TokenKind::Symbol(Symbol::OpeningBrace) = &tok.kind { 305 | return self.read_compound_stmt(); 306 | } 307 | 308 | if matches!(tok.kind, TokenKind::Identifier(_)) 309 | && self.lexer.peek_symbol_token_is(Symbol::Colon)? 310 | { 311 | return self.read_label(tok); 312 | } 313 | 314 | self.lexer.unget(tok); 315 | let expr = self.read_opt_expr(); 316 | expect_symbol_error!(self, Symbol::Semicolon, "expected ';'"); 317 | expr 318 | } 319 | fn read_if_stmt(&mut self) -> ParseR { 320 | expect_symbol_error!(self, Symbol::OpeningParen, "expected '('"); 321 | let cond = self.read_expr()?; 322 | expect_symbol_error!(self, Symbol::ClosingParen, "expected ')'"); 323 | let then_stmt = Box::new(self.read_stmt()?); 324 | let else_stmt = if self.lexer.skip_keyword(Keyword::Else)? { 325 | Box::new(self.read_stmt()?) 326 | } else { 327 | Box::new(AST::new(ASTKind::Block(Vec::new()), Pos::new(0, 0))) 328 | }; 329 | Ok(AST::new( 330 | ASTKind::If(Box::new(cond), then_stmt, else_stmt), 331 | Pos::new(0, 0), 332 | )) 333 | } 334 | fn read_for_stmt(&mut self) -> ParseR { 335 | expect_symbol_error!(self, Symbol::OpeningParen, "expected '('"); 336 | let init = self.read_opt_decl_or_stmt()?; 337 | // TODO: make read_expr return Option. 338 | // when cur tok is ';', returns None. 339 | let cond = self.read_opt_expr()?; 340 | expect_symbol_error!(self, Symbol::Semicolon, "expected ';'"); 341 | let step = if self.lexer.peek_symbol_token_is(Symbol::ClosingParen)? { 342 | AST::new(ASTKind::Compound(Vec::new()), self.lexer.get_cur_pos()) 343 | } else { 344 | self.read_opt_expr()? 345 | }; 346 | expect_symbol_error!(self, Symbol::ClosingParen, "expected ')'"); 347 | let body = self.read_stmt()?; 348 | Ok(AST::new( 349 | ASTKind::For( 350 | Box::new(init), 351 | Box::new(cond), 352 | Box::new(step), 353 | Box::new(body), 354 | ), 355 | Pos::new(0, 0), 356 | )) 357 | } 358 | fn read_while_stmt(&mut self) -> ParseR { 359 | expect_symbol_error!(self, Symbol::OpeningParen, "expected '('"); 360 | let cond = self.read_expr()?; 361 | expect_symbol_error!(self, Symbol::ClosingParen, "expected ')'"); 362 | let body = self.read_stmt()?; 363 | Ok(AST::new( 364 | ASTKind::While(Box::new(cond), Box::new(body)), 365 | Pos::new(0, 0), 366 | )) 367 | } 368 | fn read_do_while_stmt(&mut self) -> ParseR { 369 | let body = self.read_stmt()?; 370 | if !self.lexer.skip_keyword(Keyword::While)? { 371 | let peek = self.lexer.peek(); 372 | self.show_error_token(&peek?, "expected 'while'"); 373 | } 374 | expect_symbol_error!(self, Symbol::OpeningParen, "expected '('"); 375 | let cond = self.read_expr()?; 376 | expect_symbol_error!(self, Symbol::ClosingParen, "expected ')'"); 377 | expect_symbol_error!(self, Symbol::Semicolon, "expected ';'"); 378 | Ok(AST::new( 379 | ASTKind::DoWhile(Box::new(cond), Box::new(body)), 380 | Pos::new(0, 0), 381 | )) 382 | } 383 | fn read_switch_stmt(&mut self) -> ParseR { 384 | expect_symbol_error!(self, Symbol::OpeningParen, "expected '('"); 385 | let cond = self.read_expr()?; 386 | expect_symbol_error!(self, Symbol::ClosingParen, "expected ')'"); 387 | let body = Box::new(self.read_stmt()?); 388 | Ok(AST::new( 389 | ASTKind::Switch(Box::new(cond), body), 390 | Pos::new(0, 0), 391 | )) 392 | } 393 | fn read_case_label(&mut self) -> ParseR { 394 | let expr = self.read_expr()?; 395 | expect_symbol_error!(self, Symbol::Colon, "expected ':'"); 396 | Ok(AST::new(ASTKind::Case(Box::new(expr)), Pos::new(0, 0))) 397 | } 398 | fn read_default_label(&mut self) -> ParseR { 399 | expect_symbol_error!(self, Symbol::Colon, "expected ':'"); 400 | Ok(AST::new(ASTKind::DefaultL, Pos::new(0, 0))) 401 | } 402 | fn read_goto_stmt(&mut self) -> ParseR { 403 | let pos = self.lexer.get_cur_pos(); 404 | let label_name = ident_val!(self.lexer.get()?); 405 | expect_symbol_error!(self, Symbol::Semicolon, "expected ';'"); 406 | Ok(AST::new(ASTKind::Goto(label_name), pos)) 407 | } 408 | fn read_label(&mut self, tok: Token) -> ParseR { 409 | let pos = self.lexer.get_cur_pos(); 410 | let label_name = ident_val!(tok); 411 | expect_symbol_error!(self, Symbol::Colon, "expected ':'"); 412 | Ok(AST::new(ASTKind::Label(label_name), pos)) 413 | } 414 | fn read_continue_stmt(&mut self) -> ParseR { 415 | let pos = self.lexer.get_cur_pos(); 416 | expect_symbol_error!(self, Symbol::Semicolon, "expected ';'"); 417 | Ok(AST::new(ASTKind::Continue, pos)) 418 | } 419 | fn read_break_stmt(&mut self) -> ParseR { 420 | let pos = self.lexer.get_cur_pos(); 421 | expect_symbol_error!(self, Symbol::Semicolon, "expected ';'"); 422 | Ok(AST::new(ASTKind::Break, pos)) 423 | } 424 | fn read_return_stmt(&mut self) -> ParseR { 425 | let pos = self.lexer.get_cur_pos(); 426 | if self.lexer.skip_symbol(Symbol::Semicolon)? { 427 | Ok(AST::new(ASTKind::Return(None), pos)) 428 | } else { 429 | let retval = Some(Box::new(self.read_expr()?)); 430 | expect_symbol_error!(self, Symbol::Semicolon, "expected ';'"); 431 | Ok(AST::new(ASTKind::Return(retval), pos)) 432 | } 433 | } 434 | fn is_function_def(&mut self) -> ParseR { 435 | let mut buf = Vec::new(); 436 | let mut is_funcdef = false; 437 | 438 | loop { 439 | let mut tok = self.lexer.get()?; 440 | buf.push(tok.clone()); 441 | 442 | if tok.kind == TokenKind::Symbol(Symbol::Semicolon) { 443 | break; 444 | } 445 | 446 | if self.is_type(&tok) { 447 | continue; 448 | } 449 | 450 | if tok.kind == TokenKind::Symbol(Symbol::OpeningParen) { 451 | self.skip_parens(&tok, &mut buf)?; 452 | continue; 453 | } 454 | 455 | if !matches!(tok.kind, TokenKind::Identifier(_)) { 456 | continue; 457 | } 458 | 459 | if self.lexer.peek()?.kind != TokenKind::Symbol(Symbol::OpeningParen) { 460 | continue; 461 | } 462 | 463 | let opening_paren = self.lexer.get()?; 464 | buf.push(opening_paren.clone()); 465 | self.skip_parens(&opening_paren, &mut buf)?; 466 | 467 | tok = self.lexer.peek()?; 468 | is_funcdef = tok.kind == TokenKind::Symbol(Symbol::OpeningBrace); 469 | break; 470 | } 471 | 472 | self.lexer.unget_all(&buf); 473 | Ok(is_funcdef) 474 | } 475 | fn skip_parens(&mut self, opening_paren: &Token, buf: &mut Vec) -> ParseR<()> { 476 | loop { 477 | let tok = self.lexer.get().or_else(|_| { 478 | self.show_error_token(&opening_paren, "expected ')', but reach EOF"); 479 | return Err(Error::Something); 480 | })?; 481 | buf.push(tok.clone()); 482 | 483 | match tok.kind { 484 | TokenKind::Symbol(Symbol::OpeningParen) => self.skip_parens(&tok, buf)?, 485 | TokenKind::Symbol(Symbol::ClosingParen) => break, 486 | _ => {} 487 | }; 488 | } 489 | Ok(()) 490 | } 491 | fn skip_until(&mut self, sym: Symbol) { 492 | let ts = TokenKind::Symbol(sym); 493 | while match self.lexer.get() { 494 | Ok(tok) => tok.kind != ts, 495 | Err(_) => false, 496 | } {} 497 | } 498 | 499 | fn get_typedef(&mut self, name: &str) -> ParseR> { 500 | match self.env.get(name) { 501 | Some(ast) => match ast.kind { 502 | ASTKind::Typedef(ref from, ref _to) => { 503 | let ty = match from { 504 | &Type::Struct(ref name, ref fields) 505 | | &Type::Union(ref name, ref fields, _) => { 506 | if fields.is_empty() { 507 | self.tags.get(name.as_str()).unwrap().clone() 508 | } else { 509 | from.clone() 510 | } 511 | } 512 | _ => from.clone(), 513 | }; 514 | return Ok(Some(ty)); 515 | } 516 | _ => {} 517 | }, 518 | None => return Ok(None), 519 | } 520 | Ok(None) 521 | } 522 | fn is_type(&mut self, token: &Token) -> bool { 523 | if let TokenKind::Keyword(ref keyw) = token.kind { 524 | match *keyw { 525 | Keyword::Typedef 526 | | Keyword::Extern 527 | | Keyword::Static 528 | | Keyword::Auto 529 | | Keyword::Register 530 | | Keyword::Const 531 | | Keyword::Volatile 532 | | Keyword::Void 533 | | Keyword::Signed 534 | | Keyword::Unsigned 535 | | Keyword::Char 536 | | Keyword::Int 537 | | Keyword::Short 538 | | Keyword::Long 539 | | Keyword::Float 540 | | Keyword::Double 541 | | Keyword::Struct 542 | | Keyword::Enum 543 | | Keyword::Union 544 | | Keyword::Noreturn 545 | | Keyword::Inline 546 | | Keyword::Restrict => true, 547 | _ => false, 548 | } 549 | } else if let TokenKind::Identifier(ref ident) = token.kind { 550 | match self.env.get(ident.as_str()) { 551 | Some(ast) => match ast.kind { 552 | ASTKind::Typedef(_, _) => true, 553 | _ => false, 554 | }, 555 | None => false, 556 | } 557 | } else { 558 | false 559 | } 560 | } 561 | fn is_string(&self, ty: &Type) -> bool { 562 | if let &Type::Array(ref elem_ty, _) = ty { 563 | if matches!(**elem_ty, Type::Char(Sign::Signed)) { 564 | return true; 565 | } 566 | } 567 | false 568 | } 569 | fn read_decl_init(&mut self, ty: &mut Type) -> ParseR { 570 | // TODO: implement for like 'int a[] = {...}, char *s="str";' 571 | if self.lexer.peek_symbol_token_is(Symbol::OpeningBrace)? { 572 | return self.read_initializer_list(ty); 573 | } else if self.is_string(ty) { 574 | let tok = self.lexer.get()?; 575 | if let TokenKind::String(s) = tok.kind { 576 | return self.read_string_initializer(ty, s); 577 | } 578 | self.lexer.unget(tok); 579 | } 580 | self.read_assign() 581 | } 582 | fn read_initializer_elem(&mut self, ty: &mut Type) -> ParseR { 583 | if match *ty { 584 | Type::Array(_, _) | Type::Struct(_, _) | Type::Union(_, _, _) => true, 585 | _ => false, 586 | } { 587 | self.read_initializer_list(ty) 588 | } else if self.lexer.peek_symbol_token_is(Symbol::OpeningBrace)? { 589 | let elem = self.read_initializer_elem(ty); 590 | expect_symbol_error!(self, Symbol::ClosingBrace, "expected '}'"); 591 | elem 592 | } else { 593 | self.read_assign() 594 | } 595 | } 596 | fn read_initializer_list(&mut self, ty: &mut Type) -> ParseR { 597 | if self.is_string(ty) { 598 | let tok = self.lexer.get()?; 599 | if let TokenKind::String(s) = tok.kind { 600 | return self.read_string_initializer(ty, s); 601 | } 602 | self.lexer.unget(tok); 603 | } 604 | match ty { 605 | &mut Type::Array(_, _) => self.read_array_initializer(ty), 606 | &mut Type::Struct(_, _) | &mut Type::Union(_, _, _) => self.read_struct_initializer(ty), 607 | _ => self.read_assign(), 608 | } 609 | } 610 | fn read_string_initializer(&mut self, ty: &mut Type, string: String) -> ParseR { 611 | let char_ary = string 612 | .chars() 613 | .map(|c| AST::new(ASTKind::Char(c as i32), Pos::new(0, 0))) 614 | .collect::>(); 615 | if let &mut Type::Array(_, ref mut len) = ty { 616 | *len = char_ary.len() as i32 + 1; 617 | } else { 618 | panic!() 619 | } 620 | Ok(AST::new( 621 | ASTKind::ConstArray(char_ary), 622 | self.lexer.get_cur_pos(), 623 | )) 624 | } 625 | fn read_array_initializer(&mut self, ty: &mut Type) -> ParseR { 626 | let has_brace = self.lexer.skip_symbol(Symbol::OpeningBrace)?; 627 | 628 | if let &mut Type::Array(ref elem_ty, ref mut len) = ty { 629 | let is_flexible = *len < 0; 630 | let mut elems = Vec::new(); 631 | let mut elem_ty = (**elem_ty).clone(); 632 | loop { 633 | let tok = self.lexer.get()?; 634 | if let TokenKind::Symbol(Symbol::ClosingBrace) = tok.kind { 635 | if !has_brace { 636 | self.lexer.unget(tok); 637 | } 638 | break; 639 | } 640 | self.lexer.unget(tok); 641 | let elem = self.read_initializer_elem(&mut elem_ty)?; 642 | elems.push(elem); 643 | self.lexer.skip_symbol(Symbol::Comma)?; 644 | } 645 | if is_flexible { 646 | *len = elems.len() as i32; 647 | } 648 | Ok(AST::new( 649 | ASTKind::ConstArray(elems), 650 | self.lexer.get_cur_pos(), 651 | )) 652 | } else { 653 | panic!() 654 | } 655 | } 656 | fn read_struct_initializer(&mut self, ty: &mut Type) -> ParseR { 657 | let tok = self.lexer.get()?; 658 | let has_brace = tok.kind == TokenKind::Symbol(Symbol::OpeningBrace); 659 | 660 | let mut fields_types = if let Some(fields_types) = ty.get_all_fields_types() { 661 | fields_types 662 | } else { 663 | self.show_error_token(&tok, "initializer of struct must be array"); 664 | return Err(Error::Something); 665 | }; 666 | 667 | let mut elems = Vec::new(); 668 | let mut field_type = fields_types.iter_mut(); 669 | loop { 670 | let tok = self.lexer.get()?; 671 | if let TokenKind::Symbol(Symbol::ClosingBrace) = tok.kind { 672 | if !has_brace { 673 | self.lexer.unget(tok); 674 | } 675 | break; 676 | } 677 | self.lexer.unget(tok); 678 | let elem = self.read_initializer_elem(&mut field_type.next().unwrap().clone())?; 679 | elems.push(elem); 680 | self.lexer.skip_symbol(Symbol::Comma)?; 681 | } 682 | Ok(AST::new( 683 | ASTKind::ConstStruct(elems), 684 | self.lexer.get_cur_pos(), 685 | )) 686 | } 687 | fn skip_type_qualifiers(&mut self) -> ParseR<()> { 688 | while self.lexer.skip_keyword(Keyword::Const)? 689 | || self.lexer.skip_keyword(Keyword::Volatile)? 690 | || self.lexer.skip_keyword(Keyword::Restrict)? 691 | {} 692 | Ok(()) 693 | } 694 | fn read_decl(&mut self, ast: &mut Vec) -> ParseR<()> { 695 | let (basety, sclass, qualifiers) = self.read_type_spec()?; 696 | let is_typedef = sclass == StorageClass::Typedef; 697 | 698 | if self.lexer.skip_symbol(Symbol::Semicolon)? { 699 | return Ok(()); 700 | } 701 | 702 | loop { 703 | let (mut ty, name, _) = self.read_declarator(basety.clone())?; // XXX 704 | 705 | if (qualifiers.q_constexpr || qualifiers.q_const) 706 | && self.lexer.skip_symbol(Symbol::Assign)? 707 | { 708 | let init = self.read_decl_init(&mut ty)?; 709 | self.env.add(name.clone(), init); 710 | } else { 711 | if is_typedef { 712 | let typedef = AST::new( 713 | ASTKind::Typedef(ty, name.to_string()), 714 | self.lexer.get_cur_pos(), 715 | ); 716 | self.env.add(name, typedef); 717 | return Ok(()); 718 | } 719 | 720 | let init = if self.lexer.skip_symbol(Symbol::Assign)? { 721 | Some(Box::new(self.read_decl_init(&mut ty)?)) 722 | } else { 723 | None 724 | }; 725 | self.env.add( 726 | name.clone(), 727 | AST::new(ASTKind::Variable(ty.clone(), name.clone()), Pos::new(0, 0)), 728 | ); 729 | ast.push(AST::new( 730 | ASTKind::VariableDecl(ty, name, sclass.clone(), init), 731 | self.lexer.get_cur_pos(), 732 | )); 733 | } 734 | 735 | if self.lexer.skip_symbol(Symbol::Semicolon)? { 736 | return Ok(()); 737 | } 738 | if !self.lexer.skip_symbol(Symbol::Comma)? { 739 | let peek = self.lexer.get()?; 740 | self.show_error_token(&peek, "expected ','"); 741 | self.skip_until(Symbol::Semicolon); 742 | return Err(Error::Something); 743 | } 744 | } 745 | } 746 | fn read_opt_decl_or_stmt(&mut self) -> ParseR { 747 | if self.lexer.skip_symbol(Symbol::Semicolon)? { 748 | return Ok(AST::new(ASTKind::Compound(Vec::new()), Pos::new(0, 0))); 749 | } 750 | 751 | let peek_tok = self.lexer.peek()?; 752 | if self.is_type(&peek_tok) { 753 | // variable declaration 754 | let mut stmts = Vec::new(); 755 | let pos = self.lexer.get_cur_pos(); 756 | self.read_decl(&mut stmts)?; 757 | Ok(AST::new(ASTKind::Compound(stmts), pos)) 758 | } else { 759 | self.read_stmt() 760 | } 761 | } 762 | // returns (declarator type, name, params{for function}) 763 | fn read_declarator(&mut self, basety: Type) -> ParseR<(Type, String, Option>)> { 764 | if self.lexer.skip_symbol(Symbol::OpeningParen)? { 765 | let peek_tok = self.lexer.peek()?; 766 | if self.is_type(&peek_tok) { 767 | let (ty, params) = self.read_declarator_func(basety)?; 768 | return Ok((ty, "".to_string(), params)); 769 | } 770 | 771 | // TODO: HUH? MAKES NO SENSE!! 772 | let mut buf: Vec = Vec::new(); 773 | while !self.lexer.skip_symbol(Symbol::ClosingParen)? { 774 | buf.push(self.lexer.get()?); 775 | } 776 | let t = self.read_declarator_tail(basety)?; 777 | self.lexer.unget_all(&buf); 778 | return self.read_declarator(t.0); 779 | } 780 | 781 | if self.lexer.skip_symbol(Symbol::Asterisk)? { 782 | self.skip_type_qualifiers()?; 783 | return self.read_declarator(Type::Ptr(Box::new(basety.clone()))); 784 | } 785 | 786 | let tok = self.lexer.get()?; 787 | 788 | if let &TokenKind::Identifier(ref name) = &tok.kind { 789 | let (ty, params) = self.read_declarator_tail(basety)?; 790 | return Ok((ty, name.to_string(), params)); 791 | } 792 | 793 | self.lexer.unget(tok); 794 | let (ty, params) = self.read_declarator_tail(basety)?; 795 | Ok((ty, "".to_string(), params)) 796 | } 797 | fn read_declarator_tail(&mut self, basety: Type) -> ParseR<(Type, Option>)> { 798 | if self.lexer.skip_symbol(Symbol::OpeningBoxBracket)? { 799 | return Ok((self.read_declarator_array(basety)?, None)); 800 | } 801 | if self.lexer.skip_symbol(Symbol::OpeningParen)? { 802 | return self.read_declarator_func(basety); 803 | } 804 | Ok((basety, None)) 805 | } 806 | 807 | fn read_declarator_array(&mut self, basety: Type) -> ParseR { 808 | let len: i32; 809 | if self.lexer.skip_symbol(Symbol::ClosingBoxBracket)? { 810 | len = -1; 811 | } else { 812 | len = match self.read_expr()?.eval_constexpr() { 813 | Ok(len) => len as i32, 814 | Err(Error::Something) => { 815 | let peek = self.lexer.peek()?; 816 | self.show_error_token(&peek, "array size must be constant"); 817 | 0 818 | } 819 | Err(e) => return Err(e), 820 | }; 821 | expect_symbol_error!(self, Symbol::ClosingBoxBracket, "expected ']'"); 822 | } 823 | let ty = self.read_declarator_tail(basety)?.0; 824 | Ok(Type::Array(Box::new(ty), len)) 825 | } 826 | fn read_declarator_func(&mut self, retty: Type) -> ParseR<(Type, Option>)> { 827 | if self.lexer.peek_keyword_token_is(Keyword::Void)? 828 | && self.lexer.next_symbol_token_is(Symbol::ClosingParen)? 829 | { 830 | self.lexer.expect_skip_keyword(Keyword::Void)?; 831 | self.lexer.expect_skip_symbol(Symbol::ClosingParen)?; 832 | return Ok((Type::Func(Box::new(retty), Vec::new(), false), None)); 833 | } 834 | if self.lexer.skip_symbol(Symbol::ClosingParen)? { 835 | return Ok((Type::Func(Box::new(retty), Vec::new(), false), None)); 836 | } 837 | 838 | let (paramtypes, paramnames, vararg) = self.read_declarator_params()?; 839 | Ok(( 840 | Type::Func(Box::new(retty), paramtypes, vararg), 841 | Some(paramnames), 842 | )) 843 | } 844 | // returns (param types, param names, vararg?) 845 | fn read_declarator_params(&mut self) -> ParseR<(Vec, Vec, bool)> { 846 | let mut paramtypes: Vec = Vec::new(); 847 | let mut paramnames: Vec = Vec::new(); 848 | loop { 849 | if self.lexer.skip_symbol(Symbol::Vararg)? { 850 | if paramtypes.len() == 0 { 851 | let peek = self.lexer.peek(); 852 | self.show_error_token(&peek?, "at least one param is required before '...'"); 853 | return Err(Error::Something); 854 | } 855 | expect_symbol_error!(self, Symbol::ClosingParen, "expected ')'"); 856 | return Ok((paramtypes, paramnames, true)); 857 | } 858 | 859 | let (ty, name) = self.read_func_param()?; 860 | 861 | // if reading a parameter of a function to define 862 | if self.env.is_local() { 863 | self.env.add( 864 | name.clone(), 865 | AST::new(ASTKind::Variable(ty.clone(), name.clone()), Pos::new(0, 0)), 866 | ); 867 | } 868 | paramtypes.push(ty); 869 | paramnames.push(name); 870 | if self.lexer.skip_symbol(Symbol::ClosingParen)? { 871 | return Ok((paramtypes, paramnames, false)); 872 | } 873 | if !self.lexer.skip_symbol(Symbol::Comma)? { 874 | let peek = self.lexer.peek(); 875 | self.show_error_token(&peek?, "expected ','"); 876 | self.skip_until(Symbol::ClosingParen); 877 | return Err(Error::Something); 878 | } 879 | } 880 | } 881 | fn read_func_param(&mut self) -> ParseR<(Type, String)> { 882 | let basety = self.read_type_spec()?.0; 883 | let (ty, name, _) = self.read_declarator(basety)?; 884 | match ty { 885 | Type::Array(subst, _) => Ok((Type::Ptr(subst), name)), 886 | Type::Func(_, _, _) => Ok((Type::Ptr(Box::new(ty)), name)), 887 | _ => Ok((ty, name)), 888 | } 889 | } 890 | fn read_type_spec(&mut self) -> ParseR<(Type, StorageClass, Qualifiers)> { 891 | #[derive(PartialEq, Debug, Clone)] 892 | enum Size { 893 | Short, 894 | Normal, 895 | Long, 896 | LLong, 897 | } 898 | #[derive(PartialEq, Debug, Clone)] 899 | enum PrimitiveType { 900 | Void, 901 | Char, 902 | Int, 903 | Float, 904 | Double, 905 | } 906 | 907 | let mut kind: Option = None; 908 | let mut sign: Option = None; 909 | let mut size = Size::Normal; 910 | let mut sclass = StorageClass::Auto; 911 | let mut userty: Option = None; 912 | let mut qualifiers = Qualifiers::new(); 913 | 914 | loop { 915 | let tok = self.lexer.get()?; 916 | 917 | if kind.is_none() { 918 | if let &TokenKind::Identifier(ref maybe_userty_name) = &tok.kind { 919 | let maybe_userty = self.get_typedef(maybe_userty_name)?; 920 | if maybe_userty.is_some() { 921 | return Ok((maybe_userty.unwrap(), sclass, qualifiers)); 922 | } 923 | } 924 | } 925 | if !matches!(tok.kind, TokenKind::Keyword(_)) { 926 | self.lexer.unget(tok); 927 | break; 928 | } 929 | 930 | if let TokenKind::Keyword(keyw) = tok.kind { 931 | match &keyw { 932 | &Keyword::Typedef => sclass = StorageClass::Typedef, 933 | &Keyword::Extern => sclass = StorageClass::Extern, 934 | &Keyword::Static => sclass = StorageClass::Static, 935 | &Keyword::Auto => sclass = StorageClass::Auto, 936 | &Keyword::Register => sclass = StorageClass::Register, 937 | &Keyword::Const => qualifiers.q_const = true, 938 | &Keyword::ConstExpr => qualifiers.q_constexpr = true, 939 | &Keyword::Volatile => qualifiers.q_volatile = true, 940 | &Keyword::Inline => qualifiers.q_inline = true, 941 | &Keyword::Restrict => qualifiers.q_restrict = true, 942 | &Keyword::Noreturn => qualifiers.q_noreturn = true, 943 | &Keyword::Void => { 944 | if kind.is_some() { 945 | let peek = self.lexer.peek(); 946 | self.show_error_token(&peek?, "type mismatch"); 947 | } 948 | kind = Some(PrimitiveType::Void); 949 | } 950 | &Keyword::Char => { 951 | if kind.is_some() { 952 | let peek = self.lexer.peek(); 953 | self.show_error_token(&peek?, "type mismatch"); 954 | } 955 | kind = Some(PrimitiveType::Char); 956 | } 957 | &Keyword::Int => { 958 | if kind.is_some() { 959 | let peek = self.lexer.peek(); 960 | self.show_error_token(&peek?, "type mismatch"); 961 | } 962 | kind = Some(PrimitiveType::Int); 963 | } 964 | &Keyword::Float => { 965 | if kind.is_some() { 966 | let peek = self.lexer.peek(); 967 | self.show_error_token(&peek?, "type mismatch"); 968 | } 969 | kind = Some(PrimitiveType::Float); 970 | } 971 | &Keyword::Double => { 972 | if kind.is_some() { 973 | let peek = self.lexer.peek(); 974 | self.show_error_token(&peek?, "type mismatch"); 975 | } 976 | kind = Some(PrimitiveType::Double); 977 | } 978 | &Keyword::Signed => { 979 | if sign.is_some() { 980 | let peek = self.lexer.peek(); 981 | self.show_error_token(&peek?, "type mismatch"); 982 | }; 983 | 984 | sign = Some(Sign::Signed); 985 | } 986 | &Keyword::Unsigned => { 987 | if sign.is_some() { 988 | let peek = self.lexer.peek(); 989 | self.show_error_token(&peek?, "type mismatch"); 990 | }; 991 | 992 | sign = Some(Sign::Unsigned); 993 | } 994 | &Keyword::Short => size = Size::Short, 995 | &Keyword::Long => { 996 | if size == Size::Normal { 997 | size = Size::Long; 998 | } else if size == Size::Long { 999 | size = Size::LLong; 1000 | } 1001 | } 1002 | &Keyword::Struct => userty = Some(self.read_struct_def()?), 1003 | &Keyword::Union => userty = Some(self.read_union_def()?), 1004 | &Keyword::Enum => userty = Some(self.read_enum_def()?), 1005 | _ => {} 1006 | } 1007 | } else { 1008 | self.lexer.unget(tok); 1009 | break; 1010 | } 1011 | } 1012 | 1013 | // if sign is not expected, 1014 | // default is Signed 1015 | if sign.is_none() { 1016 | sign = Some(Sign::Signed); 1017 | } 1018 | 1019 | // TODO: add err handler 1020 | if userty.is_some() { 1021 | return Ok((userty.unwrap(), sclass, qualifiers)); 1022 | } 1023 | 1024 | if kind.is_some() { 1025 | match kind.unwrap() { 1026 | PrimitiveType::Void => return Ok((Type::Void, sclass, qualifiers)), 1027 | PrimitiveType::Char => return Ok((Type::Char(sign.unwrap()), sclass, qualifiers)), 1028 | PrimitiveType::Float => return Ok((Type::Float, sclass, qualifiers)), 1029 | PrimitiveType::Double => return Ok((Type::Double, sclass, qualifiers)), 1030 | _ => {} 1031 | } 1032 | } 1033 | 1034 | let ty = match size { 1035 | Size::Short => Type::Short(sign.unwrap()), 1036 | Size::Normal => Type::Int(sign.unwrap()), 1037 | Size::Long => Type::Long(sign.unwrap()), 1038 | Size::LLong => Type::LLong(sign.unwrap()), 1039 | }; 1040 | 1041 | Ok((ty, sclass, qualifiers)) 1042 | } 1043 | 1044 | fn read_struct_def(&mut self) -> ParseR { 1045 | self.read_rectype_def(true) 1046 | } 1047 | fn read_union_def(&mut self) -> ParseR { 1048 | self.read_rectype_def(false) 1049 | } 1050 | // rectype is abbreviation of 'record type' 1051 | fn read_rectype_tag(&mut self) -> ParseR> { 1052 | let maybe_tag = self.lexer.get()?; 1053 | if let TokenKind::Identifier(maybe_tag_name) = maybe_tag.kind { 1054 | Ok(Some(maybe_tag_name)) 1055 | } else { 1056 | self.lexer.unget(maybe_tag); 1057 | Ok(None) 1058 | } 1059 | } 1060 | fn read_rectype_def(&mut self, is_struct: bool) -> ParseR { 1061 | let tag = { 1062 | let opt_tag = self.read_rectype_tag()?; 1063 | if opt_tag.is_some() { 1064 | opt_tag.unwrap() 1065 | } else { 1066 | // if the rectype(struct|union) has no name(e.g. typedef struct { int a; } A), 1067 | // generate a random name 1068 | rand::thread_rng().gen_ascii_chars().take(8).collect() 1069 | } 1070 | }; 1071 | 1072 | let fields = self.read_rectype_fields()?; 1073 | let cur_tags = self.tags.back_mut().unwrap(); 1074 | 1075 | if fields.is_empty() { 1076 | Ok(match cur_tags.entry(tag) { 1077 | hash_map::Entry::Occupied(o) => o.get().clone(), 1078 | hash_map::Entry::Vacant(v) => { 1079 | let new_struct = if is_struct { 1080 | Type::Struct(v.key().to_string(), Vec::new()) 1081 | } else { 1082 | Type::Union(v.key().to_string(), Vec::new(), 0) 1083 | }; 1084 | v.insert(new_struct).clone() 1085 | } 1086 | }) 1087 | } else { 1088 | let new_rectype = if is_struct { 1089 | Type::Struct(tag.to_string(), fields) 1090 | } else { 1091 | // if union 1092 | let mut max_sz_ty_nth = 0; 1093 | let mut max_sz = 0; 1094 | for (i, field_decl) in (&fields).iter().enumerate() { 1095 | if let ASTKind::VariableDecl(ref ty, _, _, _) = field_decl.kind { 1096 | if ty.calc_size() > max_sz { 1097 | max_sz = ty.calc_size(); 1098 | max_sz_ty_nth = i; 1099 | } 1100 | } 1101 | } 1102 | Type::Union(tag.to_string(), fields, max_sz_ty_nth) 1103 | }; 1104 | Ok(match cur_tags.entry(tag) { 1105 | hash_map::Entry::Occupied(o) => { 1106 | *o.into_mut() = new_rectype.clone(); 1107 | new_rectype 1108 | } 1109 | hash_map::Entry::Vacant(v) => v.insert(new_rectype).clone(), 1110 | }) 1111 | } 1112 | } 1113 | fn read_rectype_fields(&mut self) -> ParseR> { 1114 | if !self.lexer.skip_symbol(Symbol::OpeningBrace)? { 1115 | return Ok(Vec::new()); 1116 | } 1117 | 1118 | let mut decls: Vec = Vec::new(); 1119 | loop { 1120 | let peek = self.lexer.peek()?; 1121 | if !self.is_type(&peek) { 1122 | break; 1123 | } 1124 | let (basety, _, _) = self.read_type_spec()?; 1125 | loop { 1126 | let (ty, name, _) = self.read_declarator(basety.clone())?; 1127 | if self.lexer.skip_symbol(Symbol::Colon)? { 1128 | // TODO: for now, designated bitwidth ignore 1129 | self.read_expr()?; 1130 | } 1131 | decls.push(AST::new( 1132 | ASTKind::VariableDecl(ty, name, StorageClass::Auto, None), 1133 | self.lexer.get_cur_pos(), 1134 | )); 1135 | if self.lexer.skip_symbol(Symbol::Comma)? { 1136 | continue; 1137 | } else { 1138 | expect_symbol_error!(self, Symbol::Semicolon, "expected ';'"); 1139 | } 1140 | break; 1141 | } 1142 | } 1143 | expect_symbol_error!(self, Symbol::ClosingBrace, "expected '}'"); 1144 | Ok(decls) 1145 | } 1146 | fn read_enum_def(&mut self) -> ParseR { 1147 | let (tag, exist_tag) = { 1148 | let opt_tag = self.read_rectype_tag()?; 1149 | if opt_tag.is_some() { 1150 | (opt_tag.unwrap(), true) 1151 | } else { 1152 | ("".to_string(), false) 1153 | } 1154 | }; 1155 | if exist_tag { 1156 | match self.tags.get(tag.as_str()) { 1157 | Some(&Type::Enum) => {} 1158 | None => {} 1159 | _ => { 1160 | let peek = self.lexer.peek(); 1161 | self.show_error_token(&peek?, "undefined enum"); 1162 | return Err(Error::Something); 1163 | } 1164 | } 1165 | } 1166 | 1167 | if !self.lexer.skip_symbol(Symbol::OpeningBrace)? { 1168 | if !exist_tag || !self.tags.contains(tag.as_str()) { 1169 | let peek = self.lexer.peek(); 1170 | self.show_error_token(&peek?, "do not redefine enum"); 1171 | return Err(Error::Something); 1172 | } 1173 | return Ok(Type::Int(Sign::Signed)); 1174 | } 1175 | 1176 | if exist_tag { 1177 | self.tags.add(tag, Type::Enum); 1178 | } 1179 | 1180 | let mut val = 0; 1181 | loop { 1182 | if self.lexer.skip_symbol(Symbol::ClosingBrace)? { 1183 | break; 1184 | } 1185 | let name = ident_val!(self.lexer.get()?); 1186 | if self.lexer.skip_symbol(Symbol::Assign)? { 1187 | val = match self.read_assign()?.eval_constexpr() { 1188 | Ok(val) => val, 1189 | Err(Error::Something) => { 1190 | let peek = self.lexer.peek()?; 1191 | self.show_error_token(&peek, "enum initialize value must be constant"); 1192 | 0 1193 | } 1194 | Err(e) => return Err(e), 1195 | }; 1196 | } 1197 | let constval = AST::new(ASTKind::Int(val, Bits::Bits32), self.lexer.get_cur_pos()); 1198 | val += 1; 1199 | self.env.add(name, constval); 1200 | if self.lexer.skip_symbol(Symbol::Comma)? { 1201 | continue; 1202 | } 1203 | if self.lexer.skip_symbol(Symbol::OpeningBrace)? { 1204 | break; 1205 | } 1206 | } 1207 | 1208 | Ok(Type::Int(Sign::Signed)) 1209 | } 1210 | 1211 | pub fn read_expr(&mut self) -> ParseR { 1212 | self.read_comma() 1213 | } 1214 | pub fn read_opt_expr(&mut self) -> ParseR { 1215 | if self.lexer.peek()?.kind == TokenKind::Symbol(Symbol::Semicolon) { 1216 | Ok(AST::new( 1217 | ASTKind::Compound(Vec::new()), 1218 | self.lexer.get_cur_pos(), 1219 | )) 1220 | } else { 1221 | self.read_expr() 1222 | } 1223 | } 1224 | fn read_comma(&mut self) -> ParseR { 1225 | let mut lhs = self.read_assign()?; 1226 | while self.lexer.skip_symbol(Symbol::Comma)? { 1227 | let rhs = self.read_assign()?; 1228 | lhs = AST::new( 1229 | ASTKind::BinaryOp(Box::new(lhs), Box::new(rhs), node::CBinOps::Comma), 1230 | self.lexer.get_cur_pos(), 1231 | ) 1232 | } 1233 | Ok(lhs) 1234 | } 1235 | fn read_assign(&mut self) -> ParseR { 1236 | let mut lhs = self.read_logor()?; 1237 | if self.lexer.skip_symbol(Symbol::Question)? { 1238 | return self.read_ternary(lhs); 1239 | } 1240 | macro_rules! assign { 1241 | ($lhs:expr, $rhs:expr, $pos:expr) => { 1242 | AST::new( 1243 | ASTKind::BinaryOp(Box::new($lhs), Box::new($rhs), node::CBinOps::Assign), 1244 | $pos, 1245 | ) 1246 | }; 1247 | } 1248 | macro_rules! f { 1249 | ($op:ident) => { 1250 | lhs = assign!( 1251 | lhs.clone(), 1252 | AST::new( 1253 | ASTKind::BinaryOp( 1254 | Box::new(lhs), 1255 | Box::new(self.read_assign()?), 1256 | node::CBinOps::$op, 1257 | ), 1258 | self.lexer.get_cur_pos(), 1259 | ), 1260 | self.lexer.get_cur_pos() 1261 | ) 1262 | }; 1263 | } 1264 | loop { 1265 | let tok = self.lexer.get()?; 1266 | match tok.kind { 1267 | TokenKind::Symbol(Symbol::Assign) => { 1268 | lhs = assign!(lhs, self.read_assign()?, self.lexer.get_cur_pos()); 1269 | } 1270 | TokenKind::Symbol(Symbol::AssignAdd) => f!(Add), 1271 | TokenKind::Symbol(Symbol::AssignSub) => f!(Sub), 1272 | TokenKind::Symbol(Symbol::AssignMul) => f!(Mul), 1273 | TokenKind::Symbol(Symbol::AssignDiv) => f!(Div), 1274 | TokenKind::Symbol(Symbol::AssignMod) => f!(Rem), 1275 | TokenKind::Symbol(Symbol::AssignShl) => f!(Shl), 1276 | TokenKind::Symbol(Symbol::AssignShr) => f!(Shr), 1277 | TokenKind::Symbol(Symbol::AssignAnd) => f!(And), 1278 | TokenKind::Symbol(Symbol::AssignOr) => f!(Or), 1279 | TokenKind::Symbol(Symbol::AssignXor) => f!(Xor), 1280 | // TODO: implement more op 1281 | _ => { 1282 | self.lexer.unget(tok); 1283 | break; 1284 | } 1285 | } 1286 | } 1287 | Ok(lhs) 1288 | } 1289 | fn read_ternary(&mut self, cond: AST) -> ParseR { 1290 | let mut then_expr = self.read_expr()?; 1291 | expect_symbol_error!(self, Symbol::Colon, "expected ':'"); 1292 | let mut else_expr = self.read_assign()?; 1293 | let then_ty = self.get_expr_returning_ty(&then_expr)?; 1294 | let else_ty = self.get_expr_returning_ty(&else_expr)?; 1295 | if then_ty.is_arith_ty() && else_ty.is_arith_ty() { 1296 | let ty = self.usual_binary_ty_cov(then_ty, else_ty); 1297 | then_expr = self.cast_ast(&then_expr, &ty); 1298 | else_expr = self.cast_ast(&else_expr, &ty); 1299 | } 1300 | Ok(AST::new( 1301 | ASTKind::TernaryOp(Box::new(cond), Box::new(then_expr), Box::new(else_expr)), 1302 | self.lexer.get_cur_pos(), 1303 | )) 1304 | } 1305 | fn read_logor(&mut self) -> ParseR { 1306 | let mut lhs = self.read_logand()?; 1307 | while self.lexer.skip_symbol(Symbol::LOr)? { 1308 | let rhs = self.read_logand()?; 1309 | lhs = AST::new( 1310 | ASTKind::BinaryOp(Box::new(lhs), Box::new(rhs), node::CBinOps::LOr), 1311 | self.lexer.get_cur_pos(), 1312 | ); 1313 | } 1314 | Ok(lhs) 1315 | } 1316 | fn read_logand(&mut self) -> ParseR { 1317 | let mut lhs = self.read_or()?; 1318 | while self.lexer.skip_symbol(Symbol::LAnd)? { 1319 | let rhs = self.read_or()?; 1320 | lhs = AST::new( 1321 | ASTKind::BinaryOp(Box::new(lhs), Box::new(rhs), node::CBinOps::LAnd), 1322 | self.lexer.get_cur_pos(), 1323 | ); 1324 | } 1325 | Ok(lhs) 1326 | } 1327 | fn read_or(&mut self) -> ParseR { 1328 | let mut lhs = self.read_xor()?; 1329 | while self.lexer.skip_symbol(Symbol::Or)? { 1330 | let rhs = self.read_xor()?; 1331 | lhs = AST::new( 1332 | ASTKind::BinaryOp(Box::new(lhs), Box::new(rhs), node::CBinOps::Or), 1333 | self.lexer.get_cur_pos(), 1334 | ); 1335 | } 1336 | Ok(lhs) 1337 | } 1338 | fn read_xor(&mut self) -> ParseR { 1339 | let mut lhs = self.read_and()?; 1340 | while self.lexer.skip_symbol(Symbol::Xor)? { 1341 | let rhs = self.read_and()?; 1342 | lhs = AST::new( 1343 | ASTKind::BinaryOp(Box::new(lhs), Box::new(rhs), node::CBinOps::Xor), 1344 | self.lexer.get_cur_pos(), 1345 | ); 1346 | } 1347 | Ok(lhs) 1348 | } 1349 | fn read_and(&mut self) -> ParseR { 1350 | let mut lhs = self.read_eq_ne()?; 1351 | while self.lexer.skip_symbol(Symbol::Ampersand)? { 1352 | let rhs = self.read_eq_ne()?; 1353 | lhs = AST::new( 1354 | ASTKind::BinaryOp(Box::new(lhs), Box::new(rhs), node::CBinOps::And), 1355 | self.lexer.get_cur_pos(), 1356 | ); 1357 | } 1358 | Ok(lhs) 1359 | } 1360 | fn read_eq_ne(&mut self) -> ParseR { 1361 | let mut lhs = self.read_relation()?; 1362 | loop { 1363 | if self.lexer.skip_symbol(Symbol::Eq)? { 1364 | let rhs = self.read_relation()?; 1365 | lhs = AST::new( 1366 | ASTKind::BinaryOp(Box::new(lhs), Box::new(rhs), node::CBinOps::Eq), 1367 | self.lexer.get_cur_pos(), 1368 | ); 1369 | } else if self.lexer.skip_symbol(Symbol::Ne)? { 1370 | let rhs = self.read_relation()?; 1371 | lhs = AST::new( 1372 | ASTKind::BinaryOp(Box::new(lhs), Box::new(rhs), node::CBinOps::Ne), 1373 | self.lexer.get_cur_pos(), 1374 | ); 1375 | } else { 1376 | break; 1377 | } 1378 | } 1379 | Ok(lhs) 1380 | } 1381 | fn read_relation(&mut self) -> ParseR { 1382 | let mut lhs = self.read_shl_shr()?; 1383 | loop { 1384 | if self.lexer.skip_symbol(Symbol::Lt)? { 1385 | let rhs = self.read_shl_shr()?; 1386 | lhs = AST::new( 1387 | ASTKind::BinaryOp(Box::new(lhs), Box::new(rhs), node::CBinOps::Lt), 1388 | self.lexer.get_cur_pos(), 1389 | ); 1390 | } else if self.lexer.skip_symbol(Symbol::Le)? { 1391 | let rhs = self.read_shl_shr()?; 1392 | lhs = AST::new( 1393 | ASTKind::BinaryOp(Box::new(lhs), Box::new(rhs), node::CBinOps::Le), 1394 | self.lexer.get_cur_pos(), 1395 | ); 1396 | } else if self.lexer.skip_symbol(Symbol::Gt)? { 1397 | let rhs = self.read_shl_shr()?; 1398 | lhs = AST::new( 1399 | ASTKind::BinaryOp(Box::new(lhs), Box::new(rhs), node::CBinOps::Gt), 1400 | self.lexer.get_cur_pos(), 1401 | ); 1402 | } else if self.lexer.skip_symbol(Symbol::Ge)? { 1403 | let rhs = self.read_shl_shr()?; 1404 | lhs = AST::new( 1405 | ASTKind::BinaryOp(Box::new(lhs), Box::new(rhs), node::CBinOps::Ge), 1406 | self.lexer.get_cur_pos(), 1407 | ); 1408 | } else { 1409 | break; 1410 | } 1411 | } 1412 | Ok(lhs) 1413 | } 1414 | fn read_shl_shr(&mut self) -> ParseR { 1415 | let mut lhs = self.read_add_sub()?; 1416 | loop { 1417 | if self.lexer.skip_symbol(Symbol::Shl)? { 1418 | let rhs = self.read_add_sub()?; 1419 | lhs = AST::new( 1420 | ASTKind::BinaryOp(Box::new(lhs), Box::new(rhs), node::CBinOps::Shl), 1421 | self.lexer.get_cur_pos(), 1422 | ); 1423 | } else if self.lexer.skip_symbol(Symbol::Shr)? { 1424 | let rhs = self.read_add_sub()?; 1425 | lhs = AST::new( 1426 | ASTKind::BinaryOp(Box::new(lhs), Box::new(rhs), node::CBinOps::Shr), 1427 | self.lexer.get_cur_pos(), 1428 | ); 1429 | } else { 1430 | break; 1431 | } 1432 | } 1433 | Ok(lhs) 1434 | } 1435 | fn read_add_sub(&mut self) -> ParseR { 1436 | let mut lhs = self.read_mul_div_rem()?; 1437 | loop { 1438 | if self.lexer.skip_symbol(Symbol::Add)? { 1439 | let rhs = self.read_mul_div_rem()?; 1440 | lhs = AST::new( 1441 | ASTKind::BinaryOp(Box::new(lhs), Box::new(rhs), node::CBinOps::Add), 1442 | self.lexer.get_cur_pos(), 1443 | ); 1444 | } else if self.lexer.skip_symbol(Symbol::Sub)? { 1445 | let rhs = self.read_mul_div_rem()?; 1446 | lhs = AST::new( 1447 | ASTKind::BinaryOp(Box::new(lhs), Box::new(rhs), node::CBinOps::Sub), 1448 | self.lexer.get_cur_pos(), 1449 | ); 1450 | } else { 1451 | break; 1452 | } 1453 | } 1454 | Ok(lhs) 1455 | } 1456 | fn read_mul_div_rem(&mut self) -> ParseR { 1457 | let mut lhs = self.read_cast()?; 1458 | loop { 1459 | if self.lexer.skip_symbol(Symbol::Asterisk)? { 1460 | let rhs = self.read_cast()?; 1461 | lhs = AST::new( 1462 | ASTKind::BinaryOp(Box::new(lhs), Box::new(rhs), node::CBinOps::Mul), 1463 | self.lexer.get_cur_pos(), 1464 | ); 1465 | } else if self.lexer.skip_symbol(Symbol::Div)? { 1466 | let rhs = self.read_cast()?; 1467 | lhs = AST::new( 1468 | ASTKind::BinaryOp(Box::new(lhs), Box::new(rhs), node::CBinOps::Div), 1469 | self.lexer.get_cur_pos(), 1470 | ); 1471 | } else if self.lexer.skip_symbol(Symbol::Mod)? { 1472 | let rhs = self.read_cast()?; 1473 | lhs = AST::new( 1474 | ASTKind::BinaryOp(Box::new(lhs), Box::new(rhs), node::CBinOps::Rem), 1475 | self.lexer.get_cur_pos(), 1476 | ); 1477 | } else { 1478 | break; 1479 | } 1480 | } 1481 | Ok(lhs) 1482 | } 1483 | fn read_cast(&mut self) -> ParseR { 1484 | let tok = self.lexer.get()?; 1485 | let peek = self.lexer.peek()?; 1486 | if tok.kind == TokenKind::Symbol(Symbol::OpeningParen) && self.is_type(&peek) { 1487 | let basety = self.read_type_spec()?.0; 1488 | let ty = self.read_declarator(basety)?.0; 1489 | expect_symbol_error!(self, Symbol::ClosingParen, "expected ')'"); 1490 | return Ok(AST::new( 1491 | ASTKind::TypeCast(Box::new(self.read_cast()?), ty), 1492 | self.lexer.get_cur_pos(), 1493 | )); 1494 | } else { 1495 | self.lexer.unget(tok); 1496 | } 1497 | self.read_unary() 1498 | } 1499 | fn read_unary(&mut self) -> ParseR { 1500 | let tok = self.lexer.get()?; 1501 | match tok.kind { 1502 | TokenKind::Symbol(Symbol::Not) => { 1503 | return Ok(AST::new( 1504 | ASTKind::UnaryOp(Box::new(self.read_cast()?), node::CUnaryOps::LNot), 1505 | self.lexer.get_cur_pos(), 1506 | )) 1507 | } 1508 | TokenKind::Symbol(Symbol::BitwiseNot) => { 1509 | return Ok(AST::new( 1510 | ASTKind::UnaryOp(Box::new(self.read_cast()?), node::CUnaryOps::BNot), 1511 | self.lexer.get_cur_pos(), 1512 | )) 1513 | } 1514 | TokenKind::Symbol(Symbol::Add) => return self.read_cast(), 1515 | TokenKind::Symbol(Symbol::Sub) => { 1516 | return Ok(AST::new( 1517 | ASTKind::UnaryOp(Box::new(self.read_cast()?), node::CUnaryOps::Minus), 1518 | self.lexer.get_cur_pos(), 1519 | )) 1520 | } 1521 | TokenKind::Symbol(Symbol::Inc) => { 1522 | let pos = self.lexer.get_cur_pos(); 1523 | let var = self.read_cast()?; 1524 | return Ok(AST::new( 1525 | ASTKind::BinaryOp( 1526 | Box::new(var.clone()), 1527 | Box::new(AST::new( 1528 | ASTKind::BinaryOp( 1529 | Box::new(var), 1530 | Box::new(AST::new(ASTKind::Int(1, Bits::Bits32), pos.clone())), 1531 | node::CBinOps::Add, 1532 | ), 1533 | pos.clone(), 1534 | )), 1535 | node::CBinOps::Assign, 1536 | ), 1537 | pos, 1538 | )); 1539 | } 1540 | TokenKind::Symbol(Symbol::Dec) => { 1541 | let pos = self.lexer.get_cur_pos(); 1542 | let var = self.read_cast()?; 1543 | return Ok(AST::new( 1544 | ASTKind::BinaryOp( 1545 | Box::new(var.clone()), 1546 | Box::new(AST::new( 1547 | ASTKind::BinaryOp( 1548 | Box::new(var), 1549 | Box::new(AST::new(ASTKind::Int(1, Bits::Bits32), pos.clone())), 1550 | node::CBinOps::Sub, 1551 | ), 1552 | pos.clone(), 1553 | )), 1554 | node::CBinOps::Assign, 1555 | ), 1556 | pos, 1557 | )); 1558 | } 1559 | TokenKind::Symbol(Symbol::Asterisk) => { 1560 | return Ok(AST::new( 1561 | ASTKind::UnaryOp(Box::new(self.read_cast()?), node::CUnaryOps::Deref), 1562 | self.lexer.get_cur_pos(), 1563 | )) 1564 | } 1565 | TokenKind::Symbol(Symbol::Ampersand) => { 1566 | return Ok(AST::new( 1567 | ASTKind::UnaryOp(Box::new(self.read_cast()?), node::CUnaryOps::Addr), 1568 | self.lexer.get_cur_pos(), 1569 | )) 1570 | } 1571 | TokenKind::Symbol(Symbol::Sizeof) => { 1572 | // TODO: must fix this sloppy implementation 1573 | return self.read_sizeof(); 1574 | } 1575 | _ => {} 1576 | } 1577 | self.lexer.unget(tok); 1578 | self.read_postfix() 1579 | } 1580 | fn read_sizeof(&mut self) -> ParseR { 1581 | let tok = self.lexer.get()?; 1582 | let peek = self.lexer.peek()?; 1583 | if matches!(tok.kind, TokenKind::Symbol(Symbol::OpeningParen)) && self.is_type(&peek) { 1584 | let (basety, _, _) = self.read_type_spec()?; 1585 | let (ty, _, _) = self.read_declarator(basety)?; 1586 | self.lexer.skip_symbol(Symbol::ClosingParen)?; 1587 | return Ok(AST::new( 1588 | ASTKind::Int(ty.calc_size() as i64, Bits::Bits32), 1589 | self.lexer.get_cur_pos(), 1590 | )); 1591 | } 1592 | self.lexer.unget(tok); 1593 | let expr = self.read_unary()?; 1594 | Ok(AST::new( 1595 | ASTKind::Int(self.calc_sizeof(&expr)? as i64, Bits::Bits32), 1596 | self.lexer.get_cur_pos(), 1597 | )) 1598 | } 1599 | fn read_postfix(&mut self) -> ParseR { 1600 | let mut ast = self.read_primary()?; 1601 | loop { 1602 | if self.lexer.skip_symbol(Symbol::OpeningParen)? { 1603 | ast = self.read_func_call(ast)?; 1604 | continue; 1605 | } 1606 | if self.lexer.skip_symbol(Symbol::OpeningBoxBracket)? { 1607 | ast = AST::new( 1608 | ASTKind::Load(Box::new(self.read_index(ast)?)), 1609 | self.lexer.get_cur_pos(), 1610 | ); 1611 | continue; 1612 | } 1613 | if self.lexer.skip_symbol(Symbol::Point)? { 1614 | ast = AST::new( 1615 | ASTKind::Load(Box::new(self.read_field(ast)?)), 1616 | self.lexer.get_cur_pos(), 1617 | ); 1618 | continue; 1619 | } 1620 | if self.lexer.skip_symbol(Symbol::Arrow)? { 1621 | let pos = self.lexer.get_cur_pos(); 1622 | let field = self.read_field(AST::new( 1623 | ASTKind::UnaryOp(Box::new(ast), node::CUnaryOps::Deref), 1624 | pos.clone(), 1625 | ))?; 1626 | ast = AST::new(ASTKind::Load(Box::new(field)), pos); 1627 | continue; 1628 | } 1629 | if self.lexer.skip_symbol(Symbol::Inc)? { 1630 | return Ok(AST::new( 1631 | ASTKind::UnaryOp(Box::new(ast), node::CUnaryOps::Inc), 1632 | self.lexer.get_cur_pos(), 1633 | )); 1634 | } 1635 | if self.lexer.skip_symbol(Symbol::Dec)? { 1636 | return Ok(AST::new( 1637 | ASTKind::UnaryOp(Box::new(ast), node::CUnaryOps::Dec), 1638 | self.lexer.get_cur_pos(), 1639 | )); 1640 | } 1641 | break; 1642 | } 1643 | Ok(ast) 1644 | } 1645 | fn read_func_call(&mut self, f: AST) -> ParseR { 1646 | let pos = self.lexer.get_cur_pos(); 1647 | let mut args = Vec::new(); 1648 | if !self.lexer.skip_symbol(Symbol::ClosingParen)? { 1649 | loop { 1650 | match self.read_assign() { 1651 | Ok(arg) => args.push(arg), 1652 | Err(_) => {} 1653 | } 1654 | 1655 | if self.lexer.skip_symbol(Symbol::ClosingParen)? { 1656 | break; 1657 | } 1658 | if !self.lexer.skip_symbol(Symbol::Comma)? { 1659 | let peek = self.lexer.peek(); 1660 | self.show_error_token(&peek?, "expected ','"); 1661 | self.skip_until(Symbol::ClosingParen); 1662 | return Err(Error::Something); 1663 | } 1664 | } 1665 | } 1666 | 1667 | Ok(AST::new(ASTKind::FuncCall(Box::new(f), args), pos)) 1668 | } 1669 | fn read_index(&mut self, ast: AST) -> ParseR { 1670 | let idx = self.read_expr()?; 1671 | expect_symbol_error!(self, Symbol::ClosingBoxBracket, "expected ']'"); 1672 | Ok(AST::new( 1673 | ASTKind::BinaryOp(Box::new(ast), Box::new(idx), node::CBinOps::Add), 1674 | self.lexer.get_cur_pos(), 1675 | )) 1676 | } 1677 | 1678 | fn read_field(&mut self, ast: AST) -> ParseR { 1679 | let field = self.lexer.get()?; 1680 | if !matches!(field.kind, TokenKind::Identifier(_)) { 1681 | let peek = self.lexer.peek(); 1682 | self.show_error_token(&peek?, "expected field name"); 1683 | return Err(Error::Something); 1684 | } 1685 | 1686 | let field_name = ident_val!(field); 1687 | Ok(AST::new( 1688 | ASTKind::StructRef(Box::new(ast), field_name), 1689 | self.lexer.get_cur_pos(), 1690 | )) 1691 | } 1692 | 1693 | fn read_primary(&mut self) -> ParseR { 1694 | let tok = match self.lexer.get() { 1695 | Ok(tok) => tok, 1696 | Err(_) => { 1697 | let peek = self.lexer.peek(); 1698 | self.show_error_token(&peek?, "expected primary(number, string...), but reach EOF"); 1699 | return Err(Error::Something); 1700 | } 1701 | }; 1702 | 1703 | match tok.kind.clone() { 1704 | TokenKind::IntNumber(n, bits) => { 1705 | Ok(AST::new(ASTKind::Int(n, bits), self.lexer.get_cur_pos())) 1706 | } 1707 | TokenKind::FloatNumber(f) => Ok(AST::new(ASTKind::Float(f), self.lexer.get_cur_pos())), 1708 | TokenKind::Identifier(ident) => { 1709 | if let Some(ast) = self.env.get(ident.as_str()) { 1710 | return match ast.kind { 1711 | ASTKind::Variable(_, _) => Ok(AST::new( 1712 | ASTKind::Load(Box::new((*ast).clone())), 1713 | self.lexer.get_cur_pos(), 1714 | )), 1715 | _ => Ok((*ast).clone()), 1716 | }; 1717 | } 1718 | self.show_error_token( 1719 | &tok, 1720 | format!("not found the variable or function '{}'", ident).as_str(), 1721 | ); 1722 | Err(Error::Something) 1723 | } 1724 | TokenKind::String(s) => Ok(AST::new(ASTKind::String(s), self.lexer.get_cur_pos())), 1725 | TokenKind::Char(ch) => Ok(AST::new(ASTKind::Char(ch as i32), self.lexer.get_cur_pos())), 1726 | TokenKind::Symbol(sym) => match sym { 1727 | Symbol::OpeningParen => { 1728 | let expr = self.read_expr(); 1729 | if !self.lexer.skip_symbol(Symbol::ClosingParen)? { 1730 | self.show_error_token(&tok, "expected ')'"); 1731 | } 1732 | expr 1733 | } 1734 | _ => { 1735 | self.show_error_token( 1736 | &tok, 1737 | format!("expected primary section, but got {:?}", tok.kind).as_str(), 1738 | ); 1739 | Err(Error::Something) 1740 | } 1741 | }, 1742 | _ => { 1743 | self.show_error_token( 1744 | &tok, 1745 | format!("read_primary unknown token {:?}", tok.kind).as_str(), 1746 | ); 1747 | Err(Error::Something) 1748 | } 1749 | } 1750 | } 1751 | 1752 | fn usual_binary_ty_cov(&mut self, lhs: Type, rhs: Type) -> Type { 1753 | if lhs.priority() < rhs.priority() { 1754 | rhs 1755 | } else { 1756 | lhs 1757 | } 1758 | } 1759 | fn get_binary_expr_ty(&mut self, lhs: &AST, rhs: &AST, op: &node::CBinOps) -> ParseR { 1760 | fn cast(ty: Type) -> Type { 1761 | match ty { 1762 | Type::Array(elem_ty, _) => Type::Ptr(elem_ty), 1763 | Type::Func(_, _, _) => Type::Ptr(Box::new(ty)), 1764 | _ => ty, 1765 | } 1766 | } 1767 | let lhs_ty = cast(self.get_expr_returning_ty(lhs)?); 1768 | let rhs_ty = cast(self.get_expr_returning_ty(rhs)?); 1769 | if matches!(lhs_ty, Type::Ptr(_)) && matches!(rhs_ty, Type::Ptr(_)) { 1770 | if matches!(op, &node::CBinOps::Sub) { 1771 | return Ok(Type::Long(Sign::Signed)); 1772 | } 1773 | return Ok(Type::Int(Sign::Signed)); 1774 | } 1775 | if matches!(lhs_ty, Type::Ptr(_)) { 1776 | return Ok(lhs_ty); 1777 | } 1778 | if matches!(rhs_ty, Type::Ptr(_)) { 1779 | return Ok(rhs_ty); 1780 | } 1781 | return Ok(self.usual_binary_ty_cov(lhs_ty, rhs_ty)); 1782 | } 1783 | fn get_expr_returning_ty(&mut self, ast: &AST) -> ParseR { 1784 | let size = match ast.kind { 1785 | ASTKind::Int(_, Bits::Bits32) => Type::Int(Sign::Signed), 1786 | ASTKind::Int(_, Bits::Bits64) => Type::Long(Sign::Signed), 1787 | ASTKind::Float(_) => Type::Double, 1788 | ASTKind::Char(_) => Type::Char(Sign::Signed), 1789 | ASTKind::String(ref s) => { 1790 | Type::Array(Box::new(Type::Char(Sign::Signed)), s.len() as i32 + 1) 1791 | } 1792 | ASTKind::Load(ref v) => { 1793 | (*self.get_expr_returning_ty(&*v)?.get_elem_ty().unwrap()).clone() 1794 | } 1795 | ASTKind::Variable(ref ty, _) => Type::Ptr(Box::new((*ty).clone())), 1796 | ASTKind::UnaryOp(_, node::CUnaryOps::LNot) => Type::Int(Sign::Signed), 1797 | ASTKind::UnaryOp(ref expr, node::CUnaryOps::Minus) 1798 | | ASTKind::UnaryOp(ref expr, node::CUnaryOps::Inc) 1799 | | ASTKind::UnaryOp(ref expr, node::CUnaryOps::Dec) 1800 | | ASTKind::UnaryOp(ref expr, node::CUnaryOps::BNot) => { 1801 | self.get_expr_returning_ty(&*expr)? 1802 | } 1803 | ASTKind::UnaryOp(ref expr, node::CUnaryOps::Deref) => { 1804 | (*self.get_expr_returning_ty(&*expr)?.get_elem_ty().unwrap()).clone() 1805 | } 1806 | ASTKind::UnaryOp(ref expr, node::CUnaryOps::Addr) => { 1807 | Type::Ptr(Box::new(self.get_expr_returning_ty(&*expr)?)) 1808 | } 1809 | ASTKind::StructRef(ref expr, ref name) => { 1810 | let ty = self.get_expr_returning_ty(expr)?; 1811 | Type::Ptr(Box::new((*ty.get_field_ty(name.as_str()).unwrap()).clone())) 1812 | } 1813 | ASTKind::TypeCast(_, ref ty) => ty.clone(), 1814 | ASTKind::BinaryOp(ref lhs, ref rhs, ref op) => { 1815 | self.get_binary_expr_ty(&*lhs, &*rhs, &*op)? 1816 | } 1817 | ASTKind::TernaryOp(_, ref then, _) => self.get_expr_returning_ty(&*then)?, 1818 | ASTKind::FuncCall(ref func, _) => { 1819 | let func_ty = self.get_expr_returning_ty(func)?; 1820 | (*func_ty.get_return_ty().unwrap()).clone() 1821 | } 1822 | _ => panic!("unsupported: {:?}", ast.kind), 1823 | }; 1824 | Ok(size) 1825 | } 1826 | fn calc_sizeof(&mut self, ast: &AST) -> ParseR { 1827 | let ty = self.get_expr_returning_ty(ast)?; 1828 | Ok(ty.calc_size()) 1829 | } 1830 | 1831 | fn cast_ast(&mut self, expr: &AST, ty: &Type) -> AST { 1832 | AST::new( 1833 | ASTKind::TypeCast(Box::new(expr.clone()), ty.clone()), 1834 | expr.pos.clone(), 1835 | ) 1836 | } 1837 | } 1838 | -------------------------------------------------------------------------------- /src/types.rs: -------------------------------------------------------------------------------- 1 | use std::boxed::Box; 2 | use node::{ASTKind, AST}; 3 | 4 | #[derive(PartialEq, Debug, Clone, Hash)] 5 | pub enum Sign { 6 | Signed, 7 | Unsigned, 8 | } 9 | 10 | #[derive(PartialEq, Debug, Clone)] 11 | pub enum StorageClass { 12 | Typedef, 13 | Extern, 14 | Static, 15 | Auto, 16 | Register, 17 | } 18 | 19 | #[derive(Debug, Clone)] 20 | pub enum Type { 21 | Void, 22 | Char(Sign), 23 | Short(Sign), 24 | Int(Sign), 25 | Long(Sign), 26 | LLong(Sign), 27 | Float, 28 | Double, 29 | Ptr(Box), 30 | Array(Box, i32), // ary elem type, size 31 | Func(Box, Vec, bool), // return type, param types, vararg 32 | Struct(RectypeName, Vec), // name, fields 33 | Union(RectypeName, Vec, usize), // name, fields, means size of nth field is size of the union 34 | Enum, // as same as Int 35 | } 36 | 37 | pub type RectypeName = String; 38 | 39 | impl Type { 40 | pub fn get_elem_ty<'a>(&'a self) -> Option<&'a Type> { 41 | match self { 42 | &Type::Ptr(ref elem_ty) | &Type::Array(ref elem_ty, _) => Some(&**elem_ty), 43 | _ => None, 44 | } 45 | } 46 | pub fn get_return_ty<'a>(&'a self) -> Option<&'a Type> { 47 | match self { 48 | &Type::Func(ref ret_ty, _, _) => Some(&**ret_ty), 49 | _ => None, 50 | } 51 | } 52 | // pub fn get_params_count(&self) -> Option { 53 | // match self { 54 | // &Type::Func(_, ref params, _) => Some(params.len()), 55 | // _ => None, 56 | // } 57 | // } 58 | pub fn get_field_ty<'a>(&'a self, field_name: &str) -> Option<&'a Type> { 59 | match self { 60 | &Type::Struct(_, ref fields) | &Type::Union(_, ref fields, _) => { 61 | for field in fields { 62 | if let ASTKind::VariableDecl(ref ty, ref name, _, _) = field.kind { 63 | if *name == field_name { 64 | return Some(&*ty); 65 | } 66 | } 67 | } 68 | None 69 | } 70 | _ => None, 71 | } 72 | } 73 | pub fn get_all_fields_types<'a>(&'a self) -> Option> { 74 | match self { 75 | &Type::Struct(_, ref fields) | &Type::Union(_, ref fields, _) => { 76 | let fields_types = fields 77 | .iter() 78 | .map(|field| { 79 | if let ASTKind::VariableDecl(ref ty, _, _, _) = field.kind { 80 | &*ty 81 | } else { 82 | panic!() 83 | } 84 | }) 85 | .collect(); 86 | Some(fields_types) 87 | } 88 | _ => None, 89 | } 90 | } 91 | pub fn get_name(&self) -> Option { 92 | match self { 93 | &Type::Struct(ref name, _) | &Type::Union(ref name, _, _) => Some(name.to_owned()), 94 | _ => None, 95 | } 96 | } 97 | // TODO: any good name? 98 | pub fn conversion(self) -> Type { 99 | match self { 100 | Type::Array(elem_ty, _) => Type::Ptr(elem_ty), 101 | Type::Func(_, _, _) => Type::Ptr(Box::new(self)), 102 | _ => self, 103 | } 104 | } 105 | pub fn is_int_ty(&self) -> bool { 106 | match self { 107 | &Type::Char(_) | &Type::Short(_) | &Type::Int(_) | &Type::Long(_) | &Type::LLong(_) => { 108 | true 109 | } 110 | _ => false, 111 | } 112 | } 113 | pub fn is_float_ty(&self) -> bool { 114 | match self { 115 | &Type::Float | &Type::Double => true, 116 | _ => false, 117 | } 118 | } 119 | pub fn is_arith_ty(&self) -> bool { 120 | self.is_int_ty() || self.is_float_ty() 121 | } 122 | pub fn priority(&self) -> usize { 123 | match self { 124 | &Type::Void => 0, 125 | &Type::Char(_) => 1, 126 | &Type::Short(_) => 2, 127 | &Type::Int(_) => 3, 128 | &Type::Long(_) => 4, 129 | &Type::LLong(_) => 5, 130 | &Type::Float => 6, 131 | &Type::Double => 7, 132 | // &Type::Array(_, _) => 8, 133 | &Type::Enum => 8, 134 | // &Type::Ptr(_) => 10, 135 | // &Type::Struct(_, _) | &Type::Union(_, _, _) => 11, 136 | // &Type::Func(_, _, _) => 12, 137 | _ => panic!(), 138 | } 139 | } 140 | 141 | pub fn calc_size(&self) -> usize { 142 | match self { 143 | &Type::Void => 0, 144 | &Type::Char(_) => 1, 145 | &Type::Short(_) => 2, 146 | &Type::Int(_) => 4, 147 | &Type::Long(_) => 8, 148 | &Type::LLong(_) => 8, 149 | &Type::Float => 4, 150 | &Type::Double => 8, 151 | &Type::Ptr(ref _elemty) => 8, 152 | &Type::Array(ref elemty, ref size) => (*size * elemty.calc_size() as i32) as usize, 153 | &Type::Func(ref _ret_type, ref _param_types, ref _is_vararg) => 1, 154 | // TODO: must fix this sloppy implementation 155 | &Type::Struct(ref _name, ref fields) => { 156 | let mut size_total = 0; 157 | let calc_padding = |off, align| -> usize { 158 | if off % align == 0 { 159 | 0 160 | } else { 161 | align - off % align 162 | } 163 | }; 164 | for field in fields { 165 | size_total += if let ASTKind::VariableDecl(ref ty, _, _, _) = field.kind { 166 | let size = ty.calc_size(); 167 | size + calc_padding(size_total, size) 168 | } else { 169 | 0 170 | }; 171 | } 172 | size_total 173 | } 174 | &Type::Union(ref _name, ref fields, ref max_nth) => { 175 | if let ASTKind::VariableDecl(ref ty, _, _, _) = fields[*max_nth].kind { 176 | ty.calc_size() 177 | } else { 178 | 0 179 | } 180 | } 181 | &Type::Enum => 4, 182 | } 183 | } 184 | } 185 | --------------------------------------------------------------------------------