├── .gitignore ├── Makefile ├── Cargo.toml ├── README.md ├── .github └── workflows │ └── ci.yml ├── Cargo.lock ├── src ├── main.rs ├── type.rs ├── tokenize.rs ├── codegen.rs └── parse.rs └── test.c /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | tmp* 3 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | test: build 2 | ./target/debug/qcc test.c > tmp.s 3 | cc -static -o tmp tmp.s 4 | ./tmp 5 | 6 | build: 7 | cargo build 8 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "qcc" 3 | version = "0.1.0" 4 | edition = "2018" 5 | 6 | [dependencies] 7 | log = "0.4.0" 8 | env_logger = "0.8.4" 9 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # qcc 2 | A toy C compiler written in Rust 3 | 4 | ```console 5 | $ docker run --rm -it --user "$(id -u)":"$(id -g)" -v "$PWD":/usr/src/myapp -w /usr/src/myapp rust ./test.sh 6 | ``` 7 | 8 | ```console 9 | $ cat fib.c 10 | int main() 11 | { 12 | return fib(9); 13 | } 14 | 15 | int fib(int x) 16 | { 17 | if (x <= 1) 18 | return 1; 19 | return fib(x - 1) + fib(x - 2); 20 | } 21 | 22 | $ ./target/release/qcc fib.c 23 | $ cc -o tmp tmp.s 24 | $ ./tmp 25 | $ echo $? 26 | 55 27 | ``` 28 | 29 | ## Reference 30 | 31 | https://github.com/rui314/chibicc 32 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: [push] 4 | 5 | env: 6 | CARGO_INCREMENTAL: 0 7 | 8 | jobs: 9 | format: 10 | name: Format 11 | runs-on: ubuntu-latest 12 | steps: 13 | - uses: actions/checkout@v2 14 | - name: Cargo fmt 15 | run: cargo fmt --all -- --check 16 | 17 | check: 18 | name: Check 19 | runs-on: ubuntu-latest 20 | steps: 21 | - uses: actions/checkout@v2 22 | - name: Cargo check 23 | run: cargo check 24 | 25 | lint: 26 | name: Lint 27 | runs-on: ubuntu-latest 28 | steps: 29 | - uses: actions/checkout@v2 30 | - name: Add clippy 31 | run: rustup component add clippy 32 | - name: Run lint 33 | run: cargo clippy 34 | 35 | test: 36 | name: Test 37 | runs-on: ubuntu-latest 38 | steps: 39 | - uses: actions/checkout@v2 40 | - run: make test 41 | -------------------------------------------------------------------------------- /Cargo.lock: -------------------------------------------------------------------------------- 1 | # This file is automatically @generated by Cargo. 2 | # It is not intended for manual editing. 3 | version = 3 4 | 5 | [[package]] 6 | name = "aho-corasick" 7 | version = "0.7.18" 8 | source = "registry+https://github.com/rust-lang/crates.io-index" 9 | checksum = "1e37cfd5e7657ada45f742d6e99ca5788580b5c529dc78faf11ece6dc702656f" 10 | dependencies = [ 11 | "memchr", 12 | ] 13 | 14 | [[package]] 15 | name = "atty" 16 | version = "0.2.14" 17 | source = "registry+https://github.com/rust-lang/crates.io-index" 18 | checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" 19 | dependencies = [ 20 | "hermit-abi", 21 | "libc", 22 | "winapi", 23 | ] 24 | 25 | [[package]] 26 | name = "cfg-if" 27 | version = "1.0.0" 28 | source = "registry+https://github.com/rust-lang/crates.io-index" 29 | checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" 30 | 31 | [[package]] 32 | name = "env_logger" 33 | version = "0.8.4" 34 | source = "registry+https://github.com/rust-lang/crates.io-index" 35 | checksum = "a19187fea3ac7e84da7dacf48de0c45d63c6a76f9490dae389aead16c243fce3" 36 | dependencies = [ 37 | "atty", 38 | "humantime", 39 | "log", 40 | "regex", 41 | "termcolor", 42 | ] 43 | 44 | [[package]] 45 | name = "hermit-abi" 46 | version = "0.1.19" 47 | source = "registry+https://github.com/rust-lang/crates.io-index" 48 | checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33" 49 | dependencies = [ 50 | "libc", 51 | ] 52 | 53 | [[package]] 54 | name = "humantime" 55 | version = "2.1.0" 56 | source = "registry+https://github.com/rust-lang/crates.io-index" 57 | checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" 58 | 59 | [[package]] 60 | name = "libc" 61 | version = "0.2.107" 62 | source = "registry+https://github.com/rust-lang/crates.io-index" 63 | checksum = "fbe5e23404da5b4f555ef85ebed98fb4083e55a00c317800bc2a50ede9f3d219" 64 | 65 | [[package]] 66 | name = "log" 67 | version = "0.4.14" 68 | source = "registry+https://github.com/rust-lang/crates.io-index" 69 | checksum = "51b9bbe6c47d51fc3e1a9b945965946b4c44142ab8792c50835a980d362c2710" 70 | dependencies = [ 71 | "cfg-if", 72 | ] 73 | 74 | [[package]] 75 | name = "memchr" 76 | version = "2.4.1" 77 | source = "registry+https://github.com/rust-lang/crates.io-index" 78 | checksum = "308cc39be01b73d0d18f82a0e7b2a3df85245f84af96fdddc5d202d27e47b86a" 79 | 80 | [[package]] 81 | name = "qcc" 82 | version = "0.1.0" 83 | dependencies = [ 84 | "env_logger", 85 | "log", 86 | ] 87 | 88 | [[package]] 89 | name = "regex" 90 | version = "1.5.4" 91 | source = "registry+https://github.com/rust-lang/crates.io-index" 92 | checksum = "d07a8629359eb56f1e2fb1652bb04212c072a87ba68546a04065d525673ac461" 93 | dependencies = [ 94 | "aho-corasick", 95 | "memchr", 96 | "regex-syntax", 97 | ] 98 | 99 | [[package]] 100 | name = "regex-syntax" 101 | version = "0.6.25" 102 | source = "registry+https://github.com/rust-lang/crates.io-index" 103 | checksum = "f497285884f3fcff424ffc933e56d7cbca511def0c9831a7f9b5f6153e3cc89b" 104 | 105 | [[package]] 106 | name = "termcolor" 107 | version = "1.1.2" 108 | source = "registry+https://github.com/rust-lang/crates.io-index" 109 | checksum = "2dfed899f0eb03f32ee8c6a0aabdb8a7949659e3466561fc0adf54e26d88c5f4" 110 | dependencies = [ 111 | "winapi-util", 112 | ] 113 | 114 | [[package]] 115 | name = "winapi" 116 | version = "0.3.9" 117 | source = "registry+https://github.com/rust-lang/crates.io-index" 118 | checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" 119 | dependencies = [ 120 | "winapi-i686-pc-windows-gnu", 121 | "winapi-x86_64-pc-windows-gnu", 122 | ] 123 | 124 | [[package]] 125 | name = "winapi-i686-pc-windows-gnu" 126 | version = "0.4.0" 127 | source = "registry+https://github.com/rust-lang/crates.io-index" 128 | checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" 129 | 130 | [[package]] 131 | name = "winapi-util" 132 | version = "0.1.5" 133 | source = "registry+https://github.com/rust-lang/crates.io-index" 134 | checksum = "70ec6ce85bb158151cae5e5c87f95a8e97d2c0c4b001223f33a334e3ce5de178" 135 | dependencies = [ 136 | "winapi", 137 | ] 138 | 139 | [[package]] 140 | name = "winapi-x86_64-pc-windows-gnu" 141 | version = "0.4.0" 142 | source = "registry+https://github.com/rust-lang/crates.io-index" 143 | checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" 144 | -------------------------------------------------------------------------------- /src/main.rs: -------------------------------------------------------------------------------- 1 | use std::collections::LinkedList; 2 | use std::env; 3 | use std::fs::File; 4 | 5 | mod codegen; 6 | mod parse; 7 | mod tokenize; 8 | mod r#type; 9 | 10 | #[derive(Clone, Debug)] 11 | enum TokenKind { 12 | Keyword, 13 | Punct, 14 | Ident, 15 | Num(u16), 16 | Str { str: String, ty: Box }, 17 | Eof, 18 | } 19 | 20 | #[derive(Debug, Clone)] 21 | enum NodeKind { 22 | Add, 23 | Sub, 24 | Mul, 25 | Div, 26 | Eq, 27 | Ne, 28 | Lt, 29 | Le, 30 | Assign, 31 | Return, 32 | If { 33 | cond: Box, 34 | then: Box, 35 | els: Option>, 36 | }, 37 | While { 38 | cond: Box, 39 | then: Box, 40 | }, 41 | For { 42 | init: Box, 43 | inc: Option>, 44 | cond: Option>, 45 | then: Box, 46 | }, 47 | Deref, 48 | Addr, 49 | Block { 50 | body: Box>, 51 | }, 52 | ExprStmt, 53 | StmtExpr { 54 | body: Box>, 55 | }, 56 | FuncCall { 57 | name: String, 58 | args: Vec, 59 | }, 60 | Var(Var), 61 | Num(u16), 62 | Comma, 63 | Member(Member), 64 | } 65 | 66 | #[derive(Debug, Clone)] 67 | enum TypeKind { 68 | Int { 69 | size: u16, 70 | }, 71 | Char { 72 | size: u16, 73 | }, 74 | Func { 75 | params: Box>, 76 | return_ty: Option>, 77 | }, 78 | Ptr { 79 | size: u16, 80 | base: Box, 81 | }, 82 | Array { 83 | size: u16, 84 | len: u16, 85 | base: Box, 86 | }, 87 | Struct { 88 | size: u16, 89 | members: Vec, 90 | }, 91 | } 92 | 93 | #[derive(Debug, Clone)] 94 | struct Member { 95 | ty: Type, 96 | name: Option, 97 | offset: u16, 98 | } 99 | 100 | #[derive(Debug)] 101 | struct VarScope { 102 | name: String, 103 | var: Var, 104 | } 105 | 106 | #[derive(Debug)] 107 | struct Scope { 108 | vars: LinkedList, 109 | } 110 | 111 | impl Default for Scope { 112 | fn default() -> Self { 113 | Scope { 114 | vars: LinkedList::new(), 115 | } 116 | } 117 | } 118 | 119 | #[derive(Debug, Clone)] 120 | struct Type { 121 | kind: TypeKind, 122 | name: Option, 123 | } 124 | 125 | #[derive(Debug)] 126 | struct Tokens { 127 | locals: LinkedList, 128 | globals: LinkedList, 129 | tokens: Vec, 130 | scope: LinkedList, 131 | index: usize, 132 | functions: LinkedList, 133 | string_literal_id: usize, 134 | } 135 | 136 | #[derive(Debug)] 137 | struct Function { 138 | name: String, 139 | body: Node, 140 | params: LinkedList, 141 | locals: LinkedList, 142 | stack_size: Option, 143 | } 144 | 145 | #[derive(Debug, Clone)] 146 | struct Var { 147 | id: usize, 148 | name: String, 149 | offset: u16, 150 | ty: Type, 151 | is_local: bool, 152 | init_data: Option, 153 | } 154 | 155 | #[derive(Debug, Clone)] 156 | struct Token { 157 | kind: TokenKind, 158 | str: String, 159 | loc: usize, 160 | line_number: usize, 161 | } 162 | 163 | #[derive(Debug, Clone)] 164 | struct Node { 165 | kind: NodeKind, 166 | lhs: Option>, 167 | rhs: Option>, 168 | ty: Option, 169 | token: Token, 170 | } 171 | 172 | #[derive(Debug)] 173 | struct Cli { 174 | output: String, 175 | input: Option, 176 | help: bool, 177 | contents: Option, 178 | } 179 | 180 | impl Node { 181 | fn body(&self) -> Option> { 182 | match &self.kind { 183 | NodeKind::Block { body } | NodeKind::StmtExpr { body } => Some(*body.clone()), 184 | _ => None, 185 | } 186 | } 187 | } 188 | 189 | fn main() -> Result<(), Box> { 190 | use std::io::Write; 191 | 192 | env_logger::init(); 193 | 194 | let args = parse_args()?; 195 | if args.help { 196 | usage(0) 197 | } 198 | 199 | let contents = args.contents.unwrap(); 200 | let chars = contents.chars(); 201 | let mut asm = vec![]; 202 | 203 | let tokens = match Token::tokenize(chars.clone().collect::()) { 204 | Ok(tokens) => tokens, 205 | Err(e) => { 206 | eprintln!("{}", e); 207 | return Ok(()); 208 | } 209 | }; 210 | 211 | let mut tokens = Tokens::new(tokens); 212 | log::debug!("all tokens: {:?}", tokens); 213 | tokens.program(); 214 | 215 | log::debug!("parsed tokens: {:#?}", tokens); 216 | asm.push(format!(".file 1 \"{}\"", args.input.unwrap())); 217 | tokens.codegen(&mut asm); 218 | 219 | let mut file = File::create(args.output)?; 220 | file.write_all(format!("{}\n", asm.join("\n")).as_bytes())?; 221 | Ok(()) 222 | } 223 | 224 | fn usage(status: i32) { 225 | println!("qcc [ -o ] "); 226 | std::process::exit(status); 227 | } 228 | 229 | fn parse_args() -> Result { 230 | use std::io::{BufReader, Read}; 231 | 232 | let args: Vec = env::args().collect(); 233 | let mut args_iter = args.iter().skip(1); 234 | let mut cli_args = Cli { 235 | output: String::from("tmp.s"), 236 | input: None, 237 | help: false, 238 | contents: None, 239 | }; 240 | log::debug!("args: {:?}", args); 241 | 242 | while let Some(arg) = args_iter.next() { 243 | if arg == "--help" { 244 | cli_args.help = true; 245 | return Ok(cli_args); 246 | } 247 | 248 | if arg == "-o" { 249 | cli_args.output = args_iter.next().unwrap().clone(); 250 | continue; 251 | } 252 | 253 | cli_args.input = Some(arg.clone()); 254 | break; 255 | } 256 | 257 | log::debug!("cli_args: {:?}", cli_args); 258 | 259 | if let Some(file_path) = &cli_args.input { 260 | let file = File::open(file_path).map_err(|_| "failed to open a file")?; 261 | let mut buf_reader = BufReader::new(file); 262 | let mut contents = String::new(); 263 | buf_reader 264 | .read_to_string(&mut contents) 265 | .map_err(|_| "failed to read from a file")?; 266 | cli_args.contents = Some(contents); 267 | } else { 268 | let mut input = String::new(); 269 | let stdin = std::io::stdin(); 270 | stdin 271 | .lock() 272 | .read_to_string(&mut input) 273 | .map_err(|_| "failed to read from pipe")?; 274 | cli_args.contents = Some(input); 275 | } 276 | 277 | Ok(cli_args) 278 | } 279 | -------------------------------------------------------------------------------- /src/type.rs: -------------------------------------------------------------------------------- 1 | use std::collections::LinkedList; 2 | 3 | use crate::{Member, Node, NodeKind, Token, Type, TypeKind}; 4 | 5 | impl Type { 6 | pub fn type_int() -> Self { 7 | Self { 8 | kind: TypeKind::Int { size: 8 }, 9 | name: None, 10 | } 11 | } 12 | 13 | pub fn type_char() -> Self { 14 | Self { 15 | kind: TypeKind::Char { size: 1 }, 16 | name: None, 17 | } 18 | } 19 | 20 | pub fn type_struct(members: Vec, size: u16, token: Token) -> Self { 21 | Self { 22 | kind: TypeKind::Struct { members, size }, 23 | name: Some(token), 24 | } 25 | } 26 | 27 | pub fn is_integer(&self) -> bool { 28 | matches!(self.kind, TypeKind::Int { .. } | TypeKind::Char { .. }) 29 | } 30 | 31 | pub fn is_pointer(&self) -> bool { 32 | matches!(self.kind, TypeKind::Ptr { .. } | TypeKind::Array { .. }) 33 | } 34 | 35 | pub fn base(&self) -> Option { 36 | match &self.kind { 37 | TypeKind::Ptr { base, .. } | TypeKind::Array { base, .. } => Some(*base.clone()), 38 | _ => None, 39 | } 40 | } 41 | 42 | pub fn size(&self) -> Option { 43 | match &self.kind { 44 | TypeKind::Int { size } 45 | | TypeKind::Ptr { size, .. } 46 | | TypeKind::Array { size, .. } 47 | | TypeKind::Char { size, .. } 48 | | TypeKind::Struct { size, .. } => Some(size.clone()), 49 | _ => None, 50 | } 51 | } 52 | 53 | pub fn pointer_to(self) -> Self { 54 | Self { 55 | name: None, 56 | kind: TypeKind::Ptr { 57 | size: 8, 58 | base: Box::new(self), 59 | }, 60 | } 61 | } 62 | 63 | pub fn func_type(&self, params: Vec) -> Self { 64 | Self { 65 | name: None, 66 | kind: TypeKind::Func { 67 | params: Box::new(params), 68 | return_ty: Some(Box::new(self.clone())), 69 | }, 70 | } 71 | } 72 | 73 | pub fn array_of(self, len: u16) -> Self { 74 | match self.size() { 75 | Some(size) => Self { 76 | name: None, 77 | kind: TypeKind::Array { 78 | base: Box::new(self), 79 | size: size * len, 80 | len, 81 | }, 82 | }, 83 | None => unreachable!("size does not exist"), 84 | } 85 | } 86 | } 87 | 88 | impl Node { 89 | fn add_type_to_body(&mut self) { 90 | match &self.kind { 91 | NodeKind::Block { body } => { 92 | let mut body = body.clone(); 93 | for node in body.iter_mut() { 94 | node.add_type(); 95 | } 96 | self.kind = NodeKind::Block { body }; 97 | } 98 | NodeKind::StmtExpr { body } => { 99 | let mut body = body.clone(); 100 | for node in body.iter_mut() { 101 | node.add_type(); 102 | } 103 | self.kind = NodeKind::StmtExpr { body }; 104 | } 105 | _ => (), 106 | } 107 | } 108 | 109 | fn add_type_to_args(&mut self) { 110 | match &self.kind { 111 | NodeKind::FuncCall { name, args } => { 112 | let mut args = args.clone(); 113 | for node in args.iter_mut() { 114 | node.add_type(); 115 | } 116 | self.kind = NodeKind::FuncCall { 117 | name: name.clone(), 118 | args, 119 | }; 120 | } 121 | _ => (), 122 | } 123 | } 124 | 125 | pub fn add_type(&mut self) { 126 | if self.ty.is_some() { 127 | return; 128 | } 129 | 130 | if let Some(lhs) = self.lhs.as_mut() { 131 | lhs.add_type(); 132 | } 133 | if let Some(rhs) = self.rhs.as_mut() { 134 | rhs.add_type(); 135 | } 136 | 137 | self.add_type_to_body(); 138 | self.add_type_to_args(); 139 | 140 | if let NodeKind::FuncCall { args, .. } = &mut self.kind { 141 | for arg in args.iter_mut() { 142 | arg.add_type(); 143 | } 144 | } 145 | 146 | match &self.kind { 147 | NodeKind::Add | NodeKind::Sub | NodeKind::Mul | NodeKind::Div => { 148 | self.ty = self.lhs.as_ref().map(|lhs| lhs.ty.clone()).flatten() 149 | } 150 | NodeKind::Assign => { 151 | if let Some(lhs) = &self.lhs { 152 | if let Some(ty) = &lhs.ty { 153 | if let TypeKind::Array { .. } = ty.kind { 154 | panic!("not an lvalue"); 155 | } 156 | } 157 | } 158 | log::debug!("add type to assign"); 159 | self.ty = self.lhs.as_ref().map(|lhs| lhs.ty.clone()).flatten() 160 | } 161 | NodeKind::Eq 162 | | NodeKind::Ne 163 | | NodeKind::Lt 164 | | NodeKind::Le 165 | | NodeKind::Num(_) 166 | | NodeKind::FuncCall { .. } => self.ty = Some(Type::type_int()), 167 | NodeKind::Var(var) => self.ty = Some(var.ty.clone()), 168 | NodeKind::Comma => { 169 | if let Some(rhs) = &self.rhs { 170 | self.ty = rhs.ty.clone() 171 | } 172 | } 173 | NodeKind::Member(member) => self.ty = Some(member.ty.clone()), 174 | NodeKind::Addr => { 175 | self.ty = if let Some(TypeKind::Array { base, .. }) = self 176 | .lhs 177 | .as_ref() 178 | .map(|lhs| lhs.ty.as_ref()) 179 | .flatten() 180 | .map(|ty| ty.clone().kind) 181 | { 182 | Some(base.pointer_to()) 183 | } else { 184 | self.lhs 185 | .as_ref() 186 | .map(|lhs| lhs.ty.clone().map(|ty| ty.pointer_to())) 187 | .flatten() 188 | }; 189 | } 190 | NodeKind::Deref => { 191 | if let Some(Some(base)) = self 192 | .lhs 193 | .as_ref() 194 | .map(|lhs| lhs.ty.clone().map(|ty| ty.base())) 195 | .flatten() 196 | { 197 | self.ty = Some(base); 198 | return; 199 | } 200 | unreachable!("invalid pointer dereference") 201 | } 202 | NodeKind::StmtExpr { body } => { 203 | if let Some(stmt) = body.last() { 204 | log::debug!("stmt={:?}", stmt); 205 | self.ty = Some(stmt.ty.clone().unwrap()); 206 | return; 207 | } 208 | 209 | // unreachable!("statement expression returning void is not supported",) 210 | } 211 | _ => {} 212 | } 213 | 214 | log::debug!("type={:?}", self.ty); 215 | } 216 | } 217 | -------------------------------------------------------------------------------- /src/tokenize.rs: -------------------------------------------------------------------------------- 1 | use crate::{Token, TokenKind, Type}; 2 | use std::str::Chars; 3 | 4 | fn error_at(c: char, input: Chars, index: usize, error: String) -> String { 5 | let loc: Vec = input 6 | .clone() 7 | .enumerate() 8 | .filter(|(idx, _)| idx <= &index) 9 | .map(|(_, v)| v) 10 | .collect(); 11 | 12 | String::from(format!( 13 | "{}{}", 14 | input.clone().collect::(), 15 | format!( 16 | "{}^ {}", 17 | (1..loc.len()).map(|_| " ").collect::(), 18 | error 19 | ) 20 | )) 21 | } 22 | 23 | impl Token { 24 | pub fn new(kind: TokenKind, str: impl Into, loc: usize, line_number: usize) -> Self { 25 | let tok = Self { 26 | kind, 27 | str: str.into(), 28 | loc, 29 | line_number, 30 | }; 31 | tok 32 | } 33 | 34 | pub fn tokenize(p: String) -> Result, String> { 35 | let mut tokens = vec![]; 36 | 37 | let mut line_number = 1; 38 | let chars = p.chars(); 39 | let chars_vec = p.chars().collect::>(); 40 | let mut chars_iter = chars.clone().enumerate(); 41 | 42 | while let Some((i, p)) = chars_iter.next() { 43 | log::debug!("tokens={:?}", tokens); 44 | 45 | if p == '\n' { 46 | line_number += 1; 47 | } 48 | 49 | if is_line_comments(chars_vec.clone(), p, i) { 50 | chars_iter.next(); 51 | while let Some((_, p)) = chars_iter.next() { 52 | if p == '\n' { 53 | break; 54 | } 55 | } 56 | continue; 57 | } 58 | 59 | if is_block_comments(chars_vec.clone(), p, i) { 60 | chars_iter.next(); 61 | match chars_vec[i + 1..].iter().collect::().find("*/") { 62 | Some(idx) => { 63 | for _ in 0..idx { 64 | chars_iter.next(); 65 | } 66 | } 67 | None => { 68 | return Err(error_at( 69 | p, 70 | chars, 71 | i, 72 | "unterminated block comment".to_string(), 73 | )); 74 | } 75 | } 76 | chars_iter.next(); 77 | chars_iter.next(); 78 | continue; 79 | } 80 | 81 | if p.is_whitespace() { 82 | continue; 83 | } 84 | 85 | if p == '"' { 86 | let token = read_string_literal(&mut chars_iter, i, line_number); 87 | tokens.push(token?); 88 | continue; 89 | } 90 | 91 | if is_ident(p) { 92 | let mut ident = p.to_string(); 93 | if let Some(next_c) = chars_vec.get(i + 1) { 94 | if !(is_ident(*next_c) || is_number(*next_c)) { 95 | tokens.push(Self::new(TokenKind::Ident, ident, i, line_number)); 96 | continue; 97 | } 98 | } 99 | while let Some((i, c)) = chars_iter.next() { 100 | log::debug!("char={}", c); 101 | ident.push(c); 102 | if let Some(next_c) = chars_vec.get(i + 1) { 103 | if !(is_ident(*next_c) || is_number(*next_c)) { 104 | break; 105 | } 106 | } 107 | } 108 | tokens.push(Self::new(TokenKind::Ident, ident, i, line_number)); 109 | continue; 110 | } 111 | 112 | if is_punctuators(p) { 113 | let mut op = p.to_string(); 114 | if let Some(next_c) = chars_vec.get(i + 1) { 115 | if is_cmp_op(format!("{}{}", op, next_c)) { 116 | chars_iter.next(); 117 | op.push(*next_c) 118 | }; 119 | } 120 | tokens.push(Self::new(TokenKind::Punct, op, i, line_number)); 121 | continue; 122 | } 123 | 124 | if p.is_digit(10) { 125 | let mut number = vec![p]; 126 | if let Some(next_c) = chars_vec.get(i + 1) { 127 | if !next_c.is_digit(10) { 128 | tokens.push(Self::new( 129 | TokenKind::Num( 130 | number 131 | .iter() 132 | .collect::() 133 | .parse::() 134 | .or_else(|_| { 135 | Err(format!("cannot convert char to integer: {:?}", number)) 136 | })?, 137 | ), 138 | p, 139 | i, 140 | line_number, 141 | )); 142 | continue; 143 | } 144 | } 145 | let mut idx = i; 146 | while let Some((i, c)) = chars_iter.next() { 147 | number.push(c); 148 | idx = 1; 149 | if let Some(next_c) = chars_vec.get(i + 1) { 150 | if !next_c.is_digit(10) { 151 | break; 152 | } 153 | } 154 | } 155 | tokens.push(Self::new( 156 | TokenKind::Num(number.iter().collect::().parse::().or_else( 157 | |_| Err(format!("cannot convert char to integer: {:?}", number)), 158 | )?), 159 | p, 160 | idx, 161 | line_number, 162 | )); 163 | continue; 164 | }; 165 | return Err(error_at(p, chars, i, format!("invalid token: {}", p))); 166 | } 167 | 168 | tokens.push(Self::new(TokenKind::Eof, "", 0, 0)); 169 | convert_keywords(&mut tokens); 170 | Ok(tokens) 171 | } 172 | } 173 | 174 | fn is_keyword(token: impl Into) -> bool { 175 | [ 176 | "return", "if", "else", "while", "for", "int", "char", "sizeof", "struct", 177 | ] 178 | .contains(&token.into().as_ref()) 179 | } 180 | 181 | fn is_punctuators(ch: char) -> bool { 182 | ch == '+' 183 | || ch == '-' 184 | || ch == '*' 185 | || ch == '/' 186 | || ch == '(' 187 | || ch == ')' 188 | || ch == ';' 189 | || ch == '>' 190 | || ch == '<' 191 | || ch == '=' 192 | || ch == '!' 193 | || ch == '{' 194 | || ch == '}' 195 | || ch == '&' 196 | || ch == ',' 197 | || ch == '[' 198 | || ch == ']' 199 | || ch == '.' 200 | } 201 | 202 | fn is_cmp_op(op: String) -> bool { 203 | op == "==" || op == "!=" || op == "<=" || op == ">=" 204 | } 205 | 206 | fn is_ident(ch: char) -> bool { 207 | ('a'..='z').contains(&ch) || ('A'..='Z').contains(&ch) || ch == '_' 208 | } 209 | 210 | fn is_number(ch: char) -> bool { 211 | ('0'..='9').contains(&ch) 212 | } 213 | 214 | fn is_line_comments(chars: Vec, ch: char, i: usize) -> bool { 215 | if let Some(next_c) = chars.get(i + 1) { 216 | return format!("{}{}", ch, next_c) == "//"; 217 | } 218 | false 219 | } 220 | 221 | fn is_block_comments(chars: Vec, ch: char, i: usize) -> bool { 222 | if let Some(next_c) = chars.get(i + 1) { 223 | return format!("{}{}", ch, next_c) == "/*"; 224 | } 225 | false 226 | } 227 | 228 | fn convert_keywords(tokens: &mut Vec) { 229 | for token in tokens.iter_mut() { 230 | if let TokenKind::Ident = &token.kind { 231 | if is_keyword(&token.str) { 232 | token.kind = TokenKind::Keyword; 233 | } 234 | } 235 | } 236 | } 237 | 238 | fn read_string_literal( 239 | chars: &mut impl Iterator, 240 | column_number: usize, 241 | line_number: usize, 242 | ) -> Result { 243 | let mut str = String::new(); 244 | while let Some((_, c)) = chars.next() { 245 | log::debug!("string literal={}", c); 246 | log::debug!("str={}", str); 247 | if c == '\n' || c == '\0' { 248 | return Err(format!("unclosed string literal: {:?}", c)); 249 | } 250 | if c == '"' { 251 | break; 252 | } 253 | 254 | str.push(c); 255 | if c == '\\' { 256 | str.push(chars.next().unwrap().1); 257 | } 258 | } 259 | 260 | let mut buf = String::new(); 261 | let mut chars_iter = str.chars(); 262 | log::debug!("chars_iter={:?}", chars_iter); 263 | while let Some(c) = chars_iter.next() { 264 | if c == '\\' { 265 | buf.push_str(&read_escaped_char(&mut chars_iter)); 266 | } else { 267 | buf.push(c); 268 | } 269 | } 270 | Ok(Token::new( 271 | TokenKind::Str { 272 | str: buf.clone(), 273 | ty: Box::new(Type::type_char().array_of(buf.len() as u16 + 1)), 274 | }, 275 | str, 276 | column_number, 277 | line_number, 278 | )) 279 | } 280 | 281 | fn from_hex(c: char) -> u8 { 282 | if '0' <= c && c <= '9' { 283 | return c as u8 - '0' as u8; 284 | } 285 | if 'a' <= c && c <= 'f' { 286 | return c as u8 - 'a' as u8 + 10; 287 | } 288 | return c as u8 - 'A' as u8 + 10; 289 | } 290 | 291 | fn read_escaped_char(chars: &mut impl Iterator) -> String { 292 | let mut c = chars.next().unwrap(); 293 | if '0' <= c && c <= '7' { 294 | let mut ch = c as u8 - '0' as u8; 295 | match chars.next() { 296 | Some(ch) => c = ch, 297 | None => return (ch as char).to_string(), 298 | } 299 | for _ in 0..2 { 300 | if '0' <= c && c <= '7' { 301 | ch = (ch << 3) + (c as u8 - '0' as u8); 302 | match chars.next() { 303 | Some(ch) => c = ch, 304 | None => break, 305 | } 306 | } 307 | } 308 | return (ch as char).to_string(); 309 | } 310 | 311 | if c == 'x' { 312 | if let Some(ch) = chars.next() { 313 | c = ch 314 | } 315 | if !c.is_digit(16) { 316 | panic!("invalid hex escape sequence"); 317 | } 318 | 319 | let mut ch = from_hex(c); 320 | while let Some(char) = chars.next() { 321 | if !char.is_digit(16) { 322 | break; 323 | } 324 | ch = (ch << 4) + from_hex(char); 325 | } 326 | return (ch as char).to_string(); 327 | } 328 | 329 | match c { 330 | 'a' => String::from("\u{07}"), 331 | 'b' => String::from("\u{08}"), 332 | 't' => String::from("\u{09}"), 333 | 'n' => String::from("\u{0A}"), 334 | 'v' => String::from("\u{0B}"), 335 | 'f' => String::from("\u{0C}"), 336 | 'r' => String::from("\u{0D}"), 337 | 'e' => String::from("\u{1B}"), 338 | _ => c.to_string(), 339 | } 340 | } 341 | -------------------------------------------------------------------------------- /test.c: -------------------------------------------------------------------------------- 1 | // -*- c -*- 2 | 3 | // This is a line comment. 4 | 5 | /* 6 | * This is a block comment. 7 | */ 8 | 9 | int g1; 10 | int g2[4]; 11 | 12 | int assert(int expected, int actual, char *code) 13 | { 14 | if (expected == actual) 15 | { 16 | printf("%s => %d\n", code, actual); 17 | } 18 | else 19 | { 20 | printf("%s => %d expected but got %d\n", code, expected, actual); 21 | exit(1); 22 | } 23 | } 24 | 25 | int ret3() 26 | { 27 | return 3; 28 | } 29 | 30 | int ret5() 31 | { 32 | return 5; 33 | } 34 | 35 | int add2(int x, int y) 36 | { 37 | return x + y; 38 | } 39 | 40 | int sub2(int x, int y) 41 | { 42 | return x - y; 43 | } 44 | 45 | int add6(int a, int b, int c, int d, int e, int f) 46 | { 47 | return a + b + c + d + e + f; 48 | } 49 | 50 | int addx(int *x, int y) 51 | { 52 | return *x + y; 53 | } 54 | 55 | int sub_char(char a, char b, char c) 56 | { 57 | return a - b - c; 58 | } 59 | 60 | int fib(int x) 61 | { 62 | if (x <= 1) 63 | return 1; 64 | return fib(x - 1) + fib(x - 2); 65 | } 66 | 67 | int main() 68 | { 69 | assert(0, 0, "0"); 70 | assert(42, 42, "42"); 71 | assert(21, 5 + 20 - 4, "5 + 20 - 4"); 72 | assert(41, 12 + 34 - 5, "12 + 34 - 5"); 73 | assert(47, 5 + 6 * 7, "5 + 6 * 7"); 74 | assert(15, 5 * (9 - 6), "5 * (9 - 6)"); 75 | assert(4, (3 + 5) / 2, "(3 + 5) / 2"); 76 | assert(10, -10 + 20, "-10 + 20"); 77 | assert(1, (-3 + 5) / 2, "(-3 + 5) / 2"); 78 | assert(6, (-3 * +4) / -2, "(-3*+4)/-2"); 79 | assert(10, - -10, "- -10"); 80 | assert(10, - -+10, "- -+10"); 81 | 82 | assert(0, 0 == 1, "0 == 1"); 83 | assert(1, 42 == 42, "42 == 42"); 84 | assert(1, 0 != 1, "0 != 1"); 85 | assert(0, 42 != 42, "42 != 42"); 86 | 87 | assert(1, 0 < 1, "0 < 1"); 88 | assert(0, 1 < 1, "1 < 1"); 89 | assert(0, 2 < 1, "2 < 1"); 90 | assert(1, 0 <= 1, "0 <= 1"); 91 | assert(1, 1 <= 1, "1 <= 1"); 92 | assert(0, 2 <= 1, "2 <= 1"); 93 | 94 | assert(1, 1 > 0, "1 > 0"); 95 | assert(0, 1 > 1, "1 > 1"); 96 | assert(0, 1 > 2, "1 > 2"); 97 | assert(1, 1 >= 0, "1 >= 0"); 98 | assert(1, 1 >= 1, "1 >= 1"); 99 | assert(0, 1 >= 2, "1 >= 2"); 100 | 101 | assert(3, ({ int a; a=3; a; }), "int a; a=3; a;"); 102 | assert(8, ({ int a; int z; a=3; z=5; a+z; }), "int a; int z; a=3; z=5; a+z;"); 103 | assert(1, ({ int a=5; int z=4; a-z; }), "int a=5; int z=4; a-z;"); 104 | assert(15, ({ int a=3; int z=5; a*z; }), "int a=3; int z=5; a*z;"); 105 | assert(2, ({ int a=8; int z=4; a/z; }), "int a=8; int z=4; a/z;"); 106 | assert(6, ({ int a; int b; a=b=3; a+b; }), "int a; int b; a=b=3; a+b;"); 107 | 108 | assert(3, ({ int foo=3; foo; }), "int foo=3; foo"); 109 | assert(8, ({ int foo123=3; int bar=5; foo123+bar; }), "int foo123=3; int bar=5; return foo123+bar"); 110 | 111 | assert(3, ({ int x=0; if (0) x=2; else x=3; x; }), "int x=0; if (0) x=2; else x=3; x;"); 112 | assert(3, ({ int x=0; if (1-1) x=2; else x=3; x; }), "int x=0; if (1-1) x=2; else x=3; x;"); 113 | assert(2, ({ int x=0; if (1) x=2; else x=3; x; }), "int x=0; if (1) x=2; else x=3; x;"); 114 | assert(2, ({ int x=0; if (2-1) x=2; else x=3; x; }), "int x=0; if (2-1) x=2; else x=3; x;"); 115 | 116 | assert(3, ({ 1; {2;} 3; }), "1; {2;} 3;"); 117 | assert(10, ({ int i=0; i=0; while(i<10) i=i+1; i; }), "int i=0; i=0; while(i<10) i=i+1; i;"); 118 | assert(55, ({ int i=0; int j=0; while(i<=10) {j=i+j; i=i+1;} j; }), "int i=0; int j=0; while(i<=10) {j=i+j; i=i+1;} j;"); 119 | assert(55, ({ int i=0; int j=0; for (i=0; i<=10; i=i+1) j=i+j; j; }), "int i=0; int j=0; for (i=0; i<=10; i=i+1) j=i+j; j;"); 120 | 121 | assert(3, ret3(), "ret3();"); 122 | assert(5, ret5(), "ret5();"); 123 | assert(8, add2(3, 5), "add2(3, 5);"); 124 | assert(2, sub2(5, 3), "sub2(5, 3);"); 125 | assert(21, add6(1, 2, 3, 4, 5, 6), "add6(1,2,3,4,5,6)"); 126 | assert(66, add6(1, 2, add6(3, 4, 5, 6, 7, 8), 9, 10, 11), "add6(1,2,add6(3,4,5,6,7,8),9,10,11)"); 127 | assert(136, add6(1, 2, add6(3, add6(4, 5, 6, 7, 8, 9), 10, 11, 12, 13), 14, 15, 16), "add6(1,2,add6(3,add6(4,5,6,7,8,9),10,11,12,13),14,15,16)"); 128 | assert(55, fib(9), "fib(9)"); 129 | 130 | assert(3, ({ int x=3; *&x; }), "int x=3; *&x;"); 131 | assert(3, ({ int x=3; int *y=&x; int **z=&y; **z; }), "int x=3; int *y=&x; int **z=&y; **z;"); 132 | assert(5, ({ int x=3; int y=5; *(&x+1); }), "int x=3; int y=5; *(&x+1);"); 133 | assert(3, ({ int x=3; int y=5; *(&y-1); }), "int x=3; int y=5; *(&y-1);"); 134 | assert(5, ({ int x=3; int *y=&x; *y=5; x; }), "int x=3; int *y=&x; *y=5; x;"); 135 | assert(7, ({ int x=3; int y=5; *(&x+1)=7; y; }), "int x=3; int y=5; *(&x+1)=7; y;"); 136 | assert(7, ({ int x=3; int y=5; *(&y-2+1)=7; x; }), "int x=3; int y=5; *(&y-2+1)=7; x;"); 137 | assert(2, ({ int x=1; int y=1; int z=1; return &z-&x; }), "int x=1; int y=1; int z=1; return &z-&x;"); 138 | assert(5, ({ int x=3; return (&x+2)-&x+3; }), "int x=3; return (&x+2)-&x+3;"); 139 | 140 | assert(5, ({ int x=3; int y=5; int *z=&x; *(z+1); }), "int x=3; int y=5; int *z=&x; *(z+1);"); 141 | assert(3, ({ int x=3; int y=5; int *z=&y; *(z-1); }), "int x=3; int y=5; int *z=&y; *(z-1);"); 142 | assert(5, ({ int x=3; int *y=&x; *y=5; x; }), "int x=3; int *y=&x; *y=5; x;"); 143 | assert(7, ({ int x=3; int y=5; *(&x+1)=7; y; }), "int x=3; int y=5; *(&x+1)=7; y;"); 144 | assert(7, ({ int x=3; int y=5; *(&y-1)=7; x; }), "int x=3; int y=5; *(&y-1)=7; x;"); 145 | assert(8, ({ int x=3; int y=5; addx(&x, y); }), "int x=3; int y=5; addx(&x, y);"); 146 | 147 | assert(3, ({ int x[2]; int *y=&x; *y=3; *x; }), "int x[2]; int *y=&x; *y=3; *x;"); 148 | 149 | assert(3, ({ int x[3]; *x=3; *(x+1)=4; *(x+2)=5; *x; }), "int x[3]; *x=3; *(x+1)=4; *(x+2)=5; return *x;"); 150 | assert(4, ({ int x[3]; *x=3; *(x+1)=4; *(x+2)=5; *(x+1); }), "int x[3]; *x=3; *(x+1)=4; *(x+2)=5; return *(x+1);"); 151 | assert(5, ({ int x[3]; *x=3; *(x+1)=4; *(x+2)=5; *(x+2); }), "int x[3]; *x=3; *(x+1)=4; *(x+2)=5; return *(x+2);"); 152 | 153 | assert(0, ({ int x[2][3]; int *y=x; *y=0; **x; }), "int x[2][3]; int *y=x; *y=0; **x;"); 154 | assert(1, ({ int x[2][3]; int *y=x; *(y+1)=1; *(*x+1); }), "int x[2][3]; int *y=x; *(y+1)=1; *(*x+1);"); 155 | assert(2, ({ int x[2][3]; int *y=x; *(y+2)=2; *(*x+2); }), "int x[2][3]; int *y=x; *(y+2)=2; *(*x+2);"); 156 | assert(3, ({ int x[2][3]; int *y=x; *(y+3)=3; **(x+1); }), "int x[2][3]; int *y=x; *(y+3)=3; **(x+1);"); 157 | assert(4, ({ int x[2][3]; int *y=x; *(y+4)=4; *(*(x+1)+1); }), "int x[2][3]; int *y=x; *(y+4)=4; *(*(x+1)+1);"); 158 | assert(5, ({ int x[2][3]; int *y=x; *(y+5)=5; *(*(x+1)+2); }), "int x[2][3]; int *y=x; *(y+5)=5; *(*(x+1)+2);"); 159 | 160 | assert(3, ({ int x[3]; *x=3; x[1]=4; x[2]=5; x[0]; }), "int x[3]; *x=3; x[1]=4; x[2]=5; x[0];"); 161 | assert(4, ({ int x[3]; *x=3; x[1]=4; x[2]=5; x[1]; }), "int x[3]; *x=3; x[1]=4; x[2]=5; x[1];"); 162 | assert(5, ({ int x[3]; *x=3; x[1]=4; x[2]=5; x[2]; }), "int x[3]; *x=3; x[1]=4; x[2]=5; x[2];"); 163 | 164 | assert(0, ({ int x[2][3]; int *y=x; y[0]=0; x[0][0]; }), "int x[2][3]; int *y=x; y[0]=0; x[0][0];"); 165 | assert(1, ({ int x[2][3]; int *y=x; y[1]=1; x[0][1]; }), "int x[2][3]; int *y=x; y[1]=1; x[0][1];"); 166 | assert(2, ({ int x[2][3]; int *y=x; y[2]=2; x[0][2]; }), "int x[2][3]; int *y=x; y[2]=2; x[0][2];"); 167 | assert(3, ({ int x[2][3]; int *y=x; y[3]=3; x[1][0]; }), "int x[2][3]; int *y=x; y[3]=3; x[1][0];"); 168 | assert(4, ({ int x[2][3]; int *y=x; y[4]=4; x[1][1]; }), "int x[2][3]; int *y=x; y[4]=4; x[1][1];"); 169 | assert(5, ({ int x[2][3]; int *y=x; y[5]=5; x[1][2]; }), "int x[2][3]; int *y=x; y[5]=5; x[1][2];"); 170 | 171 | assert(8, ({ int x; sizeof(x); }), "int x; sizeof(x);"); 172 | assert(8, ({ int x; sizeof x; }), "int x; sizeof x;"); 173 | assert(8, ({ int *x; sizeof(x); }), "int *x; sizeof(x);"); 174 | assert(32, ({ int x[4]; sizeof(x); }), "int x[4]; sizeof(x);"); 175 | assert(96, ({ int x[3][4]; sizeof(x); }), "int x[3][4]; sizeof(x);"); 176 | assert(32, ({ int x[3][4]; sizeof(*x); }), "int x[3][4]; sizeof(*x);"); 177 | assert(8, ({ int x[3][4]; sizeof(**x); }), "int x[3][4]; sizeof(**x);"); 178 | assert(9, ({ int x[3][4]; sizeof(**x) + 1; }), "int x[3][4]; sizeof(**x) + 1;"); 179 | assert(9, ({ int x[3][4]; sizeof **x + 1; }), "int x[3][4]; sizeof **x + 1;"); 180 | assert(8, ({ int x[3][4]; sizeof(**x + 1); }), "int x[3][4]; sizeof(**x + 1);"); 181 | assert(8, ({ int x=1; sizeof(x=2); }), "int x=1; sizeof(x=2);"); 182 | assert(1, ({ int x=1; sizeof(x=2); x; }), "int x=1; sizeof(x=2); x;"); 183 | 184 | assert(0, g1, "g1"); 185 | g1 = 3; 186 | assert(3, g1, "g1"); 187 | 188 | g2[0] = 0; 189 | g2[1] = 1; 190 | g2[2] = 2; 191 | g2[3] = 3; 192 | assert(0, g2[0], "g2[0]"); 193 | assert(1, g2[1], "g2[1]"); 194 | assert(2, g2[2], "g2[2]"); 195 | assert(3, g2[3], "g2[3]"); 196 | 197 | assert(8, sizeof(g1), "sizeof(g1)"); 198 | assert(32, sizeof(g2), "sizeof(g2)"); 199 | 200 | assert(1, ({ char x=1; return x; }), "char x=1; return x;"); 201 | assert(1, ({ char x=1; char y=2; return x; }), "char x=1; char y=2; return x;"); 202 | assert(2, ({ char x=1; char y=2; return y; }), "char x=1; char y=2; return y;"); 203 | 204 | assert(1, ({ char x; sizeof(x); }), "char x; sizeof(x);"); 205 | assert(10, ({ char x[10]; sizeof(x); }), "char x[10]; sizeof(x);"); 206 | assert(1, sub_char(7, 3, 3), "sub_char(7, 3, 3)"); 207 | 208 | assert(0, ({ ""[0]; }), "\"\"[0];"); 209 | assert(1, ({ sizeof(""); }), "sizeof(\"\");"); 210 | 211 | assert(97, ({ "abc"[0]; }), "\"abc\"[0];"); 212 | assert(98, ({ "abc"[1]; }), "\"abc\"[1];"); 213 | assert(99, ({ "abc"[2]; }), "\"abc\"[2];"); 214 | assert(0, ({ "abc"[3]; }), "\"abc\"[3];"); 215 | assert(4, ({ sizeof("abc"); }), "sizeof(\"abc\");"); 216 | 217 | assert(7, ({ "\a"[0]; }), "\"\a\"[0]"); 218 | assert(8, ({ "\b"[0]; }), "\"\b\"[0]"); 219 | assert(9, ({ "\t"[0]; }), "\"\t\"[0]"); 220 | assert(10, ({ "\n"[0]; }), "\"\n\"[0]"); 221 | assert(11, ({ "\v"[0]; }), "\"\v\"[0]"); 222 | assert(12, ({ "\f"[0]; }), "\"\f\"[0]"); 223 | assert(13, ({ "\r"[0]; }), "\"\r\"[0]"); 224 | assert(27, ({ "\e"[0]; }), "\"\e\"[0]"); 225 | 226 | assert(106, ({ "\j"[0]; }), "\"\j\"[0];"); 227 | assert(107, ({ "\k"[0]; }), "\"\k\"[0];"); 228 | assert(108, ({ "\l"[0]; }), "\"\l\"[0];"); 229 | 230 | assert(7, ({ "\ax\ny"[0]; }), "\"\ax\ny\"[0];"); 231 | assert(120, ({ "\ax\ny"[1]; }), "\"\ax\ny\"[1];"); 232 | assert(10, ({ "\ax\ny"[2]; }), "\"\ax\ny\"[2];"); 233 | assert(121, ({ "\ax\ny"[3]; }), "\"\ax\ny\"[3];"); 234 | 235 | assert(0, ({ "\0"[0]; }), "\"\0\"[0];"); 236 | assert(16, ({ "\20"[0]; }), "\"\20\"[0];"); 237 | assert(65, ({ "\101"[0]; }), "\"\101\"[0];"); 238 | assert(104, ({ "\1500"[0]; }), "\"\1500\"[0];"); 239 | 240 | assert(0, ({ "\x00"[0]; }), "\"\x00\"[0];"); 241 | assert(119, ({ "\x77"[0]; }), "\"\x77\"[0];"); 242 | assert(165, ({ "\xA5"[0]; }), "\"\xA5\"[0];"); 243 | assert(255, ({ "\x00ff"[0]; }), "\"\x00ff\"[0];"); 244 | 245 | assert(2, ({ int x=2; { int x=3; } x; }), "int x=2; { int x=3; } x;"); 246 | assert(2, ({ int x=2; { int x=3; } int y=4; x; }), "int x=2; { int x=3; } int y=4; x;"); 247 | assert(3, ({ int x=2; { x=3; } x; }), "int x=2; { x=3; } x;"); 248 | 249 | assert(3, (1, 2, 3), "(1, 2, 3)"); 250 | assert(5, ({ int i=2, j=3; (i=5,j)=6; i; }), "int i=2, j=3; (i=5,j)=6; i;"); 251 | assert(6, ({ int i=2, j=3; (i=5,j)=6; j; }), "int i=2, j=3; (i=5,j)=6; j;"); 252 | 253 | assert(1, ({ struct {int a; int b;} x; x.a=1; x.b=2; x.a; }), "struct {int a; int b;} x; x.a=1; x.b=2; x.a;"); 254 | assert(2, ({ int x[5]; int *y=x+2; y-x; }), "int x[5]; int *y=x+2; y-x;"); 255 | 256 | assert(1, ({ struct {int a; int b;} x; x.a=1; x.b=2; x.a; }), "struct {int a; int b;} x; x.a=1; x.b=2; x.a;"); 257 | assert(2, ({ struct {int a; int b;} x; x.a=1; x.b=2; x.b; }), "struct {int a; int b;} x; x.a=1; x.b=2; x.b;"); 258 | assert(1, ({ struct {char a; int b; char c;} x; x.a=1; x.b=2; x.c=3; x.a; }), "struct {char a; int b; char c;} x; x.a=1; x.b=2; x.c=3; x.a;"); 259 | assert(2, ({ struct {char a; int b; char c;} x; x.b=1; x.b=2; x.c=3; x.b; }), "struct {char a; int b; char c;} a; x.b=x; x.a=2; x.b=3; x.b;"); 260 | assert(3, ({ struct {char a; int b; char c;} x; x.a=1; x.b=2; x.c=3; x.c; }), "struct {char a; int b; char c;} x; x.a=1; x.b=2; x.c=3; x.c;"); 261 | 262 | assert(0, ({ struct {int a; int b;} x[3]; int *p=x; p[0]=0; x[0].a; }), "struct {int a; int b;} x[3]; int *p=x; p[0]=0; x[0].a;"); 263 | assert(1, ({ struct {int a; int b;} x[3]; int *p=x; p[1]=1; x[0].b; }), "struct {int a; int b;} x[3]; int *p=x; p[1]=1; x[0].b;"); 264 | assert(2, ({ struct {int a; int b;} x[3]; int *p=x; p[2]=2; x[1].a; }), "struct {int a; int b;} x[3]; int *p=x; p[2]=2; x[1].a;"); 265 | assert(3, ({ struct {int a; int b;} x[3]; int *p=x; p[3]=3; x[1].b; }), "struct {int a; int b;} x[3]; int *p=x; p[3]=3; x[1].b;"); 266 | 267 | assert(6, ({ struct {int a[3]; int b[5];} x; int *p=&x; x.a[0]=6; p[0]; }), "struct {int a[3]; int b[5];} x; int *p=&x; x.a[0]=6; p[0];"); 268 | assert(7, ({ struct {int a[3]; int b[5];} x; int *p=&x; x.b[0]=7; p[3]; }), "struct {int a[3]; int b[5];} x; int *p=&x; x.b[0]=7; p[3];"); 269 | 270 | assert(6, ({ struct { struct { int b; } a; } x; x.a.b=6; x.a.b; }), "struct { struct { int b; } a; } x; x.a.b=6; x.a.b;"); 271 | 272 | assert(8, ({ struct {int a;} x; sizeof(x); }), "struct {int a;} x; sizeof(x);"); 273 | assert(16, ({ struct {int a; int b;} x; sizeof(x); }), "struct {int a; int b;} x; sizeof(x);"); 274 | assert(24, ({ struct {int a[3];} x; sizeof(x); }), "struct {int a[3];} x; sizeof(x);"); 275 | assert(32, ({ struct {int a;} x[4]; sizeof(x); }), "struct {int a;} x[4]; sizeof(x);"); 276 | assert(48, ({ struct {int a[3];} x[2]; sizeof(x); }), "struct {int a[3];} x[2]; sizeof(x)};"); 277 | assert(2, ({ struct {char a; char b;} x; sizeof(x); }), "struct {char a; char b;} x; sizeof(x);"); 278 | assert(9, ({ struct {char a; int b;} x; sizeof(x); }), "struct {char a; int b;} x; sizeof(x);"); 279 | 280 | printf("OK\n"); 281 | return 0; 282 | } 283 | -------------------------------------------------------------------------------- /src/codegen.rs: -------------------------------------------------------------------------------- 1 | use crate::{Function, Node, NodeKind, Tokens, TypeKind, Var}; 2 | 3 | const ARG_REG8: &[&str] = &["dil", "sil", "dl", "cl", "r8b", "r9b"]; 4 | const ARG_REG64: &[&str] = &["rdi", "rsi", "rdx", "rcx", "r8", "r9"]; 5 | 6 | impl Tokens { 7 | pub(crate) fn codegen(&mut self, asm: &mut Vec) { 8 | for func in &mut self.functions { 9 | func.stack_size = Some(func.assign_lvar_offset()); 10 | } 11 | self.emit_data(asm); 12 | let mut count = 0; 13 | for func in &self.functions { 14 | asm.push(String::from(".intel_syntax noprefix")); 15 | asm.push(format!(".globl {}", func.name)); 16 | asm.push(String::from(".text")); 17 | asm.push(format!("{}:", func.name)); 18 | 19 | asm.push(String::from(" push rbp")); 20 | asm.push(String::from(" mov rbp, rsp")); 21 | log::debug!("stack size={:?}", func.stack_size); 22 | asm.push(format!(" sub rsp, {}", func.stack_size.unwrap())); 23 | 24 | func.gen_param(asm); 25 | 26 | func.gen_stmt(&func.body, asm, &mut count); 27 | asm.push(String::from(" pop rax")); 28 | 29 | asm.push(String::from(" mov rsp, rbp")); 30 | asm.push(String::from(" pop rbp")); 31 | asm.push(String::from(" ret")); 32 | } 33 | } 34 | 35 | fn emit_data(&self, asm: &mut Vec) { 36 | for global in &self.globals { 37 | asm.push(String::from(".data")); 38 | asm.push(format!(".globl {}", global.name)); 39 | asm.push(format!("{}:", global.name)); 40 | 41 | if let Some(data) = global.init_data.as_ref() { 42 | for ch in data.chars() { 43 | asm.push(format!(" .byte {}", ch as u8)); 44 | } 45 | asm.push(String::from(" .byte 0")); 46 | } else { 47 | asm.push(format!(" .zero {}", global.ty.size().unwrap())); 48 | } 49 | } 50 | } 51 | } 52 | 53 | impl Function { 54 | fn gen_param(&self, asm: &mut Vec) { 55 | for (i, var) in self.params.iter().enumerate() { 56 | let var = self.find_lvar(&var).unwrap(); 57 | asm.push(String::from(" mov rax, rbp")); 58 | asm.push(format!(" sub rax, {}", var.offset)); 59 | asm.push(String::from(" push rax")); 60 | asm.push(format!(" push {}", ARG_REG64[i])); 61 | asm.push(String::from(" pop rdi")); 62 | asm.push(String::from(" pop rax")); 63 | if matches!(var.ty.size(), Some(size) if size == 1) { 64 | asm.push(String::from(" mov [rax], dil")); 65 | } else { 66 | asm.push(String::from(" mov [rax], rdi")); 67 | } 68 | asm.push(String::from(" push rdi")); 69 | } 70 | } 71 | 72 | fn assign_lvar_offset(&mut self) -> u16 { 73 | let mut offset = 0; 74 | log::debug!("locals={:?}", self.locals); 75 | for lvar in &mut self.locals.iter_mut() { 76 | offset += lvar.ty.size().unwrap(); 77 | lvar.offset = offset; 78 | } 79 | (offset + 16 - 1) / 16 * 16 80 | } 81 | 82 | fn find_lvar(&self, var: &Var) -> Option<&Var> { 83 | self.locals 84 | .iter() 85 | .find(|lvar| lvar.name == var.name && lvar.id == var.id) 86 | } 87 | 88 | fn load(&self, node: &Node, asm: &mut Vec) { 89 | if let Some(ty) = &node.ty { 90 | if let TypeKind::Array { .. } = ty.kind { 91 | return; 92 | } 93 | if matches!(ty.size(), Some(size) if size == 1) { 94 | asm.push(String::from(" movzx rax, BYTE PTR [rax]")); 95 | return; 96 | } 97 | } 98 | 99 | asm.push(String::from(" mov rax, [rax]")) 100 | } 101 | 102 | fn store(&self, node: &Node, asm: &mut Vec) { 103 | if let Some(ty) = &node.ty { 104 | if matches!(ty.size(), Some(size) if size == 1) { 105 | asm.push(String::from(" mov [rax], dil")); 106 | return; 107 | } 108 | } 109 | 110 | asm.push(String::from(" mov [rax], rdi")); 111 | } 112 | 113 | fn gen_lval(&self, node: &Node, asm: &mut Vec, count: &mut usize) { 114 | match &node.kind { 115 | NodeKind::Var(var) => { 116 | if var.is_local { 117 | asm.push(String::from(" mov rax, rbp")); 118 | asm.push(format!( 119 | " sub rax, {}", 120 | self.find_lvar(&var).unwrap().offset 121 | )); 122 | } else { 123 | asm.push(format!(" lea rax, {}[rip]", var.name)); 124 | } 125 | asm.push(String::from(" push rax")); 126 | } 127 | NodeKind::Deref => { 128 | if let Some(node) = node.lhs.as_ref() { 129 | self.gen_expr(&node, asm, count); 130 | } 131 | } 132 | NodeKind::Comma => { 133 | if let Some(node) = node.lhs.as_ref() { 134 | self.gen_expr(&node, asm, count); 135 | } 136 | if let Some(node) = node.rhs.as_ref() { 137 | self.gen_lval(&node, asm, count); 138 | } 139 | } 140 | NodeKind::Member(member) => { 141 | if let Some(node) = node.lhs.as_ref() { 142 | self.gen_lval(&node, asm, count); 143 | } 144 | asm.push(String::from(" pop rax")); 145 | asm.push(format!(" add rax, {}", member.offset)); 146 | asm.push(String::from(" push rax")); 147 | } 148 | _ => unreachable!("not lval"), 149 | } 150 | } 151 | 152 | pub fn gen_stmt(&self, node: &Node, asm: &mut Vec, count: &mut usize) { 153 | match &node.kind { 154 | NodeKind::Return => { 155 | if let Some(node) = node.lhs.as_ref() { 156 | self.gen_expr(node, asm, count); 157 | } 158 | asm.push(String::from(" pop rax")); 159 | asm.push(String::from(" mov rsp, rbp")); 160 | asm.push(String::from(" pop rbp")); 161 | asm.push(String::from(" ret")); 162 | return; 163 | } 164 | NodeKind::Block { body } => { 165 | for node in body.iter() { 166 | self.gen_stmt(node, asm, count); 167 | } 168 | return; 169 | } 170 | NodeKind::ExprStmt => { 171 | if let Some(node) = node.lhs.as_ref() { 172 | self.gen_expr(&node, asm, count); 173 | asm.push(String::from(" add rsp, 8")); 174 | } 175 | return; 176 | } 177 | NodeKind::If { cond, then, els } => { 178 | *count += 1; 179 | let c = count.clone(); 180 | self.gen_expr(&cond, asm, count); 181 | asm.push(String::from(" pop rax")); 182 | asm.push(String::from(" cmp rax, 0")); 183 | asm.push(format!(" je .L.else{}", c)); 184 | self.gen_stmt(&then, asm, count); 185 | asm.push(format!(" jmp .L.end{}", c)); 186 | asm.push(format!(".L.else{}:", c)); 187 | if let Some(els) = els { 188 | self.gen_stmt(&els, asm, count); 189 | } 190 | asm.push(format!(".L.end{}:", c)); 191 | return; 192 | } 193 | NodeKind::While { cond, then } => { 194 | *count += 1; 195 | let c = count.clone(); 196 | asm.push(format!(".L.begin{}:", c)); 197 | self.gen_expr(&cond, asm, count); 198 | asm.push(String::from(" pop rax")); 199 | asm.push(String::from(" cmp rax, 0")); 200 | asm.push(format!(" je .L.end{}", c)); 201 | self.gen_stmt(&then, asm, count); 202 | asm.push(format!(" jmp .L.begin{}", c)); 203 | asm.push(format!(".L.end{}:", c)); 204 | return; 205 | } 206 | NodeKind::For { 207 | init, 208 | cond, 209 | inc, 210 | then, 211 | } => { 212 | *count += 1; 213 | let c = count.clone(); 214 | self.gen_stmt(&init, asm, count); 215 | asm.push(format!(".L.begin{}:", c)); 216 | if let Some(cond) = cond { 217 | self.gen_expr(&cond, asm, count); 218 | asm.push(String::from(" pop rax")); 219 | asm.push(String::from(" cmp rax, 0")); 220 | asm.push(format!(" je .L.end{}", c)); 221 | } 222 | self.gen_stmt(&then, asm, count); 223 | if let Some(inc) = inc { 224 | self.gen_expr(&inc, asm, count); 225 | } 226 | asm.push(format!(" jmp .L.begin{}", c)); 227 | asm.push(format!(".L.end{}:", c)); 228 | return; 229 | } 230 | _ => (), 231 | } 232 | } 233 | 234 | pub fn gen_expr(&self, node: &Node, asm: &mut Vec, count: &mut usize) { 235 | asm.push(format!(" .loc 1 {}", node.token.line_number)); 236 | match &node.kind { 237 | NodeKind::Num(val) => { 238 | asm.push(format!(" push {}", val)); 239 | return; 240 | } 241 | NodeKind::Var { .. } | NodeKind::Member(_) => { 242 | self.gen_lval(&node, asm, count); 243 | asm.push(String::from(" pop rax")); 244 | self.load(&node, asm); 245 | asm.push(String::from(" push rax")); 246 | return; 247 | } 248 | NodeKind::Assign => { 249 | if let Some(node) = node.lhs.as_ref() { 250 | self.gen_lval(&node, asm, count); 251 | } 252 | if let Some(node) = node.rhs.as_ref() { 253 | self.gen_expr(&node, asm, count); 254 | } 255 | 256 | asm.push(String::from(" pop rdi")); 257 | asm.push(String::from(" pop rax")); 258 | self.store(&node, asm); 259 | asm.push(String::from(" push rdi")); 260 | return; 261 | } 262 | NodeKind::Addr => { 263 | if let Some(node) = node.lhs.as_ref() { 264 | self.gen_lval(&node, asm, count); 265 | } 266 | return; 267 | } 268 | NodeKind::Deref => { 269 | if let Some(node) = node.lhs.as_ref() { 270 | self.gen_expr(&node, asm, count); 271 | } 272 | asm.push(String::from(" pop rax")); 273 | self.load(&node, asm); 274 | asm.push(String::from(" push rax")); 275 | return; 276 | } 277 | NodeKind::StmtExpr { body } => { 278 | let mut body = body.clone(); 279 | let last = body.pop().unwrap(); 280 | for node in body.iter() { 281 | self.gen_stmt(&node, asm, count); 282 | } 283 | self.gen_expr(&last, asm, count); 284 | return; 285 | } 286 | NodeKind::FuncCall { name, args } => { 287 | let mut nargs = 0; 288 | for arg in args { 289 | self.gen_expr(&arg, asm, count); 290 | nargs += 1; 291 | } 292 | 293 | for i in (0..nargs).rev() { 294 | asm.push(format!(" pop {}", ARG_REG64[i])); 295 | } 296 | 297 | asm.push(String::from(" mov rax, 0")); 298 | asm.push(format!(" call {}", name)); 299 | asm.push(String::from(" push rax")); 300 | return; 301 | } 302 | NodeKind::Comma => { 303 | if let Some(node) = node.lhs.as_ref() { 304 | self.gen_expr(&node, asm, count); 305 | } 306 | if let Some(node) = node.rhs.as_ref() { 307 | self.gen_expr(&node, asm, count); 308 | } 309 | return; 310 | } 311 | _ => (), 312 | } 313 | 314 | if let Some(node) = node.lhs.as_ref() { 315 | self.gen_expr(&node, asm, count); 316 | } 317 | if let Some(node) = node.rhs.as_ref() { 318 | self.gen_expr(&node, asm, count); 319 | } 320 | asm.push(String::from(" pop rdi")); 321 | asm.push(String::from(" pop rax")); 322 | 323 | match node.kind { 324 | NodeKind::Add => { 325 | asm.push(String::from(" add rax, rdi")); 326 | } 327 | NodeKind::Sub => { 328 | asm.push(String::from(" sub rax, rdi")); 329 | } 330 | NodeKind::Mul => { 331 | asm.push(String::from(" imul rax, rdi")); 332 | } 333 | NodeKind::Div => { 334 | asm.push(String::from(" cqo")); 335 | asm.push(String::from(" idiv rdi")); 336 | } 337 | NodeKind::Eq => { 338 | asm.push(String::from(" cmp rax, rdi")); 339 | asm.push(String::from(" sete al")); 340 | asm.push(String::from(" movzb rax, al")); 341 | } 342 | NodeKind::Ne => { 343 | asm.push(String::from(" cmp rax, rdi")); 344 | asm.push(String::from(" setne al")); 345 | asm.push(String::from(" movzb rax, al")); 346 | } 347 | NodeKind::Lt => { 348 | asm.push(String::from(" cmp rax, rdi")); 349 | asm.push(String::from(" setl al")); 350 | asm.push(String::from(" movzb rax, al")); 351 | } 352 | NodeKind::Le => { 353 | asm.push(String::from(" cmp rax, rdi")); 354 | asm.push(String::from(" setle al")); 355 | asm.push(String::from(" movzb rax, al")); 356 | } 357 | _ => {} 358 | } 359 | 360 | asm.push(String::from(" push rax")); 361 | } 362 | } 363 | -------------------------------------------------------------------------------- /src/parse.rs: -------------------------------------------------------------------------------- 1 | use crate::{ 2 | Function, Member, Node, NodeKind, Scope, Token, TokenKind, Tokens, Type, TypeKind, Var, 3 | VarScope, 4 | }; 5 | use std::collections::LinkedList; 6 | 7 | impl Token { 8 | fn get_ident(&self) -> Option { 9 | match self.kind { 10 | TokenKind::Ident => Some(self.str.clone()), 11 | _ => None, 12 | } 13 | } 14 | } 15 | 16 | impl Node { 17 | fn new(kind: NodeKind, token: &Token) -> Self { 18 | Node { 19 | kind, 20 | lhs: None, 21 | rhs: None, 22 | ty: None, 23 | token: token.clone(), 24 | } 25 | } 26 | 27 | fn new_binary(kind: NodeKind, lhs: Node, rhs: Node, token: &Token) -> Self { 28 | Node { 29 | kind, 30 | lhs: Some(Box::new(lhs)), 31 | rhs: Some(Box::new(rhs)), 32 | ty: None, 33 | token: token.clone(), 34 | } 35 | } 36 | 37 | fn new_unary(kind: NodeKind, lhs: Node, token: &Token) -> Self { 38 | Node { 39 | kind, 40 | lhs: Some(Box::new(lhs)), 41 | rhs: None, 42 | ty: None, 43 | token: token.clone(), 44 | } 45 | } 46 | 47 | fn new_node_num(val: u16, token: &Token) -> Self { 48 | Node { 49 | kind: NodeKind::Num(val), 50 | lhs: None, 51 | rhs: None, 52 | ty: None, 53 | token: token.clone(), 54 | } 55 | } 56 | 57 | fn new_node_var(var: Var, ty: Type, token: &Token) -> Self { 58 | Node { 59 | kind: NodeKind::Var(var), 60 | lhs: None, 61 | rhs: None, 62 | ty: Some(ty), 63 | token: token.clone(), 64 | } 65 | } 66 | 67 | fn new_block(body: Vec, token: &Token) -> Self { 68 | Node { 69 | kind: NodeKind::Block { 70 | body: Box::new(body), 71 | }, 72 | lhs: None, 73 | rhs: None, 74 | ty: None, 75 | token: token.clone(), 76 | } 77 | } 78 | 79 | fn new_add(lhs: Node, rhs: Node, token: &Token) -> Self { 80 | let mut lhs = lhs; 81 | let mut rhs = rhs; 82 | 83 | lhs.add_type(); 84 | rhs.add_type(); 85 | 86 | if let Some(lhs_ty) = &lhs.ty { 87 | let lhs_ty = lhs_ty.clone(); 88 | if let Some(rhs_ty) = &rhs.ty { 89 | if lhs_ty.is_integer() && rhs_ty.is_integer() { 90 | return Node::new_binary(NodeKind::Add, lhs, rhs, token); 91 | } 92 | 93 | if lhs_ty.is_pointer() && rhs_ty.is_pointer() { 94 | panic!("invalid operands") 95 | } 96 | 97 | if !lhs_ty.is_pointer() && rhs_ty.is_pointer() { 98 | let tmp = lhs; 99 | lhs = rhs; 100 | rhs = tmp; 101 | } 102 | 103 | return Node::new_binary( 104 | NodeKind::Add, 105 | lhs, 106 | Node::new_binary( 107 | NodeKind::Mul, 108 | rhs, 109 | Self::new_node_num(lhs_ty.base().unwrap().size().unwrap(), token), 110 | token, 111 | ), 112 | token, 113 | ); 114 | } 115 | } 116 | 117 | unreachable!("invalid operands") 118 | } 119 | 120 | fn new_sub(lhs: Node, rhs: Node, token: &Token) -> Self { 121 | let mut lhs = lhs; 122 | let mut rhs = rhs; 123 | 124 | lhs.add_type(); 125 | rhs.add_type(); 126 | 127 | if let Some(lhs_ty) = &lhs.ty { 128 | if let Some(rhs_ty) = &rhs.ty { 129 | if lhs_ty.is_integer() && rhs_ty.is_integer() { 130 | return Node::new_binary(NodeKind::Sub, lhs, rhs, token); 131 | } 132 | 133 | if rhs_ty.is_integer() { 134 | match &lhs_ty.kind { 135 | TypeKind::Ptr { base, .. } | TypeKind::Array { base, .. } => { 136 | let mut rhs = Node::new_binary( 137 | NodeKind::Mul, 138 | rhs, 139 | Self::new_node_num(base.size().unwrap(), token), 140 | token, 141 | ); 142 | rhs.add_type(); 143 | let ty = lhs.ty.clone(); 144 | let mut node = Node::new_binary(NodeKind::Sub, lhs, rhs, token); 145 | node.ty = ty; 146 | return node; 147 | } 148 | _ => (), 149 | } 150 | } 151 | 152 | if rhs_ty.is_pointer() { 153 | match &lhs_ty.clone().kind { 154 | TypeKind::Ptr { base, .. } | TypeKind::Array { base, .. } => { 155 | let mut node = Node::new_binary(NodeKind::Sub, lhs, rhs, token); 156 | node.ty = Some(Type::type_int()); 157 | return Node::new_binary( 158 | NodeKind::Div, 159 | node, 160 | Self::new_node_num(base.size().unwrap(), token), 161 | token, 162 | ); 163 | } 164 | _ => (), 165 | } 166 | } 167 | } 168 | } 169 | 170 | panic!("invalid operands: lhs={:?}, rhs={:?}", lhs, rhs); 171 | } 172 | } 173 | 174 | impl Tokens { 175 | pub fn new(tokens: Vec) -> Self { 176 | let mut scope = LinkedList::new(); 177 | scope.push_front(Scope::default()); 178 | Tokens { 179 | locals: LinkedList::new(), 180 | globals: LinkedList::new(), 181 | scope, 182 | tokens, 183 | index: 0, 184 | functions: LinkedList::new(), 185 | string_literal_id: 0, 186 | } 187 | } 188 | 189 | fn next(&mut self) -> Option<&Token> { 190 | self.index += 1; 191 | self.tokens.get(self.index - 1) 192 | } 193 | 194 | fn next_token(&self) -> Option<&Token> { 195 | self.tokens.get(self.index + 1) 196 | } 197 | 198 | fn find_var(&self) -> Option<&Var> { 199 | for scope in self.scope.iter().rev() { 200 | for var in scope.vars.iter() { 201 | if var.name.len() == self.token().str.len() && var.name == self.token().str { 202 | return Some(&var.var); 203 | } 204 | } 205 | } 206 | None 207 | } 208 | 209 | fn enter_scope(&mut self) { 210 | self.scope.push_front(Scope::default()); 211 | } 212 | 213 | fn leave_scope(&mut self) { 214 | self.scope.pop_front(); 215 | } 216 | 217 | fn token(&self) -> &Token { 218 | &self.tokens[self.index] 219 | } 220 | 221 | pub fn expr(&mut self) -> Node { 222 | let mut node = self.assign(); 223 | 224 | let token = self.token().clone(); 225 | while self.consume(',') { 226 | node = Node::new_unary(NodeKind::ExprStmt, node, &token); 227 | node = Node::new_binary(NodeKind::Comma, node, self.assign(), &token); 228 | } 229 | node 230 | } 231 | 232 | fn assign(&mut self) -> Node { 233 | let mut node = self.equality(); 234 | if self.consume("=") { 235 | node = Node::new_binary(NodeKind::Assign, node, self.assign(), self.token()); 236 | } 237 | node 238 | } 239 | 240 | fn global_variable(&mut self) { 241 | let ty = self.declspec(); 242 | let mut first = true; 243 | 244 | while !self.consume(';') { 245 | if !first { 246 | self.expect(','); 247 | } 248 | first = false; 249 | 250 | let ty = self.declarator(ty.clone()); 251 | let gvar = self.add_gvar( 252 | ty.clone().name.unwrap().get_ident().unwrap(), 253 | ty.clone(), 254 | None, 255 | ); 256 | Node::new_node_var(gvar, ty, self.token()); 257 | } 258 | } 259 | 260 | pub fn is_function(&mut self) -> bool { 261 | if self.equal(';') { 262 | return false; 263 | } 264 | let mut tokens = Self { 265 | tokens: self.tokens.clone(), 266 | locals: LinkedList::new(), 267 | globals: LinkedList::new(), 268 | scope: LinkedList::new(), 269 | index: self.index, 270 | functions: LinkedList::new(), 271 | string_literal_id: 0, 272 | }; 273 | let ty = tokens.declspec(); 274 | let ty = tokens.declarator(ty); 275 | matches!(ty.kind, TypeKind::Func { .. }) 276 | } 277 | 278 | pub fn program(&mut self) { 279 | loop { 280 | log::debug!("program token={:?}", self.token()); 281 | if let TokenKind::Eof = self.token().kind { 282 | break; 283 | } 284 | 285 | if self.is_function() { 286 | let function = self.function(); 287 | self.functions.push_back(function); 288 | continue; 289 | } 290 | 291 | self.global_variable(); 292 | } 293 | log::debug!("functions={:?}", self.functions); 294 | } 295 | 296 | fn push_scope(&mut self, name: String, var: Var) -> Option<&VarScope> { 297 | let sc = VarScope { name, var }; 298 | if let Some(scope) = self.scope.front_mut() { 299 | scope.vars.push_front(sc); 300 | return scope.vars.front(); 301 | } 302 | None 303 | } 304 | 305 | fn add_lvar(&mut self, name: String, ty: Type) -> Var { 306 | let lvar = Var { 307 | id: self.locals.front().map_or(0, |lvar| lvar.id + 1), 308 | name: name.clone(), 309 | offset: 0, 310 | ty, 311 | is_local: true, 312 | init_data: None, 313 | }; 314 | self.locals.push_front(lvar.clone()); 315 | self.push_scope(name, lvar.clone()); 316 | lvar 317 | } 318 | 319 | fn add_gvar(&mut self, name: String, ty: Type, init_data: Option) -> Var { 320 | let gvar = Var { 321 | id: 0, 322 | name: name.clone(), 323 | offset: 0, 324 | ty, 325 | is_local: false, 326 | init_data, 327 | }; 328 | self.globals.push_front(gvar.clone()); 329 | self.push_scope(name, gvar.clone()); 330 | gvar 331 | } 332 | 333 | fn new_string_literal(&mut self, ty: Type, init_data: String) -> Var { 334 | let name = format!(".L..{}", self.string_literal_id); 335 | self.string_literal_id += 1; 336 | self.add_gvar(name, ty, Some(init_data)) 337 | } 338 | 339 | fn get_number(&self) -> u16 { 340 | if let TokenKind::Num(val) = self.token().kind { 341 | return val; 342 | } 343 | unreachable!("expected a number: {:?}", self.token()); 344 | } 345 | 346 | fn declspec(&mut self) -> Type { 347 | if self.consume("char") { 348 | return Type::type_char(); 349 | } 350 | 351 | if self.consume("int") { 352 | return Type::type_int(); 353 | } 354 | 355 | if self.consume("struct") { 356 | return self.struct_decl(); 357 | } 358 | 359 | self.error_token("typename expected"); 360 | unreachable!() 361 | } 362 | 363 | fn struct_members(&mut self) -> Type { 364 | let mut members = Vec::new(); 365 | let token = self.token().clone(); 366 | 367 | let mut offset = 0; 368 | while !self.consume('}') { 369 | let basety = self.declspec(); 370 | 371 | let mut i = 0; 372 | log::debug!("struct members={:?} i={}", basety, i); 373 | while !self.consume(';') { 374 | log::debug!("i={}", i); 375 | if i != 0 { 376 | self.expect(','); 377 | } 378 | i += 1; 379 | 380 | let ty = self.declarator(basety.clone()); 381 | members.push(Member { 382 | ty: ty.clone(), 383 | name: ty.name.clone(), 384 | offset, 385 | }); 386 | offset += ty.size().unwrap(); 387 | } 388 | } 389 | 390 | Type::type_struct(members, offset, token) 391 | } 392 | 393 | fn struct_decl(&mut self) -> Type { 394 | self.expect('{'); 395 | self.struct_members() 396 | } 397 | 398 | fn struct_ref(&self, lhs: &mut Node) -> Node { 399 | lhs.add_type(); 400 | match &lhs.ty { 401 | Some(ty) if matches!(ty.kind, TypeKind::Struct { .. }) => (), 402 | _ => self.error_token("not a struct`"), 403 | } 404 | 405 | Node::new_unary( 406 | NodeKind::Member(self.get_struct_member(lhs.ty.clone().unwrap())), 407 | lhs.clone(), 408 | self.token(), 409 | ) 410 | } 411 | 412 | fn get_struct_member(&self, ty: Type) -> Member { 413 | if let TypeKind::Struct { members, .. } = ty.kind { 414 | log::debug!( 415 | "struct members={:?} token name={} token loc={}", 416 | members, 417 | self.token().str, 418 | self.token().loc 419 | ); 420 | let name = self.token().clone(); 421 | let member = members 422 | .iter() 423 | .find(|member| { 424 | member.name.as_ref().map_or(false, |member_name| { 425 | member_name.str == name.str && member_name.loc != name.loc 426 | }) 427 | }) 428 | .unwrap(); 429 | return member.clone(); 430 | } 431 | self.error_token("no such member"); 432 | unreachable!() 433 | } 434 | 435 | fn func_params(&mut self, ty: Type) -> Type { 436 | let mut params = Vec::new(); 437 | 438 | while !self.consume(')') { 439 | log::debug!("type_suffix token={:?}", self.token()); 440 | if params.len() > 0 { 441 | self.expect(","); 442 | } 443 | let basety = self.declspec(); 444 | let ty = self.declarator(basety); 445 | params.push(ty); 446 | } 447 | 448 | ty.func_type(params) 449 | } 450 | 451 | fn type_suffix(&mut self, ty: Type) -> Type { 452 | if self.consume("(") { 453 | return self.func_params(ty); 454 | } 455 | 456 | if self.consume('[') { 457 | let sz = self.get_number(); 458 | self.next(); 459 | self.expect(']'); 460 | return self.type_suffix(ty.clone()).array_of(sz); 461 | } 462 | ty 463 | } 464 | 465 | fn declarator(&mut self, ty: Type) -> Type { 466 | let mut ty = ty; 467 | while self.consume('*') { 468 | ty = ty.pointer_to(); 469 | } 470 | 471 | if !matches!(self.token().kind, TokenKind::Ident) { 472 | panic!("expected a variable name, got {:?}", self.token()); 473 | } 474 | 475 | let func_name = self.token().clone(); 476 | self.next(); 477 | log::debug!("declarator token={:?}", self.token()); 478 | let mut ty = self.type_suffix(ty); 479 | ty.name = Some(func_name); 480 | ty 481 | } 482 | 483 | fn declaration(&mut self) -> Node { 484 | let basety = self.declspec(); 485 | let mut body = Vec::new(); 486 | 487 | let mut i = 0; 488 | while !self.consume(';') { 489 | if i > 0 { 490 | self.expect(','); 491 | } 492 | i += 1; 493 | 494 | let ty = self.declarator(basety.clone()); 495 | let lvar = self.add_lvar(ty.clone().name.unwrap().get_ident().unwrap(), ty.clone()); 496 | let lhs = Node::new_node_var(lvar, ty, self.token()); 497 | 498 | if !self.consume('=') { 499 | continue; 500 | } 501 | 502 | let rhs = self.assign(); 503 | let node = Node::new_binary(NodeKind::Assign, lhs, rhs, self.token()); 504 | body.push(Node::new_unary(NodeKind::ExprStmt, node, self.token())); 505 | } 506 | 507 | log::debug!("body={:?}", body); 508 | let node = Node::new_block(body, self.token()); 509 | log::debug!("declaration last token={:?}", self.token()); 510 | node 511 | } 512 | 513 | fn stmt(&mut self) -> Node { 514 | if self.consume("if") { 515 | self.expect('('); 516 | let cond = self.expr(); 517 | self.expect(')'); 518 | let then = self.stmt(); 519 | let mut node = Node::new( 520 | NodeKind::If { 521 | cond: Box::new(cond), 522 | then: Box::new(then), 523 | els: None, 524 | }, 525 | self.token(), 526 | ); 527 | if self.consume("else") { 528 | let els = self.stmt(); 529 | if let NodeKind::If { cond, then, .. } = node.kind { 530 | node.kind = NodeKind::If { 531 | cond: cond, 532 | then: then, 533 | els: Some(Box::new(els)), 534 | }; 535 | } 536 | } 537 | return node; 538 | }; 539 | 540 | if self.consume("while") { 541 | self.expect('('); 542 | let cond = self.expr(); 543 | self.expect(')'); 544 | let then = self.stmt(); 545 | return Node::new( 546 | NodeKind::While { 547 | cond: Box::new(cond), 548 | then: Box::new(then), 549 | }, 550 | self.token(), 551 | ); 552 | }; 553 | 554 | if self.consume("for") { 555 | self.expect('('); 556 | let init = self.expr_stmt(); 557 | self.expect(';'); 558 | let mut cond = None; 559 | let mut inc = None; 560 | 561 | if !self.consume(';') { 562 | cond = Some(self.expr()); 563 | self.expect(';'); 564 | } 565 | 566 | if !self.consume(')') { 567 | inc = Some(self.expr_stmt()); 568 | self.expect(')'); 569 | } 570 | 571 | let then = self.stmt(); 572 | return Node::new( 573 | NodeKind::For { 574 | init: Box::new(init), 575 | cond: cond.map(|c| Box::new(c)), 576 | inc: inc.map(|i| Box::new(i)), 577 | then: Box::new(then), 578 | }, 579 | self.token(), 580 | ); 581 | }; 582 | 583 | if self.consume("return") { 584 | let node = Node::new_unary(NodeKind::Return, self.expr(), self.token()); 585 | self.expect(';'); 586 | return node; 587 | }; 588 | 589 | if self.consume("{") { 590 | return self.compound_stmt(); 591 | } 592 | 593 | let node = self.expr_stmt(); 594 | self.expect(';'); 595 | node 596 | } 597 | 598 | fn compound_stmt(&mut self) -> Node { 599 | let mut body = Vec::new(); 600 | self.enter_scope(); 601 | while !self.consume("}") { 602 | let mut node = if self.is_type_name() { 603 | log::debug!( 604 | "declaration, token={:?}, index={}", 605 | self.token(), 606 | self.index 607 | ); 608 | self.declaration() 609 | } else { 610 | self.stmt() 611 | }; 612 | node.add_type(); 613 | body.push(node); 614 | } 615 | self.leave_scope(); 616 | Node::new_block(body, self.token()) 617 | } 618 | 619 | fn expr_stmt(&mut self) -> Node { 620 | if self.consume(';') { 621 | return Node::new_block(Vec::new(), self.token()); 622 | } 623 | 624 | let node = Node::new_unary(NodeKind::ExprStmt, self.expr(), self.token()); 625 | node 626 | } 627 | 628 | fn add(&mut self) -> Node { 629 | let mut node = self.mul(); 630 | 631 | loop { 632 | if self.consume('+') { 633 | node = Node::new_add(node, self.mul(), self.token()); 634 | } else if self.consume('-') { 635 | node = Node::new_sub(node, self.mul(), self.token()) 636 | } else { 637 | return node; 638 | } 639 | } 640 | } 641 | 642 | fn mul(&mut self) -> Node { 643 | let mut node = self.unary(); 644 | 645 | loop { 646 | if self.consume("*") { 647 | node = Node::new_binary(NodeKind::Mul, node, self.unary(), self.token()); 648 | } else if self.consume("/") { 649 | node = Node::new_binary(NodeKind::Div, node, self.unary(), self.token()); 650 | } else { 651 | return node; 652 | } 653 | } 654 | } 655 | 656 | /// unary = ("+" | "-" | "*" | "&") unary 657 | /// | postfix 658 | fn unary(&mut self) -> Node { 659 | if self.consume('+') { 660 | return self.unary(); 661 | } else if self.consume('-') { 662 | return Node::new_binary( 663 | NodeKind::Sub, 664 | Node::new_node_num(0, self.token()), 665 | self.unary(), 666 | self.token(), 667 | ); 668 | } else if self.consume('&') { 669 | return Node::new_unary(NodeKind::Addr, self.unary(), self.token()); 670 | } else if self.consume('*') { 671 | return Node::new_unary(NodeKind::Deref, self.unary(), self.token()); 672 | } 673 | self.postfix() 674 | } 675 | 676 | fn postfix(&mut self) -> Node { 677 | let mut node = self.primary(); 678 | 679 | loop { 680 | if self.consume('[') { 681 | let idx = self.expr(); 682 | self.expect(']'); 683 | node = Node::new_unary( 684 | NodeKind::Deref, 685 | Node::new_add(node, idx, self.token()), 686 | self.token(), 687 | ); 688 | continue; 689 | } 690 | 691 | if self.consume('.') { 692 | node = self.struct_ref(&mut node); 693 | self.next(); 694 | continue; 695 | } 696 | 697 | return node; 698 | } 699 | } 700 | 701 | fn primary(&mut self) -> Node { 702 | if self.consume('(') { 703 | if self.consume('{') { 704 | let mut body = self.compound_stmt().body().unwrap(); 705 | if let Some(last_node) = body.pop() { 706 | body.push(*last_node.lhs.unwrap()); 707 | } 708 | let node = Node::new( 709 | NodeKind::StmtExpr { 710 | body: Box::new(body), 711 | }, 712 | self.token(), 713 | ); 714 | self.expect(')'); 715 | return node; 716 | } 717 | let node = self.expr(); 718 | self.expect(')'); 719 | return node; 720 | } 721 | 722 | if self.consume("sizeof") { 723 | let mut node = self.unary(); 724 | node.add_type(); 725 | return Node::new_node_num(node.ty.unwrap().size().unwrap(), self.token()); 726 | } 727 | 728 | if let TokenKind::Ident = self.token().kind { 729 | if self.next_equal("(") { 730 | return self.funcall(); 731 | } 732 | 733 | let var = self.find_var(); 734 | let node = match var { 735 | Some(var) => Node::new_node_var(var.clone(), var.ty.clone(), self.token()), 736 | None => panic!( 737 | "undefined variable: {:?}, locals={:?}, global={:?}, scope={:?}", 738 | self.token(), 739 | self.locals, 740 | self.globals, 741 | self.scope 742 | ), 743 | }; 744 | 745 | self.next(); 746 | return node; 747 | } 748 | 749 | if let TokenKind::Str { ty, str } = self.token().clone().kind { 750 | let var = self.new_string_literal(*ty, str); 751 | log::debug!("string literal: {:?}", var); 752 | self.next(); 753 | return Node::new_node_var(var.clone(), var.ty, self.token()); 754 | } 755 | 756 | if let TokenKind::Num(val) = self.token().kind { 757 | let node = Node::new_node_num(val, self.token()); 758 | self.next(); 759 | return node; 760 | } 761 | 762 | panic!("primary: unexpected token {:?}", self.token()); 763 | } 764 | 765 | fn function(&mut self) -> Function { 766 | let ty = self.declspec(); 767 | let ty = self.declarator(ty); 768 | self.locals = LinkedList::new(); 769 | self.enter_scope(); 770 | 771 | if let TypeKind::Func { params, .. } = ty.clone().kind { 772 | let mut func_params = LinkedList::new(); 773 | 774 | log::debug!("function params={:?}", params); 775 | for param in params.iter() { 776 | let lvar = self.add_lvar( 777 | param.clone().name.unwrap().get_ident().unwrap(), 778 | param.clone(), 779 | ); 780 | func_params.push_back(lvar.clone()) 781 | } 782 | log::debug!("function token={:?}", self.token()); 783 | 784 | let name = ty.clone().name.unwrap().get_ident().unwrap(); 785 | log::debug!("function name={:?}", name); 786 | 787 | self.expect('{'); 788 | let function = Function { 789 | name, 790 | body: self.compound_stmt(), 791 | params: func_params, 792 | locals: self.locals.clone(), 793 | stack_size: None, 794 | }; 795 | self.leave_scope(); 796 | return function; 797 | } 798 | unreachable!("ty is not function") 799 | } 800 | 801 | fn funcall(&mut self) -> Node { 802 | let start = self.token().clone(); 803 | self.next(); 804 | self.next(); 805 | let mut args = Vec::new(); 806 | while !self.consume(')') { 807 | if args.len() > 0 { 808 | log::debug!("args len={}", args.len()); 809 | self.expect(','); 810 | } 811 | args.push(self.assign()); 812 | } 813 | log::debug!("tokentokentoken={:?}", self.token()); 814 | Node::new( 815 | NodeKind::FuncCall { 816 | name: start.str, 817 | args, 818 | }, 819 | self.token(), 820 | ) 821 | } 822 | 823 | fn equality(&mut self) -> Node { 824 | let mut node = self.relational(); 825 | 826 | loop { 827 | if self.consume("==") { 828 | node = Node::new_binary(NodeKind::Eq, node, self.relational(), self.token()); 829 | } else if self.consume("!=") { 830 | node = Node::new_binary(NodeKind::Ne, node, self.relational(), self.token()); 831 | } else { 832 | return node; 833 | } 834 | } 835 | } 836 | 837 | fn relational(&mut self) -> Node { 838 | let mut node = self.add(); 839 | 840 | loop { 841 | if self.consume("<") { 842 | node = Node::new_binary(NodeKind::Lt, node, self.add(), self.token()); 843 | } else if self.consume("<=") { 844 | node = Node::new_binary(NodeKind::Le, node, self.add(), self.token()); 845 | } else if self.consume(">") { 846 | node = Node::new_binary(NodeKind::Lt, self.add(), node, self.token()); 847 | } else if self.consume(">=") { 848 | node = Node::new_binary(NodeKind::Le, self.add(), node, self.token()); 849 | } else { 850 | return node; 851 | } 852 | } 853 | } 854 | 855 | fn expect(&mut self, op: impl Into) { 856 | let token = self.token(); 857 | let op = op.into(); 858 | if matches!(token.kind, TokenKind::Keyword) && matches!(token.kind, TokenKind::Punct) 859 | || token.str.to_string() != op 860 | { 861 | self.error_token(format!("expected: `{}`, actual: `{}`", op, token.str)) 862 | } 863 | self.next(); 864 | } 865 | 866 | fn consume(&mut self, op: impl Into) -> bool { 867 | let token = self.token(); 868 | let op = op.into(); 869 | if matches!(token.kind, TokenKind::Keyword) && matches!(token.kind, TokenKind::Punct) 870 | || token.str.to_string() != op 871 | { 872 | return false; 873 | } 874 | self.next(); 875 | true 876 | } 877 | 878 | fn equal(&self, op: impl Into) -> bool { 879 | let token = self.token(); 880 | let op = op.into(); 881 | if !matches!(token.kind, TokenKind::Keyword) && !matches!(token.kind, TokenKind::Punct) 882 | || token.str.to_string() != op 883 | { 884 | return false; 885 | } 886 | true 887 | } 888 | 889 | fn next_equal(&self, op: impl Into) -> bool { 890 | if let Some(token) = self.next_token() { 891 | let op = op.into(); 892 | if !matches!(token.kind, TokenKind::Keyword) && !matches!(token.kind, TokenKind::Punct) 893 | || token.str.to_string() != op 894 | { 895 | return false; 896 | } 897 | } 898 | true 899 | } 900 | 901 | fn is_type_name(&self) -> bool { 902 | self.equal("int") || self.equal("char") || self.equal("struct") 903 | } 904 | 905 | fn error_token(&self, msg: impl Into) { 906 | panic!( 907 | "{}\n{}^ {} {}:{}", 908 | self.tokens 909 | .iter() 910 | .map(|token| token.str.clone()) 911 | .collect::(), 912 | (1..self.index).map(|_| " ").collect::(), 913 | msg.into(), 914 | self.token().line_number, 915 | self.token().loc, 916 | ) 917 | } 918 | } 919 | --------------------------------------------------------------------------------