├── .gitignore ├── Cargo.toml ├── codegen ├── Cargo.toml ├── lib.s └── src │ ├── brute_alloc.rs │ ├── graph_alloc.rs │ ├── lib.rs │ ├── mips.rs │ └── mips_gen.rs ├── common ├── Cargo.toml └── src │ ├── errors.rs │ ├── ignore_result.rs │ ├── indent_printer.rs │ ├── lib.rs │ ├── loc.rs │ ├── op.rs │ └── ref.rs ├── driver ├── Cargo.toml └── src │ ├── cli.rs │ ├── lib.rs │ ├── test.rs │ └── test_util.rs ├── print ├── Cargo.toml └── src │ ├── ast.rs │ ├── lib.rs │ ├── mips.rs │ ├── scope.rs │ └── tac.rs ├── readme.md ├── syntax ├── Cargo.toml └── src │ ├── ast.rs │ ├── lib.rs │ ├── parser.rs │ ├── parser_ll.rs │ ├── symbol.rs │ └── ty.rs ├── tac ├── Cargo.toml └── src │ ├── iter.rs │ └── lib.rs ├── tacgen ├── Cargo.toml └── src │ ├── info.rs │ └── lib.rs ├── tacopt ├── Cargo.toml └── src │ ├── aliveness.rs │ ├── bb.rs │ ├── common_expr.rs │ ├── const_prop.rs │ ├── copy_prop.rs │ ├── flow.rs │ └── lib.rs └── typeck ├── Cargo.toml └── src ├── lib.rs ├── scope_stack.rs ├── symbol_pass.rs └── type_pass.rs /.gitignore: -------------------------------------------------------------------------------- 1 | Cargo.lock 2 | .idea 3 | target 4 | testcase -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | cargo-features = ["profile-overrides"] 2 | 3 | [workspace] 4 | members = [ 5 | "common", 6 | "print", 7 | "syntax", 8 | "typeck", 9 | "tac", 10 | "tacgen", 11 | "tacopt", 12 | "codegen", 13 | "driver", 14 | ] 15 | 16 | [profile.dev.overrides."*"] 17 | opt-level = 3 -------------------------------------------------------------------------------- /codegen/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "codegen" 3 | version = "0.1.0" 4 | authors = ["MashPlant <740678788@qq.com>"] 5 | edition = "2018" 6 | 7 | [dependencies] 8 | common = { path = "../common" } 9 | tac = { path = "../tac" } 10 
| tacopt = { path = "../tacopt" } 11 | bitset = { git = "https://github.com/MashPlant/bitset" } -------------------------------------------------------------------------------- /codegen/lib.s: -------------------------------------------------------------------------------- 1 | # below are the runtime libary function for decaf 2 | 3 | .data 4 | _STRING_FT: 5 | .ascii "false" 6 | .byte 0, 0, 0 7 | .asciiz "true" 8 | 9 | .text 10 | .globl _PrintBool 11 | _PrintBool: 12 | sll $a0, $a0, 3 # 0 -> 0, 1 -> 8, index 8 is the begining of "true" 13 | la $a0, _STRING_FT($a0) 14 | li $v0, 4 # print_string 15 | syscall 16 | jr $ra 17 | 18 | .text 19 | .globl _ReadLine 20 | _ReadLine: 21 | li $a0, 64 # allocate space, fixed size 64 22 | li $v0, 9 # sbrk 23 | syscall 24 | move $a0, $v0 25 | li $a1, 64 26 | li $v0, 8 # read_string 27 | syscall 28 | move $v0, $a0 29 | # if there is a '\n', make it a '\0' 30 | _RLLoop: 31 | lb $a1, ($a0) 32 | beqz $a1, _RLDone 33 | addiu $a1, $a1, -10 34 | beqz $a1, _RLDone 35 | addiu $a0, $a0, 1 36 | j _RLLoop 37 | _RLDone: 38 | sb $a1, ($a0) # store '\0' on original '\n' or '\0' 39 | jr $ra 40 | 41 | .text 42 | .globl _StringEqual 43 | _StringEqual: 44 | li $v0, 1 45 | _SELoop: 46 | lb $a2, ($a0) 47 | lb $a3, ($a1) 48 | seq $v0, $a2, $a3 49 | beqz $v0, _SEDone 50 | beqz $a2, _SEDone 51 | addu $a0, $a0, 1 52 | addu $a1, $a1, 1 53 | j _SELoop 54 | _SEDone: 55 | jr $ra -------------------------------------------------------------------------------- /codegen/src/brute_alloc.rs: -------------------------------------------------------------------------------- 1 | use crate::{mips_gen::FuncGen, mips::{regs::*, AsmTemplate::*}, Reg}; 2 | 3 | // the easiest way to do register allocation: no allocation at all 4 | // all virtual registers are stored in stack, when actual registers are needed, they are loaded into at most 3 temporary registers 5 | // note that calling convention is still followed, thanks to the design of PreColored register 6 | 7 | impl 
FuncGen<'_, '_> { 8 | pub fn brute_alloc(&mut self) { 9 | const READ: [Regs; 3] = [T0, T1, T2]; // only allocate these 3 registers 10 | for idx in 0..self.bb.len() { 11 | let old = std::mem::replace(&mut self.bb[idx].0, Vec::new()); 12 | let mut new = Vec::with_capacity(old.len() * 2); 13 | for mut t in old { 14 | let (mut r, w) = t.rw_mut(); 15 | for (idx, r) in r.iter_mut().enumerate() { 16 | if let Some(r) = r { 17 | if let Reg::Virtual(r1) = r { 18 | let slot = self.find_spill_slot(*r1); 19 | **r = Reg::Allocated(READ[idx] as u32); 20 | new.push(Lw(**r, Reg::PreColored(SP as u32), slot)); 21 | } 22 | } 23 | } 24 | let wb = if let Some(w) = w { 25 | if let Reg::Virtual(w1) = w { 26 | let slot = self.find_spill_slot(*w1); 27 | *w = Reg::Allocated(READ[2] as u32); 28 | Some(Sw(*w, Reg::PreColored(SP as u32), slot)) 29 | } else { None } 30 | } else { None }; 31 | new.push(t); 32 | if let Some(wb) = wb { new.push(wb); } // write back goes after this instruction 33 | } 34 | self.bb[idx].0 = new; 35 | } 36 | } 37 | } -------------------------------------------------------------------------------- /codegen/src/graph_alloc.rs: -------------------------------------------------------------------------------- 1 | // http://www.cse.iitm.ac.in/~krishna/cs6013/george.pdf 2 | 3 | use common::{HashSet, IndexSet}; 4 | use std::marker::PhantomData; 5 | use crate::Reg; 6 | 7 | pub trait AllocCtx: Sized { 8 | // number of registers to allocate 9 | const K: u32; 10 | 11 | // return (initial virtual registers, initial nodes) 12 | // pre-colored and normal registers are indexed in the same way, you can distinguish them by using different number ranges 13 | fn initial(&self) -> (Vec, Vec); 14 | 15 | // build inference graph, this should include building all edges and all possible moves 16 | // to be more specific, implementation should call allocator.add_edge to build edges 17 | // and initialize allocator.work_list_moves, allocator.nodes[..].move_list to build moves 18 | fn 
build(&self, allocator: &mut Allocator); 19 | 20 | // generate spill related code, no need to build inference graph here, because build() will be called again 21 | fn rewrite(&mut self, spilled_nodes: &HashSet); 22 | 23 | // use result[..].color to replace transform virtual registers in asm into physical registers 24 | fn finish(&mut self, result: &[Node]); 25 | } 26 | 27 | pub struct Node { 28 | pub degree: u32, 29 | pub alias: u32, 30 | pub color: Reg, 31 | pub adj_list: Vec, 32 | pub move_list: Vec<(u32, u32)>, 33 | } 34 | 35 | impl Node { 36 | pub fn new(color: Reg) -> Node { 37 | // pre-colored register's degree is set to a very big value(>= K + number of nodes is ok) 38 | let degree = if let Reg::PreColored(_) = color { std::u32::MAX } else { 0 }; 39 | Node { degree, alias: 0, color, adj_list: Vec::new(), move_list: Vec::new() } 40 | } 41 | 42 | pub fn pre_colored(&self) -> bool { 43 | match self.color { Reg::PreColored(_) => true, _ => false, } 44 | } 45 | 46 | pub fn expect_colored(&self) -> u32 { 47 | match self.color { 48 | Reg::PreColored(r) | Reg::Allocated(r) => r, 49 | Reg::Virtual(r) => panic!("Register allocation not finished yet, now is virtual register {}.", r), 50 | } 51 | } 52 | } 53 | 54 | // some fields the paper mentions are not really necessary, I leave them in comments 55 | // some fields doesn't need to be a set, because only push(guaranteed unique) and iteration are required 56 | // some fields need to be a set and need to record insertion order, use IndexSet 57 | pub struct Allocator { 58 | pub nodes: Vec, 59 | // machine registers, preassigned a color 60 | // pre_colored: HashSet, 61 | // virtual registers, not preassigned a color and not yet processed by the algorithm 62 | initial: Vec, 63 | // list of low-degree non-move-related nodes 64 | simplify_work_list: HashSet, 65 | // low-degree move-related nodes 66 | freeze_work_list: HashSet, 67 | // high-degree nodes 68 | spill_work_list: HashSet, 69 | // nodes marked for spilling during 
this round; initially empty 70 | spilled_nodes: HashSet, 71 | // registers that have been coalesced; 72 | // when the move u = v is coalesced, one of u or v is added to this set, and the other is put back on some work list 73 | coalesced_nodes: HashSet, 74 | // nodes successfully colored 75 | // colored_nodes: Vec, 76 | // stack containing temporaries removed from the graph 77 | select_stack: IndexSet, 78 | // moves that have been coalesced 79 | // coalesced_moves: HashSet<(u32, u32)>, 80 | // moves whose source and target interfere 81 | // constrained_moves: HashSet<(u32, u32)>, 82 | // moves that will no longer be considered for coalescing 83 | // frozen_moves: HashSet<(u32, u32)>, 84 | // moves enabled for possible coalescing 85 | pub work_list_moves: HashSet<(u32, u32)>, 86 | // moves not yet ready for coalescing 87 | active_moves: HashSet<(u32, u32)>, 88 | adj_set: HashSet<(u32, u32)>, 89 | _p: PhantomData, 90 | } 91 | 92 | impl Allocator { 93 | pub fn work(ctx: &mut A) { 94 | // unluckily cannot use #[derive(Default)] because A may not be Default, even though PhantomData is 95 | // I still don't know why rust has such a requirement 96 | let mut a = Allocator { nodes: Vec::new(), initial: Vec::new(), simplify_work_list: HashSet::new(), freeze_work_list: HashSet::new(), spill_work_list: HashSet::new(), spilled_nodes: HashSet::new(), coalesced_nodes: HashSet::new(), select_stack: IndexSet::default(), work_list_moves: HashSet::new(), active_moves: HashSet::new(), adj_set: HashSet::new(), _p: PhantomData }; 97 | // actually no information in `a` is preserved for the next loop 98 | // because in this simple variant of this algo, all coalesces are discarded if spill happens 99 | // so the only reason for creating `a` outside the loop is to reuse some memory 100 | // should remember to clear all fields after each iteration step (`initial` and `nodes` doesn't have to be cleared because they will be reassigned) 101 | let nodes = loop { 102 | let (initial, nodes) = 
ctx.initial(); 103 | a.initial = initial; 104 | a.nodes = nodes; 105 | ctx.build(&mut a); 106 | a.mk_work_list(); 107 | loop { 108 | match () { // just to avoid many if-else 109 | _ if !a.simplify_work_list.is_empty() => a.simplify(), 110 | _ if !a.work_list_moves.is_empty() => a.coalesce(), 111 | _ if !a.freeze_work_list.is_empty() => a.freeze(), 112 | _ if !a.spill_work_list.is_empty() => a.select_spill(), 113 | _ => break, 114 | } 115 | } 116 | a.assign_color(); 117 | if !a.spilled_nodes.is_empty() { 118 | a.rewrite_program(ctx); 119 | } else { break a.nodes; } 120 | }; 121 | ctx.finish(&nodes); 122 | } 123 | 124 | pub fn add_edge(&mut self, u: u32, v: u32) { 125 | if u != v && !self.adj_set.contains(&(u, v)) { 126 | self.adj_set.insert((u, v)); 127 | self.adj_set.insert((v, u)); 128 | let (u, v) = (u as usize, v as usize); 129 | // pre colored register can be the dest of edge, but not the src(or it's adj_list may be too big) 130 | // its degree will not grow, but can decrease starting from std::u32::MAX(still won't have any effect, can never have a degree < K) 131 | if !self.nodes[u].pre_colored() { 132 | self.nodes[u].adj_list.push(v as u32); 133 | self.nodes[u].degree += 1; 134 | } 135 | if !self.nodes[v].pre_colored() { 136 | self.nodes[v].adj_list.push(u as u32); 137 | self.nodes[v].degree += 1; 138 | } 139 | } 140 | } 141 | 142 | // the paper defines many functions that return a set of nodes, we don't really need to allocate space for a set, using an iterator is better 143 | // however rust's lifetime requirement almost make it impossible to define such functions that return an iterator 144 | // because it must borrow self as a whole, so you can't modify any other fields, even though they are not involved in this iterator 145 | // the solution is to inline these functions manually, then rustc knows that it will borrows some fields of self 146 | 147 | fn mk_work_list(&mut self) { 148 | unimplemented!() 149 | } 150 | 151 | fn simplify(&mut self) { 152 | 
unimplemented!() 153 | } 154 | 155 | fn coalesce(&mut self) { 156 | unimplemented!() 157 | } 158 | 159 | fn get_alias(&self, mut _n: u32) -> u32 { 160 | unimplemented!() 161 | } 162 | 163 | fn freeze(&mut self) { 164 | unimplemented!() 165 | } 166 | 167 | fn select_spill(&mut self) { 168 | unimplemented!() 169 | } 170 | 171 | fn assign_color(&mut self) { 172 | let mut available = HashSet::with_capacity(A::K as usize); 173 | for &n in self.select_stack.iter().rev() { // pop all, need to traverse reversely 174 | available.clear(); 175 | for i in 0..A::K { available.insert(i); } 176 | for &w in &self.nodes[n as usize].adj_list { 177 | let a = self.get_alias(w); 178 | match self.nodes[a as usize].color { 179 | Reg::PreColored(r) | Reg::Allocated(r) => { available.remove(&r); } 180 | Reg::Virtual(_) => {} 181 | }; 182 | } 183 | // PreColored nodes should never be added to select_stack 184 | // so this color assignment will not give a PreColored node a wrong color 185 | if let Some(r) = available.iter().nth(0) { 186 | self.nodes[n as usize].color = Reg::Allocated(*r); 187 | } else { 188 | self.spilled_nodes.insert(n); 189 | } 190 | } 191 | self.select_stack.clear(); 192 | for &n in &self.coalesced_nodes { 193 | self.nodes[n as usize].color = self.nodes[self.get_alias(n) as usize].color; 194 | } 195 | } 196 | 197 | fn rewrite_program(&mut self, ctx: &mut A) { 198 | ctx.rewrite(&self.spilled_nodes); 199 | self.spilled_nodes.clear(); 200 | self.coalesced_nodes.clear(); 201 | self.active_moves.clear(); 202 | self.adj_set.clear(); 203 | } 204 | } 205 | -------------------------------------------------------------------------------- /codegen/src/lib.rs: -------------------------------------------------------------------------------- 1 | pub mod mips; 2 | pub mod mips_gen; 3 | pub mod graph_alloc; 4 | pub mod brute_alloc; 5 | 6 | pub enum AllocMethod { Graph, Brute } 7 | 8 | // PreColored/Allocated's values both mean machine register number 9 | #[derive(Copy, Clone, Eq, 
// a register operand in one of the three states it can have during code generation
// PreColored/Allocated's values both mean machine register number, Virtual's value is the id of a virtual register
#[derive(Copy, Clone, Eq, PartialEq)]
pub enum Reg {
    PreColored(u32),
    Allocated(u32),
    Virtual(u32),
}

impl Reg {
    // return the number stored in the register, whatever kind of register it is
    // note that the meaning of the number depends on the kind
    pub fn id(self) -> u32 {
        match self {
            Reg::PreColored(id) | Reg::Allocated(id) | Reg::Virtual(id) => id,
        }
    }
}
= A0 as u32..=A3 as u32; 27 | pub const ARG_N: u32 = 4; 28 | pub const CALLER_SAVE: RangeInclusive = V0 as u32..=T9 as u32; 29 | pub const CALLEE_SAVE: RangeInclusive = S0 as u32..=RA as u32; 30 | } 31 | 32 | // only the syscalls that we used directly in codegen are listed here 33 | // e.g.: ReadString is handled by a library function, so it not listed here 34 | #[derive(Copy, Clone)] 35 | pub enum SysCall { PrintInt = 1, PrintString = 4, ReadInt = 5, Sbrk = 9, Exit = 10 } 36 | 37 | pub const WORD_SIZE: i32 = 4; 38 | 39 | pub enum AsmTemplate { 40 | Bin(BinOp, Reg, Reg, Reg), 41 | BinI(BinOp, Reg, Reg, Imm), 42 | Un(UnOp, Reg, Reg), 43 | Mv(Reg, Reg), 44 | Jal(String), 45 | Jalr(Reg), 46 | J(String), 47 | // expanded to jr $ra, since we don't have any other usage of jr, I just use ret 48 | Ret, 49 | B(String, Reg, bool /* z */), 50 | Lw(Reg /* dst */, Reg /* base */, Imm), 51 | Sw(Reg /* src */, Reg /* base */, Imm), 52 | Li(Reg, Imm), 53 | La(Reg, String), 54 | Label(String), 55 | SysCall(SysCall), 56 | } 57 | 58 | #[derive(Copy, Clone, Eq, PartialEq)] 59 | pub enum Imm { 60 | Int(i32), 61 | // `Tag` is used as a placeholder for undecided immediate value 62 | Tag(u32), 63 | } 64 | 65 | impl AsmTemplate { 66 | // clear `r` and `w`, put all registers it reads into `r`, put all registers it writes into `w` 67 | pub fn rw(&self, r: &mut Vec, w: &mut Vec) { 68 | use AsmTemplate::*; 69 | r.clear(); 70 | w.clear(); 71 | match *self { 72 | Bin(_, w1, r1, r2) => { 73 | r.push(r1); 74 | r.push(r2); 75 | w.push(w1); 76 | } 77 | BinI(_, w1, r1, _) | Un(_, w1, r1) | Mv(w1, r1) | Lw(w1, r1, _) => { 78 | r.push(r1); 79 | w.push(w1); 80 | } 81 | Jal(_) | Jalr(_) => { 82 | for a in regs::ARG { r.push(Reg::PreColored(a as u32)); } 83 | for crs in regs::CALLER_SAVE { w.push(Reg::PreColored(crs as u32)); } 84 | w.push(Reg::PreColored(regs::RA as u32)); 85 | if let Jalr(r1) = *self { r.push(r1); } 86 | } 87 | J(_) | Label(_) => {} 88 | Ret => { 89 | for ces in regs::CALLEE_SAVE { 
r.push(Reg::PreColored(ces as u32)); } 90 | r.push(Reg::PreColored(regs::V0 as u32)); 91 | r.push(Reg::PreColored(regs::V1 as u32)); // though we don't use v1 now, maybe we can extend it in the future? 92 | } 93 | B(_, r1, _) => r.push(r1), 94 | Sw(r1, r2, _) => { 95 | r.push(r1); 96 | r.push(r2); 97 | } 98 | Li(w1, _) | La(w1, _) => w.push(w1), 99 | SysCall(_) => { 100 | // syscall doesn't changed any (allocable) register except V0 for ret value 101 | r.push(Reg::PreColored(regs::A0 as u32)); // now we use A0 at most 102 | r.push(Reg::PreColored(regs::V0 as u32)); // syscall id 103 | w.push(Reg::PreColored(regs::V0 as u32)); // syscall ret value 104 | } 105 | }; 106 | } 107 | 108 | // different from rw(), the pre-colored registers are not taken into consideration(because you can't spill them) 109 | // so the max size of the return value is fixed and doesn't need a Vec to store 110 | pub fn rw_mut(&mut self) -> ([Option<&mut Reg>; 2], Option<&mut Reg>) { 111 | use AsmTemplate::*; 112 | match self { 113 | Bin(_, w1, r1, r2) => ([Some(r1), Some(r2)], Some(w1)), 114 | BinI(_, w1, r1, _) | Un(_, w1, r1) | Mv(w1, r1) | Lw(w1, r1, _) => 115 | ([Some(r1), None], Some(w1)), 116 | Jal(_) | J(_) | Label(_) | Ret => ([None, None], None), 117 | Jalr(r1) | B(_, r1, _) => ([Some(r1), None], None), 118 | Sw(r1, r2, _) => ([Some(r1), Some(r2)], None), 119 | Li(w1, _) | La(w1, _) => ([None, None], Some(w1)), 120 | SysCall(_) => ([None, None], None) 121 | } 122 | } 123 | 124 | pub fn imm_mut(&mut self) -> Option<&mut Imm> { 125 | use AsmTemplate::*; 126 | match self { 127 | BinI(_, _, _, i) | Lw(_, _, i) | Sw(_, _, i) | Li(_, i) => Some(i), _ => None 128 | } 129 | } 130 | 131 | // filter useless asm using some simple rules 132 | pub fn useless(&self) -> bool { 133 | match *self { 134 | AsmTemplate::BinI(op, d, l, r) if d.id() == l.id() => match op { 135 | // And is bitwise and, but it can only be applied to bool in decaf, so And 1 is nop 136 | BinOp::Add | BinOp::Sub | BinOp::Or if 
r == Imm::Int(0) => true, 137 | BinOp::Mul | BinOp::Div | BinOp::And if r == Imm::Int(1) => true, 138 | _ => false 139 | } 140 | AsmTemplate::Mv(w, r) if w.id() == r.id() => true, 141 | _ => false, 142 | } 143 | } 144 | } 145 | 146 | 147 | impl fmt::Debug for Imm { 148 | fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { 149 | match self { Imm::Int(i) => write!(f, "{}", i), Imm::Tag(i) => write!(f, "_I{}", i), } 150 | } 151 | } 152 | 153 | impl fmt::Debug for Reg { 154 | fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { 155 | match self { 156 | Reg::PreColored(r) | Reg::Allocated(r) => write!(f, "{}", regs::NAME[*r as usize]), 157 | Reg::Virtual(r) => write!(f, "_R{}", r), 158 | } 159 | } 160 | } 161 | 162 | impl fmt::Debug for AsmTemplate { 163 | fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { 164 | use AsmTemplate::*; 165 | match self { 166 | Bin(op, w1, r1, r2) => write!(f, "{} {:?}, {:?}, {:?}", bin_str(*op), w1, r1, r2), 167 | BinI(op, w1, r1, i) => write!(f, "{} {:?}, {:?}, {:?}", bin_str(*op), w1, r1, i), 168 | Un(op, w1, r1) => write!(f, "{} {:?}, {:?}", un_str(*op), w1, r1), 169 | Mv(w1, r1) => write!(f, "move {:?}, {:?}", w1, r1), 170 | Jal(l) => write!(f, "jal {}", l), 171 | Jalr(r1) => write!(f, "jalr {:?}", r1), 172 | J(l) => write!(f, "j {}", l), 173 | Ret => write!(f, "jr $ra"), 174 | B(l, r1, z) => write!(f, "{} {:?}, {}", if *z { "beqz" } else { "bnez" }, r1, l), 175 | Lw(w1, r1, i) => write!(f, "lw {:?}, {:?}({:?})", w1, i, r1), 176 | Sw(r1, r2, i) => write!(f, "sw {:?}, {:?}({:?})", r1, i, r2), 177 | Li(r1, i) => write!(f, "li {:?}, {:?}", r1, i), 178 | La(r1, a) => write!(f, "la {:?}, {}", r1, a), 179 | Label(l) => write!(f, "{}", l), 180 | SysCall(id) => { 181 | writeln!(f, "li $v0, {}", *id as u32).ignore(); 182 | write!(f, "syscall") 183 | } 184 | } 185 | } 186 | } 187 | 188 | // we will output a lot of pseudo mips instructions, and depend on assembler or simulator to translate these pseudo 
instructions 189 | pub fn bin_str(op: BinOp) -> &'static str { 190 | use BinOp::*; 191 | match op { Add => "addu", Sub => "subu", Mul => "mul", Div => "div", Mod => "rem", And => "and", Or => "or", Eq => "seq", Ne => "sne", Lt => "slt", Le => "sle", Gt => "sgt", Ge => "sge" } 192 | } 193 | 194 | pub fn un_str(op: UnOp) -> &'static str { 195 | match op { UnOp::Neg => "neg", UnOp::Not => "not" } 196 | } -------------------------------------------------------------------------------- /codegen/src/mips_gen.rs: -------------------------------------------------------------------------------- 1 | use crate::{graph_alloc::*, mips::{*, regs::*}, Reg, AllocMethod}; 2 | use tacopt::{bb::{FuncBB, NextKind}, flow::{Flow, Or, FlowElem}}; 3 | use tac::{Tac, TacProgram, Operand, CallKind, Intrinsic}; 4 | use common::{HashSet, HashMap, BinOp}; 5 | use bitset::traits::*; 6 | 7 | pub struct FuncGen<'a, 'b> { 8 | pub(crate) param_num: u32, 9 | pub(crate) reg_num: u32, 10 | // for functions that this function calls, not the parameter of this function (which is `param_num`) 11 | pub(crate) ch_param_num: u32, 12 | pub(crate) name: &'b str, 13 | pub(crate) program: &'b TacProgram<'a>, 14 | // we do need to insert in the SomeContainer, but rust's LinkedList's api is so limited 15 | // and we do not need arbitrary insertion/deletion, so a Vec will be enough 16 | pub(crate) bb: Vec<(Vec, [Option; 2])>, 17 | // map virtual reg's id to its offset from $sp 18 | pub(crate) spill2slot: HashMap, 19 | } 20 | 21 | // all virtual register's id >= REG_N, all pre-colored or allocated register's id < REG_N, id can be the index in Allocator::nodes 22 | // m for machine, v for virtual 23 | fn mreg(r: Regs) -> Reg { Reg::PreColored(r as u32) } 24 | 25 | fn vreg(r: u32) -> Reg { Reg::Virtual(r + REG_N) } 26 | 27 | impl AllocCtx for FuncGen<'_, '_> { 28 | const K: u32 = ALLOC_N; 29 | 30 | fn initial(&self) -> (Vec, Vec) { 31 | // there are only ALLOC_N registers to allocate, but there are REG_N pre-colored 
nodes 32 | // (by definition, a machine register <=> a pre-colored node) 33 | ((REG_N..self.reg_num + REG_N).collect(), (0..self.reg_num + REG_N).map(|r| if r < REG_N { 34 | Node::new(Reg::PreColored(r)) 35 | } else { 36 | Node::new(Reg::Virtual(r)) 37 | }).collect()) 38 | } 39 | 40 | fn build(&self, allocator: &mut Allocator) { 41 | let mut aliveness_flow = self.analyze(); 42 | let each = aliveness_flow.each(); 43 | let FlowElem { in_: out, .. } = aliveness_flow.split(); 44 | let (mut r, mut w) = (Vec::new(), Vec::new()); 45 | for (off, b) in self.bb.iter().enumerate().map(|b| (b.0 * each, &(b.1).0)) { 46 | let live = &mut out[off..off + each]; 47 | for t in b.iter().rev() { 48 | if let &AsmTemplate::Mv(w1, r1) = t { 49 | let (w1, r1) = (w1.id(), r1.id()); 50 | if Self::involved_in_alloc(w1) && Self::involved_in_alloc(r1) { 51 | live.bsdel(r1); 52 | allocator.nodes[w1 as usize].move_list.push((w1, r1)); 53 | allocator.nodes[r1 as usize].move_list.push((w1, r1)); 54 | allocator.work_list_moves.insert((w1, r1)); 55 | } 56 | } 57 | t.rw(&mut r, &mut w); 58 | for w in w.iter().copied().map(Reg::id) { 59 | for l in live.bsones() { 60 | if Self::involved_in_alloc(w) && Self::involved_in_alloc(l) { 61 | allocator.add_edge(w, l); 62 | } 63 | } 64 | } 65 | w.iter().copied().map(Reg::id).for_each(|w| live.bsdel(w)); 66 | r.iter().copied().map(Reg::id).for_each(|r| live.bsset(r)); 67 | } 68 | } 69 | } 70 | 71 | fn rewrite(&mut self, spilled_nodes: &HashSet) { 72 | for idx in 0..self.bb.len() { 73 | let old = std::mem::replace(&mut self.bb[idx].0, Vec::new()); 74 | let mut new = Vec::with_capacity(old.len() * 2); 75 | for t in old { 76 | match t { 77 | // if this inst is move, rewriting it can be simplified if at least one of the operands is not spilled 78 | AsmTemplate::Mv(w1, r1) => { 79 | let (w1, r1) = (w1.id(), r1.id()); 80 | match (spilled_nodes.contains(&w1), spilled_nodes.contains(&r1)) { 81 | (true, true) => self.do_rewrite(t, spilled_nodes, &mut new), 82 | (false, 
true) => { 83 | let slot = self.find_spill_slot(r1); 84 | new.push(AsmTemplate::Lw(Reg::Virtual(w1), mreg(SP), slot)); 85 | } 86 | (true, false) => { 87 | let slot = self.find_spill_slot(w1); 88 | new.push(AsmTemplate::Sw(Reg::Virtual(r1), mreg(SP), slot)); 89 | } 90 | (false, false) => new.push(t), 91 | } 92 | } 93 | t => self.do_rewrite(t, spilled_nodes, &mut new), 94 | } 95 | } 96 | self.bb[idx].0 = new; 97 | } 98 | } 99 | 100 | fn finish(&mut self, result: &[Node]) { 101 | for (b, _) in &mut self.bb { 102 | for t in b { 103 | let (mut r, w) = t.rw_mut(); 104 | for r in r.iter_mut() { 105 | if let Some(r) = r { 106 | if let Reg::Virtual(r1) = **r { 107 | **r = Reg::Allocated(result[r1 as usize].expect_colored()); 108 | } 109 | } 110 | } 111 | if let Some(w) = w { 112 | if let Reg::Virtual(w1) = *w { 113 | *w = Reg::Allocated(result[w1 as usize].expect_colored()); 114 | } 115 | } 116 | } 117 | } 118 | } 119 | } 120 | 121 | impl<'a: 'b, 'b> FuncGen<'a, 'b> { 122 | pub fn work(f: &'b FuncBB<'a>, p: &'b TacProgram<'a>, m: AllocMethod) -> Vec { 123 | // reg_num is not inced by K, and new_reg() doesn't either, so all usage of virtual register id need to inc K 124 | // including those using f's inst and those generated to meet calling convention 125 | let mut fu = FuncGen { param_num: f.param_num, reg_num: f.reg_num, ch_param_num: 0, name: &f.name, program: p, bb: Vec::new(), spill2slot: HashMap::new() }; 126 | fu.populate(f); 127 | match m { AllocMethod::Graph => Allocator::work(&mut fu), AllocMethod::Brute => fu.brute_alloc() } 128 | fu.fill_imm_tag(); 129 | fu.bb.into_iter() 130 | .flat_map(|(b, _)| b.into_iter()) 131 | .filter(|asm| !asm.useless()) 132 | .collect() 133 | } 134 | 135 | // for all virtual registers in f, inc it by REG_N before adding to self 136 | fn populate(&mut self, f: &FuncBB<'a>) { 137 | let (pro, epi) = self.build_prologue_epilogue(); 138 | self.bb = vec![(pro, [Some(1), None])]; 139 | for (idx, b1) in f.bb.iter().enumerate() { 140 | let mut 
b2 = Vec::new(); 141 | if !(b1.prev.is_empty() || (b1.prev.len() == 1 && b1.prev[0] + 1 == idx as u32)) { 142 | b2.push(AsmTemplate::Label(format!("{}_L{}:", self.name, idx + 1))); 143 | } 144 | let mut arg_num = 0; 145 | for t in b1.iter() { 146 | self.select_inst(t.tac.get(), &mut b2, &mut arg_num); 147 | } 148 | // generate ret/jmp/..., and return the `next` by the way 149 | let next = self.build_next(idx as u32, f.bb.len() as u32 + 1, b1.next, &mut b2); 150 | self.bb.push((b2, next)); 151 | } 152 | self.bb.push((epi, [None, None])); 153 | } 154 | 155 | // prologue: 156 | // 1. adjust $sp to leave enough space for spilling 157 | // 2. move function arguments to virtual registers representing function arguments 158 | // 3. save all callee-saved registers ($sp is not included) 159 | // epilogue: 160 | // 1. restore all callee-saved registers ($sp is not included) 161 | // 2. adjust $sp back 162 | // 3. do return (jr $ra) 163 | fn build_prologue_epilogue(&mut self) -> (Vec, Vec) { 164 | use AsmTemplate::*; 165 | let (mut pro, mut epi) = (Vec::new(), Vec::new()); 166 | pro.push(BinI(BinOp::Sub, mreg(SP), mreg(SP), Imm::Tag(0))); 167 | // f use %i for the ith argument 168 | for i in 0..self.param_num { 169 | match ARG.nth(i as usize) { 170 | Some(a) => pro.push(AsmTemplate::Mv(vreg(i), Reg::PreColored(a))), 171 | None => pro.push(AsmTemplate::Lw(vreg(i), mreg(SP), Imm::Tag(i))), 172 | } 173 | } 174 | // Tac::Ret should mv return value(if any) to v0 and jmp here 175 | epi.push(Label(format!("{}_Ret:", self.name))); 176 | for ces in CALLEE_SAVE { 177 | let tmp = self.new_reg(); 178 | pro.push(Mv(vreg(tmp), Reg::PreColored(ces))); 179 | epi.push(Mv(Reg::PreColored(ces), vreg(tmp))); 180 | } 181 | epi.push(BinI(BinOp::Add, mreg(SP), mreg(SP), Imm::Tag(0))); 182 | epi.push(Ret); 183 | (pro, epi) 184 | } 185 | } 186 | 187 | impl<'a: 'b, 'b> FuncGen<'a, 'b> { 188 | // 0..Self::K : physical allocable registers 189 | // Self::K..REG_N : physical unallocable registers, though 
they are unallocable, they may still be used in some insts 190 | // REG_N : virtual registers 191 | fn involved_in_alloc(r: u32) -> bool { 192 | r < Self::K /* an allocatable machine register */ || r >= REG_N /* an virtual register */ 193 | } 194 | 195 | fn new_reg(&mut self) -> u32 { (self.reg_num, self.reg_num += 1).0 } 196 | 197 | // find a unique slot on the stack for `vreg` to spill 198 | // assume `vreg` >= REG_N, i.e., it is a legal virtual register id 199 | pub(crate) fn find_spill_slot(&mut self, vreg: u32) -> Imm { 200 | let vreg = vreg - REG_N; 201 | if vreg < self.param_num { // function arguments already have places to spill 202 | Imm::Tag(vreg) 203 | } else { 204 | let new_slot = (self.spill2slot.len() as i32 + self.ch_param_num as i32) * WORD_SIZE; 205 | Imm::Int(*self.spill2slot.entry(vreg).or_insert(new_slot)) 206 | } 207 | } 208 | 209 | fn fill_imm_tag(&mut self) { 210 | let self_stack = (self.spill2slot.len() as i32 + self.ch_param_num as i32) * WORD_SIZE; 211 | for (b, _) in &mut self.bb { 212 | for t in b { 213 | if let Some(imm) = t.imm_mut() { 214 | if let Imm::Tag(t) = *imm { 215 | // there are 3 places uses Imm::Tag, all can use the same way to compute 216 | // 1. $sp -= _ in prologue, tag = 0 217 | // 2. $sp += _ in epilogue, tag = 0 218 | // 3. 
the offset of arguments of this function on stack, where tag = t for t_th(0 based index) argument 219 | *imm = Imm::Int(self_stack + t as i32 * WORD_SIZE); 220 | } 221 | } 222 | } 223 | } 224 | } 225 | 226 | // do register spilling 227 | // add memory read before inst, add memory write after inst, if necessary 228 | fn do_rewrite(&mut self, mut t: AsmTemplate, spilled_nodes: &HashSet, new: &mut Vec) { 229 | let (mut r, w) = t.rw_mut(); 230 | for r in r.iter_mut() { 231 | if let Some(Reg::Virtual(r)) = r { 232 | if spilled_nodes.contains(r) { 233 | let slot = self.find_spill_slot(*r); 234 | *r = self.new_reg() + REG_N; 235 | new.push(AsmTemplate::Lw(Reg::Virtual(*r), mreg(SP), slot)); 236 | } 237 | } 238 | } 239 | match w { 240 | Some(Reg::Virtual(w)) if spilled_nodes.contains(w) => { 241 | let slot = self.find_spill_slot(*w); 242 | *w = self.new_reg() + REG_N; 243 | let w = *w; 244 | new.push(t); 245 | new.push(AsmTemplate::Sw(Reg::Virtual(w), mreg(SP), slot)); 246 | } 247 | _ => new.push(t), 248 | } 249 | } 250 | } 251 | 252 | impl FuncGen<'_, '_> { 253 | fn analyze(&self) -> Flow { 254 | let mut aliveness_flow = Flow::::new(self.bb.len(), (self.reg_num + REG_N) as usize); 255 | let each = aliveness_flow.each(); 256 | let FlowElem { gen: use_, kill: def, .. 
} = aliveness_flow.split(); 257 | for (idx, b) in self.bb.iter().enumerate() { 258 | let off = idx * each; 259 | Self::compute_use_def(&b.0, &mut use_[off..off + each], &mut def[off..off + each]); 260 | } 261 | aliveness_flow.solve(self.bb.iter().enumerate().map(|b| (b.0, (b.1).1.iter().filter(|n| n.is_some()).map(|n| n.unwrap() as usize)))); 262 | aliveness_flow 263 | } 264 | 265 | fn compute_use_def(b: &[AsmTemplate], use_: &mut [u32], def: &mut [u32]) { 266 | let (mut r, mut w) = (Vec::new(), Vec::new()); 267 | for t in b.iter().rev() { 268 | t.rw(&mut r, &mut w); 269 | w.iter().copied().map(Reg::id).for_each(|w| { 270 | def.bsset(w); 271 | use_.bsdel(w); 272 | }); 273 | r.iter().copied().map(Reg::id).for_each(|r| { 274 | use_.bsset(r); 275 | def.bsdel(r); 276 | }); 277 | } 278 | } 279 | } 280 | 281 | impl FuncGen<'_, '_> { 282 | fn select_inst(&mut self, t: Tac, b: &mut Vec, arg_num: &mut u32) { 283 | use AsmTemplate::*; 284 | match t { 285 | Tac::Bin { op, dst, lr } => { 286 | match lr { 287 | [Operand::Const(l), Operand::Const(r)] => b.push(Li(vreg(dst), Imm::Int(op.eval(l, r)))), 288 | [Operand::Reg(l), Operand::Const(r)] => b.push(BinI(op, vreg(dst), vreg(l), Imm::Int(r))), 289 | [Operand::Const(l), Operand::Reg(r)] => if let Some(inv) = op.invert() { 290 | b.push(BinI(inv, vreg(dst), vreg(r), Imm::Int(l))) 291 | } else { 292 | let tmp = self.build_operand(Operand::Const(l), b); 293 | b.push(Bin(op, vreg(dst), tmp, vreg(r))); 294 | } 295 | [Operand::Reg(l), Operand::Reg(r)] => b.push(Bin(op, vreg(dst), vreg(l), vreg(r))) 296 | } 297 | } 298 | Tac::Un { op, dst, r } => match r[0] { 299 | Operand::Const(r) => b.push(Li(vreg(dst), Imm::Int(op.eval(r)))), 300 | Operand::Reg(r) => b.push(Un(op, vreg(dst), vreg(r))), 301 | } 302 | Tac::Assign { dst, src } => self.build_mv(vreg(dst), src[0], b), 303 | Tac::Param { src } => { 304 | let src = self.build_operand(src[0], b); 305 | match ARG.nth(*arg_num as usize) { 306 | Some(a) => b.push(Mv(Reg::PreColored(a), src)), 
307 | None => b.push(Sw(src, mreg(SP), Imm::Int(*arg_num as i32 * WORD_SIZE))), 308 | } 309 | *arg_num += 1; 310 | } 311 | Tac::Call { dst, kind } => { 312 | let called = match kind { 313 | CallKind::Virtual(r, _) => { 314 | let r = self.build_operand(r[0], b); 315 | b.push(Jalr(r)); 316 | true 317 | } 318 | CallKind::Static(f, _) => { 319 | b.push(Jal(self.program.func[f as usize].name.clone())); 320 | true 321 | } 322 | CallKind::Intrinsic(i) => self.build_intrinsic(i, b), 323 | }; 324 | if called { 325 | // once it is really a function call, ch_param_num should be at least 4 326 | // because the calling convention says the first 4 arguments should have their slots on the stack 327 | self.ch_param_num = self.ch_param_num.max(*arg_num).max(4); 328 | } 329 | *arg_num = 0; 330 | if let Some(dst) = dst { b.push(Mv(vreg(dst), mreg(V0))); } 331 | } 332 | Tac::Load { dst, base, off, .. } => { 333 | let base = self.build_operand(base[0], b); 334 | b.push(Lw(vreg(dst), base, Imm::Int(off))); 335 | } 336 | Tac::Store { src_base, off, .. } => { 337 | let (src, base) = (self.build_operand(src_base[0], b), self.build_operand(src_base[1], b)); 338 | b.push(Sw(src, base, Imm::Int(off))); 339 | } 340 | Tac::LoadStr { dst, s } => b.push(AsmTemplate::La(vreg(dst), format!("_STRING{}", s))), 341 | Tac::LoadVTbl { dst, v } => b.push(AsmTemplate::La(vreg(dst), format!("_{}", self.program.vtbl[v as usize].class))), 342 | Tac::LoadFunc { dst, f } => b.push(AsmTemplate::La(vreg(dst), self.program.func[f as usize].name.clone())), 343 | Tac::Label { .. } | Tac::Ret { .. } | Tac::Jmp { .. } | Tac::Jif { .. 
} => unreachable!("Shouldn't meet Ret/Jmp/Jif/Label in a tac bb."), 344 | } 345 | } 346 | 347 | // the returned reg can only be used for read 348 | fn build_operand(&mut self, src: Operand, b: &mut Vec) -> Reg { 349 | match src { 350 | Operand::Reg(r) => vreg(r), 351 | Operand::Const(c) => if c == 0 { mreg(ZERO) } else { 352 | let new = vreg(self.new_reg()); 353 | b.push(AsmTemplate::Li(new, Imm::Int(c))); 354 | new 355 | } 356 | } 357 | } 358 | 359 | fn build_mv(&self, dst: Reg, src: Operand, b: &mut Vec) { 360 | match src { 361 | Operand::Reg(r) => b.push(AsmTemplate::Mv(dst, vreg(r))), 362 | Operand::Const(c) => b.push(AsmTemplate::Li(dst, Imm::Int(c))), 363 | } 364 | } 365 | 366 | // some intrinsic functions can be translated to syscall directly (of course it is not efficient, but it is easy to implement) 367 | // return true if a real function call is generated 368 | fn build_intrinsic(&self, i: Intrinsic, b: &mut Vec) -> bool { 369 | use Intrinsic::*; 370 | match i { 371 | _Alloc => b.push(AsmTemplate::SysCall(SysCall::Sbrk)), 372 | _ReadInt => b.push(AsmTemplate::SysCall(SysCall::ReadInt)), 373 | _PrintInt => b.push(AsmTemplate::SysCall(SysCall::PrintInt)), 374 | _PrintString => b.push(AsmTemplate::SysCall(SysCall::PrintString)), 375 | _Halt => b.push(AsmTemplate::SysCall(SysCall::Exit)), 376 | _ReadLine | _StringEqual | _PrintBool => { 377 | b.push(AsmTemplate::Jal(format!("{:?}", i))); 378 | return true; 379 | } 380 | } 381 | false 382 | } 383 | 384 | // `epilogue` is the index of epilogue bb 385 | // note that all jump target should inc by 1, because prologue takes index 0 386 | fn build_next(&mut self, idx: u32, epilogue: u32, next: NextKind, b: &mut Vec) -> [Option; 2] { 387 | match next { 388 | // turn ret into jmp to the last bb(epilogue) 389 | NextKind::Ret(src) => { 390 | if let Some(src) = src { 391 | self.build_mv(mreg(V0), src, b); 392 | } 393 | if idx + 2 != epilogue { // + 2, 1 for "prologue takes index 0", 1 for next bb should inc by 1 
naturally 394 | b.push(AsmTemplate::J(format!("{}_Ret", self.name))); 395 | } 396 | [Some(epilogue), None] 397 | } 398 | NextKind::Jmp(jump) => { 399 | if idx + 1 != jump { 400 | b.push(AsmTemplate::J(format!("{}_L{}", self.name, jump + 1))); 401 | } 402 | [Some(jump + 1), None] 403 | } 404 | NextKind::Jif { cond, z, fail, jump } => { 405 | b.push(AsmTemplate::B(format!("{}_L{}", self.name, jump + 1), vreg(cond), z)); 406 | // if we don't do any optimization on cfg, then `idx + 1 == fail` will always be true 407 | // because this is the situation when cfg is initially constructed 408 | if idx + 1 != fail { 409 | b.push(AsmTemplate::J(format!("{}_L{}", self.name, fail + 1))); 410 | } 411 | [Some(fail + 1), Some(jump + 1)] 412 | } 413 | NextKind::Halt => { 414 | self.build_intrinsic(Intrinsic::_Halt, b); 415 | [None, None] 416 | } 417 | } 418 | } 419 | } -------------------------------------------------------------------------------- /common/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "common" 3 | version = "0.1.0" 4 | authors = ["MashPlant <740678788@qq.com>"] 5 | edition = "2018" 6 | 7 | [dependencies] 8 | indexmap = "1.0.2" 9 | hashbrown = { version = "0.5", features = ["nightly"] } -------------------------------------------------------------------------------- /common/src/errors.rs: -------------------------------------------------------------------------------- 1 | use crate::{loc::{Loc, NO_LOC}, MAIN_CLASS}; 2 | use std::fmt; 3 | 4 | pub struct Error<'a, Ty>(pub Loc, pub ErrorKind<'a, Ty>); 5 | 6 | // Errors implements Debug, it prints errors line by line 7 | pub struct Errors<'a, Ty>(pub Vec>); 8 | 9 | impl Default for Errors<'_, Ty> { 10 | fn default() -> Self { Self(vec![]) } 11 | } 12 | 13 | impl<'a, Ty> Errors<'a, Ty> { 14 | // can save some typing in checking the program 15 | // because when issuing an error, it often follows return a false / error type, which is the default 16 | 
// if the compiler complains that it needs type hint, in many cases you can omit the ;, and it will be deduced to () 17 | pub fn issue(&mut self, loc: Loc, e: ErrorKind<'a, Ty>) -> T { 18 | self.0.push(Error(loc, e)); 19 | Default::default() 20 | } 21 | 22 | // guarantee to be stable, because there may be multiple errors in one loc 23 | pub fn sorted(mut self) -> Self { 24 | self.0.sort_by_key(|e| e.0); 25 | self 26 | } 27 | } 28 | 29 | impl fmt::Debug for Error<'_, Ty> { 30 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 31 | match self.0 { 32 | NO_LOC => write!(f, "*** Error: {:?}", self.1), 33 | loc => write!(f, "*** Error at {:?}: {:?}", loc, self.1), 34 | } 35 | } 36 | } 37 | 38 | pub enum ErrorKind<'a, Ty> { 39 | UnclosedStr(&'a str), 40 | NewlineInStr(&'a str), 41 | InvalidEscape, 42 | IntTooLarge(&'a str), 43 | UnrecognizedChar(char), 44 | SyntaxError, 45 | ConflictDeclaration { prev: Loc, name: &'a str }, 46 | NoSuchClass(&'a str), 47 | CyclicInheritance, 48 | NoMainClass, 49 | VoidArrayElement, 50 | VoidVar(&'a str), 51 | OverrideVar(&'a str), 52 | OverrideMismatch { func: &'a str, p: &'a str }, 53 | IncompatibleUnary { op: &'a str, r: Ty }, 54 | IncompatibleBinary { l: Ty, op: &'a str, r: Ty }, 55 | TestNotBool, 56 | BreakOutOfLoop, 57 | UndeclaredVar(&'a str), 58 | RefInStatic { field: &'a str, func: &'a str }, 59 | BadFieldAccess { name: &'a str, owner: Ty }, 60 | PrivateFieldAccess { name: &'a str, owner: Ty }, 61 | NoSuchField { name: &'a str, owner: Ty }, 62 | NotFunc { name: &'a str, owner: Ty }, 63 | LengthWithArgument(u32), 64 | ArgcMismatch { name: &'a str, expect: u32, actual: u32 }, 65 | ArgMismatch { loc: u32, arg: Ty, param: Ty }, 66 | ThisInStatic, 67 | NotObject(Ty), 68 | BadPrintArg { loc: u32, ty: Ty }, 69 | ReturnMismatch { expect: Ty, actual: Ty }, 70 | NewArrayNotInt, 71 | IndexNotArray, 72 | IndexNotInt, 73 | NoReturn, 74 | } 75 | 76 | impl fmt::Debug for ErrorKind<'_, Ty> { 77 | fn fmt(&self, f: &mut fmt::Formatter) -> 
fmt::Result { 78 | use ErrorKind::*; 79 | match self { 80 | UnclosedStr(s) => write!(f, "unterminated string constant \"{}", s), 81 | NewlineInStr(s) => write!(f, "illegal newline in string constant \"{}", s), 82 | InvalidEscape => write!(f, "illegal escape character"), 83 | IntTooLarge(s) => write!(f, "integer literal {} is too large", s), 84 | UnrecognizedChar(ch) => write!(f, "unrecognized character '{}'", ch), 85 | SyntaxError => write!(f, "syntax error"), 86 | ConflictDeclaration { prev, name } => write!(f, "declaration of '{}' here conflicts with earlier declaration at {:?}", name, prev), 87 | NoSuchClass(name) => write!(f, "class '{}' not found", name), 88 | CyclicInheritance => write!(f, "illegal class inheritance (should be acyclic)"), 89 | NoMainClass => write!(f, "no legal Main class named '{}' was found", MAIN_CLASS), 90 | VoidArrayElement => write!(f, "array element type must be non-void known type"), 91 | VoidVar(name) => write!(f, "cannot declare identifier '{}' as void type", name), 92 | OverrideVar(name) => write!(f, "overriding variable is not allowed for var '{}'", name), 93 | OverrideMismatch { func, p } => write!(f, "overriding method '{}' doesn't match the type signature in class '{}'", func, p), 94 | IncompatibleUnary { op, r } => write!(f, "incompatible operand: {} {:?}", op, r), 95 | IncompatibleBinary { l, op, r } => write!(f, "incompatible operands: {:?} {} {:?}", l, op, r), 96 | TestNotBool => write!(f, "test expression must have bool type"), 97 | BreakOutOfLoop => write!(f, "'break' is only allowed inside a loop"), 98 | UndeclaredVar(name) => write!(f, "undeclared variable '{}'", name), 99 | RefInStatic { field, func } => write!(f, "can not reference a non-static field '{}' from static method '{}'", field, func), 100 | BadFieldAccess { name, owner } => write!(f, "cannot access field '{}' from '{:?}'", name, owner), 101 | PrivateFieldAccess { name, owner } => write!(f, "field '{}' of '{:?}' not accessible here", name, owner), 102 | 
NoSuchField { name, owner } => write!(f, "field '{}' not found in '{:?}'", name, owner), 103 | LengthWithArgument(cnt) => write!(f, "function 'length' expects 0 argument(s) but {} given", cnt), 104 | NotFunc { name, owner } => write!(f, "'{}' is not a method in class '{:?}'", name, owner), 105 | ArgcMismatch { name, expect, actual } => write!(f, "function '{}' expects {} argument(s) but {} given", name, expect, actual), 106 | ArgMismatch { loc, arg, param } => write!(f, "incompatible argument {}: {:?} given, {:?} expected", loc, arg, param), 107 | ThisInStatic => write!(f, "can not use this in static function"), 108 | NotObject(ty) => write!(f, "{:?} is not a class type", ty), 109 | BadPrintArg { loc, ty } => write!(f, "incompatible argument {}: {:?} given, int/bool/string expected", loc, ty), 110 | ReturnMismatch { expect, actual } => write!(f, "incompatible return: {:?} given, {:?} expected", actual, expect), 111 | NewArrayNotInt => write!(f, "new array length must be an integer"), 112 | IndexNotArray => write!(f, "[] can only be applied to arrays"), 113 | IndexNotInt => write!(f, "array subscript must be an integer"), 114 | NoReturn => write!(f, "missing return statement: control reaches end of non-void block"), 115 | } 116 | } 117 | } 118 | 119 | impl fmt::Debug for Errors<'_, Ty> { 120 | fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { 121 | for e in &self.0 { writeln!(f, "{:?}", e)? 
} 122 | Ok(()) 123 | } 124 | } -------------------------------------------------------------------------------- /common/src/ignore_result.rs: -------------------------------------------------------------------------------- 1 | // it can be helpful when you know a Result/Option is definitely Ok/Some and you don't need its value 2 | // because it can suppress the warning from rustc about 'unused result which must be used' 3 | // (of course if you don't care about warnings, this is useless) 4 | pub trait IgnoreResult: Sized { 5 | fn ignore(self) {} 6 | } 7 | 8 | impl IgnoreResult for Result {} 9 | 10 | impl IgnoreResult for Option {} -------------------------------------------------------------------------------- /common/src/indent_printer.rs: -------------------------------------------------------------------------------- 1 | use crate::{INDENT, INDENT_STR}; 2 | use std::fmt; 3 | 4 | #[derive(Default)] 5 | pub struct IndentPrinter { 6 | indent: String, 7 | content: String, 8 | } 9 | 10 | impl IndentPrinter { 11 | #[inline(always)] 12 | pub fn indent(&mut self, f: impl FnOnce(&mut IndentPrinter)) { 13 | self.inc(); 14 | f(self); 15 | self.dec(); 16 | } 17 | 18 | // in most cases you don't need to use inc and dec directly 19 | pub fn inc(&mut self) { self.indent += INDENT_STR; } 20 | 21 | pub fn dec(&mut self) { for _ in 0..INDENT { self.indent.pop(); } } 22 | 23 | pub fn finish(self) -> String { self.content } 24 | } 25 | 26 | // this implementation adds '\n' to content by default, so use write!(...) 
for a normal new line text 27 | // for an empty new line, still need writeln!(p) or write!(p, "\n") 28 | impl fmt::Write for IndentPrinter { 29 | fn write_str(&mut self, s: &str) -> Result<(), fmt::Error> { 30 | for l in s.lines() { 31 | self.content += self.indent.as_ref(); 32 | self.content += l; 33 | self.content.push('\n'); 34 | } 35 | Ok(()) 36 | } 37 | 38 | // args need to be formatted with the default formatter instead of IndentPrinter 39 | fn write_fmt(&mut self, args: fmt::Arguments<'_>) -> Result<(), fmt::Error> { 40 | self.write_str(&format!("{}", args)) 41 | } 42 | } -------------------------------------------------------------------------------- /common/src/lib.rs: -------------------------------------------------------------------------------- 1 | pub mod loc; 2 | pub mod errors; 3 | pub mod ignore_result; 4 | pub mod indent_printer; 5 | pub mod r#ref; 6 | pub mod op; 7 | 8 | pub use crate::{loc::*, errors::*, ignore_result::*, indent_printer::*, r#ref::*, op::*}; 9 | use hashbrown::hash_map::DefaultHashBuilder; 10 | 11 | pub const MAIN_CLASS: &str = "Main"; 12 | pub const MAIN_METHOD: &str = "main"; 13 | pub const LENGTH: &str = "length"; 14 | const INDENT: u32 = 4; 15 | const INDENT_STR: &str = " "; 16 | 17 | // DefaultHashBuilder is the default hash of hashbrown, seems faster than RandomState (the default hash of IndexMap/Set & std HashMap/Set) 18 | // place these type alias here just for convenience 19 | pub type IndexMap = indexmap::IndexMap; 20 | pub type IndexSet = indexmap::IndexSet; 21 | pub type HashMap = hashbrown::HashMap; 22 | pub type HashSet = hashbrown::HashSet; -------------------------------------------------------------------------------- /common/src/loc.rs: -------------------------------------------------------------------------------- 1 | use std::fmt; 2 | 3 | // Loc(line, column), counting from 1 4 | // so 0 is invalid for both, and Loc(0, 0) means NO_LOC 5 | // (of course we can use Option, but I think NO_LOC is also convenient 
to use, and it saves space) 6 | #[derive(Copy, Clone, Eq, PartialEq, Default, Ord, PartialOrd)] 7 | pub struct Loc(pub u32, pub u32); 8 | 9 | pub const NO_LOC: Loc = Loc(0, 0); 10 | 11 | impl Loc { 12 | pub fn next_line(&mut self) { 13 | self.0 += 1; 14 | self.1 = 1; 15 | } 16 | 17 | pub fn next_col(&mut self) { self.1 += 1; } 18 | } 19 | 20 | impl fmt::Debug for Loc { 21 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 22 | write!(f, "({},{})", self.0, self.1) 23 | } 24 | } -------------------------------------------------------------------------------- /common/src/op.rs: -------------------------------------------------------------------------------- 1 | // maybe a better location for these 2 enums is in crate `syntax`? 2 | // but several other crates also use them, and don't use anything else in `syntax` 3 | // placing them here eliminates this dependency and (maybe?) reduces compile time 4 | #[derive(Copy, Clone, Eq, PartialEq, Hash)] 5 | pub enum BinOp { Add, Sub, Mul, Div, Mod, And, Or, Eq, Ne, Lt, Le, Gt, Ge } 6 | 7 | #[derive(Copy, Clone, Eq, PartialEq, Hash)] 8 | pub enum UnOp { Neg, Not } 9 | 10 | impl BinOp { 11 | // an operator style string, used in printing tac 12 | pub fn to_op_str(self) -> &'static str { 13 | use BinOp::*; 14 | match self { Add => "+", Sub => "-", Mul => "*", Div => "/", Mod => "%", And => "&&", Or => "||", Eq => "==", Ne => "!=", Lt => "<", Le => "<=", Gt => ">", Ge => ">=" } 15 | } 16 | 17 | // an abbreviated word for the operator, used in printing ast 18 | pub fn to_word_str(self) -> &'static str { 19 | use BinOp::*; 20 | match self { Add => "ADD", Sub => "SUB", Mul => "MUL", Div => "DIV", Mod => "MOD", And => "AND", Or => "OR", Eq => "EQ", Ne => "NE", Lt => "LT", Le => "LE", Gt => "GT", Ge => "GE" } 21 | } 22 | 23 | // e.g.: x op1 y <=> y op2 x, this can be helpful because mips's I instructions use imm as rhs 24 | // self.invert() == Some(self) <=> self is commutative 25 | pub fn invert(self) -> Option { 26 | use BinOp::*; 27 | match 
self { Add => Some(Add), Mul => Some(Mul), And => Some(And), Or => Some(Or), Eq => Some(Eq), Ne => Some(Ne), Lt => Some(Gt), Le => Some(Ge), Gt => Some(Lt), Ge => Some(Le), Sub | Div | Mod => None, } 28 | } 29 | 30 | // return None if self = Div or Mod and either r = 0 or the division overflows (l = i32::MIN, r = -1) 31 | pub fn try_eval(self, l: i32, r: i32) -> Option { 32 | use BinOp::*; 33 | match self { 34 | Add => Some(l.wrapping_add(r)), 35 | Sub => Some(l.wrapping_sub(r)), 36 | Mul => Some(l.wrapping_mul(r)), 37 | Div => l.checked_div(r), 38 | Mod => l.checked_rem(r), 39 | And => Some(((l != 0) && (r != 0)) as i32), 40 | Or => Some(((l != 0) || (r != 0)) as i32), 41 | Eq => Some((l == r) as i32), 42 | Ne => Some((l != r) as i32), 43 | Lt => Some((l < r) as i32), 44 | Le => Some((l <= r) as i32), 45 | Gt => Some((l > r) as i32), 46 | Ge => Some((l >= r) as i32), 47 | } 48 | } 49 | 50 | // div 0 or mod 0 (or an overflowing div/mod) is regarded as ub here, just use 0 to represent the value 51 | pub fn eval(self, l: i32, r: i32) -> i32 { self.try_eval(l, r).unwrap_or(0) } 52 | } 53 | 54 | impl UnOp { 55 | pub fn to_op_str(self) -> &'static str { 56 | match self { UnOp::Neg => "-", UnOp::Not => "!" 
} 57 | } 58 | 59 | pub fn to_word_str(self) -> &'static str { 60 | match self { UnOp::Neg => "NEG", UnOp::Not => "NOT" } 61 | } 62 | 63 | pub fn eval(self, r: i32) -> i32 { 64 | match self { UnOp::Neg => r.wrapping_neg(), UnOp::Not => (r == 0) as i32, } 65 | } 66 | } -------------------------------------------------------------------------------- /common/src/ref.rs: -------------------------------------------------------------------------------- 1 | use std::{hash::{Hash, Hasher}, cmp::Ordering, ops::Deref}; 2 | 3 | // comparing reference by their pointer value (this is 100% safe rust) 4 | pub struct Ref<'a, T>(pub &'a T); 5 | 6 | impl Clone for Ref<'_, T> { 7 | fn clone(&self) -> Self { Self(self.0) } 8 | } 9 | 10 | impl Copy for Ref<'_, T> {} 11 | 12 | impl PartialEq for Ref<'_, T> { 13 | fn eq(&self, other: &Self) -> bool { 14 | self.0 as *const T == other.0 as *const T 15 | } 16 | } 17 | 18 | impl Eq for Ref<'_, T> {} 19 | 20 | impl PartialOrd for Ref<'_, T> { 21 | fn partial_cmp(&self, other: &Self) -> Option { 22 | (self.0 as *const T).partial_cmp(&(other.0 as *const T)) 23 | } 24 | } 25 | 26 | impl Ord for Ref<'_, T> { 27 | fn cmp(&self, other: &Self) -> Ordering { 28 | (self.0 as *const T).cmp(&(other.0 as *const T)) 29 | } 30 | } 31 | 32 | impl Hash for Ref<'_, T> { 33 | fn hash(&self, state: &mut H) { 34 | (self.0 as *const T).hash(state) 35 | } 36 | } 37 | 38 | impl Deref for Ref<'_, T> { 39 | type Target = T; 40 | 41 | fn deref(&self) -> &Self::Target { self.0 } 42 | } 43 | -------------------------------------------------------------------------------- /driver/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "driver" 3 | version = "0.1.0" 4 | authors = ["MashPlant <740678788@qq.com>"] 5 | edition = "2018" 6 | 7 | [dependencies] 8 | common = { path = "../common" } 9 | print = { path = "../print" } 10 | syntax = { path = "../syntax" } 11 | typeck = { path = "../typeck" } 12 | tac = { 
path = "../tac" } 13 | tacgen = { path = "../tacgen" } 14 | tacopt = { path = "../tacopt" } 15 | codegen = { path = "../codegen" } 16 | typed-arena = "1.4.1" 17 | tacvm = { git = "https://github.com/MashPlant/tacvm" } 18 | colored = "1.8" 19 | clap = "2" 20 | 21 | [lib] 22 | path = "src/lib.rs" 23 | 24 | [[bin]] 25 | name = "decaf" 26 | path = "src/cli.rs" 27 | 28 | [[bin]] 29 | name = "test" 30 | path = "src/test.rs" -------------------------------------------------------------------------------- /driver/src/cli.rs: -------------------------------------------------------------------------------- 1 | use driver::{Pa, Alloc}; 2 | use clap::{Arg, App}; 3 | use std::{io, fs, process}; 4 | 5 | fn main() -> io::Result<()> { 6 | let matches = App::new("decaf") 7 | .arg(Arg::with_name("input").required(true)) 8 | .arg(Arg::with_name("output").long("output").short("o").takes_value(true)) 9 | .arg(Arg::with_name("target").long("target").short("t").takes_value(true).default_value("pa5")) 10 | .get_matches(); 11 | let pa = match matches.value_of("target").unwrap() { 12 | t if t.eq_ignore_ascii_case("pa1a") => Pa::Pa1a, 13 | t if t.eq_ignore_ascii_case("pa1b") => Pa::Pa1b, 14 | t if t.eq_ignore_ascii_case("pa2") => Pa::Pa2, 15 | t if t.eq_ignore_ascii_case("pa3") => Pa::Pa3, 16 | t if t.eq_ignore_ascii_case("pa4") => Pa::Pa4, 17 | t if t.eq_ignore_ascii_case("pa5") => Pa::Pa5, 18 | t => { 19 | eprintln!("invalid target pa: `{}`", t); 20 | process::exit(1); 21 | } 22 | }; 23 | let input = matches.value_of("input").unwrap(); 24 | let result = match driver::compile(&fs::read_to_string(input)?, &Alloc::default(), pa.to_cfg()) { 25 | Ok(p) => p, 26 | Err(e) => format!("{:?}", e), 27 | }; 28 | if let Some(output) = matches.value_of("output") { 29 | fs::write(output, result) 30 | } else { 31 | print!("{}", result); 32 | Ok(()) 33 | } 34 | } -------------------------------------------------------------------------------- /driver/src/lib.rs: 
-------------------------------------------------------------------------------- 1 | #![feature(result_map_or_else)] 2 | 3 | pub mod test_util; 4 | 5 | use common::{IndentPrinter, Errors}; 6 | use syntax::{ASTAlloc, Ty, parser, parser_ll}; 7 | use typeck::TypeCkAlloc; 8 | use tacopt::bb::FuncBB; 9 | use codegen::mips_gen::FuncGen; 10 | use tac::TacNode; 11 | use typed_arena::Arena; 12 | 13 | pub use test_util::*; 14 | 15 | #[derive(Eq, PartialEq, Copy, Clone)] 16 | pub enum Stage { Parse, TypeCk, Tac, TacOpt, Asm } 17 | 18 | #[derive(Copy, Clone)] 19 | pub enum Parser { LL, LR } 20 | 21 | #[derive(Copy, Clone)] 22 | pub struct CompileCfg { 23 | pub stage: Stage, 24 | pub parser: Parser, 25 | } 26 | 27 | #[derive(Default)] 28 | pub struct Alloc<'a> { 29 | ast: ASTAlloc<'a>, 30 | typeck: TypeCkAlloc<'a>, 31 | tac: Arena>, 32 | } 33 | 34 | // it is recommended to use this function to debug your compiler 35 | // `code` can be provided by hard-coded string literal, `cfg` can be provided by `Pa::Pax.to_cfg()` 36 | pub fn compile<'a>(code: &'a str, alloc: &'a Alloc<'a>, cfg: CompileCfg) -> Result>> { 37 | let mut p = IndentPrinter::default(); 38 | let pr = match cfg.parser { 39 | Parser::LL => parser_ll::work(code, &alloc.ast)?, 40 | Parser::LR => parser::work(code, &alloc.ast)?, 41 | }; 42 | if cfg.stage == Stage::Parse { 43 | print::ast::program(&pr, &mut p); 44 | return Ok(p.finish()); 45 | } 46 | typeck::work(&pr, &alloc.typeck)?; 47 | if cfg.stage == Stage::TypeCk { 48 | print::scope::program(&pr, &mut p); 49 | return Ok(p.finish()); 50 | } 51 | let mut tp = tacgen::work(&pr, &alloc.tac); 52 | if cfg.stage == Stage::Tac { 53 | print::tac::program(&tp, &mut p); 54 | return Ok(p.finish()); 55 | } 56 | if cfg.stage == Stage::Asm { 57 | print::mips::data(&tp, &mut p); 58 | } 59 | let mut new_funcs = Vec::new(); 60 | for f in &tp.func { 61 | let mut fu = FuncBB::new(f); 62 | fu.optimizen(10); 63 | if cfg.stage == Stage::Asm { 64 | let asm = FuncGen::work(&fu, &tp, 
codegen::AllocMethod::Graph); 65 | print::mips::func(&asm, &f.name, &mut p); 66 | } else { // cfg.stage == Stage::TacOpt 67 | new_funcs.push(fu.to_tac_func()); 68 | } 69 | } 70 | if cfg.stage == Stage::TacOpt { 71 | tp.func = new_funcs; 72 | print::tac::program(&tp, &mut p); 73 | Ok(p.finish()) 74 | } else { Ok(p.finish() + include_str!("../../codegen/lib.s")) } 75 | } -------------------------------------------------------------------------------- /driver/src/test.rs: -------------------------------------------------------------------------------- 1 | use driver::*; 2 | 3 | fn main() { 4 | for result in test_all("testcase/S4", Pa::Pa4).unwrap() { 5 | println!("{:?}", result); 6 | } 7 | } -------------------------------------------------------------------------------- /driver/src/test_util.rs: -------------------------------------------------------------------------------- 1 | use std::{io::{self, BufReader}, fs::{self, File}, fmt, panic, path::{Path, PathBuf}, any::Any, sync::{Arc, Mutex}, process::{Command, Stdio}}; 2 | use colored::*; 3 | use crate::{CompileCfg, Parser, Stage, Alloc}; 4 | 5 | #[derive(Copy, Clone)] 6 | pub enum Pa { Pa1a, Pa1b, Pa2, Pa3, Pa4, Pa5 } 7 | 8 | impl Pa { 9 | pub fn to_cfg(self) -> CompileCfg { 10 | CompileCfg { 11 | stage: match self { 12 | Pa::Pa1a | Pa::Pa1b => Stage::Parse, 13 | Pa::Pa2 => Stage::TypeCk, 14 | Pa::Pa3 => Stage::Tac, 15 | Pa::Pa4 => Stage::TacOpt, 16 | Pa::Pa5 => Stage::Asm, 17 | }, 18 | parser: match self { Pa::Pa1b => Parser::LL, _ => Parser::LR }, 19 | } 20 | } 21 | } 22 | 23 | const SPIM_PATH: &str = "spim"; 24 | // ignore first SPIM_INFO_LINE line(s), don't compare them 25 | const SPIM_INFO_LINE: usize = 1; 26 | 27 | // `path` should be the folder that directly contains the `.decaf` test inputs 28 | // expected outputs are read from `path`/result, and actual outputs are written to `path`/out (created if missing) 29 | pub fn test_all(path: impl AsRef, pa: Pa) -> io::Result> { 30 | // make color work properly on 
windows(powershell) 31 | // if it still doesn't work, or you simply dislike the color, add `colored::control::set_override(false);` before calling `test_all` 32 | #[cfg(target_os = "windows")] let _ = control::set_virtual_terminal(true); 33 | 34 | let path = path.as_ref(); 35 | let ans = path.join("result"); 36 | let out = path.join("out"); 37 | if !out.exists() { fs::create_dir_all(&out)?; } 38 | 39 | let mut files = fs::read_dir(path)?.filter_map(|f| { 40 | let path = f.ok()?.path(); 41 | let name = path.file_name()?.to_str()?; // in normal case none of the above 3 ? will fail 42 | if path.is_file() && name.ends_with(".decaf") { Some(name.to_owned()) } else { None } 43 | }).collect::>(); 44 | files.sort_unstable(); // the order of fs::read_dir may be strange, sort them for better debugging 45 | let ret = files.iter().map(|f| { 46 | test_one_caught(path.join(f), out.join(f).with_extension("result"), ans.join(f).with_extension("result"), pa) 47 | }).collect(); 48 | Ok(ret) 49 | } 50 | 51 | pub fn test_one_caught(i: impl AsRef, o: impl AsRef, ans: impl AsRef, pa: Pa) -> TestResult { 52 | let loc = Arc::new(Mutex::new(None)); 53 | let loc1 = loc.clone(); 54 | panic::set_hook(Box::new(move |panic_info| if let Some(l) = panic_info.location() { 55 | *loc1.lock().unwrap() = Some(PanicLoc { file: l.file().to_owned(), line: l.line(), col: l.column() }); 56 | })); 57 | let ret = panic::catch_unwind(panic::AssertUnwindSafe(|| test_one(&i, &o, &ans, pa))) 58 | .unwrap_or_else(|e| TestResult::new(i, o, ans, ResultKind::RuntimeError(PanicInfo { payload: get_payload(e), loc: loc.lock().unwrap().clone() }))); 59 | let _ = panic::take_hook(); 60 | ret 61 | } 62 | 63 | pub fn test_one(i: impl AsRef, o: impl AsRef, ans: impl AsRef, pa: Pa) -> TestResult { 64 | let ignore_line = if let Pa::Pa5 = pa { SPIM_INFO_LINE } else { 0 }; // in pa5 we ignore first SPIM_INFO_LINE line(s) 65 | let kind = run(&i, &o, pa).and_then(|out| Ok((out, fs::read_to_string(&ans)?))) 66 | 
.map_or_else(ResultKind::IOError, |(out, ans)| ResultKind::new(&out, &ans, ignore_line)); 67 | TestResult::new(i, o, ans, kind) 68 | } 69 | 70 | pub fn run(i: impl AsRef, o: impl AsRef, pa: Pa) -> io::Result { 71 | let o = o.as_ref(); 72 | let cfg = pa.to_cfg(); 73 | let out = match crate::compile(&fs::read_to_string(i)?, &Alloc::default(), cfg) { 74 | Ok(p) => match cfg.stage { 75 | Stage::Parse | Stage::TypeCk => (fs::write(o, &p), p).1, 76 | Stage::Tac | Stage::TacOpt => { 77 | fs::write(o.with_extension("tac"), &p)?; 78 | tacvm::work(&p, 100_000, 1000, true, true, 79 | Box::new(BufReader::new(io::stdin())), 80 | Box::new(File::create(&o)?), 81 | Box::new(File::create(o.with_extension("info"))?), 82 | )?; 83 | fs::read_to_string(o)? 84 | } 85 | Stage::Asm => { 86 | fs::write(o.with_extension("s"), &p)?; 87 | Command::new(SPIM_PATH).arg("-file").arg(o.with_extension("s")) 88 | .stdout(Stdio::from(File::create(&o)?)).spawn()?.wait()?; 89 | fs::read_to_string(o)? 90 | } 91 | } 92 | Err(e) => { 93 | let out = format!("{:?}", e); 94 | fs::write(o, &out)?; 95 | out 96 | } 97 | }; 98 | Ok(out) 99 | } 100 | 101 | pub struct TestResult { 102 | pub file: PathBuf, 103 | pub out: PathBuf, 104 | pub ans: PathBuf, 105 | pub kind: ResultKind, 106 | } 107 | 108 | impl TestResult { 109 | pub fn new(file: impl AsRef, out: impl AsRef, ans: impl AsRef, kind: ResultKind) -> TestResult { 110 | TestResult { file: file.as_ref().into(), out: out.as_ref().into(), ans: ans.as_ref().into(), kind } 111 | } 112 | } 113 | 114 | pub enum ResultKind { 115 | Pass, 116 | Fail { first_diff: usize, out: String, ans: String }, 117 | IOError(io::Error), 118 | RuntimeError(PanicInfo), 119 | } 120 | 121 | impl ResultKind { 122 | pub fn new(out: &str, ans: &str, ignore_line: usize) -> ResultKind { 123 | let (mut out_lines, mut ans_lines) = (out.lines().skip(ignore_line), ans.lines().skip(ignore_line)); 124 | let mut first_diff = ignore_line + 1; 125 | // it seems there is no builtin iter function that 
implement "zip and pad the shorter one" 126 | loop { 127 | match (out_lines.next(), ans_lines.next()) { 128 | (None, None) => break ResultKind::Pass, 129 | (out, ans) => { 130 | let (out, ans) = (out.unwrap_or(""), ans.unwrap_or("")); 131 | if out != ans { 132 | break ResultKind::Fail { first_diff, out: out.to_owned(), ans: ans.to_owned() }; 133 | } 134 | } 135 | } 136 | first_diff += 1; 137 | } 138 | } 139 | } 140 | 141 | impl fmt::Debug for TestResult { 142 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> { 143 | write!(f, "{}: ", self.file.display())?; 144 | match &self.kind { 145 | ResultKind::Pass => write!(f, "{}", "Pass".green()), 146 | ResultKind::Fail { first_diff, out, ans } => { 147 | writeln!(f, "{}: {}", "Fail".red(), format!("first different line on {}", first_diff).yellow())?; 148 | writeln!(f, "{}", format!("your line: \"{}\" ({}:{})", out, self.out.display(), first_diff).yellow())?; 149 | write!(f, "{}", format!("ans line: \"{}\" ({}:{})", ans, self.ans.display(), first_diff).yellow()) 150 | } 151 | ResultKind::IOError(e) => write!(f, "{}: {}", "IOError".red(), e.to_string().yellow()), 152 | ResultKind::RuntimeError(e) => { 153 | write!(f, "{}", "RuntimeError".red())?; 154 | if let Some(payload) = &e.payload { 155 | write!(f, ": {}", format!("panicked at `{}`", payload).yellow())?; 156 | } 157 | if let Some(loc) = &e.loc { 158 | write!(f, "{}", format!(", {:?}", loc).yellow())?; 159 | } 160 | Ok(()) 161 | } 162 | } 163 | } 164 | } 165 | 166 | // std::panic::Location uses an borrowed `file`, which can't be conveniently stored 167 | #[derive(Clone)] 168 | pub struct PanicLoc { 169 | pub file: String, 170 | pub line: u32, 171 | pub col: u32, 172 | } 173 | 174 | // std::panic::PanicInfo's `payload` is a Box, which can't be printed(actually it can, but no useful information will be printed) 175 | pub struct PanicInfo { 176 | pub payload: Option, 177 | pub loc: Option, 178 | } 179 | 180 | impl fmt::Debug for PanicLoc { 181 | fn 
fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> { 182 | write!(f, "{}:{}:{}", self.file, self.line, self.col) 183 | } 184 | } 185 | 186 | // try to get the String or str content from Any 187 | fn get_payload(e: Box) -> Option { 188 | e.downcast::().map(|s| *s) 189 | .or_else(|payload| payload.downcast::<&str>().map(|s| (*s).to_owned())) 190 | .ok() 191 | } 192 | -------------------------------------------------------------------------------- /print/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "print" 3 | version = "0.1.0" 4 | authors = ["MashPlant <740678788@qq.com>"] 5 | edition = "2018" 6 | 7 | [dependencies] 8 | syntax = { path = "../syntax" } 9 | common = { path = "../common" } 10 | tac = { path = "../tac" } 11 | tacopt = { path = "../tacopt" } 12 | codegen = { path = "../codegen" } -------------------------------------------------------------------------------- /print/src/ast.rs: -------------------------------------------------------------------------------- 1 | use common::{IndentPrinter, IgnoreResult}; 2 | use syntax::*; 3 | use std::fmt::Write; 4 | 5 | pub fn program(pr: &Program, p: &mut IndentPrinter) { pr.print(p); } 6 | 7 | trait Printable { 8 | fn print(&self, p: &mut IndentPrinter); 9 | } 10 | 11 | // generate a impl block for Display types 12 | macro_rules! 
print_basic { 13 | ($($t: ty)*) => {$( 14 | impl Printable for $t { 15 | fn print(&self, p: &mut IndentPrinter) { write!(p, "{}", self).ignore() } 16 | } 17 | )*}; 18 | } 19 | 20 | print_basic!(i32 bool str); 21 | 22 | impl Printable for [T] { 23 | fn print(&self, p: &mut IndentPrinter) { 24 | write!(p, "List").ignore(); 25 | p.indent(|p| if self.is_empty() { write!(p, "").ignore(); } else { for x in self { x.print(p); } }) 26 | } 27 | } 28 | 29 | impl Printable for Option { 30 | fn print(&self, p: &mut IndentPrinter) { 31 | if let Some(x) = self { x.print(p); } else { write!(p, "").ignore(); } 32 | } 33 | } 34 | 35 | impl Printable for Box { 36 | fn print(&self, p: &mut IndentPrinter) { self.as_ref().print(p); } 37 | } 38 | 39 | impl Printable for &T { 40 | fn print(&self, p: &mut IndentPrinter) { (*self).print(p); } 41 | } 42 | 43 | impl Printable for SynTy<'_> { 44 | fn print(&self, p: &mut IndentPrinter) { 45 | for _ in 0..self.arr { 46 | write!(p, "TArray @ {:?}", self.loc).ignore(); 47 | p.inc(); 48 | } 49 | match &self.kind { 50 | SynTyKind::Int => write!(p, "TInt @ {:?}", self.loc).ignore(), 51 | SynTyKind::Bool => write!(p, "TBool @ {:?}", self.loc).ignore(), 52 | SynTyKind::String => write!(p, "TString @ {:?}", self.loc).ignore(), 53 | SynTyKind::Void => write!(p, "TVoid @ {:?}", self.loc).ignore(), 54 | SynTyKind::Named(c) => { 55 | write!(p, "TClass @ {:?}", self.loc).ignore(); 56 | p.indent(|p| c.print(p)); 57 | } 58 | } 59 | for _ in 0..self.arr { p.dec(); } 60 | } 61 | } 62 | 63 | // generate a impl block for struct, $name is the struct's name IN AST (which may be different or the same with struct's name) 64 | // $field are expressions separated by spaces, they can access self's field, they will be printed sequentially 65 | macro_rules! 
print_struct { 66 | ($t: ty, $self_: ident, $loc: expr, $name: ident, $($field: expr)*) => { 67 | impl Printable for $t { 68 | fn print(&$self_, p: &mut IndentPrinter) { 69 | write!(p, "{} @ {:?}", stringify!($name), $loc).ignore(); 70 | p.indent(|p| { $($field.print(p);)* }); 71 | } 72 | } 73 | }; 74 | } 75 | 76 | // generate a match block for enum 77 | // $variant is both the name of variant in enum and in ast, so they must have the same name 78 | macro_rules! print_enum { 79 | ($e: expr, $loc: expr, $p: expr, $name: ident, $($variant: ident => $($field: expr)*),*) => { 80 | match &$e { 81 | $($variant($name) => { 82 | write!($p, "{} @ {:?}", stringify!($variant), $loc).ignore(); 83 | $p.indent(|p| { $($field.print(p);)* }); 84 | })* 85 | } 86 | }; 87 | } 88 | 89 | // self.class[0] must be valid, because parser requires that there is at least one class 90 | print_struct!(Program<'_>, self, self.class[0].loc, TopLevel, self.class); 91 | print_struct!(ClassDef<'_>, self, self.loc, ClassDef, self.name self.parent self.field); 92 | print_struct!(VarDef<'_>, self, self.loc, LocalVarDef, self.syn_ty self.name self.init()); 93 | print_struct!(Block<'_>, self, self.loc, Block, self.stmt); 94 | 95 | impl Printable for FieldDef<'_> { 96 | fn print(&self, p: &mut IndentPrinter) { 97 | match self { 98 | FieldDef::VarDef(v) => { 99 | write!(p, "VarDef @ {:?}", v.loc).ignore(); 100 | p.indent(|p| { 101 | v.syn_ty.print(p); 102 | v.name.print(p); 103 | v.init().print(p); 104 | }); 105 | } 106 | FieldDef::FuncDef(f) => { 107 | write!(p, "MethodDef @ {:?}", f.loc).ignore(); 108 | p.indent(|p| { 109 | if f.static_ { "STATIC".print(p); } 110 | f.name.print(p); 111 | f.ret.print(p); 112 | f.param.print(p); 113 | f.body.print(p); 114 | }); 115 | } 116 | } 117 | } 118 | } 119 | 120 | impl Printable for Stmt<'_> { 121 | #[allow(unused_variables)] 122 | fn print(&self, p: &mut IndentPrinter) { 123 | use StmtKind::*; 124 | print_enum!(self.kind, self.loc, p, x, 125 | Assign => x.dst x.src, 
LocalVarDef => x.syn_ty x.name x.init(), ExprEval => x, Skip => , If => x.cond x.on_true x.on_false, 126 | While => x.cond x.body, For => x.init x.cond x.update x.body, Return => x, Print => x, Break => , Block => x.stmt 127 | ); 128 | } 129 | } 130 | 131 | impl Printable for Expr<'_> { 132 | #[allow(unused_variables)] 133 | fn print(&self, p: &mut IndentPrinter) { 134 | use ExprKind::*; 135 | print_enum!(self.kind, self.loc, p, x, 136 | VarSel => x.owner x.name, IndexSel => x.arr x.idx, IntLit => x, BoolLit => x, StringLit => "\"".to_owned() + x + "\"", 137 | NullLit => , Call => x.func x.arg, Unary => x.op.to_word_str() x.r, Binary => x.op.to_word_str() x.l x.r, 138 | This => , ReadInt => , ReadLine => , NewClass => x.name, NewArray => x.elem x.len, ClassTest => x.expr x.name, 139 | ClassCast => x.expr x.name 140 | ); 141 | } 142 | } 143 | -------------------------------------------------------------------------------- /print/src/lib.rs: -------------------------------------------------------------------------------- 1 | pub mod ast; 2 | pub mod scope; 3 | pub mod tac; 4 | pub mod mips; -------------------------------------------------------------------------------- /print/src/mips.rs: -------------------------------------------------------------------------------- 1 | use common::{IndentPrinter, IgnoreResult}; 2 | use tac::{TacProgram}; 3 | use codegen::mips::AsmTemplate; 4 | use std::fmt::Write; 5 | 6 | pub fn data(pr: &TacProgram, p: &mut IndentPrinter) { 7 | write!(p, ".data").ignore(); 8 | write!(p, ".align 2").ignore(); 9 | for v in &pr.vtbl { 10 | write!(p, "_{}:", v.class).ignore(); 11 | p.indent(|p| { 12 | if let Some(pa) = v.parent { 13 | write!(p, ".word _{}", pr.vtbl[pa as usize].class).ignore(); 14 | } else { 15 | write!(p, ".word 0").ignore(); 16 | } 17 | write!(p, ".word _STRING{}", pr.str_pool.get_full(v.class).expect("tacgen should have put class name into `str_pool`").0).ignore(); 18 | for &f in &v.func { 19 | write!(p, ".word {}", pr.func[f as 
usize].name).ignore(); 20 | } 21 | }); 22 | } 23 | writeln!(p).ignore(); 24 | write!(p, ".data").ignore(); 25 | for (idx, s) in pr.str_pool.iter().enumerate() { 26 | write!(p, "_STRING{}:", idx).ignore(); 27 | p.indent(|p| write!(p, ".asciiz \"{}\"", s).ignore()); 28 | } 29 | writeln!(p).ignore(); 30 | } 31 | 32 | pub fn func(f: &[AsmTemplate], name: &str, p: &mut IndentPrinter) { 33 | write!(p, ".text").ignore(); 34 | write!(p, ".globl {}", name).ignore(); 35 | write!(p, "{}:", name).ignore(); 36 | p.indent(|p| for asm in f { write!(p, "{:?}", asm).ignore(); }); 37 | writeln!(p).ignore(); 38 | } -------------------------------------------------------------------------------- /print/src/scope.rs: -------------------------------------------------------------------------------- 1 | use common::{IndentPrinter, IgnoreResult}; 2 | use syntax::{ast::*, Scope}; 3 | use std::fmt::Write; 4 | 5 | fn show_scope(s: &Scope, p: &mut IndentPrinter) { 6 | let mut s = s.iter().map(|(_, &sym)| sym).collect::>(); 7 | s.sort_unstable_by_key(|x| x.loc()); 8 | if s.is_empty() { write!(p, "").ignore(); } else { for s in s { write!(p, "{:?}", s).ignore(); } } 9 | } 10 | 11 | pub fn program(pr: &Program, p: &mut IndentPrinter) { 12 | write!(p, "GLOBAL SCOPE:").ignore(); 13 | p.indent(|p| { 14 | show_scope(&pr.scope.borrow(), p); 15 | for c in &pr.class { class_def(c, p); } 16 | }); 17 | } 18 | 19 | pub fn class_def(c: &ClassDef, p: &mut IndentPrinter) { 20 | write!(p, "CLASS SCOPE OF '{}':", c.name).ignore(); 21 | p.indent(|p| { 22 | show_scope(&c.scope.borrow(), p); 23 | for f in &c.field { 24 | if let FieldDef::FuncDef(f) = f { func_def(f, p); } 25 | } 26 | }); 27 | } 28 | 29 | pub fn func_def(f: &FuncDef, p: &mut IndentPrinter) { 30 | write!(p, "FORMAL SCOPE OF '{}':", f.name).ignore(); 31 | p.indent(|p| { 32 | show_scope(&f.scope.borrow(), p); 33 | block(&f.body, p); 34 | }); 35 | } 36 | 37 | pub fn block(b: &Block, p: &mut IndentPrinter) { 38 | write!(p, "LOCAL SCOPE:").ignore(); 39 | 
p.indent(|p| { 40 | show_scope(&b.scope.borrow(), p); 41 | for s in &b.stmt { 42 | match &s.kind { 43 | StmtKind::If(i) => { 44 | block(&i.on_true, p); 45 | if let Some(on_false) = &i.on_false { block(on_false, p); } 46 | } 47 | StmtKind::While(w) => block(&w.body, p), 48 | StmtKind::For(f) => block(&f.body, p), 49 | StmtKind::Block(b) => block(b, p), 50 | _ => {} 51 | } 52 | } 53 | }); 54 | } -------------------------------------------------------------------------------- /print/src/tac.rs: -------------------------------------------------------------------------------- 1 | use tac::{TacProgram, Tac, CallKind}; 2 | use common::{IndentPrinter, IgnoreResult}; 3 | use std::fmt::Write; 4 | 5 | pub fn program(pr: &TacProgram, p: &mut IndentPrinter) { 6 | for v in &pr.vtbl { 7 | write!(p, "VTBL<_{}> {{", v.class).ignore(); 8 | p.indent(|p| { 9 | if let Some(pa) = v.parent { 10 | write!(p, "VTBL<_{}>", pr.vtbl[pa as usize].class).ignore(); 11 | } else { write!(p, "0").ignore(); } 12 | write!(p, r#""{}""#, v.class).ignore(); 13 | for &f in &v.func { 14 | write!(p, "FUNC<{}>", pr.func[f as usize].name).ignore(); 15 | } 16 | }); 17 | write!(p, "}}\n\n").ignore(); 18 | } 19 | for f in &pr.func { 20 | write!(p, "FUNC<{}> {{", f.name).ignore(); 21 | p.indent(|p| { 22 | let mut iter = f.first; // manually iterate, because we don't have TacIter to use 23 | while let Some(t) = iter { 24 | write_tac(t.tac.get(), pr, p); 25 | iter = t.next.get(); 26 | } 27 | }); 28 | write!(p, "}}\n\n").ignore(); 29 | } 30 | } 31 | 32 | pub fn write_tac(t: Tac, pr: &TacProgram, p: &mut IndentPrinter) { 33 | use Tac::*; 34 | match t { 35 | Bin { op, dst, lr } => write!(p, "%{} = ({:?} {} {:?})", dst, lr[0], op.to_op_str(), lr[1]), 36 | Un { op, dst, r } => write!(p, "%{} = {} {:?}", dst, op.to_op_str(), r[0]), 37 | Assign { dst, src } => write!(p, "%{} = {:?}", dst, src[0]), 38 | Param { src } => write!(p, "parm {:?}", src[0]), 39 | Call { dst, kind, } => write!(p, "{}call {}", dst.map(|dst| 
format!("%{} = ", dst)).unwrap_or(String::new()), match kind { 40 | CallKind::Virtual(fp, _) => format!("{:?}", fp[0]), 41 | CallKind::Static(f, _) => pr.func[f as usize].name.clone(), 42 | CallKind::Intrinsic(i) => format!("{:?}", i), 43 | }), 44 | Ret { src } => if let Some(src) = src { write!(p, "return {:?}", src[0]) } else { write!(p, "return") }, 45 | Jmp { label } => write!(p, "branch %{}", label), 46 | Jif { label, z, cond } => write!(p, "if ({:?} {} 0) branch %{}", cond[0], if z { "==" } else { "!=" }, label), 47 | Label { label } => write!(p, "%{}:", label), 48 | Load { dst, base, off, .. } => write!(p, "%{} = *({:?} {} {})", dst, base[0], if off >= 0 { '+' } else { '-' }, off.abs()), 49 | Store { src_base, off, .. } => write!(p, "*({:?} {} {}) = {:?}", src_base[1], if off >= 0 { '+' } else { '-' }, off.abs(), src_base[0]), 50 | LoadStr { dst, s } => write!(p, "%{} = \"{}\"", dst, pr.str_pool.get_index(s as usize).unwrap()), 51 | LoadVTbl { dst, v } => write!(p, "%{} = VTBL<_{}>", dst, pr.vtbl[v as usize].class), 52 | LoadFunc { dst, f } => write!(p, "%{} = FUNC<{}>", dst, pr.func[f as usize].name), 53 | }.ignore(); 54 | } -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 | # Introduction 2 | 3 | The public version of decaf pa. 4 | 5 | The code involved in pa1a, pa2, pa3 is the same as in the private version of decaf pa, because in them your task is only extending language features, which differ from year to year. By contrast, the code involved in pa1b, pa4, pa5 may be missing necessary code, because in them your task involves completing existing code, which is the same every year. 6 | 7 | # Testcases 8 | 9 | They are on the way; for now the `testcase` folder doesn't contain any testcases. We will later identify a set of testcases that are suitable for publication. 
10 | 11 | # Documentation (experiment guide) 12 | 13 | [decaf-doc](https://mashplant.gitbook.io/decaf-doc/) 14 | 15 | # Build & Run 16 | 17 | You need a nightly rust compiler. It is tested on `rustc 1.38.0-nightly`; there is no guarantee about any older version (I believe that a newer version won't break the code). 18 | 19 | Run: 20 | 21 | ``` 22 | cargo run --bin test # for testing your implementation using the testcase folder 23 | # or 24 | cargo run --bin decaf # for a command line app 25 | ``` 26 | 27 | The command line app (with name `decaf`) supports the following arguments: 28 | 29 | ``` 30 | <input> # required, the input decaf file path 31 | --target=<target> # required, can be pa1a, pa1b, pa2, pa3, pa4, pa5 32 | --output=<output> # optional, the output path; if not specified, it prints to stdout 33 | ``` 34 | 35 | # Common problems 36 | 37 | 1. The color (printed by `test`) is not working properly on Windows 38 | 39 | Add `colored::control::set_override(false);` before calling `test_all` to disable color. 40 | 41 | 2. --target=pa1b/pa4/pa5 panicked at `unimplemented!()` 42 | 43 | Of course, they are simply not implemented. But there is also fallback code for the unimplemented code: 44 | 45 | - to make pa1b work, in `syntax/src/parser_ll.rs`, change the line `unimplemented!()` to `return StackItem::_Fail;` 46 | - to make pa4 work, in `tacopt/src/bb.rs`, remove the line `crate::aliveness::work(self);` 47 | - to make pa5 work, first make pa4 work, then in `driver/src/lib.rs`, change the line `let asm = FuncGen::work(&fu, &tp, codegen::AllocMethod::Graph);` to `let asm = FuncGen::work(&fu, &tp, codegen::AllocMethod::Brute);` 48 | 49 | Of course the fallback code won't have exactly the same functionality as the code we expect you to implement, but at least it can make the compiler work. 
-------------------------------------------------------------------------------- /syntax/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "syntax" 3 | version = "0.1.0" 4 | authors = ["MashPlant <740678788@qq.com>"] 5 | edition = "2018" 6 | 7 | [dependencies] 8 | common = { path = "../common" } 9 | parser-macros = { git = "https://github.com/MashPlant/lalr1" } 10 | typed-arena = "1.4.1" 11 | lazy_static = "1.3.0" 12 | derive_more = "0.15.0" -------------------------------------------------------------------------------- /syntax/src/ast.rs: -------------------------------------------------------------------------------- 1 | use crate::{ty::*, symbol::*}; 2 | use common::{Loc, Ref, BinOp, UnOp}; 3 | use typed_arena::Arena; 4 | use std::cell::{Cell, RefCell}; 5 | 6 | #[derive(Default)] 7 | pub struct ASTAlloc<'a> { 8 | pub(crate) class: Arena>, 9 | pub(crate) func: Arena>, 10 | pub(crate) var: Arena>, 11 | pub(crate) program: Arena>, 12 | } 13 | 14 | // all Cell> in `Program` and statements are initialized in symbol_pass.rs 15 | // all ... 
in expressions are initialized in type_pass.rs 16 | 17 | pub struct Program<'a> { 18 | pub class: Vec<&'a ClassDef<'a>>, 19 | pub main: Cell>>, 20 | pub scope: RefCell>, 21 | } 22 | 23 | pub struct ClassDef<'a> { 24 | pub loc: Loc, 25 | pub name: &'a str, 26 | pub parent: Option<&'a str>, 27 | pub field: Vec>, 28 | pub parent_ref: Cell>>, 29 | pub scope: RefCell>, 30 | } 31 | 32 | impl<'a> ClassDef<'a> { 33 | pub fn extends(&self, rhs: &ClassDef<'a>) -> bool { 34 | let mut c = self; 35 | loop { 36 | if Ref(c) == Ref(rhs) { break true; } 37 | if let Some(p) = c.parent_ref.get() { c = p; } else { break false; } 38 | } 39 | } 40 | 41 | // will recursively look up in all its parents 42 | pub fn lookup(&self, name: &str) -> Option> { 43 | let mut c = self; 44 | loop { 45 | match c.scope.borrow().get(name) { 46 | Some(&symbol) => break Some(symbol), 47 | None => match c.parent_ref.get() { 48 | Some(p) => c = p, 49 | None => break None, 50 | } 51 | } 52 | } 53 | } 54 | } 55 | 56 | #[derive(derive_more::From, Copy, Clone)] 57 | pub enum FieldDef<'a> { 58 | FuncDef(&'a FuncDef<'a>), 59 | VarDef(&'a VarDef<'a>), 60 | } 61 | 62 | impl FieldDef<'_> { 63 | pub fn loc(&self) -> Loc { 64 | match self { FieldDef::FuncDef(f) => f.loc, FieldDef::VarDef(v) => v.loc } 65 | } 66 | } 67 | 68 | pub struct FuncDef<'a> { 69 | pub loc: Loc, 70 | pub name: &'a str, 71 | pub ret: SynTy<'a>, 72 | pub param: Vec<&'a VarDef<'a>>, 73 | pub static_: bool, 74 | pub body: Block<'a>, 75 | // placing ret and param ty in one slice is mainly to save some space, especially the size of struct Ty 76 | // [0] is ret_ty, [1..] 
is parm_ty 77 | pub ret_param_ty: Cell]>>, 78 | // `class` will always be set during typeck (no matter whether it is static) 79 | pub class: Cell>>, 80 | pub scope: RefCell>, 81 | } 82 | 83 | impl<'a> FuncDef<'a> { 84 | pub fn ret_ty(&self) -> Ty<'a> { self.ret_param_ty.get().unwrap()[0] } 85 | } 86 | 87 | pub struct VarDef<'a> { 88 | pub loc: Loc, 89 | pub name: &'a str, 90 | pub syn_ty: SynTy<'a>, 91 | // if this is in an ClassDef, `init` must be None 92 | // if `syn_ty` is `Var` (not in the basic framework), `init` must be Some 93 | pub init: Option<(Loc, Expr<'a>)>, 94 | pub ty: Cell>, 95 | pub owner: Cell>>, 96 | } 97 | 98 | impl<'a> VarDef<'a> { 99 | pub fn init(&self) -> Option<&Expr<'a>> { self.init.as_ref().map(|(_, e)| e) } 100 | } 101 | 102 | pub struct Stmt<'a> { 103 | pub loc: Loc, 104 | pub kind: StmtKind<'a>, 105 | } 106 | 107 | #[derive(derive_more::From)] 108 | pub enum StmtKind<'a> { 109 | // below 4 are Simple 110 | Assign(Assign<'a>), 111 | LocalVarDef(&'a VarDef<'a>), 112 | ExprEval(Expr<'a>), 113 | Skip(Skip), 114 | // rust-clippy tells me this variant's size is too big, use a Box to wrap it 115 | If(Box>), 116 | While(While<'a>), 117 | For(For<'a>), 118 | Return(Option>), 119 | Print(Vec>), 120 | Break(Break), 121 | Block(Block<'a>), 122 | } 123 | 124 | pub struct Assign<'a> { 125 | pub dst: Expr<'a>, 126 | pub src: Expr<'a>, 127 | } 128 | 129 | pub struct Block<'a> { 130 | pub loc: Loc, 131 | pub stmt: Vec>, 132 | pub scope: RefCell>, 133 | } 134 | 135 | pub struct If<'a> { 136 | pub cond: Expr<'a>, 137 | pub on_true: Block<'a>, 138 | pub on_false: Option>, 139 | } 140 | 141 | pub struct While<'a> { 142 | pub cond: Expr<'a>, 143 | pub body: Block<'a>, 144 | } 145 | 146 | pub struct For<'a> { 147 | pub init: Box>, 148 | pub cond: Expr<'a>, 149 | pub update: Box>, 150 | pub body: Block<'a>, 151 | } 152 | 153 | pub struct Expr<'a> { 154 | pub loc: Loc, 155 | pub ty: Cell>, 156 | pub kind: ExprKind<'a>, 157 | } 158 | 159 | 
#[derive(derive_more::From)] 160 | pub enum ExprKind<'a> { 161 | VarSel(VarSel<'a>), 162 | IndexSel(IndexSel<'a>), 163 | IntLit(i32), 164 | BoolLit(bool), 165 | StringLit(&'a str), 166 | NullLit(NullLit), 167 | Call(Call<'a>), 168 | Unary(Unary<'a>), 169 | Binary(Binary<'a>), 170 | This(This), 171 | ReadInt(ReadInt), 172 | ReadLine(ReadLine), 173 | NewClass(NewClass<'a>), 174 | NewArray(NewArray<'a>), 175 | ClassTest(ClassTest<'a>), 176 | ClassCast(ClassCast<'a>), 177 | } 178 | 179 | pub struct VarSel<'a> { 180 | pub owner: Option>>, 181 | pub name: &'a str, 182 | pub var: Cell>>, 183 | } 184 | 185 | pub struct IndexSel<'a> { 186 | pub arr: Box>, 187 | pub idx: Box>, 188 | } 189 | 190 | pub struct Call<'a> { 191 | // the framework only support `func` as VarSel 192 | // hint: there are 2 places using `func` as VarSel, and there are 2 unimplemented!() respectively 193 | pub func: Box>, 194 | pub arg: Vec>, 195 | pub func_ref: Cell>>, 196 | } 197 | 198 | pub struct Binary<'a> { 199 | pub op: BinOp, 200 | pub l: Box>, 201 | pub r: Box>, 202 | } 203 | 204 | pub struct Unary<'a> { 205 | pub op: UnOp, 206 | pub r: Box>, 207 | } 208 | 209 | pub struct NewClass<'a> { 210 | pub name: &'a str, 211 | pub class: Cell>>, 212 | } 213 | 214 | pub struct NewArray<'a> { 215 | pub elem: SynTy<'a>, 216 | pub len: Box>, 217 | } 218 | 219 | pub struct ClassTest<'a> { 220 | pub expr: Box>, 221 | pub name: &'a str, 222 | pub class: Cell>>, 223 | } 224 | 225 | pub struct ClassCast<'a> { 226 | pub name: &'a str, 227 | pub expr: Box>, 228 | pub class: Cell>>, 229 | } 230 | 231 | // some unit struct, they exist just to make match pattern consistent(all patterns are like Xxx(x)) 232 | pub struct Skip; 233 | 234 | pub struct Break; 235 | 236 | pub struct NullLit; 237 | 238 | pub struct This; 239 | 240 | pub struct ReadInt; 241 | 242 | pub struct ReadLine; 243 | -------------------------------------------------------------------------------- /syntax/src/lib.rs: 
-------------------------------------------------------------------------------- 1 | #![feature(proc_macro_hygiene)] // allow proc macro output macro definition 2 | 3 | pub mod ast; 4 | pub mod parser; 5 | pub mod parser_ll; 6 | pub mod ty; 7 | pub mod symbol; 8 | 9 | pub use ast::*; 10 | pub use ty::*; 11 | pub use symbol::*; 12 | 13 | // below are some helper functions for parser 14 | 15 | use common::{Loc, Errors, ErrorKind, NO_LOC}; 16 | 17 | // save a little typing than writing "Default::default()" 18 | pub(crate) fn dft() -> T { T::default() } 19 | 20 | pub(crate) fn mk_stmt(loc: Loc, kind: StmtKind) -> Stmt { Stmt { loc, kind } } 21 | 22 | pub(crate) fn mk_expr(loc: Loc, kind: ExprKind) -> Expr { Expr { loc, ty: dft(), kind } } 23 | 24 | pub(crate) fn mk_int_lit<'a, T>(loc: Loc, s: &'a str, error: &mut Errors<'a, T>) -> Expr<'a> { 25 | let val = if s.starts_with("0x") { i32::from_str_radix(&s[2..], 16) } else { s.parse() } 26 | .unwrap_or_else(|_| error.issue(loc, ErrorKind::IntTooLarge(s))); 27 | mk_expr(loc, val.into()) 28 | } 29 | 30 | // make a block from a single statement(which may already be a block) 31 | fn mk_block(s: Stmt) -> Block { 32 | if let StmtKind::Block(b) = s.kind { b } else { Block { loc: s.loc, stmt: vec![s], scope: dft() } } 33 | } 34 | 35 | pub(crate) trait VecExt: Sized { 36 | type Item; 37 | 38 | fn pushed(self, i: ::Item) -> Self; 39 | 40 | fn reversed(self) -> Self; 41 | } 42 | 43 | impl VecExt for Vec { 44 | type Item = T; 45 | 46 | fn pushed(mut self, i: Self::Item) -> Self { (self.push(i), self).1 } 47 | 48 | fn reversed(mut self) -> Self { (self.reverse(), self).1 } 49 | } 50 | 51 | // assume s begin with ", this is not checked 52 | pub(crate) fn check_str<'a, T>(s: &'a str, error: &mut Errors<'a, T>, mut loc: Loc) { 53 | if s.len() <= 1 || !s.ends_with('"') { 54 | error.issue(loc, ErrorKind::UnclosedStr(&s[1..])) 55 | } 56 | let s = &s[1..s.len() - 1]; 57 | loc.next_col(); 58 | let mut escape = NO_LOC; 59 | let mut idx = 0; 60 
| for ch in s.chars() { 61 | idx += ch.len_utf8(); 62 | match ch { 63 | '\\' => escape = if escape == NO_LOC { loc } else { NO_LOC }, 64 | 'n' | 'r' | 't' | '"' => escape = NO_LOC, 65 | '\r' => continue, // just ignore 66 | _ => { 67 | if escape != NO_LOC { 68 | error.issue::<()>(escape, ErrorKind::InvalidEscape); 69 | escape = NO_LOC; 70 | } 71 | // for NewlineInStr error, the reported string segment is from beginning to(including) this '\n' 72 | // (though I don't think it is very sensible, I think reporting the whole string will be better) 73 | if ch == '\n' { error.issue(loc, ErrorKind::NewlineInStr(&s[0..idx])) } 74 | } 75 | } 76 | if ch == '\n' { loc.next_line(); } else { loc.next_col(); } 77 | } 78 | if escape != NO_LOC { 79 | error.issue(escape, ErrorKind::InvalidEscape) 80 | } 81 | } -------------------------------------------------------------------------------- /syntax/src/parser.rs: -------------------------------------------------------------------------------- 1 | use crate::{ast::*, ty::*, VecExt, dft, check_str, mk_stmt, mk_expr, mk_int_lit, mk_block}; 2 | use parser_macros::lalr1; 3 | use common::{ErrorKind, Loc, BinOp, UnOp, Errors, NO_LOC}; 4 | 5 | pub fn work<'p>(code: &'p str, alloc: &'p ASTAlloc<'p>) -> Result<&'p Program<'p>, Errors<'p, Ty<'p>>> { 6 | let mut parser = Parser { alloc, error: Errors::default() }; 7 | let mut lexer = Lexer::new(code.as_bytes()); // Lexer can be used independently from Parser, you can use it to debug 8 | match parser.parse(&mut lexer) { 9 | Ok(program) if parser.error.0.is_empty() => Ok(program), 10 | Err(token) => { 11 | let mut error = parser.error; 12 | let loc = Loc(token.line, token.col); 13 | match token.ty { 14 | TokenKind::_Err => error.issue(loc, ErrorKind::UnrecognizedChar(token.piece[0] as char)), 15 | TokenKind::UntermString => { 16 | check_str(token.str(), &mut error, loc); 17 | error.issue(lexer.loc(), ErrorKind::SyntaxError) 18 | } 19 | _ => error.issue(loc, ErrorKind::SyntaxError), 20 | } 21 | 
Err(error) 22 | } 23 | _ => Err(parser.error), 24 | } 25 | } 26 | 27 | pub struct Parser<'p> { 28 | pub alloc: &'p ASTAlloc<'p>, 29 | // just some simple errors like IntTooLarger, cannot recover or record parser errors 30 | pub error: Errors<'p, Ty<'p>>, 31 | } 32 | 33 | impl<'p> Token<'p> { 34 | pub fn str(&self) -> &'p str { std::str::from_utf8(self.piece).unwrap() } 35 | pub fn loc(&self) -> Loc { Loc(self.line, self.col) } 36 | } 37 | 38 | impl Lexer<'_> { 39 | pub fn loc(&self) -> Loc { Loc(self.line, self.col) } 40 | } 41 | 42 | fn mk_bin<'p>(l: Expr<'p>, r: Expr<'p>, loc: Loc, op: BinOp) -> Expr<'p> { 43 | mk_expr(loc, Binary { l: Box::new(l), op, r: Box::new(r) }.into()) 44 | } 45 | 46 | #[lalr1(Program)] 47 | #[lex(r##" 48 | priority = [ 49 | { assoc = 'left', terms = ['Or'] }, 50 | { assoc = 'left', terms = ['And'] }, 51 | { assoc = 'left', terms = ['Eq', 'Ne'] }, 52 | { assoc = 'no_assoc', terms = ['Le', 'Ge', 'Lt', 'Gt'] }, 53 | { assoc = 'left', terms = ['Add', 'Sub'] }, 54 | { assoc = 'left', terms = ['Mul', 'Div', 'Mod'] }, 55 | { assoc = 'left', terms = ['UMinus', 'Not', 'RPar'] }, 56 | { assoc = 'left', terms = ['LBrk', 'Dot', 'LPar'] }, 57 | { assoc = 'left', terms = ['Empty'] }, 58 | { assoc = 'left', terms = ['Else'] }, 59 | ] 60 | 61 | [lexical] 62 | 'void' = 'Void' 63 | 'int' = 'Int' 64 | 'bool' = 'Bool' 65 | 'string' = 'String' 66 | 'new' = 'New' 67 | 'null' = 'Null' 68 | 'true' = 'True' 69 | 'false' = 'False' 70 | 'class' = 'Class' 71 | 'extends' = 'Extends' 72 | 'this' = 'This' 73 | 'while' = 'While' 74 | 'for' = 'For' 75 | 'if' = 'If' 76 | 'else' = 'Else' 77 | 'return' = 'Return' 78 | 'break' = 'Break' 79 | 'Print' = 'Print' 80 | 'ReadInteger' = 'ReadInteger' 81 | 'ReadLine' = 'ReadLine' 82 | 'static' = 'Static' 83 | 'instanceof' = 'InstanceOf' 84 | '<=' = 'Le' 85 | '>=' = 'Ge' 86 | '==' = 'Eq' 87 | '!=' = 'Ne' 88 | '&&' = 'And' 89 | '\|\|' = 'Or' 90 | '\+' = 'Add' 91 | '-' = 'Sub' 92 | '\*' = 'Mul' 93 | '/' = 'Div' 94 | '%' = 'Mod' 95 | 
'=' = 'Assign' 96 | '<' = 'Lt' 97 | '>' = 'Gt' 98 | '\.' = 'Dot' 99 | ',' = 'Comma' 100 | ';' = 'Semi' # short for semicolon 101 | '!' = 'Not' 102 | '\(' = 'LPar' # short for parenthesis 103 | '\)' = 'RPar' 104 | '\[' = 'LBrk' # short for bracket 105 | '\]' = 'RBrk' 106 | '\{' = 'LBrc' # short for brace 107 | '\}' = 'RBrc' 108 | ':' = 'Colon' 109 | # line break in a StringLit will be reported by parser's semantic act 110 | '"[^"\\]*(\\.[^"\\]*)*"' = 'StringLit' 111 | '"[^"\\]*(\\.[^"\\]*)*' = 'UntermString' 112 | '//[^\n]*' = '_Eps' 113 | '\s+' = '_Eps' 114 | '\d+|(0x[0-9a-fA-F]+)' = 'IntLit' 115 | '[A-Za-z]\w*' = 'Id' 116 | '.' = '_Err' 117 | "##)] 118 | impl<'p> Parser<'p> { 119 | #[rule(Program -> ClassList)] 120 | fn program(&self, class: Vec<&'p ClassDef<'p>>) -> &'p Program<'p> { 121 | self.alloc.program.alloc(Program { class, main: dft(), scope: dft() }) 122 | } 123 | 124 | #[rule(ClassList -> ClassList ClassDef)] 125 | fn class_list(l: Vec<&'p ClassDef<'p>>, r: &'p ClassDef<'p>) -> Vec<&'p ClassDef<'p>> { l.pushed(r) } 126 | #[rule(ClassList -> ClassDef)] 127 | fn class_list1(c: &'p ClassDef<'p>) -> Vec<&'p ClassDef<'p>> { vec![c] } 128 | 129 | #[rule(ClassDef -> Class Id MaybeExtends LBrc FieldList RBrc)] 130 | fn class_def(&self, c: Token, name: Token, parent: Option<&'p str>, _l: Token, field: Vec>, _r: Token) -> &'p ClassDef<'p> { 131 | self.alloc.class.alloc(ClassDef { loc: c.loc(), name: name.str(), parent, field, parent_ref: dft(), scope: dft() }) 132 | } 133 | 134 | #[rule(MaybeExtends -> Extends Id)] 135 | fn maybe_extends1(_e: Token, name: Token) -> Option<&'p str> { Some(name.str()) } 136 | #[rule(MaybeExtends ->)] 137 | fn maybe_extends0() -> Option<&'p str> { None } 138 | 139 | #[rule(FieldList -> FieldList VarDef Semi)] 140 | fn field_list_v(l: Vec>, r: &'p VarDef<'p>, _s: Token) -> Vec> { l.pushed(r.into()) } 141 | #[rule(FieldList -> FieldList FuncDef)] 142 | fn field_list_f(l: Vec>, r: &'p FuncDef<'p>) -> Vec> { l.pushed(r.into()) } 143 | 
#[rule(FieldList ->)] 144 | fn field_list0() -> Vec> { vec![] } 145 | 146 | #[rule(FuncDef -> Static Type Id LPar VarDefListOrEmpty RPar Block)] 147 | fn func_def1(&self, _s: Token, ret: SynTy<'p>, name: Token, _l: Token, param: Vec<&'p VarDef<'p>>, _r: Token, body: Block<'p>) -> &'p FuncDef<'p> { 148 | self.alloc.func.alloc(FuncDef { loc: name.loc(), name: name.str(), ret, param, static_: true, body, ret_param_ty: dft(), class: dft(), scope: dft() }) 149 | } 150 | #[rule(FuncDef -> Type Id LPar VarDefListOrEmpty RPar Block)] 151 | fn func_def0(&self, ret: SynTy<'p>, name: Token, _l: Token, param: Vec<&'p VarDef<'p>>, _r: Token, body: Block<'p>) -> &'p FuncDef<'p> { 152 | self.alloc.func.alloc(FuncDef { loc: name.loc(), name: name.str(), ret, param, static_: false, body, ret_param_ty: dft(), class: dft(), scope: dft() }) 153 | } 154 | 155 | // the `VarDef` in grammar only supports VarDef without init value 156 | #[rule(VarDef -> Type Id)] 157 | fn var_def(&self, syn_ty: SynTy<'p>, name: Token) -> &'p VarDef<'p> { 158 | self.alloc.var.alloc(VarDef { loc: name.loc(), name: name.str(), syn_ty, init: None, ty: dft(), owner: dft() }) 159 | } 160 | 161 | #[rule(VarDefListOrEmpty -> VarDefList)] 162 | fn var_def_list_or_empty1(l: Vec<&'p VarDef<'p>>) -> Vec<&'p VarDef<'p>> { l } 163 | #[rule(VarDefListOrEmpty ->)] 164 | fn var_def_list_or_empty0() -> Vec<&'p VarDef<'p>> { vec![] } 165 | 166 | #[rule(VarDefList -> VarDefList Comma VarDef)] 167 | fn var_def_list(l: Vec<&'p VarDef<'p>>, _c: Token, r: &'p VarDef<'p>) -> Vec<&'p VarDef<'p>> { l.pushed(r) } 168 | #[rule(VarDefList -> VarDef)] 169 | fn var_def_list1(v: &'p VarDef<'p>) -> Vec<&'p VarDef<'p>> { vec![v] } 170 | 171 | #[rule(Block -> LBrc StmtList RBrc)] 172 | fn block(l: Token, stmt: Vec>, _r: Token) -> Block<'p> { Block { loc: l.loc(), stmt, scope: dft() } } 173 | 174 | #[rule(StmtList -> StmtList Stmt)] 175 | fn stmt_list(l: Vec>, r: Stmt<'p>) -> Vec> { l.pushed(r) } 176 | #[rule(StmtList ->)] 177 | fn 
stmt_list0() -> Vec> { vec![] } 178 | 179 | #[rule(Stmt -> Simple Semi)] 180 | fn stmt_simple(s: Stmt<'p>, _s: Token) -> Stmt<'p> { s } 181 | #[rule(Stmt -> If LPar Expr RPar Stmt MaybeElse)] 182 | fn stmt_if(i: Token, _l: Token, cond: Expr<'p>, _r: Token, on_true: Stmt<'p>, on_false: Option>) -> Stmt<'p> { 183 | mk_stmt(i.loc(), Box::new(If { cond, on_true: mk_block(on_true), on_false }).into()) 184 | } 185 | #[rule(Stmt -> While LPar Expr RPar Stmt)] 186 | fn stmt_while(w: Token, _l: Token, cond: Expr<'p>, _r: Token, body: Stmt<'p>) -> Stmt<'p> { 187 | mk_stmt(w.loc(), While { cond, body: mk_block(body) }.into()) 188 | } 189 | #[rule(Stmt -> For LPar Simple Semi Expr Semi Simple RPar Stmt)] 190 | fn stmt_for(f: Token, _l: Token, init: Stmt<'p>, _s1: Token, cond: Expr<'p>, _s2: Token, update: Stmt<'p>, _r: Token, body: Stmt<'p>) -> Stmt<'p> { 191 | mk_stmt(f.loc(), For { init: Box::new(init), cond, update: Box::new(update), body: mk_block(body) }.into()) 192 | } 193 | #[rule(Stmt -> Return Expr Semi)] 194 | fn stmt_return1(r: Token, expr: Expr<'p>, _s: Token) -> Stmt<'p> { mk_stmt(r.loc(), Some(expr).into()) } 195 | #[rule(Stmt -> Return Semi)] 196 | fn stmt_return0(r: Token, _s: Token) -> Stmt<'p> { mk_stmt(r.loc(), None.into()) } 197 | #[rule(Stmt -> Print LPar ExprList RPar Semi)] 198 | fn stmt_print(p: Token, _l: Token, print: Vec>, _r: Token, _s: Token) -> Stmt<'p> { mk_stmt(p.loc(), print.into()) } 199 | #[rule(Stmt -> Break Semi)] 200 | fn stmt_break(b: Token, _s: Token) -> Stmt<'p> { mk_stmt(b.loc(), Break.into()) } 201 | #[rule(Stmt -> Block)] 202 | fn stmt_block(b: Block<'p>) -> Stmt<'p> { mk_stmt(b.loc, b.into()) } 203 | 204 | #[rule(MaybeElse -> Else Stmt)] 205 | fn maybe_else1(_e: Token, b: Stmt<'p>) -> Option> { Some(mk_block(b)) } 206 | #[rule(MaybeElse ->)] 207 | #[prec(Empty)] 208 | fn maybe_else0() -> Option> { None } 209 | 210 | #[rule(Simple -> LValue Assign Expr)] 211 | fn simple_assign(dst: Expr<'p>, a: Token, src: Expr<'p>) -> Stmt<'p> { 
mk_stmt(a.loc(), Assign { dst, src }.into()) } 212 | #[rule(Simple -> VarDef)] // the VarDef without init 213 | fn simple_var_def(v: &'p VarDef<'p>) -> Stmt<'p> { mk_stmt(v.loc, v.into()) } 214 | #[rule(Simple -> Type Id Assign Expr)] // the VarDef with init 215 | fn simple_var_def_init(&self, syn_ty: SynTy<'p>, name: Token, a: Token, init: Expr<'p>) -> Stmt<'p> { 216 | let loc = name.loc(); 217 | mk_stmt(loc, (&*self.alloc.var.alloc(VarDef { loc, name: name.str(), syn_ty, init: Some((a.loc(), init)), ty: dft(), owner: dft() })).into()) 218 | } 219 | #[rule(Simple -> Expr)] 220 | fn simple_mk_expr(e: Expr<'p>) -> Stmt<'p> { mk_stmt(e.loc, e.into()) } 221 | #[rule(Simple ->)] 222 | fn simple_skip() -> Stmt<'p> { mk_stmt(NO_LOC, Skip.into()) } 223 | 224 | #[rule(Expr -> LValue)] 225 | fn expr_lvalue(l: Expr<'p>) -> Expr<'p> { l } 226 | #[rule(Expr -> VarSel LPar ExprListOrEmpty RPar)] 227 | fn expr_call(func: Expr<'p>, l: Token, arg: Vec>, _r: Token) -> Expr<'p> { 228 | mk_expr(l.loc(), Call { func: Box::new(func), arg, func_ref: dft() }.into()) 229 | } 230 | #[rule(Expr -> IntLit)] 231 | fn expr_int(&mut self, i: Token) -> Expr<'p> { mk_int_lit(i.loc(), i.str(), &mut self.error) } 232 | #[rule(Expr -> True)] 233 | fn expr_true(t: Token) -> Expr<'p> { mk_expr(t.loc(), true.into()) } 234 | #[rule(Expr -> False)] 235 | fn expr_false(f: Token) -> Expr<'p> { mk_expr(f.loc(), false.into()) } 236 | #[rule(Expr -> StringLit)] 237 | fn expr_string(&mut self, s: Token) -> Expr<'p> { 238 | let (loc, str) = (s.loc(), s.str()); 239 | check_str(str, &mut self.error, loc); 240 | mk_expr(loc, str[1..str.len() - 1].into()) 241 | } 242 | #[rule(Expr -> Null)] 243 | fn expr_null(n: Token) -> Expr<'p> { mk_expr(n.loc(), NullLit.into()) } 244 | #[rule(Expr -> LPar Expr RPar)] 245 | fn expr_paren(_l: Token, m: Expr<'p>, _r: Token) -> Expr<'p> { m } 246 | #[rule(Expr -> Expr Add Expr)] 247 | fn expr_add(l: Expr<'p>, op: Token, r: Expr<'p>) -> Expr<'p> { mk_bin(l, r, op.loc(), BinOp::Add) 
} 248 | #[rule(Expr -> Expr Sub Expr)] 249 | fn expr_sub(l: Expr<'p>, op: Token, r: Expr<'p>) -> Expr<'p> { mk_bin(l, r, op.loc(), BinOp::Sub) } 250 | #[rule(Expr -> Expr Mul Expr)] 251 | fn expr_mul(l: Expr<'p>, op: Token, r: Expr<'p>) -> Expr<'p> { mk_bin(l, r, op.loc(), BinOp::Mul) } 252 | #[rule(Expr -> Expr Div Expr)] 253 | fn expr_div(l: Expr<'p>, op: Token, r: Expr<'p>) -> Expr<'p> { mk_bin(l, r, op.loc(), BinOp::Div) } 254 | #[rule(Expr -> Expr Mod Expr)] 255 | fn expr_mod(l: Expr<'p>, op: Token, r: Expr<'p>) -> Expr<'p> { mk_bin(l, r, op.loc(), BinOp::Mod) } 256 | #[rule(Expr -> Expr Eq Expr)] 257 | fn expr_eq(l: Expr<'p>, op: Token, r: Expr<'p>) -> Expr<'p> { mk_bin(l, r, op.loc(), BinOp::Eq) } 258 | #[rule(Expr -> Expr Ne Expr)] 259 | fn expr_ne(l: Expr<'p>, op: Token, r: Expr<'p>) -> Expr<'p> { mk_bin(l, r, op.loc(), BinOp::Ne) } 260 | #[rule(Expr -> Expr Lt Expr)] 261 | fn expr_lt(l: Expr<'p>, op: Token, r: Expr<'p>) -> Expr<'p> { mk_bin(l, r, op.loc(), BinOp::Lt) } 262 | #[rule(Expr -> Expr Le Expr)] 263 | fn expr_le(l: Expr<'p>, op: Token, r: Expr<'p>) -> Expr<'p> { mk_bin(l, r, op.loc(), BinOp::Le) } 264 | #[rule(Expr -> Expr Ge Expr)] 265 | fn expr_ge(l: Expr<'p>, op: Token, r: Expr<'p>) -> Expr<'p> { mk_bin(l, r, op.loc(), BinOp::Ge) } 266 | #[rule(Expr -> Expr Gt Expr)] 267 | fn expr_gt(l: Expr<'p>, op: Token, r: Expr<'p>) -> Expr<'p> { mk_bin(l, r, op.loc(), BinOp::Gt) } 268 | #[rule(Expr -> Expr And Expr)] 269 | fn expr_and(l: Expr<'p>, op: Token, r: Expr<'p>) -> Expr<'p> { mk_bin(l, r, op.loc(), BinOp::And) } 270 | #[rule(Expr -> Expr Or Expr)] 271 | fn expr_or(l: Expr<'p>, op: Token, r: Expr<'p>) -> Expr<'p> { mk_bin(l, r, op.loc(), BinOp::Or) } 272 | #[rule(Expr -> ReadInteger LPar RPar)] 273 | fn expr_read_int(r: Token, _l: Token, _r: Token) -> Expr<'p> { mk_expr(r.loc(), ReadInt.into()) } 274 | #[rule(Expr -> ReadLine LPar RPar)] 275 | fn expr_read_line(r: Token, _l: Token, _r: Token) -> Expr<'p> { mk_expr(r.loc(), ReadLine.into()) } 276 | 
#[rule(Expr -> This)] 277 | fn expr_this(t: Token) -> Expr<'p> { mk_expr(t.loc(), This.into()) } 278 | #[rule(Expr -> New Id LPar RPar)] 279 | fn expr_new_class(n: Token, name: Token, _l: Token, _r: Token) -> Expr<'p> { 280 | mk_expr(n.loc(), NewClass { name: name.str(), class: dft() }.into()) 281 | } 282 | #[rule(Expr -> New Type LBrk Expr RBrk)] 283 | fn expr_new_array(n: Token, elem: SynTy<'p>, _l: Token, len: Expr<'p>, _r: Token) -> Expr<'p> { 284 | mk_expr(n.loc(), NewArray { elem, len: Box::new(len) }.into()) 285 | } 286 | #[rule(Expr -> InstanceOf LPar Expr Comma Id RPar)] 287 | fn expr_instanceof(i: Token, _l: Token, e: Expr<'p>, _c: Token, name: Token, _r: Token) -> Expr<'p> { 288 | mk_expr(i.loc(), ClassTest { expr: Box::new(e), name: name.str(), class: dft() }.into()) 289 | } 290 | #[rule(Expr -> LPar Class Id RPar Expr)] 291 | fn expr_cast(_l: Token, _c: Token, name: Token, _r: Token, e: Expr<'p>) -> Expr<'p> { 292 | mk_expr(e.loc, ClassCast { expr: Box::new(e), name: name.str(), class: dft() }.into()) 293 | } 294 | #[rule(Expr -> Sub Expr)] 295 | #[prec(UMinus)] 296 | fn expr_neg(s: Token, r: Expr<'p>) -> Expr<'p> { 297 | mk_expr(s.loc(), Unary { op: UnOp::Neg, r: Box::new(r) }.into()) 298 | } 299 | #[rule(Expr -> Not Expr)] 300 | fn expr_not(n: Token, r: Expr<'p>) -> Expr<'p> { 301 | mk_expr(n.loc(), Unary { op: UnOp::Not, r: Box::new(r) }.into()) 302 | } 303 | 304 | #[rule(ExprList -> ExprList Comma Expr)] 305 | fn expr_list(l: Vec<Expr<'p>>, _c: Token, r: Expr<'p>) -> Vec<Expr<'p>> { l.pushed(r) } 306 | #[rule(ExprList -> Expr)] 307 | fn expr_list1(e: Expr<'p>) -> Vec<Expr<'p>> { vec![e] } 308 | 309 | #[rule(ExprListOrEmpty -> ExprList)] 310 | fn expr_list_or_empty1(e: Vec<Expr<'p>>) -> Vec<Expr<'p>> { e } 311 | #[rule(ExprListOrEmpty ->)] 312 | fn expr_list_or_empty0() -> Vec<Expr<'p>> { vec![] } 313 | 314 | #[rule(MaybeOwner -> Expr Dot)] 315 | fn maybe_owner1(e: Expr<'p>, _d: Token) -> Option<Box<Expr<'p>>> { Some(Box::new(e)) } 316 | #[rule(MaybeOwner ->)] 317 | fn maybe_owner0() -> Option<Box<Expr<'p>>> { None } 318 | 319 
| #[rule(VarSel -> MaybeOwner Id)] 320 | fn var_sel(owner: Option>>, name: Token) -> Expr<'p> { 321 | mk_expr(name.loc(), VarSel { owner, name: name.str(), var: dft() }.into()) 322 | } 323 | 324 | #[rule(LValue -> VarSel)] 325 | fn lvalue_var_sel(e: Expr<'p>) -> Expr<'p> { e } 326 | #[rule(LValue -> Expr LBrk Expr RBrk)] 327 | fn lvalue_index(arr: Expr<'p>, l: Token, idx: Expr<'p>, _r: Token) -> Expr<'p> { 328 | mk_expr(l.loc(), IndexSel { arr: Box::new(arr), idx: Box::new(idx) }.into()) 329 | } 330 | 331 | #[rule(Type -> Int)] 332 | fn type_int(i: Token) -> SynTy<'p> { SynTy { loc: i.loc(), arr: 0, kind: SynTyKind::Int } } 333 | #[rule(Type -> Bool)] 334 | fn type_bool(b: Token) -> SynTy<'p> { SynTy { loc: b.loc(), arr: 0, kind: SynTyKind::Bool } } 335 | #[rule(Type -> Void)] 336 | fn type_void(v: Token) -> SynTy<'p> { SynTy { loc: v.loc(), arr: 0, kind: SynTyKind::Void } } 337 | #[rule(Type -> String)] 338 | fn type_string(s: Token) -> SynTy<'p> { SynTy { loc: s.loc(), arr: 0, kind: SynTyKind::String } } 339 | #[rule(Type -> Class Id)] 340 | fn type_class(c: Token, name: Token) -> SynTy<'p> { SynTy { loc: c.loc(), arr: 0, kind: SynTyKind::Named(name.str()) } } 341 | #[rule(Type -> Type LBrk RBrk)] 342 | fn type_array(mut ty: SynTy<'p>, _l: Token, _r: Token) -> SynTy<'p> { (ty.arr += 1, ty).1 } 343 | } 344 | -------------------------------------------------------------------------------- /syntax/src/parser_ll.rs: -------------------------------------------------------------------------------- 1 | // many lines are just copied from parser.rs 2 | // though these types have the same name(Parser, Token, Lexer, ...), actually they are different types 3 | use crate::{ast::*, ty::*, VecExt, dft, check_str, mk_expr, mk_stmt, mk_int_lit, mk_block}; 4 | use parser_macros::ll1; 5 | use common::{ErrorKind, Loc, NO_LOC, BinOp, UnOp, Errors, HashSet, HashMap}; 6 | 7 | pub fn work<'p>(code: &'p str, alloc: &'p ASTAlloc<'p>) -> Result<&'p Program<'p>, Errors<'p, Ty<'p>>> { 8 | 
let mut parser = Parser { alloc, error: Errors::default() }; 9 | match parser.parse(&mut Lexer::new(code.as_bytes())) { 10 | Some(program) if parser.error.0.is_empty() => Ok(program), 11 | _ => Err(parser.error) 12 | } 13 | } 14 | 15 | pub struct Parser<'p> { 16 | pub alloc: &'p ASTAlloc<'p>, 17 | pub error: Errors<'p, Ty<'p>>, 18 | } 19 | 20 | impl<'p> Parser<'p> { 21 | fn error(&mut self, token: &Token<'p>, lexer_loc: Loc) { 22 | let loc = token.loc(); 23 | match token.ty { 24 | TokenKind::_Err => if self.error.0.last().map(|x| x.0) != Some(loc) { 25 | self.error.issue(loc, ErrorKind::UnrecognizedChar(token.piece[0] as char)) 26 | } 27 | TokenKind::UntermString => { 28 | check_str(token.str(), &mut self.error, loc); 29 | self.error.issue(lexer_loc, ErrorKind::SyntaxError) 30 | } 31 | _ => if self.error.0.last().map(|x| x.0) != Some(loc) { 32 | self.error.issue(loc, ErrorKind::SyntaxError) 33 | } 34 | } 35 | } 36 | 37 | // parse impl with some error recovery, called by the generated `parse` function 38 | fn _parse<'l: 'p>(&mut self, target: u32, lookahead: &mut Token<'l>, lexer: &mut Lexer<'l>, f: &HashSet<u32>) -> StackItem<'p> { 39 | let target = target as usize; 40 | // these are some global variables which may be invisible to IDE, so fetch them here for convenience 41 | let follow: &[HashSet<u32>] = &*FOLLOW; 42 | let table: &[HashMap<u32, (u32, Vec<u32>)>] = &*TABLE; 43 | let is_nt = |x: u32| x < NT_NUM; 44 | 45 | let mut end = f.clone(); 46 | end.extend(follow[target].iter()); 47 | let table = &table[target]; 48 | let (prod, rhs) = if let Some(x) = table.get(&(lookahead.ty as u32)) { x } else { 49 | self.error(lookahead, lexer.loc()); 50 | unimplemented!() 51 | }; 52 | let value_stk = rhs.iter().map(|&x| { 53 | if is_nt(x) { 54 | self._parse(x, lookahead, lexer, &end) 55 | } else if lookahead.ty as u32 == x { 56 | let token = *lookahead; 57 | *lookahead = lexer.next(); 58 | StackItem::_Token(token) 59 | } else { 60 | self.error(lookahead, lexer.loc()); 61 | StackItem::_Fail 62 | } 63 | 
}).collect::>(); 64 | self.act(*prod, value_stk) 65 | } 66 | } 67 | 68 | impl<'p> Token<'p> { 69 | pub fn str(&self) -> &'p str { std::str::from_utf8(self.piece).unwrap() } 70 | pub fn loc(&self) -> Loc { Loc(self.line, self.col) } 71 | } 72 | 73 | impl Lexer<'_> { 74 | pub fn loc(&self) -> Loc { Loc(self.line, self.col) } 75 | } 76 | 77 | type Terms<'p> = Vec<(Expr<'p>, (Loc, BinOp))>; 78 | 79 | fn merge_terms<'p>(mut l: Expr<'p>, ts: Terms<'p>) -> Expr<'p> { 80 | for (r, (loc, op)) in ts.into_iter().rev() { 81 | l = mk_expr(loc, Binary { op, l: Box::new(l), r: Box::new(r) }.into()); 82 | } 83 | l 84 | } 85 | 86 | fn merge_idx_id_call<'p>(mut l: Expr<'p>, ts: Vec>) -> Expr<'p> { 87 | for t in ts.into_iter().rev() { 88 | match t { 89 | IndexOrIdOrCall::Index(loc, idx) => 90 | l = mk_expr(loc, IndexSel { arr: Box::new(l), idx: Box::new(idx) }.into()), 91 | IndexOrIdOrCall::IdOrCall(loc, name, maybe_call) => match maybe_call { 92 | Some((call_loc, arg)) => { 93 | let func = Box::new(mk_expr(loc, VarSel { owner: Some(Box::new(l)), name, var: dft() }.into())); 94 | l = mk_expr(call_loc, Call { func, arg, func_ref: dft() }.into()); 95 | } 96 | None => l = mk_expr(loc, VarSel { owner: Some(Box::new(l)), name, var: dft() }.into()), 97 | } 98 | } 99 | } 100 | l 101 | } 102 | 103 | // this is pub because StackItem is pub(maybe you need it? 
though not very likely) 104 | pub enum IndexOrIdOrCall<'p> { 105 | Index(Loc, Expr<'p>), 106 | IdOrCall(Loc, &'p str, Option<(Loc, Vec>)>), 107 | } 108 | 109 | pub enum NewClassOrArray<'p> { 110 | NewClass(&'p str), 111 | NewArray(SynTy<'p>, Expr<'p>), 112 | } 113 | 114 | #[ll1(Program)] 115 | #[lex(r##" 116 | priority = [] 117 | 118 | [lexical] 119 | 'void' = 'Void' 120 | 'int' = 'Int' 121 | 'bool' = 'Bool' 122 | 'string' = 'String' 123 | 'new' = 'New' 124 | 'null' = 'Null' 125 | 'true' = 'True' 126 | 'false' = 'False' 127 | 'class' = 'Class' 128 | 'extends' = 'Extends' 129 | 'this' = 'This' 130 | 'while' = 'While' 131 | 'for' = 'For' 132 | 'if' = 'If' 133 | 'else' = 'Else' 134 | 'return' = 'Return' 135 | 'break' = 'Break' 136 | 'Print' = 'Print' 137 | 'ReadInteger' = 'ReadInteger' 138 | 'ReadLine' = 'ReadLine' 139 | 'static' = 'Static' 140 | 'instanceof' = 'InstanceOf' 141 | '<=' = 'Le' 142 | '>=' = 'Ge' 143 | '==' = 'Eq' 144 | '!=' = 'Ne' 145 | '&&' = 'And' 146 | '\|\|' = 'Or' 147 | '\+' = 'Add' 148 | '-' = 'Sub' 149 | '\*' = 'Mul' 150 | '/' = 'Div' 151 | '%' = 'Mod' 152 | '=' = 'Assign' 153 | '<' = 'Lt' 154 | '>' = 'Gt' 155 | '\.' = 'Dot' 156 | ',' = 'Comma' 157 | ';' = 'Semi' # short for semicolon 158 | '!' = 'Not' 159 | '\(' = 'LPar' # short for parenthesis 160 | '\)' = 'RPar' 161 | '\[' = 'LBrk' # short for bracket 162 | '\]' = 'RBrk' 163 | '\{' = 'LBrc' # short for brace 164 | '\}' = 'RBrc' 165 | ':' = 'Colon' 166 | # line break in a StringLit will be reported by parser's semantic act 167 | '"[^"\\]*(\\.[^"\\]*)*"' = 'StringLit' 168 | '"[^"\\]*(\\.[^"\\]*)*' = 'UntermString' 169 | '//[^\n]*' = '_Eps' 170 | '\s+' = '_Eps' 171 | '\d+|(0x[0-9a-fA-F]+)' = 'IntLit' 172 | '[A-Za-z]\w*' = 'Id' 173 | '.' 
= '_Err' 174 | "##)] 175 | impl<'p> Parser<'p> { 176 | #[rule(Program -> ClassList)] 177 | fn program(&self, class: Vec<&'p ClassDef<'p>>) -> &'p Program<'p> { 178 | self.alloc.program.alloc(Program { class: class.reversed(), main: dft(), scope: dft() }) 179 | } 180 | 181 | // with this right-recursive rule each class is pushed after the classes that follow it, so the Vec ends up in reverse source order 182 | // the `class.reversed()` in Program -> ClassList restores the source order 183 | // the same method is applied in many places (for consistency, I recommend all XxxList to be reversed) 184 | #[rule(ClassList -> ClassDef ClassList)] 185 | fn class_list(l: &'p ClassDef<'p>, r: Vec<&'p ClassDef<'p>>) -> Vec<&'p ClassDef<'p>> { r.pushed(l) } 186 | #[rule(ClassList ->)] 187 | fn class_list1() -> Vec<&'p ClassDef<'p>> { vec![] } 188 | 189 | #[rule(ClassDef -> Class Id MaybeExtends LBrc FieldList RBrc)] 190 | fn class_def(&self, c: Token, name: Token, parent: Option<&'p str>, _l: Token, field: Vec<FieldDef<'p>>, _r: Token) -> &'p ClassDef<'p> { 191 | self.alloc.class.alloc(ClassDef { loc: c.loc(), name: name.str(), parent, field: field.reversed(), parent_ref: dft(), scope: dft() }) 192 | } 193 | 194 | #[rule(MaybeExtends -> Extends Id)] 195 | fn maybe_extends1(_e: Token, name: Token) -> Option<&'p str> { Some(name.str()) } 196 | #[rule(MaybeExtends ->)] 197 | fn maybe_extends0() -> Option<&'p str> { None } 198 | 199 | #[rule(FieldList -> FieldDef FieldList)] 200 | fn field_list(l: FieldDef<'p>, r: Vec<FieldDef<'p>>) -> Vec<FieldDef<'p>> { r.pushed(l) } 201 | #[rule(FieldList ->)] 202 | fn field_list0() -> Vec<FieldDef<'p>> { vec![] } 203 | 204 | #[rule(FieldDef -> Static Type Id LPar VarDefListOrEmpty RPar Block)] 205 | fn field_def_f1(&self, _s: Token, ret: SynTy<'p>, name: Token, _l: Token, param: Vec<&'p VarDef<'p>>, _r: Token, body: Block<'p>) -> FieldDef<'p> { 206 | let (loc, name) = (name.loc(), name.str()); 207 | FieldDef::FuncDef(self.alloc.func.alloc(FuncDef { loc, name, ret, param: param.reversed(), static_: true, body, ret_param_ty: dft(), class: dft(), scope: 
dft() })) 208 | } 209 | #[rule(FieldDef -> Type Id FuncOrVar)] 210 | fn field_def_fv(&self, syn_ty: SynTy<'p>, name: Token, fov: Option<(Vec<&'p VarDef<'p>>, Block<'p>)>) -> FieldDef<'p> { 211 | let (loc, name) = (name.loc(), name.str()); 212 | if let Some((param, body)) = fov { 213 | FieldDef::FuncDef(self.alloc.func.alloc(FuncDef { loc, name, ret: syn_ty, param: param.reversed(), static_: false, body, ret_param_ty: dft(), class: dft(), scope: dft() })) 214 | } else { 215 | FieldDef::VarDef(self.alloc.var.alloc(VarDef { loc, name, syn_ty, init: None, ty: dft(), owner: dft() })) 216 | } 217 | } 218 | 219 | #[rule(FuncOrVar -> LPar VarDefListOrEmpty RPar Block)] 220 | fn func_or_var_f(_l: Token, param: Vec<&'p VarDef<'p>>, _r: Token, body: Block<'p>) -> Option<(Vec<&'p VarDef<'p>>, Block<'p>)> { Some((param, body)) } 221 | #[rule(FuncOrVar -> Semi)] 222 | fn func_or_var_v(_s: Token) -> Option<(Vec<&'p VarDef<'p>>, Block<'p>)> { None } 223 | 224 | #[rule(VarDefListOrEmpty -> VarDefList)] 225 | fn var_def_list_or_empty1(l: Vec<&'p VarDef<'p>>) -> Vec<&'p VarDef<'p>> { l } 226 | #[rule(VarDefListOrEmpty ->)] 227 | fn var_def_list_or_empty0() -> Vec<&'p VarDef<'p>> { vec![] } 228 | #[rule(VarDefList -> VarDef VarDefListRem)] 229 | fn var_def_list(l: &'p VarDef<'p>, r: Vec<&'p VarDef<'p>>) -> Vec<&'p VarDef<'p>> { r.pushed(l) } 230 | #[rule(VarDefListRem -> Comma VarDef VarDefListRem)] 231 | fn var_def_list_rem(_c: Token, l: &'p VarDef<'p>, r: Vec<&'p VarDef<'p>>) -> Vec<&'p VarDef<'p>> { r.pushed(l) } 232 | #[rule(VarDefListRem ->)] 233 | fn var_def_list_rem0() -> Vec<&'p VarDef<'p>> { vec![] } 234 | 235 | // the logic of ExprList is completely the same as VarDefList... 
236 | #[rule(ExprListOrEmpty -> ExprList)] 237 | fn expr_list_or_empty1(l: Vec>) -> Vec> { l } 238 | #[rule(ExprListOrEmpty ->)] 239 | fn expr_list_or_empty0() -> Vec> { vec![] } 240 | #[rule(ExprList -> Expr ExprListRem)] 241 | fn expr_list(l: Expr<'p>, r: Vec>) -> Vec> { r.pushed(l) } 242 | #[rule(ExprListRem -> Comma Expr ExprListRem)] 243 | fn expr_list_rem(_c: Token, l: Expr<'p>, r: Vec>) -> Vec> { r.pushed(l) } 244 | #[rule(ExprListRem ->)] 245 | fn expr_list_rem0() -> Vec> { vec![] } 246 | 247 | #[rule(VarDef -> Type Id)] 248 | fn var_def(&self, syn_ty: SynTy<'p>, name: Token) -> &'p VarDef<'p> { 249 | self.alloc.var.alloc(VarDef { loc: name.loc(), name: name.str(), syn_ty, init: None, ty: dft(), owner: dft() }) 250 | } 251 | 252 | #[rule(Block -> LBrc StmtList RBrc)] 253 | fn block(l: Token, stmt: Vec>, _r: Token) -> Block<'p> { 254 | Block { loc: l.loc(), stmt: stmt.reversed(), scope: dft() } 255 | } 256 | 257 | #[rule(StmtList -> Stmt StmtList)] 258 | fn stmt_list(l: Stmt<'p>, r: Vec>) -> Vec> { r.pushed(l) } 259 | #[rule(StmtList ->)] 260 | fn stmt_list0() -> Vec> { vec![] } 261 | 262 | #[rule(Stmt -> Simple Semi)] 263 | fn stmt_simple(s: Stmt<'p>, _s: Token) -> Stmt<'p> { s } 264 | #[rule(Stmt -> If LPar Expr RPar Stmt MaybeElse)] 265 | fn stmt_if(i: Token, _l: Token, cond: Expr<'p>, _r: Token, on_true: Stmt<'p>, on_false: Option>) -> Stmt<'p> { 266 | mk_stmt(i.loc(), Box::new(If { cond, on_true: mk_block(on_true), on_false }).into()) 267 | } 268 | #[rule(Stmt -> While LPar Expr RPar Stmt)] 269 | fn stmt_while(w: Token, _l: Token, cond: Expr<'p>, _r: Token, body: Stmt<'p>) -> Stmt<'p> { 270 | mk_stmt(w.loc(), While { cond, body: mk_block(body) }.into()) 271 | } 272 | #[rule(Stmt -> For LPar Simple Semi Expr Semi Simple RPar Stmt)] 273 | fn stmt_for(f: Token, _l: Token, init: Stmt<'p>, _s1: Token, cond: Expr<'p>, _s2: Token, update: Stmt<'p>, _r: Token, body: Stmt<'p>) -> Stmt<'p> { 274 | mk_stmt(f.loc(), For { init: Box::new(init), cond, update: 
Box::new(update), body: mk_block(body) }.into()) 275 | } 276 | #[rule(Stmt -> Return MaybeExpr Semi)] 277 | fn stmt_return(r: Token, expr: Option>, _s: Token) -> Stmt<'p> { mk_stmt(r.loc(), expr.into()) } 278 | #[rule(Stmt -> Print LPar ExprList RPar Semi)] 279 | fn stmt_print(p: Token, _l: Token, print: Vec>, _r: Token, _s: Token) -> Stmt<'p> { mk_stmt(p.loc(), print.reversed().into()) } 280 | #[rule(Stmt -> Break Semi)] 281 | fn stmt_break(b: Token, _s: Token) -> Stmt<'p> { mk_stmt(b.loc(), Break.into()) } 282 | #[rule(Stmt -> Block)] 283 | fn stmt_block(b: Block<'p>) -> Stmt<'p> { mk_stmt(b.loc, b.into()) } 284 | 285 | #[rule(Simple -> Expr MaybeAssign)] 286 | fn simple_assign_or_expr(e: Expr<'p>, assign: Option<(Loc, Expr<'p>)>) -> Stmt<'p> { 287 | if let Some((loc, src)) = assign { 288 | mk_stmt(loc, Assign { dst: e, src }.into()) 289 | } else { 290 | mk_stmt(e.loc, e.into()) 291 | } 292 | } 293 | #[rule(Simple -> Type Id MaybeAssign)] 294 | fn simple_var_def(&self, syn_ty: SynTy<'p>, name: Token, init: Option<(Loc, Expr<'p>)>) -> Stmt<'p> { 295 | let loc = name.loc(); 296 | mk_stmt(loc, (&*self.alloc.var.alloc(VarDef { loc, name: name.str(), syn_ty, init, ty: dft(), owner: dft() })).into()) 297 | } 298 | #[rule(Simple ->)] 299 | fn simple_skip() -> Stmt<'p> { mk_stmt(NO_LOC, Skip.into()) } 300 | 301 | #[rule(MaybeAssign -> Assign Expr)] 302 | fn maybe_assign1(a: Token, src: Expr<'p>) -> Option<(Loc, Expr<'p>)> { Some((a.loc(), src)) } 303 | #[rule(MaybeAssign ->)] 304 | fn maybe_assign0() -> Option<(Loc, Expr<'p>)> { None } 305 | 306 | #[rule(Blocked -> Stmt)] 307 | fn blocked(s: Stmt<'p>) -> Block<'p> { 308 | if let StmtKind::Block(b) = s.kind { b } else { Block { loc: s.loc, stmt: vec![s], scope: dft() } } 309 | } 310 | 311 | // maybe_else1/0 will cause a conflict, and will choose this production because it appears earlier 312 | // this is the ONLY conflict allowed in our parser 313 | #[rule(MaybeElse -> Else Blocked)] 314 | fn maybe_else1(_e: Token, b: 
Block<'p>) -> Option> { Some(b) } 315 | #[rule(MaybeElse ->)] 316 | fn maybe_else0() -> Option> { None } 317 | 318 | #[rule(MaybeExpr -> Expr)] 319 | fn maybe_expr1(e: Expr<'p>) -> Option> { Some(e) } 320 | #[rule(MaybeExpr ->)] 321 | fn maybe_expr0() -> Option> { None } 322 | 323 | #[rule(Op1 -> Or)] 324 | fn op1(o: Token) -> (Loc, BinOp) { (o.loc(), BinOp::Or) } 325 | 326 | #[rule(Op2 -> And)] 327 | fn op2(a: Token) -> (Loc, BinOp) { (a.loc(), BinOp::And) } 328 | 329 | #[rule(Op3 -> Eq)] 330 | fn op3_eq(e: Token) -> (Loc, BinOp) { (e.loc(), BinOp::Eq) } 331 | #[rule(Op3 -> Ne)] 332 | fn op3_ne(n: Token) -> (Loc, BinOp) { (n.loc(), BinOp::Ne) } 333 | 334 | #[rule(Op4 -> Lt)] 335 | fn op4_lt(l: Token) -> (Loc, BinOp) { (l.loc(), BinOp::Lt) } 336 | #[rule(Op4 -> Le)] 337 | fn op4_le(l: Token) -> (Loc, BinOp) { (l.loc(), BinOp::Le) } 338 | #[rule(Op4 -> Ge)] 339 | fn op4_ge(g: Token) -> (Loc, BinOp) { (g.loc(), BinOp::Ge) } 340 | #[rule(Op4 -> Gt)] 341 | fn op4_gt(g: Token) -> (Loc, BinOp) { (g.loc(), BinOp::Gt) } 342 | 343 | #[rule(Op5 -> Add)] 344 | fn op5_add(a: Token) -> (Loc, BinOp) { (a.loc(), BinOp::Add) } 345 | #[rule(Op5 -> Sub)] 346 | fn op5_sub(s: Token) -> (Loc, BinOp) { (s.loc(), BinOp::Sub) } 347 | 348 | #[rule(Op6 -> Mul)] 349 | fn op6_add(m: Token) -> (Loc, BinOp) { (m.loc(), BinOp::Mul) } 350 | #[rule(Op6 -> Div)] 351 | fn op6_div(d: Token) -> (Loc, BinOp) { (d.loc(), BinOp::Div) } 352 | #[rule(Op6 -> Mod)] 353 | fn op6_mod(m: Token) -> (Loc, BinOp) { (m.loc(), BinOp::Mod) } 354 | 355 | #[rule(Op7 -> Sub)] 356 | fn op7_neg(n: Token) -> (Loc, UnOp) { (n.loc(), UnOp::Neg) } 357 | #[rule(Op7 -> Not)] 358 | fn op7_not(n: Token) -> (Loc, UnOp) { (n.loc(), UnOp::Not) } 359 | 360 | #[rule(Expr -> Expr1)] 361 | fn expr(e: Expr<'p>) -> Expr<'p> { e } 362 | 363 | #[rule(Expr1 -> Expr2 Term1)] 364 | fn expr1(l: Expr<'p>, ts: Terms<'p>) -> Expr<'p> { merge_terms(l, ts) } 365 | #[rule(Term1 -> Op1 Expr2 Term1)] // or 366 | fn term1(o: (Loc, BinOp), l: Expr<'p>, 
r: Terms<'p>) -> Terms<'p> { r.pushed((l, o)) } 367 | #[rule(Term1 ->)] 368 | fn term1_0() -> Terms<'p> { vec![] } 369 | 370 | // the logic of Expr2 is completely the same as Expr1... 371 | #[rule(Expr2 -> Expr3 Term2)] 372 | fn expr2(l: Expr<'p>, ts: Terms<'p>) -> Expr<'p> { merge_terms(l, ts) } 373 | #[rule(Term2 -> Op2 Expr3 Term2)] // and 374 | fn term2(o: (Loc, BinOp), l: Expr<'p>, r: Terms<'p>) -> Terms<'p> { r.pushed((l, o)) } 375 | #[rule(Term2 ->)] 376 | fn term2_0() -> Terms<'p> { vec![] } 377 | 378 | #[rule(Expr3 -> Expr4 Term3)] 379 | fn expr3(l: Expr<'p>, ts: Terms<'p>) -> Expr<'p> { merge_terms(l, ts) } 380 | #[rule(Term3 -> Op3 Expr4 Term3)] // eq, ne 381 | fn term3(o: (Loc, BinOp), l: Expr<'p>, r: Terms<'p>) -> Terms<'p> { r.pushed((l, o)) } 382 | #[rule(Term3 ->)] 383 | fn term3_0() -> Terms<'p> { vec![] } 384 | 385 | #[rule(Expr4 -> Expr5 Term4)] 386 | fn expr4(l: Expr<'p>, ts: Terms<'p>) -> Expr<'p> { merge_terms(l, ts) } 387 | #[rule(Term4 -> Op4 Expr5 Term4)] // lt, le, ge, gt 388 | fn term4(o: (Loc, BinOp), l: Expr<'p>, r: Terms<'p>) -> Terms<'p> { r.pushed((l, o)) } 389 | #[rule(Term4 ->)] 390 | fn term4_0() -> Terms<'p> { vec![] } 391 | 392 | #[rule(Expr5 -> Expr6 Term5)] 393 | fn expr5(l: Expr<'p>, ts: Terms<'p>) -> Expr<'p> { merge_terms(l, ts) } 394 | #[rule(Term5 -> Op5 Expr6 Term5)] // add sub 395 | fn term5(o: (Loc, BinOp), l: Expr<'p>, r: Terms<'p>) -> Terms<'p> { r.pushed((l, o)) } 396 | #[rule(Term5 ->)] 397 | fn term5_0() -> Terms<'p> { vec![] } 398 | 399 | #[rule(Expr6 -> Expr7 Term6)] 400 | fn expr6(l: Expr<'p>, ts: Terms<'p>) -> Expr<'p> { merge_terms(l, ts) } 401 | #[rule(Term6 -> Op6 Expr7 Term6)] // mul, div, mod 402 | fn term6(o: (Loc, BinOp), l: Expr<'p>, r: Terms<'p>) -> Terms<'p> { r.pushed((l, o)) } 403 | #[rule(Term6 ->)] 404 | fn term6_0() -> Terms<'p> { vec![] } 405 | 406 | #[rule(Expr7 -> Op7 Expr7)] // not, neg 407 | fn expr7_op8(o: (Loc, UnOp), r: Expr<'p>) -> Expr<'p> { 408 | mk_expr(o.0, Unary { op: o.1, r: 
Box::new(r) }.into()) 409 | } 410 | #[rule(Expr7 -> LPar ParenOrCast)] 411 | fn expr7_par_or_cast(_l: Token, e: Expr<'p>) -> Expr<'p> { e } 412 | #[rule(Expr7 -> Expr8)] 413 | fn expr7_8(e: Expr<'p>) -> Expr<'p> { e } 414 | 415 | #[rule(ParenOrCast -> Expr RPar Term8)] 416 | fn paren_or_cast_p(l: Expr<'p>, _r: Token, ts: Vec>) -> Expr<'p> { merge_idx_id_call(l, ts) } 417 | #[rule(ParenOrCast -> Class Id RPar Expr7)] 418 | fn paren_or_cast_c(_c: Token, name: Token, _r: Token, e: Expr<'p>) -> Expr<'p> { 419 | mk_expr(e.loc, ClassCast { name: name.str(), expr: Box::new(e), class: dft() }.into()) 420 | } 421 | 422 | #[rule(Expr8 -> Expr9 Term8)] 423 | fn expr8(l: Expr<'p>, ts: Vec>) -> Expr<'p> { merge_idx_id_call(l, ts) } 424 | 425 | #[rule(Term8 -> LBrk Expr RBrk Term8)] 426 | fn term8_index(l: Token, idx: Expr<'p>, _r: Token, r: Vec>) -> Vec> { r.pushed(IndexOrIdOrCall::Index(l.loc(), idx)) } 427 | #[rule(Term8 -> Dot Id IdOrCall Term8)] 428 | fn term8_id_or_call(_d: Token, name: Token, arg: Option<(Loc, Vec>)>, r: Vec>) -> Vec> { 429 | r.pushed(IndexOrIdOrCall::IdOrCall(name.loc(), name.str(), arg)) 430 | } 431 | #[rule(Term8 ->)] 432 | fn term8_0() -> Vec> { vec![] } 433 | 434 | #[rule(IdOrCall -> LPar ExprListOrEmpty RPar)] 435 | fn id_or_call_c(l: Token, arg: Vec>, _r: Token) -> Option<(Loc, Vec>)> { Some((l.loc(), arg.reversed())) } 436 | #[rule(IdOrCall ->)] 437 | fn id_or_call_i() -> Option<(Loc, Vec>)> { None } 438 | 439 | #[rule(Expr9 -> IntLit)] 440 | fn expr9_int(&mut self, i: Token) -> Expr<'p> { mk_int_lit(i.loc(), i.str(), &mut self.error) } 441 | #[rule(Expr9 -> True)] 442 | fn expr9_true(t: Token) -> Expr<'p> { mk_expr(t.loc(), true.into()) } 443 | #[rule(Expr9 -> False)] 444 | fn expr9_false(f: Token) -> Expr<'p> { mk_expr(f.loc(), false.into()) } 445 | #[rule(Expr9 -> StringLit)] 446 | fn expr9_string(&mut self, s: Token) -> Expr<'p> { 447 | let (loc, str) = (s.loc(), s.str()); 448 | check_str(str, &mut self.error, loc); 449 | mk_expr(loc, 
str[1..str.len() - 1].into()) 450 | } 451 | #[rule(Expr9 -> Null)] 452 | fn expr9_null(n: Token) -> Expr<'p> { mk_expr(n.loc(), NullLit.into()) } 453 | #[rule(Expr9 -> ReadInteger LPar RPar)] 454 | fn expr9_read_integer(r: Token, _l: Token, _r: Token) -> Expr<'p> { mk_expr(r.loc(), ReadInt.into()) } 455 | #[rule(Expr9 -> ReadLine LPar RPar)] 456 | fn expr9_read_line(r: Token, _l: Token, _r: Token) -> Expr<'p> { mk_expr(r.loc(), ReadLine.into()) } 457 | #[rule(Expr9 -> This)] 458 | fn expr9_this(t: Token) -> Expr<'p> { mk_expr(t.loc(), This.into()) } 459 | #[rule(Expr9 -> InstanceOf LPar Expr Comma Id RPar)] 460 | fn expr9_instanceof(i: Token, _l: Token, expr: Expr<'p>, _c: Token, name: Token, _r: Token) -> Expr<'p> { 461 | mk_expr(i.loc(), ClassTest { expr: Box::new(expr), name: name.str(), class: dft() }.into()) 462 | } 463 | #[rule(Expr9 -> Id IdOrCall)] 464 | fn expr9_id_or_call(name: Token, ioc: Option<(Loc, Vec<Expr<'p>>)>) -> Expr<'p> { 465 | match ioc { 466 | Some((loc, arg)) => { 467 | let func = Box::new(mk_expr(name.loc(), VarSel { owner: None, name: name.str(), var: dft() }.into())); 468 | mk_expr(loc, Call { func, arg, func_ref: dft() }.into()) 469 | } 470 | None => mk_expr(name.loc(), VarSel { owner: None, name: name.str(), var: dft() }.into()), 471 | } 472 | } 473 | #[rule(Expr9 -> New NewClassOrArray)] 474 | fn expr9_new(n: Token, noa: NewClassOrArray<'p>) -> Expr<'p> { 475 | let loc = n.loc(); 476 | match noa { 477 | NewClassOrArray::NewClass(name) => mk_expr(loc, NewClass { name, class: dft() }.into()), 478 | NewClassOrArray::NewArray(elem, len) => mk_expr(loc, NewArray { elem, len: Box::new(len) }.into()), 479 | } 480 | } 481 | 482 | #[rule(NewClassOrArray -> Id LPar RPar)] 483 | fn new_class_or_array_c(name: Token, _l: Token, _r: Token) -> NewClassOrArray<'p> { 484 | NewClassOrArray::NewClass(name.str()) 485 | } 486 | #[rule(NewClassOrArray -> SimpleType LBrk NewArrayRem)] 487 | fn new_class_or_array_a(mut ty: SynTy<'p>, _l: Token, dim_len: (u32, 
Expr<'p>)) -> NewClassOrArray<'p> { 488 | ty.arr = dim_len.0; 489 | NewClassOrArray::NewArray(ty, dim_len.1) 490 | } 491 | 492 | #[rule(NewArrayRem -> RBrk LBrk NewArrayRem)] 493 | fn new_array_rem(_r: Token, l: Token, mut dim_len: (u32, Expr<'p>)) -> (u32, Expr<'p>) { (dim_len.0 += 1, dim_len).1 } 494 | #[rule(NewArrayRem -> Expr RBrk)] 495 | fn new_array_rem0(len: Expr<'p>, _r: Token) -> (u32, Expr<'p>) { (0, len) } 496 | 497 | #[rule(SimpleType -> Int)] 498 | fn type_int(i: Token) -> SynTy<'p> { SynTy { loc: i.loc(), arr: 0, kind: SynTyKind::Int } } 499 | #[rule(SimpleType -> Bool)] 500 | fn type_bool(b: Token) -> SynTy<'p> { SynTy { loc: b.loc(), arr: 0, kind: SynTyKind::Bool } } 501 | #[rule(SimpleType -> Void)] 502 | fn type_void(v: Token) -> SynTy<'p> { SynTy { loc: v.loc(), arr: 0, kind: SynTyKind::Void } } 503 | #[rule(SimpleType -> String)] 504 | fn type_string(s: Token) -> SynTy<'p> { SynTy { loc: s.loc(), arr: 0, kind: SynTyKind::String } } 505 | #[rule(SimpleType -> Class Id)] 506 | fn type_class(c: Token, name: Token) -> SynTy<'p> { SynTy { loc: c.loc(), arr: 0, kind: SynTyKind::Named(name.str()) } } 507 | #[rule(Type -> SimpleType ArrayDim)] 508 | fn type_array(mut ty: SynTy<'p>, dim: u32) -> SynTy<'p> { (ty.arr = dim, ty).1 } 509 | 510 | #[rule(ArrayDim -> LBrk RBrk ArrayDim)] 511 | fn array_type(l: Token, _r: Token, dim: u32) -> u32 { dim + 1 } 512 | #[rule(ArrayDim ->)] 513 | fn array_type0() -> u32 { 0 } 514 | } 515 | -------------------------------------------------------------------------------- /syntax/src/symbol.rs: -------------------------------------------------------------------------------- 1 | use crate::{Block, ClassDef, FuncDef, VarDef, Program, Ty}; 2 | use common::{Loc, HashMap}; 3 | use std::{cell::{RefMut, Ref}, fmt}; 4 | 5 | pub type Scope<'a> = HashMap<&'a str, Symbol<'a>>; 6 | 7 | #[derive(Copy, Clone)] 8 | pub enum Symbol<'a> { 9 | Var(&'a VarDef<'a>), 10 | Func(&'a FuncDef<'a>), 11 | This(&'a FuncDef<'a>), 12 | Class(&'a 
ClassDef<'a>),
}

impl<'a> Symbol<'a> {
    // the identifier this symbol is known by; a `This` symbol is always called "this"
    pub fn name(&self) -> &'a str {
        match *self {
            Symbol::Var(var) => var.name,
            Symbol::Func(func) => func.name,
            Symbol::This(_) => "this",
            Symbol::Class(class) => class.name,
        }
    }

    // source location of the definition; `This` reports the location of its function
    pub fn loc(&self) -> Loc {
        match *self {
            Symbol::Var(var) => var.loc,
            Symbol::Func(func) | Symbol::This(func) => func.loc,
            Symbol::Class(class) => class.loc,
        }
    }

    // for symbol This & Class, this returns the type of their class object
    pub fn ty(&self) -> Ty<'a> {
        match *self {
            Symbol::Var(var) => var.ty.get(),
            Symbol::Func(func) => Ty::mk_func(func),
            Symbol::This(func) => Ty::mk_obj(func.class.get().unwrap()),
            Symbol::Class(class) => Ty::mk_obj(class),
        }
    }

    // variant predicates
    pub fn is_var(&self) -> bool {
        match self {
            Symbol::Var(_) => true,
            _ => false,
        }
    }

    pub fn is_func(&self) -> bool {
        match self {
            Symbol::Func(_) => true,
            _ => false,
        }
    }

    pub fn is_this(&self) -> bool {
        match self {
            Symbol::This(_) => true,
            _ => false,
        }
    }

    pub fn is_class(&self) -> bool {
        match self {
            Symbol::Class(_) => true,
            _ => false,
        }
    }
}

#[derive(Copy, Clone)]
pub enum ScopeOwner<'a> {
    Local(&'a Block<'a>),
    Param(&'a FuncDef<'a>),
    Class(&'a ClassDef<'a>),
    Global(&'a Program<'a>),
}

impl<'a> ScopeOwner<'a> {
    // boilerplate code...
    // shared borrow of the scope table stored in the owning AST node
    pub fn scope(&self) -> Ref<'a, Scope<'a>> {
        match *self {
            ScopeOwner::Local(block) => block.scope.borrow(),
            ScopeOwner::Param(func) => func.scope.borrow(),
            ScopeOwner::Class(class) => class.scope.borrow(),
            ScopeOwner::Global(program) => program.scope.borrow(),
        }
    }

    // mutable borrow of the scope table stored in the owning AST node
    pub fn scope_mut(&self) -> RefMut<'a, Scope<'a>> {
        match *self {
            ScopeOwner::Local(block) => block.scope.borrow_mut(),
            ScopeOwner::Param(func) => func.scope.borrow_mut(),
            ScopeOwner::Class(class) => class.scope.borrow_mut(),
            ScopeOwner::Global(program) => program.scope.borrow_mut(),
        }
    }

    // variant predicates
    pub fn is_local(&self) -> bool {
        match self {
            ScopeOwner::Local(_) => true,
            _ => false,
        }
    }

    pub fn is_param(&self) -> bool {
        match self {
            ScopeOwner::Param(_) => true,
            _ => false,
        }
    }

    pub fn is_class(&self) -> bool {
        match self {
            ScopeOwner::Class(_) => true,
            _ => false,
        }
    }

    pub fn is_global(&self) -> bool {
        match self {
            ScopeOwner::Global(_) => true,
            _ => false,
        }
    }
}

// the format used when printing scope information
impl fmt::Debug for Symbol<'_> {
    fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
        match self {
            Symbol::Var(v) => {
                // variables owned by a parameter scope are printed with a leading '@'
                let marker = if v.owner.get().unwrap().is_param() { "@" } else { "" };
                write!(f, "{:?} -> variable {}{} : {:?}", v.loc, marker, v.name, v.ty.get())
            }
            Symbol::Func(fu) => {
                let prefix = if fu.static_ { "STATIC " } else { "" };
                write!(f, "{:?} -> {}function {} : {:?}", fu.loc, prefix, fu.name, Ty::mk_func(fu))
            }
            Symbol::This(fu) => {
                write!(f, "{:?} -> variable @this : class {}", fu.loc, fu.class.get().unwrap().name)
            }
            Symbol::Class(c) => {
                write!(f, "{:?} -> class {}", c.loc, c.name)?;
                // the parent class is appended only when there is one
                match c.parent_ref.get() {
                    Some(p) => write!(f, " : {}", p.name),
                    None => Ok(()),
                }
            }
        }
    }
}
--------------------------------------------------------------------------------
/syntax/src/ty.rs:
--------------------------------------------------------------------------------
use crate::{ClassDef, FuncDef};
use common::{Loc, Ref};
use std::fmt;

#[derive(Eq, PartialEq)]
pub enum SynTyKind<'a> {
    Int,
    Bool,
    String,
    Void,
    Named(&'a str),
}

// a type as written in the source: location, array dimension and base kind
#[derive(Eq, PartialEq)]
pub struct SynTy<'a> {
    pub loc: Loc,
    pub arr: u32,
    pub kind: SynTyKind<'a>,
}

#[derive(Clone, Copy, Eq, PartialEq)]
pub enum TyKind<'a> {
    Int,
    Bool,
    String,
    Void,
    Error,
    Null,
    // `Object` is `class A a` <- this `a`
    Object(Ref<'a, ClassDef<'a>>),
    // `Class` is `Class A { }` <- this `A`
    Class(Ref<'a, ClassDef<'a>>),
    // [0] = ret, [1..] = param
    Func(&'a [Ty<'a>]),
}

impl Default for TyKind<'_> {
    fn default() -> Self { TyKind::Error }
}

// arr > 0 <-> is array, for error/void type, arr can only be 0
#[derive(Clone, Copy, Eq, PartialEq, Default)]
pub struct Ty<'a> {
    pub arr: u32,
    pub kind: TyKind<'a>,
}

impl<'a> Ty<'a> {
    // make a type with array dimension = 0
    pub const fn new(kind: TyKind<'a>) -> Ty<'a> { Ty { arr: 0, kind } }

    // like Errors::issue, it can save some typing by returning a default value:
    // when `self` is the error type `f` is not called and `T::default()` is returned
    pub fn error_or<T: Default>(self, mut f: impl FnMut() -> T) -> T {
        if self == Ty::error() { T::default() } else { f() }
    }

    // whether a value of type `self` can be assigned to a location of type `rhs`
    pub fn assignable_to(&self, rhs: Ty<'a>) -> bool {
        use TyKind::*;
        match (self.kind, rhs.kind) {
            // an error type is compatible with everything, so one error doesn't cascade
            (Error, _) | (_, Error) => true,
            _ if self.arr == rhs.arr => if self.arr == 0 {
                match (self.kind, rhs.kind) {
                    (Int, Int) | (Bool, Bool) | (String, String) | (Void, Void) => true,
                    (Object(c1), Object(Ref(c2))) => c1.extends(c2),
                    (Null, Object(_)) => true,
                    (Func(rp1), Func(rp2)) => {
                        // return type is checked self -> rhs, each parameter is checked rhs -> self
                        let (r1, p1, r2, p2) = (&rp1[0], &rp1[1..], &rp2[0], &rp2[1..]);
                        r1.assignable_to(*r2)
                            && p1.len() == p2.len()
                            && p1.iter().zip(p2.iter()).all(|(p1, p2)| p2.assignable_to(*p1))
                    }
                    _ => false,
                }
            } else {
                // arrays of the same dimension must have exactly the same type
                *self == rhs
            }
            _ => false,
        }
    }

    // why don't use const items?
    // it seems that const items can only have type Ty<'static>, which can NOT be casted to Ty<'a>
    pub const fn error() -> Ty<'a> { Ty::new(TyKind::Error) }
    pub const fn null() -> Ty<'a> { Ty::new(TyKind::Null) }
    pub const fn int() -> Ty<'a> { Ty::new(TyKind::Int) }
    pub const fn bool() -> Ty<'a> { Ty::new(TyKind::Bool) }
    pub const fn void() -> Ty<'a> { Ty::new(TyKind::Void) }
    pub const fn string() -> Ty<'a> { Ty::new(TyKind::String) }

    pub fn mk_obj(c: &'a ClassDef<'a>) -> Ty<'a> { Ty::new(TyKind::Object(Ref(c))) }
    pub fn mk_class(c: &'a ClassDef<'a>) -> Ty<'a> { Ty::new(TyKind::Class(Ref(c))) }
    // panics if `f.ret_param_ty` has not been set yet
    pub fn mk_func(f: &'a FuncDef<'a>) -> Ty<'a> { Ty::new(TyKind::Func(f.ret_param_ty.get().unwrap())) }

    // if you want something like `is_void()`, just use `== Ty::void()`
    pub fn is_arr(&self) -> bool { self.arr > 0 }
    pub fn is_func(&self) -> bool { self.arr == 0 && if let TyKind::Func(_) = self.kind { true } else { false } }
    pub fn is_class(&self) -> bool { self.arr == 0 && if let TyKind::Class(_) = self.kind { true } else { false } }
    pub fn is_object(&self) -> bool { self.arr == 0 && if let TyKind::Object(_) = self.kind { true } else { false } }
}

impl fmt::Debug for Ty<'_> {
    fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
        match &self.kind {
            TyKind::Int => write!(f, "int"),
            TyKind::Bool => write!(f, "bool"),
            TyKind::String => write!(f, "string"),
            TyKind::Void => write!(f, "void"),
            TyKind::Error => write!(f, "error"), // we don't expect to reach this case in printing scope info
            TyKind::Null => write!(f, "null"),
            TyKind::Object(c) | TyKind::Class(c) => write!(f, "class {}", c.name),
            // the printing format may be different from other experiment framework's
            // it is not because their format is hard to implement in rust, but because I simply don't like their format,
            // which introduces unnecessary
complexity, and doesn't increase readability
            TyKind::Func(ret_param) => {
                let (ret, param) = (ret_param[0], &ret_param[1..]);
                write!(f, "{:?}(", ret)?;
                for (idx, p) in param.iter().enumerate() {
                    // no separator after the last parameter
                    write!(f, "{:?}{}", p, if idx + 1 == param.len() { "" } else { ", " })?;
                }
                write!(f, ")")
            }
        }?;
        // one `[]` per array dimension
        for _ in 0..self.arr { write!(f, "[]")?; }
        Ok(())
    }
}
--------------------------------------------------------------------------------
/tac/Cargo.toml:
--------------------------------------------------------------------------------
[package]
name = "tac"
version = "0.1.0"
authors = ["MashPlant <740678788@qq.com>"]
edition = "2018"

[dependencies]
common = { path = "../common" }
typed-arena = "1.4.1"
--------------------------------------------------------------------------------
/tac/src/iter.rs:
--------------------------------------------------------------------------------
use crate::TacNode;
use std::iter::FusedIterator;

// this code is basically copied from std::collections::LinkedList

// iterator over `len` linked tac nodes, walking `next` from `first` or `prev` from `last`
#[derive(Copy, Clone)]
pub struct TacIter<'a> {
    first: Option<&'a TacNode<'a>>,
    last: Option<&'a TacNode<'a>>,
    len: usize,
}

impl<'a> TacIter<'a> {
    pub fn new(first: Option<&'a TacNode<'a>>, last: Option<&'a TacNode<'a>>, len: usize) -> TacIter<'a> {
        TacIter { first, last, len }
    }
}

impl<'a> Iterator for TacIter<'a> {
    type Item = &'a TacNode<'a>;

    fn next(&mut self) -> Option<Self::Item> {
        // `len` bounds the iteration, the node links themselves are not used as the end marker
        if self.len != 0 {
            self.first.map(|x| {
                self.len -= 1;
                self.first = x.next.get();
                x
            })
        } else { None }
    }

    fn size_hint(&self) -> (usize, Option<usize>) { (self.len, Some(self.len)) }
    fn count(self) -> usize { self.len }
}

impl<'a> DoubleEndedIterator for TacIter<'a> {
    fn next_back(&mut self) -> Option<Self::Item> {
        if self.len != 0 {
            self.last.map(|x| {
                self.len -= 1;
                self.last = x.prev.get();
                x
            })
        } else { None }
    }
}

impl ExactSizeIterator for TacIter<'_> {}

impl FusedIterator for TacIter<'_> {}
--------------------------------------------------------------------------------
/tac/src/lib.rs:
--------------------------------------------------------------------------------
pub mod iter;

pub use iter::TacIter;

use common::{BinOp, UnOp, IndexSet};
use std::{cell::Cell, fmt::{self, Debug}};
use typed_arena::Arena;

#[derive(Default)]
pub struct TacProgram<'a> {
    pub vtbl: Vec<VTbl<'a>>,
    pub func: Vec<TacFunc<'a>>,
    pub str_pool: IndexSet<&'a str>,
}

pub struct VTbl<'a> {
    // parent is index in Program::vtbl
    pub parent: Option<u32>,
    pub class: &'a str,
    // element in `func` is index in Program::func
    pub func: Vec<u32>,
}

pub struct TacFunc<'a> {
    pub param_num: u32,
    pub reg_num: u32,
    // we don't store the number of tac here, so we can't use TacIter
    // TacIter has more functions than we need to iterate over a function, but such functions are unnecessary
    pub first: Option<&'a TacNode<'a>>,
    pub last: Option<&'a TacNode<'a>>,
    pub alloc: &'a Arena<TacNode<'a>>,
    pub name: String,
}

impl<'a> TacFunc<'a> {
    // a function with no tac yet; `reg_num` starts at 0 and is filled in by the caller
    pub fn empty(alloc: &'a Arena<TacNode<'a>>, name: String, param_num: u32) -> TacFunc<'a> {
        TacFunc { param_num, reg_num: 0, first: None, last: None, alloc, name }
    }

    // allocate a node for `t` in the arena and append it to the list; returns `self` to allow chaining
    pub fn push(&mut self, t: Tac) -> &mut Self {
        let tac = self.alloc.alloc(TacNode { tac: t.into(), prev: None.into(), next: None.into() });
        if let Some(last) = &mut self.last {
            tac.prev.set(Some(last));
            last.next.set(Some(tac));
            *last = tac;
        } else {
            // first node of the function
            self.first = Some(tac);
            self.last = Some(tac);
        }
        self
    }
}

// a node of the doubly linked tac list, `Cell` allows relinking through shared references
pub struct TacNode<'a> {
    pub tac: Cell<Tac>,
    pub prev: Cell<Option<&'a TacNode<'a>>>,
    pub next:
Cell<Option<&'a TacNode<'a>>>,
}

// `u32` can either mean register number or label id, its meaning is easy to distinguish according to the context
// use array to allow function `rw(_mut)` return a slice
#[derive(Copy, Clone)]
pub enum Tac {
    Bin { op: BinOp, dst: u32, lr: [Operand; 2] },
    Un { op: UnOp, dst: u32, r: [Operand; 1] },
    Assign { dst: u32, src: [Operand; 1] },
    Param { src: [Operand; 1] },
    // if there is CallHint in `kind`: obj == true => it can change result of Load Obj, arr == true => like Obj
    // else (now only intrinsic call) => it doesn't affect any Load result
    Call { dst: Option<u32>, kind: CallKind },
    Ret { src: Option<[Operand; 1]> },
    // `label` in Jmp & Jif & Label is a label id
    Jmp { label: u32 },
    // Jif stands for Jz and Jnz, determined by `z`
    Jif { label: u32, z: bool, cond: [Operand; 1] },
    Label { label: u32 },
    // `hint` can help common expression elimination since we don't have alias analysis yet
    // for Load: Immutable => result only depends on base + off, Obj => Store to Obj can change result, Arr => like Obj
    // for Store: Immutable => it doesn't affect any Load result, Obj => correspond to Obj in Load, Arr => like Obj
    Load { dst: u32, base: [Operand; 1], off: i32, hint: MemHint },
    Store { src_base: [Operand; 2], off: i32, hint: MemHint },
    // s: the index in TacProgram::str_pool
    LoadStr { dst: u32, s: u32 },
    // v: the index in TacProgram::vtbl
    LoadVTbl { dst: u32, v: u32 },
    // f: the index in TacProgram::func
    LoadFunc { dst: u32, f: u32 },
}

impl Tac {
    // returns (operands read, register written)
    // r can be Operand, but w can only be reg, and there is at most 1 w
    pub fn rw(&self) -> (&[Operand], Option<u32>) {
        use Tac::*;
        match self {
            Bin { dst, lr, .. } => (lr, Some(*dst)),
            Un { dst, r, .. } | Assign { dst, src: r } | Load { dst, base: r, .. } => (r, Some(*dst)),
            Param { src } => (src, None),
            // only a virtual call reads an operand (the function pointer)
            Call { dst, kind } => (if let CallKind::Virtual(fp, _) = kind { fp } else { &[] }, *dst),
            Ret { src } => (src.as_ref().map(|src| src.as_ref()).unwrap_or(&[]), None),
            Jmp { .. } | Label { .. } => (&[], None),
            Jif { cond, .. } => (cond, None),
            Store { src_base, .. } => (src_base, None),
            LoadStr { dst, .. } | LoadVTbl { dst, .. } | LoadFunc { dst, .. } => (&[], Some(*dst)),
        }
    }

    // basically copied from `rw`, there is no better way in rust to write two functions, one is &self -> &result, another is &mut self -> &mut result
    // for example, the implementation of Iter and IterMut in many std collections are almost duplicate codes
    pub fn rw_mut(&mut self) -> (&mut [Operand], Option<&mut u32>) {
        use Tac::*;
        match self {
            Bin { dst, lr, .. } => (lr, Some(dst)),
            Un { dst, r, .. } | Assign { dst, src: r } | Load { dst, base: r, .. } => (r, Some(dst)),
            Param { src } => (src, None),
            Call { dst, kind } =>
                (if let CallKind::Virtual(fp, _) = kind { fp } else { &mut [] }, dst.as_mut()),
            Ret { src } => (src.as_mut().map(|src| src.as_mut()).unwrap_or(&mut []), None),
            Jmp { .. } | Label { .. } => (&mut [], None),
            Jif { cond, .. } => (cond, None),
            Store { src_base, .. } => (src_base, None),
            LoadStr { dst, .. } | LoadVTbl { dst, .. } | LoadFunc { dst, .. } => (&mut [], Some(dst)),
        }
    }
}

#[derive(Copy, Clone)]
pub enum CallKind {
    Virtual([Operand; 1], CallHint),
    // the index of func in TacProgram, can be static OR NEW
    Static(u32, CallHint),
    Intrinsic(Intrinsic),
}

#[derive(Copy, Clone, Hash, Eq, PartialEq)]
pub enum Operand { Reg(u32), Const(i32) }

// registers are printed as `%n`, constants as the plain number
impl Debug for Operand {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match self { Operand::Reg(r) => write!(f, "%{}", r), Operand::Const(c) => write!(f, "{}", c) }
    }
}

#[derive(Copy, Clone, Eq, PartialEq, Debug)]
pub enum Intrinsic { _Alloc, _ReadLine, _ReadInt, _StringEqual, _PrintInt, _PrintString, _PrintBool, _Halt }

impl Intrinsic {
    // whether calling this intrinsic produces a value
    pub fn has_ret(self) -> bool {
        use Intrinsic::*;
        match self { _Alloc | _ReadLine | _ReadInt | _StringEqual => true, _PrintInt | _PrintString | _PrintBool | _Halt => false }
    }
}

#[derive(Copy, Clone, Eq, PartialEq)]
pub enum MemHint { Immutable, Obj, Arr }

#[derive(Copy, Clone)]
pub struct CallHint {
    pub arg_obj: bool,
    pub arg_arr: bool,
}

pub const INT_SIZE: i32 = 4;

pub const INDEX_OUT_OF_BOUND: &str = r#"Decaf runtime error: Array subscript out of bounds\n"#;
pub const NEW_ARR_NEG: &str = r#"Decaf runtime error: Cannot create negative-sized array\n"#;
pub const BAD_CAST1: &str = r#"Decaf runtime error: "#;
pub const BAD_CAST2: &str = r#" cannot be cast to "#;
pub const BAD_CAST3: &str = r#"\n"#;
--------------------------------------------------------------------------------
/tacgen/Cargo.toml:
--------------------------------------------------------------------------------
[package]
name = "tacgen"
version = "0.1.0"
authors = ["MashPlant <740678788@qq.com>"]
edition = "2018"

[dependencies]
common = { path =
"../common" } 9 | syntax = { path = "../syntax" } 10 | tac = { path = "../tac" } 11 | typed-arena = "1.4.1" -------------------------------------------------------------------------------- /tacgen/src/info.rs: -------------------------------------------------------------------------------- 1 | use syntax::FuncDef; 2 | use common::IndexMap; 3 | 4 | // these structs are used in tacgen to keep some intermediate information 5 | 6 | pub struct VarInfo { 7 | // if the var is a VarDef in class, `off` is the offset in object pointer 8 | // if the var is a VarDef in function, `off` is a virtual register number 9 | pub off: u32, 10 | } 11 | 12 | pub struct FuncInfo { 13 | // the offset in vtbl 14 | // vtbl[0] = parent, vtbl[1] = class name 15 | pub off: u32, 16 | // which function it is in TacProgram (index in TacProgram::func) 17 | pub idx: u32, 18 | } 19 | 20 | pub struct ClassInfo<'a> { 21 | pub field_num: u32, 22 | // which vtbl it's vtbl is in TacProgram (index in TacProgram::vtbl) 23 | pub idx: u32, 24 | pub vtbl: IndexMap<&'a str, &'a FuncDef<'a>>, 25 | } -------------------------------------------------------------------------------- /tacgen/src/lib.rs: -------------------------------------------------------------------------------- 1 | mod info; 2 | 3 | use syntax::{ast::*, ty::*, ScopeOwner}; 4 | use ::tac::{self, *, Tac::{self, *}, Operand::*, Intrinsic::*}; 5 | use common::{Ref, MAIN_METHOD, BinOp::*, UnOp::*, IndexSet, IndexMap, HashMap}; 6 | use typed_arena::Arena; 7 | use crate::info::*; 8 | 9 | #[derive(Default)] 10 | struct TacGen<'a> { 11 | // `reg_num` and `label_num` are manually set at the beginning of every function 12 | reg_num: u32, 13 | label_num: u32, 14 | loop_stk: Vec, 15 | // Id & Index will behave differently when they are the lhs of an assignment 16 | // cur_assign contains the current assign rhs operand, or None if the current handling expr doesn't involve in assign 17 | cur_assign: Option, 18 | str_pool: IndexSet<&'a str>, 19 | // `*_info` 
just works like extra fields to those structs, their specific meaning can be found at `struct *Info` 20 | var_info: HashMap>, VarInfo>, 21 | func_info: HashMap>, FuncInfo>, 22 | class_info: HashMap>, ClassInfo<'a>>, 23 | } 24 | 25 | pub fn work<'a>(p: &'a Program<'a>, alloc: &'a Arena>) -> TacProgram<'a> { 26 | TacGen::default().program(p, alloc) 27 | } 28 | 29 | impl<'a> TacGen<'a> { 30 | fn program(mut self, p: &Program<'a>, alloc: &'a Arena>) -> TacProgram<'a> { 31 | let mut tp = TacProgram::default(); 32 | for (idx, &c) in p.class.iter().enumerate() { 33 | self.define_str(c.name); 34 | self.resolve_field(c); 35 | self.class_info.get_mut(&Ref(c)).unwrap().idx = idx as u32; 36 | tp.func.push(self.build_new(c, alloc)); 37 | } 38 | { 39 | let mut idx = tp.func.len() as u32; // their are already some `_Xxx._new` functions in tp.func, so can't start from 0 40 | for &c in &p.class { 41 | for &f in &c.field { 42 | if let FieldDef::FuncDef(f) = f { 43 | self.func_info.get_mut(&Ref(f)).unwrap().idx = idx; 44 | idx += 1; 45 | } 46 | } 47 | } 48 | } 49 | for &c in &p.class { 50 | for f in &c.field { 51 | if let FieldDef::FuncDef(fu) = f { 52 | let this = if fu.static_ { 0 } else { 1 }; 53 | for (idx, p) in fu.param.iter().enumerate() { 54 | self.var_info.insert(Ref(p), VarInfo { off: idx as u32 + this }); 55 | } 56 | // these regs are occupied by parameters 57 | self.reg_num = fu.param.len() as u32 + this; 58 | self.label_num = 0; 59 | let name = if Ref(c) == Ref(p.main.get().unwrap()) && fu.name == MAIN_METHOD { MAIN_METHOD.into() } else { format!("_{}.{}", c.name, fu.name) }; 60 | let mut f = TacFunc::empty(alloc, name, self.reg_num); 61 | self.block(&fu.body, &mut f); 62 | f.reg_num = self.reg_num; 63 | // add an return at the end of return-void function 64 | if fu.ret_ty() == Ty::void() { f.push(Tac::Ret { src: None }); } 65 | tp.func.push(f); 66 | } 67 | } 68 | } 69 | for &c in &p.class { 70 | tp.vtbl.push(tac::VTbl { 71 | parent: c.parent_ref.get().map(|p| 
self.class_info[&Ref(p)].idx), 72 | class: c.name, 73 | func: self.class_info[&Ref(c)].vtbl.iter().map(|(_, &f)| self.func_info[&Ref(f)].idx).collect(), 74 | }); 75 | } 76 | tp.str_pool = self.str_pool; 77 | tp 78 | } 79 | 80 | fn block(&mut self, b: &Block<'a>, f: &mut TacFunc<'a>) { 81 | for s in &b.stmt { self.stmt(s, f); } 82 | } 83 | 84 | fn stmt(&mut self, s: &Stmt<'a>, f: &mut TacFunc<'a>) { 85 | use StmtKind::*; 86 | match &s.kind { 87 | Assign(a) => { 88 | self.cur_assign = Some(self.expr(&a.src, f)); 89 | self.expr(&a.dst, f); 90 | } 91 | LocalVarDef(v) => { 92 | let reg = self.reg(); 93 | self.var_info.insert(Ref(v), VarInfo { off: reg }); 94 | let init = v.init.as_ref().map(|(_, e)| self.expr(e, f)).unwrap_or(Const(0)); 95 | f.push(Tac::Assign { dst: reg, src: [init] }); 96 | } 97 | ExprEval(e) => { self.expr(e, f); } 98 | Skip(_) => {} 99 | If(i) => { 100 | let before_else = self.label(); 101 | let cond = self.expr(&i.cond, f); 102 | f.push(Jif { label: before_else, z: true, cond: [cond] }); 103 | self.block(&i.on_true, f); 104 | if let Some(of) = &i.on_false { 105 | let after_else = self.label(); 106 | f.push(Jmp { label: after_else }); 107 | f.push(Label { label: before_else }); 108 | self.block(of, f); 109 | f.push(Label { label: after_else }); 110 | } else { 111 | f.push(Label { label: before_else }); 112 | } 113 | } 114 | While(w) => { 115 | // jump before_cond 116 | // before_body: 117 | // body 118 | // before_cond: 119 | // compute cond 120 | // if cond jump before_body 121 | // after_body: (for break's use) 122 | let (before_cond, before_body, after_body) = (self.label(), self.label(), self.label()); 123 | self.loop_stk.push(after_body); 124 | f.push(Jmp { label: before_cond }); 125 | f.push(Label { label: before_body }); 126 | self.block(&w.body, f); 127 | f.push(Label { label: before_cond }); 128 | let cond = self.expr(&w.cond, f); 129 | f.push(Jif { label: before_body, z: false, cond: [cond] }); 130 | f.push(Label { label: after_body }); 
131 | self.loop_stk.pop(); 132 | } 133 | For(fo) => { 134 | // init 135 | // jump before_cond 136 | // before_body: 137 | // body 138 | // update 139 | // before_cond: 140 | // compute cond 141 | // if cond jump before_body 142 | // after_body: (for break's use) 143 | let (before_cond, before_body, after_body) = (self.label(), self.label(), self.label()); 144 | self.loop_stk.push(after_body); 145 | self.stmt(&fo.init, f); 146 | f.push(Jmp { label: before_cond }); 147 | f.push(Label { label: before_body }); 148 | self.block(&fo.body, f); 149 | self.stmt(&fo.update, f); 150 | f.push(Label { label: before_cond }); 151 | let cond = self.expr(&fo.cond, f); 152 | f.push(Jif { label: before_body, z: false, cond: [cond] }); 153 | f.push(Label { label: after_body }); 154 | self.loop_stk.pop(); 155 | } 156 | Return(r) => { 157 | let src = r.as_ref().map(|e| [self.expr(e, f)]); 158 | f.push(Ret { src }); 159 | } 160 | Print(p) => for e in p { 161 | let reg = self.expr(e, f); 162 | f.push(Param { src: [reg] }); 163 | match e.ty.get() { 164 | t if t == Ty::int() => { self.intrinsic(_PrintInt, f); } 165 | t if t == Ty::bool() => { self.intrinsic(_PrintBool, f); } 166 | t if t == Ty::string() => { self.intrinsic(_PrintString, f); } 167 | t => unreachable!("Shouldn't meet type {:?} in Print in these phase, type checking should have reported error.", t), 168 | } 169 | } 170 | Break(_) => { f.push(Jmp { label: *self.loop_stk.last().unwrap() }); } 171 | Block(b) => self.block(b, f), 172 | } 173 | } 174 | 175 | fn expr(&mut self, e: &Expr<'a>, f: &mut TacFunc<'a>) -> Operand { 176 | use ExprKind::*; 177 | let assign = self.cur_assign.take(); 178 | match &e.kind { 179 | VarSel(v) => { 180 | // if `e` is a class name, v.var.get() may be None, this happens only when calling a static function with class name 181 | let var = if let Some(var) = v.var.get() { var } else { return Reg(0); }; 182 | let off = self.var_info[&Ref(var)].off; // may be register id or offset in class 183 | match 
var.owner.get().unwrap() { 184 | ScopeOwner::Local(_) | ScopeOwner::Param(_) => if let Some(src) = assign { // `off` is register 185 | f.push(Tac::Assign { dst: off, src: [src] }); 186 | // the return value won't be used, so just return a meaningless Reg(0), the below Reg(0)s are the same 187 | Reg(0) 188 | } else { Reg(off) } 189 | ScopeOwner::Class(_) => { // `off` is offset 190 | // `this` is at argument 0 191 | let owner = v.owner.as_ref().map(|o| self.expr(o, f)).unwrap_or(Reg(0)); 192 | if let Some(src) = assign { 193 | f.push(Store { src_base: [src, owner], off: off as i32 * INT_SIZE, hint: MemHint::Obj }); 194 | Reg(0) 195 | } else { 196 | let dst = self.reg(); 197 | f.push(Load { dst, base: [owner], off: off as i32 * INT_SIZE, hint: MemHint::Obj }); 198 | Reg(dst) 199 | } 200 | } 201 | ScopeOwner::Global(_) => unreachable!("Impossible to declare a variable in global scope."), 202 | } 203 | } 204 | IndexSel(i) => { 205 | let (arr, idx) = (self.expr(&i.arr, f), self.expr(&i.idx, f)); 206 | let (ok, len, cmp) = (self.reg(), self.length(arr, f), self.reg()); 207 | let (err, after) = (self.label(), self.label()); 208 | f.push(Bin { op: Ge, dst: ok, lr: [idx, Const(0)] }) 209 | .push(Bin { op: Lt, dst: cmp, lr: [idx, len] }) 210 | .push(Bin { op: And, dst: ok, lr: [Reg(ok), Reg(cmp)] }) 211 | .push(Jif { label: err, z: true, cond: [Reg(ok)] }); 212 | // range check passed if reach here 213 | let off = self.reg(); 214 | f.push(Bin { op: Mul, dst: off, lr: [idx, Const(INT_SIZE)] }) 215 | .push(Bin { op: Add, dst: off, lr: [Reg(off), arr] }); 216 | let ret = if let Some(src) = assign { 217 | f.push(Store { src_base: [src, Reg(off)], off: 0, hint: MemHint::Arr }); 218 | Reg(0) 219 | } else { 220 | let dst = self.reg(); 221 | f.push(Load { dst, base: [Reg(off)], off: 0, hint: MemHint::Arr }); 222 | Reg(dst) 223 | }; 224 | f.push(Jmp { label: after }); 225 | self.re(INDEX_OUT_OF_BOUND, f.push(Label { label: err })); 226 | f.push(Label { label: after }); 227 | ret 228 
| } 229 | IntLit(i) => Const(*i), 230 | BoolLit(b) => Const(*b as i32), 231 | StringLit(s) => { 232 | let dst = self.reg(); 233 | f.push(LoadStr { dst, s: self.define_str(s) }); 234 | Reg(dst) 235 | } 236 | NullLit(_) => Const(0), 237 | Call(c) => { 238 | let v = if let ExprKind::VarSel(v) = &c.func.kind { v } else { unimplemented!() }; 239 | match &v.owner { 240 | Some(o) if o.ty.get().is_arr() => { 241 | let arr = self.expr(o, f); 242 | self.length(arr, f) 243 | } 244 | _ => { 245 | let fu = c.func_ref.get().unwrap(); 246 | let ret = if fu.ret_ty() != Ty::void() { Some(self.reg()) } else { None }; 247 | let args = c.arg.iter().map(|a| self.expr(a, f)).collect::>(); 248 | let hint = CallHint { 249 | arg_obj: c.arg.iter().any(|a| a.ty.get().is_class()) || !fu.static_, 250 | arg_arr: c.arg.iter().any(|a| a.ty.get().arr > 0), 251 | }; 252 | if fu.static_ { 253 | if let Some(o) = v.owner.as_ref() { let _ = self.expr(o, f); } 254 | for a in args { f.push(Param { src: [a] }); } 255 | f.push(Tac::Call { dst: ret, kind: CallKind::Static(self.func_info[&Ref(fu)].idx, hint) }); 256 | } else { 257 | // Reg(0) is `this` 258 | let owner = v.owner.as_ref().map(|o| self.expr(o, f)).unwrap_or(Reg(0)); 259 | f.push(Param { src: [owner] }); 260 | for a in args { f.push(Param { src: [a] }); } 261 | let slot = self.reg(); 262 | let off = self.func_info[&Ref(fu)].off; 263 | f.push(Load { dst: slot, base: [owner], off: 0, hint: MemHint::Immutable }) 264 | .push(Load { dst: slot, base: [Reg(slot)], off: off as i32 * INT_SIZE, hint: MemHint::Immutable }); 265 | f.push(Tac::Call { dst: ret, kind: CallKind::Virtual([Reg(slot)], hint) }); 266 | } 267 | Reg(ret.unwrap_or(0)) // if ret is None, the result can't be assigned to others, so 0 will not be used 268 | } 269 | } 270 | } 271 | Unary(u) => { 272 | let (r, dst) = (self.expr(&u.r, f), self.reg()); 273 | f.push(Un { op: u.op, dst, r: [r] }); 274 | Reg(dst) 275 | } 276 | Binary(b) => { 277 | let (l, r) = (self.expr(&b.l, f), 
self.expr(&b.r, f)); 278 | match b.op { 279 | Eq | Ne if b.l.ty.get() == Ty::string() => { 280 | f.push(Param { src: [l] }).push(Param { src: [r] }); 281 | let dst = self.intrinsic(_StringEqual, f).unwrap(); 282 | if b.op == Ne { 283 | f.push(Un { op: Not, dst, r: [Reg(dst)] }); 284 | } 285 | Reg(dst) 286 | } 287 | op => { 288 | let dst = self.reg(); 289 | f.push(Bin { op, dst, lr: [l, r] }); 290 | Reg(dst) 291 | } 292 | } 293 | } 294 | This(_) => Reg(0), 295 | ReadInt(_) => Reg(self.intrinsic(_ReadInt, f).unwrap()), 296 | ReadLine(_) => Reg(self.intrinsic(_ReadLine, f).unwrap()), 297 | NewClass(n) => { 298 | let dst = self.reg(); 299 | // by design, a class's new func in functions have the same index as its vtbl in vtbls 300 | f.push(Tac::Call { dst: Some(dst), kind: CallKind::Static(self.class_info[&Ref(n.class.get().unwrap())].idx, CallHint { arg_obj: false, arg_arr: false }) }); 301 | Reg(dst) 302 | } 303 | NewArray(n) => { 304 | let len = self.expr(&n.len, f); 305 | let (ok, before_cond, before_body) = (self.label(), self.label(), self.label()); 306 | let (cmp, ptr) = (self.reg(), self.reg()); 307 | f.push(Bin { op: Lt, dst: cmp, lr: [len, Const(0)] }) 308 | .push(Jif { label: ok, z: true, cond: [Reg(cmp)] }); 309 | self.re(NEW_ARR_NEG, f); 310 | f.push(Label { label: ok }); 311 | let arr = self.intrinsic(_Alloc, f 312 | .push(Bin { op: Mul, dst: ptr, lr: [len, Const(INT_SIZE)] }) 313 | .push(Bin { op: Add, dst: ptr, lr: [Reg(ptr), Const(INT_SIZE)] }) // now ptr = bytes to allocate 314 | .push(Param { src: [Reg(ptr)] })).unwrap(); 315 | f.push(Bin { op: Add, dst: ptr, lr: [Reg(arr), Reg(ptr)] }); // now ptr = end of array 316 | f.push(Bin { op: Add, dst: arr, lr: [Reg(arr), Const(INT_SIZE)] }); // now arr = begin of array([0]) 317 | f.push(Jmp { label: before_cond }) // loop(reversely), set all to 0 318 | .push(Label { label: before_body }) 319 | .push(Bin { op: Sub, dst: ptr, lr: [Reg(ptr), Const(INT_SIZE)] }) 320 | .push(Store { src_base: [Const(0), 
Reg(ptr)], off: 0, hint: MemHint::Arr }) 321 | .push(Label { label: before_cond }) 322 | .push(Bin { op: Eq, dst: cmp, lr: [Reg(ptr), Reg(arr)] }) 323 | .push(Jif { label: before_body, z: true, cond: [Reg(cmp)] }); // when ptr == arr, loop end 324 | f.push(Store { src_base: [len, Reg(arr)], off: -INT_SIZE, hint: MemHint::Immutable }); // arr[-1] = len 325 | Reg(arr) 326 | } 327 | ClassTest(t) => { 328 | let obj = self.expr(&t.expr, f); 329 | self.check_cast(obj, self.class_info[&Ref(t.class.get().unwrap())].idx, f) 330 | } 331 | ClassCast(t) => { 332 | let obj = self.expr(&t.expr, f); 333 | let check = self.check_cast(obj, self.class_info[&Ref(t.class.get().unwrap())].idx, f); 334 | let (msg, vtbl, ok) = (self.reg(), self.reg(), self.label()); 335 | f.push(Jif { label: ok, z: false, cond: [check] }); 336 | let s = self.define_str(BAD_CAST1); // borrow checker... 337 | self.intrinsic(_PrintString, f.push(LoadStr { dst: msg, s }).push(Param { src: [Reg(msg)] })); 338 | self.intrinsic(_PrintString, f.push(Load { dst: vtbl, base: [obj], off: 0, hint: MemHint::Immutable }) 339 | .push(Load { dst: msg, base: [Reg(vtbl)], off: INT_SIZE as i32, hint: MemHint::Immutable }).push(Param { src: [Reg(msg)] })); 340 | let s = self.define_str(BAD_CAST2); 341 | self.intrinsic(_PrintString, f.push(LoadStr { dst: msg, s }).push(Param { src: [Reg(msg)] })); 342 | let s = self.define_str(t.name); 343 | self.intrinsic(_PrintString, f.push(LoadStr { dst: msg, s }).push(Param { src: [Reg(msg)] })); 344 | let s = self.define_str(BAD_CAST3); 345 | self.intrinsic(_PrintString, f.push(LoadStr { dst: msg, s }).push(Param { src: [Reg(msg)] })); 346 | self.intrinsic(_Halt, f); 347 | f.push(Label { label: ok }); 348 | obj 349 | } 350 | } 351 | } 352 | } 353 | 354 | impl<'a> TacGen<'a> { 355 | // define a string in str pool and return its id, this id can be used in Tac::LoadStr 356 | fn define_str(&mut self, s: &'a str) -> u32 { self.str_pool.insert_full(s).0 as u32 } 357 | 358 | fn reg(&mut self) 
-> u32 { (self.reg_num, self.reg_num += 1).0 }

    // allocate a fresh label id (returns the old counter value, then increments it)
    fn label(&mut self) -> u32 { (self.label_num, self.label_num += 1).0 }

    // if you don't need to modify the returned register, it is more recommended to use Const(i)
    fn int(&mut self, i: i32, f: &mut TacFunc<'a>) -> u32 {
        let dst = self.reg();
        f.push(Tac::Assign { dst, src: [Const(i)] });
        dst
    }

    // perform an intrinsic call, return value is Some if this intrinsic call has return value
    fn intrinsic(&mut self, i: Intrinsic, f: &mut TacFunc<'a>) -> Option<u32> {
        let dst = if i.has_ret() { Some(self.reg()) } else { None };
        f.push(Tac::Call { dst, kind: CallKind::Intrinsic(i) });
        dst
    }

    // read the length of `arr` (caller should guarantee `arr` is really an array)
    // the length is stored one word before the first element
    fn length(&mut self, arr: Operand, f: &mut TacFunc<'a>) -> Operand {
        let dst = self.reg();
        f.push(Load { dst, base: [arr], off: -INT_SIZE, hint: MemHint::Immutable });
        Reg(dst)
    }

    // re is short for runtime error; this function prints a message and calls halt
    fn re(&mut self, msg: &'static str, f: &mut TacFunc<'a>) {
        let src = self.reg();
        let s = self.define_str(msg);
        self.intrinsic(_PrintString, f.push(LoadStr { dst: src, s }).push(Param { src: [Reg(src)] }));
        self.intrinsic(_Halt, f);
    }

    // emit code that walks the vtbl chain of `obj` and yields 1 if the vtbl with index `vtbl_idx` is found, 0 otherwise
    fn check_cast(&mut self, obj: Operand, vtbl_idx: u32, f: &mut TacFunc<'a>) -> Operand {
        // ret = 0
        // while (cur)
        //   ret = (cur == target)
        //   if ret = 1
        //     break
        //   cur = cur->parent
        let (ret, cur, target) = (self.int(0, f), self.reg(), self.reg());
        let (before_cond, after_body) = (self.label(), self.label());
        f.push(LoadVTbl { dst: target, v: vtbl_idx });
        // word 0 of the object is its vtbl, word 0 of a vtbl is the parent vtbl
        f.push(Load { dst: cur, base: [obj], off: 0, hint: MemHint::Immutable });
        f.push(Label { label: before_cond });
        f.push(Jif { label: after_body, z: true, cond: [Reg(cur)] });
        f.push(Bin { op: Eq, dst: ret, lr: [Reg(cur), Reg(target)] }).push(Jif { label: after_body, z: false, cond: [Reg(ret)] });
        f.push(Load { dst: cur, base: [Reg(cur)], off: 0, hint: MemHint::Immutable });
        f.push(Jmp { label: before_cond });
        f.push(Label { label: after_body });
        Reg(ret)
    }
}

impl<'a> TacGen<'a> {
    // `idx` in ClassInfo & FuncInfo is not determined here, just set them to a meaningless value (0)
    // all functions (static & virtual) are inserted into self.func_info
    // this function relies on the fact that no cyclic inheritance exist, which is guaranteed in typeck
    fn resolve_field(&mut self, c: &'a ClassDef<'a>) {
        // each class is resolved at most once
        if !self.class_info.contains_key(&Ref(c)) {
            // start from the parent's layout (resolved first), or from an empty one
            // field_num starts at 1 because word 0 of an object holds the vtbl pointer
            let (mut field_num, mut vtbl) = if let Some(p) = c.parent_ref.get() {
                self.resolve_field(p);
                let p = &self.class_info[&Ref(p)];
                (p.field_num, p.vtbl.clone())
            } else { (1, IndexMap::default()) };
            for f in &c.field {
                match f {
                    FieldDef::FuncDef(f) => if !f.static_ {
                        if let Some((idx, _, p_f)) = vtbl.get_full_mut(f.name) {
                            // + 2, because 0 is parent vtbl, 1 is class name
                            self.func_info.insert(Ref(f), FuncInfo { off: idx as u32 + 2, idx: 0 });
                            *p_f = f; // override
                        } else {
                            // a new virtual function takes the next free slot
                            self.func_info.insert(Ref(f), FuncInfo { off: vtbl.len() as u32 + 2, idx: 0 });
                            vtbl.insert(f.name, f);
                        }
                    } else {
                        // `off` is useless for static functions
                        self.func_info.insert(Ref(f), FuncInfo { off: 0, idx: 0 });
                    }
                    FieldDef::VarDef(v) => {
                        self.var_info.insert(Ref(v), VarInfo { off: field_num });
                        field_num += 1;
                    }
                }
            }
            self.class_info.insert(Ref(c), ClassInfo { field_num, idx: 0, vtbl });
        }
    }

    fn build_new(&mut self, c: &'a ClassDef<'a>, alloc: &'a Arena<TacNode<'a>>) -> TacFunc<'a> {
        self.reg_num = 0;
        let ClassInfo { field_num, idx, ..
} = self.class_info[&Ref(c)]; 451 | let mut f = TacFunc::empty(alloc, format!("_{}._new", c.name), 0); 452 | f.push(Param { src: [Const(field_num as i32 * INT_SIZE)] }); 453 | let ret = self.intrinsic(_Alloc, &mut f).unwrap(); 454 | let vtbl = self.reg(); 455 | f.push(LoadVTbl { dst: vtbl, v: idx }); 456 | f.push(Store { src_base: [Reg(vtbl), Reg(ret)], off: 0, hint: MemHint::Immutable }); 457 | for i in 1..field_num { 458 | f.push(Store { src_base: [Const(0), Reg(ret)], off: i as i32 * INT_SIZE, hint: MemHint::Obj }); 459 | } 460 | f.push(Ret { src: Some([Reg(ret)]) }); 461 | f.reg_num = self.reg_num; 462 | f 463 | } 464 | } -------------------------------------------------------------------------------- /tacopt/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "tacopt" 3 | version = "0.1.0" 4 | authors = ["MashPlant <740678788@qq.com>"] 5 | edition = "2018" 6 | 7 | [dependencies] 8 | common = { path = "../common" } 9 | tac = { path = "../tac" } 10 | typed-arena = "1.4.1" 11 | bitset = { git = "https://github.com/MashPlant/bitset" } -------------------------------------------------------------------------------- /tacopt/src/aliveness.rs: -------------------------------------------------------------------------------- 1 | use crate::bb::FuncBB; 2 | 3 | pub fn work(_f: &mut FuncBB) { 4 | unimplemented!() 5 | } -------------------------------------------------------------------------------- /tacopt/src/bb.rs: -------------------------------------------------------------------------------- 1 | use tac::{TacNode, Tac, TacFunc, Operand, TacIter, CallKind, Intrinsic::_Halt}; 2 | use common::Ref; 3 | use typed_arena::Arena; 4 | 5 | pub struct BB<'a> { 6 | pub len: u32, 7 | // the ret/jmp/jif/label is NOT included in the link list defined by first -> last 8 | // don't forget that ret/jif may read a register in data flow analysis 9 | pub first: Option<&'a TacNode<'a>>, 10 | pub last: Option<&'a 
TacNode<'a>>, 11 | pub next: NextKind, 12 | pub prev: Vec, 13 | } 14 | 15 | #[derive(Copy, Clone)] 16 | pub enum NextKind { 17 | Ret(Option), 18 | Jmp(u32), 19 | // `cond` can only be Reg, if input tac's cond is a Const, it will be transformed into Jmp 20 | Jif { cond: u32, z: bool, fail: u32, jump: u32 }, 21 | Halt, 22 | } 23 | 24 | impl BB<'_> { 25 | // return the out edges of this bb 26 | // there are 2 out edges in one bb at most, so the return value's type is `[Option; 2]` 27 | pub fn next(&self) -> [Option; 2] { 28 | match self.next { 29 | NextKind::Ret(_) | NextKind::Halt => [None, None], 30 | NextKind::Jmp(jump) => [Some(jump), None], 31 | NextKind::Jif { fail, jump, .. } => [Some(fail), Some(jump)], 32 | } 33 | } 34 | 35 | // returns the register id it reads(if any) 36 | pub fn next_r(&self) -> Option { 37 | match self.next { 38 | NextKind::Ret(r) => match r { Some(Operand::Reg(r)) => Some(r), _ => None } 39 | NextKind::Jif { cond, .. } => Some(cond), 40 | _ => None 41 | } 42 | } 43 | 44 | // returns mut ref to the register id it reads(if any) 45 | pub fn next_r_mut(&mut self) -> Option<&mut u32> { 46 | match &mut self.next { 47 | NextKind::Ret(r) => match r { Some(Operand::Reg(r)) => Some(r), _ => None } 48 | NextKind::Jif { cond, .. 
} => Some(cond), 49 | _ => None 50 | } 51 | } 52 | } 53 | 54 | impl<'a> BB<'a> { 55 | pub fn iter(&self) -> TacIter<'a> { 56 | TacIter::new(self.first, self.last, self.len as usize) 57 | } 58 | 59 | // delete `t` from the linked list in self 60 | // `t` should belong to that linked list, but it is not checked 61 | // the link on `t` is not cut down, so you can safely delete a tac while iterating over it 62 | pub fn del(&mut self, t: &'a TacNode<'a>) { 63 | self.len -= 1; 64 | match self.first { 65 | Some(first) if Ref(first) == Ref(t) => { 66 | first.prev.set(None); 67 | self.first = first.next.get(); 68 | } 69 | _ => match self.last { 70 | Some(last) if Ref(last) == Ref(t) => { 71 | last.next.set(None); 72 | self.last = last.prev.get(); 73 | } 74 | _ => { 75 | let (prev, next) = (t.prev.get().unwrap(), t.next.get().unwrap()); 76 | next.prev.set(Some(prev)); 77 | prev.next.set(Some(next)); 78 | } 79 | } 80 | } 81 | } 82 | 83 | // insert `new` after `loc` in the linked list in self 84 | // `loc` should belong to that linked list, but it is not checked 85 | pub fn insert_after(&mut self, loc: &'a TacNode<'a>, new: &'a TacNode<'a>) { 86 | self.len += 1; 87 | match self.last { 88 | Some(last) if Ref(last) == Ref(loc) => { 89 | last.next.set(Some(new)); 90 | new.prev.set(Some(last)); 91 | self.last = Some(new); 92 | } 93 | _ => { 94 | let next = loc.next.get().unwrap(); 95 | next.prev.set(Some(new)); 96 | new.next.set(Some(next)); 97 | loc.next.set(Some(new)); 98 | new.prev.set(Some(loc)); 99 | } 100 | } 101 | } 102 | 103 | // `prev_with_entry` means returning an iterator yielding `prev` list element 104 | // while adding an virtual entry node, which has an edge to the first node 105 | // `this` is self's index in FuncBB::bb + 1, so `this == 1` means that it is the first node 106 | pub fn prev_with_entry<'b>(&'b self, this: usize) -> (usize, impl IntoIterator + Clone + 'b) { 107 | (this, self.prev.iter().map(|x| *x as usize + 1).chain(if this == 1 { Some(0) } else { 
None })) 108 | } 109 | } 110 | 111 | pub struct FuncBB<'a> { 112 | // some fields copied from TacFunc, they may change during optimization, so do copy rather than borrow 113 | pub param_num: u32, 114 | pub reg_num: u32, 115 | pub alloc: &'a Arena>, 116 | pub bb: Vec>, 117 | // I admit it is not perfect design, we need to clone func's name here for convenience 118 | // nevertheless, the affect on performance is very little 119 | pub name: String, 120 | } 121 | 122 | impl<'a> FuncBB<'a> { 123 | // construct control flow graph from `f`, the returned `FuncBB` contains such information 124 | // only `FuncBB` can be used for future optimization and codegen, and `TacFunc` cannot 125 | // `f` should returns or halts on every execution path, otherwise `simplify` will panic 126 | pub fn new(f: &TacFunc<'a>) -> FuncBB<'a> { 127 | let mut bb = Vec::new(); 128 | let mut label2bb = Vec::new(); // label2bb[label id] = bb id of this label 129 | let mut labels = Vec::new(); // labels = {bb id | bb.next contains label id} 130 | let mut iter = f.first; 131 | while let Some(first) = iter { 132 | // is_next: 0 for this label belongs to this bb, 1 for this label belongs to the next bb 133 | let mut mark_label = |label: u32, is_next: u32| { 134 | let label = label as usize; 135 | if label2bb.len() <= label { label2bb.resize(label + 1, 0); } 136 | label2bb[label] = bb.len() as u32 + is_next; 137 | }; 138 | let mut first = Some(first); 139 | while let Some(t) = first { 140 | if let Tac::Label { label } = t.tac.get() { 141 | mark_label(label, 0); 142 | first = t.next.get(); 143 | } else { break; } 144 | } 145 | let (mut cur, mut first, mut last) = (first, None, None); 146 | let mut len = 0; // ret/jmp/jif/label are not counted 147 | // mark `has_label` as true if `next` contains `Some(label)` 148 | // label index should be remapped to bb index that the label belongs to 149 | let mut has_label = false; 150 | let next = loop { 151 | if let Some(cur1) = cur { 152 | match cur1.tac.get() { 153 | 
Tac::Label { label } => { 154 | mark_label(label, 1); 155 | break NextKind::Jmp(bb.len() as u32 + 1); 156 | } 157 | Tac::Jmp { label } => { 158 | has_label = true; 159 | break NextKind::Jmp(label); 160 | } 161 | Tac::Jif { label, z, cond } => break match cond[0] { 162 | Operand::Const(c) => if (c == 0) == z { // (Jz, and is z) or (Jnz and is not z), do the jump 163 | has_label = true; 164 | NextKind::Jmp(label) 165 | } else { NextKind::Jmp(bb.len() as u32 + 1) } 166 | Operand::Reg(r) => { 167 | has_label = true; 168 | NextKind::Jif { cond: r, z, fail: bb.len() as u32 + 1, jump: label } 169 | } 170 | }, 171 | Tac::Ret { src } => break NextKind::Ret(src.map(|src| src[0])), 172 | Tac::Call { kind: CallKind::Intrinsic(_Halt), .. } => break NextKind::Halt, 173 | _ => { 174 | if first.is_none() { first = cur; } 175 | last = cur; 176 | len += 1; 177 | } 178 | } 179 | cur = cur1.next.get(); 180 | } else { 181 | // reaching here means the last tac is not `return`, but we still don't add `return` here, instead we add `jmp` 182 | // in this way, if the last bb is reachable, it will be certain to cause panicking in `simplify` 183 | break NextKind::Jmp(bb.len() as u32 + 1); 184 | } 185 | }; 186 | iter = cur.and_then(|cur| cur.next.get()); 187 | if has_label { labels.push(bb.len() as u32); } 188 | if let Some(first) = first { first.prev.set(None); } 189 | if let Some(last) = last { last.next.set(None); } 190 | bb.push(BB { len, first, last, next, prev: vec![] }); 191 | } 192 | for unfill in labels { 193 | match &mut bb[unfill as usize].next { 194 | NextKind::Jmp(jump) | NextKind::Jif { jump, .. 
} => *jump = label2bb[*jump as usize], _ => {} 195 | } 196 | } 197 | FuncBB { param_num: f.param_num, reg_num: f.reg_num, alloc: f.alloc, bb: simplify(bb), name: f.name.clone() } 198 | } 199 | 200 | pub fn optimize(&mut self) { 201 | crate::common_expr::work(self); 202 | crate::copy_prop::work(self); 203 | crate::const_prop::work(self); 204 | crate::aliveness::work(self); 205 | } 206 | 207 | pub fn optimizen(&mut self, n: u32) { 208 | for _ in 0..n { self.optimize(); } 209 | } 210 | 211 | pub fn new_reg(&mut self) -> u32 { 212 | (self.reg_num, self.reg_num += 1).0 213 | } 214 | 215 | // convert self back into a `TacFunc`, which can be used for execution 216 | pub fn to_tac_func(&self) -> TacFunc<'a> { 217 | let mut f = TacFunc::empty(self.alloc, self.name.clone(), self.param_num); 218 | f.reg_num = self.reg_num; 219 | for (idx, b) in self.bb.iter().enumerate() { 220 | // generate label and jump only when necessary 221 | if !(b.prev.is_empty() || (b.prev.len() == 1 && b.prev[0] + 1 == idx as u32)) { 222 | f.push(Tac::Label { label: idx as u32 }); 223 | } 224 | // shouldn't have ret/... here 225 | for t in b.iter() { f.push(t.tac.get()); } 226 | match b.next { 227 | NextKind::Ret(src) => { f.push(Tac::Ret { src: src.map(|src| [src]) }); } 228 | NextKind::Jmp(jump) => if jump != idx as u32 + 1 { f.push(Tac::Jmp { label: jump }); } 229 | NextKind::Jif { cond, z, jump, .. 
} => { f.push(Tac::Jif { label: jump, z, cond: [Operand::Reg(cond)] }); } 230 | NextKind::Halt => { f.push(Tac::Call { dst: None, kind: CallKind::Intrinsic(_Halt) }); } 231 | }; 232 | } 233 | f 234 | } 235 | } 236 | 237 | // `simplify` will remove all unreachable nodes, and set each node's `prev` to the proper value 238 | // the old value of each node's `prev` is not used here 239 | // it is possible to trigger `index out of bounds` here (if constraint is violated), see the comment in `FuncBB::new` 240 | pub(crate) fn simplify(mut bb: Vec) -> Vec { 241 | fn dfs(x: usize, bb: &mut [BB], vis: &mut [bool]) { 242 | if vis[x] { return; } 243 | vis[x] = true; 244 | bb[x].next().iter().filter_map(|&x| x).for_each(|x| dfs(x as usize, bb, vis)); 245 | } 246 | let mut vis = vec![false; bb.len()]; 247 | dfs(0, &mut bb, &mut vis); 248 | let mut actual = vec![0; bb.len()]; // exclusive prefix sum of `vis` 249 | for i in 1..bb.len() { 250 | actual[i] += actual[i - 1] + vis[i - 1] as u32; 251 | } 252 | let mut new = Vec::with_capacity(bb.len()); 253 | for (_, mut b) in bb.into_iter().enumerate().filter(|(idx, _)| vis[*idx]) { 254 | b.next = match b.next { 255 | NextKind::Jmp(jump) => NextKind::Jmp(actual[jump as usize]), 256 | NextKind::Jif { cond, z, fail, jump } => 257 | NextKind::Jif { cond, z, fail: actual[fail as usize], jump: actual[jump as usize] }, 258 | n => n 259 | }; 260 | b.prev.clear(); 261 | new.push(b); 262 | } 263 | for idx in 0..new.len() { // borrow checker... 
264 | new[idx].next().iter().filter_map(|&x| x).for_each(|x| new[x as usize].prev.push(idx as u32)); 265 | } 266 | new 267 | } 268 | -------------------------------------------------------------------------------- /tacopt/src/common_expr.rs: -------------------------------------------------------------------------------- 1 | use crate::{bb::{FuncBB, BB}, flow::{FlowElem, Flow, And}}; 2 | use common::{BinOp, UnOp, HashMap, HashSet, Ref}; 3 | use tac::{Tac, Operand, MemHint, CallKind, TacNode, TacIter}; 4 | use bitset::traits::*; 5 | 6 | pub fn work(f: &mut FuncBB) { WorkCtx::new(f).work(f); } 7 | 8 | #[derive(Copy, Clone, Eq, PartialEq, Hash)] 9 | enum TacRhs { 10 | Bin(BinOp, [Operand; 2]), 11 | Un(UnOp, [Operand; 1]), 12 | Load([Operand; 1], i32), 13 | } 14 | 15 | impl TacRhs { 16 | fn from_tac(tac: Tac) -> Option { 17 | match tac { 18 | Tac::Bin { op, lr, .. } => Some(TacRhs::Bin(op, lr)), 19 | Tac::Un { op, r, .. } => Some(TacRhs::Un(op, r)), 20 | Tac::Load { base, off, .. } => { Some(TacRhs::Load(base, off)) } 21 | _ => None 22 | } 23 | } 24 | 25 | fn r(&self) -> &[Operand] { 26 | match self { 27 | TacRhs::Bin(_, lr) => lr, 28 | TacRhs::Un(_, r) => r, 29 | TacRhs::Load(base, _) => base, 30 | } 31 | } 32 | } 33 | 34 | // return whether this tac kill (obj, arr) 35 | fn mem_kill(kind: Tac) -> (bool, bool) { 36 | match kind { 37 | Tac::Store { hint, .. } => match hint { 38 | MemHint::Immutable => (false, false), 39 | MemHint::Obj => (true, false), 40 | MemHint::Arr => (false, true), 41 | } 42 | Tac::Call { kind, .. 
} => match kind { 43 | CallKind::Virtual(_, hint) | CallKind::Static(_, hint) => (hint.arg_obj, hint.arg_arr), 44 | _ => (false, false) 45 | } 46 | _ => (false, false) 47 | } 48 | } 49 | 50 | // all Box<[u32]> are bitset of expression(TacRhs) id 51 | struct WorkCtx<'a> { 52 | // write2id: k -> v: writing to this k can affect the result of TacRhs in v 53 | write2id: HashMap>, 54 | rhs2id: HashMap, 55 | // tac2id: tac to its TacRhs's id 56 | tac2id: HashMap>, u32>, 57 | // obj/arr: these TacRhs are Load, and they load from obj/arr 58 | obj: Box<[u32]>, 59 | arr: Box<[u32]>, 60 | // used in dfs, avoid circular bb link crashing dfs 61 | vis: Vec, 62 | } 63 | 64 | impl<'a> WorkCtx<'a> { 65 | pub fn new(f: &FuncBB<'a>) -> WorkCtx<'a> { 66 | let (mut write2id, mut rhs2id, mut tac2id) = (HashMap::new(), HashMap::new(), HashMap::new()); 67 | let (mut obj, mut arr) = (HashSet::new(), HashSet::new()); 68 | for b in &f.bb { 69 | for t in b.iter() { 70 | let tac = t.tac.get(); 71 | if let Some(rhs) = TacRhs::from_tac(tac) { 72 | let id = rhs2id.len() as u32; 73 | let id = *rhs2id.entry(rhs).or_insert(id); 74 | tac2id.insert(Ref(t), id); 75 | if let Tac::Load { hint, .. } = tac { 76 | match hint { 77 | MemHint::Immutable => {} 78 | MemHint::Obj => { obj.insert(id); } 79 | MemHint::Arr => { arr.insert(id); } 80 | }; 81 | } 82 | for r in rhs.r() { 83 | if let Operand::Reg(r) = r { 84 | write2id.entry(*r).or_insert_with(HashSet::new).insert(id); 85 | } 86 | } 87 | } 88 | } 89 | } 90 | let (obj, arr) = (iter2bs(&obj, rhs2id.len()), iter2bs(&arr, rhs2id.len())); 91 | let write2id = write2id.iter().map(|(&k, v)| (k, iter2bs(v, rhs2id.len()))).collect(); 92 | WorkCtx { write2id, rhs2id, tac2id, obj, arr, vis: vec![false; f.bb.len()] } 93 | } 94 | 95 | pub fn work(&mut self, f: &mut FuncBB<'a>) { 96 | let mut available_expr_flow = Flow::::new(f.bb.len() + 1, self.rhs2id.len()); 97 | let each = available_expr_flow.each(); 98 | let FlowElem { gen, kill, out, .. 
} = available_expr_flow.split(); 99 | // add offset 1, leave index 0 as an virtual entry node 100 | // initial value of out is U, except for entry node 101 | // entry node has an edge to the first node, that's what `prev_with_entry` does 102 | for (off, b) in f.bb.iter().enumerate().map(|(idx, bb)| ((idx + 1) * each, bb)) { 103 | self.compute_gen_kill(b, &mut gen[off..off + each], &mut kill[off..off + each]); 104 | } 105 | for x in out.iter_mut().skip(each) { *x = !0; } 106 | available_expr_flow.solve(f.bb.iter().enumerate().map(|b| b.1.prev_with_entry(b.0 + 1))); 107 | let FlowElem { in_, .. } = available_expr_flow.split(); 108 | for idx in 0..f.bb.len() { // borrow checker... 109 | let off = (idx + 1) * each; 110 | self.do_optimize(idx, f, &mut in_[off..off + each]); 111 | } 112 | } 113 | 114 | fn compute_gen_kill(&self, b: &BB, gen: &mut [u32], kill: &mut [u32]) { 115 | for t in b.iter() { 116 | let tac = t.tac.get(); 117 | if let Some(rhs) = TacRhs::from_tac(tac).map(|rhs| self.rhs2id[&rhs]) { gen.bsset(rhs) } 118 | if let Some(w) = tac.rw().1.and_then(|w| self.write2id.get(&w)) { 119 | kill.bsor(w); 120 | gen.bsandn(w); // this has to be done after gen.bsset(rhs), because x = x + y doesn't gen x + y 121 | } 122 | let (obj, arr) = mem_kill(tac); 123 | if obj { 124 | kill.bsor(&self.obj); 125 | gen.bsandn(&self.obj); 126 | } 127 | if arr { 128 | kill.bsor(&self.arr); 129 | gen.bsandn(&self.arr); 130 | } 131 | } 132 | } 133 | 134 | // all available expression with index = `rhs` be replaced by computing it to `new` and copy `new` to original dst 135 | fn dfs(&mut self, idx: usize, f: &mut FuncBB<'a>, iter: impl IntoIterator>, rhs: u32, new: u32) { 136 | if self.vis[idx] { return; } 137 | self.vis[idx] = true; 138 | for t in iter { 139 | if self.tac2id.get(&Ref(t)) == Some(&rhs) { 140 | let mut tac = t.tac.get(); 141 | let dst = std::mem::replace(tac.rw_mut().1.expect("This tac with rhs must also have a lhs."), new); 142 | t.tac.set(tac); // the lhs of `tac` is 
changed to `new` 143 | let copy = f.alloc.alloc(TacNode { tac: Tac::Assign { dst, src: [Operand::Reg(new)] }.into(), prev: None.into(), next: None.into() }); 144 | f.bb[idx].insert_after(t, copy); 145 | return; 146 | } 147 | } 148 | for i in 0..f.bb[idx].prev.len() { 149 | let prev = f.bb[idx].prev[i] as usize; 150 | self.dfs(prev, f, f.bb[prev].iter().rev(), rhs, new); 151 | } 152 | } 153 | 154 | fn do_optimize(&mut self, idx: usize, f: &mut FuncBB<'a>, in_: &mut [u32]) { 155 | for t in f.bb[idx].iter() { 156 | let tac = t.tac.get(); 157 | if let Some(rhs) = TacRhs::from_tac(tac) { 158 | let rhs = self.rhs2id[&rhs]; 159 | if in_.bsget(rhs) { 160 | let new = f.new_reg(); 161 | for v in &mut self.vis { *v = false; } 162 | // `prev` will iterate over all tac before `t` reversely 163 | // TacIter::len is set to infinity (!0), so will iterate over all tac between `first` (inclusive) and `t` (exclusive) 164 | // can't use .iter().enumerate() to get the current length between `first` and `t`, because `dfs` may insert new tac 165 | let prev = TacIter::new(f.bb[idx].first, Some(t), !0).rev().skip(1); 166 | self.dfs(idx, f, prev, rhs, new); 167 | let dst = tac.rw().1.expect("The tac with rhs must also have a lhs."); 168 | t.tac.set(Tac::Assign { dst, src: [Operand::Reg(new)] }); 169 | } 170 | } 171 | if let Some(rhs) = TacRhs::from_tac(tac).map(|rhs| self.rhs2id[&rhs]) { in_.bsset(rhs) } 172 | if let Some(w) = tac.rw().1.and_then(|w| self.write2id.get(&w)) { in_.bsandn(w) } 173 | let (obj, arr) = mem_kill(tac); 174 | if obj { in_.bsandn(&self.obj); } 175 | if arr { in_.bsandn(&self.arr); } 176 | } 177 | } 178 | } -------------------------------------------------------------------------------- /tacopt/src/const_prop.rs: -------------------------------------------------------------------------------- 1 | use crate::bb::{FuncBB, NextKind, simplify}; 2 | use tac::{Tac, Operand}; 3 | 4 | #[derive(Copy, Clone, Eq, PartialEq, Debug)] 5 | enum Value { Unk, Const(i32), Nac } 6 | 7 | 
fn meet(x: Value, y: Value) -> Value { 8 | match (x, y) { 9 | (Value::Const(x), Value::Const(y)) if x == y => Value::Const(x), 10 | (v, Value::Unk) | (Value::Unk, v) => v, 11 | _ => Value::Nac, 12 | } 13 | } 14 | 15 | fn transfer(tac: Tac, env: &mut [Value]) { 16 | use Tac::*; 17 | use Operand::*; 18 | use Value::{Const as C, Nac, Unk}; 19 | match tac { 20 | Bin { op, dst, lr } => { 21 | let lr = match lr { 22 | [Const(l), Const(r)] => (C(l), C(r)), 23 | [Reg(l), Const(r)] => (env[l as usize], C(r)), 24 | [Const(l), Reg(r)] => (C(l), env[r as usize]), 25 | [Reg(l), Reg(r)] => (env[l as usize], env[r as usize]), 26 | }; 27 | env[dst as usize] = match lr { 28 | (C(l), C(r)) => C(op.eval(l, r)), 29 | (Nac, _) | (_, Nac) => Nac, 30 | _ => Unk, // neither is Nac and not both Const => Unk 31 | }; 32 | } 33 | Un { op, dst, r } => env[dst as usize] = match r[0] { 34 | Const(r) => C(op.eval(r)), 35 | Reg(r) => match env[r as usize] { C(r) => C(op.eval(r)), r => r }, 36 | }, 37 | Assign { dst, src } => env[dst as usize] = match src[0] { Const(r) => C(r), Reg(r) => env[r as usize] }, 38 | Call { dst, .. } => if let Some(dst) = dst { env[dst as usize] = Nac } 39 | Load { dst, .. } => env[dst as usize] = Nac, 40 | // actually LoadStr and LoadVTbl won't give `dst` a Unk 41 | // but as long as the implementation is correct, `dst` can never be used in calculation, so giving them Unk is okay 42 | LoadStr { dst, .. } | LoadVTbl { dst, .. } | LoadFunc { dst, .. } => env[dst as usize] = Unk, 43 | Param { .. } | Ret { .. } | Jmp { .. } | Label { .. } | Jif { .. } | Store { .. 
} => {} 44 | } 45 | } 46 | 47 | pub fn work(f: &mut FuncBB) { 48 | let (n, each) = (f.bb.len(), f.reg_num as usize); 49 | let mut flow = vec![Value::Unk; n * each]; 50 | for i in 0..f.param_num as usize { 51 | flow[i] = Value::Nac; // flow[i] is in the entry bb, and setting them is enough 52 | } 53 | let mut tmp = flow.clone(); // tmp is used to detect whether `flow` has changed 54 | loop { 55 | for (idx, b) in f.bb.iter().enumerate() { 56 | for next in b.next().iter().filter_map(|n| n.map(|n| n as usize)) { 57 | let (off, off1) = (idx * each, next * each); 58 | for i in 0..each { 59 | flow[off1 + i] = meet(flow[off1 + i], flow[off + i]); 60 | } 61 | } 62 | } 63 | for (idx, b) in f.bb.iter().enumerate() { 64 | let env = &mut flow[idx * each..(idx + 1) * each]; 65 | for t in b.iter() { transfer(t.tac.get(), env); } 66 | } 67 | if flow != tmp { tmp.clone_from_slice(&flow); } else { break; } 68 | } 69 | let mut flow_changed = false; // whether the edges in flow graph have changed 70 | for (idx, b) in f.bb.iter_mut().enumerate() { 71 | let env = &mut flow[idx * each..(idx + 1) * each]; 72 | for t in b.iter() { 73 | let mut tac = t.tac.get(); 74 | for r in tac.rw_mut().0 { 75 | if let Operand::Reg(r1) = *r { 76 | if let Value::Const(r1) = env[r1 as usize] { *r = Operand::Const(r1); } 77 | } 78 | } 79 | transfer(t.tac.get(), env); // old value 80 | t.tac.set(tac); 81 | } 82 | match &mut b.next { 83 | NextKind::Ret(Some(r)) => if let Operand::Reg(r1) = *r { 84 | if let Value::Const(r1) = env[r1 as usize] { *r = Operand::Const(r1); } 85 | } 86 | &mut NextKind::Jif { cond, z, fail, jump } => if let Value::Const(c) = env[cond as usize] { 87 | b.next = if (c == 0) == z { NextKind::Jmp(jump) } else { NextKind::Jmp(fail) }; 88 | flow_changed = true; 89 | } 90 | _ => {} 91 | } 92 | } 93 | if flow_changed { f.bb = simplify(std::mem::replace(&mut f.bb, Vec::new())); } 94 | } 95 | -------------------------------------------------------------------------------- 
/tacopt/src/copy_prop.rs: -------------------------------------------------------------------------------- 1 | use crate::{bb::{FuncBB, BB}, flow::{FlowElem, Flow, And}}; 2 | use common::{HashSet, HashMap, IndexSet}; 3 | use tac::{Tac, Operand}; 4 | use bitset::traits::*; 5 | 6 | pub fn work(f: &mut FuncBB) { 7 | let mut reg2copy = HashMap::new(); // x = y -> add the index of (x, y) in copy2id to reg2copy[x], reg2copy[y] 8 | let mut copy2id = IndexSet::default(); 9 | for b in &mut f.bb { 10 | for t in b.iter() { 11 | if let Tac::Assign { dst, src } = t.tac.get() { 12 | if let Operand::Reg(src) = src[0] { 13 | if dst == src { // just delete it, doesn't even need propagation 14 | b.del(t); 15 | } else { 16 | let id = copy2id.insert_full((dst, src)).0 as u32; 17 | reg2copy.entry(dst).or_insert_with(HashSet::new).insert(id); 18 | reg2copy.entry(src).or_insert_with(HashSet::new).insert(id); 19 | } 20 | } 21 | } 22 | } 23 | } 24 | let reg2copy = reg2copy.iter().map(|(k, v)| (*k, iter2bs(v, copy2id.len()))).collect(); 25 | // + 1 to let 0 be the virtual entry node, with `out` = empty 26 | let mut copy_prop_flow = Flow::::new(f.bb.len() + 1, copy2id.len()); 27 | let each = copy_prop_flow.each(); 28 | let FlowElem { gen, kill, out, .. } = copy_prop_flow.split(); 29 | for (off, b) in f.bb.iter().enumerate().map(|b| ((b.0 + 1) * each, b.1)) { 30 | compute_gen_kill(b, &mut gen[off..off + each], &mut kill[off..off + each], ®2copy, ©2id); 31 | } 32 | for x in out.iter_mut().skip(each) { *x = !0; } // initial value of out is U, except for entry node 33 | copy_prop_flow.solve(f.bb.iter().enumerate().map(|b| b.1.prev_with_entry(b.0 + 1))); 34 | let FlowElem { in_, .. 
} = copy_prop_flow.split(); 35 | for (off, b) in f.bb.iter_mut().enumerate().map(|b| ((b.0 + 1) * each, b.1)) { 36 | do_optimize(b, &mut in_[off..off + each], ®2copy, ©2id); 37 | } 38 | } 39 | 40 | fn compute_gen_kill(b: &BB, gen: &mut [u32], kill: &mut [u32], reg2copy: &HashMap>, copy2id: &IndexSet<(u32, u32)>) { 41 | for t in b.iter() { 42 | let tac = t.tac.get(); 43 | tac.rw().1.map(|w| reg2copy.get(&w).map(|copy| { 44 | kill.bsor(copy); 45 | gen.bsandn(copy); 46 | })); 47 | if let Tac::Assign { dst, src } = tac { 48 | if let Operand::Reg(src) = src[0] { 49 | gen.bsset(copy2id.get_full(&(dst, src)).unwrap().0); 50 | } 51 | } 52 | } 53 | } 54 | 55 | fn do_optimize(b: &mut BB, in_: &mut [u32], reg2copy: &HashMap>, copy2id: &IndexSet<(u32, u32)>) { 56 | fn lookup(reg: u32, in_: &[u32], copy2id: &IndexSet<(u32, u32)>) -> u32 { 57 | for (id, &(dst, src)) in copy2id.iter().enumerate() { 58 | if in_.bsget(id) && reg == dst { // propagate, allow cascading multi-level copy in one pass of optimization 59 | return lookup(src, in_, copy2id); 60 | } 61 | } 62 | reg // failed to find any further copy, just return reg 63 | } 64 | for t in b.iter() { 65 | let mut tac = t.tac.get(); 66 | // modify the operand to do copy propagation 67 | tac.rw_mut().0.iter_mut().for_each(|r| if let Operand::Reg(reg) = r { *reg = lookup(*reg, in_, copy2id) }); 68 | // compute in_ for the next tac 69 | tac.rw().1.map(|w| reg2copy.get(&w).map(|copy| in_.bsandn(copy))); 70 | if let Tac::Assign { dst, src: [Operand::Reg(src)] } = t.tac.get() { // old value 71 | in_.bsset(copy2id.get_full(&(dst, src)).unwrap().0) 72 | } 73 | t.tac.set(tac); 74 | } 75 | if let Some(r) = b.next_r_mut() { *r = lookup(*r, in_, copy2id); } 76 | } -------------------------------------------------------------------------------- /tacopt/src/flow.rs: -------------------------------------------------------------------------------- 1 | // https://en.wikipedia.org/wiki/Semilattice 2 | pub trait Meet { 3 | const TOP: T; 4 | 5 | fn 
meet(x: T, y: T) -> T; 6 | } 7 | 8 | pub struct And; 9 | 10 | impl Meet for And { 11 | const TOP: u32 = !0; 12 | 13 | fn meet(x: u32, y: u32) -> u32 { x & y } 14 | } 15 | 16 | pub struct Or; 17 | 18 | impl Meet for Or { 19 | const TOP: u32 = 0; 20 | 21 | fn meet(x: u32, y: u32) -> u32 { x | y } 22 | } 23 | 24 | // this is a forward flow infrastructure 25 | // if you want a backward flow, do the replacement: backward { use_, kill, in_, out } <-> forward { gen, def, out, in_ } 26 | // please forgive me for making code more complex, but I really want to improve performance 27 | pub struct Flow { 28 | inner: Vec, 29 | // inner.len() == 4 * n * each for `n` elements each using `each` u32 30 | n: usize, 31 | each: usize, 32 | _p: std::marker::PhantomData, 33 | } 34 | 35 | impl> Flow { 36 | // `each` is the number of BITS 37 | pub fn new(n: usize, each: usize) -> Flow { 38 | let each = bitset::traits::bslen(each); 39 | Flow { inner: vec![0; 4 * n * each], n, each, _p: std::marker::PhantomData } 40 | } 41 | 42 | // if the edges of a node in the graph are empty, the `in_` will always be M::TOP 43 | // which may not be desirable if M is `And`, so you need to add a virtual entry node with `out` = empty 44 | // but if M is `Or`, it is okay without that virtual node, because M::TOP == 0, which is what we expect 45 | pub fn solve>(&mut self, graph: impl IntoIterator + Clone) { 46 | let (n, each) = (self.n, self.each); 47 | assert_eq!(4 * n * each, self.inner.len()); 48 | // rust doesn't allow index out of range when slicing, even if the slice is empty, so just return to avoid a panic!() 49 | if self.inner.is_empty() { return; } 50 | 51 | for in_ in self.split().in_ { *in_ = M::TOP; } 52 | let mut changed = true; 53 | while changed { 54 | changed = false; 55 | let FlowElem { in_, out, .. 
} = self.split(); 56 | for (this, edges) in graph.clone() { 57 | let off = this * each; 58 | for edge in edges { 59 | let off1 = edge * each; 60 | for (x, y) in in_[off..off + each].iter_mut().zip(out[off1..off1 + each].iter()) { 61 | *x = M::meet(*x, *y); 62 | } 63 | } 64 | } 65 | let FlowElem { gen, kill, in_, out, .. } = self.split(); 66 | for i in 0..(n * each) { 67 | // I have checked output assembly, unfortunately rustc & llvm currently can NOT optimize these range checks 68 | // if this project is not a course exercise, I will not hesitate to use `unsafe` here 69 | let ox = out[i]; 70 | out[i] = gen[i] | (in_[i] & !kill[i]); 71 | changed |= out[i] != ox; 72 | } 73 | } 74 | } 75 | 76 | pub fn get(&mut self, idx: usize) -> FlowElem { 77 | let each = self.each; 78 | let FlowElem { gen, kill, in_, out } = self.split(); 79 | FlowElem { gen: &mut gen[idx..idx + each], kill: &mut kill[idx..idx + each], in_: &mut in_[idx..idx + each], out: &mut out[idx..idx + each] } 80 | } 81 | 82 | pub fn split(&mut self) -> FlowElem { 83 | let each_arr = self.n * self.each; 84 | let (gen, rem) = self.inner.split_at_mut(each_arr); 85 | let (kill, rem) = rem.split_at_mut(each_arr); 86 | let (in_, rem) = rem.split_at_mut(each_arr); 87 | let (out, _) = rem.split_at_mut(each_arr); 88 | FlowElem { gen, kill, in_, out } 89 | } 90 | 91 | pub fn n(&self) -> usize { self.n } 92 | pub fn each(&self) -> usize { self.each } 93 | } 94 | 95 | pub struct FlowElem<'a> { 96 | pub gen: &'a mut [u32], 97 | pub kill: &'a mut [u32], 98 | pub in_: &'a mut [u32], 99 | pub out: &'a mut [u32], 100 | } 101 | -------------------------------------------------------------------------------- /tacopt/src/lib.rs: -------------------------------------------------------------------------------- 1 | pub mod bb; 2 | pub mod flow; 3 | pub mod common_expr; 4 | pub mod const_prop; 5 | pub mod copy_prop; 6 | pub mod aliveness; -------------------------------------------------------------------------------- 
/typeck/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "typeck" 3 | version = "0.1.0" 4 | authors = ["MashPlant <740678788@qq.com>"] 5 | edition = "2018" 6 | 7 | [dependencies] 8 | common = { path = "../common" } 9 | syntax = { path = "../syntax" } 10 | typed-arena = "1.4.1" 11 | hashbrown = { version = "0.5", features = ["nightly"] } -------------------------------------------------------------------------------- /typeck/src/lib.rs: -------------------------------------------------------------------------------- 1 | mod scope_stack; 2 | mod symbol_pass; 3 | mod type_pass; 4 | 5 | use common::{Errors, ErrorKind::*, Ref}; 6 | use syntax::{FuncDef, ClassDef, SynTy, SynTyKind, ScopeOwner, Ty, TyKind, Program, VarDef}; 7 | use typed_arena::Arena; 8 | use std::ops::{Deref, DerefMut}; 9 | use crate::{symbol_pass::SymbolPass, type_pass::TypePass, scope_stack::ScopeStack}; 10 | 11 | // if you want to alloc other types, you can add them to TypeCkAlloc 12 | #[derive(Default)] 13 | pub struct TypeCkAlloc<'a> { 14 | pub ty: Arena>, 15 | } 16 | 17 | pub fn work<'a>(p: &'a Program<'a>, alloc: &'a TypeCkAlloc<'a>) -> Result<(), Errors<'a, Ty<'a>>> { 18 | let mut s = SymbolPass(TypeCk { errors: Errors(vec![]), scopes: ScopeStack::new(p), loop_cnt: 0, cur_used: false, cur_func: None, cur_class: None, cur_var_def: None, alloc }); 19 | s.program(p); 20 | if !s.errors.0.is_empty() { return Err(s.0.errors.sorted()); } 21 | let mut t = TypePass(s.0); 22 | t.program(p); 23 | if !t.errors.0.is_empty() { return Err(t.0.errors.sorted()); } 24 | Ok(()) 25 | } 26 | 27 | struct TypeCk<'a> { 28 | errors: Errors<'a, Ty<'a>>, 29 | scopes: ScopeStack<'a>, 30 | loop_cnt: u32, 31 | // `cur_used` is only used to determine 2 kinds of errors: 32 | // Class.var (cur_used == true) => BadFieldAssess; Class (cur_used == false) => UndeclaredVar 33 | cur_used: bool, 34 | cur_func: Option<&'a FuncDef<'a>>, 35 | cur_class: Option<&'a 
ClassDef<'a>>, 36 | // actually only use cur_var_def's loc 37 | // if cur_var_def is Some, will use it's loc to search for symbol in TypePass::var_sel 38 | // this can reject code like `int a = a;` 39 | cur_var_def: Option<&'a VarDef<'a>>, 40 | alloc: &'a TypeCkAlloc<'a>, 41 | } 42 | 43 | impl<'a> TypeCk<'a> { 44 | // is_arr can be helpful if you want the type of array while only having its element type (to avoid cloning other fields) 45 | fn ty(&mut self, s: &SynTy<'a>, is_arr: bool) -> Ty<'a> { 46 | let kind = match &s.kind { 47 | SynTyKind::Int => TyKind::Int, 48 | SynTyKind::Bool => TyKind::Bool, 49 | SynTyKind::String => TyKind::String, 50 | SynTyKind::Void => TyKind::Void, 51 | SynTyKind::Named(name) => if let Some(c) = self.scopes.lookup_class(name) { 52 | TyKind::Object(Ref(c)) 53 | } else { self.issue(s.loc, NoSuchClass(name)) }, 54 | }; 55 | match kind { 56 | TyKind::Error => Ty::error(), 57 | TyKind::Void if s.arr != 0 => self.issue(s.loc, VoidArrayElement), 58 | _ => Ty { arr: s.arr + (is_arr as u32), kind } 59 | } 60 | } 61 | } 62 | 63 | impl<'a> Deref for TypeCk<'a> { 64 | type Target = Errors<'a, Ty<'a>>; 65 | fn deref(&self) -> &Self::Target { &self.errors } 66 | } 67 | 68 | impl<'a> DerefMut for TypeCk<'a> { 69 | fn deref_mut(&mut self) -> &mut Self::Target { &mut self.errors } 70 | } 71 | 72 | trait TypeCkTrait<'a> { 73 | fn scoped R, R>(&mut self, s: ScopeOwner<'a>, f: F) -> R; 74 | } 75 | 76 | impl<'a, T: DerefMut>> TypeCkTrait<'a> for T { 77 | fn scoped R, R>(&mut self, s: ScopeOwner<'a>, mut f: F) -> R { 78 | self.deref_mut().scopes.open(s); 79 | let ret = f(self); 80 | self.deref_mut().scopes.close(); 81 | ret 82 | } 83 | } -------------------------------------------------------------------------------- /typeck/src/scope_stack.rs: -------------------------------------------------------------------------------- 1 | use std::iter; 2 | use common::Loc; 3 | use syntax::{ScopeOwner, Symbol, ClassDef, Program}; 4 | 5 | pub(crate) struct 
ScopeStack<'a> { 6 | // `global` must be ScopeOwner::Global, but we will not depend on this, so just define it as ScopeOwner 7 | global: ScopeOwner<'a>, 8 | stack: Vec>, 9 | } 10 | 11 | impl<'a> ScopeStack<'a> { 12 | pub fn new(p: &'a Program<'a>) -> Self { 13 | Self { global: ScopeOwner::Global(p), stack: vec![] } 14 | } 15 | 16 | pub fn lookup(&self, name: &'a str) -> Option<(Symbol<'a>, ScopeOwner<'a>)> { 17 | self.stack.iter().rev().chain(iter::once(&self.global)) 18 | .filter_map(|&owner| owner.scope().get(name).map(|&sym| (sym, owner))) 19 | .next() 20 | } 21 | 22 | // do lookup, but will ignore those local symbols whose loc >= the given loc 23 | pub fn lookup_before(&self, name: &'a str, loc: Loc) -> Option> { 24 | self.stack.iter().rev().chain(iter::once(&self.global)) 25 | .filter_map(|&owner| owner.scope().get(name).copied().filter(|sym| !(owner.is_local() && sym.loc() >= loc))) 26 | .next() 27 | } 28 | 29 | pub fn declare(&mut self, sym: Symbol<'a>) { 30 | self.cur_owner().scope_mut().insert(sym.name(), sym); 31 | } 32 | 33 | // if `owner` is ScopeOwner::Class, then will recursively open all its ancestors 34 | pub fn open(&mut self, owner: ScopeOwner<'a>) { 35 | if let ScopeOwner::Class(c) = owner { 36 | if let Some(p) = c.parent_ref.get() { 37 | self.open(ScopeOwner::Class(p)); 38 | } 39 | } 40 | self.stack.push(owner); 41 | } 42 | 43 | // the global scope is not affected 44 | pub fn close(&mut self) { 45 | let owner = self.stack.pop().unwrap(); 46 | if let ScopeOwner::Class(_) = owner { 47 | self.stack.clear(); // all scopes in the stack is its ancestors 48 | } 49 | } 50 | 51 | pub fn cur_owner(&self) -> ScopeOwner<'a> { 52 | *self.stack.last().unwrap_or(&self.global) 53 | } 54 | 55 | pub fn lookup_class(&self, name: &'a str) -> Option<&'a ClassDef<'a>> { 56 | self.global.scope().get(name).map(|class| match class { 57 | Symbol::Class(c) => *c, 58 | _ => unreachable!("Global scope should only contain classes."), 59 | }) 60 | } 61 | } 
-------------------------------------------------------------------------------- /typeck/src/symbol_pass.rs: -------------------------------------------------------------------------------- 1 | use crate::{TypeCk, TypeCkTrait}; 2 | use common::{ErrorKind::*, Ref, MAIN_CLASS, MAIN_METHOD, NO_LOC, HashMap, HashSet}; 3 | use syntax::{ast::*, ScopeOwner, Symbol, Ty}; 4 | use std::{ops::{Deref, DerefMut}, iter}; 5 | use hashbrown::hash_map::Entry; 6 | 7 | pub(crate) struct SymbolPass<'a>(pub TypeCk<'a>); 8 | 9 | // some boilerplate code... 10 | impl<'a> Deref for SymbolPass<'a> { 11 | type Target = TypeCk<'a>; 12 | fn deref(&self) -> &Self::Target { &self.0 } 13 | } 14 | 15 | impl<'a> DerefMut for SymbolPass<'a> { 16 | fn deref_mut(&mut self) -> &mut Self::Target { &mut self.0 } 17 | } 18 | 19 | impl<'a> SymbolPass<'a> { 20 | pub fn program(&mut self, p: &'a Program<'a>) { 21 | // the global scope is already opened, so no need to open it here 22 | for c in &p.class { 23 | if let Some(prev) = self.scopes.lookup_class(c.name) { 24 | self.issue(c.loc, ConflictDeclaration { prev: prev.loc, name: c.name }) 25 | } else { 26 | self.scopes.declare(Symbol::Class(c)); 27 | } 28 | } 29 | for c in &p.class { 30 | if let Some(p) = c.parent { 31 | c.parent_ref.set(self.scopes.lookup_class(p)); 32 | if c.parent_ref.get().is_none() { self.issue(c.loc, NoSuchClass(p)) } 33 | } 34 | } 35 | // detect cyclic inheritance 36 | let mut vis = HashMap::new(); 37 | for (idx, c) in p.class.iter().enumerate() { 38 | let mut c = *c; 39 | let mut last = c; // this assignment is useless, the value of `last` never comes from it when used 40 | loop { 41 | match vis.entry(Ref(c)) { 42 | Entry::Vacant(v) => { 43 | v.insert(idx); 44 | if let Some(p) = c.parent_ref.get() { (last = c, c = p); } else { break; } 45 | } 46 | Entry::Occupied(o) => { 47 | if *o.get() == idx { self.issue(last.loc, CyclicInheritance) } 48 | break; 49 | } 50 | } 51 | } 52 | } 53 | // errors related to inheritance are considered as 
fatal errors, return after these checks if a error occurred 54 | if !self.errors.0.is_empty() { return; } 55 | let mut checked = HashSet::new(); 56 | for c in &p.class { 57 | self.class_def(c, &mut checked); 58 | if c.name == MAIN_CLASS { p.main.set(Some(c)); } 59 | } 60 | if p.main.get().map(|c| match c.scope.borrow().get(MAIN_METHOD) { 61 | Some(Symbol::Func(main)) if main.static_ && main.param.is_empty() && main.ret_ty() == Ty::void() => false, _ => true 62 | }).unwrap_or(true) { self.issue(NO_LOC, NoMainClass) } 63 | } 64 | 65 | fn class_def(&mut self, c: &'a ClassDef<'a>, checked: &mut HashSet>>) { 66 | if !checked.insert(Ref(c)) { return; } 67 | if let Some(p) = c.parent_ref.get() { self.class_def(p, checked); } 68 | self.cur_class = Some(c); 69 | self.scoped(ScopeOwner::Class(c), |s| for f in &c.field { 70 | match f { FieldDef::FuncDef(f) => s.func_def(f), FieldDef::VarDef(v) => s.var_def(v) }; 71 | }); 72 | } 73 | 74 | fn func_def(&mut self, f: &'a FuncDef<'a>) { 75 | let ret_ty = self.ty(&f.ret, false); 76 | self.scoped(ScopeOwner::Param(f), |s| { 77 | if !f.static_ { s.scopes.declare(Symbol::This(f)); } 78 | for v in &f.param { s.var_def(v); } 79 | s.block(&f.body); 80 | }); 81 | let ret_param_ty = iter::once(ret_ty).chain(f.param.iter().map(|v| v.ty.get())); 82 | let ret_param_ty = self.alloc.ty.alloc_extend(ret_param_ty); 83 | f.ret_param_ty.set(Some(ret_param_ty)); 84 | f.class.set(self.cur_class); 85 | let ok = if let Some((sym, owner)) = self.scopes.lookup(f.name) { 86 | match (self.scopes.cur_owner(), owner) { 87 | (ScopeOwner::Class(c), ScopeOwner::Class(p)) if Ref(c) != Ref(p) => { 88 | match sym { 89 | Symbol::Func(pf) => { 90 | if f.static_ || pf.static_ { 91 | self.issue(f.loc, ConflictDeclaration { prev: pf.loc, name: f.name }) 92 | } else if !Ty::mk_func(f).assignable_to(Ty::mk_func(pf)) { 93 | self.issue(f.loc, OverrideMismatch { func: f.name, p: p.name }) 94 | } else { true } 95 | } 96 | _ => self.issue(f.loc, ConflictDeclaration { prev: 
sym.loc(), name: f.name }), 97 | } 98 | } 99 | _ => self.issue(f.loc, ConflictDeclaration { prev: sym.loc(), name: f.name }), 100 | } 101 | } else { true }; 102 | if ok { self.scopes.declare(Symbol::Func(f)); } 103 | } 104 | 105 | fn var_def(&mut self, v: &'a VarDef<'a>) { 106 | v.ty.set(self.ty(&v.syn_ty, false)); 107 | if v.ty.get() == Ty::void() { self.issue(v.loc, VoidVar(v.name)) } 108 | let ok = if let Some((sym, owner)) = self.scopes.lookup(v.name) { 109 | match (self.scopes.cur_owner(), owner) { 110 | (ScopeOwner::Class(c1), ScopeOwner::Class(c2)) if Ref(c1) != Ref(c2) && sym.is_var() => 111 | self.issue(v.loc, OverrideVar(v.name)), 112 | (ScopeOwner::Class(_), ScopeOwner::Class(_)) | (_, ScopeOwner::Param(_)) | (_, ScopeOwner::Local(_)) => 113 | self.issue(v.loc, ConflictDeclaration { prev: sym.loc(), name: v.name }), 114 | _ => true, 115 | } 116 | } else { true }; 117 | if ok { 118 | v.owner.set(Some(self.scopes.cur_owner())); 119 | self.scopes.declare(Symbol::Var(v)); 120 | } 121 | } 122 | 123 | fn block(&mut self, b: &'a Block<'a>) { 124 | self.scoped(ScopeOwner::Local(b), |s| for st in &b.stmt { s.stmt(st); }); 125 | } 126 | 127 | fn stmt(&mut self, s: &'a Stmt<'a>) { 128 | match &s.kind { 129 | StmtKind::LocalVarDef(v) => self.var_def(v), 130 | StmtKind::If(i) => { 131 | self.block(&i.on_true); 132 | if let Some(of) = &i.on_false { self.block(of); } 133 | } 134 | StmtKind::While(w) => self.block(&w.body), 135 | StmtKind::For(f) => self.scoped(ScopeOwner::Local(&f.body), |s| { 136 | s.stmt(&f.init); 137 | s.stmt(&f.update); 138 | for st in &f.body.stmt { s.stmt(st); } 139 | }), 140 | StmtKind::Block(b) => self.block(b), 141 | _ => {} 142 | }; 143 | } 144 | } -------------------------------------------------------------------------------- /typeck/src/type_pass.rs: -------------------------------------------------------------------------------- 1 | use crate::{TypeCk, TypeCkTrait}; 2 | use common::{ErrorKind::*, Loc, LENGTH, BinOp, UnOp, ErrorKind, Ref}; 
3 | use syntax::ast::*; 4 | use syntax::{ScopeOwner, Symbol, ty::*}; 5 | use std::ops::{Deref, DerefMut}; 6 | 7 | pub(crate) struct TypePass<'a>(pub TypeCk<'a>); 8 | 9 | impl<'a> Deref for TypePass<'a> { 10 | type Target = TypeCk<'a>; 11 | fn deref(&self) -> &Self::Target { &self.0 } 12 | } 13 | 14 | impl<'a> DerefMut for TypePass<'a> { 15 | fn deref_mut(&mut self) -> &mut Self::Target { &mut self.0 } 16 | } 17 | 18 | impl<'a> TypePass<'a> { 19 | pub fn program(&mut self, p: &'a Program<'a>) { 20 | for c in &p.class { self.class_def(c); } 21 | } 22 | 23 | fn class_def(&mut self, c: &'a ClassDef<'a>) { 24 | self.cur_class = Some(c); 25 | self.scoped(ScopeOwner::Class(c), |s| for f in &c.field { 26 | if let FieldDef::FuncDef(f) = f { 27 | s.cur_func = Some(f); 28 | let ret = s.scoped(ScopeOwner::Param(f), |s| s.block(&f.body)); 29 | if !ret && f.ret_ty() != Ty::void() { s.issue(f.body.loc, ErrorKind::NoReturn) } 30 | }; 31 | }); 32 | } 33 | 34 | fn block(&mut self, b: &'a Block<'a>) -> bool { 35 | let mut ret = false; 36 | self.scoped(ScopeOwner::Local(b), |s| for st in &b.stmt { ret = s.stmt(st); }); 37 | ret 38 | } 39 | 40 | // return whether this stmt has a return value 41 | fn stmt(&mut self, s: &'a Stmt<'a>) -> bool { 42 | match &s.kind { 43 | StmtKind::Assign(a) => { 44 | let (l, r) = (self.expr(&a.dst), self.expr(&a.src)); 45 | if !r.assignable_to(l) { self.issue(s.loc, IncompatibleBinary { l, op: "=", r }) } 46 | false 47 | } 48 | StmtKind::LocalVarDef(v) => { 49 | self.cur_var_def = Some(v); 50 | if let Some((loc, e)) = &v.init { 51 | let (l, r) = (v.ty.get(), self.expr(e)); 52 | if !r.assignable_to(l) { self.issue(*loc, IncompatibleBinary { l, op: "=", r }) } 53 | } 54 | self.cur_var_def = None; 55 | false 56 | } 57 | StmtKind::ExprEval(e) => { 58 | self.expr(e); 59 | false 60 | } 61 | StmtKind::Skip(_) => false, 62 | StmtKind::If(i) => { 63 | self.check_bool(&i.cond); 64 | // `&` is not short-circuit evaluated 65 | self.block(&i.on_true) & 
i.on_false.as_ref().map(|b| self.block(b)).unwrap_or(false) 66 | } 67 | StmtKind::While(w) => { 68 | self.check_bool(&w.cond); 69 | self.loop_cnt += 1; 70 | self.block(&w.body); 71 | self.loop_cnt -= 1; 72 | false 73 | } 74 | StmtKind::For(f) => self.scoped(ScopeOwner::Local(&f.body), |s| { 75 | s.stmt(&f.init); 76 | s.check_bool(&f.cond); 77 | s.stmt(&f.update); 78 | s.loop_cnt += 1; 79 | for st in &f.body.stmt { s.stmt(st); } // not calling block(), because the scope is already opened 80 | s.loop_cnt -= 1; 81 | false 82 | }), 83 | StmtKind::Return(r) => { 84 | let expect = self.cur_func.unwrap().ret_ty(); 85 | let actual = r.as_ref().map(|e| self.expr(e)).unwrap_or(Ty::void()); 86 | if !actual.assignable_to(expect) { self.issue(s.loc, ReturnMismatch { actual, expect }) } 87 | actual != Ty::void() 88 | } 89 | StmtKind::Print(p) => { 90 | for (i, e) in p.iter().enumerate() { 91 | let ty = self.expr(e); 92 | if ty != Ty::bool() && ty != Ty::int() && ty != Ty::string() { 93 | ty.error_or(|| self.issue(e.loc, BadPrintArg { loc: i as u32 + 1, ty })) 94 | } 95 | } 96 | false 97 | } 98 | StmtKind::Break(_) => { 99 | if self.loop_cnt == 0 { self.issue(s.loc, BreakOutOfLoop) } 100 | false 101 | } 102 | StmtKind::Block(b) => self.block(b), 103 | } 104 | } 105 | 106 | // e.ty is set to the return value 107 | fn expr(&mut self, e: &'a Expr<'a>) -> Ty<'a> { 108 | use ExprKind::*; 109 | let ty = match &e.kind { 110 | VarSel(v) => self.var_sel(v, e.loc), 111 | IndexSel(i) => { 112 | let (arr, idx) = (self.expr(&i.arr), self.expr(&i.idx)); 113 | if idx != Ty::int() { idx.error_or(|| self.issue(e.loc, IndexNotInt)) } 114 | match arr { 115 | Ty { arr, kind } if arr > 0 => Ty { arr: arr - 1, kind }, 116 | e => e.error_or(|| self.issue(i.arr.loc, IndexNotArray)), 117 | } 118 | } 119 | IntLit(_) | ReadInt(_) => Ty::int(), BoolLit(_) => Ty::bool(), StringLit(_) | ReadLine(_) => Ty::string(), NullLit(_) => Ty::null(), 120 | Call(c) => self.call(c, e.loc), 121 | Unary(u) => { 122 | let r 
= self.expr(&u.r); 123 | let (ty, op) = match u.op { UnOp::Neg => (Ty::int(), "-"), UnOp::Not => (Ty::bool(), "!"), }; 124 | if r != ty { r.error_or(|| self.issue(e.loc, IncompatibleUnary { op, r })) } 125 | ty 126 | } 127 | Binary(b) => { 128 | use BinOp::*; 129 | let (l, r) = (self.expr(&b.l), self.expr(&b.r)); 130 | if l == Ty::error() || r == Ty::error() { 131 | // not using wildcard match, so that if we add new operators in the future, compiler can tell us 132 | match b.op { Add | Sub | Mul | Div | Mod => Ty::int(), And | Or | Eq | Ne | Lt | Le | Gt | Ge => Ty::bool() } 133 | } else { 134 | let (ret, ok) = match b.op { 135 | Add | Sub | Mul | Div | Mod => (Ty::int(), l == Ty::int() && r == Ty::int()), 136 | Lt | Le | Gt | Ge => (Ty::bool(), l == Ty::int() && r == Ty::int()), 137 | Eq | Ne => (Ty::bool(), l.assignable_to(r) || r.assignable_to(l)), 138 | And | Or => (Ty::bool(), l == Ty::bool() && r == Ty::bool()) 139 | }; 140 | if !ok { self.issue(e.loc, IncompatibleBinary { l, op: b.op.to_op_str(), r }) } 141 | ret 142 | } 143 | } 144 | This(_) => { 145 | if self.cur_func.unwrap().static_ { self.issue(e.loc, ThisInStatic) } 146 | Ty::mk_obj(self.cur_class.unwrap()) 147 | } 148 | NewClass(n) => if let Some(c) = self.scopes.lookup_class(n.name) { 149 | n.class.set(Some(c)); 150 | Ty::mk_obj(c) 151 | } else { self.issue(e.loc, NoSuchClass(n.name)) }, 152 | NewArray(n) => { 153 | let len = self.expr(&n.len); 154 | if len != Ty::int() { len.error_or(|| self.issue(n.len.loc, NewArrayNotInt)) } 155 | self.ty(&n.elem, true) 156 | } 157 | ClassTest(c) => { 158 | let src = self.expr(&c.expr); 159 | if !src.is_object() { src.error_or(|| self.issue(e.loc, NotObject(src))) } 160 | if let Some(cl) = self.scopes.lookup_class(c.name) { 161 | c.class.set(Some(cl)); 162 | Ty::bool() 163 | } else { self.issue(e.loc, NoSuchClass(c.name)) } 164 | } 165 | ClassCast(c) => { 166 | let src = self.expr(&c.expr); 167 | if !src.is_object() { src.error_or(|| self.issue(e.loc, 
NotObject(src))) } 168 | if let Some(cl) = self.scopes.lookup_class(c.name) { 169 | c.class.set(Some(cl)); 170 | Ty::mk_obj(cl) 171 | } else { self.issue(e.loc, NoSuchClass(c.name)) } 172 | } 173 | }; 174 | e.ty.set(ty); 175 | ty 176 | } 177 | 178 | fn var_sel(&mut self, v: &'a VarSel<'a>, loc: Loc) -> Ty<'a> { 179 | // (no owner)not_found_var / ClassName(no field) / (no owner)method => UndeclaredVar 180 | // object.not_found_var => NoSuchField 181 | // (no owner)field_var && cur function is static => RefInStatic 182 | // .a (e.g.: Class.a, 1.a) / object.method => BadFieldAccess 183 | // object.field_var, where object's class is not self or any of ancestors => PrivateFieldAccess 184 | 185 | if let Some(owner) = &v.owner { 186 | self.cur_used = true; 187 | let owner = self.expr(owner); 188 | self.cur_used = false; 189 | match owner { 190 | Ty { arr: 0, kind: TyKind::Object(Ref(c)) } => if let Some(sym) = c.lookup(v.name) { 191 | match sym { 192 | Symbol::Var(var) => { 193 | v.var.set(Some(var)); 194 | // only allow self & descendents to access field 195 | if !self.cur_class.unwrap().extends(c) { 196 | self.issue(loc, PrivateFieldAccess { name: v.name, owner }) 197 | } 198 | var.ty.get() 199 | } 200 | _ => self.issue(loc, BadFieldAccess { name: v.name, owner }), 201 | } 202 | } else { self.issue(loc, NoSuchField { name: v.name, owner }) }, 203 | e => e.error_or(|| self.issue(loc, BadFieldAccess { name: v.name, owner })), 204 | } 205 | } else { 206 | // if this stmt is in an VarDef, it cannot access the variable that is being declared 207 | if let Some(sym) = self.scopes.lookup_before(v.name, self.cur_var_def.map(|v| v.loc).unwrap_or(loc)) { 208 | match sym { 209 | Symbol::Var(var) => { 210 | v.var.set(Some(var)); 211 | if var.owner.get().unwrap().is_class() { 212 | let cur = self.cur_func.unwrap(); 213 | if cur.static_ { 214 | self.issue(loc, RefInStatic { field: v.name, func: cur.name }) 215 | } 216 | } 217 | var.ty.get() 218 | } 219 | Symbol::Class(c) if 
self.cur_used => { Ty::mk_class(c) } 220 | _ => self.issue(loc, UndeclaredVar(v.name)), 221 | } 222 | } else { self.issue(loc, UndeclaredVar(v.name)) } 223 | } 224 | } 225 | 226 | fn call(&mut self, c: &'a Call<'a>, loc: Loc) -> Ty<'a> { 227 | let v = if let ExprKind::VarSel(v) = &c.func.kind { v } else { unimplemented!() }; 228 | let owner = if let Some(owner) = &v.owner { 229 | self.cur_used = true; 230 | let owner = self.expr(owner); 231 | self.cur_used = false; 232 | if owner == Ty::error() { return Ty::error(); } 233 | if v.name == LENGTH && owner.is_arr() { 234 | if !c.arg.is_empty() { 235 | self.issue(loc, LengthWithArgument(c.arg.len() as u32)) 236 | } 237 | return Ty::int(); 238 | } 239 | owner 240 | } else { Ty::mk_obj(self.cur_class.unwrap()) }; 241 | match owner { 242 | Ty { arr: 0, kind: TyKind::Object(Ref(cl)) } | Ty { arr: 0, kind: TyKind::Class(Ref(cl)) } => { 243 | if let Some(sym) = cl.lookup(v.name) { 244 | match sym { 245 | Symbol::Func(f) => { 246 | c.func_ref.set(Some(f)); 247 | if owner.is_class() && !f.static_ { 248 | // Class.not_static_method() 249 | self.issue(loc, BadFieldAccess { name: v.name, owner }) 250 | } 251 | if v.owner.is_none() { 252 | let cur = self.cur_func.unwrap(); 253 | if cur.static_ && !f.static_ { 254 | self.issue(loc, RefInStatic { field: f.name, func: cur.name }) 255 | } 256 | } 257 | self.check_arg_param(&c.arg, f.ret_param_ty.get().unwrap(), f.name, loc) 258 | } 259 | _ => self.issue(loc, NotFunc { name: v.name, owner }), 260 | } 261 | } else { self.issue(loc, NoSuchField { name: v.name, owner }) } 262 | } 263 | _ => self.issue(loc, BadFieldAccess { name: v.name, owner }), 264 | } 265 | } 266 | } 267 | 268 | impl<'a> TypePass<'a> { 269 | fn check_bool(&mut self, e: &'a Expr<'a>) { 270 | let ty = self.expr(e); 271 | if ty != Ty::bool() { ty.error_or(|| self.issue(e.loc, TestNotBool)) } 272 | } 273 | 274 | fn check_arg_param(&mut self, arg: &'a [Expr<'a>], ret_param: &[Ty<'a>], name: &'a str, loc: Loc) -> Ty<'a> { 275 
| let (ret, param) = (ret_param[0], &ret_param[1..]); 276 | if param.len() != arg.len() { 277 | self.issue(loc, ArgcMismatch { name, expect: param.len() as u32, actual: arg.len() as u32 }) 278 | } 279 | for (idx, arg0) in arg.iter().enumerate() { 280 | let arg = self.expr(arg0); 281 | if let Some(¶m) = param.get(idx) { 282 | if !arg.assignable_to(param) { 283 | self.issue(arg0.loc, ArgMismatch { loc: idx as u32 + 1, arg, param }) 284 | } 285 | } 286 | } 287 | ret 288 | } 289 | } --------------------------------------------------------------------------------