├── rustfmt.toml ├── .gitignore ├── src ├── lib │ ├── asm │ │ ├── mod.rs │ │ ├── dump.rs │ │ ├── symtab.rs │ │ └── chunk.rs │ ├── vm │ │ ├── mod.rs │ │ ├── dict.rs │ │ ├── num.rs │ │ ├── obj.rs │ │ ├── gc.rs │ │ └── exec.rs │ ├── mod.rs │ ├── visitor.rs │ ├── source.rs │ ├── token.rs │ ├── ast.rs │ └── lexer.rs └── main.rs ├── Cargo.toml ├── README.md └── Cargo.lock /rustfmt.toml: -------------------------------------------------------------------------------- 1 | tab_spaces = 2 -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | **/*.rs.bk 3 | .idea 4 | -------------------------------------------------------------------------------- /src/lib/asm/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod chunk; 2 | pub mod codegen; 3 | pub mod dump; 4 | pub mod symtab; 5 | -------------------------------------------------------------------------------- /src/lib/vm/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod dict; 2 | pub mod exec; 3 | pub mod gc; 4 | pub mod num; 5 | pub mod obj; 6 | -------------------------------------------------------------------------------- /src/lib/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod asm; 2 | pub mod ast; 3 | pub mod lexer; 4 | pub mod parser; 5 | pub mod source; 6 | pub mod token; 7 | pub mod visitor; 8 | pub mod vm; 9 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "naive" 3 | version = "0.1.0" 4 | authors = ["hsiaosiyuan0 "] 5 | edition = "2018" 6 | 7 | [lib] 8 | name = "lib" 9 | path = "src/lib/mod.rs" 10 | 11 | [dependencies] 12 | unic-ucd = "0.9.0" 13 | serde = { version = "1.0", 
features = ["derive"] } 14 | serde_yaml = "0.8" 15 | byteorder = "1" 16 | linked_hash_set = "0.1.3" 17 | -------------------------------------------------------------------------------- /src/lib/vm/dict.rs: -------------------------------------------------------------------------------- 1 | use crate::vm::gc::*; 2 | 3 | impl JsDict { 4 | pub fn set(&mut self, k: JsObjPtr, v: JsObjPtr) { 5 | let k = as_str(k).d.as_str(); 6 | as_obj(v).inc(); 7 | match self.d.get(k) { 8 | Some(old) => as_obj(*old).dec(), 9 | _ => (), 10 | } 11 | self.d.insert(k.to_owned(), v); 12 | } 13 | 14 | pub fn get(&mut self, k: JsObjPtr) -> JsObjPtr { 15 | let gc = as_gc(as_obj(self).gc()); 16 | let k = as_str(k).d.as_str(); 17 | match self.d.get(k) { 18 | Some(v) => *v, 19 | None => gc.js_undef(), 20 | } 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /src/main.rs: -------------------------------------------------------------------------------- 1 | extern crate lib; 2 | use lib::asm::codegen::Codegen; 3 | use lib::vm::exec::Vm; 4 | use std::path::Path; 5 | use std::process::exit; 6 | use std::{env, fs}; 7 | 8 | fn main() { 9 | let args: Vec = env::args().collect(); 10 | let file = match args.get(1) { 11 | Some(arg) => arg, 12 | _ => exit({ 13 | println!("missing file"); 14 | 1 15 | }), 16 | }; 17 | let file = Path::new(file); 18 | let src = match fs::read_to_string(file) { 19 | Ok(src) => src, 20 | Err(e) => exit({ 21 | format!("unable to read file: {:?}", e); 22 | 1 23 | }), 24 | }; 25 | let chk = Codegen::gen(src.as_str()); 26 | let mut vm = Vm::new(chk, (1 << 20) * 100); 27 | match vm.exec() { 28 | Err(e) => exit({ 29 | eprintln!("error: {:?}", e); 30 | 1 31 | }), 32 | Ok(_) => (), 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Naive 2 | 3 | This is a naive JavaScript engine implemented in 
pure Rust to improve my JavaScript knowledge. 4 | 5 | [![asciicast](https://asciinema.org/a/NSyybvRaFimC4OLIDtarKJkII.svg)](https://asciinema.org/a/NSyybvRaFimC4OLIDtarKJkII) 6 | 7 | ## Checklist 8 | 9 | ### Frontend 10 | 11 | * [x] Lexer 12 | * [x] Recursive descent parser with [Operator Precedence Algorithm](https://en.wikipedia.org/wiki/Operator-precedence_parser) to optimize operator parsing 13 | * [x] Generates an abstract syntax tree based on [ESTree](https://github.com/estree/estree) 14 | 15 | ### Intermediate 16 | 17 | * [x] Generates bytecode (opcode) from the AST 18 | * [ ] Bytecode analysis 19 | * [ ] Optimizes bytecode according to the analysis result 20 | 21 | ### VM 22 | 23 | * [x] Reference-counting GC with Mark-and-Sweep to deal with reference cycles 24 | * [x] Common Flow Control Statements (if-else, for, while, do-while) 25 | * [x] Closure 26 | * [x] Mathematical Operations 27 | * [x] Function Invocation 28 | * [x] Logical Operations 29 | * [ ] Bitwise Operations 30 | * [x] Object Literal 31 | * [x] New Expression 32 | * [x] Member Access Expression 33 | * [ ] For-in Statement 34 | * [ ] Switch Case Statement 35 | * [ ] Try-catch Statement 36 | * [ ] Debug 37 | 38 | ## How to run 39 | 40 | Thanks to the Rust development infrastructure, running this engine from source is very easy: 41 | 42 | ``` 43 | $ cargo build 44 | $ cargo run your-script.js 45 | ``` 46 | 47 | It's still in development. After it's completed, I'd like to write a series of posts describing how to build a JavaScript engine from scratch. Stay tuned. 48 | 49 | Feel free to leave feedback by submitting an issue. Starring or forking the repository to follow its updates is welcome. 
-------------------------------------------------------------------------------- /src/lib/vm/num.rs: -------------------------------------------------------------------------------- 1 | use crate::vm::gc::*; 2 | use std::f64; 3 | 4 | impl JsNumber { 5 | pub fn add(&mut self, b: JsNumPtr) -> JsNumPtr { 6 | let gc = as_gc(as_obj(self).gc()); 7 | let n = gc.new_num(false); 8 | let a = self.d; 9 | let b = as_num(b).d; 10 | as_num(n).d = a + b; 11 | n 12 | } 13 | 14 | pub fn sub(&mut self, b: JsNumPtr) -> JsNumPtr { 15 | let gc = as_gc(as_obj(self).gc()); 16 | let n = gc.new_num(false); 17 | let a = self.d; 18 | let b = as_num(b).d; 19 | as_num(n).d = a - b; 20 | n 21 | } 22 | 23 | pub fn mul(&mut self, b: JsNumPtr) -> JsNumPtr { 24 | let gc = as_gc(as_obj(self).gc()); 25 | let n = gc.new_num(false); 26 | let a = self.d; 27 | let b = as_num(b).d; 28 | as_num(n).d = a * b; 29 | n 30 | } 31 | 32 | pub fn div(&mut self, b: JsNumPtr) -> JsNumPtr { 33 | let gc = as_gc(as_obj(self).gc()); 34 | let n = gc.new_num(false); 35 | let a = self.d; 36 | let b = as_num(b).d; 37 | as_num(n).d = a / b; 38 | n 39 | } 40 | 41 | pub fn modulo(&mut self, b: JsNumPtr) -> JsNumPtr { 42 | let gc = as_gc(as_obj(self).gc()); 43 | let n = gc.new_num(false); 44 | let a = self.d; 45 | let b = as_num(b).d; 46 | as_num(n).d = a % b; 47 | n 48 | } 49 | 50 | pub fn eq(&mut self, b: JsNumPtr) -> bool { 51 | let a = self.d; 52 | let b = as_num(b).d; 53 | a == b 54 | } 55 | 56 | pub fn set_v_str(&mut self, b: JsStrPtr) { 57 | self.d = match as_str(b).d.parse().ok() { 58 | Some(v) => v, 59 | _ => f64::NAN, 60 | } 61 | } 62 | 63 | pub fn set_v_bool(&mut self, b: JsObjPtr) { 64 | let gc = as_gc(as_obj(self).gc()); 65 | let b = b as JsBoolPtr; 66 | self.d = if b == gc.js_true() { 1.0 } else { 0.0 } 67 | } 68 | } 69 | -------------------------------------------------------------------------------- /src/lib/asm/dump.rs: -------------------------------------------------------------------------------- 1 | extern 
crate byteorder; 2 | 3 | use crate::asm::chunk::*; 4 | use byteorder::{BigEndian, WriteBytesExt}; 5 | use std::io::Write; 6 | use std::mem; 7 | 8 | pub struct Dumper { 9 | buf: Vec, 10 | chk: Chunk, 11 | } 12 | 13 | impl Dumper { 14 | pub fn w_byte(&mut self, x: u8) { 15 | self.buf.write(&[x]).ok(); 16 | } 17 | 18 | pub fn w_i64(&mut self, x: i64) { 19 | self.buf.write(&x.to_be_bytes()).ok(); 20 | } 21 | 22 | pub fn w_u64(&mut self, x: u64) { 23 | self.buf.write(&x.to_be_bytes()).ok(); 24 | } 25 | 26 | pub fn w_u32(&mut self, x: u32) { 27 | self.buf.write(&x.to_be_bytes()).ok(); 28 | } 29 | 30 | pub fn w_size(&mut self, x: u64) { 31 | self.buf.write(&x.to_be_bytes()).ok(); 32 | } 33 | 34 | pub fn w_double(&mut self, x: f64) { 35 | let mut buf = [0u8; mem::size_of::()]; 36 | buf.as_mut().write_f64::(x).ok(); 37 | self.buf.write(&buf).ok(); 38 | } 39 | 40 | pub fn w_string(&mut self, s: &str) { 41 | self.w_size(s.len() as u64); 42 | self.buf.write(s.as_bytes()).ok(); 43 | } 44 | 45 | pub fn process(&mut self) { 46 | self.w_header(); 47 | self.w_byte(self.chk.upval_cnt); 48 | let ptr = &self.chk.fun_tpl as *const FnTpl; 49 | unsafe { self.w_fun_tpl(&(*ptr)) } 50 | } 51 | 52 | fn w_header(&mut self) { 53 | self.w_string(self.chk.sig); 54 | self.w_u64(self.chk.ver); 55 | } 56 | 57 | fn w_const(&mut self, c: &Const) { 58 | self.w_byte(c.typ_id()); 59 | match c { 60 | Const::String(v) => self.w_string(v.as_str()), 61 | Const::Number(v) => self.w_double(*v), 62 | } 63 | } 64 | 65 | fn w_upval(&mut self, u: &UpvalDesc) { 66 | self.w_byte(u.in_stack as u8); 67 | self.w_u32(u.idx); 68 | } 69 | 70 | fn w_fun_tpl(&mut self, f: &FnTpl) { 71 | self.w_byte(f.param_cnt); 72 | self.w_byte(f.is_vararg as u8); 73 | self.w_code(&f.code); 74 | self.w_consts(&f.consts); 75 | self.w_upvals(&f.upvals); 76 | self.w_fun_tpls(&f.fun_tpls); 77 | } 78 | 79 | fn w_code(&mut self, insts: &Vec) { 80 | self.w_u64(insts.len() as u64); 81 | insts.iter().for_each(|inst| self.w_u32(inst.raw)); 82 | } 
83 | 84 | fn w_consts(&mut self, cs: &Vec) { 85 | self.w_u64(cs.len() as u64); 86 | cs.iter().for_each(|c| self.w_const(c)); 87 | } 88 | 89 | fn w_upvals(&mut self, uvs: &Vec) { 90 | self.w_u64(uvs.len() as u64); 91 | uvs.iter().for_each(|uv| self.w_upval(uv)); 92 | } 93 | 94 | fn w_fun_tpls(&mut self, tpls: &Vec) { 95 | self.w_u64(tpls.len() as u64); 96 | tpls.iter().for_each(|tpl| self.w_fun_tpl(tpl)); 97 | } 98 | } 99 | -------------------------------------------------------------------------------- /src/lib/visitor.rs: -------------------------------------------------------------------------------- 1 | use crate::ast::*; 2 | 3 | pub trait AstVisitor { 4 | fn prog(&mut self, prog: &Prog) -> Result; 5 | 6 | fn stmt(&mut self, stmt: &Stmt) -> Result { 7 | match stmt { 8 | Stmt::Block(s) => self.block_stmt(s), 9 | Stmt::VarDec(s) => self.var_dec_stmt(s), 10 | Stmt::Empty(s) => self.empty_stmt(s), 11 | Stmt::Expr(s) => self.expr_stmt(s), 12 | Stmt::If(s) => self.if_stmt(s), 13 | Stmt::For(s) => self.for_stmt(s), 14 | Stmt::ForIn(s) => self.for_in_stmt(s), 15 | Stmt::DoWhile(s) => self.do_while_stmt(s), 16 | Stmt::While(s) => self.while_stmt(s), 17 | Stmt::Cont(s) => self.cont_stmt(s), 18 | Stmt::Break(s) => self.break_stmt(s), 19 | Stmt::Return(s) => self.ret_stmt(s), 20 | Stmt::With(s) => self.with_stmt(s), 21 | Stmt::Switch(s) => self.switch_stmt(s), 22 | Stmt::Throw(s) => self.throw_stmt(s), 23 | Stmt::Try(s) => self.try_stmt(s), 24 | Stmt::Debugger(s) => self.debug_stmt(s), 25 | Stmt::Function(s) => self.fn_stmt(s), 26 | } 27 | } 28 | 29 | fn block_stmt(&mut self, stmt: &BlockStmt) -> Result; 30 | fn var_dec_stmt(&mut self, stmt: &VarDec) -> Result; 31 | fn empty_stmt(&mut self, stmt: &EmptyStmt) -> Result; 32 | fn expr_stmt(&mut self, stmt: &ExprStmt) -> Result; 33 | fn if_stmt(&mut self, stmt: &IfStmt) -> Result; 34 | fn for_stmt(&mut self, stmt: &ForStmt) -> Result; 35 | fn for_in_stmt(&mut self, stmt: &ForInStmt) -> Result; 36 | fn do_while_stmt(&mut self, 
stmt: &DoWhileStmt) -> Result; 37 | fn while_stmt(&mut self, stmt: &WhileStmt) -> Result; 38 | fn cont_stmt(&mut self, stmt: &ContStmt) -> Result; 39 | fn break_stmt(&mut self, stmt: &BreakStmt) -> Result; 40 | fn ret_stmt(&mut self, stmt: &ReturnStmt) -> Result; 41 | fn with_stmt(&mut self, stmt: &WithStmt) -> Result; 42 | fn switch_stmt(&mut self, stmt: &SwitchStmt) -> Result; 43 | fn throw_stmt(&mut self, stmt: &ThrowStmt) -> Result; 44 | fn try_stmt(&mut self, stmt: &TryStmt) -> Result; 45 | fn debug_stmt(&mut self, stmt: &DebugStmt) -> Result; 46 | fn fn_stmt(&mut self, stmt: &FnDec) -> Result; 47 | 48 | fn expr(&mut self, expr: &Expr) -> Result { 49 | match expr { 50 | Expr::Primary(ex) => self.primary_expr(ex), 51 | Expr::Member(ex) => self.member_expr(ex), 52 | Expr::New(ex) => self.new_expr(ex), 53 | Expr::Call(ex) => self.call_expr(ex), 54 | Expr::Unary(ex) => self.unary_expr(ex), 55 | Expr::Binary(ex) => self.binary_expr(ex), 56 | Expr::Assignment(ex) => self.assign_expr(ex), 57 | Expr::Conditional(ex) => self.cond_expr(ex), 58 | Expr::Sequence(ex) => self.seq_expr(ex), 59 | } 60 | } 61 | 62 | fn member_expr(&mut self, expr: &MemberExpr) -> Result; 63 | fn new_expr(&mut self, expr: &NewExpr) -> Result; 64 | fn call_expr(&mut self, expr: &CallExpr) -> Result; 65 | fn unary_expr(&mut self, expr: &UnaryExpr) -> Result; 66 | fn binary_expr(&mut self, expr: &BinaryExpr) -> Result; 67 | fn assign_expr(&mut self, expr: &AssignExpr) -> Result; 68 | fn cond_expr(&mut self, expr: &CondExpr) -> Result; 69 | fn seq_expr(&mut self, expr: &SeqExpr) -> Result; 70 | fn primary_expr(&mut self, expr: &PrimaryExpr) -> Result { 71 | match expr { 72 | PrimaryExpr::This(ex) => self.this_expr(ex), 73 | PrimaryExpr::Identifier(ex) => self.id_expr(ex), 74 | PrimaryExpr::Literal(ex) => self.literal(ex), 75 | PrimaryExpr::ArrayLiteral(ex) => self.array_literal(ex), 76 | PrimaryExpr::ObjectLiteral(ex) => self.object_literal(ex), 77 | PrimaryExpr::Parenthesized(ex) => 
/// A character stream over source text with arbitrary lookahead.
///
/// Every line terminator accepted by `is_line_terminator`, as well as the
/// two-character sequence CR LF, is handed to the consumer as a single `EOL`.
pub struct Source<'a> {
  /// Raw input that has not been normalized yet.
  pub chs: Chars<'a>,
  /// Normalized characters that were looked at but not consumed, oldest first.
  pub peeked: VecDeque<char>,
  /// Starts at 1 and grows by one for every `EOL` returned by `read`.
  pub line: i32,
  /// Number of characters `read` returned since the last `EOL`.
  pub column: i32,
}

/// The one line terminator `Source` ever yields.
pub const EOL: char = '\n';

/// Returns `true` for LF (U+000A), CR (U+000D), LS (U+2028) and PS (U+2029).
pub fn is_line_terminator(c: char) -> bool {
  match c {
    '\u{0a}' | '\u{0d}' | '\u{2028}' | '\u{2029}' => true,
    _ => false,
  }
}

impl<'a> Source<'a> {
  /// Creates a stream positioned before the first character of `code`.
  ///
  /// Takes `&str`; existing callers passing `&String` keep compiling through
  /// deref coercion.
  pub fn new(code: &'a str) -> Self {
    Source {
      chs: code.chars(),
      peeked: VecDeque::with_capacity(3),
      line: 1,
      column: 0,
    }
  }

  /// Pulls the next character from the raw input, normalizing line
  /// terminators to `EOL` and folding CR LF into one `EOL`.
  fn next_join_crlf(&mut self) -> Option<char> {
    let c = self.chs.next()?;
    if !is_line_terminator(c) {
      return Some(c);
    }
    // Only look at what follows a CR; consume it solely when it is the LF of
    // a CR LF pair. Previously the follower was pulled out and parked in
    // `peeked`, which put it in front of the EOL being produced whenever a
    // lookahead was in progress, and skipped its own normalization.
    if c == '\r' && self.chs.as_str().starts_with('\n') {
      self.chs.next();
    }
    Some(EOL)
  }

  /// Returns the `i`-th unconsumed character (0 is the next one), buffering
  /// input as needed; `None` when the input ends first.
  fn peek_at(&mut self, i: usize) -> Option<char> {
    while self.peeked.len() <= i {
      let c = self.next_join_crlf()?;
      self.peeked.push_back(c);
    }
    self.peeked.get(i).cloned()
  }

  /// Consumes and returns the next character, updating `line`/`column`.
  pub fn read(&mut self) -> Option<char> {
    let c = match self.peeked.pop_front() {
      Some(c) => c,
      None => self.next_join_crlf()?,
    };
    if c == EOL {
      self.line += 1;
      self.column = 0;
    } else {
      self.column += 1;
    }
    Some(c)
  }

  /// Returns the next character without consuming it.
  pub fn peek(&mut self) -> Option<char> {
    self.peek_at(0)
  }

  /// Whether the next character is `ch`.
  pub fn test_ahead(&mut self, ch: char) -> bool {
    self.peek() == Some(ch)
  }

  /// Whether the next character is `c1` or `c2`.
  pub fn test_ahead_or(&mut self, c1: char, c2: char) -> bool {
    match self.peek() {
      Some(c) => c == c1 || c == c2,
      None => false,
    }
  }

  /// Whether the upcoming characters are exactly `chs`, in order.
  ///
  /// Nothing is consumed. Only the first `chs.len()` characters are
  /// examined, even when more are already buffered; the old loop indexed
  /// `chs` by the buffer length and panicked in that situation.
  pub fn test_ahead_chs(&mut self, chs: &[char]) -> bool {
    chs
      .iter()
      .enumerate()
      .all(|(i, &expected)| self.peek_at(i) == Some(expected))
  }

  /// Whether the next two characters are `c1`, `c2`.
  pub fn test_ahead2(&mut self, c1: char, c2: char) -> bool {
    self.test_ahead_chs(&[c1, c2])
  }

  /// Whether the next three characters are `c1`, `c2`, `c3`.
  pub fn test_ahead3(&mut self, c1: char, c2: char, c3: char) -> bool {
    self.test_ahead_chs(&[c1, c2, c3])
  }

  /// Consumes one character, discarding it.
  pub fn advance(&mut self) {
    self.read();
  }

  /// Consumes two characters, discarding them.
  pub fn advance2(&mut self) {
    self.read();
    self.read();
  }
}
src.read().unwrap()); 195 | assert_eq!((4, 0), (src.line, src.column)); 196 | } 197 | 198 | #[test] 199 | fn peek() { 200 | let code = String::from("\u{2028}\u{0a}\u{0d}\u{0a}"); 201 | let mut src = Source::new(&code); 202 | assert_eq!(EOL, src.peek().unwrap()); 203 | src.read(); 204 | assert_eq!(EOL, src.peek().unwrap()); 205 | src.read(); 206 | assert_eq!(EOL, src.peek().unwrap()); 207 | } 208 | } 209 | -------------------------------------------------------------------------------- /src/lib/vm/obj.rs: -------------------------------------------------------------------------------- 1 | use crate::vm::gc::*; 2 | use std::f64; 3 | use std::ptr::eq; 4 | 5 | impl GcObj { 6 | pub fn pass_by_value(&self) -> bool { 7 | match self.kind { 8 | GcObjKind::String | GcObjKind::Number => true, 9 | _ => false, 10 | } 11 | } 12 | 13 | pub fn check_coercible(&self) -> bool { 14 | match self.kind { 15 | GcObjKind::Undef | GcObjKind::Null => false, 16 | _ => true, 17 | } 18 | } 19 | 20 | pub fn x_pass_by_value(&mut self) -> JsObjPtr { 21 | let gc = as_gc(self.gc()); 22 | match self.kind { 23 | GcObjKind::String => { 24 | let v = gc.new_str(false); 25 | as_str(v).d = as_str(self).d.clone(); 26 | as_obj_ptr(v) 27 | } 28 | GcObjKind::Number => { 29 | let v = gc.new_num(false); 30 | as_num(v).d = as_num(self).d; 31 | as_obj_ptr(v) 32 | } 33 | _ => panic!(), 34 | } 35 | } 36 | 37 | pub fn eqs_true(&mut self) -> bool { 38 | self.kind == GcObjKind::Boolean && as_bool(self).d 39 | } 40 | 41 | pub fn eqs_false(&mut self) -> bool { 42 | self.kind == GcObjKind::Boolean && !as_bool(self).d 43 | } 44 | 45 | pub fn t_pri(&mut self) -> JsObjPtr { 46 | // TODO:: call [[DefaultValue]] internal method 47 | as_obj(self).inc(); 48 | as_obj_ptr(self) 49 | } 50 | 51 | pub fn t_num(&mut self) -> JsNumPtr { 52 | let gc = as_gc(self.gc()); 53 | match self.kind { 54 | GcObjKind::Undef => { 55 | let n = gc.new_num(false); 56 | as_num(n).d = f64::NAN; 57 | n 58 | } 59 | GcObjKind::Null => gc.new_num(false), 
60 | GcObjKind::Boolean => { 61 | let n = gc.new_num(false); 62 | let is_true = as_obj(self).eqs_true(); 63 | as_num(n).d = if is_true { 1.0 } else { 0.0 }; 64 | n 65 | } 66 | GcObjKind::Number => { 67 | self.inc(); 68 | as_obj_ptr(self) as JsNumPtr 69 | } 70 | GcObjKind::String => { 71 | let n = gc.new_num(false); 72 | as_num(n).set_v_str(as_str(self)); 73 | n 74 | } 75 | _ => as_obj(self.t_pri()).t_num(), 76 | } 77 | } 78 | 79 | pub fn t_bool(&mut self) -> JsBoolPtr { 80 | let gc = as_gc(self.gc()); 81 | match self.kind { 82 | GcObjKind::Undef => gc.js_false(), 83 | GcObjKind::Null => gc.js_false(), 84 | GcObjKind::Boolean => as_obj_ptr(self) as JsBoolPtr, 85 | GcObjKind::Number => { 86 | let n = as_num(self); 87 | if n.d == 0.0 || n.d.is_nan() { 88 | return gc.js_false(); 89 | } 90 | gc.js_true() 91 | } 92 | GcObjKind::String => { 93 | let s = as_str(self); 94 | if s.d.len() == 0 { 95 | return gc.js_false(); 96 | } 97 | gc.js_true() 98 | } 99 | _ => gc.js_true(), 100 | } 101 | } 102 | 103 | pub fn is_compound(&self) -> bool { 104 | match self.kind { 105 | GcObjKind::String 106 | | GcObjKind::Number 107 | | GcObjKind::Boolean 108 | | GcObjKind::Null 109 | | GcObjKind::Undef => false, 110 | _ => true, 111 | } 112 | } 113 | 114 | pub fn eq(a: JsObjPtr, b: JsObjPtr) -> bool { 115 | let gc = as_gc(as_obj(a).gc()); 116 | let mut local_scope = LocalScope::new(); 117 | let ta = as_obj(a).kind; 118 | let tb = as_obj(b).kind; 119 | 120 | if ta == tb { 121 | if ta == GcObjKind::Undef { 122 | return true; 123 | } else if ta == GcObjKind::Null { 124 | return true; 125 | } else if ta == GcObjKind::Number { 126 | let av = as_num(a).d; 127 | let bv = as_num(b).d; 128 | if av.is_nan() { 129 | return false; 130 | } 131 | if bv.is_nan() { 132 | return false; 133 | } 134 | return av == bv; 135 | } else if ta == GcObjKind::String { 136 | let av = &as_str(a).d; 137 | let bv = &as_str(b).d; 138 | return av == bv; 139 | } else if ta == GcObjKind::Boolean { 140 | return a == b; 141 | } 
142 | } else { 143 | if ta == GcObjKind::Undef && tb == GcObjKind::Null 144 | || ta == GcObjKind::Null && tb == GcObjKind::Undef 145 | { 146 | return true; 147 | } 148 | if ta == GcObjKind::Number && tb == GcObjKind::String { 149 | let nb = gc.new_num(false); 150 | local_scope.reg(nb); 151 | as_num(nb).set_v_str(as_str(b)); 152 | return as_num(a).eq(nb); 153 | } 154 | if ta == GcObjKind::String && tb == GcObjKind::Number { 155 | let na = gc.new_num(false); 156 | local_scope.reg(na); 157 | as_num(na).set_v_str(as_str(a)); 158 | return as_num(b).eq(na); 159 | } 160 | if ta == GcObjKind::Boolean { 161 | let na = gc.new_num(false); 162 | local_scope.reg(na); 163 | as_num(na).set_v_bool(a); 164 | return GcObj::eq(as_obj_ptr(na), b); 165 | } 166 | if tb == GcObjKind::Boolean { 167 | let nb = gc.new_num(false); 168 | local_scope.reg(nb); 169 | as_num(nb).set_v_bool(b); 170 | return GcObj::eq(a, as_obj_ptr(nb)); 171 | } 172 | if (ta == GcObjKind::Number || ta == GcObjKind::String) && as_obj(b).is_compound() { 173 | let pri = as_obj(b).t_pri(); 174 | local_scope.reg(pri); 175 | return GcObj::eq(a, pri); 176 | } 177 | if (tb == GcObjKind::Number || tb == GcObjKind::String) && as_obj(a).is_compound() { 178 | let pri = as_obj(a).t_pri(); 179 | local_scope.reg(pri); 180 | return GcObj::eq(b, pri); 181 | } 182 | } 183 | return false; 184 | } 185 | 186 | pub fn lt(a: JsObjPtr, b: JsObjPtr) -> bool { 187 | let mut local_scope = LocalScope::new(); 188 | let gc = as_gc(as_obj(a).gc()); 189 | 190 | let pa = as_obj(a).t_pri(); 191 | local_scope.reg(a); 192 | 193 | let pb = as_obj(b).t_pri(); 194 | local_scope.reg(b); 195 | 196 | let ta = as_obj(pa).kind; 197 | let tb = as_obj(pb).kind; 198 | 199 | if !(ta == GcObjKind::String && tb == GcObjKind::String) { 200 | let na = as_obj(pa).t_num(); 201 | local_scope.reg(na); 202 | let nb = as_obj(pb).t_num(); 203 | local_scope.reg(nb); 204 | 205 | // js spec says that if `na` above(`nx` in spec) is NAN then just return `undefined` 206 | // I 
probably understand why, since compare NAN meaningless so return `undefined` looks sound, 207 | // but I think just return `false` is only the intuitive way, here we just on the basis of 208 | // what the CLua does - just return `false` to keep `LE` to return only one type value 209 | if as_num(na).d.is_nan() { 210 | // return gc.js_undef(); 211 | return false; 212 | } 213 | if as_num(nb).d.is_nan() { 214 | // return gc.js_undef(); 215 | return false; 216 | } 217 | if as_num(na).d == as_num(nb).d { 218 | return false; 219 | } 220 | if as_num(na).d < as_num(nb).d { 221 | return true; 222 | } 223 | return false; 224 | } 225 | 226 | let sa = as_str(pa); 227 | let sb = as_str(pb); 228 | if sa.d.starts_with(sb.d.as_str()) { 229 | return false; 230 | } 231 | if sb.d.starts_with(sa.d.as_str()) { 232 | return true; 233 | } 234 | 235 | let cbs = sb.d.chars().collect::>(); 236 | for ca in sa.d.chars().enumerate() { 237 | let cb = *cbs.get(ca.0).unwrap(); 238 | if ca.1 < cb { 239 | return true; 240 | } else if ca.1 > cb { 241 | return false; 242 | } 243 | } 244 | return false; 245 | } 246 | 247 | pub fn le(a: JsObjPtr, b: JsObjPtr) -> bool { 248 | GcObj::lt(a, b) || GcObj::eq(a, b) 249 | } 250 | } 251 | -------------------------------------------------------------------------------- /src/lib/asm/symtab.rs: -------------------------------------------------------------------------------- 1 | use crate::ast::*; 2 | use crate::visitor::AstVisitor; 3 | use linked_hash_set::LinkedHashSet; 4 | use std::collections::{HashMap, HashSet}; 5 | use std::ptr::{drop_in_place, null_mut}; 6 | 7 | pub type ScopePtr = *mut Scope; 8 | 9 | pub fn as_scope(ptr: ScopePtr) -> &'static mut Scope { 10 | unsafe { &mut (*ptr) } 11 | } 12 | 13 | #[derive(Debug)] 14 | pub struct Scope { 15 | pub id: usize, 16 | parent: ScopePtr, 17 | subs: Vec, 18 | pub params: HashSet, 19 | pub bindings: LinkedHashSet, 20 | } 21 | 22 | impl Scope { 23 | fn new(id: usize) -> ScopePtr { 24 | Box::into_raw(Box::new(Scope { 
25 | id, 26 | parent: null_mut(), 27 | subs: vec![], 28 | params: HashSet::new(), 29 | bindings: LinkedHashSet::new(), 30 | })) 31 | } 32 | 33 | pub fn add_binding(&mut self, n: &str) { 34 | self.bindings.insert(n.to_string()); 35 | } 36 | 37 | pub fn has_binding(&self, n: &str) -> bool { 38 | self.bindings.contains(n) 39 | } 40 | 41 | pub fn add_param(&mut self, n: &str) { 42 | self.params.insert(n.to_owned()); 43 | } 44 | 45 | pub fn has_param(&self, n: &str) -> bool { 46 | self.params.contains(n) 47 | } 48 | } 49 | 50 | #[derive(Debug)] 51 | pub struct SymTab { 52 | i: usize, 53 | scopes: HashMap, 54 | s: ScopePtr, 55 | } 56 | 57 | impl SymTab { 58 | pub fn new() -> SymTab { 59 | let s = Scope::new(0); 60 | let mut scopes = HashMap::new(); 61 | scopes.insert(as_scope(s).id, s); 62 | SymTab { i: 1, scopes, s } 63 | } 64 | 65 | pub fn enter_scope(&mut self) { 66 | let s = Scope::new(self.i); 67 | self.scopes.insert(self.i, s); 68 | self.i += 1; 69 | as_scope(s).parent = self.s; 70 | as_scope(self.s).subs.push(s); 71 | self.s = s; 72 | } 73 | 74 | pub fn leave_scope(&mut self) { 75 | self.s = as_scope(self.s).parent; 76 | } 77 | 78 | fn add_binding(&mut self, n: &str) { 79 | as_scope(self.s).add_binding(n); 80 | } 81 | 82 | fn add_param(&mut self, n: &str) { 83 | as_scope(self.s).add_param(n); 84 | } 85 | 86 | pub fn get_scope(&self, i: usize) -> ScopePtr { 87 | *self.scopes.get(&i).unwrap() 88 | } 89 | } 90 | 91 | impl Drop for SymTab { 92 | fn drop(&mut self) { 93 | self 94 | .scopes 95 | .values() 96 | .for_each(|s| unsafe { drop_in_place(*s) }); 97 | } 98 | } 99 | 100 | impl AstVisitor<(), ()> for SymTab { 101 | fn prog(&mut self, prog: &Prog) -> Result<(), ()> { 102 | prog.body.iter().for_each(|s| self.stmt(s).unwrap()); 103 | Ok(()) 104 | } 105 | 106 | fn block_stmt(&mut self, stmt: &BlockStmt) -> Result<(), ()> { 107 | stmt.body.iter().for_each(|s| self.stmt(s).unwrap()); 108 | Ok(()) 109 | } 110 | 111 | fn var_dec_stmt(&mut self, stmt: &VarDec) -> 
Result<(), ()> { 112 | stmt.decs.iter().for_each(|dec| { 113 | self.add_binding(dec.id.id().name.as_str()); 114 | if let Some(init) = &dec.init { 115 | self.expr(init).ok(); 116 | } 117 | }); 118 | Ok(()) 119 | } 120 | 121 | fn empty_stmt(&mut self, _stmt: &EmptyStmt) -> Result<(), ()> { 122 | Ok(()) 123 | } 124 | 125 | fn expr_stmt(&mut self, stmt: &ExprStmt) -> Result<(), ()> { 126 | self.expr(&stmt.expr).ok(); 127 | Ok(()) 128 | } 129 | 130 | fn if_stmt(&mut self, stmt: &IfStmt) -> Result<(), ()> { 131 | self.stmt(&stmt.cons).ok(); 132 | if let Some(s) = &stmt.alt { 133 | self.stmt(s).ok(); 134 | } 135 | Ok(()) 136 | } 137 | 138 | fn for_stmt(&mut self, stmt: &ForStmt) -> Result<(), ()> { 139 | if let Some(init) = &stmt.init { 140 | match init { 141 | ForFirst::VarDec(dec) => self.var_dec_stmt(dec).unwrap(), 142 | _ => (), 143 | } 144 | } 145 | self.stmt(&stmt.body).ok(); 146 | Ok(()) 147 | } 148 | 149 | fn for_in_stmt(&mut self, stmt: &ForInStmt) -> Result<(), ()> { 150 | match &stmt.left { 151 | ForFirst::VarDec(dec) => self.var_dec_stmt(dec).unwrap(), 152 | _ => (), 153 | } 154 | self.stmt(&stmt.body).ok(); 155 | Ok(()) 156 | } 157 | 158 | fn do_while_stmt(&mut self, stmt: &DoWhileStmt) -> Result<(), ()> { 159 | self.stmt(&stmt.body).ok(); 160 | Ok(()) 161 | } 162 | 163 | fn while_stmt(&mut self, stmt: &WhileStmt) -> Result<(), ()> { 164 | self.stmt(&stmt.body).ok(); 165 | Ok(()) 166 | } 167 | 168 | fn cont_stmt(&mut self, _stmt: &ContStmt) -> Result<(), ()> { 169 | Ok(()) 170 | } 171 | 172 | fn break_stmt(&mut self, _stmt: &BreakStmt) -> Result<(), ()> { 173 | Ok(()) 174 | } 175 | 176 | fn ret_stmt(&mut self, stmt: &ReturnStmt) -> Result<(), ()> { 177 | if let Some(s) = &stmt.argument { 178 | self.expr(s).ok(); 179 | } 180 | Ok(()) 181 | } 182 | 183 | fn with_stmt(&mut self, _stmt: &WithStmt) -> Result<(), ()> { 184 | Ok(()) 185 | } 186 | 187 | fn switch_stmt(&mut self, stmt: &SwitchStmt) -> Result<(), ()> { 188 | stmt 189 | .cases 190 | .iter() 191 | 
.for_each(|case| case.cons.iter().for_each(|s| self.stmt(s).unwrap())); 192 | Ok(()) 193 | } 194 | 195 | fn throw_stmt(&mut self, stmt: &ThrowStmt) -> Result<(), ()> { 196 | Ok(()) 197 | } 198 | 199 | fn try_stmt(&mut self, stmt: &TryStmt) -> Result<(), ()> { 200 | self.stmt(&stmt.block).ok(); 201 | if let Some(h) = &stmt.handler { 202 | self.stmt(&h.body).ok(); 203 | } 204 | if let Some(f) = &stmt.finalizer { 205 | self.stmt(&f).ok(); 206 | } 207 | Ok(()) 208 | } 209 | 210 | fn debug_stmt(&mut self, stmt: &DebugStmt) -> Result<(), ()> { 211 | Ok(()) 212 | } 213 | 214 | fn fn_stmt(&mut self, stmt: &FnDec) -> Result<(), ()> { 215 | if let Some(id) = &stmt.id { 216 | let f_name = id.id().name.as_str(); 217 | self.add_binding(f_name); 218 | } 219 | self.enter_scope(); 220 | stmt.params.iter().for_each(|p| { 221 | let n = p.id().name.as_str(); 222 | self.add_param(n); 223 | self.add_binding(n); 224 | }); 225 | self.stmt(&stmt.body).ok(); 226 | self.leave_scope(); 227 | Ok(()) 228 | } 229 | 230 | fn member_expr(&mut self, expr: &MemberExpr) -> Result<(), ()> { 231 | self.expr(&expr.object).ok(); 232 | self.expr(&expr.property).ok(); 233 | Ok(()) 234 | } 235 | 236 | fn new_expr(&mut self, expr: &NewExpr) -> Result<(), ()> { 237 | self.expr(&expr.callee).ok(); 238 | expr 239 | .arguments 240 | .iter() 241 | .for_each(|arg| self.expr(arg).unwrap()); 242 | Ok(()) 243 | } 244 | 245 | fn call_expr(&mut self, expr: &CallExpr) -> Result<(), ()> { 246 | self.expr(&expr.callee).ok(); 247 | expr 248 | .arguments 249 | .iter() 250 | .for_each(|arg| self.expr(arg).unwrap()); 251 | Ok(()) 252 | } 253 | 254 | fn unary_expr(&mut self, expr: &UnaryExpr) -> Result<(), ()> { 255 | self.expr(&expr.argument).ok(); 256 | Ok(()) 257 | } 258 | 259 | fn binary_expr(&mut self, expr: &BinaryExpr) -> Result<(), ()> { 260 | self.expr(&expr.left).ok(); 261 | self.expr(&expr.right).ok(); 262 | Ok(()) 263 | } 264 | 265 | fn assign_expr(&mut self, expr: &AssignExpr) -> Result<(), ()> { 266 | 
self.expr(&expr.left).ok(); 267 | self.expr(&expr.right).ok(); 268 | Ok(()) 269 | } 270 | 271 | fn cond_expr(&mut self, expr: &CondExpr) -> Result<(), ()> { 272 | self.expr(&expr.test).ok(); 273 | self.expr(&expr.cons).ok(); 274 | self.expr(&expr.alt).ok(); 275 | Ok(()) 276 | } 277 | 278 | fn seq_expr(&mut self, expr: &SeqExpr) -> Result<(), ()> { 279 | expr.exprs.iter().for_each(|expr| self.expr(expr).unwrap()); 280 | Ok(()) 281 | } 282 | 283 | fn this_expr(&mut self, _expr: &ThisExprData) -> Result<(), ()> { 284 | Ok(()) 285 | } 286 | 287 | fn id_expr(&mut self, _expr: &IdData) -> Result<(), ()> { 288 | Ok(()) 289 | } 290 | 291 | fn array_literal(&mut self, expr: &ArrayData) -> Result<(), ()> { 292 | expr.value.iter().for_each(|expr| self.expr(expr).unwrap()); 293 | Ok(()) 294 | } 295 | 296 | fn object_literal(&mut self, expr: &ObjectData) -> Result<(), ()> { 297 | expr.properties.iter().for_each(|p| { 298 | self.expr(&p.key).ok(); 299 | self.expr(&p.value).ok(); 300 | }); 301 | Ok(()) 302 | } 303 | 304 | fn paren_expr(&mut self, expr: &ParenData) -> Result<(), ()> { 305 | self.expr(&expr.value).ok(); 306 | Ok(()) 307 | } 308 | 309 | fn fn_expr(&mut self, expr: &FnDec) -> Result<(), ()> { 310 | // the id of function expression is not being used as local variable 311 | // consider this code: `var a = function b() {}; b();` will produce 312 | // ReferenceError `b is not defined` 313 | self.enter_scope(); 314 | expr.params.iter().for_each(|p| { 315 | let n = p.id().name.as_str(); 316 | self.add_param(n); 317 | self.add_binding(n); 318 | }); 319 | self.stmt(&expr.body).ok(); 320 | self.leave_scope(); 321 | Ok(()) 322 | } 323 | 324 | fn regexp_expr(&mut self, _expr: &RegExpData) -> Result<(), ()> { 325 | Ok(()) 326 | } 327 | 328 | fn null_expr(&mut self, _expr: &NullData) -> Result<(), ()> { 329 | Ok(()) 330 | } 331 | 332 | fn undef_expr(&mut self, _expr: &UndefData) -> Result<(), ()> { 333 | Ok(()) 334 | } 335 | 336 | fn str_expr(&mut self, _expr: &StringData) -> 
Result<(), ()> { 337 | Ok(()) 338 | } 339 | 340 | fn bool_expr(&mut self, _expr: &BoolData) -> Result<(), ()> { 341 | Ok(()) 342 | } 343 | 344 | fn num_expr(&mut self, _expr: &NumericData) -> Result<(), ()> { 345 | Ok(()) 346 | } 347 | } 348 | 349 | #[cfg(test)] 350 | mod symtab_tests { 351 | use super::*; 352 | use crate::lexer::*; 353 | use crate::parser::*; 354 | use crate::source::*; 355 | use crate::token::*; 356 | 357 | #[test] 358 | fn scope() { 359 | init_token_data(); 360 | 361 | let code = String::from("var a; function f(b) {var a; return function f(c) {var d}}"); 362 | let src = Source::new(&code); 363 | let mut lexer = Lexer::new(src); 364 | let mut parser = Parser::new(&mut lexer); 365 | let mut symtab = SymTab::new(); 366 | let ast = parser.prog().ok().unwrap(); 367 | symtab.prog(&ast).unwrap(); 368 | 369 | let mut i = 0; 370 | let s0 = as_scope(*symtab.scopes.get(&i).unwrap()); 371 | assert!(s0.has_binding("a")); 372 | assert!(s0.has_binding("f")); 373 | 374 | i = 1; 375 | let s1 = as_scope(*symtab.scopes.get(&i).unwrap()); 376 | assert!(s1.has_binding("a")); 377 | assert!(s1.has_binding("b")); 378 | 379 | i = 2; 380 | let s1 = as_scope(*symtab.scopes.get(&i).unwrap()); 381 | assert!(s1.has_binding("c")); 382 | assert!(s1.has_binding("d")); 383 | } 384 | } 385 | -------------------------------------------------------------------------------- /src/lib/asm/chunk.rs: -------------------------------------------------------------------------------- 1 | use std::collections::HashMap; 2 | use std::fmt; 3 | use std::intrinsics::transmute; 4 | use std::sync::Once; 5 | 6 | #[derive(Debug, Clone)] 7 | pub enum Const { 8 | String(String), 9 | Number(f64), 10 | } 11 | 12 | impl Const { 13 | pub fn new_str(s: &str) -> Self { 14 | Const::String(s.to_string()) 15 | } 16 | 17 | pub fn new_num(n: f64) -> Self { 18 | Const::Number(n) 19 | } 20 | 21 | pub fn typ_id(&self) -> u8 { 22 | match self { 23 | Const::String(_) => 0, 24 | Const::Number(_) => 1, 25 | } 26 | } 27 
/// A compile-time constant stored in a function template's constant pool.
#[derive(Debug, Clone)]
pub enum Const {
  String(String),
  Number(f64),
}

impl Const {
  /// Builds a string constant holding a copy of `s`.
  pub fn new_str(s: &str) -> Self {
    Const::String(s.to_string())
  }

  /// Builds a numeric constant.
  pub fn new_num(n: f64) -> Self {
    Const::Number(n)
  }

  /// Numeric tag of the variant: `0` for strings, `1` for numbers.
  pub fn typ_id(&self) -> u8 {
    match self {
      Const::String(_) => 0,
      Const::Number(_) => 1,
    }
  }

  pub fn is_str(&self) -> bool {
    match self {
      Const::String(_) => true,
      Const::Number(_) => false,
    }
  }

  pub fn is_num(&self) -> bool {
    match self {
      Const::Number(_) => true,
      Const::String(_) => false,
    }
  }

  /// Returns the numeric payload.
  ///
  /// # Panics
  ///
  /// Panics when the constant is not a number.
  pub fn num(&self) -> f64 {
    match self {
      Const::Number(v) => *v,
      Const::String(_) => panic!("constant is not a number"),
    }
  }

  /// Returns the string payload.
  ///
  /// # Panics
  ///
  /// Panics when the constant is not a string.
  pub fn str(&self) -> &str {
    match self {
      Const::String(v) => v.as_str(),
      Const::Number(_) => panic!("constant is not a string"),
    }
  }

  /// Two constants are equal when they are the same variant and carry equal
  /// payloads (numbers are compared with `==`, so `NaN` never equals itself).
  pub fn eq(&self, c: &Const) -> bool {
    match (self, c) {
      (Const::Number(a), Const::Number(b)) => a == b,
      (Const::String(a), Const::String(b)) => a == b,
      _ => false,
    }
  }
}

#[derive(Debug, Clone)]
pub struct UpvalDesc {
  pub name: String,
  pub in_stack: bool,
  pub idx: u32,
}

#[derive(Debug, Clone)]
pub struct Local {
  pub name: String,
}

/// Operand layout of an instruction.
#[derive(Debug, Eq, PartialEq, Copy, Clone)]
pub enum OpMode {
  ABC,
  ABx,
  AsBx,
}

// Field layout of the 32-bit instruction word:
//   bits  0..6   opcode
//   bits  6..14  A
//   bits 14..23  C
//   bits 23..32  B
//   bits 14..32  Bx / sBx (overlaps C and B)
const OP_MASK: u32 = 0x3f;
const A_SHIFT: u32 = 6;
const A_MASK: u32 = 0xff;
const C_SHIFT: u32 = 14;
const B_SHIFT: u32 = 23;
const BC_MASK: u32 = 0x1ff;
const BX_SHIFT: u32 = 14;
const BX_MASK: u32 = 0x3ffff;
// sBx is stored as an unsigned Bx offset by this bias.
const SBX_BIAS: i32 = 131071;

/// One encoded instruction.
#[derive(Clone)]
pub struct Inst {
  pub raw: u32,
}

impl Inst {
  /// Creates an all-zero instruction word.
  pub fn new() -> Self {
    Inst { raw: 0 }
  }

  pub fn new_abc(op: OpCode, a: u32, b: u32, c: u32) -> Self {
    let mut inst = Inst::new();
    inst.set_op(op);
    inst.set_a(a);
    inst.set_b(b);
    inst.set_c(c);
    inst
  }

  pub fn new_a_bx(op: OpCode, a: u32, bx: u32) -> Self {
    let mut inst = Inst::new();
    inst.set_op(op);
    inst.set_a(a);
    inst.set_bx(bx);
    inst
  }

  pub fn new_a_sbx(op: OpCode, a: u32, sbx: i32) -> Self {
    let mut inst = Inst::new();
    inst.set_op(op);
    inst.set_a(a);
    inst.set_sbx(sbx);
    inst
  }

  /// Extracts the field selected by `shift` and `mask`.
  fn field(&self, shift: u32, mask: u32) -> u32 {
    (self.raw >> shift) & mask
  }

  /// Overwrites the field selected by `shift` and `mask`.
  ///
  /// `value` is truncated to the field width first, so an oversized operand
  /// can no longer spill into the neighbouring field.
  fn set_field(&mut self, shift: u32, mask: u32, value: u32) {
    self.raw = (self.raw & !(mask << shift)) | ((value & mask) << shift);
  }

  pub fn a(&self) -> u32 {
    self.field(A_SHIFT, A_MASK)
  }

  pub fn b(&self) -> u32 {
    self.field(B_SHIFT, BC_MASK)
  }

  pub fn c(&self) -> u32 {
    self.field(C_SHIFT, BC_MASK)
  }

  pub fn bx(&self) -> u32 {
    self.field(BX_SHIFT, BX_MASK)
  }

  /// Signed view of the Bx field.
  pub fn sbx(&self) -> i32 {
    self.bx() as i32 - SBX_BIAS
  }

  /// Raw opcode number; may not correspond to any [`OpCode`].
  pub fn op(&self) -> u32 {
    self.raw & OP_MASK
  }

  pub fn set_op(&mut self, op: OpCode) {
    self.raw = (self.raw & !OP_MASK) | (op as u32);
  }

  /// Stores the low 8 bits of `a`.
  pub fn set_a(&mut self, a: u32) {
    self.set_field(A_SHIFT, A_MASK, a);
  }

  /// Stores the low 9 bits of `b`.
  pub fn set_b(&mut self, b: u32) {
    self.set_field(B_SHIFT, BC_MASK, b);
  }

  /// Stores the low 9 bits of `c`.
  pub fn set_c(&mut self, c: u32) {
    self.set_field(C_SHIFT, BC_MASK, c);
  }

  /// Stores the low 18 bits of `bx`.
  pub fn set_bx(&mut self, bx: u32) {
    self.set_field(BX_SHIFT, BX_MASK, bx);
  }

  /// Stores `sbx` as a biased unsigned value in the Bx field.
  pub fn set_sbx(&mut self, sbx: i32) {
    // wrapping_add: an out-of-range offset is truncated like any other
    // operand instead of aborting on arithmetic overflow.
    self.set_bx(sbx.wrapping_add(SBX_BIAS) as u32);
  }

  // converts an integer to a "floating point byte", from CLua
  pub fn int2fb(mut x: u32) -> u32 {
    let mut e = 0; /* exponent */
    if x < 8 {
      return x;
    }
    while x >= 8 << 4 {
      /* coarse steps */
      x = (x + 0xf) >> 4; /* x = ceil(x / 16) */
      e += 4;
    }
    while x >= 8 << 1 {
      /* fine steps */
      x = (x + 1) >> 1; /* x = ceil(x / 2) */
      e += 1;
    }
    ((e + 1) << 3) | (x - 8)
  }

  /// Inverse of [`Inst::int2fb`]; the result may be rounded up.
  pub fn fb2int(x: u32) -> u32 {
    if x < 8 {
      return x;
    }
    ((x & 7) + 8) << ((x >> 3) - 1)
  }
}

impl fmt::Debug for Inst {
  fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
    // A word whose opcode number is unknown is printed raw instead of being
    // reinterpreted as an `OpCode`.
    let op = match OpCode::try_from_u32(self.op()) {
      Some(op) => op,
      None => return write!(f, "Inst{{ raw: {:#010x} }}", self.raw),
    };
    match op.mode() {
      OpMode::ABC => write!(
        f,
        "{:#?}{{ A: {}, B: {}, C: {} }}",
        op,
        self.a(),
        self.b(),
        self.c()
      ),
      OpMode::ABx => write!(f, "{:#?}{{ A: {}, Bx: {} }}", op, self.a(), self.bx()),
      OpMode::AsBx => write!(f, "{:#?}{{ A: {}, sBx: {} }}", op, self.a(), self.sbx()),
    }
  }
}

/// Compiled form of one function: its code plus the pools the code refers to.
#[derive(Debug, Clone)]
pub struct FnTpl {
  pub param_cnt: u8,
  pub is_vararg: bool,
  pub code: Vec<Inst>,
  pub consts: Vec<Const>,
  pub upvals: Vec<UpvalDesc>,
  pub locals: Vec<Local>,
  pub fun_tpls: Vec<FnTpl>,
}

impl FnTpl {
  /// Creates an empty template.
  pub fn new() -> Self {
    FnTpl {
      param_cnt: 0,
      is_vararg: false,
      code: vec![],
      consts: vec![],
      upvals: vec![],
      locals: vec![],
      fun_tpls: vec![],
    }
  }
}

#[derive(Debug)]
pub struct Chunk {
  pub sig: &'static str,
  pub ver: u64,
  pub upval_cnt: u8,
  pub fun_tpl: FnTpl,
}

#[derive(Debug, Eq, PartialEq, Copy, Clone, Hash)]
pub enum OpCode {
  MOVE,
  LOADK,
  LOADKX,
  LOADBOO,
  LOADNUL,
  LOADUNDEF,
  GETUPVAL,
  GETTABUP,
  GETTABLE,
  SETTABUP,
  SETUPVAL,
  SETTABLE,
  NEWTABLE,
  NEWARRAY,
  INITARRAY,
  THIS,
  ADD,
  SUB,
  MUL,
  MOD,
  DIV,
  LT,
  LE,
  EQ,
  EQS,
  JMP,
  TEST,
  TESTSET,
  BITAND,
  BITOR,
  BITXOR,
  SHL,
  SAR,
  SHR,
  UNM,
  NOT,
  BITNOT,
  CLOSURE,
  CALL,
  RETURN,
  NEW,
}

const OPCODE_COUNT: usize = 41;

/// Name and operand layout of every opcode.
///
/// Entries MUST stay in declaration order of [`OpCode`]: the table is indexed
/// by the opcode's discriminant.
static OPCODE_TABLE: [(OpCode, &'static str, OpMode); OPCODE_COUNT] = [
  (OpCode::MOVE, "MOVE", OpMode::ABC),
  (OpCode::LOADK, "LOADK", OpMode::ABx),
  (OpCode::LOADKX, "LOADKX", OpMode::ABx),
  (OpCode::LOADBOO, "LOADBOO", OpMode::ABC),
  (OpCode::LOADNUL, "LOADNUL", OpMode::ABC),
  (OpCode::LOADUNDEF, "LOADUNDEF", OpMode::ABC),
  (OpCode::GETUPVAL, "GETUPVAL", OpMode::ABC),
  (OpCode::GETTABUP, "GETTABUP", OpMode::ABC),
  (OpCode::GETTABLE, "GETTABLE", OpMode::ABC),
  (OpCode::SETTABUP, "SETTABUP", OpMode::ABC),
  (OpCode::SETUPVAL, "SETUPVAL", OpMode::ABC),
  (OpCode::SETTABLE, "SETTABLE", OpMode::ABC),
  (OpCode::NEWTABLE, "NEWTABLE", OpMode::ABC),
  (OpCode::NEWARRAY, "NEWARRAY", OpMode::ABC),
  (OpCode::INITARRAY, "INITARRAY", OpMode::ABC),
  (OpCode::THIS, "THIS", OpMode::ABC),
  (OpCode::ADD, "ADD", OpMode::ABC),
  (OpCode::SUB, "SUB", OpMode::ABC),
  (OpCode::MUL, "MUL", OpMode::ABC),
  (OpCode::MOD, "MOD", OpMode::ABC),
  (OpCode::DIV, "DIV", OpMode::ABC),
  (OpCode::LT, "LT", OpMode::ABC),
  (OpCode::LE, "LE", OpMode::ABC),
  (OpCode::EQ, "EQ", OpMode::ABC),
  (OpCode::EQS, "EQS", OpMode::ABC),
  (OpCode::JMP, "JMP", OpMode::AsBx),
  (OpCode::TEST, "TEST", OpMode::ABC),
  (OpCode::TESTSET, "TESTSET", OpMode::ABC),
  (OpCode::BITAND, "BITAND", OpMode::ABC),
  (OpCode::BITOR, "BITOR", OpMode::ABC),
  (OpCode::BITXOR, "BITXOR", OpMode::ABC),
  (OpCode::SHL, "SHL", OpMode::ABC),
  (OpCode::SAR, "SAR", OpMode::ABC),
  (OpCode::SHR, "SHR", OpMode::ABC),
  (OpCode::UNM, "UNM", OpMode::ABC),
  (OpCode::NOT, "NOT", OpMode::ABC),
  (OpCode::BITNOT, "BITNOT", OpMode::ABC),
  (OpCode::CLOSURE, "CLOSURE", OpMode::ABC),
  (OpCode::CALL, "CALL", OpMode::ABC),
  (OpCode::RETURN, "RETURN", OpMode::ABC),
  (OpCode::NEW, "NEW", OpMode::ABC),
];

/// Looks up the table row describing `op`.
fn opcode_entry(op: OpCode) -> &'static (OpCode, &'static str, OpMode) {
  let entry = &OPCODE_TABLE[op as usize];
  debug_assert!(entry.0 == op, "OPCODE_TABLE is out of declaration order");
  entry
}

/// Retained for callers that used to initialise the opcode metadata.
///
/// The metadata now lives in an immutable static table, so this is a no-op
/// and the lookup functions work whether or not it has been called.
pub fn init_opcode_data() {}

/// Mnemonic of `op`.
pub fn op_to_name(op: &OpCode) -> &'static str {
  opcode_entry(*op).1
}

/// Operand layout of `op`.
pub fn op_to_mode(op: &OpCode) -> &'static OpMode {
  &opcode_entry(*op).2
}

impl OpCode {
  /// Converts an opcode number into an `OpCode`, or `None` when no opcode
  /// has that number.
  pub fn try_from_u32(x: u32) -> Option<Self> {
    OPCODE_TABLE.get(x as usize).map(|entry| entry.0)
  }

  /// Converts an opcode number into an `OpCode`.
  ///
  /// # Panics
  ///
  /// Panics when `x` is not the number of any opcode. (This used to
  /// reinterpret the low byte of `x` as an enum value, which is undefined
  /// behaviour for numbers without a matching variant.)
  pub fn from_u32(x: u32) -> Self {
    match Self::try_from_u32(x) {
      Some(op) => op,
      None => panic!("invalid opcode number: {}", x),
    }
  }

  pub fn mode(&self) -> OpMode {
    *op_to_mode(self)
  }

  /// Whether `op` is this opcode's number; any unknown number is `false`.
  pub fn eq(&self, op: u32) -> bool {
    *self as u32 == op
  }
}

#[cfg(test)]
mod chunk_tests {
  use super::*;

  #[test]
  fn inst_test() {
    let mut inst = Inst::new();
    inst.set_op(OpCode::LOADUNDEF);
    inst.set_a(0);
    inst.set_b(1);
    assert_eq!(0, inst.a());
    assert_eq!(1, inst.b());

    let mut inst = Inst::new();
    inst.set_op(OpCode::LOADUNDEF);
    inst.set_a(1);
    inst.set_bx(20);
    assert_eq!(1, inst.a());
    assert_eq!(20, inst.bx());

    let mut inst = Inst::new();
    inst.set_op(OpCode::LOADUNDEF);
    inst.set_a(1);
    inst.set_sbx(-20);
    assert_eq!(1, inst.a());
    assert_eq!(-20, inst.sbx());
  }

  #[test]
  fn opcode_test() {
    init_opcode_data();

    assert_eq!(OpMode::ABC, OpCode::from_u32(0).mode());
    assert_eq!(OpMode::ABx, OpCode::from_u32(1).mode());
  }
}
3 | [[package]] 4 | name = "byteorder" 5 | version = "1.3.2" 6 | source = "registry+https://github.com/rust-lang/crates.io-index" 7 | 8 | [[package]] 9 | name = "dtoa" 10 | version = "0.4.4" 11 | source = "registry+https://github.com/rust-lang/crates.io-index" 12 | 13 | [[package]] 14 | name = "linked-hash-map" 15 | version = "0.5.2" 16 | source = "registry+https://github.com/rust-lang/crates.io-index" 17 | 18 | [[package]] 19 | name = "linked_hash_set" 20 | version = "0.1.3" 21 | source = "registry+https://github.com/rust-lang/crates.io-index" 22 | dependencies = [ 23 | "linked-hash-map 0.5.2 (registry+https://github.com/rust-lang/crates.io-index)", 24 | ] 25 | 26 | [[package]] 27 | name = "matches" 28 | version = "0.1.8" 29 | source = "registry+https://github.com/rust-lang/crates.io-index" 30 | 31 | [[package]] 32 | name = "naive" 33 | version = "0.1.0" 34 | dependencies = [ 35 | "byteorder 1.3.2 (registry+https://github.com/rust-lang/crates.io-index)", 36 | "linked_hash_set 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)", 37 | "serde 1.0.94 (registry+https://github.com/rust-lang/crates.io-index)", 38 | "serde_yaml 0.8.9 (registry+https://github.com/rust-lang/crates.io-index)", 39 | "unic-ucd 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)", 40 | ] 41 | 42 | [[package]] 43 | name = "proc-macro2" 44 | version = "0.4.30" 45 | source = "registry+https://github.com/rust-lang/crates.io-index" 46 | dependencies = [ 47 | "unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", 48 | ] 49 | 50 | [[package]] 51 | name = "quote" 52 | version = "0.6.12" 53 | source = "registry+https://github.com/rust-lang/crates.io-index" 54 | dependencies = [ 55 | "proc-macro2 0.4.30 (registry+https://github.com/rust-lang/crates.io-index)", 56 | ] 57 | 58 | [[package]] 59 | name = "serde" 60 | version = "1.0.94" 61 | source = "registry+https://github.com/rust-lang/crates.io-index" 62 | dependencies = [ 63 | "serde_derive 1.0.94 
(registry+https://github.com/rust-lang/crates.io-index)", 64 | ] 65 | 66 | [[package]] 67 | name = "serde_derive" 68 | version = "1.0.94" 69 | source = "registry+https://github.com/rust-lang/crates.io-index" 70 | dependencies = [ 71 | "proc-macro2 0.4.30 (registry+https://github.com/rust-lang/crates.io-index)", 72 | "quote 0.6.12 (registry+https://github.com/rust-lang/crates.io-index)", 73 | "syn 0.15.39 (registry+https://github.com/rust-lang/crates.io-index)", 74 | ] 75 | 76 | [[package]] 77 | name = "serde_yaml" 78 | version = "0.8.9" 79 | source = "registry+https://github.com/rust-lang/crates.io-index" 80 | dependencies = [ 81 | "dtoa 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)", 82 | "linked-hash-map 0.5.2 (registry+https://github.com/rust-lang/crates.io-index)", 83 | "serde 1.0.94 (registry+https://github.com/rust-lang/crates.io-index)", 84 | "yaml-rust 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)", 85 | ] 86 | 87 | [[package]] 88 | name = "syn" 89 | version = "0.15.39" 90 | source = "registry+https://github.com/rust-lang/crates.io-index" 91 | dependencies = [ 92 | "proc-macro2 0.4.30 (registry+https://github.com/rust-lang/crates.io-index)", 93 | "quote 0.6.12 (registry+https://github.com/rust-lang/crates.io-index)", 94 | "unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", 95 | ] 96 | 97 | [[package]] 98 | name = "unic-char-property" 99 | version = "0.9.0" 100 | source = "registry+https://github.com/rust-lang/crates.io-index" 101 | dependencies = [ 102 | "unic-char-range 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)", 103 | ] 104 | 105 | [[package]] 106 | name = "unic-char-range" 107 | version = "0.9.0" 108 | source = "registry+https://github.com/rust-lang/crates.io-index" 109 | 110 | [[package]] 111 | name = "unic-common" 112 | version = "0.9.0" 113 | source = "registry+https://github.com/rust-lang/crates.io-index" 114 | 115 | [[package]] 116 | name = "unic-ucd" 117 | version = 
"0.9.0" 118 | source = "registry+https://github.com/rust-lang/crates.io-index" 119 | dependencies = [ 120 | "unic-ucd-age 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)", 121 | "unic-ucd-bidi 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)", 122 | "unic-ucd-block 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)", 123 | "unic-ucd-case 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)", 124 | "unic-ucd-category 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)", 125 | "unic-ucd-common 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)", 126 | "unic-ucd-hangul 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)", 127 | "unic-ucd-ident 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)", 128 | "unic-ucd-name 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)", 129 | "unic-ucd-name_aliases 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)", 130 | "unic-ucd-normal 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)", 131 | "unic-ucd-segment 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)", 132 | "unic-ucd-version 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)", 133 | ] 134 | 135 | [[package]] 136 | name = "unic-ucd-age" 137 | version = "0.9.0" 138 | source = "registry+https://github.com/rust-lang/crates.io-index" 139 | dependencies = [ 140 | "unic-char-property 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)", 141 | "unic-char-range 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)", 142 | "unic-ucd-version 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)", 143 | ] 144 | 145 | [[package]] 146 | name = "unic-ucd-bidi" 147 | version = "0.9.0" 148 | source = "registry+https://github.com/rust-lang/crates.io-index" 149 | dependencies = [ 150 | "unic-char-property 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)", 151 | "unic-char-range 0.9.0 
(registry+https://github.com/rust-lang/crates.io-index)", 152 | "unic-ucd-version 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)", 153 | ] 154 | 155 | [[package]] 156 | name = "unic-ucd-block" 157 | version = "0.9.0" 158 | source = "registry+https://github.com/rust-lang/crates.io-index" 159 | dependencies = [ 160 | "unic-char-property 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)", 161 | "unic-char-range 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)", 162 | "unic-ucd-version 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)", 163 | ] 164 | 165 | [[package]] 166 | name = "unic-ucd-case" 167 | version = "0.9.0" 168 | source = "registry+https://github.com/rust-lang/crates.io-index" 169 | dependencies = [ 170 | "unic-char-property 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)", 171 | "unic-char-range 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)", 172 | "unic-ucd-version 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)", 173 | ] 174 | 175 | [[package]] 176 | name = "unic-ucd-category" 177 | version = "0.9.0" 178 | source = "registry+https://github.com/rust-lang/crates.io-index" 179 | dependencies = [ 180 | "matches 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)", 181 | "unic-char-property 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)", 182 | "unic-char-range 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)", 183 | "unic-ucd-version 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)", 184 | ] 185 | 186 | [[package]] 187 | name = "unic-ucd-common" 188 | version = "0.9.0" 189 | source = "registry+https://github.com/rust-lang/crates.io-index" 190 | dependencies = [ 191 | "unic-char-property 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)", 192 | "unic-char-range 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)", 193 | "unic-ucd-version 0.9.0 
(registry+https://github.com/rust-lang/crates.io-index)", 194 | ] 195 | 196 | [[package]] 197 | name = "unic-ucd-hangul" 198 | version = "0.9.0" 199 | source = "registry+https://github.com/rust-lang/crates.io-index" 200 | dependencies = [ 201 | "unic-ucd-version 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)", 202 | ] 203 | 204 | [[package]] 205 | name = "unic-ucd-ident" 206 | version = "0.9.0" 207 | source = "registry+https://github.com/rust-lang/crates.io-index" 208 | dependencies = [ 209 | "unic-char-property 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)", 210 | "unic-char-range 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)", 211 | "unic-ucd-version 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)", 212 | ] 213 | 214 | [[package]] 215 | name = "unic-ucd-name" 216 | version = "0.9.0" 217 | source = "registry+https://github.com/rust-lang/crates.io-index" 218 | dependencies = [ 219 | "unic-char-property 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)", 220 | "unic-ucd-hangul 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)", 221 | "unic-ucd-version 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)", 222 | ] 223 | 224 | [[package]] 225 | name = "unic-ucd-name_aliases" 226 | version = "0.9.0" 227 | source = "registry+https://github.com/rust-lang/crates.io-index" 228 | dependencies = [ 229 | "unic-char-property 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)", 230 | "unic-ucd-version 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)", 231 | ] 232 | 233 | [[package]] 234 | name = "unic-ucd-normal" 235 | version = "0.9.0" 236 | source = "registry+https://github.com/rust-lang/crates.io-index" 237 | dependencies = [ 238 | "unic-char-property 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)", 239 | "unic-char-range 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)", 240 | "unic-ucd-category 0.9.0 
(registry+https://github.com/rust-lang/crates.io-index)", 241 | "unic-ucd-hangul 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)", 242 | "unic-ucd-version 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)", 243 | ] 244 | 245 | [[package]] 246 | name = "unic-ucd-segment" 247 | version = "0.9.0" 248 | source = "registry+https://github.com/rust-lang/crates.io-index" 249 | dependencies = [ 250 | "unic-char-property 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)", 251 | "unic-char-range 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)", 252 | "unic-ucd-version 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)", 253 | ] 254 | 255 | [[package]] 256 | name = "unic-ucd-version" 257 | version = "0.9.0" 258 | source = "registry+https://github.com/rust-lang/crates.io-index" 259 | dependencies = [ 260 | "unic-common 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)", 261 | ] 262 | 263 | [[package]] 264 | name = "unicode-xid" 265 | version = "0.1.0" 266 | source = "registry+https://github.com/rust-lang/crates.io-index" 267 | 268 | [[package]] 269 | name = "yaml-rust" 270 | version = "0.4.3" 271 | source = "registry+https://github.com/rust-lang/crates.io-index" 272 | dependencies = [ 273 | "linked-hash-map 0.5.2 (registry+https://github.com/rust-lang/crates.io-index)", 274 | ] 275 | 276 | [metadata] 277 | "checksum byteorder 1.3.2 (registry+https://github.com/rust-lang/crates.io-index)" = "a7c3dd8985a7111efc5c80b44e23ecdd8c007de8ade3b96595387e812b957cf5" 278 | "checksum dtoa 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)" = "ea57b42383d091c85abcc2706240b94ab2a8fa1fc81c10ff23c4de06e2a90b5e" 279 | "checksum linked-hash-map 0.5.2 (registry+https://github.com/rust-lang/crates.io-index)" = "ae91b68aebc4ddb91978b11a1b02ddd8602a05ec19002801c5666000e05e0f83" 280 | "checksum linked_hash_set 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = 
"3c7c91c4c7bbeb4f2f7c4e5be11e6a05bd6830bc37249c47ce1ad86ad453ff9c" 281 | "checksum matches 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)" = "7ffc5c5338469d4d3ea17d269fa8ea3512ad247247c30bd2df69e68309ed0a08" 282 | "checksum proc-macro2 0.4.30 (registry+https://github.com/rust-lang/crates.io-index)" = "cf3d2011ab5c909338f7887f4fc896d35932e29146c12c8d01da6b22a80ba759" 283 | "checksum quote 0.6.12 (registry+https://github.com/rust-lang/crates.io-index)" = "faf4799c5d274f3868a4aae320a0a182cbd2baee377b378f080e16a23e9d80db" 284 | "checksum serde 1.0.94 (registry+https://github.com/rust-lang/crates.io-index)" = "076a696fdea89c19d3baed462576b8f6d663064414b5c793642da8dfeb99475b" 285 | "checksum serde_derive 1.0.94 (registry+https://github.com/rust-lang/crates.io-index)" = "ef45eb79d6463b22f5f9e16d283798b7c0175ba6050bc25c1a946c122727fe7b" 286 | "checksum serde_yaml 0.8.9 (registry+https://github.com/rust-lang/crates.io-index)" = "38b08a9a90e5260fe01c6480ec7c811606df6d3a660415808c3c3fa8ed95b582" 287 | "checksum syn 0.15.39 (registry+https://github.com/rust-lang/crates.io-index)" = "b4d960b829a55e56db167e861ddb43602c003c7be0bee1d345021703fac2fb7c" 288 | "checksum unic-char-property 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)" = "a8c57a407d9b6fa02b4795eb81c5b6652060a15a7903ea981f3d723e6c0be221" 289 | "checksum unic-char-range 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)" = "0398022d5f700414f6b899e10b8348231abf9173fa93144cbc1a43b9793c1fbc" 290 | "checksum unic-common 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)" = "80d7ff825a6a654ee85a63e80f92f054f904f21e7d12da4e22f9834a4aaa35bc" 291 | "checksum unic-ucd 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)" = "625b18f7601e1127504a20ae731dc3c7826d0e86d5f7fe3434f8137669240efd" 292 | "checksum unic-ucd-age 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)" = "6c8cfdfe71af46b871dc6af2c24fcd360e2f3392ee4c5111877f2947f311671c" 293 | "checksum 
unic-ucd-bidi 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)" = "d1d568b51222484e1f8209ce48caa6b430bf352962b877d592c29ab31fb53d8c" 294 | "checksum unic-ucd-block 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)" = "6b2a16f2d7ecd25325a1053ca5a66e7fa1b68911a65c5e97f8d2e1b236b6f1d7" 295 | "checksum unic-ucd-case 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)" = "3d98d6246a79bac6cf66beee01422bda7c882e11d837fa4969bfaaba5fdea6d3" 296 | "checksum unic-ucd-category 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)" = "1b8d4591f5fcfe1bd4453baaf803c40e1b1e69ff8455c47620440b46efef91c0" 297 | "checksum unic-ucd-common 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)" = "e9b78b910beafa1aae5c59bf00877c6cece1c5db28a1241ad801e86cecdff4ad" 298 | "checksum unic-ucd-hangul 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)" = "eb1dc690e19010e1523edb9713224cba5ef55b54894fe33424439ec9a40c0054" 299 | "checksum unic-ucd-ident 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)" = "e230a37c0381caa9219d67cf063aa3a375ffed5bf541a452db16e744bdab6987" 300 | "checksum unic-ucd-name 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)" = "9c8fc55a45b2531089dc1773bf60c1f104b38e434b774ffc37b9c29a9b0f492e" 301 | "checksum unic-ucd-name_aliases 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)" = "6b7674212643087699ba247a63dd05f1204c7e4880ec9342e545a7cffcc6a46f" 302 | "checksum unic-ucd-normal 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)" = "86aed873b8202d22b13859dda5fe7c001d271412c31d411fd9b827e030569410" 303 | "checksum unic-ucd-segment 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)" = "2079c122a62205b421f499da10f3ee0f7697f012f55b675e002483c73ea34700" 304 | "checksum unic-ucd-version 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)" = "96bd2f2237fe450fcd0a1d2f5f4e91711124f7857ba2e964247776ebeeb7b0c4" 305 | "checksum unicode-xid 0.1.0 
(registry+https://github.com/rust-lang/crates.io-index)" = "fc72304796d0818e357ead4e000d19c9c174ab23dc11093ac919054d20a6a7fc" 306 | "checksum yaml-rust 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)" = "65923dd1784f44da1d2c3dbbc5e822045628c590ba72123e1c73d3c230c4434d" 307 | -------------------------------------------------------------------------------- /src/lib/vm/gc.rs: -------------------------------------------------------------------------------- 1 | use crate::asm::chunk::*; 2 | use linked_hash_set::LinkedHashSet; 3 | use std::collections::{HashMap, HashSet}; 4 | use std::mem; 5 | use std::os::raw::c_void; 6 | use std::ptr::{drop_in_place, null, null_mut}; 7 | use std::sync::Once; 8 | 9 | pub type GcObjPtr = *mut GcObj; 10 | pub type JsObj = GcObj; 11 | pub type JsObjPtr = GcObjPtr; 12 | 13 | pub type JsStrPtr = *mut JsString; 14 | pub type JsNumPtr = *mut JsNumber; 15 | pub type JsBoolPtr = *mut JsBoolean; 16 | pub type JsArrPtr = *mut JsArray; 17 | pub type JsDictPtr = *mut JsDict; 18 | pub type JsFunPtr = *mut JsFunction; 19 | 20 | pub type UpValPtr = *mut UpVal; 21 | 22 | pub type GcPtr = *mut Gc; 23 | 24 | #[inline(always)] 25 | pub fn as_obj_ptr(ptr: *mut T) -> JsObjPtr { 26 | ptr as JsObjPtr 27 | } 28 | 29 | #[inline(always)] 30 | pub fn as_obj(ptr: *mut T) -> &'static mut GcObj { 31 | unsafe { &mut (*(ptr as GcObjPtr)) } 32 | } 33 | 34 | #[inline(always)] 35 | pub fn as_str(ptr: *mut T) -> &'static mut JsString { 36 | unsafe { &mut (*(ptr as JsStrPtr)) } 37 | } 38 | 39 | #[inline(always)] 40 | pub fn as_num(ptr: *mut T) -> &'static mut JsNumber { 41 | unsafe { &mut (*(ptr as JsNumPtr)) } 42 | } 43 | 44 | #[inline(always)] 45 | pub fn as_bool(ptr: *mut T) -> &'static mut JsBoolean { 46 | unsafe { &mut (*(ptr as JsBoolPtr)) } 47 | } 48 | 49 | #[inline(always)] 50 | pub fn as_arr(ptr: *mut T) -> &'static mut JsArray { 51 | unsafe { &mut (*(ptr as JsArrPtr)) } 52 | } 53 | 54 | #[inline(always)] 55 | pub fn as_dict(ptr: *mut T) -> &'static 
mut JsDict {
  unsafe { &mut (*(ptr as JsDictPtr)) }
}

/// Reinterprets `ptr` as a `JsFunction`.
///
/// The cast is unchecked: the caller must pass a live pointer to a
/// `JsFunction`.
#[inline(always)]
pub fn as_fun<T>(ptr: *mut T) -> &'static mut JsFunction {
  unsafe { &mut (*(ptr as JsFunPtr)) }
}

/// Reinterprets `ptr` as an `UpVal`.
///
/// The cast is unchecked: the caller must pass a live pointer to an `UpVal`.
#[inline(always)]
pub fn as_uv<T>(ptr: *mut T) -> &'static mut UpVal {
  unsafe { &mut (*(ptr as UpValPtr)) }
}

/// Reinterprets `ptr` as the collector.
///
/// The cast is unchecked: the caller must pass a live pointer to a `Gc`.
#[inline(always)]
pub fn as_gc<T>(ptr: *mut T) -> &'static mut Gc {
  unsafe { &mut (*(ptr as GcPtr)) }
}

/// Concrete type of a managed object, stored in its [`GcObj`] header.
#[repr(C)]
#[derive(Debug, Eq, PartialEq, Copy, Clone)]
pub enum GcObjKind {
  String,
  Number,

  Array,
  Dict,
  Function,

  UpVal,

  Boolean,
  Null,
  Undef,
}

/// Per-type teardown routine stored in every object header.
pub type Deinit = fn(ptr: JsObjPtr);

/// Teardown routine that does nothing.
fn default_deinit(_ptr: JsObjPtr) {}

/// Header shared by every managed object.
///
/// Each concrete object type is `#[repr(C)]` and keeps this header as its
/// first field, which is what lets a pointer to the object be viewed as a
/// pointer to its header.
#[repr(C)]
#[derive(Debug, Clone)]
pub struct GcObj {
  ref_cnt: usize,
  pub kind: GcObjKind,
  gc: GcPtr,
  deinit: Deinit,
}

impl GcObj {
  /// Adds one reference.
  pub fn inc(&mut self) {
    self.ref_cnt += 1;
  }

  /// Drops one reference by delegating to the owning collector.
  pub fn dec(&mut self) {
    as_gc(self.gc).dec(self);
  }

  /// Collector this object was created with.
  pub fn gc(&self) -> GcPtr {
    self.gc
  }
}

#[repr(C)]
#[derive(Debug, Clone)]
pub struct JsString {
  base: GcObj,
  pub d: String,
}

/// Runs the destructor of a `JsString` and subtracts its size from the
/// collector's heap size.
// NOTE(review): only `drop_in_place` is called here; where the allocation
// made by `Box::into_raw` is released is not visible in this part of the
// file — confirm it is freed elsewhere.
fn js_str_deinit(ptr: JsObjPtr) {
  let r = as_str(ptr);
  let s = mem::size_of_val(r);
  // Fetch the collector before the object is dropped so that nothing is read
  // from the object afterwards.
  let gc = as_gc(r.base.gc);
  unsafe {
    drop_in_place(r);
  }
  gc.heap_size -= s;
}

impl JsString {
  /// Allocates an empty string with a reference count of one and registers
  /// it with `gc`.
  pub fn new(gc: &mut Gc, is_root: bool) -> JsStrPtr {
    let ptr = Box::into_raw(Box::new(JsString {
      base: GcObj {
        ref_cnt: 1,
        kind: GcObjKind::String,
        gc,
        deinit: js_str_deinit,
      },
      d: "".to_owned(),
    }));
    gc.register(as_obj_ptr(ptr), is_root);
    ptr
  }
}

#[repr(C)]
#[derive(Debug, Clone)]
pub struct JsNumber {
  base: GcObj,
  pub d: f64,
}

/// Runs the destructor of a `JsNumber` and subtracts its size from the
/// collector's heap size.
fn js_num_deinit(ptr: JsObjPtr) {
  let r = as_num(ptr);
  let s = mem::size_of_val(r);
  // Fetch the collector before the object is dropped.
  let gc = as_gc(r.base.gc);
  unsafe {
    drop_in_place(r);
  }
  gc.heap_size -= s;
}

impl JsNumber {
  /// Allocates a number initialised to `0.0` with a reference count of one
  /// and registers it with `gc`.
  fn new(gc: &mut Gc, is_root: bool) -> JsNumPtr {
    let ptr = Box::into_raw(Box::new(JsNumber {
      base: GcObj {
        ref_cnt: 1,
        kind: GcObjKind::Number,
        deinit: js_num_deinit,
        gc,
      },
      d: 0.0,
    }));
    gc.register(as_obj_ptr(ptr), is_root);
    ptr
  }
}

#[repr(C)]
#[derive(Debug, Clone)]
pub struct JsBoolean {
  base: GcObj,
  pub d: bool,
}

#[repr(C)]
#[derive(Debug, Clone)]
pub struct JsArray {
  base: GcObj,
  d: Vec<JsObjPtr>,
}

/// Releases one reference on every element, then runs the destructor of the
/// array and subtracts its size from the collector's heap size.
fn js_arr_deinit(ptr: JsObjPtr) {
  let r = as_arr(ptr);
  let s = mem::size_of_val(r);

  let gc = as_gc(r.base.gc);
  for pp in &r.d {
    gc.dec(*pp);
  }

  unsafe {
    drop_in_place(r);
  }
  gc.heap_size -= s;
}

impl JsArray {
  /// Allocates an empty array with a reference count of one and registers it
  /// with `gc`.
  fn new(gc: &mut Gc, is_root: bool) -> JsArrPtr {
    let ptr = Box::into_raw(Box::new(JsArray {
      base: GcObj {
        ref_cnt: 1,
        kind: GcObjKind::Array,
        deinit: js_arr_deinit,
        gc,
      },
      d: vec![],
    }));
    gc.register(as_obj_ptr(ptr), is_root);
    ptr
  }
}

#[repr(C)]
#[derive(Debug, Clone)]
pub struct JsDict {
  base: GcObj,
  pub d: HashMap<String, JsObjPtr>,
}

/// Releases one reference on every stored value, then runs the destructor of
/// the dictionary and subtracts its size from the collector's heap size.
fn js_dict_deinit(ptr: JsObjPtr) {
  let r = as_dict(ptr);
  let s = mem::size_of_val(r);

  let gc = as_gc(r.base.gc);
  for (_, pp) in &r.d {
    gc.dec(*pp);
  }

  unsafe {
    drop_in_place(r);
  }
  gc.heap_size -= s;
}

impl JsDict {
  /// Allocates an empty dictionary with a reference count of one and
  /// registers it with `gc`.
  fn new(gc: &mut Gc, is_root: bool) -> JsDictPtr {
    let ptr = Box::into_raw(Box::new(JsDict {
      base: GcObj {
        ref_cnt: 1,
        kind: GcObjKind::Dict,
        deinit: js_dict_deinit,
        gc,
      },
      d: HashMap::new(),
    }));
    gc.register(as_obj_ptr(ptr), is_root);
    ptr
  }

  /// Stores `v` under an owned copy of `k`, replacing any previous entry.
  ///
  /// Reference counts are not touched, neither for `v` nor for a value that
  /// gets replaced.
  // NOTE(review): confirm callers account for the counts themselves.
  pub fn insert(&mut self, k: &str, v: JsObjPtr) {
    self.d.insert(k.to_owned(), v);
  }
}

#[repr(C)]
#[derive(Debug, Clone)]
pub struct UpVal {
  base: GcObj,
  pub v: JsObjPtr,
}

/// Releases the reference held on the captured value, then runs the
/// destructor of the upvalue and subtracts its size from the collector's
/// heap size.
fn upval_deinit(ptr: JsObjPtr) {
  let r = as_uv(ptr);
  let s = mem::size_of_val(r);

  let gc = as_gc(r.base.gc);
  gc.dec(r.v);

  unsafe {
    drop_in_place(r);
  }
  gc.heap_size -= s;
}

impl UpVal {
  /// Allocates an upvalue capturing `v` (taking one reference on it), with a
  /// reference count of one, and registers it with `gc`.
  fn new(gc: &mut Gc, v: JsObjPtr, is_root: bool) -> UpValPtr {
    as_obj(v).inc();
    let ptr = Box::into_raw(Box::new(UpVal {
      base: GcObj {
        ref_cnt: 1,
        kind: GcObjKind::UpVal,
        deinit: upval_deinit,
        gc,
      },
      v,
    }));
    gc.register(as_obj_ptr(ptr), is_root);
    ptr
  }
}

#[repr(C)]
#[derive(Debug, Clone)]
pub struct JsFunction {
  base: GcObj,
  pub f: *const c_void,
  pub is_native: bool,
  pub upvals: Vec<UpValPtr>,
  pub prototype: JsObjPtr,
}

/// Releases one reference on every upvalue, then runs the destructor of the
/// function and subtracts its size from the collector's heap size.
// NOTE(review): `prototype` is not released here — confirm whether the
// function owns a reference to it.
fn js_fun_deinit(ptr: JsObjPtr) {
  let r = as_fun(ptr);
  let s = mem::size_of_val(r);
  // Fetch the collector before the object is dropped.
  let gc = as_gc(r.base.gc);

  r.upvals.iter().for_each(|uv| as_obj(*uv).dec());

  unsafe {
    drop_in_place(r);
  }
  gc.heap_size -= s;
}

impl JsFunction {
  /// Allocates a non-native function with no code pointer, no upvalues and a
  /// null prototype, with a reference count of one, and registers it with
  /// `gc`.
  fn new(gc: &mut Gc, is_root: bool) -> JsFunPtr {
    let ptr = Box::into_raw(Box::new(JsFunction {
      base: GcObj {
        ref_cnt: 1,
        kind: GcObjKind::Function,
        deinit: js_fun_deinit,
        gc,
      },
      f: null(),
      is_native: false,
      upvals: vec![],
      prototype: null_mut(),
    }));
    gc.register(as_obj_ptr(ptr), is_root);
    ptr
  }
}

#[repr(C)]
348 | #[derive(Debug)] 349 | pub struct Gc { 350 | heap_size: usize, 351 | max_heap_size: usize, 352 | obj_list: LinkedHashSet, 353 | roots: HashSet, 354 | marked: Vec, 355 | js_null_: JsObjPtr, 356 | js_undef_: JsObjPtr, 357 | js_true_: JsBoolPtr, 358 | js_false_: JsBoolPtr, 359 | } 360 | 361 | static INIT_GC_DATA_ONCE: Once = Once::new(); 362 | 363 | impl Gc { 364 | pub fn new(max_heap_size: usize) -> Box { 365 | let mut gc = Box::new(Gc { 366 | heap_size: 0, 367 | max_heap_size, 368 | obj_list: LinkedHashSet::new(), 369 | roots: HashSet::new(), 370 | marked: vec![], 371 | js_null_: null_mut(), 372 | js_undef_: null_mut(), 373 | js_true_: null_mut(), 374 | js_false_: null_mut(), 375 | }); 376 | gc.init_data(); 377 | gc 378 | } 379 | 380 | fn init_data(&mut self) { 381 | self.js_null_ = Box::into_raw(Box::new(GcObj { 382 | ref_cnt: 1, 383 | kind: GcObjKind::Null, 384 | gc: self as GcPtr, 385 | deinit: default_deinit, 386 | })); 387 | 388 | self.js_undef_ = Box::into_raw(Box::new(GcObj { 389 | ref_cnt: 1, 390 | kind: GcObjKind::Undef, 391 | gc: self as GcPtr, 392 | deinit: default_deinit, 393 | })); 394 | 395 | self.js_true_ = Box::into_raw(Box::new(JsBoolean { 396 | base: GcObj { 397 | ref_cnt: 1, 398 | kind: GcObjKind::Boolean, 399 | gc: self as GcPtr, 400 | deinit: default_deinit, 401 | }, 402 | d: true, 403 | })); 404 | 405 | self.js_false_ = Box::into_raw(Box::new(JsBoolean { 406 | base: GcObj { 407 | ref_cnt: 1, 408 | kind: GcObjKind::Boolean, 409 | gc: self as GcPtr, 410 | deinit: default_deinit, 411 | }, 412 | d: false, 413 | })); 414 | } 415 | 416 | pub fn js_null(&self) -> JsObjPtr { 417 | self.js_null_ 418 | } 419 | 420 | pub fn js_undef(&self) -> JsObjPtr { 421 | self.js_undef_ 422 | } 423 | 424 | pub fn js_true(&self) -> JsBoolPtr { 425 | self.js_true_ 426 | } 427 | 428 | pub fn js_false(&self) -> JsBoolPtr { 429 | self.js_false_ 430 | } 431 | 432 | pub fn new_str(&mut self, is_root: bool) -> JsStrPtr { 433 | let need = mem::size_of::(); 434 | 
self.xgc(need); 435 | self.heap_size += need; 436 | JsString::new(self, is_root) 437 | } 438 | 439 | pub fn new_str_from_kst(&mut self, kst: &Const, is_root: bool) -> JsStrPtr { 440 | let s = self.new_str(is_root); 441 | as_str(s).d = kst.str().to_owned(); 442 | s 443 | } 444 | 445 | pub fn new_obj_from_kst(&mut self, kst: &Const, is_root: bool) -> JsObjPtr { 446 | match kst { 447 | Const::String(_) => as_obj_ptr(self.new_str_from_kst(kst, is_root)), 448 | Const::Number(_) => as_obj_ptr(self.new_num_from_kst(kst, is_root)), 449 | } 450 | } 451 | 452 | pub fn new_num(&mut self, is_root: bool) -> JsNumPtr { 453 | let need = mem::size_of::(); 454 | self.xgc(need); 455 | self.heap_size += need; 456 | JsNumber::new(self, is_root) 457 | } 458 | 459 | pub fn new_num_from_kst(&mut self, kst: &Const, is_root: bool) -> JsNumPtr { 460 | let n = self.new_num(is_root); 461 | as_num(n).d = kst.num(); 462 | n 463 | } 464 | 465 | pub fn new_arr(&mut self, is_root: bool) -> JsArrPtr { 466 | let need = mem::size_of::(); 467 | self.xgc(need); 468 | self.heap_size += need; 469 | JsArray::new(self, is_root) 470 | } 471 | 472 | pub fn new_dict(&mut self, is_root: bool) -> JsDictPtr { 473 | let need = mem::size_of::(); 474 | self.xgc(need); 475 | self.heap_size += need; 476 | JsDict::new(self, is_root) 477 | } 478 | 479 | pub fn new_fun(&mut self, is_root: bool) -> JsFunPtr { 480 | let need = mem::size_of::(); 481 | self.xgc(need); 482 | self.heap_size += need; 483 | JsFunction::new(self, is_root) 484 | } 485 | 486 | pub fn new_upval(&mut self, v: JsObjPtr, is_root: bool) -> UpValPtr { 487 | let need = mem::size_of::(); 488 | self.xgc(need); 489 | self.heap_size += need; 490 | UpVal::new(self, v, is_root) 491 | } 492 | 493 | pub fn register(&mut self, ptr: JsObjPtr, is_root: bool) { 494 | self.obj_list.insert(ptr); 495 | if is_root { 496 | self.roots.insert(ptr); 497 | } 498 | } 499 | 500 | pub fn remove_root(&mut self, ptr: JsObjPtr) { 501 | self.roots.remove(&as_obj_ptr(ptr)); 502 | } 
503 | 504 | pub fn append_root(&mut self, ptr: JsObjPtr) { 505 | self.roots.insert(ptr); 506 | } 507 | 508 | pub fn inc(&self, ptr: *mut T) { 509 | as_obj(ptr).ref_cnt += 1; 510 | } 511 | 512 | pub fn dec(&mut self, ptr: *mut T) { 513 | let ptr = ptr as JsObjPtr; 514 | if !self.obj_list.contains(&ptr) { 515 | return; 516 | } 517 | 518 | let obj = as_obj(ptr); 519 | match obj.kind { 520 | GcObjKind::Null | GcObjKind::Undef | GcObjKind::Boolean => return, 521 | _ => (), 522 | } 523 | let em = format!("dropping dangling object {:#?} {:#?}", ptr, obj); 524 | assert!(obj.ref_cnt > 0, em); 525 | obj.ref_cnt -= 1; 526 | if obj.ref_cnt == 0 { 527 | self.drop(ptr); 528 | } 529 | } 530 | 531 | pub fn drop(&mut self, ptr: *mut T) { 532 | let ptr = ptr as JsObjPtr; 533 | let obj = as_obj(ptr); 534 | self.obj_list.remove(&ptr); 535 | self.roots.remove(&ptr); 536 | (obj.deinit)(obj); 537 | } 538 | 539 | pub fn xgc(&mut self, need: usize) { 540 | let s = self.heap_size + need; 541 | if s >= self.max_heap_size { 542 | self.gc(); 543 | assert!(self.heap_size + need <= self.max_heap_size); 544 | } 545 | } 546 | 547 | fn reset_all_ref_cnt(&self) { 548 | for pp in &self.obj_list { 549 | as_obj(*pp).ref_cnt = 0; 550 | } 551 | } 552 | 553 | fn mark_phase(&mut self) { 554 | for root in &self.roots { 555 | self.marked.push(*root); 556 | } 557 | while let Some(ptr) = self.marked.pop() { 558 | let obj = as_obj(ptr); 559 | obj.ref_cnt += 1; 560 | if obj.ref_cnt == 1 { 561 | match obj.kind { 562 | GcObjKind::String 563 | | GcObjKind::Number 564 | | GcObjKind::Boolean 565 | | GcObjKind::Undef 566 | | GcObjKind::Null => (), 567 | GcObjKind::Array => { 568 | for pp in &as_arr(obj).d { 569 | self.marked.push(*pp) 570 | } 571 | } 572 | GcObjKind::Dict => { 573 | for (_, pp) in &as_dict(obj).d { 574 | self.marked.push(*pp) 575 | } 576 | } 577 | GcObjKind::UpVal => { 578 | self.marked.push(as_uv(obj).v); 579 | } 580 | GcObjKind::Function => { 581 | for pp in &as_fun(obj).upvals { 582 | 
self.marked.push(as_obj_ptr(*pp)) 583 | } 584 | } 585 | } 586 | } 587 | } 588 | } 589 | 590 | fn sweep_phase(&mut self) { 591 | let mut cc = vec![]; 592 | for pp in &self.obj_list { 593 | let ptr = *pp; 594 | if as_obj(ptr).ref_cnt == 0 { 595 | cc.push(ptr); 596 | } 597 | } 598 | cc.iter().for_each(|ptr| { 599 | self.drop(*ptr); 600 | }); 601 | } 602 | 603 | pub fn gc(&mut self) { 604 | self.reset_all_ref_cnt(); 605 | self.mark_phase(); 606 | self.sweep_phase(); 607 | } 608 | } 609 | 610 | impl Drop for Gc { 611 | fn drop(&mut self) { 612 | for p in self.obj_list.clone() { 613 | if self.obj_list.contains(&p) { 614 | self.dec(p); 615 | } 616 | } 617 | } 618 | } 619 | 620 | pub struct LocalScope { 621 | vals: HashSet, 622 | } 623 | 624 | impl LocalScope { 625 | pub fn new() -> Self { 626 | LocalScope { 627 | vals: HashSet::new(), 628 | } 629 | } 630 | 631 | pub fn reg(&mut self, v: *mut T) -> JsObjPtr { 632 | let v = v as JsObjPtr; 633 | self.vals.insert(v); 634 | v 635 | } 636 | } 637 | 638 | impl Drop for LocalScope { 639 | fn drop(&mut self) { 640 | self.vals.iter().for_each(|v| as_obj(*v).dec()) 641 | } 642 | } 643 | 644 | #[cfg(test)] 645 | mod gc_tests { 646 | use super::*; 647 | 648 | #[test] 649 | fn str_test() { 650 | let mut gc = Gc::new(100); 651 | let str1 = gc.new_str(true); 652 | gc.dec(str1); 653 | let str2 = gc.new_str(true); 654 | assert_eq!(gc.obj_list.len(), 1); 655 | } 656 | 657 | #[test] 658 | fn arr_test() { 659 | let mut gc = Gc::new(100); 660 | let arr = gc.new_arr(true); 661 | gc.dec(arr); 662 | assert_eq!(gc.obj_list.len(), 0); 663 | } 664 | 665 | #[test] 666 | fn dict_test() { 667 | let mut gc = Gc::new(300); 668 | let dict = gc.new_dict(true); 669 | let str1 = gc.new_str(false); 670 | let arr = gc.new_arr(false); 671 | as_dict(dict).insert("test", as_obj_ptr(arr)); 672 | assert_eq!(gc.obj_list.len(), 3); 673 | 674 | gc.dec(dict); 675 | gc.gc(); 676 | assert_eq!(gc.obj_list.len(), 0); 677 | } 678 | } 679 | 
// -------------------------------------------------------------------------------- /src/lib/token.rs: --------------------------------------------------------------------------------
use std::collections::{HashMap, HashSet};
use std::sync::Once;

/// A line/column pair inside a source file.
#[derive(Debug, Copy, Clone, Default)]
pub struct Position {
  pub line: i32,
  pub column: i32,
}

impl Position {
  /// Returns the position `0:0`.
  pub fn new() -> Position {
    Position { line: 0, column: 0 }
  }
}

/// The start and end positions of a token.
#[derive(Debug, Copy, Clone, Default)]
pub struct SourceLoc {
  pub start: Position,
  pub end: Position,
}

impl SourceLoc {
  /// Returns a location whose start and end are both `0:0`.
  pub fn new() -> Self {
    SourceLoc {
      start: Position::new(),
      end: Position::new(),
    }
  }
}

/// Payload of [`Token::Keyword`].
#[derive(Debug, Copy, Clone)]
pub struct KeywordData {
  pub kind: Keyword,
  pub loc: SourceLoc,
}

/// Reserved words, including the future reserved words.
#[derive(Debug, Eq, PartialEq, Hash, Copy, Clone)]
pub enum Keyword {
  Break,
  Do,
  Instanceof,
  Typeof,
  Case,
  Else,
  New,
  Var,
  Catch,
  Finally,
  Return,
  Void,
  Continue,
  For,
  Switch,
  While,
  Debugger,
  Function,
  This,
  With,
  Default,
  If,
  Throw,
  Delete,
  In,
  Try,
  // future reserved words
  Class,
  Enum,
  Extends,
  Super,
  Const,
  Export,
  Import,
}

// `$be` denotes the `beforeExpr` attribute of a token.
// The regexp grammar of js is an island grammar when seen from the point of view of the whole js grammar,
// so there are two opportunities to process that island grammar: either at the tokenizing phase or at the parsing phase.
//
// Here we use the approach taken by [acorn](https://github.com/acornjs/acorn/blob/master/acorn/src/tokentype.js):
// it attaches a `beforeExpr` attribute to each token type to indicate that a slash after such a token
// is the beginning of a regexp if the value of its `beforeExpr` attribute is `true`.
// It works at the tokenizing phase, therefore it can follow the definition of regexp to produce RegExpLiteral tokens.
//
// The macro builds four tables from `key => name, before_expr` rows:
// the set of names, key -> name, name -> key, and the set of keys whose `before_expr` flag is `true`.
macro_rules! gen_map {
  ($($k:expr => $v:expr, $be:expr)*) => {
    {
      let mut s = HashSet::new();
      $(
        s.insert($v);
      )*
      let mut kv = HashMap::new();
      $(
        kv.insert($k, $v);
      )*
      let mut vk = HashMap::new();
      $(
        vk.insert($v, $k);
      )*
      let mut be = HashSet::new();
      $(
        // Only rows flagged `true` belong to the before-expression set.
        if $be {
          be.insert($k);
        }
      )*
      (Some(s), Some(kv), Some(vk), Some(be))
    }
  };
}

// Same as `gen_map!`, with an extra precedence column collected into a key -> precedence map.
macro_rules! gen_map_syb {
  ($($k:expr => $v:expr, $be:expr, $pcd:expr)*) => {
    {
      let (s, kv, vk, be) = gen_map! {
        $(
          $k => $v, $be
        )*
      };
      let mut pcdm = HashMap::new();
      $(
        pcdm.insert($k, $pcd);
      )*
      (s, kv, vk, be, Some(pcdm))
    }
  };
}

// The tables below are written exactly once, inside `init_token_data`'s
// `Once`, and only read afterwards.
static mut KEYWORDS_SET: Option<HashSet<&'static str>> = None;
static mut KEYWORDS_KEY_NAME: Option<HashMap<Keyword, &'static str>> = None;
static mut KEYWORDS_NAME_KEY: Option<HashMap<&'static str, Keyword>> = None;
static mut KEYWORDS_BEFORE_EXPR_SET: Option<HashSet<Keyword>> = None;
fn init_keywords() {
  let (s, kv, vk, be) = gen_map! {
    Keyword::Break => "break", false
    Keyword::Do => "do", true
    Keyword::Instanceof => "instanceof", true
    Keyword::Typeof => "typeof", true
    Keyword::Case => "case", true
    Keyword::Else => "else", true
    Keyword::New => "new", true
    Keyword::Var => "var", false
    Keyword::Catch => "catch", false
    Keyword::Finally => "finally", false
    Keyword::Return => "return", true
    Keyword::Void => "void", true
    Keyword::Continue => "continue", false
    Keyword::For => "for", false
    Keyword::Switch => "switch", false
    Keyword::While => "while", false
    Keyword::Debugger => "debugger", false
    Keyword::Function => "function", false
    Keyword::This => "this", false
    Keyword::With => "with", false
    Keyword::Default => "default", true
    Keyword::If => "if", false
    Keyword::Throw => "throw", true
    Keyword::Delete => "delete", true
    Keyword::In => "in", true
    Keyword::Try => "try", false
    // future reserved words
    Keyword::Class => "class", false
    Keyword::Enum => "enum", false
    Keyword::Extends => "extends", true
    Keyword::Super => "super", false
    Keyword::Const => "const", false
    Keyword::Export => "export", false
    Keyword::Import => "import", false
  };
  unsafe {
    KEYWORDS_SET = s;
    KEYWORDS_KEY_NAME = kv;
    KEYWORDS_NAME_KEY = vk;
    KEYWORDS_BEFORE_EXPR_SET = be;
  }
}

/// Returns `true` when `s` is a reserved word.
pub fn is_keyword(s: &str) -> bool {
  init_token_data();
  unsafe { KEYWORDS_SET.as_ref().unwrap().contains(s) }
}

/// Returns the source spelling of a keyword.
pub fn keyword_to_name(v: &Keyword) -> &'static str {
  init_token_data();
  unsafe { KEYWORDS_KEY_NAME.as_ref().unwrap().get(v).unwrap() }
}

/// Returns the keyword spelled `s`.
///
/// # Panics
///
/// Panics when `s` is not a keyword; check with [`is_keyword`] first.
pub fn name_to_keyword(s: &str) -> Keyword {
  init_token_data();
  unsafe {
    KEYWORDS_NAME_KEY
      .as_ref()
      .unwrap()
      .get(s)
      .unwrap()
      .to_owned()
  }
}

impl Keyword {
  /// Source spelling of this keyword.
  pub fn name(&self) -> &'static str {
    keyword_to_name(self)
  }

  /// Whether a `/` following this keyword starts a regexp literal.
  pub fn is_before_expr(&self) -> bool {
    init_token_data();
    unsafe { KEYWORDS_BEFORE_EXPR_SET.as_ref().unwrap().contains(self) }
  }
}

/// Punctuators and operators.
///
/// `BinOpStart`/`BinOpEnd` and `AssignStart`/`AssignEnd` are markers, not real
/// symbols: the variants declared between them are the binary and assignment
/// operators (see [`Token::is_symbol_bin`] and [`Token::is_symbol_assign`]),
/// so the declaration order matters. The markers have no entry in the name
/// tables.
#[derive(Debug, Eq, PartialEq, Hash, Copy, Clone)]
pub enum Symbol {
  BraceL,
  BraceR,
  ParenL,
  ParenR,
  BracketL,
  BracketR,
  Dot,
  Semi,
  Comma,
  BinOpStart,
  LT,
  GT,
  LE,
  GE,
  Eq,
  NotEq,
  EqStrict,
  NotEqStrict,
  Add,
  Sub,
  Mul,
  Div,
  Mod,
  Inc,
  Dec,
  SHL,
  SAR,
  SHR,
  BitAnd,
  BitOr,
  BitXor,
  Not,
  BitNot,
  And,
  Or,
  BinOpEnd,
  Conditional,
  Colon,
  AssignStart,
  Assign,
  AssignAdd,
  AssignSub,
  AssignMul,
  AssignDiv,
  AssignMod,
  AssignSHL,
  AssignSAR,
  AssignSHR,
  AssignBitAnd,
  AssignBitOr,
  AssignBitXor,
  AssignEnd,
}

/// Payload of [`Token::Symbol`].
#[derive(Debug, Clone)]
pub struct SymbolData {
  pub kind: Symbol,
  pub loc: SourceLoc,
}

static mut SYMBOLS_SET: Option<HashSet<&'static str>> = None;
static mut SYMBOLS_KEY_NAME: Option<HashMap<Symbol, &'static str>> = None;
static mut SYMBOLS_NAME_KEY: Option<HashMap<&'static str, Symbol>> = None;
static mut SYMBOLS_BEFORE_EXPR_SET: Option<HashSet<Symbol>> = None;
static mut SYMBOLS_KEY_PRECEDENCE: Option<HashMap<Symbol, i32>> = None;
fn init_symbols() {
  let (s, kv, vk, be, pcd) = gen_map_syb! {
    Symbol::BraceL => "{", true, 0
    Symbol::BraceR => "}", false, 0
    Symbol::ParenL => "(", true, 20
    Symbol::ParenR => ")", false, 0
    Symbol::BracketL => "[", true, 19
    Symbol::BracketR => "]", false, 0
    Symbol::Dot => ".", true, 19
    Symbol::Semi => ";", true, 0
    Symbol::Comma => ",", true, 1
    Symbol::LT => "<", true, 11
    Symbol::GT => ">", true, 11
    Symbol::LE => "<=", true, 11
    Symbol::GE => ">=", true, 11
    Symbol::Eq => "==", true, 10
    Symbol::NotEq => "!=", true, 10
    Symbol::EqStrict => "===", true, 10
    Symbol::NotEqStrict => "!==", true, 10
    Symbol::Add => "+", true, 13
    Symbol::Sub => "-", true, 13
    Symbol::Mul => "*", true, 14
    Symbol::Div => "/", true, 14
    Symbol::Mod => "%", true, 14
    Symbol::Inc => "++", false, 16
    Symbol::Dec => "--", false, 16
    Symbol::SHL => "<<", true, 12
    Symbol::SAR => ">>", true, 12
    Symbol::SHR => ">>>", true, 12
    Symbol::BitAnd => "&", true, 9
    Symbol::BitOr => "|", true, 7
    Symbol::BitXor => "^", true, 8
    Symbol::Not => "!", true, 16
    Symbol::BitNot => "~", true, 16
    Symbol::And => "&&", true, 6
    Symbol::Or => "||", true, 5
    Symbol::Conditional => "?", true, 4
    Symbol::Colon => ":", true, 4
    Symbol::Assign => "=", true, 3
    Symbol::AssignAdd => "+=", true, 3
    Symbol::AssignSub => "-=", true, 3
    Symbol::AssignMul => "*=", true, 3
    Symbol::AssignDiv => "/=", true, 3
    Symbol::AssignMod => "%=", true, 3
    Symbol::AssignSHL => "<<=", true, 3
    Symbol::AssignSAR => ">>=", true, 3
    Symbol::AssignSHR => ">>>=", true, 3
    Symbol::AssignBitAnd => "&=", true, 3
    Symbol::AssignBitOr => "|=", true, 3
    Symbol::AssignBitXor => "^=", true, 3
  };
  unsafe {
    SYMBOLS_SET = s;
    SYMBOLS_KEY_NAME = kv;
    SYMBOLS_NAME_KEY = vk;
    SYMBOLS_BEFORE_EXPR_SET = be;
    SYMBOLS_KEY_PRECEDENCE = pcd;
  }
}

/// Returns `true` when `s` is the spelling of a symbol.
pub fn is_symbol(s: &str) -> bool {
  init_token_data();
  unsafe { SYMBOLS_SET.as_ref().unwrap().contains(s) }
}

/// Returns the source spelling of a symbol.
///
/// # Panics
///
/// Panics for the marker variants, which have no spelling.
pub fn symbol_to_name(v: &Symbol) -> &'static str {
  init_token_data();
  unsafe { SYMBOLS_KEY_NAME.as_ref().unwrap().get(v).unwrap() }
}

/// Returns the precedence recorded for a symbol in the table above.
///
/// # Panics
///
/// Panics for the marker variants, which have no table entry.
pub fn symbol_pcd(v: &Symbol) -> i32 {
  init_token_data();
  unsafe {
    SYMBOLS_KEY_PRECEDENCE
      .as_ref()
      .unwrap()
      .get(v)
      .unwrap()
      .to_owned()
  }
}

/// Returns the symbol spelled `s`.
///
/// # Panics
///
/// Panics when `s` is not a symbol; check with [`is_symbol`] first.
pub fn name_to_symbol(s: &str) -> Symbol {
  init_token_data();
  unsafe {
    SYMBOLS_NAME_KEY
      .as_ref()
      .unwrap()
      .get(s)
      .unwrap()
      .to_owned()
  }
}

impl Symbol {
  /// Source spelling of this symbol.
  pub fn name(&self) -> &'static str {
    symbol_to_name(self)
  }

  /// Whether a `/` following this symbol starts a regexp literal.
  pub fn is_before_expr(&self) -> bool {
    init_token_data();
    unsafe { SYMBOLS_BEFORE_EXPR_SET.as_ref().unwrap().contains(self) }
  }
}

/// Words that are only reserved in some contexts.
#[derive(Debug, Eq, PartialEq, Hash, Copy, Clone)]
pub enum CtxKeyword {
  Implements,
  Let,
  Private,
  Public,
  Interface,
  Package,
  Protected,
  Static,
  Yield,
}

/// Payload of [`Token::ContextualKeyword`].
#[derive(Debug, Clone)]
pub struct CtxKeywordData {
  pub kind: CtxKeyword,
  pub loc: SourceLoc,
}

static mut CTX_KEYWORD_SET: Option<HashSet<&'static str>> = None;
static mut CTX_KEYWORD_KEY_NAME: Option<HashMap<CtxKeyword, &'static str>> = None;
static mut CTX_KEYWORD_NAME_KEY: Option<HashMap<&'static str, CtxKeyword>> = None;
static mut CTX_KEYWORD_BEFORE_EXPR_SET: Option<HashSet<CtxKeyword>> = None;
fn init_ctx_keyword() {
  let (s, kv, vk, be) = gen_map! {
    CtxKeyword::Implements => "implements", false
    CtxKeyword::Let => "let", false
    CtxKeyword::Private => "private", false
    CtxKeyword::Public => "public", false
    CtxKeyword::Interface => "interface", false
    CtxKeyword::Package => "package", false
    CtxKeyword::Protected => "protected", false
    CtxKeyword::Static => "static", false
    CtxKeyword::Yield => "yield", true
  };
  unsafe {
    CTX_KEYWORD_SET = s;
    CTX_KEYWORD_KEY_NAME = kv;
    CTX_KEYWORD_NAME_KEY = vk;
    CTX_KEYWORD_BEFORE_EXPR_SET = be;
  }
}

/// Returns `true` when `s` is a contextual keyword.
pub fn is_ctx_keyword(s: &str) -> bool {
  init_token_data();
  unsafe { CTX_KEYWORD_SET.as_ref().unwrap().contains(s) }
}

/// Returns the source spelling of a contextual keyword.
pub fn ctx_keyword_to_name(v: &CtxKeyword) -> &'static str {
  init_token_data();
  unsafe { CTX_KEYWORD_KEY_NAME.as_ref().unwrap().get(v).unwrap() }
}

/// Returns the contextual keyword spelled `s`.
///
/// # Panics
///
/// Panics when `s` is not a contextual keyword; check with
/// [`is_ctx_keyword`] first.
pub fn name_to_ctx_keyword(s: &str) -> CtxKeyword {
  init_token_data();
  unsafe {
    CTX_KEYWORD_NAME_KEY
      .as_ref()
      .unwrap()
      .get(s)
      .unwrap()
      .to_owned()
  }
}

impl CtxKeyword {
  /// Source spelling of this contextual keyword.
  pub fn name(&self) -> &'static str {
    ctx_keyword_to_name(self)
  }

  /// Whether a `/` following this word starts a regexp literal.
  pub fn is_before_expr(&self) -> bool {
    init_token_data();
    unsafe { CTX_KEYWORD_BEFORE_EXPR_SET.as_ref().unwrap().contains(self) }
  }
}

/// Payload of [`Token::Identifier`].
#[derive(Debug, Clone)]
pub struct IdentifierData {
  pub value: String,
  pub loc: SourceLoc,
}

/// Payload of [`Token::NullLiteral`].
#[derive(Debug, Clone)]
pub struct NullLiteralData {
  pub loc: SourceLoc,
}

/// Returns `true` when `s` spells the null literal.
pub fn is_null(s: &str) -> bool {
  s == "null"
}

/// The two boolean literals.
#[derive(Debug, Eq, PartialEq, Clone)]
pub enum BooleanLiteral {
  True,
  False,
}

/// Payload of [`Token::BooleanLiteral`].
#[derive(Debug, Clone)]
pub struct BooleanLiteralData {
  pub kind: BooleanLiteral,
  pub loc: SourceLoc,
}

/// Returns `true` when `s` spells a boolean literal.
pub fn is_bool(s: &str) -> bool {
  s == "true" || s == "false"
}

/// Returns the boolean literal spelled `s`.
///
/// # Panics
///
/// Panics when `s` is neither `"true"` nor `"false"`.
pub fn name_to_bool(s: &str) -> BooleanLiteral {
  match s {
    "true" => BooleanLiteral::True,
    "false" => BooleanLiteral::False,
    _ => panic!(),
  }
}

impl BooleanLiteral {
  /// Source spelling of this literal.
  pub fn name(&self) -> &'static str {
    match self {
      BooleanLiteral::True => "true",
      BooleanLiteral::False => "false",
    }
  }
}

/// Payload of [`Token::StringLiteral`].
#[derive(Debug, Clone)]
pub struct StringLiteralData {
  pub value: String,
  pub loc: SourceLoc,
}

/// Payload of [`Token::NumericLiteral`]; the value is kept as text.
#[derive(Debug, Clone)]
pub struct NumericLiteralData {
  pub value: String,
  pub loc: SourceLoc,
}

/// Payload of [`Token::RegExpLiteral`].
#[derive(Debug, Clone)]
pub struct RegExpLiteralData {
  pub value: String,
  pub loc: SourceLoc,
}

/// Payload of [`Token::Eof`].
#[derive(Debug, Clone)]
pub struct EofData {
  pub loc: SourceLoc,
}

/// A lexical token.
///
/// The `is_*` methods are total predicates; the `*_data` accessors panic when
/// the token is of a different variant.
#[derive(Debug, Clone)]
pub enum Token {
  Keyword(KeywordData),
  Symbol(SymbolData),
  ContextualKeyword(CtxKeywordData),
  Identifier(IdentifierData),
  NullLiteral(NullLiteralData),
  BooleanLiteral(BooleanLiteralData),
  StringLiteral(StringLiteralData),
  NumericLiteral(NumericLiteralData),
  RegExpLiteral(RegExpLiteralData),
  Eof(EofData),
  Nil,
}

impl Token {
  pub fn is_keyword(&self) -> bool {
    match self {
      Token::Keyword(_) => true,
      _ => false,
    }
  }

  /// Whether this token is the keyword `k`.
  pub fn is_keyword_kind(&self, k: Keyword) -> bool {
    match self {
      Token::Keyword(data) => data.kind == k,
      _ => false,
    }
  }

  /// Whether this token is one of the keywords in `ks`.
  pub fn is_keyword_kind_in(&self, ks: &[Keyword]) -> bool {
    match self {
      Token::Keyword(d) => ks.contains(&d.kind),
      _ => false,
    }
  }

  /// Whether this token is the `in` keyword used as a binary operator;
  /// always `false` when `not_in` is set.
  pub fn is_keyword_bin(&self, not_in: bool) -> bool {
    match self {
      Token::Keyword(d) => !not_in && d.kind == Keyword::In,
      _ => false,
    }
  }

  /// Precedence of a keyword operator: `11` for `in`, `-1` for anything else.
  pub fn keyword_pcd(&self) -> i32 {
    match self {
      Token::Keyword(d) => match d.kind {
        Keyword::In => 11,
        _ => -1,
      },
      _ => -1,
    }
  }

  pub fn keyword_data(&self) -> &KeywordData {
    match self {
      Token::Keyword(data) => data,
      _ => panic!(),
    }
  }

  pub fn is_symbol(&self) -> bool {
    match self {
      Token::Symbol(_) => true,
      _ => false,
    }
  }

  /// Precedence of this symbol token.
  ///
  /// # Panics
  ///
  /// Panics when the token is not a symbol.
  pub fn symbol_pcd(&self) -> i32 {
    match self {
      Token::Symbol(s) => symbol_pcd(&s.kind),
      _ => panic!(),
    }
  }

  /// Whether this token is a symbol declared strictly between
  /// `Symbol::BinOpStart` and `Symbol::BinOpEnd`.
  pub fn is_symbol_bin(&self) -> bool {
    match self {
      Token::Symbol(s) => {
        let s = s.kind as i32;
        let start = Symbol::BinOpStart as i32;
        let end = Symbol::BinOpEnd as i32;
        s > start && s < end
      }
      _ => false,
    }
  }

  /// Whether this token is a symbol declared strictly between
  /// `Symbol::AssignStart` and `Symbol::AssignEnd`.
  pub fn is_symbol_assign(&self) -> bool {
    match self {
      Token::Symbol(s) => {
        let s = s.kind as i32;
        let start = Symbol::AssignStart as i32;
        let end = Symbol::AssignEnd as i32;
        s > start && s < end
      }
      _ => false,
    }
  }

  /// Whether this token is the symbol `k`.
  pub fn is_symbol_kind(&self, k: Symbol) -> bool {
    match self {
      Token::Symbol(s) => s.kind == k,
      _ => false,
    }
  }

  /// Whether this token is one of the symbols in `ks`.
  pub fn is_symbol_kind_in(&self, ks: &[Symbol]) -> bool {
    match self {
      Token::Symbol(d) => ks.contains(&d.kind),
      _ => false,
    }
  }

  pub fn symbol_data(&self) -> &SymbolData {
    match self {
      Token::Symbol(data) => data,
      _ => panic!(),
    }
  }

  pub fn is_ctx_keyword(&self) -> bool {
    match self {
      Token::ContextualKeyword(_) => true,
      _ => false,
    }
  }

  pub fn ctx_keyword_data(&self) -> &CtxKeywordData {
    match self {
      Token::ContextualKeyword(data) => data,
      _ => panic!(),
    }
  }

  /// Whether this token is an identifier other than `undefined`.
  pub fn is_id(&self) -> bool {
    match self {
      Token::Identifier(v) => !v.value.eq("undefined"),
      _ => false,
    }
  }

  pub fn id_data(&self) -> &IdentifierData {
    match self {
      Token::Identifier(data) => data,
      _ => panic!(),
    }
  }

  /// Whether this token is the identifier `undefined`.
  pub fn is_undef(&self) -> bool {
    match self {
      Token::Identifier(v) => v.value.eq("undefined"),
      _ => false,
    }
  }

  /// Identifier payload; does not itself check that the identifier is
  /// `undefined`.
  pub fn undef_data(&self) -> &IdentifierData {
    match self {
      Token::Identifier(data) => data,
      _ => panic!(),
    }
  }

  pub fn is_null(&self) -> bool {
    match self {
      Token::NullLiteral(_) => true,
      _ => false,
    }
  }

  pub fn null_data(&self) -> &NullLiteralData {
    match self {
      Token::NullLiteral(data) => data,
      _ => panic!(),
    }
  }

  pub fn is_bool(&self) -> bool {
    match self {
      Token::BooleanLiteral(_) => true,
      _ => false,
    }
  }

  pub fn bool_data(&self) -> &BooleanLiteralData {
    match self {
      Token::BooleanLiteral(data) => data,
      _ => panic!(),
    }
  }

  pub fn is_str(&self) -> bool {
    match self {
      Token::StringLiteral(_) => true,
      _ => false,
    }
  }

  pub fn str_data(&self) -> &StringLiteralData {
    match self {
      Token::StringLiteral(data) => data,
      _ => panic!(),
    }
  }

  pub fn is_num(&self) -> bool {
    match self {
      Token::NumericLiteral(_) => true,
      _ => false,
    }
  }

  pub fn num_data(&self) -> &NumericLiteralData {
    match self {
      Token::NumericLiteral(data) => data,
      _ => panic!(),
    }
  }

  pub fn is_regexp(&self) -> bool {
    match self {
      Token::RegExpLiteral(_) => true,
      _ => false,
    }
  }

  pub fn regexp_data(&self) -> &RegExpLiteralData {
    match self {
      Token::RegExpLiteral(data) => data,
      _ => panic!(),
    }
  }

  /// Whether a `/` following this token starts a regexp literal. Only
  /// keywords, symbols and contextual keywords can answer `true`.
  pub fn is_before_expr(&self) -> bool {
    match self {
      Token::Keyword(data) => data.kind.is_before_expr(),
      Token::Symbol(data) => data.kind.is_before_expr(),
      Token::ContextualKeyword(data) => data.kind.is_before_expr(),
      _ => false,
    }
  }

  pub fn is_eof(&self) -> bool {
    match self {
      Token::Eof(_) => true,
      _ => false,
    }
  }

  /// Source location of this token.
  ///
  /// # Panics
  ///
  /// Panics for `Token::Nil`, which carries no location.
  pub fn loc(&self) -> &SourceLoc {
    match self {
      Token::Keyword(data) => &data.loc,
      Token::Symbol(data) => &data.loc,
      Token::ContextualKeyword(data) => &data.loc,
      Token::Identifier(data) => &data.loc,
      Token::NullLiteral(data) => &data.loc,
      Token::BooleanLiteral(data) => &data.loc,
      Token::StringLiteral(data) => &data.loc,
      Token::NumericLiteral(data) => &data.loc,
      Token::RegExpLiteral(data) => &data.loc,
      Token::Eof(data) => &data.loc,
      Token::Nil => panic!(),
    }
  }
}

static INIT_TOKEN_DATA_ONCE: Once = Once::new();

/// Builds the keyword/symbol lookup tables; safe to call any number of times.
///
/// The lookup functions in this module call it themselves, so an explicit
/// call is optional.
pub fn init_token_data() {
  INIT_TOKEN_DATA_ONCE.call_once(|| {
    init_keywords();
    init_symbols();
    init_ctx_keyword();
  });
}

#[cfg(test)]
mod token_tests {
  use super::*;

  #[test]
  fn name() {
    init_token_data();
    assert_eq!("break", Keyword::Break.name());
    assert_eq!("}", Symbol::BraceR.name());
    assert_eq!("yield", CtxKeyword::Yield.name());
  }

  #[test]
  fn static_map_set() {
    init_token_data();
    assert!(is_keyword("break"));
    assert!(is_symbol("{"));
    assert!(is_ctx_keyword("implements"));
    assert!(Symbol::Assign.is_before_expr());
    assert!(Keyword::Return.is_before_expr());
    assert!(CtxKeyword::Yield.is_before_expr());
    assert!(!Keyword::Break.is_before_expr());
    assert!(!Symbol::ParenR.is_before_expr());
    assert!(!CtxKeyword::Let.is_before_expr());
  }
}

// -------------------------------------------------------------------------------- /src/lib/ast.rs: -------------------------------------------------------------------------------- 1 | use
crate::token::*; 2 | use std::rc::Rc; 3 | 4 | #[derive(Debug)] 5 | pub enum Literal { 6 | RegExp(RegExpData), 7 | Null(NullData), 8 | Undef(UndefData), 9 | String(StringData), 10 | Bool(BoolData), 11 | Numeric(NumericData), 12 | } 13 | 14 | impl Literal { 15 | pub fn is_regexp(&self) -> bool { 16 | match self { 17 | Literal::RegExp(_) => true, 18 | _ => false, 19 | } 20 | } 21 | 22 | pub fn is_null(&self) -> bool { 23 | match self { 24 | Literal::Null(_) => true, 25 | _ => false, 26 | } 27 | } 28 | 29 | pub fn is_str(&self) -> bool { 30 | match self { 31 | Literal::String(_) => true, 32 | _ => false, 33 | } 34 | } 35 | 36 | pub fn is_bool(&self) -> bool { 37 | match self { 38 | Literal::Bool(_) => true, 39 | _ => false, 40 | } 41 | } 42 | 43 | pub fn is_num(&self) -> bool { 44 | match self { 45 | Literal::Numeric(_) => true, 46 | _ => false, 47 | } 48 | } 49 | 50 | pub fn regexp(&self) -> &RegExpData { 51 | match self { 52 | Literal::RegExp(d) => d, 53 | _ => panic!(), 54 | } 55 | } 56 | 57 | pub fn null(&self) -> &NullData { 58 | match self { 59 | Literal::Null(d) => d, 60 | _ => panic!(), 61 | } 62 | } 63 | 64 | pub fn str(&self) -> &StringData { 65 | match self { 66 | Literal::String(d) => d, 67 | _ => panic!(), 68 | } 69 | } 70 | 71 | pub fn bool(&self) -> &BoolData { 72 | match self { 73 | Literal::Bool(d) => d, 74 | _ => panic!(), 75 | } 76 | } 77 | 78 | pub fn num(&self) -> &NumericData { 79 | match self { 80 | Literal::Numeric(d) => d, 81 | _ => panic!(), 82 | } 83 | } 84 | 85 | pub fn loc(&self) -> &SourceLoc { 86 | match self { 87 | Literal::RegExp(d) => &d.loc, 88 | Literal::Null(d) => &d.loc, 89 | Literal::Undef(d) => &d.loc, 90 | Literal::Numeric(d) => &d.loc, 91 | Literal::String(d) => &d.loc, 92 | Literal::Bool(d) => &d.loc, 93 | } 94 | } 95 | } 96 | 97 | impl From for Literal { 98 | fn from(f: RegExpData) -> Self { 99 | Literal::RegExp(f) 100 | } 101 | } 102 | 103 | impl From for Literal { 104 | fn from(f: NullData) -> Self { 105 | Literal::Null(f) 
106 | } 107 | } 108 | 109 | impl From for Literal { 110 | fn from(f: UndefData) -> Self { 111 | Literal::Undef(f) 112 | } 113 | } 114 | 115 | impl From for Literal { 116 | fn from(f: StringData) -> Self { 117 | Literal::String(f) 118 | } 119 | } 120 | 121 | impl From for Literal { 122 | fn from(f: BoolData) -> Self { 123 | Literal::Bool(f) 124 | } 125 | } 126 | 127 | impl From for Literal { 128 | fn from(f: NumericData) -> Self { 129 | Literal::Numeric(f) 130 | } 131 | } 132 | 133 | #[derive(Debug)] 134 | pub struct FnDec { 135 | pub loc: SourceLoc, 136 | pub id: Option, 137 | pub params: Vec, 138 | pub body: Stmt, 139 | } 140 | 141 | #[derive(Debug)] 142 | pub enum PrimaryExpr { 143 | This(ThisExprData), 144 | Identifier(IdData), 145 | Literal(Literal), 146 | ArrayLiteral(ArrayData), 147 | ObjectLiteral(ObjectData), 148 | Parenthesized(ParenData), 149 | Function(Rc), 150 | } 151 | 152 | impl PrimaryExpr { 153 | pub fn is_id(&self) -> bool { 154 | match self { 155 | PrimaryExpr::Identifier(_) => true, 156 | _ => false, 157 | } 158 | } 159 | 160 | pub fn is_this(&self) -> bool { 161 | match self { 162 | PrimaryExpr::This(_) => true, 163 | _ => false, 164 | } 165 | } 166 | 167 | pub fn is_literal(&self) -> bool { 168 | match self { 169 | PrimaryExpr::Literal(_) => true, 170 | _ => false, 171 | } 172 | } 173 | 174 | pub fn is_array(&self) -> bool { 175 | match self { 176 | PrimaryExpr::ArrayLiteral(_) => true, 177 | _ => false, 178 | } 179 | } 180 | 181 | pub fn is_object(&self) -> bool { 182 | match self { 183 | PrimaryExpr::ObjectLiteral(_) => true, 184 | _ => false, 185 | } 186 | } 187 | 188 | pub fn is_paren(&self) -> bool { 189 | match self { 190 | PrimaryExpr::Parenthesized(_) => true, 191 | _ => false, 192 | } 193 | } 194 | 195 | pub fn is_fn(&self) -> bool { 196 | match self { 197 | PrimaryExpr::Function(_) => true, 198 | _ => false, 199 | } 200 | } 201 | 202 | pub fn this(&self) -> &ThisExprData { 203 | match self { 204 | PrimaryExpr::This(d) => d, 205 | _ => 
panic!(), 206 | } 207 | } 208 | 209 | pub fn literal(&self) -> &Literal { 210 | match self { 211 | PrimaryExpr::Literal(d) => d, 212 | _ => panic!(), 213 | } 214 | } 215 | 216 | pub fn array(&self) -> &ArrayData { 217 | match self { 218 | PrimaryExpr::ArrayLiteral(d) => d, 219 | _ => panic!(), 220 | } 221 | } 222 | 223 | pub fn object(&self) -> &ObjectData { 224 | match self { 225 | PrimaryExpr::ObjectLiteral(d) => d, 226 | _ => panic!(), 227 | } 228 | } 229 | 230 | pub fn paren(&self) -> &ParenData { 231 | match self { 232 | PrimaryExpr::Parenthesized(d) => d, 233 | _ => panic!(), 234 | } 235 | } 236 | 237 | pub fn id(&self) -> &IdData { 238 | match self { 239 | PrimaryExpr::Identifier(d) => d, 240 | _ => panic!(), 241 | } 242 | } 243 | 244 | pub fn fn_expr(&self) -> &Rc { 245 | match self { 246 | PrimaryExpr::Function(expr) => expr, 247 | _ => panic!(), 248 | } 249 | } 250 | 251 | pub fn loc(&self) -> &SourceLoc { 252 | match self { 253 | PrimaryExpr::This(d) => &d.loc, 254 | PrimaryExpr::Identifier(d) => &d.loc, 255 | PrimaryExpr::Literal(d) => &d.loc(), 256 | PrimaryExpr::ArrayLiteral(d) => &d.loc, 257 | PrimaryExpr::ObjectLiteral(d) => &d.loc, 258 | PrimaryExpr::Parenthesized(d) => &d.loc, 259 | PrimaryExpr::Function(d) => &d.loc, 260 | } 261 | } 262 | } 263 | 264 | impl From for PrimaryExpr { 265 | fn from(f: ThisExprData) -> Self { 266 | PrimaryExpr::This(f) 267 | } 268 | } 269 | 270 | impl From for PrimaryExpr { 271 | fn from(f: IdData) -> Self { 272 | PrimaryExpr::Identifier(f) 273 | } 274 | } 275 | 276 | impl From for PrimaryExpr { 277 | fn from(f: Literal) -> Self { 278 | PrimaryExpr::Literal(f) 279 | } 280 | } 281 | 282 | impl From for PrimaryExpr { 283 | fn from(f: StringData) -> Self { 284 | let node: Literal = f.into(); 285 | node.into() 286 | } 287 | } 288 | 289 | impl From for PrimaryExpr { 290 | fn from(f: NullData) -> Self { 291 | let node: Literal = f.into(); 292 | node.into() 293 | } 294 | } 295 | 296 | impl From for PrimaryExpr { 297 | fn 
from(f: UndefData) -> Self { 298 | let node: Literal = f.into(); 299 | node.into() 300 | } 301 | } 302 | 303 | impl From for PrimaryExpr { 304 | fn from(f: RegExpData) -> Self { 305 | let node: Literal = f.into(); 306 | node.into() 307 | } 308 | } 309 | 310 | impl From for PrimaryExpr { 311 | fn from(f: BoolData) -> Self { 312 | let node: Literal = f.into(); 313 | node.into() 314 | } 315 | } 316 | 317 | impl From for PrimaryExpr { 318 | fn from(f: NumericData) -> Self { 319 | let node: Literal = f.into(); 320 | node.into() 321 | } 322 | } 323 | 324 | impl From for PrimaryExpr { 325 | fn from(f: ArrayData) -> Self { 326 | PrimaryExpr::ArrayLiteral(f) 327 | } 328 | } 329 | 330 | impl From for PrimaryExpr { 331 | fn from(f: FnDec) -> Self { 332 | let expr = Rc::new(f); 333 | PrimaryExpr::Function(expr) 334 | } 335 | } 336 | 337 | impl From for PrimaryExpr { 338 | fn from(f: ParenData) -> Self { 339 | PrimaryExpr::Parenthesized(f) 340 | } 341 | } 342 | 343 | #[derive(Debug)] 344 | pub struct UnaryExpr { 345 | pub loc: SourceLoc, 346 | pub op: Token, 347 | pub argument: Expr, 348 | pub prefix: bool, 349 | } 350 | 351 | #[derive(Debug)] 352 | pub struct BinaryExpr { 353 | pub loc: SourceLoc, 354 | pub op: Token, 355 | pub left: Expr, 356 | pub right: Expr, 357 | } 358 | 359 | #[derive(Debug)] 360 | pub struct MemberExpr { 361 | pub loc: SourceLoc, 362 | pub object: Expr, 363 | pub property: Expr, 364 | pub computed: bool, 365 | } 366 | 367 | #[derive(Debug)] 368 | pub struct NewExpr { 369 | pub loc: SourceLoc, 370 | pub callee: Expr, 371 | pub arguments: Vec, 372 | } 373 | 374 | #[derive(Debug)] 375 | pub struct CallExpr { 376 | pub loc: SourceLoc, 377 | pub callee: Expr, 378 | pub arguments: Vec, 379 | } 380 | 381 | #[derive(Debug)] 382 | pub struct CondExpr { 383 | pub loc: SourceLoc, 384 | pub test: Expr, 385 | pub cons: Expr, 386 | pub alt: Expr, 387 | } 388 | 389 | #[derive(Debug)] 390 | pub struct AssignExpr { 391 | pub loc: SourceLoc, 392 | pub op: Token, 393 | 
pub left: Expr, 394 | pub right: Expr, 395 | } 396 | 397 | #[derive(Debug)] 398 | pub struct SeqExpr { 399 | pub loc: SourceLoc, 400 | pub exprs: Vec, 401 | } 402 | 403 | #[derive(Debug, Clone)] 404 | pub enum Expr { 405 | Primary(Rc), 406 | Member(Rc), 407 | New(Rc), 408 | Call(Rc), 409 | Unary(Rc), 410 | Binary(Rc), 411 | Assignment(Rc), 412 | Conditional(Rc), 413 | Sequence(Rc), 414 | } 415 | 416 | impl Expr { 417 | pub fn is_unary(&self) -> bool { 418 | match self { 419 | Expr::Unary(_) => true, 420 | _ => false, 421 | } 422 | } 423 | 424 | pub fn is_member(&self) -> bool { 425 | match self { 426 | Expr::Member(_) => true, 427 | _ => false, 428 | } 429 | } 430 | 431 | pub fn is_new(&self) -> bool { 432 | match self { 433 | Expr::New(_) => true, 434 | _ => false, 435 | } 436 | } 437 | 438 | pub fn is_call(&self) -> bool { 439 | match self { 440 | Expr::Call(_) => true, 441 | _ => false, 442 | } 443 | } 444 | 445 | pub fn is_bin(&self) -> bool { 446 | match self { 447 | Expr::Binary(_) => true, 448 | _ => false, 449 | } 450 | } 451 | 452 | pub fn is_cond(&self) -> bool { 453 | match self { 454 | Expr::Conditional(_) => true, 455 | _ => false, 456 | } 457 | } 458 | 459 | pub fn is_assign(&self) -> bool { 460 | match self { 461 | Expr::Assignment(_) => true, 462 | _ => false, 463 | } 464 | } 465 | 466 | pub fn is_seq(&self) -> bool { 467 | match self { 468 | Expr::Sequence(_) => true, 469 | _ => false, 470 | } 471 | } 472 | 473 | pub fn primary(&self) -> &PrimaryExpr { 474 | match self { 475 | Expr::Primary(expr) => expr, 476 | _ => panic!(), 477 | } 478 | } 479 | 480 | pub fn member(&self) -> &MemberExpr { 481 | match self { 482 | Expr::Member(expr) => expr, 483 | _ => panic!(), 484 | } 485 | } 486 | 487 | pub fn unary(&self) -> &UnaryExpr { 488 | match self { 489 | Expr::Unary(expr) => expr, 490 | _ => panic!(), 491 | } 492 | } 493 | 494 | pub fn new_expr(&self) -> &NewExpr { 495 | match self { 496 | Expr::New(expr) => expr, 497 | _ => panic!(), 498 | } 499 | } 
// Remaining variant accessors of `impl Expr` (the impl header precedes this span).

  /// Returns the call expression; panics on any other variant.
  pub fn call_expr(&self) -> &CallExpr {
    match self {
      Expr::Call(expr) => expr,
      _ => panic!(),
    }
  }

  /// Returns the binary expression; panics on any other variant.
  pub fn bin_expr(&self) -> &BinaryExpr {
    match self {
      Expr::Binary(expr) => expr,
      _ => panic!(),
    }
  }

  /// Returns the conditional expression; panics on any other variant.
  pub fn cond_expr(&self) -> &CondExpr {
    match self {
      Expr::Conditional(expr) => expr,
      _ => panic!(),
    }
  }

  /// Returns the assignment expression; panics on any other variant.
  pub fn assign_expr(&self) -> &AssignExpr {
    match self {
      Expr::Assignment(expr) => expr,
      _ => panic!(),
    }
  }

  /// Returns the sequence expression; panics on any other variant.
  pub fn seq_expr(&self) -> &SeqExpr {
    match self {
      Expr::Sequence(expr) => expr,
      _ => panic!(),
    }
  }
}

// Each conversion below moves the node into a fresh `Rc` and wraps it in the
// matching `Expr` variant.

impl From<UnaryExpr> for Expr {
  fn from(f: UnaryExpr) -> Self {
    Expr::Unary(Rc::new(f))
  }
}

impl From<PrimaryExpr> for Expr {
  fn from(f: PrimaryExpr) -> Self {
    Expr::Primary(Rc::new(f))
  }
}

impl From<NewExpr> for Expr {
  fn from(f: NewExpr) -> Self {
    Expr::New(Rc::new(f))
  }
}

impl From<CallExpr> for Expr {
  fn from(f: CallExpr) -> Self {
    Expr::Call(Rc::new(f))
  }
}

impl From<BinaryExpr> for Expr {
  fn from(f: BinaryExpr) -> Self {
    Expr::Binary(Rc::new(f))
  }
}

impl From<CondExpr> for Expr {
  fn from(f: CondExpr) -> Self {
    Expr::Conditional(Rc::new(f))
  }
}

impl From<AssignExpr> for Expr {
  fn from(f: AssignExpr) -> Self {
    Expr::Assignment(Rc::new(f))
  }
}

impl From<SeqExpr> for Expr {
  fn from(f: SeqExpr) -> Self {
    Expr::Sequence(Rc::new(f))
  }
}

/// Data of a `this` expression (location only).
#[derive(Debug)]
pub struct ThisExprData {
  pub loc: SourceLoc,
}

impl ThisExprData {
  pub fn new(loc: SourceLoc) -> Self {
    ThisExprData { loc }
  }
}

/// Data of an identifier: location and name.
#[derive(Debug)]
pub struct IdData {
  pub loc: SourceLoc,
  pub name: String,
}

impl IdData {
  pub fn new(loc: SourceLoc, name: String) -> Self {
    IdData { loc, name }
  }
}

/// Data of a regexp literal: location and its text.
#[derive(Debug)]
pub struct RegExpData {
  pub loc: SourceLoc,
  pub value: String,
}

impl RegExpData {
  pub fn new(loc: SourceLoc, value: String) -> Self {
    RegExpData { loc, value }
  }
}

/// Data of a `null` literal (location only).
#[derive(Debug)]
pub struct NullData {
  pub loc: SourceLoc,
}

impl NullData {
  pub fn new(loc: SourceLoc) -> Self {
    NullData { loc }
  }
}

/// Data of an `undefined` literal (location only).
#[derive(Debug)]
pub struct UndefData {
  pub loc: SourceLoc,
}

impl UndefData {
  pub fn new(loc: SourceLoc) -> Self {
    UndefData { loc }
  }
}

/// Data of a string literal: location and value.
#[derive(Debug)]
pub struct StringData {
  pub loc: SourceLoc,
  pub value: String,
}

impl StringData {
  pub fn new(loc: SourceLoc, value: String) -> Self {
    StringData { loc, value }
  }
}

/// Data of a boolean literal: location and value.
#[derive(Debug)]
pub struct BoolData {
  pub loc: SourceLoc,
  pub value: bool,
}

impl BoolData {
  pub fn new(loc: SourceLoc, value: bool) -> Self {
    BoolData { loc, value }
  }
}

/// Data of a numeric literal; the value is kept as a `String`.
#[derive(Debug)]
pub struct NumericData {
  pub loc: SourceLoc,
  pub value: String,
}

impl NumericData {
  pub fn new(loc: SourceLoc, value: String) -> Self {
    NumericData { loc, value }
  }
}

// NOTE(review): the extracted text had every `<...>` generic argument removed.
// `Rc<T>` payloads of `Stmt` are fixed by the `From` impls that follow. The
// element types of the `Vec<..>`/`Option<..>` fields below are reconstructed
// and should be confirmed against the upstream file.

/// Data of an array literal.
#[derive(Debug)]
pub struct ArrayData {
  pub loc: SourceLoc,
  pub value: Vec<Expr>,
}

/// One `key: value` entry of an object literal.
#[derive(Debug)]
pub struct ObjectProperty {
  pub loc: SourceLoc,
  pub key: Expr,
  pub value: Expr,
}

/// Data of an object literal.
#[derive(Debug)]
pub struct ObjectData {
  pub loc: SourceLoc,
  pub properties: Vec<ObjectProperty>,
}

/// Data of a parenthesized expression.
#[derive(Debug)]
pub struct ParenData {
  pub loc: SourceLoc,
  pub value: Expr,
}

/// Expression statement.
#[derive(Debug)]
pub struct ExprStmt {
  pub expr: Expr,
}

/// Block statement.
#[derive(Debug, Clone)]
pub struct BlockStmt {
  pub loc: SourceLoc,
  pub body: Vec<Stmt>,
}

/// One declarator of a variable declaration: binding and optional initializer.
#[derive(Debug)]
pub struct VarDecor {
  pub id: PrimaryExpr,
  pub init: Option<Expr>,
}

/// Variable declaration holding its declarators.
#[derive(Debug)]
pub struct VarDec {
  pub loc: SourceLoc,
  pub decs: Vec<VarDecor>,
}

/// Empty statement.
#[derive(Debug)]
pub struct EmptyStmt {
  pub loc: SourceLoc,
}

/// `if` statement with optional alternate branch.
#[derive(Debug)]
pub struct IfStmt {
  pub loc: SourceLoc,
  pub test: Expr,
  pub cons: Stmt,
  pub alt: Option<Stmt>,
}

/// First clause of a `for` / `for-in` head: a declaration or an expression.
#[derive(Debug)]
pub enum ForFirst {
  VarDec(Rc<VarDec>),
  Expr(Expr),
}

/// `for` statement; each head clause is optional.
#[derive(Debug)]
pub struct ForStmt {
  pub loc: SourceLoc,
  pub init: Option<ForFirst>,
  pub test: Option<Expr>,
  pub update: Option<Expr>,
  pub body: Stmt,
}

/// `for-in` statement.
#[derive(Debug)]
pub struct ForInStmt {
  pub loc: SourceLoc,
  pub left: ForFirst,
  pub right: Expr,
  pub body: Stmt,
}

/// `do-while` statement.
#[derive(Debug)]
pub struct DoWhileStmt {
  pub loc: SourceLoc,
  pub test: Expr,
  pub body: Stmt,
}

/// `while` statement.
#[derive(Debug)]
pub struct WhileStmt {
  pub loc: SourceLoc,
  pub test: Expr,
  pub body: Stmt,
}

/// `continue` statement.
#[derive(Debug)]
pub struct ContStmt {
  pub loc: SourceLoc,
}

/// `break` statement.
#[derive(Debug)]
pub struct BreakStmt {
  pub loc: SourceLoc,
}

/// `return` statement with optional argument.
#[derive(Debug)]
pub struct ReturnStmt {
  pub loc: SourceLoc,
  pub argument: Option<Expr>,
}

/// `with` statement.
#[derive(Debug)]
pub struct WithStmt {
  pub loc: SourceLoc,
  pub object: Expr,
  pub body: Stmt,
}

/// One clause of a `switch`: optional test plus consequent statements.
#[derive(Debug)]
pub struct SwitchCase {
  pub test: Option<Expr>,
  pub cons: Vec<Stmt>,
}

/// `switch` statement.
#[derive(Debug)]
pub struct SwitchStmt {
  pub loc: SourceLoc,
  pub discrim: Expr,
  pub cases: Vec<SwitchCase>,
}

/// `throw` statement.
#[derive(Debug)]
pub struct ThrowStmt {
  pub loc: SourceLoc,
  pub argument: Expr,
}

/// `catch` clause: binding and body.
#[derive(Debug)]
pub struct CatchClause {
  pub id: PrimaryExpr,
  pub body: Stmt,
}

/// `try` statement with optional handler and finalizer.
#[derive(Debug)]
pub struct TryStmt {
  pub loc: SourceLoc,
  pub block: Stmt,
  pub handler: Option<CatchClause>,
  pub finalizer: Option<Stmt>,
}

/// `debugger` statement.
#[derive(Debug)]
pub struct DebugStmt {
  pub loc: SourceLoc,
}

/// Statement node. Every variant holds its payload behind an `Rc`, so
/// cloning a `Stmt` only bumps a reference count.
#[derive(Debug, Clone)]
pub enum Stmt {
  Block(Rc<BlockStmt>),
  VarDec(Rc<VarDec>),
  Empty(Rc<EmptyStmt>),
  Expr(Rc<ExprStmt>),
  If(Rc<IfStmt>),
  For(Rc<ForStmt>),
  ForIn(Rc<ForInStmt>),
  DoWhile(Rc<DoWhileStmt>),
  While(Rc<WhileStmt>),
  Cont(Rc<ContStmt>),
  Break(Rc<BreakStmt>),
  Return(Rc<ReturnStmt>),
  With(Rc<WithStmt>),
  Switch(Rc<SwitchStmt>),
  Throw(Rc<ThrowStmt>),
  Try(Rc<TryStmt>),
  Debugger(Rc<DebugStmt>),
  Function(Rc<FnDec>),
}

// Each conversion below moves the node into a fresh `Rc` and wraps it in the
// matching `Stmt` variant.

impl From<BlockStmt> for Stmt {
  fn from(f: BlockStmt) -> Self {
    Stmt::Block(Rc::new(f))
  }
}

impl From<ExprStmt> for Stmt {
  fn from(f: ExprStmt) -> Self {
    Stmt::Expr(Rc::new(f))
  }
}

impl From<VarDec> for Stmt {
  fn from(f: VarDec) -> Self {
    Stmt::VarDec(Rc::new(f))
  }
}

impl From<IfStmt> for Stmt {
  fn from(f: IfStmt) -> Self {
    Stmt::If(Rc::new(f))
  }
}

impl From<ForInStmt> for Stmt {
  fn from(f: ForInStmt) -> Self {
    Stmt::ForIn(Rc::new(f))
  }
}

impl From<ForStmt> for Stmt {
  fn from(f: ForStmt) -> Self {
    Stmt::For(Rc::new(f))
  }
}
impl From<DoWhileStmt> for Stmt {
  fn from(f: DoWhileStmt) -> Self {
    Stmt::DoWhile(Rc::new(f))
  }
}

impl From<WhileStmt> for Stmt {
  fn from(f: WhileStmt) -> Self {
    Stmt::While(Rc::new(f))
  }
}

impl From<ContStmt> for Stmt {
  fn from(f: ContStmt) -> Self {
    Stmt::Cont(Rc::new(f))
  }
}

impl From<BreakStmt> for Stmt {
  fn from(f: BreakStmt) -> Self {
    Stmt::Break(Rc::new(f))
  }
}

impl From<ReturnStmt> for Stmt {
  fn from(f: ReturnStmt) -> Self {
    Stmt::Return(Rc::new(f))
  }
}

impl From<EmptyStmt> for Stmt {
  fn from(f: EmptyStmt) -> Self {
    Stmt::Empty(Rc::new(f))
  }
}

impl From<WithStmt> for Stmt {
  fn from(f: WithStmt) -> Self {
    Stmt::With(Rc::new(f))
  }
}

impl From<SwitchStmt> for Stmt {
  fn from(f: SwitchStmt) -> Self {
    Stmt::Switch(Rc::new(f))
  }
}

impl From<DebugStmt> for Stmt {
  fn from(f: DebugStmt) -> Self {
    Stmt::Debugger(Rc::new(f))
  }
}

impl From<TryStmt> for Stmt {
  fn from(f: TryStmt) -> Self {
    Stmt::Try(Rc::new(f))
  }
}

impl From<ThrowStmt> for Stmt {
  fn from(f: ThrowStmt) -> Self {
    Stmt::Throw(Rc::new(f))
  }
}

impl From<FnDec> for Stmt {
  fn from(f: FnDec) -> Self {
    Stmt::Function(Rc::new(f))
  }
}

impl Stmt {
  /// True for `Stmt::Block`.
  pub fn is_block(&self) -> bool {
    matches!(self, Stmt::Block(_))
  }

  /// True for `Stmt::Expr`.
  pub fn is_expr(&self) -> bool {
    matches!(self, Stmt::Expr(_))
  }

  /// True for `Stmt::VarDec`.
  pub fn is_var(&self) -> bool {
    matches!(self, Stmt::VarDec(_))
  }

  /// True for `Stmt::If`.
  pub fn is_if(&self) -> bool {
    matches!(self, Stmt::If(_))
  }

  /// True for `Stmt::For`.
  pub fn is_for(&self) -> bool {
    matches!(self, Stmt::For(_))
  }

  /// True for `Stmt::ForIn`.
  pub fn is_for_in(&self) -> bool {
    matches!(self, Stmt::ForIn(_))
  }

  /// True for `Stmt::DoWhile`.
  pub fn is_do_while(&self) -> bool {
    matches!(self, Stmt::DoWhile(_))
  }

  /// True for `Stmt::While`.
  pub fn is_while_stmt(&self) -> bool {
    matches!(self, Stmt::While(_))
  }

  /// True for `Stmt::Return`.
  pub fn is_ret(&self) -> bool {
    matches!(self, Stmt::Return(_))
  }

  /// True for `Stmt::Cont`.
  pub fn is_cont(&self) -> bool {
    matches!(self, Stmt::Cont(_))
  }

  /// True for `Stmt::Break`.
  pub fn is_break(&self) -> bool {
    matches!(self, Stmt::Break(_))
  }

  /// True for `Stmt::Empty`.
  pub fn is_empty(&self) -> bool {
    matches!(self, Stmt::Empty(_))
  }

  /// True for `Stmt::With`.
  pub fn is_with(&self) -> bool {
    matches!(self, Stmt::With(_))
  }

  /// True for `Stmt::Switch`.
  pub fn is_switch(&self) -> bool {
    matches!(self, Stmt::Switch(_))
  }

  /// True for `Stmt::Debugger`.
  pub fn is_debug(&self) -> bool {
    matches!(self, Stmt::Debugger(_))
  }

  /// True for `Stmt::Try`.
  pub fn is_try(&self) -> bool {
    matches!(self, Stmt::Try(_))
  }

  /// True for `Stmt::Throw`.
  pub fn is_throw(&self) -> bool {
    matches!(self, Stmt::Throw(_))
  }

  /// True for `Stmt::Function`.
  pub fn is_fn(&self) -> bool {
    matches!(self, Stmt::Function(_))
  }

  // The accessors below return the payload of the named variant and panic
  // when `self` is any other variant.

  /// Payload of `Stmt::Block`.
  pub fn block(&self) -> &BlockStmt {
    match self {
      Stmt::Block(s) => s,
      _ => panic!(),
    }
  }

  /// Payload of `Stmt::Expr`.
  pub fn expr(&self) -> &ExprStmt {
    match self {
      Stmt::Expr(s) => s,
      _ => panic!(),
    }
  }

  /// Payload of `Stmt::VarDec`.
  pub fn var_dec(&self) -> &VarDec {
    match self {
      Stmt::VarDec(s) => s,
      _ => panic!(),
    }
  }

  /// Payload of `Stmt::If`.
  pub fn if_stmt(&self) -> &IfStmt {
    match self {
      Stmt::If(s) => s,
      _ => panic!(),
    }
  }

  /// Payload of `Stmt::For`.
  pub fn for_stmt(&self) -> &ForStmt {
    match self {
      Stmt::For(s) => s,
      _ => panic!(),
    }
  }

  /// Payload of `Stmt::ForIn`.
  pub fn for_in(&self) -> &ForInStmt {
    match self {
      Stmt::ForIn(s) => s,
      _ => panic!(),
    }
  }

  /// Payload of `Stmt::DoWhile`.
  pub fn do_while(&self) -> &DoWhileStmt {
    match self {
      Stmt::DoWhile(s) => s,
      _ => panic!(),
    }
  }

  /// Payload of `Stmt::While`.
  pub fn while_stmt(&self) -> &WhileStmt {
    match self {
      Stmt::While(s) => s,
      _ => panic!(),
    }
  }

  /// Payload of `Stmt::Cont`.
  pub fn cont(&self) -> &ContStmt {
    match self {
      Stmt::Cont(s) => s,
      _ => panic!(),
    }
  }

  /// Payload of `Stmt::Break`.
  pub fn break_stmt(&self) -> &BreakStmt {
    match self {
      Stmt::Break(s) => s,
      _ => panic!(),
    }
  }

  /// Payload of `Stmt::Return`.
  pub fn ret_stmt(&self) -> &ReturnStmt {
    match self {
      Stmt::Return(s) => s,
      _ => panic!(),
    }
  }

  /// Payload of `Stmt::Empty`.
  pub fn empty(&self) -> &EmptyStmt {
    match self {
      Stmt::Empty(s) => s,
      _ => panic!(),
    }
  }

  /// Payload of `Stmt::With`.
  pub fn with_stmt(&self) -> &WithStmt {
    match self {
      Stmt::With(s) => s,
      _ => panic!(),
    }
  }

  /// Payload of `Stmt::Switch`.
  pub fn switch_stmt(&self) -> &SwitchStmt {
    match self {
      Stmt::Switch(s) => s,
      _ => panic!(),
    }
  }

  /// Payload of `Stmt::Debugger`.
  pub fn debug_stmt(&self) -> &DebugStmt {
    match self {
      Stmt::Debugger(s) => s,
      _ => panic!(),
    }
  }

  /// Payload of `Stmt::Try`.
  pub fn try_stmt(&self) -> &TryStmt {
    match self {
      Stmt::Try(s) => s,
      _ => panic!(),
    }
  }

  /// Payload of `Stmt::Throw`.
  pub fn throw_stmt(&self) -> &ThrowStmt {
    match self {
      Stmt::Throw(s) => s,
      _ => panic!(),
    }
  }

  /// Payload of `Stmt::Function`.
  pub fn fn_dec(&self) -> &FnDec {
    match self {
      Stmt::Function(s) => s,
      _ => panic!(),
    }
  }
}

/// Root of the AST: the top-level statements of a program.
#[derive(Debug)]
pub struct Prog {
  // NOTE(review): element type reconstructed (generic argument was missing
  // in the extracted text) — confirm.
  pub body: Vec<Stmt>,
}

--------------------------------------------------------------------------------
/src/lib/vm/exec.rs:
--------------------------------------------------------------------------------
use crate::asm::chunk::*;
use crate::vm::gc::*;
use crate::vm::obj::*;
use std::collections::{HashMap, HashSet};
use std::mem;
use std::os::raw::c_void;
use std::ptr::{drop_in_place, null_mut};

// NOTE(review): the extracted text had every `<...>` generic argument removed.
// They are reconstructed below from how the values are used in this file;
// confirm against the upstream source.

/// Error produced while executing bytecode.
#[derive(Debug)]
pub struct RuntimeError {
  pub msg: String,
}

impl RuntimeError {
  fn new(msg: &str) -> Self {
    RuntimeError {
      msg: msg.to_owned(),
    }
  }
}

pub type CallInfoPtr = *mut CallInfo;

/// Reinterprets `ptr` as a `CallInfo` and hands out a `'static` mutable
/// reference to it. No validity or aliasing check is performed here.
#[inline(always)]
pub fn as_ci<T>(ptr: *mut T) -> &'static mut CallInfo {
  unsafe { &mut (*(ptr as CallInfoPtr)) }
}

/// Per-call bookkeeping; records form a doubly linked list via `prev`/`next`.
///
/// NOTE(review): this type derives `Clone` but its `Drop` decrements the
/// ref-count of `this`; a clone would decrement a second time without a
/// matching increment.
#[repr(C)]
#[derive(Debug, Clone)]
pub struct CallInfo {
  prev: CallInfoPtr,
  next: CallInfoPtr,
  pc: u32,
  fun: u32,
  base: u32,
  is_native: bool,
  // Keyed by the `u32` slot index used in `find_upvals` / `Vm::new`.
  open_upvals: HashMap<u32, UpValPtr>,
  is_new: bool,
  this: JsObjPtr,
}

impl CallInfo {
  /// Heap-allocates a zeroed record and leaks it as a raw pointer
  /// (`Box::into_raw`); `Vm::post_call` reclaims it.
  pub fn new() -> CallInfoPtr {
    Box::into_raw(Box::new(CallInfo {
      prev: null_mut(),
      next: null_mut(),
      pc: 0,
      fun: 0,
      base: 0,
      is_native: false,
      open_upvals: HashMap::new(),
      is_new: false,
      this: null_mut(),
    }))
  }

  /// Replaces `this`, decrementing the old object (if any) and incrementing
  /// the new one (if any).
  pub fn set_this(&mut self, this: JsObjPtr) {
    if !self.this.is_null() {
      as_obj(self.this).dec();
    }
    if !this.is_null() {
      as_obj(this).inc();
    }
    self.this = this;
  }
}

impl Drop for CallInfo {
  /// Releases the reference held on `this`.
  fn drop(&mut self) {
    if !self.this.is_null() {
      as_obj(self.this).dec();
    }
  }
}

/// Result of resolving an operand: a constant or a stack object.
pub enum RKValue {
  Kst(&'static Const),
  JsObj(JsObjPtr),
}

pub type VmPtr = *mut Vm;

/// Reinterprets `ptr` as a `Vm` and hands out a `'static` mutable reference
/// to it. No validity or aliasing check is performed here.
#[inline(always)]
pub fn as_vm<T>(ptr: *mut T) -> &'static mut Vm {
  unsafe { &mut (*(ptr as VmPtr)) }
}

/// Interpreter state: collector, loaded chunk, current call record, value
/// stack and the global environment dictionary.
pub struct Vm {
  gc: Box<Gc>,
  c: Chunk,
  ci: CallInfoPtr,
  s: Vec<GcObjPtr>,
  pub env: JsDictPtr,
}

impl JsFunction {
  /// Views the opaque `f` pointer as the function template it was set from.
  fn tpl(&self) -> &FnTpl {
    unsafe { &(*(self.f as *const FnTpl)) }
  }
}

impl Gc {
  /// Allocates a function object and marks it as native, storing `f` as its
  /// opaque code pointer.
  pub fn new_fun_native(&mut self, f: NativeFn, is_root: bool) -> JsFunPtr {
    let nf = self.new_fun(is_root);
    as_fun(nf).is_native = true;
    as_fun(nf).f = f as *const c_void;
    nf
  }
}

pub type NativeFn = fn(vm: VmPtr);

/// Native `print`: writes each argument of the current call on its own line.
/// Kinds other than undefined / number / string / function print nothing.
fn print_obj(vm: VmPtr) {
  for arg in as_vm(vm).get_args() {
    match as_obj(arg).kind {
      GcObjKind::Undef => println!("undefined"),
      GcObjKind::Number => println!("{:#?}", as_num(arg).d),
      GcObjKind::String => println!("{:#?}", as_str(arg).d),
      GcObjKind::Function => println!("{:#?}", as_fun(arg)),
      _ => (),
    }
  }
}

impl Vm {
  /// Builds a VM for chunk `c`.
  ///
  /// Stack slot 0 holds the environment dictionary (with `print` installed),
  /// slot 1 the top-level function built from the chunk's template; the
  /// initial call record uses `fun = 1`, `base = 2`, and upvalue 0 of the
  /// top-level function refers to the environment.
  pub fn new(c: Chunk, max_heap_size: usize) -> Box<Vm> {
    let mut v = Box::new(Vm {
      gc: Gc::new(max_heap_size),
      c,
      ci: CallInfo::new(),
      s: vec![],
      env: null_mut(),
    });
    v.env = v.gc.new_dict(true);

    // Install the native `print` into the environment.
    let print_fn = v.gc.new_fun_native(print_obj, false);
    as_obj(print_fn).inc();
    as_dict(v.env).insert("print", as_obj_ptr(print_fn));

    v.s.push(v.env as GcObjPtr);

    let f = v.gc.new_fun(true);
    let fr = as_fun(f);
    fr.f = &v.c.fun_tpl as *const FnTpl as *const c_void;
    fr.is_native = false;
    v.s.push(f as GcObjPtr);

    let uv = v.gc.new_upval(as_obj_ptr(v.env), false);
    as_ci(v.ci).fun = 1;
    as_ci(v.ci).base = 2;
    as_ci(v.ci).open_upvals.insert(0, uv);

    as_obj(uv).inc();
    fr.upvals.insert(0, uv);

    v
  }

  /// Stores `v` in stack slot `i`, growing the stack with `undefined` when
  /// the slot does not exist yet, and moves root/ref-count ownership from the
  /// previous occupant to `v`.
  fn set_stack_slot(&mut self, i: u32, v: JsObjPtr) {
    self.gc.inc(v);
    let i = i as usize;
    // `i >= len` rather than `i > len - 1`: the latter underflows when the
    // stack is empty.
    if i >= self.s.len() {
      self.s.resize(i + 1, self.gc.js_undef());
    }
    if let Some(&old) = self.s.get(i) {
      if !old.is_null() {
        self.gc.remove_root(old);
        self.gc.dec(old);
      }
    }
    self.gc.append_root(v);
    self.s[i] = v;
  }

  /// Function object of the current call, or `None` when there is no call
  /// record or its `fun` slot is outside the stack.
  fn get_fn(&self) -> Option<JsFunPtr> {
    if self.ci.is_null() {
      return None;
    }
    let fi = as_ci(self.ci).fun as usize;
    self.s.get(fi).map(|f| *f as JsFunPtr)
  }

  /// Reads the instruction at the current `pc` and advances `pc`.
  ///
  /// Yields `Ok(None)` when there is no current function or `pc` is past the
  /// end of its code, and an error when the current function is native.
  fn fetch(&mut self) -> Result<Option<&Inst>, RuntimeError> {
    let f = match self.get_fn() {
      Some(f) => f,
      _ => return Ok(None),
    };
    let f = as_fun(f);
    if f.is_native {
      return Err(RuntimeError::new("using native fun as js"));
    }
    let pc = as_ci(self.ci).pc as usize;
    as_ci(self.ci).pc += 1;
    let code = &f.tpl().code;
    Ok(code.get(pc))
  }

  /// Fetch/dispatch loop; stops at the first error or when `fetch` yields no
  /// instruction.
  pub fn exec(&mut self) -> Result<(), RuntimeError> {
    loop {
      let i = match self.fetch()? {
        None => break,
        Some(i) => i.clone(),
      };
      self.dispatch(i)?;
    }
    Ok(())
  }

  /// Constant addressed by operand `r`, or `None` when bit 8 of `r` is clear
  /// (i.e. the operand is not a constant index).
  fn get_kst(&self, r: u32) -> Option<&'static Const> {
    let r = r as usize;
    let mask = 1 << 8;
    let is_k = r & mask == 256;
    if !is_k {
      return None;
    }
    let f = self.get_fn().unwrap();
    let cs = &as_fun(f).tpl().consts;
    Some(&cs[r - 256])
  }

  /// Resolves operand `r` to a constant, or else to stack slot `base + r`.
  fn rk(&self, base: u32, r: u32) -> RKValue {
    if let Some(k) = self.get_kst(r) {
      return RKValue::Kst(k);
    }
    RKValue::JsObj(self.get_stack_item(base + r))
  }

  /// Like `rk`, but materialises constants as fresh objects. The flag is
  /// `true` when a new object was allocated for the constant.
  fn x_rk(&mut self, base: u32, r: u32) -> (JsObjPtr, bool) {
    let mut is_new = false;
    let v = match self.rk(base, r) {
      RKValue::Kst(kst) => {
        is_new = true;
        self.gc.new_obj_from_kst(kst, false)
      }
      RKValue::JsObj(k) => k,
    };
    (v, is_new)
  }

  /// Stack items strictly between the current call's `fun` slot and `base`.
  fn get_args(&self) -> Vec<JsObjPtr> {
    let ci = as_ci(self.ci);
    let len = ci.base - ci.fun - 1;
    let mut args = vec![];
    for i in 1..=len {
      args.push(self.get_stack_item(ci.fun + i));
    }
    args
  }

  /// Writes the return value into the current call's `fun` slot.
  fn set_return(&mut self, ret: JsObjPtr) {
    self.set_stack_slot(as_ci(self.ci).fun, ret);
  }

  /// Stack slot `i`; missing or null slots read as `undefined`.
  fn get_stack_item(&self, i: u32) -> JsObjPtr {
    match self.s.get(i as usize) {
      Some(v) if !v.is_null() => *v,
      _ => self.gc.js_undef(),
    }
  }

  /// Upvalue `i` of the current function; panics when there is no current
  /// function or the index is out of range.
  fn get_upval(&self, i: u32) -> UpValPtr {
    let f = as_fun(self.get_fn().unwrap());
    *f.upvals.get(i as usize).unwrap()
  }

  /// Pointer to nested function template `i` of the current function.
  fn get_fn_tpl(&self, i: u32) -> *const FnTpl {
    let cf = self.get_fn().unwrap();
    let tpl = &as_fun(cf).tpl().fun_tpls[i as usize];
    tpl as *const FnTpl
  }

  /// Open upvalue for stack slot `base + uv_desc.idx` of the current call,
  /// created and registered on first use.
  fn find_upvals(&mut self, uv_desc: &UpvalDesc) -> UpValPtr {
    let ci = as_ci(self.ci);
    let i = ci.base + uv_desc.idx;
    match as_ci(self.ci).open_upvals.get(&i) {
      Some(uv) => *uv,
      None => {
        let v = self.get_stack_item(i);
        let uv = self.gc.new_upval(v, false);
        as_ci(self.ci).open_upvals.insert(i, uv);
        uv
      }
    }
  }

  /// Drops the current call's references to its open upvalues. Does nothing
  /// for native calls.
  fn close_upvals(&mut self) {
    let ci = as_ci(self.ci);
    if ci.is_native {
      return;
    }
    for uv in ci.open_upvals.values() {
      as_obj(*uv).dec();
    }
    ci.open_upvals.clear();
  }

  /// Resets every stack slot from `fun + ret_num` to the top of the stack to
  /// `undefined`.
  fn clean_ci_stack(&mut self, ret_num: u32) {
    let f = as_ci(self.ci).fun + ret_num;
    let len = self.s.len() as u32;
    for i in f..len {
      self.set_stack_slot(i, self.gc.js_undef());
    }
  }

  /// Tears down the current call: closes upvalues, clears its stack window,
  /// unlinks its call record and frees it.
  fn post_call(&mut self, ret_num: u32) {
    self.close_upvals();
    self.clean_ci_stack(ret_num);

    let cci = self.ci;
    let pci = as_ci(cci).prev;
    if !pci.is_null() {
      as_ci(pci).next = null_mut();
    }
    self.ci = pci;
    // SAFETY: `cci` came from `CallInfo::new`, i.e. `Box::into_raw`, and was
    // just unlinked, so rebuilding the box runs `Drop` and also frees the
    // allocation (`drop_in_place` alone leaked it).
    // NOTE(review): assumes every call record is created via `CallInfo::new`.
    unsafe {
      drop(Box::from_raw(cci));
    }
  }

  fn dispatch(&mut self, i: Inst) -> Result<(), RuntimeError> {
    let op = OpCode::from_u32(i.op());
    match op {
      OpCode::GETTABUP => {
        let mut ls = LocalScope::new();
        let ra = i.a();
        let rc = i.c();
        let base = as_ci(self.ci).base;
        let (k, is_new) = self.x_rk(base, i.c());
        if is_new {
          ls.reg(k);
        }
        let uv = as_uv(self.get_upval(i.b()));
        let v = as_dict(uv.v).get(k);
        self.set_stack_slot(as_ci(self.ci).base + ra, v);
      }
      OpCode::SETTABUP => {
        let mut ls = LocalScope::new();
        let base = as_ci(self.ci).base;
        let (k, is_new) = self.x_rk(base, i.b());
        if is_new {
          ls.reg(k);
        }
        let (v, is_new) = self.x_rk(base, i.c());
        if is_new {
          ls.reg(v);
        }
        let uv = as_uv(self.get_upval(i.a()));
        as_dict(uv.v).set(k, v);
      }
      OpCode::LOADK => {
        let mut ls = LocalScope::new();
        let ra = i.a();
        let (v, is_new) = self.x_rk(0, i.bx());
        if is_new {
          ls.reg(v);
        }
        self.set_stack_slot(as_ci(self.ci).base + ra, v);
      }
      OpCode::GETUPVAL => {
        let f = self.get_fn().unwrap();
        let uvs = &as_fun(f).upvals;
        let uv = uvs[i.b() as usize];
        self.set_stack_slot(as_ci(self.ci).base + i.a(), as_uv(uv).v);
      }
      OpCode::CLOSURE => {
        let mut ls = LocalScope::new();
        let f = self.gc.new_fun(false);
        as_fun(f).f = self.get_fn_tpl(i.bx()) as *const c_void;
        ls.reg(f);

        let uv_desc = &as_fun(f).tpl().upvals;
        uv_desc.iter().for_each(|uvd| {
          if uvd.in_stack {
            let uv = self.find_upvals(uvd);
            as_obj(uv).inc();
            as_fun(f).upvals.push(uv);
          } else {
            let cf = self.get_fn().unwrap();
            let uvs = &as_fun(cf).upvals;
            let uv = *uvs.get(uvd.idx as usize).unwrap();
            let uv = self.gc.new_upval(as_uv(uv).v, false);
            as_fun(f).upvals.push(uv);
          }
        });

        self.set_stack_slot(as_ci(self.ci).base + i.a(), as_obj_ptr(f));
      }
      OpCode::MOVE => {
        let base = as_ci(self.ci).base;
        let v = self.get_stack_item(base + i.b());
        self.set_stack_slot(base + i.a(), v);
      }
      OpCode::TEST => {
        let base = as_ci(self.ci).base;
        let v = self.get_stack_item(base + i.a());
        let b = as_obj(v).t_bool();
        let com = if as_obj(b).eqs_true() { 1 } else { 0 };
        let eq = com == i.c();
        if eq {
          as_ci(self.ci).pc += 1;
        }
      }
      OpCode::TESTSET => {
        let base = as_ci(self.ci).base;
        let b = self.get_stack_item(base + i.b());
        let tb = as_obj(b).t_bool();
        let com = if as_obj(tb).eqs_true() { 1 } else { 0 };
        let eq = com == i.c();
        if eq {
          as_ci(self.ci).pc += 1;
        } else {
          self.set_stack_slot(base + i.a(), b);
        }
      }
      OpCode::JMP => {
        let pc = as_ci(self.ci).pc
as i32; 438 | as_ci(self.ci).pc = (pc + i.sbx()) as u32; 439 | } 440 | OpCode::LOADBOO => { 441 | let base = as_ci(self.ci).base; 442 | let b = if i.b() == 1 { 443 | self.gc.js_true() 444 | } else { 445 | self.gc.js_false() 446 | }; 447 | self.set_stack_slot(base + i.a(), as_obj_ptr(b)); 448 | if i.c() == 1 { 449 | as_ci(self.ci).pc += 1; 450 | } 451 | } 452 | OpCode::EQ | OpCode::LT | OpCode::LE => { 453 | let mut ls = LocalScope::new(); 454 | let base = as_ci(self.ci).base; 455 | let b = self.get_stack_item(base + i.b()); 456 | let (b, is_new) = self.x_rk(base, i.b()); 457 | if is_new { 458 | ls.reg(b); 459 | } 460 | let (c, is_new) = self.x_rk(base, i.c()); 461 | if is_new { 462 | ls.reg(c); 463 | } 464 | let com = match op { 465 | OpCode::EQ => GcObj::eq(b, c), 466 | OpCode::LT => GcObj::lt(b, c), 467 | OpCode::LE => GcObj::le(b, c), 468 | _ => unimplemented!(), 469 | }; 470 | let com = if com { 1 } else { 0 }; 471 | if com != i.a() { 472 | as_ci(self.ci).pc += 1; 473 | } 474 | } 475 | OpCode::ADD | OpCode::SUB | OpCode::MUL | OpCode::DIV | OpCode::MOD => { 476 | let mut ls = LocalScope::new(); 477 | let base = as_ci(self.ci).base; 478 | 479 | let (a, is_new) = self.x_rk(base, i.b()); 480 | if is_new { 481 | ls.reg(a); 482 | } 483 | let a = as_obj(a).t_num(); 484 | ls.reg(a); 485 | 486 | let (b, is_new) = self.x_rk(base, i.c()); 487 | if is_new { 488 | ls.reg(b); 489 | } 490 | let b = as_obj(b).t_num(); 491 | ls.reg(b); 492 | 493 | let v = match op { 494 | OpCode::ADD => as_num(a).add(as_num(b)), 495 | OpCode::SUB => as_num(a).sub(as_num(b)), 496 | OpCode::MUL => as_num(a).mul(as_num(b)), 497 | OpCode::DIV => as_num(a).div(as_num(b)), 498 | OpCode::MOD => as_num(a).modulo(as_num(b)), 499 | _ => panic!(), 500 | }; 501 | ls.reg(v); 502 | self.set_stack_slot(as_ci(self.ci).base + i.a(), as_obj_ptr(v)); 503 | } 504 | OpCode::NEWTABLE => { 505 | let mut ls = LocalScope::new(); 506 | let base = as_ci(self.ci).base; 507 | let v = self.gc.new_dict(false); 508 | 
ls.reg(v); 509 | self.set_stack_slot(base + i.a(), as_obj_ptr(v)); 510 | } 511 | OpCode::SETTABLE => { 512 | let mut ls = LocalScope::new(); 513 | let base = as_ci(self.ci).base; 514 | let tb = self.get_stack_item(base + i.a()); 515 | if !as_obj(tb).check_coercible() { 516 | panic!("TypeError") 517 | } 518 | let tb = as_dict(tb); 519 | let (k, is_new) = self.x_rk(base, i.b()); 520 | if is_new { 521 | ls.reg(k); 522 | } 523 | let (v, is_new) = self.x_rk(base, i.c()); 524 | if is_new { 525 | ls.reg(v); 526 | } 527 | tb.set(k, v); 528 | } 529 | OpCode::GETTABLE => { 530 | let mut ls = LocalScope::new(); 531 | let base = as_ci(self.ci).base; 532 | let tb = self.get_stack_item(base + i.b()); 533 | if !as_obj(tb).check_coercible() { 534 | panic!("TypeError") 535 | } 536 | // TODO:: dispatch to various prop resolve handler(number, string, array) 537 | let tb = as_dict(tb); 538 | let (k, is_new) = self.x_rk(base, i.c()); 539 | if is_new { 540 | ls.reg(k); 541 | } 542 | let v = tb.get(k); 543 | self.set_stack_slot(base + i.a(), v); 544 | 545 | // set this for calling function on object 546 | as_ci(self.ci).set_this(as_obj_ptr(tb)); 547 | } 548 | OpCode::RETURN => { 549 | let b = i.b(); 550 | let mut ret_num = b - 1; 551 | if as_ci(self.ci).is_new { 552 | ret_num = 1; 553 | let ret = as_ci(self.ci).this; 554 | self.set_stack_slot(as_ci(self.ci).fun, ret); 555 | } else if ret_num == 1 { 556 | let ret = self.get_stack_item(as_ci(self.ci).base + i.a()); 557 | self.set_stack_slot(as_ci(self.ci).fun, ret); 558 | } 559 | self.post_call(ret_num); 560 | } 561 | OpCode::CALL => { 562 | let fi = as_ci(self.ci).base + i.a(); 563 | let fp = self.get_stack_item(fi); 564 | assert_eq!( 565 | as_obj(fp).kind, 566 | GcObjKind::Function, 567 | "Uncaught TypeError: not a function" 568 | ); 569 | let f = as_fun(fp); 570 | 571 | let ci = CallInfo::new(); 572 | as_ci(ci).fun = fi; 573 | as_ci(ci).base = fi + i.b(); 574 | 575 | as_ci(ci).prev = self.ci; 576 | let cci = self.ci; 577 | 
as_ci(cci).next = ci; 578 | as_ci(ci).set_this(as_ci(cci).this); 579 | self.ci = ci; 580 | 581 | if f.is_native { 582 | as_ci(ci).is_native = true; 583 | unsafe { 584 | let f = std::mem::transmute::<*const c_void, NativeFn>(f.f); 585 | f(self) 586 | } 587 | self.post_call(i.c() - 1); 588 | } else { 589 | let mut ls = LocalScope::new(); 590 | let n_args = i.b() - 1; 591 | let r_arg = as_ci(self.ci).fun + 1; 592 | let t_arg = as_ci(self.ci).base; 593 | for i in 0..n_args { 594 | let mut v = self.get_stack_item(r_arg + i); 595 | if as_obj(v).pass_by_value() { 596 | v = as_obj(v).x_pass_by_value(); 597 | ls.reg(v); 598 | } 599 | self.set_stack_slot(t_arg + i, v); 600 | } 601 | self.exec(); 602 | } 603 | } 604 | OpCode::THIS => { 605 | let base = as_ci(self.ci).base; 606 | let this = as_ci(self.ci).this; 607 | self.set_stack_slot(base + i.a(), this); 608 | } 609 | OpCode::NEW => { 610 | let fi = as_ci(self.ci).base + i.a(); 611 | let fp = self.get_stack_item(fi); 612 | assert_eq!( 613 | as_obj(fp).kind, 614 | GcObjKind::Function, 615 | "Uncaught TypeError: not a function" 616 | ); 617 | let f = as_fun(fp); 618 | 619 | let ci = CallInfo::new(); 620 | as_ci(ci).fun = fi; 621 | as_ci(ci).base = fi + i.b(); 622 | 623 | as_ci(ci).prev = self.ci; 624 | let cci = self.ci; 625 | as_ci(cci).next = ci; 626 | self.ci = ci; 627 | 628 | // we'll implement this later 629 | if f.is_native { 630 | unimplemented!() 631 | } 632 | 633 | let mut ls = LocalScope::new(); 634 | let this = self.gc.new_dict(false); 635 | ls.reg(this); 636 | 637 | let n_args = i.b() - 1; 638 | let r_arg = as_ci(self.ci).fun + 1; 639 | let t_arg = as_ci(self.ci).base; 640 | for i in 0..n_args { 641 | let mut v = self.get_stack_item(r_arg + i); 642 | if as_obj(v).pass_by_value() { 643 | v = as_obj(v).x_pass_by_value(); 644 | ls.reg(v); 645 | } 646 | self.set_stack_slot(t_arg + i, v); 647 | } 648 | 649 | as_ci(self.ci).set_this(as_obj_ptr(this)); 650 | as_ci(self.ci).is_new = true; 651 | self.exec(); 652 | } 653 | 
// TODO:: 654 | _ => (), 655 | } 656 | Ok(()) 657 | } 658 | } 659 | 660 | impl Vm { 661 | pub fn register_native_fn(&mut self, name: &str, nf: NativeFn) { 662 | let f = self.gc.new_fun_native(nf, false); 663 | as_dict(self.env).insert(name, as_obj_ptr(f)); 664 | } 665 | } 666 | 667 | #[cfg(test)] 668 | mod exec_tests { 669 | use super::*; 670 | use crate::asm::codegen::*; 671 | 672 | #[test] 673 | fn exec_init_test() { 674 | let chk = Codegen::gen( 675 | "var a = 1 676 | print(a) 677 | assert(1, a) 678 | print(native_fn()) 679 | ", 680 | ); 681 | let mut vm = Vm::new(chk, 1024); 682 | 683 | vm.register_native_fn("assert", |vm: VmPtr| { 684 | let args = as_vm(vm).get_args(); 685 | assert_eq!(args.len(), 2); 686 | let a = args[0]; 687 | let b = args[1]; 688 | assert_eq!(as_num(a).d, as_num(b).d); 689 | println!("assert ok"); 690 | }); 691 | 692 | vm.register_native_fn("native_fn", |vm: VmPtr| { 693 | let mut ls = LocalScope::new(); 694 | let ret = as_vm(vm).gc.new_str(false); 695 | ls.reg(ret); 696 | as_str(ret).d.push_str("return from native call"); 697 | as_vm(vm).set_return(as_obj_ptr(ret)); 698 | }); 699 | 700 | vm.exec(); 701 | } 702 | 703 | fn new_vm(chk: Chunk) -> Box { 704 | let mut vm = Vm::new(chk, 1 << 20); 705 | 706 | vm.register_native_fn("assert_str_eq", |vm: VmPtr| { 707 | let args = as_vm(vm).get_args(); 708 | assert_eq!(args.len(), 2); 709 | let a = args[0]; 710 | let b = args[1]; 711 | assert_eq!(as_str(a).d, as_str(b).d); 712 | }); 713 | 714 | vm.register_native_fn("assert_num_eq", |vm: VmPtr| { 715 | let args = as_vm(vm).get_args(); 716 | assert_eq!(args.len(), 2); 717 | let a = args[0]; 718 | let b = args[1]; 719 | assert_eq!(as_num(a).d, as_num(b).d); 720 | }); 721 | 722 | vm 723 | } 724 | 725 | #[test] 726 | fn js_fn_test() { 727 | let chk = Codegen::gen( 728 | " 729 | function f() { 730 | return 'return from f()' 731 | } 732 | 733 | assert_str_eq('return from f()', f()) 734 | ", 735 | ); 736 | 737 | let mut vm = new_vm(chk); 738 | vm.exec(); 
739 | } 740 | 741 | #[test] 742 | fn closure_test() { 743 | let chk = Codegen::gen( 744 | " 745 | var a = 1 746 | 747 | function f() { 748 | var b = 1 749 | return function () { 750 | a = a + b 751 | return a 752 | } 753 | } 754 | 755 | function f1() { 756 | return a 757 | } 758 | 759 | var ff = f() 760 | assert_num_eq(1, f1()) 761 | ff() 762 | assert_num_eq(2, f1()) 763 | assert_num_eq(2, a) 764 | var b = f1() + 1 765 | var b = 3 766 | assert_num_eq(3, b) 767 | ", 768 | ); 769 | 770 | let mut vm = new_vm(chk); 771 | vm.exec(); 772 | } 773 | 774 | #[test] 775 | fn and_or_test() { 776 | let chk = Codegen::gen( 777 | " 778 | a = 1 779 | b = 2 780 | e = 3 781 | f = 4 782 | var c = a && b 783 | var d = e || f 784 | assert_num_eq(2, c) 785 | assert_num_eq(3, d) 786 | ", 787 | ); 788 | 789 | let mut vm = new_vm(chk); 790 | vm.exec(); 791 | } 792 | 793 | #[test] 794 | fn math_test() { 795 | let chk = Codegen::gen( 796 | " 797 | a = 1 798 | b = 2 799 | c = 3 800 | var d = a + b * c 801 | var e = a / b - c 802 | assert_num_eq(7, d) 803 | assert_num_eq(-2.5, e) 804 | ", 805 | ); 806 | 807 | let mut vm = new_vm(chk); 808 | vm.exec(); 809 | } 810 | 811 | #[test] 812 | fn if_else_test() { 813 | let chk = Codegen::gen( 814 | " 815 | a = 2 816 | if (a == 1) { 817 | c = 4 818 | } else if (a == 2) { 819 | c = 5 820 | } 821 | assert_num_eq(5, c) 822 | 823 | a = 0 824 | if (a) b = 1 else b = 0 825 | assert_num_eq(0, b) 826 | 827 | a = 1 828 | if (a) b = 1 else b = 0 829 | assert_num_eq(1, b) 830 | ", 831 | ); 832 | 833 | let mut vm = new_vm(chk); 834 | vm.exec(); 835 | } 836 | 837 | #[test] 838 | fn lt_le_test() { 839 | let chk = Codegen::gen( 840 | " 841 | b = 3 842 | a = 1 843 | if (a < 1) b = 0 else b = 1; 844 | assert_num_eq(1, b) 845 | 846 | b = 3 847 | a = 1 848 | if (a <= 1) b = 0 else b = 1; 849 | assert_num_eq(0, b) 850 | 851 | if (a >= 1) b = 0 else b = 1; 852 | assert_num_eq(0, b) 853 | 854 | b = 3 855 | if (b > 1) b = 0 else b = 1; 856 | assert_num_eq(0, b) 857 | 858 | a 
= 1 859 | b = '1' 860 | if (a == b) { b = 1 } else b = 0 861 | assert_num_eq(1, b) 862 | ", 863 | ); 864 | 865 | let mut vm = new_vm(chk); 866 | vm.exec(); 867 | } 868 | 869 | #[test] 870 | fn for_test() { 871 | let chk = Codegen::gen( 872 | " 873 | var a = 10 874 | function f(a) { 875 | var ret = 0 876 | for(var i = 1; i <= a; i++) { 877 | ret += i 878 | } 879 | return ret 880 | } 881 | var b = f(a) 882 | assert_num_eq(10, a) 883 | assert_num_eq(55, b) 884 | ", 885 | ); 886 | 887 | let mut vm = new_vm(chk); 888 | vm.exec(); 889 | } 890 | 891 | #[test] 892 | fn for_break_test() { 893 | let chk = Codegen::gen( 894 | " 895 | var a = 10 896 | function f(a) { 897 | var ret = 0 898 | for(var i = 1; ; i++) { 899 | if (i > a) break 900 | ret += i 901 | } 902 | return ret 903 | } 904 | var b = f(a) 905 | assert_num_eq(10, a) 906 | assert_num_eq(55, b) 907 | ", 908 | ); 909 | 910 | let mut vm = new_vm(chk); 911 | vm.exec(); 912 | } 913 | 914 | #[test] 915 | fn while_test() { 916 | let chk = Codegen::gen( 917 | " 918 | function f(a, b) { 919 | var ret = 0; 920 | var i = 1; 921 | while (i <= b) { 922 | ret += i 923 | ++i 924 | } 925 | return ret 926 | } 927 | var a = f(1, 10) 928 | assert_num_eq(55, a) 929 | 930 | function f1(a, b) { 931 | var ret = 0; 932 | while (true) { 933 | if (b == 0) break; 934 | ret += b 935 | b-- 936 | } 937 | return ret 938 | } 939 | var a = f1(1, 10) 940 | assert_num_eq(55, a) 941 | ", 942 | ); 943 | 944 | let mut vm = new_vm(chk); 945 | vm.exec(); 946 | } 947 | 948 | #[test] 949 | fn do_while_test() { 950 | let chk = Codegen::gen( 951 | " 952 | function f(a, b) { 953 | var ret = 0; 954 | do { 955 | ret += b 956 | b-- 957 | if (b == 0) break 958 | } while (true) 959 | return ret 960 | } 961 | var a = f(1, 10) 962 | assert_num_eq(55, a) 963 | ", 964 | ); 965 | 966 | let mut vm = new_vm(chk); 967 | vm.exec(); 968 | } 969 | 970 | #[test] 971 | fn pass_by_value_test() { 972 | let chk = Codegen::gen( 973 | " 974 | function f(a) { 975 | a += 1 976 | } 
977 | var a = 1 978 | f(a) 979 | assert_num_eq(1, a) 980 | ", 981 | ); 982 | 983 | let mut vm = new_vm(chk); 984 | vm.exec(); 985 | } 986 | 987 | #[test] 988 | fn object_literal_test() { 989 | let chk = Codegen::gen( 990 | " 991 | var a = { b: 1, c: {d: '1'} } 992 | assert_num_eq(1, a.b); 993 | assert_str_eq('1', a.c.d) 994 | ", 995 | ); 996 | 997 | let mut vm = new_vm(chk); 998 | vm.exec(); 999 | } 1000 | 1001 | #[test] 1002 | fn new_object_test() { 1003 | let chk = Codegen::gen( 1004 | " 1005 | var Person = function (name, age) { 1006 | this.name = name 1007 | this.age = age 1008 | } 1009 | var obj = new Person('tom', 20) 1010 | assert_str_eq('tom', obj.name) 1011 | assert_num_eq(20, obj.age); 1012 | ", 1013 | ); 1014 | 1015 | let mut vm = new_vm(chk); 1016 | vm.exec(); 1017 | } 1018 | 1019 | #[test] 1020 | fn member_access_chain_test() { 1021 | let chk = Codegen::gen( 1022 | " 1023 | var a = { 1024 | b: { 1025 | v: 1 1026 | f: function () { 1027 | return this.v 1028 | } 1029 | } 1030 | }; 1031 | assert_num_eq(1, a.b.f()); 1032 | ", 1033 | ); 1034 | 1035 | let mut vm = new_vm(chk); 1036 | vm.exec(); 1037 | } 1038 | 1039 | #[test] 1040 | fn logic_test() { 1041 | let chk = Codegen::gen( 1042 | " 1043 | var a = 1 1044 | var b = 2 1045 | var c = a && b 1046 | assert_num_eq(2, c); 1047 | 1048 | var d = 0 1049 | var e = a && d 1050 | assert_num_eq(0, e); 1051 | 1052 | var f = d || b 1053 | var g = d && a 1054 | assert_num_eq(2, f); 1055 | assert_num_eq(0, g); 1056 | ", 1057 | ); 1058 | 1059 | let mut vm = new_vm(chk); 1060 | vm.exec(); 1061 | } 1062 | } 1063 | -------------------------------------------------------------------------------- /src/lib/lexer.rs: -------------------------------------------------------------------------------- 1 | use crate::source::*; 2 | use crate::token::*; 3 | use std::char; 4 | use std::collections::VecDeque; 5 | use std::rc::Rc; 6 | use std::str; 7 | use std::u32; 8 | use unic_ucd::GeneralCategory; 9 | 10 | pub struct TokenNextNewline 
{ 11 | tok: Rc, 12 | next_is_line_terminator: bool, 13 | } 14 | 15 | pub struct Lexer<'a> { 16 | src: Source<'a>, 17 | tok: Rc, 18 | pub next_is_line_terminator: bool, 19 | peeked: VecDeque, 20 | } 21 | 22 | #[derive(Debug)] 23 | pub struct LexError { 24 | pub msg: String, 25 | } 26 | 27 | impl LexError { 28 | fn new(msg: String) -> Self { 29 | LexError { msg } 30 | } 31 | 32 | pub fn default() -> Self { 33 | LexError { 34 | msg: "".to_string(), 35 | } 36 | } 37 | } 38 | 39 | fn is_whitespace(c: char) -> bool { 40 | !is_line_terminator(c) && c.is_whitespace() 41 | } 42 | 43 | fn is_unicode_letter(c: char) -> bool { 44 | if c.is_uppercase() || c.is_lowercase() { 45 | return true; 46 | } 47 | match GeneralCategory::of(c) { 48 | GeneralCategory::TitlecaseLetter 49 | | GeneralCategory::ModifierLetter 50 | | GeneralCategory::OtherLetter 51 | | GeneralCategory::LetterNumber => true, 52 | _ => false, 53 | } 54 | } 55 | 56 | fn is_id_start(c: char) -> bool { 57 | if is_unicode_letter(c) { 58 | return true; 59 | } 60 | match c { 61 | '$' | '_' | '\\' => true, 62 | _ => false, 63 | } 64 | } 65 | 66 | fn is_id_part(c: char) -> bool { 67 | if is_id_start(c) { 68 | return true; 69 | } 70 | let cc = c as u32; 71 | if cc == 0x200c || cc == 0x200d { 72 | return true; 73 | } 74 | match GeneralCategory::of(c) { 75 | GeneralCategory::NonspacingMark 76 | | GeneralCategory::SpacingMark 77 | | GeneralCategory::DecimalNumber 78 | | GeneralCategory::ConnectorPunctuation => true, 79 | _ => false, 80 | } 81 | } 82 | 83 | fn is_single_escape_ch(c: char) -> bool { 84 | match c { 85 | '\'' | '"' | '\\' | 'b' | 'f' | 'n' | 'r' | 't' | 'v' => true, 86 | _ => false, 87 | } 88 | } 89 | 90 | fn escape_ch(c: char) -> char { 91 | match c { 92 | '\'' => '\'', 93 | '"' => '"', 94 | '\\' => '\\', 95 | 'b' => '\x08', 96 | 'f' => '\x0c', 97 | 'n' => '\x0a', 98 | 'r' => '\x0d', 99 | 't' => '\x09', 100 | 'v' => '\x0b', 101 | _ => panic!(), 102 | } 103 | } 104 | 105 | fn is_non_escape_ch(c: char) -> bool { 
106 | !is_single_escape_ch(c) && !is_line_terminator(c) && !c.is_ascii_digit() && c != 'x' && c != 'u' 107 | } 108 | 109 | impl<'a> Lexer<'a> { 110 | pub fn new(src: Source<'a>) -> Self { 111 | Lexer { 112 | src, 113 | tok: Rc::new(Token::Nil), 114 | next_is_line_terminator: false, 115 | peeked: VecDeque::new(), 116 | } 117 | } 118 | 119 | fn next_(&mut self) -> Result { 120 | self.skip_whitespace(); 121 | if self.ahead_is_id_start() { 122 | self.read_name() 123 | } else if self.ahead_is_decimal_int() { 124 | self.read_numeric() 125 | } else if self.ahead_is_string_start() { 126 | let t = self.src.read().unwrap(); 127 | self.read_string(t) 128 | } else if !self.ahead_is_eof() { 129 | self.read_symbol() 130 | } else { 131 | Ok(Token::Eof(EofData { 132 | loc: self.loc().clone(), 133 | })) 134 | } 135 | } 136 | 137 | pub fn peek(&mut self) -> Result, LexError> { 138 | match self.peeked.front() { 139 | Some(tok) => Ok(tok.tok.clone()), 140 | _ => match self.next_() { 141 | Ok(tok) => { 142 | let tok = Rc::new(tok); 143 | let next_is_line_terminator = self.ahead_is_line_terminator_or_eof(); 144 | self.peeked.push_back(TokenNextNewline { 145 | tok: tok.clone(), 146 | next_is_line_terminator, 147 | }); 148 | Ok(tok) 149 | } 150 | Err(e) => Err(e), 151 | }, 152 | } 153 | } 154 | 155 | pub fn next(&mut self) -> Result, LexError> { 156 | match self.peeked.pop_front() { 157 | Some(tn) => { 158 | self.tok = tn.tok; 159 | self.next_is_line_terminator = tn.next_is_line_terminator; 160 | Ok(self.tok.clone()) 161 | } 162 | _ => match self.next_() { 163 | Ok(tok) => { 164 | self.tok = Rc::new(tok); 165 | self.next_is_line_terminator = self.ahead_is_line_terminator_or_eof(); 166 | Ok(self.tok.clone()) 167 | } 168 | Err(e) => Err(e), 169 | }, 170 | } 171 | } 172 | 173 | pub fn advance(&mut self) { 174 | match self.next() { 175 | Ok(_) => (), 176 | Err(e) => panic!(e.msg), 177 | } 178 | } 179 | 180 | fn read_unicode_escape_seq(&mut self) -> Option { 181 | let mut hex = [0, 0, 0, 0]; 
182 | for i in 0..hex.len() { 183 | match self.src.read() { 184 | Some(c) => { 185 | if c.is_ascii_hexdigit() { 186 | hex[i] = c as u8; 187 | } else { 188 | return None; 189 | } 190 | } 191 | _ => return None, 192 | } 193 | } 194 | let hex = str::from_utf8(&hex).unwrap(); 195 | match u32::from_str_radix(hex, 16) { 196 | Ok(i) => match char::from_u32(i) { 197 | Some(c) => Some(c), 198 | _ => None, // deformed unicode 199 | }, 200 | _ => None, // deformed hex digits 201 | } 202 | } 203 | 204 | fn ahead_is_id_start(&mut self) -> bool { 205 | match self.src.peek() { 206 | Some(c) => is_id_start(c), 207 | _ => false, 208 | } 209 | } 210 | 211 | fn ahead_is_id_part(&mut self) -> bool { 212 | match self.src.peek() { 213 | Some(c) => is_id_part(c), 214 | _ => false, 215 | } 216 | } 217 | 218 | fn errmsg(&self) -> String { 219 | format!( 220 | "Unexpected char at line: {} column: {}", 221 | self.src.line, self.src.column 222 | ) 223 | } 224 | 225 | pub fn pos(&self) -> Position { 226 | Position { 227 | line: self.src.line, 228 | column: self.src.column, 229 | } 230 | } 231 | 232 | pub fn loc(&self) -> SourceLoc { 233 | SourceLoc { 234 | start: self.pos(), 235 | end: Position::new(), 236 | } 237 | } 238 | 239 | fn fin_loc(&self, loc: SourceLoc) -> SourceLoc { 240 | let mut loc = loc; 241 | loc.end = self.pos(); 242 | loc 243 | } 244 | 245 | // we use the prior read char as a barrier which is passed by the formal parameter `bs`, 246 | // if bs is `\` then we can consider the next 4 characters must be a valid unicode escaping, 247 | // we try to turn the valid unicode escaping to it's associated char and return that char, 248 | // otherwise a lexer error is returned 249 | fn read_escape_unicode(&mut self, bs: char) -> Result { 250 | if bs == '\\' && self.src.test_ahead('u') { 251 | self.src.advance(); 252 | match self.read_unicode_escape_seq() { 253 | Some(ec) => Ok(ec), 254 | _ => Err(LexError::new(self.errmsg())), 255 | } 256 | } else { 257 | Ok(bs) 258 | } 259 | } 260 | 261 
| fn read_id_part(&mut self) -> Result { 262 | let mut val = vec![]; 263 | loop { 264 | if self.ahead_is_id_part() { 265 | let c = self.src.read().unwrap(); 266 | match self.read_escape_unicode(c) { 267 | Ok(cc) => val.push(cc), 268 | Err(e) => return Err(e), 269 | } 270 | } else { 271 | break; 272 | } 273 | } 274 | Ok(val.into_iter().collect()) 275 | } 276 | 277 | pub fn read_name(&mut self) -> Result { 278 | let loc = self.loc(); 279 | let mut c = self.src.read().unwrap(); 280 | match self.read_escape_unicode(c) { 281 | Ok(cc) => c = cc, 282 | Err(e) => return Err(e), 283 | } 284 | let mut val = vec![c]; 285 | match self.read_id_part() { 286 | Ok(cc) => val.extend(cc.chars()), 287 | Err(e) => return Err(e), 288 | } 289 | let val: String = val.into_iter().collect(); 290 | if is_keyword(&val) { 291 | Ok(Token::Keyword(KeywordData { 292 | kind: name_to_keyword(&val), 293 | loc: self.fin_loc(loc), 294 | })) 295 | } else if is_ctx_keyword(&val) { 296 | Ok(Token::ContextualKeyword(CtxKeywordData { 297 | kind: name_to_ctx_keyword(&val), 298 | loc: self.fin_loc(loc), 299 | })) 300 | } else if is_bool(&val) { 301 | Ok(Token::BooleanLiteral(BooleanLiteralData { 302 | kind: name_to_bool(&val), 303 | loc: self.fin_loc(loc), 304 | })) 305 | } else if is_null(&val) { 306 | Ok(Token::NullLiteral(NullLiteralData { 307 | loc: self.fin_loc(loc), 308 | })) 309 | } else { 310 | Ok(Token::Identifier(IdentifierData { 311 | value: val, 312 | loc: self.fin_loc(loc), 313 | })) 314 | } 315 | } 316 | 317 | fn read_decimal_digits(&mut self) -> String { 318 | let mut ret = String::new(); 319 | loop { 320 | if let Some(c) = self.src.peek() { 321 | if c.is_ascii_digit() { 322 | ret.push(self.src.read().unwrap()); 323 | continue; 324 | } 325 | } 326 | break; 327 | } 328 | ret 329 | } 330 | 331 | fn read_exponent(&mut self) -> Result { 332 | let mut ret = String::new(); 333 | // consume e|E 334 | ret.push(self.src.read().unwrap()); 335 | if let Some(c) = self.src.peek() { 336 | if c == '+' || c 
== '-' { 337 | ret.push(self.src.read().unwrap()); 338 | } 339 | let digits = self.read_decimal_digits(); 340 | if digits.is_empty() { 341 | return Err(LexError::new(self.errmsg())); 342 | } else { 343 | ret.push_str(digits.as_str()); 344 | } 345 | } 346 | Ok(ret) 347 | } 348 | 349 | fn read_decimal_int_part(&mut self) -> String { 350 | let mut ret = String::new(); 351 | let c = self.src.read().unwrap(); 352 | ret.push(c); 353 | if c == '0' { 354 | return ret; 355 | } 356 | ret.push_str(self.read_decimal_digits().as_str()); 357 | ret 358 | } 359 | 360 | fn read_decimal(&mut self) -> Result { 361 | let c = self.src.peek().unwrap(); 362 | let mut ret = String::new(); 363 | let digits_opt = c != '.'; 364 | if c.is_ascii_digit() { 365 | ret.push_str(self.read_decimal_int_part().as_str()); 366 | } 367 | // here we process the fractional part 368 | // if decimal starts with dot then next digits is required to be present 369 | // if decimal starts non-zero digit then the digits of fractional part is optional 370 | if self.src.test_ahead('.') { 371 | ret.push(self.src.read().unwrap()); 372 | let digits = self.read_decimal_digits(); 373 | if digits.is_empty() && !digits_opt { 374 | return Err(LexError::new(self.errmsg())); 375 | } 376 | ret.push_str(digits.as_str()); 377 | } 378 | if self.src.test_ahead_or('e', 'E') { 379 | match self.read_exponent() { 380 | Ok(s) => ret.push_str(s.as_str()), 381 | err @ Err(_) => return err, 382 | } 383 | } 384 | 385 | Ok(ret) 386 | } 387 | 388 | fn read_hex(&mut self) -> Result { 389 | let mut ret = String::new(); 390 | ret.push(self.src.read().unwrap()); 391 | ret.push(self.src.read().unwrap()); 392 | let mut digits = vec![]; 393 | loop { 394 | match self.src.peek() { 395 | Some(c) => { 396 | if c.is_ascii_hexdigit() { 397 | digits.push(self.src.read().unwrap()); 398 | } else { 399 | break; 400 | } 401 | } 402 | _ => break, 403 | } 404 | } 405 | if digits.len() == 0 { 406 | Err(LexError::new(self.errmsg())) 407 | } else { 408 | let 
digits: String = digits.iter().collect(); 409 | ret.push_str(digits.as_str()); 410 | Ok(ret) 411 | } 412 | } 413 | 414 | fn ahead_is_decimal_int(&mut self) -> bool { 415 | if let Some(c) = self.src.peek() { 416 | if c == '.' { 417 | match self.src.chs.next() { 418 | Some(cc) => { 419 | self.src.peeked.push_back(cc); 420 | cc.is_ascii_digit() 421 | } 422 | _ => false, 423 | } 424 | } else { 425 | c.is_ascii_digit() 426 | } 427 | } else { 428 | false 429 | } 430 | } 431 | 432 | pub fn read_numeric(&mut self) -> Result { 433 | let loc = self.loc(); 434 | let value: Result; 435 | let mut is_hex = false; 436 | if self.src.test_ahead('0') { 437 | if let Some(c) = self.src.chs.next() { 438 | self.src.peeked.push_back(c); 439 | if c == 'x' || c == 'X' { 440 | is_hex = true; 441 | } 442 | } 443 | } 444 | if is_hex { 445 | value = self.read_hex(); 446 | } else { 447 | value = self.read_decimal(); 448 | } 449 | match value { 450 | Ok(v) => Ok(Token::NumericLiteral(NumericLiteralData { 451 | value: v, 452 | loc: self.fin_loc(loc), 453 | })), 454 | Err(e) => Err(e), 455 | } 456 | } 457 | 458 | fn ahead_is_string_start(&mut self) -> bool { 459 | match self.src.peek() { 460 | Some(c) => c == '\'' || c == '"', 461 | _ => false, 462 | } 463 | } 464 | 465 | fn read_string_escape_seq(&mut self) -> Result, LexError> { 466 | self.src.advance(); // consume `\` 467 | match self.src.read() { 468 | Some(mut c) => { 469 | if is_single_escape_ch(c) { 470 | c = escape_ch(c); 471 | } else if c == '0' { 472 | c = '\0'; 473 | // 0 [lookahead ∉ DecimalDigit] 474 | if let Some(c) = self.src.peek() { 475 | if c.is_ascii_digit() { 476 | return Err(LexError::new(self.errmsg())); 477 | } 478 | } 479 | } else if c == 'x' { 480 | let mut hex = [0, 0]; 481 | for i in 0..hex.len() { 482 | if let Some(cc) = self.src.read() { 483 | if cc.is_ascii_hexdigit() { 484 | hex[i] = cc as u8; 485 | continue; 486 | } 487 | } 488 | return Err(LexError::new(self.errmsg())); 489 | } 490 | // we've already check each 
char is a valid hex digit 491 | // so the entire hex digits can be safely converted to u32 492 | let hex = str::from_utf8(&hex).unwrap(); 493 | c = char::from_u32(u32::from_str_radix(hex, 16).ok().unwrap()).unwrap() 494 | } else if c == 'u' { 495 | match self.read_unicode_escape_seq() { 496 | Some(ec) => c = ec, 497 | _ => return Err(LexError::new(self.errmsg())), 498 | } 499 | } else if is_line_terminator(c) { 500 | // [lookahead ∉ ] 501 | if c == '\r' && self.src.test_ahead('\n') { 502 | self.src.advance(); 503 | return Err(LexError::new(self.errmsg())); 504 | } 505 | // here we meet the line continuation symbol, just remove it from source stream 506 | return Ok(None); 507 | } else if is_non_escape_ch(c) { 508 | // do nothing 509 | } else { 510 | return Err(LexError::new(self.errmsg())); 511 | } 512 | Ok(Some(c)) 513 | } 514 | _ => Err(LexError::new(self.errmsg())), 515 | } 516 | } 517 | 518 | fn read_string(&mut self, t: char) -> Result { 519 | let loc = self.loc(); 520 | let mut ret = String::new(); 521 | loop { 522 | match self.src.peek() { 523 | Some(c) => { 524 | if c == t { 525 | self.src.advance(); 526 | break; 527 | } else if c == '\\' { 528 | match self.read_string_escape_seq() { 529 | Ok(Some(c)) => ret.push(c), 530 | Err(e) => return Err(e), 531 | _ => (), 532 | } 533 | } else { 534 | ret.push(self.src.read().unwrap()); 535 | } 536 | } 537 | _ => break, 538 | } 539 | } 540 | Ok(Token::StringLiteral(StringLiteralData { 541 | value: ret, 542 | loc: self.fin_loc(loc), 543 | })) 544 | } 545 | 546 | fn ahead_is_regexp_start(&mut self) -> bool { 547 | match self.src.peek() { 548 | Some('/') => self.tok.is_before_expr(), 549 | _ => false, 550 | } 551 | } 552 | 553 | fn ahead_is_regexp_backslash_seq(&mut self) -> bool { 554 | match self.src.peek() { 555 | Some(c) => c == '\\', 556 | _ => false, 557 | } 558 | } 559 | 560 | fn read_regexp_backslash_seq(&mut self) -> Result { 561 | let mut ret = vec![self.src.read().unwrap()]; 562 | if 
self.ahead_is_line_terminator_or_eof() { 563 | Err(LexError::new(self.errmsg())) 564 | } else { 565 | ret.push(self.src.read().unwrap()); 566 | Ok(ret.into_iter().collect()) 567 | } 568 | } 569 | 570 | fn ahead_is_regexp_class(&mut self) -> bool { 571 | match self.src.peek() { 572 | Some(c) => c == '[', 573 | _ => false, 574 | } 575 | } 576 | 577 | fn read_regexp_class(&mut self) -> Result { 578 | let mut ret = vec![self.src.read().unwrap()]; 579 | loop { 580 | if self.ahead_is_regexp_backslash_seq() { 581 | match self.read_regexp_backslash_seq() { 582 | Ok(s) => ret.extend(s.chars()), 583 | Err(e) => return Err(e), 584 | } 585 | } else if self.ahead_is_line_terminator_or_eof() { 586 | return Err(LexError::new(self.errmsg())); 587 | } else { 588 | let c = self.src.read().unwrap(); 589 | ret.push(c); 590 | if c == ']' { 591 | break; 592 | } 593 | }; 594 | } 595 | Ok(ret.into_iter().collect()) 596 | } 597 | 598 | fn read_regexp_body(&mut self) -> Result { 599 | let mut ret = vec![self.src.read().unwrap()]; 600 | loop { 601 | if self.ahead_is_regexp_backslash_seq() { 602 | match self.read_regexp_backslash_seq() { 603 | Ok(s) => ret.extend(s.chars()), 604 | Err(e) => return Err(e), 605 | } 606 | } else if self.ahead_is_regexp_class() { 607 | match self.read_regexp_class() { 608 | Ok(s) => ret.extend(s.chars()), 609 | Err(e) => return Err(e), 610 | } 611 | } else if self.ahead_is_line_terminator_or_eof() { 612 | return Err(LexError::new(self.errmsg())); 613 | } else { 614 | let c = self.src.read().unwrap(); 615 | ret.push(c); 616 | if c == '/' { 617 | break; 618 | } 619 | } 620 | } 621 | Ok(ret.into_iter().collect()) 622 | } 623 | 624 | fn read_regexp_flags(&mut self) -> Result { 625 | let mut ret = vec![]; 626 | loop { 627 | if self.ahead_is_id_part() { 628 | match self.read_id_part() { 629 | Ok(s) => ret.extend(s.chars()), 630 | Err(e) => return Err(e), 631 | } 632 | } else { 633 | break; 634 | } 635 | } 636 | Ok(ret.into_iter().collect()) 637 | } 638 | 639 | fn 
read_regexp(&mut self) -> Result { 640 | let loc = self.loc(); 641 | match self.read_regexp_body() { 642 | Ok(mut body) => match self.read_regexp_flags() { 643 | Ok(flags) => { 644 | body.push_str(flags.as_str()); 645 | Ok(Token::RegExpLiteral(RegExpLiteralData { 646 | value: body, 647 | loc: self.fin_loc(loc), 648 | })) 649 | } 650 | Err(e) => Err(e), 651 | }, 652 | Err(e) => Err(e), 653 | } 654 | } 655 | 656 | fn read_symbol(&mut self) -> Result { 657 | if self.ahead_is_regexp_start() { 658 | return self.read_regexp(); 659 | } 660 | let loc = self.loc(); 661 | let mut s = vec![]; 662 | loop { 663 | if self.ahead_is_whitespace_or_eof() { 664 | break; 665 | } 666 | let c = self.src.peek().unwrap(); 667 | match c { 668 | '{' | '}' | '(' | ')' | '[' | ']' | '.' | ';' | ',' | '?' | ':' => { 669 | s.push(self.src.read().unwrap()); 670 | break; 671 | } 672 | '<' => { 673 | // < <<= << <= 674 | s.push(self.src.read().unwrap()); 675 | if self.src.test_ahead2('<', '=') { 676 | s.push(self.src.read().unwrap()); 677 | s.push(self.src.read().unwrap()); 678 | } else if self.src.test_ahead_or('<', '=') { 679 | s.push(self.src.read().unwrap()); 680 | } 681 | break; 682 | } 683 | '>' => { 684 | // > >>>= >>= >> >= 685 | s.push(self.src.read().unwrap()); 686 | if self.src.test_ahead3('>', '>', '=') { 687 | s.push(self.src.read().unwrap()); 688 | s.push(self.src.read().unwrap()); 689 | s.push(self.src.read().unwrap()); 690 | } else if self.src.test_ahead2('>', '=') { 691 | s.push(self.src.read().unwrap()); 692 | s.push(self.src.read().unwrap()); 693 | } else if self.src.test_ahead_or('>', '=') { 694 | s.push(self.src.read().unwrap()); 695 | } 696 | break; 697 | } 698 | '=' => { 699 | // = === == 700 | s.push(self.src.read().unwrap()); 701 | if self.src.test_ahead2('=', '=') { 702 | s.push(self.src.read().unwrap()); 703 | s.push(self.src.read().unwrap()); 704 | } else if self.src.test_ahead('=') { 705 | s.push(self.src.read().unwrap()); 706 | } 707 | break; 708 | } 709 | '!' 
=> { 710 | // ! != !== 711 | s.push(self.src.read().unwrap()); 712 | if self.src.test_ahead2('=', '=') { 713 | s.push(self.src.read().unwrap()); 714 | s.push(self.src.read().unwrap()); 715 | } else if self.src.test_ahead('=') { 716 | s.push(self.src.read().unwrap()); 717 | } 718 | break; 719 | } 720 | '+' => { 721 | // + ++ += 722 | s.push(self.src.read().unwrap()); 723 | if self.src.test_ahead_or('+', '=') { 724 | s.push(self.src.read().unwrap()); 725 | } 726 | break; 727 | } 728 | '-' => { 729 | // - -- -= 730 | s.push(self.src.read().unwrap()); 731 | if self.src.test_ahead_or('-', '=') { 732 | s.push(self.src.read().unwrap()); 733 | } 734 | break; 735 | } 736 | '&' => { 737 | // & && &= 738 | s.push(self.src.read().unwrap()); 739 | if self.src.test_ahead_or('&', '=') { 740 | s.push(self.src.read().unwrap()); 741 | } 742 | break; 743 | } 744 | '|' => { 745 | // | || |= 746 | s.push(self.src.read().unwrap()); 747 | if self.src.test_ahead_or('|', '=') { 748 | s.push(self.src.read().unwrap()); 749 | } 750 | break; 751 | } 752 | '*' | '/' | '%' | '^' | '~' => { 753 | // pattern pattern= 754 | s.push(self.src.read().unwrap()); 755 | if self.src.test_ahead('=') { 756 | s.push(self.src.read().unwrap()); 757 | } 758 | break; 759 | } 760 | _ => return Err(LexError::new(self.errmsg())), 761 | } 762 | } 763 | let s: String = s.into_iter().collect(); 764 | if is_symbol(&s) { 765 | Ok(Token::Symbol(SymbolData { 766 | kind: name_to_symbol(&s), 767 | loc: self.fin_loc(loc), 768 | })) 769 | } else { 770 | Err(LexError::new(self.errmsg())) 771 | } 772 | } 773 | 774 | fn skip_comment_single(&mut self) { 775 | self.src.advance2(); 776 | loop { 777 | match self.src.read() { 778 | Some(EOL) | None => break, 779 | _ => (), 780 | }; 781 | } 782 | } 783 | 784 | fn skip_comment_multi(&mut self) { 785 | self.src.advance2(); 786 | loop { 787 | match self.src.read() { 788 | Some('*') => { 789 | if self.src.test_ahead('/') { 790 | self.src.advance(); 791 | break; 792 | } 793 | } 794 | None 
=> break, 795 | _ => (), 796 | }; 797 | } 798 | } 799 | 800 | pub fn ahead_is_line_terminator_or_eof(&mut self) -> bool { 801 | match self.src.peek() { 802 | Some(c) => is_line_terminator(c), 803 | _ => true, 804 | } 805 | } 806 | 807 | pub fn ahead_is_eof(&mut self) -> bool { 808 | match self.src.peek() { 809 | Some(_) => false, 810 | _ => true, 811 | } 812 | } 813 | 814 | pub fn ahead_is_whitespace(&mut self) -> bool { 815 | match self.src.peek() { 816 | Some(c) => is_whitespace(c), 817 | _ => false, 818 | } 819 | } 820 | 821 | pub fn ahead_is_whitespace_or_line_terminator(&mut self) -> bool { 822 | match self.src.peek() { 823 | Some(c) => c.is_whitespace(), 824 | _ => false, 825 | } 826 | } 827 | 828 | pub fn ahead_is_whitespace_or_eof(&mut self) -> bool { 829 | match self.src.peek() { 830 | Some(c) => is_whitespace(c), 831 | _ => true, 832 | } 833 | } 834 | 835 | pub fn skip_whitespace(&mut self) { 836 | loop { 837 | if self.ahead_is_whitespace_or_line_terminator() { 838 | self.src.read(); 839 | } else if self.src.test_ahead2('/', '/') { 840 | self.skip_comment_single(); 841 | } else if self.src.test_ahead2('/', '*') { 842 | self.skip_comment_multi(); 843 | } else { 844 | break; 845 | } 846 | } 847 | } 848 | } 849 | 850 | #[cfg(test)] 851 | mod lexer_tests { 852 | use super::*; 853 | 854 | #[test] 855 | fn skip_whitespace() { 856 | let code = String::from( 857 | " // this is a single-line comment 858 | /* 859 | * this is a multiline comment 860 | */ hello world 861 | ", 862 | ); 863 | let src = Source::new(&code); 864 | let mut lex = Lexer::new(src); 865 | lex.skip_whitespace(); 866 | assert_eq!('h', lex.src.read().unwrap()); 867 | } 868 | 869 | #[test] 870 | fn unicode_letter() { 871 | let mut c: char = '\u{01c5}'; // title case 872 | assert!(is_unicode_letter(c)); 873 | c = '\u{1C90}'; // uppercase 874 | assert!(is_unicode_letter(c)); 875 | c = '\u{10D0}'; // lowercase 876 | assert!(is_unicode_letter(c)); 877 | c = '\u{0559}'; // modifier 878 | 
// Two valid `\uXXXX` escapes decode to their characters; the third one
// contains a non-hex digit and must be rejected.
#[test]
fn unicode_escape_seq() {
  let code = String::from("\\u01c5\\u0920\\u1x23");
  let mut lex = Lexer::new(Source::new(&code));

  let expectations = [Some('\u{01c5}'), Some('\u{0920}'), None];
  for expected in expectations.iter() {
    // step over the leading `\u`
    lex.src.advance2();
    assert_eq!(*expected, lex.read_unicode_escape_seq());
  }
}

// `read_name` classifies what it reads: identifiers (including escaped and
// non-ASCII ones), keywords, contextual keywords, booleans and `null`.
#[test]
fn read_name() {
  init_token_data();

  let code = String::from("\\u01c5\\u0920 a aᢅ break let true null");
  let mut lex = Lexer::new(Source::new(&code));

  let escaped = lex.read_name().ok().unwrap();
  assert_eq!("\u{01c5}\u{0920}", escaped.id_data().value);

  lex.skip_whitespace();
  let ascii = lex.read_name().ok().unwrap();
  assert_eq!("a", ascii.id_data().value);

  lex.skip_whitespace();
  let non_ascii = lex.read_name().ok().unwrap();
  assert_eq!("a\u{1885}", non_ascii.id_data().value);

  lex.skip_whitespace();
  let keyword = lex.read_name().ok().unwrap();
  assert_eq!("break", keyword.keyword_data().kind.name());

  lex.skip_whitespace();
  let ctx_keyword = lex.read_name().ok().unwrap();
  assert_eq!("let", ctx_keyword.ctx_keyword_data().kind.name());

  lex.skip_whitespace();
  let boolean = lex.read_name().ok().unwrap();
  assert_eq!("true", boolean.bool_data().kind.name());

  lex.skip_whitespace();
  let null = lex.read_name().ok().unwrap();
  assert!(null.is_null());
}
// Decimal and hexadecimal literals keep their raw source text.
#[test]
fn read_numeric() {
  let code = String::from("1 .1e1 0xa1 0X123");
  let mut lex = Lexer::new(Source::new(&code));

  let expected = ["1", ".1e1", "0xa1", "0X123"];
  for (i, raw) in expected.iter().enumerate() {
    if i > 0 {
      lex.skip_whitespace();
    }
    let tok = lex.read_numeric().ok().unwrap();
    assert_eq!(*raw, tok.num_data().value);
  }
}

// Single- and double-quoted strings; the second one contains a line
// continuation and two unicode escapes.
#[test]
fn read_string() {
  let code = String::from("'hello world' \"hello \\\n\\u4E16\\u754C\"");
  let mut lex = Lexer::new(Source::new(&code));

  // step over the opening quote, as `read_string` expects
  lex.src.advance();
  match lex.read_string('\'').ok().unwrap() {
    Token::StringLiteral(s) => assert_eq!("hello world", s.value),
    // fail loudly instead of silently skipping the assertion
    _ => panic!("expected a string literal token"),
  }

  lex.skip_whitespace();
  lex.src.advance();
  match lex.read_string('"').ok().unwrap() {
    Token::StringLiteral(s) => assert_eq!("hello 世界", s.value),
    _ => panic!("expected a string literal token"),
  }
}
// Token locations: lines are 1-based, columns 0-based, and the end column
// points just past the last character of the token.
#[test]
fn loc() {
  init_token_data();

  // `bcd` is preceded by two spaces so that it starts at column 2, which is
  // what the assertions below require
  let code = String::from("a\n  bcd");
  let mut lex = Lexer::new(Source::new(&code));

  let first = lex.next().ok().unwrap();
  let first_data = first.id_data();
  assert_eq!("a", first_data.value);
  let first_loc = &first_data.loc;
  assert_eq!(1, first_loc.start.line);
  assert_eq!(0, first_loc.start.column);
  assert_eq!(1, first_loc.end.line);
  assert_eq!(1, first_loc.end.column);

  let second = lex.next().ok().unwrap();
  let second_loc = &second.id_data().loc;
  assert_eq!(2, second_loc.start.line);
  assert_eq!(2, second_loc.start.column);
  assert_eq!(2, second_loc.end.line);
  assert_eq!(5, second_loc.end.column);
}