├── src ├── ast │ ├── ast_def │ │ ├── node.rs │ │ ├── mod.rs │ │ └── stmt_def │ │ │ ├── mod.rs │ │ │ ├── block_def.rs │ │ │ ├── stat_def.rs │ │ │ └── exp_def.rs │ ├── mod.rs │ ├── lexer │ │ ├── util.rs │ │ ├── mod.rs │ │ └── token.rs │ └── parser │ │ └── mod.rs ├── codegen │ ├── ir │ │ ├── ir_gen.rs │ │ └── mod.rs │ ├── sym_tb │ │ ├── scope.rs │ │ ├── sym.rs │ │ ├── mod.rs │ │ └── sym_tb.rs │ ├── target │ │ ├── target_gen.rs │ │ └── mod.rs │ └── mod.rs ├── util.rs └── main.rs ├── .gitignore ├── README.md ├── Cargo.lock ├── .idea ├── .gitignore ├── vcs.xml ├── modules.xml ├── inspectionProfiles │ └── Project_Default.xml └── RustLuac.iml └── Cargo.toml /src/ast/ast_def/node.rs: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/codegen/ir/ir_gen.rs: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/codegen/sym_tb/scope.rs: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/codegen/sym_tb/sym.rs: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/codegen/ir/mod.rs: -------------------------------------------------------------------------------- 1 | mod ir_gen; -------------------------------------------------------------------------------- /src/codegen/target/target_gen.rs: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/codegen/target/mod.rs: -------------------------------------------------------------------------------- 1 | mod target_gen; 
-------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | /ll_syntax_proofer/target -------------------------------------------------------------------------------- /src/ast/ast_def/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod node; 2 | pub mod stmt_def; -------------------------------------------------------------------------------- /src/codegen/mod.rs: -------------------------------------------------------------------------------- 1 | mod sym_tb; 2 | mod ir; 3 | mod target; -------------------------------------------------------------------------------- /src/codegen/sym_tb/mod.rs: -------------------------------------------------------------------------------- 1 | mod sym_tb; 2 | mod scope; 3 | mod sym; -------------------------------------------------------------------------------- /src/ast/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod lexer; 2 | pub mod parser; 3 | pub mod ast_def; -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # SimpleRustLuaJIT 2 | A simple Lua JIT implemented in Rust for the HUST Compiler Principles course. 3 | -------------------------------------------------------------------------------- /Cargo.lock: -------------------------------------------------------------------------------- 1 | # This file is automatically @generated by Cargo. 2 | # It is not intended for manual editing. 
3 | [[package]] 4 | name = "RustLuac" 5 | version = "0.1.0" 6 | -------------------------------------------------------------------------------- /.idea/.gitignore: -------------------------------------------------------------------------------- 1 | # 默认忽略的文件 2 | /shelf/ 3 | /workspace.xml 4 | # 数据源本地存储已忽略文件 5 | /../../../../../:\code\rust\RustLuac\.idea/dataSources/ 6 | /dataSources.local.xml 7 | # 基于编辑器的 HTTP 客户端请求 8 | /httpRequests/ 9 | -------------------------------------------------------------------------------- /.idea/vcs.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /src/ast/ast_def/stmt_def/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod exp_def; 2 | pub mod stat_def; 3 | pub mod block_def; 4 | 5 | pub trait Exp {} 6 | 7 | pub enum StatType { 8 | EmptyStatTag 9 | } 10 | 11 | pub trait Stat { 12 | fn get_type(&self) -> StatType; 13 | } -------------------------------------------------------------------------------- /src/util.rs: -------------------------------------------------------------------------------- 1 | #[macro_export] 2 | macro_rules! 
string_hash_map { 3 | ($($key : expr => $value : expr), *) => {{ 4 | let mut map = std::collections::HashMap::new(); 5 | $(map.insert(($key).to_string(), $value);)* 6 | map 7 | }} 8 | } 9 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "RustLuac" 3 | version = "0.1.0" 4 | authors = ["sohardforaname "] 5 | edition = "2018" 6 | 7 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 8 | 9 | [dependencies] 10 | -------------------------------------------------------------------------------- /.idea/modules.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /.idea/inspectionProfiles/Project_Default.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 10 | -------------------------------------------------------------------------------- /src/ast/ast_def/stmt_def/block_def.rs: -------------------------------------------------------------------------------- 1 | use crate::ast::ast_def::stmt_def::{Stat, Exp}; 2 | 3 | pub struct Block { 4 | pub last_line: usize, 5 | pub stats: Vec>, 6 | pub is_contain_ret: bool, 7 | pub ret_exps: Option>>, 8 | } 9 | 10 | impl Block { 11 | pub fn new(last_line: usize 12 | , stats: Vec> 13 | , opt_ret_exps: Option>>) -> Block { 14 | let is_contain_ret = opt_ret_exps.is_some(); 15 | Block { 16 | last_line, 17 | stats, 18 | is_contain_ret, 19 | ret_exps: opt_ret_exps, 20 | } 21 | } 22 | } -------------------------------------------------------------------------------- /src/codegen/sym_tb/sym_tb.rs: -------------------------------------------------------------------------------- 1 | use std::collections::{HashMap, HashSet}; 2 | use 
crate::ast::lexer::token::Token; 3 | use std::borrow::Borrow; 4 | 5 | struct SymbolTable { 6 | scope_index: HashMap>, 7 | token_stack: Vec>, 8 | } 9 | 10 | impl SymbolTable { 11 | /*pub fn new() -> SymbolTable { 12 | SymbolTable { 13 | scope_index: HashMap::new(), 14 | token_stack: Vec::new(), 15 | } 16 | } 17 | pub fn query_token(&self, token: &Token) -> bool { 18 | self.scope_index.borrow().get(token).is_some() 19 | } 20 | 21 | pub fn insert_token(&mut self, token: &Token) { 22 | 23 | } 24 | 25 | pub fn push_scope() { 26 | 27 | } 28 | 29 | pub fn pop_scope() { 30 | 31 | }*/ 32 | } -------------------------------------------------------------------------------- /src/main.rs: -------------------------------------------------------------------------------- 1 | mod ast; 2 | mod util; 3 | mod codegen; 4 | 5 | use ast::lexer::Lexer; 6 | use crate::ast::parser::Parser; 7 | use std::fs::File; 8 | use std::io::{Read, BufReader, BufRead}; 9 | 10 | fn main() { 11 | /* 12 | let mut parser: Parser = Parser::new(&"local a = 123 + 6 b = 1" 13 | .to_string(), &"D:\\test.txt".to_string()); 14 | parser.check_grammar(); 15 | */ 16 | 17 | let mut file: File = File::open("D:\\testLua.lua") 18 | .unwrap_or_else(|_| { panic!("File open error\n"); }); 19 | let mut code = String::new(); 20 | file.read_to_string(&mut code); 21 | 22 | let mut lexer = Lexer::new(&code); 23 | println!("{}", lexer.peek_token().as_ref().unwrap()); 24 | println!("{}", lexer.peek_token().as_ref().unwrap()); 25 | println!("{}", lexer.next_token().unwrap()); 26 | println!("{}", lexer.peek_token().as_ref().unwrap()); 27 | println!("{}", lexer.peek_token().as_ref().unwrap()); 28 | println!("{}", lexer.next_token().unwrap()); 29 | } 30 | -------------------------------------------------------------------------------- /src/ast/lexer/util.rs: -------------------------------------------------------------------------------- 1 | use std::iter::Peekable; 2 | 3 | pub struct CondIterator<'a, T, F> 4 | where T: 'a, T: 
Iterator { 5 | iter: &'a mut Peekable, 6 | filter: F, 7 | } 8 | 9 | impl<'a, T: Iterator, F> Iterator for CondIterator<'a, T, F> 10 | where F: FnMut(&T::Item) -> bool { 11 | 12 | type Item = T::Item; 13 | fn next(&mut self) -> Option { 14 | match self.iter.peek() { 15 | Some(val) => { 16 | match (self.filter)(val) { 17 | true => self.iter.next(), 18 | _ => None 19 | } 20 | }, 21 | None => None 22 | } 23 | } 24 | } 25 | 26 | pub trait CondTake<'a, T> 27 | where T: Iterator { 28 | fn take_conditional(self, filter: F) -> CondIterator<'a, T, F> where F: FnMut(&T::Item) -> bool; 29 | } 30 | 31 | impl<'a, T> CondTake<'a, T> for &'a mut Peekable where T: Iterator { 32 | fn take_conditional(self, filter: F) -> CondIterator<'a, T, F> { 33 | CondIterator:: { 34 | iter: self, 35 | filter, 36 | } 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /.idea/RustLuac.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | -------------------------------------------------------------------------------- /src/ast/ast_def/stmt_def/stat_def.rs: -------------------------------------------------------------------------------- 1 | use crate::ast::ast_def::stmt_def::Exp; 2 | use crate::ast::ast_def::stmt_def::exp_def::{FuncDefExp, FuncCallExp}; 3 | use crate::ast::ast_def::stmt_def::block_def::Block; 4 | 5 | pub struct EmptyStat {} 6 | 7 | pub struct BreakStat { 8 | pub line: usize, 9 | } 10 | 11 | pub struct LabelStat { 12 | pub name: String, 13 | } 14 | 15 | pub struct GotoStat { 16 | pub target: String, 17 | } 18 | 19 | pub struct DoStat { 20 | pub block: Block, 21 | } 22 | 23 | pub struct WhileStat { 24 | pub exp: Box, 25 | pub block: Block, 26 | } 27 | 28 | pub struct RepeatStat { 29 | pub block: Block, 30 | pub exp: Box, 31 | } 32 | 33 | pub struct IfStat { 34 | pub exps: Vec>, 35 | pub blocks: Vec, 36 | } 37 | 38 | type 
FuncCallStat = FuncCallExp; 39 | 40 | pub struct StepForStat { 41 | pub beg_line: usize, 42 | pub block_beg_line: usize, 43 | pub var_name: String, 44 | pub init_exp: Box, 45 | pub lim_exp: Box, 46 | pub step_exp: Box, 47 | pub block: Block, 48 | } 49 | 50 | pub struct RangeForStat { 51 | pub block_beg_line: usize, 52 | pub name_list: Vec, 53 | pub exp_list: Vec>, 54 | pub block: Block, 55 | } 56 | 57 | pub struct LocalVarDefStat { 58 | pub last_line: usize, 59 | pub name_list: Vec, 60 | pub exp_list: Vec>, 61 | } 62 | 63 | pub struct AssignStat { 64 | pub last_line: usize, 65 | pub var_list: Vec>, 66 | pub exp_list: Vec>, 67 | } 68 | 69 | pub struct LocalFuncDefStat { 70 | pub name: String, 71 | pub exp: FuncDefExp, 72 | } 73 | -------------------------------------------------------------------------------- /src/ast/ast_def/stmt_def/exp_def.rs: -------------------------------------------------------------------------------- 1 | use crate::ast::ast_def::stmt_def::Exp; 2 | use crate::ast::ast_def::stmt_def::block_def::Block; 3 | 4 | pub struct NilExp { 5 | pub line: usize, 6 | } 7 | 8 | pub struct TrueExp { 9 | pub line: usize, 10 | } 11 | 12 | pub struct FalseExp { 13 | pub line: usize, 14 | } 15 | 16 | pub struct IntegerExp { 17 | pub line: usize, 18 | pub num: i64, 19 | } 20 | 21 | pub struct FloatExp { 22 | pub line: usize, 23 | pub num: f64, 24 | } 25 | 26 | pub struct StringExp { 27 | pub line: usize, 28 | pub str: String, 29 | } 30 | 31 | pub struct IDExp { 32 | pub line: usize, 33 | pub name: String, 34 | } 35 | 36 | pub struct UnopExp { 37 | pub line: usize, 38 | pub op: usize, 39 | pub exp: Box, 40 | } 41 | 42 | pub struct BinopExp { 43 | pub line: usize, 44 | pub op: usize, 45 | pub left_exp: Box, 46 | pub right_exp: Box, 47 | } 48 | 49 | pub struct ConExp { 50 | pub line: usize, 51 | pub exps: Vec>, 52 | } 53 | 54 | pub struct TableConsExp { 55 | pub line: usize, 56 | pub last_line: usize, 57 | pub key_exps: Vec>, 58 | pub val_exps: Vec>, 59 | } 60 | 61 
| pub struct FuncDefExp { 62 | pub line: usize, 63 | pub last_line: usize, 64 | pub par_list: Vec, 65 | pub is_vararg: bool, 66 | pub block: Block, 67 | } 68 | 69 | pub struct ParensExp { 70 | pub in_exp: Box> 71 | } 72 | 73 | pub struct TableAccessExp { 74 | pub last_line: usize, 75 | pub prefix: Box, 76 | pub key: Box, 77 | } 78 | 79 | pub struct FuncCallExp { 80 | pub line: usize, 81 | pub last_line: usize, 82 | pub prefix: Box, 83 | pub name_exp: StringExp, 84 | pub args: Vec>, 85 | } -------------------------------------------------------------------------------- /src/ast/lexer/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod token; 2 | pub mod util; 3 | 4 | use std::iter::{Peekable, FromIterator, IntoIterator}; 5 | use std::collections::HashMap; 6 | use token::{get_key_word_map, get_opt_map, TokenType, Token}; 7 | use crate::ast::lexer::token::KeyWord; 8 | use std::vec::IntoIter; 9 | use std::ops::Not; 10 | 11 | pub struct Lexer { 12 | char_iter: Peekable>, 13 | opt_hash_map: HashMap, 14 | key_word_hash_map: HashMap, 15 | cur_line: usize, 16 | cur_column: usize, 17 | eof: bool, 18 | cur_token: Option, 19 | next_token: Option, 20 | } 21 | 22 | impl Lexer { 23 | pub fn new(source_code: &String) -> Lexer { 24 | let chars = Vec::from_iter(source_code.chars()); 25 | Lexer { 26 | char_iter: chars.into_iter().peekable(), 27 | opt_hash_map: get_opt_map(), 28 | key_word_hash_map: get_key_word_map(), 29 | cur_line: 1, 30 | cur_column: 1, 31 | eof: false, 32 | cur_token: None, 33 | next_token: None, 34 | } 35 | } 36 | 37 | fn iter_advance(&mut self, n: usize) { 38 | self.cur_column += n; 39 | } 40 | 41 | fn iter_new_line(&mut self) { 42 | self.cur_line += 1; 43 | self.cur_column = 1; 44 | } 45 | 46 | fn take_word(&mut self, filter: F) -> String 47 | where F: Fn(&char) -> bool { 48 | let mut res: String = String::new(); 49 | while if let Some(ch) = self.char_iter.peek() { 50 | filter(ch) 51 | } else { 52 | false 53 | } 
{ 54 | res.push(self.char_iter.next().unwrap()); 55 | } 56 | res 57 | } 58 | 59 | fn parse_id(&mut self) -> (TokenType, String) { 60 | let char_filter = |ch: &char| { 61 | ch.is_alphanumeric() || *ch == '_' 62 | }; 63 | //let id_str: String = self.char_iter.by_ref().take_while(char_filter).collect(); 64 | let id_str = self.take_word(char_filter); 65 | 66 | self.iter_advance(id_str.len()); 67 | match self.key_word_hash_map.get(&id_str) { 68 | Some(key_word) => (TokenType::OptKeyWord(key_word.clone()), id_str.clone()), 69 | None => (TokenType::ID(id_str.clone()), id_str.clone()) 70 | } 71 | } 72 | 73 | fn parse_number(&mut self) -> (TokenType, String) { 74 | let num_filter = |ch: &char| { 75 | ch.is_alphanumeric() || *ch == '.' || *ch == '_' 76 | }; 77 | let num_str: String = self.take_word(num_filter); 78 | 79 | self.iter_advance(num_str.len()); 80 | (TokenType::Number(num_str.parse::().unwrap_or_else(|_| { 81 | panic!("Parse number token error: '{}'", num_str.clone()) 82 | })), num_str.clone()) 83 | } 84 | 85 | fn parse_str(&mut self) -> (TokenType, String) { 86 | let str_filter = |ch: &char| { 87 | *ch != '"' 88 | }; 89 | self.char_iter.next(); 90 | let str: String = self.take_word(str_filter); 91 | 92 | self.char_iter.next(); 93 | self.iter_advance(str.len() + 2); 94 | (TokenType::String(str.clone()), str.clone()) 95 | } 96 | 97 | fn parser_operator(&mut self) -> (TokenType, String) { 98 | for len in (1..4).rev() { 99 | let ope_str: String = self.char_iter.clone().take(len).collect(); 100 | if let Some(key_word) = self.opt_hash_map.get(&ope_str).cloned() { 101 | self.char_iter.by_ref().take(len).count(); 102 | self.iter_advance(len); 103 | return (TokenType::OptKeyWord(key_word), ope_str); 104 | } 105 | } 106 | panic!("Parse operator token error at {}: {}", self.cur_line, self.cur_column) 107 | } 108 | 109 | fn skip_comment(&mut self) { 110 | while if let Some(ch) = self.char_iter.peek() { 111 | (|ch: &char| { 112 | *ch != '\n' 113 | })(ch) 114 | } else { 115 | 
false 116 | } { 117 | self.char_iter.next(); 118 | } 119 | self.char_iter.next(); 120 | self.iter_new_line(); 121 | } 122 | 123 | fn handle_sub_and_comment(&mut self, cur_line: usize, cur_column: usize) -> Option { 124 | self.char_iter.next(); 125 | self.iter_advance(1); 126 | let next_val = self.char_iter.peek().unwrap_or_else(|| { &'\n' }); 127 | if *next_val == '-' { 128 | self.char_iter.next(); 129 | self.skip_comment(); 130 | return self.get_next_token(); 131 | } 132 | Some(Token::new(TokenType::OptKeyWord(KeyWord::SUB), "-".to_string(), cur_line, cur_column)) 133 | } 134 | 135 | 136 | fn get_next_token(&mut self) -> Option { 137 | match self.char_iter.peek() { 138 | Some(val) => { 139 | let token_info: (TokenType, String); 140 | let cur_line = self.cur_line; 141 | let cur_column = self.cur_column; 142 | 143 | if val.is_numeric() { 144 | token_info = self.parse_number(); 145 | } else if val.is_alphanumeric() || *val == '_' { 146 | token_info = self.parse_id(); 147 | } else if *val == '"' { 148 | token_info = self.parse_str(); 149 | } else if !val.is_ascii_graphic() { 150 | loop { 151 | match self.char_iter.peek() { 152 | Some(ch) => { 153 | if *ch == '\n' { 154 | self.iter_new_line(); 155 | } else if ch.is_ascii_graphic().not() { 156 | self.iter_advance(1); 157 | } else { 158 | break; 159 | } 160 | } 161 | None => { 162 | break; 163 | } 164 | } 165 | self.char_iter.next(); 166 | } 167 | return self.get_next_token(); 168 | } else if *val == '-' { 169 | return self.handle_sub_and_comment(cur_line, cur_column); 170 | } else { 171 | token_info = self.parser_operator(); 172 | } 173 | Some(Token::new(token_info.0, token_info.1, cur_line, cur_column)) 174 | } 175 | None => { 176 | if !self.eof { 177 | self.eof = true; 178 | Some(Token::eof()) 179 | } else { 180 | None 181 | } 182 | } 183 | } 184 | } 185 | } 186 | 187 | impl Lexer { 188 | pub fn peek_token_type(&mut self) -> TokenType { 189 | if self.next_token.is_none() { 190 | self.next_token = 
self.get_next_token(); 191 | } 192 | self.next_token.unwrap().type_id 193 | } 194 | 195 | pub fn next_token(&mut self) -> Option { 196 | let ret_token = self.next_token.clone(); 197 | self.next_token = self.get_next_token(); 198 | ret_token 199 | } 200 | } 201 | 202 | impl Iterator for Lexer { 203 | type Item = Token; 204 | fn next(&mut self) -> Option { 205 | self.get_next_token() 206 | } 207 | } -------------------------------------------------------------------------------- /src/ast/lexer/token.rs: -------------------------------------------------------------------------------- 1 | use std::collections::HashMap; 2 | use crate::string_hash_map; 3 | use std::fmt::{Display, Formatter, Result, Debug}; 4 | 5 | #[allow(dead_code)] 6 | #[derive(Debug, Clone, Hash)] 7 | pub enum KeyWord { 8 | ADD, 9 | SUB, 10 | MUL, 11 | DIV, 12 | FDIV, 13 | LSH, 14 | RSH, 15 | MOD, 16 | POW, 17 | ASS, 18 | EQU, 19 | NEQ, 20 | GR, 21 | LE, 22 | GRE, 23 | LEE, 24 | CON, 25 | DOT, 26 | AND, 27 | OR, 28 | LEN, 29 | MIN, 30 | NOT, 31 | LSM, 32 | RSM, 33 | LMI, 34 | RMI, 35 | LLA, 36 | RLA, 37 | COM, 38 | SEM, 39 | COL, 40 | PATH, 41 | 42 | BRK, 43 | DO, 44 | ELS, 45 | ELI, 46 | END, 47 | FAL, 48 | FOR, 49 | FUN, 50 | IF, 51 | IN, 52 | LOC, 53 | NIL, 54 | REP, 55 | RET, 56 | THE, 57 | TRU, 58 | UNT, 59 | WHI, 60 | GOT, 61 | } 62 | 63 | impl KeyWord { 64 | pub fn is_binary_operator(&self) -> bool { 65 | match *self { 66 | KeyWord::ADD | 67 | KeyWord::SUB | 68 | KeyWord::MUL | 69 | KeyWord::DIV | 70 | KeyWord::FDIV | 71 | KeyWord::LSH | 72 | KeyWord::RSH | 73 | KeyWord::MOD | 74 | KeyWord::POW | 75 | KeyWord::ASS | 76 | KeyWord::EQU | 77 | KeyWord::NEQ | 78 | KeyWord::GR | 79 | KeyWord::LE | 80 | KeyWord::GRE | 81 | KeyWord::LEE | 82 | KeyWord::CON | 83 | KeyWord::DOT | 84 | KeyWord::COM | 85 | KeyWord::AND | 86 | KeyWord::OR => true, 87 | _ => false 88 | } 89 | } 90 | 91 | pub fn is_unique_operator(&self) -> bool { 92 | match *self { 93 | KeyWord::LEN | 94 | KeyWord::MIN | 95 | KeyWord::NOT 
=> true, 96 | _ => false 97 | } 98 | } 99 | 100 | pub fn is_divide_operator(&self) -> bool { 101 | match *self { 102 | KeyWord::LSM | 103 | KeyWord::RSM | 104 | KeyWord::LMI | 105 | KeyWord::RMI | 106 | KeyWord::LLA | 107 | KeyWord::RLA | 108 | KeyWord::PATH => true, 109 | _ => false 110 | } 111 | } 112 | 113 | pub fn get_display_str(&self) -> &'static str { 114 | match *self { 115 | KeyWord::ADD => "+", 116 | KeyWord::SUB => "-", 117 | KeyWord::MUL => "*", 118 | KeyWord::DIV => "/", 119 | KeyWord::FDIV => "//", 120 | KeyWord::LSH => "<<", 121 | KeyWord::RSH => ">>", 122 | KeyWord::MOD => "%", 123 | KeyWord::POW => "^", 124 | KeyWord::ASS => "=", 125 | KeyWord::EQU => "==", 126 | KeyWord::NEQ => "~=", 127 | KeyWord::GR => ">", 128 | KeyWord::LE => "<", 129 | KeyWord::GRE => ">=", 130 | KeyWord::LEE => "<=", 131 | KeyWord::CON => "..", 132 | KeyWord::DOT => ".", 133 | KeyWord::AND => "and", 134 | KeyWord::OR => "or", 135 | KeyWord::LEN => "#", 136 | KeyWord::MIN => "-", 137 | KeyWord::NOT => "not", 138 | KeyWord::LSM => "(", 139 | KeyWord::RSM => ")", 140 | KeyWord::LMI => "[", 141 | KeyWord::RMI => "]", 142 | KeyWord::LLA => "{", 143 | KeyWord::RLA => "}", 144 | KeyWord::COM => ",", 145 | KeyWord::SEM => ";", 146 | KeyWord::COL => ":", 147 | KeyWord::PATH => "::", 148 | 149 | KeyWord::BRK => "break", 150 | KeyWord::DO => "do", 151 | KeyWord::ELS => "else", 152 | KeyWord::ELI => "elseif", 153 | KeyWord::END => "end", 154 | KeyWord::FAL => "false", 155 | KeyWord::FOR => "for", 156 | KeyWord::FUN => "function", 157 | KeyWord::IF => "if", 158 | KeyWord::IN => "in", 159 | KeyWord::LOC => "local", 160 | KeyWord::NIL => "nil", 161 | KeyWord::REP => "repeat", 162 | KeyWord::RET => "return", 163 | KeyWord::THE => "then", 164 | KeyWord::TRU => "true", 165 | KeyWord::UNT => "until", 166 | KeyWord::WHI => "while", 167 | KeyWord::GOT => "goto", 168 | } 169 | } 170 | } 171 | 172 | impl PartialEq for KeyWord { 173 | fn eq(&self, key_word: &KeyWord) -> bool { 174 | *key_word == 
*self 175 | } 176 | } 177 | 178 | #[allow(dead_code)] 179 | #[derive(Debug, PartialEq, Eq, Clone)] 180 | pub enum TokenType { 181 | OptKeyWord(KeyWord), 182 | ID(String), 183 | String(String), 184 | Number(f64), 185 | EOF, 186 | } 187 | 188 | impl From for TokenType { 189 | fn from(key_word: KeyWord) -> TokenType { 190 | TokenType::OptKeyWord(key_word) 191 | } 192 | } 193 | 194 | impl TokenType { 195 | pub fn to_str(&self) -> &'static str { 196 | match self { 197 | TokenType::OptKeyWord(key_word) => key_word.get_display_str(), 198 | TokenType::Number(num) => "num", 199 | TokenType::String(str) => "str", 200 | TokenType::ID(str) => "id", 201 | _ => "eof" 202 | } 203 | } 204 | } 205 | 206 | impl PartialEq for TokenType { 207 | fn eq(&self, rhs: &TokenType) -> bool { 208 | match (self, rhs) { 209 | (TokenType::OptKeyWord(key_word1), TokenType::OptKeyWord(key_word2)) => 210 | key_word1.eq(key_word2), 211 | (TokenType::EOF, TokenType::EOF) | 212 | (TokenType::ID(_), TokenType::ID(_)) | 213 | (TokenType::String(_), TokenType::String(_)) | 214 | (TokenType::Number(_), TokenType::Number(_)) => true, 215 | _ => false 216 | } 217 | } 218 | } 219 | 220 | impl Display for TokenType { 221 | fn fmt(&self, f: &mut Formatter) -> Result { 222 | write!(f, "{}", self.to_str()) 223 | } 224 | } 225 | 226 | impl From for String { 227 | fn from(token_type: TokenType) -> String { 228 | token_type.to_string() 229 | } 230 | } 231 | 232 | #[derive(Debug, Clone)] 233 | pub struct Token { 234 | pub type_id: TokenType, 235 | pub raw_data: String, 236 | pub line: usize, 237 | pub column: usize, 238 | } 239 | 240 | impl Token { 241 | pub fn new(type_id: TokenType, raw_data: String, line: usize, column: usize) -> Token { 242 | Token { type_id, raw_data, line, column } 243 | } 244 | pub fn get_id(&self) -> Option<&String> { 245 | match self.type_id { 246 | TokenType::ID(ref id) => Some(id), 247 | _ => None 248 | } 249 | } 250 | 251 | pub fn get_num(&self) -> Option<&f64> { 252 | match self.type_id 
{ 253 | TokenType::Number(ref num) => Some(num), 254 | _ => None 255 | } 256 | } 257 | 258 | pub fn get_key_word(&self) -> Option<&KeyWord> { 259 | match self.type_id { 260 | TokenType::OptKeyWord(ref key) => Some(key), 261 | _ => None 262 | } 263 | } 264 | 265 | 266 | pub fn eof() -> Token { 267 | Token { 268 | type_id: TokenType::EOF, 269 | raw_data: "".to_string(), 270 | line: 0, 271 | column: 0, 272 | } 273 | } 274 | } 275 | 276 | impl PartialEq for KeyWord { 277 | fn eq(&self, token: &Token) -> bool { 278 | token.type_id == TokenType::from(self.clone()) 279 | } 280 | } 281 | 282 | impl PartialEq for Token { 283 | fn eq(&self, token: &Token) -> bool { 284 | token.raw_data == self.raw_data 285 | } 286 | } 287 | 288 | impl Display for Token { 289 | fn fmt(&self, f: &mut Formatter) -> Result { 290 | write!(f, "Token: {{type: {}, raw: {}, line: {}, column: {}}}", 291 | self.type_id, self.raw_data, self.line, self.column) 292 | } 293 | } 294 | 295 | pub fn get_opt_map() -> HashMap { 296 | string_hash_map![ 297 | "+" => KeyWord::ADD, 298 | "-" => KeyWord::SUB, 299 | "*" => KeyWord::MUL, 300 | "/" => KeyWord::DIV, 301 | "//" => KeyWord::FDIV, 302 | "<<" => KeyWord::LSH, 303 | ">>" => KeyWord::RSH, 304 | "%" => KeyWord::MOD, 305 | "^" => KeyWord::POW, 306 | "=" => KeyWord::ASS, 307 | "==" => KeyWord::EQU, 308 | "~=" => KeyWord::NEQ, 309 | ">" => KeyWord::GR, 310 | "<" => KeyWord::LE, 311 | ">=" => KeyWord::GRE, 312 | "<=" => KeyWord::LEE, 313 | ".." => KeyWord::CON, 314 | "." 
=> KeyWord::DOT, 315 | "and" => KeyWord::AND, 316 | "or" => KeyWord::OR, 317 | "#" => KeyWord::LEN, 318 | "-" => KeyWord::MIN, 319 | "not" => KeyWord::NOT, 320 | "(" => KeyWord::LSM, 321 | ")" => KeyWord::RSM, 322 | "[" => KeyWord::LMI, 323 | "]" => KeyWord::RMI, 324 | "{" => KeyWord::LLA, 325 | "}" => KeyWord::RLA, 326 | "," => KeyWord::COM, 327 | ";" => KeyWord::SEM, 328 | ":" => KeyWord::COL, 329 | "::" => KeyWord::PATH 330 | ] 331 | } 332 | 333 | pub fn get_key_word_map() -> HashMap { 334 | string_hash_map![ 335 | "break" => KeyWord::BRK, 336 | "do" => KeyWord::DO, 337 | "else" => KeyWord::ELS, 338 | "elseif" => KeyWord::ELI, 339 | "end" => KeyWord::END, 340 | "false" => KeyWord::FAL, 341 | "for" => KeyWord::FOR, 342 | "function" => KeyWord::FUN, 343 | "if" => KeyWord::IF, 344 | "in" => KeyWord::IN, 345 | "local" => KeyWord::LOC, 346 | "nil" => KeyWord::NIL, 347 | "repeat" => KeyWord::REP, 348 | "return" => KeyWord::RET, 349 | "then" => KeyWord::THE, 350 | "true" => KeyWord::TRU, 351 | "until" => KeyWord::UNT, 352 | "while" => KeyWord::WHI, 353 | "goto" => KeyWord::GOT 354 | ] 355 | } -------------------------------------------------------------------------------- /src/ast/parser/mod.rs: -------------------------------------------------------------------------------- 1 | use crate::ast::lexer::Lexer; 2 | use crate::ast::lexer::token::{Token, TokenType, KeyWord}; 3 | use std::fmt::{Display, Formatter}; 4 | use crate::ast::ast_def::stmt_def::block_def::Block; 5 | use crate::ast::ast_def::stmt_def::{Stat, Exp, StatType}; 6 | use crate::ast::ast_def::stmt_def::stat_def::*; 7 | use crate::ast::ast_def::stmt_def::exp_def::{TrueExp, IntegerExp, FuncDefExp}; 8 | 9 | macro_rules! 
tk_from_kw {
    ($KEYWORD: expr) => { TokenType::from($KEYWORD) }
}

/// Recursive-descent parser for the Lua grammar, driven by single-token
/// lookahead over `Lexer`.
pub struct Parser {
    // Sole token source; only `peek_token_type` / `next_token` are used.
    lexer: Lexer,
}

impl Parser {
    /// Creates a parser reading tokens from `source_code`.
    fn new(source_code: &String) -> Parser {
        Parser { lexer: Lexer::new(source_code) }
    }
}

// Token-level helpers shared by the statement/expression parsers.
impl Parser {
    /// Returns true when `token_type` terminates a block:
    /// `return`, `end`, `elseif`, `else`, `until`, or end of input.
    fn is_ret_or_block_end(token_type: &TokenType) -> bool {
        match token_type {
            TokenType::OptKeyWord(key_word) => matches!(
                key_word,
                KeyWord::RET | KeyWord::END | KeyWord::ELI | KeyWord::ELS | KeyWord::UNT
            ),
            TokenType::EOF => true,
            _ => false,
        }
    }

    /// Consumes the next token; panics if it is not `expected_type`.
    fn expected_token(&mut self, expected_type: TokenType) {
        if !self.lexer.peek_token_type().eq(&expected_type) {
            panic!("expected: {}", expected_type);
        }
        self.lexer.next_token();
    }

    /// Consumes and returns the next token; panics unless it is an identifier.
    fn expected_id(&mut self) -> Token {
        match self.lexer.peek_token_type() {
            TokenType::ID(_) => self.lexer.next_token().unwrap(),
            _ => panic!("expected id"),
        }
    }
}

// Statement parsing.
impl Parser {
    /// Dispatches on the lookahead token to the matching `stat` production.
    fn parse_stat(&mut self) -> Box<dyn Stat> {
        match self.lexer.peek_token_type() {
            TokenType::OptKeyWord(key_word) => match key_word {
                KeyWord::SEM => self.parse_empty_stat(),
                KeyWord::BRK => self.parse_break_stat(),
                KeyWord::PATH => self.parse_label_stat(),
                KeyWord::GOT => self.parse_goto_stat(),
                KeyWord::DO => self.parse_do_stat(),
                KeyWord::WHI => self.parse_while_stat(),
                KeyWord::REP => self.parse_repeat_stat(),
                KeyWord::IF => self.parse_if_stat(),
                KeyWord::FOR => self.parse_for_stat(),
                KeyWord::FUN => self.parse_func_def_stat(),
                KeyWord::LOC => self.parse_local_stat(),
                _ => self.parse_func_call_or_assign_stat(),
            },
            _ => self.parse_func_call_or_assign_stat(),
        }
    }

    /// stat ::= ';'
    fn parse_empty_stat(&mut self) -> Box<dyn Stat> {
        self.expected_token(tk_from_kw!(KeyWord::SEM));
        Box::new(EmptyStat {})
    }

    /// stat ::= 'break'
    fn parse_break_stat(&mut self) -> Box<dyn Stat> {
        self.expected_token(tk_from_kw!(KeyWord::BRK));
        // TODO: thread real line numbers through; 0 is a placeholder.
        Box::new(BreakStat { line: 0 })
    }

    /// stat ::= '::' Name '::'
    fn parse_label_stat(&mut self) -> Box<dyn Stat> {
        self.expected_token(tk_from_kw!(KeyWord::PATH));
        let name = self.expected_id().raw_data;
        self.expected_token(tk_from_kw!(KeyWord::PATH));
        Box::new(LabelStat { name })
    }

    /// stat ::= 'goto' Name
    fn parse_goto_stat(&mut self) -> Box<dyn Stat> {
        self.expected_token(tk_from_kw!(KeyWord::GOT));
        let name = self.expected_id().raw_data;
        Box::new(GotoStat { target: name })
    }

    /// stat ::= 'do' block 'end'
    fn parse_do_stat(&mut self) -> Box<dyn Stat> {
        self.expected_token(tk_from_kw!(KeyWord::DO));
        let block = self.parse();
        self.expected_token(tk_from_kw!(KeyWord::END));
        Box::new(DoStat { block })
    }

    /// stat ::= 'while' exp 'do' block 'end'
    fn parse_while_stat(&mut self) -> Box<dyn Stat> {
        self.expected_token(tk_from_kw!(KeyWord::WHI));
        let exp = self.parse_exp();
        self.expected_token(tk_from_kw!(KeyWord::DO));
        let block = self.parse();
        self.expected_token(tk_from_kw!(KeyWord::END));
        Box::new(WhileStat { exp, block })
    }

    /// stat ::= 'repeat' block 'until' exp
    fn parse_repeat_stat(&mut self) -> Box<dyn Stat> {
        self.expected_token(tk_from_kw!(KeyWord::REP));
        let block = self.parse();
        self.expected_token(tk_from_kw!(KeyWord::UNT));
        let exp = self.parse_exp();
        Box::new(RepeatStat { block, exp })
    }

    /// stat ::= 'if' exp 'then' block {'elseif' exp 'then' block}
    ///          ['else' block] 'end'
    fn parse_if_stat(&mut self) -> Box<dyn Stat> {
        let mut exps = Vec::new();
        let mut blocks = Vec::new();

        self.expected_token(tk_from_kw!(KeyWord::IF));
        exps.push(self.parse_exp());
        self.expected_token(tk_from_kw!(KeyWord::THE));
        blocks.push(self.parse());

        while self.lexer.peek_token_type().eq(&tk_from_kw!(KeyWord::ELI)) {
            self.lexer.next_token();
            exps.push(self.parse_exp());
            self.expected_token(tk_from_kw!(KeyWord::THE));
            blocks.push(self.parse());
        }

        if self.lexer.peek_token_type().eq(&tk_from_kw!(KeyWord::ELS)) {
            // BUGFIX: the 'else' token itself was never consumed, so the
            // else-block parser stopped immediately on the pending ELS.
            self.lexer.next_token();
            // 'else' desugars to 'elseif true'.
            exps.push(Box::new(TrueExp { line: 0 }));
            blocks.push(self.parse());
        }

        // BUGFIX: consume the closing 'end'; previously it leaked to the
        // enclosing block and terminated it early.
        self.expected_token(tk_from_kw!(KeyWord::END));

        Box::new(IfStat { exps, blocks })
    }

    /// stat ::= 'for' Name '=' … | 'for' namelist 'in' …
    /// The first name is consumed here; an '=' lookahead selects the
    /// numeric (step) form, anything else the generic (range) form.
    fn parse_for_stat(&mut self) -> Box<dyn Stat> {
        self.expected_token(tk_from_kw!(KeyWord::FOR));
        let name = self.expected_id().raw_data;
        if self.lexer.peek_token_type().eq(&tk_from_kw!(KeyWord::ASS)) {
            self.parse_step_for_stat(name)
        } else {
            self.parse_range_for_stat(name)
        }
    }

    /// stat ::= 'for' Name '=' exp ',' exp [',' exp] 'do' block 'end'
    /// `first_val` is the loop variable, already consumed by `parse_for_stat`.
    fn parse_step_for_stat(&mut self, first_val: String) -> Box<dyn Stat> {
        self.expected_token(tk_from_kw!(KeyWord::ASS));
        let init_exp = self.parse_exp();
        self.expected_token(tk_from_kw!(KeyWord::COM));
        let lim_exp = self.parse_exp();

        let step_exp: Box<dyn Exp> = if self.lexer.peek_token_type().eq(&tk_from_kw!(KeyWord::COM)) {
            self.lexer.next_token();
            self.parse_exp()
        } else {
            // Lua defaults the step to 1 when omitted.
            Box::new(IntegerExp { line: 0, num: 1 })
        };

        self.expected_token(tk_from_kw!(KeyWord::DO));
        let block = self.parse();
        self.expected_token(tk_from_kw!(KeyWord::END));

        Box::new(StepForStat {
            beg_line: 0,
            block_beg_line: 0,
            // BUGFIX: was `var_name: name`, an undefined identifier;
            // the parameter is `first_val`.
            var_name: first_val,
            init_exp,
            lim_exp,
            step_exp,
            block,
        })
    }

    /// stat ::= 'for' namelist 'in' explist 'do' block 'end'
    /// `first_val` is the first loop name, already consumed by `parse_for_stat`.
    fn parse_range_for_stat(&mut self, first_val: String) -> Box<dyn Stat> {
        // BUGFIX: the already-consumed first name was previously discarded;
        // seed the name list with it and continue on commas.
        let mut name_list = vec![first_val];
        while self.lexer.peek_token_type().eq(&tk_from_kw!(KeyWord::COM)) {
            self.lexer.next_token();
            name_list.push(self.expected_id().raw_data);
        }
        // BUGFIX: Lua's generic for uses the 'in' keyword here; the original
        // expected ASS ('='), which is unreachable because an '=' routes to
        // parse_step_for_stat. NOTE(review): assumes the lexer defines
        // KeyWord::IN — confirm against token.rs.
        self.expected_token(tk_from_kw!(KeyWord::IN));
        let exp_list = self.parse_exp_list();
        self.expected_token(tk_from_kw!(KeyWord::DO));
        let block = self.parse();
        self.expected_token(tk_from_kw!(KeyWord::END));
        Box::new(RangeForStat { block_beg_line: 0, name_list, exp_list, block })
    }

    /// stat ::= 'function' funcname funcbody — not yet implemented.
    /// BUGFIX: return type was `FuncDefExp`, but the `parse_stat` dispatch
    /// arm requires `Box<dyn Stat>`.
    fn parse_func_def_stat(&mut self) -> Box<dyn Stat> {
        todo!("function-definition statement parsing")
    }

    /// stat ::= 'local' ('function' Name funcbody | namelist ['=' explist])
    fn parse_local_stat(&mut self) -> Box<dyn Stat> {
        self.expected_token(tk_from_kw!(KeyWord::LOC));
        if self.lexer.peek_token_type().eq(&tk_from_kw!(KeyWord::FUN)) {
            // BUGFIX: was `parse_local_func_def_stat`, which does not exist;
            // the method is defined as `parse_local_func_stat`.
            self.parse_local_func_stat()
        } else {
            self.parse_local_val_stat()
        }
    }

    /// stat ::= 'local' 'function' Name funcbody
    fn parse_local_func_stat(&mut self) -> Box<dyn Stat> {
        self.expected_token(tk_from_kw!(KeyWord::FUN));
        let name = self.expected_id().raw_data;
        let func_body = self.parse_func_def_exp();
        Box::new(LocalFuncDefStat { name, exp: func_body })
    }

    /// funcbody ::= '(' [parlist] ')' block 'end' — not yet implemented.
    /// BUGFIX: `parse_local_func_stat` needs a `FuncDefExp`, but
    /// `parse_func_def_stat` is a statement parser; split out the
    /// expression-level helper.
    fn parse_func_def_exp(&mut self) -> FuncDefExp {
        todo!("function-body parsing")
    }

    /// stat ::= 'local' namelist ['=' explist]
    fn parse_local_val_stat(&mut self) -> Box<dyn Stat> {
        let name_list = self.parse_name_list();
        let mut exp_list = Vec::new();
        if self.lexer.peek_token_type().eq(&tk_from_kw!(KeyWord::ASS)) {
            self.lexer.next_token();
            // BUGFIX: was `exps = …`, an undefined identifier; the
            // initializers belong in `exp_list`.
            exp_list = self.parse_exp_list();
        }
        Box::new(LocalVarDefStat { last_line: 0, name_list, exp_list })
    }

    /// stat ::= functioncall | varlist '=' explist — not yet implemented.
    fn parse_func_call_or_assign_stat(&mut self) -> Box<dyn Stat> {
        todo!("call/assignment statement parsing")
    }

    /// stat ::= functioncall — not yet implemented.
    fn parse_func_call_stat(&mut self) -> Box<dyn Stat> {
        todo!("function-call statement parsing")
    }

    /// stat ::= varlist '=' explist — not yet implemented.
    fn parse_assign_stat(&mut self) -> Box<dyn Stat> {
        todo!("assignment statement parsing")
    }
}

// List productions.
impl Parser {
    /// explist ::= exp {',' exp}
    fn parse_exp_list(&mut self) -> Vec<Box<dyn Exp>> {
        let mut exps = vec![self.parse_exp()];
        while self.lexer.peek_token_type().eq(&tk_from_kw!(KeyWord::COM)) {
            self.lexer.next_token();
            exps.push(self.parse_exp());
        }
        exps
    }

    /// namelist ::= Name {',' Name}
    fn parse_name_list(&mut self) -> Vec<String> {
        let mut names = vec![self.expected_id().raw_data];
        while self.lexer.peek_token_type().eq(&tk_from_kw!(KeyWord::COM)) {
            self.lexer.next_token();
            names.push(self.expected_id().raw_data);
        }
        names
    }
}

// Expression parsing.
impl Parser {
    /// exp ::= … — expression parsing is not yet implemented.
    fn parse_exp(&mut self) -> Box<dyn Exp> {
        todo!("expression parsing")
    }

    /// prefixexp ::= var | functioncall | '(' exp ')' — not yet implemented.
    fn parse_prefix_exp(&mut self) -> Box<dyn Exp> {
        todo!("prefix-expression parsing")
    }
}

// Block-level entry points.
impl Parser {
    /// Parses statements until a block terminator; empty statements (';')
    /// are dropped rather than stored.
    fn parse_stats(&mut self) -> Vec<Box<dyn Stat>> {
        let mut stats = Vec::new();
        while !Parser::is_ret_or_block_end(&self.lexer.peek_token_type()) {
            let stat = self.parse_stat();
            if let StatType::EmptyStatTag = stat.get_type() {
                continue;
            }
            stats.push(stat);
        }
        stats
    }

    /// retstat ::= 'return' [explist] [';'] — returns `None` when the block
    /// has no return statement, `Some(exps)` (possibly empty) otherwise.
    fn parse_ret_exps(&mut self) -> Option<Vec<Box<dyn Exp>>> {
        let mut exps = Vec::new();
        // BUGFIX: `eq` takes a reference; the original passed by value.
        if !self.lexer.peek_token_type().eq(&tk_from_kw!(KeyWord::RET)) {
            return None;
        }
        self.lexer.next_token();
        Some(match self.lexer.peek_token_type() {
            TokenType::OptKeyWord(key_word) => match key_word {
                // Bare `return` directly before a block terminator.
                KeyWord::END | KeyWord::ELI | KeyWord::ELS | KeyWord::UNT => exps,
                KeyWord::SEM => {
                    self.lexer.next_token();
                    exps
                }
                // NOTE(review): keyword-led expressions ('nil', 'true',
                // 'function', 'not', …) reject here — confirm how the lexer
                // classifies those tokens.
                _ => panic!("grammar error!"),
            },
            TokenType::EOF => exps,
            _ => {
                exps = self.parse_exp_list();
                // BUGFIX: `eq` takes a reference; the original passed by value.
                if self.lexer.peek_token_type().eq(&tk_from_kw!(KeyWord::SEM)) {
                    self.lexer.next_token();
                }
                exps
            }
        })
    }

    /// block ::= {stat} [retstat] — the public entry point.
    pub fn parse(&mut self) -> Block {
        let stats = self.parse_stats();
        let opt_ret_exps = self.parse_ret_exps();
        Block::new(0, stats, opt_ret_exps)
    }
}