├── .gitignore ├── src ├── vm │ ├── mod.rs │ └── instruction.rs ├── lib.rs ├── parser │ ├── mod.rs │ ├── block.rs │ ├── parser.rs │ ├── operator.rs │ ├── expression.rs │ └── statement.rs ├── main.rs ├── lexer │ ├── mod.rs │ ├── number.rs │ ├── string.rs │ └── lexer.rs └── binary │ ├── mod.rs │ └── reader.rs ├── README.md ├── Cargo.toml ├── LICENSE ├── .github └── workflows │ └── ci.yml └── Cargo.lock /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | -------------------------------------------------------------------------------- /src/vm/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod instruction; 2 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Rua 2 | ======== 3 | 4 | Implements `Lua` in rust. 5 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | #[macro_use] 2 | extern crate if_chain; 3 | 4 | mod lexer; 5 | mod parser; 6 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "rua" 3 | version = "0.1.0" 4 | authors = ["condy "] 5 | edition = "2018" 6 | 7 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 8 | 9 | [dependencies] 10 | byteorder = "1.3.4" 11 | phf = { version = "0.8", features = ["macros"] } 12 | if_chain = "1.0.1" -------------------------------------------------------------------------------- /src/parser/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod block; 2 | pub use self::block::*; 3 | 4 | pub mod operator; 5 | pub use 
self::operator::*; 6 | 7 | pub mod expression; 8 | pub use self::expression::*; 9 | 10 | pub mod statement; 11 | pub use self::statement::*; 12 | 13 | pub mod parser; 14 | pub use self::parser::*; 15 | 16 | mod consteval; 17 | use self::consteval::*; 18 | -------------------------------------------------------------------------------- /src/main.rs: -------------------------------------------------------------------------------- 1 | use std::env; 2 | use std::fs::File; 3 | 4 | mod binary; 5 | use binary::undump; 6 | 7 | fn main() { 8 | let mut args = env::args(); 9 | if args.len() > 1 { 10 | let f = File::open(args.nth(1).unwrap()).expect("Failed to open file"); 11 | let proto = undump(f).expect("undump failed"); 12 | println!("proto = {:?}", proto); 13 | } 14 | 15 | println!("Hello, world!"); 16 | } 17 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Youmu 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /src/lexer/mod.rs: -------------------------------------------------------------------------------- 1 | use std::char; 2 | 3 | pub mod lexer; 4 | pub use self::lexer::*; 5 | 6 | pub mod string; 7 | pub use self::string::*; 8 | 9 | pub mod number; 10 | pub use self::number::*; 11 | 12 | pub(in crate::lexer) fn from_u8(c: u8) -> char { 13 | char::from_u32(c as u32).unwrap_or(char::REPLACEMENT_CHARACTER) 14 | } 15 | 16 | pub(in crate::lexer) fn from_u16(c: u16) -> char { 17 | char::from_u32(c as u32).unwrap_or(char::REPLACEMENT_CHARACTER) 18 | } 19 | 20 | pub(in crate::lexer) fn ascii_to_hexdigit(c: u8) -> Option { 21 | match c { 22 | b'0' => Some(0), 23 | b'1' => Some(1), 24 | b'2' => Some(2), 25 | b'3' => Some(3), 26 | b'4' => Some(4), 27 | b'5' => Some(5), 28 | b'6' => Some(6), 29 | b'7' => Some(7), 30 | b'8' => Some(8), 31 | b'9' => Some(9), 32 | b'a' | b'A' => Some(10), 33 | b'b' | b'B' => Some(11), 34 | b'c' | b'C' => Some(12), 35 | b'd' | b'D' => Some(13), 36 | b'e' | b'E' => Some(14), 37 | b'f' | b'F' => Some(15), 38 | _ => None, 39 | } 40 | } 41 | 42 | pub(in crate::lexer) fn ascii_to_digit(c: u8) -> Option { 43 | match c { 44 | b'0' => Some(0), 45 | b'1' => Some(1), 46 | b'2' => Some(2), 47 | b'3' => Some(3), 48 | b'4' => Some(4), 49 | b'5' => Some(5), 50 | b'6' => Some(6), 51 | b'7' => Some(7), 52 | b'8' => Some(8), 53 | b'9' => Some(9), 54 | _ => None, 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: [push, 
pull_request] 4 | 5 | jobs: 6 | check: 7 | runs-on: ubuntu-latest 8 | 9 | env: 10 | RUSTFLAGS: "-D warnings" 11 | 12 | steps: 13 | - name: Checkout sources 14 | uses: actions/checkout@v2 15 | 16 | - name: Install stable toolchain 17 | uses: actions-rs/toolchain@v1 18 | with: 19 | profile: minimal 20 | toolchain: stable 21 | override: true 22 | 23 | - name: Check 24 | run: cargo check --all --all-features --all-targets 25 | 26 | test: 27 | runs-on: ${{ matrix.os }} 28 | strategy: 29 | matrix: 30 | os: [ubuntu-latest, macOS-latest, windows-latest] 31 | rust: [stable] 32 | 33 | env: 34 | RUSTFLAGS: "-D warnings" 35 | 36 | steps: 37 | - uses: actions-rs/toolchain@v1 38 | with: 39 | toolchain: ${{ matrix.rust }} 40 | profile: minimal 41 | - uses: actions/checkout@master 42 | - name: Test 43 | run: cargo test --all --all-features 44 | 45 | lints: 46 | name: Lints 47 | runs-on: ubuntu-latest 48 | steps: 49 | - name: Checkout sources 50 | uses: actions/checkout@v2 51 | 52 | - name: Install stable toolchain 53 | uses: actions-rs/toolchain@v1 54 | with: 55 | profile: minimal 56 | toolchain: stable 57 | override: true 58 | components: rustfmt, clippy 59 | 60 | - name: Run cargo fmt 61 | uses: actions-rs/cargo@v1 62 | with: 63 | command: fmt 64 | args: --all -- --check 65 | 66 | - name: Run cargo clippy 67 | uses: actions-rs/cargo@v1 68 | with: 69 | command: clippy 70 | args: -- -D warnings 71 | -------------------------------------------------------------------------------- /src/parser/block.rs: -------------------------------------------------------------------------------- 1 | use std::io; 2 | 3 | use super::{Parser, ParserError, ReturnStatement, Statement}; 4 | use crate::lexer::Token; 5 | 6 | /// A block is a list of statements, which are executed sequentially: 7 | /// 8 | /// Block := { Statement } 9 | /// 10 | /// Lua has empty statements that allow you to separate statements with 11 | /// semicolons, start a block with a semicolon or write two semicolons in 12 | /// 
sequence: 13 | /// 14 | /// Statement := ';' 15 | /// 16 | /// A block can be explicitly delimited to produce a single statement: 17 | /// 18 | /// Statement := **do** Block **end** 19 | /// 20 | /// Explicit blocks are useful to control the scope of variable declarations. 21 | /// Explicit blocks are also sometimes used to add a return statement in the 22 | /// middle of another block. 23 | #[derive(Debug, PartialEq)] 24 | pub struct Block { 25 | pub stmts: Vec, 26 | pub retstmt: Option, 27 | } 28 | 29 | impl<'a, S: io::Read> Parser<'a, S> { 30 | /// Parses a block. 31 | /// 32 | /// A block can be in the following structures: 33 | /// 34 | /// 1. **while** expression **do** block **end** 35 | /// 2. **repeat** block **until** expression 36 | /// 3. **if** expression **then** block **else** block **end** 37 | /// 4. **if** expression **then** block **elseif** expression **then** block **end** 38 | /// 5. **do** block **end** in **for**loop or standalone 39 | /// 6. and function body 40 | /// 41 | /// where the bold are keyworkds. 42 | pub(crate) fn parse_block(&mut self) -> Result { 43 | let mut stmts = Vec::new(); 44 | let mut retstmt = None; 45 | 46 | loop { 47 | match self.peek(0)? 
{ 48 | Some(&Token::SemiColon) => { 49 | self.advance(1); 50 | } 51 | Some(&Token::Return) => { 52 | retstmt = Some(self.parse_return_statement()?); 53 | break; 54 | } 55 | Some(&Token::End) | Some(&Token::Until) | Some(&Token::Else) 56 | | Some(&Token::ElseIf) => break, 57 | None => break, 58 | _ => { 59 | stmts.push(self.parse_statement()?); 60 | } 61 | } 62 | } 63 | 64 | Ok(Block { stmts, retstmt }) 65 | } 66 | } 67 | -------------------------------------------------------------------------------- /src/binary/mod.rs: -------------------------------------------------------------------------------- 1 | use std::fs::File; 2 | use std::io::{self, BufReader}; 3 | use std::mem; 4 | 5 | mod reader; 6 | use reader::Reader; 7 | 8 | const RUA_SIGNATURE: &[u8; 4] = b"\x1bLua"; 9 | 10 | const RUA_MAJOR_VERSION: u8 = 5; 11 | const RUA_MINOR_VERSION: u8 = 3; 12 | const RUA_RELEASE_VERSION: u8 = 0; 13 | const RUA_VERSION: u8 = RUA_MAJOR_VERSION * 16 + RUA_MINOR_VERSION; 14 | 15 | const RUA_FORMAT: u8 = 0; // This it the official format 16 | const RUA_DATA: &[u8; 6] = b"\x19\x93\r\n\x1a\n"; // Lua 1.0 released at 1993 17 | const RUA_INT_SIZE: u8 = mem::size_of::() as u8; 18 | const RUA_SIZET_SIZE: u8 = mem::size_of::() as u8; 19 | const RUA_INSTRUCTION_SIZE: u8 = mem::size_of::() as u8; 20 | const RUA_INTEGER_SIZE: u8 = mem::size_of::() as u8; 21 | const RUA_NUMBER_SIZE: u8 = mem::size_of::() as u8; 22 | const RUA_INTEGER_DATA: i64 = 0x5678; 23 | const RUA_NUMBER_DATA: f64 = 370.5; 24 | 25 | /// The constants in Lua 26 | #[derive(PartialEq, Debug)] 27 | pub enum Constant { 28 | Nil, 29 | Boolean(bool), 30 | Number(f64), 31 | Integer(i64), 32 | Str(String), 33 | } 34 | 35 | /// The local variable in Lua 36 | #[derive(Eq, PartialEq, Debug, Hash)] 37 | pub struct LocalVariable { 38 | pub name: String, 39 | pub start_pc: u32, 40 | pub end_pc: u32, 41 | } 42 | 43 | /// TODO 44 | #[derive(Eq, PartialEq, Debug, Hash)] 45 | pub struct UpValue { 46 | pub instack: u8, 47 | pub idx: u8, 
48 | } 49 | 50 | /// The prototype of a function 51 | #[derive(PartialEq, Debug)] 52 | pub struct Prototype { 53 | /// The source name where defined current function. 54 | /// 55 | /// If the function is anonymous function, the source name is empty. 56 | /// 57 | /// If the source starts with "@", it means the binary chunk is indeed 58 | /// compiled from the `Lua` source file. After removing the '@', the real 59 | /// file name is obtained. 60 | /// 61 | /// If the source starts with "=", it has special meaning (e.g. "=stdin" 62 | /// indicates that the binary chunk is compiled from standard input). 63 | /// 64 | /// If there is no "=", it indicates that the binary chunk is compiled from 65 | /// the string provided by the programmer, the source stores the string. 66 | pub source: Option, 67 | /// The first line of the function 68 | pub first_line: u32, 69 | /// The last line of the function 70 | pub last_line: u32, 71 | /// The number of fixed parameters of the function. The fixed parameters 72 | /// here are relative to the variadic length parameters (vararg). 73 | pub params: u8, 74 | /// Is it a variadic function? 75 | pub variadic: u8, 76 | /// TODO The number of register 77 | pub max_stack_size: u8, 78 | /// The instructions table. 79 | /// 80 | /// Each instruction occupied 4 bytes. 81 | pub instructions: Vec, 82 | /// The constant table is used to store literals that appear in Lua code, 83 | /// including `nil`, `boolean`, `integer`, `floating point number`, and 84 | /// `string`. 85 | /// 86 | /// Each constant starts with a 1-byte tag to identify what type of constant 87 | /// value is stored subsequently. 88 | pub constants: Vec, 89 | /// TODO 90 | /// 91 | /// an `UpValue` takes 2 bytes 92 | pub upvalues: Vec, 93 | /// Sub-prototypes 94 | pub protos: Vec, 95 | /// The line information of each instruction. 
96 | pub line_infos: Vec, 97 | /// The local variable table 98 | pub local_vars: Vec, 99 | /// TODO 100 | /// 101 | /// The name of an `UpValue` 102 | pub upvalue_names: Vec, 103 | } 104 | 105 | /// 106 | pub fn undump(file: File) -> io::Result { 107 | let mut bufr = BufReader::new(file); 108 | let mut reader = Reader::new(&mut bufr); 109 | 110 | // Checks the magic number 111 | assert_eq!( 112 | reader.read_bytes(4)?, 113 | RUA_SIGNATURE, 114 | "not a precompiled chunk" 115 | ); 116 | 117 | // Checks version 118 | assert_eq!(reader.read_byte()?, RUA_VERSION, "version mismatch"); 119 | 120 | // Checks format 121 | assert_eq!(reader.read_byte()?, RUA_FORMAT, "format mismatch"); 122 | 123 | // Checks data 124 | assert_eq!(reader.read_bytes(6)?, RUA_DATA, "corrupted"); 125 | 126 | // Checks the size of int 127 | assert_eq!(reader.read_byte()?, RUA_INT_SIZE, "sizeof(int) mismatch"); 128 | 129 | // Checks the size of size_t 130 | assert_eq!( 131 | reader.read_byte()?, 132 | RUA_SIZET_SIZE, 133 | "sizeof(size_t) mismatch" 134 | ); 135 | 136 | // Checks the size of instruction 137 | assert_eq!( 138 | reader.read_byte()?, 139 | RUA_INSTRUCTION_SIZE, 140 | "sizeof(instruction) mismatch" 141 | ); 142 | 143 | // Checks the size of Integer 144 | assert_eq!( 145 | reader.read_byte()?, 146 | RUA_INTEGER_SIZE, 147 | "sizeof(Integer) mismatch" 148 | ); 149 | 150 | // Checks the size of Number 151 | assert_eq!( 152 | reader.read_byte()?, 153 | RUA_NUMBER_SIZE, 154 | "sizeof(Number) mismatch" 155 | ); 156 | 157 | // Checks the endianness of Integer 158 | assert_eq!( 159 | reader.read_integer()?, 160 | RUA_INTEGER_DATA, 161 | "endianness mismatch" 162 | ); 163 | 164 | // Checks the format of Number 165 | assert!( 166 | (reader.read_number()? - RUA_NUMBER_DATA).abs() < std::f64::EPSILON, 167 | "float format mismatch" 168 | ); 169 | 170 | // TODO 171 | let _upvalues = reader.read_byte()?; 172 | 173 | Ok(reader.read_prototype()?) 
174 | } 175 | -------------------------------------------------------------------------------- /src/lexer/number.rs: -------------------------------------------------------------------------------- 1 | use std::f64; 2 | use std::io; 3 | 4 | use super::{ascii_to_digit, ascii_to_hexdigit}; 5 | use super::{Lexer, LexerError, Token}; 6 | 7 | impl<'a, S: io::Read> Lexer<'a, S> { 8 | /// Reads a hexdecimal/decimal integer or floating point number. Allows 9 | /// decimal integers (123), hex integers (0xc0debabe), decimal floating 10 | /// point with optional exponent and exponent sign (3.21e+1), and hex floats 11 | /// with optional exponent and exponent sign (0xe.2fp-1c). 12 | pub(crate) fn read_numeral(&mut self) -> Result { 13 | let p0 = self.peek(0)?.unwrap(); 14 | assert!(p0 == b'.' || p0.is_ascii_digit()); 15 | 16 | let p1 = self.peek(1)?; 17 | let is_hex = p0 == b'0' && (p1 == Some(b'x') || p1 == Some(b'X')); 18 | 19 | let base: f64; 20 | let conv_fn: fn(u8) -> Option; 21 | let expo_sym: [u8; 2]; 22 | if is_hex { 23 | base = 16.0; 24 | conv_fn = ascii_to_hexdigit; 25 | expo_sym = [b'p', b'P']; 26 | self.advance(2); 27 | } else { 28 | base = 10.0; 29 | conv_fn = ascii_to_digit; 30 | expo_sym = [b'e', b'E']; 31 | } 32 | 33 | let mut has_dot = 0; 34 | let mut digits_after_dot = 0; 35 | let mut result = 0.0; 36 | while let Some(c) = self.peek(0)? { 37 | if c == b'.' { 38 | has_dot += 1; 39 | // malformed 1..2 40 | if has_dot > 1 { 41 | return Err(LexerError::BadNumber); 42 | } 43 | } else if let Some(d) = conv_fn(c) { 44 | result = result * base + f64::from(d); 45 | digits_after_dot += has_dot; 46 | } else { 47 | break; 48 | } 49 | 50 | self.advance(1); 51 | } 52 | 53 | let mut has_expo = false; 54 | if let Some(c) = self.peek(0)? 
{ 55 | if c == expo_sym[0] || c == expo_sym[1] { 56 | has_expo = true; 57 | self.advance(1); 58 | } else if c.is_ascii_alphabetic() || c == b'_' { 59 | // malformed 1.5p10 1.5_10 60 | return Err(LexerError::BadNumber); 61 | } 62 | } 63 | 64 | if has_expo { 65 | // Determine the sign of exponent part 66 | let mut negative = false; 67 | let c = self.peek(0)?.ok_or(LexerError::BadNumber)?; 68 | if c == b'+' || c == b'-' { 69 | self.advance(1); 70 | if c == b'-' { 71 | negative = true; 72 | } 73 | } 74 | 75 | let mut expo = 0u32; 76 | while let Some(c) = self.peek(0)? { 77 | if let Some(d) = ascii_to_digit(c) { 78 | expo = 10 * expo + u32::from(d); 79 | self.advance(1); 80 | } else if c.is_ascii_alphabetic() || c == b'_' { 81 | // malformed 2.33e10f 82 | return Err(LexerError::BadNumber); 83 | } else { 84 | break; 85 | } 86 | } 87 | 88 | let b: i32 = if is_hex { 2 } else { 10 }; 89 | if negative { 90 | result /= f64::from(b.pow(expo)); 91 | } else { 92 | result *= f64::from(b.pow(expo)); 93 | } 94 | } 95 | 96 | result /= base.powi(digits_after_dot); 97 | if has_expo || has_dot == 1 { 98 | Ok(Token::Number(result)) 99 | } else { 100 | Ok(Token::Integer(result as i64)) 101 | } 102 | } 103 | } 104 | 105 | #[cfg(test)] 106 | mod tests { 107 | use super::*; 108 | 109 | const EPS: f64 = 1e-5; 110 | fn float_equal(t: Token, f: f64) -> bool { 111 | match t { 112 | Token::Number(n) => (n - f).abs() < EPS, 113 | _ => false, 114 | } 115 | } 116 | 117 | #[test] 118 | fn decimal_integers() { 119 | let mut s: &[u8] = b"12345 114514"; 120 | let mut lex = Lexer::new(&mut s); 121 | assert_eq!(lex.next().unwrap(), Token::Integer(12345)); 122 | assert_eq!(lex.next().unwrap(), Token::Integer(114514)); 123 | } 124 | 125 | #[test] 126 | fn decimal_floats() { 127 | let mut s: &[u8] = b"1.5 .5"; 128 | let mut lex = Lexer::new(&mut s); 129 | assert!(float_equal(lex.next().unwrap(), 1.5)); 130 | assert!(float_equal(lex.next().unwrap(), 0.5)); 131 | 132 | let mut s: &[u8] = b"9.9e-2 8.8e3 
7.065e+1"; 133 | let mut lex = Lexer::new(&mut s); 134 | assert!(float_equal(lex.next().unwrap(), 0.099)); 135 | assert!(float_equal(lex.next().unwrap(), 8800.0)); 136 | assert!(float_equal(lex.next().unwrap(), 70.65)); 137 | } 138 | 139 | #[test] 140 | fn hexadecimal_integers() { 141 | let mut s: &[u8] = b"0xffff 0XffFF"; 142 | let mut lex = Lexer::new(&mut s); 143 | assert_eq!(lex.next().unwrap(), Token::Integer(0xffff)); 144 | assert_eq!(lex.next().unwrap(), Token::Integer(0xffff)); 145 | } 146 | 147 | #[test] 148 | fn hexadecimal_floats() { 149 | let mut s: &[u8] = b"0xf.5p2"; 150 | let mut lex = Lexer::new(&mut s); 151 | assert!(float_equal(lex.next().unwrap(), 61.25)); 152 | } 153 | 154 | #[test] 155 | fn malformed_numbers() { 156 | let mut s: &[u8] = b"1..2"; 157 | let mut lex = Lexer::new(&mut s); 158 | assert_eq!(lex.next().unwrap_err(), LexerError::BadNumber); 159 | 160 | let mut s: &[u8] = b"1.5p10"; 161 | let mut lex = Lexer::new(&mut s); 162 | assert_eq!(lex.next().unwrap_err(), LexerError::BadNumber); 163 | 164 | let mut s: &[u8] = b"1.5_10"; 165 | let mut lex = Lexer::new(&mut s); 166 | assert_eq!(lex.next().unwrap_err(), LexerError::BadNumber); 167 | 168 | let mut s: &[u8] = b"2.33e10f"; 169 | let mut lex = Lexer::new(&mut s); 170 | assert_eq!(lex.next().unwrap_err(), LexerError::BadNumber); 171 | } 172 | } 173 | -------------------------------------------------------------------------------- /Cargo.lock: -------------------------------------------------------------------------------- 1 | # This file is automatically @generated by Cargo. 2 | # It is not intended for manual editing. 
3 | [[package]] 4 | name = "byteorder" 5 | version = "1.3.4" 6 | source = "registry+https://github.com/rust-lang/crates.io-index" 7 | checksum = "08c48aae112d48ed9f069b33538ea9e3e90aa263cfa3d1c24309612b1f7472de" 8 | 9 | [[package]] 10 | name = "c2-chacha" 11 | version = "0.2.3" 12 | source = "registry+https://github.com/rust-lang/crates.io-index" 13 | checksum = "214238caa1bf3a496ec3392968969cab8549f96ff30652c9e56885329315f6bb" 14 | dependencies = [ 15 | "ppv-lite86", 16 | ] 17 | 18 | [[package]] 19 | name = "cfg-if" 20 | version = "0.1.10" 21 | source = "registry+https://github.com/rust-lang/crates.io-index" 22 | checksum = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822" 23 | 24 | [[package]] 25 | name = "getrandom" 26 | version = "0.1.14" 27 | source = "registry+https://github.com/rust-lang/crates.io-index" 28 | checksum = "7abc8dd8451921606d809ba32e95b6111925cd2906060d2dcc29c070220503eb" 29 | dependencies = [ 30 | "cfg-if", 31 | "libc", 32 | "wasi", 33 | ] 34 | 35 | [[package]] 36 | name = "if_chain" 37 | version = "1.0.1" 38 | source = "registry+https://github.com/rust-lang/crates.io-index" 39 | checksum = "1f7280c75fb2e2fc47080ec80ccc481376923acb04501957fc38f935c3de5088" 40 | 41 | [[package]] 42 | name = "libc" 43 | version = "0.2.67" 44 | source = "registry+https://github.com/rust-lang/crates.io-index" 45 | checksum = "eb147597cdf94ed43ab7a9038716637d2d1bf2bc571da995d0028dec06bd3018" 46 | 47 | [[package]] 48 | name = "phf" 49 | version = "0.8.0" 50 | source = "registry+https://github.com/rust-lang/crates.io-index" 51 | checksum = "3dfb61232e34fcb633f43d12c58f83c1df82962dcdfa565a4e866ffc17dafe12" 52 | dependencies = [ 53 | "phf_macros", 54 | "phf_shared", 55 | "proc-macro-hack", 56 | ] 57 | 58 | [[package]] 59 | name = "phf_generator" 60 | version = "0.8.0" 61 | source = "registry+https://github.com/rust-lang/crates.io-index" 62 | checksum = "17367f0cc86f2d25802b2c26ee58a7b23faeccf78a396094c13dced0d0182526" 63 | dependencies = [ 64 | 
"phf_shared", 65 | "rand", 66 | ] 67 | 68 | [[package]] 69 | name = "phf_macros" 70 | version = "0.8.0" 71 | source = "registry+https://github.com/rust-lang/crates.io-index" 72 | checksum = "7f6fde18ff429ffc8fe78e2bf7f8b7a5a5a6e2a8b58bc5a9ac69198bbda9189c" 73 | dependencies = [ 74 | "phf_generator", 75 | "phf_shared", 76 | "proc-macro-hack", 77 | "proc-macro2", 78 | "quote", 79 | "syn", 80 | ] 81 | 82 | [[package]] 83 | name = "phf_shared" 84 | version = "0.8.0" 85 | source = "registry+https://github.com/rust-lang/crates.io-index" 86 | checksum = "c00cf8b9eafe68dde5e9eaa2cef8ee84a9336a47d566ec55ca16589633b65af7" 87 | dependencies = [ 88 | "siphasher", 89 | ] 90 | 91 | [[package]] 92 | name = "ppv-lite86" 93 | version = "0.2.6" 94 | source = "registry+https://github.com/rust-lang/crates.io-index" 95 | checksum = "74490b50b9fbe561ac330df47c08f3f33073d2d00c150f719147d7c54522fa1b" 96 | 97 | [[package]] 98 | name = "proc-macro-hack" 99 | version = "0.5.11" 100 | source = "registry+https://github.com/rust-lang/crates.io-index" 101 | checksum = "ecd45702f76d6d3c75a80564378ae228a85f0b59d2f3ed43c91b4a69eb2ebfc5" 102 | dependencies = [ 103 | "proc-macro2", 104 | "quote", 105 | "syn", 106 | ] 107 | 108 | [[package]] 109 | name = "proc-macro2" 110 | version = "1.0.9" 111 | source = "registry+https://github.com/rust-lang/crates.io-index" 112 | checksum = "6c09721c6781493a2a492a96b5a5bf19b65917fe6728884e7c44dd0c60ca3435" 113 | dependencies = [ 114 | "unicode-xid", 115 | ] 116 | 117 | [[package]] 118 | name = "quote" 119 | version = "1.0.2" 120 | source = "registry+https://github.com/rust-lang/crates.io-index" 121 | checksum = "053a8c8bcc71fcce321828dc897a98ab9760bef03a4fc36693c231e5b3216cfe" 122 | dependencies = [ 123 | "proc-macro2", 124 | ] 125 | 126 | [[package]] 127 | name = "rand" 128 | version = "0.7.3" 129 | source = "registry+https://github.com/rust-lang/crates.io-index" 130 | checksum = "6a6b1679d49b24bbfe0c803429aa1874472f50d9b363131f0e89fc356b544d03" 131 | 
dependencies = [ 132 | "getrandom", 133 | "libc", 134 | "rand_chacha", 135 | "rand_core", 136 | "rand_hc", 137 | "rand_pcg", 138 | ] 139 | 140 | [[package]] 141 | name = "rand_chacha" 142 | version = "0.2.1" 143 | source = "registry+https://github.com/rust-lang/crates.io-index" 144 | checksum = "03a2a90da8c7523f554344f921aa97283eadf6ac484a6d2a7d0212fa7f8d6853" 145 | dependencies = [ 146 | "c2-chacha", 147 | "rand_core", 148 | ] 149 | 150 | [[package]] 151 | name = "rand_core" 152 | version = "0.5.1" 153 | source = "registry+https://github.com/rust-lang/crates.io-index" 154 | checksum = "90bde5296fc891b0cef12a6d03ddccc162ce7b2aff54160af9338f8d40df6d19" 155 | dependencies = [ 156 | "getrandom", 157 | ] 158 | 159 | [[package]] 160 | name = "rand_hc" 161 | version = "0.2.0" 162 | source = "registry+https://github.com/rust-lang/crates.io-index" 163 | checksum = "ca3129af7b92a17112d59ad498c6f81eaf463253766b90396d39ea7a39d6613c" 164 | dependencies = [ 165 | "rand_core", 166 | ] 167 | 168 | [[package]] 169 | name = "rand_pcg" 170 | version = "0.2.1" 171 | source = "registry+https://github.com/rust-lang/crates.io-index" 172 | checksum = "16abd0c1b639e9eb4d7c50c0b8100b0d0f849be2349829c740fe8e6eb4816429" 173 | dependencies = [ 174 | "rand_core", 175 | ] 176 | 177 | [[package]] 178 | name = "rua" 179 | version = "0.1.0" 180 | dependencies = [ 181 | "byteorder", 182 | "if_chain", 183 | "phf", 184 | ] 185 | 186 | [[package]] 187 | name = "siphasher" 188 | version = "0.3.1" 189 | source = "registry+https://github.com/rust-lang/crates.io-index" 190 | checksum = "83da420ee8d1a89e640d0948c646c1c088758d3a3c538f943bfa97bdac17929d" 191 | 192 | [[package]] 193 | name = "syn" 194 | version = "1.0.16" 195 | source = "registry+https://github.com/rust-lang/crates.io-index" 196 | checksum = "123bd9499cfb380418d509322d7a6d52e5315f064fe4b3ad18a53d6b92c07859" 197 | dependencies = [ 198 | "proc-macro2", 199 | "quote", 200 | "unicode-xid", 201 | ] 202 | 203 | [[package]] 204 | name = 
"unicode-xid" 205 | version = "0.2.0" 206 | source = "registry+https://github.com/rust-lang/crates.io-index" 207 | checksum = "826e7639553986605ec5979c7dd957c7895e93eabed50ab2ffa7f6128a75097c" 208 | 209 | [[package]] 210 | name = "wasi" 211 | version = "0.9.0+wasi-snapshot-preview1" 212 | source = "registry+https://github.com/rust-lang/crates.io-index" 213 | checksum = "cccddf32554fecc6acb585f82a32a72e28b48f8c4c1883ddfeeeaa96f7d8e519" 214 | -------------------------------------------------------------------------------- /src/parser/parser.rs: -------------------------------------------------------------------------------- 1 | use std::error; 2 | use std::fmt; 3 | use std::io; 4 | use std::rc::Rc; 5 | 6 | use crate::lexer::{Lexer, LexerError, Token}; 7 | 8 | /// ParserError 9 | /// 10 | /// There are 6 errors when parsing: 11 | /// 12 | /// - `Unexpected` token encountered 13 | /// - `EndOfStream` early ended 14 | /// - `AssignToExpression` invalid assignment 15 | /// - `ExpressionNotStatement` SuffixedExpression needed 16 | /// - `RecursionLimit` the recursion depth deeper than `MAX_RECURSION` 17 | /// - `LexerError` lexer level error 18 | #[derive(Debug, PartialEq)] 19 | pub enum ParserError { 20 | Unexpected { 21 | unexpected: String, 22 | expected: Option, 23 | }, 24 | EndOfStream { 25 | expected: Option, 26 | }, 27 | AssignToExpression, 28 | ExpressionNotStatement, 29 | RecursionLimit, 30 | LexerError(LexerError), 31 | DividedByZero, 32 | } 33 | 34 | impl fmt::Display for ParserError { 35 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 36 | match self { 37 | ParserError::Unexpected { 38 | unexpected, 39 | expected, 40 | } => { 41 | write!(f, "found {:?}", unexpected)?; 42 | if let Some(s) = expected { 43 | write!(f, ", expected {}", s)?; 44 | } 45 | Ok(()) 46 | } 47 | 48 | ParserError::EndOfStream { expected } => { 49 | write!(f, "unexpected end of token stream")?; 50 | if let Some(s) = expected { 51 | write!(f, ", expected {}", s)?; 52 | } 53 | Ok(()) 
54 | } 55 | 56 | ParserError::AssignToExpression => write!(f, "cannot assign to expression"), 57 | ParserError::ExpressionNotStatement => write!(f, "expression is not a statement"), 58 | ParserError::RecursionLimit => write!(f, "recursion limit reached"), 59 | ParserError::LexerError(e) => write!(f, "{}", e), 60 | ParserError::DividedByZero => write!(f, "divided by zero"), 61 | } 62 | } 63 | } 64 | 65 | impl error::Error for ParserError {} 66 | 67 | /// Parser 68 | /// 69 | /// It implements a recursive decent parser. 70 | /// 71 | /// Visit https://en.wikipedia.org/wiki/Recursive_descent_parser for more 72 | /// information. 73 | pub struct Parser<'a, S: io::Read> { 74 | lexer: Lexer<'a, S>, 75 | tokens: Vec, 76 | recursion_guard: Rc<()>, 77 | } 78 | 79 | impl<'a, S: io::Read> Parser<'a, S> { 80 | /// Creates a new `Parser` 81 | pub fn new(src: &'a mut S) -> Parser<'a, S> { 82 | Parser { 83 | lexer: Lexer::new(src), 84 | tokens: Vec::new(), 85 | recursion_guard: Rc::new(()), 86 | } 87 | } 88 | 89 | /// Error if we have more than **MAX_RECURSION** guards live, otherwise 90 | /// return a new recursion guard (a recursion guard is just an `Rc` used 91 | /// solely for its live count). 92 | pub(crate) fn get_recursion_guard(&mut self) -> Result, ParserError> { 93 | // Maximum depth for nested function calls and syntactical nested 94 | // non-terminals in a program. 95 | const MAX_RECURSION: usize = 200; 96 | if Rc::strong_count(&self.recursion_guard) < MAX_RECURSION { 97 | Ok(self.recursion_guard.clone()) 98 | } else { 99 | Err(ParserError::RecursionLimit) 100 | } 101 | } 102 | 103 | /// Consumes the next token, returning an error if it's not a string or 104 | /// yielding it 105 | pub(crate) fn expect_string(&mut self) -> Result { 106 | match self.peek(0)? 
{ 107 | Some(&Token::String(ref s)) => { 108 | let s2 = s.clone(); 109 | self.advance(1); 110 | Ok(s2) 111 | } 112 | None => Err(ParserError::EndOfStream { 113 | expected: Some("string".to_owned()), 114 | }), 115 | c => Err(ParserError::Unexpected { 116 | unexpected: format!("{:?}", c), 117 | expected: Some("string".to_owned()), 118 | }), 119 | } 120 | } 121 | 122 | /// Consumes the next token, returning an error if it's not an identifier or 123 | /// yielding it 124 | pub(crate) fn expect_identifier(&mut self) -> Result { 125 | match self.peek(0)? { 126 | Some(&Token::Identifier(ref s)) => { 127 | let s2 = s.clone(); 128 | self.advance(1); 129 | Ok(s2) 130 | } 131 | None => Err(ParserError::EndOfStream { 132 | expected: Some("identifier".to_owned()), 133 | }), 134 | c => Err(ParserError::Unexpected { 135 | unexpected: format!("{:?}", c), 136 | expected: Some("identifier".to_owned()), 137 | }), 138 | } 139 | } 140 | 141 | /// Consumes the next token, returning an error if it does not match the 142 | /// given token 143 | pub(crate) fn expect_next(&mut self, token: Token) -> Result<(), ParserError> { 144 | match self.peek(0)? 
{ 145 | Some(c) if *c == token => { 146 | self.advance(1); 147 | Ok(()) 148 | } 149 | None => Err(ParserError::EndOfStream { 150 | expected: Some(format!("{:?}", token)), 151 | }), 152 | c => Err(ParserError::Unexpected { 153 | unexpected: format!("{:?}", c), 154 | expected: Some(format!("{:?}", token)), 155 | }), 156 | } 157 | } 158 | 159 | /// Skips tokens belonging to [0, n) 160 | pub(crate) fn advance(&mut self, n: usize) { 161 | assert!( 162 | n <= self.tokens.len(), 163 | "cannot advance over un-peeked tokens" 164 | ); 165 | self.tokens.drain(0..n); 166 | } 167 | 168 | /// Peeks (n+1)-tokens ahead, and returns the n-th token if possible 169 | pub(crate) fn peek(&mut self, n: usize) -> Result, ParserError> { 170 | while self.tokens.len() <= n { 171 | let token = self.lexer.next().map_err(ParserError::LexerError)?; 172 | if token != Token::None { 173 | self.tokens.push(token); 174 | } else { 175 | break; 176 | } 177 | } 178 | 179 | Ok(self.tokens.get(n)) 180 | } 181 | } 182 | 183 | #[cfg(test)] 184 | mod tests { 185 | use super::*; 186 | 187 | #[test] 188 | fn expect_string() { 189 | let mut s: &[u8] = b"'string'"; 190 | let mut parser = Parser::new(&mut s); 191 | assert_eq!(parser.expect_string().unwrap(), "string".to_owned()); 192 | } 193 | 194 | #[test] 195 | fn expect_identifier() { 196 | let mut s: &[u8] = b"foo"; 197 | let mut parser = Parser::new(&mut s); 198 | assert_eq!(parser.expect_identifier().unwrap(), "foo".to_owned()); 199 | } 200 | 201 | #[test] 202 | fn exceed_recursion_limit() { 203 | let mut s: &[u8] = b"foo"; 204 | let mut parser = Parser::new(&mut s); 205 | 206 | // 500 is enough larger than MAX_RECURSION 207 | let guards = (0..500) 208 | .map(|_| parser.get_recursion_guard()) 209 | .collect::>(); 210 | assert_eq!(guards.last(), Some(&Err(ParserError::RecursionLimit))); 211 | } 212 | } 213 | -------------------------------------------------------------------------------- /src/parser/operator.rs: 
-------------------------------------------------------------------------------- 1 | use crate::lexer::Token; 2 | 3 | /// The precedence lower than any other operators 4 | pub const MIN_OPERATOR_PRECEDENCE: u8 = 0; 5 | 6 | /// Binary operator 7 | /// 8 | /// The precedence table: 9 | /// 10 | /// ```plain 11 | /// | operator | precedence | associativity | 12 | /// |-----------------|------------|---------------| 13 | /// | ^ | 12 | right | 14 | /// | * / // % | 10 | left | 15 | /// | + - | 9 | left | 16 | /// | .. | 8 | right | 17 | /// | << >> | 7 | left | 18 | /// | & | 6 | left | 19 | /// | ~ | 5 | left | 20 | /// | | | 4 | left | 21 | /// | == ~= < <= > >= | 3 | left | 22 | /// | and | 2 | left | 23 | /// | or | 1 | left | 24 | /// ``` 25 | /// 26 | /// # Reference 27 | /// 28 | /// https://www.lua.org/manual/5.3/manual.html#3.4.8 29 | #[derive(Debug, PartialEq, Clone, Copy)] 30 | pub enum BinaryOperator { 31 | Add, 32 | Sub, 33 | Mul, 34 | Div, 35 | IDiv, 36 | Mod, 37 | Power, 38 | Concat, 39 | And, 40 | Or, 41 | LessThan, 42 | LessEqual, 43 | GreaterThan, 44 | GreaterEqual, 45 | Equal, 46 | NotEqual, 47 | BitXor, 48 | BitAnd, 49 | BitOr, 50 | ShiftLeft, 51 | ShiftRight, 52 | } 53 | 54 | impl Precedence for BinaryOperator { 55 | fn precedence(&self) -> (u8, Associativity) { 56 | use BinaryOperator::*; 57 | 58 | match *self { 59 | Or => (1, Associativity::L), 60 | And => (2, Associativity::L), 61 | Equal | NotEqual | LessThan | LessEqual | GreaterThan | GreaterEqual => { 62 | (3, Associativity::L) 63 | } 64 | BitOr => (4, Associativity::L), 65 | BitXor => (5, Associativity::L), 66 | BitAnd => (6, Associativity::L), 67 | ShiftLeft | ShiftRight => (7, Associativity::L), 68 | Concat => (8, Associativity::R), 69 | Add | Sub => (9, Associativity::L), 70 | Mul | Div | IDiv | Mod => (10, Associativity::L), 71 | // NOTE All unary operators => 11 72 | Power => (12, Associativity::R), 73 | } 74 | } 75 | } 76 | 77 | /// Gets the binary operator associated with the given 
token, if it exists. 78 | pub fn get_binary_operator(token: &Token) -> Option { 79 | match *token { 80 | Token::Add => Some(BinaryOperator::Add), 81 | Token::Minus => Some(BinaryOperator::Sub), 82 | Token::Mul => Some(BinaryOperator::Mul), 83 | Token::Div => Some(BinaryOperator::Div), 84 | Token::IDiv => Some(BinaryOperator::IDiv), 85 | Token::Mod => Some(BinaryOperator::Mod), 86 | Token::Power => Some(BinaryOperator::Power), 87 | Token::Concat => Some(BinaryOperator::Concat), 88 | Token::And => Some(BinaryOperator::And), 89 | Token::Or => Some(BinaryOperator::Or), 90 | Token::LessThan => Some(BinaryOperator::LessThan), 91 | Token::LessEqual => Some(BinaryOperator::LessEqual), 92 | Token::GreaterThan => Some(BinaryOperator::GreaterThan), 93 | Token::GreaterEqual => Some(BinaryOperator::GreaterEqual), 94 | Token::Equal => Some(BinaryOperator::Equal), 95 | Token::NotEqual => Some(BinaryOperator::NotEqual), 96 | Token::BitXorNot => Some(BinaryOperator::BitXor), 97 | Token::BitAnd => Some(BinaryOperator::BitAnd), 98 | Token::BitOr => Some(BinaryOperator::BitOr), 99 | Token::ShiftLeft => Some(BinaryOperator::ShiftLeft), 100 | Token::ShiftRight => Some(BinaryOperator::ShiftRight), 101 | _ => None, 102 | } 103 | } 104 | 105 | /// Unary operator 106 | /// 107 | /// The precedence table: 108 | /// 109 | /// ```plain 110 | /// | operator | precedence | associativity | 111 | /// |-----------|------------|---------------| 112 | /// | not - ~ # | 11 | non | 113 | /// ``` 114 | #[derive(Debug, PartialEq, Clone, Copy)] 115 | pub enum UnaryOperator { 116 | Not, 117 | Minus, 118 | BitNot, 119 | Len, 120 | } 121 | 122 | impl Precedence for UnaryOperator { 123 | fn precedence(&self) -> (u8, Associativity) { 124 | (11, Associativity::Non) 125 | } 126 | } 127 | 128 | /// Gets the unary operator associated with the given token, if it exists. 
129 | pub fn get_unary_operator(token: &Token) -> Option { 130 | match *token { 131 | Token::Not => Some(UnaryOperator::Not), 132 | Token::Minus => Some(UnaryOperator::Minus), 133 | Token::BitXorNot => Some(UnaryOperator::BitNot), 134 | Token::Len => Some(UnaryOperator::Len), 135 | _ => None, 136 | } 137 | } 138 | 139 | /// Operator associativity 140 | /// 141 | /// Associativity is only needed when the operators in an expression have the 142 | /// same precedence. 143 | #[derive(Debug, PartialEq, Clone, Copy)] 144 | pub enum Associativity { 145 | /// Non-associative 146 | Non, 147 | /// Left-associative 148 | L, 149 | /// Right-associative 150 | R, 151 | } 152 | 153 | pub trait Precedence { 154 | fn precedence(&self) -> (u8, Associativity); 155 | } 156 | 157 | #[cfg(test)] 158 | mod tests { 159 | use super::*; 160 | use crate::lexer::Lexer; 161 | 162 | #[test] 163 | fn binary_operator_precedence() { 164 | macro_rules! lexer_check_next_binary_operator { 165 | ($lexer:expr, $o:ident, $precedence:expr, $assoc:ident) => { 166 | let op = get_binary_operator(&$lexer.next().unwrap()).unwrap(); 167 | assert_eq!(op, BinaryOperator::$o); 168 | assert_eq!(op.precedence().0, $precedence); 169 | assert_eq!(op.precedence().1, Associativity::$assoc); 170 | }; 171 | } 172 | 173 | let mut s: &[u8] = b"^ * / // % + - .. 
<< >> & ~ | == ~= < <= > >= and or"; 174 | let mut lexer = Lexer::new(&mut s); 175 | 176 | lexer_check_next_binary_operator!(lexer, Power, 12, R); 177 | lexer_check_next_binary_operator!(lexer, Mul, 10, L); 178 | lexer_check_next_binary_operator!(lexer, Div, 10, L); 179 | lexer_check_next_binary_operator!(lexer, IDiv, 10, L); 180 | lexer_check_next_binary_operator!(lexer, Mod, 10, L); 181 | lexer_check_next_binary_operator!(lexer, Add, 9, L); 182 | lexer_check_next_binary_operator!(lexer, Sub, 9, L); 183 | lexer_check_next_binary_operator!(lexer, Concat, 8, R); 184 | lexer_check_next_binary_operator!(lexer, ShiftLeft, 7, L); 185 | lexer_check_next_binary_operator!(lexer, ShiftRight, 7, L); 186 | lexer_check_next_binary_operator!(lexer, BitAnd, 6, L); 187 | lexer_check_next_binary_operator!(lexer, BitXor, 5, L); 188 | lexer_check_next_binary_operator!(lexer, BitOr, 4, L); 189 | lexer_check_next_binary_operator!(lexer, Equal, 3, L); 190 | lexer_check_next_binary_operator!(lexer, NotEqual, 3, L); 191 | lexer_check_next_binary_operator!(lexer, LessThan, 3, L); 192 | lexer_check_next_binary_operator!(lexer, LessEqual, 3, L); 193 | lexer_check_next_binary_operator!(lexer, GreaterThan, 3, L); 194 | lexer_check_next_binary_operator!(lexer, GreaterEqual, 3, L); 195 | lexer_check_next_binary_operator!(lexer, And, 2, L); 196 | lexer_check_next_binary_operator!(lexer, Or, 1, L); 197 | } 198 | 199 | #[test] 200 | fn unary_operator_precedence() { 201 | macro_rules! 
lexer_check_next_unary_operator { 202 | ($lexer:expr, $o:ident, $precedence:expr, $assoc:ident) => { 203 | let op = get_unary_operator(&$lexer.next().unwrap()).unwrap(); 204 | assert_eq!(op, UnaryOperator::$o); 205 | assert_eq!(op.precedence().0, $precedence); 206 | assert_eq!(op.precedence().1, Associativity::$assoc); 207 | }; 208 | } 209 | 210 | let mut s: &[u8] = b"not - ~ #"; 211 | let mut lexer = Lexer::new(&mut s); 212 | 213 | lexer_check_next_unary_operator!(lexer, Not, 11, Non); 214 | lexer_check_next_unary_operator!(lexer, Minus, 11, Non); 215 | lexer_check_next_unary_operator!(lexer, BitNot, 11, Non); 216 | lexer_check_next_unary_operator!(lexer, Len, 11, Non); 217 | } 218 | 219 | #[test] 220 | fn get_binary_operator_failed() { 221 | let token = Token::Assign; 222 | assert_eq!(get_binary_operator(&token), None); 223 | } 224 | 225 | #[test] 226 | fn get_unary_operator_failed() { 227 | let token = Token::Concat; 228 | assert_eq!(get_unary_operator(&token), None); 229 | } 230 | } 231 | -------------------------------------------------------------------------------- /src/vm/instruction.rs: -------------------------------------------------------------------------------- 1 | //! We assume that instructions are unsigned numbers. All instructions have an 2 | //! opcode in the first 6 bits. Instruction can have the following modes: 3 | //! 4 | //! ``` 5 | //! | mode | 31 .. 23 | 22 .. 14 | 13 .. 6 | 5 .. 0 | 6 | //! |-------+----------+----------+---------+-----------| 7 | //! | iABC | B: 9 | C: 9 | A: 8 | OpCode: 6 | 8 | //! | iABx | Bx: 18 | A: 8 | OpCode: 6 | 9 | //! | iAsBx | sBx: 18 | A: 8 | OpCode: 6 | 10 | //! | iAx | Ax: 26 | OpCode: 6 | 11 | //! ``` 12 | //! 13 | //! `A`, `B`, `C`, `Ax`, `Bx` are unsigned while `sBx` is signed. 14 | //! 15 | //! A signed argument is represented in excess K; that is the number value is 16 | //! the unsigned value minus K. K is exactly the maximum value for the argument 17 | //! 
/// Basic instruction format.
///
/// Bit layout, most significant field first:
///
/// ```plain
/// iABC  | B: 9    | C: 9 | A: 8 | OpCode: 6 |
/// iABx  | Bx: 18         | A: 8 | OpCode: 6 |
/// iAsBx | sBx: 18        | A: 8 | OpCode: 6 |
/// iAx   | Ax: 26                | OpCode: 6 |
/// ```
#[allow(non_camel_case_types)]
#[derive(Copy, Clone, Debug, Hash, Eq, PartialEq)]
#[repr(u8)]
pub enum Mode {
    iABC,
    iABx,
    iAsBx,
    iAx,
}

/// Argument kind
#[derive(Copy, Clone, Debug, Hash, Eq, PartialEq)]
#[repr(u8)]
pub enum ArgKind {
    /// Argument is not used
    N,
    /// Argument is used
    U,
    /// Argument is a register or a jump offset
    R,
    /// Argument is a register or register/constant
    K,
}

/// Static description of one opcode: the kinds of its `B` and `C` arguments,
/// its encoding mode and its mnemonic.
pub struct OpCode {
    /// Kind of the `B` argument
    pub kind1: ArgKind,
    /// Kind of the `C` argument
    pub kind2: ArgKind,
    pub mode: Mode,
    pub name: &'static str,
}

/// An encoded instruction.
///
/// # Panics
///
/// `name`, `mode`, `kind1` and `kind2` index the opcode table with the 6-bit
/// opcode and panic when the opcode has no table entry.
pub trait Instruction {
    /// Mnemonic of the instruction's opcode.
    fn name(&self) -> &'static str;
    /// Encoding mode of the instruction's opcode.
    fn mode(&self) -> Mode;
    /// The opcode, i.e. the lowest 6 bits.
    fn opcode(&self) -> u8;
    /// Kind of the `B` argument.
    fn kind1(&self) -> ArgKind;
    /// Kind of the `C` argument.
    fn kind2(&self) -> ArgKind;
    /// Extracts the argument selected by `T` (one of `gA`, `gB`, `gC`, `gBx`,
    /// `gsBx`, `gAx`).
    fn get<T: ArgGetter>(&self) -> i32;
}

/// Mask selecting the opcode bits of an instruction.
const OPCODE_MASK: u32 = 0b11_1111;

/// Looks up the table entry for the opcode of `instruction`.
fn descriptor(instruction: u32) -> &'static OpCode {
    &OPCODES[(instruction & OPCODE_MASK) as usize]
}

impl Instruction for u32 {
    fn name(&self) -> &'static str {
        descriptor(*self).name
    }

    fn mode(&self) -> Mode {
        descriptor(*self).mode
    }

    fn opcode(&self) -> u8 {
        (*self & OPCODE_MASK) as u8
    }

    fn kind1(&self) -> ArgKind {
        descriptor(*self).kind1
    }

    fn kind2(&self) -> ArgKind {
        descriptor(*self).kind2
    }

    fn get<T: ArgGetter>(&self) -> i32 {
        T::get_arg(*self)
    }
}

#[doc(hidden)]
macro_rules! generate_opcode {
    ($kind1:ident, $kind2:ident, $mode:ident, $name:expr) => {
        OpCode {
            kind1: ArgKind::$kind1,
            kind2: ArgKind::$kind2,
            mode: Mode::$mode,
            name: $name,
        }
    };
}

/// All opcodes
///
/// ```plain
/// R(x)   - register
/// Kst(x) - constant (in constant table) [the most significant bit is 1]
///          [i.e. larger than 0xff]
/// RK(x)  == if ISK(x) then Kst(INDEXK(x)) else R(x)
/// ```
///
/// # Notes
///
/// - In `CALL`, if (B == 0) then B = top. If (C == 0), then `top` is set to
///   last_result+1. so next open instruction (`CALL`, `RETURN`, `SETLIST`) may
///   use `top`.
/// - In `VARARG`, if (B == 0) then use actual number of varargs and set top
///   (like in `CALL` with C == 0).
/// - In `RETURN`, if (B == 0) then return up to `top`.
/// - In `SETLIST`, if (B == 0) then B = `top`; if (C == 0) then next
///   instruction is `EXTRAARG(real c)`.
/// - In `LOADKX`, the next instruction is always `EXTRAARG`.
/// - For comparisons, A specifies what condition the test should accept
///   (true or false)
/// - All `skips` (pc++) assume that next instruction is a jump.
const OPCODES: &[OpCode] = &[
    //               B  C  Mode   Name
    generate_opcode!(R, N, iABC, "MOVE"),      // R(A) := R(B)
    generate_opcode!(K, N, iABx, "LOADK"),     // R(A) := Kst(Bx)
    generate_opcode!(N, N, iABx, "LOADKX"),    // R(A) := Kst(extra arg)
    generate_opcode!(U, U, iABC, "LOADBOOL"),  // R(A) := (bool)B; if (C) pc++
    generate_opcode!(U, N, iABC, "LOADNIL"),   // R(A), R(A+1), ..., R(A+B) := nil
    generate_opcode!(U, N, iABC, "GETUPVAL"),  // R(A) := UpValue[B]
    generate_opcode!(U, K, iABC, "GETTABUP"),  // R(A) := UpValue[B][RK(C)]
    generate_opcode!(R, K, iABC, "GETTABLE"),  // R(A) := R(B)[RK(C)]
    generate_opcode!(K, K, iABC, "SETTABUP"),  // UpValue[A][RK(B)] := RK(C)
    generate_opcode!(U, N, iABC, "SETUPVAL"),  // UpValue[B] := R(A)
    generate_opcode!(K, K, iABC, "SETTABLE"),  // R(A)[RK(B)] := RK(C)
    generate_opcode!(U, U, iABC, "NEWTABLE"),  // R(A) := {} (size = B,C)
    generate_opcode!(R, K, iABC, "SELF"),      // R(A+1) := R(B); R(A) := R(B)[RK(C)]
    generate_opcode!(K, K, iABC, "ADD"),       // R(A) := RK(B) + RK(C)
    generate_opcode!(K, K, iABC, "SUB"),       // R(A) := RK(B) - RK(C)
    generate_opcode!(K, K, iABC, "MUL"),       // R(A) := RK(B) * RK(C)
    generate_opcode!(K, K, iABC, "MOD"),       // R(A) := RK(B) % RK(C)
    generate_opcode!(K, K, iABC, "POW"),       // R(A) := RK(B) ^ RK(C)
    generate_opcode!(K, K, iABC, "DIV"),       // R(A) := RK(B) / RK(C)
    generate_opcode!(K, K, iABC, "IDIV"),      // R(A) := RK(B) // RK(C)
    generate_opcode!(K, K, iABC, "BAND"),      // R(A) := RK(B) & RK(C)
    generate_opcode!(K, K, iABC, "BOR"),       // R(A) := RK(B) | RK(C)
    generate_opcode!(K, K, iABC, "BXOR"),      // R(A) := RK(B) ~ RK(C)
    generate_opcode!(K, K, iABC, "SHL"),       // R(A) := RK(B) << RK(C)
    generate_opcode!(K, K, iABC, "SHR"),       // R(A) := RK(B) >> RK(C)
    generate_opcode!(R, N, iABC, "UNM"),       // R(A) := -R(B)
    generate_opcode!(R, N, iABC, "BNOT"),      // R(A) := ~R(B)
    generate_opcode!(R, N, iABC, "NOT"),       // R(A) := not R(B)
    generate_opcode!(R, N, iABC, "LEN"),       // R(A) := length of R(B)
    generate_opcode!(R, R, iABC, "CONCAT"),    // R(A) := R(B) ... R(C)
    generate_opcode!(R, N, iAsBx, "JMP"),      // pc+=sBx; if (A) close all upvalues >= R(A - 1)
    generate_opcode!(K, K, iABC, "EQ"),        // if ((RK(B) == RK(C)) ~= A) then pc++
    generate_opcode!(K, K, iABC, "LT"),        // if ((RK(B) < RK(C)) ~= A) then pc++
    generate_opcode!(K, K, iABC, "LE"),        // if ((RK(B) <= RK(C)) ~= A) then pc++
    generate_opcode!(N, U, iABC, "TEST"),      // if not (R(A) <=> C) then pc++
    generate_opcode!(R, U, iABC, "TESTSET"),   // if (R(B) <=> C) then R(A) := R(B) else pc++
    generate_opcode!(U, U, iABC, "CALL"),      // R(A), ... ,R(A+C-2) := R(A)(R(A+1), ... ,R(A+B-1))
    generate_opcode!(U, U, iABC, "TAILCALL"),  // return R(A)(R(A+1), ... ,R(A+B-1))
    generate_opcode!(U, N, iABC, "RETURN"),    // return R(A), ... ,R(A+B-2)
    generate_opcode!(R, N, iAsBx, "FORLOOP"),  // R(A)+=R(A+2); if R(A) <?= R(A+1) then { pc+=sBx; R(A+3)=R(A) }
    // NOTE(review): the entries from here to the end of the table follow the
    // Lua 5.3 opcode list (lopcodes.h / lopcodes.c) — confirm against the
    // project's own table.
    generate_opcode!(R, N, iAsBx, "FORPREP"),  // R(A)-=R(A+2); pc+=sBx
    generate_opcode!(N, U, iABC, "TFORCALL"),  // R(A+3), ... ,R(A+2+C) := R(A)(R(A+1), R(A+2))
    generate_opcode!(R, N, iAsBx, "TFORLOOP"), // if R(A+1) ~= nil then { R(A)=R(A+1); pc += sBx }
    generate_opcode!(U, U, iABC, "SETLIST"),   // R(A)[(C-1)*FPF+i] := R(A+i), 1 <= i <= B
    generate_opcode!(U, N, iABx, "CLOSURE"),   // R(A) := closure(KPROTO[Bx])
    generate_opcode!(U, N, iABC, "VARARG"),    // R(A), R(A+1), ..., R(A+B-2) = vararg
    generate_opcode!(U, U, iAx, "EXTRAARG"),   // extra (larger) argument for previous opcode
];

/// Extracts one argument field from an encoded instruction.
pub trait ArgGetter {
    /// Returns the field's value decoded from instruction word `x`.
    fn get_arg(x: u32) -> i32;
}

/// Generates a unit struct `$name` whose `ArgGetter` impl shifts the
/// instruction right by `$rshift` bits and masks it with `$mask`.
///
/// When `$signed` is true the field is stored in excess-K form with
/// `K = $mask / 2`: the decoded value is the raw field minus K, so a raw 0
/// decodes to `-K`.
#[doc(hidden)]
macro_rules! generate_arg_getter_impl {
    ($name:ident, $signed:expr, $rshift:expr, $mask:expr) => {
        #[allow(non_camel_case_types)]
        pub struct $name;

        impl $name {
            const RSHIFT: u32 = $rshift;
            const MASK: u32 = $mask;
        }

        impl ArgGetter for $name {
            fn get_arg(x: u32) -> i32 {
                // Every mask fits in 26 bits, so the cast is lossless.
                let raw = ((x >> Self::RSHIFT) & Self::MASK) as i32;
                if $signed {
                    // Subtract in i32: the result is negative whenever
                    // raw < K, which an unsigned subtraction cannot express.
                    raw - (Self::MASK >> 1) as i32
                } else {
                    raw
                }
            }
        }
    };
}

generate_arg_getter_impl!(gA, false, 6, 0xff);
generate_arg_getter_impl!(gB, false, 23, 0x1ff);
generate_arg_getter_impl!(gC, false, 14, 0x1ff);
generate_arg_getter_impl!(gBx, false, 14, 0x3ffff);
generate_arg_getter_impl!(gsBx, true, 14, 0x3ffff);
generate_arg_getter_impl!(gAx, false, 6, 0x3ffffff);
21 | pub fn new(src: &'a mut T) -> Self { 22 | Self { src } 23 | } 24 | 25 | /// Returns 1-byte or yields an `io::Result::Err` 26 | pub fn read_byte(&mut self) -> io::Result { 27 | self.src.read_u8() 28 | } 29 | 30 | /// Returns n-bytes or yields an `io::Result::Err` 31 | pub fn read_bytes(&mut self, n: usize) -> io::Result> { 32 | let mut buf = Vec::with_capacity(n); 33 | buf.resize(n, b'\x00'); 34 | self.src.read_exact(buf.as_mut())?; 35 | Ok(buf) 36 | } 37 | 38 | /// Returns an `u32` or yields an `io::Result::Err` 39 | pub fn read_u32(&mut self) -> io::Result { 40 | self.src.read_u32::() 41 | } 42 | 43 | /// Returns an `u64` or yields an `io::Result::Err` 44 | pub fn read_u64(&mut self) -> io::Result { 45 | self.src.read_u64::() 46 | } 47 | 48 | /// Returns an `Integer` (represented by i64) or yields an `io::Result::Err` 49 | pub fn read_integer(&mut self) -> io::Result { 50 | self.src.read_i64::() 51 | } 52 | 53 | /// Returns a `Number` (represented by f64) or yields an `io::Result::Err` 54 | pub fn read_number(&mut self) -> io::Result { 55 | self.src.read_f64::() 56 | } 57 | 58 | /// Returns a `String` or yields an `io::Result::Err` 59 | pub fn read_string(&mut self) -> io::Result { 60 | Ok(self.read_string_impl()?.unwrap_or_else(String::new)) 61 | } 62 | 63 | /// Returns a `Vec` that is applied with `f` 64 | pub fn read_vec(&mut self, f: F) -> io::Result> 65 | where 66 | F: Fn(&mut Self) -> io::Result, 67 | { 68 | let n = self.read_u32()? as usize; 69 | let mut vec = Vec::with_capacity(n); 70 | for _i in 0..n { 71 | vec.push(f(self)?); 72 | } 73 | Ok(vec) 74 | } 75 | 76 | /// Returns a [`Constant`] in Lua or yields an `io::Result::Err`. 
77 | /// 78 | /// It can be: 79 | /// 80 | /// - Nil 81 | /// - Boolean 82 | /// - Number 83 | /// - Integer 84 | /// - Str 85 | /// 86 | /// [`Constant`]: ../enum.Constant.html 87 | pub fn read_constant(&mut self) -> io::Result { 88 | use Constant::*; 89 | 90 | let tag = self.read_byte()?; 91 | let c = match tag { 92 | TAG_NIL => Nil, 93 | TAG_BOOLEAN => Boolean(self.read_byte()? != 0), 94 | TAG_NUMBER => Number(self.read_number()?), 95 | TAG_INTEGER => Integer(self.read_integer()?), 96 | TAG_SHORT_STR | TAG_LONG_STR => Str(self.read_string()?), 97 | _ => panic!("corrupted!"), 98 | }; 99 | Ok(c) 100 | } 101 | 102 | /// Returns an [`UpValue`] in Lua or yields an `io::Result::Err` 103 | /// 104 | /// [`UpValue`]: ../struct.UpValue.html 105 | pub fn read_upvalue(&mut self) -> io::Result { 106 | Ok(UpValue { 107 | instack: self.read_byte()?, 108 | idx: self.read_byte()?, 109 | }) 110 | } 111 | 112 | /// Returns a [`LocalVariable`] in Lua or yields an `io::Result::Err` 113 | /// 114 | /// [`LocalVariable`]: ../struct.LocalVariable.html 115 | pub fn read_local_variable(&mut self) -> io::Result { 116 | Ok(LocalVariable { 117 | name: self.read_string()?, 118 | start_pc: self.read_u32()?, 119 | end_pc: self.read_u32()?, 120 | }) 121 | } 122 | 123 | /// Returns a [`Prototype`] in Lua or yields an `io::Result::Err` 124 | /// 125 | /// [`Prototype`]: ../struct.Prototype.html 126 | pub fn read_prototype(&mut self) -> io::Result { 127 | self.read_prototype_impl(None) 128 | } 129 | 130 | fn read_prototype_impl(&mut self, parent: Option) -> io::Result { 131 | let src = self.read_string_impl()?.or(parent); 132 | Ok(Prototype { 133 | source: src, 134 | first_line: self.read_u32()?, 135 | last_line: self.read_u32()?, 136 | params: self.read_byte()?, 137 | variadic: self.read_byte()?, 138 | max_stack_size: self.read_byte()?, 139 | instructions: self.read_vec(|r| r.read_u32())?, 140 | constants: self.read_vec(|r| r.read_constant())?, 141 | upvalues: self.read_vec(|r| 
r.read_upvalue())?, 142 | protos: self.read_vec(|r| r.read_prototype())?, 143 | line_infos: self.read_vec(|r| r.read_u32())?, 144 | local_vars: self.read_vec(|r| r.read_local_variable())?, 145 | upvalue_names: self.read_vec(|r| r.read_string())?, 146 | }) 147 | } 148 | 149 | fn read_string_impl(&mut self) -> io::Result> { 150 | let mut sz = self.read_byte()? as usize; 151 | if sz == 0 { 152 | return Ok(None); 153 | } 154 | if sz == 0xff { 155 | sz = self.read_u64()? as usize; 156 | } 157 | 158 | let s = unsafe { String::from_utf8_unchecked(self.read_bytes(sz - 1)?) }; 159 | Ok(Some(s)) 160 | } 161 | } 162 | 163 | #[cfg(all(target_arch = "x86_64", target_endian = "little"))] 164 | #[cfg(test)] 165 | mod tests { 166 | use super::*; 167 | 168 | #[test] 169 | fn reader_new() { 170 | let mut src = b"0\n" as &[u8]; 171 | let _reader = Reader::new(&mut src); 172 | } 173 | 174 | #[test] 175 | fn reader_read_byte() { 176 | let mut src = b"\xff" as &[u8]; 177 | let mut reader = Reader::new(&mut src); 178 | assert_eq!(reader.read_byte().unwrap(), b'\xff'); 179 | } 180 | 181 | #[test] 182 | fn reader_read_bytes() { 183 | let mut src = b"\xff\xfe\x00" as &[u8]; 184 | let mut reader = Reader::new(&mut src); 185 | assert_eq!(reader.read_bytes(2).unwrap(), vec![b'\xff', b'\xfe']); 186 | } 187 | 188 | #[test] 189 | fn reader_read_u32() { 190 | let mut src = b"\x00\x01\x02\x03" as &[u8]; 191 | let mut reader = Reader::new(&mut src); 192 | assert_eq!(reader.read_u32().unwrap(), 0x03020100); 193 | } 194 | 195 | #[test] 196 | fn reader_read_u64() { 197 | let mut src = b"\x00\x01\x02\x03\x04\x05\x06\x07" as &[u8]; 198 | let mut reader = Reader::new(&mut src); 199 | assert_eq!(reader.read_u64().unwrap(), 0x0706050403020100); 200 | } 201 | 202 | #[test] 203 | fn reader_read_integer() { 204 | let mut src = b"\x00\x01\x02\x03\x04\x05\x06\x07" as &[u8]; 205 | let mut reader = Reader::new(&mut src); 206 | assert_eq!(reader.read_integer().unwrap(), 0x0706050403020100); 207 | } 208 | 209 | 
#[test] 210 | fn reader_read_number() { 211 | let mut src = b"\x00\x00\x00\x00\x00\x28\x77\x40" as &[u8]; 212 | let mut reader = Reader::new(&mut src); 213 | assert_eq!(reader.read_number().unwrap(), 370.5); 214 | } 215 | 216 | #[test] 217 | fn reader_read_string() { 218 | // null string 219 | let mut src = b"\x00" as &[u8]; 220 | let mut reader = Reader::new(&mut src); 221 | assert_eq!(reader.read_string().unwrap(), ""); 222 | 223 | // short string 224 | let mut src = b"\x02A" as &[u8]; 225 | let mut reader = Reader::new(&mut src); 226 | assert_eq!(reader.read_string().unwrap(), "A"); 227 | 228 | // long string 229 | let mut src = b"\xff\x00\x01\x00\x00\x00\x00\x00\x00aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" as &[u8]; 230 | let mut reader = Reader::new(&mut src); 231 | assert_eq!(reader.read_string().unwrap(), "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"); 232 | } 233 | 234 | #[test] 235 | fn reader_read_vec() { 236 | let mut src = b"\x03\x00\x00\x00bbb" as &[u8]; 237 | let mut reader = Reader::new(&mut src); 238 | assert_eq!( 239 | reader.read_vec(|r| r.read_byte()).unwrap(), 240 | vec![b'b', b'b', b'b'] 241 | ); 242 | } 243 | 244 | #[test] 245 | fn reader_read_constant() { 246 | let mut src = &[TAG_NIL] as &[u8]; 247 | let mut reader = Reader::new(&mut src); 248 | assert_eq!(reader.read_constant().unwrap(), Constant::Nil); 249 | 250 | let mut src = &[TAG_BOOLEAN, b'\x01'] as &[u8]; 251 | let mut reader = Reader::new(&mut src); 252 | assert_eq!(reader.read_constant().unwrap(), Constant::Boolean(true)); 253 | 254 | let mut src = &[TAG_BOOLEAN, 
b'\x00'] as &[u8]; 255 | let mut reader = Reader::new(&mut src); 256 | assert_eq!(reader.read_constant().unwrap(), Constant::Boolean(false)); 257 | 258 | let mut src = &[ 259 | TAG_NUMBER, b'\x00', b'\x00', b'\x00', b'\x00', b'\x00', b'\x28', b'\x77', b'\x40', 260 | ] as &[u8]; 261 | let mut reader = Reader::new(&mut src); 262 | assert_eq!(reader.read_constant().unwrap(), Constant::Number(370.5)); 263 | 264 | let mut src = &[ 265 | TAG_INTEGER, 266 | b'\x01', 267 | b'\x02', 268 | b'\x03', 269 | b'\x04', 270 | b'\x05', 271 | b'\x06', 272 | b'\x07', 273 | b'\x08', 274 | ] as &[u8]; 275 | let mut reader = Reader::new(&mut src); 276 | assert_eq!( 277 | reader.read_constant().unwrap(), 278 | Constant::Integer(0x0807060504030201) 279 | ); 280 | 281 | let mut src = &[TAG_SHORT_STR, b'\x00'] as &[u8]; 282 | let mut reader = Reader::new(&mut src); 283 | assert_eq!( 284 | reader.read_constant().unwrap(), 285 | Constant::Str("".to_string()) 286 | ); 287 | 288 | let mut src = &[TAG_SHORT_STR, b'\x02', b'A'] as &[u8]; 289 | let mut reader = Reader::new(&mut src); 290 | assert_eq!( 291 | reader.read_constant().unwrap(), 292 | Constant::Str("A".to_string()) 293 | ); 294 | } 295 | 296 | #[test] 297 | fn reader_read_upvalue() { 298 | let mut src = b"\x01\x02" as &[u8]; 299 | let mut reader = Reader::new(&mut src); 300 | assert_eq!( 301 | reader.read_upvalue().unwrap(), 302 | UpValue { 303 | instack: b'\x01', 304 | idx: b'\x02', 305 | } 306 | ); 307 | } 308 | 309 | #[test] 310 | fn reader_read_local_variable() { 311 | let mut src = b"\x00\x01\x00\x00\x00\x03\x00\x00\x00" as &[u8]; 312 | let mut reader = Reader::new(&mut src); 313 | assert_eq!( 314 | reader.read_local_variable().unwrap(), 315 | LocalVariable { 316 | name: "".to_string(), 317 | start_pc: 0x01, 318 | end_pc: 0x03, 319 | } 320 | ); 321 | } 322 | 323 | #[test] 324 | fn reader_read_prototype() { 325 | let mut src = 
b"\x00\x00\x01\x02\x03\x01\x02\x03\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" as &[u8]; 326 | let mut reader = Reader::new(&mut src); 327 | assert_eq!( 328 | reader.read_prototype().unwrap(), 329 | Prototype { 330 | source: None, 331 | first_line: 0x03020100, 332 | last_line: 0x04030201, 333 | params: b'\x00', 334 | variadic: b'\x01', 335 | max_stack_size: b'\x00', 336 | instructions: Vec::new(), 337 | constants: Vec::new(), 338 | upvalues: Vec::new(), 339 | protos: Vec::new(), 340 | line_infos: Vec::new(), 341 | local_vars: Vec::new(), 342 | upvalue_names: Vec::new(), 343 | } 344 | ); 345 | } 346 | } 347 | -------------------------------------------------------------------------------- /src/lexer/string.rs: -------------------------------------------------------------------------------- 1 | use std::char; 2 | use std::io; 3 | 4 | use super::{ascii_to_digit, ascii_to_hexdigit}; 5 | use super::{from_u16, from_u8}; 6 | use super::{Lexer, LexerError, Token}; 7 | 8 | impl<'a, S: io::Read> Lexer<'a, S> { 9 | /// Reads a [=*[...]=*] sequence with matching numbers of '='. 10 | /// 11 | /// A long string literal starts with an opening long bracket of any level 12 | /// and ends at the first closing long bracket of the same level. Literals 13 | /// in this bracketed form can run for several lines, do not interpret any 14 | /// escape sequences, and ignore long brackets of any other level. 15 | pub(crate) fn read_long_string(&mut self) -> Result { 16 | assert_eq!(self.peek(0).unwrap().unwrap(), b'['); 17 | self.advance(1); 18 | 19 | let mut string_buf = String::new(); 20 | let mut open_sep_length = 0; 21 | while self.peek(0)? == Some(b'=') { 22 | self.advance(1); 23 | open_sep_length += 1; 24 | } 25 | 26 | if self.peek(0)? != Some(b'[') { 27 | return Err(LexerError::InvalidLongStringDelimiter); 28 | } 29 | self.advance(1); 30 | 31 | loop { 32 | let c = if let Some(c) = self.peek(0)? 
{ 33 | c 34 | } else { 35 | return Err(LexerError::UnfinishedLongString); 36 | }; 37 | 38 | match c { 39 | b'\r' | b'\n' => { 40 | self.add_new_line()?; 41 | string_buf.push('\n'); 42 | } 43 | 44 | b']' => { 45 | let mut close_sep_length = 0; 46 | self.advance(1); 47 | while self.peek(0)? == Some(b'=') { 48 | self.advance(1); 49 | close_sep_length += 1; 50 | } 51 | 52 | if open_sep_length == close_sep_length && self.peek(0)? == Some(b']') { 53 | self.advance(1); 54 | break; 55 | } else { 56 | string_buf.push(']'); 57 | for _ in 0..close_sep_length { 58 | string_buf.push('='); 59 | } 60 | } 61 | } 62 | 63 | c => { 64 | string_buf.push(from_u8(c)); 65 | self.advance(1); 66 | } 67 | } 68 | } 69 | 70 | Ok(Token::String(string_buf)) 71 | } 72 | 73 | /// Reads a string on a single line delimited by ' or " that allows for \ 74 | /// escaping of certain characters. 75 | /// 76 | /// A short literal string can be delimited by matching single or double 77 | /// quotes, and can contain the following C-like escape sequences: 78 | /// 79 | /// - '\a' (bell) 80 | /// - '\b' (backspace) 81 | /// - '\f' (form feed) 82 | /// - '\n' (newline) 83 | /// - '\r' (carriage return) 84 | /// - '\t' (horizontal tab) 85 | /// - '\v' (vertical tab) 86 | /// - '\\' (backslash) 87 | /// - '\"' (quotation mark [double quote]) 88 | /// - '\'' (apostrophe [single quote]) 89 | /// 90 | /// A backslash followed by a line break results in a newline in the string. 91 | /// The escape sequence '\z' skips the following span of white-space 92 | /// characters, including line breaks; it is particularly useful to break 93 | /// and indent a long literal string into multiple lines without adding the 94 | /// newlines and spaces into the string contents. A short literal string 95 | /// cannot contain unescaped line breaks nor escapes not forming a valid 96 | /// escape sequence. We can specify any byte in a short literal string by 97 | /// its numeric value (including embedded zeros). 
This can be done with the 98 | /// escape sequence \xXX, where XX is a sequence of exactly two hexadecimal 99 | /// digits, or with the escape sequence \ddd, where ddd is a sequence of up 100 | /// to three decimal digits. 101 | /// 102 | /// The UTF-8 encoding of a Unicode character can be inserted in a literal 103 | /// string with the escape sequence \u{XXX} (note the mandatory enclosing 104 | /// brackets), where XXX is a sequence of one or more hexadecimal digits 105 | /// representing the character code point. 106 | pub(crate) fn read_short_string(&mut self) -> Result { 107 | let start_quote = self.peek(0).unwrap().unwrap(); 108 | assert!(start_quote == b'\'' || start_quote == b'\"'); 109 | self.advance(1); 110 | 111 | let mut string_buf = String::new(); 112 | loop { 113 | let c = if let Some(c) = self.peek(0)? { 114 | c 115 | } else { 116 | return Err(LexerError::UnfinishedShortString(start_quote)); 117 | }; 118 | 119 | if c == b'\r' || c == b'\n' { 120 | return Err(LexerError::UnfinishedShortString(start_quote)); 121 | } 122 | 123 | self.advance(1); 124 | if c == b'\\' { 125 | match self 126 | .peek(0)? 127 | .ok_or_else(|| LexerError::UnfinishedShortString(start_quote))? 
128 | { 129 | b'a' => { 130 | self.advance(1); 131 | string_buf.push('\x07'); 132 | } 133 | 134 | b'b' => { 135 | self.advance(1); 136 | string_buf.push('\x08'); 137 | } 138 | 139 | b'f' => { 140 | self.advance(1); 141 | string_buf.push('\x0c'); 142 | } 143 | 144 | b'n' => { 145 | self.advance(1); 146 | string_buf.push('\n'); 147 | } 148 | 149 | b'r' => { 150 | self.advance(1); 151 | string_buf.push('\r'); 152 | } 153 | 154 | b't' => { 155 | self.advance(1); 156 | string_buf.push('\t'); 157 | } 158 | 159 | b'v' => { 160 | self.advance(1); 161 | string_buf.push('\x0b'); 162 | } 163 | 164 | b'\\' => { 165 | self.advance(1); 166 | string_buf.push('\\'); 167 | } 168 | 169 | b'\'' => { 170 | self.advance(1); 171 | string_buf.push('\''); 172 | } 173 | 174 | b'\"' => { 175 | self.advance(1); 176 | string_buf.push('\"'); 177 | } 178 | 179 | b'\r' | b'\n' => { 180 | self.add_new_line()?; 181 | string_buf.push('\n'); 182 | } 183 | 184 | // hexadecimal escape sequence, e.g. \x1f 185 | b'x' => { 186 | self.advance(1); 187 | let first = self 188 | .peek(0)? 189 | .and_then(ascii_to_hexdigit) 190 | .ok_or(LexerError::HexDigitExpected)?; 191 | let second = self 192 | .peek(1)? 193 | .and_then(ascii_to_hexdigit) 194 | .ok_or(LexerError::HexDigitExpected)?; 195 | string_buf.push(from_u8(first << 4 | second)); 196 | self.advance(2); 197 | } 198 | 199 | // UTF-8 escape sequence, e.g. \u{XXX} 200 | // The length of escape sequence must be greater than ZERO. 201 | b'u' => { 202 | if self.peek(1)? != Some(b'{') { 203 | return Err(LexerError::EscapeUnicodeStart); 204 | } 205 | self.advance(2); 206 | 207 | let mut seq_len: usize = 0; 208 | let mut u: u32 = 0; 209 | loop { 210 | if let Some(c) = self.peek(0)? 
{ 211 | if c == b'}' { 212 | self.advance(1); 213 | break; 214 | } else if let Some(h) = ascii_to_hexdigit(c) { 215 | /* Checked arithmetic: a run of hex digits that overflows `u32` must be rejected, not silently wrapped around into a valid code point. */ u = u.checked_mul(16).and_then(|v| v.checked_add(h as u32)).ok_or(LexerError::EscapeUnicodeInvalid)?; 216 | seq_len += 1; 217 | self.advance(1); 218 | } else { 219 | return Err(LexerError::EscapeUnicodeEnd); 220 | } 221 | } else { 222 | return Err(LexerError::EscapeUnicodeEnd); 223 | } 224 | } 225 | 226 | if seq_len == 0 { 227 | return Err(LexerError::EscapeUnicodeInvalid); 228 | } 229 | 230 | let c = char::from_u32(u).ok_or(LexerError::EscapeUnicodeInvalid)?; 231 | string_buf.push(c); 232 | } 233 | 234 | // The escape sequence '\z' skips the following span of 235 | // white-space characters, including line breaks. 236 | b'z' => { 237 | self.advance(1); 238 | while let Some(c) = self.peek(0)? { 239 | if c == b'\r' || c == b'\n' { 240 | self.add_new_line()?; 241 | } else if c.is_ascii_whitespace() { 242 | self.advance(1); 243 | } else { 244 | break; 245 | } 246 | } 247 | } 248 | 249 | // the escape sequence \ddd, where ddd is a sequence of up 250 | // to three decimal digits. 
251 | c if c.is_ascii_digit() => { 252 | let mut u: u16 = 0; 253 | if let Some(d) = self.peek(0)?.and_then(ascii_to_digit) { 254 | u = 10 * u + d as u16; 255 | self.advance(1); 256 | } else { 257 | return Err(LexerError::EscapeDecimalInvalid); 258 | } 259 | 260 | if let Some(d) = self.peek(0)?.and_then(ascii_to_digit) { 261 | u = 10 * u + d as u16; 262 | self.advance(1); 263 | } 264 | 265 | if let Some(d) = self.peek(0)?.and_then(ascii_to_digit) { 266 | u = 10 * u + d as u16; 267 | self.advance(1); 268 | } 269 | 270 | if u > 255 { 271 | return Err(LexerError::EscapeDecimalTooLarge); 272 | } 273 | string_buf.push(from_u16(u)); 274 | } 275 | 276 | _ => return Err(LexerError::InvalidEscape), 277 | } 278 | } else if c == start_quote { 279 | break; 280 | } else { 281 | string_buf.push(from_u8(c)); 282 | } 283 | } 284 | 285 | Ok(Token::String(string_buf)) 286 | } 287 | } 288 | 289 | #[cfg(test)] 290 | mod tests { 291 | use super::*; 292 | 293 | #[test] 294 | fn long_string_invalid_delimiter() { 295 | let mut s: &[u8] = b"[==invalid==]"; 296 | let mut lex = Lexer::new(&mut s); 297 | assert_eq!( 298 | lex.next().unwrap_err(), 299 | LexerError::InvalidLongStringDelimiter 300 | ); 301 | } 302 | 303 | #[test] 304 | fn long_string_unfinished() { 305 | let mut s: &[u8] = b"[==[unfinished]"; 306 | let mut lex = Lexer::new(&mut s); 307 | assert_eq!(lex.next().unwrap_err(), LexerError::UnfinishedLongString); 308 | } 309 | 310 | #[test] 311 | fn long_string_crlf() { 312 | // \r\n 313 | let mut s: &[u8] = b"[==[\r\nhere]==]"; 314 | let mut lex = Lexer::new(&mut s); 315 | assert_eq!(lex.next().unwrap(), Token::String("\nhere".to_owned())); 316 | assert_eq!(lex.get_line(), 2); 317 | assert_eq!(lex.next().unwrap(), Token::None); 318 | 319 | // \n\n 320 | let mut s: &[u8] = b"[==[\n\ntwo lines]==]"; 321 | let mut lex = Lexer::new(&mut s); 322 | assert_eq!( 323 | lex.next().unwrap(), 324 | Token::String("\n\ntwo lines".to_owned()) 325 | ); 326 | assert_eq!(lex.get_line(), 3); 327 | 328 | 
// \r\r 329 | let mut s: &[u8] = b"[==[\r\rtwo lines]==]"; 330 | let mut lex = Lexer::new(&mut s); 331 | assert_eq!( 332 | lex.next().unwrap(), 333 | Token::String("\n\ntwo lines".to_owned()) 334 | ); 335 | assert_eq!(lex.get_line(), 3); 336 | 337 | // \n\r 338 | let mut s: &[u8] = b"[==[\n\rone line]==]"; 339 | let mut lex = Lexer::new(&mut s); 340 | assert_eq!(lex.next().unwrap(), Token::String("\none line".to_owned())); 341 | assert_eq!(lex.get_line(), 2); 342 | } 343 | 344 | #[test] 345 | fn long_string_many_brackets() { 346 | let mut s: &[u8] = b"[===[first line\n]==]]]]\rsecond line\n]===]"; 347 | let mut lex = Lexer::new(&mut s); 348 | assert_eq!( 349 | lex.next().unwrap(), 350 | Token::String("first line\n]==]]]]\nsecond line\n".to_owned()) 351 | ); 352 | } 353 | 354 | #[test] 355 | fn short_string_single_quote() { 356 | let mut s: &[u8] = b"'single quote'"; 357 | let mut lex = Lexer::new(&mut s); 358 | assert_eq!( 359 | lex.next().unwrap(), 360 | Token::String("single quote".to_owned()) 361 | ); 362 | } 363 | 364 | #[test] 365 | fn short_string_double_quotes() { 366 | let mut s: &[u8] = b"\"double quotes\""; 367 | let mut lex = Lexer::new(&mut s); 368 | assert_eq!( 369 | lex.next().unwrap(), 370 | Token::String("double quotes".to_owned()) 371 | ); 372 | } 373 | 374 | #[test] 375 | fn short_string_unfinished() { 376 | let mut s: &[u8] = b"'unfinished1"; 377 | let mut lex = Lexer::new(&mut s); 378 | assert_eq!( 379 | lex.next().unwrap_err(), 380 | LexerError::UnfinishedShortString(b'\'') 381 | ); 382 | 383 | let mut s: &[u8] = b"'unfinished2\n'"; 384 | let mut lex = Lexer::new(&mut s); 385 | assert_eq!( 386 | lex.next().unwrap_err(), 387 | LexerError::UnfinishedShortString(b'\'') 388 | ); 389 | 390 | let mut s: &[u8] = b"'unfinished3\\'"; 391 | let mut lex = Lexer::new(&mut s); 392 | assert_eq!( 393 | lex.next().unwrap_err(), 394 | LexerError::UnfinishedShortString(b'\'') 395 | ); 396 | } 397 | 398 | #[test] 399 | fn short_string_hexdigit_expected() { 400 | 
let mut s: &[u8] = b"'no hexdigits \\x'"; 401 | let mut lex = Lexer::new(&mut s); 402 | assert_eq!(lex.next().unwrap_err(), LexerError::HexDigitExpected); 403 | 404 | let mut s: &[u8] = b"'1 hexdigit \\xa'"; 405 | let mut lex = Lexer::new(&mut s); 406 | assert_eq!(lex.next().unwrap_err(), LexerError::HexDigitExpected); 407 | 408 | let mut s: &[u8] = b"'2 hexdigits \\x1f'"; 409 | let mut lex = Lexer::new(&mut s); 410 | assert_eq!( 411 | lex.next().unwrap(), 412 | Token::String("2 hexdigits \x1f".to_owned()) 413 | ); 414 | 415 | let mut s: &[u8] = b"'invalid hexdigits \\xxyz'"; 416 | let mut lex = Lexer::new(&mut s); 417 | assert_eq!(lex.next().unwrap_err(), LexerError::HexDigitExpected); 418 | } 419 | 420 | #[test] 421 | fn short_string_escape_unicode() { 422 | let mut s: &[u8] = b"'unicode \\u00c}'"; 423 | let mut lex = Lexer::new(&mut s); 424 | assert_eq!(lex.next().unwrap_err(), LexerError::EscapeUnicodeStart); 425 | 426 | let mut s: &[u8] = b"'unicode \\u{00c'"; 427 | let mut lex = Lexer::new(&mut s); 428 | assert_eq!(lex.next().unwrap_err(), LexerError::EscapeUnicodeEnd); 429 | 430 | let mut s: &[u8] = b"'unicode \\u{00c"; 431 | let mut lex = Lexer::new(&mut s); 432 | assert_eq!(lex.next().unwrap_err(), LexerError::EscapeUnicodeEnd); 433 | 434 | let mut s: &[u8] = b"'unicode \\u{110000}"; 435 | let mut lex = Lexer::new(&mut s); 436 | assert_eq!(lex.next().unwrap_err(), LexerError::EscapeUnicodeInvalid); 437 | 438 | let mut s: &[u8] = b"'unicode \\u{}'"; 439 | let mut lex = Lexer::new(&mut s); 440 | assert_eq!(lex.next().unwrap_err(), LexerError::EscapeUnicodeInvalid); 441 | 442 | let mut s: &[u8] = b"'unicode \\u{00c}'"; 443 | let mut lex = Lexer::new(&mut s); 444 | assert_eq!( 445 | lex.next().unwrap(), 446 | Token::String("unicode \x0c".to_owned()) 447 | ); 448 | 449 | let mut s: &[u8] = b"'unicode \\u{00c} unicode'"; 450 | let mut lex = Lexer::new(&mut s); 451 | assert_eq!( 452 | lex.next().unwrap(), 453 | Token::String("unicode \x0c unicode".to_owned()) 454 
| ); 455 | } 456 | 457 | #[test] 458 | fn short_string_escape_decimal() { 459 | let mut s: &[u8] = b"'decimal \\12'"; 460 | let mut lex = Lexer::new(&mut s); 461 | assert_eq!( 462 | lex.next().unwrap(), 463 | Token::String("decimal \x0c".to_owned()) 464 | ); 465 | 466 | let mut s: &[u8] = b"'decimal \\9f0'"; 467 | let mut lex = Lexer::new(&mut s); 468 | assert_eq!( 469 | lex.next().unwrap(), 470 | Token::String("decimal \tf0".to_owned()) 471 | ); 472 | 473 | let mut s: &[u8] = b"'decimal \\256'"; 474 | let mut lex = Lexer::new(&mut s); 475 | assert_eq!(lex.next().unwrap_err(), LexerError::EscapeDecimalTooLarge); 476 | 477 | let mut s: &[u8] = b"'decimal \\097'"; 478 | let mut lex = Lexer::new(&mut s); 479 | assert_eq!(lex.next().unwrap(), Token::String("decimal a".to_owned())); 480 | } 481 | 482 | #[test] 483 | fn short_string_escape() { 484 | let mut s: &[u8] = b"'escape \\a \\b \\f \\n \\r \\t \\v \\\\ \\\' \\\"'"; 485 | let mut lex = Lexer::new(&mut s); 486 | assert_eq!( 487 | lex.next().unwrap(), 488 | Token::String("escape \x07 \x08 \x0c \n \r \t \x0b \\ \' \"".to_owned()) 489 | ); 490 | assert_eq!(lex.get_line(), 1); 491 | } 492 | 493 | #[test] 494 | fn short_string_slash_z() { 495 | let mut s: &[u8] = b"'slash z \\z\n line1\\z\n\\z\n\n'"; 496 | let mut lex = Lexer::new(&mut s); 497 | assert_eq!( 498 | lex.next().unwrap(), 499 | Token::String("slash z line1".to_owned()) 500 | ); 501 | assert_eq!(lex.get_line(), 5); 502 | } 503 | 504 | #[test] 505 | fn short_string_slash() { 506 | let mut s: &[u8] = b"'slash \\\nfirst \\\nsecond'"; 507 | let mut lex = Lexer::new(&mut s); 508 | assert_eq!( 509 | lex.next().unwrap(), 510 | Token::String("slash \nfirst \nsecond".to_owned()) 511 | ); 512 | assert_eq!(lex.get_line(), 3); 513 | } 514 | } 515 | -------------------------------------------------------------------------------- /src/lexer/lexer.rs: -------------------------------------------------------------------------------- 1 | use std::error; 2 | use std::fmt; 3 | 
use std::io; 4 | use std::u8; 5 | 6 | use phf::phf_map; 7 | 8 | use super::from_u8; 9 | 10 | /// Lua keywords are reserved and cannot be used as an identifier. 11 | const LUA_KEYWORDS: phf::Map<&'static str, Token> = phf_map! { 12 | "and" => Token::And, 13 | "break" => Token::Break, 14 | "do" => Token::Do, 15 | "else" => Token::Else, 16 | "elseif" => Token::ElseIf, 17 | "end" => Token::End, 18 | "false" => Token::False, 19 | "for" => Token::For, 20 | "function" => Token::Function, 21 | "goto" => Token::Goto, 22 | "if" => Token::If, 23 | "in" => Token::In, 24 | "local" => Token::Local, 25 | "nil" => Token::Nil, 26 | "not" => Token::Not, 27 | "or" => Token::Or, 28 | "repeat" => Token::Repeat, 29 | "return" => Token::Return, 30 | "then" => Token::Then, 31 | "true" => Token::True, 32 | "until" => Token::Until, 33 | "while" => Token::While, 34 | }; 35 | 36 | /// A lexical token is a string with an assigned and thus identified meaning. It 37 | /// is structured as a pair consisting of a token name and an optional token 38 | /// value. 39 | /// 40 | /// In `Lua`, the following _keywords_ are reserved and cannot be used as 41 | /// identifiers. 42 | /// 43 | /// `and` `break` `do` `else` `elseif` `end` `false` `for` `function` `goto` 44 | /// `if` `in` `local` `nil` `not` `or` `repeat` `return` `then` `true` `until` 45 | /// `while` 46 | /// 47 | /// The following strings denote other tokens: 48 | /// 49 | /// `+` `-` `*` `/` `%` `^` `#` `&` `~` `|` `<<` `>>` `//` `==` `~=` `<=` `>=` 50 | /// `<` `>` `=` `(` `)` `{` `}` `[` `]` `::` `;` `:` `,` `.` `..` `...` 51 | /// 52 | /// Plus `None` variant which indicates `EOF`. 
53 | #[derive(Debug, Clone, PartialEq)] 54 | pub enum Token { 55 | And, 56 | Break, 57 | Do, 58 | Else, 59 | ElseIf, 60 | End, 61 | False, 62 | For, 63 | Function, 64 | Goto, 65 | If, 66 | In, 67 | Local, 68 | Nil, 69 | Not, 70 | Or, 71 | Repeat, 72 | Return, 73 | Then, 74 | True, 75 | Until, 76 | While, 77 | Add, 78 | Minus, 79 | Mul, 80 | Div, 81 | IDiv, 82 | Power, 83 | Mod, 84 | BitXorNot, 85 | BitAnd, 86 | BitOr, 87 | ShiftRight, 88 | ShiftLeft, 89 | LessThan, 90 | LessEqual, 91 | GreaterThan, 92 | GreaterEqual, 93 | Equal, 94 | NotEqual, 95 | Assign, 96 | Len, 97 | LeftParen, 98 | RightParen, 99 | LeftBrace, 100 | RightBrace, 101 | LeftBracket, 102 | RightBracket, 103 | Colon, 104 | SemiColon, 105 | DoubleColon, 106 | Comma, 107 | Dot, 108 | Concat, 109 | Dots, 110 | Identifier(String), 111 | String(String), 112 | Integer(i64), 113 | Number(f64), 114 | None, 115 | } 116 | 117 | /// Errors the `Lua` lexer can report; the `Display` impl renders each variant as a human-readable message. 118 | #[derive(Debug, PartialEq)] 119 | pub enum LexerError { 120 | UnfinishedShortString(u8), 121 | UnfinishedLongString, 122 | UnexpectedCharacter(u8), 123 | HexDigitExpected, 124 | EscapeUnicodeStart, 125 | EscapeUnicodeEnd, 126 | EscapeUnicodeInvalid, 127 | EscapeDecimalTooLarge, 128 | EscapeDecimalInvalid, 129 | InvalidEscape, 130 | InvalidLongStringDelimiter, 131 | BadNumber, 132 | IOError(String), 133 | } 134 | 135 | impl fmt::Display for LexerError { 136 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 137 | match self { 138 | LexerError::UnfinishedShortString(c) => write!( 139 | f, 140 | "short string not finished, expected matching {}", 141 | from_u8(*c) 142 | ), 143 | LexerError::UnfinishedLongString => write!(f, "unfinished long string"), 144 | LexerError::UnexpectedCharacter(c) => { 145 | write!(f, "unexpected character: '{}'", from_u8(*c)) 146 | } 147 | LexerError::HexDigitExpected => write!(f, "hexadecimal digit expected"), 148 | LexerError::EscapeUnicodeStart => write!(f, "missing '{{' in \\u{{xxxx}} escape"), 149 | 
LexerError::EscapeUnicodeEnd => write!(f, "missing '}}' in \\u{{xxxx}} escape"), 150 | LexerError::EscapeUnicodeInvalid => { 151 | write!(f, "invalid unicode value in \\u{{xxxx}} escape") 152 | } 153 | LexerError::EscapeDecimalTooLarge => write!(f, "\\ddd escape out of 0-255 range"), 154 | LexerError::EscapeDecimalInvalid => write!(f, "\\ddd escape format error"), 155 | LexerError::InvalidEscape => write!(f, "invalid escape sequence"), 156 | LexerError::InvalidLongStringDelimiter => write!(f, "invalid long string delimiter"), 157 | LexerError::BadNumber => write!(f, "malformed number"), 158 | LexerError::IOError(s) => write!(f, "IO Error: {}", s), 159 | } 160 | } 161 | } 162 | 163 | impl error::Error for LexerError {} 164 | 165 | /// Lexer 166 | /// 167 | /// See https://en.wikipedia.org/wiki/Lexical_analysis for more information. 168 | pub struct Lexer<'a, S: io::Read> { 169 | /// Source file 170 | src: &'a mut S, 171 | /// Characters buffer 172 | peek_buf: Vec, 173 | /// Input line counter 174 | line: usize, 175 | } 176 | 177 | impl<'a, S: io::Read> Lexer<'a, S> { 178 | /// Creates a new `Lexer` 179 | pub fn new(src: &'a mut S) -> Lexer<'a, S> { 180 | Lexer { 181 | src, 182 | peek_buf: Vec::new(), 183 | line: 1, 184 | } 185 | } 186 | 187 | /// Current line number of the source file, 1-indexed 188 | pub fn get_line(&self) -> usize { 189 | self.line 190 | } 191 | 192 | /// Gets the next `Token`, or `Token::None` if the end of file reached 193 | pub fn next(&mut self) -> Result { 194 | self.skip_whitespace()?; 195 | 196 | Ok(if let Some(c) = self.peek(0)? 
{ 197 | match c { 198 | b'(' | b')' | b'{' | b'}' | b']' | b'+' | b'-' | b'*' | b'%' | b'^' | b'#' 199 | | b'&' | b'|' | b';' | b',' => { 200 | self.advance(1); 201 | match c { 202 | b'(' => Token::LeftParen, 203 | b')' => Token::RightParen, 204 | b'{' => Token::LeftBrace, 205 | b'}' => Token::RightBrace, 206 | b']' => Token::RightBracket, 207 | b'+' => Token::Add, 208 | b'-' => Token::Minus, 209 | b'*' => Token::Mul, 210 | b'%' => Token::Mod, 211 | b'^' => Token::Power, 212 | b'#' => Token::Len, 213 | b'&' => Token::BitAnd, 214 | b'|' => Token::BitOr, 215 | b';' => Token::SemiColon, 216 | b',' => Token::Comma, 217 | _ => unreachable!(), 218 | } 219 | } 220 | 221 | b'/' => { 222 | self.advance(1); 223 | if self.peek(0)? == Some(b'/') { 224 | self.advance(1); 225 | Token::IDiv 226 | } else { 227 | Token::Div 228 | } 229 | } 230 | 231 | b'~' => { 232 | self.advance(1); 233 | if self.peek(0)? == Some(b'=') { 234 | self.advance(1); 235 | Token::NotEqual 236 | } else { 237 | Token::BitXorNot 238 | } 239 | } 240 | 241 | // long string or simply '[' 242 | b'[' => match self.peek(1)? { 243 | Some(b'=') | Some(b'[') => { 244 | return self.read_long_string(); 245 | } 246 | _ => { 247 | self.advance(1); 248 | Token::LeftBracket 249 | } 250 | }, 251 | 252 | // short literal strings 253 | b'\"' | b'\'' => { 254 | return self.read_short_string(); 255 | } 256 | 257 | b'=' => { 258 | self.advance(1); 259 | if self.peek(0)? == Some(b'=') { 260 | self.advance(1); 261 | Token::Equal 262 | } else { 263 | Token::Assign 264 | } 265 | } 266 | 267 | b'<' => { 268 | self.advance(1); 269 | match self.peek(0)? { 270 | Some(b'=') => { 271 | self.advance(1); 272 | Token::LessEqual 273 | } 274 | Some(b'<') => { 275 | self.advance(1); 276 | Token::ShiftLeft 277 | } 278 | _ => Token::LessThan, 279 | } 280 | } 281 | 282 | b'>' => { 283 | self.advance(1); 284 | match self.peek(0)? 
{ 285 | Some(b'=') => { 286 | self.advance(1); 287 | Token::GreaterEqual 288 | } 289 | Some(b'>') => { 290 | self.advance(1); 291 | Token::ShiftRight 292 | } 293 | _ => Token::GreaterThan, 294 | } 295 | } 296 | 297 | b':' => { 298 | self.advance(1); 299 | if self.peek(0)? == Some(b':') { 300 | self.advance(1); 301 | Token::DoubleColon 302 | } else { 303 | Token::Colon 304 | } 305 | } 306 | 307 | // '.', '..', '...', or number 308 | b'.' => { 309 | if self.peek(1)? == Some(b'.') { 310 | if self.peek(2)? == Some(b'.') { 311 | self.advance(3); 312 | Token::Dots 313 | } else { 314 | self.advance(2); 315 | Token::Concat 316 | } 317 | } else if self 318 | .peek(1)? 319 | .as_ref() 320 | .map(u8::is_ascii_digit) 321 | .unwrap_or(false) 322 | { 323 | return self.read_numeral(); 324 | } else { 325 | self.advance(1); 326 | Token::Dot 327 | } 328 | } 329 | 330 | c if c.is_ascii_digit() => { 331 | return self.read_numeral(); 332 | } 333 | 334 | // identifier or reserved word? 335 | c if c.is_ascii_alphabetic() || c == b'_' => { 336 | let mut string_buf = Vec::new(); 337 | string_buf.push(c); 338 | self.advance(1); 339 | 340 | while let Some(c) = self.peek(0)? 
{ 341 | if c.is_ascii_alphanumeric() || c == b'_' { 342 | string_buf.push(c); 343 | self.advance(1); 344 | } else { 345 | break; 346 | } 347 | } 348 | 349 | /* SAFETY: every byte pushed into `string_buf` passed an ASCII alphabetic/alphanumeric or `_` check, so the buffer is pure ASCII and therefore valid UTF-8. */ let s = unsafe { String::from_utf8_unchecked(string_buf) }; 350 | if let Some(keyword) = LUA_KEYWORDS.get(s.as_str()) { 351 | keyword.clone() 352 | } else { 353 | Token::Identifier(s) 354 | } 355 | } 356 | 357 | c => return Err(LexerError::UnexpectedCharacter(c)), 358 | } 359 | } else { 360 | Token::None 361 | }) 362 | } 363 | 364 | /// Buffers input until at least `n + 1` bytes are available and returns the byte at offset `n` without consuming it, or `None` if the source ends first. Interrupted reads are retried; any other I/O error is returned as `LexerError::IOError`. 365 | pub(crate) fn peek(&mut self, n: usize) -> Result, LexerError> { 366 | while self.peek_buf.len() <= n { 367 | let mut c = [0]; 368 | match self.src.read(&mut c) { 369 | Ok(0) => { 370 | break; 371 | } 372 | Ok(_) => { 373 | self.peek_buf.push(c[0]); 374 | } 375 | Err(e) => { 376 | if e.kind() != io::ErrorKind::Interrupted { 377 | return Err(LexerError::IOError(e.to_string())); 378 | } 379 | } 380 | } 381 | } 382 | 383 | Ok(self.peek_buf.get(n).cloned()) 384 | } 385 | 386 | /// Skips whitespace (including line breaks), a `#!` shebang while still on the first line, and `--` comments. 387 | pub(crate) fn skip_whitespace(&mut self) -> Result<(), LexerError> { 388 | while let Some(c) = self.peek(0)? { 389 | match c { 390 | ws if ws.is_ascii_whitespace() => { 391 | if ws == b'\r' || ws == b'\n' { 392 | self.add_new_line()?; 393 | } else { 394 | self.advance(1); 395 | } 396 | } 397 | 398 | b'#' if self.get_line() == 1 && self.peek(1)? == Some(b'!') => { 399 | // shebang, skip until end of line. 400 | self.advance(1); 401 | self.skip_until_eol()?; 402 | } 403 | 404 | b'-' => { 405 | if self.peek(1)? != Some(b'-') { 406 | break; 407 | } 408 | 409 | self.advance(2); 410 | match (self.peek(0)?, self.peek(1)?) 
{ 411 | (Some(b'['), Some(b'=')) | (Some(b'['), Some(b'[')) => { 412 | // long comment, read and ignore the result 413 | self.read_long_string()?; 414 | } 415 | _ => { 416 | // short comment, skip until end of line 417 | self.skip_until_eol()?; 418 | } 419 | } 420 | } 421 | _ => break, 422 | } 423 | } 424 | 425 | Ok(()) 426 | } 427 | 428 | /// Skips the rest of the current line, including its end-of-line sequence (or up to end of input if there is none). 429 | pub(crate) fn skip_until_eol(&mut self) -> Result<(), LexerError> { 430 | while let Some(c) = self.peek(0)? { 431 | if c == b'\r' || c == b'\n' { 432 | self.add_new_line()?; 433 | break; 434 | } 435 | self.advance(1); 436 | } 437 | Ok(()) 438 | } 439 | 440 | /// Discards `n` already-peeked bytes; panics if fewer than `n` bytes have been peeked. 441 | pub(crate) fn advance(&mut self, n: usize) { 442 | assert!( 443 | n <= self.peek_buf.len(), 444 | "cannot advance over un-peeked characters" 445 | ); 446 | self.peek_buf.drain(0..n); 447 | } 448 | 449 | /// Consumes one end-of-line sequence and increments the line counter; panics unless the next byte is `\r` or `\n`. 450 | /// 451 | /// See comments below for details. 452 | pub(crate) fn add_new_line(&mut self) -> Result<(), LexerError> { 453 | let c = self.peek(0)?.unwrap(); 454 | assert!(c == b'\r' || c == b'\n'); 455 | 456 | // Any kind of end-of-line sequence (carriage return, newline, carriage 457 | // return followed by newline, or newline followed by carriage return) 458 | // is converted to a simple newline. 459 | self.line += 1; 460 | self.advance(1); 461 | if let Some(next_char) = self.peek(0)? 
{ 462 | if c == b'\n' && next_char == b'\r' || c == b'\r' && next_char == b'\n' { 463 | self.advance(1); 464 | } 465 | } 466 | 467 | Ok(()) 468 | } 469 | } 470 | 471 | #[cfg(test)] 472 | mod tests { 473 | use super::*; 474 | 475 | #[test] 476 | fn whitespace_short_comment() { 477 | let mut s: &[u8] = b" -- text in comments\n'string'"; 478 | let mut lex = Lexer::new(&mut s); 479 | assert_eq!(lex.next().unwrap(), Token::String("string".to_owned())); 480 | assert_eq!(lex.get_line(), 2); 481 | } 482 | 483 | #[test] 484 | fn whitespace_long_comment() { 485 | let mut s: &[u8] = 486 | b"--[[ text in long comments\nstill in comments\n]]\n'string after long comments'"; 487 | let mut lex = Lexer::new(&mut s); 488 | assert_eq!( 489 | lex.next().unwrap(), 490 | Token::String("string after long comments".to_owned()) 491 | ); 492 | assert_eq!(lex.get_line(), 4); 493 | } 494 | 495 | #[test] 496 | fn whitespace_shebang() { 497 | let mut s: &[u8] = b"#!/bin/lua arguments will be ignored\n 'string'"; 498 | let mut lex = Lexer::new(&mut s); 499 | assert_eq!(lex.next().unwrap(), Token::String("string".to_owned())); 500 | assert_eq!(lex.get_line(), 2); 501 | } 502 | 503 | #[test] 504 | fn keywords() { 505 | let mut s: &[u8] = b"and break do else elseif end false for function goto \ 506 | if in local nil not or repeat return then true until while"; 507 | let mut lex = Lexer::new(&mut s); 508 | assert_eq!(lex.next().unwrap(), Token::And); 509 | assert_eq!(lex.next().unwrap(), Token::Break); 510 | assert_eq!(lex.next().unwrap(), Token::Do); 511 | assert_eq!(lex.next().unwrap(), Token::Else); 512 | assert_eq!(lex.next().unwrap(), Token::ElseIf); 513 | assert_eq!(lex.next().unwrap(), Token::End); 514 | assert_eq!(lex.next().unwrap(), Token::False); 515 | assert_eq!(lex.next().unwrap(), Token::For); 516 | assert_eq!(lex.next().unwrap(), Token::Function); 517 | assert_eq!(lex.next().unwrap(), Token::Goto); 518 | assert_eq!(lex.next().unwrap(), Token::If); 519 | assert_eq!(lex.next().unwrap(), 
Token::In); 520 | assert_eq!(lex.next().unwrap(), Token::Local); 521 | assert_eq!(lex.next().unwrap(), Token::Nil); 522 | assert_eq!(lex.next().unwrap(), Token::Not); 523 | assert_eq!(lex.next().unwrap(), Token::Or); 524 | assert_eq!(lex.next().unwrap(), Token::Repeat); 525 | assert_eq!(lex.next().unwrap(), Token::Return); 526 | assert_eq!(lex.next().unwrap(), Token::Then); 527 | assert_eq!(lex.next().unwrap(), Token::True); 528 | assert_eq!(lex.next().unwrap(), Token::Until); 529 | assert_eq!(lex.next().unwrap(), Token::While); 530 | assert_eq!(lex.next().unwrap(), Token::None); 531 | } 532 | 533 | #[test] 534 | fn identifiers() { 535 | let mut s: &[u8] = b"usual identifiers except function"; 536 | let mut lex = Lexer::new(&mut s); 537 | assert_eq!(lex.next().unwrap(), Token::Identifier("usual".to_owned())); 538 | assert_eq!( 539 | lex.next().unwrap(), 540 | Token::Identifier("identifiers".to_owned()) 541 | ); 542 | assert_eq!(lex.next().unwrap(), Token::Identifier("except".to_owned())); 543 | assert_eq!(lex.next().unwrap(), Token::Function); 544 | } 545 | 546 | #[test] 547 | fn operators() { 548 | let mut s: &[u8] = b"(){}[]+-*/%^ # & | ;,//~ ~= = == > >> >= < << <= : :: . .. 
..."; 549 | let mut lex = Lexer::new(&mut s); 550 | assert_eq!(lex.next().unwrap(), Token::LeftParen); 551 | assert_eq!(lex.next().unwrap(), Token::RightParen); 552 | assert_eq!(lex.next().unwrap(), Token::LeftBrace); 553 | assert_eq!(lex.next().unwrap(), Token::RightBrace); 554 | assert_eq!(lex.next().unwrap(), Token::LeftBracket); 555 | assert_eq!(lex.next().unwrap(), Token::RightBracket); 556 | assert_eq!(lex.next().unwrap(), Token::Add); 557 | assert_eq!(lex.next().unwrap(), Token::Minus); 558 | assert_eq!(lex.next().unwrap(), Token::Mul); 559 | assert_eq!(lex.next().unwrap(), Token::Div); 560 | assert_eq!(lex.next().unwrap(), Token::Mod); 561 | assert_eq!(lex.next().unwrap(), Token::Power); 562 | assert_eq!(lex.next().unwrap(), Token::Len); 563 | assert_eq!(lex.next().unwrap(), Token::BitAnd); 564 | assert_eq!(lex.next().unwrap(), Token::BitOr); 565 | assert_eq!(lex.next().unwrap(), Token::SemiColon); 566 | assert_eq!(lex.next().unwrap(), Token::Comma); 567 | assert_eq!(lex.next().unwrap(), Token::IDiv); 568 | assert_eq!(lex.next().unwrap(), Token::BitXorNot); 569 | assert_eq!(lex.next().unwrap(), Token::NotEqual); 570 | assert_eq!(lex.next().unwrap(), Token::Assign); 571 | assert_eq!(lex.next().unwrap(), Token::Equal); 572 | assert_eq!(lex.next().unwrap(), Token::GreaterThan); 573 | assert_eq!(lex.next().unwrap(), Token::ShiftRight); 574 | assert_eq!(lex.next().unwrap(), Token::GreaterEqual); 575 | assert_eq!(lex.next().unwrap(), Token::LessThan); 576 | assert_eq!(lex.next().unwrap(), Token::ShiftLeft); 577 | assert_eq!(lex.next().unwrap(), Token::LessEqual); 578 | assert_eq!(lex.next().unwrap(), Token::Colon); 579 | assert_eq!(lex.next().unwrap(), Token::DoubleColon); 580 | assert_eq!(lex.next().unwrap(), Token::Dot); 581 | assert_eq!(lex.next().unwrap(), Token::Concat); 582 | assert_eq!(lex.next().unwrap(), Token::Dots); 583 | } 584 | } 585 | -------------------------------------------------------------------------------- /src/parser/expression.rs: 
-------------------------------------------------------------------------------- 1 | use std::io; 2 | 3 | use super::{ 4 | consteval_expression, get_binary_operator, get_unary_operator, Associativity, BinaryOperator, 5 | Block, Parser, ParserError, Precedence, UnaryOperator, MIN_OPERATOR_PRECEDENCE, 6 | }; 7 | use crate::lexer::Token; 8 | 9 | /// An expression in `Lua` can be: 10 | /// 11 | /// - `nil` 12 | /// - `true` 13 | /// - `false` 14 | /// - String 15 | /// - `Integer` 16 | /// - `Number` 17 | /// - Table 18 | /// - `...` 19 | /// - Identifier 20 | /// - Expression **BinaryOperator** Expression 21 | /// - **UnaryOperator** Expression 22 | /// - `(` Expression `)` 23 | /// - Function call 24 | /// 25 | /// `Integer` and `Number` are explained in the **lexer** module. 26 | /// 27 | /// `nil`, `true`, `false`, String and Identifier are identical to their 28 | /// **lexer** tokens. 29 | /// 30 | /// See [`parse_table_constructor`] for the definition of a Table. 31 | /// 32 | /// The precedence of binary and unary operators is explained in the 33 | /// [`operator`] module. 34 | #[derive(Debug, PartialEq)] 35 | pub enum Expression { 36 | Nil, 37 | True, 38 | False, 39 | Integer(i64), 40 | Number(f64), 41 | Identifier(String), 42 | String(String), 43 | Dots, 44 | Function(FunctionDefinition), 45 | TableConstructor(TableConstructor), 46 | Suffixed(SuffixedExpression), 47 | UnaryOperator(UnaryOperator, Box), 48 | BinaryOperator(BinaryOperator, Box, Box), 49 | } 50 | 51 | impl Default for Expression { 52 | fn default() -> Expression { 53 | Expression::Nil 54 | } 55 | } 56 | 57 | /// `SuffixedExpression` is a special case of `Expression`. 58 | /// 59 | /// It can be the following: 60 | /// 61 | /// - `a.b` denotes a table field selector; syntactic sugar for `a["b"]` 62 | /// - `a[b]` denotes a table index selector. 
63 | /// - `a.b()` denotes a function call 64 | /// - `a.b:c()` denotes a method call which implies the first parameter is the 65 | /// self 66 | #[derive(Debug, PartialEq)] 67 | pub struct SuffixedExpression { 68 | pub primary: Box, 69 | pub suffixes: Vec, 70 | } 71 | 72 | #[derive(Debug, PartialEq)] 73 | pub enum Suffix { 74 | NamedField(String), 75 | IndexedField(Expression), 76 | FunctionCall(Vec), 77 | MethodCall(String, Vec), 78 | } 79 | 80 | /// A function definition more likely an anonymous function. 81 | #[derive(Debug, PartialEq)] 82 | pub struct FunctionDefinition { 83 | pub parameters: Vec, 84 | pub has_varargs: bool, 85 | pub body: Block, 86 | } 87 | 88 | #[derive(Debug, PartialEq)] 89 | pub struct TableConstructor { 90 | pub fields: Vec, 91 | } 92 | 93 | #[derive(Debug, PartialEq)] 94 | pub enum ConstructorField { 95 | Array(Expression), 96 | Record(RecordKey, Expression), 97 | } 98 | 99 | #[derive(Debug, PartialEq)] 100 | pub enum RecordKey { 101 | Named(String), 102 | Indexed(Expression), 103 | } 104 | 105 | impl<'a, S: io::Read> Parser<'a, S> { 106 | /// ```lua 107 | /// expr ::= subexpr 108 | /// ``` 109 | pub(crate) fn parse_expression(&mut self) -> Result { 110 | self.parse_sub_expression(MIN_OPERATOR_PRECEDENCE) 111 | } 112 | 113 | /// ```lua 114 | /// subexpr ::= (simpleexp | unop subexpr) (binop subexpr)* 115 | /// ``` 116 | /// 117 | /// where `binop` is any binary operator with a precedence higher than 118 | /// `min_precedence` or a right-associative operator whose precedence is 119 | /// equal to `min_precedence` 120 | /// 121 | /// Visit https://en.wikipedia.org/wiki/Operator-precedence_parser for more 122 | /// information. 
123 | fn parse_sub_expression(&mut self, min_precedence: u8) -> Result { 124 | let _recursion_guard = self.get_recursion_guard()?; 125 | 126 | let mut lhs = if let Some(unary_op) = self.peek(0)?.and_then(get_unary_operator) { 127 | self.advance(1); 128 | consteval_expression(Expression::UnaryOperator( 129 | unary_op, 130 | Box::new(self.parse_sub_expression(unary_op.precedence().0)?), 131 | ))? 132 | } else { 133 | self.parse_simple_expression()? 134 | }; 135 | 136 | while let Some(binary_op) = self.peek(0)?.and_then(get_binary_operator) { 137 | let (precedence, assoc) = binary_op.precedence(); 138 | if precedence < min_precedence 139 | || precedence == min_precedence && assoc != Associativity::R 140 | { 141 | break; 142 | } 143 | 144 | self.advance(1); 145 | let rhs = self.parse_sub_expression(precedence)?; 146 | lhs = consteval_expression(Expression::BinaryOperator( 147 | binary_op, 148 | Box::new(lhs), 149 | Box::new(rhs), 150 | ))?; 151 | } 152 | 153 | Ok(lhs) 154 | } 155 | 156 | /// ```lua 157 | /// simpleexp ::= Number | Integer | String | Nil | True | False | ... | 158 | /// TableConstructor | FUNCTION body | suffixedexp 159 | /// ``` 160 | pub(crate) fn parse_simple_expression(&mut self) -> Result { 161 | let expr = match self.peek(0)? 
{ 162 | Some(&Token::Number(n)) => Expression::Number(n), 163 | Some(&Token::Integer(i)) => Expression::Integer(i), 164 | Some(&Token::String(ref s)) => Expression::String(s.clone()), 165 | Some(&Token::Nil) => Expression::Nil, 166 | Some(&Token::True) => Expression::True, 167 | Some(&Token::False) => Expression::False, 168 | Some(&Token::Dots) => Expression::Dots, 169 | Some(&Token::LeftBrace) => { 170 | return Ok(Expression::TableConstructor( 171 | self.parse_table_constructor()?, 172 | )); 173 | } 174 | Some(&Token::Function) => { 175 | return Ok(Expression::Function(self.parse_function_definition()?)); 176 | } 177 | _ => return self.parse_suffixed_expression(), 178 | }; 179 | 180 | self.advance(1); 181 | Ok(expr) 182 | } 183 | 184 | /// A suffixed expression can be an `Identifier`, a table field and function calls. 185 | /// 186 | /// ```lua 187 | /// suffixedexp ::= primaryexp ('.' NAME | '[' exp ']' | ':' NAME funcargs | funcargs)* 188 | /// ``` 189 | /// 190 | /// # Identifier 191 | /// 192 | /// A single name can denote a global variable or a local variable (or a 193 | /// function's formal parameter, which is a particular kind of local 194 | /// variable): 195 | /// 196 | /// ```lua 197 | /// var ::= Name 198 | /// ``` 199 | /// 200 | /// **Name** denotes identifiers. 201 | /// 202 | /// Any variable name is assumed to be global unless explicitly declared as 203 | /// a local. Local variables are lexically scoped: local variables can be 204 | /// freely accessed by functions defined inside their scope. 205 | /// 206 | /// Before the first assignment to a variable, its value is `nil`. 207 | /// 208 | /// # Table field 209 | /// 210 | /// Square brackets are used to index a table: 211 | /// 212 | /// ```lua 213 | /// var ::= prefixexp '[' exp ']' 214 | /// ``` 215 | /// 216 | /// The meaning of accesses to table fields can be changed via metatables. 
217 | /// 218 | /// The syntax `var.Name` is just syntactic sugar for `var["Name"]`: 219 | /// 220 | /// ```lua 221 | /// var ::= prefixexp '.' Name 222 | /// ``` 223 | /// 224 | /// An access to a global variable `x` is equivalent to `_ENV.x`. Due to the 225 | /// way that chunks are compiled, `_ENV` is never a global name. 226 | /// 227 | /// # Function calls 228 | /// 229 | /// A function call in `Lua` has the following syntax: 230 | /// 231 | /// ```lua 232 | /// functioncall ::= prefixexp args 233 | /// ``` 234 | /// 235 | /// In a function call, first prefixexp and args are evaluated. If the value 236 | /// of prefixexp has type function, then this function is called with the 237 | /// given arguments. Otherwise, the prefixexp "call" metamethod is called, 238 | /// having as first argument the value of prefixexp, followed by the 239 | /// original call arguments. 240 | /// 241 | /// The form 242 | /// 243 | /// ```lua 244 | /// functioncall ::= prefixexp ':' Name args 245 | /// ``` 246 | /// 247 | /// can be used to call "methods". A call `v:name(args)` is syntactic sugar 248 | /// for `v.name(v,args)`, except that `v` is evaluated only once. 249 | /// 250 | /// Arguments have the following syntax: 251 | /// 252 | /// ```lua 253 | /// args ::= '(' (explist)? ')' 254 | /// args ::= TableConstructor 255 | /// args ::= LiteralString 256 | /// ``` 257 | /// 258 | /// All argument expressions are evaluated before the call. A call of the 259 | /// form `f{fields}` is syntactic sugar for `f({fields})`; that is, the 260 | /// argument list is a single new table. A call of the form `f'string'` (or 261 | /// `f"string"` or `f[[string]]`) is syntactic sugar for `f('string')`; that 262 | /// is, the argument list is a single literal string. 263 | pub(crate) fn parse_suffixed_expression(&mut self) -> Result { 264 | let primary = self.parse_primary_expression()?; 265 | let mut suffixes = Vec::new(); 266 | 267 | loop { 268 | match self.peek(0)? 
{ 269 | Some(&Token::Dot) => { 270 | self.expect_next(Token::Dot)?; 271 | suffixes.push(Suffix::NamedField(self.expect_identifier()?)); 272 | } 273 | Some(&Token::LeftBracket) => { 274 | self.expect_next(Token::LeftBracket)?; 275 | suffixes.push(Suffix::IndexedField(self.parse_expression()?)); 276 | self.expect_next(Token::RightBracket)?; 277 | } 278 | Some(&Token::Colon) => { 279 | self.expect_next(Token::Colon)?; 280 | let name = self.expect_identifier()?; 281 | let args = self.parse_function_args()?; 282 | suffixes.push(Suffix::MethodCall(name, args)); 283 | } 284 | // The prefixes of `funcargs` 285 | Some(&Token::LeftParen) | Some(&Token::LeftBrace) | Some(&Token::String(_)) => { 286 | suffixes.push(Suffix::FunctionCall(self.parse_function_args()?)); 287 | } 288 | _ => break, 289 | } 290 | } 291 | 292 | // Nested structures elimination 293 | // 294 | // NOTE Incompatible with Lua 5.3 295 | // 296 | // ```lua 297 | // function f() return nil, nil, nil end 298 | // 299 | // -- returned value discarding not support 300 | // local a = (f()) 301 | // ``` 302 | Ok(if suffixes.is_empty() { 303 | primary 304 | } else { 305 | Expression::Suffixed(SuffixedExpression { 306 | primary: Box::new(primary), 307 | suffixes, 308 | }) 309 | }) 310 | } 311 | 312 | /// ```lua 313 | /// primaryexp ::= NAME | '(' expr ')' 314 | /// ``` 315 | fn parse_primary_expression(&mut self) -> Result { 316 | match self.peek(0)? { 317 | Some(&Token::LeftParen) => { 318 | self.expect_next(Token::LeftParen)?; 319 | let expr = self.parse_expression()?; 320 | self.expect_next(Token::RightParen)?; 321 | Ok(expr) 322 | } 323 | Some(&Token::Identifier(_)) => Ok(Expression::Identifier(self.expect_identifier()?)), 324 | token => Err(ParserError::Unexpected { 325 | unexpected: format!("{:?}", token), 326 | expected: Some("(expr) or Identifier".to_owned()), 327 | }), 328 | } 329 | } 330 | 331 | /// Parses function arguments. 332 | /// 333 | /// ```lua 334 | /// funcargs ::= '(' explist? 
')' | TableConstructor | String 335 | /// ``` 336 | /// 337 | /// where **explist** is optional. 338 | pub(crate) fn parse_function_args(&mut self) -> Result, ParserError> { 339 | Ok(match self.peek(0)? { 340 | Some(&Token::LeftParen) => match self.peek(1)? { 341 | Some(&Token::RightParen) => { 342 | self.advance(2); 343 | vec![] 344 | } 345 | _ => { 346 | self.expect_next(Token::LeftParen)?; 347 | let args = self.parse_expression_list()?; 348 | self.expect_next(Token::RightParen)?; 349 | args 350 | } 351 | }, 352 | Some(&Token::LeftBrace) => vec![Expression::TableConstructor( 353 | self.parse_table_constructor()?, 354 | )], 355 | Some(&Token::String(_)) => vec![Expression::String(self.expect_string()?)], 356 | token => { 357 | return Err(ParserError::Unexpected { 358 | unexpected: format!("{:?}", token), 359 | expected: Some("(explist) | tableconstructor | literalstring".to_owned()), 360 | }) 361 | } 362 | }) 363 | } 364 | 365 | /// Table constructors are expressions that create tables. Every time a 366 | /// constructor is evaluated, a new table is created. A constructor can be 367 | /// used to create an empty table or to create a table and initialize some 368 | /// of its fields. The general syntax for constructors is 369 | /// 370 | /// ```lua 371 | /// tableconstructor ::= '{' [fieldlist] '}' 372 | /// fieldlist ::= field {fieldsep field} [fieldsep] 373 | /// field ::= '[' exp ']' '=' exp | Name '=' exp | exp 374 | /// fieldsep ::= ',' | ';' 375 | /// ``` 376 | /// 377 | /// Each field of the form [exp1] = exp2 adds to the new table an entry with 378 | /// key exp1 and value exp2. A field of the form name = exp is equivalent to 379 | /// ["name"] = exp. Finally, fields of the form exp are equivalent to [i] = 380 | /// exp, where i are consecutive integers starting with 1. Fields in the 381 | /// other formats do not affect this counting. 
For example, 382 | /// 383 | /// ```lua 384 | /// a = { [f(1)] = g; "x", "y"; x = 1, f(x), [30] = 23; 45 } 385 | /// ``` 386 | /// 387 | /// is equivalent to 388 | /// 389 | /// ```lua 390 | /// do 391 | /// local t = {} 392 | /// t[f(1)] = g 393 | /// t[1] = "x" -- 1st exp 394 | /// t[2] = "y" -- 2nd exp 395 | /// t.x = 1 -- t["x"] = 1 396 | /// t[3] = f(x) -- 3rd exp 397 | /// t[30] = 23 398 | /// t[4] = 45 -- 4th exp 399 | /// a = t 400 | /// end 401 | /// ``` 402 | /// 403 | /// The order of the assignments in a constructor is undefined. (This order 404 | /// would be relevant only when there are repeated keys.) 405 | /// 406 | /// If the last field in the list has the form exp and the expression is a 407 | /// function call or a vararg expression, then all values returned by this 408 | /// expression enter the list consecutively. 409 | /// 410 | /// The field list can have an optional trailing separator, as a convenience 411 | /// for machine-generated code. 412 | pub(crate) fn parse_table_constructor(&mut self) -> Result { 413 | self.expect_next(Token::LeftBrace)?; 414 | let mut fields = Vec::new(); 415 | while self.peek(0)? != Some(&Token::RightBrace) { 416 | match self.peek(0)? { 417 | Some(&Token::Comma) | Some(&Token::SemiColon) => { 418 | if fields.is_empty() { 419 | return Err(ParserError::Unexpected { 420 | unexpected: "comma or semicolon after {".to_string(), 421 | expected: None, 422 | }); 423 | } 424 | 425 | self.advance(1); 426 | } 427 | Some(&Token::LeftBracket) => { 428 | self.expect_next(Token::LeftBracket)?; 429 | let key = self.parse_expression()?; 430 | self.expect_next(Token::RightBracket)?; 431 | self.expect_next(Token::Assign)?; 432 | let value = self.parse_expression()?; 433 | fields.push(ConstructorField::Record(RecordKey::Indexed(key), value)); 434 | } 435 | Some(&Token::Identifier(_)) => match self.peek(1)? 
{ 436 | Some(&Token::Assign) => { 437 | let name = self.expect_identifier()?; 438 | self.expect_next(Token::Assign)?; 439 | let value = self.parse_expression()?; 440 | fields.push(ConstructorField::Record(RecordKey::Named(name), value)); 441 | } 442 | _ => { 443 | fields.push(ConstructorField::Array(self.parse_expression()?)); 444 | } 445 | }, 446 | _ => { 447 | fields.push(ConstructorField::Array(self.parse_expression()?)); 448 | } 449 | } 450 | } 451 | 452 | self.expect_next(Token::RightBrace)?; 453 | Ok(TableConstructor { fields }) 454 | } 455 | 456 | /// Parse an expression list. 457 | /// 458 | /// An expression list consists of expressions. `,` (Comma) is used as the 459 | /// expression separator. 460 | /// 461 | /// It can be described in **EBNF** notation: 462 | /// 463 | /// ```lua 464 | /// explist ::= exp (',' exp)* 465 | /// ``` 466 | pub(crate) fn parse_expression_list(&mut self) -> Result, ParserError> { 467 | let mut exprs = Vec::new(); 468 | exprs.push(self.parse_expression()?); 469 | while self.peek(0)? == Some(&Token::Comma) { 470 | self.advance(1); 471 | exprs.push(self.parse_expression()?); 472 | } 473 | 474 | Ok(exprs) 475 | } 476 | 477 | /// Parses a function body. 478 | /// 479 | /// See `parse_function_definition` for more information. 480 | pub(crate) fn parse_function_body(&mut self) -> Result { 481 | self.expect_next(Token::LeftParen)?; 482 | 483 | let mut parameters = Vec::new(); 484 | let mut has_varargs = false; 485 | while self.peek(0)? != Some(&Token::RightParen) { 486 | match self.peek(0)? { 487 | Some(&Token::Dots) => { 488 | has_varargs = true; 489 | self.advance(1); 490 | } 491 | Some(&Token::Identifier(ref s)) => { 492 | parameters.push(s.clone()); 493 | self.advance(1); 494 | } 495 | Some(&Token::Comma) => { 496 | self.advance(1); 497 | } 498 | other => { 499 | return Err(ParserError::Unexpected { 500 | unexpected: format!("{:?}", other), 501 | expected: Some("parameters, '...' 
or ','".to_owned()), 502 | }); 503 | } 504 | } 505 | } 506 | 507 | self.expect_next(Token::RightParen)?; 508 | 509 | let body = self.parse_block()?; 510 | self.expect_next(Token::End)?; 511 | 512 | Ok(FunctionDefinition { 513 | parameters, 514 | has_varargs, 515 | body, 516 | }) 517 | } 518 | 519 | /// The syntax for function definition is 520 | /// 521 | /// ```lua 522 | /// FunctionDefinition ::= function funcbody 523 | /// funcbody ::= '(' parlist? ‘)’ block end 524 | /// ``` 525 | /// 526 | /// # Note 527 | /// 528 | /// A function definition is an executable expression, whose value has type 529 | /// function. When Lua precompiles a chunk, all its function bodies are 530 | /// precompiled too. Then, whenever Lua executes the function definition, 531 | /// the function is instantiated (or closed). This function instance (or 532 | /// closure) is the final value of the expression. 533 | /// 534 | /// Parameters act as local variables that are initialized with the argument 535 | /// values: 536 | /// 537 | /// ```lua 538 | /// parlist ::= namelist (',' '...')? | '...' 539 | /// ``` 540 | /// 541 | /// # Function calls 542 | /// 543 | /// When a function is called, the list of arguments is adjusted to the 544 | /// length of the list of parameters, unless the function is a vararg 545 | /// function, which is indicated by three dots ('...') at the end of its 546 | /// parameter list. A vararg function does not adjust its argument list; 547 | /// instead, it collects all extra arguments and supplies them to the 548 | /// function through a vararg expression, which is also written as three 549 | /// dots. The value of this expression is a list of all actual extra 550 | /// arguments, similar to a function with multiple results. If a vararg 551 | /// expression is used inside another expression or in the middle of a list 552 | /// of expressions, then its return list is adjusted to one element. 
If the 553 | /// expression is used as the last element of a list of expressions, then no 554 | /// adjustment is made (unless that last expression is enclosed in 555 | /// parentheses). 556 | pub(crate) fn parse_function_definition(&mut self) -> Result { 557 | self.expect_next(Token::Function)?; 558 | self.parse_function_body() 559 | } 560 | } 561 | 562 | #[cfg(test)] 563 | mod tests { 564 | use super::*; 565 | 566 | macro_rules! id { 567 | ($name:expr) => { 568 | Expression::Identifier($name.to_owned()) 569 | }; 570 | } 571 | 572 | fn float_equal(exp: Expression, f: f64) -> bool { 573 | match exp { 574 | Expression::Number(n) => (n - f).abs() < std::f64::EPSILON, 575 | _ => false, 576 | } 577 | } 578 | 579 | #[test] 580 | fn primitive_values() { 581 | let mut s: &[u8] = br"42 0xf10 1.5 'a string' nil true false ... anonymous"; 582 | let mut parser = Parser::new(&mut s); 583 | assert_eq!(parser.parse_expression().unwrap(), Expression::Integer(42)); 584 | assert_eq!( 585 | parser.parse_expression().unwrap(), 586 | Expression::Integer(0xf10) 587 | ); 588 | assert!(float_equal(parser.parse_expression().unwrap(), 1.5)); 589 | assert_eq!( 590 | parser.parse_expression().unwrap(), 591 | Expression::String("a string".to_owned()) 592 | ); 593 | assert_eq!(parser.parse_expression().unwrap(), Expression::Nil); 594 | assert_eq!(parser.parse_expression().unwrap(), Expression::True); 595 | assert_eq!(parser.parse_expression().unwrap(), Expression::False); 596 | assert_eq!(parser.parse_expression().unwrap(), Expression::Dots); 597 | assert_eq!(parser.parse_expression().unwrap(), id!("anonymous")); 598 | } 599 | 600 | #[test] 601 | fn simple_tableconstructor() { 602 | let mut s: &[u8] = br#"{[1]=2,"x","y";x=1,y=1;45}"#; 603 | let mut parser = Parser::new(&mut s); 604 | assert_eq!( 605 | parser.parse_table_constructor().unwrap(), 606 | TableConstructor { 607 | fields: vec![ 608 | ConstructorField::Record( 609 | RecordKey::Indexed(Expression::Integer(1)), 610 | 
Expression::Integer(2) 611 | ), 612 | ConstructorField::Array(Expression::String("x".to_owned())), 613 | ConstructorField::Array(Expression::String("y".to_owned())), 614 | ConstructorField::Record( 615 | RecordKey::Named("x".to_owned()), 616 | Expression::Integer(1) 617 | ), 618 | ConstructorField::Record( 619 | RecordKey::Named("y".to_owned()), 620 | Expression::Integer(1) 621 | ), 622 | ConstructorField::Array(Expression::Integer(45)), 623 | ], 624 | } 625 | ); 626 | 627 | let mut s: &[u8] = b"{; 2; 3; }"; 628 | let mut parser = Parser::new(&mut s); 629 | assert_eq!( 630 | parser.parse_table_constructor().unwrap_err(), 631 | ParserError::Unexpected { 632 | unexpected: format!("comma or semicolon after {{"), 633 | expected: None 634 | } 635 | ); 636 | } 637 | 638 | #[test] 639 | fn function_definition() { 640 | let mut s: &[u8] = br#"function() end"#; 641 | let mut parser = Parser::new(&mut s); 642 | assert_eq!( 643 | parser.parse_function_definition().unwrap(), 644 | FunctionDefinition { 645 | parameters: vec![], 646 | has_varargs: false, 647 | body: Block { 648 | stmts: vec![], 649 | retstmt: None, 650 | }, 651 | } 652 | ); 653 | 654 | let mut s: &[u8] = br#"function(a) end"#; 655 | let mut parser = Parser::new(&mut s); 656 | assert_eq!( 657 | parser.parse_function_definition().unwrap(), 658 | FunctionDefinition { 659 | parameters: vec!["a".to_owned()], 660 | has_varargs: false, 661 | body: Block { 662 | stmts: vec![], 663 | retstmt: None 664 | } 665 | } 666 | ); 667 | 668 | let mut s: &[u8] = br#"function(a,b,c,...) end"#; 669 | let mut parser = Parser::new(&mut s); 670 | assert_eq!( 671 | parser.parse_function_definition().unwrap(), 672 | FunctionDefinition { 673 | parameters: vec!["a".to_owned(), "b".to_owned(), "c".to_owned()], 674 | has_varargs: true, 675 | body: Block { 676 | stmts: vec![], 677 | retstmt: None 678 | } 679 | } 680 | ); 681 | 682 | let mut s: &[u8] = br#"function(...) 
end"#; 683 | let mut parser = Parser::new(&mut s); 684 | assert_eq!( 685 | parser.parse_function_definition().unwrap(), 686 | FunctionDefinition { 687 | parameters: vec![], 688 | has_varargs: true, 689 | body: Block { 690 | stmts: vec![], 691 | retstmt: None 692 | } 693 | } 694 | ); 695 | } 696 | 697 | #[test] 698 | fn suffixed_expression() { 699 | let mut s: &[u8] = br#"a.b.c.d a[b].c[d]"#; 700 | let mut parser = Parser::new(&mut s); 701 | assert_eq!( 702 | parser.parse_suffixed_expression().unwrap(), 703 | Expression::Suffixed(SuffixedExpression { 704 | primary: Box::new(id!("a")), 705 | suffixes: vec![ 706 | Suffix::NamedField("b".to_owned()), 707 | Suffix::NamedField("c".to_owned()), 708 | Suffix::NamedField("d".to_owned()), 709 | ], 710 | }) 711 | ); 712 | 713 | assert_eq!( 714 | parser.parse_suffixed_expression().unwrap(), 715 | Expression::Suffixed(SuffixedExpression { 716 | primary: Box::new(id!("a")), 717 | suffixes: vec![ 718 | Suffix::IndexedField(id!("b")), 719 | Suffix::NamedField("c".to_owned()), 720 | Suffix::IndexedField(id!("d")) 721 | ] 722 | }) 723 | ); 724 | 725 | // syntax sugar 726 | let mut s: &[u8] = br#"f1() f2'string' f3"string" f4[[string]] f5{1,2}"#; 727 | let mut parser = Parser::new(&mut s); 728 | assert_eq!( 729 | parser.parse_suffixed_expression().unwrap(), 730 | Expression::Suffixed(SuffixedExpression { 731 | primary: Box::new(id!("f1")), 732 | suffixes: vec![Suffix::FunctionCall(vec![]),] 733 | }) 734 | ); 735 | assert_eq!( 736 | parser.parse_suffixed_expression().unwrap(), 737 | Expression::Suffixed(SuffixedExpression { 738 | primary: Box::new(id!("f2")), 739 | suffixes: vec![Suffix::FunctionCall(vec![Expression::String( 740 | "string".to_owned() 741 | )])] 742 | }) 743 | ); 744 | assert_eq!( 745 | parser.parse_suffixed_expression().unwrap(), 746 | Expression::Suffixed(SuffixedExpression { 747 | primary: Box::new(id!("f3")), 748 | suffixes: vec![Suffix::FunctionCall(vec![Expression::String( 749 | "string".to_owned() 750 | )])] 751 
| }) 752 | ); 753 | assert_eq!( 754 | parser.parse_suffixed_expression().unwrap(), 755 | Expression::Suffixed(SuffixedExpression { 756 | primary: Box::new(id!("f4")), 757 | suffixes: vec![Suffix::FunctionCall(vec![Expression::String( 758 | "string".to_owned() 759 | )])] 760 | }) 761 | ); 762 | assert_eq!( 763 | parser.parse_suffixed_expression().unwrap(), 764 | Expression::Suffixed(SuffixedExpression { 765 | primary: Box::new(id!("f5")), 766 | suffixes: vec![Suffix::FunctionCall(vec![Expression::TableConstructor( 767 | TableConstructor { 768 | fields: vec![ 769 | ConstructorField::Array(Expression::Integer(1)), 770 | ConstructorField::Array(Expression::Integer(2)), 771 | ] 772 | } 773 | )])] 774 | }) 775 | ); 776 | 777 | // method calls 778 | let mut s: &[u8] = br#"obj:method1(a)"#; 779 | let mut parser = Parser::new(&mut s); 780 | assert_eq!( 781 | parser.parse_suffixed_expression().unwrap(), 782 | Expression::Suffixed(SuffixedExpression { 783 | primary: Box::new(id!("obj")), 784 | suffixes: vec![Suffix::MethodCall("method1".to_owned(), vec![id!("a")])], 785 | }) 786 | ); 787 | 788 | // function call chains 789 | let mut s: &[u8] = br#"f(a)(b)(c)"#; 790 | let mut parser = Parser::new(&mut s); 791 | assert_eq!( 792 | parser.parse_suffixed_expression().unwrap(), 793 | Expression::Suffixed(SuffixedExpression { 794 | primary: Box::new(id!("f")), 795 | suffixes: vec![ 796 | Suffix::FunctionCall(vec![id!("a")]), 797 | Suffix::FunctionCall(vec![id!("b")]), 798 | Suffix::FunctionCall(vec![id!("c")]) 799 | ], 800 | }) 801 | ); 802 | } 803 | 804 | #[test] 805 | fn unary_operator() { 806 | let mut s: &[u8] = b"-1"; 807 | let mut parser = Parser::new(&mut s); 808 | assert_eq!(parser.parse_expression().unwrap(), Expression::Integer(-1)); 809 | 810 | let mut s: &[u8] = b"not false"; 811 | let mut parser = Parser::new(&mut s); 812 | assert_eq!(parser.parse_expression().unwrap(), Expression::True); 813 | 814 | let mut s: &[u8] = b"~0"; 815 | let mut parser = Parser::new(&mut s); 
816 | assert_eq!(parser.parse_expression().unwrap(), Expression::Integer(-1)); 817 | 818 | let mut s: &[u8] = b"#'empty'"; 819 | let mut parser = Parser::new(&mut s); 820 | assert_eq!(parser.parse_expression().unwrap(), Expression::Integer(5)); 821 | } 822 | 823 | #[test] 824 | fn binary_operator() { 825 | let mut s: &[u8] = b"a+b"; 826 | let mut parser = Parser::new(&mut s); 827 | assert_eq!( 828 | parser.parse_expression().unwrap(), 829 | Expression::BinaryOperator(BinaryOperator::Add, Box::new(id!("a")), Box::new(id!("b"))) 830 | ); 831 | 832 | let mut s: &[u8] = b"(a + b) * c - d"; 833 | let mut parser = Parser::new(&mut s); 834 | assert_eq!( 835 | parser.parse_expression().unwrap(), 836 | Expression::BinaryOperator( 837 | BinaryOperator::Sub, 838 | Box::new(Expression::BinaryOperator( 839 | BinaryOperator::Mul, 840 | Box::new(Expression::BinaryOperator( 841 | BinaryOperator::Add, 842 | Box::new(id!("a")), 843 | Box::new(id!("b")) 844 | )), 845 | Box::new(id!("c")) 846 | )), 847 | Box::new(id!("d")) 848 | ) 849 | ); 850 | 851 | let mut s: &[u8] = b"a + b * c - d"; 852 | let mut parser = Parser::new(&mut s); 853 | assert_eq!( 854 | parser.parse_expression().unwrap(), 855 | Expression::BinaryOperator( 856 | BinaryOperator::Sub, 857 | Box::new(Expression::BinaryOperator( 858 | BinaryOperator::Add, 859 | Box::new(id!("a")), 860 | Box::new(Expression::BinaryOperator( 861 | BinaryOperator::Mul, 862 | Box::new(id!("b")), 863 | Box::new(id!("c")) 864 | )), 865 | )), 866 | Box::new(id!("d")) 867 | ) 868 | ); 869 | 870 | let mut s: &[u8] = b"a * -b ^ c - d"; 871 | let mut parser = Parser::new(&mut s); 872 | assert_eq!( 873 | parser.parse_expression().unwrap(), 874 | Expression::BinaryOperator( 875 | BinaryOperator::Sub, 876 | Box::new(Expression::BinaryOperator( 877 | BinaryOperator::Mul, 878 | Box::new(id!("a")), 879 | Box::new(Expression::UnaryOperator( 880 | UnaryOperator::Minus, 881 | Box::new(Expression::BinaryOperator( 882 | BinaryOperator::Power, 883 | 
Box::new(id!("b")), 884 | Box::new(id!("c")) 885 | )) 886 | )) 887 | )), 888 | Box::new(id!("d")) 889 | ) 890 | ); 891 | 892 | let mut s: &[u8] = b"(1 < 2 and 2 > 1) ~= true"; 893 | let mut parser = Parser::new(&mut s); 894 | assert_eq!(parser.parse_expression().unwrap(), Expression::False); 895 | 896 | // Illegal actually. 897 | // 898 | // Discovered after applying consteval. 899 | let mut s: &[u8] = br#"("abc" .. "def") + -(1 // 0)^4>>2 ~ 1 "#; 900 | let mut parser = Parser::new(&mut s); 901 | assert_eq!( 902 | parser.parse_expression().unwrap_err(), 903 | ParserError::DividedByZero 904 | ); 905 | } 906 | } 907 | -------------------------------------------------------------------------------- /src/parser/statement.rs: -------------------------------------------------------------------------------- 1 | use std::io; 2 | 3 | use super::{ 4 | Block, Expression, FunctionDefinition, Parser, ParserError, Suffix, SuffixedExpression, 5 | }; 6 | use crate::lexer::Token; 7 | 8 | /// Lua supports an almost conventional set of statements, similar to those in 9 | /// Pascal or C. This set includes assignments, control structures, function 10 | /// calls, and variable declarations. 
11 | #[derive(Debug, PartialEq)] 12 | pub enum Statement { 13 | Assignment(AssignmentStatement), 14 | FunctionCall(FunctionCallStatement), 15 | Label(String), 16 | Break, 17 | Goto(String), 18 | Do(Block), 19 | While(WhileStatement), 20 | Repeat(RepeatStatement), 21 | If(IfStatement), 22 | For(ForStatement), 23 | Function(FunctionStatement), 24 | LocalFunction(LocalFunctionStatement), 25 | LocalDeclaration(LocalDeclarationStatement), 26 | } 27 | 28 | #[derive(Debug, PartialEq)] 29 | pub struct FunctionCallStatement { 30 | pub call: SuffixedExpression, 31 | } 32 | 33 | #[derive(Debug, PartialEq)] 34 | pub struct AssignmentStatement { 35 | pub targets: Vec, 36 | pub values: Vec, 37 | } 38 | 39 | #[derive(Debug, PartialEq)] 40 | pub struct ReturnStatement { 41 | pub returns: Vec, 42 | } 43 | 44 | #[derive(Debug, PartialEq)] 45 | pub struct WhileStatement { 46 | pub condition: Expression, 47 | pub block: Block, 48 | } 49 | 50 | #[derive(Debug, PartialEq)] 51 | pub struct RepeatStatement { 52 | pub block: Block, 53 | pub until: Expression, 54 | } 55 | 56 | #[derive(Debug, PartialEq)] 57 | pub struct IfStatement { 58 | pub if_then: Vec<(Expression, Block)>, 59 | } 60 | 61 | #[derive(Debug, PartialEq)] 62 | pub enum ForStatement { 63 | Numeric { 64 | name: String, 65 | initial: Expression, 66 | limit: Expression, 67 | step: Option, 68 | block: Block, 69 | }, 70 | Generic { 71 | names: Vec, 72 | values: Vec, 73 | block: Block, 74 | }, 75 | } 76 | 77 | #[derive(Debug, PartialEq)] 78 | pub struct FunctionStatement { 79 | pub name: String, 80 | pub fields: Vec, 81 | pub method: Option, 82 | pub definition: FunctionDefinition, 83 | } 84 | 85 | #[derive(Debug, PartialEq)] 86 | pub struct LocalFunctionStatement { 87 | pub name: String, 88 | pub definition: FunctionDefinition, 89 | } 90 | 91 | #[derive(Debug, PartialEq)] 92 | pub struct LocalDeclarationStatement { 93 | pub names: Vec, 94 | pub values: Vec, 95 | } 96 | 97 | impl<'a, S: io::Read> Parser<'a, S> { 98 | /// Parses a 
statement. 99 | /// 100 | /// Although FunctionCall is an expression, it can also be a statement. 101 | pub(crate) fn parse_statement(&mut self) -> Result { 102 | let _recursion_guard = self.get_recursion_guard(); 103 | 104 | Ok(match self.peek(0)? { 105 | Some(&Token::DoubleColon) => { 106 | self.expect_next(Token::DoubleColon)?; 107 | let name = self.expect_identifier()?; 108 | self.expect_next(Token::DoubleColon)?; 109 | Statement::Label(name) 110 | } 111 | Some(&Token::Break) => { 112 | self.expect_next(Token::Break)?; 113 | Statement::Break 114 | } 115 | Some(&Token::Goto) => { 116 | self.expect_next(Token::Goto)?; 117 | Statement::Goto(self.expect_identifier()?) 118 | } 119 | Some(&Token::Do) => { 120 | self.expect_next(Token::Do)?; 121 | let block = self.parse_block()?; 122 | self.expect_next(Token::End)?; 123 | Statement::Do(block) 124 | } 125 | Some(&Token::While) => Statement::While(self.parse_while_statement()?), 126 | Some(&Token::Repeat) => Statement::Repeat(self.parse_repeat_statement()?), 127 | Some(&Token::If) => Statement::If(self.parse_if_statement()?), 128 | Some(&Token::For) => Statement::For(self.parse_for_statement()?), 129 | Some(&Token::Function) => Statement::Function(self.parse_function_statement()?), 130 | Some(&Token::Local) => match self.peek(1)? { 131 | Some(&Token::Function) => { 132 | Statement::LocalFunction(self.parse_local_function_statement()?) 133 | } 134 | _ => Statement::LocalDeclaration(self.parse_local_declaration_statement()?), 135 | }, 136 | // functioncall | assignment 137 | _ => self.parse_expression_statement()?, 138 | }) 139 | } 140 | 141 | /// Parses an expression statement. 142 | /// 143 | /// An expression statement is a function call or an assignment. 144 | pub(crate) fn parse_expression_statement(&mut self) -> Result { 145 | let suffixed_expression = self.parse_suffixed_expression()?; 146 | if self.peek(0)? == Some(&Token::Assign) || self.peek(0)? 
== Some(&Token::Comma) { 147 | let mut targets = Vec::new(); 148 | targets.push(suffixed_expression); 149 | while self.peek(0)? == Some(&Token::Comma) { 150 | self.advance(1); 151 | targets.push(self.parse_suffixed_expression()?); 152 | } 153 | 154 | self.expect_next(Token::Assign)?; 155 | 156 | // type checks 157 | for t in &targets { 158 | match t { 159 | Expression::Identifier(_) => {} 160 | Expression::Suffixed(SuffixedExpression { primary, suffixes }) => { 161 | assert!(!suffixes.is_empty()); 162 | 163 | match primary.as_ref() { 164 | Expression::Identifier(_) => {} 165 | _ => return Err(ParserError::AssignToExpression), 166 | } 167 | 168 | match suffixes.last().unwrap() { 169 | Suffix::NamedField(_) | Suffix::IndexedField(_) => {} 170 | _ => return Err(ParserError::AssignToExpression), 171 | } 172 | } 173 | _ => return Err(ParserError::AssignToExpression), 174 | } 175 | } 176 | 177 | let values = self.parse_expression_list()?; 178 | Ok(Statement::Assignment(AssignmentStatement { 179 | targets, 180 | values, 181 | })) 182 | } else { 183 | match suffixed_expression { 184 | Expression::Suffixed(expr) => { 185 | assert!(!expr.suffixes.is_empty()); 186 | match expr.suffixes.last().unwrap() { 187 | Suffix::FunctionCall(_) | Suffix::MethodCall(_, _) => { 188 | Ok(Statement::FunctionCall(FunctionCallStatement { 189 | call: expr, 190 | })) 191 | } 192 | _ => Err(ParserError::ExpressionNotStatement), 193 | } 194 | } 195 | _ => Err(ParserError::ExpressionNotStatement), 196 | } 197 | } 198 | } 199 | 200 | /// Parses an return statement. 201 | /// 202 | /// As return statement is the last lex of a block, and block can be in the 203 | /// following structures: 204 | /// 205 | /// 1. **while** expression **do** block **end** 206 | /// 2. **repeat** block **until** expression 207 | /// 3. **if** expression **then** block **else** block **end** 208 | /// 4. **if** expression **then** block **elseif** expression **then** block **end** 209 | /// 5. 
**do** block **end** in **for**loop or standalone 210 | /// 6. and function body 211 | /// 212 | /// The table is taken from `parse_block`. 213 | pub(crate) fn parse_return_statement(&mut self) -> Result { 214 | self.expect_next(Token::Return)?; 215 | let returns = match self.peek(0)? { 216 | Some(&Token::End) 217 | | Some(&Token::Until) 218 | | Some(&Token::Else) 219 | | Some(&Token::ElseIf) 220 | | Some(&Token::SemiColon) 221 | | None => Vec::new(), 222 | _ => self.parse_expression_list()?, 223 | }; 224 | 225 | if self.peek(0)? == Some(&Token::SemiColon) { 226 | self.advance(1); 227 | } 228 | 229 | Ok(ReturnStatement { returns }) 230 | } 231 | 232 | /// The control structure **while** has the usual meaning. 233 | /// 234 | /// ```lua 235 | /// while expression do block end 236 | /// ``` 237 | /// # Cautious 238 | /// 239 | /// The condition expression of a control structure can return any value. 240 | /// Both **false** and **nil** are considered _false_. All values different 241 | /// from **nil** and **false** are considered _true_ (in particular, the 242 | /// number 0 and the empty string are also true). 243 | pub(crate) fn parse_while_statement(&mut self) -> Result { 244 | self.expect_next(Token::While)?; 245 | let condition = self.parse_expression()?; 246 | self.expect_next(Token::Do)?; 247 | let block = self.parse_block()?; 248 | self.expect_next(Token::End)?; 249 | 250 | Ok(WhileStatement { condition, block }) 251 | } 252 | 253 | /// The control structure **repeat** has the usual meaning. 254 | /// 255 | /// ```lua 256 | /// repeat block until expression 257 | /// ``` 258 | /// 259 | /// In the repeat–until loop, the inner block does not end at the **until** 260 | /// keyword, but only after the condition. So, the condition can refer to 261 | /// local variables declared inside the loop block. 262 | /// 263 | /// # Cautious 264 | /// 265 | /// The condition expression of a control structure can return any value. 
266 | /// Both **false** and **nil** are considered _false_. All values different 267 | /// from **nil** and **false** are considered _true_ (in particular, the 268 | /// number 0 and the empty string are also true). 269 | pub(crate) fn parse_repeat_statement(&mut self) -> Result { 270 | self.expect_next(Token::Repeat)?; 271 | let block = self.parse_block()?; 272 | self.expect_next(Token::Until)?; 273 | let until = self.parse_expression()?; 274 | 275 | Ok(RepeatStatement { block, until }) 276 | } 277 | 278 | /// The control structure **if** has the usual meaning. 279 | /// 280 | /// ```lua 281 | /// if expression then block (elseif expression then block)* (else block)? end 282 | /// ``` 283 | /// 284 | /// Let's transform the last `else block` to `elseif true then block`. So it 285 | /// can be merged into the `elseif expression then block` part. 286 | /// 287 | /// # Cautious 288 | /// 289 | /// The condition expression of a control structure can return any value. 290 | /// Both **false** and **nil** are considered _false_. All values different 291 | /// from **nil** and **false** are considered _true_ (in particular, the 292 | /// number 0 and the empty string are also true). 293 | pub(crate) fn parse_if_statement(&mut self) -> Result { 294 | let mut if_then = Vec::new(); 295 | 296 | self.expect_next(Token::If)?; 297 | let expr = self.parse_expression()?; 298 | self.expect_next(Token::Then)?; 299 | let block = self.parse_block()?; 300 | if_then.push((expr, block)); 301 | 302 | while self.peek(0)? == Some(&Token::ElseIf) { 303 | self.expect_next(Token::ElseIf)?; 304 | let expr = self.parse_expression()?; 305 | self.expect_next(Token::Then)?; 306 | let block = self.parse_block()?; 307 | if_then.push((expr, block)); 308 | } 309 | 310 | // transform `else` to `elseif true` 311 | if self.peek(0)? 
== Some(&Token::Else) { 312 | self.expect_next(Token::Else)?; 313 | let expr = Expression::True; 314 | let block = self.parse_block()?; 315 | if_then.push((expr, block)); 316 | } 317 | 318 | self.expect_next(Token::End)?; 319 | 320 | Ok(IfStatement { if_then }) 321 | } 322 | 323 | /// The for statement has two forms 324 | /// 325 | /// # Numeric form 326 | /// 327 | /// The numerical for loop repeats a block of code while a control variable 328 | /// runs through an arithmetic progression. It has the following syntax: 329 | /// 330 | /// ```lua 331 | /// for Name '=' expression ',' expression (',' expression)? do block end 332 | /// ``` 333 | /// 334 | /// The _block_ is repeated for name starting at the value of the first 335 | /// expression, until it passes the second expression by steps of the third 336 | /// expression. 337 | /// 338 | /// ## Note 339 | /// 340 | /// - All three control expressions are evaluated only once, before the loop 341 | /// starts. They must all result in numbers. 342 | /// - _initial_, _limit_, and _step_ are invisible variables. The names 343 | /// shown here are for explanatory purposes only. 344 | /// - If the third expression (the _step_) is absent, then a step of **1** 345 | /// is used. 346 | /// - You can use **break** and **goto** to exit a for loop. The loop 347 | /// variable _v_ is local to the loop body. If you need its value after the 348 | /// loop, assign it to another variable before exiting the loop. 349 | /// 350 | /// # Generic form 351 | /// 352 | /// The generic for statement works over functions, called iterators. On 353 | /// each iteration, the iterator function is called to produce a new value, 354 | /// stopping when this new value is **nil**. 355 | /// 356 | /// ```lua 357 | /// for namelist in explist do block end 358 | /// ``` 359 | /// 360 | /// ## Note 361 | /// 362 | /// - explist is evaluated only once. 
Its results are an iterator function, a 363 | /// state, and an initial value for the first iterator variable. 364 | /// - You can use **break** to exit a for loop. 365 | /// - The loop variables var_i are local to the loop; you cannot use their 366 | /// values after the for ends. If you need these values, then assign them to 367 | /// other variables before breaking or exiting the loop. 368 | pub(crate) fn parse_for_statement(&mut self) -> Result { 369 | self.expect_next(Token::For)?; 370 | let name = self.expect_identifier()?; 371 | 372 | match self.peek(0)? { 373 | Some(&Token::Assign) => { 374 | self.expect_next(Token::Assign)?; 375 | let initial = self.parse_expression()?; 376 | self.expect_next(Token::Comma)?; 377 | let limit = self.parse_expression()?; 378 | let step = if self.peek(0)? == Some(&Token::Comma) { 379 | self.expect_next(Token::Comma)?; 380 | Some(self.parse_expression()?) 381 | } else { 382 | None 383 | }; 384 | 385 | self.expect_next(Token::Do)?; 386 | let block = self.parse_block()?; 387 | self.expect_next(Token::End)?; 388 | 389 | Ok(ForStatement::Numeric { 390 | name, 391 | initial, 392 | limit, 393 | step, 394 | block, 395 | }) 396 | } 397 | Some(&Token::Comma) | Some(&Token::In) => { 398 | let mut names = Vec::new(); 399 | names.push(name); 400 | while self.peek(0)? 
== Some(&Token::Comma) { 401 | self.expect_next(Token::Comma)?; 402 | names.push(self.expect_identifier()?); 403 | } 404 | 405 | self.expect_next(Token::In)?; 406 | let values = self.parse_expression_list()?; 407 | 408 | self.expect_next(Token::Do)?; 409 | let block = self.parse_block()?; 410 | self.expect_next(Token::End)?; 411 | 412 | Ok(ForStatement::Generic { 413 | names, 414 | values, 415 | block, 416 | }) 417 | } 418 | Some(token) => Err(ParserError::Unexpected { 419 | unexpected: format!("{:?}", token), 420 | expected: Some("'=' ',' or 'in'".to_owned()), 421 | }), 422 | None => Err(ParserError::EndOfStream { 423 | expected: Some("'=' ',' or 'in'".to_owned()), 424 | }), 425 | } 426 | } 427 | 428 | /// The function statement syntax sugar simplifies function definition: 429 | /// 430 | /// ```lua 431 | /// statement ::= function funcname funcbody 432 | /// funcname ::= Name ('.' Name)* (':' Name)? 433 | /// ``` 434 | /// 435 | /// For example, the statement 436 | /// 437 | /// ```lua 438 | /// function f() body end 439 | /// ``` 440 | /// 441 | /// is equivalent with 442 | /// 443 | /// ```lua 444 | /// f = function() body end 445 | /// ``` 446 | /// 447 | /// The statement 448 | /// 449 | /// ```lua 450 | /// function t.a.b.c.f () body end 451 | /// ``` 452 | /// 453 | /// is equivalent with 454 | /// 455 | /// ```lua 456 | /// t.a.b.c.f = function () body end 457 | /// ``` 458 | /// 459 | /// The colon syntax is used for defining methods, that is, functions that 460 | /// have an implicit extra parameter self. 
Thus, the statement
    ///
    /// ```lua
    /// function t.a.b.c:f (params) body end
    /// ```
    ///
    /// is syntactic sugar for
    ///
    /// ```lua
    /// t.a.b.c.f = function (self, params) body end
    /// ```
    ///
    /// # Errors
    ///
    /// Propagates the first error hit while consuming **function**, reading
    /// the (possibly dotted) name or parsing the function body.
    pub(crate) fn parse_function_statement(&mut self) -> Result<FunctionStatement, ParserError> {
        self.expect_next(Token::Function)?;

        let name = self.expect_identifier()?;

        // `.field` accessors following the base name.
        let mut fields = Vec::new();
        while self.peek(0)? == Some(&Token::Dot) {
            self.expect_next(Token::Dot)?;
            fields.push(self.expect_identifier()?);
        }

        // Optional trailing `:method`.
        let method = if self.peek(0)? == Some(&Token::Colon) {
            self.expect_next(Token::Colon)?;
            Some(self.expect_identifier()?)
        } else {
            None
        };

        let definition = self.parse_function_body()?;

        Ok(FunctionStatement {
            name,
            fields,
            method,
            definition,
        })
    }

    /// The local function statement syntax sugar simplifies function
    /// definition:
    ///
    /// ```lua
    /// local function Name funcbody
    /// ```
    ///
    /// For example, the statement
    ///
    /// ```lua
    /// local function f() body end
    /// ```
    ///
    /// is equivalent to
    ///
    /// ```lua
    /// local f; f = function () body end
    /// ```
    ///
    /// not
    ///
    /// ```lua
    /// local f = function () body end
    /// ```
    ///
    /// This only makes a difference when the body of the function contains
    /// references to f.
    ///
    /// # Errors
    ///
    /// Propagates the first error hit while consuming **local** and
    /// **function**, reading the name or parsing the function body.
    pub(crate) fn parse_local_function_statement(
        &mut self,
    ) -> Result<LocalFunctionStatement, ParserError> {
        self.expect_next(Token::Local)?;
        self.expect_next(Token::Function)?;

        let name = self.expect_identifier()?;
        let definition = self.parse_function_body()?;

        Ok(LocalFunctionStatement { name, definition })
    }

    /// Local variables can be declared anywhere inside a block.
///
    /// The declaration can include an initial assignment:
    ///
    /// ```lua
    /// LocalDeclarationStatement ::= local namelist ('=' explist)?
    /// ```
    ///
    /// If present, an initial assignment has the same semantics as a multiple
    /// assignment. Otherwise, all variables are initialized with nil.
    ///
    /// # Errors
    ///
    /// Propagates the first error hit while consuming **local**, reading the
    /// name list or parsing the optional expression list.
    pub(crate) fn parse_local_declaration_statement(
        &mut self,
    ) -> Result<LocalDeclarationStatement, ParserError> {
        self.expect_next(Token::Local)?;

        // namelist ::= Name (',' Name)*
        let mut names = vec![self.expect_identifier()?];
        while self.peek(0)? == Some(&Token::Comma) {
            self.advance(1);
            names.push(self.expect_identifier()?);
        }

        // Without `=` there are no initial values.
        let values = if self.peek(0)? == Some(&Token::Assign) {
            self.advance(1);
            self.parse_expression_list()?
        } else {
            Vec::new()
        };

        Ok(LocalDeclarationStatement { names, values })
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::parser::BinaryOperator;

    /// Builds an `Expression::Identifier` from a string literal.
    macro_rules! id {
        ($name:expr) => {
            Expression::Identifier($name.to_owned())
        };
    }

    /// Single and multiple assignments, plus the two rejected target forms.
    #[test]
    fn assignment() {
        let mut s: &[u8] = b"a = 10\nb = 20";
        let mut parser = Parser::new(&mut s);
        assert_eq!(
            parser.parse_statement().unwrap(),
            Statement::Assignment(AssignmentStatement {
                targets: vec![id!("a")],
                values: vec![Expression::Integer(10)],
            })
        );
        assert_eq!(
            parser.parse_statement().unwrap(),
            Statement::Assignment(AssignmentStatement {
                targets: vec![id!("b")],
                values: vec![Expression::Integer(20)],
            })
        );

        let mut s: &[u8] = b"a,b,c=1,2,3";
        let mut parser = Parser::new(&mut s);
        assert_eq!(
            parser.parse_statement().unwrap(),
            Statement::Assignment(AssignmentStatement {
                targets: vec![id!("a"), id!("b"), id!("c")],
                values: vec![
                    Expression::Integer(1),
                    Expression::Integer(2),
                    Expression::Integer(3),
                ],
            })
        );

        // A call result is not assignable.
        let mut s: &[u8] = b"f(1) = 10";
        let mut parser = Parser::new(&mut s);
        assert_eq!(
            parser.parse_statement().unwrap_err(),
            ParserError::AssignToExpression
        );

        // A bare binary expression is not a statement.
        let mut s: &[u8] = b"a + b = 10";
        let mut parser = Parser::new(&mut s);
        assert_eq!(
            parser.parse_statement().unwrap_err(),
            ParserError::ExpressionNotStatement
        );
    }

    /// A call expression used as a statement.
    #[test]
    fn function_call() {
        let mut s: &[u8] = br#"f(1, "string")"#;
        let mut parser = Parser::new(&mut s);
        assert_eq!(
            parser.parse_statement().unwrap(),
            Statement::FunctionCall(FunctionCallStatement {
                call: SuffixedExpression {
                    primary: Box::new(id!("f")),
                    suffixes: vec![Suffix::FunctionCall(vec![
                        Expression::Integer(1),
                        Expression::String("string".to_owned()),
                    ])],
                }
            })
        );
    }

    /// `::label::` followed by `goto label`.
    #[test]
    fn label_goto() {
        let mut s: &[u8] = b"::label:: goto label";
        let mut parser = Parser::new(&mut s);
        assert_eq!(
            parser.parse_statement().unwrap(),
            Statement::Label("label".to_owned())
        );
        assert_eq!(
            parser.parse_statement().unwrap(),
            Statement::Goto("label".to_owned())
        );
    }

    /// Standalone `do ... end` block.
    #[test]
    fn do_block() {
        let mut s: &[u8] = b"do a=10 end";
        let mut parser = Parser::new(&mut s);
        assert_eq!(
            parser.parse_statement().unwrap(),
            Statement::Do(Block {
                stmts: vec![Statement::Assignment(AssignmentStatement {
                    targets: vec![id!("a")],
                    values: vec![Expression::Integer(10)],
                })],
                retstmt: None,
            })
        );
    }

    /// `while` loop with a one-statement body.
    #[test]
    fn while_loop() {
        let mut s: &[u8] = b"while true do a=10 end";
        let mut parser = Parser::new(&mut s);
        assert_eq!(
            parser.parse_statement().unwrap(),
            Statement::While(WhileStatement {
                condition: Expression::True,
                block: Block {
                    stmts: vec![Statement::Assignment(AssignmentStatement {
                        targets: vec![id!("a")],
                        values: vec![Expression::Integer(10)],
                    })],
                    retstmt: None,
                }
            })
        );
    }

    /// `repeat ... until` with an empty body.
    #[test]
    fn repeat_loop() {
        let mut s: &[u8] = b"repeat until a~=0";
        let mut parser = Parser::new(&mut s);
        assert_eq!(
            parser.parse_statement().unwrap(),
            Statement::Repeat(RepeatStatement {
                block: Block {
                    stmts: vec![],
                    retstmt: None,
                },
                until: Expression::BinaryOperator(
                    BinaryOperator::NotEqual,
                    Box::new(id!("a")),
                    Box::new(Expression::Integer(0))
                )
            })
        );
    }

    /// Numeric `for` with an explicit step.
    #[test]
    fn for_numeric_loop() {
        let mut s: &[u8] = b"for i=0,10,1 do a=10 end";
        let mut parser = Parser::new(&mut s);
        assert_eq!(
            parser.parse_statement().unwrap(),
            Statement::For(ForStatement::Numeric {
                name: "i".to_owned(),
                initial: Expression::Integer(0),
                limit: Expression::Integer(10),
                step: Some(Expression::Integer(1)),
                block: Block {
                    stmts: vec![Statement::Assignment(AssignmentStatement {
                        targets: vec![id!("a")],
                        values: vec![Expression::Integer(10)],
                    })],
                    retstmt: None,
                }
            })
        );
    }

    /// Generic `for` driven by an iterator defined earlier in the same chunk.
    #[test]
    fn for_generic_loop() {
        let mut s: &[u8] = br#"
            function test_iterator()
                local function inc(s, c)
                    if c == 10 then
                        return nil
                    else
                        return c + 1, c + 11
                    end
                end

                return inc, nil, 0
            end

            local sum = 0
            for i in test_iterator() do
                sum = sum + i
            end
        "#;
        let mut parser = Parser::new(&mut s);
        assert_eq!(
            parser.parse_statement().unwrap(),
            Statement::Function(FunctionStatement {
                name: "test_iterator".to_owned(),
                fields: vec![],
                method: None,
                definition: FunctionDefinition {
                    parameters: vec![],
                    has_varargs: false,
                    body: Block {
                        stmts: vec![Statement::LocalFunction(LocalFunctionStatement {
                            name: "inc".to_owned(),
                            definition: FunctionDefinition {
                                parameters: vec!["s".to_owned(), "c".to_owned()],
                                has_varargs: false,
                                body: Block {
                                    stmts: vec![Statement::If(IfStatement {
                                        if_then: vec![
                                            (
                                                Expression::BinaryOperator(
                                                    BinaryOperator::Equal,
                                                    Box::new(id!("c")),
                                                    Box::new(Expression::Integer(10))
                                                ),
                                                Block {
                                                    stmts: vec![],
                                                    retstmt: Some(ReturnStatement {
                                                        returns: vec![Expression::Nil]
                                                    })
                                                }
                                            ),
                                            (
                                                Expression::True,
                                                Block {
                                                    stmts: vec![],
                                                    retstmt: Some(ReturnStatement {
                                                        returns: vec![
                                                            Expression::BinaryOperator(
                                                                BinaryOperator::Add,
                                                                Box::new(id!("c")),
                                                                Box::new(Expression::Integer(1))
                                                            ),
                                                            Expression::BinaryOperator(
                                                                BinaryOperator::Add,
                                                                Box::new(id!("c")),
                                                                Box::new(Expression::Integer(11))
                                                            )
                                                        ]
                                                    })
                                                }
                                            )
                                        ]
                                    })],
                                    retstmt: None,
                                }
                            }
                        })],
                        retstmt: Some(ReturnStatement {
                            returns: vec![id!("inc"), Expression::Nil, Expression::Integer(0)]
                        })
                    }
                }
            })
        );

        assert_eq!(
            parser.parse_statement().unwrap(),
            Statement::LocalDeclaration(LocalDeclarationStatement {
                names: vec!["sum".to_owned()],
                values: vec![Expression::Integer(0)]
            })
        );

        assert_eq!(
            parser.parse_statement().unwrap(),
            Statement::For(ForStatement::Generic {
                names: vec!["i".to_owned()],
                values: vec![Expression::Suffixed(SuffixedExpression {
                    primary: Box::new(id!("test_iterator")),
                    suffixes: vec![Suffix::FunctionCall(vec![])]
                })],
                block: Block {
                    stmts: vec![Statement::Assignment(AssignmentStatement {
                        targets: vec![id!("sum")],
                        values: vec![Expression::BinaryOperator(
                            BinaryOperator::Add,
                            Box::new(id!("sum")),
                            Box::new(id!("i"))
                        )],
                    })],
                    retstmt: None,
                }
            })
        );
    }

    /// `if` with `else`, with `elseif`, and with both; `else` is stored as a
    /// branch whose condition is `Expression::True`.
    #[test]
    fn if_stmt() {
        let mut s: &[u8] = b"if true then else end";
        let mut parser = Parser::new(&mut s);
        assert_eq!(
            parser.parse_statement().unwrap(),
            Statement::If(IfStatement {
                if_then: vec![
                    (
                        Expression::True,
                        Block {
                            stmts: vec![],
                            retstmt: None
                        }
                    ),
                    (
                        Expression::True,
                        Block {
                            stmts: vec![],
                            retstmt: None
                        }
                    )
                ],
            })
        );

        let mut s: &[u8] = b"if false then elseif false then end";
        let mut parser = Parser::new(&mut s);
        assert_eq!(
            parser.parse_statement().unwrap(),
            Statement::If(IfStatement {
                if_then: vec![
                    (
                        Expression::False,
                        Block {
                            stmts: vec![],
                            retstmt: None
                        }
                    ),
                    (
                        Expression::False,
                        Block {
                            stmts: vec![],
                            retstmt: None
                        }
                    )
                ]
            })
        );

        let mut s: &[u8] = b"if true then elseif false then else end";
        let mut parser = Parser::new(&mut s);
        assert_eq!(
            parser.parse_statement().unwrap(),
            Statement::If(IfStatement {
                if_then: vec![
                    (
                        Expression::True,
                        Block {
                            stmts: vec![],
                            retstmt: None,
                        }
                    ),
                    (
                        Expression::False,
                        Block {
                            stmts: vec![],
                            retstmt: None
                        }
                    ),
                    (
                        Expression::True,
                        Block {
                            stmts: vec![],
                            retstmt: None,
                        }
                    )
                ]
            })
        );
    }

    /// Plain, parameterised and dotted/method function statements.
    #[test]
    fn function() {
        let mut s: &[u8] = br#"function one() return 1 end
            function add1(x) return x+1 end
            function t.a.b.c:f() end"#;
        let mut parser = Parser::new(&mut s);
        assert_eq!(
            parser.parse_statement().unwrap(),
            Statement::Function(FunctionStatement {
                name: "one".to_owned(),
                fields: vec![],
                method: None,
                definition: FunctionDefinition {
                    parameters: vec![],
                    has_varargs: false,
                    body: Block {
                        stmts: vec![],
                        retstmt: Some(ReturnStatement {
                            returns: vec![Expression::Integer(1)]
                        })
                    }
                }
            })
        );
        assert_eq!(
            parser.parse_statement().unwrap(),
            Statement::Function(FunctionStatement {
                name: "add1".to_owned(),
                fields: vec![],
                method: None,
                definition: FunctionDefinition {
                    parameters: vec!["x".to_owned()],
                    has_varargs: false,
                    body: Block {
                        stmts: vec![],
                        retstmt: Some(ReturnStatement {
                            returns: vec![Expression::BinaryOperator(
                                BinaryOperator::Add,
                                Box::new(id!("x")),
                                Box::new(Expression::Integer(1))
                            )]
                        })
                    }
                }
            })
        );
        assert_eq!(
            parser.parse_statement().unwrap(),
            Statement::Function(FunctionStatement {
                name: "t".to_owned(),
                fields: vec!["a".to_owned(), "b".to_owned(), "c".to_owned()],
                method: Some("f".to_owned()),
                definition: FunctionDefinition {
                    parameters: vec![],
                    has_varargs: false,
                    body: Block {
                        stmts: vec![],
                        retstmt: None,
                    }
                }
            })
        );
    }

    /// `local function` statement.
    #[test]
    fn local_function() {
        let mut s: &[u8] = b"local function one() return 1 end";
        let mut parser = Parser::new(&mut s);
        assert_eq!(
            parser.parse_statement().unwrap(),
            Statement::LocalFunction(LocalFunctionStatement {
                name: "one".to_owned(),
                definition: FunctionDefinition {
                    parameters: vec![],
                    has_varargs: false,
                    body: Block {
                        stmts: vec![],
                        retstmt: Some(ReturnStatement {
                            returns: vec![Expression::Integer(1)]
                        })
                    }
                }
            })
        );
    }

    /// `local` declaration with more names than values.
    #[test]
    fn local_declaration() {
        let mut s: &[u8] = b"local a, b, c = 10, 20";
        let mut parser = Parser::new(&mut s);
        assert_eq!(
            parser.parse_statement().unwrap(),
            Statement::LocalDeclaration(LocalDeclarationStatement {
                names: vec!["a".to_owned(), "b".to_owned(), "c".to_owned()],
                values: vec![Expression::Integer(10), Expression::Integer(20)]
            })
        );
    }
}
--------------------------------------------------------------------------------