├── .gitignore ├── screenshot_example.jpg ├── Cargo.toml ├── ci ├── Dockerfile ├── tester │ ├── Cargo.toml │ └── main.rs └── test_projects.sh ├── src ├── lib.rs ├── tests │ ├── file.rs │ ├── mod.rs │ ├── expr.rs │ └── stmt.rs ├── interner.rs ├── ast.rs ├── tokens.rs ├── printer.rs └── tokenizer.rs ├── .travis.yml └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | target 2 | Cargo.lock 3 | ci/framework -------------------------------------------------------------------------------- /screenshot_example.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/steffengy/pico-php-parser/HEAD/screenshot_example.jpg -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "pico-php-parser" 3 | version = "0.1.0" 4 | authors = ["Steffen "] 5 | 6 | [dependencies] 7 | fnv = "1.0.5" 8 | -------------------------------------------------------------------------------- /ci/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu 2 | 3 | RUN apt-get update && \ 4 | apt-get install -y git wget php7.0-cli php7.0-mbstring php7.0-dom php7.0-sqlite3 php7.0-intl && \ 5 | wget -O - https://getcomposer.org/installer | php -- --install-dir=/usr/local/bin --filename=composer 6 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | #![recursion_limit="420"] 2 | extern crate fnv; 3 | 4 | mod interner; 5 | 6 | mod tokens; 7 | 8 | #[allow(dead_code)] //TODO: remove some day 9 | pub mod ast; 10 | 11 | mod tokenizer; 12 | mod parser; 13 | pub use parser::*; 14 | 15 | mod printer; 16 | pub use printer::PrettyPrinter; 17 | 18 | #[cfg(test)] 19 | mod tests; 20 | 
-------------------------------------------------------------------------------- /ci/tester/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "pico-php-tester" 3 | version = "0.1.0" 4 | authors = ["Steffen "] 5 | license = "MIT" 6 | 7 | [[bin]] 8 | name = "pico-php-tester" 9 | path = "main.rs" 10 | 11 | [dependencies] 12 | pico-php-parser={path="../../"} 13 | glob="0.2" 14 | wait-timeout = "0.1" 15 | clap="2.10.0" 16 | 17 | [profile.release] 18 | debug = true -------------------------------------------------------------------------------- /src/tests/file.rs: -------------------------------------------------------------------------------- 1 | use parser::*; 2 | 3 | fn process_script(input: &str) -> Vec { 4 | Parser::parse_str(&input).unwrap() 5 | } 6 | 7 | #[test] 8 | fn parse_simple_file_echos() { 9 | assert_eq!(process_script(r#"beforeafter1end"#), vec![ 10 | rsnb!(0,6, Stmt_::Echo(vec![ rnb!(0,6, Expr_::String("before".into())) ])), 11 | rsnb!(12,24, Stmt_::Echo(vec![ rnb!(17,23, Expr_::String("test".into())) ])), 12 | rsnb!(27,33, Stmt_::Echo(vec![ rnb!(27,33, Expr_::String("after1".into())) ])), 13 | rsnb!(39,50, Stmt_::Echo(vec![ rnb!(44,49, Expr_::String("end".into())) ])), 14 | rsnb!(53,56, Stmt_::Echo(vec![ rnb!(53,56, Expr_::String("end".into())) ])), 15 | ]); 16 | } 17 | 18 | // TEST invalid cases TODO: like 26 | if [ -n "$TEST_PROJECT" ]; then 27 | docker build -t pico_builder ci 28 | fi 29 | 30 | script: 31 | - | 32 | if [ -n "$TEST_PROJECT" ]; then 33 | pushd ./ci/tester 34 | cargo build 35 | popd 36 | docker run -e "TEST_PROJECT=$TEST_PROJECT" -v $HOME/.composer:/root/.composer -v $(readlink -f ./ci):/ci pico_builder /ci/test_projects.sh 37 | else 38 | cargo build --verbose 39 | cargo test --verbose 40 | fi 41 | -------------------------------------------------------------------------------- /ci/test_projects.sh: 
-------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | # setup a new user (laravel tests fail as root, since file-permissions do not work, 5 | # since root is allowed to do anything) 6 | useradd -ms /bin/bash non_root 7 | 8 | # Laravel tests 9 | if [ "$TEST_PROJECT" == "laravel" ]; then 10 | su -l non_root < {Box::new(enb!($s, $end, $e))}; 4 | } 5 | 6 | /// expression (not boxed) 7 | macro_rules! enb { 8 | // 6 is the sizeof " {rnb!($s+6, $end+6, $e)}; 10 | } 11 | 12 | /// statement expression (not boxed) 13 | macro_rules! st { 14 | ($s:expr, $end:expr, $st:expr) => {rsnb!($s+6, $end+6, $st)}; 15 | } 16 | 17 | /// statement expression (wrapping a not-boxed expr) 18 | macro_rules! senb { 19 | ($s:expr, $end:expr, $e:expr) => {st!($s, $end+1, Stmt_::Expr(enb!($s, $end, $e)))} 20 | } 21 | 22 | /// raw spanned expression (not boxed) 23 | macro_rules! rnb { 24 | ($s:expr, $end:expr, $e:expr) => {Expr($e, Span { start:($s) as u32, end:($end) as u32, ..Span::new() })}; 25 | } 26 | 27 | /// raw spanned statement (not boxed) 28 | macro_rules! rsnb { 29 | ($s:expr, $end:expr, $st:expr) => {Stmt($st, Span { start: $s, end: $end, ..Span::new()})}; 30 | } 31 | 32 | macro_rules! constant { 33 | (true) => {Expr_::Path(Path::identifier(false, "true".into()))}; 34 | (false) => {Expr_::Path(Path::identifier(false, "false".into()))}; 35 | (null) => {Expr_::Path(Path::identifier(false, "null".into()))}; 36 | } 37 | 38 | mod expr; 39 | mod file; 40 | mod stmt; 41 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # PicoPHPParser [![Build Status](https://travis-ci.org/steffengy/pico-php-parser.svg?branch=master)](https://travis-ci.org/steffengy/pico-php-parser) 2 | 3 | PicoPHPParser is a *lightweight* PHP 7.1 parser, which is (hand-)written in rust. 
4 | It already supports the *most common constructs* of the PHP language. 5 | 6 | One possible use is to provide *better error messages* than the original PHP parser. 7 | A very early stage of this is shown below: 8 | ![Screenshot of an early error message](/screenshot_example.jpg) 9 | 10 | # Testing it 11 | 12 | To ensure that the most common constructs of the PHP language work, we have **2 testing stages**: 13 | 14 | 1. **AST tests**: 15 | 16 | > These kinds of tests ensure that the information in the AST is correct 17 | > and also check the correctness of **meta-information** such as **line numbers**. 18 | > There are quite a few of these tests, but they are not as easy to write. 19 | 20 | > These types of tests are run during a simple `cargo test`, 21 | > which is the typical test command for the **Rust ecosystem**. 22 | 2. **Project tests**: 23 | 24 | > This stage ensures that the **most common [PHP-projects](#current-test-targets)** parse successfully: 25 | > 1. We ensure the tests pass at a clean state of the project. 26 | > 2. The parser & pretty printer parse the files & rewrite them. 27 | > 3. The tests are run a second time to ensure they still work properly. 28 | 29 | ## Current Test Targets 30 | - [Laravel (Framework)](https://github.com/laravel/framework) 31 | - [Slim](https://github.com/slimphp/Slim) 32 | -------------------------------------------------------------------------------- /src/interner.rs: -------------------------------------------------------------------------------- 1 | //! 
a very simple string interner 2 | use std::borrow::Borrow; 3 | use std::collections::HashSet; 4 | use std::rc::Rc; 5 | use std::hash::BuildHasherDefault; 6 | use std::mem; 7 | use fnv::FnvHasher; 8 | 9 | #[cfg(test)] 10 | use std::convert::From; 11 | /// reference-counted string handle handed out by the interner 12 | #[derive(Debug, Clone, PartialEq, Eq, Hash)] 13 | pub struct RcStr(Rc); 14 | 15 | impl Borrow for RcStr { 16 | fn borrow(&self) -> &str { 17 | &self.0 18 | } 19 | } 20 | 21 | /// test-only conversion into `RcStr` so that `assert_eq`s can be written with plain strings; 22 | /// it is compiled only under `cfg(test)`, so production code relying on it does not build 23 | #[cfg(test)] 24 | impl> From for RcStr { 25 | fn from(t: T) -> RcStr { 26 | RcStr(Rc::new(t.into())) 27 | } 28 | } 29 | /// the cache of all strings interned so far 30 | pub struct Interner { 31 | strs: HashSet>, 32 | } 33 | 34 | impl Interner { 35 | pub fn new() -> Interner { 36 | Interner { strs: HashSet::default() } 37 | } 38 | 39 | /// looks `s` up in the cache and reuses the existing entry if there is one; 40 | /// otherwise `s` is copied into a new entry which is stored in the cache 41 | /// returns the ref counted reference to the string 42 | pub fn intern(&mut self, s: &str) -> RcStr { 43 | if let Some(str_) = self.strs.get(s).cloned() { 44 | return str_; 45 | } 46 | let str_ = RcStr(Rc::new(s.to_owned())); 47 | self.strs.insert(str_.clone()); 48 | str_ 49 | } 50 | 51 | /// consumes the interner and returns a new one containing only those interned strings 52 | /// which are still referenced externally (entries held by the interner alone are dropped) 53 | pub fn compact(self) -> Interner { 54 | let mut new = Interner::new(); 55 | for key in self.strs { 56 | let weak_ref = Rc::downgrade(&key.0); 57 | mem::drop(key); 58 | if let Some(x) = weak_ref.upgrade() { 59 | let key = RcStr(x); 60 | new.strs.insert(key); 61 | } 62 | } 63 | new 64 | } 65 | } 66 | 67 | #[cfg(test)] 68 | mod tests { 69 | use super::*; 70 | use std::rc::Rc; 71 | 72 | #[test] 73 | fn test_interner() { 74 | let mut str2 = { 75 | let mut interner = Interner::new(); 76 | let mut str1 = interner.intern("test"); 77 | assert_eq!(Rc::get_mut(&mut str1.0), None); 78 | interner.intern("test") 79 | }; 80 | 
assert_eq!(Rc::get_mut(&mut str2.0), Some(&mut "test".to_owned())); 81 | } 82 | } 83 | -------------------------------------------------------------------------------- /ci/tester/main.rs: -------------------------------------------------------------------------------- 1 | //! This parses the given PHP files and pretty-prints them back in place 2 | //! Then we can compare the test results before & after; if they aren't identical, it's likely a parser or printer bug 3 | //! 4 | //! most important usages: 5 | //! pico-php-tester parse DIR : Parse&Print the `*.php` files found below the directory DIR 6 | //! pico-php-tester parse DIR CHECK : Parse&Print the files given in DIR and test using the command CHECK (exit code = 0) 7 | //! pico-php-tester parse -b DIR CHECK : Parse&Print the files given in DIR and try to locate the origin of the first error 8 | //! using binary search techniques (NOTE(review): `-b` is declared on the top-level command below, not on `parse`; confirm where clap expects it) 9 | //! pico-php-tester parse-file FILE : Parse&Print a given file 10 | //! 11 | extern crate pico_php_parser; 12 | extern crate glob; 13 | extern crate wait_timeout; 14 | extern crate clap; 15 | 16 | use std::collections::{HashSet, HashMap}; 17 | use std::env; 18 | use std::fs::File; 19 | use std::io::{Read, Write}; 20 | use std::fmt::Write as WriteFmt; 21 | use std::path::{self, Path}; 22 | use std::process::Command; 23 | use glob::glob; 24 | use wait_timeout::ChildExt; 25 | use clap::{Arg, App, SubCommand}; 26 | use pico_php_parser::{Parser, PrettyPrinter}; 27 | 28 | fn main() { 29 | let matches = App::new("PicoPhpParser-Tester") 30 | .version("0.1") 31 | .arg(Arg::with_name("binary-search") 32 | .short("b") 33 | ) 34 | .subcommand(SubCommand::with_name("parse") 35 | .about("parses files in a directory") 36 | .arg(Arg::with_name("DIR").index(1).required(true)) 37 | .arg(Arg::with_name("CHECK").required(false)) 38 | ) 39 | .subcommand(SubCommand::with_name("parse-file") 40 | .about("parses a file") 41 | .arg(Arg::with_name("FILE").index(1).required(true)) 42 | .arg(Arg::with_name("CHECK").required(false)) 43 | ) 44 | .get_matches(); 45 | 46 | // perform the checks with 
binary search / divide and conquer 47 | let do_binary_search = matches.occurrences_of("binary-search") > 0; 48 | 49 | if let Some(matches) = matches.subcommand_matches("parse") { 50 | let dir = matches.value_of("DIR").unwrap(); 51 | println!("Parsing: "); 52 | let blacklist = [ 53 | Path::new(dir).join("vendor"), 54 | Path::new(dir).join("tests") 55 | ]; 56 | let path = Path::new(dir).join("**/*.php"); 57 | let mut done_files = HashSet::new(); 58 | let mut i = 0; 59 | 60 | let files: Vec = glob(&path.to_string_lossy()).unwrap().filter_map(Result::ok).filter(|entry| { 61 | // skip vendor stuff, since we can not guarantee to test it 62 | // skip tests stuff, to ensure we actually pass all tests (and do not only pass wrongly generated tests) 63 | for bpath in &blacklist { 64 | if entry.as_path().starts_with(bpath) { 65 | return false; 66 | } 67 | } 68 | return true; 69 | }).map(|entry| format!("{}", entry.display())).collect(); 70 | let has_check = matches.value_of("CHECK").is_some(); 71 | 72 | let run_parse_file = |entry: &str, no_check:bool| { 73 | let mut cmd = Command::new(env::current_exe().unwrap()); 74 | cmd 75 | .arg("parse-file") 76 | .arg(entry); 77 | if let Some(check_arg) = matches.value_of("CHECK") { 78 | if !no_check { 79 | cmd.arg(check_arg); 80 | } 81 | } 82 | let mut child = cmd.spawn().unwrap(); 83 | match child.wait_timeout_ms(60000).unwrap() { 84 | Some(status) if status.success() => { 85 | println!("SUCCESS"); 86 | return true; 87 | }, 88 | Some(_) => println!("ERROR"), 89 | _ => { 90 | println!("TIMEOUT"); 91 | child.kill().unwrap(); 92 | } 93 | } 94 | return false; 95 | }; 96 | 97 | // if binary search is specified: 98 | // - Read all files to memory (else we need to rely on git revert, which is more difficult to implement) 99 | // - check the first half 100 | // - If all tests pass, revert, check the second half 101 | // - If not, check the first half of the first half, and continue 102 | // the general goal is to find the specific file which 
is malformed due to a parser or printer bug 103 | if do_binary_search { 104 | assert!(has_check); 105 | let mut src_map = HashMap::new(); 106 | for file in &files { 107 | let mut str_ = String::new(); 108 | File::open(file).unwrap().read_to_string(&mut str_).unwrap(); 109 | src_map.insert(file, str_); 110 | } 111 | let mut low = 0; 112 | let mut high = files.len(); 113 | while high - low > 1 { 114 | let mid = (low + high) / 2; 115 | // parse the files from low -> mid 116 | let mut passes = true; 117 | println!("checking from {} to {}", low, mid); 118 | for (i, entry) in files[low..mid].iter().enumerate() { 119 | // perform the check for the last item 120 | let no_check = i != mid - low - 1; 121 | if !run_parse_file(entry, no_check) { 122 | passes = false; 123 | break; 124 | } 125 | } 126 | println!("reverting..."); 127 | // revert the changes 128 | for entry in &files[low..mid] { 129 | write!(File::create(entry).unwrap(), "{}", src_map[entry]).unwrap(); 130 | } 131 | if passes { 132 | println!("range passed."); 133 | low = mid; 134 | } else { 135 | println!("range failed."); 136 | high = mid; 137 | } 138 | } // `high` starts at files.len() and only shrinks when a range fails, and `low` is 0 for an empty list, so both lookups below are checked instead of indexed 139 | match files.get(low) { Some(file) => println!("bad: {} resolves to {}", low, file), None => println!("bad: {} is out of range, no files were checked", low) } 140 | match files.get(high) { Some(file) => println!("good: {} resolves to {}", high, file), None => println!("good: {} is past the last file", high) } 141 | } else { 142 | // normal parsing 143 | for entry in &files { 144 | i += 1; 145 | if run_parse_file(entry, false) { 146 | done_files.insert(entry.to_owned()); 147 | } 148 | } 149 | let success = done_files.len(); // report the totals before failing: the summary matters most when files are left over 150 | println!("all_file_count: {}, handled: {} ({:.2}%), left: {}", i, success, (success as f64)/(i as f64)*100f64, i-success); 151 | if success != i { 152 | panic!("Parsing stage failed!"); 153 | } 154 | } 155 | return; 156 | } 157 | 158 | if let Some(matches) = matches.subcommand_matches("parse-file") { 159 | let file = matches.value_of("FILE").unwrap(); 160 | parse_file(file, matches.value_of("CHECK")); 161 | return; 162 | } 163 | 164 | panic!("invalid arguments!") 165 | } 166 | 167 | fn parse_file(arg: &str, check_cmd: 
Option<&str>) { 168 | println!("parsing {}", arg); 169 | let mut s = String::new(); 170 | let ast = { 171 | let mut f = File::open(arg).unwrap(); 172 | f.read_to_string(&mut s).unwrap(); 173 | match Parser::parse_str(&s) { 174 | Ok(ast) => ast, 175 | Err(err) => { 176 | panic!("ERROR: {}", err.error_message(Some(&s))); 177 | } 178 | } 179 | }; 180 | { 181 | let mut f = File::create(arg).unwrap(); 182 | let mut str_ = String::new(); 183 | PrettyPrinter::print_statements(&mut str_, ast).unwrap(); 184 | write!(f, "", str_).unwrap(); 185 | } 186 | if let Some(cmd) = check_cmd { 187 | let mut child = Command::new(cmd).spawn().unwrap(); 188 | match child.wait_timeout_ms(60000).unwrap() { 189 | Some(status) if status.success() => { 190 | println!("check ok"); 191 | }, 192 | Some(_) => panic!("check error"), 193 | _ => { 194 | child.kill().unwrap(); 195 | panic!("check timeout"); 196 | } 197 | } 198 | } 199 | } 200 | -------------------------------------------------------------------------------- /src/ast.rs: -------------------------------------------------------------------------------- 1 | use std::rc::Rc; 2 | use tokenizer::Span; 3 | use interner::RcStr; 4 | 5 | #[derive(Clone, Debug, PartialEq)] 6 | pub enum ParsedItem { 7 | Text(RcStr), 8 | CodeBlock(Vec), 9 | } 10 | 11 | pub type UseAlias = Option; 12 | 13 | #[derive(Clone, Debug, PartialEq)] 14 | pub enum UseClause { 15 | QualifiedName(Path, UseAlias), 16 | } 17 | 18 | #[derive(Clone, Debug, PartialEq)] 19 | pub struct Path { 20 | pub is_absolute: bool, 21 | pub namespace: Option, 22 | /// mostly something like the trait or class name 23 | pub identifier: RcStr, 24 | } 25 | 26 | impl Path { 27 | pub fn identifier(absolute: bool, name: RcStr) -> Path { 28 | Path { 29 | namespace: None, 30 | identifier: name, 31 | is_absolute: absolute, 32 | } 33 | } 34 | 35 | pub fn ns_identifier(absolute: bool, namespace: RcStr, name: RcStr) -> Path { 36 | Path { 37 | namespace: Some(namespace), 38 | identifier: name, 39 | 
is_absolute: absolute, 40 | } 41 | } 42 | } 43 | 44 | /// binary operators 45 | #[derive(Clone, Debug, PartialEq)] 46 | pub enum Op { 47 | Concat, 48 | // arithmetic 49 | Add, 50 | Sub, 51 | Mul, 52 | Div, 53 | Pow, 54 | Mod, 55 | // logical 56 | Or, 57 | And, 58 | // equality 59 | Identical, 60 | NotIdentical, 61 | Eq, 62 | Neq, 63 | // relational 64 | Lt, 65 | Gt, 66 | Le, 67 | Ge, 68 | // bitwise 69 | BitwiseAnd, 70 | BitwiseInclOr, 71 | /// XOR 72 | BitwiseExclOr, 73 | /// spaceship operator, <=> 74 | Spaceship, 75 | Sl, 76 | Sr, 77 | } 78 | /// unary operators 79 | #[derive(Clone, Debug, PartialEq)] 80 | pub enum UnaryOp { 81 | Positive, 82 | Negative, 83 | Not, 84 | PreInc, 85 | PreDec, 86 | PostInc, 87 | PostDec, 88 | BitwiseNot, 89 | /// the "@" operator, see http://php.net/manual/en/language.operators.errorcontrol.php 90 | /// any error messages that might be generated by that expression will be ignored. 91 | SilenceErrors, 92 | } 93 | 94 | #[derive(Copy, Clone, Debug, PartialEq)] 95 | pub enum Visibility { 96 | None, 97 | Public, 98 | Private, 99 | Protected, 100 | } 101 | /// modifiers of a class declaration; every variant is a distinct bit 102 | #[derive(Copy, Clone, Debug, PartialEq)] 103 | pub enum ClassModifier { 104 | Abstract = 1<<0, 105 | Final = 1<<1, 106 | } 107 | /// set of `ClassModifier` flags packed into one `u8` 108 | #[derive(Copy, Clone, Debug, PartialEq)] 109 | pub struct ClassModifiers(u8); 110 | 111 | impl ClassModifiers { 112 | pub fn none() -> ClassModifiers { 113 | ClassModifiers(0) 114 | } 115 | /// builds the set by OR-ing the bits of all given modifiers (duplicates are harmless) 116 | pub fn new(cms: &[ClassModifier]) -> ClassModifiers { 117 | let mut flag = 0; 118 | for modifier in cms { 119 | flag |= *modifier as u8; 120 | } 121 | ClassModifiers(flag) 122 | } 123 | /// true if the bit of `m` is set 124 | #[inline] 125 | pub fn has(&self, m: ClassModifier) -> bool { 126 | self.0 & (m as u8) != 0 127 | } 128 | } 129 | 130 | /// bit flags of a class member (visibility, static, abstract, final), combined in `MemberModifiers` 131 | /// TODO: error validation of duplicate and invalid states in ::new 132 | #[derive(Copy, Clone, Debug, PartialEq)] 133 | pub enum MemberModifier { 134 | Public = 1<<0, 135 | Protected = 1<<1, 136 | Private = 1<<2, 
137 | Static = 1<<3, 138 | Abstract = 1<<4, 139 | Final = 1<<5, 140 | } 141 | #[derive(Copy, Clone, Debug, PartialEq)] 142 | pub struct MemberModifiers(u8); 143 | 144 | impl MemberModifiers { 145 | pub fn none() -> MemberModifiers { 146 | MemberModifiers(0) 147 | } 148 | 149 | pub fn new(ms: &[MemberModifier]) -> MemberModifiers { 150 | let mut flag = 0; 151 | for modifier in ms { 152 | flag |= *modifier as u8; 153 | } 154 | MemberModifiers(flag) 155 | } 156 | 157 | pub fn has(&self, m: MemberModifier) -> bool { 158 | self.0 & (m as u8) != 0 159 | } 160 | } 161 | 162 | #[derive(Clone, Debug, PartialEq)] 163 | pub struct Expr(pub Expr_, pub Span); 164 | 165 | #[derive(Clone, Debug, PartialEq)] 166 | pub struct Stmt(pub Stmt_, pub Span); 167 | 168 | #[derive(Clone, Debug, PartialEq)] 169 | pub struct Block(pub Vec); 170 | 171 | impl Block { 172 | pub fn empty() -> Block { 173 | Block(vec![]) 174 | } 175 | 176 | pub fn is_empty(&self) -> bool { 177 | self.0.is_empty() 178 | } 179 | } 180 | 181 | #[derive(Clone, Debug, PartialEq)] 182 | pub enum Variable { 183 | Name(RcStr), 184 | /// something like $$test, where another expression contains the name of the variable to be fetched 185 | Fetch(Box), 186 | } 187 | 188 | impl> From for Variable { 189 | fn from(t: T) -> Variable { 190 | Variable::Name(t.into()) 191 | } 192 | } 193 | 194 | #[derive(Clone, Debug, PartialEq)] 195 | pub enum Expr_ { 196 | /// indicates the path to e.g. a namespace or is a simple identifier (e.g. 
a runtime-constant) 197 | /// or a builtin (internal) constant like true, false, null or any magic-constant 198 | Path(Path), 199 | String(RcStr), 200 | BinaryString(Rc>), 201 | Int(i64), 202 | Double(f64), 203 | Array(Vec<(Option, Expr)>), 204 | Variable(Variable), 205 | Reference(Box), 206 | Clone(Box), 207 | Isset(Vec), 208 | Empty(Box), 209 | Exit(Option>), 210 | 211 | Include(IncludeTy, Box), 212 | ArrayIdx(Box, Vec>), 213 | ObjMember(Box, Vec), 214 | StaticMember(Box, Vec), 215 | Call(Box, Vec), 216 | New(Box, Vec), 217 | /// variadic unpack ... 218 | Unpack(Box), 219 | UnaryOp(UnaryOp, Box), 220 | BinaryOp(Op, Box, Box), 221 | InstanceOf(Box, Box), 222 | Cast(Ty, Box), 223 | Yield(Option>), 224 | /// an anonymous function 225 | Function(FunctionDecl), 226 | 227 | // statements 228 | Assign(Box, Box), 229 | /// compound (binary) assign e.g. $test += 3; which is equal to $test = $test + 3; (Assign, BinaryOp) 230 | CompoundAssign(Box, Op, Box), 231 | AssignRef(Box, Box), 232 | List(Vec<(Option, Expr)>), 233 | 234 | /// same as if, just will pass the return-value of either expression to the parent 235 | /// if .1 (then) is None, the value of .0 (condition) will be used 236 | /// this can be desugared into an `If` during post-processing 237 | TernaryIf(Box, Option>, Box), 238 | } 239 | 240 | #[derive(Clone, Debug, PartialEq)] 241 | pub enum Stmt_ { 242 | /// an empty statement such as simply ";" 243 | None, 244 | Block(Block), 245 | Decl(Decl), 246 | Use(Vec), 247 | /// An expression which is terminated by a semicolon 248 | Expr(Expr), 249 | Echo(Vec), 250 | Return(Option>), 251 | Break(Option>), 252 | Continue(Option>), 253 | Unset(Vec), 254 | 255 | /// If (condition=.0) { Block=.1 } else Else_Expr=.2 256 | If(Box, Block, Block), 257 | While(Box, Block), 258 | DoWhile(Block, Box), 259 | /// For(initializer=.0; cond=.1; end_of_loop=.2) statement=.3 260 | For(Vec, Vec, Vec, Block), 261 | ForEach(Box, Option>, Box, Block), 262 | /// Try(TryBlock, CatchClauses, 
FinallyClause) 263 | Try(Block, Vec, Option), 264 | Throw(Box), 265 | 266 | /// switch (stmt=.0) [case item: body]+=.1 267 | /// All item-cases for a body will be included in the first-member Vec 268 | /// so basically we have a mapping from all-cases -> body in .1 269 | /// TODO: should be desugared into an if-statement 270 | Switch(Box, Vec), 271 | 272 | Goto(RcStr), 273 | } 274 | 275 | #[derive(Clone, Debug, PartialEq)] 276 | pub enum Ty { 277 | Array, 278 | Callable, 279 | Bool, 280 | Float, 281 | Int, 282 | Double, 283 | String, 284 | Object(Option), 285 | } 286 | 287 | /// A type and flag describing whether it's nullable 288 | #[derive(Clone, Debug, PartialEq)] 289 | pub enum NullableTy { 290 | NonNullable(Ty), 291 | Nullable(Ty), 292 | } 293 | 294 | #[derive(Clone, Debug, PartialEq)] 295 | pub enum IncludeTy { 296 | Include, 297 | IncludeOnce, 298 | Require, 299 | RequireOnce, 300 | } 301 | 302 | #[derive(Clone, Debug, PartialEq)] 303 | pub enum TraitUse { 304 | InsteadOf(Path, RcStr, Vec), 305 | As(Option, RcStr, MemberModifiers, Option), 306 | } 307 | 308 | #[derive(Clone, Debug, PartialEq)] 309 | pub struct ParamDefinition { 310 | pub name: RcStr, 311 | pub as_ref: bool, 312 | pub variadic: bool, 313 | /// The type of the parameter 314 | pub ty: Option, 315 | /// The default value for the parameter 316 | pub default: Option, 317 | } 318 | 319 | #[derive(Clone, Debug, PartialEq)] 320 | pub struct FunctionDecl { 321 | pub params: Vec, 322 | pub body: Option, 323 | /// A list of variables to pass from the parent scope to the scope of this function 324 | /// So variables which are basically available shared into this function's scope 325 | /// the boolean indicates whether to bind by-reference (true) 326 | pub usev: Vec<(bool, RcStr)>, 327 | pub ret_ref: bool, 328 | pub ret_ty: Option, 329 | } 330 | 331 | #[derive(Clone, Debug, PartialEq)] 332 | pub struct ClassDecl { 333 | pub cmod: ClassModifiers, 334 | pub name: RcStr, 335 | pub base_class: Option, 336 | 
/// The implemented interfaces of this class 337 | pub implements: Vec, 338 | pub members: Vec, 339 | } 340 | 341 | #[derive(Clone, Debug, PartialEq)] 342 | pub enum Member { 343 | Constant(MemberModifiers, RcStr, Expr), 344 | Property(MemberModifiers, RcStr, Option), 345 | Method(MemberModifiers, RcStr, FunctionDecl), 346 | TraitUse(Vec, Vec), 347 | } 348 | 349 | #[derive(Clone, Debug, PartialEq)] 350 | pub enum Decl { 351 | Namespace(Path), 352 | GlobalFunction(RcStr, FunctionDecl), 353 | Class(ClassDecl), 354 | Interface(RcStr, Vec, Vec), 355 | Trait(RcStr, Vec), 356 | StaticVars(Vec<(RcStr, Option)>), 357 | GlobalVars(Vec), 358 | // a goto jump target 359 | Label(RcStr), 360 | } 361 | 362 | #[derive(Clone, Debug, PartialEq)] 363 | pub struct CatchClause { 364 | pub ty: Path, 365 | pub var: RcStr, 366 | pub block: Block, 367 | } 368 | 369 | #[derive(Clone, Debug, PartialEq)] 370 | pub struct SwitchCase { 371 | pub conds: Vec, 372 | pub default: bool, 373 | pub block: Block, 374 | } 375 | -------------------------------------------------------------------------------- /src/tokens.rs: -------------------------------------------------------------------------------- 1 | use std::rc::Rc; 2 | use interner::RcStr; 3 | 4 | #[derive(Clone, Debug)] 5 | pub struct TokenSpan(pub Token, pub Span); 6 | 7 | #[derive(Clone, Debug, PartialEq)] 8 | pub struct Span { 9 | /// the lower byte position (inclusive) 10 | pub start: u32, 11 | /// the upper byte position (exclusive) 12 | pub end: u32, 13 | /// This allows tokens to set or unset the current doc_comment for an declaration 14 | /// which the parser this way can easily track 15 | pub doc_comment: Option, 16 | } 17 | 18 | impl Span { 19 | #[inline] 20 | pub fn new() -> Span { 21 | Span { 22 | start: 0, 23 | end: 0, 24 | doc_comment: None, 25 | } 26 | } 27 | } 28 | 29 | #[derive(Debug, PartialEq)] 30 | pub enum SyntaxError { 31 | None, 32 | Unterminated(&'static str, Span), 33 | UnknownCharacter(Span), 34 | } 35 | 36 | impl 
SyntaxError { 37 | pub fn span(&self) -> Span { 38 | match *self { 39 | SyntaxError::None => unimplemented!(), 40 | SyntaxError::Unterminated(_, ref span) | 41 | SyntaxError::UnknownCharacter(ref span) => span.clone(), 42 | } 43 | } 44 | } 45 | 46 | #[allow(dead_code)] 47 | // TODO: remove some day 48 | #[derive(Clone, Debug, PartialEq)] 49 | pub enum Token { 50 | End, 51 | // very simple tokens 52 | SemiColon, 53 | Colon, 54 | Comma, 55 | Dot, 56 | SquareBracketOpen, 57 | SquareBracketClose, 58 | ParenthesesOpen, 59 | ParenthesesClose, 60 | BwOr, 61 | BwXor, 62 | Ampersand, 63 | Plus, 64 | Minus, 65 | Div, 66 | Mul, 67 | Equal, 68 | Mod, 69 | BoolNot, 70 | BwNot, 71 | Dollar, 72 | Lt, 73 | Gt, 74 | QuestionMark, 75 | Silence, 76 | DollarCurlyBracesOpen, 77 | CurlyBracesOpen, 78 | CurlyBracesClose, 79 | /// ` 80 | Backquote, 81 | DoubleQuote, 82 | HereDocStart, 83 | HereDocEnd, 84 | // php tokens 85 | OpenTagWithEcho, 86 | OpenTag, 87 | /// this counts as implicit ';' 88 | CloseTag, 89 | Exit, 90 | Function, 91 | Const, 92 | Return, 93 | Yield, 94 | YieldFrom, 95 | Try, 96 | Catch, 97 | Finally, 98 | Throw, 99 | If, 100 | ElseIf, 101 | EndIf, 102 | Else, 103 | While, 104 | EndWhile, 105 | Do, 106 | For, 107 | Endfor, 108 | Foreach, 109 | EndForeach, 110 | Declare, 111 | EndDeclare, 112 | InstanceOf, 113 | As, 114 | Switch, 115 | EndSwitch, 116 | Case, 117 | Default, 118 | Break, 119 | Continue, 120 | Goto, 121 | Echo, 122 | Print, 123 | Class, 124 | Interface, 125 | Trait, 126 | Extends, 127 | Implements, 128 | /// T_OBJECT_OPERATOR 129 | ObjectOp, 130 | /// T_PAAMAYIM_NEKUDOTAYIM 131 | ScopeOp, 132 | NsSeparator, 133 | Ellipsis, 134 | Coalesce, 135 | New, 136 | Clone, 137 | Var, 138 | CastInt, 139 | CastDouble, 140 | CastString, 141 | CastArray, 142 | CastObject, 143 | CastBool, 144 | CastUnset, 145 | Eval, 146 | Include, 147 | IncludeOnce, 148 | Require, 149 | RequireOnce, 150 | Namespace, 151 | Use, 152 | Insteadof, 153 | Global, 154 | Isset, 155 | Empty, 156 | 
HaltCompiler, 157 | Static, 158 | Abstract, 159 | Final, 160 | Private, 161 | Protected, 162 | Public, 163 | Unset, 164 | DoubleArrow, 165 | List, 166 | Array, 167 | Callable, 168 | Increment, 169 | Decrement, 170 | IsIdentical, 171 | IsNotIdentical, 172 | IsEqual, 173 | IsNotEqual, 174 | SpaceShip, 175 | IsSmallerOrEqual, 176 | IsGreaterOrEqual, 177 | PlusEqual, 178 | MinusEqual, 179 | MulEqual, 180 | Pow, 181 | PowEqual, 182 | DivEqual, 183 | ConcatEqual, 184 | ModEqual, 185 | SlEqual, 186 | SrEqual, 187 | AndEqual, 188 | OrEqual, 189 | XorEqual, 190 | BoolOr, 191 | BoolAnd, 192 | LogicalOr, 193 | LogicalAnd, 194 | LogicalXor, 195 | Sl, 196 | Sr, 197 | Variable(RcStr), 198 | Int(i64), 199 | Double(f64), 200 | Comment(RcStr), 201 | /// likely an arbitrary identifier 202 | String(RcStr), 203 | /// like 'test', constant encapsed string 204 | ConstantEncapsedString(RcStr), 205 | BinaryCharSequence(Rc>), 206 | InlineHtml(RcStr), 207 | // magic-tokens 208 | MagicClass, 209 | MagicTrait, 210 | MagicFunction, 211 | MagicMethod, 212 | MagicLine, 213 | MagicFile, 214 | MagicDir, 215 | MagicNamespace, 216 | } 217 | 218 | impl Token { /// true for every keyword and magic-constant token matched below, false for all other tokens; note that the `endfor` variant is spelled `Endfor`, not `EndFor` 219 | #[inline] 220 | pub fn is_reserved_non_modifier(&self) -> bool { 221 | match *self { 222 | Token::Include | Token::IncludeOnce | Token::Eval | Token::Require | Token::RequireOnce | Token::LogicalOr | Token::LogicalXor | Token::LogicalAnd 223 | | Token::InstanceOf | Token::New | Token::Clone | Token::Exit | Token::If | Token::ElseIf | Token::Else | Token::EndIf | Token::Echo 224 | | Token::Do | Token::While | Token::EndWhile 225 | | Token::For | Token::Endfor | Token::Foreach | Token::EndForeach | Token::Declare | Token::EndDeclare | Token::As | Token::Try | Token::Catch | Token::Finally 226 | | Token::Throw | Token::Use | Token::Insteadof | Token::Global | Token::Var | Token::Unset | Token::Isset | Token::Empty | Token::Continue | Token::Goto 227 | | Token::Function | Token::Const | Token::Return | Token::Print | Token::Yield | 
Token::List | Token::Switch | Token::EndSwitch | Token::Case | Token::Default 228 | | Token::Break | Token::Array | Token::Callable | Token::Extends | Token::Implements | Token::Namespace | Token::Trait | Token::Interface | Token::Class 229 | | Token::MagicClass | Token::MagicTrait | Token::MagicFunction | Token::MagicMethod | Token::MagicLine | Token::MagicFile | Token::MagicDir 230 | | Token::MagicNamespace => true, 231 | _ => false, 232 | } 233 | } 234 | 235 | /// Get the string representation of a token 236 | #[inline] 237 | pub fn repr(&self) -> &'static str { 238 | match *self { 239 | Token::OpenTagWithEcho => " " "?>", 242 | Token::Exit => "exit", 243 | Token::Function => "function", 244 | Token::Const => "const", 245 | Token::Return => "return", 246 | Token::Try => "try", 247 | Token::Catch => "catch", 248 | Token::Finally => "finally", 249 | Token::Throw => "throw", 250 | Token::If => "if", 251 | Token::ElseIf => "elseif", 252 | Token::EndIf => "endif", 253 | Token::Else => "else", 254 | Token::While => "while", 255 | Token::EndWhile => "endwhile", 256 | Token::Do => "do", 257 | Token::Foreach => "foreach", 258 | Token::EndForeach => "endforeach", 259 | Token::For => "for", 260 | Token::Endfor => "endfor", 261 | Token::Declare => "declare", 262 | Token::EndDeclare => "enddeclare", 263 | Token::InstanceOf => "instanceof", 264 | Token::As => "as", 265 | Token::Switch => "switch", 266 | Token::EndSwitch => "endswitch", 267 | Token::Case => "case", 268 | Token::Default => "default", 269 | Token::Break => "break", 270 | Token::Continue => "continue", 271 | Token::Goto => "goto", 272 | Token::Echo => "echo", 273 | Token::Print => "print", 274 | Token::Class => "class", 275 | Token::Interface => "interface", 276 | Token::Trait => "trait", 277 | Token::Extends => "extends", 278 | Token::Implements => "implements", 279 | Token::ObjectOp => "->", 280 | Token::ScopeOp => "::", 281 | Token::NsSeparator => "\\", 282 | Token::Ellipsis => "...", 283 | Token::Coalesce => 
"??", 284 | Token::New => "new", 285 | Token::Clone => "clone", 286 | Token::Var => "var", 287 | Token::CastInt => "int", 288 | Token::CastDouble => "double", 289 | Token::CastString => "string", 290 | Token::CastArray => "array", 291 | Token::CastObject => "object", 292 | Token::CastBool => "bool", 293 | Token::CastUnset => "unset", 294 | Token::Eval => "eval", 295 | Token::IncludeOnce => "include_once", 296 | Token::Include => "include", 297 | Token::RequireOnce => "require_once", 298 | Token::Require => "require", 299 | Token::Namespace => "namespace", 300 | Token::Use => "use", 301 | Token::Insteadof => "insteadof", 302 | Token::Global => "global", 303 | Token::Isset => "isset", 304 | Token::Empty => "empty", 305 | Token::HaltCompiler => "__halt_compiler", 306 | Token::Static => "static", 307 | Token::Abstract => "abstract", 308 | Token::Final => "final", 309 | Token::Private => "private", 310 | Token::Protected => "protected", 311 | Token::Public => "public", 312 | Token::Unset => "unset", 313 | Token::DoubleArrow => "=>", 314 | Token::List => "list", 315 | Token::Array => "array", 316 | Token::Callable => "callable", 317 | Token::Increment => "++", 318 | Token::Decrement => "--", 319 | Token::IsIdentical => "===", 320 | Token::IsNotIdentical => "!==", 321 | Token::IsEqual => "==", 322 | Token::IsNotEqual => "!=", 323 | Token::SpaceShip => "<=>", 324 | Token::IsSmallerOrEqual => "<=", 325 | Token::IsGreaterOrEqual => ">=", 326 | Token::PlusEqual => "+=", 327 | Token::MinusEqual => "-=", 328 | Token::MulEqual => "*=", 329 | Token::Pow => "**", 330 | Token::PowEqual => "**=", 331 | Token::DivEqual => "/=", 332 | Token::ConcatEqual => ".=", 333 | Token::ModEqual => "%=", 334 | Token::SlEqual => "<<=", 335 | Token::SrEqual => ">>=", 336 | Token::AndEqual => "&=", 337 | Token::OrEqual => "|=", 338 | Token::XorEqual => "^=", 339 | Token::BoolOr => "||", 340 | Token::BoolAnd => "&&", 341 | Token::LogicalOr => "OR", 342 | Token::LogicalAnd => "AND", 343 | 
Token::LogicalXor => "XOR", 344 | Token::Sl => "<<", 345 | Token::Sr => ">>", 346 | Token::CurlyBracesOpen => "{", 347 | Token::CurlyBracesClose => "}", 348 | Token::MagicClass => "__CLASS__", 349 | Token::MagicTrait => "__TRAIT__", 350 | Token::MagicFunction => "__FUNCTION__", 351 | Token::MagicMethod => "__METHOD__", 352 | Token::MagicLine => "__LINE__", 353 | Token::MagicFile => "__FILE__", 354 | Token::MagicDir => "__DIR__", 355 | Token::MagicNamespace => "__NAMESPACE__", 356 | _ => unimplemented!(), 357 | } 358 | } 359 | } 360 | -------------------------------------------------------------------------------- /src/tests/expr.rs: -------------------------------------------------------------------------------- 1 | use parser::*; 2 | 3 | fn process_expr(input: &str) -> Expr { 4 | let str_ = " expr, 9 | _ => unreachable!(), 10 | } 11 | } 12 | 13 | #[test] 14 | fn parse_expr_comment() { 15 | assert_eq!(process_expr("1/*test*/+/*test*/2"), enb!(0,19, Expr_::BinaryOp(Op::Add, eb!(0, 1, Expr_::Int(1)), eb!(18, 19, Expr_::Int(2))))); 16 | //TODO: doc comment tests 17 | } 18 | 19 | #[test] 20 | fn parse_expr_op() { 21 | assert_eq!(process_expr(r#"1+2"#), enb!(0,3, Expr_::BinaryOp(Op::Add, eb!(0, 1, Expr_::Int(1)), eb!(2, 3, Expr_::Int(2))))); 22 | assert_eq!(process_expr(r#"1+2*3"#), enb!(0,5, Expr_::BinaryOp(Op::Add, eb!(0,1, Expr_::Int(1)), eb!(2,5, Expr_::BinaryOp(Op::Mul, 23 | eb!(2,3, Expr_::Int(2)), eb!(4, 5, Expr_::Int(3)))))) 24 | ); 25 | assert_eq!(process_expr(r#"2+$d**$c**$d"#), enb!(0, 12, Expr_::BinaryOp(Op::Add, eb!(0, 1, Expr_::Int(2)), 26 | eb!(2, 12, Expr_::BinaryOp( 27 | Op::Pow, 28 | eb!(2, 4, Expr_::Variable("d".into())), 29 | eb!(6, 12, Expr_::BinaryOp(Op::Pow, eb!(6, 8, Expr_::Variable("c".into())), eb!(10, 12, Expr_::Variable("d".into())))) 30 | )) 31 | ))); 32 | assert_eq!(process_expr(r#"$g["a"]-$g["b"]/3"#), enb!(0,17, Expr_::BinaryOp( 33 | Op::Sub, 34 | eb!(0,7, Expr_::ArrayIdx(eb!(0,2, Expr_::Variable("g".into())), vec![ Some(enb!(3,6, 
Expr_::String("a".into()))) ])), 35 | eb!(8,17, Expr_::BinaryOp(Op::Div, eb!(8,15, Expr_::ArrayIdx(eb!(8,10, Expr_::Variable("g".into())), vec![ 36 | Some(enb!(11,14, Expr_::String("b".into()))) ])), eb!(16,17, Expr_::Int(3))) 37 | ) 38 | ))); 39 | } 40 | 41 | #[test] 42 | fn parse_expr_logical() { 43 | assert_eq!(process_expr("$a||$b"), enb!(0,6, Expr_::BinaryOp(Op::Or, eb!(0,2, Expr_::Variable("a".into())), eb!(4,6, Expr_::Variable("b".into()))))); 44 | assert_eq!(process_expr("$a&&true"), enb!(0,8, Expr_::BinaryOp(Op::And, eb!(0,2, Expr_::Variable("a".into())), eb!(4,8, constant!(true))))); 45 | assert_eq!(process_expr("!$a"), enb!(0,3, Expr_::UnaryOp(UnaryOp::Not, eb!(1,3, Expr_::Variable("a".into()))))); 46 | assert_eq!(process_expr("$a?false:true or $b"), enb!(0,19, Expr_::BinaryOp(Op::Or, 47 | eb!(0,13, Expr_::TernaryIf(eb!(0,2, Expr_::Variable("a".into())), Some(eb!(3,8, constant!(false))), eb!(9,13, constant!(true)))), eb!(17,19, Expr_::Variable("b".into())) 48 | ))); 49 | } 50 | 51 | #[test] 52 | fn parse_expr_parens() { 53 | assert_eq!(process_expr(r#"(1+2)*3"#), enb!(0,7, Expr_::BinaryOp(Op::Mul, eb!(0,5, Expr_::BinaryOp(Op::Add, eb!(1,2, Expr_::Int(1)), eb!(3,4, Expr_::Int(2)))), eb!(6,7, Expr_::Int(3))))); 54 | assert_eq!(process_expr(r#"(true||false)&&true"#), enb!(0,19, Expr_::BinaryOp(Op::And, eb!(0, 13, 55 | Expr_::BinaryOp(Op::Or, eb!(1,5, constant!(true)), eb!(7,12,constant!(false)))), eb!(15,19, constant!(true)) 56 | ))); 57 | } 58 | 59 | #[test] 60 | fn parse_expr_string() { 61 | // TODO: fix line numbers of fragments (complex strings containing variables, etc.) 
(not tested yet) 62 | assert_eq!(process_expr(r#""""#), enb!(0, 2, Expr_::String("".into()))); 63 | assert_eq!(process_expr(r#""t\nest\tsss\"os\"haha""#), enb!(0, 23, Expr_::String("t\nest\tsss\"os\"haha".into()))); 64 | assert_eq!(process_expr(r#""\xe7\x9a\x84""#), enb!(0, 14, Expr_::String("的".into()))); 65 | //assert_eq!(process_expr(r#""a\142\143d""#), Expr_::String("abcd".into())); 66 | assert_eq!(process_expr(r#""a\"b\\\"c\\\"d\"e""#), enb!(0, 19, Expr_::String(r#"a"b\"c\"d"e"#.into()))); 67 | assert_eq!(process_expr(r#""abc\ClassName""#), enb!(0, 15, Expr_::String("abc\\ClassName".into()))); 68 | } 69 | 70 | #[test] 71 | fn parse_expr_string_fragmented() { 72 | assert_eq!(process_expr(r#""hello $world""#), enb!(0,14, Expr_::BinaryOp(Op::Concat, eb!(1,7, Expr_::String("hello ".into())), eb!(7,13, Expr_::Variable("world".into()))))); 73 | assert_eq!(process_expr(r#""hello {$world}""#), enb!(0,16, Expr_::BinaryOp(Op::Concat, eb!(1,7, Expr_::String("hello ".into())), eb!(8,14, Expr_::Variable("world".into()))))); 74 | assert_eq!(process_expr(r#""hello $wor->ld""#), enb!(0,16, Expr_::BinaryOp(Op::Concat, eb!(1,7, Expr_::String("hello ".into())), eb!(7,15, Expr_::ObjMember( 75 | eb!(7,11, Expr_::Variable("wor".into())), vec![ enb!(13,15, Expr_::Path(Path::identifier(false, "ld".into()))) ] 76 | ))))); 77 | assert_eq!(process_expr(r#""hello ${world}""#), enb!(0,16, Expr_::BinaryOp(Op::Concat, eb!(1,7, Expr_::String("hello ".into())), eb!(7,15, Expr_::Variable("world".into()))))); 78 | } 79 | 80 | #[test] 81 | fn parse_expr_char_string() { 82 | assert_eq!(process_expr(r#"'\ntest\142'"#), enb!(0, 12, Expr_::String("\\ntest\\142".into()))); 83 | assert_eq!(process_expr(r#"'a\'b\'c'"#), enb!(0,9, Expr_::String("a'b'c".into()))); 84 | assert_eq!(process_expr(r#"'d\'e\\\'f\\\'\'g'"#), enb!(0,18, Expr_::String("d\'e\\\'f\\\'\'g".into()))); 85 | } 86 | 87 | #[test] 88 | fn parse_ns_identifier() { 89 | assert_eq!(process_expr("Test"), enb!(0, 4, 
Expr_::Path(Path::identifier(false, "Test".into())))); 90 | assert_eq!(process_expr("Test\\Abc"), enb!(0, 8, Expr_::Path(Path::ns_identifier(false, "Test".into(), "Abc".into())))); 91 | assert_eq!(process_expr("Test\\Ns1\\Ns2"), enb!(0, 12, Expr_::Path(Path::ns_identifier(false, "Test\\Ns1".into(), "Ns2".into())))); 92 | assert_eq!(process_expr("\\Test\\Ns1\\Ns2\\Ns3"), enb!(0, 17, Expr_::Path(Path::ns_identifier(true, "Test\\Ns1\\Ns2".into(), "Ns3".into())))); 93 | } 94 | 95 | #[test] 96 | fn parse_expr_object_property() { 97 | //TODO: fix line numbers 98 | assert_eq!(process_expr(r#"$obj->prop"#), enb!(0,10, Expr_::ObjMember(eb!(0,4, Expr_::Variable("obj".into())), vec![ 99 | enb!(6,10, Expr_::Path(Path::identifier(false, "prop".into()))) 100 | ]))); 101 | assert_eq!(process_expr(r#"$obj->a->b->c->d"#), enb!(0,16, Expr_::ObjMember(eb!(0,4, Expr_::Variable("obj".into())), 102 | vec![enb!(6,7, Expr_::Path(Path::identifier(false, "a".into()))), enb!(9,10, Expr_::Path(Path::identifier(false, "b".into()))), 103 | enb!(12,13, Expr_::Path(Path::identifier(false, "c".into()))), enb!(15,16, Expr_::Path(Path::identifier(false, "d".into())))]) 104 | )); 105 | assert_eq!(process_expr(r#"$obj->$a->b"#), enb!(0,11, Expr_::ObjMember(eb!(0,4, Expr_::Variable("obj".into())), vec![ 106 | enb!(6,8, Expr_::Variable("a".into())), enb!(10,11, Expr_::Path(Path::identifier(false, "b".into()))) ]) 107 | )); 108 | assert_eq!(process_expr("$obj->{$obj->b}->c"), enb!(0,18, Expr_::ObjMember(eb!(0,4, Expr_::Variable("obj".into())), vec![ 109 | enb!(7,14, Expr_::ObjMember(eb!(7,11, Expr_::Variable("obj".into())), vec![ enb!(13,14, Expr_::Path(Path::identifier(false, "b".into()))) ])), 110 | enb!(17,18, Expr_::Path(Path::identifier(false, "c".into()))) 111 | ])) 112 | ); 113 | assert_eq!(process_expr("$obj->{$a->{$b->c}->d}->e"), enb!(0,25, Expr_::ObjMember(eb!(0,4, Expr_::Variable("obj".into())), vec![ 114 | enb!(7,21, Expr_::ObjMember(eb!(7,9, Expr_::Variable("a".into())), vec![ 115 | 
enb!(12,17, Expr_::ObjMember(eb!(12,14, Expr_::Variable("b".into())), vec![ enb!(16,17, Expr_::Path(Path::identifier(false, "c".into()))) ])), 116 | enb!(20,21, Expr_::Path(Path::identifier(false, "d".into()))) 117 | ])), enb!(24,25, Expr_::Path(Path::identifier(false, "e".into()))) 118 | ]))); 119 | assert_eq!(process_expr(r#"$obj->$a->b()"#), enb!(0,13, Expr_::Call(eb!(0,11, Expr_::ObjMember( 120 | eb!(0,4, Expr_::Variable("obj".into())), 121 | vec![ enb!(6,8, Expr_::Variable("a".into())), enb!(10,11, Expr_::Path(Path::identifier(false, "b".into()))) ] 122 | )), vec![]))); 123 | } 124 | 125 | #[test] 126 | fn parse_expr_array_idx() { 127 | assert_eq!(process_expr(r#"$test["a"]"#), enb!(0,10, Expr_::ArrayIdx(eb!(0,5, Expr_::Variable("test".into())), vec![ Some(enb!(6,9, Expr_::String("a".into()))) ]))); 128 | assert_eq!(process_expr(r#"$test[9]"#), enb!(0,8, Expr_::ArrayIdx(eb!(0,5, Expr_::Variable("test".into())), vec![ Some(enb!(6,7, Expr_::Int(9))) ]))); 129 | assert_eq!(process_expr(r#"$test["a"]['b\n']"#), enb!(0,17, Expr_::ArrayIdx(eb!(0,5, Expr_::Variable("test".into())), vec![ 130 | Some(enb!(6,9, Expr_::String("a".into()))), Some(enb!(11,16, Expr_::String("b\\n".into()))) 131 | ]))); 132 | assert_eq!(process_expr(r#"$test[$g["a"]]["b"]["c"]"#), enb!(0,24, Expr_::ArrayIdx(eb!(0,5, Expr_::Variable("test".into())), vec![ 133 | Some(enb!(6,13, Expr_::ArrayIdx(eb!(6, 8, Expr_::Variable("g".into())), vec![ Some(enb!(9,12,Expr_::String("a".into()))) ] ))), 134 | Some(enb!(15,18, Expr_::String("b".into()))), 135 | Some(enb!(20,23, Expr_::String("c".into()))) 136 | ]))); 137 | } 138 | 139 | #[test] 140 | fn parse_expr_func_call() { 141 | assert_eq!(process_expr(r#"test()"#), enb!(0,6, Expr_::Call(eb!(0,4, Expr_::Path(Path::identifier(false, "test".into()))), vec![]))); 142 | assert_eq!(process_expr(r#"array_()"#), enb!(0,8, Expr_::Call(eb!(0,6, Expr_::Path(Path::identifier(false, "array_".into()))), vec![]))); 143 | assert_eq!(process_expr(r#"func_x(1, 2)"#), 
enb!(0,12, Expr_::Call(eb!(0,6, Expr_::Path(Path::identifier(false, "func_x".into()))), 144 | vec![ enb!(7,8, Expr_::Int(1)), enb!(10,11, Expr_::Int(2)) ] 145 | ))); 146 | assert_eq!(process_expr(r#"func_x(abc(1), 2)"#), enb!(0,17, Expr_::Call(eb!(0,6, Expr_::Path(Path::identifier(false, "func_x".into()))), vec![ 147 | enb!(7,13, Expr_::Call(eb!(7,10, Expr_::Path(Path::identifier(false, "abc".into()))), vec![ enb!(11,12, Expr_::Int(1)) ])), 148 | enb!(15, 16, Expr_::Int(2)) 149 | ]))); 150 | assert_eq!(process_expr(r#"$g[0]()"#), enb!(0,7, Expr_::Call(eb!(0,5, Expr_::ArrayIdx(eb!(0,2, Expr_::Variable("g".into())), vec![ 151 | Some(enb!(3,4, Expr_::Int(0))) ])), vec![]))); 152 | assert_eq!(process_expr(r#"$g[0]()[1](true)"#), enb!(0,16, Expr_::Call( 153 | eb!(0,10, Expr_::ArrayIdx( 154 | eb!(0,7, Expr_::Call( 155 | eb!(0,5, Expr_::ArrayIdx(eb!(0,2, Expr_::Variable("g".into())), vec![ Some(enb!(3,4, Expr_::Int(0))) ])), 156 | vec![] 157 | )), vec![ Some(enb!(8,9, Expr_::Int(1))) ] 158 | )), vec![ enb!(11,15, constant!(true)) ] 159 | ))); 160 | } 161 | 162 | #[test] 163 | fn parse_expr_require() { 164 | assert_eq!(process_expr("abc(require $path)"), enb!(0,18, Expr_::Call(eb!(0,3, Expr_::Path(Path::identifier(false, "abc".into()))), 165 | vec![ enb!(4,17, Expr_::Include(IncludeTy::Require, eb!(12,17, Expr_::Variable("path".into())))) ]) 166 | )); 167 | } 168 | 169 | #[test] 170 | fn parse_expr_isset() { 171 | assert_eq!(process_expr("isset($b)"), enb!(0,9, Expr_::Isset(vec![ enb!(6,8, Expr_::Variable("b".into())) ]))); 172 | } 173 | 174 | #[test] 175 | fn parse_expr_empty() { 176 | assert_eq!(process_expr("empty($b)"), enb!(0,9, Expr_::Empty(eb!(6,8, Expr_::Variable("b".into()))))); 177 | } 178 | 179 | #[test] 180 | fn parse_expr_cast() { 181 | assert_eq!(process_expr("(bool) $test"), enb!(0, 12, Expr_::Cast(Ty::Bool, eb!(7,12, Expr_::Variable("test".into()))))); 182 | assert_eq!(process_expr("(int)$a['a']"), enb!(0, 12, Expr_::Cast(Ty::Int, eb!(5,12, 
Expr_::ArrayIdx(eb!(5,7, Expr_::Variable("a".into())), vec![ 183 | Some(enb!(8,11, Expr_::String("a".into()))) 184 | ]))))); 185 | assert_eq!(process_expr("(int)ab()['c']"), enb!(0,14, Expr_::Cast(Ty::Int, eb!(5,14, Expr_::ArrayIdx( 186 | eb!(5,9, Expr_::Call(eb!(5,7, Expr_::Path(Path::identifier(false, "ab".into()))), vec![])), 187 | vec![ Some(enb!(10,13, Expr_::String("c".into()))) ] 188 | ))))); 189 | } 190 | 191 | #[test] 192 | fn parse_expr_error_control() { 193 | assert_eq!(process_expr("@test()"), enb!(0,7, Expr_::UnaryOp(UnaryOp::SilenceErrors, eb!(1,7, Expr_::Call(eb!(1,5, Expr_::Path(Path::identifier(false, "test".into()))), vec![]))))); 194 | } 195 | 196 | #[test] 197 | fn parse_expr_post_pre_dec_inc() { 198 | assert_eq!(process_expr("$c++"), enb!(0,4, Expr_::UnaryOp(UnaryOp::PostInc, eb!(0,2, Expr_::Variable("c".into()))))); 199 | assert_eq!(process_expr("$c--"), enb!(0,4, Expr_::UnaryOp(UnaryOp::PostDec, eb!(0,2, Expr_::Variable("c".into()))))); 200 | assert_eq!(process_expr("++$c"), enb!(0,4, Expr_::UnaryOp(UnaryOp::PreInc, eb!(2,4, Expr_::Variable("c".into()))))); 201 | assert_eq!(process_expr("--$c"), enb!(0,4, Expr_::UnaryOp(UnaryOp::PreDec, eb!(2,4, Expr_::Variable("c".into()))))); 202 | } 203 | 204 | #[test] 205 | fn parse_expr_static_const() { 206 | assert_eq!(process_expr(r#"Obj::test"#), enb!(0,9, Expr_::StaticMember(eb!(0,3, Expr_::Path(Path::identifier(false, "Obj".into()))), vec![ enb!(5,9, Expr_::Path( 207 | Path::identifier(false, "test".into()))) ])) 208 | ); 209 | } 210 | 211 | #[test] 212 | fn parse_expr_static_property() { 213 | assert_eq!(process_expr("Obj::$test"), enb!(0,10, Expr_::StaticMember(eb!(0,3, Expr_::Path(Path::identifier(false, "Obj".into()))), vec![ enb!(5,10, Expr_::Variable("test".into())) ]))); 214 | assert_eq!(process_expr("Obj::$a::$b"), enb!(0,11, Expr_::StaticMember(eb!(0,3, Expr_::Path(Path::identifier(false, "Obj".into()))), vec![ 215 | enb!(5,7, Expr_::Variable("a".into())), enb!(9,11, 
Expr_::Variable("b".into())) 216 | ]))); 217 | assert_eq!(process_expr("Obj::test()"), enb!(0,11, Expr_::Call(eb!(0,9, Expr_::StaticMember(eb!(0,3, Expr_::Path(Path::identifier(false, "Obj".into()))), vec![ 218 | enb!(5,9, Expr_::Path(Path::identifier(false, "test".into()))) 219 | ])), vec![]))); 220 | assert_eq!(process_expr("static::$abc"), enb!(0,12, Expr_::StaticMember(eb!(0,6, Expr_::Path(Path::identifier(false, "static".into()))), vec![ enb!(8,12, Expr_::Variable("abc".into())) ]))); 221 | } 222 | 223 | #[test] 224 | fn parse_expr_ternary() { 225 | assert_eq!(process_expr("$test?true:false"), enb!(0,16, Expr_::TernaryIf(eb!(0,5, Expr_::Variable("test".into())), 226 | Some(eb!(6,10, constant!(true))), eb!(11,16, constant!(false))) 227 | )); 228 | assert_eq!(process_expr("!$test?true:false"), enb!(0,17, Expr_::TernaryIf(eb!(0,6, Expr_::UnaryOp(UnaryOp::Not, 229 | eb!(1,6, Expr_::Variable("test".into())))), Some(eb!(7,11, constant!(true))), eb!(12,17, constant!(false))) 230 | )); 231 | assert_eq!(process_expr("true?'y':false?'n':'y'"), enb!(0,22, Expr_::TernaryIf(eb!(0,14, Expr_::TernaryIf( 232 | eb!(0,4, constant!(true)), 233 | Some(eb!(5,8, Expr_::String("y".into()))), 234 | eb!(9,14, constant!(false)), 235 | )), 236 | Some(eb!(15,18, Expr_::String("n".into()))), 237 | eb!(19,22, Expr_::String("y".into())), 238 | ))); 239 | } 240 | 241 | #[test] 242 | fn parse_expr_new() { 243 | assert_eq!(process_expr("new TestA()"), enb!(0,11, Expr_::New(eb!(4,9, Expr_::Path(Path::identifier(false, "TestA".into()))), vec![]))); 244 | assert_eq!(process_expr("new Foo\\Bar()"), enb!(0,13, Expr_::New(eb!(4,11, Expr_::Path(Path::ns_identifier(false, "Foo".into(), "Bar".into()))), vec![]))); 245 | assert_eq!(process_expr("new Foo"), enb!(0,7, Expr_::New(eb!(4,7, Expr_::Path(Path::identifier(false, "Foo".into()))), vec![]))); 246 | } 247 | 248 | #[test] 249 | fn parse_expr_clone() { 250 | assert_eq!(process_expr("clone $test"), enb!(0,5, Expr_::Clone(eb!(6,11, 
Expr_::Variable("test".into()))))); 251 | } 252 | 253 | #[test] 254 | fn parse_expr_array_append() { 255 | // for now we support append-expressions like that, TODO: figure out error reporting (AST_Node -> Position in source file) 256 | assert_eq!(process_expr(r#"$test[]=1"#), enb!(0,9, Expr_::Assign(eb!(0,7, Expr_::ArrayIdx(eb!(0,5, Expr_::Variable("test".into())), vec![ None ])), 257 | eb!(8,9, Expr_::Int(1)) 258 | ))); 259 | } 260 | 261 | #[test] 262 | fn parse_expr_assign() { 263 | assert_eq!(process_expr("($b=4)"), enb!(0,6, Expr_::Assign(eb!(1,3, Expr_::Variable("b".into())), eb!(4,5, Expr_::Int(4))))); 264 | assert_eq!(process_expr("!($b=1)"), enb!(0,7, Expr_::UnaryOp(UnaryOp::Not, eb!(1,7, Expr_::Assign(eb!(2,4, Expr_::Variable("b".into())), 265 | eb!(5,6, Expr_::Int(1))))))); 266 | assert_eq!(process_expr("!$b=1"), enb!(0,5, Expr_::UnaryOp(UnaryOp::Not, eb!(1,5, Expr_::Assign(eb!(1,3, Expr_::Variable("b".into())), 267 | eb!(4,5, Expr_::Int(1))))))); 268 | } 269 | 270 | #[test] 271 | fn parse_expr_array() { 272 | assert_eq!(process_expr("[]"), enb!(0,2, Expr_::Array(vec![]))); 273 | assert_eq!(process_expr("[1,]"), enb!(0,4, Expr_::Array(vec![ (None, enb!(1,2, Expr_::Int(1))) ]))); 274 | assert_eq!(process_expr("[1, 2]"), enb!(0,6, Expr_::Array(vec![ 275 | (None, enb!(1,2, Expr_::Int(1))), (None, enb!(4,5, Expr_::Int(2))) 276 | ]))); 277 | assert_eq!(process_expr("[1, [2, 3], 3]"), enb!(0,14, Expr_::Array(vec![ 278 | (None, enb!(1,2, Expr_::Int(1))), (None, enb!(4,10, Expr_::Array(vec![ 279 | (None, enb!(5,6, Expr_::Int(2))), (None, enb!(8,9, Expr_::Int(3))) 280 | ]))), 281 | (None, enb!(12,13, Expr_::Int(3))) 282 | ]))); 283 | assert_eq!(process_expr("array()"), enb!(0,7, Expr_::Array(vec![]))); 284 | } 285 | 286 | #[test] 287 | fn parse_expr_assoc_array() { 288 | assert_eq!(process_expr("['a' => 'b']"), enb!(0,12, Expr_::Array(vec![ 289 | (Some(enb!(1,4, Expr_::String("a".into()))), enb!(8,11, Expr_::String("b".into()))) 290 | ]))); 291 | } 292 | 293 | 
#[test] 294 | fn parse_expr_closure() { 295 | assert_eq!(process_expr("function () { c(); }"), enb!(0,20, Expr_::Function(FunctionDecl { 296 | params: vec![], 297 | body: Some(Block(vec![ senb!(14,17, Expr_::Call(eb!(14,15, Expr_::Path(Path::identifier(false, "c".into()))), vec![])) ])), usev: vec![], ret_ref: false, ret_ty: None, 298 | }))); 299 | } 300 | 301 | #[test] 302 | fn parse_expr_priority_parents_call() { 303 | assert_eq!(process_expr("(new Factory)->test"), enb!(0,19, Expr_::ObjMember(eb!(0,13, Expr_::New(eb!(5,12, Expr_::Path(Path::identifier(false, "Factory".into()))), vec![])), 304 | vec![ enb!(15,19, Expr_::Path(Path::identifier(false, "test".into()))) ] 305 | ))); 306 | assert_eq!(process_expr("(new $obj)->method()"), enb!(0,20, Expr_::Call(eb!(0,18, Expr_::ObjMember(eb!(0,10, Expr_::New(eb!(5,9, Expr_::Variable("obj".into())), vec![])), 307 | vec![ enb!(12,18, Expr_::Path(Path::identifier(false, "method".into()))) ] 308 | )), vec![]))); 309 | } 310 | 311 | #[test] 312 | fn parse_expr_unpack() { 313 | assert_eq!(process_expr("t(...$email)"), enb!(0,12, Expr_::Call(eb!(0,1, Expr_::Path(Path::identifier(false, "t".into()))), vec![ 314 | enb!(2,11, Expr_::Unpack(eb!(5,11, Expr_::Variable("email".into())))) 315 | ]))); 316 | } 317 | -------------------------------------------------------------------------------- /src/tests/stmt.rs: -------------------------------------------------------------------------------- 1 | use parser::*; 2 | 3 | fn process_stmt(input: &str) -> Stmt { 4 | let str_ = " $v) { ok(); }"), st!(0,37, Stmt_::ForEach( 190 | eb!(9,14, Expr_::Variable("test".into())), 191 | Some(eb!(18,20, Expr_::Variable("k".into()))), // key 192 | eb!(24,26, Expr_::Variable("v".into())), // value 193 | Block(vec![ senb!(30,34, Expr_::Call(eb!(30,32, Expr_::Path(Path::identifier(false, "ok".into()))), vec![])) ]) //body 194 | ))); 195 | } 196 | 197 | #[test] 198 | fn parse_stmt_instanceof() { 199 | assert_eq!(process_stmt("if ($result instanceof 
Response) { return $result; }"), st!(0,52, Stmt_::If( 200 | eb!(4,31, Expr_::InstanceOf(eb!(4,11, Expr_::Variable("result".into())), eb!(23,31,Expr_::Path(Path::identifier(false, "Response".into()))))), 201 | Block(vec![ st!(35,50, Stmt_::Return(Some(eb!(42,49,Expr_::Variable("result".into()))))) ]), 202 | Block::empty(), 203 | ))); 204 | } 205 | 206 | #[test] 207 | fn parse_stmt_new_as_param() { 208 | assert_eq!(process_stmt("r(new Foo);"), senb!(0,10, Expr_::Call(eb!(0,1, Expr_::Path(Path::identifier(false, "r".into()))), vec![ 209 | enb!(2,9, Expr_::New(eb!(6,9, Expr_::Path(Path::identifier(false, "Foo".into()))), vec![])) 210 | ]))); 211 | } 212 | 213 | #[test] 214 | fn parse_stmt_try() { 215 | assert_eq!(process_stmt(r#"try { echo "ok"; } catch (Exception $e) { return false;}"#), st!(0, 56, Stmt_::Try( 216 | Block(vec![ st!(6,16, Stmt_::Echo(vec![ enb!(11,15, Expr_::String("ok".into())) ])) ]), 217 | vec![ CatchClause { ty: Path::identifier(false, "Exception".into()), var: "e".into(), 218 | block: Block(vec![ st!(42,55, Stmt_::Return(Some(eb!(49,54, constant!(false))))) ]), 219 | } ], 220 | None, 221 | ))); 222 | assert_eq!(process_stmt(r#"try { echo "ok"; } catch (Exception $e) { return false; } catch (Throwable $e) { return true; }"#), st!(0,95, Stmt_::Try( 223 | Block(vec![ st!(6,16, Stmt_::Echo(vec![ enb!(11,15, Expr_::String("ok".into())) ])) ]), 224 | vec![ 225 | CatchClause { ty: Path::identifier(false, "Exception".into()), var: "e".into(), block: Block(vec![ st!(42,55, Stmt_::Return(Some(eb!(49,54, constant!(false))))) ]) }, 226 | CatchClause { ty: Path::identifier(false, "Throwable".into()), var: "e".into(), block: Block(vec![ st!(81,93, Stmt_::Return(Some(eb!(88,92, constant!(true))))) ]) }, 227 | ], 228 | None, 229 | ))); 230 | } 231 | 232 | #[test] 233 | fn parse_stmt_use() { 234 | assert_eq!(process_stmt("use Test;"), st!(0,9, Stmt_::Use(vec![ UseClause::QualifiedName(Path::identifier(false, "Test".into()), None) ]))); 235 | 
assert_eq!(process_stmt(r#"use Ab\Cd\Ef\Gh\Ij as Ga;"#), st!(0, 25, Stmt_::Use(vec![UseClause::QualifiedName( 236 | Path::ns_identifier(false, "Ab\\Cd\\Ef\\Gh".into(), "Ij".into()), 237 | Some("Ga".into())) 238 | ]))); 239 | assert_eq!(process_stmt(r#"use \FQNS\Test;"#), st!(0,15, Stmt_::Use(vec![ UseClause::QualifiedName(Path::ns_identifier(true, "FQNS".into(), "Test".into()), None) ]))); 240 | } 241 | 242 | #[test] 243 | fn parse_stmt_switch() { 244 | assert_eq!(process_stmt(r#"switch ($test) { case 1: echo "1"; break; default: echo "2"; }"#), st!(0,62, Stmt_::Switch( 245 | eb!(8,13, Expr_::Variable("test".into())), vec![ 246 | SwitchCase { default: false, conds: vec![ enb!(22,23, Expr_::Int(1)) ], block: Block(vec![ 247 | st!(25,34, Stmt_::Echo(vec![ enb!(30,33, Expr_::String("1".into())) ])), st!(35,41, Stmt_::Break(None)) 248 | ])}, 249 | SwitchCase { default: true, conds: vec![], block: Block(vec![ st!(51,60, Stmt_::Echo(vec![ enb!(56,59, Expr_::String("2".into())) ])) ]) }, 250 | ] 251 | ))); 252 | assert_eq!(process_stmt(r#"switch ($test) { case 1: echo "1"; default: echo "2"; }"#), st!(0,55, Stmt_::Switch(eb!(8,13, Expr_::Variable("test".into())), 253 | vec![ 254 | SwitchCase { default: false, conds: vec![ enb!(22,23, Expr_::Int(1)) ], block: Block(vec![ 255 | st!(25,34, Stmt_::Echo(vec![ enb!(30,33, Expr_::String("1".into())) ])) 256 | ]) }, 257 | SwitchCase { default: true, conds: vec![], block: Block(vec![ st!(44,53, Stmt_::Echo(vec![ enb!(49,52, Expr_::String("2".into())) ])) ]) } 258 | ]))); 259 | assert_eq!(process_stmt("switch ($test) { case 1: case 2: echo 1; }"), st!(0,42, Stmt_::Switch(eb!(8,13, Expr_::Variable("test".into())), vec![ 260 | SwitchCase { default: false, conds: vec![ enb!(22,23, Expr_::Int(1)), enb!(30,31, Expr_::Int(2)) ], block: Block(vec![ 261 | st!(33,40, Stmt_::Echo(vec![ enb!(38,39, Expr_::Int(1)) ])) 262 | ])} 263 | ]))); 264 | assert_eq!(process_stmt("switch ($test) { case 1: case 2: case 3: case 4: echo 1; }"), st!(0,58, 
Stmt_::Switch(eb!(8,13, Expr_::Variable("test".into())), vec![ 265 | SwitchCase { default: false, 266 | conds: vec![ enb!(22,23, Expr_::Int(1)), enb!(30,31, Expr_::Int(2)), enb!(38,39, Expr_::Int(3)), enb!(46,47, Expr_::Int(4)) ], 267 | block: Block(vec![ st!(49,56, Stmt_::Echo(vec![ enb!(54,55, Expr_::Int(1)) ]))]) 268 | } 269 | ]))); 270 | } 271 | 272 | #[test] 273 | fn parse_stmt_goto() { 274 | assert_eq!(process_stmt("goto hallo_welt;"), st!(0, 16, Stmt_::Goto("hallo_welt".into()))); 275 | } 276 | 277 | #[test] 278 | fn parse_stmt_func_decl() { 279 | assert_eq!(process_stmt("function test() { ok(); }"), st!(0,25, Stmt_::Decl(Decl::GlobalFunction("test".into(), FunctionDecl { params: vec![], 280 | body: Some(Block(vec![ senb!(18,22, Expr_::Call(eb!(18,20, Expr_::Path(Path::identifier(false, "ok".into()))), vec![])) ])), usev: vec![], ret_ref: false, ret_ty: None }) 281 | ))); 282 | assert_eq!(process_stmt("function &test() { ok(); }"), st!(0,26, Stmt_::Decl(Decl::GlobalFunction("test".into(), FunctionDecl { params: vec![], 283 | body: Some(Block(vec![ senb!(19,23, Expr_::Call(eb!(19,21, Expr_::Path(Path::identifier(false, "ok".into()))), vec![])) ])), usev: vec![], ret_ref: true, ret_ty: None }) 284 | ))); 285 | assert_eq!(process_stmt("function test($a) { ok(); }"), st!(0,27, Stmt_::Decl(Decl::GlobalFunction("test".into(), FunctionDecl { 286 | params: vec![ParamDefinition { name: "a".into(), as_ref: false, variadic: false, ty: None, default: None }], 287 | body: Some(Block(vec![ senb!(20,24, Expr_::Call(eb!(20,22, Expr_::Path(Path::identifier(false, "ok".into()))), vec![])) ])), usev: vec![], ret_ref: false, ret_ty: None }) 288 | ))); 289 | assert_eq!(process_stmt("function test($a, $b) { ok(); }"), st!(0,31, Stmt_::Decl(Decl::GlobalFunction("test".into(), FunctionDecl { 290 | params: vec![ 291 | ParamDefinition { name: "a".into(), as_ref: false, variadic: false, ty: None, default: None }, 292 | ParamDefinition { name: "b".into(), as_ref: false, variadic: 
false, ty: None, default: None } 293 | ], 294 | body: Some(Block(vec![ senb!(24,28, Expr_::Call(eb!(24,26, Expr_::Path(Path::identifier(false, "ok".into()))), vec![])) ])), usev: vec![], ret_ref: false, ret_ty: None }) 295 | ))); 296 | assert_eq!(process_stmt("function test(...$a) { ok(); }"), st!(0,30, Stmt_::Decl(Decl::GlobalFunction("test".into(), FunctionDecl { 297 | params: vec![ParamDefinition { name: "a".into(), as_ref: false, variadic: true, ty: None, default: None }], 298 | body: Some(Block(vec![ senb!(23,27, Expr_::Call(eb!(23,25, Expr_::Path(Path::identifier(false, "ok".into()))), vec![])) ])), usev: vec![], ret_ref: false, ret_ty: None }) 299 | ))); 300 | } 301 | 302 | #[test] 303 | fn parse_stmt_func_decl_ret() { 304 | assert_eq!(process_stmt("function test() : bool { ok(); }"), st!(0,32, Stmt_::Decl(Decl::GlobalFunction("test".into(), FunctionDecl { params: vec![], 305 | body: Some(Block(vec![ senb!(25,29, Expr_::Call(eb!(25,27, Expr_::Path(Path::identifier(false, "ok".into()))), vec![])) ])), usev: vec![], ret_ref: false, 306 | ret_ty: Some(NullableTy::NonNullable(Ty::Bool)) }) 307 | ))); 308 | assert_eq!(process_stmt(r#"function test() : \foo\bar { ok(); }"#), st!(0,36, Stmt_::Decl(Decl::GlobalFunction("test".into(), FunctionDecl { params: vec![], 309 | body: Some(Block(vec![ senb!(29,33, Expr_::Call(eb!(29,31, Expr_::Path(Path::identifier(false, "ok".into()))), vec![])) ])), usev: vec![], ret_ref: false, 310 | ret_ty: Some(NullableTy::NonNullable(Ty::Object(Some(Path::ns_identifier(true, "foo".into(), "bar".into()))))) }) 311 | ))); 312 | assert_eq!(process_stmt(r#"function test() : ?FoooBar { ok(); }"#), st!(0,36, Stmt_::Decl(Decl::GlobalFunction("test".into(), FunctionDecl { params: vec![], 313 | body: Some(Block(vec![ senb!(29,33, Expr_::Call(eb!(29,31, Expr_::Path(Path::identifier(false, "ok".into()))), vec![])) ])), usev: vec![], ret_ref: false, 314 | ret_ty: Some(NullableTy::Nullable(Ty::Object(Some(Path::identifier(false, 
"FoooBar".into()))))) }) 315 | ))); 316 | } 317 | 318 | #[test] 319 | fn parse_func_decl_typehint() { 320 | assert_eq!(process_stmt("function test(Test $a) { ok(); }"), st!(0,32, Stmt_::Decl(Decl::GlobalFunction("test".into(), FunctionDecl { 321 | params: vec![ ParamDefinition { name: "a".into(), as_ref: false, variadic: false, 322 | ty: Some(NullableTy::NonNullable(Ty::Object(Some(Path::identifier(false, "Test".into()))))), default: None } 323 | ], 324 | body: Some(Block(vec![ senb!(25,29, Expr_::Call(eb!(25,27, Expr_::Path(Path::identifier(false, "ok".into()))), vec![])) ])), usev: vec![], ret_ref: false, ret_ty: None }) 325 | ))); 326 | } 327 | 328 | #[test] 329 | fn parse_class_decl() { 330 | assert_eq!(process_stmt("class Test {}"), st!(0,13, Stmt_::Decl(Decl::Class(ClassDecl { 331 | cmod: ClassModifiers::none(), name: "Test".into(), base_class: None, implements: vec![], members: vec![] 332 | })))); 333 | assert_eq!(process_stmt("final class Test {}"), st!(0,19, Stmt_::Decl(Decl::Class(ClassDecl { 334 | cmod: ClassModifiers::new(&[ClassModifier::Final]), name: "Test".into(), base_class: None, implements: vec![], members: vec![] 335 | })))); 336 | assert_eq!(process_stmt("class Test extends Abc\\Test2 {}"), st!(0,31, Stmt_::Decl(Decl::Class(ClassDecl { 337 | cmod: ClassModifiers::none(), name: "Test".into(), base_class: Some(Path::ns_identifier(false, "Abc".into(), "Test2".into())), implements: vec![], members: vec![] 338 | })))); 339 | assert_eq!(process_stmt("class Test implements ITest {}"), st!(0,30, Stmt_::Decl(Decl::Class(ClassDecl { 340 | cmod: ClassModifiers::none(), name: "Test".into(), base_class: None, implements: vec![Path::identifier(false, "ITest".into())], members: vec![] 341 | })))); 342 | } 343 | 344 | #[test] 345 | fn parse_class_properties() { 346 | assert_eq!(process_stmt("class Test { public $test; }"), st!(0,28, Stmt_::Decl(Decl::Class(ClassDecl { 347 | cmod: ClassModifiers::none(), name: "Test".into(), base_class: None, implements: 
vec![],
        members: vec![ Member::Property(MemberModifiers::new(&[MemberModifier::Public]), "test".into(), None)],
    }))));
    assert_eq!(process_stmt("class Test { protected $ab = []; }"), st!(0,34, Stmt_::Decl(Decl::Class(ClassDecl {
        cmod: ClassModifiers::none(), name: "Test".into(), base_class: None, implements: vec![],
        members: vec![ Member::Property(MemberModifiers::new(&[MemberModifier::Protected]), "ab".into(), Some(enb!(29,31, Expr_::Array(vec![])))) ],
    }))));
}

/// A class constant without modifiers.
#[test]
fn parse_class_const() {
    assert_eq!(process_stmt("class Test { const C=true; }"), st!(0, 28, Stmt_::Decl(Decl::Class(ClassDecl {
        cmod: ClassModifiers::none(), name: "Test".into(), base_class: None, implements: vec![],
        members: vec![ Member::Constant(MemberModifiers::none(), "C".into(), enb!(21,25, constant!(true))) ]
    }))));
}

/// Methods: a trivial one, and a constructor with an `array` parameter that has a default.
#[test]
fn parse_class_methods() {
    assert_eq!(process_stmt("class Test { public function a() { run(); } }"), st!(0,45, Stmt_::Decl(Decl::Class(ClassDecl {
        cmod: ClassModifiers::none(), name: "Test".into(), base_class: None, implements: vec![],
        members: vec![ Member::Method(MemberModifiers::new(&[MemberModifier::Public]), "a".into(), FunctionDecl {
            params: vec![], body: Some(Block(vec![ senb!(35,40, Expr_::Call(eb!(35,38, Expr_::Path(Path::identifier(false, "run".into()))), vec![])) ])),
            usev: vec![], ret_ref: false, ret_ty: None,
        })]
    }))));
    assert_eq!(process_stmt("class Test { public function __construct(array $param1 = []) { $this->param = $param1; } }"),
        st!(0,90, Stmt_::Decl(Decl::Class(ClassDecl {
            cmod: ClassModifiers::none(), name: "Test".into(), base_class: None, implements: vec![],
            members: vec![ Member::Method(MemberModifiers::new(&[MemberModifier::Public]), "__construct".into(), FunctionDecl {
                params: vec![ParamDefinition { name: "param1".into(), as_ref: false, variadic: false, ty: Some(NullableTy::NonNullable(Ty::Array)),
                    default: Some(enb!(57,59, Expr_::Array(vec![]))) }
                ],
                body: Some(Block(vec![ senb!(63,85, Expr_::Assign(eb!(63,75, Expr_::ObjMember(eb!(63,68, Expr_::Variable("this".into())), vec![
                    enb!(70,75, Expr_::Path(Path::identifier(false, "param".into()))) ])), eb!(78,85, Expr_::Variable("param1".into()))))
                ])), usev: vec![], ret_ref: false, ret_ty: None,
            })]
        })))
    );
}

/// A simple `use Trait;` inside a class body, without any adaptation rules.
#[test]
fn parse_class_trait_use() {
    assert_eq!(process_stmt("class Test { use Abc; }"), st!(0,23, Stmt_::Decl(Decl::Class(ClassDecl { name: "Test".into(), base_class: None, implements: vec![], members: vec![
        Member::TraitUse(vec![Path::identifier(false, "Abc".into())], vec![])
    ], cmod: ClassModifiers::none() }))));
}

/// `list(...) = expr;` is expected as an assignment whose target is `Expr_::List`.
#[test]
fn parse_stmt_list() {
    assert_eq!(process_stmt("list($a, $b) = test();"), senb!(0,21, Expr_::Assign(eb!(0,12, Expr_::List(
        vec![ (None, enb!(5,7, Expr_::Variable("a".into()))), (None, enb!(9,11, Expr_::Variable("b".into()))) ]
    )), eb!(15,21, Expr_::Call(eb!(15,19, Expr_::Path(Path::identifier(false, "test".into()))), vec![])))));
}

/// Trait declarations: empty, and one that itself uses two other traits.
#[test]
fn parse_trait_decl() {
    assert_eq!(process_stmt("trait Test {}"), st!(0,13, Stmt_::Decl(Decl::Trait("Test".into(), vec![]))));
    // http://php.net/manual/de/language.oop5.traits.php
    assert_eq!(process_stmt("trait HelloWorld {use Hello, World;}"), st!(0,36, Stmt_::Decl(Decl::Trait("HelloWorld".into(), vec![
        Member::TraitUse(vec![Path::identifier(false, "Hello".into()), Path::identifier(false, "World".into())], vec![])
    ]))));
}

/// Interface declarations: empty, and one with a body-less (`body: None`) method.
#[test]
fn parse_interface_decl() {
    assert_eq!(process_stmt("interface ITest {}"), st!(0,18, Stmt_::Decl(Decl::Interface("ITest".into(), vec![], vec![]))));
    assert_eq!(process_stmt("interface ITest { public function test(); }"), st!(0,43, Stmt_::Decl(
        Decl::Interface("ITest".into(), vec![], vec![ Member::Method(MemberModifiers::new(&[MemberModifier::Public]),
            "test".into(), FunctionDecl {params: vec![], body: None, usev: vec![], ret_ref: false, ret_ty: None})
        ])
    )));
}

/// Trait use with conflict resolution (`insteadof`) and aliasing (`as`).
#[test]
fn parse_class_use_trait_complex() {
    // http://php.net/manual/de/language.oop5.traits.php
    // The whitespace inside this literal is significant: the expected span below is 0..163,
    // which is exactly the byte length of the string with this indentation.
    let code = "class Aliased_Talker {
        use A, B {
            B::smallTalk insteadof A;
            A::bigTalk insteadof B;
            B::bigTalk as talk;
        }
    }";
    assert_eq!(process_stmt(code), st!(0,163, Stmt_::Decl(Decl::Class(ClassDecl {
        cmod: ClassModifiers::none(), name: "Aliased_Talker".into(), base_class: None, implements: vec![], members: vec![
            Member::TraitUse(vec![Path::identifier(false, "A".into()), Path::identifier(false, "B".into())], vec![
                TraitUse::InsteadOf(Path::identifier(false, "B".into()), "smallTalk".into(), vec![Path::identifier(false, "A".into())]),
                TraitUse::InsteadOf(Path::identifier(false, "A".into()), "bigTalk".into(), vec![Path::identifier(false, "B".into())]),
                TraitUse::As(Some(Path::identifier(false, "B".into())), "bigTalk".into(), MemberModifiers::none(), Some("talk".into())),
            ])
        ]
    }))));
}

/// `static $var = value;` inside a function body.
#[test]
fn parse_static_decl() {
    assert_eq!(process_stmt("static $t=true;"), st!(0,15, Stmt_::Decl(Decl::StaticVars(vec![ ("t".into(), Some(enb!(10,14, constant!(true)))) ]))));
}

/// `global $var;`
#[test]
fn parse_global_decl() {
    assert_eq!(process_stmt("global $t;"), st!(0,10, Stmt_::Decl(Decl::GlobalVars(vec![ "t".into() ]))));
}

/// A closure capturing a variable by value through `use (...)`.
#[test]
fn parse_stmt_closure_use() {
    assert_eq!(process_stmt("return function () use ($t) {};"), st!(0,31, Stmt_::Return(Some(eb!(7,30, Expr_::Function(FunctionDecl {
        params: vec![], body: Some(Block(vec![])), usev: vec![(false, "t".into())], ret_ref: false, ret_ty: None,
    }))))));
}

#[test]
fn
parse_namespace_decl() { 461 | assert_eq!(process_stmt("namespace Foo\\Bar;"), st!(0,17, Stmt_::Decl(Decl::Namespace(Path::ns_identifier(false, "Foo".into(), "Bar".into()))))); 462 | } 463 | 464 | #[test] 465 | fn parse_label_decl() { 466 | assert_eq!(process_stmt("test_abc:"), st!(0, 9, Stmt_::Decl(Decl::Label("test_abc".into())))); 467 | } 468 | -------------------------------------------------------------------------------- /src/printer.rs: -------------------------------------------------------------------------------- 1 | /// ! a pretty-ish printer 2 | use std::fmt::{self, Write}; 3 | use std::borrow::Borrow; 4 | use ast::{Block, ClassModifiers, ClassModifier, Decl, FunctionDecl, Stmt, Stmt_, Expr, 5 | Expr_, IncludeTy, Op, Path, UnaryOp, Ty, NullableTy, TraitUse, UseClause}; 6 | use ast::{Member, MemberModifiers, MemberModifier, Variable}; 7 | 8 | pub struct PrettyPrinter { 9 | indentation: usize, 10 | target: W, 11 | } 12 | 13 | impl PrettyPrinter { 14 | pub fn new(target: W) -> PrettyPrinter { 15 | PrettyPrinter { 16 | indentation: 0, 17 | target: target, 18 | } 19 | } 20 | 21 | pub fn print_statements(target: W, stmts: Vec) -> fmt::Result { 22 | let mut printer = PrettyPrinter::new(target); 23 | for stmt in stmts { 24 | try!(printer.print_statement(&stmt)); 25 | } 26 | Ok(()) 27 | } 28 | 29 | fn writeln(&mut self, text: &str) -> fmt::Result { 30 | try!(self.write_indented(text)); 31 | write!(self.target, "\n") 32 | } 33 | 34 | fn write_indented(&mut self, text: &str) -> fmt::Result { 35 | for _ in 0..self.indentation { 36 | try!(write!(self.target, " ")); 37 | } 38 | write!(self.target, "{}", text) 39 | } 40 | 41 | fn write(&mut self, text: &str) -> fmt::Result { 42 | write!(self.target, "{}", text) 43 | } 44 | 45 | fn print_argument_list(&mut self, args: &[Expr]) -> fmt::Result { 46 | for (i, arg) in args.iter().enumerate() { 47 | if i > 0 { 48 | try!(self.write(", ")); 49 | } 50 | try!(self.print_expression(arg)); 51 | } 52 | Ok(()) 53 | } 54 | 55 | fn 
print_member_body(&mut self, members: &[Member]) -> fmt::Result {
        // Emits ` {`, one member per line at the next indentation level, then the closing `}`.
        // The closing brace is written without a trailing newline.
        try!(self.write(" {\n"));
        self.indentation += 1;
        for member in members {
            try!(self.print_member(member));
            try!(self.write("\n"));
        }
        self.indentation -= 1;
        self.write_indented("}")
    }

    /// Prints a declaration statement (namespace, function, class, interface, trait, ...).
    fn print_decl(&mut self, decl: &Decl) -> fmt::Result {
        match *decl {
            Decl::Namespace(ref path) => {
                try!(self.write_indented("namespace "));
                try!(write!(self.target, "{}", path));
                self.write(";\n")
            }
            Decl::GlobalFunction(ref name, ref decl) => {
                // write_indented("") only emits the leading indentation
                try!(self.write_indented(""));
                self.print_function(decl, Some(name.borrow()))
            }
            Decl::Class(ref classdecl) => {
                try!(self.write_indented(""));
                // the Display impl of the modifiers ends every modifier with a space
                try!(write!(self.target, "{}", classdecl.cmod));
                try!(self.write("class "));
                try!(self.write(classdecl.name.borrow()));
                try!(self.write(" "));
                if let Some(ref base_class) = classdecl.base_class {
                    try!(self.write("extends "));
                    try!(write!(self.target, "{} ", base_class));
                }
                if !classdecl.implements.is_empty() {
                    try!(self.write("implements "));
                    for (i, iface) in classdecl.implements.iter().enumerate() {
                        if i > 0 {
                            try!(self.write(", "));
                        }
                        try!(write!(self.target, "{}", iface));
                    }
                }
                self.print_member_body(&classdecl.members)
            }
            Decl::Interface(ref name, ref implements, ref members) => {
                try!(self.write_indented("interface "));
                try!(self.write(name.borrow()));
                // the `implements` list of an interface is printed with the `extends` keyword
                if !implements.is_empty() {
                    try!(self.write(" extends "));
                    for (i, iface) in implements.iter().enumerate() {
                        if i > 0 {
                            try!(self.write(", "));
                        }
                        try!(write!(self.target, "{}", iface));
                    }
                }
                self.print_member_body(members)
            }
            Decl::Trait(ref name, ref members) => {
                try!(self.write_indented("trait "));
                try!(self.write(name.borrow()));
                self.print_member_body(members)
            }
Decl::StaticVars(ref vars) => { 118 | try!(self.write_indented("static ")); 119 | for (i, &(ref varname, ref value)) in vars.iter().enumerate() { 120 | if i > 0 { 121 | try!(self.write(", ")); 122 | } 123 | try!(self.write("$")); 124 | try!(self.write(varname.borrow())); 125 | if let Some(ref value) = *value { 126 | try!(self.write("=")); 127 | try!(self.print_expression(value)); 128 | } 129 | } 130 | self.write(";\n") 131 | } 132 | Decl::GlobalVars(ref vars) => { 133 | try!(self.write_indented("global ")); 134 | for (i, varname) in vars.iter().enumerate() { 135 | if i > 0 { 136 | try!(self.write(", ")); 137 | } 138 | try!(self.write("$")); 139 | try!(self.print_variable(varname)); 140 | } 141 | self.write(";\n") 142 | }, 143 | Decl::Label(ref label) => { 144 | try!(self.write(label.borrow())); 145 | self.write(":\n") 146 | } 147 | } 148 | } 149 | 150 | fn print_member(&mut self, member: &Member) -> fmt::Result { 151 | try!(self.write_indented("")); 152 | match *member { 153 | Member::Constant(ref modifiers, ref name, ref value) => { 154 | try!(write!(self.target, "{} const ", modifiers)); 155 | try!(self.write(name.borrow())); 156 | try!(self.write("=")); 157 | try!(self.print_expression(value)); 158 | self.write(";") 159 | } 160 | Member::Property(ref modifiers, ref name, ref value) => { 161 | try!(write!(self.target, "{} $", modifiers)); 162 | try!(self.write(name.borrow())); 163 | if let Some(ref default) = *value { 164 | try!(self.write("=")); 165 | try!(self.print_expression(default)); 166 | } 167 | self.write(";") 168 | } 169 | Member::Method(ref modifiers, ref name, ref decl) => { 170 | try!(write!(self.target, "{} ", modifiers)); 171 | self.print_function(decl, Some(name.borrow())) 172 | } 173 | Member::TraitUse(ref names, ref uses) => { 174 | try!(self.write("use ")); 175 | for (i, name) in names.iter().enumerate() { 176 | if i > 0 { 177 | try!(self.write(", ")); 178 | } 179 | try!(write!(self.target, "{}", name)); 180 | } 181 | if !uses.is_empty() { 182 
                    // adaptation rules: `{ A::m insteadof B; B::m as alias; }`
                    try!(self.write("{\n"));
                    self.indentation += 1;
                    for use_ in uses {
                        try!(self.write_indented(""));
                        match *use_ {
                            TraitUse::As(ref path, ref method, ref modifiers, ref alias) => {
                                if let Some(ref path) = *path {
                                    try!(write!(self.target, "{}::", path));
                                }
                                try!(self.write(method.borrow()));
                                try!(self.write(" as "));
                                try!(write!(self.target, "{} ", modifiers));
                                if let Some(ref alias) = *alias {
                                    try!(self.write(alias.borrow()));
                                }
                            }
                            TraitUse::InsteadOf(ref path, ref method, ref names) => {
                                try!(write!(self.target, "{}::", path));
                                try!(self.write(method.borrow()));
                                try!(self.write(" insteadof "));
                                for (i, name) in names.iter().enumerate() {
                                    if i > 0 {
                                        try!(self.write(", "));
                                    }
                                    try!(write!(self.target, "{}", name));
                                }
                            }
                        }
                        try!(self.write(";\n"));
                    }
                    self.indentation -= 1;
                    try!(self.write_indented("}\n"));
                } else {
                    try!(self.write(";\n"));
                }
                Ok(())
            }
        }
    }

    /// Prints `{`, the statements of `block` one level deeper, and `}` followed by a newline.
    ///
    /// The opening brace is written without indentation, so the caller is expected to have
    /// written the statement head (e.g. `if (...) `) on the same line already.
    fn print_block(&mut self, block: &Block) -> fmt::Result {
        try!(self.write("{\n"));
        self.indentation += 1;
        for stmt in &block.0 {
            try!(self.print_statement(stmt));
        }
        self.indentation -= 1;
        self.write_indented("}\n")
    }

    /// Prints a function, method or closure; `name` is `None` for closures.
    fn print_function(&mut self, func: &FunctionDecl, name: Option<&str>) -> fmt::Result {
        try!(self.write("function "));
        if func.ret_ref {
            try!(self.write("&"));
        }
        if let Some(name) = name {
            try!(self.write(name));
        }
        try!(self.write("("));
        for (i, param) in func.params.iter().enumerate() {
            if i > 0 {
                try!(self.write(","));
            }
            // per parameter: optional type hint, `&`, `...`, `$name`, optional `=default`
            if let Some(ref ty) = param.ty {
                try!(write!(self.target, "{} ", ty));
            }
            if param.as_ref {
                try!(self.write("&"));
            }
            if param.variadic {
                try!(self.write("..."));
            }
try!(self.write("$")); 255 | try!(self.write(param.name.borrow())); 256 | if let Some(ref default) = param.default { 257 | try!(self.write("=")); 258 | try!(self.print_expression(default)); 259 | } 260 | } 261 | try!(self.write(") ")); 262 | if !func.usev.is_empty() { 263 | try!(self.write("use (")); 264 | for (i, &(ref by_ref, ref var)) in func.usev.iter().enumerate() { 265 | if i > 0 { 266 | try!(self.write(", ")); 267 | } 268 | if *by_ref { 269 | try!(self.write("&")); 270 | } 271 | try!(self.write("$")); 272 | try!(self.write(var.borrow())); 273 | } 274 | try!(self.write(") ")); 275 | } 276 | if let Some(ref body) = func.body { 277 | self.print_block(body) 278 | } else { 279 | self.write(";") 280 | } 281 | } 282 | 283 | fn print_use(&mut self, clauses: &[UseClause]) -> fmt::Result { 284 | for clause in clauses { 285 | try!(self.write_indented("use ")); 286 | match *clause { 287 | UseClause::QualifiedName(ref path, ref alias) => { 288 | try!(write!(self.target, "{}", path)); 289 | if let Some(ref alias) = *alias { 290 | try!(self.write(" as ")); 291 | try!(self.write(alias.borrow())); 292 | } 293 | } 294 | } 295 | try!(self.write(";\n")); 296 | } 297 | Ok(()) 298 | } 299 | 300 | fn print_statement(&mut self, stmt: &Stmt) -> fmt::Result { 301 | match stmt.0 { 302 | Stmt_::None => { 303 | self.writeln(";") 304 | } 305 | Stmt_::Block(ref block) => { 306 | try!(self.writeln("{\n")); 307 | self.indentation += 1; 308 | for bstmt in &block.0 { 309 | try!(self.print_statement(bstmt)); 310 | } 311 | self.indentation -= 1; 312 | self.writeln("}\n") 313 | } 314 | Stmt_::Decl(ref decl) => self.print_decl(decl), 315 | Stmt_::Use(ref clauses) => self.print_use(clauses), 316 | Stmt_::Expr(ref expr) => { 317 | try!(self.write_indented("")); 318 | try!(self.print_expression(expr)); 319 | self.write(";\n") 320 | } 321 | Stmt_::Echo(ref args) => { 322 | try!(self.write_indented("echo ")); 323 | try!(self.print_argument_list(args)); 324 | self.write(";\n") 325 | } 326 | 
Stmt_::Return(ref arg) | 327 | Stmt_::Break(ref arg) | 328 | Stmt_::Continue(ref arg) => { 329 | try!(self.write_indented(match stmt.0 { 330 | Stmt_::Return(_) => "return", 331 | Stmt_::Break(_) => "break", 332 | Stmt_::Continue(_) => "continue", 333 | _ => unreachable!(), 334 | })); 335 | if let Some(ref arg) = *arg { 336 | try!(self.write(" ")); 337 | try!(self.print_expression(arg)) 338 | } 339 | self.write(";\n") 340 | } 341 | Stmt_::Unset(ref args) => { 342 | try!(self.write_indented("unset(")); 343 | try!(self.print_argument_list(args)); 344 | self.write(");\n") 345 | } 346 | Stmt_::If(ref cond, ref bl, ref else_bl) => { 347 | try!(self.write_indented("if (")); 348 | try!(self.print_expression(cond)); 349 | try!(self.write(") ")); 350 | try!(self.print_block(bl)); 351 | if !else_bl.is_empty() { 352 | try!(self.write_indented("else ")); 353 | try!(self.print_block(else_bl)); 354 | } 355 | Ok(()) 356 | } 357 | Stmt_::While(ref cond, ref bl) => { 358 | try!(self.write_indented("while (")); 359 | try!(self.print_expression(cond)); 360 | try!(self.write(") ")); 361 | self.print_block(bl) 362 | } 363 | Stmt_::DoWhile(ref bl, ref cond) => { 364 | try!(self.write_indented("do ")); 365 | try!(self.print_block(bl)); 366 | try!(self.write("while (")); 367 | try!(self.print_expression(cond)); 368 | self.write(");\n") 369 | } 370 | Stmt_::For(ref init, ref looper, ref cond, ref bl) => { 371 | fn print_for_exprs(printer: &mut PrettyPrinter, exprs: &[Expr]) -> fmt::Result { 372 | for (i, expr) in exprs.iter().enumerate() { 373 | if i > 0 { 374 | try!(printer.write(",")); 375 | } 376 | try!(printer.print_expression(expr)); 377 | } 378 | Ok(()) 379 | } 380 | try!(self.write_indented("for (")); 381 | try!(print_for_exprs(self, init)); 382 | try!(self.write("; ")); 383 | try!(print_for_exprs(self, looper)); 384 | try!(self.write(";")); 385 | try!(print_for_exprs(self, cond)); 386 | try!(self.write(") ")); 387 | self.print_block(bl) 388 | } 389 | Stmt_::ForEach(ref base, ref k, 
ref v, ref bl) => {
                try!(self.write_indented("foreach ("));
                try!(self.print_expression(base));
                try!(self.write(" as "));
                if let Some(ref k) = *k {
                    try!(self.print_expression(k));
                    try!(self.write(" => "));
                }
                try!(self.print_expression(v));
                try!(self.write(") "));
                self.print_block(bl)
            }
            Stmt_::Try(ref bl, ref catch, ref finally) => {
                try!(self.write_indented("try "));
                try!(self.print_block(bl));
                // every block ends with a newline, so `catch`/`finally` start a fresh line and
                // need the same indentation as the `try` keyword
                for clause in catch {
                    try!(self.write_indented("catch ("));
                    try!(write!(self.target, "{} ${}) ", clause.ty, clause.var.borrow() as &str));
                    try!(self.print_block(&clause.block));
                }
                if let Some(ref finally_bl) = *finally {
                    try!(self.write_indented("finally "));
                    try!(self.print_block(finally_bl));
                }
                Ok(())
            }
            Stmt_::Throw(ref expr) => {
                try!(self.write_indented("throw "));
                try!(self.print_expression(expr));
                self.write(";\n")
            }
            Stmt_::Switch(ref base, ref cases) => {
                try!(self.write_indented("switch ("));
                try!(self.print_expression(base));
                try!(self.write(") {\n"));
                self.indentation += 1;
                for case in cases {
                    for cond in &case.conds {
                        try!(self.write_indented("case "));
                        try!(self.print_expression(cond));
                        try!(self.write(":\n"));
                    }
                    if case.default {
                        // indented like the `case` labels next to it
                        try!(self.write_indented("default:\n"));
                    }
                    self.indentation += 1;
                    for stmt in &case.block.0 {
                        try!(self.print_statement(stmt));
                    }
                    self.indentation -= 1;
                }
                self.indentation -= 1;
                // the closing brace lines up with the `switch` keyword
                self.write_indented("}\n")
            },
            Stmt_::Goto(ref label) => {
                try!(self.write_indented("goto "));
                try!(self.write(label.borrow()));
                self.write(";\n")
            }
        }
    }

    /// Prints `expr` if there is one; `None` prints nothing.
    fn print_opt_expression(&mut self, expr: &Option<&Expr>) -> fmt::Result {
        match *expr {
            None => Ok(()),
            Some(ref expr) => self.print_expression(expr),
        }
} 457 | 458 | fn print_expression_curly_parens(&mut self, expr: &Expr, curly: bool) -> fmt::Result { 459 | let (parens_open, parens_close) = if curly { 460 | ("{", "}") 461 | } else { 462 | ("(", ")") 463 | }; 464 | 465 | let wrap_in_parens = match expr.0 { 466 | Expr_::BinaryOp(_, _, _) | 467 | Expr_::UnaryOp(_, _) | 468 | Expr_::ArrayIdx(_, _) | 469 | Expr_::ObjMember(_, _) | 470 | Expr_::StaticMember(_, _) | 471 | Expr_::Call(_, _) | 472 | Expr_::New(_, _) | 473 | Expr_::Assign(_, _) | 474 | Expr_::TernaryIf(_, _, _) => true, 475 | _ => false, 476 | }; 477 | 478 | if wrap_in_parens { 479 | try!(self.write(parens_open)); 480 | } 481 | try!(self.print_expression(expr)); 482 | if wrap_in_parens { 483 | try!(self.write(parens_close)); 484 | } 485 | Ok(()) 486 | } 487 | 488 | fn print_expression_parens(&mut self, expr: &Expr) -> fmt::Result { 489 | self.print_expression_curly_parens(expr, false) 490 | } 491 | 492 | fn print_variable(&mut self, v: &Variable) -> fmt::Result { 493 | match *v { 494 | Variable::Name(ref name) => write!(self.target, "${}", name.borrow() as &str), 495 | Variable::Fetch(ref expr) => { 496 | try!(self.write("$")); 497 | self.print_expression_curly_parens(expr, true) 498 | } 499 | } 500 | } 501 | 502 | pub fn print_expression(&mut self, expr: &Expr) -> fmt::Result { 503 | match expr.0 { 504 | Expr_::Path(ref path) => write!(self.target, "{}", path), 505 | Expr_::String(ref str_) => { 506 | let out_str = (str_.borrow() as &str).replace("\\'", "\\\\'").replace('\'', "\\'"); 507 | try!(write!(self.target, "'{}", out_str)); 508 | if out_str.ends_with('\\') { 509 | try!(self.write("\\")); 510 | } 511 | self.write("'") 512 | } 513 | Expr_::BinaryString(ref str_) => unimplemented!(), 514 | Expr_::Int(ref i) => write!(self.target, "{}", i), 515 | Expr_::Double(ref d) => write!(self.target, "{}", d), 516 | Expr_::Array(ref arr) => { 517 | try!(self.write("[")); 518 | for (i, &(ref k, ref v)) in arr.iter().enumerate() { 519 | if i > 0 { 520 | 
                        try!(self.write(", "));
                    }
                    // optional `key => ` prefix
                    if let Some(ref k) = *k {
                        try!(self.print_expression(k));
                        try!(self.write(" => "));
                    }
                    try!(self.print_expression(v));
                }
                self.write("]")
            },
            Expr_::Variable(ref varname) => self.print_variable(varname),
            Expr_::Reference(ref ref_expr) => {
                try!(self.write("&"));
                self.print_expression(ref_expr)
            }
            Expr_::Isset(ref args) => {
                try!(self.write("isset("));
                try!(self.print_argument_list(args));
                self.write(")")
            }
            Expr_::Empty(ref arg) => {
                try!(self.write("empty("));
                try!(self.print_expression(arg));
                self.write(")")
            }
            Expr_::Exit(ref arg) => {
                // always printed with parentheses, `exit()` when there is no argument
                try!(self.write("exit("));
                try!(self.print_opt_expression(&arg.as_ref().map(|x| &**x)));
                self.write(")")
            }
            Expr_::Clone(ref arg) => {
                try!(self.write("clone "));
                self.print_expression(arg)
            }
            Expr_::Include(ref ty, ref arg) => {
                try!(self.write(match *ty {
                    IncludeTy::Require => "require",
                    IncludeTy::RequireOnce => "require_once",
                    IncludeTy::Include => "include",
                    IncludeTy::IncludeOnce => "include_once",
                }));
                try!(self.write(" "));
                self.print_expression(arg)
            }
            Expr_::ArrayIdx(ref base, ref idxs) => {
                try!(self.print_expression_parens(base));
                for idx in idxs {
                    // a missing index prints as `[]`
                    try!(self.write("["));
                    try!(self.print_opt_expression(&idx.as_ref()));
                    try!(self.write("]"));
                }
                Ok(())
            }
            Expr_::ObjMember(ref base, ref idxs) => {
                try!(self.print_expression_parens(base));
                for idx in idxs {
                    try!(self.write("->"));
                    // compound member expressions are wrapped as `->{...}`
                    try!(self.print_expression_curly_parens(idx, true));
                }
                Ok(())
            }
            Expr_::StaticMember(ref base, ref idxs) => {
                try!(self.print_expression_parens(base));
                for idx in idxs {
                    try!(self.write("::"));
                    try!(self.print_expression_parens(idx));
                }
                Ok(())
            }
            Expr_::Call(ref target, ref args) => {
                try!(self.print_expression(target));
                try!(self.write("("));
                try!(self.print_argument_list(args));
                self.write(")")
            }
            Expr_::New(ref target, ref args) => {
                try!(self.write("new "));
                try!(self.print_expression(target));
                try!(self.write("("));
                try!(self.print_argument_list(args));
                self.write(")")
            },
            Expr_::Unpack(ref arg) => {
                try!(self.write("..."));
                self.print_expression(arg)
            },
            Expr_::UnaryOp(ref operator, ref operand) => {
                // prefix text, and whether a compound operand gets wrapped in parentheses;
                // the postfix operators have no prefix and are appended after the operand
                let (op, can_have_parens) = match *operator {
                    UnaryOp::Positive => ("+", true),
                    UnaryOp::Negative => ("-", true),
                    UnaryOp::Not => ("!", true),
                    UnaryOp::PreInc => ("++", false),
                    UnaryOp::PreDec => ("--", false),
                    UnaryOp::PostInc | UnaryOp::PostDec => ("", false),
                    UnaryOp::BitwiseNot => ("~", true),
                    UnaryOp::SilenceErrors => ("@", true),
                };
                try!(self.write(op));
                if can_have_parens {
                    try!(self.print_expression_parens(operand));
                } else {
                    try!(self.print_expression(operand));
                }
                match *operator {
                    UnaryOp::PostInc => try!(self.write("++")),
                    UnaryOp::PostDec => try!(self.write("--")),
                    _ => (),
                }
                Ok(())
            }
            Expr_::BinaryOp(ref operator, ref op1, ref op2) => {
                // NOTE(review): the operator is written without surrounding whitespace and
                // literal operands are not wrapped, so concatenating two integer literals
                // would print as `1.2` - confirm whether that case can occur.
                try!(self.print_expression_parens(op1));
                try!(write!(self.target, "{}", operator));
                self.print_expression_parens(op2)
            }
            Expr_::InstanceOf(ref op1, ref op2) => {
                try!(self.print_expression(op1));
                try!(self.write(" instanceof "));
                self.print_expression(op2)
            }
            Expr_::Cast(ref ty, ref op) => {
                // printed as `(type)(expr)`
                try!(self.write("("));
                try!(self.write(match *ty {
                    Ty::Array => "array",
                    Ty::Callable => "callable",
                    Ty::Bool => "bool",
                    Ty::Float => "float",
                    Ty::Int => "int",
                    Ty::Double => "double",
                    Ty::String => "string",
                    Ty::Object(None) => "object",
                    // every remaining type (e.g. a named object type) panics here
                    _ => unimplemented!(),
                }));
                try!(self.write(")("));
                try!(self.print_expression(op));
                self.write(")")
            }
            Expr_::Yield(ref expr) => {
                try!(self.write("yield "));
                self.print_opt_expression(&expr.as_ref().map(|x| &**x))
            }
            Expr_::Function(ref decl) => self.print_function(decl, None),
            Expr_::Assign(ref target, ref value) => {
                try!(self.print_expression(target));
                try!(self.write("="));
                self.print_expression(value)
            }
            Expr_::CompoundAssign(ref target, ref op, ref value) => {
                // e.g. `+=`: the binary operator followed by `=`
                try!(self.print_expression(target));
                try!(write!(self.target, "{}=", op));
                self.print_expression(value)
            }
            Expr_::AssignRef(ref target, ref value) => {
                try!(self.print_expression(target));
                try!(self.write("=&"));
                self.print_expression(value)
            }
            Expr_::List(ref parts) => {
                try!(self.write("list("));
                for (i, &(ref key, ref value)) in parts.iter().enumerate() {
                    if i > 0 {
                        try!(self.write(", "));
                    }
                    if let Some(ref key) = *key {
                        try!(self.print_expression(key));
                        try!(self.write(" => "));
                    }
                    try!(self.print_expression(value));
                }
                self.write(")")
            }
            Expr_::TernaryIf(ref base, ref case_true, ref case_else) => {
                try!(self.print_expression_parens(base));
                try!(self.write("?"));
                // without a middle operand this prints the short form `a?:b`
                if let Some(ref expr_) = *case_true {
                    try!(self.print_expression_parens(&**expr_));
                }
                try!(self.write(":"));
                self.print_expression_parens(case_else)
            }
        }
    }
}

/// Formats a binary operator as its PHP token, without surrounding whitespace.
impl fmt::Display for Op {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(f, "{}", match *self {
            Op::Concat => ".",
            Op::Add => "+",
            Op::Sub => "-",
            Op::Mul => "*",
            Op::Div => "/",
            Op::Pow => "**",
            Op::Mod => "%",
            Op::Or => "||",
            Op::And => "&&",
            Op::Identical => "===",
            Op::NotIdentical => "!==",
            Op::Eq => "==",
            Op::Neq => "!=",
            Op::Lt => "<",
            Op::Gt => ">",
            Op::Le => "<=",
            Op::Ge => ">=",
            Op::BitwiseAnd => "&",
            Op::BitwiseInclOr => "|",
            Op::BitwiseExclOr => "^",
            Op::Spaceship => "<=>",
            Op::Sl => "<<",
            Op::Sr => ">>",
        })
    }
}

/// Formats the set class modifiers, each followed by a single space (nothing if none is set).
impl fmt::Display for ClassModifiers {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        if self.has(ClassModifier::Abstract) {
            try!(write!(f, "abstract "));
        }
        if self.has(ClassModifier::Final) {
            try!(write!(f, "final "));
        }
        Ok(())
    }
}

/// Formats the set member modifiers in a fixed order (visibility, `static`, `abstract`,
/// `final`), each followed by a single space.
impl fmt::Display for MemberModifiers {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        if self.has(MemberModifier::Public) {
            try!(write!(f, "public "));
        }
        if self.has(MemberModifier::Protected) {
            try!(write!(f, "protected "));
        }
        if self.has(MemberModifier::Private) {
            try!(write!(f, "private "));
        }
        if self.has(MemberModifier::Static) {
            try!(write!(f, "static "));
        }
        if self.has(MemberModifier::Abstract) {
            try!(write!(f, "abstract "));
        }
        if self.has(MemberModifier::Final) {
            try!(write!(f, "final "));
        }
        Ok(())
    }
}

/// Formats a path as `[\][namespace\]identifier`.
impl fmt::Display for Path {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        if self.is_absolute {
            try!(write!(f, "\\"));
        }
        if let Some(ref namespace) = self.namespace {
            try!(write!(f, "{}\\", namespace.borrow() as &str));
        }
        write!(f, "{}", self.identifier.borrow() as &str)
    }
}


/// Formats a type as its PHP keyword, or as the path of a named class type.
impl fmt::Display for Ty {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let ty = match *self {
            Ty::Array => "array",
            Ty::Callable => "callable",
            Ty::Bool => "bool",
            Ty::Float => "float",
            Ty::Int => "int",
| Ty::Double => "double", 792 | Ty::String => "string", 793 | Ty::Object(None) => "object", 794 | Ty::Object(Some(ref path)) => { 795 | try!(write!(f, "{}", path)); 796 | return Ok(()); 797 | } 798 | }; 799 | write!(f, "{}", ty) 800 | } 801 | } 802 | 803 | impl fmt::Display for NullableTy { 804 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 805 | let ty = match *self { 806 | NullableTy::Nullable(ref ty) => { 807 | try!(write!(f, "?")); 808 | ty 809 | }, 810 | NullableTy::NonNullable(ref ty) => ty, 811 | }; 812 | write!(f, "{}", ty) 813 | } 814 | } 815 | -------------------------------------------------------------------------------- /src/tokenizer.rs: -------------------------------------------------------------------------------- 1 | //! tokenizer based on [Zend LS](https://github.com/php/php-src/blob/ebb99a1a3a2ec9216d95c63b267ae0f66074f4de/Zend/zend_language_scanner.l) 2 | //! since the reference doesn't seem very correct in some cases 3 | use std::str::{self, FromStr}; 4 | use std::rc::Rc; 5 | use std::mem; 6 | 7 | use interner::Interner; 8 | pub use tokens::{Span, Token, TokenSpan, SyntaxError}; 9 | 10 | pub trait AsSpanPos { 11 | fn as_span_pos(&self) -> u32; 12 | } 13 | 14 | impl AsSpanPos for u32 { 15 | fn as_span_pos(&self) -> u32 { 16 | *self 17 | } 18 | } 19 | 20 | impl AsSpanPos for usize { 21 | fn as_span_pos(&self) -> u32 { 22 | *self as u32 23 | } 24 | } 25 | 26 | #[inline] 27 | pub fn mk_span(start: A, end: B) -> Span { 28 | Span { 29 | start: start.as_span_pos(), 30 | end: end.as_span_pos(), 31 | ..Span::new() 32 | } 33 | } 34 | 35 | #[derive(Clone, Debug, PartialEq)] 36 | enum State { 37 | /// dummy state to force an ending after a `}` in InScripting 38 | DoNothing, 39 | Initial, 40 | InScripting, 41 | LookingForProperty, 42 | EmitQueue, 43 | Done, 44 | } 45 | 46 | pub struct Tokenizer<'a> { 47 | code: &'a str, 48 | /// whether to support short tags, equal to CG(short_tags) 49 | short_tags: bool, 50 | pub state: TokenizerState, 51 | 
queue: Vec, 52 | interner: Interner, 53 | } 54 | 55 | #[derive(Clone, Debug)] 56 | pub struct LineMap { 57 | data: Vec, 58 | end_pos: usize, 59 | } 60 | 61 | impl LineMap { 62 | fn new() -> LineMap { 63 | LineMap { 64 | data: vec![0], 65 | end_pos: 0, 66 | } 67 | } 68 | 69 | pub fn line_from_position(&self, pos: usize) -> usize { 70 | let mut b = self.data.len(); 71 | let mut a = 0; 72 | while b - a > 1 { 73 | let mid = (a + b) / 2; 74 | if self.data[mid] as usize > pos { 75 | b = mid; 76 | } else { 77 | a = mid; 78 | } 79 | } 80 | a 81 | } 82 | 83 | pub fn line(&self, line: usize) -> (u32, u32) { 84 | let end = if line + 1 < self.data.len() { 85 | self.data[line + 1] 86 | } else { 87 | self.end_pos as u32 88 | }; 89 | (self.data[line], end) 90 | } 91 | 92 | #[inline] 93 | pub fn push(&mut self, pos: u32) { 94 | self.data.push(pos); 95 | } 96 | } 97 | 98 | /// stuff that's also important for the parser 99 | #[derive(Debug, Clone)] 100 | pub struct TokenizerExternalState { 101 | /// contains the first byte-position of every line 102 | pub line_map: LineMap, 103 | } 104 | 105 | impl TokenizerExternalState { 106 | fn new() -> TokenizerExternalState { 107 | TokenizerExternalState { line_map: LineMap::new() } 108 | } 109 | } 110 | 111 | #[derive(Debug, Clone)] 112 | pub struct TokenizerState { 113 | src_pos: usize, 114 | state: State, 115 | state_stack: Vec, 116 | line_num: usize, 117 | pub external: TokenizerExternalState, 118 | restart: bool, 119 | } 120 | 121 | impl TokenizerState { 122 | #[inline] 123 | fn next_line(&mut self) { 124 | self.external.line_map.push(self.src_pos as u32); 125 | self.line_num += 1; 126 | } 127 | } 128 | 129 | /// Check if a string staarts with a token (case-insensitive) 130 | trait StrStartsWithCI { 131 | fn starts_with_ci(&self, s: &str) -> bool; 132 | } 133 | 134 | impl<'a> StrStartsWithCI for &'a str { 135 | #[inline] 136 | fn starts_with_ci(&self, s: &str) -> bool { 137 | self.chars().take(s.len()).collect::() == s.to_lowercase() 138 
| } 139 | } 140 | 141 | macro_rules! state_helper { 142 | (push, $self_:expr, $new_state:ident) => ({ 143 | let old_state = mem::replace(&mut $self_.state.state, State::$new_state); 144 | $self_.state.state_stack.push(old_state); 145 | }); 146 | (pop, $self_:expr) => ($self_.pop_state()); 147 | } 148 | 149 | macro_rules! match_token_alias { 150 | // set state syntax 151 | ($self_:expr, $alias:expr, $token:ident, state=$new_state:ident) => (match_token_alias!($self_, $alias, $token, {$self_.state.state = State::$new_state})); 152 | // state unchanged 153 | ($self_:expr, $alias:expr, $token:ident) => (match_token_alias!($self_, $alias, $token, {})); 154 | ($self_:expr, $alias:expr, $token:ident, $bl:expr) => {{ 155 | let str_repr = $alias; 156 | if $self_.input().starts_with_ci(str_repr) { 157 | let old_pos = $self_.state.src_pos; 158 | $self_.advance_bytes(str_repr.len()); 159 | let span = mk_span(old_pos, $self_.state.src_pos); 160 | $bl; 161 | Ok(TokenSpan(Token::$token, span)) 162 | } else { Err(SyntaxError::None) } 163 | }}; 164 | } 165 | 166 | macro_rules! 
match_token { 167 | // set state syntax 168 | ($self_:expr, $token:ident, state=$new_state:ident) => {match_token_alias!($self_, Token::$token.repr(), $token, state=$new_state)}; 169 | // state unchanged 170 | ($self_:expr, $token:ident) => {match_token_alias!($self_, Token::$token.repr(), $token)}; 171 | // state push 172 | ($self_:expr, $token:ident, state<-$new_state:ident) => {{ 173 | let str_repr = Token::$token.repr(); 174 | if $self_.input().starts_with_ci(str_repr) { 175 | let old_pos = $self_.state.src_pos; 176 | $self_.advance_bytes(str_repr.len()); 177 | let span = mk_span(old_pos, $self_.state.src_pos); 178 | state_helper!(push, $self_, $new_state); 179 | Ok(TokenSpan(Token::$token, span)) 180 | } else { Err(SyntaxError::None) } 181 | }}; 182 | // state pop 183 | ($self_:expr, $token:ident, state->) => {{ 184 | let str_repr = Token::$token.repr(); 185 | if $self_.input().starts_with_ci(str_repr) { 186 | let old_pos = $self_.state.src_pos; 187 | $self_.advance_bytes(str_repr.len()); 188 | let span = mk_span(old_pos, $self_.state.src_pos); 189 | state_helper!(pop, $self_); 190 | Ok(TokenSpan(Token::$token, span)) 191 | } else { Err(SyntaxError::None) } 192 | }}; 193 | } 194 | 195 | /// return if a token was found, else continue in code flow 196 | macro_rules! 
ret_token { 197 | ($e:expr) => {match $e { 198 | Ok(x) => return Ok(x), 199 | Err(SyntaxError::None) => (), 200 | x => return x, 201 | } 202 | }; 203 | } 204 | 205 | /// helper to transform string-members into the appropriate token 206 | impl<'a> Tokenizer<'a> { 207 | pub fn new(src: &'a str) -> Tokenizer<'a> { 208 | let mut tokenizer = Tokenizer { 209 | code: src, 210 | state: TokenizerState { 211 | src_pos: 0, 212 | state: State::Initial, 213 | state_stack: vec![], 214 | line_num: 1, 215 | external: TokenizerExternalState::new(), 216 | restart: false, 217 | }, 218 | short_tags: true, 219 | queue: vec![], 220 | interner: Interner::new(), 221 | }; 222 | tokenizer.state.external.line_map.end_pos = src.len(); 223 | tokenizer 224 | } 225 | 226 | #[inline] 227 | pub fn into_external_state(self) -> (Interner, TokenizerExternalState) { 228 | (self.interner, self.state.external) 229 | } 230 | 231 | /// advances by n-positions where a position is a char-index 232 | /// this returns a substring of the skipped part (old[..n]) 233 | #[inline] 234 | fn advance_chars(&mut self, n: usize) -> &'a str { 235 | let end_byte_pos = match self.input().char_indices().nth(n) { 236 | Some((byte_pos, _)) => byte_pos, 237 | None => self.input().len(), 238 | }; 239 | self.advance_bytes(end_byte_pos) 240 | } 241 | 242 | #[inline] 243 | fn advance_bytes(&mut self, n: usize) -> &'a str { 244 | let ret = &self.input()[..n]; 245 | self.state.src_pos += n; 246 | ret 247 | } 248 | 249 | #[inline] 250 | fn input(&self) -> &'a str { 251 | &self.code[self.state.src_pos..] 
252 | } 253 | 254 | #[inline] 255 | fn input_pos(&self) -> usize { 256 | self.state.src_pos 257 | } 258 | 259 | #[inline] 260 | fn pop_state(&mut self) { 261 | let new_state = match self.state.state_stack.pop() { 262 | Some(x) => x, 263 | None => unreachable!(), 264 | }; 265 | self.state.state = new_state; 266 | } 267 | 268 | /// handle whitespace 269 | fn whitespace(&mut self) { 270 | while !self.input().is_empty() { 271 | match self.input().chars().nth(0).unwrap() { 272 | ' ' | '\t' | '\r' => { 273 | self.advance_bytes(1); 274 | } 275 | '\n' => { 276 | self.advance_bytes(1); 277 | self.state.next_line(); 278 | } 279 | _ => break, 280 | } 281 | } 282 | } 283 | 284 | /// handle tabs and spaces 285 | fn whitespace_only(&mut self) { 286 | while !self.input().is_empty() { 287 | match self.input().chars().nth(0).unwrap() { 288 | ' ' | '\t' => { 289 | self.advance_bytes(1); 290 | } 291 | _ => break, 292 | } 293 | } 294 | } 295 | 296 | /// match exactly one newline 297 | fn newline(&mut self) -> bool { 298 | let amount = if self.input().starts_with("\r\n") { 299 | 2 300 | } else if self.input().starts_with('\n') || self.input().starts_with('\r') { 301 | 1 302 | } else { 303 | return false; 304 | }; 305 | self.advance_bytes(amount); 306 | self.state.next_line(); 307 | true 308 | } 309 | 310 | // re2c stuff (mostly prefixed with _) 311 | fn _label(&mut self) -> Option<(&'a str, Span)> { 312 | if self.input().is_empty() { 313 | return None; 314 | } 315 | if let Some('0'...'9') = self.input().chars().nth(0) { 316 | return None; 317 | } 318 | 319 | // \u{0080} U+0080 320 | // \u{00BF} U+00FF 321 | let end_pos = self.input().chars().position(|x| match x { 322 | 'a'...'z' | 323 | 'A'...'Z' | 324 | '\u{80}'...'\u{FF}' | 325 | '_' | 326 | '0'...'9' => false, 327 | _ => true, 328 | }); 329 | let end_pos = match end_pos { 330 | Some(0) => return None, 331 | Some(x) => x, 332 | None => self.input().chars().count(), 333 | }; 334 | let old_pos = self.input_pos(); 335 | let ret = 
self.advance_chars(end_pos); 336 | Some((ret, mk_span(old_pos, old_pos + end_pos))) 337 | } 338 | 339 | /// matches a token which consists of one character (simple) 340 | fn _token(&mut self) -> Result { 341 | if self.input().is_empty() { 342 | return Ok(TokenSpan(Token::End, mk_span(self.code.len(), self.code.len()))); 343 | } 344 | let tok = match self.input().chars().nth(0).unwrap() { 345 | ';' => Token::SemiColon, 346 | ':' => Token::Colon, 347 | ',' => Token::Comma, 348 | '.' => Token::Dot, 349 | '[' => Token::SquareBracketOpen, 350 | ']' => Token::SquareBracketClose, 351 | '(' => Token::ParenthesesOpen, 352 | ')' => Token::ParenthesesClose, 353 | '|' => Token::BwOr, 354 | '^' => Token::BwXor, 355 | '&' => Token::Ampersand, 356 | '+' => Token::Plus, 357 | '-' => Token::Minus, 358 | '/' => Token::Div, 359 | '*' => Token::Mul, 360 | '=' => Token::Equal, 361 | '%' => Token::Mod, 362 | '!' => Token::BoolNot, 363 | '~' => Token::BwNot, 364 | '$' => Token::Dollar, 365 | '<' => Token::Lt, 366 | '>' => Token::Gt, 367 | '?' 
=> Token::QuestionMark, 368 | '@' => Token::Silence, 369 | _ => return Err(SyntaxError::None), 370 | }; 371 | self.advance_chars(1); 372 | Ok(TokenSpan(tok, mk_span(self.input_pos() - 1, self.input_pos()))) 373 | } 374 | 375 | /// match a double number (or a long number/octal number) 376 | fn _onum_dnum_lnum(&mut self) -> Result { 377 | // valid inputs for double: "long.", ".long", "long.long" 378 | if self.input().is_empty() { 379 | return Ok(TokenSpan(Token::End, mk_span(self.code.len(), self.code.len()))); 380 | } 381 | let end_pos = match self.input().chars().position(|x| x < '0' || x > '9') { 382 | None => self.input().len(), 383 | Some(end_pos) => end_pos, 384 | }; 385 | let old_pos = self.input_pos(); 386 | let str_ = self.advance_chars(end_pos).to_owned(); 387 | if !self.input().starts_with('.') { 388 | { 389 | let span = mk_span(old_pos, self.input_pos()); 390 | // long sub-match 391 | if end_pos != 0 { 392 | if str_.starts_with('0') && str_.len() > 1 { 393 | return Ok(TokenSpan(Token::Int(i64::from_str_radix(&str_[1..], 8).unwrap()), span)); 394 | } else { 395 | return Ok(TokenSpan(Token::Int(i64::from_str_radix(&str_, 10).unwrap()), span)); 396 | } 397 | } 398 | } 399 | self.state.src_pos = old_pos; 400 | return Err(SyntaxError::None); 401 | } 402 | let mut str_ = str_ + self.advance_chars(1); 403 | // at this point we either matched "long." or just "." 
404 | let end_pos2 = match self.input().chars().position(|x| x < '0' || x > '9') { 405 | None => self.input().chars().count(), 406 | Some(0) if str_.len() == 1 => { 407 | self.state.src_pos = old_pos; 408 | return Err(SyntaxError::None); 409 | } 410 | Some(end_pos) => end_pos, 411 | }; 412 | let span = mk_span(self.input_pos(), end_pos); 413 | str_.push_str(self.advance_chars(end_pos2)); 414 | Ok(TokenSpan(Token::Double(f64::from_str(&str_).unwrap()), span)) 415 | } 416 | 417 | /// match a hex number 418 | fn _hnum(&mut self) -> Result { 419 | if self.input().len() < 3 || !self.input().starts_with("0x") { 420 | return Err(SyntaxError::None); 421 | } 422 | self.advance_bytes(2); 423 | let end_pos = match self.input().chars().position(|x| match x { 424 | 'a'...'f' | 'A'...'F' | '0'...'9' => false, 425 | _ => true, 426 | }) { 427 | None => self.input().chars().count(), 428 | Some(0) => return Err(SyntaxError::None), 429 | Some(end_pos) => end_pos, 430 | }; 431 | let span = mk_span(self.input_pos() - 2, end_pos); 432 | let str_ = self.advance_chars(end_pos); 433 | Ok(TokenSpan(Token::Int(i64::from_str_radix(str_, 16).unwrap()), span)) 434 | } 435 | 436 | /// match a binary number 437 | fn _bnum(&mut self) -> Result { 438 | if self.input().len() < 3 || !self.input().starts_with("0b") { 439 | return Err(SyntaxError::None); 440 | } 441 | self.advance_bytes(2); 442 | let end_pos = match self.input().chars().position(|x| x != '0' && x != '1') { 443 | None => self.input().chars().count(), 444 | Some(0) => return Err(SyntaxError::None), 445 | Some(end_pos) => end_pos, 446 | }; 447 | let span = mk_span(self.input_pos() - 2, end_pos); 448 | let str_ = self.advance_chars(end_pos); 449 | Ok(TokenSpan(Token::Int(i64::from_str_radix(str_, 2).unwrap()), span)) 450 | } 451 | 452 | /// matches ${label} so any valid variable_name 453 | fn match_variable(&mut self) -> Result { 454 | if self.input().len() < 2 || !self.input().starts_with('$') { 455 | return Err(SyntaxError::None); 456 | 
} 457 | let bak_pos = self.input_pos(); 458 | self.advance_bytes(1); 459 | match self._label().map(|(x, span)| (self.interner.intern(x), span)) { 460 | Some((name, mut span)) => { 461 | span.start = bak_pos as u32; 462 | Ok(TokenSpan(Token::Variable(name), span)) 463 | } 464 | None => { 465 | self.state.src_pos = bak_pos; 466 | Err(SyntaxError::None) 467 | } 468 | } 469 | } 470 | 471 | fn str_escape(&mut self, bytes: &mut Vec, sq: bool) -> Result<(), SyntaxError> { 472 | let chr = match (self.input().chars().nth(1), sq) { 473 | (Some('n'), false) => Some(b'\n'), 474 | (Some('r'), false) => Some(b'\r'), 475 | (Some('t'), false) => Some(b'\t'), 476 | (Some('f'), false) => Some(b'\x0C'), 477 | (Some('v'), false) => Some(b'\x0B'), 478 | (Some('e'), false) => unimplemented!(), 479 | (Some('"'), false) => Some(b'"'), 480 | (Some('\''), true) => Some(b'\''), 481 | (Some('\\'), _) => Some(b'\\'), 482 | (Some('$'), false) => Some(b'$'), 483 | (Some(x @ 'x'), false) | 484 | (Some(x @ 'X'), false) => { 485 | // read up to 2 hex characters, on 0 add \x to bytes 486 | let mut end_idx = 0; 487 | for i in 2..4 { 488 | match self.input().chars().nth(i) { 489 | Some('a'...'z') | Some('A'...'Z') | Some('0'...'9') => end_idx = i, 490 | _ => break, 491 | } 492 | } 493 | if end_idx == 0 { 494 | bytes.push(b'\\'); 495 | Some(x as u8) 496 | } else { 497 | let start_pos = self.input().char_indices().nth(2).unwrap().0; 498 | let end_pos = self.input().char_indices().nth(end_idx + 1).unwrap().0; 499 | let byte = u8::from_str_radix(&self.input()[start_pos..end_pos], 16).unwrap(); 500 | bytes.push(byte); 501 | self.advance_chars(1 + end_idx); 502 | return Ok(()); 503 | } 504 | } 505 | (Some('u'), false) => unimplemented!(), 506 | (Some(x), _) => { 507 | bytes.push(b'\\'); 508 | let mut tmp_str = String::new(); 509 | tmp_str.push(x); 510 | bytes.extend(tmp_str.as_bytes()); 511 | None 512 | } 513 | _ => return Err(SyntaxError::Unterminated("string escape sequence", mk_span(self.input_pos(), 
self.input_pos() + 1))), 514 | }; 515 | self.advance_chars(2); 516 | if let Some(chr) = chr { 517 | bytes.push(chr); 518 | } 519 | Ok(()) 520 | } 521 | 522 | #[inline] 523 | fn return_tokens_from_parts(&mut self, 524 | start_tok: TokenSpan, 525 | end_tok: TokenSpan, 526 | bytes: Vec, 527 | parts: Vec) 528 | -> TokenSpan { 529 | let (start_pos, end_pos) = (start_tok.1.start, end_tok.1.end); 530 | self.queue.push(end_tok); 531 | if !bytes.is_empty() { 532 | let ret_token = match String::from_utf8(bytes) { 533 | Ok(str_) => Token::ConstantEncapsedString(self.interner.intern(&str_)), 534 | Err(err) => Token::BinaryCharSequence(Rc::new(err.into_bytes())), 535 | }; 536 | self.queue.push(TokenSpan(ret_token, mk_span(start_pos as usize, end_pos as usize))); 537 | } 538 | if !parts.is_empty() { 539 | self.queue.extend(parts.into_iter().rev()); 540 | } 541 | state_helper!(push, self, EmitQueue); 542 | start_tok 543 | } 544 | 545 | /// matches a single-quoted string literal 546 | fn match_sq_string(&mut self) -> Result { 547 | if self.input().len() < 2 { 548 | return Err(SyntaxError::None); 549 | } 550 | // backup the whole state, since line counting needs resetting too 551 | let bak_state = self.state.clone(); 552 | if self.input().starts_with("b'") { 553 | self.advance_bytes(2); 554 | } else if self.input().starts_with('\'') { 555 | self.advance_bytes(1); 556 | } else { 557 | return Err(SyntaxError::None); 558 | } 559 | 560 | // valid escapes: \ \\ 561 | // repeatedly progress until we encounter an escape sequence (or end) 562 | let mut bytes: Vec = vec![]; 563 | loop { 564 | let end_pos = 565 | match self.input().chars().position(|x| x == '\\' || x == '\'' || x == '\n') { 566 | Some(end_pos) => end_pos, 567 | None => self.input().chars().count(), 568 | }; 569 | bytes.extend(self.advance_chars(end_pos).as_bytes()); 570 | match self.input().chars().nth(0) { 571 | Some('\n') => { 572 | self.advance_bytes(1); 573 | self.state.next_line(); 574 | bytes.push(b'\n'); 575 | } 576 | 
Some('\\') => try!(self.str_escape(&mut bytes, true)), 577 | Some('\'') => { 578 | self.advance_bytes(1); 579 | break; 580 | } 581 | _ => { 582 | let old_pos = self.state.src_pos; 583 | self.state = bak_state; 584 | return Err(SyntaxError::Unterminated("single-quoted string literal", mk_span(self.input_pos(), old_pos + 1))); 585 | } 586 | } 587 | } 588 | let span = mk_span(bak_state.src_pos, self.input_pos()); 589 | let ret_token = match String::from_utf8(bytes) { 590 | Ok(str_) => Token::ConstantEncapsedString(self.interner.intern(&str_)), 591 | Err(err) => Token::BinaryCharSequence(Rc::new(err.into_bytes())), 592 | }; 593 | Ok(TokenSpan(ret_token, span)) 594 | } 595 | 596 | fn str_variable(&mut self, bytes: &mut Vec, parts: &mut Vec) { 597 | self.advance_bytes(1); 598 | // T_DOLLAR_OPEN_CURLY_BRACES ${ ... } syntax (simple = DollarCurlyBraces, complex = str_block) 599 | if self.input().starts_with('{') { 600 | let pos = self.input_pos() - 1; 601 | if !bytes.is_empty() { 602 | let len = bytes.len(); 603 | let old_fragment = match String::from_utf8(mem::replace(bytes, vec![])) { 604 | Ok(str_) => Token::ConstantEncapsedString(self.interner.intern(&str_)), 605 | Err(err) => Token::BinaryCharSequence(Rc::new(err.into_bytes())), 606 | }; 607 | parts.push(TokenSpan(old_fragment, mk_span(pos - len, pos))); 608 | } 609 | let next_part = parts.len(); 610 | self.str_block(bytes, parts, false); 611 | // patch the CurlyBracesOpen token 612 | assert_eq!(parts[next_part].0, Token::CurlyBracesOpen); 613 | parts[next_part].1.start -= 1; 614 | parts[next_part].0 = Token::DollarCurlyBracesOpen; 615 | return; 616 | } 617 | // match variable 618 | if let Some((label, span)) = self._label().map(|(x, span)| (self.interner.intern(x), span)) { 619 | let mut tmp_parts = vec![]; 620 | // match var_offset 621 | if self.input().starts_with('[') { 622 | unimplemented!(); 623 | } 624 | // match object access (only $var->label supported in PHP) 625 | else if self.input().starts_with("->") { 
626 | let bak_pos = self.input_pos(); 627 | self.advance_bytes(2); 628 | if let Some((property, span)) = self._label().map(|(x, span)| (self.interner.intern(x), span)) { 629 | tmp_parts.push(TokenSpan(Token::ObjectOp, mk_span(bak_pos, bak_pos+1))); 630 | tmp_parts.push(TokenSpan(Token::String(property), mk_span(span.start, span.end))); 631 | } else { 632 | self.state.src_pos = bak_pos; 633 | } 634 | } 635 | // and match the single variable, prepend it 636 | if !bytes.is_empty() { 637 | let len = bytes.len(); 638 | let start = span.start - 1; 639 | let old_fragment = match String::from_utf8(mem::replace(bytes, vec![])) { 640 | Ok(str_) => Token::ConstantEncapsedString(self.interner.intern(&str_)), 641 | Err(err) => Token::BinaryCharSequence(Rc::new(err.into_bytes())), 642 | }; 643 | parts.push(TokenSpan(old_fragment, mk_span(start as usize - len, start))); 644 | } 645 | parts.push(TokenSpan(Token::Variable(label), mk_span(span.start - 1, span.end))); 646 | parts.extend(tmp_parts); 647 | } else { 648 | bytes.push(b'$'); 649 | } 650 | } 651 | 652 | fn str_block(&mut self, 653 | bytes: &mut Vec, 654 | parts: &mut Vec, 655 | require_dollar: bool) { 656 | self.advance_bytes(1); 657 | if self.input().starts_with('$') || !require_dollar { 658 | let bak_state = self.state.clone(); 659 | // temporary state transition to use the same instance to match 660 | self.state.state = State::InScripting; 661 | self.state.state_stack = vec![State::DoNothing]; 662 | let mut tokens = vec![TokenSpan(Token::CurlyBracesOpen, mk_span(self.input_pos()-1, self.input_pos()))]; 663 | while let Ok(tok) = self.next_token() { 664 | tokens.push(tok); 665 | } 666 | if let Some(&TokenSpan(Token::CurlyBracesClose, _)) = tokens.last() { 667 | if !bytes.is_empty() { 668 | let len = bytes.len(); 669 | let start = bak_state.src_pos - 1; 670 | let old_fragment = match String::from_utf8(mem::replace(bytes, vec![])) { 671 | Ok(str_) => Token::ConstantEncapsedString(self.interner.intern(&str_)), 672 | Err(err) 
=> Token::BinaryCharSequence(Rc::new(err.into_bytes())), 673 | }; 674 | parts.push(TokenSpan(old_fragment, mk_span(start - len, start))); 675 | } 676 | parts.extend(tokens); 677 | // undo the temporary tokenizer state transition 678 | self.state.state = bak_state.state; 679 | self.state.state_stack = bak_state.state_stack; 680 | } else { 681 | self.state = bak_state; 682 | } 683 | } else { 684 | bytes.push(b'{'); 685 | } 686 | } 687 | 688 | /// matches a double-quoted string literal 689 | fn match_dq_string(&mut self) -> Result { 690 | if self.input().len() < 2 { 691 | return Err(SyntaxError::None); 692 | } 693 | let bak_state_str = self.state.clone(); 694 | if self.input().starts_with("b\"") { 695 | self.advance_bytes(2); 696 | } else if self.input().starts_with('"') { 697 | self.advance_bytes(1); 698 | } else { 699 | return Err(SyntaxError::None); 700 | } 701 | // valid escapes: \n \r \t \f \v \e \" \\ \$ \x \X \u{unicode} 702 | 703 | // repeatedly progress until we encounter an escape sequence (or end) 704 | let mut parts = vec![]; 705 | let mut bytes: Vec = vec![]; 706 | loop { 707 | let end_pos = match self.input() 708 | .chars() 709 | .position(|x| x == '\\' || x == '"' || x == '$' || x == '\n' || x == '{') { 710 | Some(end_pos) => end_pos, 711 | None => self.input().chars().count() - 1, 712 | }; 713 | bytes.extend(self.advance_chars(end_pos).as_bytes()); 714 | 715 | match self.input().chars().nth(0) { 716 | Some('\n') => { 717 | self.advance_bytes(1); 718 | self.state.next_line(); 719 | bytes.push(b'\n'); 720 | } 721 | Some('\\') => try!(self.str_escape(&mut bytes, false)), 722 | Some('"') => { 723 | self.advance_bytes(1); 724 | break; 725 | } 726 | Some('$') => self.str_variable(&mut bytes, &mut parts), 727 | // match {$} block 728 | Some('{') => self.str_block(&mut bytes, &mut parts, true), 729 | _ => { 730 | let err_pos = self.input_pos(); 731 | self.state = bak_state_str; 732 | return Err(SyntaxError::Unterminated("double-quoted string literal", 
mk_span(self.input_pos(), err_pos))) 733 | }, 734 | } 735 | } 736 | let current_pos = self.input_pos(); 737 | Ok(self.return_tokens_from_parts( 738 | TokenSpan(Token::DoubleQuote, mk_span(bak_state_str.src_pos, bak_state_str.src_pos + 1)), 739 | TokenSpan(Token::DoubleQuote, mk_span(current_pos - 1, current_pos)), 740 | bytes, parts 741 | )) 742 | } 743 | 744 | /// backquote handling 745 | fn match_backquote(&mut self) -> Result { 746 | if self.input().len() < 2 { 747 | return Err(SyntaxError::None); 748 | } 749 | let bak_state_str = if self.input().starts_with('`') { 750 | let bak_state = self.state.clone(); 751 | self.advance_bytes(1); 752 | bak_state 753 | } else { 754 | return Err(SyntaxError::None); 755 | }; 756 | let mut parts = vec![]; 757 | let mut bytes: Vec = vec![]; 758 | loop { 759 | let end_pos = match self.input() 760 | .chars() 761 | .position(|x| x == '\\' || x == '"' || x == '$' || x == '\n' || x == '{') { 762 | Some(end_pos) => end_pos, 763 | None => self.input().chars().count() - 1, 764 | }; 765 | bytes.extend(self.advance_chars(end_pos).as_bytes()); 766 | 767 | match self.input().chars().nth(0) { 768 | Some('\n') => { 769 | self.advance_bytes(1); 770 | self.state.next_line(); 771 | bytes.push(b'\n'); 772 | } 773 | Some('`') => { 774 | self.advance_bytes(1); 775 | break; 776 | } 777 | Some('$') => self.str_variable(&mut bytes, &mut parts), 778 | // match {$} block 779 | Some('{') => self.str_block(&mut bytes, &mut parts, true), 780 | _ => { 781 | let old_pos = self.input_pos(); 782 | self.state = bak_state_str; 783 | return Err(SyntaxError::Unterminated("Backquote", mk_span(self.state.src_pos, old_pos))); 784 | } 785 | } 786 | } 787 | let current_pos = self.input_pos(); 788 | Ok(self.return_tokens_from_parts( 789 | TokenSpan(Token::Backquote, mk_span(bak_state_str.src_pos, bak_state_str.src_pos + 1)), 790 | TokenSpan(Token::Backquote, mk_span(current_pos - 1, current_pos)), 791 | bytes, parts 792 | )) 793 | } 794 | 795 | /// Try to parse the 
heredoc or nowdoc syntax for a string 796 | fn match_here_now_doc(&mut self) -> Result { 797 | enum DocType { 798 | /// label 799 | HereDoc, 800 | /// "label" 801 | HereDocEncapsed, 802 | /// 'label' 803 | NowDoc, 804 | } 805 | if self.input().len() < 8 { 806 | return Err(SyntaxError::None); 807 | } 808 | let bak_state_str = self.state.clone(); 809 | if self.input().starts_with("b<<<") { 810 | self.advance_bytes(4); 811 | } else if self.input().starts_with("<<<") { 812 | self.advance_bytes(3); 813 | } else { 814 | return Err(SyntaxError::None); 815 | } 816 | self.whitespace_only(); 817 | // determine the label type 818 | let mut doc_ty = DocType::HereDoc; 819 | if self.input().starts_with('\'') { 820 | doc_ty = DocType::NowDoc; 821 | self.advance_bytes(1); 822 | } else if self.input().starts_with('"') { 823 | doc_ty = DocType::HereDocEncapsed; 824 | self.advance_bytes(1); 825 | } 826 | // match the label 827 | let label = if let Some(label) = self._label().map(|x| x.0.to_owned()) { 828 | label 829 | } else { 830 | self.state = bak_state_str; 831 | return Err(SyntaxError::None); 832 | }; 833 | // match the following label-type (if required) 834 | let required_chr = match doc_ty { 835 | DocType::HereDocEncapsed => Some('"'), 836 | DocType::NowDoc => Some('\''), 837 | DocType::HereDoc => None, 838 | }; 839 | if let Some(chr) = required_chr { 840 | if self.input().starts_with(chr) { 841 | self.advance_bytes(1); 842 | } else { 843 | self.state = bak_state_str; 844 | return Err(SyntaxError::None); 845 | } 846 | } 847 | // match a required newline after the "header" of the doc 848 | if !self.newline() { 849 | self.state = bak_state_str; 850 | return Err(SyntaxError::None); 851 | } 852 | 853 | // NOWDOC behaves roughly like sq_string and HEREDOC like dq_string 854 | let mut bytes: Vec = vec![]; 855 | let mut parts = vec![]; 856 | 857 | let is_now_doc = match doc_ty { 858 | DocType::NowDoc => true, 859 | _ => false, 860 | }; 861 | 862 | // match characters until we find the 
required end_tag 863 | let end_tag = label; 864 | loop { 865 | let end_pos = match self.input() 866 | .chars() 867 | .position(|x| x == '\\' || x == '$' || x == '\n' || x == '{') { 868 | Some(end_pos) => end_pos, 869 | None => self.input().len(), 870 | }; 871 | bytes.extend(self.advance_chars(end_pos).as_bytes()); 872 | 873 | match (self.input().chars().nth(0), is_now_doc) { 874 | (Some('\n'), _) => { 875 | self.advance_bytes(1); 876 | self.state.next_line(); 877 | // we are done if we are followed by our end-tag 878 | if self.input().starts_with(&end_tag) { 879 | self.advance_bytes(end_tag.len()); 880 | // only skip the semicolon for the validation of the here/nowdoc 881 | // but make sure it end's up in the token stream 882 | let old_pos = self.input_pos(); 883 | if self.input().starts_with(';') { 884 | self.advance_bytes(1); 885 | } 886 | if !self.newline() { 887 | let old_pos = self.input_pos(); 888 | self.state = bak_state_str; 889 | return Err(SyntaxError::Unterminated("Here/Nowdoc: end-tag requires to be followed by a newline", mk_span(self.state.src_pos, old_pos))); 890 | } 891 | self.state.src_pos = old_pos; 892 | break 893 | } else { 894 | bytes.push(b'\n'); 895 | } 896 | } 897 | (Some('\\'), _) => try!(self.str_escape(&mut bytes, is_now_doc)), 898 | (Some('$'), false) => self.str_variable(&mut bytes, &mut parts), 899 | (Some('{'), false) => self.str_block(&mut bytes, &mut parts, true), 900 | _ => { 901 | let old_pos = self.input_pos(); 902 | self.state = bak_state_str; 903 | return Err(SyntaxError::Unterminated("Here/Nowdoc", mk_span(self.state.src_pos, old_pos))); 904 | } 905 | } 906 | } 907 | let current_pos = self.input_pos(); 908 | if is_now_doc { 909 | assert!(parts.is_empty()); 910 | let ret_token = match String::from_utf8(bytes) { 911 | Ok(str_) => Token::ConstantEncapsedString(self.interner.intern(&str_)), 912 | Err(err) => Token::BinaryCharSequence(Rc::new(err.into_bytes())), 913 | }; 914 | return Ok(TokenSpan(ret_token, 
mk_span(bak_state_str.src_pos, current_pos))); 915 | } 916 | Ok(self.return_tokens_from_parts( 917 | TokenSpan(Token::HereDocStart, mk_span(bak_state_str.src_pos, bak_state_str.src_pos + 1)), 918 | TokenSpan(Token::HereDocEnd, mk_span(current_pos - 1, current_pos)), 919 | bytes, parts 920 | )) 921 | } 922 | 923 | /// match a comment 924 | pub fn match_comments(&mut self) -> Result { 925 | let old_pos = self.input_pos(); 926 | let mut doc_comment = false; 927 | // single line comment 928 | let start_tokens_count = if self.input().starts_with('#') { 929 | 1 930 | } else if self.input().starts_with("//") { 931 | 2 932 | } else { 933 | 0 934 | }; 935 | let comment = if start_tokens_count > 0 { 936 | self.advance_bytes(start_tokens_count); 937 | let end_pos = match self.input().chars().position(|x| x == '\n') { 938 | Some(end_pos) => end_pos, 939 | None => self.input().len(), 940 | }; 941 | self.advance_chars(end_pos) 942 | } else { 943 | // block comment 944 | let start_tokens_count = if self.input().starts_with("/**") { 945 | doc_comment = true; 946 | 3 947 | } else if self.input().starts_with("/*") { 948 | 2 949 | } else { 950 | 0 951 | }; 952 | if start_tokens_count > 0 { 953 | self.advance_bytes(start_tokens_count); 954 | let end_pos = match self.input().find("*/") { 955 | Some(end_pos) => end_pos, 956 | None => { 957 | let old_pos = self.state.src_pos; 958 | self.state.src_pos = old_pos; 959 | return Err(SyntaxError::Unterminated("comment", mk_span(self.input_pos(), old_pos))); 960 | } 961 | }; 962 | let ret = self.advance_bytes(end_pos); 963 | self.advance_bytes(2); 964 | ret 965 | } else { 966 | return Err(SyntaxError::None); 967 | } 968 | }.to_owned(); 969 | for _ in 0..comment.lines().count().checked_sub(1).unwrap_or(0) { 970 | self.state.next_line() 971 | } 972 | let mut span = mk_span(old_pos, self.input_pos()); 973 | if doc_comment { 974 | // For a doc comment the parser'll use the content of Token::Comment as doc_comment 975 | // for this specific case 
`.doc_comment` merely acts as a flag 976 | span.doc_comment = Some("".to_owned()); 977 | } 978 | Ok(TokenSpan(Token::Comment(self.interner.intern(&comment)), span)) 979 | } 980 | 981 | /// Try to parse a token depending on the current state 982 | pub fn next_token(&mut self) -> Result { 983 | loop { 984 | let ret = match self.state.state { 985 | State::Done | State::DoNothing => Err(SyntaxError::None), 986 | State::Initial => self.initial_token(), 987 | State::InScripting => self.in_scripting_token(), 988 | State::LookingForProperty => self.looking_for_property_token(), 989 | /// this state allows returning multiple tokens (for e.g. string fragments) 990 | State::EmitQueue => { 991 | match self.queue.pop() { 992 | Some(x) => Ok(x), 993 | None => { 994 | state_helper!(pop, self); 995 | self.state.restart = true; 996 | continue; 997 | } 998 | } 999 | } 1000 | }; 1001 | match ret { 1002 | Err(SyntaxError::None) if self.state.restart => { 1003 | self.state.restart = false; 1004 | continue; 1005 | } 1006 | _ => return ret, 1007 | } 1008 | } 1009 | } 1010 | 1011 | /// token scanner for initial-state 1012 | fn initial_token(&mut self) -> Result { 1013 | if self.input().is_empty() { 1014 | self.state.state = State::Done; 1015 | return Ok(TokenSpan(Token::End, mk_span(self.code.len(), self.code.len()))); 1016 | } 1017 | ret_token!(match_token!(self, OpenTagWithEcho, state = InScripting)); 1018 | ret_token!(match_token!(self, OpenTag, state = InScripting)); 1019 | if self.short_tags { 1020 | ret_token!(match_token_alias!(self, " (false, input.len()), 1029 | Some(x) => (true, x), 1030 | }; 1031 | let is_php_tag = if is_php_tag && self.short_tags { 1032 | true 1033 | } else { 1034 | input.starts_with(" Result { 1054 | self.whitespace(); 1055 | // check if we are at the end of the input 1056 | if self.input().is_empty() { 1057 | return Ok(TokenSpan(Token::End, mk_span(self.code.len(), self.code.len()))); 1058 | } 1059 | let mut ret = vec![]; 1060 | let old_pos = 
self.input_pos(); 1061 | ret_token!(self.match_here_now_doc()); 1062 | ret_token!(self._bnum()); 1063 | ret_token!(self._hnum()); 1064 | ret_token!(self._onum_dnum_lnum()); 1065 | ret_token!(self.match_dq_string()); 1066 | ret_token!(self.match_variable()); 1067 | ret_token!(self.match_sq_string()); 1068 | ret_token!(self.match_comments()); 1069 | ret_token!(self.match_backquote()); 1070 | self.state.src_pos = old_pos; 1071 | 1072 | if let Some((label, span)) = self._label() { 1073 | ret.push(Ok(TokenSpan(Token::String(self.interner.intern(label)), span))); 1074 | } 1075 | self.state.src_pos = old_pos; 1076 | ret.push(self.in_scripting_other_token()); 1077 | 1078 | // we return either the longest success or longest error 1079 | // being the longest means, having the furthest position 1080 | let mut longest = (0, None); 1081 | let mut longest_err = (0, None); 1082 | for r in ret { 1083 | match r { 1084 | Ok(token) => if longest.0 <= token.1.end { 1085 | longest = (token.1.end, Some(token)); 1086 | }, 1087 | Err(SyntaxError::None) => (), 1088 | Err(err) => if longest_err.0 <= err.span().end { 1089 | longest_err = (err.span().end, Some(err)); 1090 | } 1091 | } 1092 | } 1093 | if let Some(ret) = longest.1 { 1094 | self.state.src_pos = longest.0 as usize; 1095 | return Ok(ret); 1096 | } 1097 | if let Some(err) = longest_err.1 { 1098 | self.state.src_pos = longest_err.0 as usize; 1099 | return Err(err); 1100 | } 1101 | Err(SyntaxError::UnknownCharacter(mk_span(self.input_pos(), self.input_pos() + 1))) 1102 | } 1103 | 1104 | fn in_scripting_other_token(&mut self) -> Result { 1105 | ret_token!(match_token!(self, CloseTag, state = Initial)); 1106 | ret_token!(match_token!(self, Exit)); 1107 | ret_token!(match_token_alias!(self, "die", Exit)); 1108 | ret_token!(match_token!(self, Function)); 1109 | ret_token!(match_token!(self, Const)); 1110 | ret_token!(match_token!(self, Return)); 1111 | ret_token!({ 1112 | let keyword = "yield"; 1113 | if 
self.input().starts_with_ci(keyword) { 1114 | let old_pos = self.input_pos(); 1115 | self.advance_bytes(keyword.len()); 1116 | // yield_from submatch 1117 | self.whitespace(); 1118 | let keyword = "from"; 1119 | if self.input().starts_with_ci(keyword) { 1120 | self.advance_bytes(keyword.len()); 1121 | let span = mk_span(old_pos, self.input_pos()); 1122 | Ok(TokenSpan(Token::YieldFrom, span)) 1123 | } else { 1124 | let span = mk_span(old_pos, self.input_pos()); 1125 | Ok(TokenSpan(Token::Yield, span)) 1126 | } 1127 | } else { 1128 | Err(SyntaxError::None) 1129 | } 1130 | }); 1131 | ret_token!(match_token!(self, Try)); 1132 | ret_token!(match_token!(self, Catch)); 1133 | ret_token!(match_token!(self, Finally)); 1134 | ret_token!(match_token!(self, Throw)); 1135 | ret_token!(match_token!(self, If)); 1136 | ret_token!(match_token!(self, ElseIf)); 1137 | ret_token!(match_token!(self, EndIf)); 1138 | ret_token!(match_token!(self, Else)); 1139 | ret_token!(match_token!(self, While)); 1140 | ret_token!(match_token!(self, EndWhile)); 1141 | ret_token!(match_token!(self, Do)); 1142 | ret_token!(match_token!(self, Foreach)); 1143 | ret_token!(match_token!(self, EndForeach)); 1144 | ret_token!(match_token!(self, For)); 1145 | ret_token!(match_token!(self, Endfor)); 1146 | ret_token!(match_token!(self, Declare)); 1147 | ret_token!(match_token!(self, EndDeclare)); 1148 | ret_token!(match_token!(self, InstanceOf)); 1149 | ret_token!(match_token!(self, As)); 1150 | ret_token!(match_token!(self, Switch)); 1151 | ret_token!(match_token!(self, EndSwitch)); 1152 | ret_token!(match_token!(self, Case)); 1153 | ret_token!(match_token!(self, Default)); 1154 | ret_token!(match_token!(self, Break)); 1155 | ret_token!(match_token!(self, Continue)); 1156 | ret_token!(match_token!(self, Goto)); 1157 | ret_token!(match_token!(self, Echo)); 1158 | ret_token!(match_token!(self, Print)); 1159 | ret_token!(match_token!(self, Class)); 1160 | ret_token!(match_token!(self, Interface)); 1161 | 
ret_token!(match_token!(self, Trait)); 1162 | ret_token!(match_token!(self, Extends)); 1163 | ret_token!(match_token!(self, Implements)); 1164 | ret_token!(match_token!(self, ObjectOp, state <- LookingForProperty)); 1165 | ret_token!(match_token!(self, ScopeOp)); 1166 | ret_token!(match_token!(self, NsSeparator)); 1167 | ret_token!(match_token!(self, Ellipsis)); 1168 | ret_token!(match_token!(self, Coalesce)); 1169 | ret_token!(match_token!(self, New)); 1170 | ret_token!(match_token!(self, Clone)); 1171 | ret_token!(match_token!(self, Var)); 1172 | 1173 | // match cast tokens such as `(int)` in a single attempt: `(`, optional whitespace, type name, optional whitespace, `)` 1174 | if self.input().starts_with('(') { 1175 | #[inline] 1176 | fn try_determine_cast_type(self_: &mut Tokenizer) -> Result { 1177 | ret_token!(match_token!(self_, CastInt)); 1178 | ret_token!(match_token_alias!(self_, "int", CastInt)); 1179 | ret_token!(match_token_alias!(self_, "real", CastDouble)); 1180 | ret_token!(match_token!(self_, CastDouble)); 1181 | ret_token!(match_token_alias!(self_, "float", CastDouble)); 1182 | ret_token!(match_token!(self_, CastString)); 1183 | ret_token!(match_token_alias!(self_, "binary", CastString)); 1184 | ret_token!(match_token!(self_, CastArray)); 1185 | ret_token!(match_token!(self_, CastObject)); 1186 | ret_token!(match_token_alias!(self_, "boolean", CastBool)); 1187 | ret_token!(match_token!(self_, CastBool)); 1188 | ret_token!(match_token!(self_, CastUnset)); 1189 | Err(SyntaxError::None) 1190 | } 1191 | let old_pos = self.input_pos(); 1192 | self.advance_bytes(1); 1193 | self.whitespace_only(); 1194 | if let Ok(ret) = try_determine_cast_type(self) { 1195 | self.whitespace_only(); 1196 | if self.input().starts_with(')') { 1197 | self.advance_bytes(1); 1198 | return Ok(TokenSpan(ret.0, mk_span(old_pos, self.input_pos()))); 1199 | } 1200 | } 1201 | // restore the position if we didn't match a cast 1202 | self.state.src_pos = old_pos; 1203 | } 1204 | ret_token!(match_token!(self, Eval)); 1205 | ret_token!(match_token!(self, 
IncludeOnce)); 1206 | ret_token!(match_token!(self, Include)); 1207 | ret_token!(match_token!(self, RequireOnce)); 1208 | ret_token!(match_token!(self, Require)); 1209 | ret_token!(match_token!(self, Namespace)); 1210 | ret_token!(match_token!(self, Use)); 1211 | ret_token!(match_token!(self, Insteadof)); 1212 | ret_token!(match_token!(self, Global)); 1213 | ret_token!(match_token!(self, Isset)); 1214 | ret_token!(match_token!(self, Empty)); 1215 | ret_token!(match_token!(self, HaltCompiler)); 1216 | ret_token!(match_token!(self, Static)); 1217 | ret_token!(match_token!(self, Abstract)); 1218 | ret_token!(match_token!(self, Final)); 1219 | ret_token!(match_token!(self, Private)); 1220 | ret_token!(match_token!(self, Protected)); 1221 | ret_token!(match_token!(self, Public)); 1222 | ret_token!(match_token!(self, Unset)); 1223 | ret_token!(match_token!(self, DoubleArrow)); 1224 | ret_token!(match_token!(self, List)); 1225 | ret_token!(match_token!(self, Array)); 1226 | ret_token!(match_token!(self, Callable)); 1227 | ret_token!(match_token!(self, Increment)); 1228 | ret_token!(match_token!(self, Decrement)); 1229 | ret_token!(match_token!(self, IsIdentical)); 1230 | ret_token!(match_token!(self, IsNotIdentical)); 1231 | ret_token!(match_token!(self, IsEqual)); 1232 | ret_token!(match_token!(self, IsNotEqual)); 1233 | ret_token!(match_token_alias!(self, "<>", IsNotEqual)); 1234 | ret_token!(match_token!(self, SpaceShip)); 1235 | ret_token!(match_token!(self, IsSmallerOrEqual)); 1236 | ret_token!(match_token!(self, IsGreaterOrEqual)); 1237 | ret_token!(match_token!(self, PlusEqual)); 1238 | ret_token!(match_token!(self, MinusEqual)); 1239 | ret_token!(match_token!(self, MulEqual)); 1240 | ret_token!(match_token!(self, PowEqual)); // must be tried before Pow: `**` is a prefix of `**=`, and the first match wins 1241 | ret_token!(match_token!(self, Pow)); 1242 | ret_token!(match_token!(self, DivEqual)); 1243 | ret_token!(match_token!(self, ConcatEqual)); 1244 | ret_token!(match_token!(self, ModEqual)); 1245 | ret_token!(match_token!(self, SlEqual)); 
1246 | ret_token!(match_token!(self, SrEqual)); 1247 | ret_token!(match_token!(self, AndEqual)); 1248 | ret_token!(match_token!(self, OrEqual)); 1249 | ret_token!(match_token!(self, XorEqual)); 1250 | ret_token!(match_token!(self, BoolOr)); 1251 | ret_token!(match_token!(self, BoolAnd)); 1252 | ret_token!(match_token!(self, LogicalOr)); 1253 | ret_token!(match_token!(self, LogicalAnd)); 1254 | ret_token!(match_token!(self, LogicalXor)); 1255 | ret_token!(match_token!(self, Sl)); 1256 | ret_token!(match_token!(self, Sr)); 1257 | ret_token!(match_token!(self, CurlyBracesOpen, state <- InScripting)); 1258 | ret_token!(match match_token!(self, CurlyBracesClose, state ->) { 1259 | Ok(TokenSpan(token, mut span)) => { 1260 | // equivalent of RESET_DOC_COMMENT() 1261 | span.doc_comment = Some("".to_owned()); 1262 | Ok(TokenSpan(token, span)) 1263 | } 1264 | x => x, 1265 | }); //TODO: stack is allowed to be empty! (dont fail once error handling is implemented) 1266 | ret_token!(match_token!(self, MagicClass)); 1267 | ret_token!(match_token!(self, MagicTrait)); 1268 | ret_token!(match_token!(self, MagicFunction)); 1269 | ret_token!(match_token!(self, MagicMethod)); 1270 | ret_token!(match_token!(self, MagicLine)); 1271 | ret_token!(match_token!(self, MagicFile)); 1272 | ret_token!(match_token!(self, MagicDir)); 1273 | ret_token!(match_token!(self, MagicNamespace)); 1274 | ret_token!(self._token()); //{TOKENS}, keep this last 1275 | Err(SyntaxError::UnknownCharacter(mk_span(self.input_pos(), self.input_pos() + 1))) 1276 | } 1277 | 1278 | /// token-scanner for looking-for-property state 1279 | fn looking_for_property_token(&mut self) -> Result { 1280 | self.whitespace(); 1281 | ret_token!(match_token!(self, ObjectOp)); 1282 | match self._label().map(|(x, span)| (self.interner.intern(x), span)) { 1283 | None => (), 1284 | Some((x, span)) => { 1285 | state_helper!(pop, self); 1286 | return Ok(TokenSpan(Token::String(x), span)); 1287 | } 1288 | } 1289 | // ANY_CHAR: pop_state 
1290 | self.pop_state(); 1291 | self.state.restart = true; 1292 | Err(SyntaxError::None) 1293 | } 1294 | } 1295 | 1296 | #[cfg(test)] 1297 | mod tests { 1298 | use super::State; 1299 | use super::*; 1300 | 1301 | macro_rules! assert_eq_tok { 1302 | ($a:expr, $b:expr) => {assert_eq!($a.map(|x| x.0), $b)}; 1303 | } 1304 | 1305 | #[inline] 1306 | fn get_n_tokens(tokenizer: &mut Tokenizer, n: usize) -> Vec> { 1307 | (0..n).map(|_| tokenizer.next_token().map(|x| x.0)).collect() 1308 | } 1309 | 1310 | #[test] 1311 | fn simple_whitespace_line_num() { 1312 | let mut tokenizer = Tokenizer::new(""); 1313 | assert_eq_tok!(tokenizer.next_token(), Ok(Token::OpenTag)); 1314 | assert_eq_tok!(tokenizer.next_token(), Ok(Token::CloseTag)); 1315 | assert_eq!(tokenizer.state.line_num, 4); 1316 | } 1317 | 1318 | #[test] 1319 | fn simple_object_operator() { 1320 | let mut tokenizer = Tokenizer::new("test ?>"); 1321 | assert_eq!(get_n_tokens(&mut tokenizer, 3), vec![Ok(Token::OpenTag), Ok(Token::ObjectOp), Ok(Token::String("test".into()))]); 1322 | let mut tokenizer = Tokenizer::new("test->gest2 ?>"); 1323 | assert_eq!(get_n_tokens(&mut tokenizer, 5), vec![Ok(Token::OpenTag), Ok(Token::ObjectOp), Ok(Token::String("test".into())), 1324 | Ok(Token::ObjectOp), Ok(Token::String("gest2".into())) 1325 | ]); 1326 | } 1327 | 1328 | #[test] 1329 | fn simple_cast() { 1330 | let mut tokenizer = Tokenizer::new(""); 1331 | assert_eq!(get_n_tokens(&mut tokenizer, 3), 1332 | vec![Ok(Token::OpenTag), Ok(Token::CastString), Ok(Token::CloseTag)]); 1333 | } 1334 | 1335 | #[test] 1336 | fn simple_scripting_initial_state_test() { 1337 | let mut tokenizer = Tokenizer::new(""); 1342 | assert_eq!(tokenizer.state.state, State::Initial); 1343 | assert_eq_tok!(tokenizer.next_token(), Ok(Token::OpenTag)); 1344 | assert_eq!(tokenizer.state.state, State::InScripting); 1345 | assert_eq_tok!(tokenizer.next_token(), Ok(Token::CloseTag)); 1346 | assert_eq!(tokenizer.state.state, State::Initial); 1347 | } 1348 | 1349 | 
#[test] 1350 | fn scripting_initial_text() { 1351 | let mut tokenizer = Tokenizer::new("a?>b"); 1352 | assert_eq!(get_n_tokens(&mut tokenizer, 3), vec![Ok(Token::InlineHtml("a?>b".into())), Ok(Token::OpenTag), Ok(Token::CloseTag)]); 1353 | } 1354 | 1355 | #[test] 1356 | fn simple_lnum() { 1357 | let mut tokenizer = Tokenizer::new("ab\""); 1441 | assert_eq!(get_n_tokens(&mut tokenizer, 6), vec![Ok(Token::OpenTag), Ok(Token::DoubleQuote), Ok(Token::Variable("world".into())), Ok(Token::ObjectOp), 1442 | Ok(Token::String("ab".into())), Ok(Token::DoubleQuote), 1443 | ]); 1444 | let mut tokenizer = Tokenizer::new("ab->cd\""); 1445 | assert_eq!(get_n_tokens(&mut tokenizer, 7), 1446 | vec![Ok(Token::OpenTag), 1447 | Ok(Token::DoubleQuote), 1448 | Ok(Token::Variable("world".into())), 1449 | Ok(Token::ObjectOp), 1450 | Ok(Token::String("ab".into())), 1451 | Ok(Token::ConstantEncapsedString("->cd".into())), 1452 | Ok(Token::DoubleQuote)]); 1453 | let mut tokenizer = Tokenizer::new("ab->cd}\""); 1454 | assert_eq!(get_n_tokens(&mut tokenizer, 9), 1455 | vec![Ok(Token::OpenTag), 1456 | Ok(Token::DoubleQuote), 1457 | Ok(Token::CurlyBracesOpen), 1458 | Ok(Token::Variable("world".into())), 1459 | Ok(Token::ObjectOp), 1460 | Ok(Token::String("ab".into())), 1461 | Ok(Token::ObjectOp), 1462 | Ok(Token::String("cd".into())), 1463 | Ok(Token::CurlyBracesClose)]); 1464 | } 1465 | 1466 | #[test] 1467 | fn backquote() { 1468 | let mut tokenizer = Tokenizer::new(" { 1524 | assert_eq!(tokenizer.state.line_num, 4); 1525 | assert_eq!(comment, " test ".into()); 1526 | println!("{:?}", span); 1527 | assert_eq!(tokenizer.state.external.line_map.line_from_position(span.start as usize), 4 - 1); //0-based lines 1528 | }, 1529 | _ => {assert!(false);}, 1530 | } 1531 | } 1532 | 1533 | // TODO: use own error type? 1534 | // TODO: error handling tests 1535 | } 1536 | --------------------------------------------------------------------------------