├── .github └── workflows │ └── ci.yml ├── .gitignore ├── .travis.yml ├── Cargo.toml ├── LICENSE ├── README.md ├── src ├── ast.rs ├── lexer.rs ├── lib.rs ├── macros.rs ├── parser.rs ├── put_back_n.rs └── tabled_rc.rs └── tests ├── bom.rs └── parse_tokens.rs /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | push: 5 | branches: [ master ] 6 | pull_request: 7 | branches: [ master ] 8 | 9 | env: 10 | CARGO_TERM_COLOR: always 11 | 12 | jobs: 13 | build: 14 | 15 | runs-on: ubuntu-latest 16 | 17 | steps: 18 | - uses: actions/checkout@v2 19 | - name: Build 20 | run: cargo build --verbose 21 | - name: Run tests 22 | run: cargo test --verbose 23 | - name: Test optional features 24 | run: cargo test --verbose --no-default-features --features num 25 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | **/*.rs.bk 3 | Cargo.lock 4 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: rust 2 | rust: 3 | - 1.31.0 4 | - stable 5 | - beta 6 | - nightly 7 | 8 | script: 9 | - cargo test 10 | - cargo test --no-default-features --features num 11 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "prolog_parser" 3 | version = "0.8.68" 4 | authors = ["Mark Thom "] 5 | repository = "https://github.com/mthom/prolog_parser" 6 | description = " An operator precedence parser for rusty-wam, an up and coming ISO Prolog implementation." 7 | license = "BSD-3-Clause" 8 | 9 | [dependencies] 10 | lexical = "2.1.0" 11 | ordered-float = "0.5.0" 12 | rug = { optional = true, version = "1.4.0" } 13 | num-rug-adapter = { optional = true, version = "0.1.3" } 14 | unicode_reader = "1.0.0" 15 | 16 | [lib] 17 | path = "src/lib.rs" 18 | 19 | [features] 20 | num = ["num-rug-adapter"] 21 | default = ["rug"] 22 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2018, Mark Thom 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | * Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | * Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | * Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # prolog_parser 2 | 3 | An operator precedence parser for 4 | [rusty-wam](https://github.com/mthom/rusty-wam) based on the paper "An 5 | Operator Precedence Parser for Standard Prolog" by Koen De Bosschere 6 | and accompanying C code. -------------------------------------------------------------------------------- /src/ast.rs: -------------------------------------------------------------------------------- 1 | use rug::{Integer, Rational}; 2 | use ordered_float::*; 3 | use tabled_rc::*; 4 | 5 | use put_back_n::*; 6 | 7 | use std::cell::Cell; 8 | use std::cmp::Ordering; 9 | use std::collections::HashMap; 10 | use std::fmt; 11 | use std::hash::{Hash, Hasher}; 12 | use std::io::{Bytes, Error as IOError, Read}; 13 | use std::rc::Rc; 14 | use std::vec::Vec; 15 | 16 | use unicode_reader::CodePoints; 17 | 18 | pub type Atom = String; 19 | 20 | pub type Var = String; 21 | 22 | pub type Specifier = u32; 23 | 24 | pub const MAX_ARITY: usize = 1023; 25 | 26 | pub const XFX: u32 = 0x0001; 27 | pub const XFY: u32 = 0x0002; 28 | pub const YFX: u32 = 0x0004; 29 | pub const XF: u32 = 0x0010; 30 | pub const YF: u32 = 0x0020; 31 | pub const FX: u32 = 0x0040; 32 | pub const FY: u32 = 0x0080; 33 | pub const DELIMITER: u32 = 0x0100; 34 | pub const TERM: u32 = 0x1000; 35 | pub const LTERM: u32 = 0x3000; 36 | 37 | pub const NEGATIVE_SIGN: u32 = 0x0200; 38 | 39 | #[macro_export] 40 | macro_rules! clause_name { 41 | ($name: expr, $tbl: expr) => ( 42 | ClauseName::User(TabledRc::new($name, $tbl.clone())) 43 | ) ; 44 | ($name: expr) => ( 45 | ClauseName::BuiltIn($name) 46 | ) 47 | } 48 | 49 | #[macro_export] 50 | macro_rules! atom { 51 | ($e:expr, $tbl:expr) => ( 52 | Constant::Atom(ClauseName::User(tabled_rc!($e, $tbl)), None) 53 | ); 54 | ($e:expr) => ( 55 | Constant::Atom(clause_name!($e), None) 56 | ) 57 | } 58 | 59 | #[macro_export] 60 | macro_rules! rc_atom { 61 | ($e:expr) => ( 62 | Rc::new(String::from($e)) 63 | ) 64 | } 65 | macro_rules! is_term { 66 | ($x:expr) => ( ($x & TERM) != 0 ) 67 | } 68 | 69 | macro_rules! is_lterm { 70 | ($x:expr) => ( ($x & LTERM) != 0 ) 71 | } 72 | 73 | macro_rules! is_op { 74 | ($x:expr) => ( $x & (XF | YF | FX | FY | XFX | XFY | YFX) != 0 ) 75 | } 76 | 77 | macro_rules! is_negate { 78 | ($x:expr) => ( ($x & NEGATIVE_SIGN) != 0 ) 79 | } 80 | 81 | #[macro_export] 82 | macro_rules! is_prefix { 83 | ($x:expr) => ( $x & (FX | FY) != 0 ) 84 | } 85 | 86 | #[macro_export] 87 | macro_rules! is_postfix { 88 | ($x:expr) => ( $x & (XF | YF) != 0 ) 89 | } 90 | 91 | #[macro_export] 92 | macro_rules! is_infix { 93 | ($x:expr) => ( ($x & (XFX | XFY | YFX)) != 0 ) 94 | } 95 | 96 | #[macro_export] 97 | macro_rules! is_xfx { 98 | ($x:expr) => ( ($x & XFX) != 0 ) 99 | } 100 | 101 | #[macro_export] 102 | macro_rules! 
is_xfy { 103 | ($x:expr) => ( ($x & XFY) != 0 ) 104 | } 105 | 106 | #[macro_export] 107 | macro_rules! is_yfx { 108 | ($x:expr) => ( ($x & YFX) != 0 ) 109 | } 110 | 111 | #[macro_export] 112 | macro_rules! is_yf { 113 | ($x:expr) => ( ($x & YF) != 0 ) 114 | } 115 | 116 | #[macro_export] 117 | macro_rules! is_xf { 118 | ($x:expr) => ( ($x & XF) != 0 ) 119 | } 120 | 121 | #[macro_export] 122 | macro_rules! is_fx { 123 | ($x:expr) => ( ($x & FX) != 0 ) 124 | } 125 | 126 | #[macro_export] 127 | macro_rules! is_fy { 128 | ($x:expr) => ( ($x & FY) != 0 ) 129 | } 130 | 131 | #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] 132 | pub enum RegType { 133 | Perm(usize), 134 | Temp(usize) 135 | } 136 | 137 | impl Default for RegType { 138 | fn default() -> Self { 139 | RegType::Temp(0) 140 | } 141 | } 142 | 143 | impl RegType { 144 | pub fn reg_num(self) -> usize { 145 | match self { 146 | RegType::Perm(reg_num) | RegType::Temp(reg_num) => reg_num 147 | } 148 | } 149 | 150 | pub fn is_perm(self) -> bool { 151 | match self { 152 | RegType::Perm(_) => true, 153 | _ => false 154 | } 155 | } 156 | } 157 | 158 | impl fmt::Display for RegType { 159 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 160 | match self { 161 | &RegType::Perm(val) => write!(f, "Y{}", val), 162 | &RegType::Temp(val) => write!(f, "X{}", val) 163 | } 164 | } 165 | } 166 | 167 | #[derive(Debug, PartialEq, Eq, Clone, Copy)] 168 | pub enum VarReg { 169 | ArgAndNorm(RegType, usize), 170 | Norm(RegType) 171 | } 172 | 173 | impl VarReg { 174 | pub fn norm(self) -> RegType { 175 | match self { 176 | VarReg::ArgAndNorm(reg, _) | VarReg::Norm(reg) => reg 177 | } 178 | } 179 | } 180 | 181 | impl fmt::Display for VarReg { 182 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 183 | match self { 184 | &VarReg::Norm(RegType::Perm(reg)) => write!(f, "Y{}", reg), 185 | &VarReg::Norm(RegType::Temp(reg)) => write!(f, "X{}", reg), 186 | &VarReg::ArgAndNorm(RegType::Perm(reg), arg) => 187 | write!(f, "Y{} A{}", reg, arg), 188 | &VarReg::ArgAndNorm(RegType::Temp(reg), arg) => 189 | write!(f, "X{} A{}", reg, arg) 190 | } 191 | } 192 | } 193 | 194 | impl Default for VarReg { 195 | fn default() -> Self { 196 | VarReg::Norm(RegType::default()) 197 | } 198 | } 199 | 200 | #[macro_export] 201 | macro_rules! temp_v { 202 | ($x:expr) => ( 203 | RegType::Temp($x) 204 | ) 205 | } 206 | 207 | #[macro_export] 208 | macro_rules! perm_v { 209 | ($x:expr) => ( 210 | RegType::Perm($x) 211 | ) 212 | } 213 | 214 | #[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord)] 215 | pub enum GenContext { 216 | Head, Mid(usize), Last(usize) // Mid & Last: chunk_num 217 | } 218 | 219 | impl GenContext { 220 | pub fn chunk_num(self) -> usize { 221 | match self { 222 | GenContext::Head => 0, 223 | GenContext::Mid(cn) | GenContext::Last(cn) => cn 224 | } 225 | } 226 | } 227 | 228 | pub type OpDirKey = (ClauseName, Fixity); 229 | 230 | #[derive(Debug, Clone)] 231 | pub struct OpDirValue(pub SharedOpDesc, pub ClauseName); 232 | 233 | impl OpDirValue { 234 | pub fn new(spec: Specifier, priority: usize, module_name: ClauseName) -> Self { 235 | OpDirValue(SharedOpDesc::new(priority, spec), module_name) 236 | } 237 | 238 | #[inline] 239 | pub fn shared_op_desc(&self) -> SharedOpDesc { 240 | self.0.clone() 241 | } 242 | 243 | #[inline] 244 | pub fn owning_module(&self) -> ClauseName { 245 | self.1.clone() 246 | } 247 | } 248 | 249 | // name and fixity -> operator type and precedence. 
250 | pub type OpDir = HashMap; 251 | 252 | #[derive(Debug, Clone, Copy)] 253 | pub struct MachineFlags { 254 | pub double_quotes: DoubleQuotes 255 | } 256 | 257 | impl Default for MachineFlags { 258 | fn default() -> Self { 259 | MachineFlags { double_quotes: DoubleQuotes::default() } 260 | } 261 | } 262 | 263 | #[derive(Debug, Clone, Copy)] 264 | pub enum DoubleQuotes { 265 | Atom, Chars, Codes 266 | } 267 | 268 | impl DoubleQuotes { 269 | pub fn is_chars(self) -> bool { 270 | if let DoubleQuotes::Chars = self { 271 | true 272 | } else { 273 | false 274 | } 275 | } 276 | 277 | pub fn is_atom(self) -> bool { 278 | if let DoubleQuotes::Atom = self { 279 | true 280 | } else { 281 | false 282 | } 283 | } 284 | 285 | pub fn is_codes(self) -> bool { 286 | if let DoubleQuotes::Codes = self { 287 | true 288 | } else { 289 | false 290 | } 291 | } 292 | } 293 | 294 | impl Default for DoubleQuotes { 295 | fn default() -> Self { 296 | DoubleQuotes::Chars 297 | } 298 | } 299 | 300 | pub fn default_op_dir() -> OpDir { 301 | let module_name = clause_name!("builtins"); 302 | let mut op_dir = OpDir::new(); 303 | 304 | op_dir.insert((clause_name!(":-"), Fixity::In), OpDirValue::new(XFX, 1200, module_name.clone())); 305 | op_dir.insert((clause_name!(":-"), Fixity::Pre), OpDirValue::new(FX, 1200, module_name.clone())); 306 | op_dir.insert((clause_name!("?-"), Fixity::Pre), OpDirValue::new(FX, 1200, module_name.clone())); 307 | op_dir.insert((clause_name!(","), Fixity::In), OpDirValue::new(XFY, 1000, module_name.clone())); 308 | 309 | op_dir 310 | } 311 | 312 | #[derive(Debug, Clone)] 313 | pub enum ArithmeticError { 314 | NonEvaluableFunctor(Constant, usize), 315 | UninstantiatedVar 316 | } 317 | 318 | #[derive(Debug)] 319 | pub enum ParserError { 320 | Arithmetic(ArithmeticError), 321 | BackQuotedString(usize, usize), 322 | BadPendingByte, 323 | CannotParseCyclicTerm, 324 | UnexpectedChar(char, usize, usize), 325 | UnexpectedEOF, 326 | IO(IOError), 327 | ExpectedRel, 328 | ExpectedTopLevelTerm, 329 | InadmissibleFact, 330 | InadmissibleQueryTerm, 331 | IncompleteReduction(usize, usize), 332 | InconsistentEntry, 333 | InvalidDoubleQuotesDecl, 334 | InvalidHook, 335 | InvalidModuleDecl, 336 | InvalidModuleExport, 337 | InvalidRuleHead, 338 | InvalidUseModuleDecl, 339 | InvalidModuleResolution, 340 | InvalidSingleQuotedCharacter(char), 341 | MissingQuote(usize, usize), 342 | NonPrologChar(usize, usize), 343 | ParseBigInt(usize, usize), 344 | ParseFloat(usize, usize), 345 | Utf8Error(usize, usize) 346 | } 347 | 348 | impl ParserError { 349 | pub fn line_and_col_num(&self) -> Option<(usize, usize)> { 350 | match self { 351 | &ParserError::BackQuotedString(line_num, col_num) 352 | | &ParserError::UnexpectedChar(_, line_num, col_num) 353 | | &ParserError::IncompleteReduction(line_num, col_num) 354 | | &ParserError::MissingQuote(line_num, col_num) 355 | | &ParserError::NonPrologChar(line_num, col_num) 356 | | &ParserError::ParseBigInt(line_num, col_num) 357 | | &ParserError::ParseFloat(line_num, col_num) 358 | | &ParserError::Utf8Error(line_num, col_num) => 359 | Some((line_num, col_num)), 360 | _ => 361 | None 362 | } 363 | } 364 | 365 | pub fn as_str(&self) -> &'static str { 366 | match self { 367 | &ParserError::Arithmetic(..) => 368 | "arithmetic_error", 369 | &ParserError::BackQuotedString(..) => 370 | "back_quoted_string", 371 | &ParserError::BadPendingByte => 372 | "bad_pending_byte", 373 | &ParserError::UnexpectedChar(..) 
=> 374 | "unexpected_char", 375 | &ParserError::UnexpectedEOF => 376 | "unexpected_end_of_file", 377 | &ParserError::ExpectedRel => 378 | "expected_relation", 379 | &ParserError::ExpectedTopLevelTerm => 380 | "expected_atom_or_cons_or_clause", 381 | &ParserError::InadmissibleFact => 382 | "inadmissible_fact", 383 | &ParserError::InadmissibleQueryTerm => 384 | "inadmissible_query_term", 385 | &ParserError::IncompleteReduction(..) => 386 | "incomplete_reduction", 387 | &ParserError::InconsistentEntry => 388 | "inconsistent_entry", 389 | &ParserError::InvalidDoubleQuotesDecl => 390 | "invalid_double_quotes_declaration", 391 | &ParserError::InvalidHook => 392 | "invalid_hook", 393 | &ParserError::InvalidModuleDecl => 394 | "invalid_module_declaration", 395 | &ParserError::InvalidModuleExport => 396 | "invalid_module_export", 397 | &ParserError::InvalidModuleResolution => 398 | "invalid_module_resolution", 399 | &ParserError::InvalidRuleHead => 400 | "invalid_head_of_rule", 401 | &ParserError::InvalidUseModuleDecl => 402 | "invalid_use_module_declaration", 403 | &ParserError::InvalidSingleQuotedCharacter(..) => 404 | "invalid_single_quoted_character", 405 | &ParserError::IO(_) => 406 | "input_output_error", 407 | &ParserError::MissingQuote(..) => 408 | "missing_quote", 409 | &ParserError::NonPrologChar(..) => 410 | "non_prolog_character", 411 | &ParserError::ParseBigInt(..) => 412 | "cannot_parse_big_int", 413 | &ParserError::ParseFloat(..) => 414 | "cannot_parse_float", 415 | &ParserError::Utf8Error(..) => 416 | "utf8_conversion_error", 417 | &ParserError::CannotParseCyclicTerm => 418 | "cannot_parse_cyclic_term" 419 | } 420 | } 421 | } 422 | 423 | impl From for ParserError { 424 | fn from(err: ArithmeticError) -> ParserError { 425 | ParserError::Arithmetic(err) 426 | } 427 | } 428 | 429 | impl From for ParserError { 430 | fn from(err: IOError) -> ParserError { 431 | ParserError::IO(err) 432 | } 433 | } 434 | 435 | impl From<&IOError> for ParserError { 436 | fn from(error: &IOError) -> ParserError { 437 | if error.get_ref().filter(|e| e.is::()).is_some() { 438 | ParserError::Utf8Error(0, 0) 439 | } else { 440 | ParserError::IO(error.kind().into()) 441 | } 442 | } 443 | } 444 | 445 | 446 | #[derive(Debug, Clone, Copy, Eq, Hash, PartialEq, PartialOrd, Ord)] 447 | pub enum Fixity { 448 | In, Post, Pre 449 | } 450 | 451 | #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)] 452 | pub struct SharedOpDesc(Rc>); 453 | 454 | impl SharedOpDesc { 455 | #[inline] 456 | pub fn new(priority: usize, spec: Specifier) -> Self { 457 | SharedOpDesc(Rc::new(Cell::new((priority, spec)))) 458 | } 459 | 460 | #[inline] 461 | pub fn ptr_eq(lop_desc: &SharedOpDesc, rop_desc: &SharedOpDesc) -> bool { 462 | Rc::ptr_eq(&lop_desc.0, &rop_desc.0) 463 | } 464 | 465 | #[inline] 466 | pub fn arity(&self) -> usize { 467 | if self.get().1 & (XFX | XFY | YFX) == 0 { 468 | 1 469 | } else { 470 | 2 471 | } 472 | } 473 | 474 | #[inline] 475 | pub fn get(&self) -> (usize, Specifier) { 476 | self.0.get() 477 | } 478 | 479 | #[inline] 480 | pub fn set(&self, prec: usize, spec: Specifier) { 481 | self.0.set((prec, spec)); 482 | } 483 | 484 | #[inline] 485 | pub fn prec(&self) -> usize { 486 | self.0.get().0 487 | } 488 | 489 | #[inline] 490 | pub fn assoc(&self) -> Specifier { 491 | self.0.get().1 492 | } 493 | } 494 | 495 | // this ensures that SharedOpDesc (which is not consistently placed in 496 | // every atom!) doesn't affect the value of an atom hash. 
If 497 | // SharedOpDesc values are to be indexed, a BTreeMap or BTreeSet 498 | // should be used, obviously. 499 | impl Hash for SharedOpDesc { 500 | fn hash(&self, state: &mut H) { 501 | 0.hash(state) 502 | } 503 | } 504 | 505 | #[derive(Debug, Clone, Hash)] 506 | pub enum Constant { 507 | Atom(ClauseName, Option), 508 | Char(char), 509 | EmptyList, 510 | Fixnum(isize), 511 | Integer(Rc), 512 | Rational(Rc), 513 | Float(OrderedFloat), 514 | String(Rc), 515 | Usize(usize), 516 | } 517 | 518 | impl fmt::Display for Constant { 519 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 520 | match self { 521 | &Constant::Atom(ref atom, _) => 522 | if atom.as_str().chars().any(|c| "`.$'\" ".contains(c)) { 523 | write!(f, "'{}'", atom.as_str()) 524 | } else { 525 | write!(f, "{}", atom.as_str()) 526 | }, 527 | &Constant::Char(c) => 528 | write!(f, "'{}'", c as u32), 529 | &Constant::EmptyList => 530 | write!(f, "[]"), 531 | &Constant::Fixnum(n) => 532 | write!(f, "{}", n), 533 | &Constant::Integer(ref n) => 534 | write!(f, "{}", n), 535 | &Constant::Rational(ref n) => 536 | write!(f, "{}", n), 537 | &Constant::Float(ref n) => 538 | write!(f, "{}", n), 539 | &Constant::String(ref s) => 540 | write!(f, "\"{}\"", &s), 541 | &Constant::Usize(integer) => 542 | write!(f, "u{}", integer), 543 | } 544 | } 545 | } 546 | 547 | impl PartialEq for Constant { 548 | fn eq(&self, other: &Constant) -> bool { 549 | match (self, other) { 550 | (&Constant::Atom(ref atom, _), &Constant::Char(c)) 551 | | (&Constant::Char(c), &Constant::Atom(ref atom, _)) => { 552 | atom.is_char() && Some(c) == atom.as_str().chars().next() 553 | }, 554 | (&Constant::Atom(ref a1, _), &Constant::Atom(ref a2, _)) => 555 | a1.as_str() == a2.as_str(), 556 | (&Constant::Char(c1), &Constant::Char(c2)) => 557 | c1 == c2, 558 | (&Constant::Fixnum(n1), &Constant::Fixnum(n2)) => 559 | n1 == n2, 560 | (&Constant::Fixnum(n1), &Constant::Integer(ref n2)) | 561 | (&Constant::Integer(ref n2), &Constant::Fixnum(n1)) => { 562 | if let Some(n2) = n2.to_isize() { 563 | n1 == n2 564 | } else { 565 | false 566 | } 567 | } 568 | (&Constant::Integer(ref n1), &Constant::Integer(ref n2)) => 569 | n1 == n2, 570 | (&Constant::Rational(ref n1), &Constant::Rational(ref n2)) => 571 | n1 == n2, 572 | (&Constant::Float(ref n1), &Constant::Float(ref n2)) => 573 | n1 == n2, 574 | (&Constant::String(ref s1), &Constant::String(ref s2)) => { 575 | &s1 == &s2 576 | } 577 | (&Constant::EmptyList, &Constant::EmptyList) => 578 | true, 579 | (&Constant::Usize(u1), &Constant::Usize(u2)) => 580 | u1 == u2, 581 | _ => false 582 | } 583 | } 584 | } 585 | 586 | impl Eq for Constant {} 587 | 588 | impl Constant { 589 | pub fn to_atom(self) -> Option { 590 | match self { 591 | Constant::Atom(a, _) => Some(a.defrock_brackets()), 592 | _ => None 593 | } 594 | } 595 | } 596 | 597 | #[derive(Debug, Clone)] 598 | pub enum ClauseName { 599 | BuiltIn(&'static str), 600 | User(TabledRc) 601 | } 602 | 603 | impl fmt::Display for ClauseName { 604 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 605 | write!(f, "{}", self.as_str()) 606 | } 607 | } 608 | 609 | impl Hash for ClauseName { 610 | fn hash(&self, state: &mut H) { 611 | (*self.as_str()).hash(state) 612 | } 613 | } 614 | 615 | impl PartialEq for ClauseName { 616 | fn eq(&self, other: &ClauseName) -> bool { 617 | *self.as_str() == *other.as_str() 618 | } 619 | } 620 | 621 | impl Eq for ClauseName {} 622 | 623 | impl Ord for ClauseName { 624 | fn cmp(&self, other: &ClauseName) -> Ordering { 625 | 
(*self.as_str()).cmp(other.as_str()) 626 | } 627 | } 628 | 629 | impl PartialOrd for ClauseName { 630 | fn partial_cmp(&self, other: &ClauseName) -> Option { 631 | Some(self.cmp(other)) 632 | } 633 | } 634 | 635 | impl<'a> From<&'a TabledRc> for ClauseName { 636 | fn from(name: &'a TabledRc) -> ClauseName { 637 | ClauseName::User(name.clone()) 638 | } 639 | } 640 | 641 | impl ClauseName { 642 | #[inline] 643 | pub fn owning_module(&self) -> Self { 644 | match self { 645 | &ClauseName::User(ref name) => { 646 | let module = name.owning_module(); 647 | ClauseName::User(TabledRc { atom: module.clone(), 648 | table: TabledData::new(module) }) 649 | }, 650 | _ => clause_name!("user") 651 | } 652 | } 653 | 654 | #[inline] 655 | pub fn to_rc(&self) -> Rc { 656 | match self { 657 | &ClauseName::BuiltIn(s) => Rc::new(s.to_string()), 658 | &ClauseName::User(ref rc) => rc.inner() 659 | } 660 | } 661 | 662 | #[inline] 663 | pub fn with_table(self, atom_tbl: TabledData) -> Self { 664 | match self { 665 | ClauseName::BuiltIn(_) => self, 666 | ClauseName::User(mut name) => { 667 | name.table = atom_tbl; 668 | ClauseName::User(name) 669 | } 670 | } 671 | } 672 | 673 | #[inline] 674 | pub fn has_table(&self, atom_tbl: &TabledData) -> bool { 675 | match self { 676 | ClauseName::BuiltIn(_) => false, 677 | ClauseName::User(ref name) => &name.table == atom_tbl, 678 | } 679 | } 680 | 681 | #[inline] 682 | pub fn has_table_of(&self, other: &ClauseName) -> bool { 683 | match self { 684 | ClauseName::BuiltIn(_) => { 685 | if let ClauseName::BuiltIn(_) = other { 686 | true 687 | } else { 688 | false 689 | } 690 | } 691 | ClauseName::User(ref name) => { 692 | other.has_table(&name.table) 693 | } 694 | } 695 | } 696 | 697 | #[inline] 698 | pub fn as_str(&self) -> &str { 699 | match self { 700 | &ClauseName::BuiltIn(s) => s, 701 | &ClauseName::User(ref name) => name.as_ref() 702 | } 703 | } 704 | 705 | #[inline] 706 | pub fn is_char(&self) -> bool { 707 | !self.as_str().is_empty() && self.as_str().chars().skip(1).next().is_none() 708 | } 709 | 710 | pub fn defrock_brackets(self) -> Self { 711 | fn defrock_brackets(s: &str) -> &str { 712 | if s.starts_with('(') && s.ends_with(')') { 713 | &s[1 .. 
s.len() - 1] 714 | } else { 715 | s 716 | } 717 | } 718 | 719 | match self { 720 | ClauseName::BuiltIn(s) => 721 | ClauseName::BuiltIn(defrock_brackets(s)), 722 | ClauseName::User(s) => 723 | ClauseName::User(tabled_rc!(defrock_brackets(s.as_str()).to_owned(), s.table)) 724 | } 725 | } 726 | } 727 | 728 | impl AsRef for ClauseName { 729 | #[inline] 730 | fn as_ref(self: &Self) -> &str { 731 | self.as_str() 732 | } 733 | } 734 | 735 | #[derive(Debug, PartialEq, Eq, Clone)] 736 | pub enum Term { 737 | AnonVar, 738 | Clause(Cell, ClauseName, Vec>, Option), 739 | Cons(Cell, Box, Box), 740 | Constant(Cell, Constant), 741 | Var(Cell, Rc) 742 | } 743 | 744 | impl Term { 745 | pub fn shared_op_desc(&self) -> Option { 746 | match self { 747 | &Term::Clause(_, _, _, ref spec) => spec.clone(), 748 | &Term::Constant(_, Constant::Atom(_, ref spec)) => spec.clone(), 749 | _ => None 750 | } 751 | } 752 | 753 | pub fn to_constant(self) -> Option { 754 | match self { 755 | Term::Constant(_, c) => Some(c), 756 | _ => None 757 | } 758 | } 759 | 760 | pub fn first_arg(&self) -> Option<&Term> { 761 | match self { 762 | &Term::Clause(_, _, ref terms, _) => 763 | terms.first().map(|bt| bt.as_ref()), 764 | _ => None 765 | } 766 | } 767 | 768 | pub fn set_name(&mut self, new_name: ClauseName) { 769 | match self { 770 | Term::Constant(_, Constant::Atom(ref mut atom, _)) 771 | | Term::Clause(_, ref mut atom, ..) => { 772 | *atom = new_name; 773 | } 774 | _ => {} 775 | } 776 | } 777 | 778 | pub fn name(&self) -> Option { 779 | match self { 780 | &Term::Constant(_, Constant::Atom(ref atom, _)) 781 | | &Term::Clause(_, ref atom, ..) => Some(atom.clone()), 782 | _ => None 783 | } 784 | } 785 | 786 | pub fn arity(&self) -> usize { 787 | match self { 788 | &Term::Clause(_, _, ref child_terms, ..) => child_terms.len(), 789 | _ => 0 790 | } 791 | } 792 | } 793 | 794 | #[derive(Debug, Clone, Copy)] 795 | pub struct CompositeOp<'a, 'b> { 796 | pub op_dir: &'a OpDir, 797 | pub static_op_dir: Option<&'b OpDir> 798 | } 799 | 800 | #[macro_export] 801 | macro_rules! 
composite_op { 802 | ($include_machine_p:expr, $op_dir:expr, $machine_op_dir:expr) => ( 803 | CompositeOp { op_dir: $op_dir, 804 | static_op_dir: if !$include_machine_p { 805 | Some($machine_op_dir) 806 | } else { 807 | None 808 | }} 809 | ); 810 | ($op_dir:expr) => ( 811 | CompositeOp { op_dir: $op_dir, static_op_dir: None } 812 | ) 813 | } 814 | 815 | impl<'a, 'b> CompositeOp<'a, 'b> 816 | { 817 | #[inline] 818 | pub(crate) 819 | fn get(&self, name: ClauseName, fixity: Fixity) -> Option 820 | { 821 | let entry = 822 | if let Some(ref static_op_dir) = &self.static_op_dir { 823 | static_op_dir.get(&(name.clone(), fixity)) 824 | } else { 825 | None 826 | }; 827 | 828 | entry.or_else(move || self.op_dir.get(&(name, fixity))) 829 | .cloned() 830 | } 831 | } 832 | 833 | fn unfold_by_str_once(term: &mut Term, s: &str) -> Option<(Term, Term)> { 834 | if let &mut Term::Clause(_, ref name, ref mut subterms, _) = term { 835 | if name.as_str() == s && subterms.len() == 2 { 836 | let snd = *subterms.pop().unwrap(); 837 | let fst = *subterms.pop().unwrap(); 838 | 839 | return Some((fst, snd)); 840 | } 841 | } 842 | 843 | None 844 | } 845 | 846 | pub fn unfold_by_str(mut term: Term, s: &str) -> Vec { 847 | let mut terms = vec![]; 848 | 849 | while let Some((fst, snd)) = unfold_by_str_once(&mut term, s) { 850 | terms.push(fst); 851 | term = snd; 852 | } 853 | 854 | terms.push(term); 855 | terms 856 | } 857 | 858 | pub type ParsingStream = PutBackN>>; 859 | 860 | use unicode_reader::BadUtf8Error; 861 | 862 | #[inline] 863 | pub fn parsing_stream(src: R) -> Result, ParserError> { 864 | let mut stream = put_back_n(CodePoints::from(src.bytes())); 865 | match stream.peek() { 866 | None => Ok(stream), // empty stream is handled gracefully by Lexer::eof 867 | Some(Err(error)) => Err(ParserError::from(error)), 868 | Some(Ok(c)) => { 869 | if *c == '\u{feff}' { 870 | // skip UTF-8 BOM 871 | stream.next(); 872 | } 873 | Ok(stream) 874 | } 875 | } 876 | } 877 | -------------------------------------------------------------------------------- /src/lexer.rs: -------------------------------------------------------------------------------- 1 | use crate::lexical::parse_lossy; 2 | use crate::ordered_float::*; 3 | use crate::rug::Integer; 4 | 5 | use ast::*; 6 | use tabled_rc::*; 7 | 8 | use std::convert::TryFrom; 9 | use std::fmt; 10 | use std::io::Read; 11 | use std::rc::Rc; 12 | 13 | macro_rules! is_not_eof { 14 | ($c:expr) => ( 15 | match $c { 16 | Ok(c) => c, 17 | Err(ParserError::UnexpectedEOF) => return Ok(true), 18 | Err(e) => return Err(e) 19 | } 20 | ) 21 | } 22 | 23 | macro_rules! 
consume_chars_with { 24 | ($token:expr, $e:expr) => { 25 | loop { 26 | match $e { 27 | Ok(Some(c)) => $token.push(c), 28 | Ok(None) => continue, 29 | Err(ParserError::UnexpectedChar(..)) => break, 30 | Err(e) => return Err(e) 31 | } 32 | } 33 | } 34 | } 35 | 36 | #[derive(Debug, Clone, PartialEq)] 37 | pub enum Token { 38 | Constant(Constant), 39 | Var(Rc), 40 | Open, // '(' 41 | OpenCT, // '(' 42 | Close, // ')' 43 | OpenList, // '[' 44 | CloseList, // ']' 45 | OpenCurly, // '{' 46 | CloseCurly, // '}' 47 | HeadTailSeparator, // '|' 48 | Comma, // ',' 49 | End 50 | } 51 | 52 | pub struct Lexer<'a, R: Read> { 53 | pub(crate) atom_tbl: TabledData, 54 | pub(crate) reader: &'a mut ParsingStream, 55 | pub(crate) flags: MachineFlags, 56 | pub(crate) line_num: usize, 57 | pub(crate) col_num: usize 58 | } 59 | 60 | impl<'a, R: Read + fmt::Debug> fmt::Debug for Lexer<'a, R> { 61 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 62 | f.debug_struct("Lexer") 63 | .field("atom_tbl", &self.atom_tbl) 64 | .field("reader", &"&'a mut ParsingStream") // Hacky solution. 65 | .field("line_num", &self.line_num) 66 | .field("col_num", &self.col_num) 67 | .finish() 68 | } 69 | } 70 | 71 | impl<'a, R: Read> Lexer<'a, R> { 72 | pub fn new( 73 | atom_tbl: TabledData, 74 | flags: MachineFlags, 75 | src: &'a mut ParsingStream, 76 | ) -> Self { 77 | Lexer { atom_tbl, flags, reader: src, line_num: 0, col_num: 0 } 78 | } 79 | 80 | fn return_char(&mut self, c: char) { 81 | if new_line_char!(c) { 82 | self.line_num -= 1; 83 | self.col_num = 0; 84 | } 85 | 86 | self.reader.put_back(Ok(c)); 87 | } 88 | 89 | fn skip_char(&mut self) -> Result { 90 | if let Some(Ok(c)) = self.reader.next() { 91 | self.col_num += 1; 92 | 93 | if new_line_char!(c) { 94 | self.line_num += 1; 95 | self.col_num = 0; 96 | } 97 | 98 | Ok(c) 99 | } else { 100 | Err(ParserError::UnexpectedEOF) 101 | } 102 | } 103 | 104 | pub fn eof(&mut self) -> Result { 105 | if self.reader.peek().is_none() { 106 | return Ok(true); 107 | } 108 | 109 | let mut c = is_not_eof!(self.lookahead_char()); 110 | 111 | while layout_char!(c) { 112 | self.skip_char()?; 113 | 114 | if self.reader.peek().is_none() { 115 | return Ok(true); 116 | } 117 | 118 | c = is_not_eof!(self.lookahead_char()); 119 | } 120 | 121 | Ok(false) 122 | } 123 | 124 | pub fn lookahead_char(&mut self) -> Result { 125 | match self.reader.peek() { 126 | Some(&Ok(c)) => Ok(c), 127 | _ => Err(ParserError::UnexpectedEOF), 128 | } 129 | } 130 | 131 | fn single_line_comment(&mut self) -> Result<(), ParserError> 132 | { 133 | loop { 134 | if self.reader.peek().is_none() || new_line_char!(self.skip_char()?) { 135 | break; 136 | } 137 | } 138 | 139 | Ok(()) 140 | } 141 | 142 | fn bracketed_comment(&mut self) -> Result { 143 | // we have already checked that the current lookahead_char is comment_1_char, just skip it 144 | let c = self.skip_char()?; 145 | 146 | if comment_2_char!(self.lookahead_char()?) { 147 | self.skip_char()?; 148 | 149 | // Keep reading until we find characters '*' and '/' 150 | // Deliberately skip checks for prolog_char to allow comments to contain any characters, 151 | // including so-called "extended characters", without having to explicitly add them to a character class. 
152 | let mut c = self.lookahead_char()?; 153 | loop { 154 | while !comment_2_char!(c) { 155 | self.skip_char()?; 156 | c = self.lookahead_char()?; 157 | } 158 | 159 | self.skip_char()?; 160 | 161 | c = self.lookahead_char()?; 162 | if comment_1_char!(c) { 163 | break; 164 | } 165 | } 166 | 167 | if prolog_char!(c) { 168 | self.skip_char()?; 169 | Ok(true) 170 | } else { 171 | Err(ParserError::NonPrologChar(self.line_num, self.col_num)) 172 | } 173 | } else { 174 | self.return_char(c); 175 | Ok(false) 176 | } 177 | } 178 | 179 | fn get_back_quoted_char(&mut self) -> Result { 180 | if back_quote_char!(self.lookahead_char()?) { 181 | let c = self.skip_char()?; 182 | 183 | if !back_quote_char!(self.lookahead_char()?) { 184 | self.return_char(c); 185 | Err(ParserError::UnexpectedChar(c, self.line_num, self.col_num)) 186 | } else { 187 | self.skip_char() 188 | } 189 | } else if single_quote_char!(self.lookahead_char()?) { 190 | self.skip_char() 191 | } else { 192 | self.get_non_quote_char() 193 | } 194 | } 195 | 196 | fn get_back_quoted_item(&mut self) -> Result, ParserError> { 197 | if backslash_char!(self.lookahead_char()?) { 198 | let c = self.skip_char()?; 199 | 200 | if new_line_char!(self.lookahead_char()?) { 201 | self.skip_char()?; 202 | Ok(None) 203 | } else { 204 | self.return_char(c); 205 | Err(ParserError::UnexpectedChar(c, self.line_num, self.col_num)) 206 | } 207 | } else { 208 | self.get_back_quoted_char().map(Some) 209 | } 210 | } 211 | 212 | fn get_back_quoted_string(&mut self) -> Result { 213 | let c = self.lookahead_char()?; 214 | 215 | if back_quote_char!(c) { 216 | self.skip_char()?; 217 | 218 | let mut token = String::new(); 219 | consume_chars_with!(token, self.get_back_quoted_item()); 220 | 221 | if back_quote_char!(self.lookahead_char()?) { 222 | self.skip_char()?; 223 | Ok(token) 224 | } else { 225 | Err(ParserError::MissingQuote(self.line_num, self.col_num)) 226 | } 227 | } else { 228 | Err(ParserError::UnexpectedChar(c, self.line_num, self.col_num)) 229 | } 230 | } 231 | 232 | fn get_single_quoted_item(&mut self) -> Result, ParserError> 233 | { 234 | if backslash_char!(self.lookahead_char()?) { 235 | let c = self.skip_char()?; 236 | 237 | if new_line_char!(self.lookahead_char()?) { 238 | self.skip_char()?; 239 | return Ok(None); 240 | } else { 241 | self.return_char(c); 242 | } 243 | } 244 | 245 | self.get_single_quoted_char().map(Some) 246 | } 247 | 248 | fn get_single_quoted_char(&mut self) -> Result { 249 | let c = self.lookahead_char()?; 250 | 251 | if single_quote_char!(c) { 252 | self.skip_char()?; 253 | 254 | if !single_quote_char!(self.lookahead_char()?) { 255 | self.return_char(c); 256 | Err(ParserError::UnexpectedChar(c, self.line_num, self.col_num)) 257 | } else { 258 | self.skip_char() 259 | } 260 | } else if double_quote_char!(c) || back_quote_char!(c) { 261 | self.skip_char() 262 | } else { 263 | self.get_non_quote_char() 264 | } 265 | } 266 | 267 | fn get_double_quoted_item(&mut self) -> Result, ParserError> 268 | { 269 | if backslash_char!(self.lookahead_char()?) { 270 | let c = self.skip_char()?; 271 | 272 | if new_line_char!(self.lookahead_char()?) { 273 | self.skip_char()?; 274 | return Ok(None) 275 | } else { 276 | self.return_char(c); 277 | } 278 | } 279 | 280 | self.get_double_quoted_char().map(Some) 281 | } 282 | 283 | fn get_double_quoted_char(&mut self) -> Result { 284 | if double_quote_char!(self.lookahead_char()?) { 285 | let c = self.skip_char()?; 286 | 287 | if !double_quote_char!(self.lookahead_char()?) 
{ 288 | self.return_char(c); 289 | Err(ParserError::UnexpectedChar(c, self.line_num, self.col_num)) 290 | } else { 291 | self.skip_char() 292 | } 293 | } else if single_quote_char!(self.lookahead_char()?) { 294 | self.skip_char() 295 | } else if back_quote_char!(self.lookahead_char()?) { 296 | self.skip_char() 297 | } else { 298 | self.get_non_quote_char() 299 | } 300 | } 301 | 302 | fn get_control_escape_sequence(&mut self) -> Result 303 | { 304 | let escaped = match self.lookahead_char()? { 305 | 'a' => '\u{07}', // UTF-8 alert 306 | 'b' => '\u{08}', // UTF-8 backspace 307 | 'v' => '\u{0b}', // UTF-8 vertical tab 308 | 'f' => '\u{0c}', // UTF-8 form feed 309 | 't' => '\t', 310 | 'n' => '\n', 311 | 'r' => '\r', 312 | c => return Err(ParserError::UnexpectedChar(c, self.line_num, self.col_num)) 313 | }; 314 | 315 | self.skip_char()?; 316 | return Ok(escaped); 317 | } 318 | 319 | fn get_octal_escape_sequence(&mut self) -> Result 320 | { 321 | self.escape_sequence_to_char(|c| octal_digit_char!(c), 8) 322 | } 323 | 324 | fn get_hexadecimal_escape_sequence(&mut self) -> Result 325 | { 326 | self.skip_char()?; 327 | let c = self.lookahead_char()?; 328 | 329 | if hexadecimal_digit_char!(c) { 330 | self.escape_sequence_to_char(|c| hexadecimal_digit_char!(c), 16) 331 | } else { 332 | Err(ParserError::UnexpectedChar(c, self.line_num, self.col_num)) 333 | } 334 | } 335 | 336 | fn escape_sequence_to_char( 337 | &mut self, 338 | accept_char: impl Fn(char) -> bool, 339 | radix: u32, 340 | ) -> Result { 341 | let mut c = self.lookahead_char()?; 342 | let mut token = String::new(); 343 | 344 | loop { 345 | token.push(c); 346 | 347 | self.skip_char()?; 348 | c = self.lookahead_char()?; 349 | 350 | if !accept_char(c) { 351 | break; 352 | } 353 | } 354 | 355 | if backslash_char!(c) { 356 | self.skip_char()?; 357 | u32::from_str_radix(&token, radix) 358 | .map_or_else( 359 | |_| Err(ParserError::ParseBigInt(self.line_num, self.col_num)), 360 | |n| char::try_from(n) 361 | .map_err(|_| ParserError::Utf8Error(self.line_num, self.col_num)) 362 | ) 363 | } else { 364 | // on failure, restore the token characters and backslash. 365 | self.reader.put_back_all(token.chars().map(Ok)); 366 | self.reader.put_back(Ok('\\')); 367 | 368 | Err(ParserError::UnexpectedChar(c, self.line_num, self.col_num)) 369 | } 370 | } 371 | 372 | fn get_non_quote_char(&mut self) -> Result { 373 | let c = self.lookahead_char()?; 374 | 375 | if graphic_char!(c) || alpha_numeric_char!(c) || solo_char!(c) || space_char!(c) { 376 | self.skip_char() 377 | } else { 378 | if !backslash_char!(c) { 379 | return Err(ParserError::UnexpectedChar(c, self.line_num, self.col_num)); 380 | } 381 | 382 | self.skip_char()?; 383 | 384 | let c = self.lookahead_char()?; 385 | 386 | if meta_char!(c) { 387 | self.skip_char() 388 | } else if octal_digit_char!(c) { 389 | self.get_octal_escape_sequence() 390 | } else if symbolic_hexadecimal_char!(c) { 391 | self.get_hexadecimal_escape_sequence() 392 | } else { 393 | self.get_control_escape_sequence() 394 | } 395 | } 396 | } 397 | 398 | fn char_code_list_token(&mut self) -> Result { 399 | let mut token = String::new(); 400 | 401 | self.skip_char()?; 402 | consume_chars_with!(token, self.get_double_quoted_item()); 403 | 404 | if double_quote_char!(self.lookahead_char()?) 
{ 405 | self.skip_char()?; 406 | Ok(token) 407 | } else { 408 | Err(ParserError::MissingQuote(self.line_num, self.col_num)) 409 | } 410 | } 411 | 412 | fn hexadecimal_constant(&mut self) -> Result { 413 | self.skip_char()?; 414 | 415 | if hexadecimal_digit_char!(self.lookahead_char()?) { 416 | let mut token = String::new(); 417 | 418 | while hexadecimal_digit_char!(self.lookahead_char()?) { 419 | token.push(self.skip_char()?); 420 | } 421 | 422 | isize::from_str_radix(&token, 16) 423 | .map(|n| Token::Constant(Constant::Fixnum(n))) 424 | .or_else(|_| { 425 | Integer::from_str_radix(&token, 16) 426 | .map(|n| Token::Constant(Constant::Integer(Rc::new(n)))) 427 | .map_err(|_| ParserError::ParseBigInt( 428 | self.line_num, 429 | self.col_num, 430 | )) 431 | }) 432 | } else { 433 | self.return_char('x'); 434 | Err(ParserError::ParseBigInt(self.line_num, self.col_num)) 435 | } 436 | } 437 | 438 | fn octal_constant(&mut self) -> Result { 439 | self.skip_char()?; 440 | 441 | if octal_digit_char!(self.lookahead_char()?) { 442 | let mut token = String::new(); 443 | 444 | while octal_digit_char!(self.lookahead_char()?) { 445 | token.push(self.skip_char()?); 446 | } 447 | 448 | isize::from_str_radix(&token, 8) 449 | .map(|n| Token::Constant(Constant::Fixnum(n))) 450 | .or_else(|_| { 451 | Integer::from_str_radix(&token, 8) 452 | .map(|n| Token::Constant(Constant::Integer(Rc::new(n)))) 453 | .map_err(|_| ParserError::ParseBigInt( 454 | self.line_num, 455 | self.col_num, 456 | )) 457 | }) 458 | } else { 459 | self.return_char('o'); 460 | Err(ParserError::ParseBigInt(self.line_num, self.col_num)) 461 | } 462 | } 463 | 464 | fn binary_constant(&mut self) -> Result { 465 | self.skip_char()?; 466 | 467 | if binary_digit_char!(self.lookahead_char()?) { 468 | let mut token = String::new(); 469 | 470 | while binary_digit_char!(self.lookahead_char()?) { 471 | token.push(self.skip_char()?); 472 | } 473 | 474 | isize::from_str_radix(&token, 2) 475 | .map(|n| Token::Constant(Constant::Fixnum(n))) 476 | .or_else(|_| { 477 | Integer::from_str_radix(&token, 2) 478 | .map(|n| Token::Constant(Constant::Integer(Rc::new(n)))) 479 | .map_err(|_| ParserError::ParseBigInt( 480 | self.line_num, 481 | self.col_num, 482 | )) 483 | }) 484 | } else { 485 | self.return_char('b'); 486 | Err(ParserError::ParseBigInt(self.line_num, self.col_num)) 487 | } 488 | } 489 | 490 | fn variable_token(&mut self) -> Result { 491 | let mut s = String::new(); 492 | s.push(self.skip_char()?); 493 | 494 | while alpha_numeric_char!(self.lookahead_char()?) { 495 | s.push(self.skip_char()?); 496 | } 497 | 498 | Ok(Token::Var(rc_atom!(s))) 499 | } 500 | 501 | fn name_token(&mut self, c: char) -> Result { 502 | let mut token = String::new(); 503 | 504 | if small_letter_char!(c) { 505 | token.push(self.skip_char()?); 506 | 507 | while alpha_numeric_char!(self.lookahead_char()?) { 508 | token.push(self.skip_char()?); 509 | } 510 | } else if graphic_token_char!(c) { 511 | token.push(self.skip_char()?); 512 | 513 | while graphic_token_char!(self.lookahead_char()?) { 514 | token.push(self.skip_char()?); 515 | } 516 | } else if cut_char!(c) { 517 | token.push(self.skip_char()?); 518 | } else if semicolon_char!(c) { 519 | token.push(self.skip_char()?); 520 | } else if single_quote_char!(c) { 521 | self.skip_char()?; 522 | 523 | consume_chars_with!(token, self.get_single_quoted_item()); 524 | 525 | if single_quote_char!(self.lookahead_char()?) 
{ 526 | self.skip_char()?; 527 | 528 | if !token.is_empty() && token.chars().skip(1).next().is_none() { 529 | if let Some(c) = token.chars().next() { 530 | return Ok(Token::Constant(Constant::Char(c))); 531 | } 532 | } 533 | } else { 534 | return Err(ParserError::InvalidSingleQuotedCharacter(self.lookahead_char()?)) 535 | } 536 | } else { 537 | match self.get_back_quoted_string() { 538 | Ok(_) => return Err(ParserError::BackQuotedString(self.line_num, self.col_num)), 539 | Err(e) => return Err(e) 540 | } 541 | } 542 | 543 | if token.as_str() == "[]" { 544 | Ok(Token::Constant(Constant::EmptyList)) 545 | } else { 546 | Ok(Token::Constant(atom!(token, self.atom_tbl))) 547 | } 548 | } 549 | 550 | fn vacate_with_float(&mut self, mut token: String) -> Token { 551 | self.return_char(token.pop().unwrap()); 552 | 553 | let result = OrderedFloat(parse_lossy::(token.as_bytes())); 554 | Token::Constant(Constant::Float(result)) 555 | } 556 | 557 | pub fn number_token(&mut self) -> Result { 558 | let mut token = String::new(); 559 | 560 | token.push(self.skip_char()?); 561 | let mut c = self.lookahead_char()?; 562 | 563 | while decimal_digit_char!(c) { 564 | token.push(c); 565 | self.skip_char()?; 566 | c = self.lookahead_char()?; 567 | } 568 | 569 | if decimal_point_char!(c) { 570 | self.skip_char()?; 571 | 572 | if self.reader.peek().is_none() { 573 | self.return_char('.'); 574 | 575 | isize::from_str_radix(&token, 10) 576 | .map(|n| Token::Constant(Constant::Fixnum(n))) 577 | .or_else(|_| { 578 | token.parse::() 579 | .map(|n| Token::Constant(Constant::Integer(Rc::new(n)))) 580 | .map_err(|_| ParserError::ParseBigInt( 581 | self.line_num, 582 | self.col_num, 583 | )) 584 | }) 585 | } else if decimal_digit_char!(self.lookahead_char()?) { 586 | token.push('.'); 587 | token.push(self.skip_char()?); 588 | 589 | let mut c = self.lookahead_char()?; 590 | 591 | while decimal_digit_char!(c) { 592 | token.push(c); 593 | self.skip_char()?; 594 | c = self.lookahead_char()?; 595 | } 596 | 597 | if exponent_char!(self.lookahead_char()?) { 598 | token.push(self.skip_char()?); 599 | 600 | let c = match self.lookahead_char() { 601 | Err(_) => return Ok(self.vacate_with_float(token)), 602 | Ok(c) => c 603 | }; 604 | 605 | if !sign_char!(c) && !decimal_digit_char!(c) { 606 | return Ok(self.vacate_with_float(token)); 607 | } 608 | 609 | if sign_char!(c) { 610 | token.push(self.skip_char()?); 611 | 612 | let c = match self.lookahead_char() { 613 | Err(_) => { 614 | self.return_char(token.pop().unwrap()); 615 | return Ok(self.vacate_with_float(token)); 616 | }, 617 | Ok(c) => c 618 | }; 619 | 620 | if !decimal_digit_char!(c) { 621 | self.return_char(token.pop().unwrap()); 622 | return Ok(self.vacate_with_float(token)); 623 | } 624 | } 625 | 626 | if decimal_digit_char!(self.lookahead_char()?) { 627 | token.push(self.skip_char()?); 628 | 629 | while decimal_digit_char!(self.lookahead_char()?) 
{ 630 | token.push(self.skip_char()?); 631 | } 632 | 633 | let n = OrderedFloat(parse_lossy::(token.as_bytes())); 634 | Ok(Token::Constant(Constant::Float(n))) 635 | } else { 636 | return Ok(self.vacate_with_float(token)); 637 | } 638 | } else { 639 | let n = OrderedFloat(parse_lossy::(token.as_bytes())); 640 | Ok(Token::Constant(Constant::Float(n))) 641 | } 642 | } else { 643 | self.return_char('.'); 644 | 645 | isize::from_str_radix(&token, 10) 646 | .map(|n| Token::Constant(Constant::Fixnum(n))) 647 | .or_else(|_| { 648 | token.parse::() 649 | .map(|n| Token::Constant(Constant::Integer(Rc::new(n)))) 650 | .map_err(|_| ParserError::ParseBigInt( 651 | self.line_num, 652 | self.col_num, 653 | )) 654 | }) 655 | } 656 | } else { 657 | if token.starts_with('0') && token.len() == 1 { 658 | if c == 'x' { 659 | self.hexadecimal_constant() 660 | .or_else(|e| { 661 | if let ParserError::ParseBigInt(..) = e { 662 | isize::from_str_radix(&token, 10) 663 | .map(|n| Token::Constant(Constant::Fixnum(n))) 664 | .or_else(|_| { 665 | token.parse::() 666 | .map(|n| Token::Constant(Constant::Integer(Rc::new(n)))) 667 | .map_err(|_| ParserError::ParseBigInt( 668 | self.line_num, 669 | self.col_num, 670 | )) 671 | }) 672 | } else { 673 | Err(e) 674 | } 675 | }) 676 | } else if c == 'o' { 677 | self.octal_constant() 678 | .or_else(|e| { 679 | if let ParserError::ParseBigInt(..) = e { 680 | isize::from_str_radix(&token, 10) 681 | .map(|n| Token::Constant(Constant::Fixnum(n))) 682 | .or_else(|_| { 683 | token.parse::() 684 | .map(|n| Token::Constant(Constant::Integer(Rc::new(n)))) 685 | .map_err(|_| ParserError::ParseBigInt( 686 | self.line_num, 687 | self.col_num, 688 | )) 689 | }) 690 | } else { 691 | Err(e) 692 | } 693 | }) 694 | } else if c == 'b' { 695 | self.binary_constant() 696 | .or_else(|e| { 697 | if let ParserError::ParseBigInt(..) = e { 698 | isize::from_str_radix(&token, 10) 699 | .map(|n| Token::Constant(Constant::Fixnum(n))) 700 | .or_else(|_| { 701 | token.parse::() 702 | .map(|n| Token::Constant(Constant::Integer(Rc::new(n)))) 703 | .map_err(|_| ParserError::ParseBigInt( 704 | self.line_num, 705 | self.col_num, 706 | )) 707 | }) 708 | } else { 709 | Err(e) 710 | } 711 | }) 712 | } else if single_quote_char!(c) { 713 | self.skip_char()?; 714 | 715 | if backslash_char!(self.lookahead_char()?) { 716 | self.skip_char()?; 717 | 718 | if new_line_char!(self.lookahead_char()?) 
{ 719 | self.return_char('\\'); 720 | self.return_char('\''); 721 | 722 | return Ok(Token::Constant(Constant::Fixnum(0))); 723 | } else { 724 | self.return_char('\\'); 725 | } 726 | } 727 | 728 | self.get_single_quoted_char() 729 | .and_then(|c| { 730 | Ok(Token::Constant(Constant::Fixnum(c as isize))) 731 | }) 732 | .or_else(|_| { 733 | self.return_char(c); 734 | 735 | isize::from_str_radix(&token, 10) 736 | .map(|n| Token::Constant(Constant::Fixnum(n))) 737 | .or_else(|_| { 738 | token.parse::() 739 | .map(|n| Token::Constant(Constant::Integer(Rc::new(n)))) 740 | .map_err(|_| ParserError::ParseBigInt( 741 | self.line_num, 742 | self.col_num, 743 | )) 744 | }) 745 | }) 746 | } else { 747 | isize::from_str_radix(&token, 10) 748 | .map(|n| Token::Constant(Constant::Fixnum(n))) 749 | .or_else(|_| { 750 | token.parse::() 751 | .map(|n| Token::Constant(Constant::Integer(Rc::new(n)))) 752 | .map_err(|_| ParserError::ParseBigInt( 753 | self.line_num, 754 | self.col_num, 755 | )) 756 | }) 757 | } 758 | } else { 759 | isize::from_str_radix(&token, 10) 760 | .map(|n| Token::Constant(Constant::Fixnum(n))) 761 | .or_else(|_| { 762 | token.parse::() 763 | .map(|n| Token::Constant(Constant::Integer(Rc::new(n)))) 764 | .map_err(|_| ParserError::ParseBigInt( 765 | self.line_num, 766 | self.col_num, 767 | )) 768 | }) 769 | } 770 | } 771 | } 772 | 773 | pub fn scan_for_layout(&mut self) -> Result { 774 | let mut layout_inserted = false; 775 | let mut more_layout = true; 776 | 777 | loop { 778 | let cr = self.lookahead_char(); 779 | 780 | match cr { 781 | Ok(c) if layout_char!(c) || new_line_char!(c) => { 782 | self.skip_char()?; 783 | layout_inserted = true; 784 | }, 785 | Ok(c) if end_line_comment_char!(c) => { 786 | self.single_line_comment()?; 787 | layout_inserted = true; 788 | }, 789 | Ok(c) if comment_1_char!(c) => 790 | if self.bracketed_comment()? { 791 | layout_inserted = true; 792 | } else { 793 | more_layout = false; 794 | }, 795 | _ => more_layout = false 796 | }; 797 | 798 | if !more_layout { 799 | break; 800 | } 801 | } 802 | 803 | Ok(layout_inserted) 804 | } 805 | 806 | pub fn next_token(&mut self) -> Result { 807 | let layout_inserted = self.scan_for_layout()?; 808 | let cr = self.lookahead_char(); 809 | 810 | match cr { 811 | Ok(c) => { 812 | if capital_letter_char!(c) || variable_indicator_char!(c) { 813 | return self.variable_token(); 814 | } 815 | 816 | if c == ',' { 817 | self.skip_char()?; 818 | return Ok(Token::Comma); 819 | } 820 | 821 | if c == ')' { 822 | self.skip_char()?; 823 | return Ok(Token::Close); 824 | } 825 | 826 | if c == '(' { 827 | self.skip_char()?; 828 | return Ok(if layout_inserted { Token::Open } 829 | else { Token::OpenCT }); 830 | } 831 | 832 | if c == '.' 
{ 833 | self.skip_char()?; 834 | 835 | match self.lookahead_char() { 836 | Ok(c) if layout_char!(c) || c == '%' => { 837 | if new_line_char!(c) { 838 | self.skip_char()?; 839 | } 840 | 841 | return Ok(Token::End); 842 | }, 843 | Err(ParserError::UnexpectedEOF) => { 844 | return Ok(Token::End); 845 | } 846 | _ => { 847 | self.return_char('.'); 848 | } 849 | }; 850 | } 851 | 852 | if decimal_digit_char!(c) { 853 | return self.number_token(); 854 | } 855 | 856 | if c == ']' { 857 | self.skip_char()?; 858 | return Ok(Token::CloseList); 859 | } 860 | 861 | if c == '[' { 862 | self.skip_char()?; 863 | return Ok(Token::OpenList); 864 | } 865 | 866 | if c == '|' { 867 | self.skip_char()?; 868 | return Ok(Token::HeadTailSeparator); 869 | } 870 | 871 | if c == '{' { 872 | self.skip_char()?; 873 | return Ok(Token::OpenCurly); 874 | } 875 | 876 | if c == '}' { 877 | self.skip_char()?; 878 | return Ok(Token::CloseCurly); 879 | } 880 | 881 | if c == '"' { 882 | let s = self.char_code_list_token()?; 883 | 884 | if let DoubleQuotes::Atom = self.flags.double_quotes { 885 | let s = clause_name!(s, self.atom_tbl); 886 | return Ok(Token::Constant(Constant::Atom(s, None))); 887 | } else { 888 | let s = Rc::new(s); 889 | return Ok(Token::Constant(Constant::String(s))); 890 | } 891 | } 892 | 893 | self.name_token(c) 894 | }, 895 | Err(e) => Err(e) 896 | } 897 | } 898 | } 899 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | extern crate lexical; 2 | extern crate ordered_float; 3 | #[cfg(feature = "rug")] 4 | extern crate rug; 5 | #[cfg(feature = "num-rug-adapter")] 6 | extern crate num_rug_adapter as rug; 7 | extern crate unicode_reader; 8 | 9 | #[macro_use] pub mod tabled_rc; 10 | #[macro_use] pub mod ast; 11 | #[macro_use] pub mod macros; 12 | pub mod parser; 13 | pub mod put_back_n; 14 | 15 | pub mod lexer; 16 | -------------------------------------------------------------------------------- /src/macros.rs: -------------------------------------------------------------------------------- 1 | #[macro_export] 2 | macro_rules! char_class { 3 | ($c: expr, [$head:expr]) => ($c == $head); 4 | ($c: expr, [$head:expr $(, $cs:expr)+]) => ($c == $head || char_class!($c, [$($cs),*])); 5 | } 6 | 7 | #[macro_export] 8 | macro_rules! symbolic_control_char { 9 | ($c: expr) => (char_class!($c, ['a', 'b', 'f', 'n', 'r', 't', 'v', '0'])) 10 | } 11 | 12 | #[macro_export] 13 | macro_rules! space_char { 14 | ($c: expr) => ($c == ' ') 15 | } 16 | 17 | #[macro_export] 18 | macro_rules! layout_char { 19 | ($c: expr) => (char_class!($c, [' ', '\n', '\t', '\u{0B}', '\u{0C}'])) 20 | } 21 | 22 | #[macro_export] 23 | macro_rules! symbolic_hexadecimal_char { 24 | ($c: expr) => ($c == 'x') 25 | } 26 | 27 | #[macro_export] 28 | macro_rules! octal_digit_char { 29 | ($c: expr) => ($c >= '0' && $c <= '7') 30 | } 31 | 32 | #[macro_export] 33 | macro_rules! binary_digit_char { 34 | ($c: expr) => ($c >= '0' && $c <= '1') 35 | } 36 | 37 | #[macro_export] 38 | macro_rules! hexadecimal_digit_char { 39 | ($c: expr) => ($c >= '0' && $c <= '9' || 40 | $c >= 'A' && $c <= 'F' || 41 | $c >= 'a' && $c <= 'f') 42 | } 43 | 44 | #[macro_export] 45 | macro_rules! exponent_char { 46 | ($c: expr) => ($c == 'e' || $c == 'E') 47 | } 48 | 49 | #[macro_export] 50 | macro_rules! sign_char { 51 | ($c: expr) => ($c == '-' || $c == '+') 52 | } 53 | 54 | #[macro_export] 55 | macro_rules! 
new_line_char { 56 | ($c: expr) => ($c == '\n') 57 | } 58 | 59 | #[macro_export] 60 | macro_rules! end_line_comment_char { 61 | ($c: expr) => ($c == '%') 62 | } 63 | 64 | #[macro_export] 65 | macro_rules! comment_1_char { 66 | ($c: expr) => ($c == '/') 67 | } 68 | 69 | #[macro_export] 70 | macro_rules! comment_2_char { 71 | ($c: expr) => ($c == '*') 72 | } 73 | 74 | #[macro_export] 75 | macro_rules! capital_letter_char { 76 | ($c: expr) => ($c >= 'A' && $c <= 'Z') 77 | } 78 | 79 | #[macro_export] 80 | macro_rules! small_letter_char { 81 | ($c: expr) => ($c >= 'a' && $c <= 'z') 82 | } 83 | 84 | #[macro_export] 85 | macro_rules! variable_indicator_char { 86 | ($c: expr) => ($c == '_') 87 | } 88 | 89 | #[macro_export] 90 | macro_rules! graphic_char { 91 | ($c: expr) => (char_class!($c, ['#', '$', '&', '*', '+', '-', '.', '/', ':', 92 | '<', '=', '>', '?', '@', '^', '~'])) 93 | } 94 | 95 | #[macro_export] 96 | macro_rules! graphic_token_char { 97 | ($c: expr) => (graphic_char!($c) || backslash_char!($c)) 98 | } 99 | 100 | #[macro_export] 101 | macro_rules! alpha_char { 102 | ($c: expr) => 103 | (match $c { 104 | 'a' ..= 'z' => true, 105 | 'A' ..= 'Z' => true, 106 | '_' => true, 107 | '\u{00A0}' ..= '\u{00BF}' => true, 108 | '\u{00C0}' ..= '\u{00D6}' => true, 109 | '\u{00D8}' ..= '\u{00F6}' => true, 110 | '\u{00F8}' ..= '\u{00FF}' => true, 111 | '\u{0100}' ..= '\u{017F}' => true, // Latin Extended-A 112 | '\u{0180}' ..= '\u{024F}' => true, // Latin Extended-B 113 | '\u{0250}' ..= '\u{02AF}' => true, // IPA Extensions 114 | '\u{02B0}' ..= '\u{02FF}' => true, // Spacing Modifier Letters 115 | '\u{0300}' ..= '\u{036F}' => true, // Combining Diacritical Marks 116 | '\u{0370}' ..= '\u{03FF}' => true, // Greek/Coptic 117 | '\u{0400}' ..= '\u{04FF}' => true, // Cyrillic 118 | '\u{0500}' ..= '\u{052F}' => true, // Cyrillic Supplement 119 | '\u{0530}' ..= '\u{058F}' => true, // Armenian 120 | '\u{0590}' ..= '\u{05FF}' => true, // Hebrew 121 | '\u{0600}' ..= '\u{06FF}' => true, // Arabic 122 | '\u{0700}' ..= '\u{074F}' => true, // Syriac 123 | _ => false 124 | }) 125 | } 126 | 127 | #[macro_export] 128 | macro_rules! decimal_digit_char { 129 | ($c: expr) => ($c >= '0' && $c <= '9') 130 | } 131 | 132 | #[macro_export] 133 | macro_rules! decimal_point_char { 134 | ($c: expr) => ($c == '.') 135 | } 136 | 137 | #[macro_export] 138 | macro_rules! alpha_numeric_char { 139 | ($c: expr) => (alpha_char!($c) || decimal_digit_char!($c)) 140 | } 141 | 142 | #[macro_export] 143 | macro_rules! cut_char { 144 | ($c: expr) => ($c == '!') 145 | } 146 | 147 | #[macro_export] 148 | macro_rules! semicolon_char { 149 | ($c: expr) => ($c == ';') 150 | } 151 | 152 | #[macro_export] 153 | macro_rules! backslash_char { 154 | ($c: expr) => ($c == '\\') 155 | } 156 | 157 | #[macro_export] 158 | macro_rules! single_quote_char { 159 | ($c: expr) => ($c == '\'') 160 | } 161 | 162 | #[macro_export] 163 | macro_rules! double_quote_char { 164 | ($c: expr) => ($c == '"') 165 | } 166 | 167 | #[macro_export] 168 | macro_rules! back_quote_char { 169 | ($c: expr) => ($c == '`') 170 | } 171 | 172 | #[macro_export] 173 | macro_rules! meta_char { 174 | ($c: expr) => ( char_class!($c, ['\\', '\'', '"', '`']) ) 175 | } 176 | 177 | #[macro_export] 178 | macro_rules! solo_char { 179 | ($c: expr) => ( char_class!($c, ['!', '(', ')', ',', ';', '[', ']', 180 | '{', '}', '|', '%']) ) 181 | } 182 | 183 | #[macro_export] 184 | macro_rules! 
prolog_char { 185 | ($c: expr) => (graphic_char!($c) || alpha_numeric_char!($c) || solo_char!($c) || 186 | layout_char!($c) || meta_char!($c)) 187 | } 188 | -------------------------------------------------------------------------------- /src/parser.rs: -------------------------------------------------------------------------------- 1 | use ast::*; 2 | use lexer::*; 3 | use tabled_rc::*; 4 | 5 | use ordered_float::OrderedFloat; 6 | 7 | use rug::ops::NegAssign; 8 | 9 | use std::cell::Cell; 10 | use std::io::Read; 11 | use std::mem::swap; 12 | use std::rc::Rc; 13 | 14 | #[derive(Debug, Clone, Copy, PartialEq)] 15 | enum TokenType { 16 | Term, 17 | Open, 18 | OpenCT, 19 | OpenList, // '[' 20 | OpenCurly, // '{' 21 | HeadTailSeparator, // '|' 22 | Comma, // ',' 23 | Close, 24 | CloseList, // ']' 25 | CloseCurly, // '}' 26 | End 27 | } 28 | 29 | impl TokenType { 30 | fn is_sep(self) -> bool { 31 | match self { 32 | TokenType::HeadTailSeparator | TokenType::OpenCT | TokenType::Open | 33 | TokenType::Close | TokenType::OpenList | TokenType::CloseList | 34 | TokenType::OpenCurly | TokenType::CloseCurly | TokenType::Comma 35 | => true, 36 | _ => false 37 | } 38 | } 39 | } 40 | 41 | #[derive(Debug, Clone, Copy)] 42 | struct TokenDesc { 43 | tt: TokenType, 44 | priority: usize, 45 | spec: u32 46 | } 47 | 48 | pub 49 | fn get_clause_spec(name: ClauseName, arity: usize, op_dir: CompositeOp) -> Option 50 | { 51 | match arity { 52 | 1 => { 53 | /* This is a clause with an operator principal functor. Prefix operators 54 | are supposed over post. 55 | */ 56 | if let Some(OpDirValue(cell, _)) = op_dir.get(name.clone(), Fixity::Pre) { 57 | return Some(cell); 58 | } 59 | 60 | if let Some(OpDirValue(cell, _)) = op_dir.get(name, Fixity::Post) { 61 | return Some(cell); 62 | } 63 | }, 64 | 2 => 65 | if let Some(OpDirValue(cell, _)) = op_dir.get(name, Fixity::In) { 66 | return Some(cell); 67 | }, 68 | _ => {} 69 | }; 70 | 71 | None 72 | } 73 | 74 | pub fn get_desc(name: ClauseName, op_dir: CompositeOp) -> Option 75 | { 76 | let mut op_desc = OpDesc { pre: 0, inf: 0, post: 0, spec: 0 }; 77 | 78 | if let Some(OpDirValue(cell, _)) = op_dir.get(name.clone(), Fixity::Pre) { 79 | let (pri, spec) = cell.get(); 80 | 81 | if pri > 0 { 82 | op_desc.pre = pri; 83 | op_desc.spec |= spec; 84 | } else if name.as_str() == "-" { 85 | op_desc.spec |= NEGATIVE_SIGN; 86 | } 87 | } 88 | 89 | if let Some(OpDirValue(cell, _)) = op_dir.get(name.clone(), Fixity::Post) { 90 | let (pri, spec) = cell.get(); 91 | 92 | if pri > 0 { 93 | op_desc.post = pri; 94 | op_desc.spec |= spec; 95 | } 96 | } 97 | 98 | if let Some(OpDirValue(cell, _)) = op_dir.get(name.clone(), Fixity::In) { 99 | let (pri, spec) = cell.get(); 100 | 101 | if pri > 0 { 102 | op_desc.inf = pri; 103 | op_desc.spec |= spec; 104 | } 105 | } 106 | 107 | if op_desc.pre + op_desc.post + op_desc.inf == 0 && !is_negate!(op_desc.spec) { 108 | None 109 | } else { 110 | Some(op_desc) 111 | } 112 | } 113 | 114 | fn affirm_xfx(priority: usize, d2: TokenDesc, d3: TokenDesc, d1: TokenDesc) -> bool 115 | { 116 | d2.priority <= priority 117 | && is_term!(d3.spec) 118 | && is_term!(d1.spec) 119 | && d3.priority < d2.priority 120 | && d1.priority < d2.priority 121 | } 122 | 123 | fn affirm_yfx(priority: usize, d2: TokenDesc, d3: TokenDesc, d1: TokenDesc) -> bool 124 | { 125 | d2.priority <= priority 126 | && ((is_term!(d3.spec) && d3.priority < d2.priority) 127 | || (is_lterm!(d3.spec) && d3.priority == d2.priority)) 128 | && is_term!(d1.spec) 129 | && d1.priority < d2.priority 130 | } 131 | 
132 | 133 | fn affirm_xfy(priority: usize, d2: TokenDesc, d3: TokenDesc, d1: TokenDesc) -> bool 134 | { 135 | d2.priority < priority 136 | && is_term!(d3.spec) 137 | && d3.priority < d2.priority 138 | && is_term!(d1.spec) 139 | && d1.priority <= d2.priority 140 | } 141 | 142 | fn affirm_yf(d1: TokenDesc, d2: TokenDesc) -> bool 143 | { 144 | let is_valid_lterm = is_lterm!(d2.spec) && d2.priority == d1.priority; 145 | (is_term!(d2.spec) && d2.priority < d1.priority) || is_valid_lterm 146 | } 147 | 148 | fn affirm_xf(d1: TokenDesc, d2: TokenDesc) -> bool 149 | { 150 | is_term!(d2.spec) && d2.priority < d1.priority 151 | } 152 | 153 | fn affirm_fy(priority: usize, d1: TokenDesc, d2: TokenDesc) -> bool 154 | { 155 | d2.priority < priority && is_term!(d1.spec) && d1.priority <= d2.priority 156 | } 157 | 158 | fn affirm_fx(priority: usize, d1: TokenDesc, d2: TokenDesc) -> bool 159 | { 160 | d2.priority <= priority && is_term!(d1.spec) && d1.priority < d2.priority 161 | } 162 | 163 | fn sep_to_atom(tt: TokenType) -> Option 164 | { 165 | match tt { 166 | TokenType::Open | TokenType::OpenCT => 167 | Some(clause_name!("(")), 168 | TokenType::Close => 169 | Some(clause_name!(")")), 170 | TokenType::OpenList => 171 | Some(clause_name!("[")), 172 | TokenType::CloseList => 173 | Some(clause_name!("]")), 174 | TokenType::OpenCurly => 175 | Some(clause_name!("{")), 176 | TokenType::CloseCurly => 177 | Some(clause_name!("}")), 178 | TokenType::HeadTailSeparator => 179 | Some(clause_name!("|")), 180 | TokenType::Comma => 181 | Some(clause_name!(",")), 182 | TokenType::End => 183 | Some(clause_name!(".")), 184 | _ => None 185 | } 186 | } 187 | 188 | #[derive(Debug, Clone, Copy)] 189 | pub struct OpDesc { 190 | pub pre: usize, 191 | pub inf: usize, 192 | pub post: usize, 193 | pub spec: Specifier 194 | } 195 | 196 | #[derive(Debug)] 197 | pub struct Parser<'a, R: Read> { 198 | lexer: Lexer<'a, R>, 199 | tokens: Vec, 200 | stack: Vec, 201 | terms: Vec, 202 | } 203 | 204 | fn read_tokens<'a, R: Read>(lexer: &mut Lexer<'a, R>) -> Result, ParserError> 205 | { 206 | let mut tokens = vec![]; 207 | 208 | loop { 209 | let token = lexer.next_token()?; 210 | let at_end = Token::End == token; 211 | 212 | tokens.push(token); 213 | 214 | if at_end { 215 | break; 216 | } 217 | } 218 | 219 | tokens.reverse(); 220 | 221 | Ok(tokens) 222 | } 223 | 224 | impl<'a, R: Read> Parser<'a, R> { 225 | pub fn new( 226 | stream: &'a mut ParsingStream, 227 | atom_tbl: TabledData, 228 | flags: MachineFlags, 229 | ) -> Self { 230 | Parser { lexer: Lexer::new(atom_tbl, flags, stream), 231 | tokens: vec![], 232 | stack: Vec::new(), 233 | terms: Vec::new() } 234 | } 235 | 236 | #[inline] 237 | pub fn line_num(&self) -> usize { 238 | self.lexer.line_num 239 | } 240 | 241 | #[inline] 242 | pub fn col_num(&self) -> usize { 243 | self.lexer.col_num 244 | } 245 | 246 | #[inline] 247 | pub fn get_atom_tbl(&self) -> TabledData { 248 | self.lexer.atom_tbl.clone() 249 | } 250 | 251 | #[inline] 252 | pub fn set_atom_tbl(&mut self, atom_tbl: TabledData) { 253 | self.lexer.atom_tbl = atom_tbl; 254 | } 255 | 256 | fn get_term_name(&mut self, td: TokenDesc) -> Option<(ClauseName, Option)> { 257 | match td.tt { 258 | TokenType::HeadTailSeparator => { 259 | Some((clause_name!("|"), Some(SharedOpDesc::new(td.priority, td.spec)))) 260 | } 261 | TokenType::Comma => { 262 | Some((clause_name!(","), Some(SharedOpDesc::new(1000, XFY)))) 263 | } 264 | TokenType::Term => { 265 | match self.terms.pop() { 266 | Some(Term::Constant(_, Constant::Atom(atom, spec))) => 267 
| Some((atom, spec)), 268 | Some(term) => { 269 | self.terms.push(term); 270 | None 271 | }, 272 | _ => None 273 | } 274 | } 275 | _ => { 276 | None 277 | } 278 | } 279 | } 280 | 281 | fn push_binary_op(&mut self, td: TokenDesc, spec: Specifier) 282 | { 283 | if let Some(arg2) = self.terms.pop() { 284 | if let Some((name, shared_op_desc)) = self.get_term_name(td) { 285 | if let Some(arg1) = self.terms.pop() { 286 | let term = Term::Clause(Cell::default(), 287 | name, 288 | vec![Box::new(arg1), Box::new(arg2)], 289 | shared_op_desc); 290 | 291 | self.terms.push(term); 292 | self.stack.push(TokenDesc { tt: TokenType::Term, 293 | priority: td.priority, 294 | spec }); 295 | } 296 | } 297 | } 298 | } 299 | 300 | fn push_unary_op(&mut self, td: TokenDesc, spec: Specifier, assoc: u32) 301 | { 302 | if let Some(mut arg1) = self.terms.pop() { 303 | if let Some(mut name) = self.terms.pop() { 304 | if is_postfix!(assoc) { 305 | swap(&mut arg1, &mut name); 306 | } 307 | 308 | if let Term::Constant(_, Constant::Atom(name, shared_op_desc)) = name { 309 | let term = Term::Clause(Cell::default(), name, vec![Box::new(arg1)], 310 | shared_op_desc); 311 | 312 | self.terms.push(term); 313 | self.stack.push(TokenDesc { tt: TokenType::Term, 314 | priority: td.priority, 315 | spec }); 316 | } 317 | } 318 | } 319 | } 320 | 321 | fn promote_atom_op(&mut self, atom: ClauseName, priority: usize, assoc: u32, 322 | op_dir_val: Option) 323 | { 324 | let spec = op_dir_val.map(|op_dir_val| op_dir_val.shared_op_desc()); 325 | 326 | self.terms.push(Term::Constant(Cell::default(), Constant::Atom(atom, spec))); 327 | self.stack.push(TokenDesc { tt: TokenType::Term, priority, spec: assoc }); 328 | } 329 | 330 | fn shift(&mut self, token: Token, priority: usize, spec: Specifier) 331 | { 332 | let tt = match token { 333 | Token::Constant(Constant::String(s)) 334 | if self.lexer.flags.double_quotes.is_codes() => { 335 | let mut list = Term::Constant(Cell::default(), Constant::EmptyList); 336 | 337 | for c in s.chars().rev() { 338 | list = Term::Cons( 339 | Cell::default(), 340 | Box::new(Term::Constant( 341 | Cell::default(), 342 | Constant::Fixnum(c as isize), 343 | )), 344 | Box::new(list), 345 | ); 346 | } 347 | 348 | self.terms.push(list); 349 | TokenType::Term 350 | } 351 | Token::Constant(c) => { 352 | self.terms.push(Term::Constant(Cell::default(), c)); 353 | TokenType::Term 354 | }, 355 | Token::Var(v) => { 356 | if v.trim() == "_" { 357 | self.terms.push(Term::AnonVar); 358 | } else { 359 | self.terms.push(Term::Var(Cell::default(), v)); 360 | } 361 | 362 | TokenType::Term 363 | }, 364 | Token::Comma => TokenType::Comma, 365 | Token::Open => TokenType::Open, 366 | Token::Close => TokenType::Close, 367 | Token::OpenCT => TokenType::OpenCT, 368 | Token::HeadTailSeparator => TokenType::HeadTailSeparator, 369 | Token::OpenList => TokenType::OpenList, 370 | Token::CloseList => TokenType::CloseList, 371 | Token::OpenCurly => TokenType::OpenCurly, 372 | Token::CloseCurly => TokenType::CloseCurly, 373 | Token::End => TokenType::End, 374 | }; 375 | 376 | self.stack.push(TokenDesc { tt, priority, spec }); 377 | } 378 | 379 | fn reduce_op(&mut self, priority: usize) { 380 | loop { 381 | if let Some(desc1) = self.stack.pop() { 382 | if let Some(desc2) = self.stack.pop() { 383 | if let Some(desc3) = self.stack.pop() { 384 | if is_xfx!(desc2.spec) && affirm_xfx(priority, desc2, desc3, desc1) 385 | { 386 | self.push_binary_op(desc2, LTERM); 387 | continue; 388 | } 389 | else if is_yfx!(desc2.spec) && affirm_yfx(priority, desc2, desc3, 
desc1) 390 | { 391 | self.push_binary_op(desc2, LTERM); 392 | continue; 393 | } 394 | else if is_xfy!(desc2.spec) && affirm_xfy(priority, desc2, desc3, desc1) 395 | { 396 | self.push_binary_op(desc2, TERM); 397 | continue; 398 | } else { 399 | self.stack.push(desc3); 400 | } 401 | } 402 | 403 | if is_yf!(desc1.spec) && affirm_yf(desc1, desc2) { 404 | self.push_unary_op(desc1, LTERM, YF); 405 | continue; 406 | } else if is_xf!(desc1.spec) && affirm_xf(desc1, desc2) { 407 | self.push_unary_op(desc1, LTERM, XF); 408 | continue; 409 | } else if is_fy!(desc2.spec) && affirm_fy(priority, desc1, desc2) { 410 | self.push_unary_op(desc2, TERM, FY); 411 | continue; 412 | } else if is_fx!(desc2.spec) && affirm_fx(priority, desc1, desc2) { 413 | self.push_unary_op(desc2, TERM, FX); 414 | continue; 415 | } else { 416 | self.stack.push(desc2); 417 | self.stack.push(desc1); 418 | } 419 | } else { 420 | self.stack.push(desc1); 421 | } 422 | } 423 | 424 | break; 425 | } 426 | } 427 | 428 | fn compute_arity_in_brackets(&self) -> Option 429 | { 430 | let mut arity = 0; 431 | 432 | for (i, desc) in self.stack.iter().rev().enumerate() { 433 | if i % 2 == 0 { // expect a term or non-comma operator. 434 | if let TokenType::Comma = desc.tt { 435 | return None; 436 | } else if is_term!(desc.spec) || is_op!(desc.spec) || is_negate!(desc.spec) { 437 | arity += 1; 438 | } else { 439 | return None; 440 | } 441 | } else { 442 | if desc.tt == TokenType::OpenCT { 443 | return Some(arity); 444 | } 445 | 446 | if let TokenType::Comma = desc.tt { 447 | continue; 448 | } else { 449 | return None; 450 | } 451 | } 452 | } 453 | 454 | None 455 | } 456 | 457 | fn reduce_term(&mut self, op_dir: CompositeOp) -> bool 458 | { 459 | if self.stack.is_empty() { 460 | return false; 461 | } 462 | 463 | self.reduce_op(999); 464 | 465 | let arity = match self.compute_arity_in_brackets() { 466 | Some(arity) => arity, 467 | None => return false 468 | }; 469 | 470 | if self.stack.len() > 2 * arity { 471 | let idx = self.stack.len() - 2 * arity - 1; 472 | 473 | if is_infix!(self.stack[idx].spec) && idx > 0 { 474 | if !is_op!(self.stack[idx - 1].spec) && !self.stack[idx - 1].tt.is_sep() { 475 | return false; 476 | } 477 | } 478 | 479 | if arity >= 2 && is_prefix!(self.stack[idx].spec) && self.stack[idx].priority > 0 { 480 | return false; 481 | } 482 | } else { 483 | return false; 484 | } 485 | 486 | let stack_len = self.stack.len() - 2 * arity - 1; 487 | let idx = self.terms.len() - arity; 488 | 489 | if TokenType::Term == self.stack[stack_len].tt { 490 | if self.atomize_term(&self.terms[idx - 1]).is_some() { 491 | self.stack.truncate(stack_len + 1); 492 | 493 | let mut subterms: Vec<_> = self.terms.drain(idx ..) 494 | .map(|t| Box::new(t)) 495 | .collect(); 496 | 497 | if let Some(name) = self.terms.pop().and_then(|t| self.atomize_term(&t)) { 498 | // reduce the '.' functor to a cons cell if it applies. 499 | if name.as_str() == "." 
&& subterms.len() == 2 { 500 | let tail = subterms.pop().unwrap(); 501 | let head = subterms.pop().unwrap(); 502 | 503 | self.terms.push(Term::Cons(Cell::default(), head, tail)); 504 | } else { 505 | let spec = get_clause_spec(name.clone(), subterms.len(), op_dir); 506 | self.terms.push(Term::Clause(Cell::default(), name, subterms, spec)); 507 | } 508 | 509 | if let Some(&mut TokenDesc { ref mut priority, ref mut spec, 510 | ref mut tt }) = self.stack.last_mut() 511 | { 512 | *tt = TokenType::Term; 513 | *priority = 0; 514 | *spec = TERM; 515 | } 516 | 517 | return true; 518 | } 519 | } 520 | } 521 | 522 | false 523 | } 524 | 525 | pub fn devour_whitespace(&mut self) -> Result<(), ParserError> { 526 | self.lexer.scan_for_layout()?; 527 | Ok(()) 528 | } 529 | 530 | pub fn reset(&mut self) { 531 | self.stack.clear() 532 | } 533 | 534 | fn expand_comma_compacted_terms(&mut self, index: usize) -> usize 535 | { 536 | if let Some(term) = self.terms.pop() { 537 | let op_desc = self.stack[index - 1]; 538 | 539 | if 0 < op_desc.priority && op_desc.priority < self.stack[index].priority { 540 | /* '|' is a head-tail separator here, not 541 | * an operator, so expand the 542 | * terms it compacted out again. */ 543 | match (term.name(), term.arity()) { 544 | (Some(name), 2) if name.as_str() == "," => { 545 | let terms = unfold_by_str(term, ","); 546 | let arity = terms.len() - 1; 547 | 548 | self.terms.extend(terms.into_iter()); 549 | return arity; 550 | } 551 | _ => { 552 | } 553 | } 554 | } 555 | 556 | self.terms.push(term); 557 | } 558 | 559 | 0 560 | } 561 | 562 | fn compute_arity_in_list(&self) -> Option 563 | { 564 | let mut arity = 0; 565 | 566 | for (i, desc) in self.stack.iter().rev().enumerate() { 567 | if i % 2 == 0 { // expect a term or non-comma operator. 568 | if let TokenType::Comma = desc.tt { 569 | return None; 570 | } else if is_term!(desc.spec) || is_op!(desc.spec) { 571 | arity += 1; 572 | } else { 573 | return None; 574 | } 575 | } else { 576 | if desc.tt == TokenType::HeadTailSeparator { 577 | if arity == 1 { 578 | continue; 579 | } 580 | 581 | return None; 582 | } else if desc.tt == TokenType::OpenList { 583 | return Some(arity); 584 | } else if desc.tt != TokenType::Comma { 585 | return None; 586 | } 587 | } 588 | } 589 | 590 | None 591 | } 592 | 593 | fn reduce_list(&mut self) -> Result 594 | { 595 | if self.stack.is_empty() { 596 | return Ok(false); 597 | } 598 | 599 | if let Some(ref mut td) = self.stack.last_mut() { 600 | if td.tt == TokenType::OpenList { 601 | td.spec = TERM; 602 | td.tt = TokenType::Term; 603 | td.priority = 0; 604 | 605 | self.terms.push(Term::Constant(Cell::default(), Constant::EmptyList)); 606 | return Ok(true); 607 | } 608 | } 609 | 610 | self.reduce_op(1000); 611 | 612 | let mut arity = match self.compute_arity_in_list() { 613 | Some(arity) => arity, 614 | None => return Ok(false) 615 | }; 616 | 617 | // we know that self.stack.len() >= 2 by this point. 
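        // For example, for "[a, b | T]" the stack region above the matching '['
        // is  OpenList, a, Comma, b, HeadTailSeparator, T  and
        // compute_arity_in_list() reports 3. The '|' branch below then pops T
        // as the tail, decrements the arity to 2, and the fold produces
        // Cons(a, Cons(b, T)). For "[a, b]" the separator check fails instead
        // and the tail defaults to the empty list.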
618 | let idx = self.stack.len() - 2; 619 | let list_len = self.stack.len() - 2 * arity; 620 | 621 | let end_term = if self.stack[idx].tt != TokenType::HeadTailSeparator { 622 | Term::Constant(Cell::default(), Constant::EmptyList) 623 | } else { 624 | let term = 625 | match self.terms.pop() { 626 | Some(term) => term, 627 | _ => return Err(ParserError::IncompleteReduction(self.lexer.line_num, 628 | self.lexer.col_num)) 629 | }; 630 | 631 | if self.stack[idx].priority > 1000 { 632 | arity += self.expand_comma_compacted_terms(idx); 633 | } 634 | 635 | arity -= 1; 636 | 637 | term 638 | }; 639 | 640 | let idx = self.terms.len() - arity; 641 | 642 | let list = self.terms.drain(idx ..) 643 | .rev() 644 | .fold(end_term, |acc, t| Term::Cons(Cell::default(), 645 | Box::new(t), 646 | Box::new(acc))); 647 | 648 | self.stack.truncate(list_len); 649 | 650 | self.stack.push(TokenDesc { tt: TokenType::Term, priority: 0, spec: TERM }); 651 | self.terms.push(list); 652 | 653 | Ok(true) 654 | } 655 | 656 | fn reduce_curly(&mut self) -> Result { 657 | if self.stack.is_empty() { 658 | return Ok(false); 659 | } 660 | 661 | if let Some(ref mut td) = self.stack.last_mut() { 662 | if td.tt == TokenType::OpenCurly { 663 | td.tt = TokenType::Term; 664 | td.priority = 0; 665 | td.spec = TERM; 666 | 667 | let term = Term::Constant(Cell::default(), 668 | atom!("{}", self.lexer.atom_tbl)); 669 | self.terms.push(term); 670 | return Ok(true); 671 | } 672 | } 673 | 674 | self.reduce_op(1201); 675 | 676 | if self.stack.len() > 1 { 677 | if let Some(td) = self.stack.pop() { 678 | if let Some(ref mut oc) = self.stack.last_mut() { 679 | if td.tt != TokenType::Term { 680 | return Ok(false); 681 | } 682 | 683 | if oc.tt == TokenType::OpenCurly { 684 | oc.tt = TokenType::Term; 685 | oc.priority = 0; 686 | oc.spec = TERM; 687 | 688 | let term = match self.terms.pop() { 689 | Some(term) => term, 690 | _ => return Err(ParserError::IncompleteReduction(self.lexer.line_num, 691 | self.lexer.col_num)) 692 | }; 693 | 694 | self.terms.push(Term::Clause(Cell::default(), clause_name!("{}"), 695 | vec![Box::new(term)], None)); 696 | 697 | return Ok(true); 698 | } 699 | } 700 | } 701 | } 702 | 703 | Ok(false) 704 | } 705 | 706 | fn reduce_brackets(&mut self) -> bool { 707 | if self.stack.is_empty() { 708 | return false; 709 | } 710 | 711 | self.reduce_op(1400); 712 | 713 | if self.stack.len() == 1 { 714 | return false; 715 | } 716 | 717 | let idx = self.stack.len() - 2; 718 | 719 | match self.stack.remove(idx) { 720 | td => 721 | match td.tt { 722 | TokenType::Open | TokenType::OpenCT => { 723 | if self.stack[idx].tt == TokenType::Comma { 724 | return false; 725 | } 726 | 727 | if let Some(atom) = sep_to_atom(self.stack[idx].tt) { 728 | self.terms.push(Term::Constant(Cell::default(), Constant::Atom(atom, None))); 729 | } 730 | 731 | self.stack[idx].spec = TERM; 732 | self.stack[idx].tt = TokenType::Term; 733 | self.stack[idx].priority = 0; 734 | true 735 | }, 736 | _ => false 737 | } 738 | } 739 | } 740 | 741 | fn shift_op(&mut self, name: ClauseName, op_dir: CompositeOp) -> Result { 742 | if let Some(OpDesc { pre, inf, post, spec }) = get_desc(name.clone(), op_dir) { 743 | if (pre > 0 && inf + post > 0) || is_negate!(spec) { 744 | match self.tokens.last().ok_or(ParserError::UnexpectedEOF)? { 745 | // do this when layout hasn't been inserted, 746 | // ie. why we don't match on Token::Open. 747 | &Token::OpenCT => { 748 | // can't be prefix, so either inf == 0 749 | // or post == 0. 
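                        // e.g. with the usual '-' (fy 200 prefix, yfx 500 infix):
                        // in "- (1)" the prefix reading survives, but in "-(1)"
                        // the OpenCT token right after '-' rules it out, so '-'
                        // is shifted with whichever of its infix/postfix
                        // priorities is defined (ISO forbids an atom from being
                        // both infix and postfix), and reduce_term() later folds
                        // it into the compound term '-'(1).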
750 | self.reduce_op(inf + post); 751 | 752 | let fixity = if inf > 0 { Fixity::In } else { Fixity::Post }; 753 | let op_dir_val = op_dir.get(name.clone(), fixity); 754 | 755 | self.promote_atom_op(name, inf + post, spec & (XFX | XFY | YFX | YF | XF), 756 | op_dir_val); 757 | }, 758 | _ => { 759 | self.reduce_op(inf + post); 760 | 761 | if let Some(TokenDesc { spec: pspec, .. }) = self.stack.last().cloned() { 762 | // rterm.c: 412 763 | if is_term!(pspec) { 764 | let fixity = if inf > 0 { Fixity::In } else { Fixity::Post }; 765 | let op_dir_val = op_dir.get(name.clone(), fixity); 766 | 767 | self.promote_atom_op(name, inf + post, 768 | spec & (XFX | XFY | YFX | XF | YF), 769 | op_dir_val); 770 | } else { 771 | let op_dir_val = op_dir.get(name.clone(), Fixity::Pre); 772 | self.promote_atom_op(name, pre, spec & (FX | FY | NEGATIVE_SIGN), op_dir_val); 773 | } 774 | } else { 775 | let op_dir_val = op_dir.get(name.clone(), Fixity::Pre); 776 | self.promote_atom_op(name, pre, spec & (FX | FY | NEGATIVE_SIGN), op_dir_val); 777 | } 778 | } 779 | } 780 | } else { 781 | let op_dir_val = op_dir.get(name.clone(), 782 | if pre + inf == 0 { 783 | Fixity::Post 784 | } else if post + pre == 0 { 785 | Fixity::In 786 | } else { 787 | Fixity::Pre 788 | }); 789 | 790 | self.reduce_op(pre + inf + post); // only one non-zero priority among these. 791 | self.promote_atom_op(name, pre + inf + post, spec, op_dir_val); 792 | } 793 | 794 | Ok(true) 795 | } else { // not an operator. 796 | Ok(false) 797 | } 798 | } 799 | 800 | fn atomize_term(&self, term: &Term) -> Option { 801 | match term { 802 | &Term::Constant(_, ref c) => self.atomize_constant(c), 803 | _ => None 804 | } 805 | } 806 | 807 | fn atomize_constant(&self, c: &Constant) -> Option { 808 | match c { 809 | &Constant::Atom(ref name, _) => Some(name.clone()), 810 | &Constant::Char(c) => 811 | Some(clause_name!(c.to_string(), self.lexer.atom_tbl)), 812 | &Constant::EmptyList => 813 | Some(clause_name!(c.to_string(), self.lexer.atom_tbl)), 814 | _ => None 815 | } 816 | } 817 | 818 | fn negate_number( 819 | &mut self, 820 | n: N, 821 | negator: Negator, 822 | constr: ToConstant 823 | ) 824 | where Negator: Fn(N) -> N, 825 | ToConstant: Fn(N) -> Constant 826 | { 827 | if let Some(desc) = self.stack.last().cloned() { 828 | if let Some(term) = self.terms.last().cloned() { 829 | match term { 830 | Term::Constant(_, Constant::Atom(ref name, _)) 831 | if name.as_str() == "-" && (is_prefix!(desc.spec) || is_negate!(desc.spec)) => { 832 | self.stack.pop(); 833 | self.terms.pop(); 834 | 835 | self.shift(Token::Constant(constr(negator(n))), 0, TERM); 836 | return; 837 | }, 838 | _ => {} 839 | } 840 | } 841 | } 842 | 843 | self.shift(Token::Constant(constr(n)), 0, TERM); 844 | } 845 | 846 | fn shift_token(&mut self, token: Token, op_dir: CompositeOp) -> Result<(), ParserError> { 847 | fn negate_rc(mut t: Rc) -> Rc { 848 | match Rc::get_mut(&mut t) { 849 | Some(t) => { 850 | t.neg_assign(); 851 | } 852 | None => { 853 | } 854 | }; 855 | 856 | t 857 | } 858 | 859 | match token { 860 | Token::Constant(Constant::Fixnum(n)) => 861 | self.negate_number(n, |n| -n, Constant::Fixnum), 862 | Token::Constant(Constant::Integer(n)) => 863 | self.negate_number(n, negate_rc, Constant::Integer), 864 | Token::Constant(Constant::Rational(n)) => 865 | self.negate_number(n, negate_rc, Constant::Rational), 866 | Token::Constant(Constant::Float(n)) => 867 | self.negate_number( 868 | n, 869 | |n| OrderedFloat(-n.into_inner()), 870 | |n| Constant::Float(n) 871 | ), 872 | Token::Constant(c) => 
873 | if let Some(name) = self.atomize_constant(&c) { 874 | if !self.shift_op(name, op_dir)? { 875 | self.shift(Token::Constant(c), 0, TERM); 876 | } 877 | } else { 878 | self.shift(Token::Constant(c), 0, TERM); 879 | }, 880 | Token::Var(v) => self.shift(Token::Var(v), 0, TERM), 881 | Token::Open => self.shift(Token::Open, 1300, DELIMITER), 882 | Token::OpenCT => self.shift(Token::OpenCT, 1300, DELIMITER), 883 | Token::Close => 884 | if !self.reduce_term(op_dir) { 885 | if !self.reduce_brackets() { 886 | return Err(ParserError::IncompleteReduction( 887 | self.lexer.line_num, 888 | self.lexer.col_num, 889 | )); 890 | } 891 | }, 892 | Token::OpenList => self.shift(Token::OpenList, 1300, DELIMITER), 893 | Token::CloseList => 894 | if !self.reduce_list()? { 895 | return Err(ParserError::IncompleteReduction( 896 | self.lexer.line_num, 897 | self.lexer.col_num, 898 | )); 899 | }, 900 | Token::OpenCurly => self.shift(Token::OpenCurly, 1300, DELIMITER), 901 | Token::CloseCurly => 902 | if !self.reduce_curly()? { 903 | return Err(ParserError::IncompleteReduction( 904 | self.lexer.line_num, 905 | self.lexer.col_num, 906 | )); 907 | }, 908 | Token::HeadTailSeparator => { 909 | /* '|' as an operator must have priority > 1000 and can only be infix. 910 | * See: http://www.complang.tuwien.ac.at/ulrich/iso-prolog/dtc2#Res_A78 911 | */ 912 | let (priority, spec) = get_desc(clause_name!("|"), op_dir) 913 | .map(|OpDesc { inf, spec, .. }| (inf, spec)) 914 | .unwrap_or((1000, DELIMITER)); 915 | 916 | self.reduce_op(priority); 917 | self.shift(Token::HeadTailSeparator, priority, spec); 918 | }, 919 | Token::Comma => { 920 | self.reduce_op(1000); 921 | self.shift(Token::Comma, 1000, XFY); 922 | }, 923 | Token::End => 924 | match self.stack.last().map(|t| t.tt) { 925 | Some(TokenType::Open) 926 | | Some(TokenType::OpenCT) 927 | | Some(TokenType::OpenList) 928 | | Some(TokenType::OpenCurly) 929 | | Some(TokenType::HeadTailSeparator) 930 | | Some(TokenType::Comma) 931 | => return Err(ParserError::IncompleteReduction(self.lexer.line_num, 932 | self.lexer.col_num)), 933 | _ => {} 934 | } 935 | } 936 | 937 | Ok(()) 938 | } 939 | 940 | #[inline] 941 | pub fn eof(&mut self) -> Result { 942 | self.lexer.eof() 943 | } 944 | 945 | pub fn read_term(&mut self, op_dir: CompositeOp) -> Result 946 | { 947 | self.tokens = read_tokens(&mut self.lexer)?; 948 | 949 | while let Some(token) = self.tokens.pop() { 950 | self.shift_token(token, op_dir)?; 951 | } 952 | 953 | self.reduce_op(1400); 954 | 955 | if self.terms.len() > 1 || self.stack.len() > 1 { 956 | return Err(ParserError::IncompleteReduction(self.lexer.line_num, self.lexer.col_num)); 957 | } 958 | 959 | match self.terms.pop() { 960 | Some(term) => if self.terms.is_empty() { 961 | Ok(term) 962 | } else { 963 | Err(ParserError::IncompleteReduction(self.lexer.line_num, self.lexer.col_num)) 964 | }, 965 | _ => Err(ParserError::IncompleteReduction(self.lexer.line_num, self.lexer.col_num)) 966 | } 967 | } 968 | 969 | pub fn read(&mut self, op_dir: CompositeOp) -> Result, ParserError> 970 | { 971 | let mut terms = Vec::new(); 972 | 973 | loop { 974 | terms.push(self.read_term(op_dir)?); 975 | 976 | if self.lexer.eof()? 
{ 977 | break; 978 | } 979 | } 980 | 981 | Ok(terms) 982 | } 983 | } 984 | -------------------------------------------------------------------------------- /src/put_back_n.rs: -------------------------------------------------------------------------------- 1 | use std::iter::Peekable; 2 | 3 | #[derive(Debug, Clone)] 4 | pub struct PutBackN { 5 | top: Vec, 6 | iter: Peekable, 7 | } 8 | 9 | pub fn put_back_n(iterable: I) -> PutBackN 10 | where I: IntoIterator 11 | { 12 | PutBackN { 13 | top: Vec::new(), 14 | iter: iterable.into_iter().peekable(), 15 | } 16 | } 17 | 18 | impl PutBackN { 19 | #[inline] 20 | pub(crate) 21 | fn put_back(&mut self, item: I::Item) { 22 | self.top.push(item); 23 | } 24 | 25 | #[inline] 26 | pub fn take_buf(&mut self) -> Vec { 27 | std::mem::replace(&mut self.top, vec![]) 28 | } 29 | 30 | #[inline] 31 | pub(crate) 32 | fn peek(&mut self) -> Option<&I::Item> { 33 | if self.top.is_empty() { 34 | /* This is a kludge for Ctrl-D not being 35 | * handled properly if self.iter().peek() isn't called 36 | * first. */ 37 | match self.iter.peek() { 38 | Some(_) => { 39 | self.iter.next().and_then(move |item| { 40 | self.top.push(item); 41 | self.top.last() 42 | }) 43 | } 44 | None => { 45 | None 46 | } 47 | } 48 | } else { 49 | self.top.last() 50 | } 51 | } 52 | 53 | #[inline] 54 | pub(crate) 55 | fn put_back_all>(&mut self, iter: DEI) { 56 | self.top.extend(iter.rev()); 57 | } 58 | } 59 | 60 | impl Iterator for PutBackN { 61 | type Item = I::Item; 62 | 63 | #[inline] 64 | fn next(&mut self) -> Option { 65 | if self.top.is_empty() { 66 | self.iter.next() 67 | } else { 68 | self.top.pop() 69 | } 70 | } 71 | } 72 | -------------------------------------------------------------------------------- /src/tabled_rc.rs: -------------------------------------------------------------------------------- 1 | use std::cell::{RefCell, RefMut}; 2 | use std::cmp::Ordering; 3 | use std::collections::HashSet; 4 | use std::fmt; 5 | use std::hash::{Hash, Hasher}; 6 | use std::ops::Deref; 7 | use std::rc::{Rc}; 8 | 9 | pub struct TabledData { 10 | table: Rc>>>, 11 | pub(crate) module_name: Rc 12 | } 13 | 14 | impl fmt::Debug for TabledData { 15 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 16 | f.debug_struct("TabledData") 17 | .field("table", &self.table) 18 | .field("module_name", &self.table) 19 | .finish() 20 | } 21 | } 22 | 23 | impl Clone for TabledData { 24 | fn clone(&self) -> Self { 25 | TabledData { table: self.table.clone(), 26 | module_name: self.module_name.clone() } 27 | } 28 | } 29 | 30 | impl PartialEq for TabledData { 31 | fn eq(&self, other: &TabledData) -> bool 32 | { 33 | Rc::ptr_eq(&self.table, &other.table) && self.module_name == other.module_name 34 | } 35 | } 36 | 37 | impl TabledData { 38 | #[inline] 39 | pub fn new(module_name: Rc) -> Self { 40 | TabledData { 41 | table: Rc::new(RefCell::new(HashSet::new())), 42 | module_name 43 | } 44 | } 45 | 46 | #[inline] 47 | pub fn borrow_mut(&self) -> RefMut>> { 48 | self.table.borrow_mut() 49 | } 50 | } 51 | 52 | pub struct TabledRc { 53 | pub(crate) atom: Rc, 54 | pub table: TabledData 55 | } 56 | 57 | impl fmt::Debug for TabledRc { 58 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 59 | f.debug_struct("TabledRc") 60 | .field("atom", &self.atom) 61 | .field("table", &self.table) 62 | .finish() 63 | } 64 | } 65 | 66 | // this Clone instance is manually defined to prevent the compiler 67 | // from complaining when deriving Clone for StringList. 
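// Cloning only bumps the reference counts of the shared atom and of the table
// handle; the interned entry itself is untouched. It is the Drop impl further
// down that evicts the entry, once the only reference left besides the one
// being dropped is (normally) the table's own.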
68 | impl Clone for TabledRc { 69 | fn clone(&self) -> Self { 70 | TabledRc { atom: self.atom.clone(), table: self.table.clone() } 71 | } 72 | } 73 | 74 | impl PartialOrd for TabledRc { 75 | fn partial_cmp(&self, other: &Self) -> Option 76 | { 77 | Some(self.atom.cmp(&other.atom)) 78 | } 79 | } 80 | 81 | impl Ord for TabledRc { 82 | fn cmp(&self, other: &Self) -> Ordering 83 | { 84 | self.atom.cmp(&other.atom) 85 | } 86 | } 87 | 88 | impl PartialEq for TabledRc { 89 | fn eq(&self, other: &TabledRc) -> bool 90 | { 91 | self.atom == other.atom 92 | } 93 | } 94 | 95 | impl Eq for TabledRc {} 96 | 97 | impl Hash for TabledRc { 98 | fn hash(&self, state: &mut H) { 99 | self.atom.hash(state) 100 | } 101 | } 102 | 103 | impl TabledRc { 104 | pub fn new(atom: T, table: TabledData) -> Self { 105 | let atom = match table.borrow_mut().take(&atom) { 106 | Some(atom) => atom.clone(), 107 | None => Rc::new(atom) 108 | }; 109 | 110 | table.borrow_mut().insert(atom.clone()); 111 | 112 | TabledRc { atom, table } 113 | } 114 | 115 | #[inline] 116 | pub fn inner(&self) -> Rc { 117 | self.atom.clone() 118 | } 119 | 120 | #[inline] 121 | pub(crate) fn owning_module(&self) -> Rc { 122 | self.table.module_name.clone() 123 | } 124 | } 125 | 126 | impl Drop for TabledRc { 127 | fn drop(&mut self) { 128 | if Rc::strong_count(&self.atom) == 2 { 129 | self.table.borrow_mut().remove(&self.atom); 130 | } 131 | } 132 | } 133 | 134 | impl Deref for TabledRc { 135 | type Target = T; 136 | 137 | fn deref(&self) -> &Self::Target { 138 | &*self.atom 139 | } 140 | } 141 | 142 | impl fmt::Display for TabledRc { 143 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 144 | write!(f, "{}", &*self.atom) 145 | } 146 | } 147 | 148 | #[macro_export] 149 | macro_rules! tabled_rc { 150 | ($e:expr, $tbl:expr) => ( 151 | TabledRc::new(String::from($e), $tbl.clone()) 152 | ) 153 | } 154 | -------------------------------------------------------------------------------- /tests/bom.rs: -------------------------------------------------------------------------------- 1 | extern crate prolog_parser; 2 | 3 | use prolog_parser::ast::*; 4 | use prolog_parser::lexer::{Lexer, Token}; 5 | use prolog_parser::tabled_rc::TabledData; 6 | 7 | use std::rc::Rc; 8 | 9 | #[test] 10 | fn valid_token() { 11 | let stream = parsing_stream("valid text".as_bytes()); 12 | assert!(stream.is_ok()); 13 | } 14 | 15 | #[test] 16 | fn empty_stream() { 17 | let bytes: &[u8] = &[]; 18 | assert!(parsing_stream(bytes).is_ok()); 19 | } 20 | 21 | #[test] 22 | fn skip_utf8_bom() { 23 | let atom_tbl = TabledData::new(Rc::new("my_module".to_string())); 24 | let flags = MachineFlags::default(); 25 | let bytes: &[u8] = &[0xEF, 0xBB, 0xBF, '4' as u8, '\n' as u8]; 26 | let mut stream = parsing_stream(bytes).expect("valid stream"); 27 | let mut lexer = Lexer::new(atom_tbl, flags, &mut stream); 28 | match lexer.next_token() { 29 | Ok(Token::Constant(Constant::Fixnum(4))) => (), 30 | _ => assert!(false) 31 | } 32 | } 33 | 34 | #[test] 35 | fn invalid_utf16_bom() { 36 | let bytes: &[u8] = &[0xFF, 0xFE, 'a' as u8, '\n' as u8]; 37 | let stream = parsing_stream(bytes); 38 | match stream { 39 | Err(ParserError::Utf8Error(0, 0)) => (), 40 | _ => assert!(false) 41 | } 42 | } 43 | 44 | -------------------------------------------------------------------------------- /tests/parse_tokens.rs: -------------------------------------------------------------------------------- 1 | extern crate prolog_parser; 2 | 3 | use prolog_parser::ast::*; 4 | use prolog_parser::lexer::{Lexer, Token}; 5 | use 
prolog_parser::tabled_rc::TabledData; 6 | 7 | use std::rc::Rc; 8 | 9 | fn read_all_tokens(text: &str) -> Result, ParserError> { 10 | let atom_tbl = TabledData::new(Rc::new("my_module".to_string())); 11 | let flags = MachineFlags::default(); 12 | let mut stream = parsing_stream(text.as_bytes())?; 13 | let mut lexer = Lexer::new(atom_tbl, flags, &mut stream); 14 | 15 | let mut tokens = Vec::new(); 16 | while !lexer.eof()? { 17 | let token = lexer.next_token()?; 18 | tokens.push(token); 19 | } 20 | Ok(tokens) 21 | } 22 | 23 | #[test] 24 | fn empty_multiline_comment() -> Result<(), ParserError> { 25 | let tokens = read_all_tokens("/**/ 4\n")?; 26 | assert_eq!(tokens, [Token::Constant(Constant::Fixnum(4))]); 27 | Ok(()) 28 | } 29 | 30 | #[test] 31 | fn any_char_multiline_comment() -> Result<(), ParserError> { 32 | let tokens = read_all_tokens("/* █╗╚═══╝ © */ 4\n")?; 33 | assert_eq!(tokens, [Token::Constant(Constant::Fixnum(4))]); 34 | Ok(()) 35 | } 36 | 37 | #[test] 38 | fn simple_char() -> Result<(), ParserError> { 39 | let tokens = read_all_tokens("'a'\n")?; 40 | assert_eq!(tokens, [Token::Constant(Constant::Char('a'))]); 41 | Ok(()) 42 | } 43 | 44 | #[test] 45 | fn char_with_meta_seq() -> Result<(), ParserError> { 46 | let tokens = read_all_tokens(r#"'\\' '\'' '\"' '\`' "#)?; // use literal string so \ are escaped 47 | assert_eq!(tokens, [Token::Constant(Constant::Char('\\')), 48 | Token::Constant(Constant::Char('\'')), 49 | Token::Constant(Constant::Char('"')), 50 | Token::Constant(Constant::Char('`'))]); 51 | Ok(()) 52 | } 53 | 54 | #[test] 55 | fn char_with_control_seq() -> Result<(), ParserError> { 56 | let tokens = read_all_tokens(r"'\a' '\b' '\r' '\f' '\t' '\n' '\v' ")?; 57 | assert_eq!(tokens, [ 58 | Token::Constant(Constant::Char('\u{07}')), 59 | Token::Constant(Constant::Char('\u{08}')), 60 | Token::Constant(Constant::Char('\r')), 61 | Token::Constant(Constant::Char('\u{0c}')), 62 | Token::Constant(Constant::Char('\t')), 63 | Token::Constant(Constant::Char('\n')), 64 | Token::Constant(Constant::Char('\u{0b}')), 65 | ]); 66 | Ok(()) 67 | } 68 | 69 | #[test] 70 | fn char_with_octseq() -> Result<(), ParserError> { 71 | let tokens = read_all_tokens(r"'\60433\' ")?; 72 | assert_eq!(tokens, [Token::Constant(Constant::Char('愛'))]); // Japanese character 73 | Ok(()) 74 | } 75 | 76 | #[test] 77 | fn char_with_octseq_0() -> Result<(), ParserError> { 78 | let tokens = read_all_tokens(r"'\0\' ")?; 79 | assert_eq!(tokens, [Token::Constant(Constant::Char('\u{0000}'))]); 80 | Ok(()) 81 | } 82 | 83 | #[test] 84 | fn char_with_hexseq() -> Result<(), ParserError> { 85 | let tokens = read_all_tokens(r"'\x2124\' ")?; 86 | assert_eq!(tokens, [Token::Constant(Constant::Char('ℤ'))]); // Z math symbol 87 | Ok(()) 88 | } 89 | 90 | #[test] 91 | fn char_with_hexseq_invalid() { 92 | assert!(read_all_tokens(r"'\x\' ").is_err()); 93 | } 94 | 95 | #[test] 96 | fn empty() -> Result<(), ParserError> { 97 | let tokens = read_all_tokens("")?; 98 | assert!(tokens.is_empty()); 99 | Ok(()) 100 | } 101 | 102 | #[test] 103 | fn comment_then_eof() -> Result<(), ParserError> { 104 | let tokens = read_all_tokens("% only a comment")?; 105 | assert_eq!(tokens, [Token::End]); 106 | Ok(()) 107 | } 108 | --------------------------------------------------------------------------------