├── Tests ├── files │ ├── small │ │ ├── empty.wl │ │ ├── crash.txt │ │ ├── crash6.txt │ │ ├── carriagereturn.wl │ │ ├── comment.wl │ │ ├── crash10.txt │ │ ├── crash12.txt │ │ ├── crash15.txt │ │ ├── crash16.txt │ │ ├── crash4.txt │ │ ├── crash5.txt │ │ ├── span1.wl │ │ ├── strange.wl │ │ ├── crash2.txt │ │ ├── crash7.txt │ │ ├── sample.wl │ │ ├── carriagereturn2.wl │ │ ├── continuation.wl │ │ ├── crash14.txt │ │ ├── carriagereturn3.wl │ │ ├── coverage3.wl │ │ ├── crash11.txt │ │ ├── carriagereturn4.wl │ │ ├── string1.wl │ │ ├── coverage2.wl │ │ ├── crash13.txt │ │ ├── crash17.txt │ │ ├── crash18.txt │ │ ├── crash3.txt │ │ ├── crash8.txt │ │ ├── crash9.txt │ │ ├── invalid1.wl │ │ ├── unsafe1.wl │ │ ├── unsafe2.wl │ │ └── coverage1.wl │ ├── package.wl │ ├── inputs-0001.txt │ ├── stackoverflow1.txt │ ├── stackoverflow3.txt │ ├── .gitattributes │ ├── large │ │ ├── ReliefPlot.nb │ │ ├── geomagneticmodels.m │ │ └── expandedCompanyDataNew1.m │ ├── 鳥物.wl │ ├── script.wl │ ├── inputs-0002.txt │ ├── linearsyntax.wl │ └── jpeg-string.txt ├── Regressions.mt ├── CodeSyntaxQ.mt ├── Quirks.mt ├── TokenEnum.mt ├── TestSuite.mt ├── ToNode.mt ├── Tokenize.mt ├── SafeString.mt ├── SyntaxErrorNodes.mt ├── Aggregate.mt ├── Characters.mt ├── CodeParser.mt ├── Unsafe.mt ├── ToString.mt ├── Error.mt ├── TokenErrors.mt ├── AbstractSyntaxIssues.mt ├── Scoping.mt ├── AbstractSyntaxErrorNodes.mt └── CallMissingCloserNodes.mt ├── .gitignore ├── .vscode └── settings.json ├── Cargo.toml ├── .lfsconfig ├── crates ├── rustfmt.toml ├── wolfram-parser │ ├── src │ │ ├── tokenize.rs │ │ ├── tests │ │ │ ├── test_source_character.rs │ │ │ ├── test_ffi.rs │ │ │ ├── test_token_enum.rs │ │ │ ├── test_node.rs │ │ │ └── test_api.rs │ │ ├── read │ │ │ └── byte_buffer.rs │ │ ├── symbol.rs │ │ ├── feature.rs │ │ ├── precedence.rs │ │ ├── parse │ │ │ ├── parse_tests │ │ │ │ └── test_parselet.rs │ │ │ ├── parselet │ │ │ │ ├── times_parselet.rs │ │ │ │ ├── under_parselet.rs │ │ │ │ └── integral_parselet.rs │ │ │ 
└── parser_docs.rs │ │ ├── quirks.rs │ │ ├── bin │ │ │ └── main.rs │ │ ├── long_names.rs │ │ ├── iter.rs │ │ └── agg.rs │ ├── .cargo │ │ └── config │ ├── Cargo.toml │ └── benches │ │ ├── bench_fast_string_scan.rs │ │ └── bench_general.rs └── codeparser-wll │ └── Cargo.toml ├── .WolframResources ├── docs ├── maintenance.md ├── docs.md ├── debugging.md ├── quirks.md ├── concretify.md ├── nodes.md ├── tokens.md ├── implementation.md ├── characters.md ├── Development.md ├── fuzz-testing.md ├── stages.md └── compatibility.md ├── CodeParser ├── Kernel │ ├── TokenEnum.wl │ ├── Trees.wl │ ├── Quirks.wl │ ├── Shims.wl │ ├── Node.wl │ └── Definitions.wl ├── PacletInfo.wl.in ├── Resources │ └── Examples │ │ └── Collatz.m └── Generate │ ├── TokenEnum.wl │ ├── Common.wl │ └── Precedence.wl ├── .project ├── .github └── workflows │ ├── run_tests.wls │ └── workflow.yml ├── cmake ├── WolframLibrary.cmake ├── ReplacePacletInfo.cmake ├── MacOSXVersionMin.cmake ├── InspectFile.cmake ├── InstallPaclet.cmake ├── PacletInfo.cmake └── WolframScript.cmake ├── cpp └── include │ ├── ExprLibrary.h │ └── Diagnostics.h ├── LICENSE ├── run_tests.wls ├── CONTRIBUTING.md ├── CodeTools └── Generate │ ├── CreatePacletArchive.wl │ └── GenerateSources.wl ├── HowToBuild.md ├── README.md └── scripts └── re_build_CodeParser.xml /Tests/files/small/empty.wl: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /Tests/files/small/crash.txt: -------------------------------------------------------------------------------- 1 | \777 -------------------------------------------------------------------------------- /Tests/files/small/crash6.txt: -------------------------------------------------------------------------------- 1 | >>[ -------------------------------------------------------------------------------- /Tests/files/small/carriagereturn.wl: 
-------------------------------------------------------------------------------- 1 | A -------------------------------------------------------------------------------- /Tests/files/small/comment.wl: -------------------------------------------------------------------------------- 1 | (* xxx *) -------------------------------------------------------------------------------- /Tests/files/small/crash10.txt: -------------------------------------------------------------------------------- 1 | >>[[ -------------------------------------------------------------------------------- /Tests/files/small/crash12.txt: -------------------------------------------------------------------------------- 1 | a:b~1:2 -------------------------------------------------------------------------------- /Tests/files/small/crash15.txt: -------------------------------------------------------------------------------- 1 | 6`5.. -------------------------------------------------------------------------------- /Tests/files/small/crash16.txt: -------------------------------------------------------------------------------- 1 | 1`+.. 
-------------------------------------------------------------------------------- /Tests/files/small/crash4.txt: -------------------------------------------------------------------------------- 1 | ?a\ 2 | -------------------------------------------------------------------------------- /Tests/files/small/crash5.txt: -------------------------------------------------------------------------------- 1 | 1*\ 2 | -------------------------------------------------------------------------------- /Tests/files/small/span1.wl: -------------------------------------------------------------------------------- 1 | a ;; b 2 | c -------------------------------------------------------------------------------- /Tests/files/small/strange.wl: -------------------------------------------------------------------------------- 1 | x = 1 -------------------------------------------------------------------------------- /Tests/files/small/crash2.txt: -------------------------------------------------------------------------------- 1 | ?123\ 2 | 456" -------------------------------------------------------------------------------- /Tests/files/small/crash7.txt: -------------------------------------------------------------------------------- 1 | a:: 2 | +1 -------------------------------------------------------------------------------- /Tests/files/small/sample.wl: -------------------------------------------------------------------------------- 1 | 2 | 1+1 3 | -------------------------------------------------------------------------------- /Tests/files/small/carriagereturn2.wl: -------------------------------------------------------------------------------- 1 | " 2 | 123" -------------------------------------------------------------------------------- /Tests/files/small/continuation.wl: -------------------------------------------------------------------------------- 1 | { 2 | 1\ 3 | } -------------------------------------------------------------------------------- /Tests/files/small/crash14.txt: 
-------------------------------------------------------------------------------- 1 | \[Integral]\[Sum] -------------------------------------------------------------------------------- /Tests/files/small/carriagereturn3.wl: -------------------------------------------------------------------------------- 1 | "123\ 2 | 456" -------------------------------------------------------------------------------- /Tests/files/small/coverage3.wl: -------------------------------------------------------------------------------- 1 | ##2 2 | 3 | a>>>b 4 | -------------------------------------------------------------------------------- /Tests/files/small/crash11.txt: -------------------------------------------------------------------------------- 1 | 13333333333333333332^^a -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | 2 | build* 3 | 4 | .DS_Store 5 | 6 | /target 7 | -------------------------------------------------------------------------------- /Tests/files/small/carriagereturn4.wl: -------------------------------------------------------------------------------- 1 | 2 | f[]\ 3 | /; x -------------------------------------------------------------------------------- /Tests/files/small/string1.wl: -------------------------------------------------------------------------------- 1 | 2 | "data\\ 3 | " 4 | 5 | x 6 | -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "cmake.configureOnOpen": false 3 | } -------------------------------------------------------------------------------- /Tests/files/small/coverage2.wl: -------------------------------------------------------------------------------- 1 | \[Integral] f[x] \[DifferentialD] x -------------------------------------------------------------------------------- 
/Tests/files/package.wl: -------------------------------------------------------------------------------- 1 | 2 | BeginPackage["Foo.m`"] 3 | 4 | EndPackage[] 5 | 6 | -------------------------------------------------------------------------------- /Tests/files/inputs-0001.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WolframResearch/codeparser/master/Tests/files/inputs-0001.txt -------------------------------------------------------------------------------- /Tests/files/small/crash13.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WolframResearch/codeparser/master/Tests/files/small/crash13.txt -------------------------------------------------------------------------------- /Tests/files/small/crash17.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WolframResearch/codeparser/master/Tests/files/small/crash17.txt -------------------------------------------------------------------------------- /Tests/files/small/crash18.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WolframResearch/codeparser/master/Tests/files/small/crash18.txt -------------------------------------------------------------------------------- /Tests/files/small/crash3.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WolframResearch/codeparser/master/Tests/files/small/crash3.txt -------------------------------------------------------------------------------- /Tests/files/small/crash8.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WolframResearch/codeparser/master/Tests/files/small/crash8.txt -------------------------------------------------------------------------------- 
/Tests/files/small/crash9.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WolframResearch/codeparser/master/Tests/files/small/crash9.txt -------------------------------------------------------------------------------- /Tests/files/small/invalid1.wl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WolframResearch/codeparser/master/Tests/files/small/invalid1.wl -------------------------------------------------------------------------------- /Tests/files/small/unsafe1.wl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WolframResearch/codeparser/master/Tests/files/small/unsafe1.wl -------------------------------------------------------------------------------- /Tests/files/small/unsafe2.wl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WolframResearch/codeparser/master/Tests/files/small/unsafe2.wl -------------------------------------------------------------------------------- /Tests/files/stackoverflow1.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WolframResearch/codeparser/master/Tests/files/stackoverflow1.txt -------------------------------------------------------------------------------- /Tests/files/stackoverflow3.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WolframResearch/codeparser/master/Tests/files/stackoverflow3.txt -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [workspace] 2 | resolver = "2" 3 | members = [ 4 | "crates/wolfram-parser", 5 | "crates/codeparser-wll" 6 | ] 7 | 
-------------------------------------------------------------------------------- /.lfsconfig: -------------------------------------------------------------------------------- 1 | [lfs] 2 | # Ensure that a default checkout of this repository doesn't download any 3 | # large files. 4 | fetchexclude = * 5 | -------------------------------------------------------------------------------- /Tests/files/.gitattributes: -------------------------------------------------------------------------------- 1 | # Use Git LFS to store all of the files in the files/large/ directory 2 | large/* filter=lfs diff=lfs merge=lfs -text 3 | -------------------------------------------------------------------------------- /Tests/files/small/coverage1.wl: -------------------------------------------------------------------------------- 1 | 2 | _a` 3 | 4 | _. 5 | 6 | a_. 7 | 8 | 9 | __ 10 | 11 | ___ 12 | 13 | a__ 14 | 15 | a___ 16 | 17 | a_. 18 | 19 | _:1 20 | -------------------------------------------------------------------------------- /Tests/files/large/ReliefPlot.nb: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:08dbac51a17a741b997bd7c60d13ea0fe7d6c970191aa2c07356f2c26d70b1b1 3 | size 244135221 4 | -------------------------------------------------------------------------------- /Tests/files/large/geomagneticmodels.m: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:682f7198f2468d2ebfa23eacb971dce688cc3229873feb4960f704bf823eec92 3 | size 6827153 4 | -------------------------------------------------------------------------------- /Tests/files/large/expandedCompanyDataNew1.m: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:c32e17edc64ee9b94e6c7a36c9dd7e837a1be0d965c51098ae41edd428874af8 3 | size 71487044 4 | 
-------------------------------------------------------------------------------- /crates/rustfmt.toml: -------------------------------------------------------------------------------- 1 | max_width = 80 2 | # use_small_heuristics = "off" 3 | match_block_trailing_comma = true 4 | blank_lines_upper_bound = 3 5 | merge_derives = false 6 | reorder_modules = false -------------------------------------------------------------------------------- /Tests/files/鳥物.wl: -------------------------------------------------------------------------------- 1 | (* ::Package:: *) 2 | 3 | BeginPackage["鳥物`"] 4 | 鳥言う::usage = "鳥言う[物] 鳥に何か言うように頼む" 5 | Begin["`私的`"] 6 | 鳥言う[ア_] := ResourceFunction["BirdSay"][ア] 7 | End[] 8 | EndPackage[] 9 | -------------------------------------------------------------------------------- /.WolframResources: -------------------------------------------------------------------------------- 1 | Resources[ 2 | Version[1], 3 | ExecutionBuildCommand["< 50] 10 | -------------------------------------------------------------------------------- /docs/docs.md: -------------------------------------------------------------------------------- 1 | 2 | # CodeParser 3 | 4 | 5 | 6 | ## Caveats 7 | 8 | 9 | linear syntax is not parsed, just tokenized 10 | 11 | linear syntax is not abstracted, don't want to reimplement MakeExpression 12 | 13 | boxes are not abstracted, don't want to reimplement MakeExpression 14 | 15 | 16 | 17 | 18 | 19 | 20 | -------------------------------------------------------------------------------- /docs/debugging.md: -------------------------------------------------------------------------------- 1 | 2 | type summary add --summary-string "Escape: ${var.escapeBits%u} Sign: ${var.signBit%u} BinValue: ${var.valBits%b} CharValue: ${var.valBits%c} DecimalValue: ${var.valBits%d}" WLCharacter 3 | 4 | 5 | 6 | type summary add --summary-string "CharValue: ${var.valBits%c} DecimalValue: ${var.valBits%d}" SourceCharacter 7 | 8 | 9 | 10 | 11 | 
-------------------------------------------------------------------------------- /Tests/files/inputs-0002.txt: -------------------------------------------------------------------------------- 1 | a`b`c 2 | a~b~c 3 | a!b!c 4 | a@b@c 5 | a#b#c 6 | a$b$c 7 | a%b%c 8 | a^b^c 9 | a&b&c 10 | a*b*c 11 | a(b(c 12 | a)b)c 13 | a-b-c 14 | a_b_c 15 | a=b=c 16 | a+b+c 17 | a[b[c 18 | a{b{c 19 | a]b]c 20 | a}b}c 21 | a\b\c 22 | a|b|c 23 | a;b;c 24 | a:b:c 25 | a'b'c 26 | a"b"c 27 | a,b,c 28 | ab>c 31 | a/b/c 32 | a?b?c -------------------------------------------------------------------------------- /crates/wolfram-parser/src/tokenize.rs: -------------------------------------------------------------------------------- 1 | mod token; 2 | pub(crate) mod token_kind; 3 | pub(crate) mod tokenizer; 4 | 5 | pub use self::{ 6 | token::{Token, TokenStr, TokenString}, 7 | token_kind::TokenKind, 8 | }; 9 | 10 | #[doc(hidden)] 11 | pub use self::token::{TokenInput, TokenSource}; 12 | 13 | pub(crate) use self::{token::TokenRef, tokenizer::Tokenizer}; 14 | -------------------------------------------------------------------------------- /crates/wolfram-parser/.cargo/config: -------------------------------------------------------------------------------- 1 | # Specify the Rust compiler equivalent of `/MT`, to statically link the runtime 2 | # on Windows. 
3 | # 4 | # This prevents "The program can't start because ucrtbased.dll is missing from 5 | # your computer" error on Windows 7 and fixes bug 427427: 6 | # error 126 "The specified module could not be found" 7 | # when Visual Studio is not installed 8 | [target.x86_64-pc-windows-msvc] 9 | rustflags = ["-Ctarget-feature=+crt-static"] 10 | -------------------------------------------------------------------------------- /crates/wolfram-parser/src/tests/test_source_character.rs: -------------------------------------------------------------------------------- 1 | use crate::read::WLCharacter; 2 | 3 | #[test] 4 | fn SourceCharacterTest_Graphical1() { 5 | assert_eq!(WLCharacter::new('\t').graphicalString(), "\\t"); 6 | 7 | assert_eq!(WLCharacter::new(0x1b).graphicalString(), "\\[RawEscape]"); 8 | 9 | assert_eq!(WLCharacter::new(0xb0).graphicalString(), "\\[Degree]"); 10 | 11 | assert_eq!(WLCharacter::new('\u{abcd}').graphicalString(), "\\:abcd"); 12 | } 13 | -------------------------------------------------------------------------------- /.project: -------------------------------------------------------------------------------- 1 | 2 | 3 | CodeParser 4 | 5 | 6 | 7 | 8 | 9 | com.wolfram.eclipse.MEET.MathematicaProjectBuilder 10 | 11 | 12 | 13 | 14 | 15 | com.wolfram.eclipse.MEET.SimpleMathematicaNature 16 | 17 | 18 | -------------------------------------------------------------------------------- /Tests/Regressions.mt: -------------------------------------------------------------------------------- 1 | Print["\n===== Start Regressions.mt =====\n"] 2 | 3 | Needs["CodeParser`"] 4 | 5 | (*------------------------------------*) 6 | (* Bug 439902 *) 7 | (*------------------------------------*) 8 | 9 | TestMatch[ 10 | CodeTokenize @ ExportString[1, "JPEG"], 11 | {__, ErrorNode[Token`Error`UnterminatedString, _, _]} 12 | ] 13 | 14 | TestMatch[ 15 | CodeConcreteParse @ ExportString[1, "JPEG"], 16 | ContainerNode[String, {__}, _] 17 | ] 18 | 
-------------------------------------------------------------------------------- /Tests/files/linearsyntax.wl: -------------------------------------------------------------------------------- 1 | 2 | InputAliases -> {"intt" -> \(\[Integral] \(\[SelectionPlaceholder] \(\[DifferentialD] \[Placeholder]\)\)\), "dintt" -> \(\(\[Integral]\_\[SelectionPlaceholder]\%\[Placeholder]\) \(\[Placeholder] \(\[DifferentialD] \[Placeholder]\)\)\), "sumt" -> \(\(\[Sum]\+\(\[SelectionPlaceholder] = \[Placeholder]\)\%\[Placeholder]\) \[Placeholder]\), "prodt" -> \(\(\[Product]\+\(\[SelectionPlaceholder] = \[Placeholder]\)\%\[Placeholder]\) \[Placeholder]\), "dt" -> \(\(\[PartialD]\_\[Placeholder]\)\ \[SelectionPlaceholder]\)} 3 | -------------------------------------------------------------------------------- /crates/codeparser-wll/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "codeparser-wll" 3 | version = "0.1.0" 4 | edition = "2021" 5 | publish = false 6 | 7 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 8 | 9 | [lib] 10 | # This crate compiles to a dynamic library. 
11 | crate-type = ["cdylib"] 12 | 13 | [features] 14 | default = ["USE_MATHLINK"] 15 | USE_MATHLINK = [] 16 | 17 | [dependencies] 18 | wolfram-library-link = { version = "0.2.10", default-features = false } 19 | wolfram-parser = { path = "../wolfram-parser", features = ["CHECK_ABORT"] } 20 | 21 | -------------------------------------------------------------------------------- /Tests/CodeSyntaxQ.mt: -------------------------------------------------------------------------------- 1 | Print["\n===== Start CodeSyntaxQ.mt =====\n"] 2 | 3 | (* Wolfram Language Test file *) 4 | 5 | Needs["CodeParser`"] 6 | 7 | (* 8 | There should be no messages from CodeSyntaxQ 9 | *) 10 | Test[ 11 | CodeSyntaxQ["#\"\\A\""] 12 | , 13 | True 14 | , 15 | {} 16 | , 17 | TestID->"CodeSyntaxQ-20200702-D6P6W9" 18 | ] 19 | 20 | Test[ 21 | CodeSyntaxQ["a>>b\\1c"] 22 | , 23 | True 24 | , 25 | {} 26 | , 27 | TestID->"CodeSyntaxQ-20200703-Q2R5G9" 28 | ] 29 | 30 | 31 | 32 | Test[ 33 | CodeSyntaxQ[File["doesntexist"]] 34 | , 35 | False 36 | , 37 | TestID->"CodeSyntaxQ-20230426-X7Z7D3" 38 | ] 39 | -------------------------------------------------------------------------------- /Tests/Quirks.mt: -------------------------------------------------------------------------------- 1 | Print["\n===== Start Quirks.mt =====\n"] 2 | 3 | Needs["CodeParser`"] 4 | 5 | 6 | Test[ 7 | Internal`InheritedBlock[{CodeParser`Quirks`$Quirks}, 8 | 9 | CodeParser`Quirks`$Quirks["OldAtAtAt"] = True; 10 | 11 | CodeParse["a @@@ b"] 12 | ] 13 | , 14 | ContainerNode[String, { 15 | CallNode[LeafNode[Symbol, "Apply", <||>], { 16 | LeafNode[Symbol, "a", <|Source -> {{1, 1}, {1, 2}}|>], 17 | LeafNode[Symbol, "b", <|Source -> {{1, 7}, {1, 8}}|>], 18 | CallNode[LeafNode[Symbol, "List", <||>], {LeafNode[Integer, "1", <||>]}, <||>]}, <|Source -> {{1, 1}, {1, 8}}|>]}, <|Source -> {{1, 1}, {1, 8}}|>] 19 | , 20 | TestID->"Quirks-20220919-O2S9R6" 21 | ] -------------------------------------------------------------------------------- 
/Tests/TokenEnum.mt: -------------------------------------------------------------------------------- 1 | Print["\n===== Start TokenEnum.mt =====\n"] 2 | 3 | Needs["CodeParser`TokenEnum`"] 4 | 5 | Test @ tokenIsEmpty[EndOfFile] 6 | Test @ tokenIsEmpty[Token`Fake`ImplicitTimes] 7 | Test @ tokenIsEmpty[Token`Error`Aborted] 8 | Test @ tokenIsEmpty[Token`Fake`ImplicitNull] 9 | Test @ tokenIsEmpty[Token`Fake`ImplicitOne] 10 | Test @ tokenIsEmpty[Token`Fake`ImplicitAll] 11 | Test @ tokenIsEmpty[Token`Error`ExpectedOperand] 12 | Test @ tokenIsEmpty[Token`Error`ExpectedTag] 13 | Test @ tokenIsEmpty[Token`Error`ExpectedFile] 14 | Test @ tokenIsEmpty[Token`Error`PrefixImplicitNull] 15 | Test @ tokenIsEmpty[Token`Error`InfixImplicitNull] 16 | 17 | Test @ !tokenIsEmpty[String] 18 | Test @ !tokenIsEmpty[Token`Comma] -------------------------------------------------------------------------------- /docs/quirks.md: -------------------------------------------------------------------------------- 1 | internal docs: quirks mode 2 | 3 | 4 | 5 | reproduce kernel buggy behavior 6 | 7 | 8 | 9 | reproduce front end buggy behavior 10 | 11 | 12 | 13 | 14 | version 11.0, do blah 15 | 16 | version 11.2, do blah, 17 | 18 | etc. 
19 | 20 | 21 | 22 | 23 | # 24 | 25 | quirks mode DannyL branch stuff: 26 | 27 | https://bugs.wolfram.com/show?number=139531 28 | 29 | https://bugs.wolfram.com/show?number=160919 30 | 31 | 32 | Prototype build 33 | 34 | 35 | 36 | 37 | parsing -a/2 is now Times[Times[-1, a], Power[2, -1]] 38 | 39 | 40 | branch bugfix/139531_et_al 41 | 42 | 43 | 44 | ``Internal`$PrototypeBuild`` 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | -------------------------------------------------------------------------------- /docs/concretify.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | concretifying is: 4 | given abstract syntax 5 | choosing operators + 6 | parenthesizing where needed + 7 | (removing implicit tokens where possible) + 8 | (stringifying where possible) + 9 | (compounding where possible) + 10 | (crazy stuff like convert `-1*a` to `-a`) 11 | make different choices about operators: 12 | e.g. CompoundExpression[] or ; ? 13 | f[x] or f@x ? 
14 | 15 | 16 | pretty-printing is: 17 | given abstract syntax 18 | concretifying[operators that look nice] then formatting 19 | 20 | this is better InputForm 21 | 22 | 23 | minifying is: 24 | given abstract syntax 25 | concretifying[operators that minimize space], no formatting 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | -------------------------------------------------------------------------------- /CodeParser/Kernel/Trees.wl: -------------------------------------------------------------------------------- 1 | BeginPackage["CodeParser`Trees`"] 2 | 3 | ToTree 4 | 5 | Begin["`Private`"] 6 | 7 | Needs["CodeParser`"] 8 | Needs["CodeParser`Utils`"] 9 | 10 | 11 | 12 | ToTree[parseTree_] := 13 | NestTree[codeChildren, parseTree, Infinity, codeData] 14 | 15 | 16 | 17 | codeChildren[head_[tag_, children_, data_]] := children 18 | 19 | codeData[head_[tag_, children_, data_]] := 20 | {head, tag, data} 21 | 22 | 23 | 24 | codeChildren[LeafNode[tag_, str_, data_]] := None 25 | 26 | codeData[LeafNode[tag_, str_, data_]] := 27 | {LeafNode, tag, str, data} 28 | 29 | 30 | codeChildren[ErrorNode[tag_, str_, data_]] := None 31 | 32 | codeData[ErrorNode[tag_, str_, data_]] := 33 | {ErrorNode, tag, str, data} 34 | 35 | 36 | End[] 37 | 38 | EndPackage[] 39 | -------------------------------------------------------------------------------- /.github/workflows/run_tests.wls: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env wolframscript 2 | 3 | Needs["MUnit`"] 4 | 5 | createSuccessLogger[] := With[{logger = Unique[]}, 6 | Module[{success = True}, 7 | logger /: LogFatal[logger, _] := success = False; 8 | logger /: LogFailure[logger, _] := success = False; 9 | logger /: LogMessagesFailure[logger, _] := success = False; 10 | logger /: LogError[logger, _] := success = False; 11 | logger /: LogFatal[logger, _] := success = False; 12 | logger /: TestRunSucceededQ[logger] := success; 13 | logger 14 | ] 15 | ] 16 | 17 | successLogger = 
createSuccessLogger[] 18 | 19 | TestRun["Tests/TestSuite.mt", Loggers :> {VerbosePrintLogger[], successLogger}] 20 | 21 | If[TrueQ[TestRunSucceededQ[successLogger]], 22 | Exit[0] 23 | , 24 | Exit[1] 25 | ] 26 | -------------------------------------------------------------------------------- /docs/nodes.md: -------------------------------------------------------------------------------- 1 | 2 | # Nodes 3 | 4 | ## Terminology 5 | 6 | leaf: Integer, Real, Symbol, String, etc. 7 | 8 | 9 | 10 | 11 | ## Philosophy 12 | 13 | if a node is not something else, then it is leaf 14 | 15 | 16 | 17 | ## Structure 18 | 19 | All nodes have a uniform structure: 20 | 21 | `Node[tag or operator, contents or children, opts]` 22 | 23 | 24 | 25 | 26 | We take advantage of the symbolic nature of WL and use the function symbols themselves for tags: 27 | 28 | 29 | a+b is parsed as: 30 | ``` 31 | InfixNode[Plus, {LeafNode[Symbol, "a", <||>], LeafNode[Token`Plus, "+", <||>], LeafNode[Symbol, "b", <||>]}, <||>] 32 | ``` 33 | 34 | and a::b is parsed as: 35 | ``` 36 | InfixNode[MessageName, {LeafNode[Symbol, "a", <||>], LeafNode[Token`ColonColon, "::", <||>], LeafNode[String, "b", <||>]}, <||>] 37 | ``` 38 | 39 | 40 | 41 | 42 | -------------------------------------------------------------------------------- /CodeParser/PacletInfo.wl.in: -------------------------------------------------------------------------------- 1 | 2 | Paclet[ 3 | Name -> "CodeParser", 4 | Version -> "1.10", 5 | WolframVersion -> "12.1+", 6 | Description -> "Parse Wolfram Language code.", 7 | Creator -> "Brenton Bostick ", 8 | BuildDate -> "", 9 | BuildNumber -> 0, 10 | BuildWolframVersionNumber -> 0, 11 | BuildWolframLibraryVersion -> 0, 12 | Transport -> "", 13 | Updating -> Automatic, 14 | Extensions -> { 15 | {"Kernel", Root -> "Kernel", Context -> "CodeParser`"}, 16 | {"Documentation", Language -> All, MainPage -> "Guides/CodeParser"}, 17 | {"LibraryLink"}, 18 | {"Resource", Root -> "Resources", 19 | Resources -> { 20 
| {"Collatz", "Examples/Collatz.m"}, 21 | {"LongNames", "Generated/LongNames.wl"}, 22 | {"Precedence", "Generated/Precedence.wl"} 23 | } 24 | } 25 | } 26 | ] 27 | -------------------------------------------------------------------------------- /cmake/WolframLibrary.cmake: -------------------------------------------------------------------------------- 1 | 2 | macro(ParseWolframLibraryHeader) 3 | 4 | if(NOT EXISTS ${WOLFRAMLIBRARY_INCLUDE_DIR}) 5 | message(FATAL_ERROR "WOLFRAMLIBRARY_INCLUDE_DIR does not exist. WOLFRAMLIBRARY_INCLUDE_DIR: ${WOLFRAMLIBRARY_INCLUDE_DIR}") 6 | endif() 7 | 8 | set(WOLFRAMLIBRARY_HEADER ${WOLFRAMLIBRARY_INCLUDE_DIR}/WolframLibrary.h) 9 | 10 | if(NOT EXISTS ${WOLFRAMLIBRARY_HEADER}) 11 | message(FATAL_ERROR "WOLFRAMLIBRARY_HEADER does not exist. WOLFRAMLIBRARY_HEADER: ${WOLFRAMLIBRARY_HEADER}") 12 | endif() 13 | 14 | file(READ ${WOLFRAMLIBRARY_HEADER} filedata) 15 | 16 | string(REGEX MATCH "#define WolframLibraryVersion ([0-9]+)" _ ${filedata}) 17 | 18 | set(WOLFRAMLIBRARY_VERSION ${CMAKE_MATCH_1}) 19 | 20 | if(NOT DEFINED WOLFRAMLIBRARY_VERSION) 21 | message(FATAL_ERROR "WOLFRAMLIBRARY_VERSION was not set.") 22 | endif() 23 | 24 | endmacro(ParseWolframLibraryHeader) 25 | -------------------------------------------------------------------------------- /Tests/files/jpeg-string.txt: -------------------------------------------------------------------------------- 1 | ÿØÿàJFIFHHÿáÔExifII*  nv( 14~2²*ˆ úÿÿÿHHCreated with the Wolfram Language : www.wolfram.com2023:11:16 12:20:24-06:00ÿÛC  2 |   $.' 
",#(7),01444'9=82<.342ÿÛC  2!!22222222222222222222222222222222222222222222222222ÿÀ"ÿÄ 3 | ÿĵ}!1AQa"q2‘¡#B±ÁRÑð$3br‚ 4 | %&'()*456789:CDEFGHIJSTUVWXYZcdefghijstuvwxyzƒ„…†‡ˆ‰Š’“”•–—˜™š¢£¤¥¦§¨©ª²³´µ¶·¸¹ºÂÃÄÅÆÇÈÉÊÒÓÔÕÖרÙÚáâãäåæçèéêñòóôõö÷øùúÿÄ 5 | ÿĵw!1AQaq"2B‘¡±Á #3RðbrÑ 6 | $4á%ñ&'()*56789:CDEFGHIJSTUVWXYZcdefghijstuvwxyz‚ƒ„…†‡ˆ‰Š’“”•–—˜™š¢£¤¥¦§¨©ª²³´µ¶·¸¹ºÂÃÄÅÆÇÈÉÊÒÓÔÕÖרÙÚâãäåæçèéêòóôõö÷øùúÿÚ ?öhõÙ[ÅÒhRiï‹CuɕHU (ärÝý:QY¯aâ3ã¨õqe¥}-ÚÏþ?äóLm"¶ý¾N7a~îìµEutQEÿÙ -------------------------------------------------------------------------------- /Tests/TestSuite.mt: -------------------------------------------------------------------------------- 1 | (* Wolfram Language Test file *) 2 | 3 | Needs["MUnit`"] 4 | 5 | SetEnvironment["CODEPARSER_DEBUG" -> "True"] 6 | 7 | TestSuite[{ 8 | "Abstract.mt", 9 | "AbstractCallNode.mt", 10 | "AbstractSyntaxErrorNodes.mt", 11 | "AbstractSyntaxIssues.mt", 12 | "Aggregate.mt", 13 | "Arrows.mt", 14 | "Boxes.mt", 15 | "CallMissingCloserNodes.mt", 16 | "Characters.mt", 17 | "CodeParser.mt", 18 | "CodeSyntaxQ.mt", 19 | "Concrete.mt", 20 | "Concretify.mt", 21 | "Definitions.mt", 22 | "Error.mt", 23 | "Errors.mt", 24 | "File.mt", 25 | "Inequality.mt", 26 | "LineContinuations.mt", 27 | "Parse.mt", 28 | "Quirks.mt", 29 | "Regressions.mt", 30 | "SafeString.mt", 31 | "Scoping.mt", 32 | "Span.mt", 33 | "SyntaxErrorNodes.mt", 34 | "SyntaxIssues.mt", 35 | "TokenErrors.mt", 36 | "TokenEnum.mt", 37 | "Tokenize.mt", 38 | "ToNode.mt", 39 | "TopLevel.mt", 40 | "ToString.mt", 41 | "TypeSpecifier.mt", 42 | "Unsafe.mt", 43 | "Weird.mt" 44 | }] 45 | -------------------------------------------------------------------------------- /cmake/ReplacePacletInfo.cmake: -------------------------------------------------------------------------------- 1 | 2 | file(READ ${PACLETINFO_IN_SOURCE} filedata) 3 | 4 | string(TIMESTAMP DATESTRING "%a %d %b %Y %H:%M:%S") 5 | 6 | string(REGEX REPLACE "BuildDate -> \"[a-zA-Z0-9 :]*\"" "BuildDate -> \"${DATESTRING}\"" filedata 
"${filedata}") 7 | 8 | string(REGEX REPLACE "BuildNumber -> [0-9]+" "BuildNumber -> ${BUILDNUMBER}" filedata "${filedata}") 9 | 10 | string(REGEX REPLACE "BuildWolframVersionNumber -> [0-9]+" "BuildWolframVersionNumber -> ${VERSION_NUMBER}" filedata "${filedata}") 11 | 12 | string(REGEX REPLACE "BuildWolframLibraryVersion -> [0-9]+" "BuildWolframLibraryVersion -> ${WOLFRAMLIBRARY_VERSION}" filedata "${filedata}") 13 | 14 | string(REGEX REPLACE "Transport -> \"[a-zA-Z]*\"" "Transport -> \"${TRANSPORT}\"" filedata "${filedata}") 15 | 16 | if(LOCAL_BUILD) 17 | 18 | string(REGEX REPLACE "Version -> \"[0-9\\.]+\"," "Version -> \"${LOCAL_BUILD_VERSION}\"(* local build *)," filedata "${filedata}") 19 | 20 | endif() 21 | 22 | file(WRITE ${REPLACED_PACLETINFO} "${filedata}") 23 | -------------------------------------------------------------------------------- /cmake/MacOSXVersionMin.cmake: -------------------------------------------------------------------------------- 1 | 2 | macro(CheckMacOSXVersionMin) 3 | 4 | if(NOT EXISTS ${WOLFRAMKERNEL}) 5 | message(FATAL_ERROR "WOLFRAMKERNEL does not exist. 
WOLFRAMKERNEL: ${WOLFRAMKERNEL}") 6 | endif() 7 | 8 | execute_process( 9 | COMMAND 10 | ${WOLFRAMKERNEL} -noinit -noprompt -nopaclet -nostartuppaclets -run Pause[${KERNEL_PAUSE}]\;Needs["CCompilerDriver`"]\;Print[OutputForm[If[$VersionNumber\ >=\ 12.2,\ StringReplace[CCompilerDriver`CCompilerDriverBase`MacOSXVersionMinFlag[],\ "-mmacosx-version-min="\ ->\ ""],\ "10.10"]]]\;Exit[] 11 | OUTPUT_VARIABLE 12 | MACOSX_VERSION_MIN 13 | OUTPUT_STRIP_TRAILING_WHITESPACE 14 | WORKING_DIRECTORY 15 | ${PROJECT_SOURCE_DIR} 16 | TIMEOUT 17 | ${KERNEL_TIMEOUT} 18 | RESULT_VARIABLE 19 | MACOSX_VERSION_MIN_RESULT 20 | ) 21 | 22 | if(NOT ${MACOSX_VERSION_MIN_RESULT} EQUAL "0") 23 | message(FATAL_ERROR "Bad exit code from MacOSXVersionMin script: ${MACOSX_VERSION_MIN_RESULT}") 24 | endif() 25 | 26 | endmacro(CheckMacOSXVersionMin) 27 | -------------------------------------------------------------------------------- /cpp/include/ExprLibrary.h: -------------------------------------------------------------------------------- 1 | 2 | #pragma once 3 | 4 | #include "WolframLibrary.h" // for mint 5 | #undef True 6 | #undef False 7 | 8 | #include <cstdint> // for int64_t 9 | 10 | using expr = void *; 11 | using Buffer = const unsigned char *; 12 | 13 | 14 | EXTERN_C expr Expr_FromInteger64(int64_t val); 15 | 16 | EXTERN_C expr Expr_FromReal64(double val); 17 | 18 | EXTERN_C expr Expr_UTF8BytesToStringExpr(Buffer buf, mint size); 19 | 20 | EXTERN_C expr Expr_MEncodedStringToSymbolExpr(const char *str); 21 | 22 | // 23 | // The suffix A means automatically handle releasing reference to head 24 | // 25 | EXTERN_C expr Expr_BuildExprA(expr head, mint argCount); 26 | 27 | // 28 | // The suffix A means automatically handle releasing reference to arg 29 | // 30 | // index is base 1 31 | // 32 | EXTERN_C void Expr_InsertA(expr e, mint index, expr arg); 33 | 34 | EXTERN_C void Expr_Release(expr e); 35 | 36 | EXTERN_C void Expr_StringExprToUTF8Bytes(expr e, Buffer *buffer, mint *len); 37 | 38 | EXTERN_C expr 
Expr_LongNameSuggestion(expr input); 39 | -------------------------------------------------------------------------------- /crates/wolfram-parser/src/tests/test_ffi.rs: -------------------------------------------------------------------------------- 1 | //! Test that types used in this crate's LibraryLink API have a stable 2 | //! representation. If these tests fail, that implies that a version of this 3 | //! crate is not backwards compatible with the version the tests were initially 4 | //! written for. 5 | 6 | use pretty_assertions::assert_eq; 7 | 8 | use crate::{EncodingMode, FirstLineBehavior, SourceConvention, StringifyMode}; 9 | 10 | #[test] 11 | fn public_enum_values() { 12 | assert_eq!(FirstLineBehavior::NotScript as i32, 0); 13 | assert_eq!(FirstLineBehavior::Check as i32, 1); 14 | assert_eq!(FirstLineBehavior::Script as i32, 2); 15 | 16 | assert_eq!(EncodingMode::Normal as i32, 0); 17 | assert_eq!(EncodingMode::Box as i32, 1); 18 | 19 | assert_eq!(StringifyMode::Normal as i32, 0); 20 | assert_eq!(StringifyMode::Tag as i32, 1); 21 | assert_eq!(StringifyMode::File as i32, 2); 22 | 23 | assert_eq!(SourceConvention::LineColumn as i32, 0); 24 | assert_eq!(SourceConvention::CharacterIndex as i32, 1); 25 | } 26 | -------------------------------------------------------------------------------- /crates/wolfram-parser/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "wolfram-parser" 3 | version = "0.1.0" 4 | edition = "2021" 5 | rust-version = "1.70" 6 | 7 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 8 | 9 | [lib] 10 | bench = false 11 | 12 | [[bin]] 13 | name = "main" 14 | bench = false 15 | 16 | [features] 17 | COMPUTE_SOURCE = [] 18 | FAST_STRING_SCAN = [] 19 | 20 | # Features used when building the LibraryLink dynamic library. 
21 | CHECK_ABORT = ["wolfram-library-link"] 22 | 23 | default = ["COMPUTE_SOURCE"] 24 | 25 | 26 | [dependencies] 27 | edit-distance = "2.1.0" 28 | memchr = "2.5.0" 29 | 30 | wolfram-expr = "0.1.4" 31 | 32 | wolfram-library-link = { version = "0.2.10", optional = true, default-features = false } 33 | 34 | [dev-dependencies] 35 | pretty_assertions = "1.2.1" 36 | 37 | criterion = "0.5.1" 38 | 39 | [[bench]] 40 | name = "bench_general" 41 | harness = false 42 | 43 | [[bench]] 44 | name = "bench_fast_string_scan" 45 | harness = false 46 | required-features = ["FAST_STRING_SCAN"] 47 | -------------------------------------------------------------------------------- /Tests/ToNode.mt: -------------------------------------------------------------------------------- 1 | Print["\n===== Start ToNode.mt =====\n"] 2 | 3 | Needs["CodeParser`"] 4 | 5 | 6 | Clear[a] 7 | 8 | Test[ 9 | ToNode[a] 10 | , 11 | LeafNode[Symbol, "Global`a", <||>] 12 | , 13 | TestID->"ToNode-20181230-L1R6Q9" 14 | ] 15 | 16 | 17 | Test[ 18 | ToNode["abc"] 19 | , 20 | LeafNode[String, "\"abc\"", <||>] 21 | , 22 | TestID->"ToNode-20181230-S1R5V6" 23 | ] 24 | 25 | 26 | Test[ 27 | ToNode[123] 28 | , 29 | LeafNode[Integer, "123", <||>] 30 | , 31 | TestID->"ToNode-20181230-O2A4T0" 32 | ] 33 | 34 | 35 | Test[ 36 | ToNode[1.23] 37 | , 38 | LeafNode[Real, "1.23", <||>] 39 | , 40 | TestID->"ToNode-20181230-E5S9U5" 41 | ] 42 | 43 | 44 | 45 | rat = 1/16 46 | 47 | Test[ 48 | ToNode[rat] 49 | , 50 | LeafNode[Rational, "16^^1*^-1", <||>] 51 | , 52 | TestID->"ToNode-20200413-V3L1T8" 53 | ] 54 | 55 | 56 | rat = 1/37 57 | 58 | Test[ 59 | ToNode[rat] 60 | , 61 | CallNode[LeafNode[Symbol, "Rational", <||>], {LeafNode[Integer, "1", <||>], LeafNode[Integer, "37", <||>]}, <||>] 62 | , 63 | TestID->"ToNode-20200413-V2I2X0" 64 | ] 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | -------------------------------------------------------------------------------- /LICENSE: 
-------------------------------------------------------------------------------- 1 | Copyright 2020 Wolfram Research Inc. 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of 4 | this software and associated documentation files (the "Software"), to deal in 5 | the Software without restriction, including without limitation the rights to 6 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 7 | the Software, and to permit persons to whom the Software is furnished to do so, 8 | subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all 11 | copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 15 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 16 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 17 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 18 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
-------------------------------------------------------------------------------- /crates/wolfram-parser/src/tests/test_token_enum.rs: -------------------------------------------------------------------------------- 1 | use crate::tokenize::TokenKind; 2 | 3 | 4 | #[test] 5 | fn TokenEnumTest_Trivia() { 6 | assert!(TokenKind::Whitespace.isTrivia()); 7 | } 8 | 9 | #[test] 10 | fn TokenEnumTest_PossibleBeginning() { 11 | assert!(TokenKind::Symbol.isPossibleBeginning()); 12 | 13 | assert!(TokenKind::SemiSemi.isPossibleBeginning()); 14 | } 15 | 16 | #[test] 17 | fn TokenEnumTest_Closer() { 18 | assert!(TokenKind::CloseSquare.isCloser()); 19 | 20 | assert!(TokenKind::LongName_RightCeiling.isCloser()); 21 | } 22 | 23 | #[test] 24 | fn TokenEnumTest_Error() { 25 | assert!(TokenKind::Error_ExpectedTag.isError()); 26 | 27 | assert!(TokenKind::Error_UnsupportedToken.isError()); 28 | } 29 | 30 | #[test] 31 | fn TokenEnumTest_Unterminated() { 32 | assert!(TokenKind::Error_UnterminatedString.isUnterminated()); 33 | 34 | assert!(TokenKind::Error_UnterminatedComment.isUnterminated()); 35 | } 36 | 37 | #[test] 38 | fn TokenEnumTest_Empty() { 39 | assert!(TokenKind::EndOfFile.isEmpty()); 40 | 41 | assert!(TokenKind::Error_ExpectedOperand.isEmpty()); 42 | } 43 | -------------------------------------------------------------------------------- /run_tests.wls: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env wolframscript 2 | 3 | $builtPacletDir = FileNameJoin[{Directory[], "build", "paclet", "CodeParser"}]; 4 | 5 | If[!FileExistsQ[$builtPacletDir], 6 | Print["Failed: built paclet directory does not exist: ", $builtPacletDir]; 7 | Exit[-1]; 8 | ]; 9 | 10 | 11 | Print[ 12 | "Loading CodeParser: ", PacletDirectoryLoad[$builtPacletDir] 13 | ] 14 | 15 | Needs["CodeParser`"] 16 | 17 | 18 | 19 | Needs["MUnit`"] 20 | 21 | createSuccessLogger[] := With[{logger = Unique[]}, 22 | Module[{success = True}, 23 | logger /: LogFatal[logger, _] := success = False; 24 | logger /: LogFailure[logger, _] := 
success = False; 25 | logger /: LogMessagesFailure[logger, _] := success = False; 26 | logger /: LogError[logger, _] := success = False; 27 | logger /: LogFatal[logger, _] := success = False; 28 | logger /: TestRunSucceededQ[logger] := success; 29 | logger 30 | ] 31 | ] 32 | 33 | successLogger = createSuccessLogger[] 34 | 35 | Quiet @ EchoTiming @ TestRun[ 36 | "Tests/TestSuite.mt", 37 | Loggers :> {VerbosePrintLogger[], successLogger} 38 | ] 39 | 40 | If[TrueQ[TestRunSucceededQ[successLogger]], 41 | Exit[0] 42 | , 43 | Exit[1] 44 | ] 45 | -------------------------------------------------------------------------------- /Tests/Tokenize.mt: -------------------------------------------------------------------------------- 1 | Print["\n===== Start Tokenize.mt =====\n"] 2 | 3 | Needs["CodeParser`"] 4 | 5 | 6 | (* 7 | Comments 8 | *) 9 | Test[ 10 | CodeTokenize["(* \\.28\\.2a *)"] 11 | , 12 | {LeafNode[Token`Comment, "(* \\.28\\.2a *)", <|Source -> {{1, 1}, {1, 15}}|>]} 13 | , 14 | TestID->"Tokenize-20181208-O3D5M5" 15 | ] 16 | 17 | 18 | (* 19 | Number Errors 20 | *) 21 | Test[ 22 | CodeTokenize["1.2``->3"] 23 | , 24 | { 25 | ErrorNode[Token`Error`Number, "1.2``-", <|Source -> {{1, 1}, {1, 7}}|>], 26 | LeafNode[Token`Greater, ">", <|Source -> {{1, 7}, {1, 8}}|>], 27 | LeafNode[Integer, "3", <|Source -> {{1, 8}, {1, 9}}|>]} 28 | , 29 | TestID->"Tokenize-20181215-Z0H7Y5" 30 | ] 31 | 32 | 33 | (* 34 | String Errors 35 | *) 36 | Test[ 37 | CodeTokenize["\"123\\\""] 38 | , 39 | {ErrorNode[Token`Error`UnterminatedString, "\"123\\\"", <|Source -> {{1, 1}, {1, 7}}|>]} 40 | , 41 | TestID->"Tokenize-20190406-A1G3U8" 42 | ] 43 | 44 | 45 | Test[ 46 | CodeTokenize["*)"] 47 | , 48 | {ErrorNode[Token`Error`UnexpectedCommentCloser, "*)", <|Source -> {{1, 1}, {1, 3}}|>]} 49 | , 50 | TestID->"Tokenize-20220709-J1V7W8" 51 | ] 52 | 53 | 54 | 55 | 56 | 57 | -------------------------------------------------------------------------------- /cmake/InspectFile.cmake: 
-------------------------------------------------------------------------------- 1 | 2 | if(NOT CODEPARSER_EXE) 3 | return() 4 | endif() 5 | 6 | if(NOT EXISTS ${CODEPARSER_EXE}) 7 | return() 8 | endif() 9 | 10 | execute_process( 11 | COMMAND 12 | ${CODEPARSER_EXE} -check -file ${SRC} 13 | RESULT_VARIABLE 14 | CODEPARSER_RESULT 15 | ) 16 | 17 | if(${CODEPARSER_RESULT} EQUAL "0") 18 | return() 19 | endif() 20 | 21 | if(NOT ${CODEPARSER_RESULT} EQUAL "1") 22 | message(WARNING "Internal error. CODEPARSER_RESULT: ${CODEPARSER_RESULT}") 23 | return() 24 | endif() 25 | 26 | # 27 | # We know there was some problem, so now use CodeInspector to report the problem 28 | # 29 | 30 | if(NOT WOLFRAMKERNEL) 31 | return() 32 | endif() 33 | 34 | if(NOT EXISTS ${WOLFRAMKERNEL}) 35 | return() 36 | endif() 37 | 38 | set(CODE "\ 39 | If[FailureQ[FindFile[\"CodeInspector`\"]], Exit[0]]\;\ 40 | Needs[\"CodeInspector`\"]\;\ 41 | Print[\"Code inspection...\" //OutputForm]\;\ 42 | Print[CodeInspector`CodeInspectSummarize[File[\"${SRC}\"]] //OutputForm]\;\ 43 | Exit[1]\ 44 | ") 45 | 46 | execute_process( 47 | COMMAND 48 | ${WOLFRAMKERNEL} -noinit -noprompt -nopaclet -nostartuppaclets -run ${CODE} 49 | TIMEOUT 50 | ${KERNEL_TIMEOUT} 51 | ) 52 | 53 | message(FATAL_ERROR "File had fatal errors: ${SRC}") 54 | -------------------------------------------------------------------------------- /cmake/InstallPaclet.cmake: -------------------------------------------------------------------------------- 1 | 2 | if(NOT EXISTS ${WOLFRAMKERNEL}) 3 | message(FATAL_ERROR "WOLFRAMKERNEL does not exist. 
WOLFRAMKERNEL: ${WOLFRAMKERNEL}") 4 | endif() 5 | 6 | set(CODE "\ 7 | Print[OutputForm[\"Calling PacletInstall...\"]]\; 8 | Check[ 9 | res = PacletInstall[\"${PACLET_ARCHIVE}\", ForceVersionInstall -> True]\; 10 | , 11 | Print[OutputForm[Row[{\"$VersionNumber: \", NumberForm[$VersionNumber, {2, 1}]}]]]\; 12 | Print[OutputForm[Row[{\"Paclet WolframVersion: \", \"${PACLET_WOLFRAMVERSION}\"}]]]\; 13 | Print[OutputForm[Row[{\"To prevent this PacletInstall::compat message, update PacletInfo.wl.in with WolframVersion -> \\\"\", NumberForm[$VersionNumber, {2, 1}] ,\"\\\" and build and install again.\"}]]]\; 14 | res 15 | , 16 | {PacletInstall::compat} 17 | ]\; 18 | Print[res //OutputForm]\; 19 | Print[OutputForm[\"Done PacletInstall\"]]\; 20 | If[!PacletObjectQ[res], 21 | Exit[1] 22 | ]\; 23 | Exit[0] 24 | ") 25 | 26 | execute_process( 27 | COMMAND 28 | ${WOLFRAMKERNEL} -noinit -noprompt -run ${CODE} 29 | TIMEOUT 30 | ${KERNEL_TIMEOUT} 31 | RESULT_VARIABLE 32 | INSTALL_RESULT 33 | ) 34 | 35 | if(NOT ${INSTALL_RESULT} EQUAL "0") 36 | message(FATAL_ERROR "Bad exit code from install: ${INSTALL_RESULT}") 37 | endif() 38 | -------------------------------------------------------------------------------- /Tests/SafeString.mt: -------------------------------------------------------------------------------- 1 | Print["\n===== Start SafeString.mt =====\n"] 2 | 3 | Needs["CodeParser`"] 4 | 5 | Test[ 6 | SafeString[ByteArray[ToCharacterCode["1+1"]]] 7 | , 8 | "1+1" 9 | , 10 | TestID->"SafeString-20200103-U8A6X2" 11 | ] 12 | 13 | (* 14 | Invalid sequences 15 | *) 16 | Test[ 17 | SafeString[ByteArray[{206}]] 18 | , 19 | Missing["UnsafeCharacterEncoding_IncompleteUTF8Sequence"] 20 | , 21 | TestID->"SafeString-20200103-K0M0B9" 22 | ] 23 | 24 | (* 25 | High surrogates 26 | *) 27 | Test[ 28 | (* 29 | UTF-8 for 0xd800 30 | *) 31 | SafeString[ByteArray[{237, 160, 128}]] 32 | , 33 | Missing["UnsafeCharacterEncoding_StraySurrogate"] 34 | , 35 | TestID->"SafeString-20200103-Z8W9G3" 36 | ] 37 | 
38 | (* 39 | Low surrogates 40 | *) 41 | Test[ 42 | (* 43 | UTF-8 for 0xdc00 44 | *) 45 | SafeString[ByteArray[{237, 176, 128}]] 46 | , 47 | Missing["UnsafeCharacterEncoding_StraySurrogate"] 48 | , 49 | TestID->"SafeString-20200103-G7F2O6" 50 | ] 51 | 52 | 53 | 54 | (* 55 | BOM 56 | *) 57 | Test[ 58 | (* 59 | UTF-8 for 0xfeff 60 | *) 61 | SafeString[ByteArray[{239, 187, 191}]] 62 | , 63 | Missing["UnsafeCharacterEncoding_BOM"] 64 | , 65 | TestID->"SafeString-20200103-V9G4Y6" 66 | ] 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | -------------------------------------------------------------------------------- /crates/wolfram-parser/src/read/byte_buffer.rs: -------------------------------------------------------------------------------- 1 | //! A byte buffer that can return the current byte and advance to the next byte. 2 | 3 | use crate::read::Reader; 4 | 5 | // 6 | // Precondition: buffer is pointing to current byte 7 | // Postcondition: buffer is pointing to 1 byte past current byte 8 | // 9 | // Return current byte 10 | // 11 | pub(crate) fn ByteBuffer_nextByte(session: &mut Reader) -> u8 { 12 | // assert!((session.start <= session.buffer && session.buffer <= session.end)); 13 | 14 | // if session.buffer == session.end { 15 | if session.offset >= session.input.len() { 16 | session.wasEOF = true; 17 | // TODO: Make this return None. 
18 | return 0xff; 19 | } 20 | 21 | // session.buffer += 1; 22 | // return *(session.buffer); 23 | 24 | let byte = session.buffer()[0]; 25 | 26 | session.offset += 1; 27 | 28 | return byte; 29 | } 30 | 31 | pub(crate) fn ByteBuffer_currentByte(session: &Reader) -> u8 { 32 | // assert!((session.start <= session.buffer && session.buffer <= session.end)); 33 | 34 | // if session.buffer == session.end { 35 | if session.offset >= session.input.len() { 36 | return 0xff; 37 | } 38 | 39 | // return *(session.buffer); 40 | 41 | return session.buffer()[0]; 42 | } 43 | -------------------------------------------------------------------------------- /crates/wolfram-parser/src/symbol.rs: -------------------------------------------------------------------------------- 1 | #![allow(non_upper_case_globals)] 2 | 3 | use wolfram_expr::symbol::SymbolRef; 4 | 5 | pub type Symbol = SymbolRef<'static>; 6 | 7 | 8 | //========================================================== 9 | // Symbol constant declarations 10 | //========================================================== 11 | 12 | macro_rules! symbol { 13 | ($name:ident) => { 14 | pub const $name: Symbol = 15 | unsafe { Symbol::unchecked_new(concat!("System`", stringify!($name))) }; 16 | }; 17 | 18 | ($($name:ident);* $(;)?) => { 19 | $( 20 | $crate::symbol::symbol!($name); 21 | )* 22 | }; 23 | } 24 | 25 | macro_rules! nested_symbol { 26 | ($context:ident :: { $($name:ident),* }) => { 27 | pub mod $context { 28 | $( 29 | pub const $name: $crate::symbol::Symbol = unsafe { 30 | $crate::symbol::Symbol::unchecked_new(concat!( 31 | stringify!($context), 32 | "`", 33 | stringify!($name) 34 | )) 35 | }; 36 | )* 37 | } 38 | }; 39 | 40 | ($($($context:ident ::)+ $name:ident);* $(;)?) 
=> { 41 | $( 42 | symbol!($($context ::)* $name); 43 | )* 44 | }; 45 | } 46 | 47 | 48 | pub(crate) use {nested_symbol, symbol}; 49 | -------------------------------------------------------------------------------- /CodeParser/Resources/Examples/Collatz.m: -------------------------------------------------------------------------------- 1 | BeginPackage["Collatz`"] 2 | 3 | Collatz::usage "Collatz[n] gives a list of the iterates in the 3n+1 problem, 4 | starting from n. The conjecture is that this sequence always 5 | terminates." 6 | (*intentional implicit times*) 7 | 8 | Begin["`Private`"] 9 | 10 | Collatz[1] := {1}; 11 | 12 | Collatz[n_Integer] := Prepend[Collatz[(3 n + 1)/2], n] /; OddQ[n] && n > 0;; 13 | (*intentional ;;*) 14 | 15 | Collatz[n_Integer] := Prepend[Collatz[n/2], n] /; EvenQ[n] && n > 0; 16 | 17 | 18 | (* 19 | The call DummyFunction1[] can be replaced with CallSite[DummyFunction1[]] when profiling 20 | to enable CallSite analysis. 21 | 22 | CallSite analysis enables the profiling of time between when a function is called to when its body is entered. 23 | In this example, the Pause[0.01] would be kept track of. 24 | 25 | The CallSite wrapper is removed during instrumentation and does not affect the result. 26 | 27 | Make sure to call InstrumentProfile with the updated code and to reload the packages under profile. 28 | *) 29 | Collatz[n_Integer] := (DummyFunction1[];Prepend[Collatz[3 n + 1], n]) /; OddQ[n] && n > 0; 30 | 31 | 32 | DummyFunction1[] /; (Pause[0.01];True) := 33 | Module[{}, 34 | Null 35 | ] 36 | 37 | 38 | End[ ] 39 | 40 | EndPackage[ ] -------------------------------------------------------------------------------- /crates/wolfram-parser/src/feature.rs: -------------------------------------------------------------------------------- 1 | //! Constants that are true if the associated cargo feature is enabled. 2 | //! 3 | //! The constants in this module are intended to be used as: 4 | //! 5 | //! ```ignore 6 | //! 
if feature::CHECK_ABORT { 7 | //! // ... 8 | //! } 9 | //! ``` 10 | //! 11 | //! Using these constants is preferred over the standard alternatives of: 12 | //! 13 | //! ```ignore 14 | //! #[cfg(feature = "CHECK_ABORT")] 15 | //! // ... 16 | //! ``` 17 | //! 18 | //! or: 19 | //! 20 | //! ```ignore 21 | //! if cfg!(feature = "CHECK_ABORT") { 22 | //! // ... 23 | //! } 24 | //! ``` 25 | //! 26 | //! which have the disadvantage that the `feature = "..."` is not validated to 27 | //! correspond to a feature that is declared in Cargo.toml. 28 | //! 29 | //! More generally, using a constant instead of a parse-time `#[cfg(..)]` to 30 | //! disable sections of code has the advantage that the code inside the 31 | //! condition is still validated and type checked, which doesn't happen if 32 | //! `#[cfg(..)]` is used. 33 | //! 34 | //! This makes code controlled by feature flags easier to keep up-to-date as 35 | //! refactoring occurs. 36 | 37 | pub(crate) const COMPUTE_SOURCE: bool = cfg!(feature = "COMPUTE_SOURCE"); 38 | 39 | pub(crate) const CHECK_ABORT: bool = cfg!(feature = "CHECK_ABORT"); 40 | 41 | pub(crate) const FAST_STRING_SCAN: bool = cfg!(feature = "FAST_STRING_SCAN"); 42 | -------------------------------------------------------------------------------- /docs/tokens.md: -------------------------------------------------------------------------------- 1 | 2 | # Tokens 3 | 4 | 5 | ## Terminology 6 | 7 | 8 | trivia: whitespace, newlines, comments [1] 9 | 10 | 11 | 12 | ## Philosophy 13 | 14 | if a token is not something else, then it is prefix 15 | 16 | 17 | 18 | ## TokenEnum encoding 19 | 20 | There are currently ~427 tokens, so 9 bits are required to enumerate them 21 | 22 | 16 bits: 23 | 24 | ``` 25 | fedcba9876543210 26 | ^~~~~~~~~ 27 | Enum bits (9 bits) 28 | ^~ 29 | Group 1 bits (2 bits) 30 | ^~ 31 | Group 2 bits (2 bits) 32 | ^~~ 33 | Unused bits (3 bits) 34 | ``` 35 | 36 | 37 | Within the set of trivia tokens, the values of the enum bits themselves are 
special because they are used for fast 38 | testing. 39 | 40 | 41 | Group 1: These are all mutually exclusive categories: PossibleBeginning, Closer, Error 42 | 01 PossibleBeginning 43 | 10 Closer 44 | 11 Error 45 | 00 Anything Else 46 | 47 | Other possible categories for Group 1 are: Trivia, InfixOperator, etc. Everything in Group 1 would still 48 | be mutually exclusive. 49 | 50 | 51 | 52 | Group 2: These are all mutually exclusive categories: Empty, DifferentialD 53 | 01 Empty 54 | 10 DifferentialD 55 | 11 (unused) 56 | 00 Anything Else 57 | 58 | 59 | 60 | 61 | 62 | ## References 63 | 64 | [1] https://github.com/dotnet/roslyn/wiki/Roslyn-Overview#syntax-trivia 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | -------------------------------------------------------------------------------- /cmake/PacletInfo.cmake: -------------------------------------------------------------------------------- 1 | 2 | macro(CheckPacletInfo) 3 | 4 | if(NOT EXISTS ${WOLFRAMKERNEL}) 5 | message(FATAL_ERROR "WOLFRAMKERNEL does not exist. 
WOLFRAMKERNEL: ${WOLFRAMKERNEL}") 6 | endif() 7 | 8 | if(LOCAL_BUILD) 9 | message(STATUS "Paclet Version ignored in local build") 10 | set(LOCAL_BUILD_VERSION 999.9) 11 | else() 12 | # 13 | # if not local build, then get Version from PacletInfo.wl 14 | # 15 | execute_process( 16 | COMMAND 17 | ${WOLFRAMKERNEL} -noinit -noprompt -nopaclet -nostartuppaclets -runfirst Pause[${KERNEL_PAUSE}]\;Print[OutputForm[Row[{Version,\ ";",\ WolframVersion}\ /.\ List\ @@\ Get["${PACLETINFO_IN_SOURCE}"]]]]\;Exit[] 18 | OUTPUT_VARIABLE 19 | PACLET_VERSIONS_LIST 20 | OUTPUT_STRIP_TRAILING_WHITESPACE 21 | WORKING_DIRECTORY 22 | ${PROJECT_SOURCE_DIR} 23 | TIMEOUT 24 | ${KERNEL_TIMEOUT} 25 | RESULT_VARIABLE 26 | PACLETINFO_RESULT 27 | ) 28 | 29 | if(NOT ${PACLETINFO_RESULT} EQUAL "0") 30 | message(FATAL_ERROR "Bad exit code from PacletInfo script: ${PACLETINFO_RESULT}") 31 | endif() 32 | 33 | list(GET PACLET_VERSIONS_LIST 0 PACLET_VERSION) 34 | list(GET PACLET_VERSIONS_LIST 1 PACLET_WOLFRAMVERSION) 35 | message(STATUS "PACLET_VERSION: ${PACLET_VERSION}") 36 | message(STATUS "PACLET_WOLFRAMVERSION: ${PACLET_WOLFRAMVERSION}") 37 | 38 | endif(LOCAL_BUILD) 39 | 40 | endmacro(CheckPacletInfo) 41 | -------------------------------------------------------------------------------- /CodeParser/Generate/TokenEnum.wl: -------------------------------------------------------------------------------- 1 | (* ::Package::"Tags"-><|"SuspiciousSessionSymbol" -> <|Enabled -> False|>|>:: *) 2 | 3 | If[!MemberQ[$Path, #], PrependTo[$Path, #]]&[DirectoryName[$InputFileName, 3]] 4 | 5 | BeginPackage["CodeParser`Generate`TokenEnum`"] 6 | 7 | (* Used by Generate/RowBox.wl *) 8 | GroupOpenerToCloser 9 | 10 | 11 | Begin["`Private`"] 12 | 13 | 14 | GroupOpenerToCloser[Token`OpenCurly] = Closer`CloseCurly 15 | GroupOpenerToCloser[Token`LessBar] = Closer`BarGreater 16 | GroupOpenerToCloser[Token`OpenSquare] = Closer`CloseSquare 17 | GroupOpenerToCloser[Token`OpenParen] = Closer`CloseParen 18 | 
GroupOpenerToCloser[Token`ColonColonOpenSquare] = Closer`CloseSquare 19 | 20 | GroupOpenerToCloser[Token`LongName`LeftAngleBracket] = Closer`LongName`RightAngleBracket 21 | GroupOpenerToCloser[Token`LongName`LeftCeiling] = Closer`LongName`RightCeiling 22 | GroupOpenerToCloser[Token`LongName`LeftFloor] = Closer`LongName`RightFloor 23 | GroupOpenerToCloser[Token`LongName`LeftDoubleBracket] = Closer`LongName`RightDoubleBracket 24 | GroupOpenerToCloser[Token`LongName`LeftBracketingBar] = Closer`LongName`RightBracketingBar 25 | GroupOpenerToCloser[Token`LongName`LeftDoubleBracketingBar] = Closer`LongName`RightDoubleBracketingBar 26 | GroupOpenerToCloser[Token`LongName`LeftAssociation] = Closer`LongName`RightAssociation 27 | GroupOpenerToCloser[Token`LongName`OpenCurlyQuote] = Closer`LongName`CloseCurlyQuote 28 | GroupOpenerToCloser[Token`LongName`OpenCurlyDoubleQuote] = Closer`LongName`CloseCurlyDoubleQuote 29 | 30 | 31 | End[] 32 | 33 | EndPackage[] 34 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to Wolfram® 2 | 3 | Thank you for taking the time to contribute to the [Wolfram Research](https://github.com/wolframresearch) repos on GitHub. 4 | 5 | ## Licensing of Contributions 6 | 7 | By contributing to Wolfram, you agree and affirm that: 8 | 9 | > Wolfram may release your contribution under the terms of the [MIT license](https://opensource.org/licenses/MIT); and 10 | 11 | > You have read and agreed to the [Developer Certificate of Origin](http://developercertificate.org/), version 1.1 or later. 12 | 13 | Please see [LICENSE](LICENSE) for licensing conditions pertaining 14 | to individual repositories. 15 | 16 | 17 | ## Bug reports 18 | 19 | ### Security Bugs 20 | 21 | Please **DO NOT** file a public issue regarding a security issue. 22 | Rather, send your report privately to security@wolfram.com. 
Security 23 | reports are appreciated and we will credit you for it. We do not offer 24 | a security bounty, but the forecast in your neighborhood will be cloudy 25 | with a chance of Wolfram schwag! 26 | 27 | ### General Bugs 28 | 29 | Please use the repository issues page to submit general bug issues. 30 | 31 | Please do not duplicate issues. 32 | 33 | Please do send a complete and well-written report to us. Note: **the 34 | thoroughness of your report will positively correlate to our willingness 35 | and ability to address it**. 36 | 37 | When reporting issues, always include: 38 | 39 | * Your version of *Mathematica*® or the Wolfram Language. 40 | * Your operating system. 41 | -------------------------------------------------------------------------------- /crates/wolfram-parser/src/precedence.rs: -------------------------------------------------------------------------------- 1 | use std::num::NonZeroU8; 2 | 3 | /// All levels of precedence 4 | /// 5 | /// The 1's bit denotes the associativity. 6 | #[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord)] 7 | #[repr(transparent)] 8 | pub struct Precedence(NonZeroU8); 9 | 10 | // Verify that Option<Precedence> is the same size as a u8. 11 | const _: () = assert!(std::mem::size_of::<Option<Precedence>>() == 1); 12 | 13 | 14 | impl Precedence { 15 | // TODO(cleanup): Take the precedence value and associativity as separate 16 | // arguments. 17 | pub(crate) const fn new(value: u8) -> Self { 18 | match NonZeroU8::new(value) { 19 | Some(value) => Precedence(value), 20 | None => panic!("invalid Precedence 0 value"), 21 | } 22 | } 23 | 24 | // TODO(cleanup): Make this unnecessary. What does it mean anyway? 25 | fn bits(self) -> u8 { 26 | let Precedence(bits) = self; 27 | 28 | bits.get() 29 | } 30 | 31 | /// Returns true if `lhs` is greater than `rhs`. 
32 | pub(crate) fn greater( 33 | lhs: Option<Precedence>, 34 | rhs: Option<Precedence>, 35 | ) -> bool { 36 | let lhs = lhs.map(Precedence::bits).unwrap_or(0); 37 | let rhs = rhs.map(Precedence::bits).unwrap_or(0); 38 | 39 | lhs | 0x1 > rhs 40 | } 41 | } 42 | 43 | impl PartialEq<Precedence> for Option<Precedence> { 44 | fn eq(&self, other: &Precedence) -> bool { 45 | match self { 46 | Some(self_) => self_ == other, 47 | None => false, 48 | } 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /Tests/SyntaxErrorNodes.mt: -------------------------------------------------------------------------------- 1 | Print["\n===== Start SyntaxErrorNodes.mt =====\n"] 2 | 3 | Needs["CodeParser`"] 4 | 5 | (* 6 | ExpectedTilde: 7 | *) 8 | 9 | Test[ 10 | CodeParse["a ~f"] 11 | , 12 | ContainerNode[String, { 13 | SyntaxErrorNode[SyntaxError`ExpectedTilde, { 14 | LeafNode[Symbol, "a", <|Source -> {{1, 1}, {1, 2}}|>], 15 | LeafNode[Symbol, "f", <|Source -> {{1, 4}, {1, 5}}|>]}, <|Source -> {{1, 1}, {1, 5}}|>] }, <|Source -> {{1, 1}, {1, 5}}|>] 16 | , 17 | TestID->"SyntaxErrorNodes-20190521-T2R4L9" 18 | ] 19 | 20 | Test[ 21 | CodeConcreteParse["~"] 22 | , 23 | ContainerNode[String, { 24 | SyntaxErrorNode[SyntaxError`ExpectedTilde, { 25 | ErrorNode[Token`Error`ExpectedOperand, "", <|Source -> {{1, 1}, {1, 1}}|>], 26 | LeafNode[Token`Tilde, "~", <|Source -> {{1, 1}, {1, 2}}|>], 27 | ErrorNode[Token`Error`ExpectedOperand, "", <|Source -> {{1, 2}, {1, 2}}|>]}, <|Source -> {{1, 1}, {1, 2}}|>]}, <|Source -> {{1, 1}, {1, 2}}|>] 28 | , 29 | TestID->"SyntaxErrorNodes-20200628-O0J0J1" 30 | ] 31 | 32 | 33 | 34 | (* 35 | ExpectedSet: 36 | *) 37 | 38 | Test[ 39 | CodeParse["a /: b * c"] 40 | , 41 | ContainerNode[String, { 42 | SyntaxErrorNode[SyntaxError`ExpectedSet, { 43 | LeafNode[Symbol, "a", <|Source -> {{1, 1}, {1, 2}}|>], 44 | CallNode[LeafNode[Symbol, "Times", <||>], { 45 | LeafNode[Symbol, "b", <|Source -> {{1, 6}, {1, 7}}|>], 46 | LeafNode[Symbol, "c", <|Source -> {{1, 10}, {1, 11}}|>]}, 
<|Source -> {{1, 6}, {1, 11}}|>]}, <|Source -> {{1, 1}, {1, 11}}|>] }, <|Source -> {{1, 1}, {1, 11}}|>] 47 | , 48 | TestID->"SyntaxErrorNodes-20190521-D9G5L2" 49 | ] 50 | 51 | -------------------------------------------------------------------------------- /docs/implementation.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | > One curiosity question I have is why the parsePrefix() and parseInfix() methods were written to return a function pointer, instead of writing them to simply take the (ParserSessionPtr, Token) arguments and compute the result directly. The function pointer seems like pure indirection (I didn't see any place where the returned function pointer wasn't immediately called.) 4 | 5 | 6 | Just looking at the first example I saw: 7 | 8 | ``` 9 | auto P2 = prefixParselets[Tok.Tok.value()]; 10 | 11 | MUSTTAIL 12 | return (P2->parsePrefix())(session, P2, Tok); 13 | ``` 14 | 15 | Are you asking why not just do: 16 | 17 | ``` 18 | return P2->parsePrefix(session, Tok); 19 | ``` 20 | 21 | ? 22 | 23 | This is because the current state-of-the-art for tail calls in Clang is that you cannot make a tail call from flat function to an instance method. 24 | 25 | You can call flat function -> flat function and method instance -> method of same instance, but I think that's it. 26 | 27 | 28 | But I later discovered that I am abusing the current tail-call technology and cannot actually use the `[[clang::musttail]]` stuff right now 29 | (see https://github.com/llvm/llvm-project/issues/56435 that I filed) 30 | 31 | If I made Tokens a lot smaller, I may be able to get away with not breaking anything at -O2 but I haven't done that yet. 32 | 33 | So none of the tail-call stuff is even being used right now. 34 | 35 | I wonder if it would be possible to do `P2->parsePrefix(session, Tok)` or similar if not compiling with MUSTTAIL. 
36 | 37 | I have no doubt that deep C++ magic involving `std::bind` or something may be technically possible. 38 | -------------------------------------------------------------------------------- /CodeParser/Kernel/Quirks.wl: -------------------------------------------------------------------------------- 1 | (* ::Package::"Tags"-><|"NoVariables" -> <|"Module" -> <|Enabled -> False|>|>|>:: *) 2 | 3 | BeginPackage["CodeParser`Quirks`"] 4 | 5 | setupQuirks 6 | 7 | 8 | $Quirks 9 | 10 | 11 | processInfixBinaryAtQuirk 12 | 13 | 14 | Begin["`Private`"] 15 | 16 | Needs["CodeParser`"] 17 | 18 | 19 | 20 | setupQuirks[] := 21 | Module[{}, 22 | 23 | $Quirks = <||>; 24 | 25 | (* 26 | Setup "FlattenTimes" quirk 27 | 28 | In 12.1 and before: 29 | a / b / c is parsed as Times[a, Power[b, -1], Power[c, -1]] 30 | -a / b is parsed as Times[-1, a, Power[b, -1]] 31 | 32 | In 12.2 and after: 33 | a / b / c is parsed as Times[Times[a, Power[b, -1]], Power[c, -1]] 34 | -a / b is parsed as Times[Times[-1, a], Power[b, -1]] 35 | 36 | TODO: when targeting v12.2 as a minimum, remove this quirk 37 | 38 | Related bugs: 57064, 139531, 153875, 160919 39 | *) 40 | If[$VersionNumber <= 12.1, 41 | $Quirks["FlattenTimes"] = True 42 | ]; 43 | 44 | (* 45 | Setup "InfixBinaryAt" quirk 46 | 47 | The kernel parses a<>StringJoin@b as StringJoin[a, b] 48 | 49 | Most infix operators can be used with this syntax. 50 | Notably, SameQ and UnsameQ do NOT work with this syntax. 
51 | 52 | Related bugs: 365013 53 | *) 54 | $Quirks["InfixBinaryAt"] = True; 55 | 56 | (* 57 | changed in 13.1: 58 | @@@ 59 | 60 | In 13.0 and before: 61 | a @@@ b parsed as Apply[a, b, {1}] 62 | 63 | In 13.1 and after: 64 | a @@@ b parses as MapApply[a, b] 65 | *) 66 | If[$VersionNumber <= 13.0, 67 | $Quirks["OldAtAtAt"] = True 68 | ]; 69 | ] 70 | 71 | 72 | 73 | 74 | End[] 75 | 76 | EndPackage[] 77 | -------------------------------------------------------------------------------- /CodeTools/Generate/CreatePacletArchive.wl: -------------------------------------------------------------------------------- 1 | 2 | If[!MemberQ[$Path, #], PrependTo[$Path, #]]&[DirectoryName[$InputFileName, 3]] 3 | 4 | BeginPackage["CodeTools`Generate`CreatePacletArchive`"] 5 | 6 | Begin["`Private`"] 7 | 8 | (* 9 | Do not allow PacletManager to participate in finding `Generate` files 10 | 11 | PacletManager will find e.g. CodeParser/Kernel/TokenEnum.wl when asked to find CodeParser`Generate`TokenEnum` 12 | 13 | related issues: PACMAN-54 14 | *) 15 | Block[{Internal`PacletFindFile = Null&}, 16 | Needs["CodeTools`Generate`GenerateSources`"]; 17 | ] 18 | If[$VersionNumber < 12.1, 19 | Needs["PacletManager`"] 20 | ] 21 | 22 | checkBuildDir[] 23 | checkPaclet[] 24 | checkPacletLayoutDir[] 25 | 26 | 27 | If[retry, 28 | (* 29 | CreatePacletArchive may be slow on RE machines, so allow re-trying if JLink connection timeout is hit 30 | 31 | Set $connectTimeout to some large value and cross fingers (default is 20) 32 | 33 | See: RE-515885 34 | *) 35 | Needs["JLink`"]; 36 | JLink`InstallJava`Private`$connectTimeout = 300.0 37 | ] 38 | 39 | 40 | generate[] := ( 41 | 42 | Print["Calling CreatePacletArchive..."]; 43 | 44 | If[$VersionNumber >= 12.1, 45 | res = System`CreatePacletArchive[FileNameJoin[{pacletLayoutDir, paclet}], FileNameJoin[{buildDir, "paclet"}]] 46 | , 47 | res = PacletManager`PackPaclet[FileNameJoin[{pacletLayoutDir, paclet}], FileNameJoin[{buildDir, "paclet"}]] 48 | ]; 49 | 50 | 
Print[res]; 51 | 52 | If[!StringQ[res], 53 | Quit[1] 54 | ]; 55 | 56 | Print["Done CreatePacletArchive"] 57 | ) 58 | 59 | If[!StringQ[script], 60 | Quit[1] 61 | ] 62 | If[AbsoluteFileName[script] === AbsoluteFileName[$InputFileName], 63 | generate[] 64 | ] 65 | 66 | End[] 67 | 68 | EndPackage[] 69 | -------------------------------------------------------------------------------- /HowToBuild.md: -------------------------------------------------------------------------------- 1 | # Building 2 | 3 | CodeParser uses a Wolfram Language kernel to generate code at build time and a 4 | Rust compiler to compile a native library. 5 | 6 | CodeParser uses CMake to generate build scripts. 7 | 8 | Here is an example transcript using the default make generator to build CodeParser: 9 | ``` 10 | cd codeparser 11 | mkdir build 12 | cd build 13 | cmake .. 14 | cmake --build . 15 | ``` 16 | 17 | The result is a directory named `paclet` that contains the WL package source code and a built CodeParser `.paclet` file for installing. 18 | 19 | Inside a kernel session you may then install the paclet by evaluating: 20 | ``` 21 | PacletInstall["/path/to/build/paclet/CodeParser-1.10.paclet"] 22 | ``` 23 | 24 | Specify `MATHEMATICA_INSTALL_DIR` if you have Wolfram System installed in a non-default location: 25 | ``` 26 | cmake -DMATHEMATICA_INSTALL_DIR=/Applications/Mathematica.app/Contents/ .. 27 | cmake --build . 28 | ``` 29 | 30 | On Windows: 31 | ``` 32 | cmake -DMATHEMATICA_INSTALL_DIR="C:/Program Files/Wolfram Research/Mathematica/13.1" .. 33 | cmake --build . 34 | ``` 35 | 36 | ## Installing 37 | 38 | You can install the paclet from CMake: 39 | ``` 40 | cmake --install . 41 | ``` 42 | 43 | This starts a kernel and calls `PacletInstall` with the built .paclet file. 
44 | 45 | 46 | ## Troubleshooting 47 | 48 | ### "building for macOS-arm64 but attempting to link with file built for macOS-x86_64" 49 | 50 | You see this error during building: 51 | ``` 52 | ld: warning: ignoring file /Applications/Mathematica.app/Contents/SystemFiles/Links/MathLink/DeveloperKit/MacOSX-x86-64/CompilerAdditions/mathlink.framework/mathlink, building for macOS-arm64 but attempting to link with file built for macOS-x86_64 53 | ``` 54 | 55 | You most likely need to specify: 56 | ``` 57 | -DCMAKE_OSX_ARCHITECTURES=x86_64 58 | ``` 59 | -------------------------------------------------------------------------------- /docs/characters.md: -------------------------------------------------------------------------------- 1 | 2 | # Characters 3 | 4 | 5 | ## Philosophy 6 | 7 | if a character is not something else, then it is letterlike 8 | 9 | 10 | 11 | ## Character Encodings 12 | 13 | 14 | UTF-8 input is assumed everywhere. 15 | 16 | There is an API function SafeString that will accept an array of bytes and return a "safe" string, i.e., a string that has assumed UTF-8 input with these changes: 17 | 18 | Any invalid byte sequences are converted into \[UnknownGlyph] 19 | 20 | Any high or low surrogates are converted into \[UnknownGlyph] 21 | 22 | BOM character 0xfeff is converted into 0xe001, to allow transferring through MathLink. 23 | Related bugs: 366106 24 | 25 | 26 | 27 | 28 | 29 | 30 | ## Raw 31 | 32 | characters like \[RawReturn] are a way of escaping that character 33 | 34 | Poorly understood 35 | 36 | Perhaps essentially unused 37 | 38 | 39 | 40 | A good philosophy that I follow is to treat the Raw characters as escaped versions of their normal characters 41 | 42 | \[RawTab] is similar to \t, and is NOT the same as actual 0x09 character 43 | \[NewLine] is similar to \n, and is NOT the same as actual 0x0a character 44 | etc. 
45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | ## WLCharacter encoding 54 | 55 | 32 bits: 56 | ``` 57 | vutsrqponmlkjihgfedcba9876543210 58 | ^~~~~~~~~~~~~~~~~~~~~ 59 | Character bits (21 bits) 60 | ^ 61 | Sign bit 62 | ^~~ 63 | EscapeStyle bits (3 bits) 64 | ^~~~~~~ 65 | Unused (7 bits) 66 | ``` 67 | 68 | 69 | 70 | ## Private Use Area 71 | 72 | No attempt will be made to define or describe characters in the PUA. 73 | 74 | The FE defines a number of PUA characters for its own internal use. 75 | 76 | This is not a binding contract and usage, values, behavior, and stability is subject to change at any moment. 77 | 78 | 79 | 80 | -------------------------------------------------------------------------------- /docs/Development.md: -------------------------------------------------------------------------------- 1 | # Development 2 | 3 | ## Quick Command Reference 4 | 5 | #### Build the CodeParser paclet: 6 | 7 | ```shell 8 | $ cmake -S . -B build -DMATHEMATICA_INSTALL_DIR=/Applications/Wolfram/Mathematica-13.1.0.app/Contents/ 9 | $ cmake --build build 10 | ``` 11 | 12 | Intermediate compiled library artifacts will be built into the `./target` 13 | directory, and a copy will be placed in the `CodeParser/LibraryResources/` 14 | subdirectory of the built CodeParser paclet. 
15 | 16 | #### Run the compiled tests: 17 | 18 | Run the compiled Rust library tests using `cargo`: 19 | 20 | ```shell 21 | $ cargo test 22 | ``` 23 | 24 | #### Run the Wolfram tests: 25 | 26 | After building CodeParser, tests written in Wolfram can be run from the command 27 | line using the 28 | [`wolfram-cli paclet test`](https://github.com/ConnorGray/wolfram-cli) tool: 29 | 30 | ```shell 31 | $ wolfram-cli paclet test build/paclet/CodeParser Tests/TestSuite.mt 32 | ``` 33 | 34 | #### Run the wolfram-parser benchmarks: 35 | 36 | To run the benchmarks, execute: 37 | 38 | ```shell 39 | $ cargo bench 40 | ``` 41 | 42 | Re-running the benchmarks will print out comparison statistics between the latest 43 | and most recent previous benchmark run. See also 44 | [criterion.rs](https://github.com/bheisler/criterion.rs). 45 | 46 | 47 | ## Testing 48 | 49 | CodeParser has two test suites: 50 | 51 | 1. Tests written in Rust, primarily located in [crates/wolfram-parser/src/tests/](../crates/wolfram-parser/src/tests/). 52 | 2. Tests written in Wolfram, primarily located in [Tests](../Tests/). 53 | 54 | ## Benchmarking 55 | 56 | To capture a named baseline benchmark (here named `master`), execute: 57 | 58 | ```shell 59 | $ cargo bench -p wolfram-parser -- --save-baseline master 60 | ``` 61 | 62 | Then, to run benchmarks that show comparison results against that captured 63 | baseline, execute: 64 | 65 | ```shell 66 | $ cargo bench -p wolfram-parser -- --baseline master 67 | ``` 68 | 69 | -------------------------------------------------------------------------------- /CodeParser/Kernel/Shims.wl: -------------------------------------------------------------------------------- 1 | BeginPackage["CodeParser`Shims`"] 2 | 3 | setupShims 4 | 5 | cleanupStackShimMemoryLeak 6 | 7 | 8 | Begin["`Private`"] 9 | 10 | 11 | setupShims[] := ( 12 | Which[ 13 | $VersionNumber < 12.1, 14 | setupStackShim[] 15 | , 16 | (* 17 | Some weird problem is causing: 18 | DataStructure::nods: Stack is not a known DataStructure.
19 | 20 | Fall-back on shims 21 | *) 22 | FailureQ[Quiet[System`CreateDataStructure["Stack"], {DataStructure::nods}]], 23 | setupStackShim[] 24 | ] 25 | ) 26 | 27 | 28 | setupStackShim[] := ( 29 | 30 | (* 31 | For versions before 12.1, we implement our own stack to store top-level expressions. 32 | 33 | The push, pop and peek operations take O(1), while Normal takes O(n). 34 | *) 35 | 36 | (* 37 | Define CreateDataStructure for earlier versions 38 | *) 39 | System`CreateDataStructure["Stack"] := 40 | Module[{stack, stackVal, stackDepth, stackCons}, 41 | 42 | stackVal = stackCons[]; 43 | 44 | stackDepth = 0; 45 | 46 | stack /: stack["Push", expr_] := ( 47 | stackVal = stackCons[stackVal, expr]; 48 | stackDepth += 1; 49 | Null 50 | ); 51 | 52 | stack /: stack["Pop"] := 53 | Module[{tmp}, 54 | If[stackDepth != 0, 55 | stackDepth -= 1; 56 | {stackVal, tmp} = List @@ stackVal; 57 | tmp 58 | ] 59 | ]; 60 | 61 | stack /: stack["Peek"] := Last[stackVal]; 62 | 63 | stack /: Normal[stack] := ( 64 | Flatten[{stackVal}, Infinity, stackCons] 65 | ); 66 | 67 | stack /: stack["Length"] := stackDepth; 68 | 69 | stack 70 | ]; 71 | 72 | cleanupStackShimMemoryLeak[] := ( 73 | (* 74 | Hack to prevent memory leak with shims 75 | *) 76 | Quiet[Remove["CodeParser`Shims`Private`stack*$*"];, {Remove::rmnsm}]; 77 | ) 78 | ) 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | End[] 89 | 90 | EndPackage[] 91 | -------------------------------------------------------------------------------- /cmake/WolframScript.cmake: -------------------------------------------------------------------------------- 1 | 2 | if(NOT EXISTS ${WOLFRAMKERNEL}) 3 | message(FATAL_ERROR "WOLFRAMKERNEL does not exist. WOLFRAMKERNEL: ${WOLFRAMKERNEL}") 4 | endif() 5 | 6 | if(NOT DEFINED RETRY_ON_FAILURE) 7 | set(RETRY_ON_FAILURE OFF) 8 | endif() 9 | 10 | if(NOT EXISTS ${SCRIPT}) 11 | message(FATAL_ERROR "SCRIPT does not exist. 
SCRIPT: ${SCRIPT}") 12 | endif() 13 | 14 | file(READ ${SCRIPT} script) 15 | 16 | if(script STREQUAL "") 17 | message(FATAL_ERROR "SCRIPT is empty. SCRIPT: ${SCRIPT}") 18 | endif() 19 | 20 | if(RETRY_ON_FAILURE) 21 | 22 | # 23 | # try twice 24 | # 25 | 26 | execute_process( 27 | COMMAND 28 | ${WOLFRAMKERNEL} -script ${SCRIPT} -srcDir ${SRCDIR} -buildDir ${BUILDDIR} -pacletLayoutDir ${PACLET_LAYOUT_DIR} -paclet ${PACLET} 29 | TIMEOUT 30 | ${KERNEL_TIMEOUT} 31 | RESULT_VARIABLE 32 | SCRIPT_RESULT 33 | ) 34 | 35 | if(NOT ${SCRIPT_RESULT} EQUAL "0") 36 | message(WARNING "First try: Bad exit code from script: ${SCRIPT_RESULT}; retrying...") 37 | 38 | execute_process( 39 | COMMAND 40 | ${WOLFRAMKERNEL} -retry -script ${SCRIPT} -srcDir ${SRCDIR} -buildDir ${BUILDDIR} -pacletLayoutDir ${PACLET_LAYOUT_DIR} -paclet ${PACLET} 41 | TIMEOUT 42 | ${KERNEL_TIMEOUT} 43 | RESULT_VARIABLE 44 | SCRIPT_RESULT 45 | ) 46 | 47 | if(NOT ${SCRIPT_RESULT} EQUAL "0") 48 | message(FATAL_ERROR "Second try: Bad exit code from script: ${SCRIPT_RESULT}; stopping") 49 | else() 50 | message(STATUS "Second try: Success!") 51 | endif() 52 | 53 | endif() 54 | 55 | else(RETRY_ON_FAILURE) 56 | 57 | # 58 | # only try once 59 | # 60 | 61 | execute_process( 62 | COMMAND 63 | ${WOLFRAMKERNEL} -script ${SCRIPT} -srcDir ${SRCDIR} -buildDir ${BUILDDIR} -pacletLayoutDir ${PACLET_LAYOUT_DIR} -paclet ${PACLET} 64 | TIMEOUT 65 | ${KERNEL_TIMEOUT} 66 | RESULT_VARIABLE 67 | SCRIPT_RESULT 68 | ) 69 | 70 | if(NOT ${SCRIPT_RESULT} EQUAL "0") 71 | message(FATAL_ERROR "Bad exit code from script: ${SCRIPT_RESULT} (script was ${SCRIPT})") 72 | endif() 73 | 74 | endif() 75 | -------------------------------------------------------------------------------- /.github/workflows/workflow.yml: -------------------------------------------------------------------------------- 1 | # This is a basic workflow to help you get started with Actions 2 | 3 | name: CI 4 | 5 | # Controls when the action will run. 
Triggers the workflow on push or pull request 6 | # events but only for the master branch 7 | on: 8 | push: 9 | branches: 10 | - master 11 | pull_request: 12 | branches: 13 | - master 14 | 15 | env: 16 | WOLFRAM_ID: ${{ secrets.WOLFRAM_ID }} 17 | WOLFRAM_PW: ${{ secrets.WOLFRAM_PW }} 18 | 19 | # A workflow run is made up of one or more jobs that can run sequentially or in parallel 20 | jobs: 21 | # This workflow contains a single job called "build" 22 | build: 23 | name: "Build Code Parser" 24 | # The type of runner that the job will run on 25 | runs-on: ubuntu-latest 26 | # Steps represent a sequence of tasks that will be executed as part of the job 27 | steps: 28 | # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it 29 | - uses: actions/checkout@v2 30 | 31 | - name: Install Dependencies 32 | run: | 33 | sudo apt update 34 | sudo apt install -y wget cmake 35 | 36 | - name: Install Wolfram Engine 37 | run: | 38 | wget https://account.wolfram.com/download/public/wolfram-engine/desktop/LINUX 39 | sudo bash LINUX -- -auto -verbose 40 | rm LINUX 41 | 42 | - name: Activate Wolfram Engine 43 | run: | 44 | /usr/bin/wolframscript -authenticate $WOLFRAM_ID $WOLFRAM_PW 45 | /usr/bin/wolframscript -activate 46 | 47 | - name: Build CodeParser 48 | run: | 49 | mkdir build 50 | cd build 51 | cmake .. -DMATHEMATICA_INSTALL_DIR="/usr/local/Wolfram/WolframEngine/13.1" 52 | cmake --build . --target paclet 53 | 54 | - name: Install Paclet 55 | # TODO: find a way to specify the name of the paclet file instead of hardcoding. 
56 | run: | 57 | ls build 58 | /usr/bin/wolframscript -code 'PacletInstall["./build/paclet/CodeParser-1.7.paclet"];Exit[]' 59 | 60 | - name: Run Tests 61 | run: | 62 | pwd 63 | /usr/bin/wolframscript -file .github/workflows/run_tests.wls 64 | -------------------------------------------------------------------------------- /Tests/Aggregate.mt: -------------------------------------------------------------------------------- 1 | Print["\n===== Start Aggregate.mt =====\n"] 2 | 3 | Needs["CodeParser`"] 4 | Needs["CodeParser`Folds`"] (* For aggregate *) 5 | 6 | Test[Context[aggregate], "CodeParser`Folds`"] 7 | 8 | Test[ 9 | aggregate @ ContainerNode[String, { 10 | InfixNode[Plus, { 11 | LeafNode[Symbol, "a", <|Source -> {{1, 1}, {1, 2}}|>], 12 | LeafNode[Whitespace, " ", <|Source -> {{1, 2}, {1, 3}}|>], 13 | LeafNode[Token`Plus, "+", <|Source -> {{1, 3}, {1, 4}}|>], 14 | LeafNode[Whitespace, " ", <|Source -> {{1, 4}, {1, 5}}|>], 15 | LeafNode[Symbol, "b", <|Source -> {{1, 5}, {1, 6}}|>] 16 | }, <|Source -> {{1, 1}, {1, 6}}|>] 17 | }, <|Source -> {{1, 1}, {1, 6}}|>] 18 | , 19 | ContainerNode[String, { 20 | InfixNode[Plus, { 21 | LeafNode[Symbol, "a", <|Source -> {{1, 1}, {1, 2}}|>], 22 | LeafNode[Token`Plus, "+", <|Source -> {{1, 3}, {1, 4}}|>], 23 | LeafNode[Symbol, "b", <|Source -> {{1, 5}, {1, 6}}|>] 24 | }, <|Source -> {{1, 1}, {1, 6}}|>] 25 | }, <|Source -> {{1, 1}, {1, 6}}|>] 26 | ] 27 | 28 | (*-------------------------------------------*) 29 | (* Test aggregate[..] of a non-ContainerNode *) 30 | (*-------------------------------------------*) 31 | 32 | (* aggregate[..] 
of nodes with line:column positions *) 33 | Test[ 34 | aggregate @ InfixNode[Plus, { 35 | LeafNode[Symbol, "a", <|Source -> {{1, 1}, {1, 2}}|>], 36 | LeafNode[Whitespace, " ", <|Source -> {{1, 2}, {1, 3}}|>], 37 | LeafNode[Token`Plus, "+", <|Source -> {{1, 3}, {1, 4}}|>], 38 | LeafNode[Whitespace, " ", <|Source -> {{1, 4}, {1, 5}}|>], 39 | LeafNode[Symbol, "b", <|Source -> {{1, 5}, {1, 6}}|>] 40 | }, <|Source -> {{1, 1}, {1, 6}}|>] 41 | , 42 | InfixNode[Plus, { 43 | LeafNode[Symbol, "a", <|Source -> {{1, 1}, {1, 2}}|>], 44 | LeafNode[Token`Plus, "+", <|Source -> {{1, 3}, {1, 4}}|>], 45 | LeafNode[Symbol, "b", <|Source -> {{1, 5}, {1, 6}}|>] 46 | }, <|Source -> {{1, 1}, {1, 6}}|>] 47 | ] 48 | 49 | (*----------------------------------------*) 50 | (* Test aggregate[..] of individual tokes *) 51 | (*----------------------------------------*) 52 | 53 | With[{ 54 | symbolTok = LeafNode[Symbol, "a", <|Source -> {{1, 1}, {1, 2}}|>] 55 | }, 56 | Test[aggregate[symbolTok], symbolTok] 57 | ] 58 | 59 | Test[ 60 | aggregate @ LeafNode[Whitespace, " ", <|Source -> {{1, 2}, {1, 3}}|>], 61 | Nothing 62 | ] 63 | -------------------------------------------------------------------------------- /crates/wolfram-parser/benches/bench_fast_string_scan.rs: -------------------------------------------------------------------------------- 1 | use std::{fs, path::Path}; 2 | 3 | use criterion::{criterion_group, criterion_main, Criterion, SamplingMode}; 4 | 5 | use wolfram_parser::ParseOptions; 6 | 7 | fn parse_tokens(input: &str) { 8 | parse_tokens_u8(input.as_bytes()) 9 | } 10 | 11 | fn parse_tokens_u8(input: &[u8]) { 12 | wolfram_parser::tokenize_bytes(input, &ParseOptions::default()).unwrap(); 13 | } 14 | 15 | fn benchmark(c: &mut Criterion) { 16 | println!("\n==== Legend ===="); 17 | println!("FSS: FAST_STRING_SCAN = true"); 18 | println!("================\n"); 19 | 20 | c.bench_function("[FSS] tokenize 2 + 2", |b| { 21 | b.iter(|| parse_tokens("2 + 2")) 22 | }); 23 | 24 | let boxes_wl = 
include_str!("../../../CodeParser/Kernel/Boxes.wl"); 25 | c.bench_function("[FSS] tokenize Boxes.wl", |b| { 26 | b.iter(|| parse_tokens(boxes_wl)) 27 | }); 28 | 29 | benchmark_large_files(c); 30 | } 31 | /// Benchmarks tokenizing the fixture files in `Tests/files/large`: two large files with flat sampling (10 samples), then one medium file with auto sampling (30 samples). 32 | fn benchmark_large_files(c: &mut Criterion) { 33 | let mut group = c.benchmark_group("large files"); 34 | group.sampling_mode(SamplingMode::Flat); 35 | group.sample_size(10); 36 | 37 | //------------ 38 | // Large files 39 | //------------ 40 | // Fixture paths are relative to the package root (crates/wolfram-parser), which is the working directory Cargo uses when running benches. 41 | let relief_plot = 42 | fs::read(Path::new("../../Tests/files/large/ReliefPlot.nb")).unwrap(); 43 | group.bench_function("[FSS] tokenize ReliefPlot.nb", |b| { 44 | b.iter(|| parse_tokens_u8(&relief_plot)) 45 | }); 46 | 47 | let expanded_company_data_new = 48 | fs::read(Path::new("../../Tests/files/large/expandedCompanyDataNew1.m")) 49 | .unwrap(); 50 | group.bench_function("[FSS] tokenize expandedCompanyDataNew1.m", |b| { 51 | b.iter(|| parse_tokens_u8(&expanded_company_data_new)) 52 | }); 53 | 54 | //------------- 55 | // Medium files 56 | //------------- 57 | 58 | group.sampling_mode(SamplingMode::Auto); 59 | group.sample_size(30); 60 | 61 | let geomagnetic_models = 62 | fs::read(Path::new("../../Tests/files/large/geomagneticmodels.m")) 63 | .unwrap(); 64 | group.bench_function("[FSS] tokenize geomagneticmodels.m", |b| { 65 | b.iter(|| parse_tokens_u8(&geomagnetic_models)) 66 | }); 67 | } 68 | 69 | criterion_group!(benches, benchmark); 70 | criterion_main!(benches); 71 | -------------------------------------------------------------------------------- /crates/wolfram-parser/benches/bench_general.rs: -------------------------------------------------------------------------------- 1 | use std::{fs, path::Path}; 2 | 3 | use criterion::{criterion_group, criterion_main, Criterion, SamplingMode}; 4 | 5 | use wolfram_parser::ParseOptions; 6 | 7 | fn tokenize(input: &str) { 8 | tokenize_bytes(input.as_bytes()) 9 | } 10 | 11 | fn tokenize_bytes(input: &[u8]) { 12 | wolfram_parser::tokenize_bytes(input, 
&ParseOptions::default()).unwrap(); 13 | } 14 | 15 | fn parse(input: &str) { 16 | wolfram_parser::parse_cst_seq(input, &ParseOptions::default()); 17 | } 18 | 19 | fn parse_bytes(input: &[u8]) { 20 | wolfram_parser::parse_bytes_cst_seq(input, &ParseOptions::default()); 21 | } 22 | 23 | fn benchmark(c: &mut Criterion) { 24 | c.bench_function("tokenize 2 + 2", |b| b.iter(|| tokenize("2 + 2"))); 25 | 26 | let boxes_wl = include_str!("../../../CodeParser/Kernel/Boxes.wl"); 27 | c.bench_function("tokenize Boxes.wl", |b| b.iter(|| tokenize(boxes_wl))); 28 | c.bench_function("parse CST of Boxes.wl", |b| b.iter(|| parse(boxes_wl))); 29 | 30 | benchmark_large_files(c); 31 | } 32 | 33 | fn benchmark_large_files(c: &mut Criterion) { 34 | let mut group = c.benchmark_group("large files"); 35 | group.sampling_mode(SamplingMode::Flat); 36 | group.sample_size(10); 37 | 38 | //------------ 39 | // Large files 40 | //------------ 41 | 42 | let relief_plot = 43 | fs::read(Path::new("../../Tests/files/large/ReliefPlot.nb")).unwrap(); 44 | group.bench_function("parse CST of ReliefPlot.nb", |b| { 45 | b.iter(|| parse_bytes(&relief_plot)) 46 | }); 47 | 48 | let expanded_company_data_new = fs::read(Path::new( 49 | "../../Tests/files/large/expandedCompanyDataNew1.m", 50 | )) 51 | .unwrap(); 52 | group.bench_function("parse CST of expandedCompanyDataNew1.m", |b| { 53 | b.iter(|| parse_bytes(&expanded_company_data_new)) 54 | }); 55 | 56 | //------------- 57 | // Medium files 58 | //------------- 59 | 60 | group.sampling_mode(SamplingMode::Auto); 61 | group.sample_size(30); 62 | 63 | let geomagnetic_models = 64 | fs::read(Path::new("../../Tests/files/large/geomagneticmodels.m")) 65 | .unwrap(); 66 | group.bench_function("parse CST of geomagneticmodels.m", |b| { 67 | b.iter(|| parse_bytes(&geomagnetic_models)) 68 | }); 69 | } 70 | 71 | criterion_group!(benches, benchmark); 72 | criterion_main!(benches); 73 | -------------------------------------------------------------------------------- 
/crates/wolfram-parser/src/parse/parse_tests/test_parselet.rs: -------------------------------------------------------------------------------- 1 | use crate::{ 2 | cst::Cst, 3 | parse::{ParseBuilder, ParserSession}, 4 | parse_cst::ParseCst, 5 | ParseOptions, 6 | }; 7 | 8 | 9 | #[test] 10 | fn ParseletTest_Bug1() { 11 | let strIn = "a /: b := c"; 12 | 13 | let builder = ParseCst::new_builder(); 14 | 15 | let mut session = 16 | ParserSession::new(strIn.as_bytes(), builder, &ParseOptions::default()); 17 | 18 | let tok = session.tokenizer.peek_token(); 19 | 20 | let () = session.parse_prefix(tok); 21 | 22 | let P = session.builder.top_node(); 23 | 24 | assert_eq!(session.tokenizer.non_fatal_issues.len(), 0); 25 | assert_eq!(session.tokenizer.fatal_issues.len(), 0); 26 | 27 | assert!(matches!(P, Cst::Ternary(_))); 28 | } 29 | 30 | // 31 | // This used to assert 32 | // 33 | #[test] 34 | fn ParseletTest_Bug2() { 35 | // 36 | let strIn = "a] 16 | *) 17 | 18 | TestMatch[ 19 | CodeConcreteParse["\"\\:f3a2\""] 20 | , 21 | ContainerNode[String, { 22 | LeafNode[String, "\"\\:f3a2\"", <|Source -> {{1, 1}, {1, 9}}|>] }, _] 23 | , 24 | TestID->"Characters-20190601-E6Q0I8" 25 | ] 26 | 27 | 28 | (* 29 | \r and \[RawReturn] 30 | *) 31 | Test[ 32 | "\"\\r\"" 33 | , 34 | Null 35 | , 36 | EquivalenceFunction -> parseEquivalenceFunction 37 | , 38 | TestID->"Characters-20181115-M4K2F9" 39 | ] 40 | 41 | Test[ 42 | "\"\\[RawReturn]\"" 43 | , 44 | Null 45 | , 46 | EquivalenceFunction -> parseEquivalenceFunction 47 | , 48 | TestID->"Characters-20181115-A3F2Z1" 49 | ] 50 | 51 | Test[ 52 | "\"\\:000d\"" 53 | , 54 | Null 55 | , 56 | EquivalenceFunction -> parseEquivalenceFunction 57 | , 58 | TestID->"Characters-20190126-A6E4K4" 59 | ] 60 | 61 | 62 | 63 | (* 64 | \[RawDoubleQuote] 65 | *) 66 | Test[ 67 | "\"\\[RawDoubleQuote]\"" 68 | , 69 | Null 70 | , 71 | EquivalenceFunction -> parseEquivalenceFunction 72 | , 73 | TestID->"Characters-20190126-S9D1H2" 74 | ] 75 | 76 | Test[ 77 | "\"\\:0022\"" 
78 | , 79 | Null 80 | , 81 | EquivalenceFunction -> parseEquivalenceFunction 82 | , 83 | TestID->"Characters-20190126-O0I4X0" 84 | ] 85 | 86 | 87 | 88 | (* 89 | \[RawBackslash] 90 | *) 91 | Test[ 92 | "\"\\[RawBackslash]\"" 93 | , 94 | Null 95 | , 96 | EquivalenceFunction -> parseEquivalenceFunction 97 | , 98 | TestID->"Characters-20190126-T0Y0O1" 99 | ] 100 | 101 | Test[ 102 | "\"\\:005c\"" 103 | , 104 | Null 105 | , 106 | EquivalenceFunction -> parseEquivalenceFunction 107 | , 108 | TestID->"Characters-20190126-F7Z5P8" 109 | ] 110 | 111 | 112 | 113 | 114 | 115 | Test[ 116 | "\"\\.00\"" 117 | , 118 | Null 119 | , 120 | EquivalenceFunction -> parseEquivalenceFunction 121 | , 122 | TestID->"Characters-20190128-I9O3D9" 123 | ] 124 | 125 | 126 | Test[ 127 | "\"\\|010023\"" 128 | , 129 | Null 130 | , 131 | EquivalenceFunction -> parseEquivalenceFunction 132 | , 133 | TestID->"Characters-20190129-O8S8M2" 134 | ] 135 | 136 | 137 | 138 | 139 | 140 | 141 | -------------------------------------------------------------------------------- /Tests/CodeParser.mt: -------------------------------------------------------------------------------- 1 | Print["\n===== Start CodeParser.mt =====\n"] 2 | 3 | Needs["CodeParser`"] 4 | 5 | Needs["PacletManager`"] (* for PacletInformation *) 6 | 7 | 8 | (* 9 | 10 | Test options 11 | 12 | *) 13 | 14 | 15 | 16 | (* 17 | TODO: when targeting 12.1 as a minimum, then look into doing paclet["AssetLocation", "LibraryResources"] or similar 18 | *) 19 | location = "Location" /. PacletInformation["CodeParser"] 20 | 21 | pacletInfoFile = FileNameJoin[{location, "PacletInfo.wl"}] 22 | 23 | Block[{$ContextPath = {"PacletManager`", "System`"}, $Context = "Global`"}, 24 | (* 25 | put PacletManager` on $ContextPath to guarantee using PacletManager`Paclet symbol 26 | *) 27 | pacletInfo = Get[pacletInfoFile]; 28 | ] 29 | 30 | transport = Transport /. 
List @@ pacletInfo; 31 | 32 | 33 | 34 | 35 | TestMatch[ 36 | CodeParse["Plot[f[x,y],{x,0,1},{y,0,1},PlotRange\[Rule]All];", SourceConvention -> "Test"] 37 | , 38 | _Failure 39 | , 40 | Which[ 41 | transport === "ExprLib", 42 | {LibraryFunction::unevaluated} 43 | , 44 | transport === "MathLink", 45 | {} 46 | ] 47 | , 48 | TestID->"CodeParser-20200312-G4J9U7" 49 | ] 50 | 51 | 52 | 53 | Test[ 54 | CodeParse["\\[Pi]"] 55 | , 56 | ContainerNode[String, { 57 | LeafNode[Symbol, "Pi", <|Source->{{1,1}, {1,6}}|>]}, <|Source->{{1,1}, {1,6}}|>] 58 | , 59 | TestID->"CodeParser-20220910-I3Q6U1" 60 | ] 61 | 62 | Test[ 63 | CodeParse["\\[Degree]"] 64 | , 65 | ContainerNode[String, { 66 | LeafNode[Symbol, "Degree", <|Source->{{1,1}, {1,10}}|>]}, <|Source->{{1,1}, {1,10}}|>] 67 | , 68 | TestID->"CodeParser-20220910-Z3K4F3" 69 | ] 70 | 71 | Test[ 72 | CodeParse["\\[Infinity]"] 73 | , 74 | ContainerNode[String, { 75 | LeafNode[Symbol, "Infinity", <|Source->{{1,1}, {1,12}}|>]}, <|Source->{{1,1}, {1,12}}|>] 76 | , 77 | TestID->"CodeParser-20220910-T2T3W7" 78 | ] 79 | 80 | Test[ 81 | CodeParse["\\[ExponentialE]"] 82 | , 83 | ContainerNode[String, { 84 | LeafNode[Symbol, "E", <|Source->{{1,1}, {1,16}}|>]}, <|Source->{{1,1}, {1,16}}|>] 85 | , 86 | TestID->"CodeParser-20220910-H2B2B6" 87 | ] 88 | 89 | Test[ 90 | CodeParse["\\[ImaginaryI]"] 91 | , 92 | ContainerNode[String, { 93 | LeafNode[Symbol, "I", <|Source->{{1,1}, {1,14}}|>]}, <|Source->{{1,1}, {1,14}}|>] 94 | , 95 | TestID->"CodeParser-20220910-M6R5R1" 96 | ] 97 | 98 | Test[ 99 | CodeParse["\\[ImaginaryJ]"] 100 | , 101 | ContainerNode[String, { 102 | LeafNode[Symbol, "I", <|Source->{{1,1}, {1,14}}|>]}, <|Source->{{1,1}, {1,14}}|>] 103 | , 104 | TestID->"CodeParser-20220910-C4S7C2" 105 | ] 106 | 107 | 108 | -------------------------------------------------------------------------------- /Tests/Unsafe.mt: -------------------------------------------------------------------------------- 1 | Print["\n===== Start Unsafe.mt =====\n"] 2 | 3 
| Needs["CodeParser`"] 4 | 5 | Test[ 6 | CodeParse[{65, 16^^ed, 16^^a0, 16^^80, 65}] 7 | , 8 | ContainerNode[Byte, {Missing[ 9 | "UnsafeCharacterEncoding_StraySurrogate"]}, <|SyntaxIssues -> {EncodingIssue[ 10 | "StraySurrogate", "Stray surrogate.", 11 | "Fatal", <|Source -> {{1, 2}, {1, 3}}, 12 | ConfidenceLevel -> 1.|>]}|>] 13 | , 14 | TestID->"Unsafe-20211223-G8M5U2" 15 | ] 16 | 17 | 18 | Test[ 19 | CodeParse[{16^^E1, 16^^A0, 16^^C0}] 20 | , 21 | ContainerNode[Byte, {Missing[ 22 | "UnsafeCharacterEncoding_IncompleteUTF8Sequence"]}, <|SyntaxIssues -> {EncodingIssue[ 23 | "IncompleteUTF8Sequence", "Incomplete UTF-8 sequence.", 24 | "Fatal", <|Source -> {{1, 1}, {1, 2}}, ConfidenceLevel -> 1.|>], 25 | EncodingIssue["IncompleteUTF8Sequence", "Incomplete UTF-8 sequence.", 26 | "Fatal", <|Source -> {{1, 2}, {1, 3}}, 27 | ConfidenceLevel -> 1.|>]}|>] 28 | , 29 | TestID->"Unsafe-20211223-B6H1C5" 30 | ] 31 | 32 | 33 | Test[ 34 | CodeParse[{16^^C0 , 16^^80}] 35 | , 36 | ContainerNode[Byte, {Missing[ 37 | "UnsafeCharacterEncoding_IncompleteUTF8Sequence"]}, <|SyntaxIssues -> {EncodingIssue[ 38 | "IncompleteUTF8Sequence", "Incomplete UTF-8 sequence.", 39 | "Fatal", <|Source -> {{1, 1}, {1, 2}}, ConfidenceLevel -> 1.|>], 40 | EncodingIssue["IncompleteUTF8Sequence", "Incomplete UTF-8 sequence.", 41 | "Fatal", <|Source -> {{1, 2}, {1, 3}}, 42 | ConfidenceLevel -> 1.|>]}|>] 43 | , 44 | TestID->"Unsafe-20211224-A8O4H2" 45 | ] 46 | 47 | 48 | unsafe = FileNameJoin[{DirectoryName[$CurrentTestSource], "files", "small", "unsafe1.wl"}] 49 | 50 | Test[ 51 | CodeParse[File[unsafe]] 52 | , 53 | ContainerNode[File, { 54 | Missing["UnsafeCharacterEncoding_IncompleteUTF8Sequence"]}, <| 55 | SyntaxIssues -> { 56 | EncodingIssue["IncompleteUTF8Sequence", "Incomplete UTF-8 sequence.", "Fatal", <|Source -> {{1, 16}, {1, 17}}, ConfidenceLevel -> 1.|>]}, 57 | "FileName" -> unsafe|>] 58 | , 59 | TestID->"Unsafe-20220121-L0W6B5" 60 | ] 61 | 62 | 63 | (* 64 | from bug 420623 65 | 66 | unsafe2.wl has 
bytes: 67 | 68 | 0x5c 0xa9 69 | 70 | 0x5c is '\\' backslash character 71 | 72 | 0xa9 is incomplete UTF-8 sequence 73 | *) 74 | unsafe = FileNameJoin[{DirectoryName[$CurrentTestSource], "files", "small", "unsafe2.wl"}] 75 | 76 | Test[ 77 | CodeParse[File[unsafe]] 78 | , 79 | ContainerNode[File, { 80 | Missing["UnsafeCharacterEncoding_IncompleteUTF8Sequence"]}, <| 81 | SyntaxIssues -> { 82 | EncodingIssue["IncompleteUTF8Sequence", "Incomplete UTF-8 sequence.", "Fatal", <|Source -> {{1, 2}, {1, 3}}, ConfidenceLevel -> 1.|>]}, 83 | "FileName" -> unsafe|>] 84 | , 85 | TestID->"Unsafe-20220223-W0U9G9" 86 | ] 87 | 88 | 89 | 90 | 91 | 92 | -------------------------------------------------------------------------------- /CodeTools/Generate/GenerateSources.wl: -------------------------------------------------------------------------------- 1 | BeginPackage["CodeTools`Generate`GenerateSources`"] 2 | 3 | buildDirFlagPosition 4 | 5 | buildDir 6 | 7 | srcDirFlagPosition 8 | 9 | srcDir 10 | 11 | script 12 | 13 | pacletFlagPosition 14 | 15 | paclet 16 | 17 | retryFlagPosition 18 | 19 | retry 20 | 21 | pacletLayoutDirFlagPosition 22 | 23 | pacletLayoutDir 24 | 25 | 26 | checkBuildDir 27 | 28 | checkSrcDir 29 | 30 | checkPaclet 31 | 32 | checkPacletLayoutDir 33 | 34 | 35 | Begin["`Private`"] 36 | 37 | buildDirFlagPosition = FirstPosition[$CommandLine, "-buildDir"] 38 | 39 | buildDir := buildDir = $CommandLine[[buildDirFlagPosition[[1]] + 1]] 40 | 41 | srcDirFlagPosition = FirstPosition[$CommandLine, "-srcDir"] 42 | 43 | srcDir := srcDir = $CommandLine[[srcDirFlagPosition[[1]] + 1]] 44 | 45 | scriptPosition = FirstPosition[$CommandLine, "-script"] 46 | 47 | script := script = $CommandLine[[scriptPosition[[1]] + 1]] 48 | 49 | pacletFlagPosition = FirstPosition[$CommandLine, "-paclet"] 50 | 51 | paclet := paclet = $CommandLine[[pacletFlagPosition[[1]] + 1]] 52 | 53 | retryFlagPosition = FirstPosition[$CommandLine, "-retry"] 54 | 55 | retry = !MissingQ[retryFlagPosition] 56 | 57 | 
pacletLayoutDirFlagPosition = FirstPosition[$CommandLine, "-pacletLayoutDir"] 58 | 59 | pacletLayoutDir := pacletLayoutDir = $CommandLine[[pacletLayoutDirFlagPosition[[1]] + 1]] 60 | 61 | 62 | checkBuildDir[] := 63 | Module[{}, 64 | If[MissingQ[buildDirFlagPosition], 65 | Print["Cannot proceed; buildDir flag missing"]; 66 | Quit[1] 67 | ]; 68 | 69 | If[!DirectoryQ[buildDir], 70 | Print["Cannot proceed; Unsupported buildDir: ", buildDir]; 71 | Quit[1] 72 | ]; 73 | ] 74 | 75 | 76 | checkSrcDir[] := 77 | Module[{}, 78 | If[MissingQ[srcDirFlagPosition], 79 | Print["Cannot proceed; srcDir flag missing"]; 80 | Quit[1] 81 | ]; 82 | 83 | If[!DirectoryQ[srcDir], 84 | Print["Cannot proceed; Unsupported srcDir: ", srcDir]; 85 | Quit[1] 86 | ]; 87 | ] 88 | 89 | 90 | checkPaclet[] := 91 | Module[{}, 92 | If[MissingQ[pacletFlagPosition], 93 | Print["Cannot proceed; paclet flag missing"]; 94 | Quit[1] 95 | ]; 96 | ] 97 | 98 | 99 | checkPacletLayoutDir[] := 100 | Module[{}, 101 | If[MissingQ[pacletLayoutDirFlagPosition], 102 | Print["Cannot proceed; pacletLayoutDir flag missing"]; 103 | Quit[1] 104 | ]; 105 | 106 | If[!DirectoryQ[pacletLayoutDir], 107 | Print["Cannot proceed; Unsupported pacletLayoutDir: ", pacletLayoutDir]; 108 | Quit[1] 109 | ]; 110 | 111 | If[FileNameTake[pacletLayoutDir, -1] =!= "paclet", 112 | Print["Cannot proceed; Unsupported pacletLayoutDir: ", pacletLayoutDir]; 113 | Quit[1] 114 | ]; 115 | ] 116 | 117 | End[] 118 | 119 | EndPackage[] 120 | -------------------------------------------------------------------------------- /crates/wolfram-parser/src/tests/test_node.rs: -------------------------------------------------------------------------------- 1 | use crate::{ 2 | cst::{BinaryNode, CompoundNode, Cst, OperatorNode}, 3 | macros::{src, token}, 4 | parse::operators::{BinaryOperator, CompoundOperator}, 5 | parse_cst, 6 | source::Span, 7 | tests::assert_src, 8 | NodeSeq, ParseOptions, 9 | }; 10 | 11 | use pretty_assertions::assert_eq; 12 | 13 | 14 | #[test] 15 
| fn NodeTest_Bug1() { 16 | let input = "a_."; 17 | 18 | let NodeSeq(tokens) = crate::tokenize(input, &ParseOptions::default()); 19 | 20 | assert_eq!( 21 | tokens, 22 | vec![ 23 | token!(Symbol, "a", src!(1:1-1:2)), 24 | token!(UnderDot, "_.", src!(1:2-1:4)) 25 | ] 26 | ); 27 | 28 | let [T1, T2] = tokens.try_into().unwrap(); 29 | 30 | let N = CompoundNode::new2( 31 | CompoundOperator::CodeParser_PatternOptionalDefault, 32 | T1, 33 | T2, 34 | ); 35 | 36 | let NSource = Cst::Compound(N).get_source(); 37 | 38 | assert_eq!(NSource.start(), src!(1:1).into()); 39 | assert_eq!(NSource.end(), src!(1:4).into()); 40 | 41 | // FIXME: Check that no issues were generated; make tokenize() return a 42 | // ParseResult 43 | // assert_eq!(session.non_fatal_issues().len(), 0); 44 | // assert_eq!(session.fatal_issues().len(), 0); 45 | } 46 | 47 | #[test] 48 | fn test_parse_span() { 49 | // Binary Span with implicit 1st arg 50 | assert_eq!( 51 | parse_cst(";; b", &Default::default()).syntax, 52 | assert_src!(1:1-5 => Cst::Binary(BinaryNode(OperatorNode { 53 | op: BinaryOperator::Span, 54 | children: NodeSeq(vec![ 55 | Cst::Token(token![ 56 | Fake_ImplicitOne, 57 | "", 58 | Span::from(src!(1:1-1:1)) 59 | ]), 60 | Cst::Token(token![SemiSemi, ";;", Span::from(src!(1:1-1:3))]), 61 | Cst::Token(token![Whitespace, " ", Span::from(src!(1:3-1:4))]), 62 | Cst::Token(token![Symbol, "b", Span::from(src!(1:4-1:5))]), 63 | ]), 64 | }))) 65 | ); 66 | 67 | 68 | // Binary Span 69 | assert_eq!( 70 | parse_cst("a ;; b", &Default::default()).syntax, 71 | assert_src!(1:1-7 => Cst::Binary(BinaryNode(OperatorNode { 72 | op: BinaryOperator::Span, 73 | children: NodeSeq(vec![ 74 | Cst::Token(token![Symbol, "a", Span::from(src!(1:1-1:2))]), 75 | Cst::Token(token![Whitespace, " ", Span::from(src!(1:2-1:3))]), 76 | Cst::Token(token![SemiSemi, ";;", Span::from(src!(1:3-1:5))]), 77 | Cst::Token(token![Whitespace, " ", Span::from(src!(1:5-1:6))]), 78 | Cst::Token(token![Symbol, "b", Span::from(src!(1:6-1:7))]), 79 
| ]), 80 | 81 | }))) 82 | ); 83 | } 84 | -------------------------------------------------------------------------------- /CodeParser/Kernel/Node.wl: -------------------------------------------------------------------------------- 1 | BeginPackage["CodeParser`Node`"] 2 | 3 | Begin["`Private`"] 4 | 5 | Needs["CodeParser`"] 6 | Needs["CodeParser`Utils`"] 7 | 8 | 9 | (* 10 | Some selectors 11 | *) 12 | 13 | LeafNode[_, str_, _]["String"] := str 14 | 15 | 16 | 17 | (* 18 | Some attributes 19 | *) 20 | 21 | Attributes[CodeNode] = {HoldAllComplete} 22 | 23 | 24 | 25 | (* 26 | ToNode[sym] returns a LeafNode[Symbol] 27 | *) 28 | ToNode[s_Symbol] := 29 | If[Context[s] == "System`", 30 | LeafNode[Symbol, SymbolName[s], <||>] 31 | , 32 | (* 33 | Play it safe for now and fully qualify any non-System` symbol 34 | *) 35 | LeafNode[Symbol, Context[s]<>SymbolName[s], <||>] 36 | ] 37 | 38 | (* 39 | ToNode[string] returns a LeafNode[String] 40 | *) 41 | ToNode[s_String] := LeafNode[String, escapeString[s], <||>] 42 | 43 | (* 44 | ToNode[integer] returns a LeafNode[Integer] 45 | ToNode[real] returns a LeafNode[Real] 46 | *) 47 | ToNode[i_Integer] := LeafNode[Integer, ToString[i], <||>] 48 | ToNode[r_Real] := LeafNode[Real, ToString[r, InputForm], <||>] 49 | 50 | (* 51 | ToNode[rational] returns: 52 | if possible to convert to Rational literal then return LeafNode[Rational] 53 | otherwise, return CallNode[Rational] 54 | *) 55 | ToNode[r_Rational] := 56 | Catch[ 57 | Module[{num, den, e}, 58 | (* 59 | TODO: when targeting 12.0 as a minimum, use NumeratorDenominator[r] 60 | *) 61 | {num, den} = {Numerator[r], Denominator[r]}; 62 | (* 63 | loop between 2 and 36 and test if the base works 64 | 65 | loop from 36 to 2, going down 66 | 67 | out of all of these ways of constructing 1/16: 68 | 2^^1*^-4 69 | 4^^1*^-2 70 | 8^^4*^-2 71 | 16^^1*^-1 72 | 73 | prefer to do 16^^1*^-1 74 | that is, prefer the highest base 75 | *) 76 | Do[ 77 | e = IntegerExponent[den, b]; 78 | (* accept base b only when den is exactly b^e (b merely dividing den is not enough, e.g. 1/64 with b = 32) and num is positive (IntegerString drops the sign of num); otherwise fall through to CallNode[Rational] *) If[e != 0 && den == b^e && num > 0, 79 | 
Throw[LeafNode[Rational, ToString[b] <> "^^" <> IntegerString[num, b] <> "*^-" <> ToString[e], <||>]] 80 | ] 81 | , 82 | {b, 36, 2, -1} 83 | ]; 84 | CallNode[LeafNode[Symbol, "Rational", <||>], {ToNode[num], ToNode[den]}, <||>] 85 | ]] 86 | 87 | ToNode[f_?FailureQ] := f 88 | 89 | ToNode[args___] := 90 | Failure["Unhandled", <| "Function" -> ToNode, "Arguments" -> HoldForm[{args}] |>] 91 | 92 | 93 | 94 | FromNode[LeafNode[Symbol, s_, _]] := 95 | Symbol[s] 96 | 97 | (* 98 | No simple way to convert "\"123\"" to "123" 99 | *) 100 | FromNode[LeafNode[String, s_, _]] := 101 | ToExpression[s] 102 | 103 | (* 104 | No simple way to convert "123.456``7" to 123.456``7 105 | *) 106 | FromNode[LeafNode[Integer, i_, _]] := 107 | ToExpression[i] 108 | 109 | FromNode[LeafNode[Real, r_, _]] := 110 | ToExpression[r] 111 | 112 | FromNode[LeafNode[Rational, r_, _]] := 113 | ToExpression[r] 114 | 115 | FromNode[f_?FailureQ] := f 116 | 117 | FromNode[args___] := 118 | Failure["Unhandled", <| "Function" -> FromNode, "Arguments" -> HoldForm[{args}] |>] 119 | 120 | 121 | 122 | End[] 123 | 124 | EndPackage[] 125 | -------------------------------------------------------------------------------- /CodeParser/Generate/Common.wl: -------------------------------------------------------------------------------- 1 | BeginPackage["CodeParser`Generate`Common`"] 2 | 3 | toGlobal 4 | toTokenEnumVariant 5 | 6 | generatedCPPDir 7 | generatedCPPIncludeDir 8 | generatedCPPSrcDir 9 | 10 | dataDir 11 | 12 | importedPrefixParselets 13 | 14 | importedInfixParselets 15 | 16 | importedLongNames 17 | 18 | importedPrecedenceSource 19 | 20 | FatalError::usage = "FatalError[expr, ...] prints an error message and exits with a fatal error code." 21 | 22 | Begin["`Private`"] 23 | 24 | (* 25 | Do not allow PacletManager to participate in finding `Generate` files 26 | 27 | PacletManager will find e.g. 
CodeParser/Kernel/TokenEnum.wl when asked to find CodeParser`Generate`TokenEnum` 28 | 29 | related issues: PACMAN-54 30 | *) 31 | Block[{Internal`PacletFindFile = Null&}, 32 | Needs["CodeTools`Generate`GenerateSources`"]; 33 | ] 34 | 35 | 36 | (* 37 | uppercases and replaces ` with _ 38 | *) 39 | toGlobal[n0_String] := Module[{n = n0}, 40 | (* TODO(cleanup): This is a workaround *) 41 | If[StringStartsQ[n, "CodePoint`LongName`"], 42 | n = ToUpperCase[n] 43 | ]; 44 | 45 | StringReplace[n, {"`" -> "_", "$" -> "_"}] 46 | ] 47 | 48 | toGlobal[n_Symbol] := ( 49 | If[StringStartsQ[Context[n], "Precedence`"], 50 | StringReplace[ 51 | toGlobal[ToUpperCase[ToString[n]]], 52 | "PRECEDENCE_" -> "Precedence::" 53 | ] 54 | , 55 | toGlobal[ToUpperCase[ToString[n]]] 56 | ] 57 | ) 58 | 59 | toGlobal[n_, "CodePoint"] := 60 | Replace[n, { 61 | CodePoint`CRLF -> "CodePoint::CRLF", 62 | "CodePoint`LongName`RawDoubleQuote" -> toGlobal[n], 63 | "CodePoint`LongName`RawBackslash" -> toGlobal[n], 64 | other_String :> StringJoin["CodePoint::from_char(", toGlobal[other], ")"] 65 | }] 66 | 67 | toGlobal[n_, "UpperCamelCase"] := 68 | StringReplace[ToString[n], {"`" -> "_", "$" -> "_"}] 69 | 70 | 71 | toGlobal[sym_Symbol, "DefinePrecedence"] := 72 | StringTrim[toGlobal[sym], "Precedence::"] 73 | 74 | toGlobal[args___] := FatalError[{"BAD ARGS: ", args}] 75 | 76 | toTokenEnumVariant[name_] := 77 | StringReplace[ 78 | toGlobal[name, "UpperCamelCase"], 79 | StartOfString ~~ "Token_" -> "" 80 | ] 81 | 82 | (* generatedCPPDir = FileNameJoin[{buildDir, "generated", "rust"}] *) 83 | generatedCPPDir = FileNameJoin[{srcDir, "crates", "wolfram-parser", "src", "generated"}] 84 | generatedCPPIncludeDir = FileNameJoin[{generatedCPPDir}] 85 | generatedCPPSrcDir = FileNameJoin[{generatedCPPDir}] 86 | 87 | dataDir := dataDir = FileNameJoin[{srcDir, "CodeParser", "Data"}] 88 | 89 | importedPrefixParselets := importedPrefixParselets = Get[FileNameJoin[{dataDir, "PrefixParselets.wl"}]] 90 | 91 | 
importedInfixParselets := importedInfixParselets = Get[FileNameJoin[{dataDir, "InfixParselets.wl"}]] 92 | 93 | importedLongNames := importedLongNames = Get[FileNameJoin[{dataDir, "LongNames.wl"}]] 94 | 95 | importedPrecedenceSource := importedPrecedenceSource = Get[FileNameJoin[{dataDir, "Precedence.wl"}]] 96 | 97 | FatalError[args___] := ( 98 | Print["\n\nFATAL ERROR: ", args, "\n\n"]; 99 | 100 | Exit[-1] 101 | ) 102 | 103 | End[] 104 | 105 | EndPackage[] 106 | -------------------------------------------------------------------------------- /docs/fuzz-testing.md: -------------------------------------------------------------------------------- 1 | 2 | # Fuzz testing with AFL++ 3 | 4 | https://aflplus.plus/ 5 | 6 | 7 | 8 | ## Building AFL++ 9 | 10 | 11 | ### prerequisite: coreutils is installed 12 | 13 | the install of AFL++ assumes to be using `install` command from coreutils 14 | 15 | 16 | this is the bad `install`: 17 | ``` 18 | % which install 19 | /usr/bin/install 20 | ``` 21 | 22 | ``` 23 | brew install coreutils 24 | ``` 25 | 26 | you will see: 27 | ``` 28 | Commands also provided by macOS and the commands dir, dircolors, vdir have been installed with the prefix "g". 
29 | If you need to use these commands with their normal names, you can add a "gnubin" directory to your PATH with: 30 | PATH="/usr/local/opt/coreutils/libexec/gnubin:$PATH" 31 | ``` 32 | 33 | do what it says: 34 | ``` 35 | % export PATH="/usr/local/opt/coreutils/libexec/gnubin:$PATH" 36 | ``` 37 | 38 | or: 39 | ``` 40 | % export PATH="/opt/homebrew/opt/coreutils/libexec/gnubin:$PATH" 41 | ``` 42 | 43 | this is the good `install`: 44 | ``` 45 | % which install 46 | /usr/local/opt/coreutils/libexec/gnubin/install 47 | ``` 48 | 49 | 50 | ### prerequisite: LLVM clang is installed 51 | 52 | AFL++ assumes that you are using LLVM clang and NOT Apple clang 53 | 54 | this is the bad clang: 55 | ``` 56 | % which clang 57 | /usr/bin/clang 58 | ``` 59 | 60 | ``` 61 | brew install llvm 62 | ``` 63 | 64 | you will see: 65 | ``` 66 | If you need to have llvm first in your PATH, run: 67 | echo 'export PATH="/usr/local/opt/llvm/bin:$PATH"' >> ~/.zshrc 68 | ``` 69 | 70 | do what it says: 71 | ``` 72 | % export PATH="/usr/local/opt/llvm/bin:$PATH" 73 | ``` 74 | 75 | or: 76 | ``` 77 | % export PATH="/opt/homebrew/opt/llvm/bin:$PATH" 78 | ``` 79 | 80 | this is the good `clang`: 81 | ``` 82 | % which clang 83 | /usr/local/opt/llvm/bin/clang 84 | ``` 85 | 86 | 87 | ### building AFL++ 88 | 89 | https://aflplus.plus/building/ 90 | 91 | 92 | ``` 93 | git clone https://github.com/AFLplusplus/AFLplusplus 94 | 95 | cd AFLplusplus 96 | 97 | make clean 98 | 99 | make distrib 100 | 101 | sudo make install 102 | ``` 103 | 104 | Verify afl-fuzz is installed: 105 | ``` 106 | % which afl-fuzz 107 | /usr/local/bin/afl-fuzz 108 | ``` 109 | 110 | 111 | ## Building CodeParser 112 | 113 | ``` 114 | mkdir build-afl 115 | 116 | cd build-afl 117 | 118 | cmake -DTRANSPORT=None -DBUILD_EXE=ON -DCMAKE_BUILD_TYPE=Debug -DCMAKE_C_COMPILER=afl-clang-fast -DCMAKE_CXX_COMPILER=afl-clang-fast++ -DMATHEMATICA_INSTALL_DIR=/Applications/Mathematica.app/Contents .. 119 | 120 | cmake --build . 
--target codeparser-exe 121 | ``` 122 | 123 | 124 | ## Running CodeParser with AFL++ 125 | 126 | ``` 127 | cd build-afl 128 | 129 | rm -rf afl_out 130 | 131 | afl-fuzz -i ../Tests/files/small -o afl_out/ -x ../Tests/wl.dict -D -- cpp/src/exe/codeparser -file @@ 132 | ``` 133 | 134 | 135 | ## Troubleshooting 136 | 137 | Might get this: 138 | ``` 139 | [-] SYSTEM ERROR : shmget() failed, try running afl-system-config 140 | Stop location : afl_shm_init(), src/afl-sharedmem.c:252 141 | OS message : Invalid argument 142 | ``` 143 | 144 | do what it says and run: 145 | ``` 146 | sudo afl-system-config 147 | ``` 148 | 149 | 150 | 151 | 152 | 153 | 154 | 155 | 156 | 157 | 158 | 159 | -------------------------------------------------------------------------------- /CodeParser/Generate/Precedence.wl: -------------------------------------------------------------------------------- 1 | (* ::Package::"Tags"-><|"SuspiciousSessionSymbol" -> <|Enabled -> False|>|>:: *) 2 | 3 | If[!MemberQ[$Path, #], PrependTo[$Path, #]]&[DirectoryName[$InputFileName, 3]] 4 | 5 | BeginPackage["CodeParser`Generate`Precedence`"] 6 | 7 | Begin["`Private`"] 8 | 9 | (* 10 | Do not allow PacletManager to participate in finding `Generate` files 11 | 12 | PacletManager will find e.g. 
CodeParser/Kernel/TokenEnum.wl when asked to find CodeParser`Generate`TokenEnum` 13 | 14 | related issues: PACMAN-54 15 | *) 16 | Block[{Internal`PacletFindFile = Null&}, 17 | Needs["CodeParser`Generate`Common`"]; 18 | Needs["CodeTools`Generate`GenerateSources`"]; 19 | ] 20 | 21 | 22 | checkBuildDir[] 23 | 24 | 25 | associativityToValue[Associativity`NonRight] = 0 26 | associativityToValue[Associativity`Right] = 1 27 | 28 | 29 | generate[] := ( 30 | 31 | Print["Generating Precedence..."]; 32 | 33 | If[FailureQ[importedPrecedenceSource], 34 | Print[importedPrecedenceSource]; 35 | Quit[1] 36 | ]; 37 | 38 | (* 39 | resolve the symbolic values in the Precedence table to integer values 40 | *) 41 | cur = {0, Associativity`NonRight}; 42 | enumMap = <||>; 43 | KeyValueMap[( 44 | Which[ 45 | Head[#2] === Symbol, cur = enumMap[#2], 46 | Head[#2[[1]]] === Integer, cur = #2, 47 | #2[[1]] === Next, cur[[1]]++;cur[[2]] = #2[[2]], 48 | True, Print["Unhandled precedence"]; Quit[1] 49 | ]; 50 | AssociateTo[enumMap, #1 -> cur])& 51 | , 52 | importedPrecedenceSource 53 | ]; 54 | 55 | 56 | (* 57 | sanity check that all precedences are in order 58 | *) 59 | cur = -Infinity; 60 | KeyValueMap[ 61 | If[!TrueQ[#2[[1]] >= cur], 62 | Print["Precedence is out of order: ", #1 -> #2]; 63 | Quit[1] 64 | , 65 | cur = #2[[1]] 66 | ]& 67 | , 68 | enumMap 69 | ]; 70 | 71 | 72 | precedenceCPPHeader = { 73 | "\ 74 | // 75 | // AUTO GENERATED FILE 76 | // DO NOT MODIFY 77 | // 78 | 79 | #![allow(dead_code)] 80 | 81 | use crate::precedence::Precedence; 82 | 83 | impl Precedence {\ 84 | "} ~Join~ 85 | KeyValueMap[ 86 | {key, value} |-> Row[{ 87 | "\tpub const ", 88 | toGlobal[key, "DefinePrecedence"], 89 | ": Precedence = Precedence::new(", 90 | BitShiftLeft[value[[1]], 1] + associativityToValue[value[[2]]], 91 | "); // prec: ", value[[1]], ", assoc: ", value[[2]] 92 | }], 93 | enumMap 94 | ] 95 | ~Join~ { 96 | "\n}" 97 | }; 98 | 99 | Print["exporting Precedence.h"]; 100 | res = 
Export[FileNameJoin[{generatedCPPIncludeDir, "precedence_values.rs"}], Column[precedenceCPPHeader], "String"]; 101 | 102 | Print[res]; 103 | 104 | If[FailureQ[res], 105 | Quit[1] 106 | ]; 107 | 108 | precedenceWL = { 109 | " 110 | (* 111 | AUTO GENERATED FILE 112 | DO NOT MODIFY 113 | *) 114 | 115 | <|"} ~Join~ 116 | KeyValueMap[(Row[{#1, " -> ", BitShiftLeft[#2[[1]], 1] + associativityToValue[#2[[2]]], ",", "(* prec: ", #2[[1]], ", assoc: ", #2[[2]], " *)"}])&, enumMap] ~Join~ { 117 | "Nothing 118 | |> 119 | " 120 | }; 121 | 122 | Print["exporting Precedence.wl"]; 123 | res = Export[FileNameJoin[{buildDir, "paclet", "CodeParser", "Resources", "Generated", "Precedence.wl"}], Column[precedenceWL], "String"]; 124 | 125 | Print[res]; 126 | 127 | If[FailureQ[res], 128 | Quit[1] 129 | ]; 130 | 131 | Print["Done Precedence"] 132 | ) 133 | 134 | If[!StringQ[script], 135 | Quit[1] 136 | ] 137 | If[AbsoluteFileName[script] === AbsoluteFileName[$InputFileName], 138 | generate[] 139 | ] 140 | 141 | End[] 142 | 143 | EndPackage[] 144 | -------------------------------------------------------------------------------- /CodeParser/Kernel/Definitions.wl: -------------------------------------------------------------------------------- 1 | BeginPackage["CodeParser`Definitions`"] 2 | 3 | 4 | DefinitionSymbols 5 | 6 | 7 | Begin["`Private`"] 8 | 9 | Needs["CodeParser`"] 10 | Needs["CodeParser`Utils`"] 11 | 12 | 13 | (* 14 | given an LHS AST node, determine the symbol that gives the definition 15 | *) 16 | 17 | DefinitionSymbols[n:LeafNode[Symbol, _, _]] := {n} 18 | 19 | (* 20 | this is really a definition for Subscript 21 | 22 | adhere to principle of not re-implementing MakeExpression and do not try to refine 23 | *) 24 | DefinitionSymbols[n:BoxNode[SubscriptBox, _, _]] := {n} 25 | 26 | (* 27 | this is really a definition for Power, SuperStar, etc. 
28 | 29 | SuperscriptBox["a", "b"] is a definition for Power 30 | 31 | SuperscriptBox["a", "*"] is a definition for SuperStar 32 | 33 | adhere to principle of not re-implementing MakeExpression and do not try to refine 34 | *) 35 | DefinitionSymbols[n:BoxNode[SuperscriptBox, _, _]] := {n} 36 | 37 | 38 | DefinitionSymbols[LeafNode[_, _, _]] := {} 39 | DefinitionSymbols[ErrorNode[_, _, _]] := {} 40 | DefinitionSymbols[AbstractSyntaxErrorNode[_, _, _]] := {} 41 | DefinitionSymbols[PrefixNode[PrefixLinearSyntaxBang, _, _]] := {} 42 | 43 | 44 | DefinitionSymbols[CallNode[LeafNode[Symbol, "Condition", _], {node_, _}, _]] := DefinitionSymbols[node] 45 | DefinitionSymbols[CallNode[LeafNode[Symbol, "Pattern", _], {_, node_}, _]] := DefinitionSymbols[node] 46 | DefinitionSymbols[CallNode[LeafNode[Symbol, "PatternTest", _], {node_, _}, _]] := DefinitionSymbols[node] 47 | DefinitionSymbols[CallNode[LeafNode[Symbol, "HoldPattern", _], {node_}, _]] := DefinitionSymbols[node] 48 | 49 | DefinitionSymbols[CallNode[LeafNode[Symbol, "Attributes", _], {node_}, _]] := DefinitionSymbols[node] 50 | DefinitionSymbols[CallNode[LeafNode[Symbol, "Format", _], {node_}, _]] := DefinitionSymbols[node] 51 | DefinitionSymbols[CallNode[LeafNode[Symbol, "Options", _], {node_}, _]] := DefinitionSymbols[node] 52 | DefinitionSymbols[CallNode[LeafNode[Symbol, "MessageName", _], {node_, _, ___}, _]] := DefinitionSymbols[node] 53 | 54 | DefinitionSymbols[CallNode[LeafNode[Symbol, "Blank", _], {node_}, _]] := DefinitionSymbols[node] 55 | 56 | (* 57 | Something like a /: (b|c)[a] := d 58 | 59 | When scanning over (b|c)[a], we want to treat both b and c as definitions 60 | *) 61 | DefinitionSymbols[CallNode[LeafNode[Symbol, "Alternatives", _], children_, _]] := 62 | Catch[ 63 | Module[{defs}, 64 | 65 | defs = DefinitionSymbols /@ children; 66 | 67 | If[AnyTrue[defs, FailureQ], 68 | Throw[SelectFirst[defs, FailureQ]] 69 | ]; 70 | 71 | Flatten[defs] 72 | ]] 73 | 74 | 
DefinitionSymbols[CallNode[LeafNode[Symbol, "List", _], children_, _]] := 75 | Catch[ 76 | Module[{defs}, 77 | 78 | defs = DefinitionSymbols /@ children; 79 | 80 | If[AnyTrue[defs, FailureQ], 81 | Throw[SelectFirst[defs, FailureQ]] 82 | ]; 83 | 84 | Flatten[defs] 85 | ]] 86 | 87 | DefinitionSymbols[CallNode[node_, _, _]] := DefinitionSymbols[node] 88 | 89 | DefinitionSymbols[SyntaxErrorNode[_, _, _]] := {} 90 | 91 | DefinitionSymbols[args___] := 92 | Failure["Unhandled", <| "Function" -> DefinitionSymbols, "Arguments" -> HoldForm[{args}] |>] 93 | 94 | 95 | 96 | 97 | (* 98 | DeclarationName is appropriate for when you want a single name string 99 | 100 | If there are 0 names or if there is more than 1 name, then a Failure is returned 101 | *) 102 | DeclarationName[node_] := 103 | Catch[ 104 | Module[{syms}, 105 | 106 | syms = DefinitionSymbols[node]; 107 | 108 | If[empty[syms], 109 | Throw[Failure["NoDefinitions", <| "Node" -> node |>]] 110 | ]; 111 | 112 | If[Length[syms] > 1, 113 | Throw[Failure["TooManyDefinitions", <| "Node" -> node |>]] 114 | ]; 115 | 116 | syms[[1, 2]] 117 | ]] 118 | 119 | 120 | 121 | End[] 122 | 123 | EndPackage[] 124 | -------------------------------------------------------------------------------- /crates/wolfram-parser/src/parse/parselet/times_parselet.rs: -------------------------------------------------------------------------------- 1 | use crate::{ 2 | panic_if_aborted, 3 | parse::{parselet::*, ParserSession}, 4 | precedence::Precedence, 5 | tokenize::{TokenKind, TokenRef}, 6 | }; 7 | 8 | 9 | impl<'i, B: ParseBuilder<'i> + 'i> InfixParselet<'i, B> for TimesParselet { 10 | fn parse_infix( 11 | &self, 12 | session: &mut ParserSession<'i, B>, 13 | first_node: B::Node, 14 | trivia1: B::TriviaHandle, 15 | tok_in: TokenRef<'i>, 16 | ) -> B::Node { 17 | panic_if_aborted!(); 18 | 19 | let tok_in = session.push_syntax_and_next(tok_in); 20 | 21 | let mut infix_state = 22 | session.begin_infix(InfixOperator::Times, first_node); 23 | 24 | // 25 | 
// Unroll 1 iteration of the loop because we know that tok_in has already been read 26 | // 27 | 28 | let (trivia2, tok2) = session.current_token_eat_trivia(); 29 | 30 | let second_node = session.parse_prefix(tok2); 31 | 32 | session.builder.infix_add( 33 | &mut infix_state, 34 | trivia1, 35 | tok_in, 36 | trivia2, 37 | second_node, 38 | ); 39 | 40 | return TimesParselet::parse_loop(session, infix_state); 41 | } 42 | 43 | fn getOp(&self) -> InfixParseletOperator { 44 | return InfixOperator::Times.into(); 45 | } 46 | 47 | fn getPrecedence(&self, _: &ParserSession<'i, B>) -> Option<Precedence> { 48 | return Some(Precedence::STAR); 49 | } 50 | } 51 | 52 | impl TimesParselet { 53 | fn parse_loop<'i, B: ParseBuilder<'i> + 'i>( 54 | session: &mut ParserSession<'i, B>, 55 | mut infix_state: B::InfixParseState, 56 | ) -> B::Node { 57 | loop { 58 | panic_if_aborted!(); 59 | 60 | 61 | let (mut trivia1, mut tok1) = session.current_token(); 62 | 63 | tok1 = session.do_process_implicit_times(tok1); 64 | 65 | if tok1.tok == TokenKind::Fake_ImplicitTimes { 66 | // 67 | // implicit Times should not cross toplevel newlines 68 | // 69 | // so reset and try again 70 | // 71 | 72 | session.trivia_reset(trivia1); 73 | 74 | (trivia1, tok1) = session 75 | .current_token_eat_trivia_but_not_toplevel_newlines_into(); 76 | 77 | tok1 = session.do_process_implicit_times(tok1) 78 | } 79 | 80 | // 81 | // Cannot just compare tokens 82 | // 83 | // May be something like a * b c \[Times] d 84 | // 85 | // and we want only a single Infix node created 86 | // 87 | 88 | let tok1_op = 89 | B::with_infix_parselet(tok1.tok, |parselet| parselet.getOp()); 90 | 91 | if tok1_op 92 | != <TimesParselet as InfixParselet<'i, B>>::getOp(&TimesParselet {}) 93 | { 94 | // 95 | // Tok.tok != tok_in.tok, so break 96 | // 97 | 98 | session.trivia_reset(trivia1); 99 | 100 | let node = session.reduce_infix(infix_state); 101 | 102 | // MUSTTAIL 103 | return session.parse_climb(node); 104 | } 105 | 106 | let (trivia1, tok1) = session.commit_syntax_and_next(trivia1, 
tok1); 107 | 108 | let (trivia2, Tok2) = session.current_token_eat_trivia(); 109 | 110 | let operand = session.parse_prefix(Tok2); 111 | 112 | session.builder.infix_add( 113 | &mut infix_state, 114 | trivia1, 115 | tok1, 116 | trivia2, 117 | operand, 118 | ); 119 | } // loop 120 | } 121 | } 122 | -------------------------------------------------------------------------------- /Tests/ToString.mt: -------------------------------------------------------------------------------- 1 | Print["\n===== Start ToString.mt =====\n"] 2 | 3 | Needs["CodeParser`"] 4 | Needs["CodeParser`Abstract`"] 5 | Needs["CodeParser`ToString`"] (* ToInputFormString *) 6 | 7 | 8 | Test[ 9 | ToInputFormString[Aggregate[CodeConcreteParse["1+1"]]] 10 | , 11 | " 1 + 1 " 12 | , 13 | TestID->"ToString-20181230-P1F9Q9" 14 | ] 15 | 16 | Test[ 17 | ToInputFormString[Aggregate[CodeConcreteParse["_ + __ * ___"]]] 18 | , 19 | " _ + __*___ " 20 | , 21 | TestID->"ToString-20181230-S7R9U8" 22 | ] 23 | 24 | Test[ 25 | ToInputFormString[Aggregate[CodeConcreteParse["% ^ # ^ ## ^ f''[x]"]]] 26 | , 27 | " %^ #^ ##^ f' ' [x] " 28 | , 29 | TestID->"ToString-20181230-E6E4O1" 30 | ] 31 | 32 | 33 | Test[ 34 | ToInputFormString[Aggregate[CodeConcreteParse["@"]]] 35 | , 36 | " @ " 37 | , 38 | TestID->"ToString-20181230-V8O8B1" 39 | ] 40 | 41 | Test[ 42 | ToInputFormString[Aggregate[CodeConcreteParse["{a_b, c__d, e___f, _., g_.}"]]] 43 | , 44 | "{ a_b,c__d,e___f,_.,g_. 
}" 45 | , 46 | TestID->"ToString-20181230-U1H3E1" 47 | ] 48 | 49 | 50 | Test[ 51 | ToInputFormString[Aggregate[CodeConcreteParse["aaa - bbb + ccc - !ddd"]]] 52 | , 53 | " aaa - bbb + ccc - !ddd " 54 | , 55 | TestID->"ToString-20181230-Z9F3L8" 56 | ] 57 | 58 | 59 | 60 | Test[ 61 | ToInputFormString[Aggregate[CodeConcreteParse["a::b::c"]]] 62 | , 63 | " a::b::c " 64 | , 65 | TestID->"ToString-20181230-P0K1Y7" 66 | ] 67 | 68 | Test[ 69 | ToInputFormString[Aggregate[CodeConcreteParse["a /: b := c"]]] 70 | , 71 | " a/:b:=c " 72 | , 73 | TestID->"ToString-20181230-H9T6O8" 74 | ] 75 | 76 | 77 | Test[ 78 | ToInputFormString[Aggregate[CodeConcreteParse["##&"]]] 79 | , 80 | " ##& " 81 | , 82 | TestID->"ToString-20181230-A2F7W1" 83 | ] 84 | 85 | 86 | Test[ 87 | ToInputFormString[Aggregate[CodeConcreteParse["f[]"]]] 88 | , 89 | "f[]" 90 | , 91 | TestID->"ToString-20181230-R5Q3J4" 92 | ] 93 | 94 | Test[ 95 | ToInputFormString[Aggregate[CodeConcreteParse["f["]]] 96 | , 97 | "f[" 98 | , 99 | TestID->"ToString-20181230-T4A0R3" 100 | ] 101 | 102 | 103 | Test[ 104 | ToInputFormString[Aggregate[CodeConcreteParse["f[[4]]"]]] 105 | , 106 | "f[[4]]" 107 | , 108 | TestID->"ToString-20181230-C6W4M5" 109 | ] 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | Test[ 121 | ToInputFormString[Aggregate[CodeConcreteParse["\\(x\\)"]]] 122 | , 123 | "\\(x\\)" 124 | , 125 | TestID->"ToString-20181230-U6K9Q7" 126 | ] 127 | 128 | Test[ 129 | ToInputFormString[Aggregate[CodeConcreteParse["\\(x,y\\)"]]] 130 | , 131 | "\\(x,y\\)" 132 | , 133 | TestID->"ToString-20181231-U3W4B3" 134 | ] 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | 150 | Test[ 151 | ToInputFormString[Aggregate[CodeConcreteParse["a& & + b"]]] 152 | , 153 | " a& & + b " 154 | , 155 | TestID->"ToString-20181231-F0J3L4" 156 | ] 157 | 158 | 159 | 160 | 161 | 162 | 163 | (* 164 | Error handling 165 | *) 166 | Test[ 167 | ToInputFormString[Aggregate[CodeConcreteParse["A B:C:.Ne"]]] 168 
| , 169 | " A B:C : . Ne " 170 | , 171 | TestID->"ToString-20190523-V1I4S4" 172 | ] 173 | 174 | 175 | Test[ 176 | ToInputFormString[Aggregate[CodeConcreteParse["a:"]]] 177 | , 178 | " a: " 179 | , 180 | TestID->"ToString-20190523-H5C9J2" 181 | ] 182 | 183 | 184 | 185 | 186 | 187 | 188 | Test[ 189 | StringJoin[ToSourceCharacterString /@ CodeConcreteParse["{]", ContainerNode -> (#[[1]]&)]] 190 | , 191 | "{]" 192 | , 193 | TestID->"ToString-20190926-T4I8S1" 194 | ] 195 | 196 | 197 | ast = CodeParse["a // -1"] 198 | 199 | Test[ 200 | ToFullFormString[ast] 201 | , 202 | "(-1)[a]" 203 | , 204 | TestID->"ToString-20200315-Z6K8C4" 205 | ] 206 | 207 | 208 | ast = CodeParse["{\"a\", \\\n\"b\"}"] 209 | 210 | Test[ 211 | ToFullFormString[ast] 212 | , 213 | "List[\"a\", \"b\"]" 214 | , 215 | TestID->"ToString-20200601-D9Z8Z8" 216 | ] 217 | 218 | 219 | 220 | 221 | 222 | 223 | 224 | -------------------------------------------------------------------------------- /crates/wolfram-parser/src/parse/parselet/under_parselet.rs: -------------------------------------------------------------------------------- 1 | use crate::{ 2 | panic_if_aborted, 3 | parse::{ 4 | operators::CompoundOperator, parselet::*, ParserSession, UnderParseData, 5 | }, 6 | tokenize::{TokenKind, TokenRef}, 7 | }; 8 | 9 | impl UnderParselet { 10 | pub(crate) const fn new( 11 | BOp: CompoundOperator, 12 | PBOp: CompoundOperator, 13 | ) -> Self { 14 | Self { BOp, PBOp } 15 | } 16 | } 17 | 18 | impl<'i, B: ParseBuilder<'i> + 'i> PrefixParselet<'i, B> for UnderParselet { 19 | fn parse_prefix( 20 | &self, 21 | session: &mut ParserSession<'i, B>, 22 | tok_in: TokenRef<'i>, 23 | ) -> B::Node { 24 | // 25 | // prefix 26 | // 27 | // Something like _ or _a 28 | // 29 | 30 | let node = self.get_parse_under_context_sensitive(session, tok_in); 31 | 32 | let node = session.builder.push_compound_blank(node); 33 | 34 | // MUSTTAIL 35 | return session.parse_climb(node); 36 | } 37 | } 38 | 39 | impl UnderParselet { 40 | pub(in 
crate::parse) fn get_parse_infix_context_sensitive< 41 | 'i, 42 | B: ParseBuilder<'i> + 'i, 43 | >( 44 | &self, 45 | session: &mut ParserSession<'i, B>, 46 | tok_in: TokenRef<'i>, 47 | ) -> UnderParseData<'i> { 48 | // 49 | // infix 50 | // 51 | // Something like a_b 52 | // 53 | 54 | self.get_parse_under_context_sensitive(session, tok_in) 55 | } 56 | 57 | fn get_parse_under_context_sensitive<'i, B: ParseBuilder<'i> + 'i>( 58 | &self, 59 | session: &mut ParserSession<'i, B>, 60 | tok_in: TokenRef<'i>, 61 | ) -> UnderParseData<'i> { 62 | panic_if_aborted!(); 63 | 64 | tok_in.skip(&mut session.tokenizer); 65 | 66 | let tok = session.tokenizer.peek_token(); 67 | 68 | match tok.tok { 69 | TokenKind::Symbol => { 70 | // 71 | // Something like 72 | // prefix: _b 73 | // infix: a_b 74 | // 75 | 76 | // Context-sensitive infix parse of Symbol token 77 | // 78 | // Something like _b 79 | // ^ 80 | // We know we are already in the middle of parsing _ 81 | // 82 | // Just push this symbol 83 | // 84 | tok.skip(&mut session.tokenizer); 85 | 86 | UnderParseData::UnderSymbol { 87 | op: self.BOp, 88 | under: tok_in, 89 | symbol: tok, 90 | } 91 | }, 92 | 93 | TokenKind::Error_ExpectedLetterlike => { 94 | // 95 | // Something like: 96 | // prefix: _a` (TID:231016/1) 97 | // infix: a_b` (TID:231016/2) 98 | // 99 | // It's nice to include the error inside of the blank 100 | // 101 | 102 | tok.skip(&mut session.tokenizer); 103 | 104 | UnderParseData::UnderSymbol { 105 | op: self.BOp, 106 | under: tok_in, 107 | symbol: tok, 108 | } 109 | }, 110 | 111 | _ => UnderParseData::Under(tok_in), 112 | } 113 | } 114 | } 115 | 116 | //====================================== 117 | // UnderDotParselet 118 | //====================================== 119 | 120 | impl<'i, B: ParseBuilder<'i> + 'i> PrefixParselet<'i, B> for UnderDotParselet { 121 | fn parse_prefix( 122 | &self, 123 | session: &mut ParserSession<'i, B>, 124 | tok_in: TokenRef<'i>, 125 | ) -> B::Node { 126 | // 127 | // prefix 128 | // 
129 | // Something like _. 130 | // 131 | 132 | panic_if_aborted!(); 133 | 134 | 135 | let node = session.push_leaf_and_next(tok_in); 136 | 137 | // MUSTTAIL 138 | return session.parse_climb(node); 139 | } 140 | } 141 | -------------------------------------------------------------------------------- /docs/stages.md: -------------------------------------------------------------------------------- 1 | docs: stages of parser 2 | 3 | 4 | 5 | 6 | 7 | bytes 8 | 9 | -> decode bytes -> 10 | 11 | Source characters 12 | 13 | -> decode Source characters -> 14 | 15 | WL characters 16 | 17 | -> tokenize -> 18 | 19 | tokens 20 | 21 | -> parse -> 22 | 23 | concrete nodes 24 | 25 | -> aggregate -> 26 | 27 | aggregate nodes 28 | 29 | -> abstract -> 30 | 31 | abstract nodes 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | different levels of syntax 46 | 47 | 48 | 49 | # boxes 50 | 51 | ``` 52 | RowBox[{"1", "+", RowBox[{"(*", "*)"}], "a"}] 53 | ``` 54 | 55 | ``` 56 | RowBox[{"1", "+", RowBox[{"(*", "*)"}], SqrtBox["a"]}] 57 | ``` 58 | 59 | tree structure of tokens 60 | 61 | no type information 62 | 63 | no Implicit tokens 64 | 65 | Trivia is kept 66 | 67 | 68 | 69 | ## What is trivia? 
70 | 71 | Taken from: 72 | https://github.com/dotnet/roslyn/blob/master/docs/wiki/Roslyn-Overview.md#syntax-trivia 73 | 74 | comments 75 | 76 | whitespace 77 | 78 | newlines 79 | 80 | trivia is only ever RIFFLED between tokens, never at the beginning or end 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | # concrete 92 | 93 | ``` 94 | InfixNode[Plus, { 95 | LeafNode[Integer, "1", <||>], 96 | LeafNode[Token`Plus, "+", <||>], 97 | LeafNode[Token`Comment, "(**)", <||>], 98 | LeafNode[Symbol, "a", <||>] }, <||>] 99 | 100 | InfixNode[Plus, { 101 | LeafNode[Integer, "1", <||>], 102 | LeafNode[Token`Plus, "+", <||>], 103 | LeafNode[Token`Comment, "(**)", <||>], 104 | LeafNode[SqrtBox, "a", <||>] }, <||>] 105 | ``` 106 | 107 | Trivia is kept 108 | 109 | type information is added 110 | type information is the wrapper like InfixNode[Plus, ...] 111 | 112 | and also type information is all of the riffled tokens InfixNode[Plus, { 1, +, 2, \[ImplicitPlus], 3 }] 113 | 114 | 115 | 116 | Implicit tokens are added 117 | 118 | 119 | ## What are Implicit tokens? 
120 | 121 | 122 | when parsing ;; it is convenient to remember the implicit 1 ;; All 123 | 124 | when parsing a; ; it is convenient to remember the implicit a ; Null ; Null 125 | 126 | 127 | 128 | implicit Times 129 | 130 | when parsing a b it is convenient to remember the implicit a ImplicitTimes b 131 | 132 | 133 | 134 | concrete syntax is everything 135 | 136 | concrete syntax has CallNode[{head, comment}, {child1}] 137 | 138 | concrete syntax has InfixNode[Plus, {1, +, comment, 1}] 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | 150 | 151 | 152 | 153 | # aggregate 154 | 155 | ``` 156 | InfixNode[Plus, { 157 | LeafNode[Integer, "1", <||>], 158 | LeafNode[Token`Plus, "+", <||>], 159 | LeafNode[Symbol, "a", <||>] }, <||>] 160 | 161 | InfixNode[Plus, { 162 | LeafNode[Integer, "1", <||>], 163 | LeafNode[Token`Plus, "+", <||>], 164 | LeafNode[SqrtBox, "a", <||>] }, <||>] 165 | ``` 166 | 167 | type information is kept 168 | 169 | Implicit tokens are kept 170 | 171 | Trivia is removed 172 | 173 | aggregate syntax 174 | 175 | aggregate removes comments, whitespace, and newlines 176 | 177 | aggregate syntax has CallNode[head, {child1}] 178 | 179 | aggregate syntax has InfixNode[Plus, {1, +, 1}] 180 | 181 | 182 | 183 | 184 | 185 | 186 | 187 | 188 | 189 | 190 | 191 | 192 | 193 | 194 | 195 | # abstract 196 | 197 | ``` 198 | CallNode[LeafNode[Symbol, "Plus", <||>], { 199 | LeafNode[Integer, "1", <||>], 200 | LeafNode[Symbol, "a", <||>] }, <||>] 201 | 202 | CallNode[LeafNode[Symbol, "Plus", <||>], { 203 | LeafNode[Integer, "1", <||>], 204 | LeafNode[SqrtBox, "a", <||>] }, <||>] 205 | ``` 206 | 207 | everything is a Call 208 | 209 | type information is lost because everything is a CallNode 210 | 211 | Implicit tokens are converted to actual tokens 212 | 213 | abstract syntax 214 | 215 | abstract syntax has CallNode[head, {child1}] 216 | 217 | abstract syntax has CallNode[Plus, {1, 1}] 218 | 219 | 220 | 221 | 222 | 223 | # further work that could be done 224 | 
225 | * removing line continuations 226 | 227 | * converting characters (e.g., \[Infinity] and \[Degree]) to symbols (e.g., Infinity and Degree) 228 | 229 | * removing \< \> from strings 230 | 231 | * more? 232 | 233 | 234 | 235 | 236 | 237 | 238 | 239 | 240 | 241 | 242 | -------------------------------------------------------------------------------- /Tests/Error.mt: -------------------------------------------------------------------------------- 1 | Print["\n===== Start Error.mt =====\n"] 2 | 3 | path = FileNameJoin[{DirectoryName[$CurrentTestSource], "CodeParserTestUtils"}] 4 | PrependTo[$Path, path] 5 | 6 | Needs["CodeParserTestUtils`"] 7 | 8 | Needs["CodeParser`"] 9 | Needs["CodeParser`Utils`"] 10 | 11 | 12 | 13 | 14 | (* 15 | 16 | Tests related to Error.wl: 17 | 18 | Handling unterminated groups 19 | Handling unterminated tokens 20 | 21 | Chunks 22 | 23 | etc. 24 | 25 | *) 26 | 27 | 28 | TestMatch[ 29 | CodeConcreteParse["\"\n", SourceConvention -> "LineColumn"] 30 | , 31 | ContainerNode[String, { 32 | ErrorNode[Token`Error`UnterminatedString, "\"", <|Source -> {{1, 1}, {1, 2}}|>]}, _] 33 | , 34 | TestID->"Error-20210118-C0F5T5" 35 | ] 36 | 37 | TestMatch[ 38 | CodeConcreteParse["\"\r", SourceConvention -> "LineColumn"] 39 | , 40 | ContainerNode[String, { 41 | ErrorNode[Token`Error`UnterminatedString, "\"", <|Source -> {{1, 1}, {1, 2}}|>]}, _] 42 | , 43 | TestID->"Error-20210118-R8T2P0" 44 | ] 45 | 46 | TestMatch[ 47 | CodeConcreteParse["\"\r\n", SourceConvention -> "LineColumn"] 48 | , 49 | ContainerNode[String, { 50 | ErrorNode[Token`Error`UnterminatedString, "\"", <|Source -> {{1, 1}, {1, 2}}|>]}, _] 51 | , 52 | TestID->"Error-20210118-T0V9F9" 53 | ] 54 | 55 | TestMatch[ 56 | CodeConcreteParse["\"\n", SourceConvention -> "SourceCharacterIndex"] 57 | , 58 | ContainerNode[String, { 59 | ErrorNode[Token`Error`UnterminatedString, "\"\n", <|Source -> {1, 2}|>]}, _] 60 | , 61 | TestID->"Error-20210118-O5Q6Y0" 62 | ] 63 | 64 | TestMatch[ 65 | 
CodeConcreteParse["\"\r", SourceConvention -> "SourceCharacterIndex"] 66 | , 67 | ContainerNode[String, { 68 | ErrorNode[Token`Error`UnterminatedString, "\"\r", <|Source -> {1, 2}|>]}, _] 69 | , 70 | TestID->"Error-20210118-W7L4K8" 71 | ] 72 | 73 | TestMatch[ 74 | CodeConcreteParse["\"\r\n", SourceConvention -> "SourceCharacterIndex"] 75 | , 76 | ContainerNode[String, { 77 | ErrorNode[Token`Error`UnterminatedString, "\"\r\n", <|Source -> {1, 3}|>]}, _] 78 | , 79 | TestID->"Error-20210118-R9E3S6" 80 | ] 81 | 82 | 83 | Test[ 84 | CodeConcreteParse["\\|110000"] 85 | , 86 | ContainerNode[String, { 87 | ErrorNode[Token`Error`UnhandledCharacter, "\\|110000", <|Source -> {{1, 1}, {1, 9}}|>]}, <|Source -> {{1, 1}, {1, 9}}|>] 88 | , 89 | TestID->"Error-20211104-P0L8Y0" 90 | ] 91 | 92 | Test[ 93 | CodeConcreteParse["\\|FFFFFF"] 94 | , 95 | ContainerNode[String, { 96 | ErrorNode[Token`Error`UnhandledCharacter, "\\|FFFFFF", <|Source -> {{1, 1}, {1, 9}}|>]}, <|Source -> {{1, 1}, {1, 9}}|>] 97 | , 98 | TestID->"Error-20211104-Q2O1J4" 99 | ] 100 | 101 | 102 | Test[ 103 | CodeConcreteParse["\\\\[Alpa]"] 104 | , 105 | ContainerNode[String, { 106 | ErrorNode[Token`Error`UnhandledCharacter, "\\\\", <|Source -> {{1, 1}, {1, 3}}|>], 107 | GroupNode[GroupSquare, { 108 | LeafNode[Token`OpenSquare, "[", <|Source -> {{1, 3}, {1, 4}}|>], 109 | LeafNode[Symbol, "Alpa", <|Source -> {{1, 4}, {1, 8}}|>], 110 | LeafNode[Token`CloseSquare, "]", <|Source -> {{1, 8}, {1, 9}}|>]}, <|Source -> {{1, 3}, {1, 9}}|>]}, <|SyntaxIssues -> { 111 | 112 | SyntaxIssue["UnrecognizedLongName", "Unrecognized longname: ``\\\\[Alpa]``.", "Error", <|Source -> {{1, 1}, {1, 9}}, ConfidenceLevel -> 0.75, CodeActions -> {CodeAction["Replace with ``\\\\[Alpha]``", ReplaceText, <|Source -> {{1, 1}, {1, 9}}, "ReplacementText" -> "\\\\[Alpha]"|>]}, "AdditionalDescriptions" -> {"``Alpa`` is not a valid long name."}|>]}, Source -> {{1, 1}, {1, 9}}|>] 113 | , 114 | TestID->"Error-20220709-M4Y7Z3" 115 | ] 116 | 117 | (* 118 | 
no warning 119 | *) 120 | Test[ 121 | CodeConcreteParse["RegularExpression[\"\\\\[a-zA-Z0-9]+\\\\]\"]"] 122 | , 123 | ContainerNode[String, { 124 | CallNode[{LeafNode[Symbol, "RegularExpression", <|Source -> {{1, 1}, {1, 18}}|>]}, 125 | GroupNode[GroupSquare, { 126 | LeafNode[Token`OpenSquare, "[", <|Source -> {{1, 18}, {1, 19}}|>], 127 | LeafNode[String, "\"\\\\[a-zA-Z0-9]+\\\\]\"", <|Source -> {{1, 19}, {1, 38}}|>], 128 | LeafNode[Token`CloseSquare, "]", <|Source -> {{1, 38}, {1, 39}}|>]}, <|Source -> {{1, 18}, {1, 39}}|>], <|Source -> {{1, 1}, {1, 39}}|>]}, <|Source -> {{1, 1}, {1, 39}}|>] 129 | , 130 | TestID->"Error-20220711-I6O1H4" 131 | ] 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | -------------------------------------------------------------------------------- /crates/wolfram-parser/src/quirks.rs: -------------------------------------------------------------------------------- 1 | use std::{cell::Cell, fmt::Debug}; 2 | 3 | thread_local! { 4 | // TODO(cleanup): Don't store these settings using error-prone global state. 5 | static QUIRK_SETTINGS: Cell<QuirkSettings> = 6 | Cell::new(QuirkSettings::const_default()); 7 | } 8 | 9 | 10 | #[derive(Debug, Copy, Clone, PartialEq)] 11 | pub struct QuirkSettings { 12 | /// "InfixBinaryAt" quirk 13 | /// 14 | /// 15 | /// The kernel parses `a<>StringJoin@b` as `StringJoin[a, b]` 16 | /// 17 | /// Most infix operators can be used with this syntax. 18 | /// Notably, SameQ and UnsameQ do NOT work with this syntax. 
19 | /// 20 | /// *Related bugs: 365013* 21 | pub infix_binary_at: bool, 22 | 23 | /// "FlattenTimes" quirk 24 | /// 25 | /// In 12.1 and before: 26 | /// 27 | /// * `a / b / c` is parsed as `Times[a, Power[b, -1], Power[c, -1]]` 28 | /// * `-a / b` is parsed as `Times[-1, a, Power[b, -1]]` 29 | /// 30 | /// In 12.2 and after: 31 | /// 32 | /// * `a / b / c` is parsed as `Times[Times[a, Power[b, -1]], Power[c, -1]]` 33 | /// * `-a / b` is parsed as `Times[Times[-1, a], Power[b, -1]]` 34 | /// 35 | /// TODO: when targeting v12.2 as a minimum, remove this quirk 36 | /// 37 | /// *Related bugs: 57064, 139531, 153875, 160919* 38 | pub flatten_times: bool, 39 | 40 | /// "OldAtAtAt" quirk 41 | /// 42 | /// Changed in 13.1: `@@@` 43 | /// 44 | /// In 13.0 and before: 45 | /// 46 | /// `a @@@ b` parsed as `Apply[a, b, {1}]` 47 | /// 48 | /// In 13.1 and after: 49 | /// 50 | /// `a @@@ b` parses as `MapApply[a, b]` 51 | pub old_at_at_at: bool, 52 | } 53 | 54 | pub enum Quirk { 55 | /// "InfixBinaryAt" quirk 56 | /// 57 | /// 58 | /// The kernel parses `a<>StringJoin@b` as `StringJoin[a, b]` 59 | /// 60 | /// Most infix operators can be used with this syntax. 61 | /// Notably, SameQ and UnsameQ do NOT work with this syntax. 
62 | /// 63 | /// *Related bugs: 365013* 64 | InfixBinaryAt, 65 | 66 | /// "FlattenTimes" quirk 67 | /// 68 | /// In 12.1 and before: 69 | /// 70 | /// * `a / b / c` is parsed as `Times[a, Power[b, -1], Power[c, -1]]` 71 | /// * `-a / b` is parsed as `Times[-1, a, Power[b, -1]]` 72 | /// 73 | /// In 12.2 and after: 74 | /// 75 | /// * `a / b / c` is parsed as `Times[Times[a, Power[b, -1]], Power[c, -1]]` 76 | /// * `-a / b` is parsed as `Times[Times[-1, a], Power[b, -1]]` 77 | /// 78 | /// TODO: when targeting v12.2 as a minimum, remove this quirk 79 | /// 80 | /// *Related bugs: 57064, 139531, 153875, 160919* 81 | FlattenTimes, 82 | 83 | /// "OldAtAtAt" quirk 84 | /// 85 | /// Changed in 13.1: `@@@` 86 | /// 87 | /// In 13.0 and before: 88 | /// 89 | /// `a @@@ b` parsed as `Apply[a, b, {1}]` 90 | /// 91 | /// In 13.1 and after: 92 | /// 93 | /// `a @@@ b` parses as `MapApply[a, b]` 94 | OldAtAtAt, 95 | } 96 | 97 | impl QuirkSettings { 98 | pub const fn const_default() -> Self { 99 | Self { 100 | infix_binary_at: true, 101 | flatten_times: false, 102 | old_at_at_at: false, 103 | } 104 | } 105 | 106 | pub fn flatten_times(self, value: bool) -> Self { 107 | QuirkSettings { 108 | flatten_times: value, 109 | ..self 110 | } 111 | } 112 | 113 | pub fn infix_binary_at(self, value: bool) -> Self { 114 | QuirkSettings { 115 | infix_binary_at: value, 116 | ..self 117 | } 118 | } 119 | 120 | pub fn old_at_at_at(self, value: bool) -> Self { 121 | QuirkSettings { 122 | old_at_at_at: value, 123 | ..self 124 | } 125 | } 126 | } 127 | 128 | impl Default for QuirkSettings { 129 | fn default() -> Self { 130 | Self::const_default() 131 | } 132 | } 133 | 134 | pub fn set_quirks(quirks: QuirkSettings) { 135 | QUIRK_SETTINGS.set(quirks); 136 | } 137 | 138 | pub(crate) fn is_quirk_enabled(quirk: Quirk) -> bool { 139 | let settings = QUIRK_SETTINGS.get(); 140 | 141 | match quirk { 142 | Quirk::InfixBinaryAt => settings.infix_binary_at, 143 | Quirk::FlattenTimes => settings.flatten_times, 
144 | Quirk::OldAtAtAt => settings.old_at_at_at, 145 | } 146 | } 147 | -------------------------------------------------------------------------------- /Tests/TokenErrors.mt: -------------------------------------------------------------------------------- 1 | Print["\n===== Start TokenErrors.mt =====\n"] 2 | 3 | Needs["CodeParser`"] 4 | 5 | (* 6 | UnhandledCharacter: 7 | *) 8 | 9 | Test[ 10 | CodeTokenize["\\[SkeletonIndicator]"] 11 | , 12 | {ErrorNode[Token`Error`UnhandledCharacter, "\\[SkeletonIndicator]", <|Source -> {{1, 1}, {1, 21}}|>]} 13 | , 14 | TestID->"TokenErrors-20190520-B1H0A6" 15 | ] 16 | 17 | Test[ 18 | CodeTokenize["\\\""] 19 | , 20 | {ErrorNode[Token`Error`UnhandledCharacter, "\\\"", <|Source -> {{1, 1}, {1, 3}}|>]} 21 | , 22 | TestID->"TokenErrors-20190816-G5Q8B5" 23 | ] 24 | 25 | Test[ 26 | CodeTokenize["a::\\\""] 27 | , 28 | { 29 | LeafNode[Symbol, "a", <|Source -> {{1, 1}, {1, 2}}|>], 30 | LeafNode[Token`ColonColon, "::", <|Source -> {{1, 2}, {1, 4}}|>], 31 | ErrorNode[Token`Error`UnhandledCharacter, "\\\"", <|Source -> {{1, 4}, {1, 6}}|>] } 32 | , 33 | TestID->"TokenErrors-20190520-L5N7B0" 34 | ] 35 | 36 | 37 | 38 | (* 39 | UnterminatedComment: 40 | *) 41 | 42 | Test[ 43 | CodeTokenize["(*"] 44 | , 45 | {ErrorNode[Token`Error`UnterminatedComment, "(*", <|Source -> {{1, 1}, {1, 3}}|>]} 46 | , 47 | TestID->"TokenErrors-20190520-C8W1P2" 48 | ] 49 | 50 | 51 | 52 | 53 | (* 54 | ExpectedAlphaOrDollar: 55 | *) 56 | 57 | 58 | Test[ 59 | CodeTokenize["aaa`1"] 60 | , 61 | { 62 | ErrorNode[Token`Error`ExpectedLetterlike, "aaa`", <|Source -> {{1, 1}, {1, 5}}|>], 63 | LeafNode[Integer, "1", <|Source -> {{1, 5}, {1, 6}}|>] 64 | } 65 | , 66 | TestID->"TokenErrors-20190520-H9P0H9" 67 | ] 68 | 69 | 70 | 71 | 72 | (* 73 | EmptyString: 74 | *) 75 | 76 | Test[ 77 | CodeTokenize["a::"] 78 | , 79 | { 80 | LeafNode[Symbol, "a", <|Source -> {{1, 1}, {1, 2}}|>], 81 | LeafNode[Token`ColonColon, "::", <|Source -> {{1, 2}, {1, 4}}|>] } 82 | , 83 | 
TestID->"TokenErrors-20190520-R2P3A3" 84 | ] 85 | 86 | Test[ 87 | CodeTokenize["a>>"] 88 | , 89 | { 90 | LeafNode[Symbol, "a", <|Source -> {{1, 1}, {1, 2}}|>], 91 | LeafNode[Token`GreaterGreater, ">>", <|Source -> {{1, 2}, {1, 4}}|>] } 92 | , 93 | TestID->"TokenErrors-20190520-M3H7E9" 94 | ] 95 | 96 | 97 | (* 98 | UnterminatedString: 99 | *) 100 | 101 | Test[ 102 | CodeTokenize["\""] 103 | , 104 | {ErrorNode[Token`Error`UnterminatedString, "\"", <|Source -> {{1, 1}, {1, 2}}|>]} 105 | , 106 | TestID->"TokenErrors-20190520-L6N6S8" 107 | ] 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | (* 118 | InvalidBase: 119 | *) 120 | 121 | Test[ 122 | CodeTokenize["37^^2"] 123 | , 124 | {ErrorNode[Token`Error`Number, "37^^2", <|Source -> {{1, 1}, {1, 6}}|>]} 125 | , 126 | TestID->"TokenErrors-20190520-Q9B9R6" 127 | ] 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | (* 137 | ExpectedDigitOrAlpha: 138 | *) 139 | 140 | Test[ 141 | CodeTokenize["2^^3"] 142 | , 143 | {ErrorNode[Token`Error`Number, "2^^3", <|Source -> {{1, 1}, {1, 5}}|>]} 144 | , 145 | TestID->"TokenErrors-20190520-B7G4V4" 146 | ] 147 | 148 | 149 | Test[ 150 | CodeTokenize["2^^@"] 151 | , 152 | { 153 | ErrorNode[Token`Error`Number, "2^^", <|Source -> {{1, 1}, {1, 4}}|>], 154 | LeafNode[Token`At, "@", <|Source -> {{1, 4}, {1, 5}}|>]} 155 | , 156 | TestID->"TokenErrors-20190520-J3Q2S7" 157 | ] 158 | 159 | 160 | 161 | 162 | (* 163 | ExpectedAccuracy: 164 | *) 165 | 166 | Test[ 167 | CodeTokenize["1.2``->3"] 168 | , 169 | { 170 | ErrorNode[Token`Error`Number, "1.2``-", <|Source -> {{1, 1}, {1, 7}}|>], 171 | LeafNode[Token`Greater, ">", <|Source -> {{1, 7}, {1, 8}}|>], 172 | LeafNode[Integer, "3", <|Source -> {{1, 8}, {1, 9}}|>]} 173 | , 174 | TestID->"TokenErrors-20190520-B2J9I4" 175 | ] 176 | 177 | 178 | 179 | 180 | 181 | 182 | 183 | 184 | (* 185 | ExpectedExponent: 186 | *) 187 | 188 | Test[ 189 | CodeTokenize["123*^"] 190 | , 191 | {ErrorNode[Token`Error`Number, "123*^", <|Source -> {{1, 1}, {1, 
6}}|>]} 192 | , 193 | TestID->"TokenErrors-20190520-L1J8C1" 194 | ] 195 | 196 | 197 | 198 | 199 | 200 | (* 201 | ExpectedEqual: 202 | *) 203 | 204 | Test[ 205 | CodeTokenize["a ^: f"] 206 | , 207 | { 208 | LeafNode[Symbol, "a", <|Source -> {{1, 1}, {1, 2}}|>], 209 | LeafNode[Whitespace, " ", <|Source -> {{1, 2}, {1, 3}}|>], 210 | ErrorNode[Token`Error`ExpectedEqual, "^:", <|Source -> {{1, 3}, {1, 5}}|>], 211 | LeafNode[Whitespace, " ", <|Source -> {{1, 5}, {1, 6}}|>], 212 | LeafNode[Symbol, "f", <|Source -> {{1, 6}, {1, 7}}|>] } 213 | , 214 | TestID->"TokenErrors-20190520-M3N7T5" 215 | ] 216 | 217 | 218 | 219 | 220 | 221 | 222 | 223 | -------------------------------------------------------------------------------- /cpp/include/Diagnostics.h: -------------------------------------------------------------------------------- 1 | 2 | #include "WolframLibrary.h" 3 | #undef True 4 | #undef False 5 | 6 | #include 7 | #include 8 | 9 | 10 | EXTERN_C DLLEXPORT void DiagnosticsLog(std::string s); 11 | 12 | EXTERN_C DLLEXPORT void DiagnosticsMarkTime(); 13 | 14 | EXTERN_C DLLEXPORT void DiagnosticsLogTime(); 15 | 16 | EXTERN_C DLLEXPORT void DiagnosticsPrint(); 17 | 18 | extern int ByteBuffer_size; 19 | 20 | extern int ByteDecoder_PrintableCount; 21 | 22 | extern int ByteDecoder_LineFeedCount; 23 | 24 | extern int ByteDecoder_TabCount; 25 | 26 | extern int ByteDecoder_CarriageReturnCount; 27 | 28 | extern int ByteDecoder_1ByteCount; 29 | 30 | extern int ByteDecoder_2ByteCount; 31 | 32 | extern int ByteDecoder_3ByteCount; 33 | 34 | extern int ByteDecoder_4ByteCount; 35 | 36 | extern int ByteDecoder_FFCount; 37 | 38 | extern int ByteDecoder_Incomplete1ByteCount; 39 | 40 | extern int CharacterDecoder_UnescapedCount; 41 | 42 | extern int CharacterDecoder_LineContinuationCount; 43 | 44 | extern int CharacterDecoder_LongNameCount; 45 | 46 | extern int CharacterDecoder_4HexCount; 47 | 48 | extern int CharacterDecoder_2HexCount; 49 | 50 | extern int CharacterDecoder_6HexCount; 51 | 52 
| extern int CharacterDecoder_OctalCount; 53 | 54 | extern int CharacterDecoder_StringMetaBackspaceCount; 55 | 56 | extern int CharacterDecoder_StringMetaFormFeedCount; 57 | 58 | extern int CharacterDecoder_StringMetaLineFeedCount; 59 | 60 | extern int CharacterDecoder_StringMetaCarriageReturnCount; 61 | 62 | extern int CharacterDecoder_StringMetaTabCount; 63 | 64 | extern int CharacterDecoder_StringMetaDoubleQuoteCount; 65 | 66 | extern int CharacterDecoder_StringMetaBackslashCount; 67 | 68 | extern int CharacterDecoder_StringMetaOpenCount; 69 | 70 | extern int CharacterDecoder_StringMetaCloseCount; 71 | 72 | extern int CharacterDecoder_LinearSyntaxBangCount; 73 | 74 | extern int CharacterDecoder_LinearSyntaxPercentCount; 75 | 76 | extern int CharacterDecoder_LinearSyntaxAmpCount; 77 | 78 | extern int CharacterDecoder_LinearSyntaxOpenParenCount; 79 | 80 | extern int CharacterDecoder_LinearSyntaxCloseParenCount; 81 | 82 | extern int CharacterDecoder_LinearSyntaxStarCount; 83 | 84 | extern int CharacterDecoder_LinearSyntaxPlusCount; 85 | 86 | extern int CharacterDecoder_LinearSyntaxSlashCount; 87 | 88 | extern int CharacterDecoder_LinearSyntaxAtCount; 89 | 90 | extern int CharacterDecoder_LinearSyntaxCaretCount; 91 | 92 | extern int CharacterDecoder_LinearSyntaxUnderscoreCount; 93 | 94 | extern int CharacterDecoder_LinearSyntaxBacktickCount; 95 | 96 | extern int CharacterDecoder_LinearSyntaxSpaceCount; 97 | 98 | extern int CharacterDecoder_UnhandledCount; 99 | 100 | extern int Tokenizer_StringFastCount; 101 | 102 | extern int Tokenizer_StringSlowCount; 103 | 104 | extern int Tokenizer_CommentCount; 105 | 106 | extern int Tokenizer_NewlineCount; 107 | 108 | extern int Tokenizer_SymbolCount; 109 | 110 | extern int Tokenizer_OpenSquareCount; 111 | 112 | extern int Tokenizer_OpenCurlyCount; 113 | 114 | extern int Tokenizer_WhitespaceCount; 115 | 116 | extern int Tokenizer_CommaCount; 117 | 118 | extern int Tokenizer_CloseSquareCount; 119 | 120 | extern int 
Tokenizer_CloseCurlyCount; 121 | 122 | extern int Tokenizer_CloseParenCount; 123 | 124 | extern int Tokenizer_MinusGreaterCount; 125 | 126 | extern int Tokenizer_NumberCount; 127 | 128 | extern int Tokenizer_ColonGreaterCount; 129 | 130 | extern int Tokenizer_MinusCount; 131 | 132 | extern int Tokenizer_OpenParenCount; 133 | 134 | extern int Tokenizer_HashCount; 135 | 136 | extern int Tokenizer_AmpCount; 137 | 138 | extern int Tokenizer_PlusCount; 139 | 140 | extern int Node_LeafNodeCount; 141 | 142 | extern int Node_ErrorNodeCount; 143 | 144 | extern int Node_UnterminatedTokenErrorNeedsReparseNodeCount; 145 | 146 | extern int Node_SyntaxErrorNodeCount; 147 | 148 | //extern int Node_OperatorNodeCount; 149 | 150 | extern int Node_AbortNodeCount; 151 | 152 | extern int Node_PrefixNodeCount; 153 | 154 | extern int Node_BinaryNodeCount; 155 | 156 | extern int Node_InfixNodeCount; 157 | 158 | extern int Node_TernaryNodeCount; 159 | 160 | extern int Node_PostfixNodeCount; 161 | 162 | extern int Node_PrefixBinaryNodeCount; 163 | 164 | extern int Node_GroupNodeCount; 165 | 166 | extern int Node_CompoundNodeCount; 167 | 168 | extern int Node_GroupMissingCloserNodeCount; 169 | 170 | extern int Node_UnterminatedGroupNeedsReparseNodeCount; 171 | 172 | extern int Node_CallNodeCount; 173 | -------------------------------------------------------------------------------- /Tests/AbstractSyntaxIssues.mt: -------------------------------------------------------------------------------- 1 | Print["\n===== Start AbstractSyntaxIssues.mt =====\n"] 2 | 3 | Needs["CodeParser`"] 4 | 5 | (* 6 | Package: 7 | *) 8 | 9 | TestMatch[ 10 | FirstCase[CodeParse["BeginPackage[\"Foo`\"]", ContainerNode -> (ContainerNode[Hold, #[[1]], <||>]&)], 11 | KeyValuePattern[AbstractSyntaxIssues -> _], $Failed, {0, Infinity}] 12 | , 13 | KeyValuePattern[AbstractSyntaxIssues -> {SyntaxIssue["Package", _, _, _]}] 14 | , 15 | TestID->"AbstractSyntaxIssues-20190520-P2N0D7" 16 | ] 17 | 18 | TestMatch[ 19 | 
FirstCase[CodeParse["EndPackage[]", ContainerNode -> (ContainerNode[Hold, #[[1]], <||>]&)], 20 | KeyValuePattern[AbstractSyntaxIssues -> _], $Failed, {0, Infinity}] 21 | , 22 | KeyValuePattern[AbstractSyntaxIssues -> {SyntaxIssue["Package", _, _, _]}] 23 | , 24 | TestID->"AbstractSyntaxIssues-20190520-M6K6Y5" 25 | ] 26 | 27 | TestMatch[ 28 | FirstCase[CodeParse["Begin[\"Foo`\"]", ContainerNode -> (ContainerNode[Hold, #[[1]], <||>]&)], 29 | KeyValuePattern[AbstractSyntaxIssues -> _], $Failed, {0, Infinity}] 30 | , 31 | KeyValuePattern[AbstractSyntaxIssues -> {SyntaxIssue["Package", _, _, _]}] 32 | , 33 | TestID->"AbstractSyntaxIssues-20190520-F7B2Y5" 34 | ] 35 | 36 | TestMatch[ 37 | FirstCase[CodeParse["End[]", ContainerNode -> (ContainerNode[Hold, #[[1]], <||>]&)], 38 | KeyValuePattern[AbstractSyntaxIssues -> _], $Failed, {0, Infinity}] 39 | , 40 | KeyValuePattern[AbstractSyntaxIssues -> {SyntaxIssue["Package", _, _, _]}] 41 | , 42 | TestID->"AbstractSyntaxIssues-20190520-T0U9L8" 43 | ] 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | (* 53 | StrangeCall: 54 | *) 55 | 56 | (* 57 | TestMatch[ 58 | FirstCase[CodeParse[" %[] ", ContainerNode -> (ContainerNode[Hold, #[[1]], <||>]&)], 59 | KeyValuePattern[AbstractSyntaxIssues -> _], $Failed, {0, Infinity}] 60 | , 61 | KeyValuePattern[AbstractSyntaxIssues -> {SyntaxIssue["StrangeCall", _, _, _]}] 62 | , 63 | TestID->"AbstractSyntaxIssues-20190520-X5H0W9" 64 | ] 65 | *) 66 | 67 | TestMatch[ 68 | FirstCase[CodeParse[" \\!\\(x\\)[] ", ContainerNode -> (ContainerNode[Hold, #[[1]], <||>]&)], 69 | KeyValuePattern[AbstractSyntaxIssues -> _], $Failed, {0, Infinity}] 70 | , 71 | KeyValuePattern[AbstractSyntaxIssues -> {SyntaxIssue["StrangeCall", _, _, _]}] 72 | , 73 | TestID->"AbstractSyntaxIssues-20190520-V9T6S1" 74 | ] 75 | 76 | TestMatch[ 77 | FirstCase[CodeParse[" \\(x\\)[] ", ContainerNode -> (ContainerNode[Hold, #[[1]], <||>]&)], 78 | KeyValuePattern[AbstractSyntaxIssues -> _], $Failed, {0, Infinity}] 79 | , 80 | 
KeyValuePattern[AbstractSyntaxIssues -> {SyntaxIssue["StrangeCall", _, _, _]}] 81 | , 82 | TestID->"AbstractSyntaxIssues-20190520-I7T4W0" 83 | ] 84 | 85 | 86 | TestMatch[ 87 | FirstCase[CodeParse[" x--[] ", ContainerNode -> (ContainerNode[Hold, #[[1]], <||>]&)], 88 | KeyValuePattern[AbstractSyntaxIssues -> _], $Failed, {0, Infinity}] 89 | , 90 | KeyValuePattern[AbstractSyntaxIssues -> {SyntaxIssue["StrangeCall", _, _, _]}] 91 | , 92 | TestID->"AbstractSyntaxIssues-20190520-I3X6I7" 93 | ] 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | (* 102 | SyntaxUndocumentedMessageName: 103 | *) 104 | 105 | TestMatch[ 106 | FirstCase[CodeParse[" a::b::c::d ", ContainerNode -> (ContainerNode[Hold, #[[1]], <||>]&)], 107 | KeyValuePattern[AbstractSyntaxIssues -> _], $Failed, {0, Infinity}] 108 | , 109 | KeyValuePattern[AbstractSyntaxIssues -> {SyntaxIssue["SyntaxUndocumentedMessageName", _, _, _]}] 110 | , 111 | TestID->"AbstractSyntaxIssues-20190520-F4W6X1" 112 | ] 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | (* 121 | StrangeCallSlotSequence: 122 | *) 123 | 124 | TestMatch[ 125 | FirstCase[CodeParse[" ##2[] ", ContainerNode -> (ContainerNode[Hold, #[[1]], <||>]&)], 126 | KeyValuePattern[AbstractSyntaxIssues -> _], $Failed, {0, Infinity}] 127 | , 128 | KeyValuePattern[AbstractSyntaxIssues -> {SyntaxIssue["StrangeCallSlotSequence", _, _, _]}] 129 | , 130 | TestID->"AbstractSyntaxIssues-20190520-O7G6C1" 131 | ] 132 | 133 | 134 | 135 | 136 | 137 | (* 138 | NotContiguous: 139 | *) 140 | (* 141 | Not handled by parser any more 142 | 143 | handled by syntax highlighting 144 | *) 145 | (*TestMatch[ 146 | FirstCase[CodeParse[" a[[] ] ", ContainerNode -> (ContainerNode[Hold, #[[1]], <||>]&)], 147 | KeyValuePattern[AbstractSyntaxIssues -> _], $Failed, {0, Infinity}] 148 | , 149 | KeyValuePattern[AbstractSyntaxIssues -> {FormatIssue["NotContiguous", _, _, _]}] 150 | , 151 | TestID->"AbstractSyntaxIssues-20190520-U1R2G5" 152 | ] 153 | 154 | *) 155 | 156 | 157 | 158 | 159 | 160 | 
-------------------------------------------------------------------------------- /docs/compatibility.md: -------------------------------------------------------------------------------- 1 | 2 | # Compatibility 3 | 4 | 5 | ## Source Compatibility 6 | 7 | CodeParser has source compatibility with 11.0+ 8 | 9 | 10 | ## FrontEnd Compatibility 11 | 12 | Any source .wl files that have `(* ::Package::"Tags" *)` or `(* ::Code::Initialization::"Tags" *)` syntax may only be edited with a version 12.3+ FE 13 | 14 | 15 | ## Runtime Compatibility 16 | 17 | Building with Wolfram 11.0+ and running with the same version should always work. 18 | 19 | Building and running with different Wolfram versions will not always work. 20 | 21 | Building with the latest Wolfram version (which is 13.1) is only guaranteed to work with Wolfram 13.1+. 22 | 23 | This is due to various issues including LibraryLink versioning and rpath changes on MacOSX. 24 | 25 | 26 | ## C++ Compiler Compatibility 27 | 28 | CodeParser uses C++17 features and requires a compiler that can support at least C++17. 29 | 30 | 31 | ## Wolfram Compiler Compatibility 32 | 33 | The ExprLibrary built by the Wolfram Compiler requires 13.1+. 34 | 35 | 36 | ## CCompilerDriver libraries 37 | 38 | This table shows the value of -mmacosx-version-min for libraries generated by CCompilerDriver: 39 | 40 | | Wolfram version | MacOSX-x86-64 | MacOSX-ARM64 | 41 | | --------------- | ------------- | ------------ | 42 | | 12.0 | 10.10 | N/A | 43 | | 12.1 | 10.10 | N/A | 44 | | 12.2 | 10.12 | 11.0 | 45 | | 12.3 | 10.14 | 11.0 | 46 | | 13.0 | 10.14 | 11.0 | 47 | | 13.1 | 10.14 | 11.0 | 48 | | 13.2 | 10.15 | 11.0 | 49 | | 13.3 | 11.00 | 11.0 | 50 | 51 | CodeParser sets the same macosx-version-min in order to achieve maximum compatibility with libraries generated by CCompilerDriver. 52 | 53 | 54 | ## Earlier Versions 55 | 56 | Wolfram versions before 12.1 must build from sources to use CodeParser. 
57 | 58 | Manually modify WolframVersion in PacletInfo.wl to allow the paclet to be used. 59 | 60 | The message that you get when you install the paclet: 61 | ``` 62 | The paclet CodeParser was successfully installed. 63 | ``` 64 | does not necessarily mean that the paclet can be used. 65 | 66 | Make sure that the correct WolframVersion is specified. 67 | 68 | 69 | ## LibraryLink 70 | 71 | CodeParser uses [LibraryLink](https://reference.wolfram.com/language/guide/LibraryLink.html). 72 | 73 | The version of LibraryLink was updated in version 13.1: 74 | 75 | | Wolfram version | LibraryLink version | 76 | | --------------- | ------------------- | 77 | | 12.0 | 5 | 78 | | 12.1 | 6 | 79 | | 12.2 | 6 | 80 | | 12.3 | 6 | 81 | | 13.0 | 6 | 82 | | 13.1 | 7 | 83 | 84 | The LibraryLink version is defined in the header file WolframLibrary.h 85 | 86 | In the [LibraryLink documentation](https://reference.wolfram.com/language/LibraryLink/tutorial/LibraryStructure.html#280210622), it is described how backwards-compatibility is not maintained: 87 | 88 | >However, you should note that you cannot use a library built with a newer version of the header into an older version of the Wolfram Language. 89 | 90 | So LibraryLink defines Wolfram version 13.1 as a minimum that can run with libraries built with the current Wolfram version. 91 | 92 | But to be clear, LibraryLink does have forwards-compatibility. For example, libraries built with LibraryLink version 6 will work with LibraryLink version 7. 93 | 94 | The CodeParser paclets distributed by Wolfram Research on the public paclet server are built with LibraryLink version 6 in order to guarantee compatibility with all versions of Wolfram System from 12.1 onward. 95 | 96 | 97 | ## rpath (MacOSX) 98 | 99 | CodeParser uses MathLink. 
100 | 101 | The mathlink rpath was changed in version 12.1: 102 | 103 | | Wolfram version | mathlink rpath | 104 | | --------------- | ------------------------------------------------------------------------ | 105 | | 12.0 | @executable_path/../Frameworks/mathlink.framework/Versions/4.36/mathlink | 106 | | 12.1 | @rpath/mathlink.framework/Versions/4/mathlink | 107 | 108 | This means that CodeParser.dylib built with 12.1+ will not work with previous versions. 109 | 110 | It is possible to use `install_name_tool` to change the rpath, but it is recommended to build from sources. 111 | -------------------------------------------------------------------------------- /Tests/Scoping.mt: -------------------------------------------------------------------------------- 1 | Print["\n===== Start Scoping.mt =====\n"] 2 | 3 | Needs["CodeParser`"] 4 | Needs["CodeParser`Scoping`"] 5 | Needs["CodeParser`Utils`"] 6 | Needs["CodeParser`Library`"] 7 | 8 | 9 | ast = CodeParse["Module[{x, y}, Block[{x, z}, x]]"]; 10 | 11 | (* 12 | Test that x is not marked as unused in the Module 13 | 14 | bug 414554 15 | *) 16 | Test[ 17 | ScopingData[ast] 18 | , 19 | { 20 | scopingDataObject[{{1, 30}, {1, 31}}, {"Module", "Block"}, {"shadowed"}, "x"], 21 | scopingDataObject[{{1, 23}, {1, 24}}, {"Module", "Block"}, {"shadowed"}, "x"], 22 | scopingDataObject[{{1, 9}, {1, 10}}, {"Module"}, {}, "x"], 23 | scopingDataObject[{{1, 26}, {1, 27}}, {"Block"}, {"unused"}, "z"], 24 | scopingDataObject[{{1, 12}, {1, 13}}, {"Module"}, {"unused"}, "y"]} 25 | , 26 | TestID->"Scoping-20210921-U4U6T2" 27 | ] 28 | 29 | 30 | 31 | 32 | 33 | 34 | box = RowBox[{SuperscriptBox["u", 35 | TagBox[RowBox[{"(", RowBox[{"dx_", ",", "0"}], ")"}], 36 | Derivative]], "\[RuleDelayed]", "a"}]; 37 | 38 | cst = CodeConcreteParseBox[box]; 39 | 40 | Test[ 41 | cst, 42 | ContainerNode[Box, { 43 | BinaryNode[RuleDelayed, { 44 | BoxNode[SuperscriptBox, { 45 | LeafNode[Symbol, "u", <| Source -> {1, 1, 1} |>], 46 | BoxNode[TagBox, { 47 | 
GroupNode[ 48 | GroupParen, 49 | { 50 | LeafNode[Token`OpenParen, "(", <| Source -> {1, 1, 2, 1, 1, 1} |>], 51 | InfixNode[Comma, { 52 | CompoundNode[ 53 | PatternBlank, 54 | { 55 | LeafNode[Symbol, "dx", <| Source -> {1, 1, 2, 1, 1, 2, 1, 1} |>], 56 | LeafNode[Token`Under, "_", <| Source -> {1, 1, 2, 1, 1, 2, 1, 1} |>] 57 | }, 58 | <| Source -> {1, 1, 2, 1, 1, 2, 1, 1} |> 59 | ], 60 | LeafNode[Token`Comma, ",", <| Source -> {1, 1, 2, 1, 1, 2, 1, 2} |>], 61 | LeafNode[Integer, "0", <| Source -> {1, 1, 2, 1, 1, 2, 1, 3} |>] 62 | }, 63 | <| Source -> {1, 1, 2, 1, 1, 2} |> 64 | ], 65 | LeafNode[Token`CloseParen, ")" , <| Source -> {1, 1, 2, 1, 1, 3} |>] 66 | }, 67 | <| Source -> {1, 1, 2, 1} |> 68 | ], 69 | CodeNode @@ {Evaluated, Derivative, <||>} 70 | }, 71 | <| Source -> {1, 1, 2} |> 72 | ] 73 | }, 74 | <| Source -> {1, 1} |> 75 | ], 76 | LeafNode[Token`LongName`RuleDelayed, "\[RuleDelayed]",<| Source -> {1, 2} |>], 77 | LeafNode[Symbol, "a", <| Source -> {1, 3} |>] 78 | }, <|Source -> {} |>] 79 | }, <||>] 80 | ] 81 | 82 | Test[RoundTripCst[cst], cst] 83 | 84 | agg = CodeParser`Abstract`Aggregate[cst]; 85 | 86 | ast = CodeParser`Abstract`Abstract[agg]; 87 | 88 | Test[ 89 | ScopingData[ast] 90 | , 91 | {scopingDataObject[{1, 1, 2, 1, 1, 2, 1, 1}, {"RuleDelayed"}, {"unused"}, "dx"]} 92 | , 93 | TestID->"Scoping-20220211-E8N5O8" 94 | ] 95 | 96 | 97 | 98 | 99 | 100 | 101 | ast = CodeParse["foo[] := \\!\\(\\*s\\)"] 102 | 103 | Test[ 104 | ScopingData[ast] 105 | , 106 | {scopingDataObject[{{1, 1}, {1, 4}}, {"Defined"}, {"definition"}, "foo"]} 107 | , 108 | TestID->"Scoping-20220316-D3G1W4" 109 | ] 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | ast = CodeParse[" 123 | 124 | foo[x_]:=x+1 125 | 126 | Module[{a}, a+1] 127 | 128 | Module[{y}, 129 | y + 2 130 | ] 131 | 132 | Module[{b}, b+1] 133 | 134 | bar[z_]:=z+3 135 | 136 | "] 137 | 138 | 139 | Test[ 140 | ScopingData[ast] 141 | , 142 | {scopingDataObject[{{3, 1}, {3, 4}}, {"Defined"}, 
{"definition"}, "foo"], 143 | scopingDataObject[{{3, 10}, {3, 11}}, {"SetDelayed"}, {}, "x"], 144 | scopingDataObject[{{3, 5}, {3, 6}}, {"SetDelayed"}, {}, "x"], 145 | scopingDataObject[{{5, 13}, {5, 14}}, {"Module"}, {}, "a"], 146 | scopingDataObject[{{5, 9}, {5, 10}}, {"Module"}, {}, "a"], 147 | scopingDataObject[{{8, 3}, {8, 4}}, {"Module"}, {}, "y"], 148 | scopingDataObject[{{7, 9}, {7, 10}}, {"Module"}, {}, "y"], 149 | scopingDataObject[{{11, 13}, {11, 14}}, {"Module"}, {}, "b"], 150 | scopingDataObject[{{11, 9}, {11, 10}}, {"Module"}, {}, "b"], 151 | scopingDataObject[{{13, 1}, {13, 4}}, {"Defined"}, {"definition"}, "bar"], 152 | scopingDataObject[{{13, 10}, {13, 11}}, {"SetDelayed"}, {}, "z"], 153 | scopingDataObject[{{13, 5}, {13, 6}}, {"SetDelayed"}, {}, "z"]} 154 | , 155 | TestID->"Scoping-20220830-W8Q8Y1" 156 | ] 157 | 158 | Test[ 159 | ScopingData[ast, SourceMemberQ[#[[3, Key[Source]]], {8, 3}]&] 160 | , 161 | {scopingDataObject[{{8, 3}, {8, 4}}, {"Module"}, {}, "y"], 162 | scopingDataObject[{{7, 9}, {7, 10}}, {"Module"}, {}, "y"]} 163 | , 164 | TestID->"Scoping-20220830-X8E0N5" 165 | ] 166 | 167 | 168 | 169 | 170 | 171 | 172 | 173 | -------------------------------------------------------------------------------- /crates/wolfram-parser/src/parse/parser_docs.rs: -------------------------------------------------------------------------------- 1 | //! # Parser Design 2 | //! 3 | //! Each parse of a Wolfram input is managed by a [`ParserSession`] instance. 4 | //! 5 | //! Parsing logic is structured into individual "modules" called *parselets*. 6 | //! 7 | //! There are two kinds of parselet: 8 | //! 9 | //! * [`PrefixParselet`] — invoked when there is no previous expression in the 10 | //! current context. 11 | //! * [`InfixParselet`] — invoked when there is a previous expression in the 12 | //! current context. 13 | //! 14 | //! Every token is associated with one [`PrefixParselet`] instance 15 | //! 
([`ParseBuilder::with_prefix_parselet()`]) and one 16 | //! [`InfixParselet`] instance ([`ParseBuilder::with_infix_parselet()`]), which are 17 | //! invoked, respectively, when that token is encountered in "prefix" or "infix" 18 | //! position. 19 | //! 20 | //! Parselet implementations will typically view the current or next token, 21 | //! do a bit of logic checking for possible ways forward, and then continue the 22 | //! parsing process by doing one of the following: 23 | //! 24 | //! * For simple parselets, like [`LeafParselet`], construct a parsed node from 25 | //! a single [*operand token*][term] and return it. 26 | //! 27 | //! * Call [`ParserSession::parse_prefix()`] on subsequent token(s) in the input 28 | //! to parse parselet-defined argument subexpression(s), followed by 29 | //! calling a [`reduce_*()` method][self#reduce-methods] to produce a new 30 | //! parsed node. 31 | //! 32 | //! * Call [`ParserSession::parse_infix()`] on a subsequent token 33 | //! in the input, passing in the immediately previously completed parsed 34 | //! sub-expression. 35 | //! 36 | //! In the majority of cases, parselet implementations should finish by calling 37 | //! [`parse_climb()`][ParserSession::parse_climb] and passing in the completed 38 | //! parsed node value. 39 | //! 40 | //! [term]: crate::parse#general-terminology 41 | //! 42 | //! # Parse Contexts 43 | //! 44 | //! The term "context" is used to refer to the state kept by the parser to 45 | //! guide the parsing of a subexpression within the input. 46 | //! 47 | //! Context state is stored as a [`Context`] value created by calls to 48 | //! [`ParserSession::push_context()`]. A new parser context is typically created 49 | //! when the parser begins processing a higher-precedence subexpression. 50 | //! 51 | //! The text diagram below roughly indicates the region of source code covered 52 | //! by several contexts: 53 | //! 54 | //! ```text 55 | //! a + b * foo[x / y] | 56 | //! 
^^!^^ | BinaryOperatorParselet, Precedence::SLASH, reduce_binary() 57 | //! ^^^!^^^^^^ | CallParselet, Precedence::HIGHEST, reduce_call() 58 | //! ^^!^^^^^^^^^^^ | InfixOperatorParselet, Precedence::STAR, reduce_infix() 59 | //! ^^!^^^^^^^^^^^^^^^ | InfixOperatorParselet, Precedence::PLUS, reduce_infix() 60 | //! ``` 61 | //! 62 | //! From this diagram, a few corollary statements about contexts follow: 63 | //! 64 | //! * Roughly speaking, one context exists for each logical subexpression in the 65 | //! input. 66 | //! 67 | //! * A parser context must always contain at least one node (its initial node). 68 | //! 69 | //! *Note:* [`ParserSession::push_context()`] must only be called by a 70 | //! parselet implementation after a node has been pushed. 71 | //! 72 | //! * A parser context has an associated precedence value, typically the 73 | //! precedence of the operator that caused a new parsing context to begin. 74 | //! 75 | //! * At any given time during parsing, the current parsing contexts form a 76 | //! stack, with the latest (furthest along in the input) context at the top. 77 | //! 78 | //! Typically, though not always, parser contexts are created automatically 79 | //! when [`ParserSession::parse_climb()`] detects that a subsequent token 80 | //! in the input has a higher precedence than the current top context, and 81 | //! begins a new context using [`push_context()`][ParserSession::push_context] to 82 | //! contain the parsing of the higher-precedence subexpression. 83 | //! 84 | //! Parser contexts provide a bit of ambient information to guide the parser, 85 | //! but they are not responsible for creating, storing or manipulating parsed 86 | //! expressions. 87 | //! 88 | //! 89 | //! 
90 | 91 | 92 | 93 | // Import items referenced in the module doc comment 94 | #[allow(unused_imports)] 95 | use crate::{ 96 | cst::Cst, 97 | parse::{ 98 | parselet::{InfixParselet, LeafParselet, PrefixParselet}, 99 | Context, ParseBuilder, ParserSession, 100 | }, 101 | read::Reader, 102 | tokenize::TokenKind, 103 | }; 104 | -------------------------------------------------------------------------------- /crates/wolfram-parser/src/tests/test_api.rs: -------------------------------------------------------------------------------- 1 | use crate::{ 2 | issue::{CodeAction, Issue, IssueTag, Severity}, 3 | macros::src, 4 | parse_cst, 5 | source::{Source, Span}, 6 | symbols as sym, ParseOptions, SourceConvention, StringifyMode, 7 | }; 8 | 9 | use pretty_assertions::assert_eq; 10 | 11 | 12 | // 13 | // this used to assert 14 | // 15 | #[test] 16 | fn APITest_Bug1() { 17 | let result = crate::parse_to_token( 18 | b"abc[]", 19 | &ParseOptions::default(), 20 | StringifyMode::Normal, 21 | ); 22 | 23 | assert_eq!(result.non_fatal_issues, Vec::new()); 24 | assert_eq!(result.fatal_issues, Vec::new()); 25 | } 26 | 27 | // 28 | // this used to hang 29 | // 30 | #[test] 31 | fn APITest_Hang1() { 32 | let strIn = "<(2), DEFAULT_TAB_WIDTH, FirstLineBehavior::NotScript, EncodingMode::Normal); 152 | //assert_eq!(res, PARSERSESSIONINIT_ERROR); 153 | } 154 | 155 | // 156 | // this used to crash 157 | // 158 | // CODETOOLS-62 159 | // 160 | #[test] 161 | fn APITest_Crash8() { 162 | let bufAndLen = "(*\r\n*)"; 163 | 164 | let result = parse_cst( 165 | bufAndLen, 166 | &ParseOptions::default() 167 | .source_convention(SourceConvention::CharacterIndex), 168 | ); 169 | 170 | assert_eq!(result.non_fatal_issues, Vec::new()); 171 | assert_eq!(result.fatal_issues, Vec::new()); 172 | } 173 | -------------------------------------------------------------------------------- /crates/wolfram-parser/src/bin/main.rs: -------------------------------------------------------------------------------- 1 | use 
std::io::{self, Write}; 2 | 3 | use wolfram_parser::{ 4 | fmt_as_expr::FmtAsExpr, ParseOptions, QuirkSettings, StringifyMode, 5 | }; 6 | 7 | 8 | // #if DIAGNOSTICS 9 | // #include "Diagnostics.h" 10 | // #endif // DIAGNOSTICS 11 | /// Selects which `wolfram_parser` entry point `handle()` runs on the input. 12 | #[derive(Copy, Clone)] 13 | enum ApiMode { 14 | CstExpr, 15 | Cst, 16 | Ast, 17 | Tokenize, 18 | Leaf, 19 | SafeString, 20 | } 21 | /// Selects what `output()` does with a result: `Print` writes it to stdout, `PrintDryrun` only formats it into a scratch buffer, `None` and `SyntaxQ` emit nothing. 22 | #[derive(Copy, Clone)] 23 | enum OutputMode { 24 | None, 25 | Print, 26 | #[allow(dead_code)] 27 | PrintDryrun, 28 | SyntaxQ, 29 | } 30 | /// Parses the command-line flags, then processes either the `-file` path or lines read from stdin. 31 | fn main() { 32 | let mut file_input = None; 33 | let mut api_mode = ApiMode::CstExpr; 34 | let mut output_mode = OutputMode::Print; 35 | let mut quirks = QuirkSettings::default(); 36 | 37 | let args: Vec<String> = std::env::args().skip(1).collect(); 38 | 39 | let mut i = 0; 40 | loop { 41 | if i >= args.len() { 42 | break; 43 | } 44 | 45 | let arg = &args[i]; 46 | 47 | match &**arg { 48 | "-file" => { 49 | i += 1; 50 | file_input = Some(args.get(i).expect("-file requires a path argument").clone()); 51 | }, 52 | "-tokenize" => api_mode = ApiMode::Tokenize, 53 | "-leaf" => api_mode = ApiMode::Leaf, 54 | "-safestring" => api_mode = ApiMode::SafeString, 55 | "--cst" => api_mode = ApiMode::Cst, 56 | "--ast" => api_mode = ApiMode::Ast, 57 | "-n" => output_mode = OutputMode::None, 58 | "-check" | "-syntaxq" | "-syntaxQ" => { 59 | output_mode = OutputMode::SyntaxQ; 60 | }, 61 | "--flatten-times" => { 62 | quirks.flatten_times = true; 63 | }, 64 | _ => panic!("unrecognized argument: {arg}"), 65 | } 66 | 67 | i += 1; 68 | } 69 | 70 | let result = match file_input { 71 | Some(file_input) => { 72 | read_file(&file_input, api_mode, output_mode, quirks) 73 | }, 74 | None => read_std_in(api_mode, output_mode, quirks), 75 | }; 76 | 77 | return result; 78 | } 79 | /// Prompts with ">>> " and processes one line of stdin per iteration, returning at end of input. 80 | fn read_std_in(mode: ApiMode, output_mode: OutputMode, quirks: QuirkSettings) { 81 | loop { 82 | let mut input = String::new(); 83 | 84 | print!(">>> "); 85 | io::stdout().flush().unwrap(); 86 | // read_line() returns Ok(0) only at end of input; stop instead of prompting forever. 87 | if io::stdin().read_line(&mut input).unwrap() == 0 { break; } 88 | 89 | 
handle(input.trim_end().as_bytes(), mode, output_mode, quirks) 90 | } 91 | 92 | // #if DIAGNOSTICS 93 | // DiagnosticsPrint(); 94 | // #endif // DIAGNOSTICS 95 | } 96 | /// Reads the whole of `file` into memory and processes its bytes once; panics if the file cannot be read. 97 | fn read_file( 98 | file: &str, 99 | mode: ApiMode, 100 | output_mode: OutputMode, 101 | quirks: QuirkSettings, 102 | ) { 103 | let fb: Vec<u8> = std::fs::read(file).expect("error reading file"); 104 | 105 | handle(fb.as_slice(), mode, output_mode, quirks) 106 | 107 | // #if DIAGNOSTICS 108 | // DiagnosticsPrint(); 109 | // #endif // DIAGNOSTICS 110 | } 111 | /// Runs the API selected by `mode` on `input` with `quirks` applied and passes the result to `output()`. 112 | fn handle( 113 | input: &[u8], 114 | mode: ApiMode, 115 | output_mode: OutputMode, 116 | quirks: QuirkSettings, 117 | ) { 118 | let mut opts = ParseOptions::default(); 119 | opts.quirk_settings = quirks; 120 | 121 | match mode { 122 | ApiMode::Tokenize => { 123 | let result = wolfram_parser::tokenize_bytes(input, &opts).unwrap(); 124 | output(output_mode, FmtAsExpr(&result)); 125 | }, 126 | ApiMode::Leaf => { 127 | let result = wolfram_parser::parse_to_token( 128 | input, 129 | &opts, 130 | StringifyMode::Normal, 131 | ); 132 | output(output_mode, FmtAsExpr(&result.syntax)); 133 | }, 134 | ApiMode::SafeString => { 135 | let result = wolfram_parser::safe_string(input, &opts).unwrap(); 136 | output(output_mode, result); 137 | }, 138 | ApiMode::CstExpr => { 139 | let result = wolfram_parser::parse_bytes_cst_seq(input, &opts); 140 | output(output_mode, FmtAsExpr(&result.syntax)); 141 | }, 142 | ApiMode::Cst => { 143 | let result = wolfram_parser::parse_bytes_cst_seq(input, &opts); 144 | output(output_mode, format!("{:#?}", result.syntax)); 145 | }, 146 | ApiMode::Ast => { 147 | let result = wolfram_parser::parse_bytes_ast_seq(input, &opts); 148 | output(output_mode, format!("{:#?}", result.syntax)); 149 | }, 150 | } 151 | } 152 | /// Emits `value` as dictated by `mode`; `T` only has to implement `Display`. 153 | fn output<T: std::fmt::Display>(mode: OutputMode, value: T) { 154 | match mode { 155 | OutputMode::Print => { 156 | println!("{value}"); 157 | }, 158 | OutputMode::PrintDryrun => { 159 | let mut buffer = Vec::new(); 160 | 161 | 
write!(buffer, "{value}\n").unwrap(); 162 | }, 163 | OutputMode::None | OutputMode::SyntaxQ => {}, 164 | } 165 | } 166 | -------------------------------------------------------------------------------- /crates/wolfram-parser/src/parse/parselet/integral_parselet.rs: -------------------------------------------------------------------------------- 1 | use crate::{ 2 | panic_if_aborted, 3 | parse::{parselet::*, ParserSession}, 4 | precedence::Precedence, 5 | tokenize::{Token, TokenKind, TokenRef}, 6 | }; 7 | 8 | impl IntegralParselet { 9 | pub(crate) const fn new( 10 | Op1: PrefixBinaryOperator, 11 | Op2: PrefixOperator, 12 | ) -> Self { 13 | IntegralParselet { Op1, Op2 } 14 | } 15 | } 16 | 17 | impl<'i, B: ParseBuilder<'i> + 'i> PrefixParselet<'i, B> for IntegralParselet { 18 | fn parse_prefix( 19 | &self, 20 | session: &mut ParserSession<'i, B>, 21 | tok_in: TokenRef<'i>, 22 | ) -> B::Node { 23 | // 24 | // Something like "\[Integral] f \[DifferentialD] x" (TID:231113/1) 25 | // 26 | 27 | panic_if_aborted!(); 28 | 29 | let tok_in = session.push_syntax_and_next(tok_in); 30 | 31 | let _ = session.push_context(Precedence::CLASS_INTEGRATIONOPERATORS); 32 | 33 | let (trivia1, Tok) = session.current_token_eat_trivia(); 34 | 35 | if Tok.tok == TokenKind::LongName_DifferentialD 36 | || Tok.tok == TokenKind::LongName_CapitalDifferentialD 37 | { 38 | // 39 | // TID:231113/2: "\[Integral] \[DifferentialD] x" 40 | // 41 | 42 | let node = session 43 | .push_leaf(Token::at_start(TokenKind::Fake_ImplicitOne, Tok)); 44 | 45 | return IntegralParselet::parse1( 46 | self, session, tok_in, trivia1, node, 47 | ); 48 | } 49 | 50 | let lhs_expr = session.parse_prefix(Tok); 51 | 52 | // MUSTTAIL 53 | return IntegralParselet::parse1( 54 | self, session, tok_in, trivia1, lhs_expr, 55 | ); 56 | } 57 | } 58 | 59 | impl IntegralParselet { 60 | fn parse1<'i, B: ParseBuilder<'i> + 'i>( 61 | &self, 62 | session: &mut ParserSession<'i, B>, 63 | prefix_op_token: B::SyntaxTokenNode, 64 | trivia1: 
B::TriviaHandle, 65 | first_operand: B::Node, 66 | ) -> B::Node { 67 | panic_if_aborted!(); 68 | 69 | 70 | let (trivia2, tok) = session.current_token(); 71 | 72 | if !(tok.tok == TokenKind::LongName_DifferentialD 73 | || tok.tok == TokenKind::LongName_CapitalDifferentialD) 74 | { 75 | session.trivia_reset(trivia2); 76 | 77 | // 78 | // TID:231113/3: "\[Integral] f" 79 | // 80 | 81 | let node = session.reduce_prefix( 82 | self.Op2, 83 | prefix_op_token, 84 | trivia1, 85 | first_operand, 86 | ); 87 | 88 | // MUSTTAIL 89 | return session.parse_climb(node); 90 | } 91 | 92 | let trivia2 = session.builder.push_trivia_seq(trivia2); 93 | 94 | // TODO(cleanup): 95 | // `tok` here is a known prefix operator. 96 | // Statically check somehow that `second_operand` is a prefix 97 | // parselet, because we know it is LongName_{Capital}DifferentialD 98 | 99 | // MUSTTAIL 100 | let second_operand = session.parse_prefix(tok); 101 | 102 | 103 | // \[Integral] f \[DifferentialD] x 104 | 105 | let node = session.reduce_prefix_binary( 106 | self.Op1, 107 | prefix_op_token, 108 | trivia1, 109 | first_operand, 110 | trivia2, 111 | second_operand, 112 | ); 113 | 114 | return session.parse_climb(node); 115 | } 116 | } 117 | // Infix behavior of \[DifferentialD]: it is never parsed as an infix operator itself (parse_infix() panics). 118 | impl<'i, B: ParseBuilder<'i> + 'i> InfixParselet<'i, B> 119 | for InfixDifferentialDParselet 120 | { 121 | fn parse_infix( 122 | &self, 123 | _session: &mut ParserSession<'i, B>, 124 | _node: B::Node, 125 | _trivia1: B::TriviaHandle, 126 | _token: TokenRef, 127 | ) -> B::Node { 128 | panic!("illegal call to InfixDifferentialDParselet::parse_infix()") 129 | } 130 | /// Returns `None` while the top context has integration-operator precedence, otherwise `Some(Precedence::FAKE_IMPLICITTIMES)`. 131 | fn getPrecedence( 132 | &self, 133 | session: &ParserSession<'i, B>, 134 | ) -> Option<Precedence> { 135 | if session.top_precedence() == Precedence::CLASS_INTEGRATIONOPERATORS { 136 | // 137 | // Inside \[Integral], so \[DifferentialD] is treated specially 138 | // 139 | 140 | return None; 141 | } 142 | 143 | return Some(Precedence::FAKE_IMPLICITTIMES); 144 | } 145 | 146 | fn process_implicit_times( 147 | 
&self, 148 | session: &mut ParserSession<'i, B>, 149 | tok_in: TokenRef<'i>, 150 | ) -> TokenRef<'i> { 151 | if session.top_precedence() == Precedence::CLASS_INTEGRATIONOPERATORS { 152 | // 153 | // Inside \[Integral], so \[DifferentialD] is treated specially 154 | // 155 | 156 | return tok_in; 157 | } 158 | 159 | return Token::at_start(TokenKind::Fake_ImplicitTimes, tok_in); 160 | } 161 | } 162 | -------------------------------------------------------------------------------- /crates/wolfram-parser/src/long_names.rs: -------------------------------------------------------------------------------- 1 | //! Collection of utility functions for codepoints and long names 2 | 3 | use crate::{ 4 | generated::long_names_registration::{ 5 | ASCII_REPLACEMENTS_MAP, CODEPOINT_TO_LONGNAME_MAP, 6 | LONGNAME_TO_CODEPOINT_MAP, MB_NEWLINE_CODE_POINTS, 7 | MB_NOT_STRAGE_LETTERLIKE_CODE_POINTS, MB_PUNCTUATION_CODE_POINTS, 8 | MB_UNINTERPRETABLE_CODE_POINTS, MB_WHITESPACE_CODE_POINTS, RAW_SET, 9 | }, 10 | read::code_point::CodePoint, 11 | utils, 12 | }; 13 | 14 | pub(crate) fn codepoint_has_longname(point: char) -> bool { 15 | codepoint_to_longname(CodePoint::Char(point)).is_some() 16 | } 17 | 18 | pub(crate) fn codepoint_to_longname(point: CodePoint) -> Option<&'static str> { 19 | // NOTE: This assertion currently spuriously fails because the 20 | // StringMeta_DoubleQuote and StringMeta_Backslash codepoints are fake 21 | // codepoints with negative values. 
22 | /* 23 | debug_assert!(utils::is_sorted_by( 24 | &CODEPOINT_TO_LONGNAME_MAP, 25 | |(point, _): &(CodePoint, &str)| *point 26 | )); 27 | */ 28 | 29 | let index: usize = CODEPOINT_TO_LONGNAME_MAP 30 | .binary_search_by(|(cp, _)| cp.cmp(&point)) 31 | .ok()?; 32 | 33 | let (_, longname) = CODEPOINT_TO_LONGNAME_MAP[index]; 34 | 35 | Some(longname) 36 | } 37 | /// Binary-searches `LONGNAME_TO_CODEPOINT_MAP` for `longname`; returns `None` when it is not in the table. 38 | pub(crate) fn longname_to_codepoint(longname: &str) -> Option<CodePoint> { 39 | debug_assert!(utils::is_sorted_by( 40 | &LONGNAME_TO_CODEPOINT_MAP, 41 | |(str, _): &(&str, CodePoint)| *str 42 | )); 43 | 44 | let index: usize = LONGNAME_TO_CODEPOINT_MAP 45 | .binary_search_by(|&(str, _)| str.cmp(longname)) 46 | .ok()?; 47 | 48 | let (_, point) = LONGNAME_TO_CODEPOINT_MAP[index]; 49 | 50 | Some(point) 51 | } 52 | 53 | /// Is this \[Raw] something? 54 | pub fn isRaw(long_name_str: &str) -> bool { 55 | debug_assert!(utils::is_sorted(&RAW_SET)); 56 | return RAW_SET.binary_search(&long_name_str).is_ok(); 57 | } 58 | /// Whether `point` is a `char` found in `MB_NOT_STRAGE_LETTERLIKE_CODE_POINTS`; non-`char` code points yield `false`. 59 | pub fn isMBNotStrangeLetterlike(point: CodePoint) -> bool { 60 | // TODO(cleanup): Change param type? 61 | let Some(char) = point.as_char() else { 62 | return false; 63 | }; 64 | 65 | debug_assert!(utils::is_sorted(&MB_NOT_STRAGE_LETTERLIKE_CODE_POINTS)); 66 | return MB_NOT_STRAGE_LETTERLIKE_CODE_POINTS 67 | .binary_search(&char) 68 | .is_ok(); 69 | } 70 | /// Owned copies of the replacement strings `ASCII_REPLACEMENTS_MAP` holds for `point`; empty when there is no entry or `point` is not a `char`. 71 | pub fn asciiReplacements(point: CodePoint) -> Vec<String> { 72 | // TODO(cleanup): Change param type? 
73 | let Some(char) = point.as_char() else { 74 | return Vec::new(); 75 | }; 76 | 77 | debug_assert!(utils::is_sorted(ASCII_REPLACEMENTS_MAP)); 78 | 79 | let Some(index): Option<usize> = ASCII_REPLACEMENTS_MAP 80 | .binary_search_by(|(cp, _)| cp.cmp(&char)) 81 | .ok() 82 | else { 83 | return Vec::new(); 84 | }; 85 | 86 | let (_, replacements) = ASCII_REPLACEMENTS_MAP[index]; 87 | 88 | replacements 89 | .into_iter() 90 | .map(|&s: &&str| s.to_owned()) 91 | .collect() 92 | } 93 | /// Makes a replacement string visible when printed: " " becomes U+2423, a newline becomes the two characters `\n`, anything else is returned unchanged. 94 | pub fn replacementGraphical(replacement: String) -> String { 95 | if replacement == " " { 96 | // 97 | // \[SpaceIndicator] 98 | // 99 | 100 | // this was: 101 | // return "\u2423"; 102 | // 103 | // But MSVC gave: 104 | // warning C4566: character represented by universal-character-name '\u2423' cannot be represented in the current code page (1252) 105 | // 106 | 107 | // 108 | // UTF-8 bytes for U+2423 109 | // 110 | return String::from("\u{2423}"); 111 | } 112 | 113 | if replacement == "\n" { 114 | return String::from("\\n"); 115 | } 116 | 117 | return replacement; 118 | } 119 | /// Whether `point` is a `char` found in `MB_PUNCTUATION_CODE_POINTS`; non-`char` code points yield `false`. 120 | pub fn isMBPunctuation(point: CodePoint) -> bool { 121 | // TODO(cleanup): Change param type? 122 | let Some(char) = point.as_char() else { 123 | return false; 124 | }; 125 | 126 | debug_assert!(utils::is_sorted(&MB_PUNCTUATION_CODE_POINTS)); 127 | return MB_PUNCTUATION_CODE_POINTS.binary_search(&char).is_ok(); 128 | } 129 | 130 | pub fn isMBWhitespace(point: CodePoint) -> bool { 131 | // TODO(cleanup): Change param type? 
132 | let Some(char) = point.as_char() else { 133 | return false; 134 | }; 135 | 136 | debug_assert!(utils::is_sorted(&MB_WHITESPACE_CODE_POINTS)); 137 | return MB_WHITESPACE_CODE_POINTS.binary_search(&char).is_ok(); 138 | } 139 | 140 | pub fn isMBNewline(point: CodePoint) -> bool { 141 | debug_assert!(utils::is_sorted(&MB_NEWLINE_CODE_POINTS)); 142 | return MB_NEWLINE_CODE_POINTS.binary_search(&point).is_ok(); 143 | } 144 | 145 | pub fn isMBUninterpretable(point: CodePoint) -> bool { 146 | // TODO(cleanup): Change param type? 147 | let Some(char) = point.as_char() else { 148 | return false; 149 | }; 150 | 151 | debug_assert!(utils::is_sorted(&MB_UNINTERPRETABLE_CODE_POINTS)); 152 | return MB_UNINTERPRETABLE_CODE_POINTS.binary_search(&char).is_ok(); 153 | } 154 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # CodeParser 2 | 3 | CodeParser is a package for parsing Wolfram Language source code as abstract syntax trees (ASTs) or concrete syntax trees (CSTs). 4 | CodeParser is useful for inspecting code, formatting code, and instrumenting code (e.g., for coverage reporting or profiling), and much more! 5 | 6 | CodeParser has many key features: 7 | * Understands practically the entire Wolfram Language syntax. 8 | * Fast native library implementation. 9 | * Tested with a combination of a hand-written test suite and fuzz testing. 
10 | * Graceful error handling and recovery. 11 | 12 | 13 | ``` 14 | Needs["CodeParser`"] 15 | 16 | CodeParse["1+1"] 17 | ``` 18 | ``` 19 | Out[2]= ContainerNode[String, {CallNode[LeafNode[Symbol, "Plus", <||>], {LeafNode[Integer, "1", <|Source -> {{1, 1}, {1, 2}}|>], LeafNode[Integer, "1", <|Source -> {{1, 3}, {1, 4}}|>]}, <|Source -> {{1, 1}, {1, 4}}|>]}, <||>] 20 | ``` 21 | 22 | ["CodeParser and CodeInspector" on community.wolfram.com](https://community.wolfram.com/groups/-/m/t/1931315) 23 | 24 | [Parsing the Wolfram Language from WTC 2019: Watch Video (youtube)](https://www.youtube.com/watch?v=rOa5IntICFA) 25 | 26 | [Parsing the Wolfram Language from WTC 2019: Watch Video (wolfram.com)](https://www.wolfram.com/broadcast/video.php?v=2908) 27 | 28 | [Parsing the Wolfram Language from WTC 2019: Download Presentation](https://files.wolframcdn.com/pub/www.wolfram.com/technology-conference/2019/Thursday/2019BrentonBostickParsingTheWL.nb) 29 | 30 | 31 | ## Setup 32 | 33 | CodeParser is included in Mathematica 12.2 and above. 34 | 35 | For older versions, install the CodeParser paclet from the public paclet server: 36 | ``` 37 | PacletInstall["CodeParser"] 38 | ``` 39 | 40 | [Build and install the CodeParser paclet locally](HowToBuild.md) 41 | 42 | 43 | ## Using CodeParser 44 | 45 | After CodeParser is installed, load it and call `CodeParse`: 46 | 47 | ``` 48 | Needs["CodeParser`"] 49 | 50 | CodeParse["1+1"] 51 | ``` 52 | ``` 53 | Out[2]= ContainerNode[String, {CallNode[LeafNode[Symbol, "Plus", <||>], {LeafNode[Integer, "1", <|Source -> {{1, 1}, {1, 2}}|>], LeafNode[Integer, "1", <|Source -> {{1, 3}, {1, 4}}|>]}, <|Source -> {{1, 1}, {1, 4}}|>]}, <||>] 54 | ``` 55 | 56 | The input to `CodeParse` may be a string, a `File`, or a list of bytes. 57 | 58 | 59 | ### Command-line tool (Optional) 60 | 61 | An optional `codeparser` command-line tool is also built and can be used. 62 | 63 | ``` 64 | cmake -DBUILD_EXE=ON .. 65 | cmake --build . 
--target codeparser-exe 66 | 67 | $cpp/src/exe/codeparser 68 | >>> 1+1 69 | InfixNode[Plus, {LeafNode[Integer, "1", <|Source->{{1, 1}, {1, 2}}|>], LeafNode[Integer, "1", <|Source->{{1, 3}, {1, 4}}|>]}, <|Source->{{1, 1}, {1, 4}}|>] 70 | 71 | >>> 72 | ``` 73 | 74 | 75 | ## Troubleshooting 76 | 77 | Make sure that CodeParser can be found on your system: 78 | ``` 79 | Needs["CodeParser`"] 80 | ``` 81 | 82 | and try a basic example: 83 | ``` 84 | CodeParse["1+1"] 85 | ``` 86 | 87 | You may get `LibraryFunction` messages: 88 | ``` 89 | In[1]:= Needs["CodeParser`"] 90 | 91 | In[2]:= CodeParse["1+1"] 92 | 93 | During evaluation of In[2]:= LibraryFunction::version: The version number 7 of the library is not consistent with the current or any previous WolframLibraryVersion. 94 | 95 | During evaluation of In[2]:= LibraryFunction::initerr: A nonzero error code 7 was returned during the initialization of the library /Users/user/Library/Mathematica/Paclets/Repository/CodeParser-1.6/LibraryResources/MacOSX-x86-64/CodeParser.dylib. 96 | 97 | During evaluation of In[2]:= LibraryFunction::libload: The function ConcreteParseBytes_Listable_LibraryLink was not loaded from the file /Users/user/Library/Mathematica/Paclets/Repository/CodeParser-1.6/LibraryResources/MacOSX-x86-64/CodeParser.dylib. 98 | 99 | Out[2]= $Failed 100 | ``` 101 | 102 | This means that CodeParser was built with a newer version of Wolfram System than your system supports. 103 | 104 | To fix this, build CodeParser from source with the version of Wolfram System that you will use. 105 | 106 | ## Benchmarks 107 | 108 | > Some of the benchmarks test large data files. Those files are tracked in this 109 | > repository to ensure that benchmarks are always run against identical input. 110 | > [Git LFS](https://git-lfs.github.com/) is used to ensure that a basic checkout 111 | > of this repository remains small, which is important in CI/CD builds. 
112 | 113 | To run the benchmarks, first ensure that the large benchmark files have been 114 | checked out locally using: 115 | 116 | ```shell 117 | $ git lfs pull --exclude="" --include="*" 118 | ``` 119 | 120 | This will override the default settings in [`.lfsconfig`](./.lfsconfig). 121 | 122 | Then, to begin running the benchmarks, execute: 123 | 124 | ```shell 125 | $ cargo bench 126 | ``` 127 | 128 | ## File Overview 129 | 130 | * [Tests/files/large/](./Tests/files/large/) contains files 131 | managed by [`Git LFS`](https://git-lfs.github.com/). The files in this 132 | directory are used by the benchmarks. These files should never be modified, to 133 | ensure that benchmark results from different revisions of this 134 | repository can be meaningfully compared. -------------------------------------------------------------------------------- /Tests/AbstractSyntaxErrorNodes.mt: -------------------------------------------------------------------------------- 1 | Print["\n===== Start AbstractSyntaxErrorNodes.mt =====\n"] 2 | 3 | Needs["CodeParser`"] 4 | 5 | 6 | (* 7 | OpenSquare: 8 | *) 9 | 10 | TestMatch[ 11 | CodeParse[" [x] "] 12 | , 13 | ContainerNode[String, {AbstractSyntaxErrorNode[AbstractSyntaxError`OpenSquare, _, _]}, _] 14 | , 15 | TestID->"AbstractSyntaxErrorNodes-20190520-U4J1C1" 16 | ] 17 | 18 | 19 | TestMatch[ 20 | CodeParse[" ::[x] "] 21 | , 22 | ContainerNode[String, { 23 | AbstractSyntaxErrorNode[AbstractSyntaxError`ColonColonOpenSquare, { 24 | LeafNode[Symbol, "x", <|Source -> {{1, 5}, {1, 6}}|>]}, <|Source -> {{1, 2}, {1, 7}}|>]}, <|Source -> {{1, 1}, {1, 8}}|>] 25 | , 26 | TestID->"AbstractSyntaxErrorNodes-20220917-G3L5M1" 27 | ] 28 | 29 | TestMatch[ 30 | CodeParse[" \\[LeftDoubleBracket]x\\[RightDoubleBracket] "] 31 | , 32 | ContainerNode[String, { 33 | AbstractSyntaxErrorNode[AbstractSyntaxError`LeftDoubleBracket, { 34 | LeafNode[Symbol, "x", <|Source -> {{1, 22}, {1, 23}}|>]}, <|Source -> {{1, 2}, {1, 44}}|>]}, <|Source -> {{1, 1}, 
{1, 45}}|>] 35 | , 36 | TestID->"AbstractSyntaxErrorNodes-20220917-C4T9X0" 37 | ] 38 | 39 | (* 40 | OpenParen: 41 | *) 42 | 43 | TestMatch[ 44 | CodeParse[" (1,2,3) "] 45 | , 46 | ContainerNode[String, {AbstractSyntaxErrorNode[AbstractSyntaxError`OpenParen, _, _]}, _] 47 | , 48 | TestID->"AbstractSyntaxErrorNodes-20190520-E0X9G7" 49 | ] 50 | 51 | 52 | 53 | (* 54 | GroupMissingCloser: 55 | *) 56 | 57 | TestMatch[ 58 | CodeParse["{"] 59 | , 60 | ContainerNode[String, {GroupMissingCloserNode[List, _, _]}, _] 61 | , 62 | TestID->"AbstractSyntaxErrorNodes-20190520-M0B3Z5" 63 | ] 64 | 65 | 66 | TestMatch[ 67 | CodeParse["<|"] 68 | , 69 | ContainerNode[String, {GroupMissingCloserNode[Association, _, _]}, _] 70 | , 71 | TestID->"AbstractSyntaxErrorNodes-20190520-U0L5P6" 72 | ] 73 | 74 | TestMatch[ 75 | CodeParse["\[LeftAngleBracket]"] 76 | , 77 | ContainerNode[String, {GroupMissingCloserNode[AngleBracket, _, _]}, _] 78 | , 79 | TestID->"AbstractSyntaxErrorNodes-20190520-X7G1G5" 80 | ] 81 | 82 | 83 | TestMatch[ 84 | CodeParse["\[LeftCeiling]"] 85 | , 86 | ContainerNode[String, {GroupMissingCloserNode[Ceiling, _, _]}, _] 87 | , 88 | TestID->"AbstractSyntaxErrorNodes-20190520-Q4A4B9" 89 | ] 90 | 91 | 92 | TestMatch[ 93 | CodeParse["\[LeftFloor]"] 94 | , 95 | ContainerNode[String, {GroupMissingCloserNode[Floor, _, _]}, _] 96 | , 97 | TestID->"AbstractSyntaxErrorNodes-20190520-C4T4D9" 98 | ] 99 | 100 | TestMatch[ 101 | CodeParse["\[LeftDoubleBracket]"] 102 | , 103 | ContainerNode[String, {GroupMissingCloserNode[GroupDoubleBracket, _, _]}, _] 104 | , 105 | TestID->"AbstractSyntaxErrorNodes-20190520-S1C3U4" 106 | ] 107 | 108 | TestMatch[ 109 | CodeParse["\[LeftBracketingBar]"] 110 | , 111 | ContainerNode[String, {GroupMissingCloserNode[BracketingBar, _, _]}, _] 112 | , 113 | TestID->"AbstractSyntaxErrorNodes-20190520-H0B3W9" 114 | ] 115 | 116 | TestMatch[ 117 | CodeParse["\[LeftDoubleBracketingBar]"] 118 | , 119 | ContainerNode[String, {GroupMissingCloserNode[DoubleBracketingBar, 
_, _]}, _] 120 | , 121 | TestID->"AbstractSyntaxErrorNodes-20190520-R4A5I7" 122 | ] 123 | 124 | TestMatch[ 125 | CodeParse["("] 126 | , 127 | ContainerNode[String, {GroupMissingCloserNode[GroupParen, _, _]}, _] 128 | , 129 | TestID->"AbstractSyntaxErrorNodes-20190520-K6C7J1" 130 | ] 131 | 132 | TestMatch[ 133 | CodeParse["["] 134 | , 135 | ContainerNode[String, {GroupMissingCloserNode[GroupSquare, _, _]}, _] 136 | , 137 | TestID->"AbstractSyntaxErrorNodes-20190520-Y0H1P1" 138 | ] 139 | 140 | 141 | TestMatch[ 142 | CodeParse["\\("] 143 | , 144 | ContainerNode[String, {ErrorNode[Token`Error`UnterminatedLinearSyntaxBlob, _, _]}, _] 145 | , 146 | TestID->"AbstractSyntaxErrorNodes-20190520-B2V0A0" 147 | ] 148 | 149 | 150 | 151 | 152 | 153 | 154 | 155 | 156 | (* 157 | LinearSyntaxBang: 158 | *) 159 | 160 | 161 | Test[ 162 | CodeParse["\\!123"] 163 | , 164 | ContainerNode[String, { 165 | AbstractSyntaxErrorNode[AbstractSyntaxError`LinearSyntaxBang, { 166 | LeafNode[Integer, "123", <|Source -> {{1, 3}, {1, 6}}|>]}, <|Source -> {{1, 1}, {1, 6}}|>]}, <|Source -> {{1, 1}, {1, 6}}|>] 167 | , 168 | TestID->"AbstractSyntaxErrorNodes-20190520-N8K8K4" 169 | ] 170 | 171 | 172 | (* 173 | NonAssociative: 174 | 175 | TODO: is this a quirk? 176 | 177 | *) 178 | 179 | Test[ 180 | CodeParse["a ? b ? 
c"] 181 | , 182 | ContainerNode[String, { 183 | AbstractSyntaxErrorNode[AbstractSyntaxError`NonAssociativePatternTest, { 184 | CallNode[LeafNode[Symbol, "PatternTest", <||>], { 185 | LeafNode[Symbol, "a", <|Source -> {{1, 1}, {1, 2}}|>], 186 | LeafNode[Symbol, "b", <|Source -> {{1, 5}, {1, 6}}|>]}, <|Source -> {{1, 1}, {1, 6}}|>], 187 | LeafNode[Symbol, "c", <|Source -> {{1, 9}, {1, 10}}|>]}, <|Source -> {{1, 1}, {1, 10}}|>] }, <|Source -> {{1, 1}, {1, 10}}|>] 188 | , 189 | TestID->"AbstractSyntaxErrorNodes-20190521-A6K4H1" 190 | ] 191 | 192 | 193 | (* 194 | ExpectedSymbol: 195 | *) 196 | 197 | Test[ 198 | CodeParse["1:2"] 199 | , 200 | ContainerNode[String, { 201 | SyntaxErrorNode[SyntaxError`ExpectedSymbol, { 202 | LeafNode[Integer, "1", <|Source -> {{1, 1}, {1, 2}}|>], 203 | LeafNode[Integer, "2", <|Source -> {{1, 3}, {1, 4}}|>]}, <|Source -> {{1, 1}, {1, 4}}|>]}, <|Source -> {{1, 1}, {1, 4}}|>] 204 | , 205 | TestID->"AbstractSyntaxErrorNodes-20190521-Z6D6T1" 206 | ] 207 | 208 | -------------------------------------------------------------------------------- /crates/wolfram-parser/src/iter.rs: -------------------------------------------------------------------------------- 1 | //! Iterators over source characters, Wolfram characters, and tokens. 2 | //! 3 | //! ## Source Characters 4 | //! 5 | //! Iterate over [`SourceCharacter`]s using [`source_chars()`]: 6 | //! 7 | //! ``` 8 | //! use wolfram_parser::{iter::source_chars, source::SourceCharacter}; 9 | //! 10 | //! let mut chars = source_chars(r#"2*\[Pi]"#, &Default::default()); 11 | //! 12 | //! assert_eq!(chars.next(), Some(SourceCharacter::Char('2'))); 13 | //! assert_eq!(chars.next(), Some(SourceCharacter::Char('*'))); 14 | //! assert_eq!(chars.next(), Some(SourceCharacter::Char('\\'))); 15 | //! assert_eq!(chars.next(), Some(SourceCharacter::Char('['))); 16 | //! assert_eq!(chars.next(), Some(SourceCharacter::Char('P'))); 17 | //! assert_eq!(chars.next(), Some(SourceCharacter::Char('i'))); 18 | //! 
assert_eq!(chars.next(), Some(SourceCharacter::Char(']'))); 19 | //! assert_eq!(chars.next(), None); 20 | //! ``` 21 | //! 22 | //! ## Wolfram Characters 23 | //! 24 | //! Iterate over [`WLCharacter`]s using [`wolfram_chars()`]: 25 | //! 26 | //! ``` 27 | //! use wolfram_parser::{iter::wolfram_chars, read::{WLCharacter, Escape}}; 28 | //! 29 | //! let mut chars = wolfram_chars(r#"2*\[Pi]"#, &Default::default()); 30 | //! 31 | //! assert_eq!(chars.next(), Some(WLCharacter::new('2'))); 32 | //! assert_eq!(chars.next(), Some(WLCharacter::new('*'))); 33 | //! assert_eq!(chars.next(), Some(WLCharacter::escaped('π', Escape::LongName))); 34 | //! assert_eq!(chars.next(), None); 35 | //! ``` 36 | //! 37 | //! ## Tokens 38 | //! 39 | //! Iterate over [`Token`]s using [`tokens()`]: 40 | //! 41 | //! ``` 42 | //! use wolfram_parser::{ 43 | //! iter::tokens, 44 | //! tokenize::{Token, TokenKind}, 45 | //! macros::src, 46 | //! }; 47 | //! 48 | //! let mut chars = tokens(r#"2*\[Pi]"#, &Default::default()); 49 | //! 50 | //! assert_eq!(chars.next(), Some(Token::new(TokenKind::Integer, "2", src!(1:1-2)))); 51 | //! assert_eq!(chars.next(), Some(Token::new(TokenKind::Star, "*", src!(1:2-3)))); 52 | //! assert_eq!(chars.next(), Some(Token::new(TokenKind::Symbol, "\\[Pi]", src!(1:3-8)))); 53 | //! assert_eq!(chars.next(), None); 54 | //! ``` 55 | 56 | use crate::{ 57 | read::{code_point::CodePoint, Reader, WLCharacter}, 58 | source::{SourceCharacter, TOPLEVEL}, 59 | tokenize::{Token, TokenKind, TokenStr, Tokenizer}, 60 | ParseOptions, 61 | }; 62 | 63 | //====================================== 64 | // API Functions 65 | //====================================== 66 | 67 | /// Get an iterator over the [`SourceCharacter`]s in a Wolfram Language input. 
68 | pub fn source_chars<'i>( 69 | input: &'i str, 70 | opts: &ParseOptions, 71 | ) -> SourceChars<'i> { 72 | SourceChars { 73 | reader: Reader::new(input.as_bytes(), opts), 74 | } 75 | } 76 | 77 | /// Get an iterator over the [`WLCharacter`]s in a Wolfram Language input. 78 | pub fn wolfram_chars<'i>( 79 | input: &'i str, 80 | opts: &ParseOptions, 81 | ) -> WolframChars<'i> { 82 | WolframChars { 83 | reader: Reader::new(input.as_bytes(), opts), 84 | } 85 | } 86 | 87 | /// Get an iterator over the [`Token`]s in a Wolfram Language input. 88 | pub fn tokens<'i>(input: &'i str, opts: &ParseOptions) -> Tokens<'i> { 89 | Tokens { 90 | tokenizer: Tokenizer::new(input.as_bytes(), opts), 91 | } 92 | } 93 | 94 | //====================================== 95 | // Types 96 | //====================================== 97 | 98 | /// Iterator over [`SourceCharacter`]s in a Wolfram Language input. 99 | /// 100 | /// Returned by [`source_chars()`]. 101 | pub struct SourceChars<'i> { 102 | reader: Reader<'i>, 103 | } 104 | 105 | 106 | /// Iterator over [`WLCharacter`]s in a Wolfram Language input. 107 | /// 108 | /// Returned by [`wolfram_chars()`]. 109 | pub struct WolframChars<'i> { 110 | reader: Reader<'i>, 111 | } 112 | 113 | /// Iterator over [`Token`]s in a Wolfram Language input. 114 | /// 115 | /// Returned by [`tokens()`]. 
116 | pub struct Tokens<'i> { 117 | tokenizer: Tokenizer<'i>, 118 | } 119 | 120 | //======================================= 121 | // Iterator Impls 122 | //======================================= 123 | 124 | impl<'i> Iterator for SourceChars<'i> { 125 | type Item = SourceCharacter; 126 | 127 | fn next(&mut self) -> Option { 128 | let SourceChars { reader } = self; 129 | 130 | let char = reader.next_source_char(TOPLEVEL); 131 | 132 | if char != CodePoint::EndOfFile { 133 | Some(char) 134 | } else { 135 | None 136 | } 137 | } 138 | } 139 | 140 | impl<'i> Iterator for WolframChars<'i> { 141 | type Item = WLCharacter; 142 | 143 | fn next(&mut self) -> Option { 144 | let WolframChars { reader } = self; 145 | 146 | let char = reader.next_wolfram_char(TOPLEVEL); 147 | 148 | if char.point != CodePoint::EndOfFile { 149 | Some(char) 150 | } else { 151 | None 152 | } 153 | } 154 | } 155 | 156 | impl<'i> Iterator for Tokens<'i> { 157 | type Item = Token>; 158 | 159 | fn next(&mut self) -> Option { 160 | let Tokens { tokenizer } = self; 161 | 162 | let token = tokenizer.next_token(); 163 | 164 | if token.tok != TokenKind::EndOfFile { 165 | Some(token) 166 | } else { 167 | None 168 | } 169 | } 170 | } 171 | -------------------------------------------------------------------------------- /Tests/CallMissingCloserNodes.mt: -------------------------------------------------------------------------------- 1 | Print["\n===== Start CallMissingCloserNodes.mt =====\n"] 2 | 3 | (* Wolfram Language Test file *) 4 | 5 | Needs["CodeParser`"] 6 | 7 | 8 | Test[ 9 | CodeParse["f["] 10 | , 11 | ContainerNode[String, { 12 | CallMissingCloserNode[ 13 | LeafNode[Symbol, "f", <|Source -> {{1, 1}, {1, 2}}|>], {}, <|Source -> {{1, 1}, {1, 3}}|>]}, <|Source -> {{1, 1}, {1, 3}}|>] 14 | , 15 | TestID->"CallMissingCloserNodes-20190701-H7G3R7" 16 | ] 17 | 18 | Test[ 19 | CodeParse["f[1"] 20 | , 21 | ContainerNode[String, { 22 | CallMissingCloserNode[ 23 | LeafNode[Symbol, "f", <|Source -> {{1, 1}, {1, 
2}}|>], { 24 | LeafNode[Integer, "1", <|Source -> {{1, 3}, {1, 4}}|>]}, <|Source -> {{1, 1}, {1, 4}}|>]}, <|Source -> {{1, 1}, {1, 4}}|>] 25 | , 26 | TestID->"CallMissingCloserNodes-20220917-B7K4Z8" 27 | ] 28 | (* TypeSpecifier opener: the expected head is CallNode[TypeSpecifier, {f}] carrying empty data *) 29 | Test[ 30 | CodeParse["f::["] 31 | , 32 | ContainerNode[String, {CallMissingCloserNode[CallNode[LeafNode[Symbol, "TypeSpecifier", <||>], {LeafNode[Symbol, "f", <|Source -> {{1, 1}, {1, 2}}|>]}, <||>], {}, <|Source -> {{1, 1}, {1, 5}}|>]}, <|Source -> {{1, 1}, {1, 5}}|>] 33 | , 34 | TestID->"CallMissingCloserNodes-20220917-W2D0J4" 35 | ] 36 | 37 | Test[ 38 | CodeParse["f::[1"] 39 | , 40 | ContainerNode[String, {CallMissingCloserNode[CallNode[LeafNode[Symbol, "TypeSpecifier", <||>], {LeafNode[Symbol, "f", <|Source -> {{1, 1}, {1, 2}}|>]}, <||>], {LeafNode[Integer, "1", <|Source -> {{1, 5}, {1, 6}}|>]}, <|Source -> {{1, 1}, {1, 6}}|>]}, <|Source -> {{1, 1}, {1, 6}}|>] 41 | , 42 | TestID->"CallMissingCloserNodes-20220917-L2H2N5" 43 | ] 44 | (* LeftDoubleBracket opener: the expected head is the symbol Part and f becomes the first child *) 45 | Test[ 46 | CodeParse["f\\[LeftDoubleBracket]"] 47 | , 48 | ContainerNode[String, {CallMissingCloserNode[LeafNode[Symbol, "Part", <||>], {LeafNode[Symbol, "f", <|Source -> {{1, 1}, {1, 2}}|>]}, <|Source -> {{1, 1}, {1, 22}}|>]}, <|Source -> {{1, 1}, {1, 22}}|>] 49 | , 50 | TestID->"CallMissingCloserNodes-20220917-C2J4K7" 51 | ] 52 | 53 | Test[ 54 | CodeParse["f\\[LeftDoubleBracket]1"] 55 | , 56 | ContainerNode[String, {CallMissingCloserNode[LeafNode[Symbol, "Part", <||>], {LeafNode[Symbol, "f", <|Source -> {{1, 1}, {1, 2}}|>], LeafNode[Integer, "1", <|Source -> {{1, 22}, {1, 23}}|>]}, <|Source -> {{1, 1}, {1, 23}}|>]}, <|Source -> {{1, 1}, {1, 23}}|>] 57 | , 58 | TestID->"CallMissingCloserNodes-20220917-J0V0J3" 59 | ] 60 | (* Same openers inside a paren group: the expected ContainerNode holds the CallMissingCloserNode directly, and the container Source spans the whole input *) 61 | Test[ 62 | CodeParse["( f[ )"] 63 | , 64 | ContainerNode[String, {CallMissingCloserNode[LeafNode[Symbol, "f", <|Source -> {{1, 3}, {1, 4}}|>], {}, <|Source -> {{1, 3}, {1, 5}}|>]}, <|Source -> {{1, 1}, {1, 7}}|>] 65 | , 66 | TestID->"CallMissingCloserNodes-20220917-L0B1H1" 67 | ] 68 | 69 | 
Test[ 70 | CodeParse["( f[1 )"] 71 | , 72 | ContainerNode[String, {CallMissingCloserNode[LeafNode[Symbol, "f", <|Source -> {{1, 3}, {1, 4}}|>], {LeafNode[Integer, "1", <|Source -> {{1, 5}, {1, 6}}|>]}, <|Source -> {{1, 3}, {1, 6}}|>]}, <|Source -> {{1, 1}, {1, 8}}|>] 73 | , 74 | TestID->"CallMissingCloserNodes-20220917-J2C1D4" 75 | ] 76 | 77 | Test[ 78 | CodeParse["( f::[ )"] 79 | , 80 | ContainerNode[String, {CallMissingCloserNode[CallNode[LeafNode[Symbol, "TypeSpecifier", <||>], {LeafNode[Symbol, "f", <|Source -> {{1, 3}, {1, 4}}|>]}, <||>], {}, <|Source -> {{1, 3}, {1, 7}}|>]}, <|Source -> {{1, 1}, {1, 9}}|>] 81 | , 82 | TestID->"CallMissingCloserNodes-20220917-S2K7R7" 83 | ] 84 | 85 | Test[ 86 | CodeParse["( f::[1 )"] 87 | , 88 | ContainerNode[String, {CallMissingCloserNode[CallNode[LeafNode[Symbol, "TypeSpecifier", <||>], {LeafNode[Symbol, "f", <|Source -> {{1, 3}, {1, 4}}|>]}, <||>], {LeafNode[Integer, "1", <|Source -> {{1, 7}, {1, 8}}|>]}, <|Source -> {{1, 3}, {1, 8}}|>]}, <|Source -> {{1, 1}, {1, 10}}|>] 89 | , 90 | TestID->"CallMissingCloserNodes-20220917-K9J6S4" 91 | ] 92 | 93 | Test[ 94 | CodeParse["( f\\[LeftDoubleBracket] )"] 95 | , 96 | ContainerNode[String, {CallMissingCloserNode[LeafNode[Symbol, "Part", <||>], {LeafNode[Symbol, "f", <|Source -> {{1, 3}, {1, 4}}|>]}, <|Source -> {{1, 3}, {1, 24}}|>]}, <|Source -> {{1, 1}, {1, 26}}|>] 97 | , 98 | TestID->"CallMissingCloserNodes-20220917-D4Q8T3" 99 | ] 100 | 101 | Test[ 102 | CodeParse["( f\\[LeftDoubleBracket]1 )"] 103 | , 104 | ContainerNode[String, {CallMissingCloserNode[LeafNode[Symbol, "Part", <||>], {LeafNode[Symbol, "f", <|Source -> {{1, 3}, {1, 4}}|>], LeafNode[Integer, "1", <|Source -> {{1, 24}, {1, 25}}|>]}, <|Source -> {{1, 3}, {1, 25}}|>]}, <|Source -> {{1, 1}, {1, 27}}|>] 105 | , 106 | TestID->"CallMissingCloserNodes-20220917-V9R1M3" 107 | ] 108 | (* Bare openers with no head inside a paren group: GroupMissingCloserNode is expected instead of CallMissingCloserNode *) 109 | Test[ 110 | CodeParse["( [ )"] 111 | , 112 | ContainerNode[String, {GroupMissingCloserNode[GroupSquare, {}, <|Source -> {{1, 3}, {1, 
4}}|>]}, <|Source -> {{1, 1}, {1, 6}}|>] 113 | , 114 | TestID->"CallMissingCloserNodes-20220917-B5I2K2" 115 | ] 116 | 117 | Test[ 118 | CodeParse["( ::[ )"] 119 | , 120 | ContainerNode[String, {GroupMissingCloserNode[GroupTypeSpecifier, {}, <|Source -> {{1, 3}, {1, 6}}|>]}, <|Source -> {{1, 1}, {1, 8}}|>] 121 | , 122 | TestID->"CallMissingCloserNodes-20220917-X9W0H8" 123 | ] 124 | 125 | Test[ 126 | CodeParse["( \\[LeftDoubleBracket] )"] 127 | , 128 | ContainerNode[String, {GroupMissingCloserNode[GroupDoubleBracket, {}, <|Source -> {{1, 3}, {1, 23}}|>]}, <|Source -> {{1, 1}, {1, 25}}|>] 129 | , 130 | TestID->"CallMissingCloserNodes-20220917-G3Y7X9" 131 | ] 132 | 133 | Test[ 134 | CodeParse["(a[b[])"] 135 | , 136 | ContainerNode[String, { 137 | CallMissingCloserNode[LeafNode[Symbol, "a", <|Source -> {{1, 2}, {1, 3}}|>], { 138 | CallNode[LeafNode[Symbol, "b", <|Source -> {{1, 4}, {1, 5}}|>], {}, <|Source -> {{1, 4}, {1, 7}}|>]}, <|Source -> {{1, 2}, {1, 7}}|>]}, <|Source -> {{1, 1}, {1, 8}}|>] 139 | , 140 | TestID->"CallMissingCloserNodes-20190803-C7O2S5" 141 | ] 142 | 143 | Test[ 144 | CodeParse["List[a"] 145 | , 146 | ContainerNode[String, {CallMissingCloserNode[LeafNode[Symbol, "List", <|Source -> {{1, 1}, {1, 5}}|>], {LeafNode[Symbol, "a", <|Source -> {{1, 6}, {1, 7}}|>]}, <|Source -> {{1, 1}, {1, 7}}|>]}, <|Source -> {{1, 1}, {1, 7}}|>] 147 | , 148 | TestID->"CallMissingCloserNodes-20200708-Y2V4V2" 149 | 150 | ] 151 | 152 | 153 | 154 | 155 | 156 | 157 | -------------------------------------------------------------------------------- /crates/wolfram-parser/src/agg.rs: --------------------------------------------------------------------------------
use crate::{cst::Cst, source::Span, tokenize::TokenString, NodeSeq};

/// A [`NodeSeq`] whose elements are [`Cst`] nodes.
///
/// `I` and `S` default to [`TokenString`] and [`Span`], so a bare
/// `AggNodeSeq` names `NodeSeq<Cst<TokenString, Span>>`.
//
// NOTE(review): the generic argument list was reconstructed (the right-hand
// side previously read `NodeSeq>`, which does not parse). Confirm the
// parameter list against the declaration of `Cst`.
pub type AggNodeSeq<I = TokenString, S = Span> = NodeSeq<Cst<I, S>>;

//==========================================================
// Macros
//==========================================================

//======================================
// LHS!
11 | //====================================== 12 | 13 | // Note: Don't make this public outside of this crate until `AggCallNode` is 14 | // made part of Node. And updating it to use $crate for types. 15 | macro_rules! LHS { 16 | (LeafNode[$($head_kind:ident)|*, _, _]) => { 17 | $crate::cst::Cst::Token(Token { 18 | tok: $(TK::$head_kind)|*, 19 | .. 20 | }) 21 | }; 22 | 23 | //================================== 24 | // CallNode 25 | //================================== 26 | 27 | (CallNode[LeafNode[$token_kind:ident, _, _], $children:ident:_, _]) => { 28 | AggCallNode { 29 | head: Node::Token(Token { 30 | tok: TK::$token_kind, 31 | .. 32 | }), 33 | children: $children, 34 | } 35 | }; 36 | 37 | (CallNode[ 38 | $head_name:ident:$node_head:ident[$($node_args:tt)*], 39 | $group_name:ident:$group_head:ident[$group_kind:ident, _], 40 | $data:ident:_ 41 | ]) => { 42 | AggCallNode { 43 | head: $head_name @ LHS!($node_head[$($node_args)*]), 44 | body: LHS!($group_name : $group_head[$group_kind, _]), 45 | src: $data, 46 | } 47 | }; 48 | (CallNode[ 49 | $head_name:ident:_, 50 | $group_name:ident:$group_head:ident[$($group_kind:ident)|*, _], 51 | $data:ident:_ 52 | ]) => { 53 | AggCallNode { 54 | head: $head_name, 55 | body: LHS!($group_name:$group_head[$($group_kind)|*, _]), 56 | src: $data 57 | } 58 | }; 59 | (CallNode[ 60 | $head_name:ident:($($sub_head_pat:ident[$($sub_head_args:tt)*])|*), 61 | $group_name:ident:GroupNode[$group_kind:ident, _], 62 | $data:ident:_ 63 | ]) => { 64 | AggCallNode { 65 | head: $head_name @ ($(LHS!($sub_head_pat[$($sub_head_args)*]))|*), 66 | body: LHS!($group_name:GroupNode[$group_kind, _]), 67 | src: $data 68 | } 69 | }; 70 | (CallNode[_, _, _]) => { 71 | Cst::Call(_) 72 | }; 73 | 74 | //================================== 75 | // CompoundNode, BinaryNode, InfixNode, PrefixNode 76 | //================================== 77 | 78 | (CompoundNode[$($op_kind:ident)|*, _, _]) => { 79 | Cst::Compound(CompoundNode(OperatorNode { 80 | op: 
$(crate::parse::operators::CompoundOperator::$op_kind)|*, 81 | .. 82 | })) 83 | }; 84 | 85 | (BinaryNode[$($op_kind:ident)|*, _, _]) => { 86 | Cst::Binary(BinaryNode(OperatorNode { 87 | op: $($crate::parse::operators::BinaryOperator::$op_kind)|*, 88 | .. 89 | })) 90 | }; 91 | 92 | (InfixNode[$($op_kind:ident)|*, _, _]) => { 93 | Cst::Infix(InfixNode(OperatorNode { 94 | op: $($crate::parse::operators::InfixOperator::$op_kind)|*, 95 | .. 96 | })) 97 | }; 98 | (PrefixNode[$($op_kind:ident)|*, _, _]) => { 99 | Cst::Prefix(PrefixNode(OperatorNode { 100 | op: $($crate::parse::operators::PrefixOperator::$op_kind)|*, 101 | .. 102 | })) 103 | }; 104 | 105 | (PostfixNode[$($op_kind:ident)|*, _, _]) => { 106 | Cst::Postfix(PostfixNode(OperatorNode { 107 | op: $(crate::parse::operators::PostfixOperator::$op_kind)|*, 108 | .. 109 | })) 110 | }; 111 | 112 | //================================== 113 | // GroupNode 114 | //================================== 115 | 116 | (GroupNode[$($op_kind:ident)|*, $children:ident:_]) => { 117 | $crate::cst::Cst::Group(GroupNode(OperatorNode { 118 | op: $(GroupOperator::$op_kind)|*, 119 | children: $children, 120 | })) 121 | }; 122 | 123 | (GroupNode[$($op_kind:ident)|*, _, _]) => { 124 | Cst::Group(GroupNode(OperatorNode { 125 | op: $(GroupOperator::$op_kind)|*, 126 | .. 127 | })) 128 | }; 129 | ($name:ident:GroupNode[$group_kind:ident, _]) => { 130 | CallBody::Group($name @ GroupNode(OperatorNode { 131 | op: $crate::parse::operators::CallOperator::$group_kind, 132 | children: _, 133 | })) 134 | }; 135 | 136 | (GroupNode[_, _, _]) => { 137 | Cst::Group(GroupNode(OperatorNode { 138 | op: _, 139 | .. 
140 | })) 141 | }; 142 | 143 | //---------------------------------- 144 | // GroupMissingCloserNode 145 | //---------------------------------- 146 | 147 | ($name:ident:GroupMissingCloserNode[$($op_kind:ident)|*, _]) => { 148 | $crate::cst::CallBody::GroupMissingCloser($name @ $crate::cst::GroupMissingCloserNode(OperatorNode { 149 | op: $($crate::parse::operators::CallOperator::$op_kind)|*, 150 | .. 151 | })) 152 | }; 153 | 154 | //================================== 155 | // BoxNode 156 | //================================== 157 | 158 | (BoxNode[$box_kind:ident:_, _, _]) => { 159 | Cst::Box(BoxNode { 160 | kind: $box_kind, 161 | .. 162 | }) 163 | }; 164 | (BoxNode[$box_kind:ident:_, $children:ident:_, $data:ident:_]) => { 165 | Cst::Box(BoxNode { 166 | kind: $box_kind, 167 | children: $children, 168 | src: $data, 169 | }) 170 | }; 171 | (BoxNode[$box_kind:ident, $children:ident:_, $data:ident:_]) => { 172 | $crate::cst::Cst::Box(BoxNode { 173 | kind: BoxKind::$box_kind, 174 | children: $children, 175 | src: $data, 176 | }) 177 | }; 178 | (BoxNode[_, _, _]) => { 179 | Cst::Box(_) 180 | }; 181 | } 182 | 183 | 184 | pub(crate) use LHS; 185 | -------------------------------------------------------------------------------- /scripts/re_build_CodeParser.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 
127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | 150 | 151 | 152 | 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 | --------------------------------------------------------------------------------