├── Tests
    ├── files
    │   ├── small
    │   │   ├── empty.wl
    │   │   ├── crash.txt
    │   │   ├── crash6.txt
    │   │   ├── carriagereturn.wl
    │   │   ├── comment.wl
    │   │   ├── crash10.txt
    │   │   ├── crash12.txt
    │   │   ├── crash15.txt
    │   │   ├── crash16.txt
    │   │   ├── crash4.txt
    │   │   ├── crash5.txt
    │   │   ├── span1.wl
    │   │   ├── strange.wl
    │   │   ├── crash2.txt
    │   │   ├── crash7.txt
    │   │   ├── sample.wl
    │   │   ├── carriagereturn2.wl
    │   │   ├── continuation.wl
    │   │   ├── crash14.txt
    │   │   ├── carriagereturn3.wl
    │   │   ├── coverage3.wl
    │   │   ├── crash11.txt
    │   │   ├── carriagereturn4.wl
    │   │   ├── string1.wl
    │   │   ├── coverage2.wl
    │   │   ├── crash13.txt
    │   │   ├── crash17.txt
    │   │   ├── crash18.txt
    │   │   ├── crash3.txt
    │   │   ├── crash8.txt
    │   │   ├── crash9.txt
    │   │   ├── invalid1.wl
    │   │   ├── unsafe1.wl
    │   │   ├── unsafe2.wl
    │   │   └── coverage1.wl
    │   ├── package.wl
    │   ├── inputs-0001.txt
    │   ├── stackoverflow1.txt
    │   ├── stackoverflow3.txt
    │   ├── .gitattributes
    │   ├── large
    │   │   ├── ReliefPlot.nb
    │   │   ├── geomagneticmodels.m
    │   │   └── expandedCompanyDataNew1.m
    │   ├── 鳥物.wl
    │   ├── script.wl
    │   ├── inputs-0002.txt
    │   ├── linearsyntax.wl
    │   └── jpeg-string.txt
    ├── Regressions.mt
    ├── CodeSyntaxQ.mt
    ├── Quirks.mt
    ├── TokenEnum.mt
    ├── TestSuite.mt
    ├── ToNode.mt
    ├── Tokenize.mt
    ├── SafeString.mt
    ├── SyntaxErrorNodes.mt
    ├── Aggregate.mt
    ├── Characters.mt
    ├── CodeParser.mt
    ├── Unsafe.mt
    ├── ToString.mt
    ├── Error.mt
    ├── TokenErrors.mt
    ├── AbstractSyntaxIssues.mt
    ├── Scoping.mt
    ├── AbstractSyntaxErrorNodes.mt
    └── CallMissingCloserNodes.mt
├── .gitignore
├── .vscode
    └── settings.json
├── Cargo.toml
├── .lfsconfig
├── crates
    ├── rustfmt.toml
    ├── wolfram-parser
    │   ├── src
    │   │   ├── tokenize.rs
    │   │   ├── tests
    │   │   │   ├── test_source_character.rs
    │   │   │   ├── test_ffi.rs
    │   │   │   ├── test_token_enum.rs
    │   │   │   ├── test_node.rs
    │   │   │   └── test_api.rs
    │   │   ├── read
    │   │   │   └── byte_buffer.rs
    │   │   ├── symbol.rs
    │   │   ├── feature.rs
    │   │   ├── precedence.rs
    │   │   ├── parse
    │   │   │   ├── parse_tests
    │   │   │   │   └── test_parselet.rs
    │   │   │   ├── parselet
    │   │   │   │   ├── times_parselet.rs
    │   │   │   │   ├── under_parselet.rs
    │   │   │   │   └── integral_parselet.rs
    │   │   │   └── parser_docs.rs
    │   │   ├── quirks.rs
    │   │   ├── bin
    │   │   │   └── main.rs
    │   │   ├── long_names.rs
    │   │   ├── iter.rs
    │   │   └── agg.rs
    │   ├── .cargo
    │   │   └── config
    │   ├── Cargo.toml
    │   └── benches
    │   │   ├── bench_fast_string_scan.rs
    │   │   └── bench_general.rs
    └── codeparser-wll
    │   └── Cargo.toml
├── .WolframResources
├── docs
    ├── maintenance.md
    ├── docs.md
    ├── debugging.md
    ├── quirks.md
    ├── concretify.md
    ├── nodes.md
    ├── tokens.md
    ├── implementation.md
    ├── characters.md
    ├── Development.md
    ├── fuzz-testing.md
    ├── stages.md
    └── compatibility.md
├── CodeParser
    ├── Kernel
    │   ├── TokenEnum.wl
    │   ├── Trees.wl
    │   ├── Quirks.wl
    │   ├── Shims.wl
    │   ├── Node.wl
    │   └── Definitions.wl
    ├── PacletInfo.wl.in
    ├── Resources
    │   └── Examples
    │   │   └── Collatz.m
    └── Generate
    │   ├── TokenEnum.wl
    │   ├── Common.wl
    │   └── Precedence.wl
├── .project
├── .github
    └── workflows
    │   ├── run_tests.wls
    │   └── workflow.yml
├── cmake
    ├── WolframLibrary.cmake
    ├── ReplacePacletInfo.cmake
    ├── MacOSXVersionMin.cmake
    ├── InspectFile.cmake
    ├── InstallPaclet.cmake
    ├── PacletInfo.cmake
    └── WolframScript.cmake
├── cpp
    └── include
    │   ├── ExprLibrary.h
    │   └── Diagnostics.h
├── LICENSE
├── run_tests.wls
├── CONTRIBUTING.md
├── CodeTools
    └── Generate
    │   ├── CreatePacletArchive.wl
    │   └── GenerateSources.wl
├── HowToBuild.md
├── README.md
└── scripts
    └── re_build_CodeParser.xml


/Tests/files/small/empty.wl:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/Tests/files/small/crash.txt:
--------------------------------------------------------------------------------
1 | \777


--------------------------------------------------------------------------------
/Tests/files/small/crash6.txt:
--------------------------------------------------------------------------------
1 | >>[ 


--------------------------------------------------------------------------------
/Tests/files/small/carriagereturn.wl:
--------------------------------------------------------------------------------
1 | A


--------------------------------------------------------------------------------
/Tests/files/small/comment.wl:
--------------------------------------------------------------------------------
1 | (* xxx *)


--------------------------------------------------------------------------------
/Tests/files/small/crash10.txt:
--------------------------------------------------------------------------------
1 | >>[[ 


--------------------------------------------------------------------------------
/Tests/files/small/crash12.txt:
--------------------------------------------------------------------------------
1 | a:b~1:2


--------------------------------------------------------------------------------
/Tests/files/small/crash15.txt:
--------------------------------------------------------------------------------
1 | 6`5..


--------------------------------------------------------------------------------
/Tests/files/small/crash16.txt:
--------------------------------------------------------------------------------
1 | 1`+..


--------------------------------------------------------------------------------
/Tests/files/small/crash4.txt:
--------------------------------------------------------------------------------
1 | ?a\
2 | 


--------------------------------------------------------------------------------
/Tests/files/small/crash5.txt:
--------------------------------------------------------------------------------
1 | 1*\
2 | 


--------------------------------------------------------------------------------
/Tests/files/small/span1.wl:
--------------------------------------------------------------------------------
1 | a ;; b
2 | c


--------------------------------------------------------------------------------
/Tests/files/small/strange.wl:
--------------------------------------------------------------------------------
1 | x = 1


--------------------------------------------------------------------------------
/Tests/files/small/crash2.txt:
--------------------------------------------------------------------------------
1 | ?123\
2 | 456"


--------------------------------------------------------------------------------
/Tests/files/small/crash7.txt:
--------------------------------------------------------------------------------
1 | a::
2 | +1


--------------------------------------------------------------------------------
/Tests/files/small/sample.wl:
--------------------------------------------------------------------------------
1 | 
2 | 1+1
3 | 


--------------------------------------------------------------------------------
/Tests/files/small/carriagereturn2.wl:
--------------------------------------------------------------------------------
1 | "
2 | 123"


--------------------------------------------------------------------------------
/Tests/files/small/continuation.wl:
--------------------------------------------------------------------------------
1 | {
2 | 	1\
3 | }


--------------------------------------------------------------------------------
/Tests/files/small/crash14.txt:
--------------------------------------------------------------------------------
1 | \[Integral]\[Sum]


--------------------------------------------------------------------------------
/Tests/files/small/carriagereturn3.wl:
--------------------------------------------------------------------------------
1 | "123\
2 | 456"


--------------------------------------------------------------------------------
/Tests/files/small/coverage3.wl:
--------------------------------------------------------------------------------
1 | ##2
2 | 
3 | a>>>b
4 | 


--------------------------------------------------------------------------------
/Tests/files/small/crash11.txt:
--------------------------------------------------------------------------------
1 | 13333333333333333332^^a


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | 
2 | build*
3 | 
4 | .DS_Store
5 | 
6 | /target
7 | 


--------------------------------------------------------------------------------
/Tests/files/small/carriagereturn4.wl:
--------------------------------------------------------------------------------
1 | 
2 |   f[]\
3 |      /; x


--------------------------------------------------------------------------------
/Tests/files/small/string1.wl:
--------------------------------------------------------------------------------
1 | 
2 | "data\\
3 | "
4 | 
5 | x
6 | 


--------------------------------------------------------------------------------
/.vscode/settings.json:
--------------------------------------------------------------------------------
1 | {
2 |     "cmake.configureOnOpen": false
3 | }


--------------------------------------------------------------------------------
/Tests/files/small/coverage2.wl:
--------------------------------------------------------------------------------
1 | \[Integral] f[x] \[DifferentialD] x


--------------------------------------------------------------------------------
/Tests/files/package.wl:
--------------------------------------------------------------------------------
1 | 
2 | BeginPackage["Foo.m`"]
3 | 
4 | EndPackage[]
5 | 
6 | 


--------------------------------------------------------------------------------
/Tests/files/inputs-0001.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WolframResearch/codeparser/master/Tests/files/inputs-0001.txt


--------------------------------------------------------------------------------
/Tests/files/small/crash13.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WolframResearch/codeparser/master/Tests/files/small/crash13.txt


--------------------------------------------------------------------------------
/Tests/files/small/crash17.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WolframResearch/codeparser/master/Tests/files/small/crash17.txt


--------------------------------------------------------------------------------
/Tests/files/small/crash18.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WolframResearch/codeparser/master/Tests/files/small/crash18.txt


--------------------------------------------------------------------------------
/Tests/files/small/crash3.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WolframResearch/codeparser/master/Tests/files/small/crash3.txt


--------------------------------------------------------------------------------
/Tests/files/small/crash8.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WolframResearch/codeparser/master/Tests/files/small/crash8.txt


--------------------------------------------------------------------------------
/Tests/files/small/crash9.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WolframResearch/codeparser/master/Tests/files/small/crash9.txt


--------------------------------------------------------------------------------
/Tests/files/small/invalid1.wl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WolframResearch/codeparser/master/Tests/files/small/invalid1.wl


--------------------------------------------------------------------------------
/Tests/files/small/unsafe1.wl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WolframResearch/codeparser/master/Tests/files/small/unsafe1.wl


--------------------------------------------------------------------------------
/Tests/files/small/unsafe2.wl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WolframResearch/codeparser/master/Tests/files/small/unsafe2.wl


--------------------------------------------------------------------------------
/Tests/files/stackoverflow1.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WolframResearch/codeparser/master/Tests/files/stackoverflow1.txt


--------------------------------------------------------------------------------
/Tests/files/stackoverflow3.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WolframResearch/codeparser/master/Tests/files/stackoverflow3.txt


--------------------------------------------------------------------------------
/Cargo.toml:
--------------------------------------------------------------------------------
1 | [workspace]
2 | resolver = "2"
3 | members = [
4 |     "crates/wolfram-parser",
5 |     "crates/codeparser-wll"
6 | ]
7 | 


--------------------------------------------------------------------------------
/.lfsconfig:
--------------------------------------------------------------------------------
1 | [lfs]
2 | 	# Ensure that a default checkout of this repository doesn't download any
3 | 	# large files.
4 | 	fetchexclude = *
5 | 


--------------------------------------------------------------------------------
/Tests/files/.gitattributes:
--------------------------------------------------------------------------------
1 | # Use Git LFS to store all of the files in the files/large/ directory
2 | large/* filter=lfs diff=lfs merge=lfs -text
3 | 


--------------------------------------------------------------------------------
/Tests/files/small/coverage1.wl:
--------------------------------------------------------------------------------
 1 | 
 2 | _a`
 3 | 
 4 | _.
 5 | 
 6 | a_.
 7 | 
 8 | 
 9 | __
10 | 
11 | ___
12 | 
13 | a__
14 | 
15 | a___
16 | 
17 | a_.
18 | 
19 | _:1
20 | 


--------------------------------------------------------------------------------
/Tests/files/large/ReliefPlot.nb:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:08dbac51a17a741b997bd7c60d13ea0fe7d6c970191aa2c07356f2c26d70b1b1
3 | size 244135221
4 | 


--------------------------------------------------------------------------------
/Tests/files/large/geomagneticmodels.m:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:682f7198f2468d2ebfa23eacb971dce688cc3229873feb4960f704bf823eec92
3 | size 6827153
4 | 


--------------------------------------------------------------------------------
/Tests/files/large/expandedCompanyDataNew1.m:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:c32e17edc64ee9b94e6c7a36c9dd7e837a1be0d965c51098ae41edd428874af8
3 | size 71487044
4 | 


--------------------------------------------------------------------------------
/crates/rustfmt.toml:
--------------------------------------------------------------------------------
1 | max_width = 80
2 | # use_small_heuristics = "off"
3 | match_block_trailing_comma = true
4 | blank_lines_upper_bound = 3
5 | merge_derives = false
6 | reorder_modules = false


--------------------------------------------------------------------------------
/Tests/files/鳥物.wl:
--------------------------------------------------------------------------------
1 | (* ::Package:: *)
2 | 
3 | BeginPackage["鳥物`"]
4 | 鳥言う::usage = "鳥言う[物] 鳥に何か言うように頼む"
5 | Begin["`私的`"]
6 | 鳥言う[ア_] := ResourceFunction["BirdSay"][ア]
7 | End[]
8 | EndPackage[]
9 | 


--------------------------------------------------------------------------------
/.WolframResources:
--------------------------------------------------------------------------------
1 | Resources[
2 |     Version[1],
3 |     ExecutionBuildCommand["<<CodeParser`"],
4 |     Paclet[
5 |         PacletFolder["CodeParser"],
6 |         FunctionPaclet[False]
7 |     ]
8 | ]


--------------------------------------------------------------------------------
/docs/maintenance.md:
--------------------------------------------------------------------------------
 1 | # Maintenance
 2 | 
 3 | 
 4 | ## Characters
 5 | 
 6 | Periodically run ``System`Private`CharacterNames[]`` to obtain the current list of long names
 7 | 
 8 | 
 9 | 
10 | 
11 | 
12 | 


--------------------------------------------------------------------------------
/CodeParser/Kernel/TokenEnum.wl:
--------------------------------------------------------------------------------
 1 | BeginPackage["CodeParser`TokenEnum`"]
 2 | 
 3 | tokenIsEmpty
 4 | 
 5 | Begin["`Private`"]
 6 | 
 7 | Needs["CodeParser`Library`"] (* For tokenIsEmptyFunc *)
 8 | (* Exported wrapper: forwards its argument unchanged to tokenIsEmptyFunc from CodeParser`Library`. *)
 9 | tokenIsEmpty[tok_] := tokenIsEmptyFunc[tok]
10 | 
11 | 
12 | End[]
13 | 
14 | EndPackage[]
15 | 


--------------------------------------------------------------------------------
/Tests/files/script.wl:
--------------------------------------------------------------------------------
 1 | #!/usr/local/bin/wolframscript
 2 | 
 3 | (* generate high-precision samples of a mixed distribution *)
 4 | Print /@ \
 5 | RandomVariate[MixtureDistribution[
 6 |     {1,2},
 7 |     {NormalDistribution[1,2/10],
 8 |      NormalDistribution[3,1/10]}],
 9 |     10,  WorkingPrecision -> 50]
10 | 


--------------------------------------------------------------------------------
/docs/docs.md:
--------------------------------------------------------------------------------
 1 | 
 2 | # CodeParser
 3 | 
 4 | 
 5 | 
 6 | ## Caveats
 7 | 
 8 | 
 9 | Linear syntax is not parsed, just tokenized.
10 | 
11 | Linear syntax is not abstracted; we don't want to reimplement MakeExpression.
12 | 
13 | Boxes are not abstracted; we don't want to reimplement MakeExpression.
14 | 
15 | 
16 | 
17 | 
18 | 
19 | 
20 | 


--------------------------------------------------------------------------------
/docs/debugging.md:
--------------------------------------------------------------------------------
 1 | 
 2 | type summary add --summary-string "Escape: ${var.escapeBits%u} Sign: ${var.signBit%u} BinValue: ${var.valBits%b} CharValue: ${var.valBits%c} DecimalValue: ${var.valBits%d}" WLCharacter
 3 | 
 4 | 
 5 | 
 6 | type summary add --summary-string "CharValue: ${var.valBits%c} DecimalValue: ${var.valBits%d}" SourceCharacter
 7 | 
 8 | 
 9 | 
10 | 
11 | 


--------------------------------------------------------------------------------
/Tests/files/inputs-0002.txt:
--------------------------------------------------------------------------------
 1 | a`b`c
 2 | a~b~c
 3 | a!b!c
 4 | a@b@c
 5 | a#b#c
 6 | a$b$c
 7 | a%b%c
 8 | a^b^c
 9 | a&b&c
10 | a*b*c
11 | a(b(c
12 | a)b)c
13 | a-b-c
14 | a_b_c
15 | a=b=c
16 | a+b+c
17 | a[b[c
18 | a{b{c
19 | a]b]c
20 | a}b}c
21 | a\b\c
22 | a|b|c
23 | a;b;c
24 | a:b:c
25 | a'b'c
26 | a"b"c
27 | a,b,c
28 | a<b<c
29 | a.b.c
30 | a>b>c
31 | a/b/c
32 | a?b?c


--------------------------------------------------------------------------------
/crates/wolfram-parser/src/tokenize.rs:
--------------------------------------------------------------------------------
 1 | mod token;
 2 | pub(crate) mod token_kind;
 3 | pub(crate) mod tokenizer;
 4 | 
 5 | pub use self::{
 6 |     token::{Token, TokenStr, TokenString},
 7 |     token_kind::TokenKind,
 8 | };
 9 | 
10 | #[doc(hidden)]
11 | pub use self::token::{TokenInput, TokenSource};
12 | 
13 | pub(crate) use self::{token::TokenRef, tokenizer::Tokenizer};
14 | 


--------------------------------------------------------------------------------
/crates/wolfram-parser/.cargo/config:
--------------------------------------------------------------------------------
 1 | # Specify the Rust compiler equivalent of `/MT`, to statically link the runtime
 2 | # on Windows.
 3 | #
 4 | # This prevents the "The program can't start because ucrtbased.dll is missing
 5 | # from your computer" error on Windows 7 and fixes bug 427427:
 6 | #     error 126 "The specified module could not be found"
 7 | # when Visual Studio is not installed.
 8 | [target.x86_64-pc-windows-msvc]
 9 | rustflags = ["-Ctarget-feature=+crt-static"]
10 | 


--------------------------------------------------------------------------------
/crates/wolfram-parser/src/tests/test_source_character.rs:
--------------------------------------------------------------------------------
 1 | use crate::read::WLCharacter;
 2 | // Checks graphicalString() for a tab, 0x1b (RawEscape), 0xb0 (Degree) and U+ABCD (hex escape form).
 3 | #[test]
 4 | fn SourceCharacterTest_Graphical1() {
 5 |     assert_eq!(WLCharacter::new('\t').graphicalString(), "\\t");
 6 | 
 7 |     assert_eq!(WLCharacter::new(0x1b).graphicalString(), "\\[RawEscape]");
 8 | 
 9 |     assert_eq!(WLCharacter::new(0xb0).graphicalString(), "\\[Degree]");
10 | 
11 |     assert_eq!(WLCharacter::new('\u{abcd}').graphicalString(), "\\:abcd");
12 | }
13 | 


--------------------------------------------------------------------------------
/.project:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <projectDescription>
 3 | 	<name>CodeParser</name>
 4 | 	<comment></comment>
 5 | 	<projects>
 6 | 	</projects>
 7 | 	<buildSpec>
 8 | 		<buildCommand>
 9 | 			<name>com.wolfram.eclipse.MEET.MathematicaProjectBuilder</name>
10 | 			<arguments>
11 | 			</arguments>
12 | 		</buildCommand>
13 | 	</buildSpec>
14 | 	<natures>
15 | 		<nature>com.wolfram.eclipse.MEET.SimpleMathematicaNature</nature>
16 | 	</natures>
17 | </projectDescription>
18 | 


--------------------------------------------------------------------------------
/Tests/Regressions.mt:
--------------------------------------------------------------------------------
 1 | Print["\n===== Start Regressions.mt =====\n"]
 2 | 
 3 | Needs["CodeParser`"]
 4 | 
 5 | (*------------------------------------*)
 6 | (* Bug 439902                         *)
 7 | (*------------------------------------*)
 8 | 
 9 | TestMatch[
10 | 	CodeTokenize @ ExportString[1, "JPEG"],
11 | 	{__, ErrorNode[Token`Error`UnterminatedString, _, _]}
12 | ]
13 | 
14 | TestMatch[
15 | 	CodeConcreteParse @ ExportString[1, "JPEG"],
16 | 	ContainerNode[String, {__}, _]
17 | ]
18 | 


--------------------------------------------------------------------------------
/Tests/files/linearsyntax.wl:
--------------------------------------------------------------------------------
1 | 
2 | 	InputAliases -> {"intt" -> \(\[Integral] \(\[SelectionPlaceholder] \(\[DifferentialD] \[Placeholder]\)\)\), "dintt" -> \(\(\[Integral]\_\[SelectionPlaceholder]\%\[Placeholder]\) \(\[Placeholder] \(\[DifferentialD] \[Placeholder]\)\)\), "sumt" -> \(\(\[Sum]\+\(\[SelectionPlaceholder] = \[Placeholder]\)\%\[Placeholder]\) \[Placeholder]\), "prodt" -> \(\(\[Product]\+\(\[SelectionPlaceholder] = \[Placeholder]\)\%\[Placeholder]\) \[Placeholder]\), "dt" -> \(\(\[PartialD]\_\[Placeholder]\)\ \[SelectionPlaceholder]\)}
3 | 


--------------------------------------------------------------------------------
/crates/codeparser-wll/Cargo.toml:
--------------------------------------------------------------------------------
 1 | [package]
 2 | name = "codeparser-wll"
 3 | version = "0.1.0"
 4 | edition = "2021"
 5 | publish = false
 6 | 
 7 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
 8 | 
 9 | [lib]
10 | # This crate compiles to a dynamic library.
11 | crate-type = ["cdylib"]
12 | 
13 | [features]
14 | default = ["USE_MATHLINK"]
15 | USE_MATHLINK = []
16 | 
17 | [dependencies]
18 | wolfram-library-link = { version = "0.2.10", default-features = false }
19 | wolfram-parser = { path = "../wolfram-parser", features = ["CHECK_ABORT"] }
20 | 
21 | 


--------------------------------------------------------------------------------
/Tests/CodeSyntaxQ.mt:
--------------------------------------------------------------------------------
 1 | Print["\n===== Start CodeSyntaxQ.mt =====\n"]
 2 | 
 3 | (* Wolfram Language Test file *)
 4 | 
 5 | Needs["CodeParser`"]
 6 | 
 7 | (*
 8 | There should be no messages from CodeSyntaxQ
 9 | *)
10 | Test[
11 | 	CodeSyntaxQ["#\"\\A\""]
12 | 	,
13 | 	True
14 | 	,
15 | 	{}
16 | 	,
17 | 	TestID->"CodeSyntaxQ-20200702-D6P6W9"
18 | ]
19 | 
20 | Test[
21 | 	CodeSyntaxQ["a>>b\\1c"]
22 | 	,
23 | 	True
24 | 	,
25 | 	{}
26 | 	,
27 | 	TestID->"CodeSyntaxQ-20200703-Q2R5G9"
28 | ]
29 | 
30 | 
31 | 
32 | Test[
33 | 	CodeSyntaxQ[File["doesntexist"]]
34 | 	,
35 | 	False
36 | 	,
37 | 	TestID->"CodeSyntaxQ-20230426-X7Z7D3"
38 | ]
39 | 


--------------------------------------------------------------------------------
/Tests/Quirks.mt:
--------------------------------------------------------------------------------
 1 | Print["\n===== Start Quirks.mt =====\n"]
 2 | 
 3 | Needs["CodeParser`"]
 4 | 
 5 | 
 6 | Test[
 7 | 	Internal`InheritedBlock[{CodeParser`Quirks`$Quirks},
 8 | 		
 9 | 		CodeParser`Quirks`$Quirks["OldAtAtAt"] = True;
10 | 		
11 | 		CodeParse["a @@@ b"]
12 | 	]
13 | 	,
14 | 	ContainerNode[String, {
15 | 		CallNode[LeafNode[Symbol, "Apply", <||>], {
16 | 			LeafNode[Symbol, "a", <|Source -> {{1, 1}, {1, 2}}|>],
17 | 			LeafNode[Symbol, "b", <|Source -> {{1, 7}, {1, 8}}|>],
18 | 			CallNode[LeafNode[Symbol, "List", <||>], {LeafNode[Integer, "1", <||>]}, <||>]}, <|Source -> {{1, 1}, {1, 8}}|>]}, <|Source -> {{1, 1}, {1, 8}}|>]
19 | 	,
20 | 	TestID->"Quirks-20220919-O2S9R6"
21 | ]


--------------------------------------------------------------------------------
/Tests/TokenEnum.mt:
--------------------------------------------------------------------------------
 1 | Print["\n===== Start TokenEnum.mt =====\n"]
 2 | 
 3 | Needs["CodeParser`TokenEnum`"]
 4 | 
 5 | Test @ tokenIsEmpty[EndOfFile]
 6 | Test @ tokenIsEmpty[Token`Fake`ImplicitTimes]
 7 | Test @ tokenIsEmpty[Token`Error`Aborted]
 8 | Test @ tokenIsEmpty[Token`Fake`ImplicitNull]
 9 | Test @ tokenIsEmpty[Token`Fake`ImplicitOne]
10 | Test @ tokenIsEmpty[Token`Fake`ImplicitAll]
11 | Test @ tokenIsEmpty[Token`Error`ExpectedOperand]
12 | Test @ tokenIsEmpty[Token`Error`ExpectedTag]
13 | Test @ tokenIsEmpty[Token`Error`ExpectedFile]
14 | Test @ tokenIsEmpty[Token`Error`PrefixImplicitNull]
15 | Test @ tokenIsEmpty[Token`Error`InfixImplicitNull]
16 | 
17 | Test @ !tokenIsEmpty[String]
18 | Test @ !tokenIsEmpty[Token`Comma]


--------------------------------------------------------------------------------
/docs/quirks.md:
--------------------------------------------------------------------------------
 1 | internal docs: quirks mode
 2 | 
 3 | 
 4 | 
 5 | reproduce kernel buggy behavior
 6 | 
 7 | 
 8 | 
 9 | reproduce front end buggy behavior
10 | 
11 | 
12 | 
13 | 
14 | version 11.0, do blah
15 | 
16 | version 11.2, do blah,
17 | 
18 | etc.
19 | 
20 | 
21 | 
22 | 
23 | #
24 | 
25 | quirks mode DannyL branch stuff:
26 | 
27 | https://bugs.wolfram.com/show?number=139531
28 | 
29 | https://bugs.wolfram.com/show?number=160919
30 | 
31 | 
32 | Prototype build
33 | 
34 | 
35 | 
36 | 
37 | parsing -a/2 is now Times[Times[-1, a], Power[2, -1]]
38 | 
39 | 
40 | branch bugfix/139531_et_al
41 | 
42 | 
43 | 
44 | ``Internal`$PrototypeBuild``
45 | 
46 | 
47 | 
48 | 
49 | 
50 | 
51 | 
52 | 
53 | 
54 | 
55 | 
56 | 
57 | 
58 | 
59 | 
60 | 
61 | 


--------------------------------------------------------------------------------
/docs/concretify.md:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | concretifying is:
 4 | given abstract syntax
 5 | choosing operators +
 6 |     parenthesizing where needed +
 7 |     (removing implicit tokens where possible) +
 8 |     (stringifying where possible) +
 9 |     (compounding where possible) +
10 |     (crazy stuff like converting `-1*a` to `-a`)
11 | make different choices about operators:
12 | e.g. CompoundExpression[] or ; ?
13 | f[x] or f@x ?
14 | 
15 | 
16 | pretty-printing is:
17 | given abstract syntax
18 | concretifying[operators that look nice] then formatting
19 | 
20 | this is better InputForm
21 | 
22 | 
23 | minifying is:
24 | given abstract syntax
25 | concretifying[operators that minimize space], no formatting
26 | 
27 | 
28 | 
29 | 
30 | 
31 | 
32 | 
33 | 
34 | 


--------------------------------------------------------------------------------
/CodeParser/Kernel/Trees.wl:
--------------------------------------------------------------------------------
 1 | BeginPackage["CodeParser`Trees`"]
 2 | 
 3 | ToTree
 4 | 
 5 | Begin["`Private`"]
 6 | 
 7 | Needs["CodeParser`"]
 8 | Needs["CodeParser`Utils`"]
 9 | 
10 | 
11 | (* Build a Tree with NestTree, passing codeChildren to expand nodes and codeData for each node's data. *)
12 | ToTree[parseTree_] :=
13 |   NestTree[codeChildren, parseTree, Infinity, codeData]
14 | 
15 | 
16 | (* Generic node head[tag, children, data]: children are the 2nd argument; data keeps {head, tag, data}. *)
17 | codeChildren[head_[tag_, children_, data_]] := children
18 | 
19 | codeData[head_[tag_, children_, data_]] :=
20 |   {head, tag, data}
21 | 
22 | 
23 | (* LeafNode and ErrorNode: codeChildren gives None, and the second argument is kept in the node data. *)
24 | codeChildren[LeafNode[tag_, str_, data_]] := None
25 | 
26 | codeData[LeafNode[tag_, str_, data_]] :=
27 |   {LeafNode, tag, str, data}
28 | 
29 | 
30 | codeChildren[ErrorNode[tag_, str_, data_]] := None
31 | 
32 | codeData[ErrorNode[tag_, str_, data_]] :=
33 |   {ErrorNode, tag, str, data}
34 | 
35 | 
36 | End[]
37 | 
38 | EndPackage[]
39 | 


--------------------------------------------------------------------------------
/.github/workflows/run_tests.wls:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env wolframscript
 2 | 
 3 | Needs["MUnit`"]
 4 | 
 5 | (* Returns a fresh logger symbol; TestRunSucceededQ[logger] stays True until one of the Log* hooks below fires. *)
 6 | createSuccessLogger[] := With[{logger = Unique[]},
 7 |   Module[{success = True},
 8 |    logger /: LogFatal[logger, _] := success = False;
 9 |    logger /: LogFailure[logger, _] := success = False;
10 |    logger /: LogMessagesFailure[logger, _] := success = False;
11 |    logger /: LogError[logger, _] := success = False;
12 |    logger /: TestRunSucceededQ[logger] := success;
13 |    logger
14 |    ]
15 |   ]
16 | 
17 | successLogger = createSuccessLogger[]
18 | 
19 | TestRun["Tests/TestSuite.mt", Loggers :> {VerbosePrintLogger[], successLogger}]
20 | 
21 | If[TrueQ[TestRunSucceededQ[successLogger]],
22 |   Exit[0]
23 |   ,
24 |   Exit[1]
25 | ]
26 | 


--------------------------------------------------------------------------------
/docs/nodes.md:
--------------------------------------------------------------------------------
 1 | 
 2 | # Nodes
 3 | 
 4 | ## Terminology
 5 | 
 6 | leaf: Integer, Real, Symbol, String, etc.
 7 | 
 8 | 
 9 | 
10 | 
11 | ## Philosophy
12 | 
13 | if a node is not something else, then it is a leaf
14 | 
15 | 
16 | 
17 | ## Structure
18 | 
19 | All nodes have a uniform structure:
20 | 
21 | `Node[tag or operator, contents or children, opts]`
22 | 
23 | 
24 | 
25 | 
26 | We take advantage of the symbolic nature of WL and use the function symbols themselves for tags:
27 | 
28 | 
29 | a+b is parsed as:
30 | ```
31 | InfixNode[Plus, {LeafNode[Symbol, "a", <||>], LeafNode[Token`Plus, "+", <||>], LeafNode[Symbol, "b", <||>]}, <||>]
32 | ```
33 | 
34 | and a::b is parsed as:
35 | ```
36 | InfixNode[MessageName, {LeafNode[Symbol, "a", <||>], LeafNode[Token`ColonColon, "::", <||>], LeafNode[String, "b", <||>]}, <||>]
37 | ```
38 | 
39 | 
40 | 
41 | 
42 | 


--------------------------------------------------------------------------------
/CodeParser/PacletInfo.wl.in:
--------------------------------------------------------------------------------
 1 | 
 2 | Paclet[
 3 |   Name -> "CodeParser",
 4 |   Version -> "1.10",
 5 |   WolframVersion -> "12.1+",
 6 |   Description -> "Parse Wolfram Language code.",
 7 |   Creator -> "Brenton Bostick <brenton@wolfram.com>",
 8 |   BuildDate -> "",
 9 |   BuildNumber -> 0,
10 |   BuildWolframVersionNumber -> 0,
11 |   BuildWolframLibraryVersion -> 0,
12 |   Transport -> "",
13 |   Updating -> Automatic,
14 |   Extensions -> {
15 |     {"Kernel", Root -> "Kernel", Context -> "CodeParser`"},
16 |     {"Documentation", Language -> All, MainPage -> "Guides/CodeParser"},
17 |     {"LibraryLink"},
18 |     {"Resource", Root -> "Resources",
19 |       Resources -> {
20 |         {"Collatz", "Examples/Collatz.m"},
21 |         {"LongNames", "Generated/LongNames.wl"},
22 |         {"Precedence", "Generated/Precedence.wl"}
23 |       }
24 |     }
25 |   }
26 | ]
27 | 


--------------------------------------------------------------------------------
/cmake/WolframLibrary.cmake:
--------------------------------------------------------------------------------
 1 | 
 2 | macro(ParseWolframLibraryHeader)
 3 | 
 4 | 	if(NOT EXISTS ${WOLFRAMLIBRARY_INCLUDE_DIR})
 5 | 	message(FATAL_ERROR "WOLFRAMLIBRARY_INCLUDE_DIR does not exist. WOLFRAMLIBRARY_INCLUDE_DIR: ${WOLFRAMLIBRARY_INCLUDE_DIR}")
 6 | 	endif()
 7 | 
 8 | 	set(WOLFRAMLIBRARY_HEADER ${WOLFRAMLIBRARY_INCLUDE_DIR}/WolframLibrary.h)
 9 | 
10 | 	if(NOT EXISTS ${WOLFRAMLIBRARY_HEADER})
11 | 	message(FATAL_ERROR "WOLFRAMLIBRARY_HEADER does not exist. WOLFRAMLIBRARY_HEADER: ${WOLFRAMLIBRARY_HEADER}")
12 | 	endif()
13 | 
14 | 	file(READ ${WOLFRAMLIBRARY_HEADER} filedata)
15 | 
16 | 	string(REGEX MATCH "#define WolframLibraryVersion ([0-9]+)" _ ${filedata})
17 | 
18 | 	set(WOLFRAMLIBRARY_VERSION ${CMAKE_MATCH_1})
19 | 
20 | 	if(NOT DEFINED WOLFRAMLIBRARY_VERSION)
21 | 	message(FATAL_ERROR "WOLFRAMLIBRARY_VERSION was not set.")
22 | 	endif()
23 | 
24 | endmacro(ParseWolframLibraryHeader)
25 | 


--------------------------------------------------------------------------------
/Tests/files/jpeg-string.txt:
--------------------------------------------------------------------------------
1 | ÿØÿà JFIF  H H  ÿá ÔExif  II*      	       	           n       v   (	       1 4   ~   2    ²   *	    úÿÿÿ    H      H      Created with the Wolfram Language : www.wolfram.com 2023:11:16 12:20:24-06:00 ÿÛ C 		
2 |  $.' ",#(7),01444'9=82<.342ÿÛ C			2!!22222222222222222222222222222222222222222222222222ÿÀ   " ÿÄ           	
3 | ÿÄ µ   } !1AQa"q2¡#B±ÁRÑð$3br	
4 | %&'()*456789:CDEFGHIJSTUVWXYZcdefghijstuvwxyz¢£¤¥¦§¨©ª²³´µ¶·¸¹ºÂÃÄÅÆÇÈÉÊÒÓÔÕÖ×ØÙÚáâãäåæçèéêñòóôõö÷øùúÿÄ        	
5 | ÿÄ µ  w !1AQaq"2B¡±Á	#3RðbrÑ
6 | $4á%ñ&'()*56789:CDEFGHIJSTUVWXYZcdefghijstuvwxyz¢£¤¥¦§¨©ª²³´µ¶·¸¹ºÂÃÄÅÆÇÈÉÊÒÓÔÕÖ×ØÙÚâãäåæçèéêòóôõö÷øùúÿÚ   ? öhõÙ[ÅÒhRiïCuÉHU (ärÝý:QY¯aâ3ã¨õqe¥}-ÚÏþ?äóLm"¶ý¾N7a~îìµE utQE ÿÙ


--------------------------------------------------------------------------------
/Tests/TestSuite.mt:
--------------------------------------------------------------------------------
 1 | (* Wolfram Language Test file *)
 2 | 
 3 | Needs["MUnit`"]
 4 | 
 5 | SetEnvironment["CODEPARSER_DEBUG" -> "True"]
 6 | 
 7 | TestSuite[{
 8 | 	"Abstract.mt",
 9 | 	"AbstractCallNode.mt",
10 | 	"AbstractSyntaxErrorNodes.mt",
11 | 	"AbstractSyntaxIssues.mt",
12 | 	"Aggregate.mt",
13 | 	"Arrows.mt",
14 | 	"Boxes.mt",
15 | 	"CallMissingCloserNodes.mt",
16 | 	"Characters.mt",
17 | 	"CodeParser.mt",
18 | 	"CodeSyntaxQ.mt",
19 | 	"Concrete.mt",
20 | 	"Concretify.mt",
21 | 	"Definitions.mt",
22 | 	"Error.mt",
23 | 	"Errors.mt",
24 | 	"File.mt",
25 | 	"Inequality.mt",
26 | 	"LineContinuations.mt",
27 | 	"Parse.mt",
28 | 	"Quirks.mt",
29 | 	"Regressions.mt",
30 | 	"SafeString.mt",
31 | 	"Scoping.mt",
32 | 	"Span.mt",
33 | 	"SyntaxErrorNodes.mt",
34 | 	"SyntaxIssues.mt",
35 | 	"TokenErrors.mt",
36 | 	"TokenEnum.mt",
37 | 	"Tokenize.mt",
38 | 	"ToNode.mt",
39 | 	"TopLevel.mt",
40 | 	"ToString.mt",
41 | 	"TypeSpecifier.mt",
42 | 	"Unsafe.mt",
43 | 	"Weird.mt"
44 | }]
45 | 


--------------------------------------------------------------------------------
/cmake/ReplacePacletInfo.cmake:
--------------------------------------------------------------------------------
 1 | 
 2 | file(READ ${PACLETINFO_IN_SOURCE} filedata)
 3 | 
 4 | string(TIMESTAMP DATESTRING "%a %d %b %Y %H:%M:%S")
 5 | 
 6 | string(REGEX REPLACE "BuildDate -> \"[a-zA-Z0-9 :]*\"" "BuildDate -> \"${DATESTRING}\"" filedata ${filedata})
 7 | 
 8 | string(REGEX REPLACE "BuildNumber -> [0-9]+" "BuildNumber -> ${BUILDNUMBER}" filedata ${filedata})
 9 | 
10 | string(REGEX REPLACE "BuildWolframVersionNumber -> [0-9]+" "BuildWolframVersionNumber -> ${VERSION_NUMBER}" filedata ${filedata})
11 | 
12 | string(REGEX REPLACE "BuildWolframLibraryVersion -> [0-9]+" "BuildWolframLibraryVersion -> ${WOLFRAMLIBRARY_VERSION}" filedata ${filedata})
13 | 
14 | string(REGEX REPLACE "Transport -> \"[a-zA-Z]*\"" "Transport -> \"${TRANSPORT}\"" filedata ${filedata})
15 | 
16 | if(LOCAL_BUILD)
17 | 
18 | string(REGEX REPLACE "Version -> \"[0-9\\.]+\"," "Version -> \"${LOCAL_BUILD_VERSION}\"(* local build *)," filedata ${filedata})
19 | 
20 | endif()
21 | 
22 | file(WRITE ${REPLACED_PACLETINFO} "${filedata}")
23 | 


--------------------------------------------------------------------------------
/cmake/MacOSXVersionMin.cmake:
--------------------------------------------------------------------------------
 1 | 
 2 | macro(CheckMacOSXVersionMin)
 3 | 
 4 |   if(NOT EXISTS ${WOLFRAMKERNEL})
 5 |   message(FATAL_ERROR "WOLFRAMKERNEL does not exist. WOLFRAMKERNEL: ${WOLFRAMKERNEL}")
 6 |   endif()
 7 |   
 8 |   execute_process(
 9 |     COMMAND
10 |       ${WOLFRAMKERNEL} -noinit -noprompt -nopaclet -nostartuppaclets -run Pause[${KERNEL_PAUSE}]\;Needs["CCompilerDriver`"]\;Print[OutputForm[If[$VersionNumber\ >=\ 12.2,\ StringReplace[CCompilerDriver`CCompilerDriverBase`MacOSXVersionMinFlag[],\ "-mmacosx-version-min="\ ->\ ""],\ "10.10"]]]\;Exit[]
11 |     OUTPUT_VARIABLE
12 |       MACOSX_VERSION_MIN
13 |     OUTPUT_STRIP_TRAILING_WHITESPACE
14 |     WORKING_DIRECTORY
15 |       ${PROJECT_SOURCE_DIR}
16 |     TIMEOUT
17 |       ${KERNEL_TIMEOUT}
18 |     RESULT_VARIABLE
19 |       MACOSX_VERSION_MIN_RESULT
20 |   )
21 | 
22 |   if(NOT ${MACOSX_VERSION_MIN_RESULT} EQUAL "0")
23 |     message(FATAL_ERROR "Bad exit code from MacOSXVersionMin script: ${MACOSX_VERSION_MIN_RESULT}")
24 |   endif()
25 | 
26 | endmacro(CheckMacOSXVersionMin)
27 | 


--------------------------------------------------------------------------------
/cpp/include/ExprLibrary.h:
--------------------------------------------------------------------------------
 1 | 
 2 | #pragma once
 3 | 
 4 | #include "WolframLibrary.h" // for mint
 5 | #undef True
 6 | #undef False
 7 | 
 8 | #include <cstdint> // for int64_t
 9 | 
10 | using expr = void *;
11 | using Buffer = const unsigned char *;
12 | 
13 | 
14 | EXTERN_C expr Expr_FromInteger64(int64_t val);
15 | 
16 | EXTERN_C expr Expr_FromReal64(double val);
17 | 
18 | EXTERN_C expr Expr_UTF8BytesToStringExpr(Buffer buf, mint size);
19 | 
20 | EXTERN_C expr Expr_MEncodedStringToSymbolExpr(const char *str);
21 | 
22 | //
23 | // The suffix A means automatically handle releasing reference to head
24 | //
25 | EXTERN_C expr Expr_BuildExprA(expr head, mint argCount);
26 | 
27 | //
28 | // The suffix A means automatically handle releasing reference to arg
29 | //
30 | // index is base 1
31 | //
32 | EXTERN_C void Expr_InsertA(expr e, mint index, expr arg);
33 | 
34 | EXTERN_C void Expr_Release(expr e);
35 | 
36 | EXTERN_C void Expr_StringExprToUTF8Bytes(expr e, Buffer *buffer, mint *len);
37 | 
38 | EXTERN_C expr Expr_LongNameSuggestion(expr input);
39 | 


--------------------------------------------------------------------------------
/crates/wolfram-parser/src/tests/test_ffi.rs:
--------------------------------------------------------------------------------
 1 | //! Test that types used in this crate's LibraryLink API have a stable
 2 | //! representation. If these tests fail, that implies that a version of this
 3 | //! crate is not backwards compatible with the version the tests were initially
 4 | //! written for.
 5 | 
 6 | use pretty_assertions::assert_eq;
 7 | 
 8 | use crate::{EncodingMode, FirstLineBehavior, SourceConvention, StringifyMode};
 9 | 
10 | #[test]
11 | fn public_enum_values() {
12 |     assert_eq!(FirstLineBehavior::NotScript as i32, 0);
13 |     assert_eq!(FirstLineBehavior::Check as i32, 1);
14 |     assert_eq!(FirstLineBehavior::Script as i32, 2);
15 | 
16 |     assert_eq!(EncodingMode::Normal as i32, 0);
17 |     assert_eq!(EncodingMode::Box as i32, 1);
18 | 
19 |     assert_eq!(StringifyMode::Normal as i32, 0);
20 |     assert_eq!(StringifyMode::Tag as i32, 1);
21 |     assert_eq!(StringifyMode::File as i32, 2);
22 | 
23 |     assert_eq!(SourceConvention::LineColumn as i32, 0);
24 |     assert_eq!(SourceConvention::CharacterIndex as i32, 1);
25 | }
26 | 


--------------------------------------------------------------------------------
/crates/wolfram-parser/Cargo.toml:
--------------------------------------------------------------------------------
 1 | [package]
 2 | name = "wolfram-parser"
 3 | version = "0.1.0"
 4 | edition = "2021"
 5 | rust-version = "1.70"
 6 | 
 7 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
 8 | 
 9 | [lib]
10 | bench = false
11 | 
12 | [[bin]]
13 | name = "main"
14 | bench = false
15 | 
16 | [features]
17 | COMPUTE_SOURCE = []
18 | FAST_STRING_SCAN = []
19 | 
20 | # Features used when building the LibraryLink dynamic library.
21 | CHECK_ABORT = ["wolfram-library-link"]
22 | 
23 | default = ["COMPUTE_SOURCE"]
24 | 
25 | 
26 | [dependencies]
27 | edit-distance = "2.1.0"
28 | memchr = "2.5.0"
29 | 
30 | wolfram-expr = "0.1.4"
31 | 
32 | wolfram-library-link = { version = "0.2.10", optional = true, default-features = false }
33 | 
34 | [dev-dependencies]
35 | pretty_assertions = "1.2.1"
36 | 
37 | criterion = "0.5.1"
38 | 
39 | [[bench]]
40 | name = "bench_general"
41 | harness = false
42 | 
43 | [[bench]]
44 | name = "bench_fast_string_scan"
45 | harness = false
46 | required-features = ["FAST_STRING_SCAN"]
47 | 


--------------------------------------------------------------------------------
/Tests/ToNode.mt:
--------------------------------------------------------------------------------
 1 | Print["\n===== Start ToNode.mt =====\n"]
 2 | 
 3 | Needs["CodeParser`"]
 4 | 
 5 | 
 6 | Clear[a]
 7 | 
 8 | Test[
 9 | 	ToNode[a]
10 | 	,
11 | 	LeafNode[Symbol, "Global`a", <||>]
12 | 	,
13 | 	TestID->"ToNode-20181230-L1R6Q9"
14 | ]
15 | 
16 | 
17 | Test[
18 | 	ToNode["abc"]
19 | 	,
20 | 	LeafNode[String, "\"abc\"", <||>]
21 | 	,
22 | 	TestID->"ToNode-20181230-S1R5V6"
23 | ]
24 | 
25 | 
26 | Test[
27 | 	ToNode[123]
28 | 	,
29 | 	LeafNode[Integer, "123", <||>]
30 | 	,
31 | 	TestID->"ToNode-20181230-O2A4T0"
32 | ]
33 | 
34 | 
35 | Test[
36 | 	ToNode[1.23]
37 | 	,
38 | 	LeafNode[Real, "1.23", <||>]
39 | 	,
40 | 	TestID->"ToNode-20181230-E5S9U5"
41 | ]
42 | 
43 | 
44 | 
45 | rat = 1/16
46 | 
47 | Test[
48 | 	ToNode[rat]
49 | 	,
50 | 	LeafNode[Rational, "16^^1*^-1", <||>]
51 | 	,
52 | 	TestID->"ToNode-20200413-V3L1T8"
53 | ]
54 | 
55 | 
56 | rat = 1/37
57 | 
58 | Test[
59 | 	ToNode[rat]
60 | 	,
61 | 	CallNode[LeafNode[Symbol, "Rational", <||>], {LeafNode[Integer, "1", <||>], LeafNode[Integer, "37", <||>]}, <||>]
62 | 	,
63 | 	TestID->"ToNode-20200413-V2I2X0"
64 | ]
65 | 
66 | 
67 | 
68 | 
69 | 
70 | 
71 | 
72 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | Copyright 2020 Wolfram Research Inc.
 2 | 
 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of
 4 | this software and associated documentation files (the "Software"), to deal in
 5 | the Software without restriction, including without limitation the rights to
 6 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
 7 | the Software, and to permit persons to whom the Software is furnished to do so,
 8 | subject to the following conditions:
 9 | 
10 | The above copyright notice and this permission notice shall be included in all
11 | copies or substantial portions of the Software.
12 | 
13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
15 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
16 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
17 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
18 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.


--------------------------------------------------------------------------------
/crates/wolfram-parser/src/tests/test_token_enum.rs:
--------------------------------------------------------------------------------
 1 | use crate::tokenize::TokenKind;
 2 | 
 3 | 
 4 | #[test]
 5 | fn TokenEnumTest_Trivia() {
 6 |     assert!(TokenKind::Whitespace.isTrivia());
 7 | }
 8 | 
 9 | #[test]
10 | fn TokenEnumTest_PossibleBeginning() {
11 |     assert!(TokenKind::Symbol.isPossibleBeginning());
12 | 
13 |     assert!(TokenKind::SemiSemi.isPossibleBeginning());
14 | }
15 | 
16 | #[test]
17 | fn TokenEnumTest_Closer() {
18 |     assert!(TokenKind::CloseSquare.isCloser());
19 | 
20 |     assert!(TokenKind::LongName_RightCeiling.isCloser());
21 | }
22 | 
23 | #[test]
24 | fn TokenEnumTest_Error() {
25 |     assert!(TokenKind::Error_ExpectedTag.isError());
26 | 
27 |     assert!(TokenKind::Error_UnsupportedToken.isError());
28 | }
29 | 
30 | #[test]
31 | fn TokenEnumTest_Unterminated() {
32 |     assert!(TokenKind::Error_UnterminatedString.isUnterminated());
33 | 
34 |     assert!(TokenKind::Error_UnterminatedComment.isUnterminated());
35 | }
36 | 
37 | #[test]
38 | fn TokenEnumTest_Empty() {
39 |     assert!(TokenKind::EndOfFile.isEmpty());
40 | 
41 |     assert!(TokenKind::Error_ExpectedOperand.isEmpty());
42 | }
43 | 


--------------------------------------------------------------------------------
/run_tests.wls:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env wolframscript
 2 | 
 3 | $builtPacletDir = FileNameJoin[{Directory[], "build", "paclet", "CodeParser"}];
 4 | 
 5 | If[!FileExistsQ[$builtPacletDir], (* stop with a non-zero exit code; an uncaught Throw here would skip the Exit and let the script continue *)
 6 | 	Print["Failed: built paclet directory does not exist: ", $builtPacletDir];
 7 | 	Exit[1];
 8 | ];
 9 | 
10 | 
11 | Print[
12 | 	"Loading CodeParser: ", PacletDirectoryLoad[$builtPacletDir]
13 | ]
14 | 
15 | Needs["CodeParser`"]
16 | 
17 | 
18 | 
19 | Needs["MUnit`"]
20 | 
21 | (* Create a fresh MUnit logger symbol; any fatal, failure, messages-failure or error event clears its success flag, which TestRunSucceededQ reports. *)
22 | createSuccessLogger[] := With[{logger = Unique[]},
23 |   Module[{success = True},
24 |    logger /: LogFatal[logger, _] := success = False;
25 |    logger /: LogFailure[logger, _] := success = False;
26 |    logger /: LogMessagesFailure[logger, _] := success = False;
27 |    logger /: LogError[logger, _] := success = False;
28 |    logger /: TestRunSucceededQ[logger] := success;
29 |    logger
30 |    ]
31 |   ]
32 | 
33 | successLogger = createSuccessLogger[]
34 | 
35 | Quiet @ EchoTiming @ TestRun[
36 | 	"Tests/TestSuite.mt",
37 | 	Loggers :> {VerbosePrintLogger[], successLogger}
38 | ]
39 | 
40 | If[TrueQ[TestRunSucceededQ[successLogger]],
41 |   Exit[0]
42 |   ,
43 |   Exit[1]
44 | ]
45 | 


--------------------------------------------------------------------------------
/Tests/Tokenize.mt:
--------------------------------------------------------------------------------
 1 | Print["\n===== Start Tokenize.mt =====\n"]
 2 | 
 3 | Needs["CodeParser`"]
 4 | 
 5 | 
 6 | (*
 7 | Comments
 8 | *)
 9 | Test[
10 | 	CodeTokenize["(* \\.28\\.2a *)"]
11 | 	,
12 | 	{LeafNode[Token`Comment, "(* \\.28\\.2a *)", <|Source -> {{1, 1}, {1, 15}}|>]}
13 | 	,
14 | 	TestID->"Tokenize-20181208-O3D5M5"
15 | ]
16 | 
17 | 
18 | (*
19 | Number Errors
20 | *)
21 | Test[
22 | 	CodeTokenize["1.2``->3"]
23 | 	,
24 | 	{
25 | 		ErrorNode[Token`Error`Number, "1.2``-", <|Source -> {{1, 1}, {1, 7}}|>],
26 | 		LeafNode[Token`Greater, ">", <|Source -> {{1, 7}, {1, 8}}|>],
27 | 		LeafNode[Integer, "3", <|Source -> {{1, 8}, {1, 9}}|>]}
28 | 	,
29 | 	TestID->"Tokenize-20181215-Z0H7Y5"
30 | ]
31 | 
32 | 
33 | (*
34 | String Errors
35 | *)
36 | Test[
37 | 	CodeTokenize["\"123\\\""]
38 | 	,
39 | 	{ErrorNode[Token`Error`UnterminatedString, "\"123\\\"", <|Source -> {{1, 1}, {1, 7}}|>]}
40 | 	,
41 | 	TestID->"Tokenize-20190406-A1G3U8"
42 | ]
43 | 
44 | 
45 | Test[
46 | 	CodeTokenize["*)"]
47 | 	,
48 | 	{ErrorNode[Token`Error`UnexpectedCommentCloser, "*)", <|Source -> {{1, 1}, {1, 3}}|>]}
49 | 	,
50 | 	TestID->"Tokenize-20220709-J1V7W8"
51 | ]
52 | 
53 | 
54 | 
55 | 
56 | 
57 | 


--------------------------------------------------------------------------------
/cmake/InspectFile.cmake:
--------------------------------------------------------------------------------
 1 | 
 2 | if(NOT CODEPARSER_EXE)
 3 | return()
 4 | endif()
 5 | 
 6 | if(NOT EXISTS ${CODEPARSER_EXE})
 7 | return()
 8 | endif()
 9 | 
10 | execute_process(
11 |   COMMAND
12 |     ${CODEPARSER_EXE} -check -file ${SRC}
13 |   RESULT_VARIABLE
14 |     CODEPARSER_RESULT
15 | )
16 | 
17 | if(${CODEPARSER_RESULT} EQUAL "0")
18 | return()
19 | endif()
20 | 
21 | if(NOT ${CODEPARSER_RESULT} EQUAL "1")
22 | message(WARNING "Internal error. CODEPARSER_RESULT: ${CODEPARSER_RESULT}")
23 | return()
24 | endif()
25 | 
26 | #
27 | # We know there was some problem, so now use CodeInspector to report the problem
28 | #
29 | 
30 | if(NOT WOLFRAMKERNEL)
31 | return()
32 | endif()
33 | 
34 | if(NOT EXISTS ${WOLFRAMKERNEL})
35 | return()
36 | endif()
37 | 
38 | set(CODE "\
39 | If[FailureQ[FindFile[\"CodeInspector`\"]], Exit[0]]\;\
40 | Needs[\"CodeInspector`\"]\;\
41 | Print[\"Code inspection...\" //OutputForm]\;\
42 | Print[CodeInspector`CodeInspectSummarize[File[\"${SRC}\"]] //OutputForm]\;\
43 | Exit[1]\
44 | ")
45 | 
46 | execute_process(
47 |   COMMAND
48 |     ${WOLFRAMKERNEL} -noinit -noprompt -nopaclet -nostartuppaclets -run ${CODE}
49 |   TIMEOUT
50 |     ${KERNEL_TIMEOUT}
51 | )
52 | 
53 | message(FATAL_ERROR "File had fatal errors: ${SRC}")
54 | 


--------------------------------------------------------------------------------
/cmake/InstallPaclet.cmake:
--------------------------------------------------------------------------------
 1 | 
 2 | if(NOT EXISTS ${WOLFRAMKERNEL})
 3 | message(FATAL_ERROR "WOLFRAMKERNEL does not exist. WOLFRAMKERNEL: ${WOLFRAMKERNEL}")
 4 | endif()
 5 | # Wolfram Language program passed to the kernel with -run below; every ';' is written as '\;' so CMake list splitting cannot break it apart.
 6 | set(CODE "\
 7 | Print[OutputForm[\"Calling PacletInstall...\"]]\;
 8 | Check[
 9 | res = PacletInstall[\"${PACLET_ARCHIVE}\", ForceVersionInstall -> True]\;
10 | ,
11 | Print[OutputForm[Row[{\"$VersionNumber: \", NumberForm[$VersionNumber, {2, 1}]}]]]\;
12 | Print[OutputForm[Row[{\"Paclet WolframVersion: \", \"${PACLET_WOLFRAMVERSION}\"}]]]\;
13 | Print[OutputForm[Row[{\"To prevent this PacletInstall::compat message, update PacletInfo.wl.in with WolframVersion -> \\\"\", NumberForm[$VersionNumber, {2, 1}] ,\"\\\" and build and install again.\"}]]]\;
14 | res
15 | ,
16 | {PacletInstall::compat}
17 | ]\;
18 | Print[res //OutputForm]\;
19 | Print[OutputForm[\"Done PacletInstall\"]]\;
20 | If[!PacletObjectQ[res],
21 |   Exit[1]
22 | ]\;
23 | Exit[0]
24 | ")
25 | 
26 | execute_process(
27 |   COMMAND
28 |     ${WOLFRAMKERNEL} -noinit -noprompt -run ${CODE}
29 |   TIMEOUT
30 |     ${KERNEL_TIMEOUT}
31 |   RESULT_VARIABLE
32 |     INSTALL_RESULT
33 | )
34 | 
35 | if(NOT ${INSTALL_RESULT} EQUAL "0")
36 |   message(FATAL_ERROR "Bad exit code from install: ${INSTALL_RESULT}")
37 | endif()
38 | 


--------------------------------------------------------------------------------
/Tests/SafeString.mt:
--------------------------------------------------------------------------------
 1 | Print["\n===== Start SafeString.mt =====\n"]
 2 | 
 3 | Needs["CodeParser`"]
 4 | 
 5 | Test[
 6 | 	SafeString[ByteArray[ToCharacterCode["1+1"]]]
 7 | 	,
 8 | 	"1+1"
 9 | 	,
10 | 	TestID->"SafeString-20200103-U8A6X2"
11 | ]
12 | 
13 | (*
14 | Invalid sequences
15 | *)
16 | Test[
17 | 	SafeString[ByteArray[{206}]]
18 | 	,
19 | 	Missing["UnsafeCharacterEncoding_IncompleteUTF8Sequence"]
20 | 	,
21 | 	TestID->"SafeString-20200103-K0M0B9"
22 | ]
23 | 
24 | (*
25 | High surrogates
26 | *)
27 | Test[
28 | 	(*
29 | 	UTF-8 for 0xd800
30 | 	*)
31 | 	SafeString[ByteArray[{237, 160, 128}]]
32 | 	,
33 | 	Missing["UnsafeCharacterEncoding_StraySurrogate"]
34 | 	,
35 | 	TestID->"SafeString-20200103-Z8W9G3"
36 | ]
37 | 
38 | (*
39 | Low surrogates
40 | *)
41 | Test[
42 | 	(*
43 | 	UTF-8 for 0xdc00
44 | 	*)
45 | 	SafeString[ByteArray[{237, 176, 128}]]
46 | 	,
47 | 	Missing["UnsafeCharacterEncoding_StraySurrogate"]
48 | 	,
49 | 	TestID->"SafeString-20200103-G7F2O6"
50 | ]
51 | 
52 | 
53 | 
54 | (*
55 | BOM
56 | *)
57 | Test[
58 | 	(*
59 | 	UTF-8 for 0xfeff
60 | 	*)
61 | 	SafeString[ByteArray[{239, 187, 191}]]
62 | 	,
63 | 	Missing["UnsafeCharacterEncoding_BOM"]
64 | 	,
65 | 	TestID->"SafeString-20200103-V9G4Y6"
66 | ]
67 | 
68 | 
69 | 
70 | 
71 | 
72 | 
73 | 
74 | 
75 | 
76 | 
77 | 
78 | 


--------------------------------------------------------------------------------
/crates/wolfram-parser/src/read/byte_buffer.rs:
--------------------------------------------------------------------------------
 1 | //! A byte buffer that can return the current byte and advance to the next byte.
 2 | 
 3 | use crate::read::Reader;
 4 | 
 5 | //
 6 | // Precondition: buffer is pointing to current byte
 7 | // Postcondition: buffer is pointing to 1 byte past current byte
 8 | //
 9 | // Return current byte
10 | //
11 | pub(crate) fn ByteBuffer_nextByte(session: &mut Reader) -> u8 {
12 |     // assert!((session.start <= session.buffer && session.buffer <= session.end));
13 | 
14 |     // if session.buffer == session.end {
15 |     if session.offset >= session.input.len() {
16 |         session.wasEOF = true;
17 |         // TODO: Make this return None.
18 |         return 0xff;
19 |     }
20 | 
21 |     // session.buffer += 1;
22 |     // return *(session.buffer);
23 | 
24 |     let byte = session.buffer()[0];
25 | 
26 |     session.offset += 1;
27 | 
28 |     return byte;
29 | }
30 | 
31 | pub(crate) fn ByteBuffer_currentByte(session: &Reader) -> u8 {
32 |     // assert!((session.start <= session.buffer && session.buffer <= session.end));
33 | 
34 |     // if session.buffer == session.end {
35 |     if session.offset >= session.input.len() {
36 |         return 0xff;
37 |     }
38 | 
39 |     // return *(session.buffer);
40 | 
41 |     return session.buffer()[0];
42 | }
43 | 


--------------------------------------------------------------------------------
/crates/wolfram-parser/src/symbol.rs:
--------------------------------------------------------------------------------
 1 | #![allow(non_upper_case_globals)]
 2 | 
 3 | use wolfram_expr::symbol::SymbolRef;
 4 | 
 5 | pub type Symbol = SymbolRef<'static>;
 6 | 
 7 | 
 8 | //==========================================================
 9 | // Symbol constant declarations
10 | //==========================================================
11 | 
12 | macro_rules! symbol {
13 |     ($name:ident) => {
14 |         pub const $name: Symbol =
15 |             unsafe { Symbol::unchecked_new(concat!("System`", stringify!($name))) };
16 |     };
17 | 
18 |     ($($name:ident);* $(;)?) => {
19 |         $(
20 |             $crate::symbol::symbol!($name);
21 |         )*
22 |     };
23 | }
24 | 
25 | macro_rules! nested_symbol {
26 |     ($context:ident :: { $($name:ident),* }) => {
27 |         pub mod $context {
28 |         $(
29 |             pub const $name: $crate::symbol::Symbol = unsafe {
30 |                 $crate::symbol::Symbol::unchecked_new(concat!(
31 |                     stringify!($context),
32 |                     "`",
33 |                     stringify!($name)
34 |                 ))
35 |             };
36 |         )*
37 |         }
38 |     };
39 | 
40 |     ($($($context:ident ::)+ $name:ident);* $(;)?) => {
41 |         $(
42 |             symbol!($($context ::)* $name);
43 |         )*
44 |     };
45 | }
46 | 
47 | 
48 | pub(crate) use {nested_symbol, symbol};
49 | 


--------------------------------------------------------------------------------
/CodeParser/Resources/Examples/Collatz.m:
--------------------------------------------------------------------------------
 1 | BeginPackage["Collatz`"]
 2 | 
 3 | Collatz::usage "Collatz[n] gives a list of the iterates in the 3n+1 problem,
 4 |         starting from n. The conjecture is that this sequence always
 5 |         terminates."
 6 |               (*intentional implicit times*)
 7 | 
 8 | Begin["`Private`"]
 9 | 
10 | Collatz[1] := {1};
11 | 
12 | Collatz[n_Integer]  := Prepend[Collatz[(3 n + 1)/2], n] /; OddQ[n] && n > 0;;
13 |                                                                            (*intentional ;;*)
14 | 
15 | Collatz[n_Integer] := Prepend[Collatz[n/2], n] /; EvenQ[n] && n > 0;
16 | 
17 | 
18 | (*
19 | The call DummyFunction1[] can be replaced with CallSite[DummyFunction1[]] when profiling
20 | to enable CallSite analysis.
21 | 
22 | CallSite analysis enables the profiling of time between when a function is called to when its body is entered.
23 | In this example, the Pause[0.01] would be kept track of.
24 | 
25 | The CallSite wrapper is removed during instrumentation and does not affect the result.
26 | 
27 | Make sure to call InstrumentProfile with the updated code and to reload the packages under profile.
28 | *)
29 | Collatz[n_Integer] := (DummyFunction1[];Prepend[Collatz[3 n + 1], n]) /; OddQ[n] && n > 0;
30 | 
31 | 
32 | DummyFunction1[] /; (Pause[0.01];True) :=
33 | Module[{},
34 | 	Null
35 | ]
36 | 
37 | 
38 | End[ ]
39 | 
40 | EndPackage[ ]


--------------------------------------------------------------------------------
/crates/wolfram-parser/src/feature.rs:
--------------------------------------------------------------------------------
 1 | //! Constants that are true if the associated cargo feature is enabled.
 2 | //!
 3 | //! The constants in this module are intended to be used as:
 4 | //!
 5 | //! ```ignore
 6 | //! if feature::CHECK_ABORT {
 7 | //!     // ...
 8 | //! }
 9 | //! ```
10 | //!
11 | //! Using these constants is preferred over the standard alternatives of:
12 | //!
13 | //! ```ignore
14 | //! #[cfg(feature = "CHECK_ABORT")]
15 | //! // ...
16 | //! ```
17 | //!
18 | //! or:
19 | //!
20 | //! ```ignore
21 | //! if cfg!(feature = "CHECK_ABORT") {
22 | //!     // ...
23 | //! }
24 | //! ```
25 | //!
26 | //! which have the disadvantage that the `feature = "..."` is not validated to
27 | //! correspond to a feature that is declared in Cargo.toml.
28 | //!
29 | //! More generally, using a constant instead of a parse-time `#[cfg(..)]` to
30 | //! disable sections of code has the advantage that the code inside the
31 | //! condition is still validated and type checked, which doesn't happen if
32 | //! `#[cfg(..)]` is used.
33 | //!
34 | //! This makes code controlled by feature flags easier to keep up-to-date as
35 | //! refactoring occurs.
36 | 
37 | pub(crate) const COMPUTE_SOURCE: bool = cfg!(feature = "COMPUTE_SOURCE");
38 | 
39 | pub(crate) const CHECK_ABORT: bool = cfg!(feature = "CHECK_ABORT");
40 | 
41 | pub(crate) const FAST_STRING_SCAN: bool = cfg!(feature = "FAST_STRING_SCAN");
42 | 


--------------------------------------------------------------------------------
/docs/tokens.md:
--------------------------------------------------------------------------------
 1 | 
 2 | # Tokens
 3 | 
 4 | 
 5 | ## Terminology
 6 | 
 7 | 
 8 | trivia: whitespace, newlines, comments [1]
 9 | 
10 | 
11 | 
12 | ## Philosophy
13 | 
14 | if a token is not something else, then it is prefix
15 | 
16 | 
17 | 
18 | ## TokenEnum encoding
19 | 
20 |  There are currently ~427 tokens, so 9 bits are required to enumerate them
21 | 
22 |  16 bits:
23 | 
24 | ```
25 |  fedcba9876543210
26 |         ^~~~~~~~~
27 |         Enum bits (9 bits)
28 |       ^~
29 |       Group 1 bits (2 bits)
30 |     ^~
31 |     Group 2 bits (2 bits)
32 |  ^~~
33 |  Unused bits (3 bits)
34 | ```
35 | 
36 | 
37 | Within the set of trivia tokens, the values of the enum bits themselves are special because they are used for fast
38 | testing.
39 | 
40 | 
41 | Group 1: These are all mutually exclusive categories: PossibleBeginning, Closer, Error
42 | 01 PossibleBeginning
43 | 10 Closer
44 | 11 Error
45 | 00 Anything Else
46 | 
47 | Other possible categories for Group 1 are: Trivia, InfixOperator, etc. Everything in Group 1 would still
48 | be mutually exclusive.
49 | 
50 | 
51 | 
52 | Group 2: These are all mutually exclusive categories: Empty, DifferentialD
53 | 01 Empty
54 | 10 DifferentialD
55 | 11 (unused)
56 | 00 Anything Else
57 | 
58 | 
59 | 
60 | 
61 | 
62 | ## References
63 | 
64 | [1] https://github.com/dotnet/roslyn/wiki/Roslyn-Overview#syntax-trivia
65 | 
66 | 
67 | 
68 | 
69 | 
70 | 
71 | 
72 | 
73 | 


--------------------------------------------------------------------------------
/cmake/PacletInfo.cmake:
--------------------------------------------------------------------------------
 1 | 
 2 | macro(CheckPacletInfo)
 3 | 
 4 |   if(NOT EXISTS ${WOLFRAMKERNEL})
 5 |   message(FATAL_ERROR "WOLFRAMKERNEL does not exist. WOLFRAMKERNEL: ${WOLFRAMKERNEL}")
 6 |   endif()
 7 | 
 8 |   if(LOCAL_BUILD)
 9 |     message(STATUS "Paclet Version ignored in local build")
10 |     set(LOCAL_BUILD_VERSION 999.9)
11 |   else()
12 |     #
13 |     # if not local build, then get Version from PacletInfo.wl
14 |     #
15 |     execute_process(
16 |       COMMAND
17 |         ${WOLFRAMKERNEL} -noinit -noprompt -nopaclet -nostartuppaclets -runfirst Pause[${KERNEL_PAUSE}]\;Print[OutputForm[Row[{Version,\ ";",\ WolframVersion}\ /.\ List\ @@\ Get["${PACLETINFO_IN_SOURCE}"]]]]\;Exit[]
18 |       OUTPUT_VARIABLE
19 |         PACLET_VERSIONS_LIST
20 |       OUTPUT_STRIP_TRAILING_WHITESPACE
21 |       WORKING_DIRECTORY
22 |         ${PROJECT_SOURCE_DIR}
23 |       TIMEOUT
24 |         ${KERNEL_TIMEOUT}
25 |       RESULT_VARIABLE
26 |         PACLETINFO_RESULT
27 |     )
28 | 
29 |     if(NOT ${PACLETINFO_RESULT} EQUAL "0")
30 |       message(FATAL_ERROR "Bad exit code from PacletInfo script: ${PACLETINFO_RESULT}")
31 |     endif()
32 | 
33 |     list(GET PACLET_VERSIONS_LIST 0 PACLET_VERSION)
34 |     list(GET PACLET_VERSIONS_LIST 1 PACLET_WOLFRAMVERSION)
35 |     message(STATUS "PACLET_VERSION: ${PACLET_VERSION}")
36 |     message(STATUS "PACLET_WOLFRAMVERSION: ${PACLET_WOLFRAMVERSION}")
37 |     
38 |   endif(LOCAL_BUILD)
39 | 
40 | endmacro(CheckPacletInfo)
41 | 


--------------------------------------------------------------------------------
/CodeParser/Generate/TokenEnum.wl:
--------------------------------------------------------------------------------
 1 | (* ::Package::"Tags"-><|"SuspiciousSessionSymbol" -> <|Enabled -> False|>|>:: *)
 2 | 
 3 | If[!MemberQ[$Path, #], PrependTo[$Path, #]]&[DirectoryName[$InputFileName, 3]]
 4 | 
 5 | BeginPackage["CodeParser`Generate`TokenEnum`"]
 6 | 
 7 | (* Used by Generate/RowBox.wl *)
 8 | GroupOpenerToCloser
 9 | 
10 | 
11 | Begin["`Private`"]
12 | 
13 | (* Closer for each group-opening token; Token`OpenSquare and Token`ColonColonOpenSquare both map to Closer`CloseSquare *)
14 | GroupOpenerToCloser[Token`OpenCurly] = Closer`CloseCurly
15 | GroupOpenerToCloser[Token`LessBar] = Closer`BarGreater
16 | GroupOpenerToCloser[Token`OpenSquare] = Closer`CloseSquare
17 | GroupOpenerToCloser[Token`OpenParen] = Closer`CloseParen
18 | GroupOpenerToCloser[Token`ColonColonOpenSquare] = Closer`CloseSquare
19 | (* Openers in the Token`LongName` context, each paired with the matching Closer`LongName` symbol *)
20 | GroupOpenerToCloser[Token`LongName`LeftAngleBracket] = Closer`LongName`RightAngleBracket
21 | GroupOpenerToCloser[Token`LongName`LeftCeiling] = Closer`LongName`RightCeiling
22 | GroupOpenerToCloser[Token`LongName`LeftFloor] = Closer`LongName`RightFloor
23 | GroupOpenerToCloser[Token`LongName`LeftDoubleBracket] = Closer`LongName`RightDoubleBracket
24 | GroupOpenerToCloser[Token`LongName`LeftBracketingBar] = Closer`LongName`RightBracketingBar
25 | GroupOpenerToCloser[Token`LongName`LeftDoubleBracketingBar] = Closer`LongName`RightDoubleBracketingBar
26 | GroupOpenerToCloser[Token`LongName`LeftAssociation] = Closer`LongName`RightAssociation
27 | GroupOpenerToCloser[Token`LongName`OpenCurlyQuote] = Closer`LongName`CloseCurlyQuote
28 | GroupOpenerToCloser[Token`LongName`OpenCurlyDoubleQuote] = Closer`LongName`CloseCurlyDoubleQuote
29 | 
30 | 
31 | End[]
32 | 
33 | EndPackage[]
34 | 


--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
 1 | # Contributing to Wolfram<sup>&reg;</sup>
 2 | 
 3 | Thank you for taking the time to contribute to the [Wolfram Research](https://github.com/wolframresearch) repos on GitHub.
 4 | 
 5 | ## Licensing of Contributions
 6 | 
 7 | By contributing to Wolfram, you agree and affirm that:
 8 | 
 9 | > Wolfram may release your contribution under the terms of the [MIT license](https://opensource.org/licenses/MIT); and
10 | 
11 | > You have read and agreed to the [Developer Certificate of Origin](http://developercertificate.org/), version 1.1 or later.
12 | 
13 | Please see [LICENSE](LICENSE) for licensing conditions pertaining
14 | to individual repositories.
15 | 
16 | 
17 | ## Bug reports
18 | 
19 | ### Security Bugs
20 | 
21 | Please **DO NOT** file a public issue regarding a security issue.
22 | Rather, send your report privately to security@wolfram.com.  Security
23 | reports are appreciated and we will credit you for it.  We do not offer
24 | a security bounty, but the forecast in your neighborhood will be cloudy
25 | with a chance of Wolfram schwag!
26 | 
27 | ### General Bugs
28 | 
29 | Please use the repository issues page to submit general bug issues.
30 | 
31 | Please do not duplicate issues.
32 | 
33 | Please do send a complete and well-written report to us.  Note:  **the
34 | thoroughness of your report will positively correlate to our willingness
35 | and ability to address it**.
36 | 
37 | When reporting issues, always include:
38 | 
39 | * Your version of *Mathematica*<sup>&reg;</sup> or the Wolfram Language<sup>&trade;</sup>.
40 | * Your operating system.
41 | 


--------------------------------------------------------------------------------
/crates/wolfram-parser/src/precedence.rs:
--------------------------------------------------------------------------------
 1 | use std::num::NonZeroU8;
 2 | 
 3 | /// A precedence level, stored as a single non-zero byte.
 4 | ///
 5 | /// The 1's bit denotes the associativity.
 6 | #[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord)]
 7 | #[repr(transparent)]
 8 | pub struct Precedence(NonZeroU8);
 9 | 
10 | // Verify that Option<Precedence> is the same size as a u8.
11 | const _: () = assert!(std::mem::size_of::<Option<Precedence>>() == 1);
12 | 
13 | 
14 | impl Precedence {
15 |     /// Builds a `Precedence` from its raw byte; panics when `value` is 0.
16 |     // TODO(cleanup): Take the precedence value and associativity separately.
17 |     pub(crate) const fn new(value: u8) -> Self {
18 |         if let Some(nonzero) = NonZeroU8::new(value) {
19 |             Precedence(nonzero)
20 |         } else {
21 |             panic!("invalid Precedence 0 value")
22 |         }
23 |     }
24 | 
25 |     /// Raw byte stored in this precedence.
26 |     // TODO(cleanup): Make this unnecessary. What does it mean anyway?
27 |     fn bits(self) -> u8 {
28 |         self.0.get()
29 |     }
30 | 
31 |     /// Returns true if `lhs`, with its 1's bit set, is greater than `rhs`.
32 |     pub(crate) fn greater(
33 |         lhs: Option<Precedence>,
34 |         rhs: Option<Precedence>,
35 |     ) -> bool {
36 |         let left = lhs.map_or(0, Precedence::bits); // `None` counts as 0
37 |         let right = rhs.map_or(0, Precedence::bits);
38 |         // `|` binds tighter than `>` in Rust; parenthesized for clarity.
39 |         (left | 0x1) > right
40 |     }
41 | }
42 | 
43 | /// Lets an `Option<Precedence>` be compared directly with a `Precedence`.
44 | impl PartialEq<Precedence> for Option<Precedence> {
45 |     /// `None` never equals any `Precedence`.
46 |     fn eq(&self, other: &Precedence) -> bool {
47 |         // Equal only when a value is present and it equals `other`.
48 |         matches!(self, Some(inner) if inner == other)
49 |     }
50 | }
51 | 


--------------------------------------------------------------------------------
/Tests/SyntaxErrorNodes.mt:
--------------------------------------------------------------------------------
 1 | Print["\n===== Start SyntaxErrorNodes.mt =====\n"]
 2 | 
 3 | Needs["CodeParser`"]
 4 | 
 5 | (*
 6 | ExpectedTilde:
 7 | *)
 8 | 
 9 | Test[
10 | 	CodeParse["a ~f"]
11 | 	,
12 | 	ContainerNode[String, {
13 | 		SyntaxErrorNode[SyntaxError`ExpectedTilde, {
14 | 			LeafNode[Symbol, "a", <|Source -> {{1, 1}, {1, 2}}|>], 
15 | 	  		LeafNode[Symbol, "f", <|Source -> {{1, 4}, {1, 5}}|>]}, <|Source -> {{1, 1}, {1, 5}}|>] }, <|Source -> {{1, 1}, {1, 5}}|>]
16 | 	,
17 | 	TestID->"SyntaxErrorNodes-20190521-T2R4L9"
18 | ]
19 | 
20 | Test[
21 | 	CodeConcreteParse["~"]
22 | 	,
23 | 	ContainerNode[String, {
24 | 		SyntaxErrorNode[SyntaxError`ExpectedTilde, {
25 | 			ErrorNode[Token`Error`ExpectedOperand, "", <|Source -> {{1, 1}, {1, 1}}|>],
26 | 			LeafNode[Token`Tilde, "~", <|Source -> {{1, 1}, {1, 2}}|>],
27 | 			ErrorNode[Token`Error`ExpectedOperand, "", <|Source -> {{1, 2}, {1, 2}}|>]}, <|Source -> {{1, 1}, {1, 2}}|>]}, <|Source -> {{1, 1}, {1, 2}}|>]
28 | 	,
29 | 	TestID->"SyntaxErrorNodes-20200628-O0J0J1"
30 | ]
31 | 
32 | 
33 | 
34 | (*
35 | ExpectedSet:
36 | *)
37 | 
38 | Test[
39 | 	CodeParse["a /: b * c"]
40 | 	,
41 | 	ContainerNode[String, {
42 | 		SyntaxErrorNode[SyntaxError`ExpectedSet, {
43 | 			LeafNode[Symbol, "a", <|Source -> {{1, 1}, {1, 2}}|>],
44 | 			CallNode[LeafNode[Symbol, "Times", <||>], {
45 | 				LeafNode[Symbol, "b", <|Source -> {{1, 6}, {1, 7}}|>],
46 | 		    	LeafNode[Symbol, "c", <|Source -> {{1, 10}, {1, 11}}|>]}, <|Source -> {{1, 6}, {1, 11}}|>]}, <|Source -> {{1, 1}, {1, 11}}|>] }, <|Source -> {{1, 1}, {1, 11}}|>]
47 | 	,
48 | 	TestID->"SyntaxErrorNodes-20190521-D9G5L2"
49 | ]
50 | 
51 | 


--------------------------------------------------------------------------------
/docs/implementation.md:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | > One curiosity question I have is why the parsePrefix() and parseInfix() methods were written to return a function pointer, instead of writing them to simply take the (ParserSessionPtr, Token) arguments and compute the result directly. The function pointer seems like pure indirection (I didn't see any place where the returned function pointer wasn't immediately called.)
 4 | 
 5 | 
 6 | Just looking at the first example I saw:
 7 | 
 8 | ```
 9 | auto P2 = prefixParselets[Tok.Tok.value()];
10 |     
11 | MUSTTAIL
12 | return (P2->parsePrefix())(session, P2, Tok);
13 | ```
14 | 
15 | Are you asking why not just do:
16 | 
17 | ```
18 | return P2->parsePrefix(session, Tok);
19 | ```
20 | 
21 | ?
22 | 
23 | This is because the current state-of-the-art for tail calls in Clang is that you cannot make a tail call from a flat function to an instance method.
24 | 
25 | You can call flat function -> flat function and instance method -> method of the same instance, but I think that's it.
26 | 
27 | 
28 | But I later discovered that I am abusing the current tail-call technology and cannot actually use the `[[clang::musttail]]` stuff right now
29 | (see https://github.com/llvm/llvm-project/issues/56435 that I filed)
30 | 
31 | If I made Tokens a lot smaller, I may be able to get away with not breaking anything at -O2 but I haven't done that yet.
32 | 
33 | So none of the tail-call stuff is even being used right now.
34 | 
35 | I wonder if it would be possible to do `P2->parsePrefix(session, Tok)` or similar if not compiling with MUSTTAIL.
36 | 
37 | I have no doubt that deep C++ magic involving `std::bind` or something may be technically possible.
38 | 


--------------------------------------------------------------------------------
/CodeParser/Kernel/Quirks.wl:
--------------------------------------------------------------------------------
 1 | (* ::Package::"Tags"-><|"NoVariables" -> <|"Module" -> <|Enabled -> False|>|>|>:: *)
 2 | 
 3 | BeginPackage["CodeParser`Quirks`"]
 4 | 
 5 | setupQuirks
 6 | 
 7 | 
 8 | $Quirks
 9 | 
10 | 
11 | processInfixBinaryAtQuirk
12 | 
13 | 
14 | Begin["`Private`"]
15 | 
16 | Needs["CodeParser`"]
17 | 
18 | 
19 | (* setupQuirks[] resets $Quirks to an empty Association, then sets the quirk flags described below *)
20 | setupQuirks[] :=
21 | Module[{},
22 |   
23 |   $Quirks = <||>;
24 | 
25 |   (*
26 |   Setup "FlattenTimes" quirk
27 | 
28 |   In 12.1 and before:
29 |     a / b / c is parsed as Times[a, Power[b, -1], Power[c, -1]]
30 |     -a / b is parsed as Times[-1, a, Power[b, -1]]
31 | 
32 |   In 12.2 and after:
33 |     a / b / c is parsed as Times[Times[a, Power[b, -1]], Power[c, -1]]
34 |     -a / b is parsed as Times[Times[-1, a], Power[b, -1]]
35 | 
36 |   TODO: when targeting v12.2 as a minimum, remove this quirk
37 | 
38 |   Related bugs: 57064, 139531, 153875, 160919
39 |   *)
40 |   If[$VersionNumber <= 12.1,
41 |     $Quirks["FlattenTimes"] = True
42 |   ];
43 | 
44 |   (*
45 |   Setup "InfixBinaryAt" quirk
46 | 
47 |   The kernel parses  a<>StringJoin@b  as  StringJoin[a, b]
48 | 
49 |   Most infix operators can be used with this syntax.
50 |   Notably, SameQ and UnsameQ do NOT work with this syntax.
51 | 
52 |   Related bugs: 365013
53 |   *)
54 |   $Quirks["InfixBinaryAt"] = True;
55 | 
56 |   (*
57 |   Setup "OldAtAtAt" quirk
58 |   (@@@ changed in 13.1)
59 | 
60 |   In 13.0 and before:
61 |   a @@@ b parsed as Apply[a, b, {1}]
62 | 
63 |   In 13.1 and after:
64 |   a @@@ b parses as MapApply[a, b]
65 |   *)
66 |   If[$VersionNumber <= 13.0,
67 |     $Quirks["OldAtAtAt"] = True
68 |   ];
69 | ]
70 | 
71 | 
72 | 
73 | 
74 | End[]
75 | 
76 | EndPackage[]
77 | 


--------------------------------------------------------------------------------
/CodeTools/Generate/CreatePacletArchive.wl:
--------------------------------------------------------------------------------
 1 | 
 2 | If[!MemberQ[$Path, #], PrependTo[$Path, #]]&[DirectoryName[$InputFileName, 3]]
 3 | 
 4 | BeginPackage["CodeTools`Generate`CreatePacletArchive`"]
 5 | 
 6 | Begin["`Private`"]
 7 | 
 8 | (*
 9 | Do not allow PacletManager to participate in finding `Generate` files
10 | 
11 | PacletManager will find e.g. CodeParser/Kernel/TokenEnum.wl when asked to find CodeParser`Generate`TokenEnum`
12 | 
13 | related issues: PACMAN-54
14 | *)
15 | Block[{Internal`PacletFindFile = Null&},
16 | Needs["CodeTools`Generate`GenerateSources`"];
17 | ]
18 | If[$VersionNumber < 12.1,
19 |   Needs["PacletManager`"]
20 | ]
21 | (* NOTE(review): checkBuildDir, checkPaclet and checkPacletLayoutDir are not defined in this file; presumably they come from GenerateSources loaded above - confirm *)
22 | checkBuildDir[]
23 | checkPaclet[]
24 | checkPacletLayoutDir[]
25 | 
26 | (* NOTE(review): retry is not assigned in this file; presumably it reflects the -retry flag that cmake/WolframScript.cmake passes on its second attempt - confirm *)
27 | If[retry,
28 |   (*
29 |   CreatePacletArchive may be slow on RE machines, so allow re-trying if JLink connection timeout is hit
30 | 
31 |   Set $connectTimeout to some large value and cross fingers (default is 20)
32 | 
33 |   See: RE-515885
34 |   *)
35 |   Needs["JLink`"];
36 |   JLink`InstallJava`Private`$connectTimeout = 300.0
37 | ]
38 | 
39 | 
40 | generate[] := (
41 | (* Pack pacletLayoutDir/paclet into buildDir/paclet; quit with exit code 1 unless a string comes back *)
42 | Print["Calling CreatePacletArchive..."];
43 | 
44 | res =
45 |   If[$VersionNumber >= 12.1,
46 |     System`CreatePacletArchive[FileNameJoin[{pacletLayoutDir, paclet}], FileNameJoin[{buildDir, "paclet"}]]
47 |     ,
48 |     PacletManager`PackPaclet[FileNameJoin[{pacletLayoutDir, paclet}], FileNameJoin[{buildDir, "paclet"}]]
49 |   ];
50 | Print[res];
51 | 
52 | If[!StringQ[res],
53 |   Quit[1]
54 | ];
55 | 
56 | Print["Done CreatePacletArchive"]
57 | )
58 | (* Quit if script is not a string; run generate[] only when this file is the script being executed *)
59 | If[!StringQ[script],
60 |   Quit[1]
61 | ]
62 | If[AbsoluteFileName[script] === AbsoluteFileName[$InputFileName],
63 |   generate[]
64 | ]
65 | 
66 | End[]
67 | 
68 | EndPackage[]
69 | 


--------------------------------------------------------------------------------
/HowToBuild.md:
--------------------------------------------------------------------------------
 1 | # Building
 2 | 
 3 | CodeParser uses a Wolfram Language kernel to generate code at build time and a
 4 | Rust compiler to compile a native library.
 5 | 
 6 | CodeParser uses CMake to generate build scripts.
 7 | 
 8 | Here is an example transcript using the default make generator to build CodeParser:
 9 | ```
10 | cd codeparser
11 | mkdir build
12 | cd build
13 | cmake ..
14 | cmake --build .
15 | ```
16 | 
17 | The result is a directory named `paclet` that contains the WL package source code and a built CodeParser `.paclet` file for installing.
18 | 
19 | Inside a kernel session you may then install the paclet by evaluating:
20 | ```
21 | PacletInstall["/path/to/build/paclet/CodeParser-1.10.paclet"]
22 | ```
23 | 
24 | Specify `MATHEMATICA_INSTALL_DIR` if you have Wolfram System installed in a non-default location:
25 | ```
26 | cmake -DMATHEMATICA_INSTALL_DIR=/Applications/Mathematica.app/Contents/ ..
27 | cmake --build .
28 | ```
29 | 
30 | On Windows:
31 | ```
32 | cmake -DMATHEMATICA_INSTALL_DIR="C:/Program Files/Wolfram Research/Mathematica/13.1" ..
33 | cmake --build .
34 | ```
35 | 
36 | ## Installing
37 | 
38 | You can install the paclet from CMake:
39 | ```
40 | cmake --install .
41 | ```
42 | 
43 | This starts a kernel and calls `PacletInstall` with the built .paclet file.
44 | 
45 | 
46 | ## Troubleshooting
47 | 
48 | ### "building for macOS-arm64 but attempting to link with file built for macOS-x86_64"
49 | 
50 | You see this error during building:
51 | ```
52 | ld: warning: ignoring file /Applications/Mathematica.app/Contents/SystemFiles/Links/MathLink/DeveloperKit/MacOSX-x86-64/CompilerAdditions/mathlink.framework/mathlink, building for macOS-arm64 but attempting to link with file built for macOS-x86_64
53 | ```
54 | 
55 | You most likely need to specify:
56 | ```
57 | -DCMAKE_OSX_ARCHITECTURES=x86_64
58 | ```
59 | 


--------------------------------------------------------------------------------
/docs/characters.md:
--------------------------------------------------------------------------------
 1 | 
 2 | # Characters
 3 | 
 4 | 
 5 | ## Philosophy
 6 | 
 7 | if a character is not something else, then it is letterlike
 8 | 
 9 | 
10 | 
11 | ## Character Encodings
12 | 
13 | 
14 | UTF-8 input is assumed everywhere.
15 | 
16 | There is an API function SafeString that will accept an array of bytes and return a "safe" string, i.e., a string that has assumed UTF-8 input with these changes:
17 | 
18 | Any invalid byte sequences are converted into \[UnknownGlyph]
19 | 
20 | Any high or low surrogates are converted into \[UnknownGlyph]
21 | 
22 | BOM character 0xfeff is converted into 0xe001, to allow transferring through MathLink.
23 | Related bugs: 366106
24 | 
25 | 
26 | 
27 | 
28 | 
29 | 
30 | ## Raw
31 | 
32 | characters like \[RawReturn] are a way of escaping that character
33 | 
34 | Poorly understood
35 | 
36 | Perhaps essentially unused
37 | 
38 | 
39 | 
40 | A good philosophy that I follow is to treat the Raw characters as escaped versions of their normal characters
41 | 
42 | \[RawTab] is similar to \t, and is NOT the same as actual 0x09 character
43 | \[NewLine] is similar to \n, and is NOT the same as actual 0x0a character
44 | etc.
45 | 
46 | 
47 | 
48 | 
49 | 
50 | 
51 | 
52 | 
53 | ## WLCharacter encoding
54 | 
55 |  32 bits:
56 | ```
57 |  vutsrqponmlkjihgfedcba9876543210
58 |             ^~~~~~~~~~~~~~~~~~~~~
59 |             Character bits (21 bits)
60 |            ^
61 |            Sign bit
62 |         ^~~
63 |         EscapeStyle bits (3 bits)
64 |  ^~~~~~~
65 |  Unused (7 bits)
66 | ```
67 | 
68 | 
69 | 
70 | ## Private Use Area
71 | 
72 | No attempt will be made to define or describe characters in the PUA.
73 | 
74 | The FE defines a number of PUA characters for its own internal use.
75 | 
76 | This is not a binding contract, and usage, values, behavior, and stability are subject to change at any moment.
77 | 
78 | 
79 | 
80 | 


--------------------------------------------------------------------------------
/docs/Development.md:
--------------------------------------------------------------------------------
 1 | # Development
 2 | 
 3 | ## Quick Command Reference
 4 | 
 5 | #### Build the CodeParser paclet:
 6 | 
 7 | ```shell
 8 | $ cmake -S . -B build -DMATHEMATICA_INSTALL_DIR=/Applications/Wolfram/Mathematica-13.1.0.app/Contents/
 9 | $ cmake --build build
10 | ```
11 | 
12 | Intermediate compiled library artifacts will be built into the `./target`
13 | directory, and a copy will be placed in the `CodeParser/LibraryResources/`
14 | subdirectory of the built CodeParser paclet.
15 | 
16 | #### Run the compiled tests:
17 | 
18 | Run the compiled Rust library tests using `cargo`:
19 | 
20 | ```shell
21 | $ cargo test
22 | ```
23 | 
24 | #### Run the Wolfram tests:
25 | 
26 | After building CodeParser, tests written in Wolfram can be run from the command
27 | line using the
28 | [`wolfram-cli paclet test`](https://github.com/ConnorGray/wolfram-cli) tool:
29 | 
30 | ```shell
31 | $ wolfram-cli paclet test build/paclet/CodeParser Tests/TestSuite.mt
32 | ```
33 | 
34 | #### Run the wolfram-parser benchmarks:
35 | 
36 | To run the benchmarks, execute:
37 | 
38 | ```shell
39 | $ cargo bench
40 | ```
41 | 
42 | Re-running the benchmarks will print out comparison statistics between the latest
43 | and the most recent previous benchmark run. See also
44 | [criterion.rs](https://github.com/bheisler/criterion.rs).
45 | 
46 | 
47 | ## Testing
48 | 
49 | CodeParser has two test suites:
50 | 
51 | 1. Tests written in Rust, primarily located in [crates/wolfram-parser/src/tests/](../crates/wolfram-parser/src/tests/).
52 | 2. Tests written in Wolfram, primarily located in [Tests](../Tests/).
53 | 
54 | ## Benchmarking
55 | 
56 | To capture a named baseline benchmark, execute:
57 | 
58 | ```shell
59 | $ cargo bench -p wolfram-parser -- --save-baseline <BASELINE NAME>
60 | ```
61 | 
62 | Then, to run benchmarks that show comparison results against that captured
63 | baseline, execute:
64 | 
65 | ```shell
66 | $ cargo bench -p wolfram-parser -- --baseline <BASELINE NAME>
67 | ```
68 | 
69 | 


--------------------------------------------------------------------------------
/CodeParser/Kernel/Shims.wl:
--------------------------------------------------------------------------------
 1 | BeginPackage["CodeParser`Shims`"]
 2 | 
 3 | setupShims
 4 | 
 5 | cleanupStackShimMemoryLeak
 6 | 
 7 | 
 8 | Begin["`Private`"]
 9 | 
10 | 
11 | setupShims[] := (
12 |   (*
13 |   Install the stack shim when either:
14 |   - the kernel is older than 12.1, or
15 |   - CreateDataStructure["Stack"] fails, because:
16 | 
17 |     Some weird problem is causing:
18 |     DataStructure::nods: Stack is not a known DataStructure.
19 |   *)
20 |   If[
21 |     $VersionNumber < 12.1 ||
22 |       FailureQ[Quiet[System`CreateDataStructure["Stack"], {DataStructure::nods}]],
23 |     setupStackShim[]
24 |   ]
25 | )
26 | 
27 | 
28 | setupStackShim[] := (
29 | 
30 |   (*
31 |   For versions before 12.1, we implement our own stack to store top-level expressions.
32 | 
33 |   The push, pop and peek operations take O(1), while Normal takes O(n).
34 |   *)
35 | 
36 |   (*
37 |   Define CreateDataStructure for earlier versions
38 |   *)
39 |   System`CreateDataStructure["Stack"] :=
40 |     Module[{stack, stackVal, stackDepth, stackCons},
41 | 
42 |       stackVal = stackCons[];
43 | 
44 |       stackDepth = 0;
45 | 
46 |       stack /: stack["Push", expr_] := (
47 |         stackVal = stackCons[stackVal, expr];
48 |         stackDepth += 1;
49 |         Null
50 |       );
51 |       (* "Pop" on an empty stack leaves the state untouched and yields Null (one-armed If) *)
52 |       stack /: stack["Pop"] :=
53 |         Module[{tmp},
54 |           If[stackDepth != 0,
55 |             stackDepth -= 1;
56 |             {stackVal, tmp} = List @@ stackVal;
57 |             tmp
58 |           ]
59 |         ];
60 |       (* NOTE(review): unlike "Pop", "Peek" has no empty-stack guard; on an empty stack it evaluates Last[stackCons[]] *)
61 |       stack /: stack["Peek"] := Last[stackVal];
62 | 
63 |       stack /: Normal[stack] := (
64 |         Flatten[{stackVal}, Infinity, stackCons]
65 |       );
66 | 
67 |       stack /: stack["Length"] := stackDepth;
68 | 
69 |       stack
70 |     ];
71 |   (* cleanupStackShimMemoryLeak[] only gets a definition once setupStackShim[] has run *)
72 |   cleanupStackShimMemoryLeak[] := (
73 |     (*
74 |     Hack to prevent memory leak with shims
75 |     *)
76 |     Quiet[Remove["CodeParser`Shims`Private`stack*$*"];, {Remove::rmnsm}];
77 |   )
78 | )
79 | 
80 | 
81 | 
82 | 
83 | 
84 | 
85 | 
86 | 
87 | 
88 | End[]
89 | 
90 | EndPackage[]
91 | 


--------------------------------------------------------------------------------
/cmake/WolframScript.cmake:
--------------------------------------------------------------------------------
 1 | # Runs SCRIPT in the Wolfram kernel; with RETRY_ON_FAILURE a failed first attempt is retried once with -retry.
 2 | if(NOT EXISTS "${WOLFRAMKERNEL}")
 3 |   message(FATAL_ERROR "WOLFRAMKERNEL does not exist. WOLFRAMKERNEL: ${WOLFRAMKERNEL}")
 4 | endif()
 5 | 
 6 | if(NOT DEFINED RETRY_ON_FAILURE)
 7 |   set(RETRY_ON_FAILURE OFF)
 8 | endif()
 9 | 
10 | if(NOT EXISTS "${SCRIPT}")
11 |   message(FATAL_ERROR "SCRIPT does not exist. SCRIPT: ${SCRIPT}")
12 | endif()
13 | 
14 | file(READ "${SCRIPT}" script)
15 | 
16 | if(script STREQUAL "")
17 |   message(FATAL_ERROR "SCRIPT is empty. SCRIPT: ${SCRIPT}")
18 | endif()
19 | 
20 | if(RETRY_ON_FAILURE)
21 | 
22 |   #
23 |   # try twice
24 |   #
25 | 
26 |   execute_process(
27 |     COMMAND
28 |       ${WOLFRAMKERNEL} -script ${SCRIPT} -srcDir ${SRCDIR} -buildDir ${BUILDDIR} -pacletLayoutDir ${PACLET_LAYOUT_DIR} -paclet ${PACLET}
29 |     TIMEOUT
30 |       ${KERNEL_TIMEOUT}
31 |     RESULT_VARIABLE
32 |       SCRIPT_RESULT
33 |   )
34 |   # SCRIPT_RESULT is quoted below: on timeout it is an error string with spaces, which would otherwise break if() before the retry.
35 |   if(NOT "${SCRIPT_RESULT}" EQUAL "0")
36 |     message(WARNING "First try: Bad exit code from script: ${SCRIPT_RESULT}; retrying...")
37 | 
38 |     execute_process(
39 |       COMMAND
40 |         ${WOLFRAMKERNEL} -retry -script ${SCRIPT} -srcDir ${SRCDIR} -buildDir ${BUILDDIR} -pacletLayoutDir ${PACLET_LAYOUT_DIR} -paclet ${PACLET}
41 |       TIMEOUT
42 |         ${KERNEL_TIMEOUT}
43 |       RESULT_VARIABLE
44 |         SCRIPT_RESULT
45 |     )
46 | 
47 |     if(NOT "${SCRIPT_RESULT}" EQUAL "0")
48 |       message(FATAL_ERROR "Second try: Bad exit code from script: ${SCRIPT_RESULT}; stopping")
49 |     else()
50 |       message(STATUS "Second try: Success!")
51 |     endif()
52 | 
53 |   endif()
54 | 
55 | else(RETRY_ON_FAILURE)
56 | 
57 |   #
58 |   # only try once
59 |   #
60 | 
61 |   execute_process(
62 |     COMMAND
63 |       ${WOLFRAMKERNEL} -script ${SCRIPT} -srcDir ${SRCDIR} -buildDir ${BUILDDIR} -pacletLayoutDir ${PACLET_LAYOUT_DIR} -paclet ${PACLET}
64 |     TIMEOUT
65 |       ${KERNEL_TIMEOUT}
66 |     RESULT_VARIABLE
67 |       SCRIPT_RESULT
68 |   )
69 | 
70 |   if(NOT "${SCRIPT_RESULT}" EQUAL "0")
71 |     message(FATAL_ERROR "Bad exit code from script: ${SCRIPT_RESULT} (script was ${SCRIPT})")
72 |   endif()
73 | 
74 | endif()
75 | 


--------------------------------------------------------------------------------
/.github/workflows/workflow.yml:
--------------------------------------------------------------------------------
 1 | # This is a basic workflow to help you get started with Actions
 2 | 
 3 | name: CI
 4 | 
 5 | # Controls when the action will run. Triggers the workflow on push or pull request
 6 | # events but only for the master branch
 7 | on:
 8 |   push:
 9 |     branches:
10 |       - master
11 |   pull_request:
12 |     branches:
13 |       - master
14 | 
15 | env:
16 |   WOLFRAM_ID: ${{ secrets.WOLFRAM_ID }}
17 |   WOLFRAM_PW: ${{ secrets.WOLFRAM_PW }}
18 | 
19 | # A workflow run is made up of one or more jobs that can run sequentially or in parallel
20 | jobs:
21 |   # This workflow contains a single job called "build"
22 |   build:
23 |     name: "Build Code Parser"
24 |     # The type of runner that the job will run on
25 |     runs-on: ubuntu-latest
26 |     # Steps represent a sequence of tasks that will be executed as part of the job
27 |     steps:
28 |     # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it
29 |     - uses: actions/checkout@v2
30 | 
31 |     - name: Install Dependencies
32 |       run: |
33 |         sudo apt update
34 |         sudo apt install -y wget cmake
35 | 
36 |     - name: Install Wolfram Engine
37 |       run: |
38 |         wget https://account.wolfram.com/download/public/wolfram-engine/desktop/LINUX
39 |         sudo bash LINUX -- -auto -verbose
40 |         rm LINUX
41 | 
42 |     - name: Activate Wolfram Engine
43 |       run: |
44 |         /usr/bin/wolframscript -authenticate $WOLFRAM_ID $WOLFRAM_PW
45 |         /usr/bin/wolframscript -activate
46 | 
47 |     - name: Build CodeParser
48 |       run: |
49 |         mkdir build
50 |         cd build
51 |         cmake .. -DMATHEMATICA_INSTALL_DIR="/usr/local/Wolfram/WolframEngine/13.1"
52 |         cmake --build . --target paclet
53 | 
54 |     - name: Install Paclet
55 |       # Install whichever CodeParser-<version>.paclet the build produced, so this step does not go stale when the paclet version changes.
56 |       run: |
57 |         ls build
58 |         /usr/bin/wolframscript -code 'PacletInstall[First[FileNames["CodeParser-*.paclet", "./build/paclet"]]];Exit[]'
59 |         
60 |     - name: Run Tests
61 |       run: |
62 |         pwd
63 |         /usr/bin/wolframscript -file .github/workflows/run_tests.wls
64 | 


--------------------------------------------------------------------------------
/Tests/Aggregate.mt:
--------------------------------------------------------------------------------
 1 | Print["\n===== Start Aggregate.mt =====\n"]
 2 | 
 3 | Needs["CodeParser`"]
 4 | Needs["CodeParser`Folds`"] (* For aggregate *)
 5 | 
 6 | Test[Context[aggregate], "CodeParser`Folds`"]
 7 | 
 8 | Test[
 9 | 	aggregate @ ContainerNode[String, {
10 | 		InfixNode[Plus, {
11 | 			LeafNode[Symbol, "a", <|Source -> {{1, 1}, {1, 2}}|>],
12 | 			LeafNode[Whitespace, " ", <|Source -> {{1, 2}, {1, 3}}|>],
13 | 			LeafNode[Token`Plus, "+", <|Source -> {{1, 3}, {1, 4}}|>],
14 | 			LeafNode[Whitespace, " ", <|Source -> {{1, 4}, {1, 5}}|>],
15 | 			LeafNode[Symbol, "b", <|Source -> {{1, 5}, {1, 6}}|>]
16 | 		}, <|Source -> {{1, 1}, {1, 6}}|>]
17 | 	}, <|Source -> {{1, 1}, {1, 6}}|>]
18 | 	,
19 | 	ContainerNode[String, {
20 | 		InfixNode[Plus, {
21 | 			LeafNode[Symbol, "a", <|Source -> {{1, 1}, {1, 2}}|>],
22 | 			LeafNode[Token`Plus, "+", <|Source -> {{1, 3}, {1, 4}}|>],
23 | 			LeafNode[Symbol, "b", <|Source -> {{1, 5}, {1, 6}}|>]
24 | 		}, <|Source -> {{1, 1}, {1, 6}}|>]
25 | 	}, <|Source -> {{1, 1}, {1, 6}}|>]
26 | ]
27 | 
28 | (*-------------------------------------------*)
29 | (* Test aggregate[..] of a non-ContainerNode *)
30 | (*-------------------------------------------*)
31 | 
32 | (* aggregate[..] of nodes with line:column positions *)
33 | Test[
34 | 	aggregate @ InfixNode[Plus, {
35 | 		LeafNode[Symbol, "a", <|Source -> {{1, 1}, {1, 2}}|>],
36 | 		LeafNode[Whitespace, " ", <|Source -> {{1, 2}, {1, 3}}|>],
37 | 		LeafNode[Token`Plus, "+", <|Source -> {{1, 3}, {1, 4}}|>],
38 | 		LeafNode[Whitespace, " ", <|Source -> {{1, 4}, {1, 5}}|>],
39 | 		LeafNode[Symbol, "b", <|Source -> {{1, 5}, {1, 6}}|>]
40 | 	}, <|Source -> {{1, 1}, {1, 6}}|>]
41 | 	,
42 | 	InfixNode[Plus, {
43 | 		LeafNode[Symbol, "a", <|Source -> {{1, 1}, {1, 2}}|>],
44 | 		LeafNode[Token`Plus, "+", <|Source -> {{1, 3}, {1, 4}}|>],
45 | 		LeafNode[Symbol, "b", <|Source -> {{1, 5}, {1, 6}}|>]
46 | 	}, <|Source -> {{1, 1}, {1, 6}}|>]
47 | ]
48 | 
49 | (*-----------------------------------------*)
50 | (* Test aggregate[..] of individual tokens *)
51 | (*-----------------------------------------*)
52 | 
53 | With[{
54 | 	symbolTok = LeafNode[Symbol, "a", <|Source -> {{1, 1}, {1, 2}}|>]
55 | },
56 | 	Test[aggregate[symbolTok], symbolTok]
57 | ]
58 | 
59 | Test[
60 | 	aggregate @ LeafNode[Whitespace, " ", <|Source -> {{1, 2}, {1, 3}}|>],
61 | 	Nothing
62 | ]
63 | 


--------------------------------------------------------------------------------
/crates/wolfram-parser/benches/bench_fast_string_scan.rs:
--------------------------------------------------------------------------------
 1 | use std::{fs, path::Path};
 2 | 
 3 | use criterion::{criterion_group, criterion_main, Criterion, SamplingMode};
 4 | 
 5 | use wolfram_parser::ParseOptions;
 6 | /// Tokenizes a string by forwarding its UTF-8 bytes to `parse_tokens_u8`.
 7 | fn parse_tokens(input: &str) {
 8 |     parse_tokens_u8(input.as_bytes())
 9 | }
10 | /// Tokenizes `input` with default `ParseOptions` and unwraps the result.
11 | fn parse_tokens_u8(input: &[u8]) {
12 |     wolfram_parser::tokenize_bytes(input, &ParseOptions::default()).unwrap();
13 | }
14 | /// Registers the small tokenize benchmarks, then the file-based ones.
15 | fn benchmark(c: &mut Criterion) {
16 |     println!("\n==== Legend ====");
17 |     println!("FSS: FAST_STRING_SCAN = true");
18 |     println!("================\n");
19 |     // NOTE(review): nothing in this file turns FAST_STRING_SCAN on; presumably a build-time setting of wolfram-parser — confirm.
20 |     c.bench_function("[FSS] tokenize 2 + 2", |b| {
21 |         b.iter(|| parse_tokens("2 + 2"))
22 |     });
23 |     // Boxes.wl is embedded in the benchmark binary at compile time by `include_str!`.
24 |     let boxes_wl = include_str!("../../../CodeParser/Kernel/Boxes.wl");
25 |     c.bench_function("[FSS] tokenize Boxes.wl", |b| {
26 |         b.iter(|| parse_tokens(boxes_wl))
27 |     });
28 | 
29 |     benchmark_large_files(c);
30 | }
31 | /// Benchmarks tokenizing the fixture files under `Tests/files/large`; panics if one cannot be read.
32 | fn benchmark_large_files(c: &mut Criterion) {
33 |     let mut group = c.benchmark_group("large files");
34 |     group.sampling_mode(SamplingMode::Flat);
35 |     group.sample_size(10);
36 |     // Paths are relative to the package root (crates/wolfram-parser), as in bench_general.rs.
37 |     //------------
38 |     // Large files
39 |     //------------
40 | 
41 |     let relief_plot =
42 |         fs::read(Path::new("../../Tests/files/large/ReliefPlot.nb")).unwrap();
43 |     group.bench_function("[FSS] tokenize ReliefPlot.nb", |b| {
44 |         b.iter(|| parse_tokens_u8(&relief_plot))
45 |     });
46 | 
47 |     let expanded_company_data_new =
48 |         fs::read(Path::new("../../Tests/files/large/expandedCompanyDataNew1.m"))
49 |             .unwrap();
50 |     group.bench_function("[FSS] tokenize expandedCompanyDataNew1.m", |b| {
51 |         b.iter(|| parse_tokens_u8(&expanded_company_data_new))
52 |     });
53 | 
54 |     //-------------
55 |     // Medium files
56 |     //-------------
57 | 
58 |     group.sampling_mode(SamplingMode::Auto);
59 |     group.sample_size(30);
60 | 
61 |     let geomagnetic_models =
62 |         fs::read(Path::new("../../Tests/files/large/geomagneticmodels.m"))
63 |             .unwrap();
64 |     group.bench_function("[FSS] tokenize geomagneticmodels.m", |b| {
65 |         b.iter(|| parse_tokens_u8(&geomagnetic_models))
66 |     });
67 | }
68 | 
69 | criterion_group!(benches, benchmark);
70 | criterion_main!(benches);
71 | 


--------------------------------------------------------------------------------
/crates/wolfram-parser/benches/bench_general.rs:
--------------------------------------------------------------------------------
 1 | use std::{fs, path::Path};
 2 | 
 3 | use criterion::{criterion_group, criterion_main, Criterion, SamplingMode};
 4 | 
 5 | use wolfram_parser::ParseOptions;
 6 | /// Tokenizes a string by forwarding its UTF-8 bytes to `tokenize_bytes`.
 7 | fn tokenize(input: &str) {
 8 |     tokenize_bytes(input.as_bytes())
 9 | }
10 | /// Tokenizes `input` with default `ParseOptions` and unwraps the result.
11 | fn tokenize_bytes(input: &[u8]) {
12 |     wolfram_parser::tokenize_bytes(input, &ParseOptions::default()).unwrap();
13 | }
14 | /// Calls `parse_cst_seq` on a string with default `ParseOptions`; the result is discarded.
15 | fn parse(input: &str) {
16 |     wolfram_parser::parse_cst_seq(input, &ParseOptions::default());
17 | }
18 | /// Calls `parse_bytes_cst_seq` on raw bytes with default `ParseOptions`; the result is discarded.
19 | fn parse_bytes(input: &[u8]) {
20 |     wolfram_parser::parse_bytes_cst_seq(input, &ParseOptions::default());
21 | }
22 | /// Registers the small tokenize/parse benchmarks, then the file-based ones.
23 | fn benchmark(c: &mut Criterion) {
24 |     c.bench_function("tokenize 2 + 2", |b| b.iter(|| tokenize("2 + 2")));
25 |     // Boxes.wl is embedded in the benchmark binary at compile time by `include_str!`.
26 |     let boxes_wl = include_str!("../../../CodeParser/Kernel/Boxes.wl");
27 |     c.bench_function("tokenize Boxes.wl", |b| b.iter(|| tokenize(boxes_wl)));
28 |     c.bench_function("parse CST of Boxes.wl", |b| b.iter(|| parse(boxes_wl)));
29 | 
30 |     benchmark_large_files(c);
31 | }
32 | /// Benchmarks CST parsing of the fixture files under `Tests/files/large`; panics if one cannot be read.
33 | fn benchmark_large_files(c: &mut Criterion) {
34 |     let mut group = c.benchmark_group("large files");
35 |     group.sampling_mode(SamplingMode::Flat);
36 |     group.sample_size(10);
37 |     // NOTE(review): the relative paths resolve against the working directory; presumably the package root under `cargo bench` — confirm.
38 |     //------------
39 |     // Large files
40 |     //------------
41 | 
42 |     let relief_plot =
43 |         fs::read(Path::new("../../Tests/files/large/ReliefPlot.nb")).unwrap();
44 |     group.bench_function("parse CST of ReliefPlot.nb", |b| {
45 |         b.iter(|| parse_bytes(&relief_plot))
46 |     });
47 | 
48 |     let expanded_company_data_new = fs::read(Path::new(
49 |         "../../Tests/files/large/expandedCompanyDataNew1.m",
50 |     ))
51 |     .unwrap();
52 |     group.bench_function("parse CST of expandedCompanyDataNew1.m", |b| {
53 |         b.iter(|| parse_bytes(&expanded_company_data_new))
54 |     });
55 | 
56 |     //-------------
57 |     // Medium files
58 |     //-------------
59 | 
60 |     group.sampling_mode(SamplingMode::Auto);
61 |     group.sample_size(30);
62 | 
63 |     let geomagnetic_models =
64 |         fs::read(Path::new("../../Tests/files/large/geomagneticmodels.m"))
65 |             .unwrap();
66 |     group.bench_function("parse CST of geomagneticmodels.m", |b| {
67 |         b.iter(|| parse_bytes(&geomagnetic_models))
68 |     });
69 | }
70 | 
71 | criterion_group!(benches, benchmark);
72 | criterion_main!(benches);
73 | 


--------------------------------------------------------------------------------
/crates/wolfram-parser/src/parse/parse_tests/test_parselet.rs:
--------------------------------------------------------------------------------
 1 | use crate::{
 2 |     cst::Cst,
 3 |     parse::{ParseBuilder, ParserSession},
 4 |     parse_cst::ParseCst,
 5 |     ParseOptions,
 6 | };
 7 | 
 8 | 
 9 | #[test]
10 | fn ParseletTest_Bug1() {
11 |     let strIn = "a /: b := c";
12 | 
13 |     let builder = ParseCst::new_builder();
14 | 
15 |     let mut session =
16 |         ParserSession::new(strIn.as_bytes(), builder, &ParseOptions::default());
17 | 
18 |     let tok = session.tokenizer.peek_token();
19 | 
20 |     let () = session.parse_prefix(tok);
21 | 
22 |     let P = session.builder.top_node();
23 | 
24 |     assert_eq!(session.tokenizer.non_fatal_issues.len(), 0);
25 |     assert_eq!(session.tokenizer.fatal_issues.len(), 0);
26 | 
27 |     assert!(matches!(P, Cst::Ternary(_)));
28 | }
29 | 
30 | //
31 | // Regression test: parsing this input used to trigger an assertion failure
32 | //
33 | #[test]
34 | fn ParseletTest_Bug2() {
35 |     //
36 |     let strIn = "a<b ";
37 | 
38 |     let builder = ParseCst::new_builder();
39 | 
40 |     let mut session =
41 |         ParserSession::new(strIn.as_bytes(), builder, &ParseOptions::default());
42 | 
43 |     let tok = session.tokenizer.peek_token();
44 | 
45 |     session.parse_prefix(tok);
46 | 
47 |     assert_eq!(session.tokenizer.non_fatal_issues.len(), 0);
48 |     assert_eq!(session.tokenizer.fatal_issues.len(), 0);
49 | }
50 | 
51 | //
52 | // Regression test: parsing this input used to trigger an assertion failure
53 | //
54 | #[test]
55 | fn ParseletTest_Bug3() {
56 |     let strIn = "a\\[Integral]b\\[Integral]c ";
57 | 
58 |     let builder = ParseCst::new_builder();
59 | 
60 |     let mut session =
61 |         ParserSession::new(strIn.as_bytes(), builder, &ParseOptions::default());
62 | 
63 |     let tok = session.tokenizer.peek_token();
64 | 
65 |     session.parse_prefix(tok);
66 | 
67 |     assert_eq!(session.tokenizer.non_fatal_issues.len(), 0);
68 |     assert_eq!(session.tokenizer.fatal_issues.len(), 0);
69 | }
70 | 
71 | //
72 | // Regression test: parsing this input used to trigger an assertion failure
73 | //
74 | #[test]
75 | fn ParseletTest_Bug4() {
76 |     let strIn = "\\[RawLeftBrace]*\\[RawRightBrace]";
77 | 
78 |     let builder = ParseCst::new_builder();
79 | 
80 |     let mut session =
81 |         ParserSession::new(strIn.as_bytes(), builder, &ParseOptions::default());
82 | 
83 |     let tok = session.tokenizer.peek_token();
84 | 
85 |     session.parse_prefix(tok);
86 | 
87 |     assert_eq!(session.tokenizer.non_fatal_issues.len(), 0);
88 |     assert_eq!(session.tokenizer.fatal_issues.len(), 0);
89 | }
90 | 


--------------------------------------------------------------------------------
/Tests/Characters.mt:
--------------------------------------------------------------------------------
  1 | Print["\n===== Start Characters.mt =====\n"]
  2 | 
  3 | path = FileNameJoin[{DirectoryName[$CurrentTestSource], "CodeParserTestUtils"}]
  4 | PrependTo[$Path, path]
  5 | 
  6 | Needs["CodeParserTestUtils`"]
  7 | 
  8 | 
  9 | Needs["CodeParser`"]
 10 | Needs["CodeParser`Utils`"]
 11 | 
 12 | 
 13 | 
 14 | (*
 15 | guarantee that "\:f3a2" does not get returned as StringNode[String, "\[COMPATIBILITYNoBreak]", <||>]
 16 | *)
 17 | 
 18 | TestMatch[
 19 | 	CodeConcreteParse["\"\\:f3a2\""]
 20 | 	,
 21 | 	ContainerNode[String, {
 22 | 		LeafNode[String, "\"\\:f3a2\"", <|Source -> {{1, 1}, {1, 9}}|>] }, _]
 23 | 	,
 24 | 	TestID->"Characters-20190601-E6Q0I8"
 25 | ]
 26 | 
 27 | 
 28 | (*
 29 | \r and \[RawReturn]
 30 | *)
 31 | Test[
 32 | 	"\"\\r\""
 33 | 	,
 34 | 	Null
 35 | 	,
 36 | 	EquivalenceFunction -> parseEquivalenceFunction
 37 | 	,
 38 | 	TestID->"Characters-20181115-M4K2F9"
 39 | ]
 40 | 
 41 | Test[
 42 | 	"\"\\[RawReturn]\""
 43 | 	,
 44 | 	Null
 45 | 	,
 46 | 	EquivalenceFunction -> parseEquivalenceFunction
 47 | 	,
 48 | 	TestID->"Characters-20181115-A3F2Z1"
 49 | ]
 50 | 
 51 | Test[
 52 | 	"\"\\:000d\""
 53 | 	,
 54 | 	Null
 55 | 	,
 56 | 	EquivalenceFunction -> parseEquivalenceFunction
 57 | 	,
 58 | 	TestID->"Characters-20190126-A6E4K4"
 59 | ]
 60 | 
 61 | 
 62 | 
 63 | (*
 64 | \[RawDoubleQuote]
 65 | *)
 66 | Test[
 67 | 	"\"\\[RawDoubleQuote]\""
 68 | 	,
 69 | 	Null
 70 | 	,
 71 | 	EquivalenceFunction -> parseEquivalenceFunction
 72 | 	,
 73 | 	TestID->"Characters-20190126-S9D1H2"
 74 | ]
 75 | 
 76 | Test[
 77 | 	"\"\\:0022\""
 78 | 	,
 79 | 	Null
 80 | 	,
 81 | 	EquivalenceFunction -> parseEquivalenceFunction
 82 | 	,
 83 | 	TestID->"Characters-20190126-O0I4X0"
 84 | ]
 85 | 
 86 | 
 87 | 
 88 | (*
 89 | \[RawBackslash]
 90 | *)
 91 | Test[
 92 | 	"\"\\[RawBackslash]\""
 93 | 	,
 94 | 	Null
 95 | 	,
 96 | 	EquivalenceFunction -> parseEquivalenceFunction
 97 | 	,
 98 | 	TestID->"Characters-20190126-T0Y0O1"
 99 | ]
100 | 
101 | Test[
102 | 	"\"\\:005c\""
103 | 	,
104 | 	Null
105 | 	,
106 | 	EquivalenceFunction -> parseEquivalenceFunction
107 | 	,
108 | 	TestID->"Characters-20190126-F7Z5P8"
109 | ]
110 | 
111 | 
112 | 
113 | 
114 | 
115 | Test[
116 | 	"\"\\.00\""
117 | 	,
118 | 	Null
119 | 	,
120 | 	EquivalenceFunction -> parseEquivalenceFunction
121 | 	,
122 | 	TestID->"Characters-20190128-I9O3D9"
123 | ]
124 | 
125 | 
126 | Test[
127 | 	"\"\\|010023\""
128 | 	,
129 | 	Null
130 | 	,
131 | 	EquivalenceFunction -> parseEquivalenceFunction
132 | 	,
133 | 	TestID->"Characters-20190129-O8S8M2"
134 | ]
135 | 
136 | 
137 | 
138 | 
139 | 
140 | 
141 | 


--------------------------------------------------------------------------------
/Tests/CodeParser.mt:
--------------------------------------------------------------------------------
  1 | Print["\n===== Start CodeParser.mt =====\n"]
  2 | 
  3 | Needs["CodeParser`"]
  4 | 
  5 | Needs["PacletManager`"] (* for PacletInformation *)
  6 | 
  7 | 
  8 | (*
  9 | 
 10 | Test options
 11 | 
 12 | *)
 13 | 
 14 | 
 15 | 
 16 | (*
 17 | TODO: when targeting 12.1 as a minimum, then look into doing paclet["AssetLocation", "LibraryResources"] or similar
 18 | *)
 19 | location = "Location" /. PacletInformation["CodeParser"]
 20 | 
 21 | pacletInfoFile = FileNameJoin[{location, "PacletInfo.wl"}]
 22 | 
 23 | Block[{$ContextPath = {"PacletManager`", "System`"}, $Context = "Global`"},
 24 | 	(*
 25 | 	put PacletManager` on $ContextPath to guarantee using PacletManager`Paclet symbol
 26 | 	*)
 27 | 	pacletInfo = Get[pacletInfoFile];
 28 | ]
 29 | 
 30 | transport = Transport /. List @@ pacletInfo;
 31 | 
 32 | 
 33 | 
 34 | 
 35 | TestMatch[
 36 | 	CodeParse["Plot[f[x,y],{x,0,1},{y,0,1},PlotRange\[Rule]All];", SourceConvention -> "Test"]
 37 | 	,
 38 | 	_Failure
 39 | 	,
 40 | 	Which[
 41 | 		transport === "ExprLib",
 42 | 			{LibraryFunction::unevaluated}
 43 | 		,
 44 | 		transport === "MathLink",
 45 | 			{}
 46 | 	]
 47 | 	,
 48 | 	TestID->"CodeParser-20200312-G4J9U7"
 49 | ]
 50 | 
 51 | 
 52 | 
 53 | Test[
 54 | 	CodeParse["\\[Pi]"]
 55 | 	,
 56 | 	ContainerNode[String, {
 57 | 		LeafNode[Symbol, "Pi", <|Source->{{1,1}, {1,6}}|>]}, <|Source->{{1,1}, {1,6}}|>]
 58 | 	,
 59 | 	TestID->"CodeParser-20220910-I3Q6U1"
 60 | ]
 61 | 
 62 | Test[
 63 | 	CodeParse["\\[Degree]"]
 64 | 	,
 65 | 	ContainerNode[String, {
 66 | 		LeafNode[Symbol, "Degree", <|Source->{{1,1}, {1,10}}|>]}, <|Source->{{1,1}, {1,10}}|>]
 67 | 	,
 68 | 	TestID->"CodeParser-20220910-Z3K4F3"
 69 | ]
 70 | 
 71 | Test[
 72 | 	CodeParse["\\[Infinity]"]
 73 | 	,
 74 | 	ContainerNode[String, {
 75 | 		LeafNode[Symbol, "Infinity", <|Source->{{1,1}, {1,12}}|>]}, <|Source->{{1,1}, {1,12}}|>]
 76 | 	,
 77 | 	TestID->"CodeParser-20220910-T2T3W7"
 78 | ]
 79 | 
 80 | Test[
 81 | 	CodeParse["\\[ExponentialE]"]
 82 | 	,
 83 | 	ContainerNode[String, {
 84 | 		LeafNode[Symbol, "E", <|Source->{{1,1}, {1,16}}|>]}, <|Source->{{1,1}, {1,16}}|>]
 85 | 	,
 86 | 	TestID->"CodeParser-20220910-H2B2B6"
 87 | ]
 88 | 
 89 | Test[
 90 | 	CodeParse["\\[ImaginaryI]"]
 91 | 	,
 92 | 	ContainerNode[String, {
 93 | 		LeafNode[Symbol, "I", <|Source->{{1,1}, {1,14}}|>]}, <|Source->{{1,1}, {1,14}}|>]
 94 | 	,
 95 | 	TestID->"CodeParser-20220910-M6R5R1"
 96 | ]
 97 | 
 98 | Test[
 99 | 	CodeParse["\\[ImaginaryJ]"]
100 | 	,
101 | 	ContainerNode[String, {
102 | 		LeafNode[Symbol, "I", <|Source->{{1,1}, {1,14}}|>]}, <|Source->{{1,1}, {1,14}}|>]
103 | 	,
104 | 	TestID->"CodeParser-20220910-C4S7C2"
105 | ]
106 | 
107 | 
108 | 


--------------------------------------------------------------------------------
/Tests/Unsafe.mt:
--------------------------------------------------------------------------------
 1 | Print["\n===== Start Unsafe.mt =====\n"]
 2 | 
 3 | Needs["CodeParser`"]
 4 | 
 5 | Test[
 6 | 	CodeParse[{65, 16^^ed, 16^^a0, 16^^80, 65}]
 7 | 	,
 8 | 	ContainerNode[Byte, {Missing[
 9 |    "UnsafeCharacterEncoding_StraySurrogate"]}, <|SyntaxIssues -> {EncodingIssue[
10 |      "StraySurrogate", "Stray surrogate.", 
11 |      "Fatal", <|Source -> {{1, 2}, {1, 3}}, 
12 |       ConfidenceLevel -> 1.|>]}|>]
13 | 	,
14 | 	TestID->"Unsafe-20211223-G8M5U2"
15 | ]
16 | 
17 | 
18 | Test[
19 | 	CodeParse[{16^^E1, 16^^A0, 16^^C0}]
20 | 	,
21 | 	ContainerNode[Byte, {Missing[
22 |    "UnsafeCharacterEncoding_IncompleteUTF8Sequence"]}, <|SyntaxIssues -> {EncodingIssue[
23 |      "IncompleteUTF8Sequence", "Incomplete UTF-8 sequence.", 
24 |      "Fatal", <|Source -> {{1, 1}, {1, 2}}, ConfidenceLevel -> 1.|>], 
25 |     EncodingIssue["IncompleteUTF8Sequence", "Incomplete UTF-8 sequence.", 
26 |      "Fatal", <|Source -> {{1, 2}, {1, 3}}, 
27 |       ConfidenceLevel -> 1.|>]}|>]
28 | 	,
29 | 	TestID->"Unsafe-20211223-B6H1C5"
30 | ]
31 | 
32 | 
33 | Test[
34 | 	CodeParse[{16^^C0 , 16^^80}]
35 | 	,
36 | 	ContainerNode[Byte, {Missing[
37 |    "UnsafeCharacterEncoding_IncompleteUTF8Sequence"]}, <|SyntaxIssues -> {EncodingIssue[
38 |      "IncompleteUTF8Sequence", "Incomplete UTF-8 sequence.", 
39 |      "Fatal", <|Source -> {{1, 1}, {1, 2}}, ConfidenceLevel -> 1.|>], 
40 |     EncodingIssue["IncompleteUTF8Sequence", "Incomplete UTF-8 sequence.", 
41 |      "Fatal", <|Source -> {{1, 2}, {1, 3}}, 
42 |       ConfidenceLevel -> 1.|>]}|>]
43 | 	,
44 | 	TestID->"Unsafe-20211224-A8O4H2"
45 | ]
46 | 
47 | 
48 | unsafe = FileNameJoin[{DirectoryName[$CurrentTestSource], "files", "small", "unsafe1.wl"}]
49 | 
50 | Test[
51 | 	CodeParse[File[unsafe]]
52 | 	,
53 | 	ContainerNode[File, {
54 | 		Missing["UnsafeCharacterEncoding_IncompleteUTF8Sequence"]}, <|
55 | 			SyntaxIssues -> {
56 | 				EncodingIssue["IncompleteUTF8Sequence", "Incomplete UTF-8 sequence.", "Fatal", <|Source -> {{1, 16}, {1, 17}}, ConfidenceLevel -> 1.|>]},
57 | 			"FileName" -> unsafe|>]
58 | 	,
59 | 	TestID->"Unsafe-20220121-L0W6B5"
60 | ]
61 | 
62 | 
63 | (*
64 | from bug 420623
65 | 
66 | unsafe2.wl has bytes:
67 | 
68 | 0x5c 0xa9
69 | 
70 | 0x5c is '\\' backslash character
71 | 
72 | 0xa9 is incomplete UTF-8 sequence
73 | *)
74 | unsafe = FileNameJoin[{DirectoryName[$CurrentTestSource], "files", "small", "unsafe2.wl"}]
75 | 
76 | Test[
77 | 	CodeParse[File[unsafe]]
78 | 	,
79 | 	ContainerNode[File, {
80 | 		Missing["UnsafeCharacterEncoding_IncompleteUTF8Sequence"]}, <|
81 | 			SyntaxIssues -> {
82 | 				EncodingIssue["IncompleteUTF8Sequence", "Incomplete UTF-8 sequence.", "Fatal", <|Source -> {{1, 2}, {1, 3}}, ConfidenceLevel -> 1.|>]},
83 | 			"FileName" -> unsafe|>]
84 | 	,
85 | 	TestID->"Unsafe-20220223-W0U9G9"
86 | ]
87 | 
88 | 
89 | 
90 | 
91 | 
92 | 


--------------------------------------------------------------------------------
/CodeTools/Generate/GenerateSources.wl:
--------------------------------------------------------------------------------
  1 | BeginPackage["CodeTools`Generate`GenerateSources`"]
  2 | 
  3 | buildDirFlagPosition
  4 | 
  5 | buildDir
  6 | 
  7 | srcDirFlagPosition
  8 | 
  9 | srcDir
 10 | 
 11 | script
 12 | 
 13 | pacletFlagPosition
 14 | 
 15 | paclet
 16 | 
 17 | retryFlagPosition
 18 | 
 19 | retry
 20 | 
 21 | pacletLayoutDirFlagPosition
 22 | 
 23 | pacletLayoutDir
 24 | 
 25 | 
 26 | checkBuildDir
 27 | 
 28 | checkSrcDir
 29 | 
 30 | checkPaclet
 31 | 
 32 | checkPacletLayoutDir
 33 | 
 34 | 
 35 | Begin["`Private`"]
 36 | 
 37 | buildDirFlagPosition = FirstPosition[$CommandLine, "-buildDir"]
 38 | 
 39 | buildDir := buildDir = $CommandLine[[buildDirFlagPosition[[1]] + 1]]
 40 | 
 41 | srcDirFlagPosition = FirstPosition[$CommandLine, "-srcDir"]
 42 | 
 43 | srcDir := srcDir = $CommandLine[[srcDirFlagPosition[[1]] + 1]]
 44 | 
 45 | scriptPosition = FirstPosition[$CommandLine, "-script"]
 46 | 
 47 | script := script = $CommandLine[[scriptPosition[[1]] + 1]]
 48 | 
 49 | pacletFlagPosition = FirstPosition[$CommandLine, "-paclet"]
 50 | 
 51 | paclet := paclet = $CommandLine[[pacletFlagPosition[[1]] + 1]]
 52 | 
 53 | retryFlagPosition = FirstPosition[$CommandLine, "-retry"]
 54 | 
 55 | retry = !MissingQ[retryFlagPosition]
 56 | 
 57 | pacletLayoutDirFlagPosition = FirstPosition[$CommandLine, "-pacletLayoutDir"]
 58 | 
 59 | pacletLayoutDir := pacletLayoutDir = $CommandLine[[pacletLayoutDirFlagPosition[[1]] + 1]]
 60 | 
 61 | 
 62 | checkBuildDir[] :=
 63 | Module[{},
 64 |   If[MissingQ[buildDirFlagPosition],
 65 |     Print["Cannot proceed; buildDir flag missing"];
 66 |     Quit[1]
 67 |   ];
 68 | 
 69 |   If[!DirectoryQ[buildDir],
 70 |     Print["Cannot proceed; Unsupported buildDir: ", buildDir];
 71 |     Quit[1]
 72 |   ];
 73 | ]
 74 | 
 75 | 
 76 | checkSrcDir[] :=
 77 | Module[{},
 78 |   If[MissingQ[srcDirFlagPosition],
 79 |     Print["Cannot proceed; srcDir flag missing"];
 80 |     Quit[1]
 81 |   ];
 82 | 
 83 |   If[!DirectoryQ[srcDir],
 84 |     Print["Cannot proceed; Unsupported srcDir: ", srcDir];
 85 |     Quit[1]
 86 |   ];
 87 | ]
 88 | 
 89 | 
 90 | checkPaclet[] :=
 91 | Module[{},
 92 |   If[MissingQ[pacletFlagPosition],
 93 |     Print["Cannot proceed; paclet flag missing"];
 94 |     Quit[1]
 95 |   ];
 96 | ]
 97 | 
 98 | 
 99 | checkPacletLayoutDir[] :=
100 | Module[{},
101 |   If[MissingQ[pacletLayoutDirFlagPosition],
102 |     Print["Cannot proceed; pacletLayoutDir flag missing"];
103 |     Quit[1]
104 |   ];
105 | 
106 |   If[!DirectoryQ[pacletLayoutDir],
107 |     Print["Cannot proceed; Unsupported pacletLayoutDir: ", pacletLayoutDir];
108 |     Quit[1]
109 |   ];
110 | 
111 |   If[FileNameTake[pacletLayoutDir, -1] =!= "paclet",
112 |     Print["Cannot proceed; Unsupported pacletLayoutDir: ", pacletLayoutDir];
113 |     Quit[1]
114 |   ]; 
115 | ]
116 | 
117 | End[]
118 | 
119 | EndPackage[]
120 | 


--------------------------------------------------------------------------------
/crates/wolfram-parser/src/tests/test_node.rs:
--------------------------------------------------------------------------------
 1 | use crate::{
 2 |     cst::{BinaryNode, CompoundNode, Cst, OperatorNode},
 3 |     macros::{src, token},
 4 |     parse::operators::{BinaryOperator, CompoundOperator},
 5 |     parse_cst,
 6 |     source::Span,
 7 |     tests::assert_src,
 8 |     NodeSeq, ParseOptions,
 9 | };
10 | 
11 | use pretty_assertions::assert_eq;
12 | 
13 | 
14 | #[test]
15 | fn NodeTest_Bug1() {
16 |     let input = "a_.";
17 | 
18 |     let NodeSeq(tokens) = crate::tokenize(input, &ParseOptions::default());
19 | 
20 |     assert_eq!(
21 |         tokens,
22 |         vec![
23 |             token!(Symbol, "a", src!(1:1-1:2)),
24 |             token!(UnderDot, "_.", src!(1:2-1:4))
25 |         ]
26 |     );
27 | 
28 |     let [T1, T2] = tokens.try_into().unwrap();
29 | 
30 |     let N = CompoundNode::new2(
31 |         CompoundOperator::CodeParser_PatternOptionalDefault,
32 |         T1,
33 |         T2,
34 |     );
35 | 
36 |     let NSource = Cst::Compound(N).get_source();
37 | 
38 |     assert_eq!(NSource.start(), src!(1:1).into());
39 |     assert_eq!(NSource.end(), src!(1:4).into());
40 | 
41 |     // FIXME: Check that no issues were generated; make tokenize() return a
42 |     //        ParseResult
43 |     // assert_eq!(session.non_fatal_issues().len(), 0);
44 |     // assert_eq!(session.fatal_issues().len(), 0);
45 | }
46 | 
47 | #[test]
48 | fn test_parse_span() {
49 |     // Binary Span with implicit 1st arg
50 |     assert_eq!(
51 |         parse_cst(";; b", &Default::default()).syntax,
52 |         assert_src!(1:1-5 => Cst::Binary(BinaryNode(OperatorNode {
53 |             op: BinaryOperator::Span,
54 |             children: NodeSeq(vec![
55 |                 Cst::Token(token![
56 |                     Fake_ImplicitOne,
57 |                     "",
58 |                     Span::from(src!(1:1-1:1))
59 |                 ]),
60 |                 Cst::Token(token![SemiSemi, ";;", Span::from(src!(1:1-1:3))]),
61 |                 Cst::Token(token![Whitespace, " ", Span::from(src!(1:3-1:4))]),
62 |                 Cst::Token(token![Symbol, "b", Span::from(src!(1:4-1:5))]),
63 |             ]),
64 |         })))
65 |     );
66 | 
67 | 
68 |     // Binary Span
69 |     assert_eq!(
70 |         parse_cst("a ;; b", &Default::default()).syntax,
71 |         assert_src!(1:1-7 => Cst::Binary(BinaryNode(OperatorNode {
72 |             op: BinaryOperator::Span,
73 |             children: NodeSeq(vec![
74 |                 Cst::Token(token![Symbol, "a", Span::from(src!(1:1-1:2))]),
75 |                 Cst::Token(token![Whitespace, " ", Span::from(src!(1:2-1:3))]),
76 |                 Cst::Token(token![SemiSemi, ";;", Span::from(src!(1:3-1:5))]),
77 |                 Cst::Token(token![Whitespace, " ", Span::from(src!(1:5-1:6))]),
78 |                 Cst::Token(token![Symbol, "b", Span::from(src!(1:6-1:7))]),
79 |             ]),
80 | 
81 |         })))
82 |     );
83 | }
84 | 


--------------------------------------------------------------------------------
/CodeParser/Kernel/Node.wl:
--------------------------------------------------------------------------------
  1 | BeginPackage["CodeParser`Node`"]
  2 | 
  3 | Begin["`Private`"]
  4 | 
  5 | Needs["CodeParser`"]
  6 | Needs["CodeParser`Utils`"]
  7 | 
  8 | 
  9 | (*
 10 | Some selectors
 11 | *)
 12 | 
 13 | LeafNode[_, str_, _]["String"] := str
 14 | 
 15 | 
 16 | 
 17 | (*
 18 | Some attributes
 19 | *)
 20 | 
 21 | Attributes[CodeNode] = {HoldAllComplete}
 22 | 
 23 | 
 24 | 
 25 | (*
 26 | ToNode[sym] returns a LeafNode[Symbol]
 27 | *)
 28 | ToNode[s_Symbol] :=
 29 |   If[Context[s] == "System`",
 30 |     LeafNode[Symbol, SymbolName[s], <||>]
 31 |     ,
 32 |     (*
 33 |     Play it safe for now and fully qualify any non-System` symbol
 34 |     *)
 35 |     LeafNode[Symbol, Context[s]<>SymbolName[s], <||>]
 36 |   ]
 37 | 
 38 | (*
 39 | ToNode[string] returns a LeafNode[String]
 40 | *)
 41 | ToNode[s_String] := LeafNode[String, escapeString[s], <||>]
 42 | 
 43 | (*
 44 | ToNode[integer] returns a LeafNode[Integer]
 45 | ToNode[real] returns a LeafNode[Real]
 46 | *)
 47 | ToNode[i_Integer] := LeafNode[Integer, ToString[i], <||>]
 48 | ToNode[r_Real] := LeafNode[Real, ToString[r, InputForm], <||>]
 49 | 
 50 | (*
 51 | ToNode[rational] returns:
 52 |   if possible to convert to Rational literal then return LeafNode[Rational]
 53 |   otherwise, return CallNode[Rational]
 54 | *)
 55 | ToNode[r_Rational] :=
 56 | Catch[
 57 | Module[{num, den, e},
 58 |   (*
 59 |   TODO: when targeting 12.0 as a minimum, use NumeratorDenominator[r]
 60 |   *)
 61 |   {num, den} = {Numerator[r], Denominator[r]};
 62 |   (*
 63 |   loop between 2 and 36 and test if the base works
 64 | 
 65 |   loop from 36 to 2, going down
 66 | 
 67 |   out of all of these ways of constructing 1/16:
 68 |   2^^1*^-4
 69 |   4^^1*^-2
 70 |   8^^4*^-2
 71 |   16^^1*^-1
 72 | 
 73 |   prefer to do 16^^1*^-1
 74 |   that is, prefer the highest base
 75 |   *)
 76 |   Do[
 77 |     e = IntegerExponent[den, b];
 78 |     If[e != 0,
 79 |      Throw[LeafNode[Rational, ToString[b] <> "^^" <> IntegerString[num, b] <> "*^-" <> ToString[e], <||>]]
 80 |     ]
 81 |     ,
 82 |     {b, 36, 2, -1}
 83 |   ];
 84 |   CallNode[LeafNode[Symbol, "Rational", <||>], {ToNode[num], ToNode[den]}, <||>]
 85 | ]]
 86 | 
 87 | ToNode[f_?FailureQ] := f
 88 | 
 89 | ToNode[args___] :=
 90 |   Failure["Unhandled", <| "Function" -> ToNode, "Arguments" -> HoldForm[{args}] |>]
 91 | 
 92 | 
 93 | 
 94 | FromNode[LeafNode[Symbol, s_, _]] :=
 95 |   Symbol[s]
 96 | 
 97 | (*
 98 | No simple way to convert "\"123\"" to "123"
 99 | *)
100 | FromNode[LeafNode[String, s_, _]] :=
101 |   ToExpression[s]
102 | 
103 | (*
104 | No simple way to convert "123.456``7" to 123.456``7
105 | *)
106 | FromNode[LeafNode[Integer, i_, _]] :=
107 |   ToExpression[i]
108 | 
109 | FromNode[LeafNode[Real, r_, _]] :=
110 |   ToExpression[r]
111 | 
112 | FromNode[LeafNode[Rational, r_, _]] :=
113 |   ToExpression[r]
114 | 
115 | FromNode[f_?FailureQ] := f
116 | 
117 | FromNode[args___] :=
118 |   Failure["Unhandled", <| "Function" -> FromNode, "Arguments" -> HoldForm[{args}] |>]
119 | 
120 | 
121 | 
122 | End[]
123 | 
124 | EndPackage[]
125 | 


--------------------------------------------------------------------------------
/CodeParser/Generate/Common.wl:
--------------------------------------------------------------------------------
  1 | BeginPackage["CodeParser`Generate`Common`"]
  2 | 
  3 | toGlobal
  4 | toTokenEnumVariant
  5 | 
  6 | generatedCPPDir
  7 | generatedCPPIncludeDir
  8 | generatedCPPSrcDir
  9 | 
 10 | dataDir
 11 | 
 12 | importedPrefixParselets
 13 | 
 14 | importedInfixParselets
 15 | 
 16 | importedLongNames
 17 | 
 18 | importedPrecedenceSource
 19 | 
 20 | FatalError::usage = "FatalError[expr, ...] prints an error message and exits with a fatal error code."
 21 | 
 22 | Begin["`Private`"]
 23 | 
 24 | (*
 25 | Do not allow PacletManager to participate in finding `Generate` files
 26 | 
 27 | PacletManager will find e.g. CodeParser/Kernel/TokenEnum.wl when asked to find CodeParser`Generate`TokenEnum`
 28 | 
 29 | related issues: PACMAN-54
 30 | *)
 31 | Block[{Internal`PacletFindFile = Null&},
 32 | Needs["CodeTools`Generate`GenerateSources`"];
 33 | ]
 34 | 
 35 | 
 36 | (*
 37 | toGlobal: replaces ` and $ with _; symbols (and CodePoint`LongName` strings) are also uppercased
 38 | *)
 39 | toGlobal[n0_String] := Module[{n = n0},
 40 | 	(* TODO(cleanup): This is a workaround *)
 41 | 	If[StringStartsQ[n, "CodePoint`LongName`"],
 42 | 		n = ToUpperCase[n]
 43 | 	];
 44 | 
 45 | 	StringReplace[n, {"`" -> "_", "$" -> "_"}]
 46 | ]
 47 | 
 48 | toGlobal[n_Symbol] := (
 49 | 	If[StringStartsQ[Context[n], "Precedence`"],
 50 | 		StringReplace[
 51 | 			toGlobal[ToUpperCase[ToString[n]]],
 52 | 			"PRECEDENCE_" -> "Precedence::"
 53 | 		]
 54 | 		,
 55 | 		toGlobal[ToUpperCase[ToString[n]]]
 56 | 	]
 57 | )
 58 | 
 59 | toGlobal[n_, "CodePoint"] :=
 60 | 	Replace[n, {
 61 | 		CodePoint`CRLF -> "CodePoint::CRLF",
 62 | 		"CodePoint`LongName`RawDoubleQuote" -> toGlobal[n],
 63 | 		"CodePoint`LongName`RawBackslash" -> toGlobal[n],
 64 | 		other_String :> StringJoin["CodePoint::from_char(", toGlobal[other], ")"]
 65 | 	}]
 66 | 
 67 | toGlobal[n_, "UpperCamelCase"] :=
 68 |   StringReplace[ToString[n], {"`" -> "_", "$" -> "_"}]
 69 | 
 70 | 
 71 | toGlobal[sym_Symbol, "DefinePrecedence"] :=
 72 | 	StringTrim[toGlobal[sym], "Precedence::"]
 73 | 
 74 | toGlobal[args___] := FatalError[{"BAD ARGS: ", args}]
 75 | 
 76 | toTokenEnumVariant[name_] :=
 77 | 	StringReplace[
 78 | 		toGlobal[name, "UpperCamelCase"],
 79 | 		StartOfString ~~ "Token_" -> ""
 80 | 	]
 81 | 
 82 | (* generatedCPPDir = FileNameJoin[{buildDir, "generated", "rust"}] *)
 83 | generatedCPPDir = FileNameJoin[{srcDir, "crates", "wolfram-parser", "src", "generated"}]
 84 | generatedCPPIncludeDir = FileNameJoin[{generatedCPPDir}]
 85 | generatedCPPSrcDir = FileNameJoin[{generatedCPPDir}]
 86 | 
 87 | dataDir := dataDir = FileNameJoin[{srcDir, "CodeParser", "Data"}]
 88 | 
 89 | importedPrefixParselets := importedPrefixParselets = Get[FileNameJoin[{dataDir, "PrefixParselets.wl"}]]
 90 | 
 91 | importedInfixParselets := importedInfixParselets = Get[FileNameJoin[{dataDir, "InfixParselets.wl"}]]
 92 | 
 93 | importedLongNames := importedLongNames = Get[FileNameJoin[{dataDir, "LongNames.wl"}]]
 94 | 
 95 | importedPrecedenceSource := importedPrecedenceSource = Get[FileNameJoin[{dataDir, "Precedence.wl"}]]
 96 | 
 97 | FatalError[args___] := (
 98 | 	Print["\n\nFATAL ERROR: ", args, "\n\n"];
 99 | 
100 | 	Exit[-1]
101 | )
102 | 
103 | End[]
104 | 
105 | EndPackage[]
106 | 


--------------------------------------------------------------------------------
/docs/fuzz-testing.md:
--------------------------------------------------------------------------------
  1 | 
  2 | # Fuzz testing with AFL++
  3 | 
  4 | https://aflplus.plus/
  5 | 
  6 | 
  7 | 
  8 | ## Building AFL++
  9 | 
 10 | 
 11 | ### prerequisite: coreutils is installed
 12 | 
 13 | The AFL++ install step assumes that the `install` command on your PATH is the one from coreutils.
 14 | 
 15 | 
 16 | this is the bad `install`:
 17 | ```
 18 | % which install
 19 | /usr/bin/install
 20 | ```
 21 | 
 22 | ```
 23 | brew install coreutils
 24 | ```
 25 | 
 26 | you will see:
 27 | ```
 28 | Commands also provided by macOS and the commands dir, dircolors, vdir have been installed with the prefix "g".
 29 | If you need to use these commands with their normal names, you can add a "gnubin" directory to your PATH with:
 30 |   PATH="/usr/local/opt/coreutils/libexec/gnubin:$PATH"
 31 | ```
 32 | 
 33 | do what it says:
 34 | ```
 35 | % export PATH="/usr/local/opt/coreutils/libexec/gnubin:$PATH"
 36 | ```
 37 | 
 38 | or:
 39 | ```
 40 | % export PATH="/opt/homebrew/opt/coreutils/libexec/gnubin:$PATH"
 41 | ```
 42 | 
 43 | this is the good `install`:
 44 | ```
 45 | % which install                                              
 46 | /usr/local/opt/coreutils/libexec/gnubin/install
 47 | ```
 48 | 
 49 | 
 50 | ### prerequisite: LLVM clang is installed
 51 | 
 52 | AFL++ expects to be built with LLVM clang and NOT Apple clang.
 53 | 
 54 | this is the bad clang:
 55 | ```
 56 | % which clang
 57 | /usr/bin/clang
 58 | ```
 59 | 
 60 | ```
 61 | brew install llvm
 62 | ```
 63 | 
 64 | you will see:
 65 | ```
 66 | If you need to have llvm first in your PATH, run:
 67 |   echo 'export PATH="/usr/local/opt/llvm/bin:$PATH"' >> ~/.zshrc
 68 | ```
 69 | 
 70 | do what it says:
 71 | ```
 72 | % export PATH="/usr/local/opt/llvm/bin:$PATH"
 73 | ```
 74 | 
 75 | or:
 76 | ```
 77 | % export PATH="/opt/homebrew/opt/llvm/bin:$PATH"
 78 | ```
 79 | 
 80 | this is the good `clang`:
 81 | ```
 82 | % which clang                                
 83 | /usr/local/opt/llvm/bin/clang
 84 | ```
 85 | 
 86 | 
 87 | ### building AFL++
 88 | 
 89 | https://aflplus.plus/building/
 90 | 
 91 | 
 92 | ```
 93 | git clone https://github.com/AFLplusplus/AFLplusplus
 94 | 
 95 | cd AFLplusplus
 96 | 
 97 | make clean
 98 | 
 99 | make distrib
100 | 
101 | sudo make install
102 | ```
103 | 
104 | Verify afl-fuzz is installed:
105 | ```
106 | % which afl-fuzz                                                                                                                                     
107 | /usr/local/bin/afl-fuzz
108 | ```
109 | 
110 | 
111 | ## Building CodeParser
112 | 
113 | ```
114 | mkdir build-afl
115 | 
116 | cd build-afl
117 | 
118 | cmake -DTRANSPORT=None -DBUILD_EXE=ON -DCMAKE_BUILD_TYPE=Debug -DCMAKE_C_COMPILER=afl-clang-fast -DCMAKE_CXX_COMPILER=afl-clang-fast++ -DMATHEMATICA_INSTALL_DIR=/Applications/Mathematica.app/Contents ..
119 | 
120 | cmake --build . --target codeparser-exe
121 | ```
122 | 
123 | 
124 | ## Running CodeParser with AFL++
125 | 
126 | ```
127 | cd build-afl
128 | 
129 | rm -rf afl_out
130 | 
131 | afl-fuzz -i ../Tests/files/small -o afl_out/ -x ../Tests/wl.dict -D -- cpp/src/exe/codeparser -file @@
132 | ```
133 | 
134 | 
135 | ## Troubleshooting
136 | 
137 | You might see this error:
138 | ```
139 | [-]  SYSTEM ERROR : shmget() failed, try running afl-system-config
140 |     Stop location : afl_shm_init(), src/afl-sharedmem.c:252
141 |        OS message : Invalid argument
142 | ```
143 | 
144 | do what it says and run:
145 | ```
146 | sudo afl-system-config
147 | ```
148 | 
149 | 
150 | 
151 | 
152 | 
153 | 
154 | 
155 | 
156 | 
157 | 
158 | 
159 | 


--------------------------------------------------------------------------------
/CodeParser/Generate/Precedence.wl:
--------------------------------------------------------------------------------
  1 | (* ::Package::"Tags"-><|"SuspiciousSessionSymbol" -> <|Enabled -> False|>|>:: *)
  2 | 
  3 | If[!MemberQ[$Path, #], PrependTo[$Path, #]]&[DirectoryName[$InputFileName, 3]]
  4 | 
  5 | BeginPackage["CodeParser`Generate`Precedence`"]
  6 | 
  7 | Begin["`Private`"]
  8 | 
  9 | (*
 10 | Do not allow PacletManager to participate in finding `Generate` files
 11 | 
 12 | PacletManager will find e.g. CodeParser/Kernel/TokenEnum.wl when asked to find CodeParser`Generate`TokenEnum`
 13 | 
 14 | related issues: PACMAN-54
 15 | *)
 16 | Block[{Internal`PacletFindFile = Null&},
 17 | Needs["CodeParser`Generate`Common`"];
 18 | Needs["CodeTools`Generate`GenerateSources`"];
 19 | ]
 20 | 
 21 | 
 22 | checkBuildDir[]
 23 | 
 24 | 
 25 | associativityToValue[Associativity`NonRight] = 0
 26 | associativityToValue[Associativity`Right] = 1
 27 | 
 28 | 
 29 | generate[] := (
 30 | (* Resolve the symbolic Precedence table to integers, then export it as Rust and WL sources *)
 31 | Print["Generating Precedence..."];
 32 | 
 33 | If[FailureQ[importedPrecedenceSource],
 34 |   Print[importedPrecedenceSource];
 35 |   Quit[1]
 36 | ];
 37 | 
 38 | (*
 39 | resolve the symbolic values in the Precedence table to integer values
 40 | *)
 41 | cur = {0, Associativity`NonRight};
 42 | enumMap = <||>;
 43 | KeyValueMap[(
 44 |   Which[
 45 |     Head[#2] === Symbol, cur = enumMap[#2],
 46 |     Head[#2[[1]]] === Integer, cur = #2,
 47 |     #2[[1]] === Next, cur[[1]]++;cur[[2]] = #2[[2]],
 48 |     True, Print["Unhandled precedence"]; Quit[1]
 49 |   ];
 50 |   AssociateTo[enumMap, #1 -> cur])&
 51 |   ,
 52 |   importedPrecedenceSource
 53 | ];
 54 | 
 55 | 
 56 | (*
 57 | sanity check that all precedences are in order
 58 | *)
 59 | cur = -Infinity;
 60 | KeyValueMap[
 61 |   If[!TrueQ[#2[[1]] >= cur],
 62 |     Print["Precedence is out of order: ", #1 -> #2];
 63 |     Quit[1]
 64 |     ,
 65 |     cur = #2[[1]]
 66 |   ]&
 67 |   ,
 68 |   enumMap
 69 | ];
 70 | 
 71 | 
 72 | precedenceCPPHeader = {
 73 | "\
 74 | //
 75 | // AUTO GENERATED FILE
 76 | // DO NOT MODIFY
 77 | //
 78 | 
 79 | #![allow(dead_code)]
 80 | 
 81 | use crate::precedence::Precedence;
 82 | 
 83 | impl Precedence {\
 84 | "} ~Join~
 85 | 	KeyValueMap[
 86 | 		{key, value} |-> Row[{
 87 | 			"\tpub const ",
 88 | 			toGlobal[key, "DefinePrecedence"],
 89 | 			": Precedence = Precedence::new(",
 90 | 			BitShiftLeft[value[[1]], 1] + associativityToValue[value[[2]]],
 91 | 			"); // prec: ", value[[1]], ", assoc: ", value[[2]]
 92 | 		}],
 93 | 		enumMap
 94 | 	]
 95 | ~Join~ {
 96 | 	"\n}"
 97 | };
 98 | 
 99 | Print["exporting precedence_values.rs"];
100 | res = Export[FileNameJoin[{generatedCPPIncludeDir, "precedence_values.rs"}], Column[precedenceCPPHeader], "String"];
101 | 
102 | Print[res];
103 | 
104 | If[FailureQ[res],
105 |   Quit[1]
106 | ];
107 | 
108 | precedenceWL = {
109 | "
110 | (*
111 | AUTO GENERATED FILE
112 | DO NOT MODIFY
113 | *)
114 | 
115 | <|"} ~Join~
116 |   KeyValueMap[(Row[{#1, " -> ", BitShiftLeft[#2[[1]], 1] + associativityToValue[#2[[2]]], ",", "(* prec: ", #2[[1]], ", assoc: ", #2[[2]], " *)"}])&, enumMap] ~Join~ {
117 | "Nothing
118 | |>
119 | "
120 | };
121 | 
122 | Print["exporting Precedence.wl"];
123 | res = Export[FileNameJoin[{buildDir, "paclet", "CodeParser", "Resources", "Generated", "Precedence.wl"}], Column[precedenceWL], "String"];
124 | 
125 | Print[res];
126 | 
127 | If[FailureQ[res],
128 |   Quit[1]
129 | ];
130 | 
131 | Print["Done Precedence"]
132 | )
133 | (* run generate[] only when this file is the script being executed; a non-string `script` ends in Quit[1] *)
134 | If[!StringQ[script],
135 |   Quit[1]
136 | ]
137 | If[AbsoluteFileName[script] === AbsoluteFileName[$InputFileName],
138 | generate[]
139 | ]
140 | 
141 | End[]
142 | 
143 | EndPackage[]
144 | 


--------------------------------------------------------------------------------
/CodeParser/Kernel/Definitions.wl:
--------------------------------------------------------------------------------
  1 | BeginPackage["CodeParser`Definitions`"]
  2 | 
  3 | 
  4 | DefinitionSymbols
  5 | 
  6 | 
  7 | Begin["`Private`"]
  8 | 
  9 | Needs["CodeParser`"]
 10 | Needs["CodeParser`Utils`"]
 11 | 
 12 | 
 13 | (*
 14 | given an LHS AST node, determine the symbol that gives the definition
 15 | *)
 16 | 
 17 | DefinitionSymbols[n:LeafNode[Symbol, _, _]] := {n}
 18 | 
 19 | (*
 20 | this is really a definition for Subscript
 21 | 
 22 | adhere to principle of not re-implementing MakeExpression and do not try to refine
 23 | *)
 24 | DefinitionSymbols[n:BoxNode[SubscriptBox, _, _]] := {n}
 25 | 
 26 | (*
 27 | this is really a definition for Power, SuperStar, etc.
 28 | 
 29 | SuperscriptBox["a", "b"] is a definition for Power
 30 | 
 31 | SuperscriptBox["a", "*"] is a definition for SuperStar
 32 | 
 33 | adhere to principle of not re-implementing MakeExpression and do not try to refine
 34 | *)
 35 | DefinitionSymbols[n:BoxNode[SuperscriptBox, _, _]] := {n}
 36 | 
 37 | (* these node kinds contribute no definition symbols *)
 38 | DefinitionSymbols[LeafNode[_, _, _]] := {}
 39 | DefinitionSymbols[ErrorNode[_, _, _]] := {}
 40 | DefinitionSymbols[AbstractSyntaxErrorNode[_, _, _]] := {}
 41 | DefinitionSymbols[PrefixNode[PrefixLinearSyntaxBang, _, _]] := {}
 42 | 
 43 | (* pattern wrappers: recurse into the wrapped sub-node (for Pattern, the second argument) *)
 44 | DefinitionSymbols[CallNode[LeafNode[Symbol, "Condition", _], {node_, _}, _]] := DefinitionSymbols[node]
 45 | DefinitionSymbols[CallNode[LeafNode[Symbol, "Pattern", _], {_, node_}, _]] := DefinitionSymbols[node]
 46 | DefinitionSymbols[CallNode[LeafNode[Symbol, "PatternTest", _], {node_, _}, _]] := DefinitionSymbols[node]
 47 | DefinitionSymbols[CallNode[LeafNode[Symbol, "HoldPattern", _], {node_}, _]] := DefinitionSymbols[node]
 48 | (* Attributes[f], Format[f], Options[f], and MessageName[f, ...]: recurse into f *)
 49 | DefinitionSymbols[CallNode[LeafNode[Symbol, "Attributes", _], {node_}, _]] := DefinitionSymbols[node]
 50 | DefinitionSymbols[CallNode[LeafNode[Symbol, "Format", _], {node_}, _]] := DefinitionSymbols[node]
 51 | DefinitionSymbols[CallNode[LeafNode[Symbol, "Options", _], {node_}, _]] := DefinitionSymbols[node]
 52 | DefinitionSymbols[CallNode[LeafNode[Symbol, "MessageName", _], {node_, _, ___}, _]] := DefinitionSymbols[node]
 53 | (* Blank[h]: recurse into h *)
 54 | DefinitionSymbols[CallNode[LeafNode[Symbol, "Blank", _], {node_}, _]] := DefinitionSymbols[node]
 55 | 
 56 | (*
 57 | Something like a /: (b|c)[a] := d
 58 | 
 59 | When scanning over (b|c)[a], we want to treat both b and c as definitions
 60 | *)
 61 | DefinitionSymbols[CallNode[LeafNode[Symbol, "Alternatives", _], children_, _]] :=
 62 | Module[{perChild, firstFailure},
 63 | 
 64 |   (* collect the definition symbols of every alternative *)
 65 |   perChild = Map[DefinitionSymbols, children];
 66 | 
 67 |   (* the first Failure, if any, becomes the result; otherwise the lists are merged *)
 68 |   firstFailure = SelectFirst[perChild, FailureQ];
 69 | 
 70 |   If[MissingQ[firstFailure], Flatten[perChild], firstFailure]
 71 | 
 72 | ]
 73 | 
 74 | DefinitionSymbols[CallNode[LeafNode[Symbol, "List", _], children_, _]] :=
 75 | Module[{perChild, firstFailure},
 76 | 
 77 |   (* collect the definition symbols of every list element *)
 78 |   perChild = Map[DefinitionSymbols, children];
 79 | 
 80 |   (* the first Failure, if any, becomes the result; otherwise the lists are merged *)
 81 |   firstFailure = SelectFirst[perChild, FailureQ];
 82 | 
 83 |   If[MissingQ[firstFailure], Flatten[perChild], firstFailure]
 84 | 
 85 | ]
 86 | (* any other call: the definition symbols are those of the head *)
 87 | DefinitionSymbols[CallNode[node_, _, _]] := DefinitionSymbols[node]
 88 | 
 89 | DefinitionSymbols[SyntaxErrorNode[_, _, _]] := {}
 90 | (* catch-all: arguments that no other rule handles are reported as a Failure *)
 91 | DefinitionSymbols[args___] :=
 92 |   Failure["Unhandled", <| "Function" -> DefinitionSymbols, "Arguments" -> HoldForm[{args}] |>]
 93 | 
 94 | 
 95 | 
 96 | 
 97 | (*
 98 | DeclarationName is appropriate for when you want a single name string
 99 | 
100 | If there are 0 names or if there is more than 1 name, then a Failure is returned
101 | A Failure from DefinitionSymbols is returned unchanged
102 | *)
103 | DeclarationName[node_] :=
104 | Catch[
105 | Module[{syms},
106 |   syms = DefinitionSymbols[node];
107 | 
108 |   (* a Failure is not a list of symbols; its Length is 2, so the checks below would misreport it *)
109 |   If[FailureQ[syms],
110 |     Throw[syms]
111 |   ];
112 |   If[empty[syms],
113 |     Throw[Failure["NoDefinitions", <| "Node" -> node |>]]
114 |   ];
115 |   If[Length[syms] > 1,
116 |     Throw[Failure["TooManyDefinitions", <| "Node" -> node |>]]
117 |   ];
118 | 
119 |   syms[[1, 2]]
120 | ]]
121 | End[]
122 | 
123 | EndPackage[]
124 | 


--------------------------------------------------------------------------------
/crates/wolfram-parser/src/parse/parselet/times_parselet.rs:
--------------------------------------------------------------------------------
  1 | use crate::{
  2 |     panic_if_aborted,
  3 |     parse::{parselet::*, ParserSession},
  4 |     precedence::Precedence,
  5 |     tokenize::{TokenKind, TokenRef},
  6 | };
  7 | 
  8 | 
  9 | impl<'i, B: ParseBuilder<'i> + 'i> InfixParselet<'i, B> for TimesParselet {
 10 |     /// Parses an infix Times expression, given the left operand
 11 |     /// `first_node` and the operator token `tok_in`, then continues in
 12 |     /// `parse_loop` to pick up any further operands.
 13 |     fn parse_infix(
 14 |         &self,
 15 |         session: &mut ParserSession<'i, B>,
 16 |         first_node: B::Node,
 17 |         trivia1: B::TriviaHandle,
 18 |         tok_in: TokenRef<'i>,
 19 |     ) -> B::Node {
 20 |         panic_if_aborted!();
 21 | 
 22 |         let op_token = session.push_syntax_and_next(tok_in);
 23 | 
 24 |         let mut state = session.begin_infix(InfixOperator::Times, first_node);
 25 | 
 26 |         //
 27 |         // tok_in has already been read, so the first operand is parsed here
 28 |         // instead of inside the loop
 29 |         //
 30 |         let (rhs_trivia, rhs_token) = session.current_token_eat_trivia();
 31 | 
 32 |         let rhs = session.parse_prefix(rhs_token);
 33 | 
 34 |         session
 35 |             .builder
 36 |             .infix_add(&mut state, trivia1, op_token, rhs_trivia, rhs);
 37 | 
 38 |         TimesParselet::parse_loop(session, state)
 39 |     }
 40 | 
 41 |     /// The operator of this parselet is always Times.
 42 |     fn getOp(&self) -> InfixParseletOperator {
 43 |         InfixOperator::Times.into()
 44 |     }
 45 | 
 46 |     /// The precedence is `Precedence::STAR`; the session is not consulted.
 47 |     fn getPrecedence(&self, _: &ParserSession<'i, B>) -> Option<Precedence> {
 48 |         Some(Precedence::STAR)
 49 |     }
 50 | }
 51 | 
 52 | impl TimesParselet {
 53 |     /// Keeps adding `operator, operand` pairs to `infix_state` for as long
 54 |     /// as the next token resolves to the Times operator.
 55 |     ///
 56 |     /// Otherwise reduces the infix state and returns via `parse_climb`.
 57 |     fn parse_loop<'i, B: ParseBuilder<'i> + 'i>(
 58 |         session: &mut ParserSession<'i, B>,
 59 |         mut infix_state: B::InfixParseState,
 60 |     ) -> B::Node {
 61 |         loop {
 62 |             panic_if_aborted!();
 63 | 
 64 |             let (mut trivia, mut candidate) = session.current_token();
 65 | 
 66 |             candidate = session.do_process_implicit_times(candidate);
 67 | 
 68 |             if candidate.tok == TokenKind::Fake_ImplicitTimes {
 69 |                 //
 70 |                 // implicit Times should not cross toplevel newlines,
 71 |                 // so reset and try again
 72 |                 //
 73 |                 session.trivia_reset(trivia);
 74 | 
 75 |                 (trivia, candidate) = session
 76 |                     .current_token_eat_trivia_but_not_toplevel_newlines_into();
 77 | 
 78 |                 candidate = session.do_process_implicit_times(candidate);
 79 |             }
 80 | 
 81 |             //
 82 |             // Comparing token kinds is not enough. The input may be
 83 |             // something like  a * b c \[Times] d  and only a single Infix
 84 |             // node should be created for it, so compare operators instead.
 85 |             //
 86 |             let candidate_op =
 87 |                 B::with_infix_parselet(candidate.tok, |p| p.getOp());
 88 | 
 89 |             let times_op =
 90 |                 <TimesParselet as InfixParselet<B>>::getOp(&TimesParselet {});
 91 | 
 92 |             if candidate_op != times_op {
 93 |                 //
 94 |                 // Not a Times operator, so stop here
 95 |                 //
 96 |                 session.trivia_reset(trivia);
 97 | 
 98 |                 let node = session.reduce_infix(infix_state);
 99 | 
100 |                 // MUSTTAIL
101 |                 return session.parse_climb(node);
102 |             }
103 | 
104 |             let (op_trivia, op_token) =
105 |                 session.commit_syntax_and_next(trivia, candidate);
106 | 
107 |             let (operand_trivia, operand_token) =
108 |                 session.current_token_eat_trivia();
109 | 
110 |             let operand = session.parse_prefix(operand_token);
111 | 
112 |             session.builder.infix_add(
113 |                 &mut infix_state,
114 |                 op_trivia,
115 |                 op_token,
116 |                 operand_trivia,
117 |                 operand,
118 |             );
119 |         } // loop
120 |     }
121 | }
122 | 


--------------------------------------------------------------------------------
/Tests/ToString.mt:
--------------------------------------------------------------------------------
  1 | Print["\n===== Start ToString.mt =====\n"]
  2 | 
  3 | Needs["CodeParser`"]
  4 | Needs["CodeParser`Abstract`"]
  5 | Needs["CodeParser`ToString`"] (* ToInputFormString *)
  6 | 
  7 | 
  8 | Test[
  9 | 	ToInputFormString[Aggregate[CodeConcreteParse["1+1"]]]
 10 | 	,
 11 | 	" 1 + 1 "
 12 | 	,
 13 | 	TestID->"ToString-20181230-P1F9Q9"
 14 | ]
 15 | 
 16 | Test[
 17 | 	ToInputFormString[Aggregate[CodeConcreteParse["_ + __ * ___"]]]
 18 | 	,
 19 | 	" _ +  __*___  "
 20 | 	,
 21 | 	TestID->"ToString-20181230-S7R9U8"
 22 | ]
 23 | 
 24 | Test[
 25 | 	ToInputFormString[Aggregate[CodeConcreteParse["% ^ # ^ ## ^ f''[x]"]]]
 26 | 	,
 27 | 	" %^ #^ ##^  f' ' [x]   "
 28 | 	,
 29 | 	TestID->"ToString-20181230-E6E4O1"
 30 | ]
 31 | 
 32 | 
 33 | Test[
 34 | 	ToInputFormString[Aggregate[CodeConcreteParse["@"]]]
 35 | 	,
 36 | 	" @ "
 37 | 	,
 38 | 	TestID->"ToString-20181230-V8O8B1"
 39 | ]
 40 | 
 41 | Test[
 42 | 	ToInputFormString[Aggregate[CodeConcreteParse["{a_b, c__d, e___f, _., g_.}"]]]
 43 | 	,
 44 | 	"{ a_b,c__d,e___f,_.,g_. }"
 45 | 	,
 46 | 	TestID->"ToString-20181230-U1H3E1"
 47 | ]
 48 | 
 49 | 
 50 | Test[
 51 | 	ToInputFormString[Aggregate[CodeConcreteParse["aaa - bbb + ccc - !ddd"]]]
 52 | 	,
 53 | 	" aaa - bbb + ccc -  !ddd  "
 54 | 	,
 55 | 	TestID->"ToString-20181230-Z9F3L8"
 56 | ]
 57 | 
 58 | 
 59 | 
 60 | Test[
 61 | 	ToInputFormString[Aggregate[CodeConcreteParse["a::b::c"]]]
 62 | 	,
 63 | 	" a::b::c "
 64 | 	,
 65 | 	TestID->"ToString-20181230-P0K1Y7"
 66 | ]
 67 | 
 68 | Test[
 69 | 	ToInputFormString[Aggregate[CodeConcreteParse["a /: b := c"]]]
 70 | 	,
 71 | 	" a/:b:=c "
 72 | 	,
 73 | 	TestID->"ToString-20181230-H9T6O8"
 74 | ]
 75 | 
 76 | 
 77 | Test[
 78 | 	ToInputFormString[Aggregate[CodeConcreteParse["##&"]]]
 79 | 	,
 80 | 	" ##& "
 81 | 	,
 82 | 	TestID->"ToString-20181230-A2F7W1"
 83 | ]
 84 | 
 85 | 
 86 | Test[
 87 | 	ToInputFormString[Aggregate[CodeConcreteParse["f[]"]]]
 88 | 	,
 89 | 	"f[]"
 90 | 	,
 91 | 	TestID->"ToString-20181230-R5Q3J4"
 92 | ]
 93 | 
 94 | Test[
 95 | 	ToInputFormString[Aggregate[CodeConcreteParse["f["]]]
 96 | 	,
 97 | 	"f["
 98 | 	,
 99 | 	TestID->"ToString-20181230-T4A0R3"
100 | ]
101 | 
102 | 
103 | Test[
104 | 	ToInputFormString[Aggregate[CodeConcreteParse["f[[4]]"]]]
105 | 	,
106 | 	"f[[4]]"
107 | 	,
108 | 	TestID->"ToString-20181230-C6W4M5"
109 | ]
110 | 
111 | 
112 | 
113 | 
114 | 
115 | 
116 | 
117 | 
118 | 
119 | 
120 | Test[
121 | 	ToInputFormString[Aggregate[CodeConcreteParse["\\(x\\)"]]]
122 | 	,
123 | 	"\\(x\\)"
124 | 	,
125 | 	TestID->"ToString-20181230-U6K9Q7"
126 | ]
127 | 
128 | Test[
129 | 	ToInputFormString[Aggregate[CodeConcreteParse["\\(x,y\\)"]]]
130 | 	,
131 | 	"\\(x,y\\)"
132 | 	,
133 | 	TestID->"ToString-20181231-U3W4B3"
134 | ]
135 | 
136 | 
137 | 
138 | 
139 | 
140 | 
141 | 
142 | 
143 | 
144 | 
145 | 
146 | 
147 | 
148 | 
149 | 
150 | Test[
151 | 	ToInputFormString[Aggregate[CodeConcreteParse["a& & + b"]]]
152 | 	,
153 | 	"   a& &  + b "
154 | 	,
155 | 	TestID->"ToString-20181231-F0J3L4"
156 | ]
157 | 
158 | 
159 | 
160 | 
161 | 
162 | 
163 | (*
164 | Error handling
165 | *)
166 | Test[
167 | 	ToInputFormString[Aggregate[CodeConcreteParse["A B:C:.Ne"]]]
168 | 	,
169 | 	" A   B:C :  . Ne   "
170 | 	,
171 | 	TestID->"ToString-20190523-V1I4S4"
172 | ]
173 | 
174 | 
175 | Test[
176 | 	ToInputFormString[Aggregate[CodeConcreteParse["a:"]]]
177 | 	,
178 | 	" a: "
179 | 	,
180 | 	TestID->"ToString-20190523-H5C9J2"
181 | ]
182 | 
183 | 
184 | 
185 | 
186 | 
187 | 
188 | Test[
189 | 	StringJoin[ToSourceCharacterString /@ CodeConcreteParse["{]", ContainerNode -> (#[[1]]&)]]
190 | 	,
191 | 	"{]"
192 | 	,
193 | 	TestID->"ToString-20190926-T4I8S1"
194 | ]
195 | 
196 | 
197 | ast = CodeParse["a // -1"]
198 | 
199 | Test[
200 | 	ToFullFormString[ast]
201 | 	,
202 | 	"(-1)[a]"
203 | 	,
204 | 	TestID->"ToString-20200315-Z6K8C4"
205 | ]
206 | 
207 | 
208 | ast = CodeParse["{\"a\", \\\n\"b\"}"]
209 | 
210 | Test[
211 | 	ToFullFormString[ast]
212 | 	,
213 | 	"List[\"a\", \"b\"]"
214 | 	,
215 | 	TestID->"ToString-20200601-D9Z8Z8"
216 | ]
217 | 
218 | 
219 | 
220 | 
221 | 
222 | 
223 | 
224 | 


--------------------------------------------------------------------------------
/crates/wolfram-parser/src/parse/parselet/under_parselet.rs:
--------------------------------------------------------------------------------
  1 | use crate::{
  2 |     panic_if_aborted,
  3 |     parse::{
  4 |         operators::CompoundOperator, parselet::*, ParserSession, UnderParseData,
  5 |     },
  6 |     tokenize::{TokenKind, TokenRef},
  7 | };
  8 | // `new` stores both operators as given; this file only reads `BOp`.
  9 | impl UnderParselet {
 10 |     pub(crate) const fn new(
 11 |         BOp: CompoundOperator,
 12 |         PBOp: CompoundOperator,
 13 |     ) -> Self {
 14 |         Self { BOp, PBOp }
 15 |     }
 16 | }
 17 | 
 18 | impl<'i, B: ParseBuilder<'i> + 'i> PrefixParselet<'i, B> for UnderParselet {
 19 |     /// Prefix parse of a blank: something like  _  or  _a
 20 |     fn parse_prefix(
 21 |         &self,
 22 |         session: &mut ParserSession<'i, B>,
 23 |         tok_in: TokenRef<'i>,
 24 |     ) -> B::Node {
 25 |         // Scan the under token and whatever attaches to it.
 26 |         let under_data =
 27 |             self.get_parse_under_context_sensitive(session, tok_in);
 28 | 
 29 |         // Let the builder turn the scanned data into a node.
 30 |         let blank = session.builder.push_compound_blank(under_data);
 31 | 
 32 |         //
 33 |         // MUSTTAIL
 34 |         //
 35 |         session.parse_climb(blank)
 36 |     }
 37 | }
 38 | 
 39 | impl UnderParselet {
 40 |     /// Infix entry point: something like  a_b
 41 |     ///
 42 |     /// The scan is the same as for the prefix case, so this simply forwards
 43 |     /// to `get_parse_under_context_sensitive`.
 44 |     pub(in crate::parse) fn get_parse_infix_context_sensitive<
 45 |         'i,
 46 |         B: ParseBuilder<'i> + 'i,
 47 |     >(
 48 |         &self,
 49 |         session: &mut ParserSession<'i, B>,
 50 |         tok_in: TokenRef<'i>,
 51 |     ) -> UnderParseData<'i> {
 52 |         self.get_parse_under_context_sensitive(session, tok_in)
 53 |     }
 54 | 
 55 |     /// Used for both the prefix case (`_b`) and the infix case (`a_b`).
 56 |     ///
 57 |     /// Skips the under token `tok_in` and peeks at the token after it.
 58 |     ///
 59 |     /// If that token is a `Symbol` or an `Error_ExpectedLetterlike`, it is
 60 |     /// skipped as well and returned in `UnderParseData::UnderSymbol` with
 61 |     /// `self.BOp` as the operator. Any other token is left unconsumed and
 62 |     /// `UnderParseData::Under(tok_in)` is returned.
 63 |     fn get_parse_under_context_sensitive<'i, B: ParseBuilder<'i> + 'i>(
 64 |         &self,
 65 |         session: &mut ParserSession<'i, B>,
 66 |         tok_in: TokenRef<'i>,
 67 |     ) -> UnderParseData<'i> {
 68 |         panic_if_aborted!();
 69 | 
 70 |         tok_in.skip(&mut session.tokenizer);
 71 | 
 72 |         let next = session.tokenizer.peek_token();
 73 | 
 74 |         match next.tok {
 75 |             //
 76 |             // Symbol
 77 |             //
 78 |             // Something like
 79 |             //     prefix:  _b
 80 |             //      infix:  a_b
 81 |             //
 82 |             // Context-sensitive infix parse of Symbol token
 83 |             //
 84 |             // Something like  _b
 85 |             //                  ^
 86 |             // We know we are already in the middle of parsing _
 87 |             //
 88 |             // Just push this symbol
 89 |             //
 90 |             // Error_ExpectedLetterlike
 91 |             //
 92 |             // Something like:
 93 |             //     prefix:  _a`   (TID:231016/1)
 94 |             //      infix:  a_b`  (TID:231016/2)
 95 |             //
 96 |             // It's nice to include the error inside of the blank
 97 |             //
 98 |             TokenKind::Symbol | TokenKind::Error_ExpectedLetterlike => {
 99 |                 next.skip(&mut session.tokenizer);
100 | 
101 |                 UnderParseData::UnderSymbol {
102 |                     op: self.BOp,
103 |                     under: tok_in,
104 |                     symbol: next,
105 |                 }
106 |             },
107 | 
108 |             //
109 |             // Anything else is not part of the blank and stays unconsumed
110 |             //
111 |             _ => UnderParseData::Under(tok_in),
112 |         }
113 |     }
114 | }
115 | 
116 | //======================================
117 | // UnderDotParselet
118 | //======================================
119 | 
120 | impl<'i, B: ParseBuilder<'i> + 'i> PrefixParselet<'i, B> for UnderDotParselet {
121 |     /// Prefix parse of something like  _.
122 |     /// The token becomes a leaf node, which is then handed to `parse_climb`.
123 |     fn parse_prefix(
124 |         &self,
125 |         session: &mut ParserSession<'i, B>,
126 |         tok_in: TokenRef<'i>,
127 |     ) -> B::Node {
128 |         panic_if_aborted!();
129 | 
130 |         //
131 |         // tok_in itself becomes the leaf node
132 |         //
133 |         let leaf = session.push_leaf_and_next(tok_in);
134 | 
135 |         //
136 |         // MUSTTAIL
137 |         //
138 |         session.parse_climb(leaf)
139 |     }
140 | }
141 | 


--------------------------------------------------------------------------------
/docs/stages.md:
--------------------------------------------------------------------------------
  1 | docs: stages of parser
  2 | 
  3 | 
  4 | 
  5 | 
  6 | 
  7 | bytes
  8 | 
  9 | -> decode bytes ->
 10 | 
 11 | Source characters
 12 | 
 13 | -> decode Source characters ->
 14 | 
 15 | WL characters
 16 | 
 17 | -> tokenize ->
 18 | 
 19 | tokens
 20 | 
 21 | -> parse ->
 22 | 
 23 | concrete nodes
 24 | 
 25 | -> aggregate ->
 26 | 
 27 | aggregate nodes
 28 | 
 29 | -> abstract ->
 30 | 
 31 | abstract nodes
 32 | 
 33 | 
 34 | 
 35 | 
 36 | 
 37 | 
 38 | 
 39 | 
 40 | 
 41 | 
 42 | 
 43 | 
 44 | 
 45 | different levels of syntax
 46 | 
 47 | 
 48 | 
 49 | # boxes
 50 | 
 51 | ```
 52 | RowBox[{"1", "+", RowBox[{"(*", "*)"}], "a"}]
 53 | ```
 54 | 
 55 | ```
 56 | RowBox[{"1", "+", RowBox[{"(*", "*)"}], SqrtBox["a"]}]
 57 | ```
 58 | 
 59 | tree structure of tokens
 60 | 
 61 | no type information
 62 | 
 63 | no Implicit tokens
 64 | 
 65 | Trivia is kept
 66 | 
 67 | 
 68 | 
 69 | ## What is trivia?
 70 | 
 71 | Taken from:
 72 | https://github.com/dotnet/roslyn/blob/master/docs/wiki/Roslyn-Overview.md#syntax-trivia
 73 | 
 74 | comments
 75 | 
 76 | whitespace
 77 | 
 78 | newlines
 79 | 
 80 | trivia is only ever RIFFLED between tokens, never at the beginning or end
 81 | 
 82 | 
 83 | 
 84 | 
 85 | 
 86 | 
 87 | 
 88 | 
 89 | 
 90 | 
 91 | # concrete
 92 | 
 93 | ```
 94 | InfixNode[Plus, {
 95 | 	LeafNode[Integer, "1", <||>],
 96 | 	LeafNode[Token`Plus, "+", <||>],
 97 | 	LeafNode[Token`Comment, "(**)", <||>],
 98 | 	LeafNode[Symbol, "a", <||>] }, <||>]
 99 | 
100 | InfixNode[Plus, {
101 | 	LeafNode[Integer, "1", <||>],
102 | 	LeafNode[Token`Plus, "+", <||>],
103 | 	LeafNode[Token`Comment, "(**)", <||>],
104 | 	LeafNode[SqrtBox, "a", <||>] }, <||>]
105 | ```
106 | 
107 | Trivia is kept
108 | 
109 | type information is added
110 | type information is the wrapper node, such as InfixNode[Plus, ...]
111 | 
112 | type information also includes all of the riffled operator tokens: InfixNode[Plus, { 1, +, 2, \[ImplicitPlus], 3 }]
113 | 
114 | 
115 | 
116 | Implicit tokens are added
117 | 
118 | 
119 | ## What are Implicit tokens?
120 | 
121 | 
122 | when parsing   ;;   it is convenient to remember the implicit   1 ;; All
123 | 
124 | when parsing a; ;  it is convenient to remember the implicit a ; Null ; Null
125 | 
126 | 
127 | 
128 | implicit Times
129 | 
130 | when parsing   a b   it is convenient to remember the implicit   a ImplicitTimes b
131 | 
132 | 
133 | 
134 | concrete syntax keeps everything from the input, including trivia
135 | 
136 | concrete syntax has CallNode[{head, comment}, {child1}]
137 | 
138 | concrete syntax has InfixNode[Plus, {1, +, comment, 1}]
139 | 
140 | 
141 | 
142 | 
143 | 
144 | 
145 | 
146 | 
147 | 
148 | 
149 | 
150 | 
151 | 
152 | 
153 | # aggregate
154 | 
155 | ```
156 | InfixNode[Plus, {
157 | 	LeafNode[Integer, "1", <||>],
158 | 	LeafNode[Token`Plus, "+", <||>],
159 | 	LeafNode[Symbol, "a", <||>] }, <||>]
160 | 
161 | InfixNode[Plus, {
162 | 	LeafNode[Integer, "1", <||>],
163 | 	LeafNode[Token`Plus, "+", <||>],
164 | 	LeafNode[SqrtBox, "a", <||>] }, <||>]
165 | ```
166 | 
167 | type information is kept
168 | 
169 | Implicit tokens are kept
170 | 
171 | Trivia is removed
172 | 
173 | aggregate syntax
174 | 
175 | aggregate removes comments, whitespace, and newlines
176 | 
177 | aggregate syntax has CallNode[head, {child1}]
178 | 
179 | aggregate syntax has InfixNode[Plus, {1, +, 1}]
180 | 
181 | 
182 | 
183 | 
184 | 
185 | 
186 | 
187 | 
188 | 
189 | 
190 | 
191 | 
192 | 
193 | 
194 | 
195 | # abstract
196 | 
197 | ```
198 | CallNode[LeafNode[Symbol, "Plus", <||>], {
199 | 				LeafNode[Integer, "1", <||>],
200 | 				LeafNode[Symbol, "a", <||>] }, <||>]
201 | 
202 | CallNode[LeafNode[Symbol, "Plus", <||>], {
203 | 				LeafNode[Integer, "1", <||>],
204 | 				LeafNode[SqrtBox, "a", <||>] }, <||>]
205 | ```
206 | 
207 | everything is a Call
208 | 
209 | type information is lost because everything is a CallNode
210 | 
211 | Implicit tokens are converted to actual tokens
212 | 
213 | abstract syntax
214 | 
215 | abstract syntax has CallNode[head, {child1}]
216 | 
217 | abstract syntax has CallNode[Plus, {1, 1}]
218 | 
219 | 
220 | 
221 | 
222 | 
223 | # further work that could be done
224 | 
225 | * removing line continuations
226 | 
227 | * converting characters (e.g., \[Infinity] and \[Degree]) to symbols (e.g., Infinity and Degree)
228 | 
229 | * removing \< \> from strings
230 | 
231 | * more?
232 | 
233 | 
234 | 
235 | 
236 | 
237 | 
238 | 
239 | 
240 | 
241 | 
242 | 


--------------------------------------------------------------------------------
/Tests/Error.mt:
--------------------------------------------------------------------------------
  1 | Print["\n===== Start Error.mt =====\n"]
  2 | 
  3 | path = FileNameJoin[{DirectoryName[$CurrentTestSource], "CodeParserTestUtils"}]
  4 | PrependTo[$Path, path]
  5 | 
  6 | Needs["CodeParserTestUtils`"]
  7 | 
  8 | Needs["CodeParser`"]
  9 | Needs["CodeParser`Utils`"]
 10 | 
 11 | 
 12 | 
 13 | 
 14 | (*
 15 | 
 16 | Tests related to Error.wl:
 17 | 
 18 | Handling unterminated groups
 19 | Handling unterminated tokens
 20 | 
 21 | Chunks
 22 | 
 23 | etc.
 24 | 
 25 | *)
 26 | 
 27 | 
 28 | TestMatch[
 29 | 	CodeConcreteParse["\"\n", SourceConvention -> "LineColumn"]
 30 | 	,
 31 | 	ContainerNode[String, {
 32 | 		ErrorNode[Token`Error`UnterminatedString, "\"", <|Source -> {{1, 1}, {1, 2}}|>]}, _]
 33 | 	,
 34 | 	TestID->"Error-20210118-C0F5T5"
 35 | ]
 36 | 
 37 | TestMatch[
 38 | 	CodeConcreteParse["\"\r", SourceConvention -> "LineColumn"]
 39 | 	,
 40 | 	ContainerNode[String, {
 41 | 		ErrorNode[Token`Error`UnterminatedString, "\"", <|Source -> {{1, 1}, {1, 2}}|>]}, _]
 42 | 	,
 43 | 	TestID->"Error-20210118-R8T2P0"
 44 | ]
 45 | 
 46 | TestMatch[
 47 | 	CodeConcreteParse["\"\r\n", SourceConvention -> "LineColumn"]
 48 | 	,
 49 | 	ContainerNode[String, {
 50 | 		ErrorNode[Token`Error`UnterminatedString, "\"", <|Source -> {{1, 1}, {1, 2}}|>]}, _]
 51 | 	,
 52 | 	TestID->"Error-20210118-T0V9F9"
 53 | ]
 54 | 
 55 | TestMatch[
 56 | 	CodeConcreteParse["\"\n", SourceConvention -> "SourceCharacterIndex"]
 57 | 	,
 58 | 	ContainerNode[String, {
 59 | 		ErrorNode[Token`Error`UnterminatedString, "\"\n", <|Source -> {1, 2}|>]}, _]
 60 | 	,
 61 | 	TestID->"Error-20210118-O5Q6Y0"
 62 | ]
 63 | 
 64 | TestMatch[
 65 | 	CodeConcreteParse["\"\r", SourceConvention -> "SourceCharacterIndex"]
 66 | 	,
 67 | 	ContainerNode[String, {
 68 | 		ErrorNode[Token`Error`UnterminatedString, "\"\r", <|Source -> {1, 2}|>]}, _]
 69 | 	,
 70 | 	TestID->"Error-20210118-W7L4K8"
 71 | ]
 72 | 
 73 | TestMatch[
 74 | 	CodeConcreteParse["\"\r\n", SourceConvention -> "SourceCharacterIndex"]
 75 | 	,
 76 | 	ContainerNode[String, {
 77 | 		ErrorNode[Token`Error`UnterminatedString, "\"\r\n", <|Source -> {1, 3}|>]}, _]
 78 | 	,
 79 | 	TestID->"Error-20210118-R9E3S6"
 80 | ]
 81 | 
 82 | 
 83 | Test[
 84 | 	CodeConcreteParse["\\|110000"]
 85 | 	,
 86 | 	ContainerNode[String, {
 87 | 		ErrorNode[Token`Error`UnhandledCharacter, "\\|110000", <|Source -> {{1, 1}, {1, 9}}|>]}, <|Source -> {{1, 1}, {1, 9}}|>]
 88 | 	,
 89 | 	TestID->"Error-20211104-P0L8Y0"
 90 | ]
 91 | 
 92 | Test[
 93 | 	CodeConcreteParse["\\|FFFFFF"]
 94 | 	,
 95 | 	ContainerNode[String, {
 96 | 		ErrorNode[Token`Error`UnhandledCharacter, "\\|FFFFFF", <|Source -> {{1, 1}, {1, 9}}|>]}, <|Source -> {{1, 1}, {1, 9}}|>]
 97 | 	,
 98 | 	TestID->"Error-20211104-Q2O1J4"
 99 | ]
100 | 
101 | 
102 | Test[
103 | 	CodeConcreteParse["\\\\[Alpa]"]
104 | 	,
105 | 	ContainerNode[String, {
106 | 		ErrorNode[Token`Error`UnhandledCharacter, "\\\\", <|Source -> {{1, 1}, {1, 3}}|>],
107 | 		GroupNode[GroupSquare, {
108 | 			LeafNode[Token`OpenSquare, "[", <|Source -> {{1, 3}, {1, 4}}|>],
109 | 			LeafNode[Symbol, "Alpa", <|Source -> {{1, 4}, {1, 8}}|>],
110 | 			LeafNode[Token`CloseSquare, "]", <|Source -> {{1, 8}, {1, 9}}|>]}, <|Source -> {{1, 3}, {1, 9}}|>]}, <|SyntaxIssues -> {
111 | 		
112 | 		SyntaxIssue["UnrecognizedLongName", "Unrecognized longname: ``\\\\[Alpa]``.", "Error", <|Source -> {{1, 1}, {1, 9}}, ConfidenceLevel -> 0.75, CodeActions -> {CodeAction["Replace with ``\\\\[Alpha]``", ReplaceText, <|Source -> {{1, 1}, {1, 9}}, "ReplacementText" -> "\\\\[Alpha]"|>]}, "AdditionalDescriptions" -> {"``Alpa`` is not a valid long name."}|>]}, Source -> {{1, 1}, {1, 9}}|>]
113 | 	,
114 | 	TestID->"Error-20220709-M4Y7Z3"
115 | ]
116 | 
117 | (*
118 | no UnrecognizedLongName issue is expected when \\[ appears inside a string literal
119 | *)
120 | Test[
121 | 	CodeConcreteParse["RegularExpression[\"\\\\[a-zA-Z0-9]+\\\\]\"]"]
122 | 	,
123 | 	ContainerNode[String, {
124 | 		CallNode[{LeafNode[Symbol, "RegularExpression", <|Source -> {{1, 1}, {1, 18}}|>]},
125 | 			GroupNode[GroupSquare, {
126 | 				LeafNode[Token`OpenSquare, "[", <|Source -> {{1, 18}, {1, 19}}|>],
127 | 				LeafNode[String, "\"\\\\[a-zA-Z0-9]+\\\\]\"", <|Source -> {{1, 19}, {1, 38}}|>],
128 | 				LeafNode[Token`CloseSquare, "]", <|Source -> {{1, 38}, {1, 39}}|>]}, <|Source -> {{1, 18}, {1, 39}}|>], <|Source -> {{1, 1}, {1, 39}}|>]}, <|Source -> {{1, 1}, {1, 39}}|>]
129 | 	,
130 | 	TestID->"Error-20220711-I6O1H4"
131 | ]
132 | 
133 | 
134 | 
135 | 
136 | 
137 | 
138 | 
139 | 
140 | 
141 | 
142 | 


--------------------------------------------------------------------------------
/crates/wolfram-parser/src/quirks.rs:
--------------------------------------------------------------------------------
  1 | use std::{cell::Cell, fmt::Debug};
  2 | // Settings live in a per-thread `Cell` and start out as `const_default()`.
  3 | thread_local! {
  4 |     // TODO(cleanup): Don't store these settings using error-prone global state.
  5 |     static QUIRK_SETTINGS: Cell<QuirkSettings> =
  6 |         Cell::new(QuirkSettings::const_default());
  7 | }
  8 | 
  9 | /// One boolean switch per quirk; each quirk is described on its field.
 10 | #[derive(Debug, Copy, Clone, PartialEq)]
 11 | pub struct QuirkSettings {
 12 |     /// "InfixBinaryAt" quirk
 13 |     ///
 14 |     ///
 15 |     /// The kernel parses `a<>StringJoin@b` as `StringJoin[a, b]`
 16 |     ///
 17 |     /// Most infix operators can be used with this syntax.
 18 |     /// Notably, SameQ and UnsameQ do NOT work with this syntax.
 19 |     ///
 20 |     /// *Related bugs: 365013*
 21 |     pub infix_binary_at: bool,
 22 | 
 23 |     /// "FlattenTimes" quirk
 24 |     ///
 25 |     /// In 12.1 and before:
 26 |     ///
 27 |     /// * `a / b / c` is parsed as `Times[a, Power[b, -1], Power[c, -1]]`
 28 |     /// * `-a / b` is parsed as `Times[-1, a, Power[b, -1]]`
 29 |     ///
 30 |     /// In 12.2 and after:
 31 |     ///
 32 |     /// * `a / b / c` is parsed as `Times[Times[a, Power[b, -1]], Power[c, -1]]`
 33 |     /// * `-a / b` is parsed as `Times[Times[-1, a], Power[b, -1]]`
 34 |     ///
 35 |     /// TODO: when targeting v12.2 as a minimum, remove this quirk
 36 |     ///
 37 |     /// *Related bugs: 57064, 139531, 153875, 160919*
 38 |     pub flatten_times: bool,
 39 | 
 40 |     /// "OldAtAtAt" quirk
 41 |     ///
 42 |     /// Changed in 13.1: `@@@`
 43 |     ///
 44 |     /// In 13.0 and before:
 45 |     ///
 46 |     /// `a @@@ b` is parsed as `Apply[a, b, {1}]`
 47 |     ///
 48 |     /// In 13.1 and after:
 49 |     ///
 50 |     /// `a @@@ b` is parsed as `MapApply[a, b]`
 51 |     pub old_at_at_at: bool,
 52 | }
 53 | /// Names a single quirk, for querying with `is_quirk_enabled`.
 54 | pub enum Quirk {
 55 |     /// "InfixBinaryAt" quirk
 56 |     ///
 57 |     ///
 58 |     /// The kernel parses `a<>StringJoin@b` as `StringJoin[a, b]`
 59 |     ///
 60 |     /// Most infix operators can be used with this syntax.
 61 |     /// Notably, SameQ and UnsameQ do NOT work with this syntax.
 62 |     ///
 63 |     /// *Related bugs: 365013*
 64 |     InfixBinaryAt,
 65 | 
 66 |     /// "FlattenTimes" quirk
 67 |     ///
 68 |     /// In 12.1 and before:
 69 |     ///
 70 |     /// * `a / b / c` is parsed as `Times[a, Power[b, -1], Power[c, -1]]`
 71 |     /// * `-a / b` is parsed as `Times[-1, a, Power[b, -1]]`
 72 |     ///
 73 |     /// In 12.2 and after:
 74 |     ///
 75 |     /// * `a / b / c` is parsed as `Times[Times[a, Power[b, -1]], Power[c, -1]]`
 76 |     /// * `-a / b` is parsed as `Times[Times[-1, a], Power[b, -1]]`
 77 |     ///
 78 |     /// TODO: when targeting v12.2 as a minimum, remove this quirk
 79 |     ///
 80 |     /// *Related bugs: 57064, 139531, 153875, 160919*
 81 |     FlattenTimes,
 82 | 
 83 |     /// "OldAtAtAt" quirk
 84 |     ///
 85 |     /// Changed in 13.1: `@@@`
 86 |     ///
 87 |     /// In 13.0 and before:
 88 |     ///
 89 |     /// `a @@@ b` is parsed as `Apply[a, b, {1}]`
 90 |     ///
 91 |     /// In 13.1 and after:
 92 |     ///
 93 |     /// `a @@@ b` is parsed as `MapApply[a, b]`
 94 |     OldAtAtAt,
 95 | }
 96 | 
 97 | impl QuirkSettings {
 98 |     /// Default settings, usable in `const` contexts.
 99 |     ///
100 |     /// Only `infix_binary_at` is enabled.
101 |     pub const fn const_default() -> Self {
102 |         QuirkSettings {
103 |             infix_binary_at: true,
104 |             flatten_times: false,
105 |             old_at_at_at: false,
106 |         }
107 |     }
108 | 
109 |     /// Returns `self` with `flatten_times` replaced by `value`.
110 |     pub fn flatten_times(mut self, value: bool) -> Self {
111 |         self.flatten_times = value;
112 |         self
113 |     }
114 | 
115 |     /// Returns `self` with `infix_binary_at` replaced by `value`.
116 |     pub fn infix_binary_at(mut self, value: bool) -> Self {
117 |         self.infix_binary_at = value;
118 |         self
119 |     }
120 | 
121 |     /// Returns `self` with `old_at_at_at` replaced by `value`.
122 |     pub fn old_at_at_at(mut self, value: bool) -> Self {
123 |         self.old_at_at_at = value;
124 |         self
125 |     }
126 | }
127 | 
128 | impl Default for QuirkSettings {
129 |     fn default() -> QuirkSettings {
130 |         QuirkSettings::const_default() // runtime default mirrors the const constructor
131 |     }
132 | }
133 | 
134 | pub fn set_quirks(quirks: QuirkSettings) {
135 |     QUIRK_SETTINGS.set(quirks); // stored value is what is_quirk_enabled() reads back
136 | }
137 | 
138 | /// Reports whether `quirk` is switched on in the current `QUIRK_SETTINGS` value.
139 | pub(crate) fn is_quirk_enabled(quirk: Quirk) -> bool {
140 |     let active = QUIRK_SETTINGS.get();
141 |     match quirk {
142 |         Quirk::OldAtAtAt => active.old_at_at_at,
143 |         Quirk::FlattenTimes => active.flatten_times,
144 |         Quirk::InfixBinaryAt => active.infix_binary_at,
145 |     }
146 | }
147 | 


--------------------------------------------------------------------------------
/Tests/TokenErrors.mt:
--------------------------------------------------------------------------------
  1 | Print["\n===== Start TokenErrors.mt =====\n"]
  2 | 
  3 | Needs["CodeParser`"]
  4 | 
  5 | (*
  6 | UnhandledCharacter:
  7 | *)
  8 | 
  9 | Test[
 10 | 	CodeTokenize["\\[SkeletonIndicator]"]
 11 | 	,
 12 | 	{ErrorNode[Token`Error`UnhandledCharacter, "\\[SkeletonIndicator]", <|Source -> {{1, 1}, {1, 21}}|>]}
 13 | 	,
 14 | 	TestID->"TokenErrors-20190520-B1H0A6"
 15 | ]
 16 | 
 17 | Test[
 18 | 	CodeTokenize["\\\""]
 19 | 	,
 20 | 	{ErrorNode[Token`Error`UnhandledCharacter, "\\\"", <|Source -> {{1, 1}, {1, 3}}|>]}
 21 | 	,
 22 | 	TestID->"TokenErrors-20190816-G5Q8B5"
 23 | ]
 24 | 
 25 | Test[
 26 | 	CodeTokenize["a::\\\""]
 27 | 	,
 28 | 	{
 29 | 		LeafNode[Symbol, "a", <|Source -> {{1, 1}, {1, 2}}|>],
 30 | 		LeafNode[Token`ColonColon, "::", <|Source -> {{1, 2}, {1, 4}}|>],
 31 | 		ErrorNode[Token`Error`UnhandledCharacter, "\\\"", <|Source -> {{1, 4}, {1, 6}}|>] }
 32 | 	,
 33 | 	TestID->"TokenErrors-20190520-L5N7B0"
 34 | ]
 35 | 
 36 | 
 37 | 
 38 | (*
 39 | UnterminatedComment:
 40 | *)
 41 | 
 42 | Test[
 43 | 	CodeTokenize["(*"]
 44 | 	,
 45 | 	{ErrorNode[Token`Error`UnterminatedComment, "(*", <|Source -> {{1, 1}, {1, 3}}|>]}
 46 | 	,
 47 | 	TestID->"TokenErrors-20190520-C8W1P2"
 48 | ]
 49 | 
 50 | 
 51 | 
 52 | 
 53 | (*
 54 | ExpectedAlphaOrDollar (reported as Token`Error`ExpectedLetterlike):
 55 | *)
 56 | 
 57 | 
 58 | Test[
 59 | 	CodeTokenize["aaa`1"]
 60 | 	,
 61 | 	{
 62 | 		ErrorNode[Token`Error`ExpectedLetterlike, "aaa`", <|Source -> {{1, 1}, {1, 5}}|>],
 63 | 		LeafNode[Integer, "1", <|Source -> {{1, 5}, {1, 6}}|>]
 64 | 	}
 65 | 	,
 66 | 	TestID->"TokenErrors-20190520-H9P0H9"
 67 | ]
 68 | 
 69 | 
 70 | 
 71 | 
 72 | (*
 73 | EmptyString:
 74 | *)
 75 | 
 76 | Test[
 77 | 	CodeTokenize["a::"]
 78 | 	,
 79 | 	{
 80 | 		LeafNode[Symbol, "a", <|Source -> {{1, 1}, {1, 2}}|>],
 81 | 		LeafNode[Token`ColonColon, "::", <|Source -> {{1, 2}, {1, 4}}|>] }
 82 | 	,
 83 | 	TestID->"TokenErrors-20190520-R2P3A3"
 84 | ]
 85 | 
 86 | Test[
 87 | 	CodeTokenize["a>>"]
 88 | 	,
 89 | 	{
 90 | 		LeafNode[Symbol, "a", <|Source -> {{1, 1}, {1, 2}}|>],
 91 | 		LeafNode[Token`GreaterGreater, ">>", <|Source -> {{1, 2}, {1, 4}}|>] }
 92 | 	,
 93 | 	TestID->"TokenErrors-20190520-M3H7E9"
 94 | ]
 95 | 
 96 | 
 97 | (*
 98 | UnterminatedString:
 99 | *)
100 | 
101 | Test[
102 | 	CodeTokenize["\""]
103 | 	,
104 | 	{ErrorNode[Token`Error`UnterminatedString, "\"", <|Source -> {{1, 1}, {1, 2}}|>]}
105 | 	,
106 | 	TestID->"TokenErrors-20190520-L6N6S8"
107 | ]
108 | 
109 | 
110 | 
111 | 
112 | 
113 | 
114 | 
115 | 
116 | 
117 | (*
118 | InvalidBase (reported as Token`Error`Number):
119 | *)
120 | 
121 | Test[
122 | 	CodeTokenize["37^^2"]
123 | 	,
124 | 	{ErrorNode[Token`Error`Number, "37^^2", <|Source -> {{1, 1}, {1, 6}}|>]}
125 | 	,
126 | 	TestID->"TokenErrors-20190520-Q9B9R6"
127 | ]
128 | 
129 | 
130 | 
131 | 
132 | 
133 | 
134 | 
135 | 
136 | (*
137 | ExpectedDigitOrAlpha (reported as Token`Error`Number):
138 | *)
139 | 
140 | Test[
141 | 	CodeTokenize["2^^3"]
142 | 	,
143 | 	{ErrorNode[Token`Error`Number, "2^^3", <|Source -> {{1, 1}, {1, 5}}|>]}
144 | 	,
145 | 	TestID->"TokenErrors-20190520-B7G4V4"
146 | ]
147 | 
148 | 
149 | Test[
150 | 	CodeTokenize["2^^@"]
151 | 	,
152 | 	{
153 | 		ErrorNode[Token`Error`Number, "2^^", <|Source -> {{1, 1}, {1, 4}}|>],
154 | 		LeafNode[Token`At, "@", <|Source -> {{1, 4}, {1, 5}}|>]}
155 | 	,
156 | 	TestID->"TokenErrors-20190520-J3Q2S7"
157 | ]
158 | 
159 | 
160 | 
161 | 
162 | (*
163 | ExpectedAccuracy (reported as Token`Error`Number):
164 | *)
165 | 
166 | Test[
167 | 	CodeTokenize["1.2``->3"]
168 | 	,
169 | 	{
170 | 		ErrorNode[Token`Error`Number, "1.2``-", <|Source -> {{1, 1}, {1, 7}}|>],
171 | 		LeafNode[Token`Greater, ">", <|Source -> {{1, 7}, {1, 8}}|>],
172 | 		LeafNode[Integer, "3", <|Source -> {{1, 8}, {1, 9}}|>]}
173 | 	,
174 | 	TestID->"TokenErrors-20190520-B2J9I4"
175 | ]
176 | 
177 | 
178 | 
179 | 
180 | 
181 | 
182 | 
183 | 
184 | (*
185 | ExpectedExponent (reported as Token`Error`Number):
186 | *)
187 | 
188 | Test[
189 | 	CodeTokenize["123*^"]
190 | 	,
191 | 	{ErrorNode[Token`Error`Number, "123*^", <|Source -> {{1, 1}, {1, 6}}|>]}
192 | 	,
193 | 	TestID->"TokenErrors-20190520-L1J8C1"
194 | ]
195 | 
196 | 
197 | 
198 | 
199 | 
200 | (*
201 | ExpectedEqual:
202 | *)
203 | 
204 | Test[
205 | 	CodeTokenize["a ^: f"]
206 | 	,
207 | 	{
208 | 		LeafNode[Symbol, "a", <|Source -> {{1, 1}, {1, 2}}|>],
209 | 		LeafNode[Whitespace, " ", <|Source -> {{1, 2}, {1, 3}}|>],
210 | 		ErrorNode[Token`Error`ExpectedEqual, "^:", <|Source -> {{1, 3}, {1, 5}}|>],
211 | 		LeafNode[Whitespace, " ", <|Source -> {{1, 5}, {1, 6}}|>],
212 | 		LeafNode[Symbol, "f", <|Source -> {{1, 6}, {1, 7}}|>] }
213 | 	,
214 | 	TestID->"TokenErrors-20190520-M3N7T5"
215 | ]
216 | 
217 | 
218 | 
219 | 
220 | 
221 | 
222 | 
223 | 


--------------------------------------------------------------------------------
/cpp/include/Diagnostics.h:
--------------------------------------------------------------------------------
  1 | 
  2 | #include "WolframLibrary.h"
  3 | #undef True
  4 | #undef False
  5 | 
  6 | #include <ostream>
  7 | #include <string>
  8 | 
  9 | // Exported diagnostics entry points; this header only declares them.
 10 | EXTERN_C DLLEXPORT void DiagnosticsLog(std::string s); // message is taken by value
 11 | 
 12 | EXTERN_C DLLEXPORT void DiagnosticsMarkTime(); // NOTE(review): presumably records a start time for DiagnosticsLogTime() — confirm
 13 | 
 14 | EXTERN_C DLLEXPORT void DiagnosticsLogTime();
 15 | 
 16 | EXTERN_C DLLEXPORT void DiagnosticsPrint(); // NOTE(review): presumably reports the counters declared below — confirm
 17 | 
 18 | extern int ByteBuffer_size;
 19 | 
 20 | extern int ByteDecoder_PrintableCount;
 21 | 
 22 | extern int ByteDecoder_LineFeedCount;
 23 | 
 24 | extern int ByteDecoder_TabCount;
 25 | 
 26 | extern int ByteDecoder_CarriageReturnCount;
 27 | 
 28 | extern int ByteDecoder_1ByteCount;
 29 | 
 30 | extern int ByteDecoder_2ByteCount;
 31 | 
 32 | extern int ByteDecoder_3ByteCount;
 33 | 
 34 | extern int ByteDecoder_4ByteCount;
 35 | 
 36 | extern int ByteDecoder_FFCount;
 37 | 
 38 | extern int ByteDecoder_Incomplete1ByteCount;
 39 | 
 40 | extern int CharacterDecoder_UnescapedCount;
 41 | 
 42 | extern int CharacterDecoder_LineContinuationCount;
 43 | 
 44 | extern int CharacterDecoder_LongNameCount;
 45 | 
 46 | extern int CharacterDecoder_4HexCount;
 47 | 
 48 | extern int CharacterDecoder_2HexCount;
 49 | 
 50 | extern int CharacterDecoder_6HexCount;
 51 | 
 52 | extern int CharacterDecoder_OctalCount;
 53 | 
 54 | extern int CharacterDecoder_StringMetaBackspaceCount;
 55 | 
 56 | extern int CharacterDecoder_StringMetaFormFeedCount;
 57 | 
 58 | extern int CharacterDecoder_StringMetaLineFeedCount;
 59 | 
 60 | extern int CharacterDecoder_StringMetaCarriageReturnCount;
 61 | 
 62 | extern int CharacterDecoder_StringMetaTabCount;
 63 | 
 64 | extern int CharacterDecoder_StringMetaDoubleQuoteCount;
 65 | 
 66 | extern int CharacterDecoder_StringMetaBackslashCount;
 67 | 
 68 | extern int CharacterDecoder_StringMetaOpenCount;
 69 | 
 70 | extern int CharacterDecoder_StringMetaCloseCount;
 71 | 
 72 | extern int CharacterDecoder_LinearSyntaxBangCount;
 73 | 
 74 | extern int CharacterDecoder_LinearSyntaxPercentCount;
 75 | 
 76 | extern int CharacterDecoder_LinearSyntaxAmpCount;
 77 | 
 78 | extern int CharacterDecoder_LinearSyntaxOpenParenCount;
 79 | 
 80 | extern int CharacterDecoder_LinearSyntaxCloseParenCount;
 81 | 
 82 | extern int CharacterDecoder_LinearSyntaxStarCount;
 83 | 
 84 | extern int CharacterDecoder_LinearSyntaxPlusCount;
 85 | 
 86 | extern int CharacterDecoder_LinearSyntaxSlashCount;
 87 | 
 88 | extern int CharacterDecoder_LinearSyntaxAtCount;
 89 | 
 90 | extern int CharacterDecoder_LinearSyntaxCaretCount;
 91 | 
 92 | extern int CharacterDecoder_LinearSyntaxUnderscoreCount;
 93 | 
 94 | extern int CharacterDecoder_LinearSyntaxBacktickCount;
 95 | 
 96 | extern int CharacterDecoder_LinearSyntaxSpaceCount;
 97 | 
 98 | extern int CharacterDecoder_UnhandledCount;
 99 | 
100 | extern int Tokenizer_StringFastCount;
101 | 
102 | extern int Tokenizer_StringSlowCount;
103 | 
104 | extern int Tokenizer_CommentCount;
105 | 
106 | extern int Tokenizer_NewlineCount;
107 | 
108 | extern int Tokenizer_SymbolCount;
109 | 
110 | extern int Tokenizer_OpenSquareCount;
111 | 
112 | extern int Tokenizer_OpenCurlyCount;
113 | 
114 | extern int Tokenizer_WhitespaceCount;
115 | 
116 | extern int Tokenizer_CommaCount;
117 | 
118 | extern int Tokenizer_CloseSquareCount;
119 | 
120 | extern int Tokenizer_CloseCurlyCount;
121 | 
122 | extern int Tokenizer_CloseParenCount;
123 | 
124 | extern int Tokenizer_MinusGreaterCount;
125 | 
126 | extern int Tokenizer_NumberCount;
127 | 
128 | extern int Tokenizer_ColonGreaterCount;
129 | 
130 | extern int Tokenizer_MinusCount;
131 | 
132 | extern int Tokenizer_OpenParenCount;
133 | 
134 | extern int Tokenizer_HashCount;
135 | 
136 | extern int Tokenizer_AmpCount;
137 | 
138 | extern int Tokenizer_PlusCount;
139 | 
140 | extern int Node_LeafNodeCount;
141 | 
142 | extern int Node_ErrorNodeCount;
143 | 
144 | extern int Node_UnterminatedTokenErrorNeedsReparseNodeCount;
145 | 
146 | extern int Node_SyntaxErrorNodeCount;
147 | 
148 | //extern int Node_OperatorNodeCount;
149 | 
150 | extern int Node_AbortNodeCount;
151 | 
152 | extern int Node_PrefixNodeCount;
153 | 
154 | extern int Node_BinaryNodeCount;
155 | 
156 | extern int Node_InfixNodeCount;
157 | 
158 | extern int Node_TernaryNodeCount;
159 | 
160 | extern int Node_PostfixNodeCount;
161 | 
162 | extern int Node_PrefixBinaryNodeCount;
163 | 
164 | extern int Node_GroupNodeCount;
165 | 
166 | extern int Node_CompoundNodeCount;
167 | 
168 | extern int Node_GroupMissingCloserNodeCount;
169 | 
170 | extern int Node_UnterminatedGroupNeedsReparseNodeCount;
171 | 
172 | extern int Node_CallNodeCount;
173 | 


--------------------------------------------------------------------------------
/Tests/AbstractSyntaxIssues.mt:
--------------------------------------------------------------------------------
  1 | Print["\n===== Start AbstractSyntaxIssues.mt =====\n"]
  2 | 
  3 | Needs["CodeParser`"]
  4 | 
  5 | (*
  6 | Package:
  7 | *)
  8 | 
  9 | TestMatch[
 10 | 	FirstCase[CodeParse["BeginPackage[\"Foo`\"]", ContainerNode -> (ContainerNode[Hold, #[[1]], <||>]&)],
 11 | 		KeyValuePattern[AbstractSyntaxIssues -> _], $Failed, {0, Infinity}]
 12 | 	,
 13 | 	KeyValuePattern[AbstractSyntaxIssues -> {SyntaxIssue["Package", _, _, _]}]
 14 | 	,
 15 | 	TestID->"AbstractSyntaxIssues-20190520-P2N0D7"
 16 | ]
 17 | 
 18 | TestMatch[
 19 | 	FirstCase[CodeParse["EndPackage[]", ContainerNode -> (ContainerNode[Hold, #[[1]], <||>]&)],
 20 | 		KeyValuePattern[AbstractSyntaxIssues -> _], $Failed, {0, Infinity}]
 21 | 	,
 22 | 	KeyValuePattern[AbstractSyntaxIssues -> {SyntaxIssue["Package", _, _, _]}]
 23 | 	,
 24 | 	TestID->"AbstractSyntaxIssues-20190520-M6K6Y5"
 25 | ]
 26 | 
 27 | TestMatch[
 28 | 	FirstCase[CodeParse["Begin[\"Foo`\"]", ContainerNode -> (ContainerNode[Hold, #[[1]], <||>]&)],
 29 | 		KeyValuePattern[AbstractSyntaxIssues -> _], $Failed, {0, Infinity}]
 30 | 	,
 31 | 	KeyValuePattern[AbstractSyntaxIssues -> {SyntaxIssue["Package", _, _, _]}]
 32 | 	,
 33 | 	TestID->"AbstractSyntaxIssues-20190520-F7B2Y5"
 34 | ]
 35 | 
 36 | TestMatch[
 37 | 	FirstCase[CodeParse["End[]", ContainerNode -> (ContainerNode[Hold, #[[1]], <||>]&)],
 38 | 		KeyValuePattern[AbstractSyntaxIssues -> _], $Failed, {0, Infinity}]
 39 | 	,
 40 | 	KeyValuePattern[AbstractSyntaxIssues -> {SyntaxIssue["Package", _, _, _]}]
 41 | 	,
 42 | 	TestID->"AbstractSyntaxIssues-20190520-T0U9L8"
 43 | ]
 44 | 
 45 | 
 46 | 
 47 | 
 48 | 
 49 | 
 50 | 
 51 | 
 52 | (*
 53 | StrangeCall:
 54 | *)
 55 | 
 56 | (*
 57 | TestMatch[
 58 | 	FirstCase[CodeParse[" %[] ", ContainerNode -> (ContainerNode[Hold, #[[1]], <||>]&)],
 59 | 		KeyValuePattern[AbstractSyntaxIssues -> _], $Failed, {0, Infinity}]
 60 | 	,
 61 | 	KeyValuePattern[AbstractSyntaxIssues -> {SyntaxIssue["StrangeCall", _, _, _]}]
 62 | 	,
 63 | 	TestID->"AbstractSyntaxIssues-20190520-X5H0W9"
 64 | ]
 65 | *)
 66 | 
 67 | TestMatch[
 68 | 	FirstCase[CodeParse[" \\!\\(x\\)[] ", ContainerNode -> (ContainerNode[Hold, #[[1]], <||>]&)],
 69 | 		KeyValuePattern[AbstractSyntaxIssues -> _], $Failed, {0, Infinity}]
 70 | 	,
 71 | 	KeyValuePattern[AbstractSyntaxIssues -> {SyntaxIssue["StrangeCall", _, _, _]}]
 72 | 	,
 73 | 	TestID->"AbstractSyntaxIssues-20190520-V9T6S1"
 74 | ]
 75 | 
 76 | TestMatch[
 77 | 	FirstCase[CodeParse[" \\(x\\)[] ", ContainerNode -> (ContainerNode[Hold, #[[1]], <||>]&)],
 78 | 		KeyValuePattern[AbstractSyntaxIssues -> _], $Failed, {0, Infinity}]
 79 | 	,
 80 | 	KeyValuePattern[AbstractSyntaxIssues -> {SyntaxIssue["StrangeCall", _, _, _]}]
 81 | 	,
 82 | 	TestID->"AbstractSyntaxIssues-20190520-I7T4W0"
 83 | ]
 84 | 
 85 | 
 86 | TestMatch[
 87 | 	FirstCase[CodeParse[" x--[] ", ContainerNode -> (ContainerNode[Hold, #[[1]], <||>]&)],
 88 | 		KeyValuePattern[AbstractSyntaxIssues -> _], $Failed, {0, Infinity}]
 89 | 	,
 90 | 	KeyValuePattern[AbstractSyntaxIssues -> {SyntaxIssue["StrangeCall", _, _, _]}]
 91 | 	,
 92 | 	TestID->"AbstractSyntaxIssues-20190520-I3X6I7"
 93 | ]
 94 | 
 95 | 
 96 | 
 97 | 
 98 | 
 99 | 
100 | 
101 | (*
102 | SyntaxUndocumentedMessageName:
103 | *)
104 | 
105 | TestMatch[
106 | 	FirstCase[CodeParse[" a::b::c::d ", ContainerNode -> (ContainerNode[Hold, #[[1]], <||>]&)],
107 | 		KeyValuePattern[AbstractSyntaxIssues -> _], $Failed, {0, Infinity}]
108 | 	,
109 | 	KeyValuePattern[AbstractSyntaxIssues -> {SyntaxIssue["SyntaxUndocumentedMessageName", _, _, _]}]
110 | 	,
111 | 	TestID->"AbstractSyntaxIssues-20190520-F4W6X1"
112 | ]
113 | 
114 | 
115 | 
116 | 
117 | 
118 | 
119 | 
120 | (*
121 | StrangeCallSlotSequence:
122 | *)
123 | 
124 | TestMatch[
125 | 	FirstCase[CodeParse[" ##2[] ", ContainerNode -> (ContainerNode[Hold, #[[1]], <||>]&)],
126 | 		KeyValuePattern[AbstractSyntaxIssues -> _], $Failed, {0, Infinity}]
127 | 	,
128 | 	KeyValuePattern[AbstractSyntaxIssues -> {SyntaxIssue["StrangeCallSlotSequence", _, _, _]}]
129 | 	,
130 | 	TestID->"AbstractSyntaxIssues-20190520-O7G6C1"
131 | ]
132 | 
133 | 
134 | 
135 | 
136 | 
137 | (*
138 | NotContiguous:
139 | *)
140 | (*
141 | Not handled by parser any more
142 | 
143 | handled by syntax highlighting
144 | *)
145 | (*TestMatch[
146 | 	FirstCase[CodeParse[" a[[] ] ", ContainerNode -> (ContainerNode[Hold, #[[1]], <||>]&)],
147 | 		KeyValuePattern[AbstractSyntaxIssues -> _], $Failed, {0, Infinity}]
148 | 	,
149 | 	KeyValuePattern[AbstractSyntaxIssues -> {FormatIssue["NotContiguous", _, _, _]}]
150 | 	,
151 | 	TestID->"AbstractSyntaxIssues-20190520-U1R2G5"
152 | ]
153 | 
154 | *)
155 | 
156 | 
157 | 
158 | 
159 | 
160 | 


--------------------------------------------------------------------------------
/docs/compatibility.md:
--------------------------------------------------------------------------------
  1 | 
  2 | # Compatibility
  3 | 
  4 | 
  5 | ## Source Compatibility
  6 | 
  7 | CodeParser has source compatibility with 11.0+
  8 | 
  9 | 
 10 | ## FrontEnd Compatibility
 11 | 
 12 | Any source .wl files that have `(* ::Package::"Tags" *)` or `(* ::Code::Initialization::"Tags" *)` syntax may only be edited with a version 12.3+ FE.
 13 | 
 14 | 
 15 | ## Runtime Compatibility
 16 | 
 17 | Building with Wolfram 11.0+ and running with the same version should always work.
 18 | 
 19 | Building and running with different Wolfram versions will not always work.
 20 | 
 21 | Building with the latest Wolfram version (which is 13.1) is only guaranteed to work back to Wolfram 13.1+.
 22 | 
 23 | This is due to various issues including LibraryLink versioning and rpath changes on MacOSX.
 24 | 
 25 | 
 26 | ## C++ Compiler Compatibility
 27 | 
 28 | CodeParser uses C++17 features and requires a compiler that can support at least C++17.
 29 | 
 30 | 
 31 | ## Wolfram Compiler Compatibility
 32 | 
 33 | The ExprLibrary built by the Wolfram Compiler requires 13.1+.
 34 | 
 35 | 
 36 | ## CCompilerDriver libraries
 37 | 
 38 | This table shows the value of -mmacosx-version-min for libraries generated by CCompilerDriver:
 39 | 
 40 | | Wolfram version | MacOSX-x86-64 | MacOSX-ARM64 |
 41 | | --------------- | ------------- | ------------ |
 42 | | 12.0            | 10.10         | N/A          |
 43 | | 12.1            | 10.10         | N/A          |
 44 | | 12.2            | 10.12         | 11.0         |
 45 | | 12.3            | 10.14         | 11.0         |
 46 | | 13.0            | 10.14         | 11.0         |
 47 | | 13.1            | 10.14         | 11.0         |
 48 | | 13.2            | 10.15         | 11.0         |
 49 | | 13.3            | 11.00         | 11.0         |
 50 | 
 51 | CodeParser sets the same macosx-version-min in order to achieve maximum compatibility with libraries generated by CCompilerDriver.
 52 | 
 53 | 
 54 | ## Earlier Versions
 55 | 
 56 | Wolfram versions before 12.1 must build from sources to use CodeParser.
 57 | 
 58 | Manually modify WolframVersion in PacletInfo.wl to allow the paclet to be used.
 59 | 
 60 | The message that you get when you install the paclet:
 61 | ```
 62 | The paclet CodeParser was successfully installed.
 63 | ```
 64 | does not necessarily mean that the paclet can be used.
 65 | 
 66 | Make sure that the correct WolframVersion is specified.
 67 | 
 68 | 
 69 | ## LibraryLink
 70 | 
 71 | CodeParser uses [LibraryLink](https://reference.wolfram.com/language/guide/LibraryLink.html).
 72 | 
 73 | The version of LibraryLink was updated in version 13.1:
 74 | 
 75 | | Wolfram version | LibraryLink version |
 76 | | --------------- | ------------------- |
 77 | | 12.0            | 5                   |
 78 | | 12.1            | 6                   |
 79 | | 12.2            | 6                   |
 80 | | 12.3            | 6                   |
 81 | | 13.0            | 6                   |
 82 | | 13.1            | 7                   |
 83 | 
 84 | The LibraryLink version is defined in the header file WolframLibrary.h
 85 | 
 86 | In the [LibraryLink documentation](https://reference.wolfram.com/language/LibraryLink/tutorial/LibraryStructure.html#280210622), it is described how backwards-compatibility is not maintained:
 87 | 
 88 | >However, you should note that you cannot use a library built with a newer version of the header into an older version of the Wolfram Language.
 89 | 
 90 | So LibraryLink defines Wolfram version 13.1 as a minimum that can run with libraries built with the current Wolfram version.
 91 | 
 92 | But to be clear, LibraryLink does have forwards-compatibility. For example, libraries built with LibraryLink version 6 will work with LibraryLink version 7.
 93 | 
 94 | The CodeParser paclets distributed by Wolfram Research on the public paclet server are built with LibraryLink version 6 in order to guarantee compatibility with all versions of Wolfram System from 12.1 onward.
 95 | 
 96 | 
 97 | ## rpath (MacOSX)
 98 | 
 99 | CodeParser uses MathLink.
100 | 
101 | The mathlink rpath was changed in version 12.1:
102 | 
103 | | Wolfram version | mathlink rpath                                                           |
104 | | --------------- | ------------------------------------------------------------------------ |
105 | | 12.0            | @executable_path/../Frameworks/mathlink.framework/Versions/4.36/mathlink |
106 | | 12.1            | @rpath/mathlink.framework/Versions/4/mathlink                            |
107 | 
108 | This means that CodeParser.dylib built with 12.1+ will not work with previous versions.
109 | 
110 | It is possible to use `install_name_tool` to change the rpath, but it is recommended to build from sources.
111 | 


--------------------------------------------------------------------------------
/Tests/Scoping.mt:
--------------------------------------------------------------------------------
  1 | Print["\n===== Start Scoping.mt =====\n"]
  2 | 
  3 | Needs["CodeParser`"]
  4 | Needs["CodeParser`Scoping`"]
  5 | Needs["CodeParser`Utils`"]
  6 | Needs["CodeParser`Library`"]
  7 | 
  8 | 
  9 | ast = CodeParse["Module[{x, y}, Block[{x, z}, x]]"];
 10 | 
 11 | (*
 12 | Test that x is not marked as unused in the Module
 13 | 
 14 | bug 414554
 15 | *)
 16 | Test[
 17 | 	ScopingData[ast]
 18 | 	,
 19 | 	{
 20 |  scopingDataObject[{{1, 30}, {1, 31}}, {"Module", "Block"}, {"shadowed"}, "x"],
 21 |  scopingDataObject[{{1, 23}, {1, 24}}, {"Module", "Block"}, {"shadowed"}, "x"],
 22 |  scopingDataObject[{{1, 9}, {1, 10}}, {"Module"}, {}, "x"],
 23 |  scopingDataObject[{{1, 26}, {1, 27}}, {"Block"}, {"unused"}, "z"],
 24 |  scopingDataObject[{{1, 12}, {1, 13}}, {"Module"}, {"unused"}, "y"]}
 25 | 	,
 26 | 	TestID->"Scoping-20210921-U4U6T2"
 27 | ]
 28 | 
 29 | 
 30 | 
 31 | 
 32 | 
 33 | 
 34 | box = RowBox[{SuperscriptBox["u",
 35 |      TagBox[RowBox[{"(", RowBox[{"dx_", ",", "0"}], ")"}],
 36 |       Derivative]], "\[RuleDelayed]", "a"}];
 37 | 
 38 | cst = CodeConcreteParseBox[box];
 39 | 
 40 | Test[
 41 | 	cst,
 42 | 	ContainerNode[Box, {
 43 | 		BinaryNode[RuleDelayed, {
 44 | 			BoxNode[SuperscriptBox, {
 45 | 				LeafNode[Symbol, "u", <| Source -> {1, 1, 1} |>],
 46 | 				BoxNode[TagBox, {
 47 | 					GroupNode[
 48 | 						GroupParen,
 49 | 						{
 50 | 							LeafNode[Token`OpenParen, "(", <| Source -> {1, 1, 2, 1, 1, 1} |>],
 51 | 							InfixNode[Comma, {
 52 | 								CompoundNode[
 53 | 									PatternBlank,
 54 | 									{
 55 | 										LeafNode[Symbol, "dx", <| Source -> {1, 1, 2, 1, 1, 2, 1, 1} |>],
 56 | 										LeafNode[Token`Under, "_", <| Source -> {1, 1, 2, 1, 1, 2, 1, 1} |>]
 57 | 									},
 58 | 									<| Source -> {1, 1, 2, 1, 1, 2, 1, 1} |>
 59 | 								],
 60 | 								LeafNode[Token`Comma, ",", <| Source -> {1, 1, 2, 1, 1, 2, 1, 2} |>],
 61 | 								LeafNode[Integer, "0", <| Source -> {1, 1, 2, 1, 1, 2, 1, 3} |>]
 62 | 							},
 63 | 								<| Source -> {1, 1, 2, 1, 1, 2} |>
 64 | 							],
 65 | 							LeafNode[Token`CloseParen, ")" , <| Source -> {1, 1, 2, 1, 1, 3} |>]
 66 | 						},
 67 | 						<| Source -> {1, 1, 2, 1} |>
 68 | 					],
 69 | 					CodeNode @@ {Evaluated, Derivative, <||>}
 70 | 				},
 71 | 					<| Source -> {1, 1, 2} |>
 72 | 				]
 73 | 			},
 74 | 				<| Source -> {1, 1} |>
 75 | 			],
 76 | 			LeafNode[Token`LongName`RuleDelayed, "\[RuleDelayed]",<| Source -> {1, 2} |>],
 77 | 			LeafNode[Symbol, "a", <| Source -> {1, 3} |>]
 78 | 		}, <|Source -> {} |>]
 79 | 	}, <||>]
 80 | ]
 81 | 
 82 | Test[RoundTripCst[cst], cst]
 83 | 
 84 | agg = CodeParser`Abstract`Aggregate[cst];
 85 | 
 86 | ast = CodeParser`Abstract`Abstract[agg];
 87 | 
 88 | Test[
 89 | 	ScopingData[ast]
 90 | 	,
 91 | 	{scopingDataObject[{1, 1, 2, 1, 1, 2, 1, 1}, {"RuleDelayed"}, {"unused"}, "dx"]}
 92 | 	,
 93 | 	TestID->"Scoping-20220211-E8N5O8"
 94 | ]
 95 | 
 96 | 
 97 | 
 98 | 
 99 | 
100 | 
101 | ast = CodeParse["foo[] := \\!\\(\\*s\\)"]
102 | 
103 | Test[
104 | 	ScopingData[ast]
105 | 	,
106 | 	{scopingDataObject[{{1, 1}, {1, 4}}, {"Defined"}, {"definition"}, "foo"]}
107 | 	,
108 | 	TestID->"Scoping-20220316-D3G1W4"
109 | ]
110 | 
111 | 
112 | 
113 | 
114 | 
115 | 
116 | 
117 | 
118 | 
119 | 
120 | 
121 | 
122 | ast = CodeParse["
123 | 
124 | foo[x_]:=x+1
125 | 
126 | Module[{a}, a+1]
127 | 
128 | Module[{y},
129 |   y + 2
130 | ]
131 | 
132 | Module[{b}, b+1]
133 | 
134 | bar[z_]:=z+3
135 | 
136 | "]
137 | 
138 | 
139 | Test[
140 | 	ScopingData[ast]
141 | 	,
142 | 	{scopingDataObject[{{3, 1}, {3, 4}}, {"Defined"}, {"definition"}, "foo"],
143 | 	scopingDataObject[{{3, 10}, {3, 11}}, {"SetDelayed"}, {}, "x"],
144 | 	scopingDataObject[{{3, 5}, {3, 6}}, {"SetDelayed"}, {}, "x"],
145 | 	scopingDataObject[{{5, 13}, {5, 14}}, {"Module"}, {}, "a"],
146 | 	scopingDataObject[{{5, 9}, {5, 10}}, {"Module"}, {}, "a"],
147 | 	scopingDataObject[{{8, 3}, {8, 4}}, {"Module"}, {}, "y"],
148 | 	scopingDataObject[{{7, 9}, {7, 10}}, {"Module"}, {}, "y"],
149 | 	scopingDataObject[{{11, 13}, {11, 14}}, {"Module"}, {}, "b"],
150 | 	scopingDataObject[{{11, 9}, {11, 10}}, {"Module"}, {}, "b"],
151 | 	scopingDataObject[{{13, 1}, {13, 4}}, {"Defined"}, {"definition"}, "bar"],
152 | 	scopingDataObject[{{13, 10}, {13, 11}}, {"SetDelayed"}, {}, "z"],
153 | 	scopingDataObject[{{13, 5}, {13, 6}}, {"SetDelayed"}, {}, "z"]}
154 | 	,
155 | 	TestID->"Scoping-20220830-W8Q8Y1"
156 | ]
157 | 
158 | Test[
159 | 	ScopingData[ast, SourceMemberQ[#[[3, Key[Source]]], {8, 3}]&]
160 | 	,
161 | 	{scopingDataObject[{{8, 3}, {8, 4}}, {"Module"}, {}, "y"],
162 | 	scopingDataObject[{{7, 9}, {7, 10}}, {"Module"}, {}, "y"]}
163 | 	,
164 | 	TestID->"Scoping-20220830-X8E0N5"
165 | ]
166 | 
167 | 
168 | 
169 | 
170 | 
171 | 
172 | 
173 | 


--------------------------------------------------------------------------------
/crates/wolfram-parser/src/parse/parser_docs.rs:
--------------------------------------------------------------------------------
  1 | //! # Parser Design
  2 | //!
  3 | //! Each parse of a Wolfram input is managed by a [`ParserSession`] instance.
  4 | //!
  5 | //! Parsing logic is structured into individual "modules" called *parselets*.
  6 | //!
  7 | //! There are two kinds of parselet:
  8 | //!
  9 | //! * [`PrefixParselet`] — invoked when there is no previous expression in the
 10 | //!   current context.
 11 | //! * [`InfixParselet`] — invoked when there is a previous expression in the
 12 | //!   current context.
 13 | //!
 14 | //! Every token is associated with one [`PrefixParselet`] instance
 15 | //! ([`ParseBuilder::with_prefix_parselet()`]) and one
 16 | //! [`InfixParselet`] instance ([`ParseBuilder::with_infix_parselet()`]), which are
 17 | //! invoked, respectively, when that token is encountered in "prefix" or "infix"
 18 | //! position.
 19 | //!
 20 | //! Parselet implementations will typically view the current or next token,
 21 | //! do a bit of logic checking for possible ways forward, and then continue the
 22 | //! parsing process by doing one of the following:
 23 | //!
 24 | //! * For simple parselets, like [`LeafParselet`], construct a parsed node from
 25 | //!   a single [*operand token*][term] and return it.
 26 | //!
 27 | //! * Call [`ParserSession::parse_prefix()`] on subsequent token(s) in the input
 28 | //!   to parse parselet-defined argument subexpression(s), followed by
 29 | //!   calling a [`reduce_*()` method][self#reduce-methods] to produce a new
 30 | //!   parsed node.
 31 | //!
 32 | //! * Call [`ParserSession::parse_infix()`] on a subsequent token
 33 | //!    in the input, passing in the immediately previously completed parsed
 34 | //!    sub-expression.
 35 | //!
 36 | //! In the majority of cases, parselet implementations should finish by calling
 37 | //! [`parse_climb()`][ParserSession::parse_climb] and passing in the completed
 38 | //! parsed node value.
 39 | //!
 40 | //! [term]: crate::parse#general-terminology
 41 | //!
 42 | //! # Parse Contexts
 43 | //!
 44 | //! The term "context" is used to refer to the state kept by the parser to
 45 | //! guide the parsing of a subexpression within the input.
 46 | //!
 47 | //! Context state is stored as a [`Context`] value created by calls to
 48 | //! [`ParserSession::push_context()`]. A new parser context is typically created
 49 | //! when the parser begins processing a higher-precedence subexpression.
 50 | //!
 51 | //! The text diagram below roughly indicates the region of source code covered
 52 | //! by several contexts:
 53 | //!
 54 | //! ```text
 55 | //! a + b * foo[x / y]  |
 56 | //!             ^^!^^   | BinaryOperatorParselet, Precedence::SLASH,   reduce_binary()
 57 | //!         ^^^!^^^^^^  | CallParselet,           Precedence::HIGHEST, reduce_call()
 58 | //!     ^^!^^^^^^^^^^^  | InfixOperatorParselet,  Precedence::STAR,    reduce_infix()
 59 | //! ^^!^^^^^^^^^^^^^^^  | InfixOperatorParselet,  Precedence::PLUS,    reduce_infix()
 60 | //! ```
 61 | //!
 62 | //! From this diagram, a few corollary statements about contexts follow:
 63 | //!
 64 | //! * Roughly speaking, one context exists for each logical subexpression in the
 65 | //!   input.
 66 | //!
 67 | //! * A parser context must always contain at least one node (its initial node).
 68 | //!
 69 | //!   *Note:* [`ParserSession::push_context()`] must only be called by a
 70 | //!   parselet implementation after a node has been pushed.
 71 | //!
 72 | //! * A parser context has an associated precedence value, typically the
 73 | //!   precedence of the operator that caused a new parsing context to begin.
 74 | //!
 75 | //! * At any given time during parsing, the current parsing contexts form a
 76 | //!   stack, with the latest (further along in the input) context at the top.
 77 | //!
 78 | //! Typically, though not always, parser contexts are created automatically
 79 | //! when [`ParserSession::parse_climb()`] detects that a subsequent token
 80 | //! in the input has a higher precedence than the current top context, and
 81 | //! begins a new context using [`push_context()`][ParserSession::push_context] to
 82 | //! contain the parsing of the higher-precedence subexpression.
 83 | //!
 84 | //! Parser contexts provide a bit of ambient information to guide the parser,
 85 | //! but they are not responsible for creating, storing or manipulating parsed
 86 | //! expressions.
 87 | //!
 88 | //!
 89 | //!
 90 | 
 91 | 
 92 | 
 93 | // Import items referenced in the module doc comment
 94 | #[allow(unused_imports)]
 95 | use crate::{
 96 |     cst::Cst,
 97 |     parse::{
 98 |         parselet::{InfixParselet, LeafParselet, PrefixParselet},
 99 |         Context, ParseBuilder, ParserSession,
100 |     },
101 |     read::Reader,
102 |     tokenize::TokenKind,
103 | };
104 | 


--------------------------------------------------------------------------------
/crates/wolfram-parser/src/tests/test_api.rs:
--------------------------------------------------------------------------------
  1 | use crate::{
  2 |     issue::{CodeAction, Issue, IssueTag, Severity},
  3 |     macros::src,
  4 |     parse_cst,
  5 |     source::{Source, Span},
  6 |     symbols as sym, ParseOptions, SourceConvention, StringifyMode,
  7 | };
  8 | 
  9 | use pretty_assertions::assert_eq;
 10 | 
 11 | 
 12 | //
 13 | // Regression test: this input used to trigger an assertion failure.
 14 | //
 15 | #[test]
 16 | fn APITest_Bug1() {
 17 |     let input = b"abc[]";
 18 |     let opts = ParseOptions::default();
 19 |     let mode = StringifyMode::Normal;
 20 | 
 21 |     let outcome = crate::parse_to_token(input, &opts, mode);
 22 | 
 23 |     assert_eq!(outcome.non_fatal_issues, Vec::new());
 24 |     assert_eq!(outcome.fatal_issues, Vec::new());
 25 | }
 26 | 
 27 | //
 28 | // Regression test: parsing this input used to hang.
 29 | //
 30 | #[test]
 31 | fn APITest_Hang1() {
 32 |     let input = "<<rr[R";
 33 |     let opts = ParseOptions::default();
 34 |     let parsed = parse_cst(input, &opts);
 35 | 
 36 |     assert_eq!(parsed.non_fatal_issues, Vec::new());
 37 |     assert_eq!(parsed.fatal_issues, Vec::new());
 38 | }
 39 | 
 40 | //
 41 | // Regression test: parsing this input used to crash.
 42 | //
 43 | #[test]
 44 | fn APITest_Crash1() {
 45 |     let input = "0^^";
 46 |     let opts = ParseOptions::default();
 47 |     let parsed = parse_cst(input, &opts);
 48 | 
 49 |     assert_eq!(parsed.non_fatal_issues, Vec::new());
 50 |     assert_eq!(parsed.fatal_issues, Vec::new());
 51 | }
 52 | 
 53 | //
 54 | // Regression test: parsing this input used to crash.
 55 | //
 56 | #[test]
 57 | fn APITest_Crash2() {
 58 |     let input = ".2^^0";
 59 |     let opts = ParseOptions::default();
 60 |     let parsed = parse_cst(input, &opts);
 61 | 
 62 |     assert_eq!(parsed.non_fatal_issues, Vec::new());
 63 |     assert_eq!(parsed.fatal_issues, Vec::new());
 64 | }
 65 | 
 66 | //
 67 | // Regression: parsing `12^^a.a` used to crash.
 68 | //
 69 | #[test]
 70 | fn APITest_Crash3() {
 71 |     let options = ParseOptions::default();
 72 | 
 73 |     let parsed = parse_cst("12^^a.a", &options);
 74 | 
 75 |     assert_eq!(parsed.non_fatal_issues, Vec::new());
 76 |     assert_eq!(parsed.fatal_issues, Vec::new());
 77 | }
 78 | 
 79 | //
 80 | // Regression: parsing `12..` used to crash.
 81 | //
 82 | #[test]
 83 | fn APITest_Crash4() {
 84 |     let parsed = parse_cst("12..", &ParseOptions::default());
 85 | 
 86 |     // Expected: one formatting-level "Ambiguous" issue at 1:3-3 whose only
 87 |     // code action inserts a space at that same span.
 88 |     let insert_space = CodeAction::insert_text(
 89 |         "Insert space".into(),
 90 |         Span::from(src!(1:3-3)),
 91 |         " ".into(),
 92 |     );
 93 |     let expected = vec![Issue {
 94 |         make_sym: sym::CodeParser_FormatIssue,
 95 |         tag: IssueTag::Ambiguous,
 96 |         msg: "Ambiguous syntax.".to_owned(),
 97 |         sev: Severity::Formatting,
 98 |         src: Source::Span(Span::from(src!(1:3-3))),
 99 |         val: 1.0,
100 |         actions: vec![insert_space],
101 |         additional_descriptions: vec![],
102 |         additional_sources: vec![],
103 |     }];
104 | 
105 |     assert_eq!(parsed.non_fatal_issues, expected);
106 |     assert_eq!(parsed.fatal_issues, Vec::new());
107 | }
108 | 
109 | //
110 | // Regression: `123`, a backslash-newline, then `.45` used to crash.
111 | //
112 | #[test]
113 | fn APITest_Crash5() {
114 |     let options = ParseOptions::default();
115 | 
116 |     let parsed = parse_cst("123\\\n.45", &options);
117 | 
118 |     assert_eq!(parsed.non_fatal_issues, Vec::new());
119 |     assert_eq!(parsed.fatal_issues, Vec::new());
120 | }
121 | 
122 | //
123 | // Regression: parsing the escape text `\0560` used to crash.
124 | //
125 | #[test]
126 | fn APITest_Crash6() {
127 |     let options = ParseOptions::default();
128 | 
129 |     let parsed = parse_cst("\\0560", &options);
130 | 
131 |     assert_eq!(parsed.non_fatal_issues, Vec::new());
132 |     assert_eq!(parsed.fatal_issues, Vec::new());
133 | }
134 | 
135 | //
136 | // this used to crash (kept as an intentionally empty placeholder)
137 | //
138 | // NOTE: This test was part of the C++ version, but is not possible in the
139 | //       Rust version of CodeParser, which does not allow invalid values
140 | //       for the SourceConvention.
141 | #[test]
142 | fn APITest_Crash7() {
143 |     // Original input, for reference: let strIn = "1+1";
144 | 
145 |     //
146 |     // The C++ test originally used SOURCECONVENTION_UNKNOWN, which was 0.
147 |     // Once 0 came to mean SourceConvention::LineColumn, the test instead
148 |     // made up a bogus SourceConvention of 2.
149 |     //
150 | 
151 |     // Untranslated original: let mut session = ParserSession::new(strIn.as_bytes(),static_cast<SourceConvention>(2), DEFAULT_TAB_WIDTH, FirstLineBehavior::NotScript, EncodingMode::Normal);
152 |     // Untranslated original: assert_eq!(res, PARSERSESSIONINIT_ERROR);
153 | }
154 | 
155 | //
156 | // Regression (CODETOOLS-62): this input used to crash when parsed with
157 | // SourceConvention::CharacterIndex.
158 | //
159 | //
160 | #[test]
161 | fn APITest_Crash8() {
162 |     // Input is `(*`, a CRLF line break, then `*)`.
163 |     let crlf_comment = "(*\r\n*)";
164 |     let parsed = parse_cst(
165 |         crlf_comment,
166 |         &ParseOptions::default()
167 |             .source_convention(SourceConvention::CharacterIndex),
168 |     );
169 | 
170 |     assert_eq!(parsed.non_fatal_issues, Vec::new());
171 |     assert_eq!(parsed.fatal_issues, Vec::new());
172 | }
173 | 


--------------------------------------------------------------------------------
/crates/wolfram-parser/src/bin/main.rs:
--------------------------------------------------------------------------------
  1 | use std::io::{self, Write};
  2 | 
  3 | use wolfram_parser::{
  4 |     fmt_as_expr::FmtAsExpr, ParseOptions, QuirkSettings, StringifyMode,
  5 | };
  6 | 
  7 | 
  8 | // #if DIAGNOSTICS
  9 | // #include "Diagnostics.h"
 10 | // #endif // DIAGNOSTICS
 11 | 
 12 | #[derive(Copy, Clone)]
 13 | enum ApiMode {
 14 |     CstExpr, // default mode: parse to a CST and print it via `FmtAsExpr`
 15 |     Cst, // `--cst`: parse to a CST and print it with `{:#?}`
 16 |     Ast, // `--ast`: parse to an AST and print it with `{:#?}`
 17 |     Tokenize, // `-tokenize`: run `tokenize_bytes`
 18 |     Leaf, // `-leaf`: run `parse_to_token`
 19 |     SafeString, // `-safestring`: run `safe_string`
 20 | }
 21 | 
 22 | #[derive(Copy, Clone)]
 23 | enum OutputMode {
 24 |     None, // `-n`: compute the result but print nothing
 25 |     Print, // default: print the result to stdout
 26 |     #[allow(dead_code)]
 27 |     PrintDryrun, // format into a throwaway buffer; no CLI flag selects this
 28 |     SyntaxQ, // `-check` / `-syntaxq` / `-syntaxQ`: currently prints nothing
 29 | }
 30 | 
 31 | /// Command-line entry point.
 32 | ///
 33 | /// Recognized arguments:
 34 | /// * `-file <path>`: parse the contents of `<path>` instead of stdin
 35 | /// * `-tokenize`, `-leaf`, `-safestring`, `--cst`, `--ast`: select the API
 36 | /// * `-n`, `-check` / `-syntaxq` / `-syntaxQ`: select the output mode
 37 | /// * `--flatten-times`: enable the `flatten_times` quirk
 38 | ///
 39 | /// Panics on an unrecognized argument, or when `-file` has no path after it.
 40 | fn main() {
 41 |     let mut file_input = None;
 42 |     let mut api_mode = ApiMode::CstExpr;
 43 |     let mut output_mode = OutputMode::Print;
 44 |     let mut quirks = QuirkSettings::default();
 45 | 
 46 |     let mut args = std::env::args().skip(1);
 47 | 
 48 |     while let Some(arg) = args.next() {
 49 |         match arg.as_str() {
 50 |             "-file" => {
 51 |                 // A trailing `-file` has nothing to consume; fail with a
 52 |                 // clear message rather than indexing out of bounds.
 53 |                 let Some(path) = args.next() else {
 54 |                     panic!("missing file path after -file");
 55 |                 };
 56 |                 file_input = Some(path);
 57 |             },
 58 |             "-tokenize" => api_mode = ApiMode::Tokenize,
 59 |             "-leaf" => api_mode = ApiMode::Leaf,
 60 |             "-safestring" => api_mode = ApiMode::SafeString,
 61 |             "--cst" => api_mode = ApiMode::Cst,
 62 |             "--ast" => api_mode = ApiMode::Ast,
 63 |             "-n" => output_mode = OutputMode::None,
 64 |             "-check" | "-syntaxq" | "-syntaxQ" => {
 65 |                 output_mode = OutputMode::SyntaxQ;
 66 |             },
 67 |             "--flatten-times" => {
 68 |                 quirks.flatten_times = true;
 69 |             },
 70 |             _ => panic!("unrecognized argument: {arg}"),
 71 |         }
 72 |     }
 73 | 
 74 |     match file_input {
 75 |         Some(path) => read_file(&path, api_mode, output_mode, quirks),
 76 |         None => read_std_in(api_mode, output_mode, quirks),
 77 |     }
 78 | }
 79 | 
 80 | fn read_std_in(mode: ApiMode, output_mode: OutputMode, quirks: QuirkSettings) {
 81 |     // Interactive prompt: read one line at a time and process each in turn.
 82 |     loop {
 83 |         let mut input = String::new();
 84 |         print!(">>> ");
 85 |         io::stdout().flush().unwrap();
 86 | 
 87 |         // `read_line` yields Ok(0) only at end of input; stop there instead
 88 |         // of prompting forever on a closed or exhausted stdin.
 89 |         if io::stdin().read_line(&mut input).unwrap() == 0 {
 90 |             break;
 91 |         }
 92 |         handle(input.trim_end().as_bytes(), mode, output_mode, quirks);
 93 |     }
 94 |     // Not ported: the `#if DIAGNOSTICS` / `DiagnosticsPrint()` hook.
 95 | }
 96 | 
 97 | /// Reads all of `file` into memory and runs `handle` on its bytes once;
 98 | /// panics with "error reading file" when the file cannot be read.
 99 | fn read_file(
100 |     file: &str,
101 |     mode: ApiMode,
102 |     output_mode: OutputMode,
103 |     quirks: QuirkSettings,
104 | ) {
105 |     let contents = std::fs::read(file).expect("error reading file");
106 |     handle(&contents, mode, output_mode, quirks);
107 |     // #if DIAGNOSTICS
108 |     //     DiagnosticsPrint();
109 |     // #endif // DIAGNOSTICS
110 | }
111 | 
112 | /// Runs the API selected by `mode` on `input` and passes what it produces to
113 | /// `output`. `quirks` is installed into otherwise-default `ParseOptions`.
114 | fn handle(
115 |     input: &[u8],
116 |     mode: ApiMode,
117 |     output_mode: OutputMode,
118 |     quirks: QuirkSettings,
119 | ) {
120 |     let mut cfg = ParseOptions::default();
121 |     cfg.quirk_settings = quirks;
122 | 
123 |     match mode {
124 |         ApiMode::CstExpr => {
125 |             let cst = wolfram_parser::parse_bytes_cst_seq(input, &cfg);
126 |             output(output_mode, FmtAsExpr(&cst.syntax));
127 |         },
128 |         ApiMode::Tokenize => {
129 |             let tokens = wolfram_parser::tokenize_bytes(input, &cfg).unwrap();
130 |             output(output_mode, FmtAsExpr(&tokens));
131 |         },
132 |         ApiMode::Leaf => {
133 |             let stringify = StringifyMode::Normal;
134 |             let leaf = wolfram_parser::parse_to_token(input, &cfg, stringify);
135 |             output(output_mode, FmtAsExpr(&leaf.syntax));
136 |         },
137 |         ApiMode::SafeString => {
138 |             let safe = wolfram_parser::safe_string(input, &cfg).unwrap();
139 |             output(output_mode, safe);
140 |         },
141 |         // `--cst` / `--ast` pretty-print the tree with `{:#?}` instead.
142 |         ApiMode::Cst => {
143 |             let cst = wolfram_parser::parse_bytes_cst_seq(input, &cfg);
144 |             output(output_mode, format!("{:#?}", cst.syntax));
145 |         },
146 |         ApiMode::Ast => {
147 |             let ast = wolfram_parser::parse_bytes_ast_seq(input, &cfg);
148 |             output(output_mode, format!("{:#?}", ast.syntax));
149 |         },
150 |     }
151 | }
152 | 
153 | /// Emits `value` as `mode` dictates. Only `Print` writes to stdout;
154 | /// `PrintDryrun` formats into a discarded buffer; other modes do nothing.
155 | fn output<T: std::fmt::Display>(mode: OutputMode, value: T) {
156 |     match mode {
157 |         OutputMode::None | OutputMode::SyntaxQ => {},
158 |         OutputMode::Print => println!("{value}"),
159 |         OutputMode::PrintDryrun => {
160 |             // Exercise formatting without producing any visible output.
161 |             let mut sink: Vec<u8> = Vec::new();
162 |             write!(sink, "{value}\n").unwrap();
163 |         },
164 |     }
165 | }
166 | 


--------------------------------------------------------------------------------
/crates/wolfram-parser/src/parse/parselet/integral_parselet.rs:
--------------------------------------------------------------------------------
  1 | use crate::{
  2 |     panic_if_aborted,
  3 |     parse::{parselet::*, ParserSession},
  4 |     precedence::Precedence,
  5 |     tokenize::{Token, TokenKind, TokenRef},
  6 | };
  7 | 
  8 | impl IntegralParselet {
  9 |     pub(crate) const fn new(
 10 |         Op1: PrefixBinaryOperator, // operator for the two-operand form
 11 |         Op2: PrefixOperator, // operator for the one-operand (prefix) form
 12 |     ) -> Self {
 13 |         IntegralParselet { Op1, Op2 }
 14 |     }
 15 | }
 16 | 
 17 | impl<'i, B: ParseBuilder<'i> + 'i> PrefixParselet<'i, B> for IntegralParselet {
 18 |     /// Parses an expression that begins with an integral operator, e.g.
 19 |     /// "\[Integral] f \[DifferentialD] x" (TID:231113/1).
 20 |     ///
 21 |     /// `tok_in` is the operator token. This method only obtains the first
 22 |     /// operand; `parse1` handles whatever follows it.
 23 |     fn parse_prefix(
 24 |         &self,
 25 |         session: &mut ParserSession<'i, B>,
 26 |         tok_in: TokenRef<'i>,
 27 |     ) -> B::Node {
 28 |         panic_if_aborted!();
 29 | 
 30 |         let op_token = session.push_syntax_and_next(tok_in);
 31 | 
 32 |         // InfixDifferentialDParselet compares `top_precedence()` against
 33 |         // this same constant to detect that it is inside an integral.
 34 |         let _ = session.push_context(Precedence::CLASS_INTEGRATIONOPERATORS);
 35 | 
 36 |         // Token after the operator, plus the trivia returned alongside it.
 37 |         let (trivia1, next_tok) = session.current_token_eat_trivia();
 38 | 
 39 |         // A differential straight after the operator means no integrand.
 40 |         let differential_comes_first = next_tok.tok
 41 |             == TokenKind::LongName_DifferentialD
 42 |             || next_tok.tok == TokenKind::LongName_CapitalDifferentialD;
 43 | 
 44 |         let first_operand = if differential_comes_first {
 45 |             // TID:231113/2: "\[Integral] \[DifferentialD] x"
 46 |             // There is no integrand, so push a fake implicit `1` leaf.
 47 |             let implicit_one =
 48 |                 Token::at_start(TokenKind::Fake_ImplicitOne, next_tok);
 49 |             session.push_leaf(implicit_one)
 50 |         } else {
 51 |             session.parse_prefix(next_tok)
 52 |         };
 53 | 
 54 |         // MUSTTAIL
 55 |         self.parse1(session, op_token, trivia1, first_operand)
 56 |     }
 57 | }
 58 | 
 59 | impl IntegralParselet {
 60 |     /// Finishes an integral after its first operand has been obtained.
 61 |     ///
 62 |     /// When the current token is \[DifferentialD] or \[CapitalDifferentialD],
 63 |     /// that token is parsed as the second operand and both operands are
 64 |     /// reduced with `Op1`; otherwise the first operand alone is reduced with
 65 |     /// `Op2`. Either way the reduced node is passed on to `parse_climb`.
 66 |     fn parse1<'i, B: ParseBuilder<'i> + 'i>(
 67 |         &self,
 68 |         session: &mut ParserSession<'i, B>,
 69 |         prefix_op_token: B::SyntaxTokenNode,
 70 |         trivia1: B::TriviaHandle,
 71 |         first_operand: B::Node,
 72 |     ) -> B::Node {
 73 |         panic_if_aborted!();
 74 | 
 75 |         let (trivia2, tok) = session.current_token();
 76 | 
 77 |         let at_differential = tok.tok == TokenKind::LongName_DifferentialD
 78 |             || tok.tok == TokenKind::LongName_CapitalDifferentialD;
 79 | 
 80 |         let reduced = if at_differential {
 81 |             //
 82 |             // \[Integral] f \[DifferentialD] x
 83 |             //
 84 |             let trivia2 = session.builder.push_trivia_seq(trivia2);
 85 | 
 86 |             // TODO(cleanup):
 87 |             // `tok` here is a known prefix operator.
 88 |             // Statically check somehow that `second_operand` is a prefix
 89 |             // parselet, because we know it is LongName_{Capital}DifferentialD
 90 |             let second_operand = session.parse_prefix(tok);
 91 | 
 92 |             session.reduce_prefix_binary(
 93 |                 self.Op1,
 94 |                 prefix_op_token,
 95 |                 trivia1,
 96 |                 first_operand,
 97 |                 trivia2,
 98 |                 second_operand,
 99 |             )
100 |         } else {
101 |             //
102 |             // TID:231113/3: "\[Integral] f"
103 |             //
104 |             session.trivia_reset(trivia2);
105 |             session.reduce_prefix(
106 |                 self.Op2,
107 |                 prefix_op_token,
108 |                 trivia1,
109 |                 first_operand,
110 |             )
111 |         };
112 | 
113 |         // MUSTTAIL
114 |         session.parse_climb(reduced)
115 |     }
116 | }
117 | 
118 | impl<'i, B: ParseBuilder<'i> + 'i> InfixParselet<'i, B>
119 |     for InfixDifferentialDParselet
120 | {
121 |     /// Never valid to call; always panics.
122 |     fn parse_infix(
123 |         &self,
124 |         _session: &mut ParserSession<'i, B>,
125 |         _node: B::Node,
126 |         _trivia1: B::TriviaHandle,
127 |         _token: TokenRef,
128 |     ) -> B::Node {
129 |         panic!("illegal call to InfixDifferentialDParselet::parse_infix()")
130 |     }
131 | 
132 |     /// `None` when `top_precedence()` is `CLASS_INTEGRATIONOPERATORS` (inside
133 |     /// \[Integral], \[DifferentialD] is special); else `FAKE_IMPLICITTIMES`.
134 |     fn getPrecedence(
135 |         &self,
136 |         session: &ParserSession<'i, B>,
137 |     ) -> Option<Precedence> {
138 |         let inside_integral =
139 |             session.top_precedence() == Precedence::CLASS_INTEGRATIONOPERATORS;
140 | 
141 |         if inside_integral {
142 |             None
143 |         } else {
144 |             Some(Precedence::FAKE_IMPLICITTIMES)
145 |         }
146 |     }
147 | 
148 |     /// Returns `tok_in` unchanged inside \[Integral]; otherwise returns a
149 |     /// `Fake_ImplicitTimes` token built with `Token::at_start` from `tok_in`.
150 |     fn process_implicit_times(
151 |         &self,
152 |         session: &mut ParserSession<'i, B>,
153 |         tok_in: TokenRef<'i>,
154 |     ) -> TokenRef<'i> {
155 |         if session.top_precedence() != Precedence::CLASS_INTEGRATIONOPERATORS {
156 |             return Token::at_start(TokenKind::Fake_ImplicitTimes, tok_in);
157 |         }
158 | 
159 |         tok_in
160 |     }
161 | }
162 | 


--------------------------------------------------------------------------------
/crates/wolfram-parser/src/long_names.rs:
--------------------------------------------------------------------------------
  1 | //! Collection of utility functions for codepoints and long names
  2 | 
  3 | use crate::{
  4 |     generated::long_names_registration::{
  5 |         ASCII_REPLACEMENTS_MAP, CODEPOINT_TO_LONGNAME_MAP,
  6 |         LONGNAME_TO_CODEPOINT_MAP, MB_NEWLINE_CODE_POINTS,
  7 |         MB_NOT_STRAGE_LETTERLIKE_CODE_POINTS, MB_PUNCTUATION_CODE_POINTS,
  8 |         MB_UNINTERPRETABLE_CODE_POINTS, MB_WHITESPACE_CODE_POINTS, RAW_SET,
  9 |     },
 10 |     read::code_point::CodePoint,
 11 |     utils,
 12 | };
 13 | 
 14 | pub(crate) fn codepoint_has_longname(point: char) -> bool {
 15 |     matches!(codepoint_to_longname(CodePoint::Char(point)), Some(_))
 16 | }
 17 | 
 18 | /// Looks `point` up in `CODEPOINT_TO_LONGNAME_MAP` by binary search and
 19 | /// returns the paired long name, or `None` when there is no entry.
 20 | pub(crate) fn codepoint_to_longname(point: CodePoint) -> Option<&'static str> {
 21 |     // NOTE: The sortedness assertion below is disabled because it currently
 22 |     //       fails spuriously: the StringMeta_DoubleQuote and
 23 |     //       StringMeta_Backslash codepoints are fake codepoints with
 24 |     //       negative values.
 25 |     /*
 26 |     debug_assert!(utils::is_sorted_by(
 27 |         &CODEPOINT_TO_LONGNAME_MAP,
 28 |         |(point, _): &(CodePoint, &str)| *point
 29 |     ));
 30 |     */
 31 | 
 32 |     CODEPOINT_TO_LONGNAME_MAP
 33 |         .binary_search_by(|(candidate, _)| candidate.cmp(&point))
 34 |         .ok()
 35 |         .map(|found| CODEPOINT_TO_LONGNAME_MAP[found].1)
 36 | }
 37 | 
 38 | /// Binary-searches `LONGNAME_TO_CODEPOINT_MAP` for `longname`; returns the
 39 | /// paired code point, or `None` when there is no entry.
 40 | pub(crate) fn longname_to_codepoint(longname: &str) -> Option<CodePoint> {
 41 |     debug_assert!(utils::is_sorted_by(
 42 |         &LONGNAME_TO_CODEPOINT_MAP,
 43 |         |(name, _): &(&str, CodePoint)| *name
 44 |     ));
 45 | 
 46 |     let found = LONGNAME_TO_CODEPOINT_MAP
 47 |         .binary_search_by(|&(name, _)| name.cmp(longname))
 48 |         .ok()?;
 49 | 
 50 |     Some(LONGNAME_TO_CODEPOINT_MAP[found].1)
 51 | }
 52 | 
 53 | /// Is this a \[Raw...] long name? (Membership test against `RAW_SET`.)
 54 | pub fn isRaw(long_name_str: &str) -> bool {
 55 |     debug_assert!(utils::is_sorted(&RAW_SET));
 56 |     RAW_SET.binary_search(&long_name_str).is_ok()
 57 | }
 58 | 
 59 | /// `true` only when `point` is a `char` found in the not-strange table.
 60 | pub fn isMBNotStrangeLetterlike(point: CodePoint) -> bool {
 61 |     // TODO(cleanup): Change param type?
 62 |     let Some(ch) = point.as_char() else {
 63 |         return false;
 64 |     };
 65 |     let table = &MB_NOT_STRAGE_LETTERLIKE_CODE_POINTS;
 66 | 
 67 |     debug_assert!(utils::is_sorted(table));
 68 |     table.binary_search(&ch).is_ok()
 69 | }
 70 | 
 71 | /// Returns owned copies of the replacement strings that
 72 | /// `ASCII_REPLACEMENTS_MAP` lists for `point`.
 73 | ///
 74 | /// The result is empty when `point` is not a `char` or has no table entry.
 75 | pub fn asciiReplacements(point: CodePoint) -> Vec<String> {
 76 |     // TODO(cleanup): Change param type?
 77 |     let Some(ch) = point.as_char() else {
 78 |         return Vec::new();
 79 |     };
 80 | 
 81 |     debug_assert!(utils::is_sorted(ASCII_REPLACEMENTS_MAP));
 82 | 
 83 |     match ASCII_REPLACEMENTS_MAP.binary_search_by(|(cp, _)| cp.cmp(&ch)) {
 84 |         Err(_) => Vec::new(),
 85 |         Ok(found) => ASCII_REPLACEMENTS_MAP[found]
 86 |             .1
 87 |             .iter()
 88 |             .copied()
 89 |             .map(str::to_owned)
 90 |             .collect(),
 91 |     }
 92 | }
 93 | 
 94 | /// Maps a replacement string to a form that is visible when displayed.
 95 | ///
 96 | /// * `" "` becomes U+2423 (\[SpaceIndicator])
 97 | /// * `"\n"` becomes the two characters `\` and `n`
 98 | /// * any other string is returned unchanged
 99 | pub fn replacementGraphical(replacement: String) -> String {
100 |     match replacement.as_str() {
101 |         //
102 |         // \[SpaceIndicator]
103 |         //
104 |         // Historical note from the C++ version: this was written as raw
105 |         // UTF-8 bytes there, because MSVC warned (C4566) that the
106 |         // universal-character-name '\u2423' cannot be represented in code
107 |         // page 1252. Rust string literals have no such limitation.
108 |         //
109 |         " " => String::from("\u{2423}"),
110 | 
111 |         // A newline is shown as its escape sequence.
112 |         "\n" => String::from("\\n"),
113 | 
114 |         // Anything else is passed through unchanged.
115 |         _ => replacement,
116 | 
117 |     }
118 | }
119 | 
120 | /// `true` only when `point` is a `char` in `MB_PUNCTUATION_CODE_POINTS`.
121 | pub fn isMBPunctuation(point: CodePoint) -> bool {
122 |     // TODO(cleanup): Change param type?
123 |     let Some(ch) = point.as_char() else {
124 |         return false;
125 |     };
126 |     debug_assert!(utils::is_sorted(&MB_PUNCTUATION_CODE_POINTS));
127 |     MB_PUNCTUATION_CODE_POINTS.binary_search(&ch).is_ok()
128 | }
129 | 
130 | /// `true` only when `point` is a `char` in `MB_WHITESPACE_CODE_POINTS`;
131 | /// a `point` for which `as_char()` gives `None` is never whitespace.
132 | pub fn isMBWhitespace(point: CodePoint) -> bool {
133 |     // TODO(cleanup): Change param type?
134 |     point.as_char().map_or(false, |ch| {
135 |         debug_assert!(utils::is_sorted(&MB_WHITESPACE_CODE_POINTS));
136 |         MB_WHITESPACE_CODE_POINTS.binary_search(&ch).is_ok()
137 |     })
138 | }
139 | 
140 | pub fn isMBNewline(point: CodePoint) -> bool {
141 |     debug_assert!(utils::is_sorted(&MB_NEWLINE_CODE_POINTS));
142 |     MB_NEWLINE_CODE_POINTS.binary_search(&point).is_ok() // keyed by CodePoint
143 | }
144 | 
145 | /// `true` only when `point` is a `char` in `MB_UNINTERPRETABLE_CODE_POINTS`.
146 | pub fn isMBUninterpretable(point: CodePoint) -> bool {
147 |     // TODO(cleanup): Change param type?
148 |     if let Some(ch) = point.as_char() {
149 |         debug_assert!(utils::is_sorted(&MB_UNINTERPRETABLE_CODE_POINTS));
150 |         return MB_UNINTERPRETABLE_CODE_POINTS.binary_search(&ch).is_ok();
151 |     }
152 |     false
153 | }
154 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # CodeParser
  2 | 
  3 | CodeParser is a package for parsing Wolfram Language source code as abstract syntax trees (ASTs) or concrete syntax trees (CSTs).
  4 | CodeParser is useful for inspecting code, formatting code, instrumenting code (e.g., for coverage reporting or profiling), and much more!
  5 | 
  6 | CodeParser has many key features:
  7 | * Understands practically the entire Wolfram Language syntax.
  8 | * Fast native library implementation.
  9 | * Tested with a combination of a hand-written test suite and fuzz testing.
 10 | * Graceful error handling and recovery.
 11 | 
 12 | 
 13 | ```
 14 | Needs["CodeParser`"]
 15 | 
 16 | CodeParse["1+1"]
 17 | ```
 18 | ```
 19 | Out[2]= ContainerNode[String, {CallNode[LeafNode[Symbol, "Plus", <||>], {LeafNode[Integer, "1", <|Source -> {{1, 1}, {1, 2}}|>], LeafNode[Integer, "1", <|Source -> {{1, 3}, {1, 4}}|>]}, <|Source -> {{1, 1}, {1, 4}}|>]}, <||>]
 20 | ```
 21 | 
 22 | ["CodeParser and CodeInspector" on community.wolfram.com](https://community.wolfram.com/groups/-/m/t/1931315)
 23 | 
 24 | [Parsing the Wolfram Language from WTC 2019: Watch Video (youtube)](https://www.youtube.com/watch?v=rOa5IntICFA)
 25 | 
 26 | [Parsing the Wolfram Language from WTC 2019: Watch Video (wolfram.com)](https://www.wolfram.com/broadcast/video.php?v=2908)
 27 | 
 28 | [Parsing the Wolfram Language from WTC 2019: Download Presentation](https://files.wolframcdn.com/pub/www.wolfram.com/technology-conference/2019/Thursday/2019BrentonBostickParsingTheWL.nb)
 29 | 
 30 | 
 31 | ## Setup
 32 | 
 33 | CodeParser is included in Mathematica 12.2 and above.
 34 | 
 35 | For older versions, install the CodeParser paclet from the public paclet server:
 36 | ```
 37 | PacletInstall["CodeParser"]
 38 | ```
 39 | 
 40 | [Build and install the CodeParser paclet locally](HowToBuild.md)
 41 | 
 42 | 
 43 | ## Using CodeParser
 44 | 
 45 | After CodeParser is installed, it can be used.
 46 | 
 47 | ```
 48 | Needs["CodeParser`"]
 49 | 
 50 | CodeParse["1+1"]
 51 | ```
 52 | ```
 53 | Out[2]= ContainerNode[String, {CallNode[LeafNode[Symbol, "Plus", <||>], {LeafNode[Integer, "1", <|Source -> {{1, 1}, {1, 2}}|>], LeafNode[Integer, "1", <|Source -> {{1, 3}, {1, 4}}|>]}, <|Source -> {{1, 1}, {1, 4}}|>]}, <||>]
 54 | ```
 55 | 
 56 | The input to `CodeParse` may be a string, a `File`, or a list of bytes.
 57 | 
 58 | 
 59 | ### Command-line tool (Optional)
 60 | 
 61 | An optional `codeparser` command-line tool is also built and can be used.
 62 | 
 63 | ```
 64 | cmake -DBUILD_EXE=ON ..
 65 | cmake --build . --target codeparser-exe
 66 | 
 67 | $cpp/src/exe/codeparser
 68 | >>> 1+1
 69 | InfixNode[Plus, {LeafNode[Integer, "1", <|Source->{{1, 1}, {1, 2}}|>], LeafNode[Integer, "1", <|Source->{{1, 3}, {1, 4}}|>]}, <|Source->{{1, 1}, {1, 4}}|>]
 70 | 
 71 | >>>
 72 | ```
 73 | 
 74 | 
 75 | ## Troubleshooting
 76 | 
 77 | Make sure that the CodeParser can be found on your system:
 78 | ```
 79 | Needs["CodeParser`"]
 80 | ```
 81 | 
 82 | and try a basic example:
 83 | ```
 84 | CodeParse["1+1"]
 85 | ```
 86 | 
 87 | You may get `LibraryFunction` messages:
 88 | ```
 89 | In[1]:= Needs["CodeParser`"]
 90 | 
 91 | In[2]:= CodeParse["1+1"]
 92 | 
 93 | During evaluation of In[2]:= LibraryFunction::version: The version number 7 of the library is not consistent with the current or any previous WolframLibraryVersion.
 94 | 
 95 | During evaluation of In[2]:= LibraryFunction::initerr: A nonzero error code 7 was returned during the initialization of the library /Users/user/Library/Mathematica/Paclets/Repository/CodeParser-1.6/LibraryResources/MacOSX-x86-64/CodeParser.dylib.
 96 | 
 97 | During evaluation of In[2]:= LibraryFunction::libload: The function ConcreteParseBytes_Listable_LibraryLink was not loaded from the file /Users/user/Library/Mathematica/Paclets/Repository/CodeParser-1.6/LibraryResources/MacOSX-x86-64/CodeParser.dylib.
 98 | 
 99 | Out[2]= $Failed
100 | ```
101 | 
102 | This means that CodeParser was built with a newer version of Wolfram System than your system supports.
103 | 
104 | To fix this, build CodeParser from source with the version of Wolfram System that you will use.
105 | 
106 | ## Benchmarks
107 | 
108 | > Some of the benchmarks test large data files. Those files are tracked in this
109 | > repository to ensure that benchmarks are always run against identical input.
110 | > [Git LFS](https://git-lfs.github.com/) is used to ensure that a basic checkout
111 | > of this repository remains small, which is important in CI/CD builds.
112 | 
113 | To run the benchmarks, first ensure that the large benchmark files have been
114 | checked out locally using:
115 | 
116 | ```shell
117 | $ git lfs pull --exclude="" --include="*"
118 | ```
119 | 
120 | This will override the default settings in [`.lfsconfig`](./.lfsconfig).
121 | 
122 | Then, to begin running the benchmarks, execute:
123 | 
124 | ```shell
125 | $ cargo bench
126 | ```
127 | 
128 | ## File Overview
129 | 
130 | * [Tests/files/large/](./Tests/files/large/) contains files
131 |   managed by [`Git LFS`](https://git-lfs.github.com/). The files in this
132 |   directory are used by the benchmarks. These files should never be modified, to
133 |   ensure that benchmark results from different revisions of this
134 |   repository can be meaningfully compared.


--------------------------------------------------------------------------------
/Tests/AbstractSyntaxErrorNodes.mt:
--------------------------------------------------------------------------------
  1 | Print["\n===== Start AbstractSyntaxErrorNodes.mt =====\n"]
  2 | 
  3 | Needs["CodeParser`"]
  4 | 
  5 | 
  6 | (*
  7 | OpenSquare: the input starts with a call-style opener ([, ::[ or \[LeftDoubleBracket]) that has no head before it
  8 | *)
  9 | 
 10 | TestMatch[
 11 | 	CodeParse[" [x] "]
 12 | 	,
 13 | 	ContainerNode[String, {AbstractSyntaxErrorNode[AbstractSyntaxError`OpenSquare, _, _]}, _]
 14 | 	,
 15 | 	TestID->"AbstractSyntaxErrorNodes-20190520-U4J1C1"
 16 | ]
 17 | 
 18 | 
 19 | TestMatch[
 20 | 	CodeParse[" ::[x] "]
 21 | 	,
 22 | 	ContainerNode[String, {
 23 | 		AbstractSyntaxErrorNode[AbstractSyntaxError`ColonColonOpenSquare, {
 24 | 			LeafNode[Symbol, "x", <|Source -> {{1, 5}, {1, 6}}|>]}, <|Source -> {{1, 2}, {1, 7}}|>]}, <|Source -> {{1, 1}, {1, 8}}|>]
 25 | 	,
 26 | 	TestID->"AbstractSyntaxErrorNodes-20220917-G3L5M1"
 27 | ]
 28 | 
 29 | TestMatch[
 30 | 	CodeParse[" \\[LeftDoubleBracket]x\\[RightDoubleBracket] "]
 31 | 	,
 32 | 	ContainerNode[String, {
 33 | 		AbstractSyntaxErrorNode[AbstractSyntaxError`LeftDoubleBracket, {
 34 | 			LeafNode[Symbol, "x", <|Source -> {{1, 22}, {1, 23}}|>]}, <|Source -> {{1, 2}, {1, 44}}|>]}, <|Source -> {{1, 1}, {1, 45}}|>]
 35 | 	,
 36 | 	TestID->"AbstractSyntaxErrorNodes-20220917-C4T9X0"
 37 | ]
 38 | 
 39 | (*
 40 | OpenParen: comma-separated expressions directly inside parentheses
 41 | *)
 42 | 
 43 | TestMatch[
 44 | 	CodeParse[" (1,2,3) "]
 45 | 	,
 46 | 	ContainerNode[String, {AbstractSyntaxErrorNode[AbstractSyntaxError`OpenParen, _, _]}, _]
 47 | 	,
 48 | 	TestID->"AbstractSyntaxErrorNodes-20190520-E0X9G7"
 49 | ]
 50 | 
 51 | 
 52 | 
 53 | (*
 54 | GroupMissingCloser: the input is a lone opener with no closer (the final test, for \( , expects an ErrorNode instead)
 55 | *)
 56 | 
 57 | TestMatch[
 58 | 	CodeParse["{"]
 59 | 	,
 60 | 	ContainerNode[String, {GroupMissingCloserNode[List, _, _]}, _]
 61 | 	,
 62 | 	TestID->"AbstractSyntaxErrorNodes-20190520-M0B3Z5"
 63 | ]
 64 | 
 65 | 
 66 | TestMatch[
 67 | 	CodeParse["<|"]
 68 | 	,
 69 | 	ContainerNode[String, {GroupMissingCloserNode[Association, _, _]}, _]
 70 | 	,
 71 | 	TestID->"AbstractSyntaxErrorNodes-20190520-U0L5P6"
 72 | ]
 73 | 
 74 | TestMatch[
 75 | 	CodeParse["\[LeftAngleBracket]"]
 76 | 	,
 77 | 	ContainerNode[String, {GroupMissingCloserNode[AngleBracket, _, _]}, _]
 78 | 	,
 79 | 	TestID->"AbstractSyntaxErrorNodes-20190520-X7G1G5"
 80 | ]
 81 | 
 82 | 
 83 | TestMatch[
 84 | 	CodeParse["\[LeftCeiling]"]
 85 | 	,
 86 | 	ContainerNode[String, {GroupMissingCloserNode[Ceiling, _, _]}, _]
 87 | 	,
 88 | 	TestID->"AbstractSyntaxErrorNodes-20190520-Q4A4B9"
 89 | ]
 90 | 
 91 | 
 92 | TestMatch[
 93 | 	CodeParse["\[LeftFloor]"]
 94 | 	,
 95 | 	ContainerNode[String, {GroupMissingCloserNode[Floor, _, _]}, _]
 96 | 	,
 97 | 	TestID->"AbstractSyntaxErrorNodes-20190520-C4T4D9"
 98 | ]
 99 | 
100 | TestMatch[
101 | 	CodeParse["\[LeftDoubleBracket]"]
102 | 	,
103 | 	ContainerNode[String, {GroupMissingCloserNode[GroupDoubleBracket, _, _]}, _]
104 | 	,
105 | 	TestID->"AbstractSyntaxErrorNodes-20190520-S1C3U4"
106 | ]
107 | 
108 | TestMatch[
109 | 	CodeParse["\[LeftBracketingBar]"]
110 | 	,
111 | 	ContainerNode[String, {GroupMissingCloserNode[BracketingBar, _, _]}, _]
112 | 	,
113 | 	TestID->"AbstractSyntaxErrorNodes-20190520-H0B3W9"
114 | ]
115 | 
116 | TestMatch[
117 | 	CodeParse["\[LeftDoubleBracketingBar]"]
118 | 	,
119 | 	ContainerNode[String, {GroupMissingCloserNode[DoubleBracketingBar, _, _]}, _]
120 | 	,
121 | 	TestID->"AbstractSyntaxErrorNodes-20190520-R4A5I7"
122 | ]
123 | 
124 | TestMatch[
125 | 	CodeParse["("]
126 | 	,
127 | 	ContainerNode[String, {GroupMissingCloserNode[GroupParen, _, _]}, _]
128 | 	,
129 | 	TestID->"AbstractSyntaxErrorNodes-20190520-K6C7J1"
130 | ]
131 | 
132 | TestMatch[
133 | 	CodeParse["["]
134 | 	,
135 | 	ContainerNode[String, {GroupMissingCloserNode[GroupSquare, _, _]}, _]
136 | 	,
137 | 	TestID->"AbstractSyntaxErrorNodes-20190520-Y0H1P1"
138 | ]
139 | 
140 | (* \( is matched against ErrorNode[Token`Error`UnterminatedLinearSyntaxBlob, ...], not a GroupMissingCloserNode *)
141 | TestMatch[
142 | 	CodeParse["\\("]
143 | 	,
144 | 	ContainerNode[String, {ErrorNode[Token`Error`UnterminatedLinearSyntaxBlob, _, _]}, _]
145 | 	,
146 | 	TestID->"AbstractSyntaxErrorNodes-20190520-B2V0A0"
147 | ]
148 | 
149 | 
150 | 
151 | 
152 | 
153 | 
154 | 
155 | 
156 | (*
157 | LinearSyntaxBang: \! applied directly to an integer
158 | *)
159 | 
160 | 
161 | Test[
162 | 	CodeParse["\\!123"]
163 | 	,
164 | 	ContainerNode[String, {
165 | 		AbstractSyntaxErrorNode[AbstractSyntaxError`LinearSyntaxBang, {
166 | 			LeafNode[Integer, "123", <|Source -> {{1, 3}, {1, 6}}|>]}, <|Source -> {{1, 1}, {1, 6}}|>]}, <|Source -> {{1, 1}, {1, 6}}|>]
167 | 	,
168 | 	TestID->"AbstractSyntaxErrorNodes-20190520-N8K8K4"
169 | ]
170 | 
171 | 
172 | (*
173 | NonAssociative: a chained "a ? b ? c" is expected to abstract to NonAssociativePatternTest wrapping PatternTest[a, b] and c
174 | 
175 | TODO: is this a quirk?
176 | 
177 | *)
178 | 
179 | Test[
180 | 	CodeParse["a ? b ? c"]
181 | 	,
182 | 	ContainerNode[String, {
183 | 		AbstractSyntaxErrorNode[AbstractSyntaxError`NonAssociativePatternTest, {
184 | 			CallNode[LeafNode[Symbol, "PatternTest", <||>], {
185 | 				LeafNode[Symbol, "a", <|Source -> {{1, 1}, {1, 2}}|>],
186 | 				LeafNode[Symbol, "b", <|Source -> {{1, 5}, {1, 6}}|>]}, <|Source -> {{1, 1}, {1, 6}}|>],
187 | 			LeafNode[Symbol, "c", <|Source -> {{1, 9}, {1, 10}}|>]}, <|Source -> {{1, 1}, {1, 10}}|>] }, <|Source -> {{1, 1}, {1, 10}}|>]
188 | 	,
189 | 	TestID->"AbstractSyntaxErrorNodes-20190521-A6K4H1"
190 | ]
191 | 
192 | 
193 | (*
194 | ExpectedSymbol: the left-hand side of ":" is an integer; note the expected node is a SyntaxErrorNode, not an AbstractSyntaxErrorNode
195 | *)
196 | 
197 | Test[
198 | 	CodeParse["1:2"]
199 | 	,
200 | 	ContainerNode[String, {
201 | 		SyntaxErrorNode[SyntaxError`ExpectedSymbol, {
202 | 			LeafNode[Integer, "1", <|Source -> {{1, 1}, {1, 2}}|>],
203 | 			LeafNode[Integer, "2", <|Source -> {{1, 3}, {1, 4}}|>]}, <|Source -> {{1, 1}, {1, 4}}|>]}, <|Source -> {{1, 1}, {1, 4}}|>]
204 | 	,
205 | 	TestID->"AbstractSyntaxErrorNodes-20190521-Z6D6T1"
206 | ]
207 | 
208 | 


--------------------------------------------------------------------------------
/crates/wolfram-parser/src/iter.rs:
--------------------------------------------------------------------------------
  1 | //! Iterators over source characters, Wolfram characters, and tokens.
  2 | //!
  3 | //! ## Source Characters
  4 | //!
  5 | //! Iterate over [`SourceCharacter`]s using [`source_chars()`]:
  6 | //!
  7 | //! ```
  8 | //! use wolfram_parser::{iter::source_chars, source::SourceCharacter};
  9 | //!
 10 | //! let mut chars = source_chars(r#"2*\[Pi]"#, &Default::default());
 11 | //!
 12 | //! assert_eq!(chars.next(), Some(SourceCharacter::Char('2')));
 13 | //! assert_eq!(chars.next(), Some(SourceCharacter::Char('*')));
 14 | //! assert_eq!(chars.next(), Some(SourceCharacter::Char('\\')));
 15 | //! assert_eq!(chars.next(), Some(SourceCharacter::Char('[')));
 16 | //! assert_eq!(chars.next(), Some(SourceCharacter::Char('P')));
 17 | //! assert_eq!(chars.next(), Some(SourceCharacter::Char('i')));
 18 | //! assert_eq!(chars.next(), Some(SourceCharacter::Char(']')));
 19 | //! assert_eq!(chars.next(), None);
 20 | //! ```
 21 | //!
 22 | //! ## Wolfram Characters
 23 | //!
 24 | //! Iterate over [`WLCharacter`]s using [`wolfram_chars()`]:
 25 | //!
 26 | //! ```
 27 | //! use wolfram_parser::{iter::wolfram_chars, read::{WLCharacter, Escape}};
 28 | //!
 29 | //! let mut chars = wolfram_chars(r#"2*\[Pi]"#, &Default::default());
 30 | //!
 31 | //! assert_eq!(chars.next(), Some(WLCharacter::new('2')));
 32 | //! assert_eq!(chars.next(), Some(WLCharacter::new('*')));
 33 | //! assert_eq!(chars.next(), Some(WLCharacter::escaped('π', Escape::LongName)));
 34 | //! assert_eq!(chars.next(), None);
 35 | //! ```
 36 | //!
 37 | //! ## Tokens
 38 | //!
 39 | //! Iterate over [`Token`]s using [`tokens()`]:
 40 | //!
 41 | //! ```
 42 | //! use wolfram_parser::{
 43 | //!     iter::tokens,
 44 | //!     tokenize::{Token, TokenKind},
 45 | //!     macros::src,
 46 | //! };
 47 | //!
 48 | //! let mut toks = tokens(r#"2*\[Pi]"#, &Default::default());
 49 | //!
 50 | //! assert_eq!(toks.next(), Some(Token::new(TokenKind::Integer, "2", src!(1:1-2))));
 51 | //! assert_eq!(toks.next(), Some(Token::new(TokenKind::Star, "*", src!(1:2-3))));
 52 | //! assert_eq!(toks.next(), Some(Token::new(TokenKind::Symbol, "\\[Pi]", src!(1:3-8))));
 53 | //! assert_eq!(toks.next(), None);
 54 | //! ```
 55 | 
 56 | use crate::{
 57 |     read::{code_point::CodePoint, Reader, WLCharacter},
 58 |     source::{SourceCharacter, TOPLEVEL},
 59 |     tokenize::{Token, TokenKind, TokenStr, Tokenizer},
 60 |     ParseOptions,
 61 | };
 62 | 
 63 | //======================================
 64 | // API Functions
 65 | //======================================
 66 | 
 67 | /// Create a [`SourceChars`] iterator that reads `input` one
 68 | /// [`SourceCharacter`] at a time, passing `opts` to the underlying [`Reader`].
 69 | pub fn source_chars<'i>(
 70 |     input: &'i str,
 71 |     opts: &ParseOptions,
 72 | ) -> SourceChars<'i> {
 73 |     let reader = Reader::new(input.as_bytes(), opts);
 74 |     SourceChars { reader }
 75 | }
 76 | 
 77 | /// Create a [`WolframChars`] iterator that reads `input` one
 78 | /// [`WLCharacter`] at a time, passing `opts` to the underlying [`Reader`].
 79 | pub fn wolfram_chars<'i>(
 80 |     input: &'i str,
 81 |     opts: &ParseOptions,
 82 | ) -> WolframChars<'i> {
 83 |     let reader = Reader::new(input.as_bytes(), opts);
 84 |     WolframChars { reader }
 85 | }
 86 | 
 87 | /// Create a [`Tokens`] iterator that tokenizes `input`, passing `opts` to
 88 | /// the underlying [`Tokenizer`].
 89 | pub fn tokens<'i>(input: &'i str, opts: &ParseOptions) -> Tokens<'i> {
 90 |     let tokenizer = Tokenizer::new(input.as_bytes(), opts);
 91 |     Tokens { tokenizer }
 92 | }
 93 | 
 94 | //======================================
 95 | // Types
 96 | //======================================
 97 | 
 98 | /// Iterator over the [`SourceCharacter`]s of a Wolfram Language input.
 99 | ///
100 | /// Returned by [`source_chars()`]; iteration ends at `CodePoint::EndOfFile`.
101 | pub struct SourceChars<'i> {
102 |     reader: Reader<'i>,
103 | }
104 | 
105 | 
106 | /// Iterator over the [`WLCharacter`]s of a Wolfram Language input.
107 | ///
108 | /// Returned by [`wolfram_chars()`]; iteration ends at `CodePoint::EndOfFile`.
109 | pub struct WolframChars<'i> {
110 |     reader: Reader<'i>,
111 | }
112 | 
113 | /// Iterator over the [`Token`]s of a Wolfram Language input.
114 | ///
115 | /// Returned by [`tokens()`]; iteration ends at a `TokenKind::EndOfFile` token.
116 | pub struct Tokens<'i> {
117 |     tokenizer: Tokenizer<'i>,
118 | }
119 | 
120 | //=======================================
121 | // Iterator Impls
122 | //=======================================
123 | 
124 | impl<'i> Iterator for SourceChars<'i> {
125 |     type Item = SourceCharacter;
126 | 
127 |     /// Pull the next source character from the reader.
128 |     ///
129 |     /// A result equal to `CodePoint::EndOfFile` ends iteration: it is
130 |     /// reported as `None` instead of being yielded.
131 |     fn next(&mut self) -> Option<Self::Item> {
132 |         match self.reader.next_source_char(TOPLEVEL) {
133 |             c if c != CodePoint::EndOfFile => Some(c),
134 |             // End of input.
135 |             _ => None,
136 |         }
137 |     }
138 | }
139 | 
140 | impl<'i> Iterator for WolframChars<'i> {
141 |     type Item = WLCharacter;
142 | 
143 |     /// Pull the next Wolfram character from the reader.
144 |     ///
145 |     /// A character whose `point` equals `CodePoint::EndOfFile` ends
146 |     /// iteration: it is reported as `None` instead of being yielded.
147 |     fn next(&mut self) -> Option<Self::Item> {
148 |         match self.reader.next_wolfram_char(TOPLEVEL) {
149 |             c if c.point != CodePoint::EndOfFile => Some(c),
150 |             // End of input.
151 |             _ => None,
152 |         }
153 |     }
154 | }
155 | 
156 | impl<'i> Iterator for Tokens<'i> {
157 |     type Item = Token<TokenStr<'i>>;
158 | 
159 |     /// Pull the next token from the tokenizer.
160 |     ///
161 |     /// A token whose kind is `TokenKind::EndOfFile` ends iteration: it is
162 |     /// reported as `None` instead of being yielded.
163 |     fn next(&mut self) -> Option<Self::Item> {
164 |         match self.tokenizer.next_token() {
165 |             t if t.tok != TokenKind::EndOfFile => Some(t),
166 |             // End of input.
167 |             _ => None,
168 |         }
169 |     }
170 | }
171 | 


--------------------------------------------------------------------------------
/Tests/CallMissingCloserNodes.mt:
--------------------------------------------------------------------------------
  1 | Print["\n===== Start CallMissingCloserNodes.mt =====\n"]
  2 | 
  3 | (* Wolfram Language Test file: expected CodeParse results for calls and groups whose closing bracket is missing *)
  4 | 
  5 | Needs["CodeParser`"]
  6 | 
  7 | (* Unclosed calls at top level, with and without one argument: square bracket, type specifier bracket, and LeftDoubleBracket. Each is expected to give CallMissingCloserNode *)
  8 | Test[
  9 | 	CodeParse["f["]
 10 | 	,
 11 | 	ContainerNode[String, {
 12 | 		CallMissingCloserNode[
 13 | 			LeafNode[Symbol, "f", <|Source -> {{1, 1}, {1, 2}}|>], {}, <|Source -> {{1, 1}, {1, 3}}|>]}, <|Source -> {{1, 1}, {1, 3}}|>]
 14 | 	,
 15 | 	TestID->"CallMissingCloserNodes-20190701-H7G3R7"
 16 | ]
 17 | 
 18 | Test[
 19 | 	CodeParse["f[1"]
 20 | 	,
 21 | 	ContainerNode[String, {
 22 | 		CallMissingCloserNode[
 23 | 			LeafNode[Symbol, "f", <|Source -> {{1, 1}, {1, 2}}|>], {
 24 | 				LeafNode[Integer, "1", <|Source -> {{1, 3}, {1, 4}}|>]}, <|Source -> {{1, 1}, {1, 4}}|>]}, <|Source -> {{1, 1}, {1, 4}}|>]
 25 | 	,
 26 | 	TestID->"CallMissingCloserNodes-20220917-B7K4Z8"
 27 | ]
 28 | 
 29 | Test[
 30 | 	CodeParse["f::["]
 31 | 	,
 32 | 	ContainerNode[String, {CallMissingCloserNode[CallNode[LeafNode[Symbol, "TypeSpecifier", <||>], {LeafNode[Symbol, "f", <|Source -> {{1, 1}, {1, 2}}|>]}, <||>], {}, <|Source -> {{1, 1}, {1, 5}}|>]}, <|Source -> {{1, 1}, {1, 5}}|>]
 33 | 	,
 34 | 	TestID->"CallMissingCloserNodes-20220917-W2D0J4"
 35 | ]
 36 | 
 37 | Test[
 38 | 	CodeParse["f::[1"]
 39 | 	,
 40 | 	ContainerNode[String, {CallMissingCloserNode[CallNode[LeafNode[Symbol, "TypeSpecifier", <||>], {LeafNode[Symbol, "f", <|Source -> {{1, 1}, {1, 2}}|>]}, <||>], {LeafNode[Integer, "1", <|Source -> {{1, 5}, {1, 6}}|>]}, <|Source -> {{1, 1}, {1, 6}}|>]}, <|Source -> {{1, 1}, {1, 6}}|>]
 41 | 	,
 42 | 	TestID->"CallMissingCloserNodes-20220917-L2H2N5"
 43 | ]
 44 | 
 45 | Test[
 46 | 	CodeParse["f\\[LeftDoubleBracket]"]
 47 | 	,
 48 | 	ContainerNode[String, {CallMissingCloserNode[LeafNode[Symbol, "Part", <||>], {LeafNode[Symbol, "f", <|Source -> {{1, 1}, {1, 2}}|>]}, <|Source -> {{1, 1}, {1, 22}}|>]}, <|Source -> {{1, 1}, {1, 22}}|>]
 49 | 	,
 50 | 	TestID->"CallMissingCloserNodes-20220917-C2J4K7"
 51 | ]
 52 | 
 53 | Test[
 54 | 	CodeParse["f\\[LeftDoubleBracket]1"]
 55 | 	,
 56 | 	ContainerNode[String, {CallMissingCloserNode[LeafNode[Symbol, "Part", <||>], {LeafNode[Symbol, "f", <|Source -> {{1, 1}, {1, 2}}|>], LeafNode[Integer, "1", <|Source -> {{1, 22}, {1, 23}}|>]}, <|Source -> {{1, 1}, {1, 23}}|>]}, <|Source -> {{1, 1}, {1, 23}}|>]
 57 | 	,
 58 | 	TestID->"CallMissingCloserNodes-20220917-J0V0J3"
 59 | ]
 60 | (* Same unclosed calls wrapped in parentheses: the expected ContainerNode holds the CallMissingCloserNode directly; the node Source covers only the call text, the container Source the whole input *)
 61 | Test[
 62 | 	CodeParse["( f[ )"]
 63 | 	,
 64 | 	ContainerNode[String, {CallMissingCloserNode[LeafNode[Symbol, "f", <|Source -> {{1, 3}, {1, 4}}|>], {}, <|Source -> {{1, 3}, {1, 5}}|>]}, <|Source -> {{1, 1}, {1, 7}}|>]
 65 | 	,
 66 | 	TestID->"CallMissingCloserNodes-20220917-L0B1H1"
 67 | ]
 68 | 
 69 | Test[
 70 | 	CodeParse["( f[1 )"]
 71 | 	,
 72 | 	ContainerNode[String, {CallMissingCloserNode[LeafNode[Symbol, "f", <|Source -> {{1, 3}, {1, 4}}|>], {LeafNode[Integer, "1", <|Source -> {{1, 5}, {1, 6}}|>]}, <|Source -> {{1, 3}, {1, 6}}|>]}, <|Source -> {{1, 1}, {1, 8}}|>]
 73 | 	,
 74 | 	TestID->"CallMissingCloserNodes-20220917-J2C1D4"
 75 | ]
 76 | 
 77 | Test[
 78 | 	CodeParse["( f::[ )"]
 79 | 	,
 80 | 	ContainerNode[String, {CallMissingCloserNode[CallNode[LeafNode[Symbol, "TypeSpecifier", <||>], {LeafNode[Symbol, "f", <|Source -> {{1, 3}, {1, 4}}|>]}, <||>], {}, <|Source -> {{1, 3}, {1, 7}}|>]}, <|Source -> {{1, 1}, {1, 9}}|>]
 81 | 	,
 82 | 	TestID->"CallMissingCloserNodes-20220917-S2K7R7"
 83 | ]
 84 | 
 85 | Test[
 86 | 	CodeParse["( f::[1 )"]
 87 | 	,
 88 | 	ContainerNode[String, {CallMissingCloserNode[CallNode[LeafNode[Symbol, "TypeSpecifier", <||>], {LeafNode[Symbol, "f", <|Source -> {{1, 3}, {1, 4}}|>]}, <||>], {LeafNode[Integer, "1", <|Source -> {{1, 7}, {1, 8}}|>]}, <|Source -> {{1, 3}, {1, 8}}|>]}, <|Source -> {{1, 1}, {1, 10}}|>]
 89 | 	,
 90 | 	TestID->"CallMissingCloserNodes-20220917-K9J6S4"
 91 | ]
 92 | 
 93 | Test[
 94 | 	CodeParse["( f\\[LeftDoubleBracket] )"]
 95 | 	,
 96 | 	ContainerNode[String, {CallMissingCloserNode[LeafNode[Symbol, "Part", <||>], {LeafNode[Symbol, "f", <|Source -> {{1, 3}, {1, 4}}|>]}, <|Source -> {{1, 3}, {1, 24}}|>]}, <|Source -> {{1, 1}, {1, 26}}|>]
 97 | 	,
 98 | 	TestID->"CallMissingCloserNodes-20220917-D4Q8T3"
 99 | ]
100 | 
101 | Test[
102 | 	CodeParse["( f\\[LeftDoubleBracket]1 )"]
103 | 	,
104 | 	ContainerNode[String, {CallMissingCloserNode[LeafNode[Symbol, "Part", <||>], {LeafNode[Symbol, "f", <|Source -> {{1, 3}, {1, 4}}|>], LeafNode[Integer, "1", <|Source -> {{1, 24}, {1, 25}}|>]}, <|Source -> {{1, 3}, {1, 25}}|>]}, <|Source -> {{1, 1}, {1, 27}}|>]
105 | 	,
106 | 	TestID->"CallMissingCloserNodes-20220917-V9R1M3"
107 | ]
108 | (* An opener with no head before it is expected to give GroupMissingCloserNode with GroupSquare, GroupTypeSpecifier or GroupDoubleBracket rather than CallMissingCloserNode *)
109 | Test[
110 | 	CodeParse["( [ )"]
111 | 	,
112 | 	ContainerNode[String, {GroupMissingCloserNode[GroupSquare, {}, <|Source -> {{1, 3}, {1, 4}}|>]}, <|Source -> {{1, 1}, {1, 6}}|>]
113 | 	,
114 | 	TestID->"CallMissingCloserNodes-20220917-B5I2K2"
115 | ]
116 | 
117 | Test[
118 | 	CodeParse["( ::[ )"]
119 | 	,
120 | 	ContainerNode[String, {GroupMissingCloserNode[GroupTypeSpecifier, {}, <|Source -> {{1, 3}, {1, 6}}|>]}, <|Source -> {{1, 1}, {1, 8}}|>]
121 | 	,
122 | 	TestID->"CallMissingCloserNodes-20220917-X9W0H8"
123 | ]
124 | 
125 | Test[
126 | 	CodeParse["( \\[LeftDoubleBracket] )"]
127 | 	,
128 | 	ContainerNode[String, {GroupMissingCloserNode[GroupDoubleBracket, {}, <|Source -> {{1, 3}, {1, 23}}|>]}, <|Source -> {{1, 1}, {1, 25}}|>]
129 | 	,
130 | 	TestID->"CallMissingCloserNodes-20220917-G3Y7X9"
131 | ]
132 | (* A complete inner call b[] inside an unclosed outer call of a stays a CallNode; only the outer call becomes CallMissingCloserNode. The last test checks the unclosed call of List with one argument *)
133 | Test[
134 | 	CodeParse["(a[b[])"]
135 | 	,
136 | 	ContainerNode[String, {
137 | 		CallMissingCloserNode[LeafNode[Symbol, "a", <|Source -> {{1, 2}, {1, 3}}|>], {
138 | 			CallNode[LeafNode[Symbol, "b", <|Source -> {{1, 4}, {1, 5}}|>], {}, <|Source -> {{1, 4}, {1, 7}}|>]}, <|Source -> {{1, 2}, {1, 7}}|>]}, <|Source -> {{1, 1}, {1, 8}}|>]
139 | 	,
140 | 	TestID->"CallMissingCloserNodes-20190803-C7O2S5"
141 | ]
142 | 
143 | Test[
144 | 	CodeParse["List[a"]
145 | 	,
146 | 	ContainerNode[String, {CallMissingCloserNode[LeafNode[Symbol, "List", <|Source -> {{1, 1}, {1, 5}}|>], {LeafNode[Symbol, "a", <|Source -> {{1, 6}, {1, 7}}|>]}, <|Source -> {{1, 1}, {1, 7}}|>]}, <|Source -> {{1, 1}, {1, 7}}|>]
147 | 	,
148 | 	TestID->"CallMissingCloserNodes-20200708-Y2V4V2"
149 | 
150 | ]
151 | 
152 | 
153 | 
154 | 
155 | 
156 | 
157 | 


--------------------------------------------------------------------------------
/crates/wolfram-parser/src/agg.rs:
--------------------------------------------------------------------------------
  1 | use crate::{cst::Cst, source::Span, tokenize::TokenString, NodeSeq};
  2 | /// A [`NodeSeq`] of [`Cst`] nodes; `I` and `S` default to [`TokenString`] and [`Span`].
  3 | pub type AggNodeSeq<I = TokenString, S = Span> = NodeSeq<Cst<I, S>>;
  4 | 
  5 | //==========================================================
  6 | // Macros
  7 | //==========================================================
  8 | 
  9 | //======================================
 10 | // LHS!
 11 | //======================================
 12 | // `LHS!` expands a Wolfram-style node pattern (`CallNode[...]`, `GroupNode[...]`, ...) into a Rust pattern over `Cst` / `AggCallNode` values.
 13 | // Note: Keep this macro crate-private until `AggCallNode` is made part of Node
 14 | //       and every type it names is written with a `$crate::` path.
 15 | macro_rules! LHS {
 16 |     (LeafNode[$($head_kind:ident)|*, _, _]) => {
 17 |         $crate::cst::Cst::Token(Token {
 18 |             tok: $(TK::$head_kind)|*,
 19 |             ..
 20 |         })
 21 |     };
 22 | 
 23 |     //==================================
 24 |     // CallNode
 25 |     //==================================
 26 |     // NOTE(review): the first arm names `Node::Token` and a `children` field, while the other arms use `head`/`body`/`src`; confirm it is still used.
 27 |     (CallNode[LeafNode[$token_kind:ident, _, _], $children:ident:_, _]) => {
 28 |         AggCallNode {
 29 |             head: Node::Token(Token {
 30 |                 tok: TK::$token_kind,
 31 |                 ..
 32 |             }),
 33 |             children: $children,
 34 |         }
 35 |     };
 36 | 
 37 |     (CallNode[
 38 |         $head_name:ident:$node_head:ident[$($node_args:tt)*],
 39 |         $group_name:ident:$group_head:ident[$group_kind:ident, _],
 40 |         $data:ident:_
 41 |     ]) => {
 42 |         AggCallNode {
 43 |             head: $head_name @ LHS!($node_head[$($node_args)*]),
 44 |             body: LHS!($group_name : $group_head[$group_kind, _]),
 45 |             src: $data,
 46 |         }
 47 |     };
 48 |     (CallNode[
 49 |         $head_name:ident:_,
 50 |         $group_name:ident:$group_head:ident[$($group_kind:ident)|*, _],
 51 |         $data:ident:_
 52 |     ]) => {
 53 |         AggCallNode {
 54 |             head: $head_name,
 55 |             body: LHS!($group_name:$group_head[$($group_kind)|*, _]),
 56 |             src: $data
 57 |         }
 58 |     };
 59 |     (CallNode[
 60 |         $head_name:ident:($($sub_head_pat:ident[$($sub_head_args:tt)*])|*),
 61 |         $group_name:ident:GroupNode[$group_kind:ident, _],
 62 |         $data:ident:_
 63 |     ]) => {
 64 |         AggCallNode {
 65 |             head: $head_name @ ($(LHS!($sub_head_pat[$($sub_head_args)*]))|*),
 66 |             body: LHS!($group_name:GroupNode[$group_kind, _]),
 67 |             src: $data
 68 |         }
 69 |     };
 70 |     (CallNode[_, _, _]) => {
 71 |         Cst::Call(_)
 72 |     };
 73 | 
 74 |     //==================================
 75 |     // CompoundNode, BinaryNode, InfixNode, PrefixNode
 76 |     //==================================
 77 | 
 78 |     (CompoundNode[$($op_kind:ident)|*, _, _]) => {
 79 |         Cst::Compound(CompoundNode(OperatorNode {
 80 |             op: $($crate::parse::operators::CompoundOperator::$op_kind)|*,
 81 |             ..
 82 |         }))
 83 |     };
 84 | 
 85 |     (BinaryNode[$($op_kind:ident)|*, _, _]) => {
 86 |         Cst::Binary(BinaryNode(OperatorNode {
 87 |             op: $($crate::parse::operators::BinaryOperator::$op_kind)|*,
 88 |             ..
 89 |         }))
 90 |     };
 91 | 
 92 |     (InfixNode[$($op_kind:ident)|*, _, _]) => {
 93 |         Cst::Infix(InfixNode(OperatorNode {
 94 |             op: $($crate::parse::operators::InfixOperator::$op_kind)|*,
 95 |             ..
 96 |         }))
 97 |     };
 98 |     (PrefixNode[$($op_kind:ident)|*, _, _]) => {
 99 |         Cst::Prefix(PrefixNode(OperatorNode {
100 |             op: $($crate::parse::operators::PrefixOperator::$op_kind)|*,
101 |             ..
102 |         }))
103 |     };
104 | 
105 |     (PostfixNode[$($op_kind:ident)|*, _, _]) => {
106 |         Cst::Postfix(PostfixNode(OperatorNode {
107 |             op: $($crate::parse::operators::PostfixOperator::$op_kind)|*,
108 |             ..
109 |         }))
110 |     };
111 | 
112 |     //==================================
113 |     // GroupNode
114 |     //==================================
115 | 
116 |     (GroupNode[$($op_kind:ident)|*, $children:ident:_]) => {
117 |         $crate::cst::Cst::Group(GroupNode(OperatorNode {
118 |             op: $(GroupOperator::$op_kind)|*,
119 |             children: $children,
120 |         }))
121 |     };
122 | 
123 |     (GroupNode[$($op_kind:ident)|*, _, _]) => {
124 |         Cst::Group(GroupNode(OperatorNode {
125 |             op: $(GroupOperator::$op_kind)|*,
126 |             ..
127 |         }))
128 |     };
129 |     ($name:ident:GroupNode[$group_kind:ident, _]) => {
130 |         CallBody::Group($name @ GroupNode(OperatorNode {
131 |             op: $crate::parse::operators::CallOperator::$group_kind,
132 |             children: _,
133 |         }))
134 |     };
135 | 
136 |     (GroupNode[_, _, _]) => {
137 |         Cst::Group(GroupNode(OperatorNode {
138 |             op: _,
139 |             ..
140 |         }))
141 |     };
142 | 
143 |     //----------------------------------
144 |     // GroupMissingCloserNode
145 |     //----------------------------------
146 | 
147 |     ($name:ident:GroupMissingCloserNode[$($op_kind:ident)|*, _]) => {
148 |         $crate::cst::CallBody::GroupMissingCloser($name @ $crate::cst::GroupMissingCloserNode(OperatorNode {
149 |             op: $($crate::parse::operators::CallOperator::$op_kind)|*,
150 |             ..
151 |         }))
152 |     };
153 | 
154 |     //==================================
155 |     // BoxNode
156 |     //==================================
157 | 
158 |     (BoxNode[$box_kind:ident:_, _, _]) => {
159 |         Cst::Box(BoxNode {
160 |             kind: $box_kind,
161 |             ..
162 |         })
163 |     };
164 |     (BoxNode[$box_kind:ident:_, $children:ident:_, $data:ident:_]) => {
165 |         Cst::Box(BoxNode {
166 |             kind: $box_kind,
167 |             children: $children,
168 |             src: $data,
169 |         })
170 |     };
171 |     (BoxNode[$box_kind:ident, $children:ident:_, $data:ident:_]) => {
172 |         $crate::cst::Cst::Box(BoxNode {
173 |             kind: BoxKind::$box_kind,
174 |             children: $children,
175 |             src: $data,
176 |         })
177 |     };
178 |     (BoxNode[_, _, _]) => {
179 |         Cst::Box(_)
180 |     };
181 | }
182 | 
183 | 
184 | pub(crate) use LHS;
185 | 


--------------------------------------------------------------------------------
/scripts/re_build_CodeParser.xml:
--------------------------------------------------------------------------------
  1 | <?xml version="1.0"?>
  2 | <project name='CodeParser' basedir='..' xmlns:if='ant:if' xmlns:unless='ant:unless'>
  3 | 	<!-- Project-wide properties. -->
  4 | 	<property name='component' value='CodeParser' />
  5 | 	<property name='app.name' value='CodeParser' />
  6 | 	<property name='inputDir' value='${basedir}/${app.name}/Documentation' />
  7 | 	<!-- Expose environment variables as env.* so ant-lib.xml can be imported from the RE_ANTLIBRARY_HOME directory. -->
  8 | 	<property environment='env' />
  9 | 	<import file='${env.RE_ANTLIBRARY_HOME}/ant-lib.xml' />
 10 | 
 11 | 	<target name='Paclet.CodeParser.init.custom' extensionOf='Paclet.init.custom'>
 12 | 		<!-- Define the properties this build file uses: build_type, has-buildmonitor and build.dir. -->
 13 | 		<!-- Paclet builds aren't supposed to have a system_id, but these do. -->
 14 | 		<required-property name='system_id' />
 15 | 		<property name='build_type' value='${system_id}' />
 16 | 
 17 | 		<!-- Report to the build monitor, publish artifacts to images-frontend, etc. -->
 18 | 		<property name='has-buildmonitor' value='true' />
 19 | 
 20 | 		<!-- CMake generates makefiles here -->
 21 | 		<property name='build.dir' location='${checkout_directory}/CodeParser/build' />
 22 | 
 23 | 	</target>
 24 | 
 25 | 	<target name='Paclet.CodeParser.clean' extensionOf='Paclet.clean'>
 26 | 		<!-- Start from an empty build directory: delete it, then recreate it. -->
 27 | 		<delete dir='${build.dir}' />
 28 | 		<mkdir dir='${build.dir}' />
 29 | 
 30 | 	</target>
 31 | 
 32 | 	<!--=============================-->
 33 | 	<!-- Install Rust                -->
 34 | 	<!--=============================-->
 35 | 	<!-- Install a private Rust toolchain under build.dir (CARGO_HOME / RUSTUP_HOME). The installer is downloaded to a file first so that a failed download fails the build instead of piping nothing into sh. -->
 36 | 	<target name='Paclet.CodeParser.Unix.execute' extensionOf='Paclet.prebuild' unless='is.windows'>
 37 | 		<exec dir='${build.dir}' executable='sh' failonerror='true'>
 38 | 			<env key='CARGO_HOME' value='${build.dir}/.cargo' />
 39 | 			<env key='RUSTUP_HOME' value='${build.dir}/.rust' />
 40 | 			<arg value='-c' />
 41 | 			<arg value='curl --proto "=https" --tlsv1.2 -sSf https://sh.rustup.rs -o rustup-init.sh &amp;&amp; sh rustup-init.sh -y --no-modify-path' />
 42 | 		</exec>
 43 | 	</target>
 44 | 	<!-- Windows counterpart: download rustup-init.exe, then run it with 1 piped to its standard input (presumably answering the installer prompt; confirm). A failure in either step fails the build. -->
 45 | 	<target name='Paclet.CodeParser.Windows.execute' extensionOf='Paclet.prebuild' if='is.windows'>
 46 | 		<exec dir='${build.dir}' executable='cmd' failonerror='true'>
 47 | 			<arg value='/c curl -f -O https://static.rust-lang.org/rustup/dist/x86_64-pc-windows-msvc/rustup-init.exe' />
 48 | 		</exec>
 49 | 		<exec dir='${build.dir}' executable='cmd' failonerror='true'>
 50 | 			<env key='CARGO_HOME' value='${build.dir}\.cargo' />
 51 | 			<env key='RUSTUP_HOME' value='${build.dir}\.rust' />
 52 | 			<arg value='/c echo 1|rustup-init.exe --no-modify-path' />
 53 | 		</exec>
 54 | 
 55 | 	</target>
 56 | 
 57 | 	<!--=============================-->
 58 | 	<!-- Main CodeParser build task  -->
 59 | 	<!--=============================-->
 60 | 
 61 | 	<target name='Paclet.CodeParser.execute' extensionOf='Paclet.execute' >
 62 | 		<local name='MATHLINK_INCLUDE_DIR' />
 63 | 		<pathconvert dirsep='/' property='MATHLINK_INCLUDE_DIR'>
 64 | 			<path location='${re.build.prerequisites.mathlink_directory}/CompilerAdditions' />
 65 | 		</pathconvert>
 66 | 		<!-- The library directory is the same CompilerAdditions location as the include directory above. -->
 67 | 		<local name='MATHLINK_LIB_DIR' />
 68 | 		<pathconvert dirsep='/' property='MATHLINK_LIB_DIR'>
 69 | 			<path location='${re.build.prerequisites.mathlink_directory}/CompilerAdditions' />
 70 | 		</pathconvert>
 71 | 
 72 | 		<local name='WOLFRAMLIBRARY_INCLUDE_DIR' />
 73 | 		<pathconvert dirsep='/' property='WOLFRAMLIBRARY_INCLUDE_DIR'>
 74 | 			<path location='${re.build.prerequisites.runtimelibrary_directory}/${re.build.prerequisites.runtimelibrary.system_id}' />
 75 | 		</pathconvert>
 76 | 		<!-- Set the macOS deployment target to 11.0 for both MacOSX system ids; pass nothing on other platforms. -->
 77 | 		<switch value='${system_id}'>
 78 | 			<case value='MacOSX-x86-64'>
 79 | 				<property name='CMAKE_OSX_DEPLOYMENT_TARGET_LINE' value='-DCMAKE_OSX_DEPLOYMENT_TARGET=11.0' />
 80 | 			</case>
 81 | 			<case value='MacOSX-ARM64'>
 82 | 				<property name='CMAKE_OSX_DEPLOYMENT_TARGET_LINE' value='-DCMAKE_OSX_DEPLOYMENT_TARGET=11.0' />
 83 | 			</case>
 84 | 			<default>
 85 | 				<property name='CMAKE_OSX_DEPLOYMENT_TARGET_LINE' value='' />
 86 | 			</default>
 87 | 		</switch>
 88 | 		<!-- PATH with the locally installed cargo bin directory appended: env.Path and ';' on Windows-x86-64, env.PATH and ':' elsewhere. -->
 89 | 		<condition property='CARGO_PATH' value='${env.Path};${build.dir}/.cargo/bin' else='${env.PATH}:${build.dir}/.cargo/bin'>
 90 | 		    <equals arg1='${system_id}' arg2='Windows-x86-64'/>
 91 | 		</condition>
 92 | 		<!-- Configure step: re-cmake-exec presumably runs cmake in build.dir with these arguments, pointed at the CodeParser checkout. -->
 93 | 		<re-cmake-exec dir='${build.dir}'>
 94 | 			<arg line='-DWOLFRAMKERNEL=${mathExe}' />
 95 | 			<arg line='-DMATHEMATICA_INSTALL_DIR=${env.MATHEMATICA_DIRECTORY}' />
 96 | 			<arg line='-DMATHLINK_INCLUDE_DIR=${MATHLINK_INCLUDE_DIR}' />
 97 | 			<arg line='-DMATHLINK_LIB_DIR=${MATHLINK_LIB_DIR}' />
 98 | 			<arg line='-DWOLFRAMLIBRARY_INCLUDE_DIR=${WOLFRAMLIBRARY_INCLUDE_DIR}' />
 99 | 			<arg line='-DBUILDNUMBER=${env.BUILD_NUMBER}' />
100 | 			<arg line='${CMAKE_OSX_DEPLOYMENT_TARGET_LINE}' />
101 | 			<arg line='-DCMAKE_BUILD_TYPE=Release' />
102 | 			<arg line='-G &quot;${env.CMAKE_GENERATOR}&quot;' />
103 | 			<arg value='${checkout_directory}/CodeParser' />
104 | 		</re-cmake-exec>
105 | 
106 | 		<!--===============================================================-->
107 | 		<!-- Assemble the paclet by copying the source files, building the -->
108 | 		<!-- generated files, and compiling the CodeParser dynamic library -->
109 | 		<!--===============================================================-->
110 | 
111 | 		<re-cmake-exec dir='${build.dir}'>
112 | 			<env key='CARGO_HOME' value='${build.dir}/.cargo' />
113 | 			<env key='RUSTUP_HOME' value='${build.dir}/.rust' />
114 | 			<env key='PATH' value='${CARGO_PATH}' />
115 | 			<arg line='--build .' />
116 | 			<arg line='--target assemble-paclet' />
117 | 			<arg line='--verbose' />
118 | 			<arg line='--config Release' />
119 | 		</re-cmake-exec>
120 | 
121 | 		<!--==========================================-->
122 | 		<!-- Code sign the CodeParser dynamic library -->
123 | 		<!--==========================================-->
124 | 		<!-- NOTE(review): signing runs only when is.unix is unset; if the imported ant-lib also sets is.unix on macOS, the *.dylib include never applies. Confirm. -->
125 | 		<if>
126 | 			<not><isset property='is.unix'/></not>
127 | 		<then>
128 | 			<sign dir='${build.dir}'>
129 | 				<include name='**/*.dylib' if='is.osx' />
130 | 				<include name='**/*.dll' if='is.windows' />
131 | 			</sign>
132 | 		</then>
133 | 		</if>
134 | 
135 | 		<!--==============================================-->
136 | 		<!-- Build the final CodeParser-X.Y.Z.paclet file -->
137 | 		<!--==============================================-->
138 | 
139 | 		<re-cmake-exec dir='${build.dir}'>
140 | 			<arg line='--build .' />
141 | 			<arg line='--target create-paclet-archive' />
142 | 			<arg line='--verbose' />
143 | 			<arg line='--config Release' />
144 | 		</re-cmake-exec>
145 | 
146 | 	</target>
147 | 
148 | 	<target name='Paclet.CodeParser.postbuild' extensionOf='Paclet.postbuild'>
149 | 		<!-- Copy the assembled paclet tree from the build directory into files_directory/CodeParser. -->
150 | 		<mkdir dir='${files_directory}/CodeParser' />
151 | 		<copy todir='${files_directory}/CodeParser'>
152 | 			<fileset dir='${build.dir}/paclet/CodeParser' />
153 | 		</copy>
154 | 		<!-- Copy every *.paclet archive from the build directory into output_directory. -->
155 | 		<copy todir='${output_directory}'>
156 | 			<fileset dir='${build.dir}/paclet' includes='*.paclet' />
157 | 		</copy>
158 | 
159 | 	</target>
160 | 
161 | </project>
162 | 


--------------------------------------------------------------------------------