├── tests ├── fsyacc │ ├── Test1 │ │ ├── paket.references │ │ ├── test1.input1 │ │ ├── test1.input3 │ │ ├── test1.input4 │ │ ├── test1.input1.bsl │ │ ├── test1.input3.bsl │ │ ├── test1.input4.bsl │ │ ├── test1.input2.bsl │ │ ├── test1.input2.variation1 │ │ ├── test1.input2.variation2 │ │ ├── test1.input1.tokens.bsl │ │ ├── test1.fsproj │ │ ├── test1.fsy │ │ ├── test1lex.fsl │ │ ├── test1.input3.tokens.bsl │ │ └── test1.input4.tokens.bsl │ ├── Test2 │ │ ├── paket.references │ │ ├── test2.input1.bsl │ │ ├── test2.input1 │ │ ├── test2.badInput │ │ ├── test2.badInput.bsl │ │ ├── test2.fsproj │ │ ├── test2.fsy │ │ ├── test2.input1.tokens.bsl │ │ └── test2.badInput.tokens.bsl │ ├── unicode │ │ ├── paket.references │ │ ├── test1-unicode.input1.bsl │ │ ├── test1-unicode.input2.bsl │ │ ├── test1-unicode.input3.utf8 │ │ ├── test1-unicode.WithTitleCaseLetter.utf8 │ │ ├── test1-unicode.input1.tokens.bsl │ │ ├── test1-unicode.fsy │ │ ├── test1-unicode.fsproj │ │ ├── test1-unicode-lex.fsl │ │ ├── test1-unicode.WithTitleCaseLetter.tokens.error.bsl │ │ ├── test1-unicode.input3.tokens.bsl │ │ └── test1-unicode.WithTitleCaseLetter.tokens.bsl │ ├── tree.fs │ ├── .gitignore │ ├── repro_#141 │ │ └── Lexer_fail_option_i.fsl │ ├── arg2.fs │ ├── repro1885 │ │ └── repro1885.fsl │ ├── main.fs │ └── OldFsYaccTests.fsx ├── JsonLexAndYaccExample │ ├── .gitignore │ ├── JsonValue.fs │ ├── Script.fsx │ ├── JsonLexAndYaccExample.fsproj │ ├── Parser.fsy │ ├── Program.fs │ └── Lexer.fsl ├── LexAndYaccMiniProject │ ├── .gitignore │ ├── Parser.fsy │ ├── Lexer.fsl │ ├── Program.fs │ └── LexAndYaccMiniProject.fsproj ├── FsYacc.Core.Tests │ ├── paket.references │ ├── Main.fs │ ├── Sample.fs │ └── FsYacc.Core.Tests.fsproj ├── Directory.Build.props └── FsLex.Core.Tests │ ├── Main.fs │ ├── paket.references │ ├── FsLex.Core.Tests.fsproj │ └── UnicodeTests.fs ├── src ├── FsLex │ ├── paket.references │ ├── fslex.fsx │ ├── App.config │ ├── AssemblyInfo.fs │ ├── fslex.fsproj │ └── fslex.fs ├── FsYacc │ ├── 
paket.references │ ├── fsyacc.fsx │ ├── AssemblyInfo.fs │ ├── fsyacc.fsproj │ └── fsyacc.fs ├── FsLexYacc.Runtime │ ├── paket.references │ ├── AssemblyInfo.fs │ ├── FsLexYacc.Runtime.fsproj │ ├── Parsing.fsi │ └── Lexing.fsi ├── FsLex.Core │ ├── paket.references │ ├── AssemblyInfo.fs │ ├── FsLex.Core.fsproj │ ├── fslexpars.fsi │ ├── fslexpars.fsy │ ├── fslexlex.fsl │ └── fslexdriver.fs ├── FsYacc.Core │ ├── paket.references │ ├── AssemblyInfo.fs │ ├── FsYacc.Core.fsproj │ ├── fsyaccpars.fsi │ ├── fsyaccpars.fsy │ └── fsyacclex.fsl ├── FsLexYacc.Build.Tasks │ ├── AssemblyInfo.fs │ └── FsLexYacc.targets ├── Directory.Build.props └── Common │ ├── Arg.fsi │ └── Arg.fs ├── nuget ├── publish.cmd └── FsLexYacc.template ├── docs ├── img │ ├── logo.pdn │ ├── logo.png │ └── favicon.ico ├── index.fsx └── content │ ├── fsyacc.md │ └── fslex.md ├── global.json ├── .github ├── dependabot.yml └── workflows │ ├── pull-requests.yml │ └── push-main.yml ├── paket.dependencies ├── .fantomasignore ├── .editorconfig ├── .config └── dotnet-tools.json ├── ISSUE_TEMPLATE.md ├── README.md ├── LICENSE.txt ├── RELEASE_NOTES.md ├── paket.lock ├── .gitattributes ├── .gitignore └── FsLexYacc.sln /tests/fsyacc/Test1/paket.references: -------------------------------------------------------------------------------- 1 | FSharp.Core -------------------------------------------------------------------------------- /tests/fsyacc/Test2/paket.references: -------------------------------------------------------------------------------- 1 | FSharp.Core -------------------------------------------------------------------------------- /tests/fsyacc/unicode/paket.references: -------------------------------------------------------------------------------- 1 | FSharp.Core -------------------------------------------------------------------------------- /tests/fsyacc/Test1/test1.input1: -------------------------------------------------------------------------------- 1 | 2 | id + id 3 | 
-------------------------------------------------------------------------------- /tests/fsyacc/Test1/test1.input3: -------------------------------------------------------------------------------- 1 | let id x + x in id + id end -------------------------------------------------------------------------------- /tests/fsyacc/Test1/test1.input4: -------------------------------------------------------------------------------- 1 | LeT id x + x iN id + id eNd -------------------------------------------------------------------------------- /tests/fsyacc/Test1/test1.input1.bsl: -------------------------------------------------------------------------------- 1 | parsed test1.input1 ok 2 | -------------------------------------------------------------------------------- /tests/fsyacc/Test1/test1.input3.bsl: -------------------------------------------------------------------------------- 1 | parsed test1.input3 ok 2 | -------------------------------------------------------------------------------- /tests/fsyacc/Test2/test2.input1.bsl: -------------------------------------------------------------------------------- 1 | parsed ./test2.input1 ok 2 | -------------------------------------------------------------------------------- /src/FsLex/paket.references: -------------------------------------------------------------------------------- 1 | FSharp.Core 2 | Microsoft.SourceLink.GitHub -------------------------------------------------------------------------------- /src/FsYacc/paket.references: -------------------------------------------------------------------------------- 1 | FSharp.Core 2 | Microsoft.SourceLink.GitHub -------------------------------------------------------------------------------- /nuget/publish.cmd: -------------------------------------------------------------------------------- 1 | @for %%f in (..\bin\*.nupkg) do @..\.nuget\NuGet.exe push %%f -------------------------------------------------------------------------------- /tests/JsonLexAndYaccExample/.gitignore: 
-------------------------------------------------------------------------------- 1 | Lexer.fs 2 | Parser.fs 3 | Parser.fsi -------------------------------------------------------------------------------- /tests/fsyacc/unicode/test1-unicode.input1.bsl: -------------------------------------------------------------------------------- 1 | parsed ./test1.input1 ok 2 | -------------------------------------------------------------------------------- /src/FsLexYacc.Runtime/paket.references: -------------------------------------------------------------------------------- 1 | FSharp.Core 2 | Microsoft.SourceLink.GitHub -------------------------------------------------------------------------------- /src/FsLex.Core/paket.references: -------------------------------------------------------------------------------- 1 | FSharp.Core 2 | FsLexYacc 3 | Microsoft.SourceLink.GitHub -------------------------------------------------------------------------------- /tests/LexAndYaccMiniProject/.gitignore: -------------------------------------------------------------------------------- 1 | Lexer.fs 2 | Parser.fs 3 | Parser.fsi 4 | test.txt -------------------------------------------------------------------------------- /docs/img/logo.pdn: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fsprojects/FsLexYacc/HEAD/docs/img/logo.pdn -------------------------------------------------------------------------------- /docs/img/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fsprojects/FsLexYacc/HEAD/docs/img/logo.png -------------------------------------------------------------------------------- /src/FsYacc.Core/paket.references: -------------------------------------------------------------------------------- 1 | FSharp.Core 2 | FsLexYacc 3 | Microsoft.SourceLink.GitHub -------------------------------------------------------------------------------- 
/tests/fsyacc/Test1/test1.input4.bsl: -------------------------------------------------------------------------------- 1 | parsed test1.input3 ok 2 | parsed test1.input4 ok 3 | -------------------------------------------------------------------------------- /docs/img/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fsprojects/FsLexYacc/HEAD/docs/img/favicon.ico -------------------------------------------------------------------------------- /tests/fsyacc/Test2/test2.input1: -------------------------------------------------------------------------------- 1 | 2 | x (id + id) 3 | y (id + id + id) 4 | z (id + id * id) 5 | -------------------------------------------------------------------------------- /global.json: -------------------------------------------------------------------------------- 1 | { 2 | "sdk": { 3 | "version": "6.0.400", 4 | "rollForward": "minor" 5 | } 6 | } 7 | -------------------------------------------------------------------------------- /tests/fsyacc/Test1/test1.input2.bsl: -------------------------------------------------------------------------------- 1 | parsed test1.input2.variation1 ok 2 | parsed test1.input2.variation2 ok 3 | -------------------------------------------------------------------------------- /tests/FsYacc.Core.Tests/paket.references: -------------------------------------------------------------------------------- 1 | FSharp.Core 2 | Expecto 3 | Microsoft.NET.Test.Sdk 4 | YoloDev.Expecto.TestSdk -------------------------------------------------------------------------------- /tests/fsyacc/tree.fs: -------------------------------------------------------------------------------- 1 | module Tree 2 | type tree = Node of string * tree list 3 | type decl = Decl of string * tree 4 | -------------------------------------------------------------------------------- /tests/fsyacc/unicode/test1-unicode.input2.bsl: 
-------------------------------------------------------------------------------- 1 | parsed ./test1.input2.variation1 ok 2 | parsed ./test1.input2.variation2 ok 3 | -------------------------------------------------------------------------------- /tests/Directory.Build.props: -------------------------------------------------------------------------------- 1 | 2 | 3 | false 4 | 5 | -------------------------------------------------------------------------------- /tests/FsLex.Core.Tests/Main.fs: -------------------------------------------------------------------------------- 1 | [] 2 | let main argv = Expecto.Tests.runTestsInAssembly Expecto.Tests.defaultConfig argv 3 | -------------------------------------------------------------------------------- /tests/FsLex.Core.Tests/paket.references: -------------------------------------------------------------------------------- 1 | FSharp.Core 2 | Expecto 3 | Expecto.FsCheck 4 | Microsoft.NET.Test.Sdk 5 | YoloDev.Expecto.TestSdk -------------------------------------------------------------------------------- /tests/FsYacc.Core.Tests/Main.fs: -------------------------------------------------------------------------------- 1 | [] 2 | let main argv = Expecto.Tests.runTestsInAssembly Expecto.Tests.defaultConfig argv 3 | -------------------------------------------------------------------------------- /tests/fsyacc/Test1/test1.input2.variation1: -------------------------------------------------------------------------------- 1 | 2 | (id + id + id) + (id * id * id) + (id - id - id) + (id + id * id) + (id * id + id) 3 | 4 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: "nuget" 4 | directory: "/" 5 | schedule: 6 | interval: "weekly" -------------------------------------------------------------------------------- 
/tests/fsyacc/Test1/test1.input2.variation2: -------------------------------------------------------------------------------- 1 | 2 | (((((id + id) + id) + ((id * id) * id)) + (id - (id - id))) + (id + (id * id))) + ((id * id) + id) 3 | -------------------------------------------------------------------------------- /src/FsLex/fslex.fsx: -------------------------------------------------------------------------------- 1 | #load "Lexing.fsi" "Lexing.fs" "Parsing.fsi" "Parsing.fs" "Arg.fsi" "Arg.fs" "fslexast.fs" "fslexpars.fs" "fslexlex.fs" "fslex.fs" 2 | 3 | let v = FsLexYacc.FsLex.Driver.result 4 | -------------------------------------------------------------------------------- /src/FsYacc/fsyacc.fsx: -------------------------------------------------------------------------------- 1 | #load "Lexing.fsi" "Lexing.fs" "Parsing.fsi" "Parsing.fs" "Arg.fsi" "Arg.fs" "fsyaccast.fs" "fsyaccpars.fs" "fsyacclex.fs" "fsyacc.fs" 2 | 3 | let v = FsLexYacc.FsYacc.Driver.result 4 | -------------------------------------------------------------------------------- /tests/FsYacc.Core.Tests/Sample.fs: -------------------------------------------------------------------------------- 1 | module FsYacc.Core.Tests.Sample 2 | 3 | open Expecto 4 | 5 | [] 6 | let tests = 7 | test "sample" { 8 | Expect.equal 2 2 "2=2" 9 | } 10 | -------------------------------------------------------------------------------- /tests/fsyacc/Test2/test2.badInput: -------------------------------------------------------------------------------- 1 | 2 | z1 (let x in id end) 3 | x2 (id + id 4 | y3 (id + id + id) 5 | z4 (id + id * id) 6 | z5 (let x + in id end) 7 | z6 (let x + in id end) 8 | z7 (let x + y in id end) 9 | z8 (let x ))) in id end) 10 | 11 | -------------------------------------------------------------------------------- /tests/fsyacc/unicode/test1-unicode.input3.utf8: -------------------------------------------------------------------------------- 1 | 2 | next line tests one unicode character class 3 | 
ÄËÖÏÜâæçñõö + id 4 | next line tests specific unicode characters 5 | ≠ ≠≠ ≈≈ ≈≈≈ 6 | id + id 7 | next line tests some more random unicode characters 8 | МНОПРСТУФХЦẀẁẂќ αβΛΘΩΨΧΣδζȚŶǺ 9 | id -------------------------------------------------------------------------------- /tests/fsyacc/unicode/test1-unicode.WithTitleCaseLetter.utf8: -------------------------------------------------------------------------------- 1 | 2 | next line tests one unicode character class 3 | ÄËÖÏÜâæçñõö + id 4 | next line tests specific unicode characters 5 | ≠ ≠≠ ≈≈ ≈≈≈ 6 | id + id 7 | next line tests some more random unicode characters 8 | DzМНОПРСТУФХЦẀẁẂќ αβΛΘΩΨΧΣδζȚŶǺ 9 | id -------------------------------------------------------------------------------- /paket.dependencies: -------------------------------------------------------------------------------- 1 | source https://api.nuget.org/v3/index.json 2 | 3 | storage: none 4 | frameworks: netstandard2.0, net6.0 5 | 6 | nuget FSharp.Core >= 4.6.0 7 | nuget FsLexYacc copy_local: true 8 | nuget Microsoft.SourceLink.GitHub copy_local: true 9 | nuget Expecto ~> 9.0 10 | nuget Expecto.FsCheck 11 | nuget Microsoft.NET.Test.Sdk 12 | nuget YoloDev.Expecto.TestSdk -------------------------------------------------------------------------------- /.fantomasignore: -------------------------------------------------------------------------------- 1 | # Generated by FAKE 2 | AssemblyInfo.fs 3 | .fake/ 4 | 5 | # Generated files 6 | src/FsLex.Core/fslexlex.fs 7 | src/FsLex.Core/fslexpars.fs 8 | src/FsLex.Core/fslexpars.fsi 9 | src/FsYacc.Core/fsyacclex.fs 10 | src/FsYacc.Core/fsyaccpars.fs 11 | src/FsYacc.Core/fsyaccpars.fsi 12 | 13 | # Ignore for now 14 | tests/ 15 | 16 | # We cannot parse this file for all define combinations 17 | src/FsLexYacc.Runtime/Parsing.fs -------------------------------------------------------------------------------- /tests/fsyacc/Test1/test1.input1.tokens.bsl: 
-------------------------------------------------------------------------------- 1 | tokenize - getting one token 2 | tokenize - got IDENT, now at char 2 3 | ident char = 105 4 | ident char = 100 5 | tokenize - getting one token 6 | tokenize - got PLUS, now at char 5 7 | tokenize - getting one token 8 | tokenize - got IDENT, now at char 7 9 | ident char = 105 10 | ident char = 100 11 | tokenize - getting one token 12 | tokenize - got EOF, now at char 11 13 | -------------------------------------------------------------------------------- /tests/fsyacc/unicode/test1-unicode.input1.tokens.bsl: -------------------------------------------------------------------------------- 1 | tokenize - getting one token 2 | tokenize - got IDENT, now at char 2 3 | ident char = 105 4 | ident char = 100 5 | tokenize - getting one token 6 | tokenize - got PLUS, now at char 5 7 | tokenize - getting one token 8 | tokenize - got IDENT, now at char 7 9 | ident char = 105 10 | ident char = 100 11 | tokenize - getting one token 12 | tokenize - got EOF, now at char 11 13 | -------------------------------------------------------------------------------- /tests/fsyacc/Test2/test2.badInput.bsl: -------------------------------------------------------------------------------- 1 | invisible error recovery successful. 2 | Missing paren: visible recovery successful. 3 | invisible error recovery successful. 4 | invisible error recovery successful. 5 | invisible error recovery successful. 6 | Three parens is a bit rich - why not use Lisp if you like that sort of thing. Raising explicit parse error, which we will recover from. 7 | invisible error recovery successful. 8 | parsed ./test2.badInput ok 9 | -------------------------------------------------------------------------------- /.editorconfig: -------------------------------------------------------------------------------- 1 | root = true 2 | 3 | # max_line_length is set to 140. At some point we will reduce it to 120 for as many files as reasonable. 
4 | [*.{fs,fsi,fsx}] 5 | max_line_length = 140 6 | fsharp_newline_between_type_definition_and_members = true 7 | fsharp_max_function_binding_width = 40 8 | fsharp_max_if_then_else_short_width = 60 9 | fsharp_max_infix_operator_expression = 80 10 | fsharp_max_array_or_list_width = 80 11 | fsharp_max_dot_get_expression_width = 80 12 | fsharp_multiline_bracket_style = aligned 13 | fsharp_keep_max_number_of_blank_lines = 1 14 | -------------------------------------------------------------------------------- /tests/fsyacc/.gitignore: -------------------------------------------------------------------------------- 1 | 2 | /FSharp.Core.dll 3 | /FsLexYacc.Runtime.dll 4 | /repro1885.fs 5 | /test1.exe 6 | /test1.fs 7 | /test1.fsi 8 | /test1.ml 9 | /test1.mli 10 | /test1compat.exe 11 | /test1compat.ml 12 | /test1compat.mli 13 | /test1lex.fs 14 | /test1lex.ml 15 | /test1-unicode.exe 16 | /test1-unicode.fs 17 | /test1-unicode.fsi 18 | /test1-unicode.ml 19 | /test1-unicode.mli 20 | /test1-unicode-lex.fs 21 | /test1-unicode-lex.ml 22 | /test2.exe 23 | /test2.fs 24 | /test2.fsi 25 | /test2.ml 26 | /test2.mli 27 | /test2compat.exe 28 | /test2compat.ml 29 | /test2compat.mli 30 | -------------------------------------------------------------------------------- /src/FsLex/App.config: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | -------------------------------------------------------------------------------- /.config/dotnet-tools.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": 1, 3 | "isRoot": true, 4 | "tools": { 5 | "fake-cli": { 6 | "version": "6.1.3", 7 | "commands": [ 8 | "fake" 9 | ] 10 | }, 11 | "paket": { 12 | "version": "8.0.3", 13 | "commands": [ 14 | "paket" 15 | ] 16 | }, 17 | "fantomas": { 18 | "version": "6.3.16", 19 | "commands": [ 20 | "fantomas" 21 | ] 22 | }, 23 | "fsdocs-tool": { 24 | "version": "20.0.1", 25 | 
"commands": [ 26 | "fsdocs" 27 | ] 28 | } 29 | } 30 | } -------------------------------------------------------------------------------- /tests/FsLex.Core.Tests/FsLex.Core.Tests.fsproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Exe 5 | net6.0 6 | false 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | -------------------------------------------------------------------------------- /tests/FsYacc.Core.Tests/FsYacc.Core.Tests.fsproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Exe 5 | net6.0 6 | false 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | -------------------------------------------------------------------------------- /.github/workflows/pull-requests.yml: -------------------------------------------------------------------------------- 1 | name: Build and test 2 | 3 | on: 4 | pull_request: 5 | branches: 6 | - master 7 | 8 | jobs: 9 | build: 10 | 11 | strategy: 12 | fail-fast: false 13 | matrix: 14 | os: [ubuntu-latest, windows-latest, macOS-latest] 15 | runs-on: ${{ matrix.os }} 16 | 17 | steps: 18 | - uses: actions/checkout@v3 19 | - name: Setup .NET for main project build 20 | uses: actions/setup-dotnet@v3 21 | - name: Install local tools 22 | run: dotnet tool restore 23 | - name: Paket restore 24 | run: dotnet paket restore 25 | - name: Build 26 | run: dotnet fake run build.fsx 27 | -------------------------------------------------------------------------------- /tests/LexAndYaccMiniProject/Parser.fsy: -------------------------------------------------------------------------------- 1 | // The start token becomes a parser function in the compiled code: 2 | %start start 3 | 4 | // Regular tokens 5 | %token HELLO 6 | 7 | // Misc tokens 8 | %token EOF 9 | 10 | // This is the type of the data produced by a successful reduction of the 'start' 11 | // symbol: 12 | %type < int > start 13 | 14 | %% 15 | 16 | // These are 
the rules of the grammar along with the F# code of the 17 | // actions executed as rules are reduced. 18 | start: File end { $1 } 19 | | end { $1 } 20 | 21 | File: 22 | | HELLO { 1 } 23 | | HELLO HELLO { 2 } 24 | 25 | 26 | // Using F# keywords for nonterminal names is okay. 27 | end: EOF { 3 } 28 | -------------------------------------------------------------------------------- /tests/LexAndYaccMiniProject/Lexer.fsl: -------------------------------------------------------------------------------- 1 | { 2 | 3 | // Opens methods related to fslex.exe 4 | open FSharp.Text.Lexing 5 | 6 | let newline (lexbuf: LexBuffer<_>) = 7 | lexbuf.StartPos <- lexbuf.StartPos.NextLine 8 | 9 | } 10 | 11 | // Regular expressions 12 | let whitespace = [' ' '\t' ] 13 | let newline = ('\n' | '\r' '\n') 14 | 15 | rule tokenstream = parse 16 | // -------------------------- 17 | | "hello" { Parser.HELLO } 18 | // -------------------------- 19 | | whitespace { tokenstream lexbuf } 20 | | newline { newline lexbuf; tokenstream lexbuf } 21 | // -------------------------- 22 | | _ { failwith ("ParseError" + LexBuffer<_>.LexemeString lexbuf) } 23 | | eof { Parser.EOF } 24 | -------------------------------------------------------------------------------- /tests/fsyacc/repro_#141/Lexer_fail_option_i.fsl: -------------------------------------------------------------------------------- 1 | { 2 | 3 | module Lexer 4 | 5 | // Opens methods related to fslex.exe 6 | open FSharp.Text.Lexing 7 | } 8 | 9 | // Regular expressions 10 | let whitespace = [' ' '\t' ] 11 | let newline = ('\n' | '\r' '\n') 12 | 13 | rule tokenstream = parse 14 | // -------------------------- 15 | | whitespace { tokenstream lexbuf } 16 | 17 | // -------------------------- 18 | | newline { newline lexbuf; tokenstream lexbuf } 19 | // -------------------------- 20 | | _ { raise (new EqInterpretationReglesException (sprintf "[Lexer] Erreur %s %d %d" (LexBuffer<_>.LexemeString lexbuf) (lexbuf.StartPos.Line + 1) lexbuf.StartPos.Column)) } 
21 | | eof { Parser.EOF } 22 | -------------------------------------------------------------------------------- /tests/fsyacc/unicode/test1-unicode.fsy: -------------------------------------------------------------------------------- 1 | %{ 2 | //module TestParser 3 | 4 | %} 5 | 6 | %type start 7 | %token MINUS STAR LPAREN RPAREN PLUS EOF LET IN END UNICODE1 UNICODE2 8 | %token IDENT 9 | %start start 10 | 11 | %right MINUS 12 | %left PLUS 13 | %left STAR 14 | %% 15 | 16 | start: expr EOF { $1 } 17 | 18 | decl: IDENT expr { Tree.Node("decl",[$2]) } 19 | 20 | expr: expr MINUS expr { Tree.Node("-",[$1;$3]) } 21 | | expr PLUS expr { Tree.Node("+",[$1;$3]) } 22 | | expr STAR expr { Tree.Node("*",[$1;$3]) } 23 | | LPAREN expr RPAREN { $2 } 24 | | IDENT { Tree.Node($1,[]) } 25 | | LET decl IN expr END { $4 } 26 | | UNICODE1 { Tree.Node("UNICODE1",[])} 27 | | UNICODE2 { Tree.Node("UNICODE2",[])} 28 | 29 | 30 | 31 | -------------------------------------------------------------------------------- /src/FsLex/AssemblyInfo.fs: -------------------------------------------------------------------------------- 1 | // Auto-Generated by FAKE; do not edit 2 | namespace System 3 | open System.Reflection 4 | 5 | [] 6 | [] 7 | [] 8 | [] 9 | [] 10 | do () 11 | 12 | module internal AssemblyVersionInformation = 13 | let [] AssemblyTitle = "FsLex" 14 | let [] AssemblyProduct = "FsLexYacc" 15 | let [] AssemblyDescription = "FsLex/FsYacc lexer/parser generation tools" 16 | let [] AssemblyVersion = "11.3.0" 17 | let [] AssemblyFileVersion = "11.3.0" 18 | -------------------------------------------------------------------------------- /ISSUE_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | 2 | ### Description 3 | 4 | Please provide a succinct description of your issue. 5 | 6 | ### Repro steps 7 | 8 | Please provide the steps required to reproduce the problem 9 | 10 | 1. Step A 11 | 12 | 2. 
Step B 13 | 14 | ### Expected behavior 15 | 16 | Please provide a description of the behaviour you expect. 17 | 18 | ### Actual behavior 19 | 20 | Please provide a description of the actual behaviour you observe. 21 | 22 | ### Known workarounds 23 | 24 | Please provide a description of any known workarounds. 25 | 26 | ### Related information 27 | 28 | * Operating system 29 | * Branch 30 | * Database versions and sample databases being used 31 | * .NET Runtime, CoreCLR or Mono Version 32 | * Performance information, links to performance testing scripts 33 | 34 | -------------------------------------------------------------------------------- /src/FsYacc/AssemblyInfo.fs: -------------------------------------------------------------------------------- 1 | // Auto-Generated by FAKE; do not edit 2 | namespace System 3 | open System.Reflection 4 | 5 | [] 6 | [] 7 | [] 8 | [] 9 | [] 10 | do () 11 | 12 | module internal AssemblyVersionInformation = 13 | let [] AssemblyTitle = "FsYacc" 14 | let [] AssemblyProduct = "FsLexYacc" 15 | let [] AssemblyDescription = "FsLex/FsYacc lexer/parser generation tools" 16 | let [] AssemblyVersion = "11.3.0" 17 | let [] AssemblyFileVersion = "11.3.0" 18 | -------------------------------------------------------------------------------- /src/FsLex.Core/AssemblyInfo.fs: -------------------------------------------------------------------------------- 1 | // Auto-Generated by FAKE; do not edit 2 | namespace System 3 | open System.Reflection 4 | 5 | [] 6 | [] 7 | [] 8 | [] 9 | [] 10 | do () 11 | 12 | module internal AssemblyVersionInformation = 13 | let [] AssemblyTitle = "FsLex.Core" 14 | let [] AssemblyProduct = "FsLexYacc" 15 | let [] AssemblyDescription = "FsLex/FsYacc lexer/parser generation tools" 16 | let [] AssemblyVersion = "11.3.0" 17 | let [] AssemblyFileVersion = "11.3.0" 18 | -------------------------------------------------------------------------------- /src/FsYacc.Core/AssemblyInfo.fs: 
-------------------------------------------------------------------------------- 1 | // Auto-Generated by FAKE; do not edit 2 | namespace System 3 | open System.Reflection 4 | 5 | [] 6 | [] 7 | [] 8 | [] 9 | [] 10 | do () 11 | 12 | module internal AssemblyVersionInformation = 13 | let [] AssemblyTitle = "FsYacc.Core" 14 | let [] AssemblyProduct = "FsLexYacc" 15 | let [] AssemblyDescription = "FsLex/FsYacc lexer/parser generation tools" 16 | let [] AssemblyVersion = "11.3.0" 17 | let [] AssemblyFileVersion = "11.3.0" 18 | -------------------------------------------------------------------------------- /tests/fsyacc/Test1/test1.fsproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Exe 5 | net6.0 6 | FS0760 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | -------------------------------------------------------------------------------- /tests/fsyacc/Test2/test2.fsproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Exe 5 | net6.0 6 | FS0760 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | -------------------------------------------------------------------------------- /tests/fsyacc/Test1/test1.fsy: -------------------------------------------------------------------------------- 1 | %{ 2 | //module TestParser 3 | //Bug1885: is about skipping // comments in the header and code sections, rather than lexing as tokens 4 | //Bug1885: REPRO: Convert a string such as "\"c:\\windows\\\"" into "c:\windows\" 5 | %} 6 | 7 | %type start 8 | %token MINUS STAR LPAREN RPAREN PLUS EOF LET IN END 9 | %token IDENT 10 | %start start 11 | 12 | %right MINUS 13 | %left PLUS 14 | %left STAR 15 | %% 16 | 17 | start: expr EOF { $1 } 18 | 19 | decl: IDENT expr { Tree.Node("decl",[$2]) } 20 | 21 | expr: expr MINUS expr { Tree.Node("-",[$1;$3]) } 22 | | expr PLUS expr { Tree.Node("+",[$1;$3]) } 23 | | expr STAR expr { Tree.Node("*",[$1;$3]) } 24 | | 
LPAREN expr RPAREN { $2 } 25 | | IDENT { Tree.Node($1,[]) } 26 | | LET decl IN expr END { $4 } 27 | 28 | 29 | 30 | -------------------------------------------------------------------------------- /src/FsLexYacc.Build.Tasks/AssemblyInfo.fs: -------------------------------------------------------------------------------- 1 | // Auto-Generated by FAKE; do not edit 2 | namespace System 3 | open System.Reflection 4 | 5 | [] 6 | [] 7 | [] 8 | [] 9 | [] 10 | do () 11 | 12 | module internal AssemblyVersionInformation = 13 | let [] AssemblyTitle = "FsLexYacc.Build.Tasks" 14 | let [] AssemblyProduct = "FsLexYacc" 15 | let [] AssemblyDescription = "FsLex/FsYacc lexer/parser generation tools" 16 | let [] AssemblyVersion = "7.0.0" 17 | let [] AssemblyFileVersion = "7.0.0" 18 | -------------------------------------------------------------------------------- /src/FsLexYacc.Runtime/AssemblyInfo.fs: -------------------------------------------------------------------------------- 1 | // Auto-Generated by FAKE; do not edit 2 | namespace System 3 | open System.Reflection 4 | 5 | [] 6 | [] 7 | [] 8 | [] 9 | [] 10 | do () 11 | 12 | module internal AssemblyVersionInformation = 13 | let [] AssemblyTitle = "FsLexYacc.Runtime" 14 | let [] AssemblyProduct = "FsLexYacc.Runtime" 15 | let [] AssemblyDescription = "FsLex/FsYacc lexer/parser generation tools" 16 | let [] AssemblyVersion = "11.3.0" 17 | let [] AssemblyFileVersion = "11.3.0" 18 | -------------------------------------------------------------------------------- /tests/fsyacc/Test1/test1lex.fsl: -------------------------------------------------------------------------------- 1 | 2 | { 3 | module TestLexer 4 | open TestParser 5 | } 6 | 7 | let letter = ['A'-'Z'] | ['a'-'z'] 8 | let digit = ['0'-'9'] 9 | let ident_start_char = 10 | letter | ['_'] 11 | let ident_char = ( ident_start_char| digit | ['\''] ) 12 | let ident = ident_start_char ident_char* 13 | let whitespace = [' ' '\t' '\n' '\r'] 14 | 15 | 16 | rule token = parse 17 | | "(" 
{ LPAREN } 18 | | ")" { RPAREN } 19 | | "*" { STAR } 20 | | "+" { PLUS } 21 | | "-" { MINUS } 22 | | "let" { LET } 23 | | "in" { IN } 24 | | "end" { END } 25 | | ident { let s = lexbuf.Lexeme |> System.Text.Encoding.ASCII.GetString 26 | match s with 27 | | "let" -> LET 28 | | "in" -> IN 29 | | "end" -> END 30 | | _ -> IDENT(s) } 31 | | whitespace { token lexbuf } 32 | | eof { EOF } 33 | 34 | -------------------------------------------------------------------------------- /tests/fsyacc/unicode/test1-unicode.fsproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Exe 5 | net6.0 6 | FS0760 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | -------------------------------------------------------------------------------- /src/FsYacc/fsyacc.fsproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Exe 5 | net6.0 6 | Major 7 | false 8 | 9 | 10 | 11 | AssemblyInfo.fs 12 | 13 | 14 | arg.fsi 15 | 16 | 17 | arg.fs 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | -------------------------------------------------------------------------------- /src/FsLex/fslex.fsproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Exe 5 | net6.0 6 | Major 7 | false 8 | true 9 | 10 | 11 | 12 | 13 | arg.fsi 14 | 15 | 16 | arg.fs 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | -------------------------------------------------------------------------------- /src/FsLex.Core/FsLex.Core.fsproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | netstandard2.0 5 | 6 | 7 | 8 | 9 | --module FsLexYacc.FsLex.Parser --lexlib FSharp.Text.Lexing --parslib FSharp.Text.Parsing 10 | 11 | 12 | --unicode --lexlib FSharp.Text.Lexing 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | -------------------------------------------------------------------------------- 
/src/FsLexYacc.Runtime/FsLexYacc.Runtime.fsproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | netstandard2.0 5 | Library 6 | Runtime for FsLex/FsYacc lexer/parser generation tools 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | -------------------------------------------------------------------------------- /src/FsYacc.Core/FsYacc.Core.fsproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | netstandard2.0 5 | 6 | 7 | 8 | 9 | --unicode --lexlib FSharp.Text.Lexing 10 | 11 | 12 | --module FsLexYacc.FsYacc.Parser --lexlib FSharp.Text.Lexing --parslib FSharp.Text.Parsing 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | FsLexYacc 2 | ======================= 3 | 4 | FsLex and FsYacc tools 5 | 6 | See https://fsprojects.github.io/FsLexYacc. 
7 | 8 | * FsLexYacc.Runtime - [![NuGet Badge](https://buildstats.info/nuget/FsLexYacc.Runtime)](https://www.nuget.org/packages/FsLexYacc.Runtime) 9 | * FsLexYacc - [![NuGet Badge](https://buildstats.info/nuget/FsLexYacc)](https://www.nuget.org/packages/FsLexYacc) 10 | 11 | Build the project 12 | ----------------- 13 | 14 | [![Build Status](https://github.com/fsprojects/FsLexYacc/workflows/Build%20and%20test/badge.svg?branch=master)](https://github.com/fsprojects/FsLexYacc/actions?query=branch%3Amaster) 15 | 16 | * Unix: Run *build.sh* 17 | * Windows: Run *build.cmd* 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | ### Maintainer(s) 30 | 31 | - [@kkm000](https://github.com/kkm000) 32 | - [@dsyme](https://github.com/dsyme) 33 | 34 | The default maintainer account for projects under "fsprojects" is [@fsprojectsgit](https://github.com/fsprojectsgit) - F# Community Project Incubation Space (repo management) 35 | 36 | -------------------------------------------------------------------------------- /tests/fsyacc/Test1/test1.input3.tokens.bsl: -------------------------------------------------------------------------------- 1 | tokenize - getting one token 2 | tokenize - got LET, now at char 0 3 | tokenize - getting one token 4 | tokenize - got IDENT, now at char 4 5 | ident char = 105 6 | ident char = 100 7 | tokenize - getting one token 8 | tokenize - got IDENT, now at char 7 9 | ident char = 120 10 | tokenize - getting one token 11 | tokenize - got PLUS, now at char 9 12 | tokenize - getting one token 13 | tokenize - got IDENT, now at char 11 14 | ident char = 120 15 | tokenize - getting one token 16 | tokenize - got IN, now at char 13 17 | tokenize - getting one token 18 | tokenize - got IDENT, now at char 16 19 | ident char = 105 20 | ident char = 100 21 | tokenize - getting one token 22 | tokenize - got PLUS, now at char 19 23 | tokenize - getting one token 24 | tokenize - got IDENT, now at char 21 25 | ident char = 105 26 | ident char = 100 27 | tokenize - 
getting one token 28 | tokenize - got END, now at char 24 29 | tokenize - getting one token 30 | tokenize - got EOF, now at char 27 31 | -------------------------------------------------------------------------------- /tests/fsyacc/Test1/test1.input4.tokens.bsl: -------------------------------------------------------------------------------- 1 | tokenize - getting one token 2 | tokenize - got LET, now at char 0 3 | tokenize - getting one token 4 | tokenize - got IDENT, now at char 4 5 | ident char = 105 6 | ident char = 100 7 | tokenize - getting one token 8 | tokenize - got IDENT, now at char 7 9 | ident char = 120 10 | tokenize - getting one token 11 | tokenize - got PLUS, now at char 9 12 | tokenize - getting one token 13 | tokenize - got IDENT, now at char 11 14 | ident char = 120 15 | tokenize - getting one token 16 | tokenize - got IN, now at char 13 17 | tokenize - getting one token 18 | tokenize - got IDENT, now at char 16 19 | ident char = 105 20 | ident char = 100 21 | tokenize - getting one token 22 | tokenize - got PLUS, now at char 19 23 | tokenize - getting one token 24 | tokenize - got IDENT, now at char 21 25 | ident char = 105 26 | ident char = 100 27 | tokenize - getting one token 28 | tokenize - got END, now at char 24 29 | tokenize - getting one token 30 | tokenize - got EOF, now at char 27 31 | -------------------------------------------------------------------------------- /tests/fsyacc/arg2.fs: -------------------------------------------------------------------------------- 1 | // (c) Microsoft Corporation 2005-2009. 
2 | 3 | [] 4 | module Microsoft.FSharp.Compatibility.OCaml.Arg 5 | open FSharp.Text 6 | 7 | let Clear x = ArgType.Clear x 8 | let Float x = ArgType.Float x 9 | let Int x = ArgType.Int x 10 | let Rest x = ArgType.Rest x 11 | let Set x = ArgType.Set x 12 | let String x = ArgType.String x 13 | let Unit x = ArgType.Unit x 14 | 15 | type spec = ArgType 16 | type argspec = (string * spec * string) 17 | #if FX_NO_COMMAND_LINE_ARGS 18 | #else 19 | 20 | exception Bad of string 21 | exception Help of string 22 | let parse_argv cursor argv specs other usageText = 23 | ArgParser.ParsePartial(cursor, argv, List.map (fun (a,b,c) -> ArgInfo(a,b,c)) specs, other, usageText) 24 | 25 | let parse specs other usageText = 26 | ArgParser.Parse(List.map (fun (a,b,c) -> ArgInfo(a,b,c)) specs, other, usageText) 27 | 28 | let usage specs usageText = 29 | ArgParser.Usage(List.map (fun (a,b,c) -> ArgInfo(a,b,c)) specs, usageText) 30 | #endif -------------------------------------------------------------------------------- /tests/LexAndYaccMiniProject/Program.fs: -------------------------------------------------------------------------------- 1 | // Learn more about F# at http://fsharp.net 2 | 3 | open System.IO 4 | open FSharp.Text.Lexing 5 | 6 | let testLexerAndParserFromString text expectedCount = 7 | let lexbuf = LexBuffer.FromString text 8 | 9 | let countFromParser = Parser.start Lexer.tokenstream lexbuf 10 | 11 | printfn "countFromParser: result = %d, expected %d" countFromParser expectedCount 12 | 13 | let testLexerAndParserFromFile (fileName:string) expectedCount = 14 | use textReader = new System.IO.StreamReader(fileName) 15 | let lexbuf = LexBuffer.FromTextReader textReader 16 | 17 | let countFromParser = Parser.start Lexer.tokenstream lexbuf 18 | 19 | printfn "countFromParser: result = %d, expected %d" countFromParser expectedCount 20 | 21 | testLexerAndParserFromString "hello" 1 22 | testLexerAndParserFromString "hello hello" 2 23 | 24 | let testFile = 
Path.Combine(__SOURCE_DIRECTORY__, "test.txt") 25 | File.WriteAllText(testFile, "hello hello") 26 | testLexerAndParserFromFile testFile 2 27 | 28 | printfn "Press any key to continue..." 29 | System.Console.ReadLine() |> ignore 30 | 31 | 32 | 33 | -------------------------------------------------------------------------------- /tests/JsonLexAndYaccExample/JsonValue.fs: -------------------------------------------------------------------------------- 1 | module JsonParsing 2 | 3 | 4 | type JsonValue = 5 | | Assoc of (string * JsonValue) list 6 | | Bool of bool 7 | | Float of float 8 | | Int of int 9 | | List of JsonValue list 10 | | Null 11 | | String of string 12 | 13 | 14 | //below function is not important, it simply prints values 15 | static member print x = 16 | match x with 17 | | Bool b -> sprintf "Bool(%b)" b 18 | | Float f -> sprintf "Float(%f)" f 19 | | Int d -> sprintf "Int(%d)" d 20 | | String s -> sprintf "String(%s)" s 21 | | Null -> "Null()" 22 | | Assoc props -> props 23 | |> List.map (fun (name,value) -> sprintf "\"%s\" : %s" name (JsonValue.print(value))) 24 | |> String.concat "," 25 | |> sprintf "Assoc(%s)" 26 | | List values -> values 27 | |> List.map (fun value -> JsonValue.print(value)) 28 | |> String.concat "," 29 | |> sprintf "List(%s)" 30 | -------------------------------------------------------------------------------- /tests/LexAndYaccMiniProject/LexAndYaccMiniProject.fsproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Exe 5 | net6.0 6 | ..\..\src\FsLex\bin\$(Configuration)\net6.0 7 | ..\..\src\FsYacc\bin\$(Configuration)\net6.0 8 | 9 | 10 | 11 | --module Parser 12 | 13 | 14 | --module Lexer --unicode 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | 
Copyright (c) Microsoft Corporation. 4 | All rights reserved. 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 
23 | 24 | -------------------------------------------------------------------------------- /tests/JsonLexAndYaccExample/Script.fsx: -------------------------------------------------------------------------------- 1 | #r "bin\\debug\\FsLexYacc.Runtime.dll" 2 | #r "bin\\debug\\JsonLexAndYaccExample.exe" 3 | 4 | open FSharp.Text.Lexing 5 | open JsonParsing 6 | open System.IO 7 | 8 | let parse json = 9 | let lexbuf = LexBuffer.FromString json 10 | let res = Parser.start Lexer.read lexbuf 11 | res 12 | 13 | //a few parsing tests with simple and complex json 14 | let simpleJson = "{\"f\" : 1, \"x\" : 1}" 15 | let (Some parseResult) = simpleJson |> parse 16 | printfn "%s" (JsonValue.print parseResult) 17 | 18 | 19 | let simpleJson2 = @"{ 20 | ""title"": ""Cities"", 21 | ""cities"": [ 22 | { ""name"": ""Chicago"", ""zips"": [60601,60600] }, 23 | { ""name"": ""New York"", ""zips"": [10001] } 24 | ] 25 | }" 26 | let (Some parseResult2) = simpleJson2 |> parse 27 | printfn "%s" (JsonValue.print parseResult2) 28 | 29 | 30 | let complexJson = File.ReadAllText (Path.Combine(__SOURCE_DIRECTORY__,"randomComplexTestsJson.json")) 31 | complexJson |> parse |> ignore 32 | 33 | 34 | //test lexing error 35 | try 36 | let simpleJson = "{\"f\" ;" 37 | let (Some parseResult) = simpleJson |> parse 38 | printfn "%s" (JsonValue.print parseResult) 39 | with 40 | | e -> printfn "Error is expected here: \n %s" (e.Message) -------------------------------------------------------------------------------- /tests/JsonLexAndYaccExample/JsonLexAndYaccExample.fsproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Exe 5 | net6.0 6 | ..\..\src\FsLex\bin\$(Configuration)\net6.0 7 | ..\..\src\FsYacc\bin\$(Configuration)\net6.0 8 | 9 | 10 | 11 | 12 | --module Parser 13 | 14 | 15 | --unicode 16 | 17 | 18 | 19 | 20 | 21 | 22 | Always 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | -------------------------------------------------------------------------------- 
/tests/JsonLexAndYaccExample/Parser.fsy: -------------------------------------------------------------------------------- 1 | //This parser has been written with the help of the "Real world OCaml" book by Yaron Minsky, Anil Madhavapeddy, Jason Hickey (chapter 15) 2 | %{ 3 | open JsonParsing 4 | %} 5 | 6 | %start start 7 | 8 | %token INT 9 | %token FLOAT 10 | %token ID 11 | %token STRING 12 | %token TRUE 13 | %token FALSE 14 | %token NULL 15 | %token LEFT_BRACE 16 | %token RIGHT_BRACE 17 | %token LEFT_BRACK 18 | %token RIGHT_BRACK 19 | %token COLON 20 | %token COMMA 21 | %token EOF 22 | %type start 23 | 24 | %% 25 | 26 | start: prog { $1 } 27 | 28 | prog: 29 | | EOF { None } 30 | | value { Some $1 } 31 | 32 | value: 33 | | LEFT_BRACE object_fields RIGHT_BRACE { Assoc $2 } 34 | | LEFT_BRACK array_values RIGHT_BRACK { List $2 } 35 | | STRING { String $1 } 36 | | INT { Int $1 } 37 | | FLOAT { Float $1 } 38 | | TRUE { Bool true } 39 | | FALSE { Bool false } 40 | | NULL { Null } 41 | 42 | object_fields: rev_object_fields { List.rev $1 }; 43 | 44 | rev_object_fields: 45 | | { [] } 46 | | STRING COLON value { [($1,$3)] } 47 | | rev_object_fields COMMA STRING COLON value { ($3, $5) :: $1 } 48 | 49 | array_values: 50 | | { [] } 51 | | rev_values { List.rev $1 } 52 | 53 | rev_values: 54 | | value { [$1] } 55 | | rev_values COMMA value { $3 :: $1 } -------------------------------------------------------------------------------- /.github/workflows/push-main.yml: -------------------------------------------------------------------------------- 1 | name: Build and Test and Publish (master) 2 | 3 | on: 4 | push: 5 | branches: 6 | - master 7 | 8 | permissions: 9 | contents: read 10 | pages: write 11 | id-token: write 12 | 13 | jobs: 14 | build: 15 | 16 | strategy: 17 | fail-fast: false 18 | matrix: 19 | os: [ubuntu-latest] 20 | runs-on: ${{ matrix.os }} 21 | 22 | steps: 23 | - uses: actions/checkout@v3 24 | - name: Setup .NET for main project build 25 | uses: actions/setup-dotnet@v3 26 | - 
name: Install local tools 27 | run: dotnet tool restore 28 | - name: Paket restore 29 | run: dotnet paket restore 30 | - name: Build 31 | run: dotnet fake run build.fsx -t Release 32 | - name: Publish NuGets (if main version changed) 33 | run: dotnet nuget push "bin/*.nupkg" -s https://api.nuget.org/v3/index.json -k ${{ secrets.NUGET_KEY }} --skip-duplicate 34 | - name: Build documentation 35 | run: dotnet fake run build.fsx -t GenerateDocs 36 | - name: Upload documentation 37 | uses: actions/upload-pages-artifact@v1 38 | with: 39 | path: ./output 40 | 41 | docs: 42 | runs-on: ubuntu-latest 43 | needs: build 44 | steps: 45 | - name: Deploy to GitHub Pages 46 | id: deployment 47 | uses: actions/deploy-pages@v1 48 | -------------------------------------------------------------------------------- /tests/JsonLexAndYaccExample/Program.fs: -------------------------------------------------------------------------------- 1 | 2 | module Program 3 | open FSharp.Text.Lexing 4 | open JsonParsing 5 | 6 | [] 7 | let main argv = 8 | let parse json = 9 | let lexbuf = LexBuffer.FromString json 10 | let res = Parser.start Lexer.read lexbuf 11 | res 12 | 13 | //a few parsing tests with simple and complex json 14 | let simpleJson = "{\"f\" : 1}" 15 | let parseResult = simpleJson |> parse 16 | printfn "%s" (JsonValue.print parseResult.Value) 17 | 18 | 19 | let simpleJson2 = @"{ 20 | ""title"": ""Cities"", 21 | ""cities"": [ 22 | { ""name"": ""Chicago"", ""zips"": [60601,60600] }, 23 | { ""name"": ""New York"", ""zips"": [10001] } 24 | ] 25 | }" 26 | let parseResult2 = simpleJson2 |> parse 27 | printfn "%s" (JsonValue.print parseResult2.Value) 28 | 29 | let complexJson = System.IO.File.ReadAllText "randomComplexTestsJson.json" 30 | complexJson |> parse |> ignore 31 | 32 | 33 | //test lexing error 34 | try 35 | let simpleJson = "{\"f\"\n" + "\n" + ";" 36 | let parseResult = simpleJson |> parse 37 | printfn "%s" (JsonValue.print parseResult.Value) 38 | with 39 | | e -> printfn "Error is 
expected here: \n %s" (e.Message) 40 | 41 | 0 -------------------------------------------------------------------------------- /src/Directory.Build.props: -------------------------------------------------------------------------------- 1 | 2 | 3 | true 4 | true 5 | Microsoft Corporation, Don Syme, F# Software Foundation contributors 6 | F#, fsharp, yacc, fsyacc, lex, parsing, lexing, fslex 7 | MIT 8 | logo.png 9 | https://raw.githubusercontent.com/fsprojects/FsLexYacc/master/docs/img/logo.png 10 | https://fsprojects.github.io/FsLexYacc/ 11 | embedded 12 | true 13 | true 14 | $(AllowedOutputExtensionsInPackageBuildOutputFolder);.xml 15 | https://github.com/fsprojects/fslexYacc/blob/master/LICENSE.txt 16 | https://github.com/fsprojects/FsLexYacc/blob/master/RELEASE_NOTES.md 17 | https://github.com/fsprojects/FsLexYacc/ 18 | img/logo.png 19 | img/favicon.ico 20 | 21 | -------------------------------------------------------------------------------- /tests/fsyacc/unicode/test1-unicode-lex.fsl: -------------------------------------------------------------------------------- 1 | 2 | { 3 | module TestLexer 4 | open TestParser 5 | } 6 | 7 | (* These specifications follow the C# specification *) 8 | let digit = '\Nd' 9 | let letter = '\Lu' | '\Ll' | '\Lm' | '\Lo' | '\Nl' 10 | 11 | let ident_start_char = letter | ['_'] 12 | 13 | let connecting_char = '\Pc' 14 | let combining_char = '\Mn' | '\Mc' 15 | let formatting_char = '\Cf' 16 | let ident_char = letter | digit | connecting_char | combining_char | formatting_char 17 | 18 | let ident = ident_start_char ident_char* 19 | 20 | let whitespace = 21 | '\Zs' 22 | | '\u0009' (* horizontal tab *) 23 | | '\u000B' (* vertical tab *) 24 | | '\u000C' (* form feed *) 25 | | '\u000D' (* carriage return *) 26 | | '\u000A' (* line feed *) 27 | | '\u0085' (* next line *) 28 | | '\u2028' (* line separator *) 29 | | '\u2029' (* paragraph separator *) 30 | 31 | 32 | rule token = parse 33 | | "(" { LPAREN } 34 | (* the "approx equals" symbol, 
just to test a random specific Unicode character *) 35 | | '≈'+ { IDENT(new System.String(lexbuf.Lexeme) ) } 36 | (* | '\U00002248'+ { IDENT(new System.String(lexbuf.Lexeme) ) } *) 37 | 38 | (* the "not equals" symbol, just to test a random specific Unicode character *) 39 | | '≠'+ { IDENT(new System.String(lexbuf.Lexeme) ) } 40 | (* | '\U00002260'+ { IDENT(new System.String(lexbuf.Lexeme) ) } *) 41 | | ")" { RPAREN } 42 | | "*" { STAR } 43 | | "+" { PLUS } 44 | | "-" { MINUS } 45 | | ident { let s = new System.String(lexbuf.Lexeme) 46 | match s with 47 | | "let" -> LET 48 | | "in" -> IN 49 | | "end" -> END 50 | | _ -> IDENT(s) } 51 | | whitespace { token lexbuf } 52 | | eof { EOF } 53 | 54 | -------------------------------------------------------------------------------- /tests/JsonLexAndYaccExample/Lexer.fsl: -------------------------------------------------------------------------------- 1 | //This lexer has been written with the help of the "Real world OCaml" book by Yaron Minsky, Anil Madhavapeddy, Jason Hickey (chapter 15) 2 | { 3 | 4 | module Lexer 5 | 6 | open FSharp.Text.Lexing 7 | open System 8 | open Parser 9 | 10 | exception SyntaxError of string 11 | 12 | let lexeme = LexBuffer<_>.LexemeString 13 | 14 | let newline (lexbuf: LexBuffer<_>) = 15 | lexbuf.EndPos <- lexbuf.EndPos.NextLine 16 | } 17 | 18 | let int = ['-' '+']? ['0'-'9']+ 19 | let digit = ['0'-'9'] 20 | let frac = '.' digit* 21 | let exp = ['e' 'E'] ['-' '+']? digit+ 22 | let float = '-'? digit* frac? exp?
23 | 24 | let white = [' ' '\t']+ 25 | let newline = '\r' | '\n' | "\r\n" 26 | 27 | rule read = 28 | parse 29 | | white { read lexbuf } 30 | | newline { newline lexbuf; read lexbuf } 31 | | int { INT (int (lexeme lexbuf)) } 32 | | float { FLOAT (float (lexeme lexbuf)) } 33 | | "true" { TRUE } 34 | | "false" { FALSE } 35 | | "null" { NULL } 36 | | '"' { read_string "" false lexbuf } 37 | | '{' { LEFT_BRACE } 38 | | '}' { RIGHT_BRACE } 39 | | '[' { LEFT_BRACK } 40 | | ']' { RIGHT_BRACK } 41 | | ':' { COLON } 42 | | ',' { COMMA } 43 | | eof { EOF } 44 | | _ { raise (Exception (sprintf "SyntaxError: Unexpected char: '%s' Line: %d Column: %d" (lexeme lexbuf) (lexbuf.StartPos.Line+1) (lexbuf.StartPos.Column+1))) } 45 | 46 | 47 | and read_string str ignorequote = 48 | parse 49 | | '"' { if ignorequote then (read_string (str+"\\\"") false lexbuf) else STRING (str) } 50 | | '\\' { read_string str true lexbuf } 51 | | [^ '"' '\\']+ { read_string (str+(lexeme lexbuf)) false lexbuf } 52 | | eof { raise (Exception ("String is not terminated")) } -------------------------------------------------------------------------------- /docs/index.fsx: -------------------------------------------------------------------------------- 1 | (*** hide ***) 2 | // This block of code is omitted in the generated HTML documentation. Use 3 | // it to define helpers that you do not want to show in the documentation. 
4 | // #I "../../bin" 5 | 6 | (** 7 | FsLex, FsYacc 8 | ============= 9 | 10 | Example: 11 | 12 | * [Project File](https://github.com/fsprojects/FsLexYacc/blob/master/tests/LexAndYaccMiniProject/LexAndYaccMiniProject.fsproj) 13 | * [Lexer](https://github.com/fsprojects/FsLexYacc/blob/master/tests/LexAndYaccMiniProject/Lexer.fsl) 14 | * [Parser](https://github.com/fsprojects/FsLexYacc/blob/master/tests/LexAndYaccMiniProject/Parser.fsy) 15 | * [Program](https://github.com/fsprojects/FsLexYacc/blob/master/tests/LexAndYaccMiniProject/Program.fs) 16 | 17 | Video Tutorial: 18 | 19 | * [A gentle introduction to FsLexYacc](https://youtu.be/w7H_RQ6Fvvo?si=H0d2wBg9JcNCmJpn) 20 | 21 | *) 22 | 23 | (** 24 | 25 | Contributing and copyright 26 | -------------------------- 27 | 28 | The project is hosted on [GitHub][gh] where you can [report issues][issues], fork 29 | the project and submit pull requests. If you're adding new public API, please also 30 | consider adding [samples][content] that can be turned into a documentation. You might 31 | also want to read [library design notes][readme] to understand how it works. 32 | 33 | The library is available under the MIT license, see the 34 | [License file][license] in the GitHub repository. 
35 | 36 | [content]: https://github.com/fsprojects/FsLexYacc/tree/master/docs/content 37 | [gh]: https://github.com/fsprojects/FsLexYacc 38 | [issues]: https://github.com/fsprojects/FsLexYacc/issues 39 | [readme]: https://github.com/fsprojects/FsLexYacc/blob/master/README.md 40 | [license]: https://github.com/fsprojects/FsLexYacc/blob/master/LICENSE.txt 41 | *) 42 | -------------------------------------------------------------------------------- /tests/FsLex.Core.Tests/UnicodeTests.fs: -------------------------------------------------------------------------------- 1 | module FsLex.Core.Tests.UnicodeTests 2 | open System 3 | open System.Globalization 4 | open FsLexYacc.FsLex 5 | open Expecto 6 | 7 | [] 8 | let tests = 9 | testList "Unicode" [ 10 | testList "Unicode Categories" [ 11 | test "Every unicode category should have a mapping" { 12 | let allUnicodeCategories = Enum.GetValues(typeof) |> Seq.cast 13 | let mappedUnicodeCategories = AST.unicodeCategories.Values 14 | 15 | Expect.containsAll mappedUnicodeCategories allUnicodeCategories "Not all unicode categories are mapped" 16 | } 17 | 18 | test "IsUnicodeCategory should recognize every encoded unicode category" { 19 | let unicodeCategoriesAsStrings = AST.unicodeCategories.Keys 20 | let encodedUnicodeCategories = 21 | unicodeCategoriesAsStrings 22 | |> Seq.map (fun uc -> AST.EncodeUnicodeCategory uc {unicode=true; caseInsensitive=false}) 23 | 24 | Expect.all encodedUnicodeCategories AST.IsUnicodeCategory "Not all encoded unicode categories are recognized" 25 | } 26 | 27 | testProperty "TryDecodeUnicodeCategory should decode all valid EncodeUnicodeCategoryIndex outputs" <| fun (a:UnicodeCategory) -> 28 | a |> int |> AST.EncodeUnicodeCategoryIndex |> AST.TryDecodeUnicodeCategory = Some a 29 | 30 | 31 | testProperty "TryDecodeUnicodeCategory should return None for all EncodeChar outputs" <| fun (c:FsCheck.UnicodeChar) -> 32 | let encodedChar = AST.EncodeChar (c.Get) {unicode=true; caseInsensitive=false} 33 | 
encodedChar |> AST.TryDecodeUnicodeCategory = None 34 | ] 35 | ] 36 | -------------------------------------------------------------------------------- /tests/fsyacc/Test2/test2.fsy: -------------------------------------------------------------------------------- 1 | %{ 2 | //module TestParser 3 | 4 | /// Stephan Tolksdorf reported a bug where quotation characters in headers and semantic 5 | /// actions caused the parser generator to fail with an "unterminated string" comment. 6 | let testQuotationCharInHeader1 = '"' 7 | let testQuotationCharInHeader2 = '\"' 8 | 9 | open Microsoft.FSharp.Compatibility.OCaml 10 | 11 | %} 12 | 13 | %type start 14 | %token MINUS STAR LPAREN RPAREN PLUS EOF LET IN END 15 | %token IDENT 16 | %start start 17 | 18 | %right MINUS 19 | %left PLUS 20 | %left STAR 21 | %% 22 | 23 | start: decls EOF { System.Console.WriteLine("#decls = {0}.", List.length $1); Tree.Node("decls",$1) } 24 | 25 | decls: decls decl { $2 :: $1 } | decl { [$1] } 26 | 27 | 28 | decl: IDENT expr { 29 | /// Stephan Tolksdorf reported a bug where quotation characters in headers and semantic 30 | /// actions caused the parser generator to fail with an "unterminated string" comment. 31 | let testQuotationCharInHeader1 = '"' 32 | let testQuotationCharInHeader2 = '\"' 33 | Tree.Node("decl",[$2]) } 34 | 35 | expr: expr MINUS expr { Tree.Node("-",[$1;$3]) } 36 | | expr PLUS expr { Tree.Node("+",[$1;$3]) } 37 | | expr STAR expr { Tree.Node("*",[$1;$3]) } 38 | | LPAREN expr RPAREN { $2 } 39 | | LET decl IN expr END { $4 } 40 | | LET error IN expr END { System.Console.Error.WriteLine("invisible error recovery successful."); $4 } 41 | | LPAREN expr error { System.Console.Error.WriteLine("Missing paren: visible recovery successful."); $2 } 42 | | RPAREN RPAREN RPAREN { System.Console.Error.WriteLine("Three parens is a bit rich - why not use Lisp if you like that sort of thing. 
Raising explicit parse error, which we will recover from."); 43 | raise FSharp.Text.Parsing.RecoverableParseError } 44 | | IDENT { Tree.Node($1,[]) } 45 | 46 | 47 | 48 | -------------------------------------------------------------------------------- /src/FsYacc.Core/fsyaccpars.fsi: -------------------------------------------------------------------------------- 1 | // Signature file for parser generated by fsyacc 2 | module FsLexYacc.FsYacc.Parser 3 | type token = 4 | | TOKEN of (string option) 5 | | TYPE of (string) 6 | | BAR 7 | | PERCENT_PERCENT 8 | | START 9 | | LEFT 10 | | RIGHT 11 | | NONASSOC 12 | | LESS 13 | | GREATER 14 | | COLON 15 | | PREC 16 | | SEMI 17 | | EOF 18 | | ERROR 19 | | HEADER of (AST.Code) 20 | | CODE of (AST.Code) 21 | | IDENT of (string) 22 | type tokenId = 23 | | TOKEN_TOKEN 24 | | TOKEN_TYPE 25 | | TOKEN_BAR 26 | | TOKEN_PERCENT_PERCENT 27 | | TOKEN_START 28 | | TOKEN_LEFT 29 | | TOKEN_RIGHT 30 | | TOKEN_NONASSOC 31 | | TOKEN_LESS 32 | | TOKEN_GREATER 33 | | TOKEN_COLON 34 | | TOKEN_PREC 35 | | TOKEN_SEMI 36 | | TOKEN_EOF 37 | | TOKEN_ERROR 38 | | TOKEN_HEADER 39 | | TOKEN_CODE 40 | | TOKEN_IDENT 41 | | TOKEN_end_of_input 42 | | TOKEN_error 43 | type nonTerminalId = 44 | | NONTERM__startspec 45 | | NONTERM_spec 46 | | NONTERM_headeropt 47 | | NONTERM_decls 48 | | NONTERM_decl 49 | | NONTERM_idents 50 | | NONTERM_rules 51 | | NONTERM_rule 52 | | NONTERM_optbar 53 | | NONTERM_optsemi 54 | | NONTERM_clauses 55 | | NONTERM_clause 56 | | NONTERM_syms 57 | | NONTERM_optprec 58 | /// This function maps tokens to integer indexes 59 | val tagOfToken: token -> int 60 | 61 | /// This function maps integer indexes to symbolic token ids 62 | val tokenTagToTokenId: int -> tokenId 63 | 64 | /// This function maps production indexes returned in syntax errors to strings representing the non terminal that would be produced by that production 65 | val prodIdxToNonTerminal: int -> nonTerminalId 66 | 67 | /// This function gets the name of a token as a 
string 68 | val token_to_string: token -> string 69 | val spec : (FSharp.Text.Lexing.LexBuffer<'cty> -> token) -> FSharp.Text.Lexing.LexBuffer<'cty> -> (AST.ParserSpec) 70 | -------------------------------------------------------------------------------- /tests/fsyacc/repro1885/repro1885.fsl: -------------------------------------------------------------------------------- 1 | { 2 | (* 3 | Repro of FSharp Bugs 1885, "FSLex doesn't ignore strings in comments" 4 | *) 5 | 6 | open System 7 | 8 | (* 9 | Testcase " <-------------------------- 10 | *) 11 | 12 | (* 13 | Testcase "asdfasdfasdf" <-------------------------- 14 | *) 15 | 16 | // Opens methods related to fslex.exe 17 | open Lexing 18 | 19 | // All of our token types are now generated by fsYacc 20 | open Parser 21 | 22 | 23 | let inc_lnum bol pos = 24 | let lnum = pos.pos_lnum in 25 | {pos with pos_lnum = lnum+1; pos_bol = bol } 26 | 27 | let newline lexbuf = 28 | lexbuf_set_curr_p lexbuf 29 | ( inc_lnum (lexeme_end lexbuf) (lexeme_end_p lexbuf)) 30 | 31 | // Convert a string such as "\"c:\\windows\\\"" into "c:\windows\" <-------------------------- 32 | // "another testcase" <-------------------------- 33 | // "and another <-------------------------- 34 | let normalizeString (str : string) = 35 | let str = str.Replace("\\\"", "\"") 36 | let str = str.Replace("\\\\", "\\") 37 | if str.[0] = '\"' && str.[str.Length - 1] = '\"' then 38 | str.Substring(1, str.Length - 2) 39 | else 40 | str 41 | 42 | } 43 | 44 | // Regular expressions 45 | let whitespace = [' ' '\t' ] 46 | let newline = ('\n' | '\r' '\n') 47 | let str = '\"' [^ '\"']* '\"' 48 | 49 | rule tokenstream = parse 50 | // -------------------------- 51 | | "{" { LCURLY } 52 | | "}" { RCURLY } 53 | | "=" { EQUALS } 54 | // -------------------------- 55 | | str { STR(lexeme lexbuf) } 56 | // -------------------------- 57 | | whitespace { tokenstream lexbuf } 58 | | newline { newline lexbuf; tokenstream lexbuf } 59 | // -------------------------- 60 | | _ { 
STR("ParseError" + (lexeme lexbuf)) } 61 | | eof { EOF } -------------------------------------------------------------------------------- /src/Common/Arg.fsi: -------------------------------------------------------------------------------- 1 | // (c) Microsoft Corporation 2005-2009. 2 | 3 | // A simple command-line argument processor. 4 | namespace FSharp.Text 5 | 6 | /// The spec value describes the action of the argument, 7 | /// and whether it expects a following parameter. 8 | [] 9 | type ArgType = 10 | static member Clear: bool ref -> ArgType 11 | static member Float: (float -> unit) -> ArgType 12 | static member Int: (int -> unit) -> ArgType 13 | static member Rest: (string -> unit) -> ArgType 14 | static member Set: bool ref -> ArgType 15 | static member String: (string -> unit) -> ArgType 16 | static member Unit: (unit -> unit) -> ArgType 17 | 18 | type ArgInfo = 19 | new: name: string * action: ArgType * help: string -> ArgInfo 20 | /// Return the name of the argument 21 | member Name: string 22 | /// Return the argument type and action of the argument 23 | member ArgType: ArgType 24 | /// Return the usage help associated with the argument 25 | member HelpText: string 26 | 27 | [] 28 | type ArgParser = 29 | #if FX_NO_COMMAND_LINE_ARGS 30 | #else 31 | 32 | /// Parse some of the arguments given by 'argv', starting at the given position 33 | [] 34 | static member ParsePartial: 35 | cursor: int ref * argv: string[] * arguments: seq * ?otherArgs: (string -> unit) * ?usageText: string -> unit 36 | 37 | /// Parse the arguments given by System.Environment.GetCommandLineArgs() 38 | /// according to the argument processing specifications "specs". 39 | /// Args begin with "-". Non-arguments are passed to "f" in 40 | /// order. "use" is printed as part of the usage line if an error occurs. 41 | 42 | static member Parse: arguments: seq * ?otherArgs: (string -> unit) * ?usageText: string -> unit 43 | #endif 44 | 45 | /// Prints the help for each argument. 
46 | static member Usage: arguments: seq * ?usage: string -> unit 47 | -------------------------------------------------------------------------------- /nuget/FsLexYacc.template: -------------------------------------------------------------------------------- 1 | type file 2 | id FsLexYacc 3 | description 4 | Tools for FsLex/FsYacc lexer/parser generation tools 5 | authors 6 | Microsoft Corporation, Don Syme, F# Software Foundation contributors 7 | summary 8 | Tools for FsLex/FsYacc lexer/parser generation tools 9 | licenseurl https://github.com/fsprojects/FsLexYacc/blob/master/LICENSE.txt 10 | projecturl https://github.com/fsprojects/FsLexYacc 11 | iconurl https://raw.githubusercontent.com/fsprojects/FsLexYacc/master/docs/img/logo.png 12 | tags 13 | F#, fsharp, yacc, fsyacc, lex, parsing, lexing, fslex 14 | files 15 | ../src/FsLex/bin/Release/net6.0/publish ==> build/fslex/net6.0 16 | ../src/FsYacc/bin/Release/net6.0/publish ==> build/fsyacc/net6.0 17 | ../src/FsLexYacc.Build.Tasks/FsLexYacc.targets ==> build 18 | ../src/FsLexYacc.Runtime/Lexing.fsi ==> src/fslex 19 | ../src/FsLexYacc.Runtime/Lexing.fs ==> src/fslex 20 | ../src/FsLexYacc.Runtime/Parsing.fsi ==> src/fslex 21 | ../src/FsLexYacc.Runtime/Parsing.fs ==> src/fslex 22 | ../src/Common/Arg.fsi ==> src/fslex 23 | ../src/Common/Arg.fs ==> src/fslex 24 | ../src/FsLex.Core/fslexast.fs ==> src/fslex 25 | ../src/FsLex.Core/fslexpars.fs ==> src/fslex 26 | ../src/FsLex.Core/fslexlex.fs ==> src/fslex 27 | ../src/FsLex/fslex.fs ==> src/fslex 28 | ../src/FsLex/fslex.fsx ==> src/fslex 29 | ../src/FsLexYacc.Runtime/Lexing.fsi ==> src/fsyacc 30 | ../src/FsLexYacc.Runtime/Lexing.fs ==> src/fsyacc 31 | ../src/FsLexYacc.Runtime/Parsing.fsi ==> src/fsyacc 32 | ../src/FsLexYacc.Runtime/Parsing.fs ==> src/fsyacc 33 | ../src/Common/Arg.fsi ==> src/fsyacc 34 | ../src/Common/Arg.fs ==> src/fsyacc 35 | ../src/FsYacc.Core/fsyaccast.fs ==> src/fsyacc 36 | ../src/FsYacc.Core/fsyaccpars.fs ==> src/fsyacc 37 | 
../src/FsYacc.Core/fsyacclex.fs ==> src/fsyacc 38 | ../src/FsYacc/fsyacc.fs ==> src/fsyacc 39 | ../src/FsYacc/fsyacc.fsx ==> src/fsyacc 40 | ../src/FsLexYacc.Build.Tasks/FsLexYacc.targets ==> src 41 | dependencies 42 | framework: netstandard2.0 43 | FsLexYacc.Runtime >= CURRENTVERSION 44 | FSharp.Core >= LOCKEDVERSION 45 | -------------------------------------------------------------------------------- /src/FsLex.Core/fslexpars.fsi: -------------------------------------------------------------------------------- 1 | // Signature file for parser generated by fsyacc 2 | module FsLexYacc.FsLex.Parser 3 | type token = 4 | | EOF 5 | | BAR 6 | | DOT 7 | | PLUS 8 | | STAR 9 | | QMARK 10 | | EQUALS 11 | | UNDERSCORE 12 | | LBRACK 13 | | RBRACK 14 | | HAT 15 | | DASH 16 | | RULE 17 | | PARSE 18 | | LET 19 | | AND 20 | | LPAREN 21 | | RPAREN 22 | | COLON 23 | | UNICODE_CATEGORY of (string) 24 | | CHAR of (char) 25 | | CODE of (AST.Code) 26 | | STRING of (string) 27 | | IDENT of (string) 28 | type tokenId = 29 | | TOKEN_EOF 30 | | TOKEN_BAR 31 | | TOKEN_DOT 32 | | TOKEN_PLUS 33 | | TOKEN_STAR 34 | | TOKEN_QMARK 35 | | TOKEN_EQUALS 36 | | TOKEN_UNDERSCORE 37 | | TOKEN_LBRACK 38 | | TOKEN_RBRACK 39 | | TOKEN_HAT 40 | | TOKEN_DASH 41 | | TOKEN_RULE 42 | | TOKEN_PARSE 43 | | TOKEN_LET 44 | | TOKEN_AND 45 | | TOKEN_LPAREN 46 | | TOKEN_RPAREN 47 | | TOKEN_COLON 48 | | TOKEN_UNICODE_CATEGORY 49 | | TOKEN_CHAR 50 | | TOKEN_CODE 51 | | TOKEN_STRING 52 | | TOKEN_IDENT 53 | | TOKEN_end_of_input 54 | | TOKEN_error 55 | type nonTerminalId = 56 | | NONTERM__startspec 57 | | NONTERM_spec 58 | | NONTERM_codeopt 59 | | NONTERM_Macros 60 | | NONTERM_macro 61 | | NONTERM_Rules 62 | | NONTERM_rule 63 | | NONTERM_args 64 | | NONTERM_optbar 65 | | NONTERM_clauses 66 | | NONTERM_clause 67 | | NONTERM_regexp 68 | | NONTERM_charset 69 | /// This function maps tokens to integer indexes 70 | val tagOfToken: token -> int 71 | 72 | /// This function maps integer indexes to symbolic token ids 73 | val 
tokenTagToTokenId: int -> tokenId 74 | 75 | /// This function maps production indexes returned in syntax errors to strings representing the non terminal that would be produced by that production 76 | val prodIdxToNonTerminal: int -> nonTerminalId 77 | 78 | /// This function gets the name of a token as a string 79 | val token_to_string: token -> string 80 | val spec : (FSharp.Text.Lexing.LexBuffer<'cty> -> token) -> FSharp.Text.Lexing.LexBuffer<'cty> -> (AST.Spec) 81 | -------------------------------------------------------------------------------- /src/FsYacc.Core/fsyaccpars.fsy: -------------------------------------------------------------------------------- 1 | %{ 2 | (* (c) Microsoft Corporation 2005-2008. *) 3 | 4 | // FsLexYacc.FsYacc.Parser 5 | 6 | open FsLexYacc.FsYacc 7 | open FsLexYacc.FsYacc.AST 8 | 9 | #nowarn "62" // This construct is for ML compatibility 10 | #nowarn "64" // Turn off warnings that type variables used in production annotations are instantiated to concrete type" 11 | 12 | %} 13 | 14 | %type spec 15 | %token IDENT 16 | %token HEADER CODE 17 | %token BAR PERCENT_PERCENT START LEFT RIGHT NONASSOC LESS GREATER COLON PREC SEMI EOF ERROR 18 | %token TYPE 19 | %token TOKEN 20 | %start spec 21 | %left BAR 22 | %% 23 | 24 | spec: 25 | headeropt decls PERCENT_PERCENT rules 26 | { List.foldBack (fun f x -> f x) $2 { Header=$1;Tokens=[];Types=[];Associativities=[];StartSymbols=[];Rules=$4 } } 27 | 28 | headeropt: 29 | | HEADER 30 | { $1 } 31 | | 32 | { "", (parseState.ResultRange |> fst)} 33 | 34 | decls: 35 | { [] } 36 | | decl decls { $1 :: $2 } 37 | 38 | decl: 39 | TOKEN idents { (fun x -> {x with Tokens = x.Tokens @ (List.map (fun x -> (x,$1)) $2)}) } 40 | | TYPE idents { (fun x -> {x with Types = x.Types @ (List.map (fun x -> (x,$1)) $2)} ) } 41 | | START idents { (fun x -> {x with StartSymbols = x.StartSymbols @ $2} ) } 42 | | LEFT idents { (fun x -> {x with Associativities = x.Associativities @ [(List.map (fun x -> (x,LeftAssoc)) $2)]} ) } 43 
| | RIGHT idents { (fun x -> {x with Associativities = x.Associativities @ [(List.map (fun x -> (x,RightAssoc)) $2)]} ) } 44 | | NONASSOC idents { (fun x -> {x with Associativities = x.Associativities @ [(List.map (fun x -> (x,NonAssoc)) $2)]} ) } 45 | 46 | idents: IDENT idents { $1 :: $2 } | { [] } 47 | rules: rule rules { $1 :: $2 } | rule { [$1] } 48 | rule: IDENT COLON optbar clauses optsemi { ($1,$4) } 49 | optbar: { } | BAR { } 50 | optsemi: { } | SEMI { } 51 | clauses: clause BAR clauses {$1 :: $3 } | clause { [$1] } 52 | clause: syms optprec CODE { Rule($1,$2,Some $3) } 53 | syms: IDENT syms { $1 :: $2 } | ERROR syms { "error" :: $2 } | { [] } 54 | optprec: { None } | PREC IDENT { Some $2 } 55 | 56 | 57 | -------------------------------------------------------------------------------- /tests/fsyacc/Test2/test2.input1.tokens.bsl: -------------------------------------------------------------------------------- 1 | tokenize - getting one token 2 | tokenize - got IDENT, now at char 2 3 | ident char = 120 4 | tokenize - getting one token 5 | tokenize - got LPAREN, now at char 4 6 | tokenize - getting one token 7 | tokenize - got IDENT, now at char 5 8 | ident char = 105 9 | ident char = 100 10 | tokenize - getting one token 11 | tokenize - got PLUS, now at char 8 12 | tokenize - getting one token 13 | tokenize - got IDENT, now at char 10 14 | ident char = 105 15 | ident char = 100 16 | tokenize - getting one token 17 | tokenize - got RPAREN, now at char 12 18 | tokenize - getting one token 19 | tokenize - got IDENT, now at char 15 20 | ident char = 121 21 | tokenize - getting one token 22 | tokenize - got LPAREN, now at char 17 23 | tokenize - getting one token 24 | tokenize - got IDENT, now at char 18 25 | ident char = 105 26 | ident char = 100 27 | tokenize - getting one token 28 | tokenize - got PLUS, now at char 21 29 | tokenize - getting one token 30 | tokenize - got IDENT, now at char 23 31 | ident char = 105 32 | ident char = 100 33 | tokenize - getting 
one token 34 | tokenize - got PLUS, now at char 26 35 | tokenize - getting one token 36 | tokenize - got IDENT, now at char 28 37 | ident char = 105 38 | ident char = 100 39 | tokenize - getting one token 40 | tokenize - got RPAREN, now at char 30 41 | tokenize - getting one token 42 | tokenize - got IDENT, now at char 33 43 | ident char = 122 44 | tokenize - getting one token 45 | tokenize - got LPAREN, now at char 35 46 | tokenize - getting one token 47 | tokenize - got IDENT, now at char 36 48 | ident char = 105 49 | ident char = 100 50 | tokenize - getting one token 51 | tokenize - got PLUS, now at char 39 52 | tokenize - getting one token 53 | tokenize - got IDENT, now at char 41 54 | ident char = 105 55 | ident char = 100 56 | tokenize - getting one token 57 | tokenize - got STAR, now at char 44 58 | tokenize - getting one token 59 | tokenize - got IDENT, now at char 46 60 | ident char = 105 61 | ident char = 100 62 | tokenize - getting one token 63 | tokenize - got RPAREN, now at char 48 64 | tokenize - getting one token 65 | tokenize - got EOF, now at char 51 66 | -------------------------------------------------------------------------------- /src/FsLex.Core/fslexpars.fsy: -------------------------------------------------------------------------------- 1 | %{ 2 | (* (c) Microsoft Corporation 2005-2008. 
*) 3 | 4 | open FsLexYacc.FsLex 5 | open FsLexYacc.FsLex.AST 6 | 7 | %} 8 | 9 | %type spec 10 | %token STRING IDENT 11 | %token CODE 12 | %token CHAR 13 | %token UNICODE_CATEGORY 14 | %token RULE PARSE LET AND LPAREN RPAREN COLON 15 | %token EOF BAR DOT PLUS STAR QMARK EQUALS UNDERSCORE LBRACK RBRACK HAT DASH 16 | %start spec 17 | %left BAR 18 | %left regexp_alt 19 | %left regexp_seq 20 | %nonassoc regexp_opt 21 | %nonassoc regexp_plus regexp_star 22 | %% 23 | 24 | spec: 25 | | codeopt Macros RULE Rules codeopt { 26 | { TopCode=$1;Macros=$2;Rules=$4;BottomCode=$5 } 27 | } 28 | 29 | codeopt: 30 | | CODE { $1 } 31 | | { "", (parseState.ResultRange |> fst) } 32 | 33 | Macros: 34 | | { [] } 35 | | macro Macros { 36 | $1 :: $2 37 | } 38 | 39 | macro: 40 | | LET IDENT EQUALS regexp { 41 | ($2, $4) 42 | } 43 | 44 | Rules: 45 | | rule AND Rules { 46 | $1 :: $3 47 | } 48 | | rule { [$1] } 49 | 50 | rule: 51 | | IDENT args EQUALS PARSE optbar clauses { 52 | ($1,$2,$6) 53 | } 54 | 55 | args: 56 | | { [] } 57 | | LPAREN IDENT COLON IDENT RPAREN args { RuleArgument.Typed($2, $4) :: $6 } 58 | | IDENT args { RuleArgument.Ident($1) :: $2 } 59 | 60 | optbar: 61 | | { } 62 | | BAR { } 63 | 64 | clauses: 65 | | clause BAR clauses {$1 :: $3 } 66 | | clause { [$1] } 67 | 68 | clause: 69 | | regexp CODE { $1, $2 } 70 | 71 | regexp: 72 | | CHAR { Inp(Alphabet(EncodeChar $1))} 73 | | UNICODE_CATEGORY { Inp(UnicodeCategory $1)} 74 | | EOF { Inp(Alphabet(fun ctx -> Eof))} 75 | | UNDERSCORE { Inp Any } 76 | | STRING { Seq([ for n in 0 .. 
$1.Length - 1 -> Inp(Alphabet(EncodeChar $1.[n]))])} 77 | | IDENT { Macro($1) } 78 | | regexp regexp %prec regexp_seq { Seq[$1;$2] } 79 | | regexp PLUS %prec regexp_plus { Seq[$1;Star $1] } 80 | | regexp STAR %prec regexp_star { Star $1 } 81 | | regexp QMARK %prec regexp_opt { Alt(fun ctx -> [Seq[];$1])} 82 | | regexp BAR regexp %prec regexp_alt { Alt(fun ctx -> [$1;$3])} 83 | | LPAREN regexp RPAREN { $2 } 84 | | LBRACK charset RBRACK { Alt (fun ctx -> [ for c in ($2 ctx) -> Inp(Alphabet(fun ctx -> c)) ])} 85 | | LBRACK HAT charset RBRACK { Inp(NotCharSet(fun ctx -> $3 ctx))} 86 | 87 | charset: 88 | | CHAR { fun ctx -> Set.singleton(EncodeChar $1 ctx)} 89 | | CHAR DASH CHAR { fun ctx -> Set.ofSeq [ for c in $1 .. $3 -> EncodeChar c ctx ]} 90 | | charset charset { fun ctx -> Set.union ($1 ctx) ($2 ctx)} 91 | 92 | 93 | -------------------------------------------------------------------------------- /RELEASE_NOTES.md: -------------------------------------------------------------------------------- 1 | #### 11.3.0 - Unreleased 2 | * Add Fable support to FsLexYacc.Runtime. 3 | 4 | #### 11.2.0 - 12 May, 2023 5 | * Add `--open` option for fslex. 6 | * Generate signature files for transformed files in fslex. 7 | 8 | #### 11.1.0 - 3 May, 2023 9 | * Add `--buffer-type-argument` option for fsyacc. 10 | 11 | #### 11.0.1 - 10 January, 2022 12 | * Resolve FSharp.Core dependency restriction #168 13 | 14 | #### 11.0.0 - 10 January, 2022 15 | * Migration to net6.0 #166 16 | * Fix Activating case insensitive option crash the lexer generator #141 17 | * Reuse produced reductions table #141 18 | 19 | #### 11.0.0-beta1 - 11 July, 2021 20 | * Break out core domain logic and generation into core libraries #144 21 | * Update FsLexYacc.targets #149 22 | * Avoid copying a string twice in LexBuffer.FromString. 
#150 23 | * Fix misc packaging issues #145 24 | 25 | #### 10.2.0 - 22 November, 2020 26 | * Enable running tools under .net 5.0 27 | 28 | #### 10.1.0 - 04 October, 2020 29 | * Add caseInsensitive option 30 | * Migration to netcoreapp3.1 31 | 32 | #### 10.0.0 - 24 October, 2019 33 | * Migration to netcoreapp3.0 based versions of FxLex and FsYacc 34 | 35 | #### 9.1.0 - 22 October, 2019 36 | * Make async lexing obsolete 37 | * Restart doc generation (manually) 38 | 39 | #### 9.0.3 - 12 April, 2019 40 | * Don't require FSharp.Core for tools package 41 | * Bootstrap using new package 42 | 43 | #### 9.0.2 - 12 April, 2019 44 | * Bootstrap using new package 45 | 46 | #### 9.0.1 - 12 April, 2019 47 | * Tools now run on .NET Core 48 | 49 | #### 8.0.1 - 21 March, 2019 50 | * Fix recursion problem 51 | * Support netstandard2.0 52 | * Build with dotnet toolchain 53 | * Cleanup runtime code 54 | 55 | #### 7.0.6 - 23 June, 2017 56 | * Add source to build 57 | 58 | #### 7.0.5 - February 1, 2017 59 | * Fix an error preventing the use of verbose mode 60 | 61 | #### 7.0.4 - January 22, 2017 62 | * Fix targets file for OSX 63 | 64 | #### 7.0.3 - November 29, 2016 65 | * Fix targets file when space in path 66 | 67 | #### 7.0.2 - November 5, 2016 68 | * Improve output 69 | 70 | #### 7.0.1 - November 5, 2016 71 | * Fix targets file 72 | * Remove and and just have the user pass them in via 73 | 74 | #### 7.0.0 - November 5, 2016 75 | * Use only profile 259, move to Paket, remove LKG 76 | * Remove the use of a task DLL 77 | 78 | #### 6.1.0 - March 20, 2015 79 | * Adding the package to solution automatically configures targets 80 | * Build system upgraded to MSBuild 4.0 81 | * Fixed Mono/Linux compilation 82 | * New example with a walkthrough 83 | 84 | #### 6.0.4 - September 15, 2014 85 | * Add profiles 7, 259 to runtime 86 | 87 | #### 6.0.3 - June 18 2014 88 | * FsLex/FsYacc output redirected to VS Output window 89 | * FsYacc verbose output added to MSBuild log (and VS Output window) 90 | 
91 | #### 6.0.2 - June 16 2014 92 | * Logo was added 93 | * FsLexYacc.Runtime published as a separate NuGet package 94 | 95 | #### 6.0.0 - April 18 2014 96 | * First release of the new packaging of fslex/fsyacc 97 | -------------------------------------------------------------------------------- /paket.lock: -------------------------------------------------------------------------------- 1 | STORAGE: NONE 2 | RESTRICTION: || (== net6.0) (== netstandard2.0) 3 | NUGET 4 | remote: https://api.nuget.org/v3/index.json 5 | Expecto (9.0.4) 6 | FSharp.Core (>= 4.6) 7 | Mono.Cecil (>= 0.11.3) 8 | Expecto.FsCheck (9.0.4) 9 | Expecto (>= 9.0.4) 10 | FsCheck (>= 2.14.3) 11 | FsCheck (2.16.5) 12 | FSharp.Core (>= 4.2.3) 13 | FSharp.Core (4.6.2) 14 | FsLexYacc (10.2) - copy_local: true 15 | FSharp.Core (>= 4.5.2) 16 | FsLexYacc.Runtime (>= 10.2 < 10.3) 17 | FsLexYacc.Runtime (10.2) - copy_local: true 18 | FSharp.Core (>= 4.5.2) 19 | Microsoft.Build.Tasks.Git (1.1.1) - copy_local: true 20 | Microsoft.CodeCoverage (17.4.1) - restriction: || (== net6.0) (&& (== netstandard2.0) (>= net462)) (&& (== netstandard2.0) (>= netcoreapp3.1)) 21 | Microsoft.NET.Test.Sdk (17.4.1) 22 | Microsoft.CodeCoverage (>= 17.4.1) - restriction: || (== net6.0) (&& (== netstandard2.0) (>= net462)) (&& (== netstandard2.0) (>= netcoreapp3.1)) 23 | Microsoft.TestPlatform.TestHost (>= 17.4.1) - restriction: || (== net6.0) (&& (== netstandard2.0) (>= netcoreapp3.1)) 24 | Microsoft.SourceLink.Common (1.1.1) - copy_local: true 25 | Microsoft.SourceLink.GitHub (1.1.1) - copy_local: true 26 | Microsoft.Build.Tasks.Git (>= 1.1.1) 27 | Microsoft.SourceLink.Common (>= 1.1.1) 28 | Microsoft.TestPlatform.ObjectModel (17.4.1) - restriction: || (== net6.0) (&& (== netstandard2.0) (>= netcoreapp3.1)) 29 | NuGet.Frameworks (>= 5.11) 30 | System.Reflection.Metadata (>= 1.6) 31 | Microsoft.TestPlatform.TestHost (17.4.1) - restriction: || (== net6.0) (&& (== netstandard2.0) (>= netcoreapp3.1)) 32 | 
Microsoft.TestPlatform.ObjectModel (>= 17.4.1) - restriction: || (== net6.0) (&& (== netstandard2.0) (>= netcoreapp3.1)) 33 | Newtonsoft.Json (>= 13.0.1) - restriction: || (== net6.0) (&& (== netstandard2.0) (>= netcoreapp3.1)) 34 | Mono.Cecil (0.11.4) 35 | Newtonsoft.Json (13.0.2) - restriction: || (== net6.0) (&& (== netstandard2.0) (>= netcoreapp3.1)) 36 | NuGet.Frameworks (6.4) - restriction: || (== net6.0) (&& (== netstandard2.0) (>= netcoreapp3.1)) 37 | System.Collections.Immutable (7.0) - restriction: || (== net6.0) (&& (== netstandard2.0) (>= netcoreapp3.1)) 38 | System.Runtime.CompilerServices.Unsafe (>= 6.0) 39 | System.Reflection.Metadata (7.0) - restriction: || (== net6.0) (&& (== netstandard2.0) (>= netcoreapp3.1)) 40 | System.Collections.Immutable (>= 7.0) 41 | System.Runtime.CompilerServices.Unsafe (6.0) - restriction: || (== net6.0) (&& (== netstandard2.0) (>= net6.0)) (&& (== netstandard2.0) (>= netcoreapp3.1)) 42 | YoloDev.Expecto.TestSdk (0.13.3) 43 | Expecto (>= 9.0 < 10.0) - restriction: || (== net6.0) (&& (== netstandard2.0) (>= netcoreapp3.1)) 44 | FSharp.Core (>= 4.6.2) - restriction: || (== net6.0) (&& (== netstandard2.0) (>= netcoreapp3.1)) 45 | System.Collections.Immutable (>= 6.0) - restriction: || (== net6.0) (&& (== netstandard2.0) (>= netcoreapp3.1)) 46 | -------------------------------------------------------------------------------- /src/FsLexYacc.Build.Tasks/FsLexYacc.targets: -------------------------------------------------------------------------------- 1 | 14 | 15 | 16 | 17 | CallFsLex;CallFsYacc;$(CompileDependsOn) 18 | $(MSBuildThisFileDirectory)/fslex/net6.0 19 | fslex.dll 20 | $(MSBuildThisFileDirectory)/fsyacc/net6.0 21 | fsyacc.dll 22 | dotnet 23 | 24 | 25 | 26 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | false 63 | 64 | 65 | false 66 | 67 | 68 | 69 | 70 | 71 | 
-------------------------------------------------------------------------------- /tests/fsyacc/main.fs: -------------------------------------------------------------------------------- 1 | open Tree 2 | open System.IO 3 | open Microsoft.FSharp.Quotations 4 | open Microsoft.FSharp.Compatibility.OCaml 5 | open FSharp.Text.Lexing 6 | let tokenize = ref false 7 | 8 | let usage = [ "--tokens", Arg.Set tokenize, "tokenize the first file and exit" ] 9 | 10 | let mutable inputs = [] 11 | 12 | Arg.parse usage (fun x -> inputs <- inputs @ [x]) "test... \nTests that all inputs give equivalent syntac trees" 13 | 14 | let createLexBuffer (a:Expr<'a->_>) (x:FileStream) : 'a = 15 | (if typeof<'a> = typeof> then 16 | x |> StreamReader |> LexBuffer<_>.FromTextReader :> obj 17 | elif typeof<'a> = typeof> then 18 | x |> BinaryReader |> LexBuffer<_>.FromBinaryReader :> obj 19 | else 20 | failwith "Pies") 21 | :?> _ 22 | 23 | if inputs = [] then 24 | Printf.eprintf "at least one input should be given\n"; 25 | try 26 | let results = 27 | inputs 28 | |> List.map 29 | (fun filename -> 30 | use is = File.OpenRead filename 31 | let lexbuf = createLexBuffer <@ TestLexer.token @> is 32 | if !tokenize then 33 | while true do 34 | Printf.eprintf "tokenize - getting one token\n" 35 | let t = TestLexer.token lexbuf 36 | Printf.eprintf "tokenize - got %s, now at char %d\n" (TestParser.token_to_string t) (lexbuf.StartPos).pos_cnum 37 | match t with 38 | | TestParser.EOF -> exit 0 39 | | TestParser.IDENT s -> 40 | for c in s do 41 | Printf.eprintf " ident char = %d\n" (int c) 42 | | _ -> () 43 | let tree = 44 | try 45 | TestParser.start TestLexer.token lexbuf 46 | with e -> 47 | Printf.eprintf "%s(%d,%d): error: %s\n" filename lexbuf.StartPos.pos_lnum (lexbuf.StartPos.pos_cnum - lexbuf.StartPos.pos_bol) (match e with Failure s -> s | _ -> e.ToString()) 48 | exit 1 49 | Printf.eprintf "parsed %s ok\n" filename 50 | (filename,tree) 51 | ) 52 | results 53 | |> List.iter 54 | (fun (filename1,tree1) -> 
55 | results 56 | |> List.iter 57 | (fun (filename2,tree2) -> 58 | if filename1 > filename2 then 59 | if tree1 <> tree2 then 60 | Printf.eprintf "file %s and file %s parsed to different results!\n" filename1 filename2 61 | let rec ptree os (Node(n,l)) = 62 | Printf.fprintf os "(%s %a)" n ptrees l 63 | and ptrees os l = 64 | match l with 65 | | [] -> () 66 | | [h] -> ptree os h 67 | | h::t -> Printf.fprintf os "%a %a" ptree h ptrees t 68 | Printf.eprintf "file %s = %a\n" filename1 ptree tree1 69 | Printf.eprintf "file %s = %a\n" filename2 ptree tree2 70 | exit 1 71 | ) 72 | ) 73 | with e -> 74 | Printf.eprintf "Error: %s\n" (match e with Failure s -> s | e -> e.ToString()); 75 | exit 1 76 | 77 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | ############################################################################### 2 | # Set default behavior to automatically normalize line endings. 3 | ############################################################################### 4 | * text=auto 5 | 6 | ############################################################################### 7 | # Set default behavior for command prompt diff. 8 | # 9 | # This is need for earlier builds of msysgit that does not have it on by 10 | # default for csharp files. 11 | # Note: This is only used by command line 12 | ############################################################################### 13 | #*.cs diff=csharp 14 | 15 | ############################################################################### 16 | # Set the merge driver for project and solution files 17 | # 18 | # Merging from the command prompt will add diff markers to the files if there 19 | # are conflicts (Merging from VS is not affected by the settings below, in VS 20 | # the diff markers are never inserted). Diff markers may cause the following 21 | # file extensions to fail to load in VS. 
An alternative would be to treat 22 | # these files as binary and thus will always conflict and require user 23 | # intervention with every merge. To do so, just uncomment the entries below 24 | ############################################################################### 25 | #*.sln merge=binary 26 | #*.csproj merge=binary 27 | #*.vbproj merge=binary 28 | #*.vcxproj merge=binary 29 | #*.vcproj merge=binary 30 | #*.dbproj merge=binary 31 | #*.fsproj merge=binary 32 | #*.lsproj merge=binary 33 | #*.wixproj merge=binary 34 | #*.modelproj merge=binary 35 | #*.sqlproj merge=binary 36 | #*.wwaproj merge=binary 37 | 38 | ############################################################################### 39 | # behavior for image files 40 | # 41 | # image files are treated as binary by default. 42 | ############################################################################### 43 | #*.jpg binary 44 | #*.png binary 45 | #*.gif binary 46 | 47 | ############################################################################### 48 | # diff behavior for common document formats 49 | # 50 | # Convert binary document formats to text before diffing them. This feature 51 | # is only available from the command line. Turn it on by uncommenting the 52 | # entries below. 
53 | ############################################################################### 54 | #*.doc diff=astextplain 55 | #*.DOC diff=astextplain 56 | #*.docx diff=astextplain 57 | #*.DOCX diff=astextplain 58 | #*.dot diff=astextplain 59 | #*.DOT diff=astextplain 60 | #*.pdf diff=astextplain 61 | #*.PDF diff=astextplain 62 | #*.rtf diff=astextplain 63 | #*.RTF diff=astextplain 64 | 65 | *.sh text eol=lf 66 | 67 | ############################################################################### 68 | # diff behavior for input/output/baseline files for Old FsLexYacc tests 69 | ############################################################################### 70 | *.input1 eol=crlf 71 | *.badInput eol=crlf 72 | *.variation1 eol=crlf 73 | *.variation2 eol=crlf 74 | *.bsl eol=crlf 75 | *.utf8 eol=crlf 76 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # User-specific files 2 | *.suo 3 | *.user 4 | *.sln.docstates 5 | 6 | # Xamarin Studio / monodevelop user-specific 7 | *.userprefs 8 | 9 | # Build results 10 | 11 | [Dd]ebug/ 12 | [Rr]elease/ 13 | x64/ 14 | build/ 15 | [Bb]in/ 16 | [Oo]bj/ 17 | 18 | # Enable "build/" folder in the NuGet Packages folder since NuGet packages use it for MSBuild targets 19 | !packages/*/build/ 20 | 21 | # MSTest test Results 22 | [Tt]est[Rr]esult*/ 23 | [Bb]uild[Ll]og.* 24 | 25 | *_i.c 26 | *_p.c 27 | *.ilk 28 | *.meta 29 | *.obj 30 | *.pch 31 | *.pdb 32 | *.pgc 33 | *.pgd 34 | *.rsp 35 | *.sbr 36 | *.tlb 37 | *.tli 38 | *.tlh 39 | *.tmp 40 | *.tmp_proj 41 | *.log 42 | *.vspscc 43 | *.vssscc 44 | .builds 45 | *.pidb 46 | *.log 47 | *.scc 48 | 49 | 50 | # Visual C++ cache files 51 | ipch/ 52 | *.aps 53 | *.ncb 54 | *.opensdf 55 | *.sdf 56 | *.cachefile 57 | 58 | # Visual Studio profiler 59 | *.psess 60 | *.vsp 61 | *.vspx 62 | 63 | # Guidance Automation Toolkit 64 | *.gpState 65 | 66 | # ReSharper is a .NET coding add-in 
67 | _ReSharper*/ 68 | *.[Rr]e[Ss]harper 69 | 70 | # TeamCity is a build add-in 71 | _TeamCity* 72 | 73 | # DotCover is a Code Coverage Tool 74 | *.dotCover 75 | 76 | # NCrunch 77 | *.ncrunch* 78 | .*crunch*.local.xml 79 | 80 | # Installshield output folder 81 | [Ee]xpress/ 82 | 83 | # DocProject is a documentation generator add-in 84 | DocProject/buildhelp/ 85 | DocProject/Help/*.HxT 86 | DocProject/Help/*.HxC 87 | DocProject/Help/*.hhc 88 | DocProject/Help/*.hhk 89 | DocProject/Help/*.hhp 90 | DocProject/Help/Html2 91 | DocProject/Help/html 92 | 93 | # Click-Once directory 94 | publish/ 95 | 96 | # Publish Web Output 97 | *.Publish.xml 98 | 99 | # Enable nuget.exe in the .nuget folder (though normally executables are not tracked) 100 | !.nuget/NuGet.exe 101 | 102 | # Windows Azure Build Output 103 | csx 104 | *.build.csdef 105 | 106 | # Windows Store app package directory 107 | AppPackages/ 108 | 109 | # Others 110 | sql/ 111 | *.Cache 112 | ClientBin/ 113 | [Ss]tyle[Cc]op.* 114 | ~$* 115 | *~ 116 | *.dbmdl 117 | *.[Pp]ublish.xml 118 | *.pfx 119 | *.publishsettings 120 | 121 | # RIA/Silverlight projects 122 | Generated_Code/ 123 | 124 | # Backup & report files from converting an old project file to a newer 125 | # Visual Studio version. 
Backup files are not needed, because we have git ;-) 126 | _UpgradeReport_Files/ 127 | Backup*/ 128 | UpgradeLog*.XML 129 | UpgradeLog*.htm 130 | 131 | # SQL Server files 132 | App_Data/*.mdf 133 | App_Data/*.ldf 134 | 135 | 136 | #LightSwitch generated files 137 | GeneratedArtifacts/ 138 | _Pvt_Extensions/ 139 | ModelManifest.xml 140 | 141 | # ========================= 142 | # Windows detritus 143 | # ========================= 144 | 145 | # Windows image file caches 146 | Thumbs.db 147 | ehthumbs.db 148 | 149 | # Folder config file 150 | Desktop.ini 151 | 152 | # Recycle Bin used on file shares 153 | $RECYCLE.BIN/ 154 | 155 | # Mac desktop service store files 156 | .DS_Store 157 | 158 | # =================================================== 159 | # Exclude F# project specific directories and files 160 | # =================================================== 161 | 162 | # NuGet Packages Directory 163 | packages/ 164 | 165 | # Generated documentation folder 166 | output/ 167 | .fsdocs/ 168 | tmp/ 169 | 170 | # Temp folder used for publishing docs 171 | temp/ 172 | 173 | # Test results produced by build 174 | TestResults.xml 175 | 176 | # Nuget outputs 177 | nuget/*.nupkg 178 | 179 | # FAKE build 180 | .fake/ 181 | 182 | # IDEs 183 | .vs/ 184 | .idea/ 185 | .ionide/ 186 | 187 | # =================================================== 188 | # Exclude generated lexer/parser source files 189 | # =================================================== 190 | src/FsLex/fslexlex.fs 191 | src/FsLex/fslexpars.fs 192 | src/FsLex/fslexpars.fsi 193 | src/FsYacc/fsyacclex.fs 194 | src/FsYacc/fsyaccpars.fs 195 | src/FsYacc/fsyaccpars.fsi 196 | tests/fsyacc/test1-unicode.exe.config 197 | tests/fsyacc/test1.exe.config 198 | tests/fsyacc/test1.input1.bsl.err 199 | tests/fsyacc/test1compat.exe.config 200 | tests/fsyacc/test2.exe.config 201 | tests/fsyacc/test2compat.exe.config 202 | tests/fsyacc/Test1/test1.fs 203 | tests/fsyacc/Test1/test1.fsi 204 | tests/fsyacc/Test1/test1lex.fs 205 | 206 | 
.paket 207 | .idea/ 208 | .vs/ 209 | .ionide/ 210 | tests/fsyacc/unicode/test1-unicode-lex.fs 211 | tests/fsyacc/unicode/test1-unicode.fs 212 | tests/fsyacc/unicode/test1-unicode.fsi 213 | tests/fsyacc/unicode/test1-unicode.input3.tokens.bsl.err 214 | tests/fsyacc/Test2/test2lex.fs 215 | tests/fsyacc/Test2/test2.fs 216 | tests/fsyacc/Test2/test2.fsi 217 | tests/fsyacc/repro_#141/Lexer_fail_option_i.fs 218 | -------------------------------------------------------------------------------- /src/FsLex/fslex.fs: -------------------------------------------------------------------------------- 1 | // (c) Microsoft Corporation 2005-2009. 2 | 3 | module FsLexYacc.FsLex.Program 4 | 5 | open FsLexYacc.FsLex.AST 6 | open FsLexYacc.FsLex.Driver 7 | open Printf 8 | open FSharp.Text 9 | open System.IO 10 | 11 | //------------------------------------------------------------------ 12 | // This is the program proper 13 | 14 | let mutable input = None 15 | let mutable out = None 16 | let mutable inputCodePage = None 17 | let mutable light = None 18 | let mutable modname = None 19 | let mutable internal_module = false 20 | let mutable opens = [] 21 | let mutable lexlib = "FSharp.Text.Lexing" 22 | let mutable unicode = false 23 | let mutable caseInsensitive = false 24 | 25 | let usage = 26 | [ 27 | ArgInfo("-o", ArgType.String(fun s -> out <- Some s), "Name the output file.") 28 | ArgInfo("--module", ArgType.String(fun s -> modname <- Some s), "Define the F# module name to host the generated parser.") 29 | ArgInfo("--internal", ArgType.Unit(fun () -> internal_module <- true), "Generate an internal module") 30 | ArgInfo( 31 | "--open", 32 | ArgType.String(fun s -> opens <- opens @ [ s ]), 33 | "Add the given module to the list of those to open in both the generated signature and implementation." 34 | ) 35 | ArgInfo( 36 | "--codepage", 37 | ArgType.Int(fun i -> inputCodePage <- Some i), 38 | "Assume input lexer specification file is encoded with the given codepage." 
39 | ) 40 | ArgInfo("--light", ArgType.Unit(fun () -> light <- Some true), "(ignored)") 41 | ArgInfo("--light-off", ArgType.Unit(fun () -> light <- Some false), "Add #light \"off\" to the top of the generated file") 42 | ArgInfo( 43 | "--lexlib", 44 | ArgType.String(fun s -> lexlib <- s), 45 | "Specify the namespace for the implementation of the lexer table interpreter (default FSharp.Text.Lexing)" 46 | ) 47 | ArgInfo("--unicode", ArgType.Unit(fun () -> unicode <- true), "Produce a lexer for use with 16-bit unicode characters.") 48 | ArgInfo("-i", ArgType.Unit(fun () -> caseInsensitive <- true), "Produce a case-insensitive lexer.") 49 | ] 50 | 51 | let _ = 52 | ArgParser.Parse( 53 | usage, 54 | (fun x -> 55 | match input with 56 | | Some _ -> failwith "more than one input given" 57 | | None -> input <- Some x), 58 | "fslex " 59 | ) 60 | 61 | let compileSpec (spec: Spec) (ctx: ParseContext) = 62 | let perRuleData, dfaNodes = Compile ctx spec 63 | let dfaNodes = dfaNodes |> List.sortBy (fun n -> n.Id) 64 | perRuleData, dfaNodes 65 | 66 | let main () = 67 | try 68 | let filename = 69 | (match input with 70 | | Some x -> x 71 | | None -> failwith "no input given") 72 | 73 | let parseContext = 74 | { 75 | unicode = unicode 76 | caseInsensitive = caseInsensitive 77 | } 78 | 79 | let spec = 80 | match readSpecFromFile filename inputCodePage with 81 | | Ok spec -> spec 82 | | Error(e, line, column) -> 83 | eprintf 84 | "%s(%d,%d): error: %s" 85 | filename 86 | line 87 | column 88 | (match e with 89 | | Failure s -> s 90 | | _ -> e.Message) 91 | 92 | exit 1 93 | 94 | printfn "compiling to dfas (can take a while...)" 95 | let perRuleData, dfaNodes = compileSpec spec parseContext 96 | printfn "%d states" dfaNodes.Length 97 | 98 | printfn "writing output" 99 | 100 | let output = 101 | match out with 102 | | Some x -> x 103 | | _ -> Path.ChangeExtension(filename, ".fs") 104 | 105 | let state: GeneratorState = 106 | { 107 | inputFileName = filename 108 | outputFileName = output 
109 | inputCodePage = 110 | inputCodePage 111 | |> Option.map System.Text.Encoding.GetEncoding 112 | |> Option.defaultValue System.Text.Encoding.UTF8 113 | generatedModuleName = modname 114 | disableLightMode = light 115 | generateInternalModule = internal_module 116 | opens = opens 117 | lexerLibraryName = lexlib 118 | domain = if unicode then Unicode else ASCII 119 | } 120 | 121 | writeSpecToFile state spec perRuleData dfaNodes 122 | 123 | with e -> 124 | eprintf 125 | "FSLEX: error FSL000: %s" 126 | (match e with 127 | | Failure s -> s 128 | | e -> e.ToString()) 129 | 130 | exit 1 131 | 132 | let result = main () 133 | -------------------------------------------------------------------------------- /src/FsLexYacc.Runtime/Parsing.fsi: -------------------------------------------------------------------------------- 1 | //========================================================================== 2 | // (c) Microsoft Corporation 2005-2009. 3 | //========================================================================= 4 | 5 | namespace FSharp.Text.Parsing 6 | 7 | open FSharp.Text.Lexing 8 | 9 | open System.Collections.Generic 10 | 11 | /// The information accessible via the parseState value within parser actions. 
12 | type IParseState = 13 | /// Get the start and end position for the terminal or non-terminal at a given index matched by the production 14 | abstract InputRange: index: int -> Position * Position 15 | 16 | /// Get the end position for the terminal or non-terminal at a given index matched by the production 17 | abstract InputEndPosition: int -> Position 18 | 19 | /// Get the start position for the terminal or non-terminal at a given index matched by the production 20 | abstract InputStartPosition: int -> Position 21 | 22 | /// Get the full range of positions matched by the production 23 | abstract ResultRange: Position * Position 24 | 25 | /// Get the value produced by the terminal or non-terminal at the given position 26 | abstract GetInput: int -> obj 27 | 28 | /// Get the store of local values associated with this parser 29 | // Dynamically typed, non-lexically scoped local store 30 | abstract ParserLocalStore: IDictionary 31 | 32 | /// Raise an error in this parse context 33 | abstract RaiseError<'b> : unit -> 'b 34 | 35 | /// The context provided when a parse error occurs 36 | [] 37 | type ParseErrorContext<'tok> = 38 | /// The stack of state indexes active at the parse error 39 | member StateStack: int list 40 | 41 | /// The state active at the parse error 42 | member ParseState: IParseState 43 | 44 | /// The tokens that would cause a reduction at the parse error 45 | member ReduceTokens: int list 46 | 47 | /// The stack of productions that would be reduced at the parse error 48 | member ReducibleProductions: int list list 49 | 50 | /// The token that caused the parse error 51 | member CurrentToken: 'tok option 52 | 53 | /// The token that would cause a shift at the parse error 54 | member ShiftTokens: int list 55 | 56 | /// The message associated with the parse error 57 | member Message: string 58 | 59 | /// Tables generated by fsyacc 60 | /// The type of the tables contained in a file produced by the fsyacc.exe parser generator. 
61 | type Tables<'tok> = 62 | { 63 | /// The reduction table 64 | reductions: (IParseState -> obj) array 65 | 66 | /// The token number indicating the end of input 67 | endOfInputTag: int 68 | 69 | /// A function to compute the tag of a token 70 | tagOfToken: 'tok -> int 71 | 72 | /// A function to compute the data carried by a token 73 | dataOfToken: 'tok -> obj 74 | 75 | /// The sparse action table elements 76 | actionTableElements: uint16[] 77 | 78 | /// The sparse action table row offsets 79 | actionTableRowOffsets: uint16[] 80 | 81 | /// The number of symbols for each reduction 82 | reductionSymbolCounts: uint16[] 83 | 84 | /// The immediate action table 85 | immediateActions: uint16[] 86 | 87 | /// The sparse goto table 88 | gotos: uint16[] 89 | 90 | /// The sparse goto table row offsets 91 | sparseGotoTableRowOffsets: uint16[] 92 | 93 | /// The sparse table for the productions active for each state 94 | stateToProdIdxsTableElements: uint16[] 95 | 96 | /// The sparse table offsets for the productions active for each state 97 | stateToProdIdxsTableRowOffsets: uint16[] 98 | 99 | /// This table is logically part of the Goto table 100 | productionToNonTerminalTable: uint16[] 101 | 102 | /// This function is used to hold the user specified "parse_error" or "parse_error_rich" functions 103 | parseError: ParseErrorContext<'tok> -> unit 104 | 105 | /// The total number of terminals 106 | numTerminals: int 107 | 108 | /// The tag of the error terminal 109 | tagOfErrorTerminal: int 110 | } 111 | 112 | /// Interpret the parser table taking input from the given lexer, using the given lex buffer, and the given start state. 113 | /// Returns an object indicating the final synthesized value for the parse. 
114 | member Interpret: lexer: (LexBuffer<'char> -> 'tok) * lexbuf: LexBuffer<'char> * startState: int -> obj 115 | 116 | /// Indicates an accept action has occured 117 | exception Accept of obj 118 | /// Indicates a parse error has occured and parse recovery is in progress 119 | exception RecoverableParseError 120 | 121 | #if __DEBUG 122 | module internal Flags = 123 | val mutable debug: bool 124 | #endif 125 | 126 | /// Helpers used by generated parsers. 127 | module ParseHelpers = 128 | /// The default implementation of the parse_error_rich function 129 | val parse_error_rich: (ParseErrorContext<'tok> -> unit) option 130 | 131 | /// The default implementation of the parse_error function 132 | val parse_error: string -> unit 133 | -------------------------------------------------------------------------------- /src/FsYacc.Core/fsyacclex.fsl: -------------------------------------------------------------------------------- 1 | { 2 | (* (c) Microsoft Corporation 2005-2008. *) 3 | 4 | module FsLexYacc.FsYacc.Lexer 5 | 6 | open FsLexYacc.FsYacc.AST 7 | open FsLexYacc.FsYacc.Parser 8 | open System.Text 9 | open FSharp.Text.Lexing 10 | 11 | let lexeme (lexbuf : LexBuffer) = new System.String(lexbuf.Lexeme) 12 | let newline (lexbuf:LexBuffer<_>) = lexbuf.EndPos <- lexbuf.EndPos.NextLine 13 | 14 | let unexpected_char lexbuf = 15 | failwith ("Unexpected character '"+(lexeme lexbuf)+"'") 16 | 17 | let typeDepth = ref 0 18 | let startPos = ref Position.Empty 19 | let mutable str_buf = new System.Text.StringBuilder() 20 | 21 | let appendBuf (str:string) = str_buf.Append str |> ignore 22 | let clearBuf () = str_buf <- new System.Text.StringBuilder() 23 | 24 | } 25 | 26 | let letter = ['A'-'Z'] | ['a'-'z'] 27 | let digit = ['0'-'9'] 28 | let whitespace = [' ' '\t'] 29 | let newline = ('\n' | '\r' '\n') 30 | let ident_start_char = letter 31 | let ident_char = ( ident_start_char| digit | ['\'' '_'] ) 32 | let ident = ident_start_char ident_char* 33 | 34 | rule token = parse 35 | 
| "%{" { let p = lexbuf.StartPos in header p (new StringBuilder 100) lexbuf } 36 | | "%%" { PERCENT_PERCENT } 37 | | "%token" (whitespace* '<') { typeDepth.Value <- 1; startPos.Value <- lexbuf.StartPos; clearBuf(); TOKEN (fs_type lexbuf) } 38 | | "%token" { TOKEN (None) } 39 | | "%start"{ START } 40 | | "%prec"{ PREC } 41 | | "%type" (whitespace* '<') { typeDepth.Value <- 1; startPos.Value <- lexbuf.StartPos; clearBuf(); TYPE (match fs_type lexbuf with Some x -> x | None -> failwith "gettype") } 42 | | "%left" { LEFT } 43 | | "%right" { RIGHT } 44 | | "%nonassoc" { NONASSOC } 45 | | "error" { ERROR } 46 | | '<' { LESS } 47 | | '>' { GREATER } 48 | | ';' { SEMI } 49 | | '{' { let p = lexbuf.StartPos in 50 | let buff = (new StringBuilder 100) in 51 | // adjust the first line to get even indentation for all lines w.r.t. the left hand margin 52 | buff.Append (String.replicate (lexbuf.StartPos.Column+1) " ") |> ignore; 53 | code p buff lexbuf } 54 | | whitespace+ { token lexbuf } 55 | | newline { newline lexbuf; token lexbuf } 56 | | ident_start_char ident_char* { IDENT (lexeme lexbuf) } 57 | | '|' { BAR } 58 | | "/*" { ignore(comment lexbuf); token lexbuf } 59 | | "//" [^'\n''\r']* { token lexbuf } 60 | | ':' { COLON } 61 | | _ { unexpected_char lexbuf } 62 | | eof { EOF } 63 | 64 | and fs_type = parse 65 | | '<' { typeDepth.Value <- typeDepth.Value + 1; appendBuf(lexeme lexbuf); fs_type lexbuf} 66 | | '>' 67 | { typeDepth.Value <- typeDepth.Value - 1; 68 | if typeDepth.Value = 0 69 | then Some(string str_buf) 70 | else appendBuf(lexeme lexbuf); fs_type lexbuf } 71 | | _ { appendBuf(lexeme lexbuf); fs_type lexbuf } 72 | 73 | and header p buff = parse 74 | | "%}" { HEADER (buff.ToString(), p) } 75 | | newline { newline lexbuf; 76 | ignore <| buff.Append System.Environment.NewLine; 77 | header p buff lexbuf } 78 | | (whitespace | letter | digit) + 79 | { ignore <| buff.Append (lexeme lexbuf); 80 | header p buff lexbuf } 81 | | "//" [^'\n''\r']* 82 | { ignore <| 
buff.Append (lexeme lexbuf); 83 | header p buff lexbuf } 84 | | "'\"'" | "'\\\"'" 85 | { ignore <| buff.Append (lexeme lexbuf); 86 | header p buff lexbuf } 87 | | "\"" 88 | { ignore <| buff.Append (lexeme lexbuf); 89 | ignore(codestring buff lexbuf); 90 | header p buff lexbuf } 91 | | eof { EOF } 92 | | _ { ignore <| buff.Append(lexeme lexbuf).[0]; 93 | header p buff lexbuf } 94 | and code p buff = parse 95 | | "}" { CODE (buff.ToString(), p) } 96 | | "{" { ignore <| buff.Append (lexeme lexbuf); 97 | ignore(code p buff lexbuf); 98 | ignore <| buff.Append "}"; 99 | code p buff lexbuf } 100 | | newline { newline lexbuf; 101 | ignore <| buff.Append System.Environment.NewLine; 102 | code p buff lexbuf } 103 | | "'\"'" | "'\\\"'" 104 | { ignore <| buff.Append (lexeme lexbuf); 105 | code p buff lexbuf } 106 | | "\"" { ignore <| buff.Append (lexeme lexbuf); 107 | ignore(codestring buff lexbuf); 108 | code p buff lexbuf } 109 | | (whitespace | letter | digit) + 110 | { ignore <| buff.Append (lexeme lexbuf); 111 | code p buff lexbuf } 112 | | "//" [^'\n''\r']* 113 | { ignore <| buff.Append (lexeme lexbuf); 114 | code p buff lexbuf } 115 | | eof { EOF } 116 | | _ { ignore <| buff.Append(lexeme lexbuf).[0]; 117 | code p buff lexbuf } 118 | 119 | 120 | and codestring buff = parse 121 | | '\\' ('"' | '\\') 122 | { ignore <| buff.Append (lexeme lexbuf); 123 | codestring buff lexbuf } 124 | | '"' { ignore <| buff.Append (lexeme lexbuf); 125 | buff.ToString() } 126 | | newline { newline lexbuf; 127 | ignore <| buff.Append System.Environment.NewLine; 128 | codestring buff lexbuf } 129 | | (whitespace | letter | digit) + 130 | { ignore <| buff.Append (lexeme lexbuf); 131 | codestring buff lexbuf } 132 | | eof { failwith "unterminated string in code" } 133 | | _ { ignore <| buff.Append(lexeme lexbuf).[0]; 134 | codestring buff lexbuf } 135 | 136 | 137 | and comment = parse 138 | | "/*" { ignore(comment lexbuf); comment lexbuf } 139 | | newline { newline lexbuf; comment lexbuf } 140 | 
| "*/" { () } 141 | | eof { failwith "end of file in comment" } 142 | | [^ '/' '*' '\n' '\r' '"' '/' ]+ { comment lexbuf } 143 | | _ { comment lexbuf } 144 | 145 | -------------------------------------------------------------------------------- /src/FsYacc/fsyacc.fs: -------------------------------------------------------------------------------- 1 | (* (c) Microsoft Corporation 2005-2008. *) 2 | 3 | module FsLexYacc.FsYacc.Program 4 | 5 | open Printf 6 | open FSharp.Text 7 | open FsLexYacc.FsYacc.AST 8 | open FsLexYacc.FsYacc.Driver 9 | 10 | //------------------------------------------------------------------ 11 | // This is the program proper 12 | 13 | let mutable input = None 14 | let mutable modname = None 15 | let mutable internal_module = false 16 | let mutable opens = [] 17 | let mutable out = None 18 | let mutable tokenize = false 19 | let mutable compat = false 20 | let mutable log = false 21 | let mutable light = None 22 | let mutable inputCodePage = None 23 | let mutable lexlib = "FSharp.Text.Lexing" 24 | let mutable parslib = "FSharp.Text.Parsing" 25 | let mutable bufferTypeArgument = "'cty" 26 | 27 | let usage = 28 | [ 29 | ArgInfo("-o", ArgType.String(fun s -> out <- Some s), "Name the output file.") 30 | ArgInfo("-v", ArgType.Unit(fun () -> log <- true), "Produce a listing file.") 31 | ArgInfo("--module", ArgType.String(fun s -> modname <- Some s), "Define the F# module name to host the generated parser.") 32 | ArgInfo("--internal", ArgType.Unit(fun () -> internal_module <- true), "Generate an internal module") 33 | ArgInfo( 34 | "--open", 35 | ArgType.String(fun s -> opens <- opens @ [ s ]), 36 | "Add the given module to the list of those to open in both the generated signature and implementation." 
37 | ) 38 | ArgInfo("--light", ArgType.Unit(fun () -> light <- Some true), "(ignored)") 39 | ArgInfo("--light-off", ArgType.Unit(fun () -> light <- Some false), "Add #light \"off\" to the top of the generated file") 40 | ArgInfo( 41 | "--ml-compatibility", 42 | ArgType.Unit(fun _ -> compat <- true), 43 | "Support the use of the global state from the 'Parsing' module in FSharp.PowerPack.dll." 44 | ) 45 | ArgInfo("--tokens", ArgType.Unit(fun _ -> tokenize <- true), "Simply tokenize the specification file itself.") 46 | ArgInfo( 47 | "--lexlib", 48 | ArgType.String(fun s -> lexlib <- s), 49 | "Specify the namespace for the implementation of the lexer (default: FSharp.Text.Lexing)" 50 | ) 51 | ArgInfo( 52 | "--parslib", 53 | ArgType.String(fun s -> parslib <- s), 54 | "Specify the namespace for the implementation of the parser table interpreter (default: FSharp.Text.Parsing)" 55 | ) 56 | ArgInfo( 57 | "--codepage", 58 | ArgType.Int(fun i -> inputCodePage <- Some i), 59 | "Assume input lexer specification file is encoded with the given codepage." 
60 | ) 61 | ArgInfo("--buffer-type-argument", ArgType.String(fun s -> bufferTypeArgument <- s), "Generic type argument of the LexBuffer type.") 62 | ] 63 | 64 | let _ = 65 | ArgParser.Parse( 66 | usage, 67 | (fun x -> 68 | match input with 69 | | Some _ -> failwith "more than one input given" 70 | | None -> input <- Some x), 71 | "fsyacc " 72 | ) 73 | 74 | let main () = 75 | let filename = 76 | (match input with 77 | | Some x -> x 78 | | None -> failwith "no input given") in 79 | 80 | if tokenize then 81 | printTokens filename inputCodePage 82 | 83 | let spec = 84 | match readSpecFromFile filename inputCodePage with 85 | | Ok spec -> spec 86 | | Result.Error(e, line, col) -> 87 | eprintf "%s(%d,%d): error: %s" filename line col e.Message 88 | exit 1 89 | 90 | use logger = 91 | match logFileName (filename, out, log) with 92 | | Some outputLogName -> new FileLogger(outputLogName) :> Logger 93 | | None -> new NullLogger() :> Logger 94 | 95 | let compiledSpec = compileSpec spec logger 96 | printfn " building tables" 97 | printfn " %d states" compiledSpec.states.Length 98 | printfn " %d nonterminals" compiledSpec.gotoTable.[0].Length 99 | printfn " %d terminals" compiledSpec.actionTable.[0].Length 100 | printfn " %d productions" compiledSpec.prods.Length 101 | printfn " #rows in action table: %d" compiledSpec.actionTable.Length 102 | (* 103 | printfn "#unique rows in action table: %d" (List.length (Array.foldBack (fun row acc -> insert (Array.to_list row) acc) actionTable [])); 104 | printfn "maximum #different actions per state: %d" (Array.foldBack (fun row acc ->max (List.length (List.foldBack insert (Array.to_list row) [])) acc) actionTable 0); 105 | printfn "average #different actions per state: %d" ((Array.foldBack (fun row acc -> (List.length (List.foldBack insert (Array.to_list row) [])) + acc) actionTable 0) / (Array.length states)); 106 | *) 107 | 108 | let generatorState: GeneratorState = 109 | { GeneratorState.Default with 110 | input = filename 111 | output 
= out 112 | logger = logger 113 | light = light 114 | modname = modname 115 | internal_module = internal_module 116 | opens = opens 117 | lexlib = lexlib 118 | parslib = parslib 119 | compat = compat 120 | bufferTypeArgument = bufferTypeArgument 121 | } 122 | 123 | writeSpecToFile generatorState spec compiledSpec 124 | 125 | let result = 126 | try 127 | main () 128 | with e -> 129 | eprintf 130 | "FSYACC: error FSY000: %s\n%s" 131 | (match e with 132 | | Failure s -> s 133 | | e -> e.Message) 134 | e.StackTrace 135 | 136 | exit 1 137 | -------------------------------------------------------------------------------- /tests/fsyacc/unicode/test1-unicode.WithTitleCaseLetter.tokens.error.bsl: -------------------------------------------------------------------------------- 1 | tokenize - getting one token 2 | tokenize - got IDENT, now at char 2 3 | ident char = 110 4 | ident char = 101 5 | ident char = 120 6 | ident char = 116 7 | tokenize - getting one token 8 | tokenize - got IDENT, now at char 7 9 | ident char = 108 10 | ident char = 105 11 | ident char = 110 12 | ident char = 101 13 | tokenize - getting one token 14 | tokenize - got IDENT, now at char 12 15 | ident char = 116 16 | ident char = 101 17 | ident char = 115 18 | ident char = 116 19 | ident char = 115 20 | tokenize - getting one token 21 | tokenize - got IDENT, now at char 18 22 | ident char = 111 23 | ident char = 110 24 | ident char = 101 25 | tokenize - getting one token 26 | tokenize - got IDENT, now at char 22 27 | ident char = 117 28 | ident char = 110 29 | ident char = 105 30 | ident char = 99 31 | ident char = 111 32 | ident char = 100 33 | ident char = 101 34 | tokenize - getting one token 35 | tokenize - got IDENT, now at char 30 36 | ident char = 99 37 | ident char = 104 38 | ident char = 97 39 | ident char = 114 40 | ident char = 97 41 | ident char = 99 42 | ident char = 116 43 | ident char = 101 44 | ident char = 114 45 | tokenize - getting one token 46 | tokenize - got IDENT, now at char 40 
47 | ident char = 99 48 | ident char = 108 49 | ident char = 97 50 | ident char = 115 51 | ident char = 115 52 | tokenize - getting one token 53 | tokenize - got IDENT, now at char 47 54 | ident char = 196 55 | ident char = 203 56 | ident char = 214 57 | ident char = 207 58 | ident char = 220 59 | ident char = 226 60 | ident char = 230 61 | ident char = 231 62 | ident char = 241 63 | ident char = 245 64 | ident char = 246 65 | tokenize - getting one token 66 | tokenize - got PLUS, now at char 59 67 | tokenize - getting one token 68 | tokenize - got IDENT, now at char 61 69 | ident char = 105 70 | ident char = 100 71 | tokenize - getting one token 72 | tokenize - got IDENT, now at char 65 73 | ident char = 110 74 | ident char = 101 75 | ident char = 120 76 | ident char = 116 77 | tokenize - getting one token 78 | tokenize - got IDENT, now at char 70 79 | ident char = 108 80 | ident char = 105 81 | ident char = 110 82 | ident char = 101 83 | tokenize - getting one token 84 | tokenize - got IDENT, now at char 75 85 | ident char = 116 86 | ident char = 101 87 | ident char = 115 88 | ident char = 116 89 | ident char = 115 90 | tokenize - getting one token 91 | tokenize - got IDENT, now at char 81 92 | ident char = 115 93 | ident char = 112 94 | ident char = 101 95 | ident char = 99 96 | ident char = 105 97 | ident char = 102 98 | ident char = 105 99 | ident char = 99 100 | tokenize - getting one token 101 | tokenize - got IDENT, now at char 90 102 | ident char = 117 103 | ident char = 110 104 | ident char = 105 105 | ident char = 99 106 | ident char = 111 107 | ident char = 100 108 | ident char = 101 109 | tokenize - getting one token 110 | tokenize - got IDENT, now at char 98 111 | ident char = 99 112 | ident char = 104 113 | ident char = 97 114 | ident char = 114 115 | ident char = 97 116 | ident char = 99 117 | ident char = 116 118 | ident char = 101 119 | ident char = 114 120 | ident char = 115 121 | tokenize - getting one token 122 | tokenize - got IDENT, now at 
char 110 123 | ident char = 8800 124 | tokenize - getting one token 125 | tokenize - got IDENT, now at char 112 126 | ident char = 8800 127 | ident char = 8800 128 | tokenize - getting one token 129 | tokenize - got IDENT, now at char 115 130 | ident char = 8776 131 | ident char = 8776 132 | tokenize - getting one token 133 | tokenize - got IDENT, now at char 118 134 | ident char = 8776 135 | ident char = 8776 136 | ident char = 8776 137 | tokenize - getting one token 138 | tokenize - got IDENT, now at char 123 139 | ident char = 105 140 | ident char = 100 141 | tokenize - getting one token 142 | tokenize - got PLUS, now at char 126 143 | tokenize - getting one token 144 | tokenize - got IDENT, now at char 128 145 | ident char = 105 146 | ident char = 100 147 | tokenize - getting one token 148 | tokenize - got IDENT, now at char 132 149 | ident char = 110 150 | ident char = 101 151 | ident char = 120 152 | ident char = 116 153 | tokenize - getting one token 154 | tokenize - got IDENT, now at char 137 155 | ident char = 108 156 | ident char = 105 157 | ident char = 110 158 | ident char = 101 159 | tokenize - getting one token 160 | tokenize - got IDENT, now at char 142 161 | ident char = 116 162 | ident char = 101 163 | ident char = 115 164 | ident char = 116 165 | ident char = 115 166 | tokenize - getting one token 167 | tokenize - got IDENT, now at char 148 168 | ident char = 115 169 | ident char = 111 170 | ident char = 109 171 | ident char = 101 172 | tokenize - getting one token 173 | tokenize - got IDENT, now at char 153 174 | ident char = 109 175 | ident char = 111 176 | ident char = 114 177 | ident char = 101 178 | tokenize - getting one token 179 | tokenize - got IDENT, now at char 158 180 | ident char = 114 181 | ident char = 97 182 | ident char = 110 183 | ident char = 100 184 | ident char = 111 185 | ident char = 109 186 | tokenize - getting one token 187 | tokenize - got IDENT, now at char 165 188 | ident char = 117 189 | ident char = 110 190 | ident 
char = 105 191 | ident char = 99 192 | ident char = 111 193 | ident char = 100 194 | ident char = 101 195 | tokenize - getting one token 196 | tokenize - got IDENT, now at char 173 197 | ident char = 99 198 | ident char = 104 199 | ident char = 97 200 | ident char = 114 201 | ident char = 97 202 | ident char = 99 203 | ident char = 116 204 | ident char = 101 205 | ident char = 114 206 | ident char = 115 207 | tokenize - getting one token 208 | Error: unrecognized input 209 | -------------------------------------------------------------------------------- /docs/content/fsyacc.md: -------------------------------------------------------------------------------- 1 | FsYacc Overview 2 | ======== 3 | 4 | `fsyacc` is a `LALR` parser generator. It follows a similar specification to the `OCamlYacc` parser generator (especially when used with the `ml compatibility` switch) 5 | 6 | Getting Started 7 | --------------- 8 | 9 | Build the tool by cloning this project and running /build.sh or build.cmd 10 | 11 | Add a reference in your project to `FsLexYacc` package via Nuget or paket. 12 | 13 | You can run the parser generator directly: 14 | dotnet fsyacc.dll inputFile -o outputFile 15 | 16 | 17 | 18 | Sample input 19 | ------------ 20 | 21 | Parser generators typically produce numbers represented by values in an F# Union Type. 
For example: 22 | 23 | type Expr = 24 | | Val of string 25 | | Int of int 26 | | Float of float 27 | | Decr of Expr 28 | 29 | 30 | type Stmt = 31 | | Assign of string * Expr 32 | | While of Expr * Stmt 33 | | Seq of Stmt list 34 | | IfThen of Expr * Stmt 35 | | IfThenElse of Expr * Stmt * Stmt 36 | | Print of Expr 37 | 38 | 39 | type Prog = Prog of Stmt list 40 | 41 | Given that, a typical parser specification is as follows: 42 | 43 | %{ 44 | open Ast 45 | %} 46 | 47 | %start start 48 | %token ID 49 | %token INT 50 | %token FLOAT 51 | %token DECR LPAREN RPAREN WHILE DO END BEGIN IF THEN ELSE PRINT SEMI ASSIGN EOF 52 | %type < Ast.Prog > start 53 | 54 | 55 | %% 56 | 57 | 58 | start: Prog { $1 } 59 | 60 | 61 | Prog: StmtList { Prog(List.rev($1)) } 62 | 63 | 64 | Expr: ID { Val($1) } 65 | | INT { Int($1) } 66 | | FLOAT { Float($1) } 67 | | DECR LPAREN Expr RPAREN { Decr($3) } 68 | 69 | 70 | Stmt: ID ASSIGN Expr { Assign($1,$3) } 71 | | WHILE Expr DO Stmt { While($2,$4) } 72 | | BEGIN StmtList END { Seq(List.rev($2)) } 73 | | IF Expr THEN Stmt { IfThen($2,$4) } 74 | | IF Expr THEN Stmt ELSE Stmt { IfThenElse($2,$4,$6) } 75 | | PRINT Expr { Print($2) } 76 | 77 | 78 | StmtList: Stmt { [$1] } 79 | | StmtList SEMI Stmt { $3 :: $1 } 80 | 81 | The above generates a datatype for tokens and a function for each `start` production. Parsers are typically combined with a lexer generated using `FsLex`. 82 | 83 | MSBuild support 84 | --------------- 85 | 86 | The nuget package includes MSBuild support for `FsLex` and `FsYacc`. New MSBuild targets are added automatically by the nuget package. 
87 | But you must manually add `FsLex` and `FsYacc` entries inside of an `ItemGroup` to your `.fsproj` file like this: 88 | 89 | 90 | --module Parser 91 | 92 | 93 | --unicode 94 | 95 | 96 | If you want to see `verbose` output from `FsYacc` you need to add `-v` in the `OtherFlags` section like this: 97 | 98 | 99 | --module Parser -v 100 | 101 | 102 | Command line options 103 | -------------------- 104 | 105 | fsyacc fsyacc 106 | 107 | -o : Name the output file. 108 | 109 | -v: Produce a listing file. 110 | 111 | --module : Define the F# module name to host the generated parser. 112 | 113 | --internal: Generate an internal module 114 | 115 | --open : Add the given module to the list of those to open in both the generated signature and implementation. 116 | 117 | --light: (ignored) 118 | 119 | --light-off: Add #light "off" to the top of the generated file 120 | 121 | --ml-compatibility: Support the use of the global state from the 'Parsing' module in FSharp.PowerPack.dll. 122 | 123 | --tokens: Simply tokenize the specification file itself. 124 | 125 | --lexlib : Specify the namespace for the implementation of the lexer (default FSharp.Text.Lexing) 126 | 127 | --parslib : Specify the namespace for the implementation of the parser table interpreter (default FSharp.Text.Parsing) 128 | 129 | --codepage : Assume input lexer specification file is encoded with the given codepage. 130 | 131 | --help: display this list of options 132 | 133 | -help: display this list of options 134 | 135 | Managing and using position markers 136 | ----------------------------------- 137 | 138 | Each action in an fsyacc parser has access to a parseState value through which you can access position information. 
139 | 140 | type IParseState = 141 | abstract InputStartPosition: int -> Position 142 | abstract InputEndPosition: int -> Position 143 | abstract InputRange: int -> Position * Position 144 | abstract ParserLocalStore: IDictionary 145 | abstract ResultRange : Position * Position 146 | abstract RaiseError<'b> : unit -> 'b 147 | 148 | The `Input` members relate to the indexes of the items on the right hand side of the current production, the `Result` relates to the entire range covered by the production. You shouldn't use `GetData` directly - this is called automatically by `$1`, `$2` etc. You can call `RaiseError` if you like. 149 | 150 | You must set the initial position when you create the lexbuf: 151 | 152 | let setInitialPos (lexbuf:LexBuffer<_>) filename = 153 | lexbuf.EndPos <- { pos_bol = 0; 154 | pos_fname=filename; 155 | pos_cnum=0; 156 | pos_lnum=1 } 157 | 158 | 159 | You must also update the position recorded in the lex buffer each time you process what you consider to be a new line: 160 | 161 | let newline (lexbuf:lexbuf) = 162 | lexbuf.EndPos <- lexbuf.EndPos.AsNewLinePos() 163 | 164 | 165 | Likewise, if your language includes the ability to mark source code locations (e.g. the `#line` directive in OCaml and F#), then you must similarly adjust the `lexbuf.EndPos` according to the information you extract from your input. 
166 | 167 | Notes on OCaml Compatibility 168 | ---------------------------- 169 | 170 | `OCamlYacc` accepts the following: 171 | 172 | %type < context -> context > toplevel 173 | 174 | For `FsYacc` you just add parentheses: 175 | 176 | %type < (context -> context) > toplevel 177 | -------------------------------------------------------------------------------- /tests/fsyacc/Test2/test2.badInput.tokens.bsl: -------------------------------------------------------------------------------- 1 | tokenize - getting one token 2 | tokenize - got IDENT, now at char 2 3 | ident char = 122 4 | ident char = 49 5 | tokenize - getting one token 6 | tokenize - got LPAREN, now at char 5 7 | tokenize - getting one token 8 | tokenize - got LET, now at char 6 9 | tokenize - getting one token 10 | tokenize - got IDENT, now at char 10 11 | ident char = 120 12 | tokenize - getting one token 13 | tokenize - got IN, now at char 12 14 | tokenize - getting one token 15 | tokenize - got IDENT, now at char 15 16 | ident char = 105 17 | ident char = 100 18 | tokenize - getting one token 19 | tokenize - got END, now at char 18 20 | tokenize - getting one token 21 | tokenize - got RPAREN, now at char 21 22 | tokenize - getting one token 23 | tokenize - got IDENT, now at char 24 24 | ident char = 120 25 | ident char = 50 26 | tokenize - getting one token 27 | tokenize - got LPAREN, now at char 27 28 | tokenize - getting one token 29 | tokenize - got IDENT, now at char 28 30 | ident char = 105 31 | ident char = 100 32 | tokenize - getting one token 33 | tokenize - got PLUS, now at char 31 34 | tokenize - getting one token 35 | tokenize - got IDENT, now at char 33 36 | ident char = 105 37 | ident char = 100 38 | tokenize - getting one token 39 | tokenize - got IDENT, now at char 37 40 | ident char = 121 41 | ident char = 51 42 | tokenize - getting one token 43 | tokenize - got LPAREN, now at char 40 44 | tokenize - getting one token 45 | tokenize - got IDENT, now at char 41 46 | ident char = 105 47 
| ident char = 100 48 | tokenize - getting one token 49 | tokenize - got PLUS, now at char 44 50 | tokenize - getting one token 51 | tokenize - got IDENT, now at char 46 52 | ident char = 105 53 | ident char = 100 54 | tokenize - getting one token 55 | tokenize - got PLUS, now at char 49 56 | tokenize - getting one token 57 | tokenize - got IDENT, now at char 51 58 | ident char = 105 59 | ident char = 100 60 | tokenize - getting one token 61 | tokenize - got RPAREN, now at char 53 62 | tokenize - getting one token 63 | tokenize - got IDENT, now at char 56 64 | ident char = 122 65 | ident char = 52 66 | tokenize - getting one token 67 | tokenize - got LPAREN, now at char 59 68 | tokenize - getting one token 69 | tokenize - got IDENT, now at char 60 70 | ident char = 105 71 | ident char = 100 72 | tokenize - getting one token 73 | tokenize - got PLUS, now at char 63 74 | tokenize - getting one token 75 | tokenize - got IDENT, now at char 65 76 | ident char = 105 77 | ident char = 100 78 | tokenize - getting one token 79 | tokenize - got STAR, now at char 68 80 | tokenize - getting one token 81 | tokenize - got IDENT, now at char 70 82 | ident char = 105 83 | ident char = 100 84 | tokenize - getting one token 85 | tokenize - got RPAREN, now at char 72 86 | tokenize - getting one token 87 | tokenize - got IDENT, now at char 75 88 | ident char = 122 89 | ident char = 53 90 | tokenize - getting one token 91 | tokenize - got LPAREN, now at char 78 92 | tokenize - getting one token 93 | tokenize - got LET, now at char 79 94 | tokenize - getting one token 95 | tokenize - got IDENT, now at char 83 96 | ident char = 120 97 | tokenize - getting one token 98 | tokenize - got PLUS, now at char 85 99 | tokenize - getting one token 100 | tokenize - got IN, now at char 87 101 | tokenize - getting one token 102 | tokenize - got IDENT, now at char 90 103 | ident char = 105 104 | ident char = 100 105 | tokenize - getting one token 106 | tokenize - got END, now at char 93 107 | 
tokenize - getting one token 108 | tokenize - got RPAREN, now at char 96 109 | tokenize - getting one token 110 | tokenize - got IDENT, now at char 99 111 | ident char = 122 112 | ident char = 54 113 | tokenize - getting one token 114 | tokenize - got LPAREN, now at char 102 115 | tokenize - getting one token 116 | tokenize - got LET, now at char 103 117 | tokenize - getting one token 118 | tokenize - got IDENT, now at char 107 119 | ident char = 120 120 | tokenize - getting one token 121 | tokenize - got PLUS, now at char 109 122 | tokenize - getting one token 123 | tokenize - got IN, now at char 111 124 | tokenize - getting one token 125 | tokenize - got IDENT, now at char 114 126 | ident char = 105 127 | ident char = 100 128 | tokenize - getting one token 129 | tokenize - got END, now at char 117 130 | tokenize - getting one token 131 | tokenize - got RPAREN, now at char 120 132 | tokenize - getting one token 133 | tokenize - got IDENT, now at char 123 134 | ident char = 122 135 | ident char = 55 136 | tokenize - getting one token 137 | tokenize - got LPAREN, now at char 126 138 | tokenize - getting one token 139 | tokenize - got LET, now at char 127 140 | tokenize - getting one token 141 | tokenize - got IDENT, now at char 131 142 | ident char = 120 143 | tokenize - getting one token 144 | tokenize - got PLUS, now at char 133 145 | tokenize - getting one token 146 | tokenize - got IDENT, now at char 135 147 | ident char = 121 148 | tokenize - getting one token 149 | tokenize - got IN, now at char 137 150 | tokenize - getting one token 151 | tokenize - got IDENT, now at char 140 152 | ident char = 105 153 | ident char = 100 154 | tokenize - getting one token 155 | tokenize - got END, now at char 143 156 | tokenize - getting one token 157 | tokenize - got RPAREN, now at char 146 158 | tokenize - getting one token 159 | tokenize - got IDENT, now at char 149 160 | ident char = 122 161 | ident char = 56 162 | tokenize - getting one token 163 | tokenize - got LPAREN, 
now at char 152 164 | tokenize - getting one token 165 | tokenize - got LET, now at char 153 166 | tokenize - getting one token 167 | tokenize - got IDENT, now at char 157 168 | ident char = 120 169 | tokenize - getting one token 170 | tokenize - got RPAREN, now at char 159 171 | tokenize - getting one token 172 | tokenize - got RPAREN, now at char 160 173 | tokenize - getting one token 174 | tokenize - got RPAREN, now at char 161 175 | tokenize - getting one token 176 | tokenize - got IN, now at char 163 177 | tokenize - getting one token 178 | tokenize - got IDENT, now at char 166 179 | ident char = 105 180 | ident char = 100 181 | tokenize - getting one token 182 | tokenize - got END, now at char 169 183 | tokenize - getting one token 184 | tokenize - got RPAREN, now at char 172 185 | tokenize - getting one token 186 | tokenize - got EOF, now at char 177 187 | -------------------------------------------------------------------------------- /src/Common/Arg.fs: -------------------------------------------------------------------------------- 1 | // (c) Microsoft Corporation 2005-2009. 
2 | 3 | namespace FSharp.Text 4 | 5 | type ArgType = 6 | | ClearArg of bool ref 7 | | FloatArg of (float -> unit) 8 | | IntArg of (int -> unit) 9 | | RestArg of (string -> unit) 10 | | SetArg of bool ref 11 | | StringArg of (string -> unit) 12 | | UnitArg of (unit -> unit) 13 | 14 | static member Clear r = ClearArg r 15 | static member Float r = FloatArg r 16 | static member Int r = IntArg r 17 | static member Rest r = RestArg r 18 | static member Set r = SetArg r 19 | static member String r = StringArg r 20 | static member Unit r = UnitArg r 21 | 22 | type ArgInfo(name, action, help) = 23 | member x.Name = name 24 | member x.ArgType = action 25 | member x.HelpText = help 26 | 27 | exception Bad of string 28 | exception HelpText of string 29 | 30 | [] 31 | type ArgParser() = 32 | static let getUsage specs u = 33 | let sbuf = System.Text.StringBuilder 100 34 | let pstring (s: string) = sbuf.Append s |> ignore 35 | 36 | let pendline s = 37 | pstring s 38 | pstring "\n" 39 | 40 | pendline u 41 | 42 | List.iter 43 | (fun (arg: ArgInfo) -> 44 | match arg.Name, arg.ArgType, arg.HelpText with 45 | | s, (UnitArg _ | SetArg _ | ClearArg _), helpText -> 46 | pstring "\t" 47 | pstring s 48 | pstring ": " 49 | pendline helpText 50 | | s, StringArg _, helpText -> 51 | pstring "\t" 52 | pstring s 53 | pstring " : " 54 | pendline helpText 55 | | s, IntArg _, helpText -> 56 | pstring "\t" 57 | pstring s 58 | pstring " : " 59 | pendline helpText 60 | | s, FloatArg _, helpText -> 61 | pstring "\t" 62 | pstring s 63 | pstring " : " 64 | pendline helpText 65 | | s, RestArg _, helpText -> 66 | pstring "\t" 67 | pstring s 68 | pstring " ...: " 69 | pendline helpText) 70 | specs 71 | 72 | pstring "\t" 73 | pstring "--help" 74 | pstring ": " 75 | pendline "display this list of options" 76 | pstring "\t" 77 | pstring "-help" 78 | pstring ": " 79 | pendline "display this list of options" 80 | sbuf.ToString() 81 | 82 | static member ParsePartial(cursor: ref, argv, arguments: seq, ?otherArgs, 
?usageText) = 83 | let other = defaultArg otherArgs (fun _ -> ()) 84 | let usageText = defaultArg usageText "" 85 | let nargs = Array.length argv 86 | cursor.Value <- cursor.Value + 1 87 | let argSpecs = arguments |> Seq.toList 88 | let specs = argSpecs |> List.map (fun (arg: ArgInfo) -> arg.Name, arg.ArgType) 89 | 90 | while cursor.Value < nargs do 91 | let arg = argv.[cursor.Value] 92 | 93 | let rec findMatchingArg args = 94 | match args with 95 | | (s, action) :: _ when s = arg -> 96 | let getSecondArg () = 97 | if cursor.Value + 1 >= nargs then 98 | raise (Bad("option " + s + " needs an argument.\n" + getUsage argSpecs usageText)) 99 | 100 | argv.[cursor.Value + 1] 101 | 102 | match action with 103 | | UnitArg f -> 104 | f () 105 | cursor.Value <- cursor.Value + 1 106 | | SetArg f -> 107 | f.Value <- true 108 | cursor.Value <- cursor.Value + 1 109 | | ClearArg f -> 110 | f.Value <- false 111 | cursor.Value <- cursor.Value + 1 112 | | StringArg f -> 113 | let arg2 = getSecondArg () 114 | f arg2 115 | cursor.Value <- cursor.Value + 2 116 | | IntArg f -> 117 | let arg2 = getSecondArg () 118 | 119 | let arg2 = 120 | try 121 | int32 arg2 122 | with _ -> 123 | raise (Bad(getUsage argSpecs usageText)) in 124 | 125 | f arg2 126 | cursor.Value <- cursor.Value + 2 127 | | FloatArg f -> 128 | let arg2 = getSecondArg () 129 | 130 | let arg2 = 131 | try 132 | float arg2 133 | with _ -> 134 | raise (Bad(getUsage argSpecs usageText)) in 135 | 136 | f arg2 137 | cursor.Value <- cursor.Value + 2 138 | | RestArg f -> 139 | cursor.Value <- cursor.Value + 1 140 | 141 | while cursor.Value < nargs do 142 | f argv.[cursor.Value] 143 | cursor.Value <- cursor.Value + 1 144 | 145 | | _ :: more -> findMatchingArg more 146 | | [] -> 147 | if arg = "-help" || arg = "--help" || arg = "/help" || arg = "/help" || arg = "/?" 
then 148 | raise (HelpText(getUsage argSpecs usageText)) 149 | // Note: for '/abc/def' does not count as an argument 150 | // Note: '/abc' does 151 | elif 152 | arg.Length > 0 153 | && (arg.[0] = '-' 154 | || (arg.[0] = '/' && not (arg.Length > 1 && arg.[1..].Contains "/"))) 155 | then 156 | raise (Bad("unrecognized argument: " + arg + "\n" + getUsage argSpecs usageText)) 157 | else 158 | other arg 159 | cursor.Value <- cursor.Value + 1 160 | 161 | findMatchingArg specs 162 | 163 | static member Usage(arguments, ?usage) = 164 | let usage = defaultArg usage "" 165 | System.Console.Error.WriteLine(getUsage (Seq.toList arguments) usage) 166 | 167 | #if FX_NO_COMMAND_LINE_ARGS 168 | #else 169 | static member Parse(arguments, ?otherArgs, ?usageText) = 170 | let current = ref 0 171 | let argv = System.Environment.GetCommandLineArgs() 172 | 173 | try 174 | ArgParser.ParsePartial(current, argv, arguments, ?otherArgs = otherArgs, ?usageText = usageText) 175 | with 176 | | Bad h 177 | | HelpText h -> 178 | System.Console.Error.WriteLine h 179 | System.Console.Error.Flush() 180 | System.Environment.Exit(1) 181 | | _ -> reraise () 182 | #endif 183 | -------------------------------------------------------------------------------- /tests/fsyacc/unicode/test1-unicode.input3.tokens.bsl: -------------------------------------------------------------------------------- 1 | tokenize - getting one token 2 | tokenize - got IDENT, now at char 2 3 | ident char = 110 4 | ident char = 101 5 | ident char = 120 6 | ident char = 116 7 | tokenize - getting one token 8 | tokenize - got IDENT, now at char 7 9 | ident char = 108 10 | ident char = 105 11 | ident char = 110 12 | ident char = 101 13 | tokenize - getting one token 14 | tokenize - got IDENT, now at char 12 15 | ident char = 116 16 | ident char = 101 17 | ident char = 115 18 | ident char = 116 19 | ident char = 115 20 | tokenize - getting one token 21 | tokenize - got IDENT, now at char 18 22 | ident char = 111 23 | ident char = 110 24 
| ident char = 101 25 | tokenize - getting one token 26 | tokenize - got IDENT, now at char 22 27 | ident char = 117 28 | ident char = 110 29 | ident char = 105 30 | ident char = 99 31 | ident char = 111 32 | ident char = 100 33 | ident char = 101 34 | tokenize - getting one token 35 | tokenize - got IDENT, now at char 30 36 | ident char = 99 37 | ident char = 104 38 | ident char = 97 39 | ident char = 114 40 | ident char = 97 41 | ident char = 99 42 | ident char = 116 43 | ident char = 101 44 | ident char = 114 45 | tokenize - getting one token 46 | tokenize - got IDENT, now at char 40 47 | ident char = 99 48 | ident char = 108 49 | ident char = 97 50 | ident char = 115 51 | ident char = 115 52 | tokenize - getting one token 53 | tokenize - got IDENT, now at char 47 54 | ident char = 196 55 | ident char = 203 56 | ident char = 214 57 | ident char = 207 58 | ident char = 220 59 | ident char = 226 60 | ident char = 230 61 | ident char = 231 62 | ident char = 241 63 | ident char = 245 64 | ident char = 246 65 | tokenize - getting one token 66 | tokenize - got PLUS, now at char 59 67 | tokenize - getting one token 68 | tokenize - got IDENT, now at char 61 69 | ident char = 105 70 | ident char = 100 71 | tokenize - getting one token 72 | tokenize - got IDENT, now at char 65 73 | ident char = 110 74 | ident char = 101 75 | ident char = 120 76 | ident char = 116 77 | tokenize - getting one token 78 | tokenize - got IDENT, now at char 70 79 | ident char = 108 80 | ident char = 105 81 | ident char = 110 82 | ident char = 101 83 | tokenize - getting one token 84 | tokenize - got IDENT, now at char 75 85 | ident char = 116 86 | ident char = 101 87 | ident char = 115 88 | ident char = 116 89 | ident char = 115 90 | tokenize - getting one token 91 | tokenize - got IDENT, now at char 81 92 | ident char = 115 93 | ident char = 112 94 | ident char = 101 95 | ident char = 99 96 | ident char = 105 97 | ident char = 102 98 | ident char = 105 99 | ident char = 99 100 | tokenize - 
getting one token 101 | tokenize - got IDENT, now at char 90 102 | ident char = 117 103 | ident char = 110 104 | ident char = 105 105 | ident char = 99 106 | ident char = 111 107 | ident char = 100 108 | ident char = 101 109 | tokenize - getting one token 110 | tokenize - got IDENT, now at char 98 111 | ident char = 99 112 | ident char = 104 113 | ident char = 97 114 | ident char = 114 115 | ident char = 97 116 | ident char = 99 117 | ident char = 116 118 | ident char = 101 119 | ident char = 114 120 | ident char = 115 121 | tokenize - getting one token 122 | tokenize - got IDENT, now at char 110 123 | ident char = 8800 124 | tokenize - getting one token 125 | tokenize - got IDENT, now at char 112 126 | ident char = 8800 127 | ident char = 8800 128 | tokenize - getting one token 129 | tokenize - got IDENT, now at char 115 130 | ident char = 8776 131 | ident char = 8776 132 | tokenize - getting one token 133 | tokenize - got IDENT, now at char 118 134 | ident char = 8776 135 | ident char = 8776 136 | ident char = 8776 137 | tokenize - getting one token 138 | tokenize - got IDENT, now at char 123 139 | ident char = 105 140 | ident char = 100 141 | tokenize - getting one token 142 | tokenize - got PLUS, now at char 126 143 | tokenize - getting one token 144 | tokenize - got IDENT, now at char 128 145 | ident char = 105 146 | ident char = 100 147 | tokenize - getting one token 148 | tokenize - got IDENT, now at char 132 149 | ident char = 110 150 | ident char = 101 151 | ident char = 120 152 | ident char = 116 153 | tokenize - getting one token 154 | tokenize - got IDENT, now at char 137 155 | ident char = 108 156 | ident char = 105 157 | ident char = 110 158 | ident char = 101 159 | tokenize - getting one token 160 | tokenize - got IDENT, now at char 142 161 | ident char = 116 162 | ident char = 101 163 | ident char = 115 164 | ident char = 116 165 | ident char = 115 166 | tokenize - getting one token 167 | tokenize - got IDENT, now at char 148 168 | ident char = 115 
169 | ident char = 111 170 | ident char = 109 171 | ident char = 101 172 | tokenize - getting one token 173 | tokenize - got IDENT, now at char 153 174 | ident char = 109 175 | ident char = 111 176 | ident char = 114 177 | ident char = 101 178 | tokenize - getting one token 179 | tokenize - got IDENT, now at char 158 180 | ident char = 114 181 | ident char = 97 182 | ident char = 110 183 | ident char = 100 184 | ident char = 111 185 | ident char = 109 186 | tokenize - getting one token 187 | tokenize - got IDENT, now at char 165 188 | ident char = 117 189 | ident char = 110 190 | ident char = 105 191 | ident char = 99 192 | ident char = 111 193 | ident char = 100 194 | ident char = 101 195 | tokenize - getting one token 196 | tokenize - got IDENT, now at char 173 197 | ident char = 99 198 | ident char = 104 199 | ident char = 97 200 | ident char = 114 201 | ident char = 97 202 | ident char = 99 203 | ident char = 116 204 | ident char = 101 205 | ident char = 114 206 | ident char = 115 207 | tokenize - getting one token 208 | tokenize - got IDENT, now at char 185 209 | ident char = 1052 210 | ident char = 1053 211 | ident char = 1054 212 | ident char = 1055 213 | ident char = 1056 214 | ident char = 1057 215 | ident char = 1058 216 | ident char = 1059 217 | ident char = 1060 218 | ident char = 1061 219 | ident char = 1062 220 | ident char = 7808 221 | ident char = 7809 222 | ident char = 7810 223 | ident char = 1116 224 | tokenize - getting one token 225 | tokenize - got IDENT, now at char 201 226 | ident char = 945 227 | ident char = 946 228 | ident char = 923 229 | ident char = 920 230 | ident char = 937 231 | ident char = 936 232 | ident char = 935 233 | ident char = 931 234 | ident char = 948 235 | ident char = 950 236 | ident char = 538 237 | ident char = 374 238 | ident char = 506 239 | tokenize - getting one token 240 | tokenize - got IDENT, now at char 216 241 | ident char = 105 242 | ident char = 100 243 | tokenize - getting one token 244 | tokenize - got 
EOF, now at char 218 245 | -------------------------------------------------------------------------------- /tests/fsyacc/unicode/test1-unicode.WithTitleCaseLetter.tokens.bsl: -------------------------------------------------------------------------------- 1 | tokenize - getting one token 2 | tokenize - got IDENT, now at char 2 3 | ident char = 110 4 | ident char = 101 5 | ident char = 120 6 | ident char = 116 7 | tokenize - getting one token 8 | tokenize - got IDENT, now at char 7 9 | ident char = 108 10 | ident char = 105 11 | ident char = 110 12 | ident char = 101 13 | tokenize - getting one token 14 | tokenize - got IDENT, now at char 12 15 | ident char = 116 16 | ident char = 101 17 | ident char = 115 18 | ident char = 116 19 | ident char = 115 20 | tokenize - getting one token 21 | tokenize - got IDENT, now at char 18 22 | ident char = 111 23 | ident char = 110 24 | ident char = 101 25 | tokenize - getting one token 26 | tokenize - got IDENT, now at char 22 27 | ident char = 117 28 | ident char = 110 29 | ident char = 105 30 | ident char = 99 31 | ident char = 111 32 | ident char = 100 33 | ident char = 101 34 | tokenize - getting one token 35 | tokenize - got IDENT, now at char 30 36 | ident char = 99 37 | ident char = 104 38 | ident char = 97 39 | ident char = 114 40 | ident char = 97 41 | ident char = 99 42 | ident char = 116 43 | ident char = 101 44 | ident char = 114 45 | tokenize - getting one token 46 | tokenize - got IDENT, now at char 40 47 | ident char = 99 48 | ident char = 108 49 | ident char = 97 50 | ident char = 115 51 | ident char = 115 52 | tokenize - getting one token 53 | tokenize - got IDENT, now at char 47 54 | ident char = 196 55 | ident char = 203 56 | ident char = 214 57 | ident char = 207 58 | ident char = 220 59 | ident char = 226 60 | ident char = 230 61 | ident char = 231 62 | ident char = 241 63 | ident char = 245 64 | ident char = 246 65 | tokenize - getting one token 66 | tokenize - got PLUS, now at char 59 67 | tokenize - 
getting one token 68 | tokenize - got IDENT, now at char 61 69 | ident char = 105 70 | ident char = 100 71 | tokenize - getting one token 72 | tokenize - got IDENT, now at char 65 73 | ident char = 110 74 | ident char = 101 75 | ident char = 120 76 | ident char = 116 77 | tokenize - getting one token 78 | tokenize - got IDENT, now at char 70 79 | ident char = 108 80 | ident char = 105 81 | ident char = 110 82 | ident char = 101 83 | tokenize - getting one token 84 | tokenize - got IDENT, now at char 75 85 | ident char = 116 86 | ident char = 101 87 | ident char = 115 88 | ident char = 116 89 | ident char = 115 90 | tokenize - getting one token 91 | tokenize - got IDENT, now at char 81 92 | ident char = 115 93 | ident char = 112 94 | ident char = 101 95 | ident char = 99 96 | ident char = 105 97 | ident char = 102 98 | ident char = 105 99 | ident char = 99 100 | tokenize - getting one token 101 | tokenize - got IDENT, now at char 90 102 | ident char = 117 103 | ident char = 110 104 | ident char = 105 105 | ident char = 99 106 | ident char = 111 107 | ident char = 100 108 | ident char = 101 109 | tokenize - getting one token 110 | tokenize - got IDENT, now at char 98 111 | ident char = 99 112 | ident char = 104 113 | ident char = 97 114 | ident char = 114 115 | ident char = 97 116 | ident char = 99 117 | ident char = 116 118 | ident char = 101 119 | ident char = 114 120 | ident char = 115 121 | tokenize - getting one token 122 | tokenize - got IDENT, now at char 110 123 | ident char = 8800 124 | tokenize - getting one token 125 | tokenize - got IDENT, now at char 112 126 | ident char = 8800 127 | ident char = 8800 128 | tokenize - getting one token 129 | tokenize - got IDENT, now at char 115 130 | ident char = 8776 131 | ident char = 8776 132 | tokenize - getting one token 133 | tokenize - got IDENT, now at char 118 134 | ident char = 8776 135 | ident char = 8776 136 | ident char = 8776 137 | tokenize - getting one token 138 | tokenize - got IDENT, now at char 123 
139 | ident char = 105 140 | ident char = 100 141 | tokenize - getting one token 142 | tokenize - got PLUS, now at char 126 143 | tokenize - getting one token 144 | tokenize - got IDENT, now at char 128 145 | ident char = 105 146 | ident char = 100 147 | tokenize - getting one token 148 | tokenize - got IDENT, now at char 132 149 | ident char = 110 150 | ident char = 101 151 | ident char = 120 152 | ident char = 116 153 | tokenize - getting one token 154 | tokenize - got IDENT, now at char 137 155 | ident char = 108 156 | ident char = 105 157 | ident char = 110 158 | ident char = 101 159 | tokenize - getting one token 160 | tokenize - got IDENT, now at char 142 161 | ident char = 116 162 | ident char = 101 163 | ident char = 115 164 | ident char = 116 165 | ident char = 115 166 | tokenize - getting one token 167 | tokenize - got IDENT, now at char 148 168 | ident char = 115 169 | ident char = 111 170 | ident char = 109 171 | ident char = 101 172 | tokenize - getting one token 173 | tokenize - got IDENT, now at char 153 174 | ident char = 109 175 | ident char = 111 176 | ident char = 114 177 | ident char = 101 178 | tokenize - getting one token 179 | tokenize - got IDENT, now at char 158 180 | ident char = 114 181 | ident char = 97 182 | ident char = 110 183 | ident char = 100 184 | ident char = 111 185 | ident char = 109 186 | tokenize - getting one token 187 | tokenize - got IDENT, now at char 165 188 | ident char = 117 189 | ident char = 110 190 | ident char = 105 191 | ident char = 99 192 | ident char = 111 193 | ident char = 100 194 | ident char = 101 195 | tokenize - getting one token 196 | tokenize - got IDENT, now at char 173 197 | ident char = 99 198 | ident char = 104 199 | ident char = 97 200 | ident char = 114 201 | ident char = 97 202 | ident char = 99 203 | ident char = 116 204 | ident char = 101 205 | ident char = 114 206 | ident char = 115 207 | tokenize - getting one token 208 | tokenize - got IDENT, now at char 185 209 | ident char = 498 210 | 
ident char = 1052 211 | ident char = 1053 212 | ident char = 1054 213 | ident char = 1055 214 | ident char = 1056 215 | ident char = 1057 216 | ident char = 1058 217 | ident char = 1059 218 | ident char = 1060 219 | ident char = 1061 220 | ident char = 1062 221 | ident char = 7808 222 | ident char = 7809 223 | ident char = 7810 224 | ident char = 1116 225 | tokenize - getting one token 226 | tokenize - got IDENT, now at char 202 227 | ident char = 945 228 | ident char = 946 229 | ident char = 923 230 | ident char = 920 231 | ident char = 937 232 | ident char = 936 233 | ident char = 935 234 | ident char = 931 235 | ident char = 948 236 | ident char = 950 237 | ident char = 538 238 | ident char = 374 239 | ident char = 506 240 | tokenize - getting one token 241 | tokenize - got IDENT, now at char 217 242 | ident char = 105 243 | ident char = 100 244 | tokenize - getting one token 245 | tokenize - got EOF, now at char 219 246 | -------------------------------------------------------------------------------- /src/FsLexYacc.Runtime/Lexing.fsi: -------------------------------------------------------------------------------- 1 | //========================================================================== 2 | // LexBuffers are for use with automatically generated lexical analyzers, 3 | // in particular those produced by 'fslex'. 4 | // 5 | // (c) Microsoft Corporation 2005-2008. 
6 | //=========================================================================== 7 | 8 | module FSharp.Text.Lexing 9 | 10 | open System.Collections.Generic 11 | 12 | /// Position information stored for lexing tokens 13 | [] 14 | type Position = 15 | { 16 | /// The file name for the position 17 | pos_fname: string 18 | 19 | /// The line number for the position 20 | pos_lnum: int 21 | 22 | /// The line number for the position in the original source file 23 | pos_orig_lnum: int 24 | 25 | /// The absolute offset of the beginning of the line 26 | pos_bol: int 27 | 28 | /// The absolute offset of the column for the position 29 | pos_cnum: int 30 | } 31 | 32 | /// The file name associated with the input stream. 33 | member FileName: string 34 | 35 | /// The line number in the input stream, assuming fresh positions have been updated 36 | /// using AsNewLinePos() and by modifying the EndPos property of the LexBuffer. 37 | member Line: int 38 | 39 | /// The line number for the position in the input stream, assuming fresh positions have been updated 40 | /// using AsNewLinePos() 41 | member OriginalLine: int 42 | 43 | [] 44 | member Char: int 45 | 46 | /// The character number in the input stream 47 | member AbsoluteOffset: int 48 | 49 | /// Return absolute offset of the start of the line marked by the position 50 | member StartOfLineAbsoluteOffset: int 51 | 52 | /// Return the column number marked by the position, i.e. 
the difference between the AbsoluteOffset and the StartOfLineAbsoluteOffset 53 | member Column: int 54 | 55 | // Given a position just beyond the end of a line, return a position at the start of the next line 56 | member NextLine: Position 57 | 58 | /// Given a position at the start of a token of length n, return a position just beyond the end of the token 59 | member EndOfToken: n: int -> Position 60 | 61 | /// Gives a position shifted by specified number of characters 62 | member ShiftColumnBy: by: int -> Position 63 | 64 | [] 65 | member AsNewLinePos: unit -> Position 66 | 67 | /// Get an arbitrary position, with the empty string as filename, and 68 | static member Empty: Position 69 | 70 | /// Get a position corresponding to the first line (line number 1) in a given file 71 | static member FirstLine: filename: string -> Position 72 | 73 | /// Input buffers consumed by lexers generated by fslex.exe 74 | [] 75 | type LexBuffer<'char> = 76 | /// The start position for the lexeme 77 | member StartPos: Position with get, set 78 | 79 | /// The end position for the lexeme 80 | member EndPos: Position with get, set 81 | 82 | /// The matched string 83 | member Lexeme: 'char array 84 | 85 | /// Fast helper to turn the matched characters into a string, avoiding an intermediate array 86 | static member LexemeString: LexBuffer -> string 87 | 88 | /// The length of the matched string 89 | member LexemeLength: int 90 | 91 | /// Fetch a particular character in the matched string 92 | member LexemeChar: int -> 'char 93 | 94 | /// Dynamically typed, non-lexically scoped parameter table 95 | member BufferLocalStore: IDictionary 96 | 97 | /// True if the refill of the buffer ever failed , or if explicitly set to true. 
98 | member IsPastEndOfStream: bool with get, set 99 | 100 | /// Remove all input, though don't discard the current lexeme 101 | member DiscardInput: unit -> unit 102 | 103 | /// Create a lex buffer suitable for byte lexing that reads characters from the given array 104 | static member FromBytes: byte[] -> LexBuffer 105 | 106 | /// Create a lex buffer suitable for Unicode lexing that reads characters from the given array 107 | static member FromChars: char[] -> LexBuffer 108 | 109 | /// Create a lex buffer suitable for Unicode lexing that reads characters from the given string 110 | static member FromString: string -> LexBuffer 111 | 112 | /// Create a lex buffer that reads character or byte inputs by using the given function 113 | static member FromFunction: ('char[] * int * int -> int) -> LexBuffer<'char> 114 | 115 | /// Create a lex buffer that asynchronously reads character or byte inputs by using the given function 116 | static member FromAsyncFunction: ('char[] * int * int -> Async) -> LexBuffer<'char> 117 | 118 | [.FromFunction instead")>] 119 | static member FromCharFunction: (char[] -> int -> int) -> LexBuffer 120 | 121 | [.FromFunction instead")>] 122 | static member FromByteFunction: (byte[] -> int -> int) -> LexBuffer 123 | 124 | #if !FABLE_COMPILER 125 | 126 | /// Create a lex buffer suitable for use with a Unicode lexer that reads character inputs from the given text reader 127 | static member FromTextReader: System.IO.TextReader -> LexBuffer 128 | 129 | /// Create a lex buffer suitable for use with ASCII byte lexing that reads byte inputs from the given binary reader 130 | static member FromBinaryReader: System.IO.BinaryReader -> LexBuffer 131 | 132 | #endif 133 | 134 | /// The type of tables for an ascii lexer generated by fslex. 135 | [] 136 | type AsciiTables = 137 | static member Create: uint16[] array * uint16[] -> AsciiTables 138 | 139 | /// Interpret tables for an ascii lexer generated by fslex. 
140 | member Interpret: initialState: int * LexBuffer -> int 141 | 142 | /// Interpret tables for an ascii lexer generated by fslex, processing input asynchronously 143 | [] 144 | member AsyncInterpret: initialState: int * LexBuffer -> Async 145 | 146 | /// The type of tables for an unicode lexer generated by fslex. 147 | [] 148 | type UnicodeTables = 149 | 150 | static member Create: uint16[] array * uint16[] -> UnicodeTables 151 | 152 | /// Interpret tables for a unicode lexer generated by fslex. 153 | member Interpret: initialState: int * LexBuffer -> int 154 | 155 | /// Interpret tables for a unicode lexer generated by fslex, processing input asynchronously 156 | [] 157 | member AsyncInterpret: initialState: int * LexBuffer -> Async 158 | 159 | #if !FABLE_COMPILER 160 | 161 | /// Standard utility to create a Unicode LexBuffer 162 | /// 163 | /// One small annoyance is that LexBuffers are not IDisposable. This means 164 | /// we can't just return the LexBuffer object, since the file it wraps wouldn't 165 | /// get closed when we're finished with the LexBuffer. Hence we return the stream, 166 | /// the reader and the LexBuffer. The caller should dispose the first two when done. 167 | val UnicodeFileAsLexbuf: string * int option -> System.IO.FileStream * System.IO.StreamReader * LexBuffer 168 | 169 | #endif 170 | -------------------------------------------------------------------------------- /src/FsLex.Core/fslexlex.fsl: -------------------------------------------------------------------------------- 1 | { 2 | (* (c) Microsoft Corporation 2005-2008. 
*) 3 | 4 | module FsLexYacc.FsLex.Lexer 5 | 6 | open FsLexYacc.FsLex.AST 7 | open FsLexYacc.FsLex.Parser 8 | open FSharp.Text 9 | open FSharp.Text.Lexing 10 | open System.Text 11 | 12 | let escape c = 13 | match c with 14 | | '\\' -> '\\' 15 | | '\'' -> '\'' 16 | | 'n' -> '\n' 17 | | 't' -> '\t' 18 | | 'b' -> '\b' 19 | | 'r' -> '\r' 20 | | c -> c 21 | 22 | let lexeme (lexbuf : LexBuffer) = new System.String(lexbuf.Lexeme) 23 | let newline (lexbuf:LexBuffer<_>) = lexbuf.EndPos <- lexbuf.EndPos.NextLine 24 | 25 | let unexpected_char lexbuf = 26 | failwith ("Unexpected character '"+(lexeme lexbuf)+"'") 27 | 28 | let digit d = 29 | if d >= '0' && d <= '9' then int32 d - int32 '0' 30 | else failwith "digit" 31 | 32 | let hexdigit d = 33 | if d >= '0' && d <= '9' then digit d 34 | else if d >= 'a' && d <= 'f' then int32 d - int32 'a' + 10 35 | else if d >= 'A' && d <= 'F' then int32 d - int32 'A' + 10 36 | else failwithf "bad hexdigit: %c" d 37 | 38 | let trigraph c1 c2 c3 = 39 | char (digit c1 * 100 + digit c2 * 10 + digit c3) 40 | 41 | let hexgraph c1 c2 = 42 | char (hexdigit c1 * 16 + hexdigit c2) 43 | 44 | let unicodegraph_short (s:string) = 45 | if s.Length <> 4 then failwith "unicodegraph"; 46 | char(hexdigit s.[0] * 4096 + hexdigit s.[1] * 256 + hexdigit s.[2] * 16 + hexdigit s.[3]) 47 | 48 | let unicodegraph_long (s:string) = 49 | if s.Length <> 8 then failwith "unicodegraph_long"; 50 | let high = hexdigit s.[0] * 4096 + hexdigit s.[1] * 256 + hexdigit s.[2] * 16 + hexdigit s.[3] in 51 | let low = hexdigit s.[4] * 4096 + hexdigit s.[5] * 256 + hexdigit s.[6] * 16 + hexdigit s.[7] in 52 | if high = 0 then None, char low 53 | else 54 | (* A surrogate pair - see http://www.unicode.org/unicode/uni2book/ch03.pdf, section 3.7 *) 55 | Some (char(0xD800 + ((high * 0x10000 + low - 0x10000) / 0x400))), 56 | char(0xDF30 + ((high * 0x10000 + low - 0x10000) % 0x400)) 57 | 58 | } 59 | 60 | let letter = ['A'-'Z'] | ['a'-'z'] 61 | let digit = ['0'-'9'] 62 | let whitespace = [' ' 
'\t'] 63 | let char = '\'' ( [^'\\'] | ('\\' ( '\\' | '\'' | "\"" | 'n' | 't' | 'b' | 'r'))) '\'' 64 | let hex = ['0'-'9'] | ['A'-'F'] | ['a'-'f'] 65 | let hexgraph = '\\' 'x' hex hex 66 | let trigraph = '\\' digit digit digit 67 | let newline = ('\n' | '\r' '\n') 68 | let ident_start_char = letter 69 | let ident_char = ( ident_start_char| digit | ['\'' '_'] ) 70 | let ident = ident_start_char ident_char* 71 | 72 | let unicodegraph_short = '\\' 'u' hex hex hex hex 73 | let unicodegraph_long = '\\' 'U' hex hex hex hex hex hex hex hex 74 | 75 | rule token = parse 76 | | "rule" {RULE } 77 | | "parse" {PARSE } 78 | | "eof" {EOF } 79 | | "let" {LET } 80 | | "and" {AND } 81 | | char 82 | { let s = lexeme lexbuf in 83 | CHAR (if s.[1] = '\\' then escape s.[2] else s.[1]) } 84 | 85 | | '\'' trigraph '\'' 86 | { let s = lexeme lexbuf in 87 | CHAR (trigraph s.[2] s.[3] s.[4]) } 88 | 89 | | '\'' hexgraph '\'' 90 | { let s = lexeme lexbuf in 91 | CHAR (hexgraph s.[3] s.[4]) } 92 | 93 | | '\'' unicodegraph_short '\'' 94 | { let s = lexeme lexbuf in 95 | CHAR (unicodegraph_short s.[3..6]) } 96 | 97 | | '\'' unicodegraph_long '\'' 98 | { let s = lexeme lexbuf in 99 | match (unicodegraph_long s.[3..10]) with 100 | | None, c -> CHAR(c) 101 | | Some _ , _ -> failwith "Unicode characters needing surrogate pairs are not yet supported by this tool" } 102 | 103 | | '\'' '\\' ['A'-'Z'] ['a'-'z'] '\'' 104 | { let s = (lexeme lexbuf).[2..3] in 105 | UNICODE_CATEGORY (s) } 106 | 107 | | '{' { let p = lexbuf.StartPos in 108 | let buff = (new StringBuilder 100) in 109 | // adjust the first line to get even indentation for all lines w.r.t. 
the left hand margin 110 | buff.Append (String.replicate (lexbuf.StartPos.Column+1) " ") |> ignore; 111 | code p buff lexbuf } 112 | 113 | | '"' { string lexbuf.StartPos (new StringBuilder 100) lexbuf } 114 | 115 | | whitespace+ { token lexbuf } 116 | | newline { newline lexbuf; token lexbuf } 117 | | ident_start_char ident_char* { IDENT (lexeme lexbuf) } 118 | | '|' { BAR } 119 | | '.' { DOT } 120 | | '+' { PLUS } 121 | | '*' { STAR } 122 | | '?' { QMARK } 123 | | '=' { EQUALS } 124 | | '[' { LBRACK } 125 | | ']' { RBRACK } 126 | | '(' { LPAREN } 127 | | ')' { RPAREN } 128 | | ':' { COLON } 129 | | '_' { UNDERSCORE } 130 | | '^' { HAT } 131 | | '-' { DASH } 132 | | "(*" { ignore(comment lexbuf.StartPos lexbuf); token lexbuf } 133 | | "//" [^'\n''\r']* { token lexbuf } 134 | | _ { unexpected_char lexbuf } 135 | | eof { EOF } 136 | and string p buff = parse 137 | | '\\' newline { newline lexbuf; string p buff lexbuf } 138 | | '\\' ( '"' | '\\' | '\'' | 'n' | 't' | 'b' | 'r') 139 | { let _ = buff.Append (escape (lexeme lexbuf).[1]) in 140 | string p buff lexbuf } 141 | | trigraph 142 | { let s = lexeme lexbuf in 143 | let _ = buff.Append (trigraph s.[1] s.[2] s.[3]) in 144 | string p buff lexbuf } 145 | | '"' { STRING (buff.ToString()) } 146 | | newline { newline lexbuf; 147 | let _ = buff.Append System.Environment.NewLine in 148 | string p buff lexbuf } 149 | | (whitespace | letter | digit) + 150 | { let _ = buff.Append (lexeme lexbuf) in 151 | string p buff lexbuf } 152 | | eof { failwith (Printf.sprintf "end of file in string started at (%d,%d)" p.pos_lnum (p.pos_cnum - p.pos_bol)) } 153 | | _ { let _ = buff.Append (lexeme lexbuf).[0] in 154 | string p buff lexbuf } 155 | and code p buff = parse 156 | | "}" { CODE (buff.ToString(), p) } 157 | | "{" { let _ = buff.Append (lexeme lexbuf) in 158 | ignore(code p buff lexbuf); 159 | let _ = buff.Append "}" in 160 | code p buff lexbuf } 161 | | '\\' ('"' | '\\') 162 | { let _ = buff.Append (lexeme lexbuf) in 163 | code 
p buff lexbuf } 164 | | "\"" { let _ = buff.Append (lexeme lexbuf) in 165 | ignore(codestring buff lexbuf); 166 | code p buff lexbuf } 167 | | newline { newline lexbuf; 168 | let _ = buff.Append System.Environment.NewLine in 169 | code p buff lexbuf } 170 | | (whitespace | letter | digit) + 171 | { let _ = buff.Append (lexeme lexbuf) in 172 | code p buff lexbuf } 173 | | "//" [^'\n''\r']* 174 | { let _ = buff.Append (lexeme lexbuf) in 175 | code p buff lexbuf } 176 | | eof { EOF } 177 | | _ { let _ = buff.Append (lexeme lexbuf).[0] in 178 | code p buff lexbuf } 179 | 180 | and codestring buff = parse 181 | | '\\' ('"' | '\\') 182 | { let _ = buff.Append (lexeme lexbuf) in 183 | codestring buff lexbuf } 184 | | '"' { let _ = buff.Append (lexeme lexbuf) in 185 | buff.ToString() } 186 | | newline { newline lexbuf; 187 | let _ = buff.Append System.Environment.NewLine in 188 | codestring buff lexbuf } 189 | | (whitespace | letter | digit) + 190 | { let _ = buff.Append (lexeme lexbuf) in 191 | codestring buff lexbuf } 192 | | eof { failwith "unterminated string in code" } 193 | | _ { let _ = buff.Append (lexeme lexbuf).[0] in 194 | codestring buff lexbuf } 195 | 196 | and comment p = parse 197 | | char { comment p lexbuf } 198 | | '"' { ignore(try string lexbuf.StartPos (new StringBuilder 100) lexbuf 199 | with Failure s -> failwith (s + "\n" + Printf.sprintf "error while processing string nested in comment started at (%d,%d)" p.pos_lnum (p.pos_cnum - p.pos_bol))); 200 | comment p lexbuf } 201 | | "(*" { ignore(try comment p lexbuf with Failure s -> failwith (s + "\n" + Printf.sprintf "error while processing nested comment started at (%d,%d)" p.pos_lnum (p.pos_cnum - p.pos_bol))); 202 | comment p lexbuf } 203 | | newline { newline lexbuf; comment p lexbuf } 204 | | "*)" { () } 205 | | eof { failwith (Printf.sprintf "end of file in comment started at (%d,%d)" p.pos_lnum (p.pos_cnum - p.pos_bol)) } 206 | | [^ '\'' '(' '*' '\n' '\r' '"' ')' ]+ { comment p lexbuf } 207 | | _ 
{ comment p lexbuf }


--------------------------------------------------------------------------------
/tests/fsyacc/OldFsYaccTests.fsx:
--------------------------------------------------------------------------------
#r @"paket:
frameworks: net6.0

nuget FSharp.Core ~> 5.0
nuget Fake.IO.FileSystem
nuget Fake.DotNet.Fsc
nuget Fake.Core.Trace //"

#if !FAKE
#load "./.fake/oldfsyacctests.fsx/intellisense.fsx"
#r "netstandard" // Temp fix for https://github.com/fsharp/FAKE/issues/1985
#endif

open System
open System.Runtime.InteropServices
open System.IO

open Fake.IO
open Fake.Core

/// Fails the script when the given file is missing; logs it otherwise.
let assertFileExists file =
    if not (File.Exists file) then
        failwithf "'%s' doesn't exist" file
    else
        printfn "%s exists!" file

/// Runs `dotnet <arguments>` and fails on a non-zero exit code.
let dotnet arguments =
    let result =
        CreateProcess.fromRawCommandLine "dotnet" arguments
        |> Proc.run

    match result.ExitCode with
    | 0 -> ()
    | _ -> failwithf "Failed to run \"dotnet %s\"" arguments

/// Builds a project in Release configuration and then runs it with `args`.
let run project args =
    Trace.traceImportant <| sprintf "Running '%s' with args %s" project args
    dotnet $"build {project} -c Release"
    dotnet $"run --project {project} {args}"

/// Runs `proj` with `args`, normalises machine-specific paths in the captured
/// output, and diffs it against the baseline file. `shouldBeOK` states whether
/// the process is expected to exit with code 0.
let test proj shouldBeOK (args, baseLineOutput) =
    Trace.traceImportant <| sprintf "Running '%s' with args '%s'" proj args

    let res =
        CreateProcess.fromRawCommandLine "dotnet" $"run --project {proj} {args}"
        |> CreateProcess.redirectOutput
        |> Proc.run

    if (res.ExitCode = 0) <> shouldBeOK then
        failwithf "Process failed with code %d on input %s" res.ExitCode args

    // "parsed <path> ..." lines carry absolute paths; keep only the file name
    // so the output can be compared across machines and operating systems.
    let dirSeparator =
        if RuntimeInformation.IsOSPlatform(OSPlatform.Windows) then
            @"\"
        else
            "/"

    let normalise (line: string) =
        if line.StartsWith("parsed") then
            let pieces = line.Split(' ')
            let path = pieces.[1]
            let cut = path.LastIndexOf(dirSeparator)
            let fileName = if cut >= 0 then path.[cut + 1 ..] else path
            pieces.[0] + " " + fileName + " " + pieces.[2]
        else
            line

    let output =
        // For some reason, the output is captured in the stderr
        res.Result.Error.Split('\n', StringSplitOptions.RemoveEmptyEntries)
        |> Array.map normalise

    if not (File.Exists baseLineOutput) then
        failwithf "Baseline file '%s' does not exist" baseLineOutput

    let expectedLines = File.ReadAllLines baseLineOutput

    let isMatch =
        output.Length = expectedLines.Length
        && Seq.forall2 (=) output expectedLines

    if not isMatch then
        printfn "Expected:"
        expectedLines |> Array.iter (printfn "\t%s")

        printfn "Output:"
        output |> Array.iter (printfn "\t%s")

        File.WriteAllLines(baseLineOutput + ".err", output)
        failwithf "Output is not equal to expected base line '%s'" baseLineOutput

// Shorthands for paths relative to this script.
let here = __SOURCE_DIRECTORY__
let inTest1 file = Path.Combine(here, "Test1", file)
let inTest2 file = Path.Combine(here, "Test2", file)
let inUnicode file = Path.Combine(here, "unicode", file)

let fslexProject = Path.Combine(here, "..", "..", "src", "FsLex", "fslex.fsproj")
let fsYaccProject = Path.Combine(here, "..", "..", "src", "FsYacc", "fsyacc.fsproj")

assertFileExists fslexProject
assertFileExists fsYaccProject

let fsLex = run fslexProject
let fsYacc = run fsYaccProject

let repro1885Fsl = Path.Combine(here, "repro1885", "repro1885.fsl")
// Regression test for FSB 1885
fsLex repro1885Fsl

// Test 1
let test1lexFs = inTest1 "test1lex.fs"
let test1lexFsl = inTest1 "test1lex.fsl"
let test1Fs = inTest1 "test1.fs"
let test1Fsy = inTest1 "test1.fsy"
let test1Input1 = inTest1 "test1.input1"
let test1Input1Bsl = inTest1 "test1.input1.bsl"
let test1Input1TokensBsl = inTest1 "test1.input1.tokens.bsl"
let test1Input2Variation1 = inTest1 "test1.input2.variation1"
let test1Input2Variation2 = inTest1 "test1.input2.variation2"
let test1Input2Bsl = inTest1 "test1.input2.bsl"
let test1Input3 = inTest1 "test1.input3"
let test1Input3Bsl = inTest1 "test1.input3.bsl"
let test1Input3TokensBsl = inTest1 "test1.input3.tokens.bsl"
let test1Proj = inTest1 "test1.fsproj"
let test1Input4 = inTest1 "test1.input4"
let test1Input4Bsl = inTest1 "test1.input4.bsl"
let test1Input4TokensBsl = inTest1 "test1.input4.tokens.bsl"


/// Builds `projFile` once, then runs every (args, baseline) pair through `test`.
let runTests' shouldBeOK projFile xs =
    dotnet $"build {projFile} -c Release"
    xs |> List.iter (test projFile shouldBeOK)

let runTests projFile xs = runTests' true projFile xs

fsLex $"-o {test1lexFs} {test1lexFsl}"
fsYacc $"--module TestParser -o {test1Fs} {test1Fsy}"
runTests test1Proj [
    $"--tokens {test1Input1}", test1Input1TokensBsl
    test1Input1, test1Input1Bsl
    // NOTE(review): input1 is run twice here; presumably a deliberate
    // repeat-run check — confirm before removing.
    test1Input1, test1Input1Bsl
    $"{test1Input2Variation1} {test1Input2Variation2}", test1Input2Bsl
    $"--tokens {test1Input3}", test1Input3TokensBsl
    test1Input3, test1Input3Bsl
]

// Case insensitive option test
fsLex $"-i -o {test1lexFs} {test1lexFsl}"
runTests test1Proj [
    $"--tokens {test1Input4}", test1Input4TokensBsl
    // With -i, input3 (different casing) is checked against input4's baseline.
    $"--tokens {test1Input3}", test1Input4TokensBsl
    $"{test1Input3} {test1Input4}", test1Input4Bsl
]

// Test 1 unicode
let test1unicodelexFs = inUnicode "test1-unicode-lex.fs"
let test1unicodelexFsl = inUnicode "test1-unicode-lex.fsl"
let test1unicodeFs = inUnicode "test1-unicode.fs"
let test1unicodeFsy = inUnicode "test1-unicode.fsy"
let test1unicodeProj = inUnicode "test1-unicode.fsproj"
let test1unicodeInput3 = inUnicode "test1-unicode.input3.utf8"
let test1unicodeInput3TokensBsl = inUnicode "test1-unicode.input3.tokens.bsl"
let test1unicodeWithTitleCaseLetter = inUnicode "test1-unicode.WithTitleCaseLetter.utf8"
let test1unicodeWithTitleCaseLetterTokensBsl = inUnicode "test1-unicode.WithTitleCaseLetter.tokens.bsl"
let test1unicodeWithTitleCaseLetterTokensErrorBsl = inUnicode "test1-unicode.WithTitleCaseLetter.tokens.error.bsl"

fsLex $"--unicode -o {test1unicodelexFs} {test1unicodelexFsl}"
fsYacc $"--module TestParser -o {test1unicodeFs} {test1unicodeFsy}"

runTests test1unicodeProj [
    $"--tokens {test1Input1}", test1Input1TokensBsl
    test1Input1, test1Input1Bsl
    $"{test1Input2Variation1} {test1Input2Variation2}", test1Input2Bsl
    $"--tokens {test1unicodeInput3}", test1unicodeInput3TokensBsl
]

// Without -i, the title-case letter input is expected to fail.
runTests' false test1unicodeProj [
    $"--tokens {test1unicodeWithTitleCaseLetter}", test1unicodeWithTitleCaseLetterTokensErrorBsl
]

// Case insensitive option test
fsLex $"--unicode -i -o {test1unicodelexFs} {test1unicodelexFsl}"
runTests test1unicodeProj [
    $"--tokens {test1Input1}", test1Input1TokensBsl
    test1Input1, test1Input1Bsl
    $"{test1Input2Variation1} {test1Input2Variation2}", test1Input2Bsl
    $"--tokens {test1unicodeInput3}", test1unicodeInput3TokensBsl
    $"--tokens {test1unicodeWithTitleCaseLetter}", test1unicodeWithTitleCaseLetterTokensBsl
]

// Test 2
let test2lexFs = inTest2 "test2lex.fs"
// Test2 reuses Test1's lexer spec on purpose: there is no .fsl file under
// tests/fsyacc/Test2.
let test2lexFsl = inTest1 "test1lex.fsl"
let test2Fs = inTest2 "test2.fs"
let test2Fsy = inTest2 "test2.fsy"
let test2Proj = inTest2 "test2.fsproj"
let test2Input1 = inTest2 "test2.input1"
let test2Input1TokensBsl = inTest2 "test2.input1.tokens.bsl"
let test2BadInput = inTest2 "test2.badInput"
let test2BadInputTokensBsl = inTest2 "test2.badInput.tokens.bsl"

fsLex $"-o {test2lexFs} {test2lexFsl}"
fsYacc $"--module TestParser -o {test2Fs} {test2Fsy}"

runTests test2Proj [
    $"--tokens {test2Input1}", test2Input1TokensBsl
    $"--tokens {test2BadInput}", test2BadInputTokensBsl
]

// #141 TODO
let repro141Fsl = Path.Combine(here, "repro_#141", "Lexer_fail_option_i.fsl")
let repro141Fs = Path.Combine(here, "repro_#141", "Lexer_fail_option_i.fs")
fsLex $"-i -o {repro141Fs} {repro141Fsl}"
fsLex $"--unicode -i -o {repro141Fs} {repro141Fsl}"


--------------------------------------------------------------------------------
/src/FsLex.Core/fslexdriver.fs:
--------------------------------------------------------------------------------
module FsLexYacc.FsLex.Driver

open FsLexYacc.FsLex.AST
open System
open System.IO
open FSharp.Text.Lexing
open System.Collections.Generic

/// Character-set handling mode for the generated tables.
type Domain =
    | Unicode
    | ASCII

/// Wraps the inputs to the code generator
type GeneratorState =
    { inputFileName: string
      outputFileName: string
      inputCodePage: System.Text.Encoding
      generatedModuleName: string option
      disableLightMode: bool option
      generateInternalModule: bool
      opens: string list
      lexerLibraryName: string
      domain: Domain }

// NOTE(review): the generic arguments of these aliases were garbled in the
// checked-in text; reconstructed from how writeRules consumes perRuleData
// (startNode: DfaNode, actions: seq of (code, position)) — confirm.
type PerRuleData = list<DfaNode * seq<string * Position>>
type DfaNodes = list<DfaNode>

/// Writes the generated implementation and signature files while tracking how
/// many lines have been emitted, so `# line` directives can be kept accurate.
type Writer(outputFileName, outputFileInterface) =
    let os = File.CreateText outputFileName :> TextWriter
    let osi = File.CreateText outputFileInterface :> TextWriter
    let mutable lineCount = 0
    let mutable interfaceLineCount = 0
    let bumpLineCount () = lineCount <- lineCount + 1

    /// printf-style write of one line to the implementation file.
    member x.WriteLine fmt =
        let finish () =
            bumpLineCount ()
            os.WriteLine()

        Printf.kfprintf finish os fmt

    member x.Write fmt = Printf.fprintf os fmt

    /// Emits a user code block bracketed by `# line` directives pointing back
    /// at the source .fsl file and then at the generated file again.
    member x.WriteCode(code, pos: Position) =
        // If bottom code is unspecified, then position is empty.
        if pos <> Position.Empty then
            x.WriteLine "# %d \"%s\"" pos.Line pos.FileName
            x.WriteLine "%s" code
            let numLines = code.Replace("\r", "").Split([| '\n' |]).Length
            lineCount <- lineCount + numLines
            x.WriteLine "# %d \"%s\"" lineCount outputFileName

    /// Writes one table entry as an F# uint16 literal followed by ';'.
    member x.WriteUint16(n: int) =
        os.Write n
        os.Write "us;"

    member x.LineCount = lineCount

    member x.WriteInterface format = fprintf osi format

    member x.WriteLineInterface format =
        let finish _ =
            interfaceLineCount <- interfaceLineCount + 1
            osi.WriteLine()

        Printf.kfprintf finish osi format

    member x.InterfaceLineCount = interfaceLineCount

    interface IDisposable with
        member x.Dispose() =
            os.Dispose()
            osi.Dispose()

/// "No transition" marker in the encoded tables: 255 * 256 + 255 = 0xFFFF.
let sentinel = 255 * 256 + 255

/// Parses a lexer specification file. Returns `Ok spec`, or `Error` carrying
/// the exception plus the line/column where lexing stopped.
let readSpecFromFile fileName codePage =
    let stream, reader, lexbuf = UnicodeFileAsLexbuf(fileName, codePage)
    use stream = stream
    use reader = reader

    try
        Ok(Parser.spec Lexer.token lexbuf)
    with e ->
        Error(e, lexbuf.StartPos.Line, lexbuf.StartPos.Column)

/// Emits `#light "off"` when explicitly requested (Some false), or implicitly
/// when no preference was given and the output file is a .ml file.
let writeLightMode lightModeDisabled (fileName: string) (writer: Writer) =
    let isMlFile =
        Path.HasExtension(fileName) && Path.GetExtension(fileName) = ".ml"

    match lightModeDisabled with
    | Some false -> writer.Write "#light \"off\""
    | None when isMlFile -> writer.Write "#light \"off\""
    | _ -> ()

/// Writes the `module` declaration to both output files, if one was requested.
let writeModuleExpression genModuleName isInternal (writer: Writer) =
    genModuleName
    |> Option.iter (fun name ->
        let internal_tag = if isInternal then "internal " else ""
        writer.WriteLine "module %s%s" internal_tag name
        writer.WriteLineInterface "module %s%s" internal_tag name)

/// Writes the `open` declarations to both output files.
let writeOpens opens (writer: Writer) =
    writer.WriteLine ""
    writer.WriteLineInterface ""

    for ns in opens do
        writer.WriteLine "open %s" ns
        writer.WriteLineInterface "open %s" ns

    if not (Seq.isEmpty opens) then
        writer.WriteLine ""
        writer.WriteLineInterface ""

/// Emits the user's header code, and copies its `module`/`open` lines into
/// the signature file so the two stay in sync.
let writeTopCode code (writer: Writer) =
    writer.WriteCode code

    let moduleAndOpens =
        (fst code).Split([| '\n'; '\r' |])
        |> Array.filter (fun line ->
            line.StartsWith("module ", StringComparison.Ordinal)
            || line.StartsWith("open ", StringComparison.Ordinal))
        |> String.concat Environment.NewLine

    writer.WriteInterface "%s" moduleAndOpens

/// Emits the DFA transition table for the chosen character domain.
let writeUnicodeTranslationArray dfaNodes domain (writer: Writer) =
    let parseContext =
        { unicode =
            match domain with
            | Unicode -> true
            | ASCII -> false
          caseInsensitive = false }

    // Builds an emitter for one state: writes the target state's id for a
    // known transition key, or the sentinel when there is none.
    let makeEmit (state: DfaNode) =
        let transitions = Dictionary<_, _>()
        state.Transitions |> List.iter transitions.Add

        fun key ->
            match transitions.TryGetValue key with
            | true, target -> writer.WriteUint16 target.Id
            | false, _ -> writer.WriteUint16 sentinel

    writer.WriteLine "let trans : uint16[] array = "
    writer.WriteLine " [| "

    match domain with
    | Unicode ->
        let specificUnicodeChars = GetSpecificUnicodeChars()
        // This emits a (numLowUnicodeChars+NumUnicodeCategories+(2*#specificUnicodeChars)+1) * #states array of encoded UInt16 values

        // Each row for the Unicode table has format
        // 128 entries for ASCII characters
        // A variable number of 2*UInt16 entries for SpecificUnicodeChars
        // 30 entries, one for each UnicodeCategory
        // 1 entry for EOF
        //
        // Each entry is an encoded UInt16 value indicating the next state to transition to for this input.
        //
        // For the SpecificUnicodeChars the entries are char/next-state pairs.
        for state in dfaNodes do
            writer.WriteLine " (* State %d *)" state.Id
            writer.Write " [| "

            let emit = makeEmit state

            for i = 0 to numLowUnicodeChars - 1 do
                emit (EncodeChar (char i) parseContext)

            for c in specificUnicodeChars do
                writer.WriteUint16(int c)
                emit (EncodeChar c parseContext)

            for i = 0 to NumUnicodeCategories - 1 do
                emit (EncodeUnicodeCategoryIndex i)

            emit Eof
            writer.WriteLine "|];"

    | ASCII ->
        // Each row for the ASCII table has format
        // 256 entries for ASCII characters
        // 1 entry for EOF
        //
        // Each entry is an encoded UInt16 value indicating the next state to transition to for this input.

        // This emits a (256+1) * #states array of encoded UInt16 values
        for state in dfaNodes do
            writer.WriteLine " (* State %d *)" state.Id
            writer.Write " [|"

            let emit = makeEmit state

            for i = 0 to 255 do
                emit (EncodeChar (char i) parseContext)

            emit Eof
            writer.WriteLine "|];"

    writer.WriteLine " |] "

/// Emits the per-state action table: the rule index accepted in each state,
/// or the sentinel for non-accepting states.
let writeUnicodeActionsArray dfaNodes (writer: Writer) =
    writer.Write "let actions : uint16[] = [|"

    for state in dfaNodes do
        match state.Accepted with
        | (_, action) :: _ -> writer.WriteUint16 action
        | [] -> writer.WriteUint16 sentinel

    writer.WriteLine "|]"

/// Emits both tables plus the `_fslex_tables` binding that ties them together.
let writeUnicodeTables lexerLibraryName domain dfaNodes (writer: Writer) =
    writeUnicodeTranslationArray dfaNodes domain writer
    writeUnicodeActionsArray dfaNodes writer

    let tableKind =
        match domain with
        | Unicode -> "Unicode"
        | ASCII -> "Ascii"

    writer.WriteLine "let _fslex_tables = %s.%sTables.Create(trans,actions)" lexerLibraryName tableKind

/// Emits one F# function per lexer rule, plus its signature-file entry.
let writeRules (rules: Rule list) (perRuleData: PerRuleData) outputFileName (writer: Writer) =
    writer.WriteLine "let rec _fslex_dummy () = _fslex_dummy() "

    // These actions push the additional start state and come first, because they are then typically inlined into later
    // rules. This means more tailcalls are taken as direct branches, increasing efficiency and
    // improving stack usage on platforms that do not take tailcalls.
    for (startNode, actions), (ident, args, _) in List.zip perRuleData rules do
        writer.WriteLine "// Rule %s" ident
        writer.WriteLineInterface "/// Rule %s" ident

        let argText arg =
            match arg with
            | RuleArgument.Ident name -> name
            | RuleArgument.Typed(name, typ) -> sprintf "(%s: %s)" name typ

        writer.WriteLine "and %s %s lexbuf =" ident (args |> List.map argText |> String.concat " ")

        // NOTE(review): "<char>" in the LexBuffer signatures below was lost in
        // the checked-in text and has been restored — confirm against history.
        let signature =
            match args with
            | [] -> sprintf "val %s: lexbuf: LexBuffer<char> -> token" ident
            | _ ->
                args
                |> List.map (fun arg ->
                    match arg with
                    | RuleArgument.Ident name ->
                        // This is not going to lead to a valid signature file, the only workaround is that the caller will specify the type.
                        sprintf "%s: obj" name
                    | RuleArgument.Typed(name, typ) -> sprintf "%s: %s" name typ)
                |> String.concat " -> "
                |> sprintf "val %s: %s -> lexbuf: LexBuffer<char> -> token" ident

        writer.WriteLineInterface "%s" signature

        writer.WriteLine " match _fslex_tables.Interpret(%d,lexbuf) with" startNode.Id

        actions
        |> Seq.iteri (fun i (code: string, pos) ->
            writer.WriteLine " | %d -> ( " i
            writer.WriteLine "# %d \"%s\"" pos.Line pos.FileName

            for line in code.Split([| '\r'; '\n' |], StringSplitOptions.RemoveEmptyEntries) do
                writer.WriteLine " %s" line

            writer.WriteLine "# %d \"%s\"" writer.LineCount outputFileName
            writer.WriteLine " )")

        writer.WriteLine " | _ -> failwith \"%s\"" ident

    writer.WriteLine ""

/// Emits the user's trailing code block.
let writeBottomCode code (writer: Writer) = writer.WriteCode code

/// Terminating `# line` directive; the large line number marks generated EOF.
let writeFooter outputFileName (writer: Writer) =
    writer.WriteLine "# 3000000 \"%s\"" outputFileName

/// Drives the whole code-generation pipeline for one lexer specification.
let writeSpecToFile (state: GeneratorState) (spec: Spec) (perRuleData: PerRuleData) (dfaNodes: DfaNodes) =
    let output = state.outputFileName
    let outputi = output + "i"
    use writer = new Writer(output, outputi)
    writeLightMode state.disableLightMode state.outputFileName writer
    writeModuleExpression state.generatedModuleName state.generateInternalModule writer
    writeOpens state.opens writer
    writeTopCode spec.TopCode writer
    writeUnicodeTables state.lexerLibraryName state.domain dfaNodes writer
    writeRules spec.Rules perRuleData state.outputFileName writer
    writeBottomCode spec.BottomCode writer
    writeFooter state.outputFileName writer


--------------------------------------------------------------------------------
/FsLexYacc.sln:
--------------------------------------------------------------------------------
Microsoft Visual
Studio Solution File, Format Version 12.00 2 | # Visual Studio Version 16 3 | VisualStudioVersion = 16.0.28803.202 4 | MinimumVisualStudioVersion = 10.0.40219.1 5 | Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "project", "project", "{BF60BC93-E09B-4E5F-9D85-95A519479D54}" 6 | ProjectSection(SolutionItems) = preProject 7 | build.fsx = build.fsx 8 | nuget\FsLexYacc.template = nuget\FsLexYacc.template 9 | README.md = README.md 10 | RELEASE_NOTES.md = RELEASE_NOTES.md 11 | EndProjectSection 12 | EndProject 13 | Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "tests", "tests", "{ED8079DD-2B06-4030-9F0F-DC548F98E1C4}" 14 | ProjectSection(SolutionItems) = preProject 15 | tests\fsyacc\OldFsYaccTests.fsx = tests\fsyacc\OldFsYaccTests.fsx 16 | EndProjectSection 17 | EndProject 18 | Project("{F2A71F9B-5D33-465A-A702-920D77279786}") = "JsonLexAndYaccExample", "tests\JsonLexAndYaccExample\JsonLexAndYaccExample.fsproj", "{3A7662D3-A30C-4BD4-BA0A-08A53DC59445}" 19 | EndProject 20 | Project("{F2A71F9B-5D33-465A-A702-920D77279786}") = "LexAndYaccMiniProject", "tests\LexAndYaccMiniProject\LexAndYaccMiniProject.fsproj", "{91D0BE7A-E128-498A-BB68-6ED65A582E04}" 21 | EndProject 22 | Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "docs", "docs", "{4BB66375-380B-4EBD-9BA6-40CE92EB3D98}" 23 | EndProject 24 | Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "src", "src", "{884C599D-FDE2-4AC3-828A-12F6C662F273}" 25 | EndProject 26 | Project("{F2A71F9B-5D33-465A-A702-920D77279786}") = "FsLexYacc.Runtime", "src\FsLexYacc.Runtime\FsLexYacc.Runtime.fsproj", "{31A44BBA-0A6C-48FE-BB45-5BC23190A587}" 27 | EndProject 28 | Project("{F2A71F9B-5D33-465A-A702-920D77279786}") = "FsLex.Core", "src\FsLex.Core\FsLex.Core.fsproj", "{52D108CA-B379-4C30-BD85-0AE8E0C5723B}" 29 | EndProject 30 | Project("{F2A71F9B-5D33-465A-A702-920D77279786}") = "fslex", "src\FsLex\fslex.fsproj", "{D64B2492-43AA-4436-B6D5-6CBFE44989DF}" 31 | EndProject 32 | Project("{F2A71F9B-5D33-465A-A702-920D77279786}") = 
"FsYacc.Core", "src\FsYacc.Core\FsYacc.Core.fsproj", "{C73D328C-4247-4F99-81BF-2E274410E9C4}" 33 | EndProject 34 | Project("{F2A71F9B-5D33-465A-A702-920D77279786}") = "fsyacc", "src\FsYacc\fsyacc.fsproj", "{FC9E0584-0139-4D02-8017-29AD01282449}" 35 | EndProject 36 | Project("{F2A71F9B-5D33-465A-A702-920D77279786}") = "FsLex.Core.Tests", "tests\FsLex.Core.Tests\FsLex.Core.Tests.fsproj", "{BEC28BC7-9F2E-4B2D-948B-F5E0648FB928}" 37 | EndProject 38 | Project("{F2A71F9B-5D33-465A-A702-920D77279786}") = "FsYacc.Core.Tests", "tests\FsYacc.Core.Tests\FsYacc.Core.Tests.fsproj", "{F66C2590-5FDD-4962-9EEB-AD1B74545EAE}" 39 | EndProject 40 | Global 41 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 42 | Debug|Any CPU = Debug|Any CPU 43 | Debug|Mixed Platforms = Debug|Mixed Platforms 44 | Debug|x86 = Debug|x86 45 | Release|Any CPU = Release|Any CPU 46 | Release|Mixed Platforms = Release|Mixed Platforms 47 | Release|x86 = Release|x86 48 | EndGlobalSection 49 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 50 | {3A7662D3-A30C-4BD4-BA0A-08A53DC59445}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 51 | {3A7662D3-A30C-4BD4-BA0A-08A53DC59445}.Debug|Any CPU.Build.0 = Debug|Any CPU 52 | {3A7662D3-A30C-4BD4-BA0A-08A53DC59445}.Debug|Mixed Platforms.ActiveCfg = Debug|Any CPU 53 | {3A7662D3-A30C-4BD4-BA0A-08A53DC59445}.Debug|Mixed Platforms.Build.0 = Debug|Any CPU 54 | {3A7662D3-A30C-4BD4-BA0A-08A53DC59445}.Debug|x86.ActiveCfg = Debug|Any CPU 55 | {3A7662D3-A30C-4BD4-BA0A-08A53DC59445}.Debug|x86.Build.0 = Debug|Any CPU 56 | {3A7662D3-A30C-4BD4-BA0A-08A53DC59445}.Release|Any CPU.ActiveCfg = Release|Any CPU 57 | {3A7662D3-A30C-4BD4-BA0A-08A53DC59445}.Release|Any CPU.Build.0 = Release|Any CPU 58 | {3A7662D3-A30C-4BD4-BA0A-08A53DC59445}.Release|Mixed Platforms.ActiveCfg = Release|Any CPU 59 | {3A7662D3-A30C-4BD4-BA0A-08A53DC59445}.Release|Mixed Platforms.Build.0 = Release|Any CPU 60 | {3A7662D3-A30C-4BD4-BA0A-08A53DC59445}.Release|x86.ActiveCfg = Release|Any CPU 61 | 
{3A7662D3-A30C-4BD4-BA0A-08A53DC59445}.Release|x86.Build.0 = Release|Any CPU 62 | {91D0BE7A-E128-498A-BB68-6ED65A582E04}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 63 | {91D0BE7A-E128-498A-BB68-6ED65A582E04}.Debug|Any CPU.Build.0 = Debug|Any CPU 64 | {91D0BE7A-E128-498A-BB68-6ED65A582E04}.Debug|Mixed Platforms.ActiveCfg = Debug|Any CPU 65 | {91D0BE7A-E128-498A-BB68-6ED65A582E04}.Debug|Mixed Platforms.Build.0 = Debug|Any CPU 66 | {91D0BE7A-E128-498A-BB68-6ED65A582E04}.Debug|x86.ActiveCfg = Debug|Any CPU 67 | {91D0BE7A-E128-498A-BB68-6ED65A582E04}.Debug|x86.Build.0 = Debug|Any CPU 68 | {91D0BE7A-E128-498A-BB68-6ED65A582E04}.Release|Any CPU.ActiveCfg = Release|Any CPU 69 | {91D0BE7A-E128-498A-BB68-6ED65A582E04}.Release|Any CPU.Build.0 = Release|Any CPU 70 | {91D0BE7A-E128-498A-BB68-6ED65A582E04}.Release|Mixed Platforms.ActiveCfg = Release|Any CPU 71 | {91D0BE7A-E128-498A-BB68-6ED65A582E04}.Release|Mixed Platforms.Build.0 = Release|Any CPU 72 | {91D0BE7A-E128-498A-BB68-6ED65A582E04}.Release|x86.ActiveCfg = Release|Any CPU 73 | {91D0BE7A-E128-498A-BB68-6ED65A582E04}.Release|x86.Build.0 = Release|Any CPU 74 | {52D108CA-B379-4C30-BD85-0AE8E0C5723B}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 75 | {52D108CA-B379-4C30-BD85-0AE8E0C5723B}.Debug|Any CPU.Build.0 = Debug|Any CPU 76 | {52D108CA-B379-4C30-BD85-0AE8E0C5723B}.Debug|Mixed Platforms.ActiveCfg = Debug|Any CPU 77 | {52D108CA-B379-4C30-BD85-0AE8E0C5723B}.Debug|Mixed Platforms.Build.0 = Debug|Any CPU 78 | {52D108CA-B379-4C30-BD85-0AE8E0C5723B}.Debug|x86.ActiveCfg = Debug|Any CPU 79 | {52D108CA-B379-4C30-BD85-0AE8E0C5723B}.Debug|x86.Build.0 = Debug|Any CPU 80 | {52D108CA-B379-4C30-BD85-0AE8E0C5723B}.Release|Any CPU.ActiveCfg = Release|Any CPU 81 | {52D108CA-B379-4C30-BD85-0AE8E0C5723B}.Release|Any CPU.Build.0 = Release|Any CPU 82 | {52D108CA-B379-4C30-BD85-0AE8E0C5723B}.Release|Mixed Platforms.ActiveCfg = Release|Any CPU 83 | {52D108CA-B379-4C30-BD85-0AE8E0C5723B}.Release|Mixed Platforms.Build.0 = Release|Any CPU 84 | 
{52D108CA-B379-4C30-BD85-0AE8E0C5723B}.Release|x86.ActiveCfg = Release|Any CPU 85 | {52D108CA-B379-4C30-BD85-0AE8E0C5723B}.Release|x86.Build.0 = Release|Any CPU 86 | {C73D328C-4247-4F99-81BF-2E274410E9C4}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 87 | {C73D328C-4247-4F99-81BF-2E274410E9C4}.Debug|Any CPU.Build.0 = Debug|Any CPU 88 | {C73D328C-4247-4F99-81BF-2E274410E9C4}.Debug|Mixed Platforms.ActiveCfg = Debug|Any CPU 89 | {C73D328C-4247-4F99-81BF-2E274410E9C4}.Debug|Mixed Platforms.Build.0 = Debug|Any CPU 90 | {C73D328C-4247-4F99-81BF-2E274410E9C4}.Debug|x86.ActiveCfg = Debug|Any CPU 91 | {C73D328C-4247-4F99-81BF-2E274410E9C4}.Debug|x86.Build.0 = Debug|Any CPU 92 | {C73D328C-4247-4F99-81BF-2E274410E9C4}.Release|Any CPU.ActiveCfg = Release|Any CPU 93 | {C73D328C-4247-4F99-81BF-2E274410E9C4}.Release|Any CPU.Build.0 = Release|Any CPU 94 | {C73D328C-4247-4F99-81BF-2E274410E9C4}.Release|Mixed Platforms.ActiveCfg = Release|Any CPU 95 | {C73D328C-4247-4F99-81BF-2E274410E9C4}.Release|Mixed Platforms.Build.0 = Release|Any CPU 96 | {C73D328C-4247-4F99-81BF-2E274410E9C4}.Release|x86.ActiveCfg = Release|Any CPU 97 | {C73D328C-4247-4F99-81BF-2E274410E9C4}.Release|x86.Build.0 = Release|Any CPU 98 | {FC9E0584-0139-4D02-8017-29AD01282449}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 99 | {FC9E0584-0139-4D02-8017-29AD01282449}.Debug|Any CPU.Build.0 = Debug|Any CPU 100 | {FC9E0584-0139-4D02-8017-29AD01282449}.Debug|Mixed Platforms.ActiveCfg = Debug|Any CPU 101 | {FC9E0584-0139-4D02-8017-29AD01282449}.Debug|Mixed Platforms.Build.0 = Debug|Any CPU 102 | {FC9E0584-0139-4D02-8017-29AD01282449}.Debug|x86.ActiveCfg = Debug|Any CPU 103 | {FC9E0584-0139-4D02-8017-29AD01282449}.Debug|x86.Build.0 = Debug|Any CPU 104 | {FC9E0584-0139-4D02-8017-29AD01282449}.Release|Any CPU.ActiveCfg = Release|Any CPU 105 | {FC9E0584-0139-4D02-8017-29AD01282449}.Release|Any CPU.Build.0 = Release|Any CPU 106 | {FC9E0584-0139-4D02-8017-29AD01282449}.Release|Mixed Platforms.ActiveCfg = Release|Any CPU 107 | 
{FC9E0584-0139-4D02-8017-29AD01282449}.Release|Mixed Platforms.Build.0 = Release|Any CPU 108 | {FC9E0584-0139-4D02-8017-29AD01282449}.Release|x86.ActiveCfg = Release|Any CPU 109 | {FC9E0584-0139-4D02-8017-29AD01282449}.Release|x86.Build.0 = Release|Any CPU 110 | {D64B2492-43AA-4436-B6D5-6CBFE44989DF}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 111 | {D64B2492-43AA-4436-B6D5-6CBFE44989DF}.Debug|Any CPU.Build.0 = Debug|Any CPU 112 | {D64B2492-43AA-4436-B6D5-6CBFE44989DF}.Debug|Mixed Platforms.ActiveCfg = Debug|Any CPU 113 | {D64B2492-43AA-4436-B6D5-6CBFE44989DF}.Debug|Mixed Platforms.Build.0 = Debug|Any CPU 114 | {D64B2492-43AA-4436-B6D5-6CBFE44989DF}.Debug|x86.ActiveCfg = Debug|Any CPU 115 | {D64B2492-43AA-4436-B6D5-6CBFE44989DF}.Debug|x86.Build.0 = Debug|Any CPU 116 | {D64B2492-43AA-4436-B6D5-6CBFE44989DF}.Release|Any CPU.ActiveCfg = Release|Any CPU 117 | {D64B2492-43AA-4436-B6D5-6CBFE44989DF}.Release|Any CPU.Build.0 = Release|Any CPU 118 | {D64B2492-43AA-4436-B6D5-6CBFE44989DF}.Release|Mixed Platforms.ActiveCfg = Release|Any CPU 119 | {D64B2492-43AA-4436-B6D5-6CBFE44989DF}.Release|Mixed Platforms.Build.0 = Release|Any CPU 120 | {D64B2492-43AA-4436-B6D5-6CBFE44989DF}.Release|x86.ActiveCfg = Release|Any CPU 121 | {D64B2492-43AA-4436-B6D5-6CBFE44989DF}.Release|x86.Build.0 = Release|Any CPU 122 | {31A44BBA-0A6C-48FE-BB45-5BC23190A587}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 123 | {31A44BBA-0A6C-48FE-BB45-5BC23190A587}.Debug|Any CPU.Build.0 = Debug|Any CPU 124 | {31A44BBA-0A6C-48FE-BB45-5BC23190A587}.Debug|Mixed Platforms.ActiveCfg = Debug|Any CPU 125 | {31A44BBA-0A6C-48FE-BB45-5BC23190A587}.Debug|Mixed Platforms.Build.0 = Debug|Any CPU 126 | {31A44BBA-0A6C-48FE-BB45-5BC23190A587}.Debug|x86.ActiveCfg = Debug|Any CPU 127 | {31A44BBA-0A6C-48FE-BB45-5BC23190A587}.Debug|x86.Build.0 = Debug|Any CPU 128 | {31A44BBA-0A6C-48FE-BB45-5BC23190A587}.Release|Any CPU.ActiveCfg = Release|Any CPU 129 | {31A44BBA-0A6C-48FE-BB45-5BC23190A587}.Release|Any CPU.Build.0 = Release|Any CPU 130 | 
{31A44BBA-0A6C-48FE-BB45-5BC23190A587}.Release|Mixed Platforms.ActiveCfg = Release|Any CPU 131 | {31A44BBA-0A6C-48FE-BB45-5BC23190A587}.Release|Mixed Platforms.Build.0 = Release|Any CPU 132 | {31A44BBA-0A6C-48FE-BB45-5BC23190A587}.Release|x86.ActiveCfg = Release|Any CPU 133 | {31A44BBA-0A6C-48FE-BB45-5BC23190A587}.Release|x86.Build.0 = Release|Any CPU 134 | {BEC28BC7-9F2E-4B2D-948B-F5E0648FB928}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 135 | {BEC28BC7-9F2E-4B2D-948B-F5E0648FB928}.Debug|Any CPU.Build.0 = Debug|Any CPU 136 | {BEC28BC7-9F2E-4B2D-948B-F5E0648FB928}.Debug|Mixed Platforms.ActiveCfg = Debug|Any CPU 137 | {BEC28BC7-9F2E-4B2D-948B-F5E0648FB928}.Debug|Mixed Platforms.Build.0 = Debug|Any CPU 138 | {BEC28BC7-9F2E-4B2D-948B-F5E0648FB928}.Debug|x86.ActiveCfg = Debug|Any CPU 139 | {BEC28BC7-9F2E-4B2D-948B-F5E0648FB928}.Debug|x86.Build.0 = Debug|Any CPU 140 | {BEC28BC7-9F2E-4B2D-948B-F5E0648FB928}.Release|Any CPU.ActiveCfg = Release|Any CPU 141 | {BEC28BC7-9F2E-4B2D-948B-F5E0648FB928}.Release|Any CPU.Build.0 = Release|Any CPU 142 | {BEC28BC7-9F2E-4B2D-948B-F5E0648FB928}.Release|Mixed Platforms.ActiveCfg = Release|Any CPU 143 | {BEC28BC7-9F2E-4B2D-948B-F5E0648FB928}.Release|Mixed Platforms.Build.0 = Release|Any CPU 144 | {BEC28BC7-9F2E-4B2D-948B-F5E0648FB928}.Release|x86.ActiveCfg = Release|Any CPU 145 | {BEC28BC7-9F2E-4B2D-948B-F5E0648FB928}.Release|x86.Build.0 = Release|Any CPU 146 | {F66C2590-5FDD-4962-9EEB-AD1B74545EAE}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 147 | {F66C2590-5FDD-4962-9EEB-AD1B74545EAE}.Debug|Any CPU.Build.0 = Debug|Any CPU 148 | {F66C2590-5FDD-4962-9EEB-AD1B74545EAE}.Debug|Mixed Platforms.ActiveCfg = Debug|Any CPU 149 | {F66C2590-5FDD-4962-9EEB-AD1B74545EAE}.Debug|Mixed Platforms.Build.0 = Debug|Any CPU 150 | {F66C2590-5FDD-4962-9EEB-AD1B74545EAE}.Debug|x86.ActiveCfg = Debug|Any CPU 151 | {F66C2590-5FDD-4962-9EEB-AD1B74545EAE}.Debug|x86.Build.0 = Debug|Any CPU 152 | {F66C2590-5FDD-4962-9EEB-AD1B74545EAE}.Release|Any CPU.ActiveCfg = Release|Any 
CPU 153 | {F66C2590-5FDD-4962-9EEB-AD1B74545EAE}.Release|Any CPU.Build.0 = Release|Any CPU 154 | {F66C2590-5FDD-4962-9EEB-AD1B74545EAE}.Release|Mixed Platforms.ActiveCfg = Release|Any CPU 155 | {F66C2590-5FDD-4962-9EEB-AD1B74545EAE}.Release|Mixed Platforms.Build.0 = Release|Any CPU 156 | {F66C2590-5FDD-4962-9EEB-AD1B74545EAE}.Release|x86.ActiveCfg = Release|Any CPU 157 | {F66C2590-5FDD-4962-9EEB-AD1B74545EAE}.Release|x86.Build.0 = Release|Any CPU 158 | EndGlobalSection 159 | GlobalSection(SolutionProperties) = preSolution 160 | HideSolutionNode = FALSE 161 | EndGlobalSection 162 | GlobalSection(NestedProjects) = preSolution 163 | {3A7662D3-A30C-4BD4-BA0A-08A53DC59445} = {ED8079DD-2B06-4030-9F0F-DC548F98E1C4} 164 | {91D0BE7A-E128-498A-BB68-6ED65A582E04} = {ED8079DD-2B06-4030-9F0F-DC548F98E1C4} 165 | {52D108CA-B379-4C30-BD85-0AE8E0C5723B} = {884C599D-FDE2-4AC3-828A-12F6C662F273} 166 | {C73D328C-4247-4F99-81BF-2E274410E9C4} = {884C599D-FDE2-4AC3-828A-12F6C662F273} 167 | {FC9E0584-0139-4D02-8017-29AD01282449} = {884C599D-FDE2-4AC3-828A-12F6C662F273} 168 | {D64B2492-43AA-4436-B6D5-6CBFE44989DF} = {884C599D-FDE2-4AC3-828A-12F6C662F273} 169 | {31A44BBA-0A6C-48FE-BB45-5BC23190A587} = {884C599D-FDE2-4AC3-828A-12F6C662F273} 170 | {BEC28BC7-9F2E-4B2D-948B-F5E0648FB928} = {ED8079DD-2B06-4030-9F0F-DC548F98E1C4} 171 | {F66C2590-5FDD-4962-9EEB-AD1B74545EAE} = {ED8079DD-2B06-4030-9F0F-DC548F98E1C4} 172 | EndGlobalSection 173 | GlobalSection(ExtensibilityGlobals) = postSolution 174 | SolutionGuid = {5F6E586E-166D-4397-A502-18C61E31AA9C} 175 | EndGlobalSection 176 | EndGlobal 177 | -------------------------------------------------------------------------------- /docs/content/fslex.md: -------------------------------------------------------------------------------- 1 | FsLex Overview 2 | ======== 3 | 4 | The `fslex` tool is a lexer generator for byte and Unicode character input. 
5 | 6 | Getting Started 7 | --------------- 8 | 9 | Build the tool by cloning this project and running /build.sh or build.cmd 10 | 11 | Add a reference to `FsLexYacc` package via Nuget or paket. 12 | 13 | You can run the lexer directly: 14 | dotnet fslex.dll inputFile -o outputFile 15 | 16 | 17 | Or you can add it to your build project via entries like this: 18 | 19 | 20 | --module Parser 21 | 22 | 23 | --module Lexer --unicode 24 | 25 | 26 | 27 | 28 | 29 | Lexer syntax 30 | ------------ 31 | 32 | Define your lexer in the Lexer.fsl file. 33 | 34 | { header } 35 | let ident = regexp ... 36 | rule entrypoint [arg1... argn] = 37 | parse regexp { action } 38 | | ... 39 | | regexp { action } 40 | and entrypoint [arg1… argn] = 41 | parse ... 42 | and ... 43 | { trailer } 44 | 45 | Comments are delimited by (* and *) and line comments // are also supported, as in F#. 46 | 47 | The rule and parse keywords are required. 48 | 49 | The header and trailer sections are arbitrary F# code, which will be written to the beginning and end of the output file (Lexer.fs). 50 | Either or both can be omitted. Headers typically include values and functions used in the rule body actions. 51 | 52 | Following the header and before the rules are named regular expressions for use in the rules. 53 | 54 | let ident = regexp … 55 | 56 | Following this declaration, the identifier ident can be used as shorthand for regexp. 57 | 58 | Entry points 59 | ------------ 60 | 61 | Entry points are valid F# identifiers. Similarly, the arguments 62 | 63 | arg1... argn 64 | 65 | must be valid identifiers. 66 | Each entry point becomes a function that takes n+1 arguments, the implicit last argument being of type LexBuffer<'a>. 67 | Characters are read from the LexBuffer<'a> argument and matched against the regular expressions provided in the rule, until a prefix of the input matches one of the rules. 68 | The Lexer then evaluates the action and returns it as the result of the function. 
Rule entry points can be entered recursively. 69 | 70 | If several regular expressions match a prefix of the input the regular expression that matches the longest prefix of the input is selected. 71 | In case of a tie, the regular expression that occurs earlier in the rule is selected. 72 | 73 | Rule regular expressions 74 | ------------------------ 75 | 76 | ' regular-char | escape-sequence ' 77 | 78 | A character constant, with the same syntax as F# character constants. Match the denoted character. 79 | 80 | _ 81 | 82 | (underscore) Match any character. 83 | 84 | eof 85 | 86 | Match the end of the lexer input. 87 | 88 | Note: Fslex will not correctly handle regular expressions that contain eof followed by something else. 89 | 90 | "string" 91 | 92 | A string constant, with the same syntax as F# string constants. Match the corresponding sequence of characters. 93 | 94 | [ character-set ] 95 | 96 | Match any single character belonging to the given character set. Valid character sets are: single character constants ' c '; ranges of characters ' c1 ' - ' c2 ' (all characters between c1 and c2, inclusive); and the union of two or more character sets, denoted by concatenation. 97 | 98 | [ ^ character-set ] 99 | 100 | Match any single character not belonging to the given character set. 101 | 102 | regexp1 # regexp2 103 | 104 | (difference of character sets) Regular expressions regexp1 and regexp2 must be character sets defined with […] (or a single character expression or underscore _). Match the difference of the two specified character sets. 105 | 106 | regexp * 107 | 108 | (repetition) Match the concatenation of zero or more strings that match regexp. 109 | 110 | regexp + 111 | 112 | (strict repetition) Match the concatenation of one or more strings that match regexp. 113 | 114 | regexp ? 115 | 116 | (option) Match the empty string, or a string matching regexp. 
117 | 118 | regexp1 | regexp2 119 | 120 | (alternative) Match any string that matches regexp1 or regexp2 121 | 122 | regexp1 regexp2 123 | 124 | (concatenation) Match the concatenation of two strings, the first matching regexp1, the second matching regexp2. 125 | 126 | ( regexp ) 127 | 128 | Match the same strings as regexp. 129 | 130 | ident 131 | 132 | Reference the regular expression bound to ident by an earlier let ident = regexp definition. 133 | 134 | Concerning the precedences of operators, # has the highest precedence, followed by *, + and ?, then concatenation, then | (alternation). 135 | 136 | Rule actions 137 | ------------ 138 | 139 | The actions are arbitrary F# expressions. Additionally, `lexbuf` is bound to the current lexer buffer. 140 | 141 | Some typical uses for `lexbuf`, in conjunction with the operations on lexer buffers provided by the FSharp.Text.Lexing standard library module, are listed below. 142 | 143 | lexeme lexbuf 144 | 145 | Return the matched string. 146 | 147 | lexbuf.LexemeChar n 148 | 149 | Return the nth character in the matched string. The first character corresponds to n = 0. 150 | 151 | lexbuf.StartPos 152 | 153 | Return the data on the absolute position in the input text of the beginning of the matched string (i.e. the offset of the first character of the matched string) in an object of type Position. The first character read from the input text has offset 0. 154 | 155 | lexbuf.EndPos 156 | 157 | Return the data on absolute position in the input text of the end of the matched string (i.e. the offset of the first character after the matched string) in an object of type Position. The first character read from the input text has offset 0. 158 | 159 | entrypoint [exp1… expn] lexbuf 160 | 161 | (Where entrypoint is the name of another entry point in the same lexer definition.) Recursively call the lexer on the given entry point. Notice that lexbuf is the last argument. Useful for lexing nested comments, for example. 
162 | 163 | The Position type 164 | ----------------- 165 | 166 | type Position = 167 | { /// The file name for the position 168 | pos_fname: string 169 | /// The line number for the position 170 | pos_lnum: int 171 | /// The absolute offset of the beginning of the line 172 | pos_bol: int 173 | /// The absolute offset of the column for the position 174 | pos_cnum: int } 175 | /// The file name associated with the input stream. 176 | member FileName : string 177 | /// The line number in the input stream, assuming fresh positions have been updated 178 | /// using AsNewLinePos() and by modifying the EndPos property of the LexBuffer. 179 | member Line : int 180 | /// The character number in the input stream 181 | member AbsoluteOffset : int 182 | /// Return absolute offset of the start of the line marked by the position 183 | member StartOfLineAbsoluteOffset : int 184 | /// Return the column number marked by the position, i.e. the difference between the AbsoluteOffset and the StartOfLineAbsoluteOffset 185 | member Column : int 186 | // Given a position just beyond the end of a line, return a position at the start of the next line 187 | member NextLine : Position 188 | /// Given a position at the start of a token of length n, return a position just beyond the end of the token 189 | member EndOfToken: n:int -> Position 190 | /// Gives a position shifted by specified number of characters 191 | member ShiftColumnBy: by:int -> Position 192 | 193 | Sample input 194 | ------------ 195 | 196 | This is taken from the `Parsing` sample previously in the F# distribution. See below for information on `newline` and line counting. 
197 | 198 | let digit = ['0'-'9'] 199 | let whitespace = [' ' '\t' ] 200 | let newline = ('\n' | '\r' '\n') 201 | 202 | 203 | rule token = parse 204 | | whitespace { token lexbuf } 205 | | newline { newline lexbuf; token lexbuf } 206 | | "while" { WHILE } 207 | | "begin" { BEGIN } 208 | | "end" { END } 209 | | "do" { DO } 210 | | "if" { IF } 211 | | "then" { THEN } 212 | | "else" { ELSE } 213 | | "print" { PRINT } 214 | | "decr" { DECR } 215 | | "(" { LPAREN } 216 | | ")" { RPAREN } 217 | | ";" { SEMI } 218 | | ":=" { ASSIGN } 219 | | ['a'-'z']+ { ID(lexeme lexbuf) } 220 | | ['-']?digit+ { INT (Int32.Parse(lexeme lexbuf)) } 221 | | ['-']?digit+('.'digit+)?(['e''E']digit+)? { FLOAT (Double.Parse(lexeme lexbuf)) } 222 | | eof { EOF } 223 | 224 | 225 | 226 | More than one lexer state is permitted - use 227 | 228 | rule state1 = 229 | | "this" { state2 lexbuf } 230 | | ... 231 | and state2 = 232 | | "that" { state1 lexbuf } 233 | | ... 234 | 235 | 236 | States can be passed arguments: 237 | 238 | rule state1 arg1 arg2 = ... 239 | | "this" { state2 (arg1+1) (arg2+2) lexbuf } 240 | | ... 241 | and state2 arg1 arg2 = ... 242 | | ... 243 | 244 | 245 | 246 | **Using a lexer** 247 | 248 | If in the first example above the constructors `INT` etc generate values of type `tok` then the above generates a lexer with a function 249 | 250 | val token : LexBuffer -> tok 251 | 252 | Once you have a lexbuffer you can call the above to generate new tokens. Typically you use some methods from `FSharp.Text.Lexing` 253 | to create lex buffers, either a `LexBuffer` for ASCII lexing, or `LexBuffer` for Unicode lexing. 
254 | 255 | Some ways of creating lex buffers are by using: 256 | 257 | LexBuffer<_>.FromChars 258 | LexBuffer<_>.FromFunction 259 | LexBuffer<_>.FromStream 260 | LexBuffer<_>.FromTextReader 261 | LexBuffer<_>.FromBytes 262 | 263 | Within lexing actions the variable `lexbuf` is in scope and you may use properties on the `LexBuffer` type such as: 264 | 265 | lexbuf.Lexeme // get the lexeme as an array of characters or bytes 266 | LexBuffer.LexemeString lexbuf // get the lexeme as a string, for Unicode lexing 267 | 268 | Lexing positions give locations in source files (the relevant type is `FSharp.Text.Lexing.Position`). 269 | 270 | Generated lexers are nearly always used in conjunction with parsers generated by `FsYacc` (also documented on this site). See the Parsed Language starter template. 271 | 272 | Command line options 273 | 274 | fslex 275 | -o : Name the output file. 276 | 277 | --module : Define the F# module name to host the generated lexer. 278 | 279 | --internal: Generate an internal module 280 | 281 | --codepage : Assume input lexer specification file is encoded with the given codepage. 282 | 283 | --light: (ignored) 284 | 285 | --light-off: Add #light "off" to the top of the generated file 286 | 287 | --lexlib : Specify the namespace for the implementation of the lexer table interpreter (default FSharp.Text.Lexing) 288 | 289 | --unicode: Produce a lexer for use with 16-bit unicode characters. 290 | 291 | --help: display this list of options 292 | 293 | -help: display this list of options 294 | 295 | Positions and line counting in lexers 296 | 297 | Within a lexer lines can in theory be counted simply by incrementing a global variable or a passed line number count: 298 | 299 | rule token line = ... 300 | | "\n" | '\r' '\n' { token (line+1) } 301 | | ... 302 | 303 | However for character positions this is tedious, as it means every action becomes polluted with character counting, as you have to manually attach line numbers to tokens. 
Also, for error reporting it is useful to have position information held as part of the state in the lexbuffer itself. 304 | 305 | Thus F# follows the `OCamlLex` model where the lexer and parser state carry `position` values that record information for the current match (`lex`) and the `l.h.s`/`r.h.s` of the grammar productions (`yacc`). 306 | 307 | The information carried for each position is: 308 | 309 | * a filename 310 | * a current 'absolute' character number 311 | * a placeholder for a user-tracked beginning-of-line marker 312 | * a placeholder for a user-tracked line number count. 313 | 314 | Passing state through lexers 315 | --------------------------- 316 | 317 | It is sometimes under-appreciated that you can pass arguments around between lexer states. For example, in one example we used imperative state to track a line number. 318 | 319 | let current_line = ref 0 320 | let current_char = ref 0 321 | let set_next_line lexbuf = .. 322 | 323 | ... 324 | rule main = parse 325 | | ... 326 | | "//" [^ '\n']* '\n' { 327 | set_next_line lexbuf; main lexbuf 328 | } 329 | 330 | 331 | This sort of imperative code is better replaced by passing arguments: 332 | 333 | rule main line char = parse 334 | | ... 335 | | "//" [^ '\n']* '\n' { 336 | main (line+1) 0 lexbuf 337 | } 338 | 339 | A good example is that when lexing a comment you want to pass through the start-of-comment position so that you can give a good error message if no end-of-comment is found. Or likewise you may want to pass through the number of nested comments. 340 | --------------------------------------------------------------------------------