├── tests
├── fsyacc
│ ├── Test1
│ │ ├── paket.references
│ │ ├── test1.input1
│ │ ├── test1.input3
│ │ ├── test1.input4
│ │ ├── test1.input1.bsl
│ │ ├── test1.input3.bsl
│ │ ├── test1.input4.bsl
│ │ ├── test1.input2.bsl
│ │ ├── test1.input2.variation1
│ │ ├── test1.input2.variation2
│ │ ├── test1.input1.tokens.bsl
│ │ ├── test1.fsproj
│ │ ├── test1.fsy
│ │ ├── test1lex.fsl
│ │ ├── test1.input3.tokens.bsl
│ │ └── test1.input4.tokens.bsl
│ ├── Test2
│ │ ├── paket.references
│ │ ├── test2.input1.bsl
│ │ ├── test2.input1
│ │ ├── test2.badInput
│ │ ├── test2.badInput.bsl
│ │ ├── test2.fsproj
│ │ ├── test2.fsy
│ │ ├── test2.input1.tokens.bsl
│ │ └── test2.badInput.tokens.bsl
│ ├── unicode
│ │ ├── paket.references
│ │ ├── test1-unicode.input1.bsl
│ │ ├── test1-unicode.input2.bsl
│ │ ├── test1-unicode.input3.utf8
│ │ ├── test1-unicode.WithTitleCaseLetter.utf8
│ │ ├── test1-unicode.input1.tokens.bsl
│ │ ├── test1-unicode.fsy
│ │ ├── test1-unicode.fsproj
│ │ ├── test1-unicode-lex.fsl
│ │ ├── test1-unicode.WithTitleCaseLetter.tokens.error.bsl
│ │ ├── test1-unicode.input3.tokens.bsl
│ │ └── test1-unicode.WithTitleCaseLetter.tokens.bsl
│ ├── tree.fs
│ ├── .gitignore
│ ├── repro_#141
│ │ └── Lexer_fail_option_i.fsl
│ ├── arg2.fs
│ ├── repro1885
│ │ └── repro1885.fsl
│ ├── main.fs
│ └── OldFsYaccTests.fsx
├── JsonLexAndYaccExample
│ ├── .gitignore
│ ├── JsonValue.fs
│ ├── Script.fsx
│ ├── JsonLexAndYaccExample.fsproj
│ ├── Parser.fsy
│ ├── Program.fs
│ └── Lexer.fsl
├── LexAndYaccMiniProject
│ ├── .gitignore
│ ├── Parser.fsy
│ ├── Lexer.fsl
│ ├── Program.fs
│ └── LexAndYaccMiniProject.fsproj
├── FsYacc.Core.Tests
│ ├── paket.references
│ ├── Main.fs
│ ├── Sample.fs
│ └── FsYacc.Core.Tests.fsproj
├── Directory.Build.props
└── FsLex.Core.Tests
│ ├── Main.fs
│ ├── paket.references
│ ├── FsLex.Core.Tests.fsproj
│ └── UnicodeTests.fs
├── src
├── FsLex
│ ├── paket.references
│ ├── fslex.fsx
│ ├── App.config
│ ├── AssemblyInfo.fs
│ ├── fslex.fsproj
│ └── fslex.fs
├── FsYacc
│ ├── paket.references
│ ├── fsyacc.fsx
│ ├── AssemblyInfo.fs
│ ├── fsyacc.fsproj
│ └── fsyacc.fs
├── FsLexYacc.Runtime
│ ├── paket.references
│ ├── AssemblyInfo.fs
│ ├── FsLexYacc.Runtime.fsproj
│ ├── Parsing.fsi
│ └── Lexing.fsi
├── FsLex.Core
│ ├── paket.references
│ ├── AssemblyInfo.fs
│ ├── FsLex.Core.fsproj
│ ├── fslexpars.fsi
│ ├── fslexpars.fsy
│ ├── fslexlex.fsl
│ └── fslexdriver.fs
├── FsYacc.Core
│ ├── paket.references
│ ├── AssemblyInfo.fs
│ ├── FsYacc.Core.fsproj
│ ├── fsyaccpars.fsi
│ ├── fsyaccpars.fsy
│ └── fsyacclex.fsl
├── FsLexYacc.Build.Tasks
│ ├── AssemblyInfo.fs
│ └── FsLexYacc.targets
├── Directory.Build.props
└── Common
│ ├── Arg.fsi
│ └── Arg.fs
├── nuget
├── publish.cmd
└── FsLexYacc.template
├── docs
├── img
│ ├── logo.pdn
│ ├── logo.png
│ └── favicon.ico
├── index.fsx
└── content
│ ├── fsyacc.md
│ └── fslex.md
├── global.json
├── .github
├── dependabot.yml
└── workflows
│ ├── pull-requests.yml
│ └── push-main.yml
├── paket.dependencies
├── .fantomasignore
├── .editorconfig
├── .config
└── dotnet-tools.json
├── ISSUE_TEMPLATE.md
├── README.md
├── LICENSE.txt
├── RELEASE_NOTES.md
├── paket.lock
├── .gitattributes
├── .gitignore
└── FsLexYacc.sln
/tests/fsyacc/Test1/paket.references:
--------------------------------------------------------------------------------
1 | FSharp.Core
--------------------------------------------------------------------------------
/tests/fsyacc/Test2/paket.references:
--------------------------------------------------------------------------------
1 | FSharp.Core
--------------------------------------------------------------------------------
/tests/fsyacc/unicode/paket.references:
--------------------------------------------------------------------------------
1 | FSharp.Core
--------------------------------------------------------------------------------
/tests/fsyacc/Test1/test1.input1:
--------------------------------------------------------------------------------
1 |
2 | id + id
3 |
--------------------------------------------------------------------------------
/tests/fsyacc/Test1/test1.input3:
--------------------------------------------------------------------------------
1 | let id x + x in id + id end
--------------------------------------------------------------------------------
/tests/fsyacc/Test1/test1.input4:
--------------------------------------------------------------------------------
1 | LeT id x + x iN id + id eNd
--------------------------------------------------------------------------------
/tests/fsyacc/Test1/test1.input1.bsl:
--------------------------------------------------------------------------------
1 | parsed test1.input1 ok
2 |
--------------------------------------------------------------------------------
/tests/fsyacc/Test1/test1.input3.bsl:
--------------------------------------------------------------------------------
1 | parsed test1.input3 ok
2 |
--------------------------------------------------------------------------------
/tests/fsyacc/Test2/test2.input1.bsl:
--------------------------------------------------------------------------------
1 | parsed ./test2.input1 ok
2 |
--------------------------------------------------------------------------------
/src/FsLex/paket.references:
--------------------------------------------------------------------------------
1 | FSharp.Core
2 | Microsoft.SourceLink.GitHub
--------------------------------------------------------------------------------
/src/FsYacc/paket.references:
--------------------------------------------------------------------------------
1 | FSharp.Core
2 | Microsoft.SourceLink.GitHub
--------------------------------------------------------------------------------
/nuget/publish.cmd:
--------------------------------------------------------------------------------
1 | @for %%f in (..\bin\*.nupkg) do @..\.nuget\NuGet.exe push %%f
--------------------------------------------------------------------------------
/tests/JsonLexAndYaccExample/.gitignore:
--------------------------------------------------------------------------------
1 | Lexer.fs
2 | Parser.fs
3 | Parser.fsi
--------------------------------------------------------------------------------
/tests/fsyacc/unicode/test1-unicode.input1.bsl:
--------------------------------------------------------------------------------
1 | parsed ./test1.input1 ok
2 |
--------------------------------------------------------------------------------
/src/FsLexYacc.Runtime/paket.references:
--------------------------------------------------------------------------------
1 | FSharp.Core
2 | Microsoft.SourceLink.GitHub
--------------------------------------------------------------------------------
/src/FsLex.Core/paket.references:
--------------------------------------------------------------------------------
1 | FSharp.Core
2 | FsLexYacc
3 | Microsoft.SourceLink.GitHub
--------------------------------------------------------------------------------
/tests/LexAndYaccMiniProject/.gitignore:
--------------------------------------------------------------------------------
1 | Lexer.fs
2 | Parser.fs
3 | Parser.fsi
4 | test.txt
--------------------------------------------------------------------------------
/docs/img/logo.pdn:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fsprojects/FsLexYacc/HEAD/docs/img/logo.pdn
--------------------------------------------------------------------------------
/docs/img/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fsprojects/FsLexYacc/HEAD/docs/img/logo.png
--------------------------------------------------------------------------------
/src/FsYacc.Core/paket.references:
--------------------------------------------------------------------------------
1 | FSharp.Core
2 | FsLexYacc
3 | Microsoft.SourceLink.GitHub
--------------------------------------------------------------------------------
/tests/fsyacc/Test1/test1.input4.bsl:
--------------------------------------------------------------------------------
1 | parsed test1.input3 ok
2 | parsed test1.input4 ok
3 |
--------------------------------------------------------------------------------
/docs/img/favicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fsprojects/FsLexYacc/HEAD/docs/img/favicon.ico
--------------------------------------------------------------------------------
/tests/fsyacc/Test2/test2.input1:
--------------------------------------------------------------------------------
1 |
2 | x (id + id)
3 | y (id + id + id)
4 | z (id + id * id)
5 |
--------------------------------------------------------------------------------
/global.json:
--------------------------------------------------------------------------------
1 | {
2 | "sdk": {
3 | "version": "6.0.400",
4 | "rollForward": "minor"
5 | }
6 | }
7 |
--------------------------------------------------------------------------------
/tests/fsyacc/Test1/test1.input2.bsl:
--------------------------------------------------------------------------------
1 | parsed test1.input2.variation1 ok
2 | parsed test1.input2.variation2 ok
3 |
--------------------------------------------------------------------------------
/tests/FsYacc.Core.Tests/paket.references:
--------------------------------------------------------------------------------
1 | FSharp.Core
2 | Expecto
3 | Microsoft.NET.Test.Sdk
4 | YoloDev.Expecto.TestSdk
--------------------------------------------------------------------------------
/tests/fsyacc/tree.fs:
--------------------------------------------------------------------------------
1 | module Tree
2 | type tree = Node of string * tree list
3 | type decl = Decl of string * tree
4 |
--------------------------------------------------------------------------------
/tests/fsyacc/unicode/test1-unicode.input2.bsl:
--------------------------------------------------------------------------------
1 | parsed ./test1.input2.variation1 ok
2 | parsed ./test1.input2.variation2 ok
3 |
--------------------------------------------------------------------------------
/tests/Directory.Build.props:
--------------------------------------------------------------------------------
1 |
2 |
3 | false
4 |
5 |
--------------------------------------------------------------------------------
/tests/FsLex.Core.Tests/Main.fs:
--------------------------------------------------------------------------------
1 | []
2 | let main argv = Expecto.Tests.runTestsInAssembly Expecto.Tests.defaultConfig argv
3 |
--------------------------------------------------------------------------------
/tests/FsLex.Core.Tests/paket.references:
--------------------------------------------------------------------------------
1 | FSharp.Core
2 | Expecto
3 | Expecto.FsCheck
4 | Microsoft.NET.Test.Sdk
5 | YoloDev.Expecto.TestSdk
--------------------------------------------------------------------------------
/tests/FsYacc.Core.Tests/Main.fs:
--------------------------------------------------------------------------------
1 | []
2 | let main argv = Expecto.Tests.runTestsInAssembly Expecto.Tests.defaultConfig argv
3 |
--------------------------------------------------------------------------------
/tests/fsyacc/Test1/test1.input2.variation1:
--------------------------------------------------------------------------------
1 |
2 | (id + id + id) + (id * id * id) + (id - id - id) + (id + id * id) + (id * id + id)
3 |
4 |
--------------------------------------------------------------------------------
/.github/dependabot.yml:
--------------------------------------------------------------------------------
1 | version: 2
2 | updates:
3 | - package-ecosystem: "nuget"
4 | directory: "/"
5 | schedule:
6 | interval: "weekly"
--------------------------------------------------------------------------------
/tests/fsyacc/Test1/test1.input2.variation2:
--------------------------------------------------------------------------------
1 |
2 | (((((id + id) + id) + ((id * id) * id)) + (id - (id - id))) + (id + (id * id))) + ((id * id) + id)
3 |
--------------------------------------------------------------------------------
/src/FsLex/fslex.fsx:
--------------------------------------------------------------------------------
1 | #load "Lexing.fsi" "Lexing.fs" "Parsing.fsi" "Parsing.fs" "Arg.fsi" "Arg.fs" "fslexast.fs" "fslexpars.fs" "fslexlex.fs" "fslex.fs"
2 |
3 | let v = FsLexYacc.FsLex.Driver.result
4 |
--------------------------------------------------------------------------------
/src/FsYacc/fsyacc.fsx:
--------------------------------------------------------------------------------
1 | #load "Lexing.fsi" "Lexing.fs" "Parsing.fsi" "Parsing.fs" "Arg.fsi" "Arg.fs" "fsyaccast.fs" "fsyaccpars.fs" "fsyacclex.fs" "fsyacc.fs"
2 |
3 | let v = FsLexYacc.FsYacc.Driver.result
4 |
--------------------------------------------------------------------------------
/tests/FsYacc.Core.Tests/Sample.fs:
--------------------------------------------------------------------------------
1 | module FsYacc.Core.Tests.Sample
2 |
3 | open Expecto
4 |
5 | []
6 | let tests =
7 | test "sample" {
8 | Expect.equal 2 2 "2=2"
9 | }
10 |
--------------------------------------------------------------------------------
/tests/fsyacc/Test2/test2.badInput:
--------------------------------------------------------------------------------
1 |
2 | z1 (let x in id end)
3 | x2 (id + id
4 | y3 (id + id + id)
5 | z4 (id + id * id)
6 | z5 (let x + in id end)
7 | z6 (let x + in id end)
8 | z7 (let x + y in id end)
9 | z8 (let x ))) in id end)
10 |
11 |
--------------------------------------------------------------------------------
/tests/fsyacc/unicode/test1-unicode.input3.utf8:
--------------------------------------------------------------------------------
1 |
2 | next line tests one unicode character class
3 | ÄËÖÏÜâæçñõö + id
4 | next line tests specific unicode characters
5 | ≠ ≠≠ ≈≈ ≈≈≈
6 | id + id
7 | next line tests some more random unicode characters
8 | МНОПРСТУФХЦẀẁẂќ αβΛΘΩΨΧΣδζȚŶǺ
9 | id
--------------------------------------------------------------------------------
/tests/fsyacc/unicode/test1-unicode.WithTitleCaseLetter.utf8:
--------------------------------------------------------------------------------
1 |
2 | next line tests one unicode character class
3 | ÄËÖÏÜâæçñõö + id
4 | next line tests specific unicode characters
5 | ≠ ≠≠ ≈≈ ≈≈≈
6 | id + id
7 | next line tests some more random unicode characters
8 | DzМНОПРСТУФХЦẀẁẂќ αβΛΘΩΨΧΣδζȚŶǺ
9 | id
--------------------------------------------------------------------------------
/paket.dependencies:
--------------------------------------------------------------------------------
1 | source https://api.nuget.org/v3/index.json
2 |
3 | storage: none
4 | frameworks: netstandard2.0, net6.0
5 |
6 | nuget FSharp.Core >= 4.6.0
7 | nuget FsLexYacc copy_local: true
8 | nuget Microsoft.SourceLink.GitHub copy_local: true
9 | nuget Expecto ~> 9.0
10 | nuget Expecto.FsCheck
11 | nuget Microsoft.NET.Test.Sdk
12 | nuget YoloDev.Expecto.TestSdk
--------------------------------------------------------------------------------
/.fantomasignore:
--------------------------------------------------------------------------------
1 | # Generated by FAKE
2 | AssemblyInfo.fs
3 | .fake/
4 |
5 | # Generated files
6 | src/FsLex.Core/fslexlex.fs
7 | src/FsLex.Core/fslexpars.fs
8 | src/FsLex.Core/fslexpars.fsi
9 | src/FsYacc.Core/fsyacclex.fs
10 | src/FsYacc.Core/fsyaccpars.fs
11 | src/FsYacc.Core/fsyaccpars.fsi
12 |
13 | # Ignore for now
14 | tests/
15 |
16 | # We cannot parse this file for all define combinations
17 | src/FsLexYacc.Runtime/Parsing.fs
--------------------------------------------------------------------------------
/tests/fsyacc/Test1/test1.input1.tokens.bsl:
--------------------------------------------------------------------------------
1 | tokenize - getting one token
2 | tokenize - got IDENT, now at char 2
3 | ident char = 105
4 | ident char = 100
5 | tokenize - getting one token
6 | tokenize - got PLUS, now at char 5
7 | tokenize - getting one token
8 | tokenize - got IDENT, now at char 7
9 | ident char = 105
10 | ident char = 100
11 | tokenize - getting one token
12 | tokenize - got EOF, now at char 11
13 |
--------------------------------------------------------------------------------
/tests/fsyacc/unicode/test1-unicode.input1.tokens.bsl:
--------------------------------------------------------------------------------
1 | tokenize - getting one token
2 | tokenize - got IDENT, now at char 2
3 | ident char = 105
4 | ident char = 100
5 | tokenize - getting one token
6 | tokenize - got PLUS, now at char 5
7 | tokenize - getting one token
8 | tokenize - got IDENT, now at char 7
9 | ident char = 105
10 | ident char = 100
11 | tokenize - getting one token
12 | tokenize - got EOF, now at char 11
13 |
--------------------------------------------------------------------------------
/tests/fsyacc/Test2/test2.badInput.bsl:
--------------------------------------------------------------------------------
1 | invisible error recovery successful.
2 | Missing paren: visible recovery successful.
3 | invisible error recovery successful.
4 | invisible error recovery successful.
5 | invisible error recovery successful.
6 | Three parens is a bit rich - why not use Lisp if you like that sort of thing. Raising explicit parse error, which we will recover from.
7 | invisible error recovery successful.
8 | parsed ./test2.badInput ok
9 |
--------------------------------------------------------------------------------
/.editorconfig:
--------------------------------------------------------------------------------
1 | root = true
2 |
3 | # max_line_length is set to 140. At some point we will reduce it to 120 for as many files as reasonable.
4 | [*.{fs,fsi,fsx}]
5 | max_line_length = 140
6 | fsharp_newline_between_type_definition_and_members = true
7 | fsharp_max_function_binding_width = 40
8 | fsharp_max_if_then_else_short_width = 60
9 | fsharp_max_infix_operator_expression = 80
10 | fsharp_max_array_or_list_width = 80
11 | fsharp_max_dot_get_expression_width = 80
12 | fsharp_multiline_bracket_style = aligned
13 | fsharp_keep_max_number_of_blank_lines = 1
14 |
--------------------------------------------------------------------------------
/tests/fsyacc/.gitignore:
--------------------------------------------------------------------------------
1 |
2 | /FSharp.Core.dll
3 | /FsLexYacc.Runtime.dll
4 | /repro1885.fs
5 | /test1.exe
6 | /test1.fs
7 | /test1.fsi
8 | /test1.ml
9 | /test1.mli
10 | /test1compat.exe
11 | /test1compat.ml
12 | /test1compat.mli
13 | /test1lex.fs
14 | /test1lex.ml
15 | /test1-unicode.exe
16 | /test1-unicode.fs
17 | /test1-unicode.fsi
18 | /test1-unicode.ml
19 | /test1-unicode.mli
20 | /test1-unicode-lex.fs
21 | /test1-unicode-lex.ml
22 | /test2.exe
23 | /test2.fs
24 | /test2.fsi
25 | /test2.ml
26 | /test2.mli
27 | /test2compat.exe
28 | /test2compat.ml
29 | /test2compat.mli
30 |
--------------------------------------------------------------------------------
/src/FsLex/App.config:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
--------------------------------------------------------------------------------
/.config/dotnet-tools.json:
--------------------------------------------------------------------------------
1 | {
2 | "version": 1,
3 | "isRoot": true,
4 | "tools": {
5 | "fake-cli": {
6 | "version": "6.1.3",
7 | "commands": [
8 | "fake"
9 | ]
10 | },
11 | "paket": {
12 | "version": "8.0.3",
13 | "commands": [
14 | "paket"
15 | ]
16 | },
17 | "fantomas": {
18 | "version": "6.3.16",
19 | "commands": [
20 | "fantomas"
21 | ]
22 | },
23 | "fsdocs-tool": {
24 | "version": "20.0.1",
25 | "commands": [
26 | "fsdocs"
27 | ]
28 | }
29 | }
30 | }
--------------------------------------------------------------------------------
/tests/FsLex.Core.Tests/FsLex.Core.Tests.fsproj:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | Exe
5 | net6.0
6 | false
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
--------------------------------------------------------------------------------
/tests/FsYacc.Core.Tests/FsYacc.Core.Tests.fsproj:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | Exe
5 | net6.0
6 | false
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
--------------------------------------------------------------------------------
/.github/workflows/pull-requests.yml:
--------------------------------------------------------------------------------
1 | name: Build and test
2 |
3 | on:
4 | pull_request:
5 | branches:
6 | - master
7 |
8 | jobs:
9 | build:
10 |
11 | strategy:
12 | fail-fast: false
13 | matrix:
14 | os: [ubuntu-latest, windows-latest, macOS-latest]
15 | runs-on: ${{ matrix.os }}
16 |
17 | steps:
18 | - uses: actions/checkout@v3
19 | - name: Setup .NET for main project build
20 | uses: actions/setup-dotnet@v3
21 | - name: Install local tools
22 | run: dotnet tool restore
23 | - name: Paket restore
24 | run: dotnet paket restore
25 | - name: Build
26 | run: dotnet fake run build.fsx
27 |
--------------------------------------------------------------------------------
/tests/LexAndYaccMiniProject/Parser.fsy:
--------------------------------------------------------------------------------
1 | // The start token becomes a parser function in the compiled code:
2 | %start start
3 |
4 | // Regular tokens
5 | %token HELLO
6 |
7 | // Misc tokens
8 | %token EOF
9 |
10 | // This is the type of the data produced by a successful reduction of the 'start'
11 | // symbol:
12 | %type < int > start
13 |
14 | %%
15 |
16 | // These are the rules of the grammar along with the F# code of the
17 | // actions executed as rules are reduced.
18 | start: File end { $1 }
19 | | end { $1 }
20 |
21 | File:
22 | | HELLO { 1 }
23 | | HELLO HELLO { 2 }
24 |
25 |
26 | // Using F# keywords for nonterminal names is okay.
27 | end: EOF { 3 }
28 |
--------------------------------------------------------------------------------
/tests/LexAndYaccMiniProject/Lexer.fsl:
--------------------------------------------------------------------------------
1 | {
2 |
3 | // Opens methods related to fslex.exe
4 | open FSharp.Text.Lexing
5 |
6 | let newline (lexbuf: LexBuffer<_>) =
7 | lexbuf.StartPos <- lexbuf.StartPos.NextLine
8 |
9 | }
10 |
11 | // Regular expressions
12 | let whitespace = [' ' '\t' ]
13 | let newline = ('\n' | '\r' '\n')
14 |
15 | rule tokenstream = parse
16 | // --------------------------
17 | | "hello" { Parser.HELLO }
18 | // --------------------------
19 | | whitespace { tokenstream lexbuf }
20 | | newline { newline lexbuf; tokenstream lexbuf }
21 | // --------------------------
22 | | _ { failwith ("ParseError" + LexBuffer<_>.LexemeString lexbuf) }
23 | | eof { Parser.EOF }
24 |
--------------------------------------------------------------------------------
/tests/fsyacc/repro_#141/Lexer_fail_option_i.fsl:
--------------------------------------------------------------------------------
1 | {
2 |
3 | module Lexer
4 |
5 | // Opens methods related to fslex.exe
6 | open FSharp.Text.Lexing
7 | }
8 |
9 | // Regular expressions
10 | let whitespace = [' ' '\t' ]
11 | let newline = ('\n' | '\r' '\n')
12 |
13 | rule tokenstream = parse
14 | // --------------------------
15 | | whitespace { tokenstream lexbuf }
16 |
17 | // --------------------------
18 | | newline { newline lexbuf; tokenstream lexbuf }
19 | // --------------------------
20 | | _ { raise (new EqInterpretationReglesException (sprintf "[Lexer] Erreur %s %d %d" (LexBuffer<_>.LexemeString lexbuf) (lexbuf.StartPos.Line + 1) lexbuf.StartPos.Column)) }
21 | | eof { Parser.EOF }
22 |
--------------------------------------------------------------------------------
/tests/fsyacc/unicode/test1-unicode.fsy:
--------------------------------------------------------------------------------
1 | %{
2 | //module TestParser
3 |
4 | %}
5 |
6 | %type start
7 | %token MINUS STAR LPAREN RPAREN PLUS EOF LET IN END UNICODE1 UNICODE2
8 | %token IDENT
9 | %start start
10 |
11 | %right MINUS
12 | %left PLUS
13 | %left STAR
14 | %%
15 |
16 | start: expr EOF { $1 }
17 |
18 | decl: IDENT expr { Tree.Node("decl",[$2]) }
19 |
20 | expr: expr MINUS expr { Tree.Node("-",[$1;$3]) }
21 | | expr PLUS expr { Tree.Node("+",[$1;$3]) }
22 | | expr STAR expr { Tree.Node("*",[$1;$3]) }
23 | | LPAREN expr RPAREN { $2 }
24 | | IDENT { Tree.Node($1,[]) }
25 | | LET decl IN expr END { $4 }
26 | | UNICODE1 { Tree.Node("UNICODE1",[])}
27 | | UNICODE2 { Tree.Node("UNICODE2",[])}
28 |
29 |
30 |
31 |
--------------------------------------------------------------------------------
/src/FsLex/AssemblyInfo.fs:
--------------------------------------------------------------------------------
1 | // Auto-Generated by FAKE; do not edit
2 | namespace System
3 | open System.Reflection
4 |
5 | []
6 | []
7 | []
8 | []
9 | []
10 | do ()
11 |
12 | module internal AssemblyVersionInformation =
13 | let [] AssemblyTitle = "FsLex"
14 | let [] AssemblyProduct = "FsLexYacc"
15 | let [] AssemblyDescription = "FsLex/FsYacc lexer/parser generation tools"
16 | let [] AssemblyVersion = "11.3.0"
17 | let [] AssemblyFileVersion = "11.3.0"
18 |
--------------------------------------------------------------------------------
/ISSUE_TEMPLATE.md:
--------------------------------------------------------------------------------
1 |
2 | ### Description
3 |
4 | Please provide a succinct description of your issue.
5 |
6 | ### Repro steps
7 |
8 | Please provide the steps required to reproduce the problem
9 |
10 | 1. Step A
11 |
12 | 2. Step B
13 |
14 | ### Expected behavior
15 |
16 | Please provide a description of the behaviour you expect.
17 |
18 | ### Actual behavior
19 |
20 | Please provide a description of the actual behaviour you observe.
21 |
22 | ### Known workarounds
23 |
24 | Please provide a description of any known workarounds.
25 |
26 | ### Related information
27 |
28 | * Operating system
29 | * Branch
30 | * Database versions and sample databases being used
31 | * .NET Runtime, CoreCLR or Mono Version
32 | * Performance information, links to performance testing scripts
33 |
34 |
--------------------------------------------------------------------------------
/src/FsYacc/AssemblyInfo.fs:
--------------------------------------------------------------------------------
1 | // Auto-Generated by FAKE; do not edit
2 | namespace System
3 | open System.Reflection
4 |
5 | []
6 | []
7 | []
8 | []
9 | []
10 | do ()
11 |
12 | module internal AssemblyVersionInformation =
13 | let [] AssemblyTitle = "FsYacc"
14 | let [] AssemblyProduct = "FsLexYacc"
15 | let [] AssemblyDescription = "FsLex/FsYacc lexer/parser generation tools"
16 | let [] AssemblyVersion = "11.3.0"
17 | let [] AssemblyFileVersion = "11.3.0"
18 |
--------------------------------------------------------------------------------
/src/FsLex.Core/AssemblyInfo.fs:
--------------------------------------------------------------------------------
1 | // Auto-Generated by FAKE; do not edit
2 | namespace System
3 | open System.Reflection
4 |
5 | []
6 | []
7 | []
8 | []
9 | []
10 | do ()
11 |
12 | module internal AssemblyVersionInformation =
13 | let [] AssemblyTitle = "FsLex.Core"
14 | let [] AssemblyProduct = "FsLexYacc"
15 | let [] AssemblyDescription = "FsLex/FsYacc lexer/parser generation tools"
16 | let [] AssemblyVersion = "11.3.0"
17 | let [] AssemblyFileVersion = "11.3.0"
18 |
--------------------------------------------------------------------------------
/src/FsYacc.Core/AssemblyInfo.fs:
--------------------------------------------------------------------------------
1 | // Auto-Generated by FAKE; do not edit
2 | namespace System
3 | open System.Reflection
4 |
5 | []
6 | []
7 | []
8 | []
9 | []
10 | do ()
11 |
12 | module internal AssemblyVersionInformation =
13 | let [] AssemblyTitle = "FsYacc.Core"
14 | let [] AssemblyProduct = "FsLexYacc"
15 | let [] AssemblyDescription = "FsLex/FsYacc lexer/parser generation tools"
16 | let [] AssemblyVersion = "11.3.0"
17 | let [] AssemblyFileVersion = "11.3.0"
18 |
--------------------------------------------------------------------------------
/tests/fsyacc/Test1/test1.fsproj:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | Exe
5 | net6.0
6 | FS0760
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
--------------------------------------------------------------------------------
/tests/fsyacc/Test2/test2.fsproj:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | Exe
5 | net6.0
6 | FS0760
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
--------------------------------------------------------------------------------
/tests/fsyacc/Test1/test1.fsy:
--------------------------------------------------------------------------------
1 | %{
2 | //module TestParser
3 | //Bug1885: is about skipping // comments in the header and code sections, rather than lexing as tokens
4 | //Bug1885: REPRO: Convert a string such as "\"c:\\windows\\\"" into "c:\windows\"
5 | %}
6 |
7 | %type start
8 | %token MINUS STAR LPAREN RPAREN PLUS EOF LET IN END
9 | %token IDENT
10 | %start start
11 |
12 | %right MINUS
13 | %left PLUS
14 | %left STAR
15 | %%
16 |
17 | start: expr EOF { $1 }
18 |
19 | decl: IDENT expr { Tree.Node("decl",[$2]) }
20 |
21 | expr: expr MINUS expr { Tree.Node("-",[$1;$3]) }
22 | | expr PLUS expr { Tree.Node("+",[$1;$3]) }
23 | | expr STAR expr { Tree.Node("*",[$1;$3]) }
24 | | LPAREN expr RPAREN { $2 }
25 | | IDENT { Tree.Node($1,[]) }
26 | | LET decl IN expr END { $4 }
27 |
28 |
29 |
30 |
--------------------------------------------------------------------------------
/src/FsLexYacc.Build.Tasks/AssemblyInfo.fs:
--------------------------------------------------------------------------------
1 | // Auto-Generated by FAKE; do not edit
2 | namespace System
3 | open System.Reflection
4 |
5 | []
6 | []
7 | []
8 | []
9 | []
10 | do ()
11 |
12 | module internal AssemblyVersionInformation =
13 | let [] AssemblyTitle = "FsLexYacc.Build.Tasks"
14 | let [] AssemblyProduct = "FsLexYacc"
15 | let [] AssemblyDescription = "FsLex/FsYacc lexer/parser generation tools"
16 | let [] AssemblyVersion = "7.0.0"
17 | let [] AssemblyFileVersion = "7.0.0"
18 |
--------------------------------------------------------------------------------
/src/FsLexYacc.Runtime/AssemblyInfo.fs:
--------------------------------------------------------------------------------
1 | // Auto-Generated by FAKE; do not edit
2 | namespace System
3 | open System.Reflection
4 |
5 | []
6 | []
7 | []
8 | []
9 | []
10 | do ()
11 |
12 | module internal AssemblyVersionInformation =
13 | let [] AssemblyTitle = "FsLexYacc.Runtime"
14 | let [] AssemblyProduct = "FsLexYacc.Runtime"
15 | let [] AssemblyDescription = "FsLex/FsYacc lexer/parser generation tools"
16 | let [] AssemblyVersion = "11.3.0"
17 | let [] AssemblyFileVersion = "11.3.0"
18 |
--------------------------------------------------------------------------------
/tests/fsyacc/Test1/test1lex.fsl:
--------------------------------------------------------------------------------
1 |
2 | {
3 | module TestLexer
4 | open TestParser
5 | }
6 |
7 | let letter = ['A'-'Z'] | ['a'-'z']
8 | let digit = ['0'-'9']
9 | let ident_start_char =
10 | letter | ['_']
11 | let ident_char = ( ident_start_char| digit | ['\''] )
12 | let ident = ident_start_char ident_char*
13 | let whitespace = [' ' '\t' '\n' '\r']
14 |
15 |
16 | rule token = parse
17 | | "(" { LPAREN }
18 | | ")" { RPAREN }
19 | | "*" { STAR }
20 | | "+" { PLUS }
21 | | "-" { MINUS }
22 | | "let" { LET }
23 | | "in" { IN }
24 | | "end" { END }
25 | | ident { let s = lexbuf.Lexeme |> System.Text.Encoding.ASCII.GetString
26 | match s with
27 | | "let" -> LET
28 | | "in" -> IN
29 | | "end" -> END
30 | | _ -> IDENT(s) }
31 | | whitespace { token lexbuf }
32 | | eof { EOF }
33 |
34 |
--------------------------------------------------------------------------------
/tests/fsyacc/unicode/test1-unicode.fsproj:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | Exe
5 | net6.0
6 | FS0760
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
--------------------------------------------------------------------------------
/src/FsYacc/fsyacc.fsproj:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | Exe
5 | net6.0
6 | Major
7 | false
8 |
9 |
10 |
11 | AssemblyInfo.fs
12 |
13 |
14 | arg.fsi
15 |
16 |
17 | arg.fs
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
--------------------------------------------------------------------------------
/src/FsLex/fslex.fsproj:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | Exe
5 | net6.0
6 | Major
7 | false
8 | true
9 |
10 |
11 |
12 |
13 | arg.fsi
14 |
15 |
16 | arg.fs
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
--------------------------------------------------------------------------------
/src/FsLex.Core/FsLex.Core.fsproj:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | netstandard2.0
5 |
6 |
7 |
8 |
9 | --module FsLexYacc.FsLex.Parser --lexlib FSharp.Text.Lexing --parslib FSharp.Text.Parsing
10 |
11 |
12 | --unicode --lexlib FSharp.Text.Lexing
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
--------------------------------------------------------------------------------
/src/FsLexYacc.Runtime/FsLexYacc.Runtime.fsproj:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | netstandard2.0
5 | Library
6 | Runtime for FsLex/FsYacc lexer/parser generation tools
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
--------------------------------------------------------------------------------
/src/FsYacc.Core/FsYacc.Core.fsproj:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | netstandard2.0
5 |
6 |
7 |
8 |
9 | --unicode --lexlib FSharp.Text.Lexing
10 |
11 |
12 | --module FsLexYacc.FsYacc.Parser --lexlib FSharp.Text.Lexing --parslib FSharp.Text.Parsing
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | FsLexYacc
2 | =======================
3 |
4 | FsLex and FsYacc tools
5 |
6 | See https://fsprojects.github.io/FsLexYacc.
7 |
8 | * FsLexYacc.Runtime - [](https://www.nuget.org/packages/FsLexYacc.Runtime)
9 | * FsLexYacc - [](https://www.nuget.org/packages/FsLexYacc)
10 |
11 | Build the project
12 | -----------------
13 |
14 | [](https://github.com/fsprojects/FsLexYacc/actions?query=branch%3Amaster)
15 |
16 | * Unix: Run *build.sh*
17 | * Windows: Run *build.cmd*
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 | ### Maintainer(s)
30 |
31 | - [@kkm000](https://github.com/kkm000)
32 | - [@dsyme](https://github.com/dsyme)
33 |
34 | The default maintainer account for projects under "fsprojects" is [@fsprojectsgit](https://github.com/fsprojectsgit) - F# Community Project Incubation Space (repo management)
35 |
36 |
--------------------------------------------------------------------------------
/tests/fsyacc/Test1/test1.input3.tokens.bsl:
--------------------------------------------------------------------------------
1 | tokenize - getting one token
2 | tokenize - got LET, now at char 0
3 | tokenize - getting one token
4 | tokenize - got IDENT, now at char 4
5 | ident char = 105
6 | ident char = 100
7 | tokenize - getting one token
8 | tokenize - got IDENT, now at char 7
9 | ident char = 120
10 | tokenize - getting one token
11 | tokenize - got PLUS, now at char 9
12 | tokenize - getting one token
13 | tokenize - got IDENT, now at char 11
14 | ident char = 120
15 | tokenize - getting one token
16 | tokenize - got IN, now at char 13
17 | tokenize - getting one token
18 | tokenize - got IDENT, now at char 16
19 | ident char = 105
20 | ident char = 100
21 | tokenize - getting one token
22 | tokenize - got PLUS, now at char 19
23 | tokenize - getting one token
24 | tokenize - got IDENT, now at char 21
25 | ident char = 105
26 | ident char = 100
27 | tokenize - getting one token
28 | tokenize - got END, now at char 24
29 | tokenize - getting one token
30 | tokenize - got EOF, now at char 27
31 |
--------------------------------------------------------------------------------
/tests/fsyacc/Test1/test1.input4.tokens.bsl:
--------------------------------------------------------------------------------
1 | tokenize - getting one token
2 | tokenize - got LET, now at char 0
3 | tokenize - getting one token
4 | tokenize - got IDENT, now at char 4
5 | ident char = 105
6 | ident char = 100
7 | tokenize - getting one token
8 | tokenize - got IDENT, now at char 7
9 | ident char = 120
10 | tokenize - getting one token
11 | tokenize - got PLUS, now at char 9
12 | tokenize - getting one token
13 | tokenize - got IDENT, now at char 11
14 | ident char = 120
15 | tokenize - getting one token
16 | tokenize - got IN, now at char 13
17 | tokenize - getting one token
18 | tokenize - got IDENT, now at char 16
19 | ident char = 105
20 | ident char = 100
21 | tokenize - getting one token
22 | tokenize - got PLUS, now at char 19
23 | tokenize - getting one token
24 | tokenize - got IDENT, now at char 21
25 | ident char = 105
26 | ident char = 100
27 | tokenize - getting one token
28 | tokenize - got END, now at char 24
29 | tokenize - getting one token
30 | tokenize - got EOF, now at char 27
31 |
--------------------------------------------------------------------------------
/tests/fsyacc/arg2.fs:
--------------------------------------------------------------------------------
1 | // (c) Microsoft Corporation 2005-2009.
2 |
3 | []
4 | module Microsoft.FSharp.Compatibility.OCaml.Arg
5 | open FSharp.Text
6 |
7 | let Clear x = ArgType.Clear x
8 | let Float x = ArgType.Float x
9 | let Int x = ArgType.Int x
10 | let Rest x = ArgType.Rest x
11 | let Set x = ArgType.Set x
12 | let String x = ArgType.String x
13 | let Unit x = ArgType.Unit x
14 |
15 | type spec = ArgType
16 | type argspec = (string * spec * string)
17 | #if FX_NO_COMMAND_LINE_ARGS
18 | #else
19 |
20 | exception Bad of string
21 | exception Help of string
22 | let parse_argv cursor argv specs other usageText =
23 | ArgParser.ParsePartial(cursor, argv, List.map (fun (a,b,c) -> ArgInfo(a,b,c)) specs, other, usageText)
24 |
25 | let parse specs other usageText =
26 | ArgParser.Parse(List.map (fun (a,b,c) -> ArgInfo(a,b,c)) specs, other, usageText)
27 |
28 | let usage specs usageText =
29 | ArgParser.Usage(List.map (fun (a,b,c) -> ArgInfo(a,b,c)) specs, usageText)
30 | #endif
--------------------------------------------------------------------------------
/tests/LexAndYaccMiniProject/Program.fs:
--------------------------------------------------------------------------------
1 | // Learn more about F# at http://fsharp.net
2 |
3 | open System.IO
4 | open FSharp.Text.Lexing
5 |
6 | let testLexerAndParserFromString text expectedCount =
7 | let lexbuf = LexBuffer.FromString text
8 |
9 | let countFromParser = Parser.start Lexer.tokenstream lexbuf
10 |
11 | printfn "countFromParser: result = %d, expected %d" countFromParser expectedCount
12 |
13 | let testLexerAndParserFromFile (fileName:string) expectedCount =
14 | use textReader = new System.IO.StreamReader(fileName)
15 | let lexbuf = LexBuffer.FromTextReader textReader
16 |
17 | let countFromParser = Parser.start Lexer.tokenstream lexbuf
18 |
19 | printfn "countFromParser: result = %d, expected %d" countFromParser expectedCount
20 |
21 | testLexerAndParserFromString "hello" 1
22 | testLexerAndParserFromString "hello hello" 2
23 |
24 | let testFile = Path.Combine(__SOURCE_DIRECTORY__, "test.txt")
25 | File.WriteAllText(testFile, "hello hello")
26 | testLexerAndParserFromFile testFile 2
27 |
28 | printfn "Press any key to continue..."
29 | System.Console.ReadLine() |> ignore
30 |
31 |
32 |
33 |
--------------------------------------------------------------------------------
/tests/JsonLexAndYaccExample/JsonValue.fs:
--------------------------------------------------------------------------------
1 | module JsonParsing
2 |
3 |
4 | type JsonValue =
5 | | Assoc of (string * JsonValue) list
6 | | Bool of bool
7 | | Float of float
8 | | Int of int
9 | | List of JsonValue list
10 | | Null
11 | | String of string
12 |
13 |
14 | //below function is not important, it simply prints values
15 | static member print x =
16 | match x with
17 | | Bool b -> sprintf "Bool(%b)" b
18 | | Float f -> sprintf "Float(%f)" f
19 | | Int d -> sprintf "Int(%d)" d
20 | | String s -> sprintf "String(%s)" s
21 | | Null -> "Null()"
22 | | Assoc props -> props
23 | |> List.map (fun (name,value) -> sprintf "\"%s\" : %s" name (JsonValue.print(value)))
24 | |> String.concat ","
25 | |> sprintf "Assoc(%s)"
26 | | List values -> values
27 | |> List.map (fun value -> JsonValue.print(value))
28 | |> String.concat ","
29 | |> sprintf "List(%s)"
30 |
--------------------------------------------------------------------------------
/tests/LexAndYaccMiniProject/LexAndYaccMiniProject.fsproj:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | Exe
5 | net6.0
6 | ..\..\src\FsLex\bin\$(Configuration)\net6.0
7 | ..\..\src\FsYacc\bin\$(Configuration)\net6.0
8 |
9 |
10 |
11 | --module Parser
12 |
13 |
14 | --module Lexer --unicode
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
1 | The MIT License (MIT)
2 |
3 | Copyright (c) Microsoft Corporation.
4 | All rights reserved.
5 |
6 | Permission is hereby granted, free of charge, to any person obtaining a copy
7 | of this software and associated documentation files (the "Software"), to deal
8 | in the Software without restriction, including without limitation the rights
9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 | copies of the Software, and to permit persons to whom the Software is
11 | furnished to do so, subject to the following conditions:
12 |
13 | The above copyright notice and this permission notice shall be included in all
14 | copies or substantial portions of the Software.
15 |
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 | SOFTWARE.
23 |
24 |
--------------------------------------------------------------------------------
/tests/JsonLexAndYaccExample/Script.fsx:
--------------------------------------------------------------------------------
1 | #r "bin\\debug\\FsLexYacc.Runtime.dll"
2 | #r "bin\\debug\\JsonLexAndYaccExample.exe"
3 |
4 | open FSharp.Text.Lexing
5 | open JsonParsing
6 | open System.IO
7 |
8 | let parse json =
9 | let lexbuf = LexBuffer.FromString json
10 | let res = Parser.start Lexer.read lexbuf
11 | res
12 |
13 | //a few parsing tests with simple and complex json
14 | let simpleJson = "{\"f\" : 1, \"x\" : 1}"
15 | let (Some parseResult) = simpleJson |> parse
16 | printfn "%s" (JsonValue.print parseResult)
17 |
18 |
19 | let simpleJson2 = @"{
20 | ""title"": ""Cities"",
21 | ""cities"": [
22 | { ""name"": ""Chicago"", ""zips"": [60601,60600] },
23 | { ""name"": ""New York"", ""zips"": [10001] }
24 | ]
25 | }"
26 | let (Some parseResult2) = simpleJson2 |> parse
27 | printfn "%s" (JsonValue.print parseResult2)
28 |
29 |
30 | let complexJson = File.ReadAllText (Path.Combine(__SOURCE_DIRECTORY__,"randomComplexTestsJson.json"))
31 | complexJson |> parse |> ignore
32 |
33 |
34 | //test lexing error
35 | try
36 | let simpleJson = "{\"f\" ;"
37 | let (Some parseResult) = simpleJson |> parse
38 | printfn "%s" (JsonValue.print parseResult)
39 | with
40 | | e -> printfn "Error is expected here: \n %s" (e.Message)
--------------------------------------------------------------------------------
/tests/JsonLexAndYaccExample/JsonLexAndYaccExample.fsproj:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | Exe
5 | net6.0
6 | ..\..\src\FsLex\bin\$(Configuration)\net6.0
7 | ..\..\src\FsYacc\bin\$(Configuration)\net6.0
8 |
9 |
10 |
11 |
12 | --module Parser
13 |
14 |
15 | --unicode
16 |
17 |
18 |
19 |
20 |
21 |
22 | Always
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
--------------------------------------------------------------------------------
/tests/JsonLexAndYaccExample/Parser.fsy:
--------------------------------------------------------------------------------
1 | //This parser has been written with help of "Real world OCaml" book by Yaron Minsky, Anil Madhavapeddy, Jason Hickey (chapter 15)
2 | %{
3 | open JsonParsing
4 | %}
5 |
6 | %start start
7 |
8 | %token <int> INT
9 | %token <float> FLOAT
10 | %token <string> ID
11 | %token <string> STRING
12 | %token TRUE
13 | %token FALSE
14 | %token NULL
15 | %token LEFT_BRACE
16 | %token RIGHT_BRACE
17 | %token LEFT_BRACK
18 | %token RIGHT_BRACK
19 | %token COLON
20 | %token COMMA
21 | %token EOF
22 | %type <JsonValue option> start
23 |
24 | %%
25 |
26 | start: prog { $1 }
27 |
28 | prog:
29 | | EOF { None }
30 | | value { Some $1 }
31 |
32 | value:
33 | | LEFT_BRACE object_fields RIGHT_BRACE { Assoc $2 }
34 | | LEFT_BRACK array_values RIGHT_BRACK { List $2 }
35 | | STRING { String $1 }
36 | | INT { Int $1 }
37 | | FLOAT { Float $1 }
38 | | TRUE { Bool true }
39 | | FALSE { Bool false }
40 | | NULL { Null }
41 |
42 | object_fields: rev_object_fields { List.rev $1 };
43 |
44 | rev_object_fields:
45 | | { [] }
46 | | STRING COLON value { [($1,$3)] }
47 | | rev_object_fields COMMA STRING COLON value { ($3, $5) :: $1 }
48 |
49 | array_values:
50 | | { [] }
51 | | rev_values { List.rev $1 }
52 |
53 | rev_values:
54 | | value { [$1] }
55 | | rev_values COMMA value { $3 :: $1 }
--------------------------------------------------------------------------------
/.github/workflows/push-main.yml:
--------------------------------------------------------------------------------
1 | name: Build and Test and Publish (master)
2 |
3 | on:
4 | push:
5 | branches:
6 | - master
7 |
8 | permissions:
9 | contents: read
10 | pages: write
11 | id-token: write
12 |
13 | jobs:
14 | build:
15 |
16 | strategy:
17 | fail-fast: false
18 | matrix:
19 | os: [ubuntu-latest]
20 | runs-on: ${{ matrix.os }}
21 |
22 | steps:
23 | - uses: actions/checkout@v3
24 | - name: Setup .NET for main project build
25 | uses: actions/setup-dotnet@v3
26 | - name: Install local tools
27 | run: dotnet tool restore
28 | - name: Paket restore
29 | run: dotnet paket restore
30 | - name: Build
31 | run: dotnet fake run build.fsx -t Release
32 | - name: Publish NuGets (if main version changed)
33 | run: dotnet nuget push "bin/*.nupkg" -s https://api.nuget.org/v3/index.json -k ${{ secrets.NUGET_KEY }} --skip-duplicate
34 | - name: Build documentation
35 | run: dotnet fake run build.fsx -t GenerateDocs
36 | - name: Upload documentation
37 | uses: actions/upload-pages-artifact@v1
38 | with:
39 | path: ./output
40 |
41 | docs:
42 | runs-on: ubuntu-latest
43 | needs: build
44 | steps:
45 | - name: Deploy to GitHub Pages
46 | id: deployment
47 | uses: actions/deploy-pages@v1
48 |
--------------------------------------------------------------------------------
/tests/JsonLexAndYaccExample/Program.fs:
--------------------------------------------------------------------------------
1 |
2 | module Program
3 | open FSharp.Text.Lexing
4 | open JsonParsing
5 |
6 | [<EntryPoint>]
7 | let main argv =
8 | let parse json =
9 | let lexbuf = LexBuffer.FromString json
10 | let res = Parser.start Lexer.read lexbuf
11 | res
12 |
13 | //a few parsing tests with simple and complex json
14 | let simpleJson = "{\"f\" : 1}"
15 | let parseResult = simpleJson |> parse
16 | printfn "%s" (JsonValue.print parseResult.Value)
17 |
18 |
19 | let simpleJson2 = @"{
20 | ""title"": ""Cities"",
21 | ""cities"": [
22 | { ""name"": ""Chicago"", ""zips"": [60601,60600] },
23 | { ""name"": ""New York"", ""zips"": [10001] }
24 | ]
25 | }"
26 | let parseResult2 = simpleJson2 |> parse
27 | printfn "%s" (JsonValue.print parseResult2.Value)
28 |
29 | let complexJson = System.IO.File.ReadAllText "randomComplexTestsJson.json"
30 | complexJson |> parse |> ignore
31 |
32 |
33 | //test lexing error
34 | try
35 | let simpleJson = "{\"f\"\n" + "\n" + ";"
36 | let parseResult = simpleJson |> parse
37 | printfn "%s" (JsonValue.print parseResult.Value)
38 | with
39 | | e -> printfn "Error is expected here: \n %s" (e.Message)
40 |
41 | 0
--------------------------------------------------------------------------------
/src/Directory.Build.props:
--------------------------------------------------------------------------------
1 |
2 |
3 | true
4 | true
5 | Microsoft Corporation, Don Syme, F# Software Foundation contributors
6 | F#, fsharp, yacc, fsyacc, lex, parsing, lexing, fslex
7 | MIT
8 | logo.png
9 | https://raw.githubusercontent.com/fsprojects/FsLexYacc/master/docs/img/logo.png
10 | https://fsprojects.github.io/FsLexYacc/
11 | embedded
12 | true
13 | true
14 | $(AllowedOutputExtensionsInPackageBuildOutputFolder);.xml
15 | https://github.com/fsprojects/FsLexYacc/blob/master/LICENSE.txt
16 | https://github.com/fsprojects/FsLexYacc/blob/master/RELEASE_NOTES.md
17 | https://github.com/fsprojects/FsLexYacc/
18 | img/logo.png
19 | img/favicon.ico
20 |
21 |
--------------------------------------------------------------------------------
/tests/fsyacc/unicode/test1-unicode-lex.fsl:
--------------------------------------------------------------------------------
1 |
2 | {
3 | module TestLexer
4 | open TestParser
5 | }
6 |
7 | (* These specifications follow the C# specification *)
8 | let digit = '\Nd'
9 | let letter = '\Lu' | '\Ll' | '\Lm' | '\Lo' | '\Nl'
10 |
11 | let ident_start_char = letter | ['_']
12 |
13 | let connecting_char = '\Pc'
14 | let combining_char = '\Mn' | '\Mc'
15 | let formatting_char = '\Cf'
16 | let ident_char = letter | digit | connecting_char | combining_char | formatting_char
17 |
18 | let ident = ident_start_char ident_char*
19 |
20 | let whitespace =
21 | '\Zs'
22 | | '\u0009' (* horizontal tab *)
23 | | '\u000B' (* vertical tab *)
24 | | '\u000C' (* form feed *)
25 | | '\u000D' (* carriage return *)
26 | | '\u000A' (* line feed *)
27 | | '\u0085' (* next line *)
28 | | '\u2028' (* line separator *)
29 | | '\u2029' (* paragraph separator *)
30 |
31 |
32 | rule token = parse
33 | | "(" { LPAREN }
34 | (* the "approx equals" symbol, just to test a random specific Unicode character *)
35 | | '≈'+ { IDENT(new System.String(lexbuf.Lexeme) ) }
36 | (* | '\U00002248'+ { IDENT(new System.String(lexbuf.Lexeme) ) } *)
37 |
38 | (* the "not equals" symbol, just to test a random specific Unicode character *)
39 | | '≠'+ { IDENT(new System.String(lexbuf.Lexeme) ) }
40 | (* | '\U00002260'+ { IDENT(new System.String(lexbuf.Lexeme) ) } *)
41 | | ")" { RPAREN }
42 | | "*" { STAR }
43 | | "+" { PLUS }
44 | | "-" { MINUS }
45 | | ident { let s = new System.String(lexbuf.Lexeme)
46 | match s with
47 | | "let" -> LET
48 | | "in" -> IN
49 | | "end" -> END
50 | | _ -> IDENT(s) }
51 | | whitespace { token lexbuf }
52 | | eof { EOF }
53 |
54 |
--------------------------------------------------------------------------------
/tests/JsonLexAndYaccExample/Lexer.fsl:
--------------------------------------------------------------------------------
1 | //This lexer has been written with help of "Real world OCaml" book by Yaron Minsky, Anil Madhavapeddy, Jason Hickey (chapter 15)
2 | {
3 |
4 | module Lexer
5 |
6 | open FSharp.Text.Lexing
7 | open System
8 | open Parser
9 |
10 | exception SyntaxError of string
11 |
12 | let lexeme = LexBuffer<_>.LexemeString
13 |
14 | let newline (lexbuf: LexBuffer<_>) =
15 | lexbuf.EndPos <- lexbuf.EndPos.NextLine
16 | }
17 |
18 | let int = ['-' '+']? ['0'-'9']+
19 | let digit = ['0'-'9']
20 | let frac = '.' digit*
21 | let exp = ['e' 'E'] ['-' '+']? digit+
22 | let float = '-'? digit* frac? exp?
23 |
24 | let white = [' ' '\t']+
25 | let newline = '\r' | '\n' | "\r\n"
26 |
27 | rule read =
28 | parse
29 | | white { read lexbuf }
30 | | newline { newline lexbuf; read lexbuf }
31 | | int { INT (int (lexeme lexbuf)) }
32 | | float { FLOAT (float (lexeme lexbuf)) }
33 | | "true" { TRUE }
34 | | "false" { FALSE }
35 | | "null" { NULL }
36 | | '"' { read_string "" false lexbuf }
37 | | '{' { LEFT_BRACE }
38 | | '}' { RIGHT_BRACE }
39 | | '[' { LEFT_BRACK }
40 | | ']' { RIGHT_BRACK }
41 | | ':' { COLON }
42 | | ',' { COMMA }
43 | | eof { EOF }
44 | | _ { raise (Exception (sprintf "SyntaxError: Unexpected char: '%s' Line: %d Column: %d" (lexeme lexbuf) (lexbuf.StartPos.Line+1) (lexbuf.StartPos.Column+1))) }
45 |
46 |
47 | and read_string str ignorequote =
48 | parse
49 | | '"' { if ignorequote then (read_string (str+"\\\"") false lexbuf) else STRING (str) }
50 | | '\\' { read_string str true lexbuf }
51 | | [^ '"' '\\']+ { read_string (str+(lexeme lexbuf)) false lexbuf }
52 | | eof { raise (Exception ("String is not terminated")) }
--------------------------------------------------------------------------------
/docs/index.fsx:
--------------------------------------------------------------------------------
1 | (*** hide ***)
2 | // This block of code is omitted in the generated HTML documentation. Use
3 | // it to define helpers that you do not want to show in the documentation.
4 | // #I "../../bin"
5 |
6 | (**
7 | FsLex, FsYacc
8 | =============
9 |
10 | Example:
11 |
12 | * [Project File](https://github.com/fsprojects/FsLexYacc/blob/master/tests/LexAndYaccMiniProject/LexAndYaccMiniProject.fsproj)
13 | * [Lexer](https://github.com/fsprojects/FsLexYacc/blob/master/tests/LexAndYaccMiniProject/Lexer.fsl)
14 | * [Parser](https://github.com/fsprojects/FsLexYacc/blob/master/tests/LexAndYaccMiniProject/Parser.fsy)
15 | * [Program](https://github.com/fsprojects/FsLexYacc/blob/master/tests/LexAndYaccMiniProject/Program.fs)
16 |
17 | Video Tutorial:
18 |
19 | * [A gentle introduction to FsLexYacc](https://youtu.be/w7H_RQ6Fvvo?si=H0d2wBg9JcNCmJpn)
20 |
21 | *)
22 |
23 | (**
24 |
25 | Contributing and copyright
26 | --------------------------
27 |
28 | The project is hosted on [GitHub][gh] where you can [report issues][issues], fork
29 | the project and submit pull requests. If you're adding new public API, please also
30 | consider adding [samples][content] that can be turned into documentation. You might
31 | also want to read [library design notes][readme] to understand how it works.
32 |
33 | The library is available under the MIT license, see the
34 | [License file][license] in the GitHub repository.
35 |
36 | [content]: https://github.com/fsprojects/FsLexYacc/tree/master/docs/content
37 | [gh]: https://github.com/fsprojects/FsLexYacc
38 | [issues]: https://github.com/fsprojects/FsLexYacc/issues
39 | [readme]: https://github.com/fsprojects/FsLexYacc/blob/master/README.md
40 | [license]: https://github.com/fsprojects/FsLexYacc/blob/master/LICENSE.txt
41 | *)
42 |
--------------------------------------------------------------------------------
/tests/FsLex.Core.Tests/UnicodeTests.fs:
--------------------------------------------------------------------------------
1 | module FsLex.Core.Tests.UnicodeTests
2 | open System
3 | open System.Globalization
4 | open FsLexYacc.FsLex
5 | open Expecto
6 |
7 | [<Tests>]
8 | let tests =
9 | testList "Unicode" [
10 | testList "Unicode Categories" [
11 | test "Every unicode category should have a mapping" {
12 | let allUnicodeCategories = Enum.GetValues(typeof<UnicodeCategory>) |> Seq.cast<UnicodeCategory>
13 | let mappedUnicodeCategories = AST.unicodeCategories.Values
14 |
15 | Expect.containsAll mappedUnicodeCategories allUnicodeCategories "Not all unicode categories are mapped"
16 | }
17 |
18 | test "IsUnicodeCategory should recognize every encoded unicode category" {
19 | let unicodeCategoriesAsStrings = AST.unicodeCategories.Keys
20 | let encodedUnicodeCategories =
21 | unicodeCategoriesAsStrings
22 | |> Seq.map (fun uc -> AST.EncodeUnicodeCategory uc {unicode=true; caseInsensitive=false})
23 |
24 | Expect.all encodedUnicodeCategories AST.IsUnicodeCategory "Not all encoded unicode categories are recognized"
25 | }
26 |
27 | testProperty "TryDecodeUnicodeCategory should decode all valid EncodeUnicodeCategoryIndex outputs" <| fun (a:UnicodeCategory) ->
28 | a |> int |> AST.EncodeUnicodeCategoryIndex |> AST.TryDecodeUnicodeCategory = Some a
29 |
30 |
31 | testProperty "TryDecodeUnicodeCategory should return None for all EncodeChar outputs" <| fun (c:FsCheck.UnicodeChar) ->
32 | let encodedChar = AST.EncodeChar (c.Get) {unicode=true; caseInsensitive=false}
33 | encodedChar |> AST.TryDecodeUnicodeCategory = None
34 | ]
35 | ]
36 |
--------------------------------------------------------------------------------
/tests/fsyacc/Test2/test2.fsy:
--------------------------------------------------------------------------------
1 | %{
2 | //module TestParser
3 |
4 | /// Stephan Tolksdorf reported a bug where quotation characters in headers and semantic
5 | /// actions caused the parser generator to fail with an "unterminated string" error.
6 | let testQuotationCharInHeader1 = '"'
7 | let testQuotationCharInHeader2 = '\"'
8 |
9 | open Microsoft.FSharp.Compatibility.OCaml
10 |
11 | %}
12 |
13 | %type <Tree.tree> start
14 | %token MINUS STAR LPAREN RPAREN PLUS EOF LET IN END
15 | %token <string> IDENT
16 | %start start
17 |
18 | %right MINUS
19 | %left PLUS
20 | %left STAR
21 | %%
22 |
23 | start: decls EOF { System.Console.WriteLine("#decls = {0}.", List.length $1); Tree.Node("decls",$1) }
24 |
25 | decls: decls decl { $2 :: $1 } | decl { [$1] }
26 |
27 |
28 | decl: IDENT expr {
29 | /// Stephan Tolksdorf reported a bug where quotation characters in headers and semantic
30 | /// actions caused the parser generator to fail with an "unterminated string" error.
31 | let testQuotationCharInHeader1 = '"'
32 | let testQuotationCharInHeader2 = '\"'
33 | Tree.Node("decl",[$2]) }
34 |
35 | expr: expr MINUS expr { Tree.Node("-",[$1;$3]) }
36 | | expr PLUS expr { Tree.Node("+",[$1;$3]) }
37 | | expr STAR expr { Tree.Node("*",[$1;$3]) }
38 | | LPAREN expr RPAREN { $2 }
39 | | LET decl IN expr END { $4 }
40 | | LET error IN expr END { System.Console.Error.WriteLine("invisible error recovery successful."); $4 }
41 | | LPAREN expr error { System.Console.Error.WriteLine("Missing paren: visible recovery successful."); $2 }
42 | | RPAREN RPAREN RPAREN { System.Console.Error.WriteLine("Three parens is a bit rich - why not use Lisp if you like that sort of thing. Raising explicit parse error, which we will recover from.");
43 | raise FSharp.Text.Parsing.RecoverableParseError }
44 | | IDENT { Tree.Node($1,[]) }
45 |
46 |
47 |
48 |
--------------------------------------------------------------------------------
/src/FsYacc.Core/fsyaccpars.fsi:
--------------------------------------------------------------------------------
1 | // Signature file for parser generated by fsyacc
2 | module FsLexYacc.FsYacc.Parser
3 | type token =
4 | | TOKEN of (string option)
5 | | TYPE of (string)
6 | | BAR
7 | | PERCENT_PERCENT
8 | | START
9 | | LEFT
10 | | RIGHT
11 | | NONASSOC
12 | | LESS
13 | | GREATER
14 | | COLON
15 | | PREC
16 | | SEMI
17 | | EOF
18 | | ERROR
19 | | HEADER of (AST.Code)
20 | | CODE of (AST.Code)
21 | | IDENT of (string)
22 | type tokenId =
23 | | TOKEN_TOKEN
24 | | TOKEN_TYPE
25 | | TOKEN_BAR
26 | | TOKEN_PERCENT_PERCENT
27 | | TOKEN_START
28 | | TOKEN_LEFT
29 | | TOKEN_RIGHT
30 | | TOKEN_NONASSOC
31 | | TOKEN_LESS
32 | | TOKEN_GREATER
33 | | TOKEN_COLON
34 | | TOKEN_PREC
35 | | TOKEN_SEMI
36 | | TOKEN_EOF
37 | | TOKEN_ERROR
38 | | TOKEN_HEADER
39 | | TOKEN_CODE
40 | | TOKEN_IDENT
41 | | TOKEN_end_of_input
42 | | TOKEN_error
43 | type nonTerminalId =
44 | | NONTERM__startspec
45 | | NONTERM_spec
46 | | NONTERM_headeropt
47 | | NONTERM_decls
48 | | NONTERM_decl
49 | | NONTERM_idents
50 | | NONTERM_rules
51 | | NONTERM_rule
52 | | NONTERM_optbar
53 | | NONTERM_optsemi
54 | | NONTERM_clauses
55 | | NONTERM_clause
56 | | NONTERM_syms
57 | | NONTERM_optprec
58 | /// This function maps tokens to integer indexes
59 | val tagOfToken: token -> int
60 |
61 | /// This function maps integer indexes to symbolic token ids
62 | val tokenTagToTokenId: int -> tokenId
63 |
64 | /// This function maps production indexes returned in syntax errors to strings representing the non terminal that would be produced by that production
65 | val prodIdxToNonTerminal: int -> nonTerminalId
66 |
67 | /// This function gets the name of a token as a string
68 | val token_to_string: token -> string
69 | val spec : (FSharp.Text.Lexing.LexBuffer<'cty> -> token) -> FSharp.Text.Lexing.LexBuffer<'cty> -> (AST.ParserSpec)
70 |
--------------------------------------------------------------------------------
/tests/fsyacc/repro1885/repro1885.fsl:
--------------------------------------------------------------------------------
1 | {
2 | (*
3 | Repro of FSharp Bugs 1885, "FSLex doesn't ignore strings in comments"
4 | *)
5 |
6 | open System
7 |
8 | (*
9 | Testcase " <--------------------------
10 | *)
11 |
12 | (*
13 | Testcase "asdfasdfasdf" <--------------------------
14 | *)
15 |
16 | // Opens methods related to fslex.exe
17 | open Lexing
18 |
19 | // All of our token types are now generated by fsYacc
20 | open Parser
21 |
22 |
23 | let inc_lnum bol pos =
24 | let lnum = pos.pos_lnum in
25 | {pos with pos_lnum = lnum+1; pos_bol = bol }
26 |
27 | let newline lexbuf =
28 | lexbuf_set_curr_p lexbuf
29 | ( inc_lnum (lexeme_end lexbuf) (lexeme_end_p lexbuf))
30 |
31 | // Convert a string such as "\"c:\\windows\\\"" into "c:\windows\" <--------------------------
32 | // "another testcase" <--------------------------
33 | // "and another <--------------------------
34 | let normalizeString (str : string) =
35 | let str = str.Replace("\\\"", "\"")
36 | let str = str.Replace("\\\\", "\\")
37 | if str.[0] = '\"' && str.[str.Length - 1] = '\"' then
38 | str.Substring(1, str.Length - 2)
39 | else
40 | str
41 |
42 | }
43 |
44 | // Regular expressions
45 | let whitespace = [' ' '\t' ]
46 | let newline = ('\n' | '\r' '\n')
47 | let str = '\"' [^ '\"']* '\"'
48 |
49 | rule tokenstream = parse
50 | // --------------------------
51 | | "{" { LCURLY }
52 | | "}" { RCURLY }
53 | | "=" { EQUALS }
54 | // --------------------------
55 | | str { STR(lexeme lexbuf) }
56 | // --------------------------
57 | | whitespace { tokenstream lexbuf }
58 | | newline { newline lexbuf; tokenstream lexbuf }
59 | // --------------------------
60 | | _ { STR("ParseError" + (lexeme lexbuf)) }
61 | | eof { EOF }
--------------------------------------------------------------------------------
/src/Common/Arg.fsi:
--------------------------------------------------------------------------------
1 | // (c) Microsoft Corporation 2005-2009.
2 |
3 | // A simple command-line argument processor.
4 | namespace FSharp.Text
5 |
6 | /// The spec value describes the action of the argument,
7 | /// and whether it expects a following parameter.
8 | []
9 | type ArgType =
10 | static member Clear: bool ref -> ArgType
11 | static member Float: (float -> unit) -> ArgType
12 | static member Int: (int -> unit) -> ArgType
13 | static member Rest: (string -> unit) -> ArgType
14 | static member Set: bool ref -> ArgType
15 | static member String: (string -> unit) -> ArgType
16 | static member Unit: (unit -> unit) -> ArgType
17 |
18 | type ArgInfo =
19 | new: name: string * action: ArgType * help: string -> ArgInfo
20 | /// Return the name of the argument
21 | member Name: string
22 | /// Return the argument type and action of the argument
23 | member ArgType: ArgType
24 | /// Return the usage help associated with the argument
25 | member HelpText: string
26 |
27 | [<Sealed>]
28 | type ArgParser =
29 | #if FX_NO_COMMAND_LINE_ARGS
30 | #else
31 |
32 | /// Parse some of the arguments given by 'argv', starting at the given position
33 | []
34 | static member ParsePartial:
35 | cursor: int ref * argv: string[] * arguments: seq<ArgInfo> * ?otherArgs: (string -> unit) * ?usageText: string -> unit
36 |
37 | /// Parse the arguments given by System.Environment.GetCommandLineArgs()
38 | /// according to the argument processing specifications "specs".
39 | /// Args begin with "-". Non-arguments are passed to "f" in
40 | /// order. "use" is printed as part of the usage line if an error occurs.
41 |
42 | static member Parse: arguments: seq<ArgInfo> * ?otherArgs: (string -> unit) * ?usageText: string -> unit
43 | #endif
44 |
45 | /// Prints the help for each argument.
46 | static member Usage: arguments: seq<ArgInfo> * ?usage: string -> unit
47 |
--------------------------------------------------------------------------------
/nuget/FsLexYacc.template:
--------------------------------------------------------------------------------
1 | type file
2 | id FsLexYacc
3 | description
4 | Tools for FsLex/FsYacc lexer/parser generation tools
5 | authors
6 | Microsoft Corporation, Don Syme, F# Software Foundation contributors
7 | summary
8 | Tools for FsLex/FsYacc lexer/parser generation tools
9 | licenseurl https://github.com/fsprojects/FsLexYacc/blob/master/LICENSE.txt
10 | projecturl https://github.com/fsprojects/FsLexYacc
11 | iconurl https://raw.githubusercontent.com/fsprojects/FsLexYacc/master/docs/img/logo.png
12 | tags
13 | F#, fsharp, yacc, fsyacc, lex, parsing, lexing, fslex
14 | files
15 | ../src/FsLex/bin/Release/net6.0/publish ==> build/fslex/net6.0
16 | ../src/FsYacc/bin/Release/net6.0/publish ==> build/fsyacc/net6.0
17 | ../src/FsLexYacc.Build.Tasks/FsLexYacc.targets ==> build
18 | ../src/FsLexYacc.Runtime/Lexing.fsi ==> src/fslex
19 | ../src/FsLexYacc.Runtime/Lexing.fs ==> src/fslex
20 | ../src/FsLexYacc.Runtime/Parsing.fsi ==> src/fslex
21 | ../src/FsLexYacc.Runtime/Parsing.fs ==> src/fslex
22 | ../src/Common/Arg.fsi ==> src/fslex
23 | ../src/Common/Arg.fs ==> src/fslex
24 | ../src/FsLex.Core/fslexast.fs ==> src/fslex
25 | ../src/FsLex.Core/fslexpars.fs ==> src/fslex
26 | ../src/FsLex.Core/fslexlex.fs ==> src/fslex
27 | ../src/FsLex/fslex.fs ==> src/fslex
28 | ../src/FsLex/fslex.fsx ==> src/fslex
29 | ../src/FsLexYacc.Runtime/Lexing.fsi ==> src/fsyacc
30 | ../src/FsLexYacc.Runtime/Lexing.fs ==> src/fsyacc
31 | ../src/FsLexYacc.Runtime/Parsing.fsi ==> src/fsyacc
32 | ../src/FsLexYacc.Runtime/Parsing.fs ==> src/fsyacc
33 | ../src/Common/Arg.fsi ==> src/fsyacc
34 | ../src/Common/Arg.fs ==> src/fsyacc
35 | ../src/FsYacc.Core/fsyaccast.fs ==> src/fsyacc
36 | ../src/FsYacc.Core/fsyaccpars.fs ==> src/fsyacc
37 | ../src/FsYacc.Core/fsyacclex.fs ==> src/fsyacc
38 | ../src/FsYacc/fsyacc.fs ==> src/fsyacc
39 | ../src/FsYacc/fsyacc.fsx ==> src/fsyacc
40 | ../src/FsLexYacc.Build.Tasks/FsLexYacc.targets ==> src
41 | dependencies
42 | framework: netstandard2.0
43 | FsLexYacc.Runtime >= CURRENTVERSION
44 | FSharp.Core >= LOCKEDVERSION
45 |
--------------------------------------------------------------------------------
/src/FsLex.Core/fslexpars.fsi:
--------------------------------------------------------------------------------
1 | // Signature file for parser generated by fsyacc
2 | module FsLexYacc.FsLex.Parser
3 | type token =
4 | | EOF
5 | | BAR
6 | | DOT
7 | | PLUS
8 | | STAR
9 | | QMARK
10 | | EQUALS
11 | | UNDERSCORE
12 | | LBRACK
13 | | RBRACK
14 | | HAT
15 | | DASH
16 | | RULE
17 | | PARSE
18 | | LET
19 | | AND
20 | | LPAREN
21 | | RPAREN
22 | | COLON
23 | | UNICODE_CATEGORY of (string)
24 | | CHAR of (char)
25 | | CODE of (AST.Code)
26 | | STRING of (string)
27 | | IDENT of (string)
28 | type tokenId =
29 | | TOKEN_EOF
30 | | TOKEN_BAR
31 | | TOKEN_DOT
32 | | TOKEN_PLUS
33 | | TOKEN_STAR
34 | | TOKEN_QMARK
35 | | TOKEN_EQUALS
36 | | TOKEN_UNDERSCORE
37 | | TOKEN_LBRACK
38 | | TOKEN_RBRACK
39 | | TOKEN_HAT
40 | | TOKEN_DASH
41 | | TOKEN_RULE
42 | | TOKEN_PARSE
43 | | TOKEN_LET
44 | | TOKEN_AND
45 | | TOKEN_LPAREN
46 | | TOKEN_RPAREN
47 | | TOKEN_COLON
48 | | TOKEN_UNICODE_CATEGORY
49 | | TOKEN_CHAR
50 | | TOKEN_CODE
51 | | TOKEN_STRING
52 | | TOKEN_IDENT
53 | | TOKEN_end_of_input
54 | | TOKEN_error
55 | type nonTerminalId =
56 | | NONTERM__startspec
57 | | NONTERM_spec
58 | | NONTERM_codeopt
59 | | NONTERM_Macros
60 | | NONTERM_macro
61 | | NONTERM_Rules
62 | | NONTERM_rule
63 | | NONTERM_args
64 | | NONTERM_optbar
65 | | NONTERM_clauses
66 | | NONTERM_clause
67 | | NONTERM_regexp
68 | | NONTERM_charset
69 | /// This function maps tokens to integer indexes
70 | val tagOfToken: token -> int
71 |
72 | /// This function maps integer indexes to symbolic token ids
73 | val tokenTagToTokenId: int -> tokenId
74 |
75 | /// This function maps production indexes returned in syntax errors to strings representing the non terminal that would be produced by that production
76 | val prodIdxToNonTerminal: int -> nonTerminalId
77 |
78 | /// This function gets the name of a token as a string
79 | val token_to_string: token -> string
80 | val spec : (FSharp.Text.Lexing.LexBuffer<'cty> -> token) -> FSharp.Text.Lexing.LexBuffer<'cty> -> (AST.Spec)
81 |
--------------------------------------------------------------------------------
/src/FsYacc.Core/fsyaccpars.fsy:
--------------------------------------------------------------------------------
1 | %{
2 | (* (c) Microsoft Corporation 2005-2008. *)
3 |
4 | // FsLexYacc.FsYacc.Parser
5 |
6 | open FsLexYacc.FsYacc
7 | open FsLexYacc.FsYacc.AST
8 |
9 | #nowarn "62" // This construct is for ML compatibility
10 | #nowarn "64" // Turn off warnings that type variables used in production annotations are instantiated to concrete type"
11 |
12 | %}
13 |
14 | %type <AST.ParserSpec> spec
15 | %token <string> IDENT
16 | %token <AST.Code> HEADER CODE
17 | %token BAR PERCENT_PERCENT START LEFT RIGHT NONASSOC LESS GREATER COLON PREC SEMI EOF ERROR
18 | %token <string> TYPE
19 | %token <string option> TOKEN
20 | %start spec
21 | %left BAR
22 | %%
23 |
24 | spec:
25 | headeropt decls PERCENT_PERCENT rules
26 | { List.foldBack (fun f x -> f x) $2 { Header=$1;Tokens=[];Types=[];Associativities=[];StartSymbols=[];Rules=$4 } }
27 |
28 | headeropt:
29 | | HEADER
30 | { $1 }
31 | |
32 | { "", (parseState.ResultRange |> fst)}
33 |
34 | decls:
35 | { [] }
36 | | decl decls { $1 :: $2 }
37 |
38 | decl:
39 | TOKEN idents { (fun x -> {x with Tokens = x.Tokens @ (List.map (fun x -> (x,$1)) $2)}) }
40 | | TYPE idents { (fun x -> {x with Types = x.Types @ (List.map (fun x -> (x,$1)) $2)} ) }
41 | | START idents { (fun x -> {x with StartSymbols = x.StartSymbols @ $2} ) }
42 | | LEFT idents { (fun x -> {x with Associativities = x.Associativities @ [(List.map (fun x -> (x,LeftAssoc)) $2)]} ) }
43 | | RIGHT idents { (fun x -> {x with Associativities = x.Associativities @ [(List.map (fun x -> (x,RightAssoc)) $2)]} ) }
44 | | NONASSOC idents { (fun x -> {x with Associativities = x.Associativities @ [(List.map (fun x -> (x,NonAssoc)) $2)]} ) }
45 |
46 | idents: IDENT idents { $1 :: $2 } | { [] }
47 | rules: rule rules { $1 :: $2 } | rule { [$1] }
48 | rule: IDENT COLON optbar clauses optsemi { ($1,$4) }
49 | optbar: { } | BAR { }
50 | optsemi: { } | SEMI { }
51 | clauses: clause BAR clauses {$1 :: $3 } | clause { [$1] }
52 | clause: syms optprec CODE { Rule($1,$2,Some $3) }
53 | syms: IDENT syms { $1 :: $2 } | ERROR syms { "error" :: $2 } | { [] }
54 | optprec: { None } | PREC IDENT { Some $2 }
55 |
56 |
57 |
--------------------------------------------------------------------------------
/tests/fsyacc/Test2/test2.input1.tokens.bsl:
--------------------------------------------------------------------------------
1 | tokenize - getting one token
2 | tokenize - got IDENT, now at char 2
3 | ident char = 120
4 | tokenize - getting one token
5 | tokenize - got LPAREN, now at char 4
6 | tokenize - getting one token
7 | tokenize - got IDENT, now at char 5
8 | ident char = 105
9 | ident char = 100
10 | tokenize - getting one token
11 | tokenize - got PLUS, now at char 8
12 | tokenize - getting one token
13 | tokenize - got IDENT, now at char 10
14 | ident char = 105
15 | ident char = 100
16 | tokenize - getting one token
17 | tokenize - got RPAREN, now at char 12
18 | tokenize - getting one token
19 | tokenize - got IDENT, now at char 15
20 | ident char = 121
21 | tokenize - getting one token
22 | tokenize - got LPAREN, now at char 17
23 | tokenize - getting one token
24 | tokenize - got IDENT, now at char 18
25 | ident char = 105
26 | ident char = 100
27 | tokenize - getting one token
28 | tokenize - got PLUS, now at char 21
29 | tokenize - getting one token
30 | tokenize - got IDENT, now at char 23
31 | ident char = 105
32 | ident char = 100
33 | tokenize - getting one token
34 | tokenize - got PLUS, now at char 26
35 | tokenize - getting one token
36 | tokenize - got IDENT, now at char 28
37 | ident char = 105
38 | ident char = 100
39 | tokenize - getting one token
40 | tokenize - got RPAREN, now at char 30
41 | tokenize - getting one token
42 | tokenize - got IDENT, now at char 33
43 | ident char = 122
44 | tokenize - getting one token
45 | tokenize - got LPAREN, now at char 35
46 | tokenize - getting one token
47 | tokenize - got IDENT, now at char 36
48 | ident char = 105
49 | ident char = 100
50 | tokenize - getting one token
51 | tokenize - got PLUS, now at char 39
52 | tokenize - getting one token
53 | tokenize - got IDENT, now at char 41
54 | ident char = 105
55 | ident char = 100
56 | tokenize - getting one token
57 | tokenize - got STAR, now at char 44
58 | tokenize - getting one token
59 | tokenize - got IDENT, now at char 46
60 | ident char = 105
61 | ident char = 100
62 | tokenize - getting one token
63 | tokenize - got RPAREN, now at char 48
64 | tokenize - getting one token
65 | tokenize - got EOF, now at char 51
66 |
--------------------------------------------------------------------------------
/src/FsLex.Core/fslexpars.fsy:
--------------------------------------------------------------------------------
1 | %{
2 | (* (c) Microsoft Corporation 2005-2008. *)
3 |
4 | open FsLexYacc.FsLex
5 | open FsLexYacc.FsLex.AST
6 |
7 | %}
8 |
9 | %type <AST.Spec> spec
10 | %token <string> STRING IDENT
11 | %token <AST.Code> CODE
12 | %token <char> CHAR
13 | %token <string> UNICODE_CATEGORY
14 | %token RULE PARSE LET AND LPAREN RPAREN COLON
15 | %token EOF BAR DOT PLUS STAR QMARK EQUALS UNDERSCORE LBRACK RBRACK HAT DASH
16 | %start spec
17 | %left BAR
18 | %left regexp_alt
19 | %left regexp_seq
20 | %nonassoc regexp_opt
21 | %nonassoc regexp_plus regexp_star
22 | %%
23 |
24 | spec:
25 | | codeopt Macros RULE Rules codeopt {
26 | { TopCode=$1;Macros=$2;Rules=$4;BottomCode=$5 }
27 | }
28 |
29 | codeopt:
30 | | CODE { $1 }
31 | | { "", (parseState.ResultRange |> fst) }
32 |
33 | Macros:
34 | | { [] }
35 | | macro Macros {
36 | $1 :: $2
37 | }
38 |
39 | macro:
40 | | LET IDENT EQUALS regexp {
41 | ($2, $4)
42 | }
43 |
44 | Rules:
45 | | rule AND Rules {
46 | $1 :: $3
47 | }
48 | | rule { [$1] }
49 |
50 | rule:
51 | | IDENT args EQUALS PARSE optbar clauses {
52 | ($1,$2,$6)
53 | }
54 |
55 | args:
56 | | { [] }
57 | | LPAREN IDENT COLON IDENT RPAREN args { RuleArgument.Typed($2, $4) :: $6 }
58 | | IDENT args { RuleArgument.Ident($1) :: $2 }
59 |
60 | optbar:
61 | | { }
62 | | BAR { }
63 |
64 | clauses:
65 | | clause BAR clauses {$1 :: $3 }
66 | | clause { [$1] }
67 |
68 | clause:
69 | | regexp CODE { $1, $2 }
70 |
71 | regexp:
72 | | CHAR { Inp(Alphabet(EncodeChar $1))}
73 | | UNICODE_CATEGORY { Inp(UnicodeCategory $1)}
74 | | EOF { Inp(Alphabet(fun ctx -> Eof))}
75 | | UNDERSCORE { Inp Any }
76 | | STRING { Seq([ for n in 0 .. $1.Length - 1 -> Inp(Alphabet(EncodeChar $1.[n]))])}
77 | | IDENT { Macro($1) }
78 | | regexp regexp %prec regexp_seq { Seq[$1;$2] }
79 | | regexp PLUS %prec regexp_plus { Seq[$1;Star $1] }
80 | | regexp STAR %prec regexp_star { Star $1 }
81 | | regexp QMARK %prec regexp_opt { Alt(fun ctx -> [Seq[];$1])}
82 | | regexp BAR regexp %prec regexp_alt { Alt(fun ctx -> [$1;$3])}
83 | | LPAREN regexp RPAREN { $2 }
84 | | LBRACK charset RBRACK { Alt (fun ctx -> [ for c in ($2 ctx) -> Inp(Alphabet(fun ctx -> c)) ])}
85 | | LBRACK HAT charset RBRACK { Inp(NotCharSet(fun ctx -> $3 ctx))}
86 |
87 | charset:
88 | | CHAR { fun ctx -> Set.singleton(EncodeChar $1 ctx)}
89 | | CHAR DASH CHAR { fun ctx -> Set.ofSeq [ for c in $1 .. $3 -> EncodeChar c ctx ]}
90 | | charset charset { fun ctx -> Set.union ($1 ctx) ($2 ctx)}
91 |
92 |
93 |
--------------------------------------------------------------------------------
/RELEASE_NOTES.md:
--------------------------------------------------------------------------------
1 | #### 11.3.0 - Unreleased
2 | * Add Fable support to FsLexYacc.Runtime.
3 |
4 | #### 11.2.0 - 12 May, 2023
5 | * Add `--open` option for fslex.
6 | * Generate signature files for transformed files in fslex.
7 |
8 | #### 11.1.0 - 3 May, 2023
9 | * Add `--buffer-type-argument` option for fsyacc.
10 |
11 | #### 11.0.1 - 10 January, 2022
12 | * Resolve FSharp.Core dependency restriction #168
13 |
14 | #### 11.0.0 - 10 January, 2022
15 | * Migration to net6.0 #166
16 | * Fix Activating case insensitive option crash the lexer generator #141
17 | * Reuse produced reductions table #141
18 |
19 | #### 11.0.0-beta1 - 11 July, 2021
20 | * Break out core domain logic and generation into core libraries #144
21 | * Update FsLexYacc.targets #149
22 | * Avoid copying a string twice in LexBuffer.FromString. #150
23 | * Fix misc packaging issues #145
24 |
25 | #### 10.2.0 - 22 November, 2020
26 | * Enable running tools under .net 5.0
27 |
28 | #### 10.1.0 - 04 October, 2020
29 | * Add caseInsensitive option
30 | * Migration to netcoreapp3.1
31 |
32 | #### 10.0.0 - 24 October, 2019
33 | * Migration to netcoreapp3.0 based versions of FxLex and FsYacc
34 |
35 | #### 9.1.0 - 22 October, 2019
36 | * Make async lexing obsolete
37 | * Restart doc generation (manually)
38 |
39 | #### 9.0.3 - 12 April, 2019
40 | * Don't require FSharp.Core for tools package
41 | * Bootstrap using new package
42 |
43 | #### 9.0.2 - 12 April, 2019
44 | * Bootstrap using new package
45 |
46 | #### 9.0.1 - 12 April, 2019
47 | * Tools now run on .NET Core
48 |
49 | #### 8.0.1 - 21 March, 2019
50 | * Fix recursion problem
51 | * Support netstandard2.0
52 | * Build with dotnet toolchain
53 | * Cleanup runtime code
54 |
55 | #### 7.0.6 - 23 June, 2017
56 | * Add source to build
57 |
58 | #### 7.0.5 - February 1, 2017
59 | * Fix an error preventing the use of verbose mode
60 |
61 | #### 7.0.4 - January 22, 2017
62 | * Fix targets file for OSX
63 |
64 | #### 7.0.3 - November 29, 2016
65 | * Fix targets file when space in path
66 |
67 | #### 7.0.2 - November 5, 2016
68 | * Improve output
69 |
70 | #### 7.0.1 - November 5, 2016
71 | * Fix targets file
72 | * Remove and and just have the user pass them in via
73 |
74 | #### 7.0.0 - November 5, 2016
75 | * Use only profile 259, move to Paket, remove LKG
76 | * Remove the use of a task DLL
77 |
78 | #### 6.1.0 - March 20, 2015
79 | * Adding the package to solution automatically configures targets
80 | * Build system upgraded to MSBuild 4.0
81 | * Fixed Mono/Linux compilation
82 | * New example with a walkthrough
83 |
84 | #### 6.0.4 - September 15, 2014
85 | * Add profiles 7, 259 to runtime
86 |
87 | #### 6.0.3 - June 18 2014
88 | * FsLex/FsYacc output redirected to VS Output window
89 | * FsYacc verbose output added to MSBuild log (and VS Output window)
90 |
91 | #### 6.0.2 - June 16 2014
92 | * Logo was added
93 | * FsLexYacc.Runtime published as a separate NuGet package
94 |
95 | #### 6.0.0 - April 18 2014
96 | * First release of the new packaging of fslex/fsyacc
97 |
--------------------------------------------------------------------------------
/paket.lock:
--------------------------------------------------------------------------------
1 | STORAGE: NONE
2 | RESTRICTION: || (== net6.0) (== netstandard2.0)
3 | NUGET
4 | remote: https://api.nuget.org/v3/index.json
5 | Expecto (9.0.4)
6 | FSharp.Core (>= 4.6)
7 | Mono.Cecil (>= 0.11.3)
8 | Expecto.FsCheck (9.0.4)
9 | Expecto (>= 9.0.4)
10 | FsCheck (>= 2.14.3)
11 | FsCheck (2.16.5)
12 | FSharp.Core (>= 4.2.3)
13 | FSharp.Core (4.6.2)
14 | FsLexYacc (10.2) - copy_local: true
15 | FSharp.Core (>= 4.5.2)
16 | FsLexYacc.Runtime (>= 10.2 < 10.3)
17 | FsLexYacc.Runtime (10.2) - copy_local: true
18 | FSharp.Core (>= 4.5.2)
19 | Microsoft.Build.Tasks.Git (1.1.1) - copy_local: true
20 | Microsoft.CodeCoverage (17.4.1) - restriction: || (== net6.0) (&& (== netstandard2.0) (>= net462)) (&& (== netstandard2.0) (>= netcoreapp3.1))
21 | Microsoft.NET.Test.Sdk (17.4.1)
22 | Microsoft.CodeCoverage (>= 17.4.1) - restriction: || (== net6.0) (&& (== netstandard2.0) (>= net462)) (&& (== netstandard2.0) (>= netcoreapp3.1))
23 | Microsoft.TestPlatform.TestHost (>= 17.4.1) - restriction: || (== net6.0) (&& (== netstandard2.0) (>= netcoreapp3.1))
24 | Microsoft.SourceLink.Common (1.1.1) - copy_local: true
25 | Microsoft.SourceLink.GitHub (1.1.1) - copy_local: true
26 | Microsoft.Build.Tasks.Git (>= 1.1.1)
27 | Microsoft.SourceLink.Common (>= 1.1.1)
28 | Microsoft.TestPlatform.ObjectModel (17.4.1) - restriction: || (== net6.0) (&& (== netstandard2.0) (>= netcoreapp3.1))
29 | NuGet.Frameworks (>= 5.11)
30 | System.Reflection.Metadata (>= 1.6)
31 | Microsoft.TestPlatform.TestHost (17.4.1) - restriction: || (== net6.0) (&& (== netstandard2.0) (>= netcoreapp3.1))
32 | Microsoft.TestPlatform.ObjectModel (>= 17.4.1) - restriction: || (== net6.0) (&& (== netstandard2.0) (>= netcoreapp3.1))
33 | Newtonsoft.Json (>= 13.0.1) - restriction: || (== net6.0) (&& (== netstandard2.0) (>= netcoreapp3.1))
34 | Mono.Cecil (0.11.4)
35 | Newtonsoft.Json (13.0.2) - restriction: || (== net6.0) (&& (== netstandard2.0) (>= netcoreapp3.1))
36 | NuGet.Frameworks (6.4) - restriction: || (== net6.0) (&& (== netstandard2.0) (>= netcoreapp3.1))
37 | System.Collections.Immutable (7.0) - restriction: || (== net6.0) (&& (== netstandard2.0) (>= netcoreapp3.1))
38 | System.Runtime.CompilerServices.Unsafe (>= 6.0)
39 | System.Reflection.Metadata (7.0) - restriction: || (== net6.0) (&& (== netstandard2.0) (>= netcoreapp3.1))
40 | System.Collections.Immutable (>= 7.0)
41 | System.Runtime.CompilerServices.Unsafe (6.0) - restriction: || (== net6.0) (&& (== netstandard2.0) (>= net6.0)) (&& (== netstandard2.0) (>= netcoreapp3.1))
42 | YoloDev.Expecto.TestSdk (0.13.3)
43 | Expecto (>= 9.0 < 10.0) - restriction: || (== net6.0) (&& (== netstandard2.0) (>= netcoreapp3.1))
44 | FSharp.Core (>= 4.6.2) - restriction: || (== net6.0) (&& (== netstandard2.0) (>= netcoreapp3.1))
45 | System.Collections.Immutable (>= 6.0) - restriction: || (== net6.0) (&& (== netstandard2.0) (>= netcoreapp3.1))
46 |
--------------------------------------------------------------------------------
/src/FsLexYacc.Build.Tasks/FsLexYacc.targets:
--------------------------------------------------------------------------------
1 |
14 |
15 |
16 |
17 | CallFsLex;CallFsYacc;$(CompileDependsOn)
18 | $(MSBuildThisFileDirectory)/fslex/net6.0
19 | fslex.dll
20 | $(MSBuildThisFileDirectory)/fsyacc/net6.0
21 | fsyacc.dll
22 | dotnet
23 |
24 |
25 |
26 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 |
58 |
59 |
60 |
61 |
62 | false
63 |
64 |
65 | false
66 |
67 |
68 |
69 |
70 |
71 |
--------------------------------------------------------------------------------
/tests/fsyacc/main.fs:
--------------------------------------------------------------------------------
1 | open Tree
2 | open System.IO
3 | open Microsoft.FSharp.Quotations
4 | open Microsoft.FSharp.Compatibility.OCaml
5 | open FSharp.Text.Lexing
6 | let tokenize = ref false
7 |
8 | let usage = [ "--tokens", Arg.Set tokenize, "tokenize the first file and exit" ]
9 |
10 | let mutable inputs = []
11 |
12 | Arg.parse usage (fun x -> inputs <- inputs @ [x]) "test... \nTests that all inputs give equivalent syntac trees"
13 |
14 | let createLexBuffer (a:Expr<'a->_>) (x:FileStream) : 'a =
15 | (if typeof<'a> = typeof<LexBuffer<char>> then
16 | x |> StreamReader |> LexBuffer<_>.FromTextReader :> obj
17 | elif typeof<'a> = typeof<LexBuffer<byte>> then
18 | x |> BinaryReader |> LexBuffer<_>.FromBinaryReader :> obj
19 | else
20 | failwith "Pies")
21 | :?> _
22 |
23 | if inputs = [] then
24 | Printf.eprintf "at least one input should be given\n";
25 | try
26 | let results =
27 | inputs
28 | |> List.map
29 | (fun filename ->
30 | use is = File.OpenRead filename
31 | let lexbuf = createLexBuffer <@ TestLexer.token @> is
32 | if !tokenize then
33 | while true do
34 | Printf.eprintf "tokenize - getting one token\n"
35 | let t = TestLexer.token lexbuf
36 | Printf.eprintf "tokenize - got %s, now at char %d\n" (TestParser.token_to_string t) (lexbuf.StartPos).pos_cnum
37 | match t with
38 | | TestParser.EOF -> exit 0
39 | | TestParser.IDENT s ->
40 | for c in s do
41 | Printf.eprintf " ident char = %d\n" (int c)
42 | | _ -> ()
43 | let tree =
44 | try
45 | TestParser.start TestLexer.token lexbuf
46 | with e ->
47 | Printf.eprintf "%s(%d,%d): error: %s\n" filename lexbuf.StartPos.pos_lnum (lexbuf.StartPos.pos_cnum - lexbuf.StartPos.pos_bol) (match e with Failure s -> s | _ -> e.ToString())
48 | exit 1
49 | Printf.eprintf "parsed %s ok\n" filename
50 | (filename,tree)
51 | )
52 | results
53 | |> List.iter
54 | (fun (filename1,tree1) ->
55 | results
56 | |> List.iter
57 | (fun (filename2,tree2) ->
58 | if filename1 > filename2 then
59 | if tree1 <> tree2 then
60 | Printf.eprintf "file %s and file %s parsed to different results!\n" filename1 filename2
61 | let rec ptree os (Node(n,l)) =
62 | Printf.fprintf os "(%s %a)" n ptrees l
63 | and ptrees os l =
64 | match l with
65 | | [] -> ()
66 | | [h] -> ptree os h
67 | | h::t -> Printf.fprintf os "%a %a" ptree h ptrees t
68 | Printf.eprintf "file %s = %a\n" filename1 ptree tree1
69 | Printf.eprintf "file %s = %a\n" filename2 ptree tree2
70 | exit 1
71 | )
72 | )
73 | with e ->
74 | Printf.eprintf "Error: %s\n" (match e with Failure s -> s | e -> e.ToString());
75 | exit 1
76 |
77 |
--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
1 | ###############################################################################
2 | # Set default behavior to automatically normalize line endings.
3 | ###############################################################################
4 | * text=auto
5 |
6 | ###############################################################################
7 | # Set default behavior for command prompt diff.
8 | #
9 | # This is needed for earlier builds of msysgit that do not have it on by
10 | # default for csharp files.
11 | # Note: This is only used by command line
12 | ###############################################################################
13 | #*.cs diff=csharp
14 |
15 | ###############################################################################
16 | # Set the merge driver for project and solution files
17 | #
18 | # Merging from the command prompt will add diff markers to the files if there
19 | # are conflicts (Merging from VS is not affected by the settings below, in VS
20 | # the diff markers are never inserted). Diff markers may cause the following
21 | # file extensions to fail to load in VS. An alternative would be to treat
22 | # these files as binary and thus will always conflict and require user
23 | # intervention with every merge. To do so, just uncomment the entries below
24 | ###############################################################################
25 | #*.sln merge=binary
26 | #*.csproj merge=binary
27 | #*.vbproj merge=binary
28 | #*.vcxproj merge=binary
29 | #*.vcproj merge=binary
30 | #*.dbproj merge=binary
31 | #*.fsproj merge=binary
32 | #*.lsproj merge=binary
33 | #*.wixproj merge=binary
34 | #*.modelproj merge=binary
35 | #*.sqlproj merge=binary
36 | #*.wwaproj merge=binary
37 |
38 | ###############################################################################
39 | # behavior for image files
40 | #
41 | # image files are treated as binary by default.
42 | ###############################################################################
43 | #*.jpg binary
44 | #*.png binary
45 | #*.gif binary
46 |
47 | ###############################################################################
48 | # diff behavior for common document formats
49 | #
50 | # Convert binary document formats to text before diffing them. This feature
51 | # is only available from the command line. Turn it on by uncommenting the
52 | # entries below.
53 | ###############################################################################
54 | #*.doc diff=astextplain
55 | #*.DOC diff=astextplain
56 | #*.docx diff=astextplain
57 | #*.DOCX diff=astextplain
58 | #*.dot diff=astextplain
59 | #*.DOT diff=astextplain
60 | #*.pdf diff=astextplain
61 | #*.PDF diff=astextplain
62 | #*.rtf diff=astextplain
63 | #*.RTF diff=astextplain
64 |
65 | *.sh text eol=lf
66 |
67 | ###############################################################################
68 | # diff behavior for input/output/baseline files for Old FsLexYacc tests
69 | ###############################################################################
70 | *.input1 eol=crlf
71 | *.badInput eol=crlf
72 | *.variation1 eol=crlf
73 | *.variation2 eol=crlf
74 | *.bsl eol=crlf
75 | *.utf8 eol=crlf
76 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # User-specific files
2 | *.suo
3 | *.user
4 | *.sln.docstates
5 |
6 | # Xamarin Studio / monodevelop user-specific
7 | *.userprefs
8 |
9 | # Build results
10 |
11 | [Dd]ebug/
12 | [Rr]elease/
13 | x64/
14 | build/
15 | [Bb]in/
16 | [Oo]bj/
17 |
18 | # Enable "build/" folder in the NuGet Packages folder since NuGet packages use it for MSBuild targets
19 | !packages/*/build/
20 |
21 | # MSTest test Results
22 | [Tt]est[Rr]esult*/
23 | [Bb]uild[Ll]og.*
24 |
25 | *_i.c
26 | *_p.c
27 | *.ilk
28 | *.meta
29 | *.obj
30 | *.pch
31 | *.pdb
32 | *.pgc
33 | *.pgd
34 | *.rsp
35 | *.sbr
36 | *.tlb
37 | *.tli
38 | *.tlh
39 | *.tmp
40 | *.tmp_proj
41 | *.log
42 | *.vspscc
43 | *.vssscc
44 | .builds
45 | *.pidb
46 | *.log
47 | *.scc
48 |
49 |
50 | # Visual C++ cache files
51 | ipch/
52 | *.aps
53 | *.ncb
54 | *.opensdf
55 | *.sdf
56 | *.cachefile
57 |
58 | # Visual Studio profiler
59 | *.psess
60 | *.vsp
61 | *.vspx
62 |
63 | # Guidance Automation Toolkit
64 | *.gpState
65 |
66 | # ReSharper is a .NET coding add-in
67 | _ReSharper*/
68 | *.[Rr]e[Ss]harper
69 |
70 | # TeamCity is a build add-in
71 | _TeamCity*
72 |
73 | # DotCover is a Code Coverage Tool
74 | *.dotCover
75 |
76 | # NCrunch
77 | *.ncrunch*
78 | .*crunch*.local.xml
79 |
80 | # Installshield output folder
81 | [Ee]xpress/
82 |
83 | # DocProject is a documentation generator add-in
84 | DocProject/buildhelp/
85 | DocProject/Help/*.HxT
86 | DocProject/Help/*.HxC
87 | DocProject/Help/*.hhc
88 | DocProject/Help/*.hhk
89 | DocProject/Help/*.hhp
90 | DocProject/Help/Html2
91 | DocProject/Help/html
92 |
93 | # Click-Once directory
94 | publish/
95 |
96 | # Publish Web Output
97 | *.Publish.xml
98 |
99 | # Enable nuget.exe in the .nuget folder (though normally executables are not tracked)
100 | !.nuget/NuGet.exe
101 |
102 | # Windows Azure Build Output
103 | csx
104 | *.build.csdef
105 |
106 | # Windows Store app package directory
107 | AppPackages/
108 |
109 | # Others
110 | sql/
111 | *.Cache
112 | ClientBin/
113 | [Ss]tyle[Cc]op.*
114 | ~$*
115 | *~
116 | *.dbmdl
117 | *.[Pp]ublish.xml
118 | *.pfx
119 | *.publishsettings
120 |
121 | # RIA/Silverlight projects
122 | Generated_Code/
123 |
124 | # Backup & report files from converting an old project file to a newer
125 | # Visual Studio version. Backup files are not needed, because we have git ;-)
126 | _UpgradeReport_Files/
127 | Backup*/
128 | UpgradeLog*.XML
129 | UpgradeLog*.htm
130 |
131 | # SQL Server files
132 | App_Data/*.mdf
133 | App_Data/*.ldf
134 |
135 |
136 | #LightSwitch generated files
137 | GeneratedArtifacts/
138 | _Pvt_Extensions/
139 | ModelManifest.xml
140 |
141 | # =========================
142 | # Windows detritus
143 | # =========================
144 |
145 | # Windows image file caches
146 | Thumbs.db
147 | ehthumbs.db
148 |
149 | # Folder config file
150 | Desktop.ini
151 |
152 | # Recycle Bin used on file shares
153 | $RECYCLE.BIN/
154 |
155 | # Mac desktop service store files
156 | .DS_Store
157 |
158 | # ===================================================
159 | # Exclude F# project specific directories and files
160 | # ===================================================
161 |
162 | # NuGet Packages Directory
163 | packages/
164 |
165 | # Generated documentation folder
166 | output/
167 | .fsdocs/
168 | tmp/
169 |
170 | # Temp folder used for publishing docs
171 | temp/
172 |
173 | # Test results produced by build
174 | TestResults.xml
175 |
176 | # Nuget outputs
177 | nuget/*.nupkg
178 |
179 | # FAKE build
180 | .fake/
181 |
182 | # IDEs
183 | .vs/
184 | .idea/
185 | .ionide/
186 |
187 | # ===================================================
188 | # Exclude generated lexer/parser source files
189 | # ===================================================
190 | src/FsLex/fslexlex.fs
191 | src/FsLex/fslexpars.fs
192 | src/FsLex/fslexpars.fsi
193 | src/FsYacc/fsyacclex.fs
194 | src/FsYacc/fsyaccpars.fs
195 | src/FsYacc/fsyaccpars.fsi
196 | tests/fsyacc/test1-unicode.exe.config
197 | tests/fsyacc/test1.exe.config
198 | tests/fsyacc/test1.input1.bsl.err
199 | tests/fsyacc/test1compat.exe.config
200 | tests/fsyacc/test2.exe.config
201 | tests/fsyacc/test2compat.exe.config
202 | tests/fsyacc/Test1/test1.fs
203 | tests/fsyacc/Test1/test1.fsi
204 | tests/fsyacc/Test1/test1lex.fs
205 |
206 | .paket
207 | .idea/
208 | .vs/
209 | .ionide/
210 | tests/fsyacc/unicode/test1-unicode-lex.fs
211 | tests/fsyacc/unicode/test1-unicode.fs
212 | tests/fsyacc/unicode/test1-unicode.fsi
213 | tests/fsyacc/unicode/test1-unicode.input3.tokens.bsl.err
214 | tests/fsyacc/Test2/test2lex.fs
215 | tests/fsyacc/Test2/test2.fs
216 | tests/fsyacc/Test2/test2.fsi
217 | tests/fsyacc/repro_#141/Lexer_fail_option_i.fs
218 |
--------------------------------------------------------------------------------
/src/FsLex/fslex.fs:
--------------------------------------------------------------------------------
1 | // (c) Microsoft Corporation 2005-2009.
2 |
3 | module FsLexYacc.FsLex.Program
4 |
5 | open FsLexYacc.FsLex.AST
6 | open FsLexYacc.FsLex.Driver
7 | open Printf
8 | open FSharp.Text
9 | open System.IO
10 |
11 | //------------------------------------------------------------------
12 | // This is the program proper
13 |
14 | let mutable input = None // input .fsl file path (the anonymous command-line argument)
15 | let mutable out = None // output file path (-o); defaults to the input path with a .fs extension
16 | let mutable inputCodePage = None // codepage used to decode the input file (--codepage)
17 | let mutable light = None // Some false adds #light "off" to the generated file (--light-off)
18 | let mutable modname = None // F# module name to host the generated code (--module)
19 | let mutable internal_module = false // emit an internal module when true (--internal)
20 | let mutable opens = [] // extra modules to open in the generated code (--open)
21 | let mutable lexlib = "FSharp.Text.Lexing" // namespace of the lexer table interpreter (--lexlib)
22 | let mutable unicode = false // produce a lexer for 16-bit unicode characters (--unicode)
23 | let mutable caseInsensitive = false // produce a case-insensitive lexer (-i)
24 |
25 | let usage = // command-line option table consumed by ArgParser.Parse below
26 | [
27 | ArgInfo("-o", ArgType.String(fun s -> out <- Some s), "Name the output file.")
28 | ArgInfo("--module", ArgType.String(fun s -> modname <- Some s), "Define the F# module name to host the generated parser.")
29 | ArgInfo("--internal", ArgType.Unit(fun () -> internal_module <- true), "Generate an internal module")
30 | ArgInfo(
31 | "--open",
32 | ArgType.String(fun s -> opens <- opens @ [ s ]),
33 | "Add the given module to the list of those to open in both the generated signature and implementation."
34 | )
35 | ArgInfo(
36 | "--codepage",
37 | ArgType.Int(fun i -> inputCodePage <- Some i),
38 | "Assume input lexer specification file is encoded with the given codepage."
39 | )
40 | ArgInfo("--light", ArgType.Unit(fun () -> light <- Some true), "(ignored)")
41 | ArgInfo("--light-off", ArgType.Unit(fun () -> light <- Some false), "Add #light \"off\" to the top of the generated file")
42 | ArgInfo(
43 | "--lexlib",
44 | ArgType.String(fun s -> lexlib <- s),
45 | "Specify the namespace for the implementation of the lexer table interpreter (default FSharp.Text.Lexing)"
46 | )
47 | ArgInfo("--unicode", ArgType.Unit(fun () -> unicode <- true), "Produce a lexer for use with 16-bit unicode characters.")
48 | ArgInfo("-i", ArgType.Unit(fun () -> caseInsensitive <- true), "Produce a case-insensitive lexer.")
49 | ]
50 |
51 | let _ = // parse argv at startup; any non-option argument is the (single) input file
52 | ArgParser.Parse(
53 | usage,
54 | (fun x ->
55 | match input with
56 | | Some _ -> failwith "more than one input given"
57 | | None -> input <- Some x),
58 | "fslex "
59 | )
60 |
61 | let compileSpec (spec: Spec) (ctx: ParseContext) = // compile a parsed lexer spec into (per-rule data, DFA nodes)
62 | let perRuleData, dfaNodes = Compile ctx spec
63 | let dfaNodes = dfaNodes |> List.sortBy (fun n -> n.Id) // sort states by Id for deterministic output
64 | perRuleData, dfaNodes
65 |
66 | let main () = // entry point: parse the spec, compile to DFAs, write the generated lexer
67 | try
68 | let filename =
69 | (match input with
70 | | Some x -> x
71 | | None -> failwith "no input given")
72 |
73 | let parseContext = // compilation options taken from the command line
74 | {
75 | unicode = unicode
76 | caseInsensitive = caseInsensitive
77 | }
78 |
79 | let spec = // parse the .fsl specification, reporting file(line,column) on failure
80 | match readSpecFromFile filename inputCodePage with
81 | | Ok spec -> spec
82 | | Error(e, line, column) ->
83 | eprintf
84 | "%s(%d,%d): error: %s"
85 | filename
86 | line
87 | column
88 | (match e with
89 | | Failure s -> s
90 | | _ -> e.Message)
91 |
92 | exit 1
93 |
94 | printfn "compiling to dfas (can take a while...)"
95 | let perRuleData, dfaNodes = compileSpec spec parseContext
96 | printfn "%d states" dfaNodes.Length
97 |
98 | printfn "writing output"
99 |
100 | let output = // -o if given, otherwise the input path with a .fs extension
101 | match out with
102 | | Some x -> x
103 | | _ -> Path.ChangeExtension(filename, ".fs")
104 |
105 | let state: GeneratorState = // gather all code-generation options into one record
106 | {
107 | inputFileName = filename
108 | outputFileName = output
109 | inputCodePage =
110 | inputCodePage
111 | |> Option.map System.Text.Encoding.GetEncoding
112 | |> Option.defaultValue System.Text.Encoding.UTF8
113 | generatedModuleName = modname
114 | disableLightMode = light
115 | generateInternalModule = internal_module
116 | opens = opens
117 | lexerLibraryName = lexlib
118 | domain = if unicode then Unicode else ASCII
119 | }
120 |
121 | writeSpecToFile state spec perRuleData dfaNodes
122 |
123 | with e -> // top-level handler: report any failure and exit nonzero
124 | eprintf
125 | "FSLEX: error FSL000: %s"
126 | (match e with
127 | | Failure s -> s
128 | | e -> e.ToString())
129 |
130 | exit 1
131 |
132 | let result = main () // run immediately at startup
133 |
--------------------------------------------------------------------------------
/src/FsLexYacc.Runtime/Parsing.fsi:
--------------------------------------------------------------------------------
1 | //==========================================================================
2 | // (c) Microsoft Corporation 2005-2009.
3 | //=========================================================================
4 |
5 | namespace FSharp.Text.Parsing
6 |
7 | open FSharp.Text.Lexing
8 |
9 | open System.Collections.Generic
10 |
11 | /// The information accessible via the parseState value within parser actions.
12 | /// The information accessible via the parseState value within parser actions.
13 | type IParseState =
14 | /// Get the start and end position for the terminal or non-terminal at a given index matched by the production
15 | abstract InputRange: index: int -> Position * Position
16 |
17 | /// Get the end position for the terminal or non-terminal at a given index matched by the production
18 | abstract InputEndPosition: int -> Position
19 |
20 | /// Get the start position for the terminal or non-terminal at a given index matched by the production
21 | abstract InputStartPosition: int -> Position
22 |
23 | /// Get the full range of positions matched by the production
24 | abstract ResultRange: Position * Position
25 |
26 | /// Get the value produced by the terminal or non-terminal at the given position
27 | abstract GetInput: int -> obj
28 |
29 | /// Get the store of local values associated with this parser
30 | // Dynamically typed, non-lexically scoped local store
31 | abstract ParserLocalStore: IDictionary<string, obj>
32 |
33 | /// Raise an error in this parse context
34 | abstract RaiseError<'b> : unit -> 'b
34 |
35 | /// The context provided when a parse error occurs
36 | [<Sealed>]
37 | type ParseErrorContext<'tok> =
38 | /// The stack of state indexes active at the parse error
39 | member StateStack: int list
40 |
41 | /// The state active at the parse error
42 | member ParseState: IParseState
43 |
44 | /// The tokens that would cause a reduction at the parse error
45 | member ReduceTokens: int list
46 |
47 | /// The stack of productions that would be reduced at the parse error
48 | member ReducibleProductions: int list list
49 |
50 | /// The token that caused the parse error
51 | member CurrentToken: 'tok option
52 |
53 | /// The tokens that would cause a shift at the parse error
54 | member ShiftTokens: int list
55 |
56 | /// The message associated with the parse error
57 | member Message: string
58 |
59 | /// Tables generated by fsyacc
60 | /// The type of the tables contained in a file produced by the fsyacc.exe parser generator.
61 | /// Tables generated by fsyacc
62 | /// The type of the tables contained in a file produced by the fsyacc.exe parser generator.
63 | type Tables<'tok> =
64 | {
65 | /// The reduction table: functions computing a semantic value from the parse state
66 | reductions: (IParseState -> obj) array
67 |
68 | /// The token number indicating the end of input
69 | endOfInputTag: int
70 |
71 | /// A function to compute the tag of a token
72 | tagOfToken: 'tok -> int
73 |
74 | /// A function to compute the data carried by a token
75 | dataOfToken: 'tok -> obj
76 |
77 | /// The sparse action table elements
78 | actionTableElements: uint16[]
79 |
80 | /// The sparse action table row offsets
81 | actionTableRowOffsets: uint16[]
82 |
83 | /// The number of symbols for each reduction
84 | reductionSymbolCounts: uint16[]
85 |
86 | /// The immediate action table
87 | immediateActions: uint16[]
88 |
89 | /// The sparse goto table
90 | gotos: uint16[]
91 |
92 | /// The sparse goto table row offsets
93 | sparseGotoTableRowOffsets: uint16[]
94 |
95 | /// The sparse table for the productions active for each state
96 | stateToProdIdxsTableElements: uint16[]
97 |
98 | /// The sparse table offsets for the productions active for each state
99 | stateToProdIdxsTableRowOffsets: uint16[]
100 |
101 | /// This table is logically part of the Goto table
102 | productionToNonTerminalTable: uint16[]
103 |
104 | /// This function is used to hold the user specified "parse_error" or "parse_error_rich" functions
105 | parseError: ParseErrorContext<'tok> -> unit
106 |
107 | /// The total number of terminals
108 | numTerminals: int
109 |
110 | /// The tag of the error terminal
111 | tagOfErrorTerminal: int
112 | }
113 |
114 | /// Interpret the parser table taking input from the given lexer, using the given lex buffer, and the given start state.
115 | /// Returns an object indicating the final synthesized value for the parse.
116 | member Interpret: lexer: (LexBuffer<'char> -> 'tok) * lexbuf: LexBuffer<'char> * startState: int -> obj
115 |
116 | /// Indicates an accept action has occurred
117 | exception Accept of obj
118 | /// Indicates a parse error has occurred and parse recovery is in progress
119 | exception RecoverableParseError
120 |
121 | #if __DEBUG
122 | module internal Flags =
123 | val mutable debug: bool
124 | #endif
125 |
126 | /// Helpers used by generated parsers.
127 | module ParseHelpers =
128 | /// The default implementation of the parse_error_rich function
129 | val parse_error_rich: (ParseErrorContext<'tok> -> unit) option
130 |
131 | /// The default implementation of the parse_error function
132 | val parse_error: string -> unit
133 |
--------------------------------------------------------------------------------
/src/FsYacc.Core/fsyacclex.fsl:
--------------------------------------------------------------------------------
1 | {
2 | (* (c) Microsoft Corporation 2005-2008. *)
3 |
4 | module FsLexYacc.FsYacc.Lexer
5 |
6 | open FsLexYacc.FsYacc.AST
7 | open FsLexYacc.FsYacc.Parser
8 | open System.Text
9 | open FSharp.Text.Lexing
10 |
11 | let lexeme (lexbuf : LexBuffer<char>) = new System.String(lexbuf.Lexeme)
12 | let newline (lexbuf:LexBuffer<_>) = lexbuf.EndPos <- lexbuf.EndPos.NextLine
13 |
14 | let unexpected_char lexbuf =
15 | failwith ("Unexpected character '"+(lexeme lexbuf)+"'")
16 |
17 | let typeDepth = ref 0 // nesting depth of '<'..'>' while scanning a %type/%token type
18 | let startPos = ref Position.Empty // start position of the type annotation being scanned
19 | let mutable str_buf = new System.Text.StringBuilder() // accumulates the text of the type being scanned
20 |
21 | let appendBuf (str:string) = str_buf.Append str |> ignore
22 | let clearBuf () = str_buf <- new System.Text.StringBuilder()
23 |
24 | }
25 |
26 | let letter = ['A'-'Z'] | ['a'-'z']
27 | let digit = ['0'-'9']
28 | let whitespace = [' ' '\t']
29 | let newline = ('\n' | '\r' '\n')
30 | let ident_start_char = letter
31 | let ident_char = ( ident_start_char| digit | ['\'' '_'] )
32 | let ident = ident_start_char ident_char*
33 |
34 | rule token = parse
35 | | "%{" { let p = lexbuf.StartPos in header p (new StringBuilder 100) lexbuf }
36 | | "%%" { PERCENT_PERCENT }
37 | | "%token" (whitespace* '<') { typeDepth.Value <- 1; startPos.Value <- lexbuf.StartPos; clearBuf(); TOKEN (fs_type lexbuf) }
38 | | "%token" { TOKEN (None) }
39 | | "%start"{ START }
40 | | "%prec"{ PREC }
41 | | "%type" (whitespace* '<') { typeDepth.Value <- 1; startPos.Value <- lexbuf.StartPos; clearBuf(); TYPE (match fs_type lexbuf with Some x -> x | None -> failwith "gettype") }
42 | | "%left" { LEFT }
43 | | "%right" { RIGHT }
44 | | "%nonassoc" { NONASSOC }
45 | | "error" { ERROR }
46 | | '<' { LESS }
47 | | '>' { GREATER }
48 | | ';' { SEMI }
49 | | '{' { let p = lexbuf.StartPos in
50 | let buff = (new StringBuilder 100) in
51 | // adjust the first line to get even indentation for all lines w.r.t. the left hand margin
52 | buff.Append (String.replicate (lexbuf.StartPos.Column+1) " ") |> ignore;
53 | code p buff lexbuf }
54 | | whitespace+ { token lexbuf }
55 | | newline { newline lexbuf; token lexbuf }
56 | | ident_start_char ident_char* { IDENT (lexeme lexbuf) }
57 | | '|' { BAR }
58 | | "/*" { ignore(comment lexbuf); token lexbuf }
59 | | "//" [^'\n''\r']* { token lexbuf }
60 | | ':' { COLON }
61 | | _ { unexpected_char lexbuf }
62 | | eof { EOF }
63 |
64 | and fs_type = parse
65 | | '<' { typeDepth.Value <- typeDepth.Value + 1; appendBuf(lexeme lexbuf); fs_type lexbuf}
66 | | '>'
67 | { typeDepth.Value <- typeDepth.Value - 1;
68 | if typeDepth.Value = 0
69 | then Some(string str_buf)
70 | else appendBuf(lexeme lexbuf); fs_type lexbuf }
71 | | _ { appendBuf(lexeme lexbuf); fs_type lexbuf }
72 |
73 | and header p buff = parse
74 | | "%}" { HEADER (buff.ToString(), p) }
75 | | newline { newline lexbuf;
76 | ignore <| buff.Append System.Environment.NewLine;
77 | header p buff lexbuf }
78 | | (whitespace | letter | digit) +
79 | { ignore <| buff.Append (lexeme lexbuf);
80 | header p buff lexbuf }
81 | | "//" [^'\n''\r']*
82 | { ignore <| buff.Append (lexeme lexbuf);
83 | header p buff lexbuf }
84 | | "'\"'" | "'\\\"'"
85 | { ignore <| buff.Append (lexeme lexbuf);
86 | header p buff lexbuf }
87 | | "\""
88 | { ignore <| buff.Append (lexeme lexbuf);
89 | ignore(codestring buff lexbuf);
90 | header p buff lexbuf }
91 | | eof { EOF }
92 | | _ { ignore <| buff.Append(lexeme lexbuf).[0];
93 | header p buff lexbuf }
94 | and code p buff = parse
95 | | "}" { CODE (buff.ToString(), p) }
96 | | "{" { ignore <| buff.Append (lexeme lexbuf);
97 | ignore(code p buff lexbuf);
98 | ignore <| buff.Append "}";
99 | code p buff lexbuf }
100 | | newline { newline lexbuf;
101 | ignore <| buff.Append System.Environment.NewLine;
102 | code p buff lexbuf }
103 | | "'\"'" | "'\\\"'"
104 | { ignore <| buff.Append (lexeme lexbuf);
105 | code p buff lexbuf }
106 | | "\"" { ignore <| buff.Append (lexeme lexbuf);
107 | ignore(codestring buff lexbuf);
108 | code p buff lexbuf }
109 | | (whitespace | letter | digit) +
110 | { ignore <| buff.Append (lexeme lexbuf);
111 | code p buff lexbuf }
112 | | "//" [^'\n''\r']*
113 | { ignore <| buff.Append (lexeme lexbuf);
114 | code p buff lexbuf }
115 | | eof { EOF }
116 | | _ { ignore <| buff.Append(lexeme lexbuf).[0];
117 | code p buff lexbuf }
118 |
119 |
120 | and codestring buff = parse
121 | | '\\' ('"' | '\\')
122 | { ignore <| buff.Append (lexeme lexbuf);
123 | codestring buff lexbuf }
124 | | '"' { ignore <| buff.Append (lexeme lexbuf);
125 | buff.ToString() }
126 | | newline { newline lexbuf;
127 | ignore <| buff.Append System.Environment.NewLine;
128 | codestring buff lexbuf }
129 | | (whitespace | letter | digit) +
130 | { ignore <| buff.Append (lexeme lexbuf);
131 | codestring buff lexbuf }
132 | | eof { failwith "unterminated string in code" }
133 | | _ { ignore <| buff.Append(lexeme lexbuf).[0];
134 | codestring buff lexbuf }
135 |
136 |
137 | and comment = parse
138 | | "/*" { ignore(comment lexbuf); comment lexbuf }
139 | | newline { newline lexbuf; comment lexbuf }
140 | | "*/" { () }
141 | | eof { failwith "end of file in comment" }
142 | | [^ '/' '*' '\n' '\r' '"' '/' ]+ { comment lexbuf }
143 | | _ { comment lexbuf }
144 |
145 |
--------------------------------------------------------------------------------
/src/FsYacc/fsyacc.fs:
--------------------------------------------------------------------------------
1 | (* (c) Microsoft Corporation 2005-2008. *)
2 |
3 | module FsLexYacc.FsYacc.Program
4 |
5 | open Printf
6 | open FSharp.Text
7 | open FsLexYacc.FsYacc.AST
8 | open FsLexYacc.FsYacc.Driver
9 |
10 | //------------------------------------------------------------------
11 | // This is the program proper
12 |
13 | let mutable input = None // input .fsy file path (the anonymous command-line argument)
14 | let mutable modname = None // F# module name to host the generated parser (--module)
15 | let mutable internal_module = false // emit an internal module when true (--internal)
16 | let mutable opens = [] // extra modules to open in the generated code (--open)
17 | let mutable out = None // output file path (-o)
18 | let mutable tokenize = false // only tokenize the specification file (--tokens)
19 | let mutable compat = false // 'Parsing' module (FSharp.PowerPack) compatibility (--ml-compatibility)
20 | let mutable log = false // produce a listing file (-v)
21 | let mutable light = None // Some false adds #light "off" to the generated file (--light-off)
22 | let mutable inputCodePage = None // codepage used to decode the input file (--codepage)
23 | let mutable lexlib = "FSharp.Text.Lexing" // namespace of the lexer implementation (--lexlib)
24 | let mutable parslib = "FSharp.Text.Parsing" // namespace of the parser table interpreter (--parslib)
25 | let mutable bufferTypeArgument = "'cty" // generic type argument of LexBuffer (--buffer-type-argument)
26 |
27 | let usage = // command-line option table consumed by ArgParser.Parse below
28 | [
29 | ArgInfo("-o", ArgType.String(fun s -> out <- Some s), "Name the output file.")
30 | ArgInfo("-v", ArgType.Unit(fun () -> log <- true), "Produce a listing file.")
31 | ArgInfo("--module", ArgType.String(fun s -> modname <- Some s), "Define the F# module name to host the generated parser.")
32 | ArgInfo("--internal", ArgType.Unit(fun () -> internal_module <- true), "Generate an internal module")
33 | ArgInfo(
34 | "--open",
35 | ArgType.String(fun s -> opens <- opens @ [ s ]),
36 | "Add the given module to the list of those to open in both the generated signature and implementation."
37 | )
38 | ArgInfo("--light", ArgType.Unit(fun () -> light <- Some true), "(ignored)")
39 | ArgInfo("--light-off", ArgType.Unit(fun () -> light <- Some false), "Add #light \"off\" to the top of the generated file")
40 | ArgInfo(
41 | "--ml-compatibility",
42 | ArgType.Unit(fun _ -> compat <- true),
43 | "Support the use of the global state from the 'Parsing' module in FSharp.PowerPack.dll."
44 | )
45 | ArgInfo("--tokens", ArgType.Unit(fun _ -> tokenize <- true), "Simply tokenize the specification file itself.")
46 | ArgInfo(
47 | "--lexlib",
48 | ArgType.String(fun s -> lexlib <- s),
49 | "Specify the namespace for the implementation of the lexer (default: FSharp.Text.Lexing)"
50 | )
51 | ArgInfo(
52 | "--parslib",
53 | ArgType.String(fun s -> parslib <- s),
54 | "Specify the namespace for the implementation of the parser table interpreter (default: FSharp.Text.Parsing)"
55 | )
56 | ArgInfo(
57 | "--codepage",
58 | ArgType.Int(fun i -> inputCodePage <- Some i),
59 | "Assume input lexer specification file is encoded with the given codepage."
60 | )
61 | ArgInfo("--buffer-type-argument", ArgType.String(fun s -> bufferTypeArgument <- s), "Generic type argument of the LexBuffer type.")
62 | ]
63 |
64 | let _ = // parse argv at startup; any non-option argument is the (single) input file
65 | ArgParser.Parse(
66 | usage,
67 | (fun x ->
68 | match input with
69 | | Some _ -> failwith "more than one input given"
70 | | None -> input <- Some x),
71 | "fsyacc "
72 | )
73 |
74 | let main () = // entry point: parse the grammar, build LALR tables, write the generated parser
75 | let filename =
76 | (match input with
77 | | Some x -> x
78 | | None -> failwith "no input given") in
79 |
80 | if tokenize then // --tokens mode: dump the token stream of the spec file itself
81 | printTokens filename inputCodePage
82 |
83 | let spec = // parse the .fsy specification, reporting file(line,col) on failure
84 | match readSpecFromFile filename inputCodePage with
85 | | Ok spec -> spec
86 | | Result.Error(e, line, col) ->
87 | eprintf "%s(%d,%d): error: %s" filename line col e.Message
88 | exit 1
89 |
90 | use logger = // -v: write a listing file; otherwise a no-op logger
91 | match logFileName (filename, out, log) with
92 | | Some outputLogName -> new FileLogger(outputLogName) :> Logger
93 | | None -> new NullLogger() :> Logger
94 |
95 | let compiledSpec = compileSpec spec logger
96 | printfn " building tables"
97 | printfn " %d states" compiledSpec.states.Length
98 | printfn " %d nonterminals" compiledSpec.gotoTable.[0].Length
99 | printfn " %d terminals" compiledSpec.actionTable.[0].Length
100 | printfn " %d productions" compiledSpec.prods.Length
101 | printfn " #rows in action table: %d" compiledSpec.actionTable.Length
102 | (*
103 | printfn "#unique rows in action table: %d" (List.length (Array.foldBack (fun row acc -> insert (Array.to_list row) acc) actionTable []));
104 | printfn "maximum #different actions per state: %d" (Array.foldBack (fun row acc ->max (List.length (List.foldBack insert (Array.to_list row) [])) acc) actionTable 0);
105 | printfn "average #different actions per state: %d" ((Array.foldBack (fun row acc -> (List.length (List.foldBack insert (Array.to_list row) [])) + acc) actionTable 0) / (Array.length states));
106 | *)
107 |
108 | let generatorState: GeneratorState = // gather all code-generation options into one record
109 | { GeneratorState.Default with
110 | input = filename
111 | output = out
112 | logger = logger
113 | light = light
114 | modname = modname
115 | internal_module = internal_module
116 | opens = opens
117 | lexlib = lexlib
118 | parslib = parslib
119 | compat = compat
120 | bufferTypeArgument = bufferTypeArgument
121 | }
122 |
123 | writeSpecToFile generatorState spec compiledSpec
124 |
125 | let result = // run main at startup; report any failure (with stack trace) and exit nonzero
126 | try
127 | main ()
128 | with e ->
129 | eprintf
130 | "FSYACC: error FSY000: %s\n%s"
131 | (match e with
132 | | Failure s -> s
133 | | e -> e.Message)
134 | e.StackTrace
135 |
136 | exit 1
137 |
--------------------------------------------------------------------------------
/tests/fsyacc/unicode/test1-unicode.WithTitleCaseLetter.tokens.error.bsl:
--------------------------------------------------------------------------------
1 | tokenize - getting one token
2 | tokenize - got IDENT, now at char 2
3 | ident char = 110
4 | ident char = 101
5 | ident char = 120
6 | ident char = 116
7 | tokenize - getting one token
8 | tokenize - got IDENT, now at char 7
9 | ident char = 108
10 | ident char = 105
11 | ident char = 110
12 | ident char = 101
13 | tokenize - getting one token
14 | tokenize - got IDENT, now at char 12
15 | ident char = 116
16 | ident char = 101
17 | ident char = 115
18 | ident char = 116
19 | ident char = 115
20 | tokenize - getting one token
21 | tokenize - got IDENT, now at char 18
22 | ident char = 111
23 | ident char = 110
24 | ident char = 101
25 | tokenize - getting one token
26 | tokenize - got IDENT, now at char 22
27 | ident char = 117
28 | ident char = 110
29 | ident char = 105
30 | ident char = 99
31 | ident char = 111
32 | ident char = 100
33 | ident char = 101
34 | tokenize - getting one token
35 | tokenize - got IDENT, now at char 30
36 | ident char = 99
37 | ident char = 104
38 | ident char = 97
39 | ident char = 114
40 | ident char = 97
41 | ident char = 99
42 | ident char = 116
43 | ident char = 101
44 | ident char = 114
45 | tokenize - getting one token
46 | tokenize - got IDENT, now at char 40
47 | ident char = 99
48 | ident char = 108
49 | ident char = 97
50 | ident char = 115
51 | ident char = 115
52 | tokenize - getting one token
53 | tokenize - got IDENT, now at char 47
54 | ident char = 196
55 | ident char = 203
56 | ident char = 214
57 | ident char = 207
58 | ident char = 220
59 | ident char = 226
60 | ident char = 230
61 | ident char = 231
62 | ident char = 241
63 | ident char = 245
64 | ident char = 246
65 | tokenize - getting one token
66 | tokenize - got PLUS, now at char 59
67 | tokenize - getting one token
68 | tokenize - got IDENT, now at char 61
69 | ident char = 105
70 | ident char = 100
71 | tokenize - getting one token
72 | tokenize - got IDENT, now at char 65
73 | ident char = 110
74 | ident char = 101
75 | ident char = 120
76 | ident char = 116
77 | tokenize - getting one token
78 | tokenize - got IDENT, now at char 70
79 | ident char = 108
80 | ident char = 105
81 | ident char = 110
82 | ident char = 101
83 | tokenize - getting one token
84 | tokenize - got IDENT, now at char 75
85 | ident char = 116
86 | ident char = 101
87 | ident char = 115
88 | ident char = 116
89 | ident char = 115
90 | tokenize - getting one token
91 | tokenize - got IDENT, now at char 81
92 | ident char = 115
93 | ident char = 112
94 | ident char = 101
95 | ident char = 99
96 | ident char = 105
97 | ident char = 102
98 | ident char = 105
99 | ident char = 99
100 | tokenize - getting one token
101 | tokenize - got IDENT, now at char 90
102 | ident char = 117
103 | ident char = 110
104 | ident char = 105
105 | ident char = 99
106 | ident char = 111
107 | ident char = 100
108 | ident char = 101
109 | tokenize - getting one token
110 | tokenize - got IDENT, now at char 98
111 | ident char = 99
112 | ident char = 104
113 | ident char = 97
114 | ident char = 114
115 | ident char = 97
116 | ident char = 99
117 | ident char = 116
118 | ident char = 101
119 | ident char = 114
120 | ident char = 115
121 | tokenize - getting one token
122 | tokenize - got IDENT, now at char 110
123 | ident char = 8800
124 | tokenize - getting one token
125 | tokenize - got IDENT, now at char 112
126 | ident char = 8800
127 | ident char = 8800
128 | tokenize - getting one token
129 | tokenize - got IDENT, now at char 115
130 | ident char = 8776
131 | ident char = 8776
132 | tokenize - getting one token
133 | tokenize - got IDENT, now at char 118
134 | ident char = 8776
135 | ident char = 8776
136 | ident char = 8776
137 | tokenize - getting one token
138 | tokenize - got IDENT, now at char 123
139 | ident char = 105
140 | ident char = 100
141 | tokenize - getting one token
142 | tokenize - got PLUS, now at char 126
143 | tokenize - getting one token
144 | tokenize - got IDENT, now at char 128
145 | ident char = 105
146 | ident char = 100
147 | tokenize - getting one token
148 | tokenize - got IDENT, now at char 132
149 | ident char = 110
150 | ident char = 101
151 | ident char = 120
152 | ident char = 116
153 | tokenize - getting one token
154 | tokenize - got IDENT, now at char 137
155 | ident char = 108
156 | ident char = 105
157 | ident char = 110
158 | ident char = 101
159 | tokenize - getting one token
160 | tokenize - got IDENT, now at char 142
161 | ident char = 116
162 | ident char = 101
163 | ident char = 115
164 | ident char = 116
165 | ident char = 115
166 | tokenize - getting one token
167 | tokenize - got IDENT, now at char 148
168 | ident char = 115
169 | ident char = 111
170 | ident char = 109
171 | ident char = 101
172 | tokenize - getting one token
173 | tokenize - got IDENT, now at char 153
174 | ident char = 109
175 | ident char = 111
176 | ident char = 114
177 | ident char = 101
178 | tokenize - getting one token
179 | tokenize - got IDENT, now at char 158
180 | ident char = 114
181 | ident char = 97
182 | ident char = 110
183 | ident char = 100
184 | ident char = 111
185 | ident char = 109
186 | tokenize - getting one token
187 | tokenize - got IDENT, now at char 165
188 | ident char = 117
189 | ident char = 110
190 | ident char = 105
191 | ident char = 99
192 | ident char = 111
193 | ident char = 100
194 | ident char = 101
195 | tokenize - getting one token
196 | tokenize - got IDENT, now at char 173
197 | ident char = 99
198 | ident char = 104
199 | ident char = 97
200 | ident char = 114
201 | ident char = 97
202 | ident char = 99
203 | ident char = 116
204 | ident char = 101
205 | ident char = 114
206 | ident char = 115
207 | tokenize - getting one token
208 | Error: unrecognized input
209 |
--------------------------------------------------------------------------------
/docs/content/fsyacc.md:
--------------------------------------------------------------------------------
1 | FsYacc Overview
2 | ========
3 |
4 | `fsyacc` is a `LALR` parser generator. It follows a similar specification to the `OCamlYacc` parser generator (especially when used with the `ml compatibility` switch)
5 |
6 | Getting Started
7 | ---------------
8 |
9 | Build the tool by cloning this project and running /build.sh or build.cmd
10 |
11 | Add a reference in your project to `FsLexYacc` package via Nuget or paket.
12 |
13 | You can run the parser generator directly:
14 | dotnet fsyacc.dll inputFile -o outputFile
15 |
16 |
17 |
18 | Sample input
19 | ------------
20 |
21 | Parsers generated by `fsyacc` typically produce parse trees represented by values of an F# union type. For example:
22 |
23 | type Expr =
24 | | Val of string
25 | | Int of int
26 | | Float of float
27 | | Decr of Expr
28 |
29 |
30 | type Stmt =
31 | | Assign of string * Expr
32 | | While of Expr * Stmt
33 | | Seq of Stmt list
34 | | IfThen of Expr * Stmt
35 | | IfThenElse of Expr * Stmt * Stmt
36 | | Print of Expr
37 |
38 |
39 | type Prog = Prog of Stmt list
40 |
41 | Given that, a typical parser specification is as follows:
42 |
43 | %{
44 | open Ast
45 | %}
46 |
47 | %start start
48 | %token <string> ID
49 | %token <int> INT
50 | %token <float> FLOAT
51 | %token DECR LPAREN RPAREN WHILE DO END BEGIN IF THEN ELSE PRINT SEMI ASSIGN EOF
52 | %type < Ast.Prog > start
53 |
54 |
55 | %%
56 |
57 |
58 | start: Prog { $1 }
59 |
60 |
61 | Prog: StmtList { Prog(List.rev($1)) }
62 |
63 |
64 | Expr: ID { Val($1) }
65 | | INT { Int($1) }
66 | | FLOAT { Float($1) }
67 | | DECR LPAREN Expr RPAREN { Decr($3) }
68 |
69 |
70 | Stmt: ID ASSIGN Expr { Assign($1,$3) }
71 | | WHILE Expr DO Stmt { While($2,$4) }
72 | | BEGIN StmtList END { Seq(List.rev($2)) }
73 | | IF Expr THEN Stmt { IfThen($2,$4) }
74 | | IF Expr THEN Stmt ELSE Stmt { IfThenElse($2,$4,$6) }
75 | | PRINT Expr { Print($2) }
76 |
77 |
78 | StmtList: Stmt { [$1] }
79 | | StmtList SEMI Stmt { $3 :: $1 }
80 |
81 | The above generates a datatype for tokens and a function for each `start` production. Parsers are typically combined with a lexer generated using `FsLex`.
82 |
83 | MSBuild support
84 | ---------------
85 |
86 | The nuget package includes MSBuild support for `FsLex` and `FsYacc`. New MSBuild targets are added automatically by the nuget package.
87 | But you must manually add `FsLex` and `FsYacc` entries inside of an `ItemGroup` to your `.fsproj` file like this:
88 |
89 |     <ItemGroup>
90 |         <FsYacc Include="Parser.fsy">
91 |             <OtherFlags>--module Parser</OtherFlags>
92 |         </FsYacc>
93 |         <FsLex Include="Lexer.fsl">
94 |             <OtherFlags>--unicode</OtherFlags>
95 |         </FsLex>
95 |     </ItemGroup>
96 | If you want to see `verbose` output from `FsYacc` you need to add `-v` in the `OtherFlags` section like this:
97 |
98 |
99 |         <OtherFlags>--module Parser -v</OtherFlags>
100 |
101 |
102 | Command line options
103 | --------------------
104 |
105 |     fsyacc <options> <input-file>
106 |
107 | -o : Name the output file.
108 |
109 | -v: Produce a listing file.
110 |
111 | --module : Define the F# module name to host the generated parser.
112 |
113 | --internal: Generate an internal module
114 |
115 | --open : Add the given module to the list of those to open in both the generated signature and implementation.
116 |
117 | --light: (ignored)
118 |
119 | --light-off: Add #light "off" to the top of the generated file
120 |
121 | --ml-compatibility: Support the use of the global state from the 'Parsing' module in FSharp.PowerPack.dll.
122 |
123 | --tokens: Simply tokenize the specification file itself.
124 |
125 | --lexlib <namespace>: Specify the namespace for the implementation of the lexer (default FSharp.Text.Lexing)
126 |
127 | --parslib <namespace>: Specify the namespace for the implementation of the parser table interpreter (default FSharp.Text.Parsing)
128 |
129 | --codepage : Assume input lexer specification file is encoded with the given codepage.
130 |
131 | --help: display this list of options
132 |
133 | -help: display this list of options
134 |
135 | Managing and using position markers
136 | -----------------------------------
137 |
138 | Each action in an fsyacc parser has access to a parseState value through which you can access position information.
139 |
140 | type IParseState =
141 | abstract InputStartPosition: int -> Position
142 | abstract InputEndPosition: int -> Position
143 | abstract InputRange: int -> Position * Position
144 | abstract ParserLocalStore: IDictionary<string, obj>
145 | abstract ResultRange : Position * Position
146 | abstract RaiseError<'b> : unit -> 'b
147 |
148 | The `Input` members relate to the indexes of the items on the right-hand side of the current production, while `Result` relates to the entire range covered by the production. You shouldn't use `GetInput` directly — it is called automatically by `$1`, `$2`, etc. You can call `RaiseError` if you like.
149 |
150 | You must set the initial position when you create the lexbuf:
151 |
152 | let setInitialPos (lexbuf:LexBuffer<_>) filename =
153 | lexbuf.EndPos <- { pos_bol = 0;
154 | pos_fname=filename;
155 | pos_cnum=0;
156 | pos_lnum=1 }
157 |
158 |
159 | You must also update the position recorded in the lex buffer each time you process what you consider to be a new line:
160 |
161 | let newline (lexbuf:lexbuf) =
162 | lexbuf.EndPos <- lexbuf.EndPos.AsNewLinePos()
163 |
164 |
165 | Likewise, if your language includes the ability to mark source code locations (e.g. the `#line` directive in OCaml and F#), then you must similarly adjust `lexbuf.EndPos` according to the information you extract from your input.
166 |
167 | Notes on OCaml Compatibility
168 | ----------------------------
169 |
170 | `OCamlYacc` accepts the following:
171 |
172 | %type < context -> context > toplevel
173 |
174 | For `FsYacc` you just add parentheses:
175 |
176 | %type < (context -> context) > toplevel
177 |
--------------------------------------------------------------------------------
/tests/fsyacc/Test2/test2.badInput.tokens.bsl:
--------------------------------------------------------------------------------
1 | tokenize - getting one token
2 | tokenize - got IDENT, now at char 2
3 | ident char = 122
4 | ident char = 49
5 | tokenize - getting one token
6 | tokenize - got LPAREN, now at char 5
7 | tokenize - getting one token
8 | tokenize - got LET, now at char 6
9 | tokenize - getting one token
10 | tokenize - got IDENT, now at char 10
11 | ident char = 120
12 | tokenize - getting one token
13 | tokenize - got IN, now at char 12
14 | tokenize - getting one token
15 | tokenize - got IDENT, now at char 15
16 | ident char = 105
17 | ident char = 100
18 | tokenize - getting one token
19 | tokenize - got END, now at char 18
20 | tokenize - getting one token
21 | tokenize - got RPAREN, now at char 21
22 | tokenize - getting one token
23 | tokenize - got IDENT, now at char 24
24 | ident char = 120
25 | ident char = 50
26 | tokenize - getting one token
27 | tokenize - got LPAREN, now at char 27
28 | tokenize - getting one token
29 | tokenize - got IDENT, now at char 28
30 | ident char = 105
31 | ident char = 100
32 | tokenize - getting one token
33 | tokenize - got PLUS, now at char 31
34 | tokenize - getting one token
35 | tokenize - got IDENT, now at char 33
36 | ident char = 105
37 | ident char = 100
38 | tokenize - getting one token
39 | tokenize - got IDENT, now at char 37
40 | ident char = 121
41 | ident char = 51
42 | tokenize - getting one token
43 | tokenize - got LPAREN, now at char 40
44 | tokenize - getting one token
45 | tokenize - got IDENT, now at char 41
46 | ident char = 105
47 | ident char = 100
48 | tokenize - getting one token
49 | tokenize - got PLUS, now at char 44
50 | tokenize - getting one token
51 | tokenize - got IDENT, now at char 46
52 | ident char = 105
53 | ident char = 100
54 | tokenize - getting one token
55 | tokenize - got PLUS, now at char 49
56 | tokenize - getting one token
57 | tokenize - got IDENT, now at char 51
58 | ident char = 105
59 | ident char = 100
60 | tokenize - getting one token
61 | tokenize - got RPAREN, now at char 53
62 | tokenize - getting one token
63 | tokenize - got IDENT, now at char 56
64 | ident char = 122
65 | ident char = 52
66 | tokenize - getting one token
67 | tokenize - got LPAREN, now at char 59
68 | tokenize - getting one token
69 | tokenize - got IDENT, now at char 60
70 | ident char = 105
71 | ident char = 100
72 | tokenize - getting one token
73 | tokenize - got PLUS, now at char 63
74 | tokenize - getting one token
75 | tokenize - got IDENT, now at char 65
76 | ident char = 105
77 | ident char = 100
78 | tokenize - getting one token
79 | tokenize - got STAR, now at char 68
80 | tokenize - getting one token
81 | tokenize - got IDENT, now at char 70
82 | ident char = 105
83 | ident char = 100
84 | tokenize - getting one token
85 | tokenize - got RPAREN, now at char 72
86 | tokenize - getting one token
87 | tokenize - got IDENT, now at char 75
88 | ident char = 122
89 | ident char = 53
90 | tokenize - getting one token
91 | tokenize - got LPAREN, now at char 78
92 | tokenize - getting one token
93 | tokenize - got LET, now at char 79
94 | tokenize - getting one token
95 | tokenize - got IDENT, now at char 83
96 | ident char = 120
97 | tokenize - getting one token
98 | tokenize - got PLUS, now at char 85
99 | tokenize - getting one token
100 | tokenize - got IN, now at char 87
101 | tokenize - getting one token
102 | tokenize - got IDENT, now at char 90
103 | ident char = 105
104 | ident char = 100
105 | tokenize - getting one token
106 | tokenize - got END, now at char 93
107 | tokenize - getting one token
108 | tokenize - got RPAREN, now at char 96
109 | tokenize - getting one token
110 | tokenize - got IDENT, now at char 99
111 | ident char = 122
112 | ident char = 54
113 | tokenize - getting one token
114 | tokenize - got LPAREN, now at char 102
115 | tokenize - getting one token
116 | tokenize - got LET, now at char 103
117 | tokenize - getting one token
118 | tokenize - got IDENT, now at char 107
119 | ident char = 120
120 | tokenize - getting one token
121 | tokenize - got PLUS, now at char 109
122 | tokenize - getting one token
123 | tokenize - got IN, now at char 111
124 | tokenize - getting one token
125 | tokenize - got IDENT, now at char 114
126 | ident char = 105
127 | ident char = 100
128 | tokenize - getting one token
129 | tokenize - got END, now at char 117
130 | tokenize - getting one token
131 | tokenize - got RPAREN, now at char 120
132 | tokenize - getting one token
133 | tokenize - got IDENT, now at char 123
134 | ident char = 122
135 | ident char = 55
136 | tokenize - getting one token
137 | tokenize - got LPAREN, now at char 126
138 | tokenize - getting one token
139 | tokenize - got LET, now at char 127
140 | tokenize - getting one token
141 | tokenize - got IDENT, now at char 131
142 | ident char = 120
143 | tokenize - getting one token
144 | tokenize - got PLUS, now at char 133
145 | tokenize - getting one token
146 | tokenize - got IDENT, now at char 135
147 | ident char = 121
148 | tokenize - getting one token
149 | tokenize - got IN, now at char 137
150 | tokenize - getting one token
151 | tokenize - got IDENT, now at char 140
152 | ident char = 105
153 | ident char = 100
154 | tokenize - getting one token
155 | tokenize - got END, now at char 143
156 | tokenize - getting one token
157 | tokenize - got RPAREN, now at char 146
158 | tokenize - getting one token
159 | tokenize - got IDENT, now at char 149
160 | ident char = 122
161 | ident char = 56
162 | tokenize - getting one token
163 | tokenize - got LPAREN, now at char 152
164 | tokenize - getting one token
165 | tokenize - got LET, now at char 153
166 | tokenize - getting one token
167 | tokenize - got IDENT, now at char 157
168 | ident char = 120
169 | tokenize - getting one token
170 | tokenize - got RPAREN, now at char 159
171 | tokenize - getting one token
172 | tokenize - got RPAREN, now at char 160
173 | tokenize - getting one token
174 | tokenize - got RPAREN, now at char 161
175 | tokenize - getting one token
176 | tokenize - got IN, now at char 163
177 | tokenize - getting one token
178 | tokenize - got IDENT, now at char 166
179 | ident char = 105
180 | ident char = 100
181 | tokenize - getting one token
182 | tokenize - got END, now at char 169
183 | tokenize - getting one token
184 | tokenize - got RPAREN, now at char 172
185 | tokenize - getting one token
186 | tokenize - got EOF, now at char 177
187 |
--------------------------------------------------------------------------------
/src/Common/Arg.fs:
--------------------------------------------------------------------------------
1 | // (c) Microsoft Corporation 2005-2009.
2 |
3 | namespace FSharp.Text
4 |
5 | type ArgType =
6 | | ClearArg of bool ref
7 | | FloatArg of (float -> unit)
8 | | IntArg of (int -> unit)
9 | | RestArg of (string -> unit)
10 | | SetArg of bool ref
11 | | StringArg of (string -> unit)
12 | | UnitArg of (unit -> unit)
13 |
14 | static member Clear r = ClearArg r
15 | static member Float r = FloatArg r
16 | static member Int r = IntArg r
17 | static member Rest r = RestArg r
18 | static member Set r = SetArg r
19 | static member String r = StringArg r
20 | static member Unit r = UnitArg r
21 |
22 | type ArgInfo(name, action, help) =
23 | member x.Name = name
24 | member x.ArgType = action
25 | member x.HelpText = help
26 |
27 | exception Bad of string
28 | exception HelpText of string
29 |
30 | [<Sealed>]
31 | type ArgParser() =
32 | static let getUsage specs u =
33 | let sbuf = System.Text.StringBuilder 100
34 | let pstring (s: string) = sbuf.Append s |> ignore
35 |
36 | let pendline s =
37 | pstring s
38 | pstring "\n"
39 |
40 | pendline u
41 |
42 | List.iter
43 | (fun (arg: ArgInfo) ->
44 | match arg.Name, arg.ArgType, arg.HelpText with
45 | | s, (UnitArg _ | SetArg _ | ClearArg _), helpText ->
46 | pstring "\t"
47 | pstring s
48 | pstring ": "
49 | pendline helpText
50 | | s, StringArg _, helpText ->
51 | pstring "\t"
52 | pstring s
53 | pstring " : "
54 | pendline helpText
55 | | s, IntArg _, helpText ->
56 | pstring "\t"
57 | pstring s
58 | pstring " : "
59 | pendline helpText
60 | | s, FloatArg _, helpText ->
61 | pstring "\t"
62 | pstring s
63 | pstring " : "
64 | pendline helpText
65 | | s, RestArg _, helpText ->
66 | pstring "\t"
67 | pstring s
68 | pstring " ...: "
69 | pendline helpText)
70 | specs
71 |
72 | pstring "\t"
73 | pstring "--help"
74 | pstring ": "
75 | pendline "display this list of options"
76 | pstring "\t"
77 | pstring "-help"
78 | pstring ": "
79 | pendline "display this list of options"
80 | sbuf.ToString()
81 |
82 | static member ParsePartial(cursor: ref<int>, argv, arguments: seq<ArgInfo>, ?otherArgs, ?usageText) =
83 | let other = defaultArg otherArgs (fun _ -> ())
84 | let usageText = defaultArg usageText ""
85 | let nargs = Array.length argv
86 | cursor.Value <- cursor.Value + 1
87 | let argSpecs = arguments |> Seq.toList
88 | let specs = argSpecs |> List.map (fun (arg: ArgInfo) -> arg.Name, arg.ArgType)
89 |
90 | while cursor.Value < nargs do
91 | let arg = argv.[cursor.Value]
92 |
93 | let rec findMatchingArg args =
94 | match args with
95 | | (s, action) :: _ when s = arg ->
96 | let getSecondArg () =
97 | if cursor.Value + 1 >= nargs then
98 | raise (Bad("option " + s + " needs an argument.\n" + getUsage argSpecs usageText))
99 |
100 | argv.[cursor.Value + 1]
101 |
102 | match action with
103 | | UnitArg f ->
104 | f ()
105 | cursor.Value <- cursor.Value + 1
106 | | SetArg f ->
107 | f.Value <- true
108 | cursor.Value <- cursor.Value + 1
109 | | ClearArg f ->
110 | f.Value <- false
111 | cursor.Value <- cursor.Value + 1
112 | | StringArg f ->
113 | let arg2 = getSecondArg ()
114 | f arg2
115 | cursor.Value <- cursor.Value + 2
116 | | IntArg f ->
117 | let arg2 = getSecondArg ()
118 |
119 | let arg2 =
120 | try
121 | int32 arg2
122 | with _ ->
123 | raise (Bad(getUsage argSpecs usageText)) in
124 |
125 | f arg2
126 | cursor.Value <- cursor.Value + 2
127 | | FloatArg f ->
128 | let arg2 = getSecondArg ()
129 |
130 | let arg2 =
131 | try
132 | float arg2
133 | with _ ->
134 | raise (Bad(getUsage argSpecs usageText)) in
135 |
136 | f arg2
137 | cursor.Value <- cursor.Value + 2
138 | | RestArg f ->
139 | cursor.Value <- cursor.Value + 1
140 |
141 | while cursor.Value < nargs do
142 | f argv.[cursor.Value]
143 | cursor.Value <- cursor.Value + 1
144 |
145 | | _ :: more -> findMatchingArg more
146 | | [] ->
147 | if arg = "-help" || arg = "--help" || arg = "/help" || arg = "/help" || arg = "/?" then
148 | raise (HelpText(getUsage argSpecs usageText))
149 | // Note: for '/abc/def' does not count as an argument
150 | // Note: '/abc' does
151 | elif
152 | arg.Length > 0
153 | && (arg.[0] = '-'
154 | || (arg.[0] = '/' && not (arg.Length > 1 && arg.[1..].Contains "/")))
155 | then
156 | raise (Bad("unrecognized argument: " + arg + "\n" + getUsage argSpecs usageText))
157 | else
158 | other arg
159 | cursor.Value <- cursor.Value + 1
160 |
161 | findMatchingArg specs
162 |
163 | static member Usage(arguments, ?usage) =
164 | let usage = defaultArg usage ""
165 | System.Console.Error.WriteLine(getUsage (Seq.toList arguments) usage)
166 |
167 | #if FX_NO_COMMAND_LINE_ARGS
168 | #else
169 | static member Parse(arguments, ?otherArgs, ?usageText) =
170 | let current = ref 0
171 | let argv = System.Environment.GetCommandLineArgs()
172 |
173 | try
174 | ArgParser.ParsePartial(current, argv, arguments, ?otherArgs = otherArgs, ?usageText = usageText)
175 | with
176 | | Bad h
177 | | HelpText h ->
178 | System.Console.Error.WriteLine h
179 | System.Console.Error.Flush()
180 | System.Environment.Exit(1)
181 | | _ -> reraise ()
182 | #endif
183 |
--------------------------------------------------------------------------------
/tests/fsyacc/unicode/test1-unicode.input3.tokens.bsl:
--------------------------------------------------------------------------------
1 | tokenize - getting one token
2 | tokenize - got IDENT, now at char 2
3 | ident char = 110
4 | ident char = 101
5 | ident char = 120
6 | ident char = 116
7 | tokenize - getting one token
8 | tokenize - got IDENT, now at char 7
9 | ident char = 108
10 | ident char = 105
11 | ident char = 110
12 | ident char = 101
13 | tokenize - getting one token
14 | tokenize - got IDENT, now at char 12
15 | ident char = 116
16 | ident char = 101
17 | ident char = 115
18 | ident char = 116
19 | ident char = 115
20 | tokenize - getting one token
21 | tokenize - got IDENT, now at char 18
22 | ident char = 111
23 | ident char = 110
24 | ident char = 101
25 | tokenize - getting one token
26 | tokenize - got IDENT, now at char 22
27 | ident char = 117
28 | ident char = 110
29 | ident char = 105
30 | ident char = 99
31 | ident char = 111
32 | ident char = 100
33 | ident char = 101
34 | tokenize - getting one token
35 | tokenize - got IDENT, now at char 30
36 | ident char = 99
37 | ident char = 104
38 | ident char = 97
39 | ident char = 114
40 | ident char = 97
41 | ident char = 99
42 | ident char = 116
43 | ident char = 101
44 | ident char = 114
45 | tokenize - getting one token
46 | tokenize - got IDENT, now at char 40
47 | ident char = 99
48 | ident char = 108
49 | ident char = 97
50 | ident char = 115
51 | ident char = 115
52 | tokenize - getting one token
53 | tokenize - got IDENT, now at char 47
54 | ident char = 196
55 | ident char = 203
56 | ident char = 214
57 | ident char = 207
58 | ident char = 220
59 | ident char = 226
60 | ident char = 230
61 | ident char = 231
62 | ident char = 241
63 | ident char = 245
64 | ident char = 246
65 | tokenize - getting one token
66 | tokenize - got PLUS, now at char 59
67 | tokenize - getting one token
68 | tokenize - got IDENT, now at char 61
69 | ident char = 105
70 | ident char = 100
71 | tokenize - getting one token
72 | tokenize - got IDENT, now at char 65
73 | ident char = 110
74 | ident char = 101
75 | ident char = 120
76 | ident char = 116
77 | tokenize - getting one token
78 | tokenize - got IDENT, now at char 70
79 | ident char = 108
80 | ident char = 105
81 | ident char = 110
82 | ident char = 101
83 | tokenize - getting one token
84 | tokenize - got IDENT, now at char 75
85 | ident char = 116
86 | ident char = 101
87 | ident char = 115
88 | ident char = 116
89 | ident char = 115
90 | tokenize - getting one token
91 | tokenize - got IDENT, now at char 81
92 | ident char = 115
93 | ident char = 112
94 | ident char = 101
95 | ident char = 99
96 | ident char = 105
97 | ident char = 102
98 | ident char = 105
99 | ident char = 99
100 | tokenize - getting one token
101 | tokenize - got IDENT, now at char 90
102 | ident char = 117
103 | ident char = 110
104 | ident char = 105
105 | ident char = 99
106 | ident char = 111
107 | ident char = 100
108 | ident char = 101
109 | tokenize - getting one token
110 | tokenize - got IDENT, now at char 98
111 | ident char = 99
112 | ident char = 104
113 | ident char = 97
114 | ident char = 114
115 | ident char = 97
116 | ident char = 99
117 | ident char = 116
118 | ident char = 101
119 | ident char = 114
120 | ident char = 115
121 | tokenize - getting one token
122 | tokenize - got IDENT, now at char 110
123 | ident char = 8800
124 | tokenize - getting one token
125 | tokenize - got IDENT, now at char 112
126 | ident char = 8800
127 | ident char = 8800
128 | tokenize - getting one token
129 | tokenize - got IDENT, now at char 115
130 | ident char = 8776
131 | ident char = 8776
132 | tokenize - getting one token
133 | tokenize - got IDENT, now at char 118
134 | ident char = 8776
135 | ident char = 8776
136 | ident char = 8776
137 | tokenize - getting one token
138 | tokenize - got IDENT, now at char 123
139 | ident char = 105
140 | ident char = 100
141 | tokenize - getting one token
142 | tokenize - got PLUS, now at char 126
143 | tokenize - getting one token
144 | tokenize - got IDENT, now at char 128
145 | ident char = 105
146 | ident char = 100
147 | tokenize - getting one token
148 | tokenize - got IDENT, now at char 132
149 | ident char = 110
150 | ident char = 101
151 | ident char = 120
152 | ident char = 116
153 | tokenize - getting one token
154 | tokenize - got IDENT, now at char 137
155 | ident char = 108
156 | ident char = 105
157 | ident char = 110
158 | ident char = 101
159 | tokenize - getting one token
160 | tokenize - got IDENT, now at char 142
161 | ident char = 116
162 | ident char = 101
163 | ident char = 115
164 | ident char = 116
165 | ident char = 115
166 | tokenize - getting one token
167 | tokenize - got IDENT, now at char 148
168 | ident char = 115
169 | ident char = 111
170 | ident char = 109
171 | ident char = 101
172 | tokenize - getting one token
173 | tokenize - got IDENT, now at char 153
174 | ident char = 109
175 | ident char = 111
176 | ident char = 114
177 | ident char = 101
178 | tokenize - getting one token
179 | tokenize - got IDENT, now at char 158
180 | ident char = 114
181 | ident char = 97
182 | ident char = 110
183 | ident char = 100
184 | ident char = 111
185 | ident char = 109
186 | tokenize - getting one token
187 | tokenize - got IDENT, now at char 165
188 | ident char = 117
189 | ident char = 110
190 | ident char = 105
191 | ident char = 99
192 | ident char = 111
193 | ident char = 100
194 | ident char = 101
195 | tokenize - getting one token
196 | tokenize - got IDENT, now at char 173
197 | ident char = 99
198 | ident char = 104
199 | ident char = 97
200 | ident char = 114
201 | ident char = 97
202 | ident char = 99
203 | ident char = 116
204 | ident char = 101
205 | ident char = 114
206 | ident char = 115
207 | tokenize - getting one token
208 | tokenize - got IDENT, now at char 185
209 | ident char = 1052
210 | ident char = 1053
211 | ident char = 1054
212 | ident char = 1055
213 | ident char = 1056
214 | ident char = 1057
215 | ident char = 1058
216 | ident char = 1059
217 | ident char = 1060
218 | ident char = 1061
219 | ident char = 1062
220 | ident char = 7808
221 | ident char = 7809
222 | ident char = 7810
223 | ident char = 1116
224 | tokenize - getting one token
225 | tokenize - got IDENT, now at char 201
226 | ident char = 945
227 | ident char = 946
228 | ident char = 923
229 | ident char = 920
230 | ident char = 937
231 | ident char = 936
232 | ident char = 935
233 | ident char = 931
234 | ident char = 948
235 | ident char = 950
236 | ident char = 538
237 | ident char = 374
238 | ident char = 506
239 | tokenize - getting one token
240 | tokenize - got IDENT, now at char 216
241 | ident char = 105
242 | ident char = 100
243 | tokenize - getting one token
244 | tokenize - got EOF, now at char 218
245 |
--------------------------------------------------------------------------------
/tests/fsyacc/unicode/test1-unicode.WithTitleCaseLetter.tokens.bsl:
--------------------------------------------------------------------------------
1 | tokenize - getting one token
2 | tokenize - got IDENT, now at char 2
3 | ident char = 110
4 | ident char = 101
5 | ident char = 120
6 | ident char = 116
7 | tokenize - getting one token
8 | tokenize - got IDENT, now at char 7
9 | ident char = 108
10 | ident char = 105
11 | ident char = 110
12 | ident char = 101
13 | tokenize - getting one token
14 | tokenize - got IDENT, now at char 12
15 | ident char = 116
16 | ident char = 101
17 | ident char = 115
18 | ident char = 116
19 | ident char = 115
20 | tokenize - getting one token
21 | tokenize - got IDENT, now at char 18
22 | ident char = 111
23 | ident char = 110
24 | ident char = 101
25 | tokenize - getting one token
26 | tokenize - got IDENT, now at char 22
27 | ident char = 117
28 | ident char = 110
29 | ident char = 105
30 | ident char = 99
31 | ident char = 111
32 | ident char = 100
33 | ident char = 101
34 | tokenize - getting one token
35 | tokenize - got IDENT, now at char 30
36 | ident char = 99
37 | ident char = 104
38 | ident char = 97
39 | ident char = 114
40 | ident char = 97
41 | ident char = 99
42 | ident char = 116
43 | ident char = 101
44 | ident char = 114
45 | tokenize - getting one token
46 | tokenize - got IDENT, now at char 40
47 | ident char = 99
48 | ident char = 108
49 | ident char = 97
50 | ident char = 115
51 | ident char = 115
52 | tokenize - getting one token
53 | tokenize - got IDENT, now at char 47
54 | ident char = 196
55 | ident char = 203
56 | ident char = 214
57 | ident char = 207
58 | ident char = 220
59 | ident char = 226
60 | ident char = 230
61 | ident char = 231
62 | ident char = 241
63 | ident char = 245
64 | ident char = 246
65 | tokenize - getting one token
66 | tokenize - got PLUS, now at char 59
67 | tokenize - getting one token
68 | tokenize - got IDENT, now at char 61
69 | ident char = 105
70 | ident char = 100
71 | tokenize - getting one token
72 | tokenize - got IDENT, now at char 65
73 | ident char = 110
74 | ident char = 101
75 | ident char = 120
76 | ident char = 116
77 | tokenize - getting one token
78 | tokenize - got IDENT, now at char 70
79 | ident char = 108
80 | ident char = 105
81 | ident char = 110
82 | ident char = 101
83 | tokenize - getting one token
84 | tokenize - got IDENT, now at char 75
85 | ident char = 116
86 | ident char = 101
87 | ident char = 115
88 | ident char = 116
89 | ident char = 115
90 | tokenize - getting one token
91 | tokenize - got IDENT, now at char 81
92 | ident char = 115
93 | ident char = 112
94 | ident char = 101
95 | ident char = 99
96 | ident char = 105
97 | ident char = 102
98 | ident char = 105
99 | ident char = 99
100 | tokenize - getting one token
101 | tokenize - got IDENT, now at char 90
102 | ident char = 117
103 | ident char = 110
104 | ident char = 105
105 | ident char = 99
106 | ident char = 111
107 | ident char = 100
108 | ident char = 101
109 | tokenize - getting one token
110 | tokenize - got IDENT, now at char 98
111 | ident char = 99
112 | ident char = 104
113 | ident char = 97
114 | ident char = 114
115 | ident char = 97
116 | ident char = 99
117 | ident char = 116
118 | ident char = 101
119 | ident char = 114
120 | ident char = 115
121 | tokenize - getting one token
122 | tokenize - got IDENT, now at char 110
123 | ident char = 8800
124 | tokenize - getting one token
125 | tokenize - got IDENT, now at char 112
126 | ident char = 8800
127 | ident char = 8800
128 | tokenize - getting one token
129 | tokenize - got IDENT, now at char 115
130 | ident char = 8776
131 | ident char = 8776
132 | tokenize - getting one token
133 | tokenize - got IDENT, now at char 118
134 | ident char = 8776
135 | ident char = 8776
136 | ident char = 8776
137 | tokenize - getting one token
138 | tokenize - got IDENT, now at char 123
139 | ident char = 105
140 | ident char = 100
141 | tokenize - getting one token
142 | tokenize - got PLUS, now at char 126
143 | tokenize - getting one token
144 | tokenize - got IDENT, now at char 128
145 | ident char = 105
146 | ident char = 100
147 | tokenize - getting one token
148 | tokenize - got IDENT, now at char 132
149 | ident char = 110
150 | ident char = 101
151 | ident char = 120
152 | ident char = 116
153 | tokenize - getting one token
154 | tokenize - got IDENT, now at char 137
155 | ident char = 108
156 | ident char = 105
157 | ident char = 110
158 | ident char = 101
159 | tokenize - getting one token
160 | tokenize - got IDENT, now at char 142
161 | ident char = 116
162 | ident char = 101
163 | ident char = 115
164 | ident char = 116
165 | ident char = 115
166 | tokenize - getting one token
167 | tokenize - got IDENT, now at char 148
168 | ident char = 115
169 | ident char = 111
170 | ident char = 109
171 | ident char = 101
172 | tokenize - getting one token
173 | tokenize - got IDENT, now at char 153
174 | ident char = 109
175 | ident char = 111
176 | ident char = 114
177 | ident char = 101
178 | tokenize - getting one token
179 | tokenize - got IDENT, now at char 158
180 | ident char = 114
181 | ident char = 97
182 | ident char = 110
183 | ident char = 100
184 | ident char = 111
185 | ident char = 109
186 | tokenize - getting one token
187 | tokenize - got IDENT, now at char 165
188 | ident char = 117
189 | ident char = 110
190 | ident char = 105
191 | ident char = 99
192 | ident char = 111
193 | ident char = 100
194 | ident char = 101
195 | tokenize - getting one token
196 | tokenize - got IDENT, now at char 173
197 | ident char = 99
198 | ident char = 104
199 | ident char = 97
200 | ident char = 114
201 | ident char = 97
202 | ident char = 99
203 | ident char = 116
204 | ident char = 101
205 | ident char = 114
206 | ident char = 115
207 | tokenize - getting one token
208 | tokenize - got IDENT, now at char 185
209 | ident char = 498
210 | ident char = 1052
211 | ident char = 1053
212 | ident char = 1054
213 | ident char = 1055
214 | ident char = 1056
215 | ident char = 1057
216 | ident char = 1058
217 | ident char = 1059
218 | ident char = 1060
219 | ident char = 1061
220 | ident char = 1062
221 | ident char = 7808
222 | ident char = 7809
223 | ident char = 7810
224 | ident char = 1116
225 | tokenize - getting one token
226 | tokenize - got IDENT, now at char 202
227 | ident char = 945
228 | ident char = 946
229 | ident char = 923
230 | ident char = 920
231 | ident char = 937
232 | ident char = 936
233 | ident char = 935
234 | ident char = 931
235 | ident char = 948
236 | ident char = 950
237 | ident char = 538
238 | ident char = 374
239 | ident char = 506
240 | tokenize - getting one token
241 | tokenize - got IDENT, now at char 217
242 | ident char = 105
243 | ident char = 100
244 | tokenize - getting one token
245 | tokenize - got EOF, now at char 219
246 |
--------------------------------------------------------------------------------
/src/FsLexYacc.Runtime/Lexing.fsi:
--------------------------------------------------------------------------------
1 | //==========================================================================
2 | // LexBuffers are for use with automatically generated lexical analyzers,
3 | // in particular those produced by 'fslex'.
4 | //
5 | // (c) Microsoft Corporation 2005-2008.
6 | //===========================================================================
7 |
8 | module FSharp.Text.Lexing
9 |
10 | open System.Collections.Generic
11 |
12 | /// Position information stored for lexing tokens
13 | [<Struct>]
14 | type Position =
15 | {
16 | /// The file name for the position
17 | pos_fname: string
18 |
19 | /// The line number for the position
20 | pos_lnum: int
21 |
22 | /// The line number for the position in the original source file
23 | pos_orig_lnum: int
24 |
25 | /// The absolute offset of the beginning of the line
26 | pos_bol: int
27 |
28 | /// The absolute offset of the column for the position
29 | pos_cnum: int
30 | }
31 |
32 | /// The file name associated with the input stream.
33 | member FileName: string
34 |
35 | /// The line number in the input stream, assuming fresh positions have been updated
36 | /// using AsNewLinePos() and by modifying the EndPos property of the LexBuffer.
37 | member Line: int
38 |
39 | /// The line number for the position in the input stream, assuming fresh positions have been updated
40 | /// using AsNewLinePos()
41 | member OriginalLine: int
42 |
43 | [<System.Obsolete>]
44 | member Char: int
45 |
46 | /// The character number in the input stream
47 | member AbsoluteOffset: int
48 |
49 | /// Return absolute offset of the start of the line marked by the position
50 | member StartOfLineAbsoluteOffset: int
51 |
52 | /// Return the column number marked by the position, i.e. the difference between the AbsoluteOffset and the StartOfLineAbsoluteOffset
53 | member Column: int
54 |
55 | // Given a position just beyond the end of a line, return a position at the start of the next line
56 | member NextLine: Position
57 |
58 | /// Given a position at the start of a token of length n, return a position just beyond the end of the token
59 | member EndOfToken: n: int -> Position
60 |
61 | /// Gives a position shifted by specified number of characters
62 | member ShiftColumnBy: by: int -> Position
63 |
64 | [<System.Obsolete>]
65 | member AsNewLinePos: unit -> Position
66 |
67 | /// Get an arbitrary position, with the empty string as filename, and
68 | static member Empty: Position
69 |
70 | /// Get a position corresponding to the first line (line number 1) in a given file
71 | static member FirstLine: filename: string -> Position
72 |
73 | /// Input buffers consumed by lexers generated by fslex.exe
74 | [<Sealed>]
75 | type LexBuffer<'char> =
76 | /// The start position for the lexeme
77 | member StartPos: Position with get, set
78 |
79 | /// The end position for the lexeme
80 | member EndPos: Position with get, set
81 |
82 | /// The matched string
83 | member Lexeme: 'char array
84 |
85 | /// Fast helper to turn the matched characters into a string, avoiding an intermediate array
86 | static member LexemeString: LexBuffer<char> -> string
87 |
88 | /// The length of the matched string
89 | member LexemeLength: int
90 |
91 | /// Fetch a particular character in the matched string
92 | member LexemeChar: int -> 'char
93 |
94 | /// Dynamically typed, non-lexically scoped parameter table
95 | member BufferLocalStore: IDictionary<string, obj>
96 |
97 | /// True if the refill of the buffer ever failed , or if explicitly set to true.
98 | member IsPastEndOfStream: bool with get, set
99 |
100 | /// Remove all input, though don't discard the current lexeme
101 | member DiscardInput: unit -> unit
102 |
103 | /// Create a lex buffer suitable for byte lexing that reads characters from the given array
104 | static member FromBytes: byte[] -> LexBuffer<byte>
105 |
106 | /// Create a lex buffer suitable for Unicode lexing that reads characters from the given array
107 | static member FromChars: char[] -> LexBuffer<char>
108 |
109 | /// Create a lex buffer suitable for Unicode lexing that reads characters from the given string
110 | static member FromString: string -> LexBuffer<char>
111 |
112 | /// Create a lex buffer that reads character or byte inputs by using the given function
113 | static member FromFunction: ('char[] * int * int -> int) -> LexBuffer<'char>
114 |
115 | /// Create a lex buffer that asynchronously reads character or byte inputs by using the given function
116 | static member FromAsyncFunction: ('char[] * int * int -> Async<int>) -> LexBuffer<'char>
117 |
118 | [<System.Obsolete("Use LexBuffer<char>.FromFunction instead")>]
119 | static member FromCharFunction: (char[] -> int -> int) -> LexBuffer<char>
120 |
121 | [<System.Obsolete("Use LexBuffer<byte>.FromFunction instead")>]
122 | static member FromByteFunction: (byte[] -> int -> int) -> LexBuffer<byte>
123 |
124 | #if !FABLE_COMPILER
125 |
126 | /// Create a lex buffer suitable for use with a Unicode lexer that reads character inputs from the given text reader
127 | static member FromTextReader: System.IO.TextReader -> LexBuffer<char>
128 |
129 | /// Create a lex buffer suitable for use with ASCII byte lexing that reads byte inputs from the given binary reader
130 | static member FromBinaryReader: System.IO.BinaryReader -> LexBuffer<byte>
131 |
132 | #endif
133 |
134 | /// The type of tables for an ascii lexer generated by fslex.
135 | [<Sealed>]
136 | type AsciiTables =
137 | static member Create: uint16[] array * uint16[] -> AsciiTables
138 |
139 | /// Interpret tables for an ascii lexer generated by fslex.
140 | member Interpret: initialState: int * LexBuffer<byte> -> int
141 |
142 | /// Interpret tables for an ascii lexer generated by fslex, processing input asynchronously
143 | []
144 | member AsyncInterpret: initialState: int * LexBuffer<byte> -> Async<int>
145 |
146 | /// The type of tables for an unicode lexer generated by fslex.
147 | [<Sealed>]
148 | type UnicodeTables =
149 |
150 | static member Create: uint16[] array * uint16[] -> UnicodeTables
151 |
152 | /// Interpret tables for a unicode lexer generated by fslex.
153 | member Interpret: initialState: int * LexBuffer<char> -> int
154 |
155 | /// Interpret tables for a unicode lexer generated by fslex, processing input asynchronously
156 | []
157 | member AsyncInterpret: initialState: int * LexBuffer<char> -> Async<int>
158 |
159 | #if !FABLE_COMPILER
160 |
161 | /// Standard utility to create a Unicode LexBuffer
162 | ///
163 | /// One small annoyance is that LexBuffers are not IDisposable. This means
164 | /// we can't just return the LexBuffer object, since the file it wraps wouldn't
165 | /// get closed when we're finished with the LexBuffer. Hence we return the stream,
166 | /// the reader and the LexBuffer. The caller should dispose the first two when done.
167 | val UnicodeFileAsLexbuf: string * int option -> System.IO.FileStream * System.IO.StreamReader * LexBuffer<char>
168 |
169 | #endif
170 |
--------------------------------------------------------------------------------
/src/FsLex.Core/fslexlex.fsl:
--------------------------------------------------------------------------------
1 | {
2 | (* (c) Microsoft Corporation 2005-2008. *)
3 |
4 | module FsLexYacc.FsLex.Lexer
5 |
6 | open FsLexYacc.FsLex.AST
7 | open FsLexYacc.FsLex.Parser
8 | open FSharp.Text
9 | open FSharp.Text.Lexing
10 | open System.Text
11 |
12 | let escape c =
13 | match c with
14 | | '\\' -> '\\'
15 | | '\'' -> '\''
16 | | 'n' -> '\n'
17 | | 't' -> '\t'
18 | | 'b' -> '\b'
19 | | 'r' -> '\r'
20 | | c -> c
21 |
22 | let lexeme (lexbuf : LexBuffer<char>) = new System.String(lexbuf.Lexeme)
23 | let newline (lexbuf:LexBuffer<_>) = lexbuf.EndPos <- lexbuf.EndPos.NextLine
24 |
25 | let unexpected_char lexbuf =
26 | failwith ("Unexpected character '"+(lexeme lexbuf)+"'")
27 |
28 | let digit d =
29 | if d >= '0' && d <= '9' then int32 d - int32 '0'
30 | else failwith "digit"
31 |
32 | let hexdigit d =
33 | if d >= '0' && d <= '9' then digit d
34 | else if d >= 'a' && d <= 'f' then int32 d - int32 'a' + 10
35 | else if d >= 'A' && d <= 'F' then int32 d - int32 'A' + 10
36 | else failwithf "bad hexdigit: %c" d
37 |
38 | let trigraph c1 c2 c3 =
39 | char (digit c1 * 100 + digit c2 * 10 + digit c3)
40 |
41 | let hexgraph c1 c2 =
42 | char (hexdigit c1 * 16 + hexdigit c2)
43 |
// Decode a \uXXXX escape (exactly four hex digits) into one UTF-16 char.
let unicodegraph_short (s:string) =
    if s.Length <> 4 then failwith "unicodegraph";
    let value =
        (hexdigit s.[0] <<< 12)
        ||| (hexdigit s.[1] <<< 8)
        ||| (hexdigit s.[2] <<< 4)
        ||| hexdigit s.[3]
    char value
47 |
// Decode a \UXXXXXXXX escape (exactly eight hex digits).
// Returns (None, ch) for code points representable as a single UTF-16 unit,
// or (Some highSurrogate, lowSurrogate) for code points above 0xFFFF.
let unicodegraph_long (s:string) =
    if s.Length <> 8 then failwith "unicodegraph_long";
    let high = hexdigit s.[0] * 4096 + hexdigit s.[1] * 256 + hexdigit s.[2] * 16 + hexdigit s.[3] in
    let low = hexdigit s.[4] * 4096 + hexdigit s.[5] * 256 + hexdigit s.[6] * 16 + hexdigit s.[7] in
    if high = 0 then None, char low
    else
      (* A surrogate pair - see http://www.unicode.org/unicode/uni2book/ch03.pdf, section 3.7 *)
      let codepoint = high * 0x10000 + low
      // BUGFIX: the low-surrogate base is 0xDC00 (valid range 0xDC00-0xDFFF).
      // The previous constant 0xDF30 produced values up to 0xE32F, which are
      // not surrogates at all.
      Some (char (0xD800 + ((codepoint - 0x10000) / 0x400))),
      char (0xDC00 + ((codepoint - 0x10000) % 0x400))
57 |
58 | }
59 |
(* Named character classes and regex shorthands used by the rules below. *)
let letter = ['A'-'Z'] | ['a'-'z']
let digit = ['0'-'9']
let whitespace = [' ' '\t']
(* Single-quoted char literal: a plain char or one of the simple backslash escapes. *)
let char = '\'' ( [^'\\'] | ('\\' ( '\\' | '\'' | "\"" | 'n' | 't' | 'b' | 'r'))) '\''
let hex = ['0'-'9'] | ['A'-'F'] | ['a'-'f']
(* \xHH hex escape and \DDD decimal escape *)
let hexgraph = '\\' 'x' hex hex
let trigraph = '\\' digit digit digit
let newline = ('\n' | '\r' '\n')
let ident_start_char = letter
let ident_char = ( ident_start_char| digit | ['\'' '_'] )
let ident = ident_start_char ident_char*

(* \uXXXX and \UXXXXXXXX Unicode escapes *)
let unicodegraph_short = '\\' 'u' hex hex hex hex
let unicodegraph_long = '\\' 'U' hex hex hex hex hex hex hex hex
74 |
(* Main tokenizer for .fsl lexer-specification files. *)
rule token = parse
  | "rule" {RULE }
  | "parse" {PARSE }
  | "eof" {EOF }
  | "let" {LET }
  | "and" {AND }
  // Character literal: s.[1] is the char itself, or '\\' introducing an escape.
  | char
    { let s = lexeme lexbuf in
      CHAR (if s.[1] = '\\' then escape s.[2] else s.[1]) }

  // '\DDD' decimal escape
  | '\'' trigraph '\''
    { let s = lexeme lexbuf in
      CHAR (trigraph s.[2] s.[3] s.[4]) }

  // '\xHH' hex escape
  | '\'' hexgraph '\''
    { let s = lexeme lexbuf in
      CHAR (hexgraph s.[3] s.[4]) }

  // '\uXXXX' escape
  | '\'' unicodegraph_short '\''
    { let s = lexeme lexbuf in
      CHAR (unicodegraph_short s.[3..6]) }

  // '\UXXXXXXXX' escape; code points needing a surrogate pair are rejected
  | '\'' unicodegraph_long '\''
    { let s = lexeme lexbuf in
      match (unicodegraph_long s.[3..10]) with
      | None, c -> CHAR(c)
      | Some _ , _ -> failwith "Unicode characters needing surrogate pairs are not yet supported by this tool" }

  // '\Xx': a two-letter Unicode category name, e.g. '\Lu'
  | '\'' '\\' ['A'-'Z'] ['a'-'z'] '\''
    { let s = (lexeme lexbuf).[2..3] in
      UNICODE_CATEGORY (s) }

  // Embedded F# action code, collected by the "code" rule until the matching '}'
  | '{' { let p = lexbuf.StartPos in
          let buff = (new StringBuilder 100) in
          // adjust the first line to get even indentation for all lines w.r.t. the left hand margin
          buff.Append (String.replicate (lexbuf.StartPos.Column+1) " ") |> ignore;
          code p buff lexbuf }

  | '"' { string lexbuf.StartPos (new StringBuilder 100) lexbuf }

  | whitespace+ { token lexbuf }
  | newline { newline lexbuf; token lexbuf }
  | ident_start_char ident_char* { IDENT (lexeme lexbuf) }
  // Single-character punctuation of the .fsl grammar
  | '|' { BAR }
  | '.' { DOT }
  | '+' { PLUS }
  | '*' { STAR }
  | '?' { QMARK }
  | '=' { EQUALS }
  | '[' { LBRACK }
  | ']' { RBRACK }
  | '(' { LPAREN }
  | ')' { RPAREN }
  | ':' { COLON }
  | '_' { UNDERSCORE }
  | '^' { HAT }
  | '-' { DASH }
  // Comments are skipped; (* *) comments may nest (see the "comment" rule)
  | "(*" { ignore(comment lexbuf.StartPos lexbuf); token lexbuf }
  | "//" [^'\n''\r']* { token lexbuf }
  | _ { unexpected_char lexbuf }
  | eof { EOF }
(* Lexes the body of a "..." literal; p is its start position (for error
   reporting), buff accumulates the decoded contents. *)
and string p buff = parse
  // Backslash-newline is a line continuation and contributes nothing
  | '\\' newline { newline lexbuf; string p buff lexbuf }
  | '\\' ( '"' | '\\' | '\'' | 'n' | 't' | 'b' | 'r')
    { let _ = buff.Append (escape (lexeme lexbuf).[1]) in
      string p buff lexbuf }
  | trigraph
    { let s = lexeme lexbuf in
      let _ = buff.Append (trigraph s.[1] s.[2] s.[3]) in
      string p buff lexbuf }
  | '"' { STRING (buff.ToString()) }
  // Literal newlines are normalized to the platform newline
  | newline { newline lexbuf;
              let _ = buff.Append System.Environment.NewLine in
              string p buff lexbuf }
  | (whitespace | letter | digit) +
    { let _ = buff.Append (lexeme lexbuf) in
      string p buff lexbuf }
  | eof { failwith (Printf.sprintf "end of file in string started at (%d,%d)" p.pos_lnum (p.pos_cnum - p.pos_bol)) }
  | _ { let _ = buff.Append (lexeme lexbuf).[0] in
        string p buff lexbuf }
(* Copies an embedded F# action verbatim into buff until the matching '}'.
   Nested braces and string literals are tracked so a '}' inside them does not
   terminate the block; p is the block's start position. *)
and code p buff = parse
  | "}" { CODE (buff.ToString(), p) }
  // Nested brace block: recurse, then re-append the '}' the recursion consumed
  | "{" { let _ = buff.Append (lexeme lexbuf) in
          ignore(code p buff lexbuf);
          let _ = buff.Append "}" in
          code p buff lexbuf }
  | '\\' ('"' | '\\')
    { let _ = buff.Append (lexeme lexbuf) in
      code p buff lexbuf }
  // String literal inside the code: copied verbatim by "codestring"
  | "\"" { let _ = buff.Append (lexeme lexbuf) in
           ignore(codestring buff lexbuf);
           code p buff lexbuf }
  | newline { newline lexbuf;
              let _ = buff.Append System.Environment.NewLine in
              code p buff lexbuf }
  | (whitespace | letter | digit) +
    { let _ = buff.Append (lexeme lexbuf) in
      code p buff lexbuf }
  | "//" [^'\n''\r']*
    { let _ = buff.Append (lexeme lexbuf) in
      code p buff lexbuf }
  // NOTE(review): an unterminated code block silently yields EOF rather than
  // an error (contrast with "string"/"codestring") — confirm this is intended.
  | eof { EOF }
  | _ { let _ = buff.Append (lexeme lexbuf).[0] in
        code p buff lexbuf }
179 |
(* Copies a string literal occurring inside action code verbatim — escapes
   included — up to and including the closing quote. *)
and codestring buff = parse
  | '\\' ('"' | '\\')
    { let _ = buff.Append (lexeme lexbuf) in
      codestring buff lexbuf }
  | '"' { let _ = buff.Append (lexeme lexbuf) in
          buff.ToString() }
  | newline { newline lexbuf;
              let _ = buff.Append System.Environment.NewLine in
              codestring buff lexbuf }
  | (whitespace | letter | digit) +
    { let _ = buff.Append (lexeme lexbuf) in
      codestring buff lexbuf }
  | eof { failwith "unterminated string in code" }
  | _ { let _ = buff.Append (lexeme lexbuf).[0] in
        codestring buff lexbuf }
195 |
(* Skips a (* ... *) comment, handling nesting and embedded string/char
   literals; p is the comment's start position, used in error messages. *)
and comment p = parse
  // A char literal may contain "*)" — consume it atomically
  | char { comment p lexbuf }
  | '"' { ignore(try string lexbuf.StartPos (new StringBuilder 100) lexbuf
                 with Failure s -> failwith (s + "\n" + Printf.sprintf "error while processing string nested in comment started at (%d,%d)" p.pos_lnum (p.pos_cnum - p.pos_bol)));
          comment p lexbuf }
  | "(*" { ignore(try comment p lexbuf with Failure s -> failwith (s + "\n" + Printf.sprintf "error while processing nested comment started at (%d,%d)" p.pos_lnum (p.pos_cnum - p.pos_bol)));
           comment p lexbuf }
  | newline { newline lexbuf; comment p lexbuf }
  | "*)" { () }
  | eof { failwith (Printf.sprintf "end of file in comment started at (%d,%d)" p.pos_lnum (p.pos_cnum - p.pos_bol)) }
  // Fast path: a run of characters that cannot affect comment structure
  | [^ '\'' '(' '*' '\n' '\r' '"' ')' ]+ { comment p lexbuf }
  | _ { comment p lexbuf }
208 |
209 |
210 |
--------------------------------------------------------------------------------
/tests/fsyacc/OldFsYaccTests.fsx:
--------------------------------------------------------------------------------
1 | #r @"paket:
2 | frameworks: net6.0
3 |
4 | nuget FSharp.Core ~> 5.0
5 | nuget Fake.IO.FileSystem
6 | nuget Fake.DotNet.Fsc
7 | nuget Fake.Core.Trace //"
8 |
9 | #if !FAKE
10 | #load "./.fake/oldfsyacctests.fsx/intellisense.fsx"
11 | #r "netstandard" // Temp fix for https://github.com/fsharp/FAKE/issues/1985
12 | #endif
13 |
14 | open System
15 | open System.Runtime.InteropServices
16 | open System.IO
17 |
18 | open Fake.IO
19 | open Fake.Core
20 |
// Fail the script early if a required file is missing; logs the path otherwise.
let assertFileExists file =
    if not (File.Exists file) then
        failwithf "'%s' doesn't exist" file
    else
        printfn "%s exists!" file
26 |
// Run the dotnet CLI with the given raw argument string; fail on a non-zero exit code.
let dotnet arguments =
    let result = Proc.run (CreateProcess.fromRawCommandLine "dotnet" arguments)

    if result.ExitCode <> 0 then
        failwithf "Failed to run \"dotnet %s\"" arguments
34 |
// Build the given project in Release, then run it with the given arguments.
let run project args =
    sprintf "Running '%s' with args %s" project args |> Trace.traceImportant
    dotnet (sprintf "build %s -c Release" project)
    dotnet (sprintf "run --project %s %s" project args)
39 |
/// Run `proj` with `args`, normalize its captured output, and compare it
/// line-by-line against the baseline file. `shouldBeOK` states whether the
/// process is expected to exit with code 0 (false = an error baseline).
/// On mismatch the actual output is saved next to the baseline as "<bsl>.err".
let test proj shouldBeOK (args, baseLineOutput) =
    Trace.traceImportant <| sprintf "Running '%s' with args '%s'" proj args

    let res =
        CreateProcess.fromRawCommandLine "dotnet" $"run --project {proj} {args}"
        |> CreateProcess.redirectOutput
        |> Proc.run

    // Exit code must agree with the expectation (success for normal baselines,
    // failure for error baselines).
    if (res.ExitCode = 0) <> shouldBeOK then
        failwithf "Process failed with code %d on input %s" res.ExitCode args

    let output =
        // For some reason, the output is captured in the stderr
        res.Result.Error.Split('\n', StringSplitOptions.RemoveEmptyEntries)
        |> Array.map (fun line ->
            // "parsed <path> ..." lines embed an absolute path; strip it down to
            // the file name so output is machine- and OS-independent.
            if line.StartsWith("parsed") then
                let pieces = line.Split(' ')
                let pathPiece = pieces.[1]
                let idx =
                    let value =
                        if RuntimeInformation.IsOSPlatform(OSPlatform.Windows) then
                            @"\"
                        else
                            "/"
                    pathPiece.LastIndexOf(value)
                let pathPiece =
                    if idx >= 0 then
                        pathPiece.[idx+1 ..]
                    else
                        pathPiece
                pieces.[0] + " " + pathPiece + " " + pieces.[2]
            else
                line)

    if (not <| File.Exists baseLineOutput)
    then failwithf "Baseline file '%s' does not exist" baseLineOutput

    let expectedLines = File.ReadAllLines baseLineOutput

    if output.Length <> expectedLines.Length ||
       Seq.map2 (=) output expectedLines |> Seq.exists not
    then
        // Print both sides for diagnosis, persist the actual output, then fail.
        printfn "Expected:"
        for line in expectedLines do
            printfn "\t%s" line

        printfn "Output:"
        for line in output do
            printfn "\t%s" line

        File.WriteAllLines(baseLineOutput+".err", output)
        failwithf "Output is not equal to expected base line '%s'" baseLineOutput
92 |
// Project files of the fslex/fsyacc tools under test (relative to this script).
let fslexProject = Path.Combine(__SOURCE_DIRECTORY__, "..", "..", "src", "FsLex", "fslex.fsproj")
let fsYaccProject = Path.Combine(__SOURCE_DIRECTORY__ , "..", "..", "src", "FsYacc", "fsyacc.fsproj")

assertFileExists fslexProject
assertFileExists fsYaccProject

// Partially-applied runners: build + run each tool with a raw argument string.
let fsLex = run fslexProject
let fsYacc = run fsYaccProject

let repro1885Fsl = Path.Combine(__SOURCE_DIRECTORY__, "repro1885", "repro1885.fsl")
// Regression test for FSB 1885
fsLex repro1885Fsl

// Test 1
// Inputs, generated sources and recorded-baseline (.bsl) files for the Test1 fixture.
let test1lexFs = Path.Combine(__SOURCE_DIRECTORY__, "Test1", "test1lex.fs")
let test1lexFsl = Path.Combine(__SOURCE_DIRECTORY__, "Test1", "test1lex.fsl")
let test1Fs = Path.Combine(__SOURCE_DIRECTORY__, "Test1", "test1.fs")
let test1Fsy = Path.Combine(__SOURCE_DIRECTORY__, "Test1", "test1.fsy")
let test1Input1 = Path.Combine(__SOURCE_DIRECTORY__, "Test1", "test1.input1")
let test1Input1Bsl = Path.Combine(__SOURCE_DIRECTORY__, "Test1", "test1.input1.bsl")
let test1Input1TokensBsl = Path.Combine(__SOURCE_DIRECTORY__, "Test1", "test1.input1.tokens.bsl")
let test1Input2Variation1 = Path.Combine(__SOURCE_DIRECTORY__, "Test1", "test1.input2.variation1")
let test1Input2Variation2 = Path.Combine(__SOURCE_DIRECTORY__, "Test1", "test1.input2.variation2")
let test1Input2Bsl = Path.Combine(__SOURCE_DIRECTORY__, "Test1", "test1.input2.bsl")
let test1Input3 = Path.Combine(__SOURCE_DIRECTORY__, "Test1", "test1.input3")
let test1Input3Bsl = Path.Combine(__SOURCE_DIRECTORY__, "Test1", "test1.input3.bsl")
let test1Input3TokensBsl = Path.Combine(__SOURCE_DIRECTORY__, "Test1", "test1.input3.tokens.bsl")
let test1Proj = Path.Combine(__SOURCE_DIRECTORY__, "Test1", "test1.fsproj")
let test1Input4 = Path.Combine(__SOURCE_DIRECTORY__, "Test1", "test1.input4")
let test1Input4Bsl = Path.Combine(__SOURCE_DIRECTORY__, "Test1", "test1.input4.bsl")
let test1Input4TokensBsl = Path.Combine(__SOURCE_DIRECTORY__, "Test1", "test1.input4.tokens.bsl")
123 | let test1Input4TokensBsl = Path.Combine(__SOURCE_DIRECTORY__, "Test1", "test1.input4.tokens.bsl")
124 |
125 |
// Build the fixture project once, then run every (args, baseline) case against it.
let runTests' shouldBeOK projFile xs =
    dotnet (sprintf "build %s -c Release" projFile)
    for case in xs do
        test projFile shouldBeOK case
// Common case: every run is expected to succeed.
let runTests projFile xs = runTests' true projFile xs
130 |
// Generate Test1's lexer and parser sources, then check tokens and parse
// output against the recorded baselines.
fsLex ("-o " + test1lexFs + " " + test1lexFsl)
fsYacc ("--module TestParser -o " + test1Fs + " " + test1Fsy)
runTests test1Proj [
    sprintf "--tokens %s" test1Input1, test1Input1TokensBsl
    test1Input1, test1Input1Bsl
    test1Input1, test1Input1Bsl
    sprintf "%s %s" test1Input2Variation1 test1Input2Variation2, test1Input2Bsl
    sprintf "--tokens %s" test1Input3, test1Input3TokensBsl
    test1Input3, test1Input3Bsl
]

// Case insensitive option test
// Regenerated with -i: input3 is expected to produce input4's token baseline
// (presumably the two inputs differ only in letter case — confirm in fixtures).
fsLex ("-i -o " + test1lexFs + " " + test1lexFsl)
runTests test1Proj [
    sprintf "--tokens %s" test1Input4, test1Input4TokensBsl
    sprintf "--tokens %s" test1Input3, test1Input4TokensBsl
    sprintf "%s %s" test1Input3 test1Input4, test1Input4Bsl
]
149 |
// Test 1 unicode
// Same fixture compiled with --unicode, plus a title-case-letter input that
// must be rejected case-sensitively and accepted with -i (see below).
let test1unicodelexFs = Path.Combine(__SOURCE_DIRECTORY__, "unicode", "test1-unicode-lex.fs")
let test1unicodelexFsl = Path.Combine(__SOURCE_DIRECTORY__, "unicode", "test1-unicode-lex.fsl")
let test1unicodeFs = Path.Combine(__SOURCE_DIRECTORY__, "unicode", "test1-unicode.fs")
let test1unicodeFsy = Path.Combine(__SOURCE_DIRECTORY__, "unicode", "test1-unicode.fsy")
let test1unicodeProj = Path.Combine(__SOURCE_DIRECTORY__, "unicode", "test1-unicode.fsproj")
let test1unicodeInput3 = Path.Combine(__SOURCE_DIRECTORY__, "unicode", "test1-unicode.input3.utf8")
let test1unicodeInput3TokensBsl = Path.Combine(__SOURCE_DIRECTORY__, "unicode", "test1-unicode.input3.tokens.bsl")
let test1unicodeWithTitleCaseLetter = Path.Combine(__SOURCE_DIRECTORY__, "unicode", "test1-unicode.WithTitleCaseLetter.utf8")
let test1unicodeWithTitleCaseLetterTokensBsl = Path.Combine(__SOURCE_DIRECTORY__, "unicode", "test1-unicode.WithTitleCaseLetter.tokens.bsl")
let test1unicodeWithTitleCaseLetterTokensErrorBsl = Path.Combine(__SOURCE_DIRECTORY__, "unicode", "test1-unicode.WithTitleCaseLetter.tokens.error.bsl")

fsLex ("--unicode -o " + test1unicodelexFs + " " + test1unicodelexFsl)
fsYacc ("--module TestParser -o " + test1unicodeFs + " " + test1unicodeFsy)

runTests test1unicodeProj [
    sprintf "--tokens %s" test1Input1, test1Input1TokensBsl
    test1Input1, test1Input1Bsl
    sprintf "%s %s" test1Input2Variation1 test1Input2Variation2, test1Input2Bsl
    sprintf "--tokens %s" test1unicodeInput3, test1unicodeInput3TokensBsl
]

// The title-case input must FAIL with the case-sensitive lexer; the run is
// compared against the expected-error baseline.
runTests' false test1unicodeProj [
    sprintf "--tokens %s" test1unicodeWithTitleCaseLetter, test1unicodeWithTitleCaseLetterTokensErrorBsl
]

// Case insensitive option test
fsLex ("--unicode -i -o " + test1unicodelexFs + " " + test1unicodelexFsl)
runTests test1unicodeProj [
    sprintf "--tokens %s" test1Input1, test1Input1TokensBsl
    test1Input1, test1Input1Bsl
    sprintf "%s %s" test1Input2Variation1 test1Input2Variation2, test1Input2Bsl
    sprintf "--tokens %s" test1unicodeInput3, test1unicodeInput3TokensBsl
    sprintf "--tokens %s" test1unicodeWithTitleCaseLetter, test1unicodeWithTitleCaseLetterTokensBsl
]
185 |
// Test 2
// NOTE(review): test2lexFsl deliberately(?) points at Test1's lexer spec —
// Test2 appears to reuse Test1's lexer with its own grammar; confirm intended.
let test2lexFs = Path.Combine(__SOURCE_DIRECTORY__, "Test2", "test2lex.fs")
let test2lexFsl = Path.Combine(__SOURCE_DIRECTORY__, "Test1", "test1lex.fsl")
let test2Fs = Path.Combine(__SOURCE_DIRECTORY__, "Test2", "test2.fs")
let test2Fsy = Path.Combine(__SOURCE_DIRECTORY__, "Test2", "test2.fsy")
let test2Proj = Path.Combine(__SOURCE_DIRECTORY__, "Test2", "test2.fsproj")
let test2Input1 = Path.Combine(__SOURCE_DIRECTORY__, "Test2", "test2.input1")
let test2Input1TokensBsl = Path.Combine(__SOURCE_DIRECTORY__, "Test2", "test2.input1.tokens.bsl")
let test2BadInput = Path.Combine(__SOURCE_DIRECTORY__, "Test2", "test2.badInput")
let test2BadInputTokensBsl = Path.Combine(__SOURCE_DIRECTORY__, "Test2", "test2.badInput.tokens.bsl")

fsLex ("-o " + test2lexFs + " " + test2lexFsl)
fsYacc ("--module TestParser -o " + test2Fs + " " + test2Fsy)

runTests test2Proj [
    sprintf "--tokens %s" test2Input1, test2Input1TokensBsl
    sprintf "--tokens %s" test2BadInput, test2BadInputTokensBsl
]

// #141 TODO
// Regression check: fslex must succeed with -i in both ASCII and unicode modes.
let repro141Fsl = Path.Combine(__SOURCE_DIRECTORY__, "repro_#141", "Lexer_fail_option_i.fsl")
let repro141Fs = Path.Combine(__SOURCE_DIRECTORY__, "repro_#141", "Lexer_fail_option_i.fs")
fsLex ("-i -o " + repro141Fs + " " + repro141Fsl)
fsLex ("--unicode -i -o " + repro141Fs + " " + repro141Fsl)
210 |
--------------------------------------------------------------------------------
/src/FsLex.Core/fslexdriver.fs:
--------------------------------------------------------------------------------
1 | module FsLexYacc.FsLex.Driver
2 |
3 | open FsLexYacc.FsLex.AST
4 | open System
5 | open System.IO
6 | open FSharp.Text.Lexing
7 | open System.Collections.Generic
8 |
/// Character-set domain targeted by the generated lexer tables.
type Domain =
    /// Full Unicode rows: low chars + specific chars + Unicode categories + EOF.
    | Unicode
    /// 8-bit rows: 256 character entries + EOF.
    | ASCII
12 |
/// Wraps the inputs to the code generator
type GeneratorState =
    {
        // Path of the .fsl spec — not referenced in this file; presumably used
        // by the CLI driver for diagnostics (TODO confirm).
        inputFileName: string
        // Path of the generated .fs file; the interface file is this + "i".
        outputFileName: string
        // Encoding used to read the input spec (TODO confirm caller usage).
        inputCodePage: System.Text.Encoding
        // When Some, a "module ..." header is emitted into both outputs.
        generatedModuleName: string option
        // Controls emission of `#light "off"` (see writeLightMode).
        disableLightMode: bool option
        // When true the generated module is marked "internal".
        generateInternalModule: bool
        // Namespaces to "open" at the top of both generated files.
        opens: string list
        // Library whose ...Tables.Create the generated code references.
        lexerLibraryName: string
        // Target character-set domain (Unicode or ASCII tables).
        domain: Domain
    }
26 |
// Per-rule data: the rule's DFA start node plus its semantic actions; DfaNodes
// is the full node list of the constructed DFA.
// NOTE(review): the generic arguments of both aliases appear to have been lost
// in this copy of the file ("list>") — restore them from the original source.
type PerRuleData = list>
type DfaNodes = list
29 |
/// Writes the generated implementation file and its companion interface file,
/// tracking the number of lines written to each so that "#line"-style
/// directives can be emitted with correct line numbers.
type Writer(outputFileName, outputFileInterface) =
    let os = File.CreateText outputFileName :> TextWriter
    // Lines written to the implementation file so far.
    let mutable lineCount = 0
    let osi = File.CreateText outputFileInterface :> TextWriter
    // Lines written to the interface file so far.
    let mutable interfaceLineCount = 0
    let incr () = lineCount <- lineCount + 1

    /// printf-style write of one line to the implementation file (counted).
    member x.WriteLine fmt =
        Printf.kfprintf
            (fun () ->
                incr ()
                os.WriteLine())
            os
            fmt

    /// printf-style write with no newline (not counted: adds no line).
    member x.Write fmt = Printf.fprintf os fmt

    /// Write a user code blob bracketed by line directives: one pointing at the
    /// blob's original source position, one pointing back at the output file.
    member x.WriteCode(code, pos: Position) =
        if
            pos <> Position.Empty // If bottom code is unspecified, then position is empty.
        then
            x.WriteLine "# %d \"%s\"" pos.Line pos.FileName
            x.WriteLine "%s" code
            // WriteLine only counted 1 for the blob; add the lines it contains.
            let numLines = code.Replace("\r", "").Split([| '\n' |]).Length
            lineCount <- lineCount + numLines
            x.WriteLine "# %d \"%s\"" lineCount outputFileName

    /// Emit one table entry as an F# uint16 literal followed by ";".
    member x.WriteUint16(n: int) =
        os.Write n
        os.Write "us;"

    member x.LineCount = lineCount

    /// printf-style write to the interface file with no newline (not counted).
    member x.WriteInterface format = fprintf osi format

    /// printf-style write of one line to the interface file (counted).
    member x.WriteLineInterface format =
        Printf.kfprintf
            (fun _ ->
                interfaceLineCount <- interfaceLineCount + 1
                osi.WriteLine())
            osi
            format

    member x.InterfaceLineCount = interfaceLineCount

    interface IDisposable with
        member x.Dispose() =
            os.Dispose()
            osi.Dispose()
79 |
80 | let sentinel = 255 * 256 + 255
81 |
/// Parse a .fsl specification file. Returns Ok spec on success, or
/// Error (exn, line, column) with the lexer position at which parsing failed.
let readSpecFromFile fileName codePage =
    let stream, reader, lexbuf = UnicodeFileAsLexbuf(fileName, codePage)
    // UnicodeFileAsLexbuf leaves disposal to the caller; bind with `use`.
    use stream = stream
    use reader = reader

    try
        Parser.spec Lexer.token lexbuf |> Ok
    with e ->
        Error(e, lexbuf.StartPos.Line, lexbuf.StartPos.Column)
92 |
/// Emit `#light "off"` at the top of the generated file when verbose syntax is
/// required: either explicitly requested (Some false), or implied by an
/// OCaml-style ".ml" output name when no preference was given (None).
let writeLightMode lightModeDisabled (fileName: string) (writer: Writer) =
    // Path.GetExtension returns "" for extension-less paths, so the previous
    // extra Path.HasExtension guard was redundant and has been dropped.
    if
        lightModeDisabled = Some false
        || (lightModeDisabled = None && Path.GetExtension(fileName) = ".ml")
    then
        writer.Write "#light \"off\""
100 |
/// Write the "module" header to both generated files when a module name was
/// requested; does nothing otherwise.
let writeModuleExpression genModuleName isInternal (writer: Writer) =
    match genModuleName with
    | Some name ->
        let accessibility = if isInternal then "internal " else ""
        writer.WriteLine "module %s%s" accessibility name
        writer.WriteLineInterface "module %s%s" accessibility name
    | None -> ()
108 |
/// Write the requested "open" declarations to both generated files, padded
/// with a blank line before and (when any opens exist) after the group.
let writeOpens opens (writer: Writer) =
    writer.WriteLine ""
    writer.WriteLineInterface ""

    opens
    |> List.iter (fun ns ->
        writer.WriteLine "open %s" ns
        writer.WriteLineInterface "open %s" ns)

    if not (List.isEmpty opens) then
        writer.WriteLine ""
        writer.WriteLineInterface ""
120 |
/// Write the user's header code into the implementation file; the interface
/// file only receives its "module"/"open" lines.
let writeTopCode code (writer: Writer) =
    writer.WriteCode code

    let isModuleOrOpen (line: string) =
        line.StartsWith("module ", StringComparison.Ordinal)
        || line.StartsWith("open ", StringComparison.Ordinal)

    let moduleAndOpens =
        (fst code).Split([| '\n'; '\r' |])
        |> Array.filter isModuleOrOpen
        |> String.concat Environment.NewLine

    writer.WriteInterface "%s" moduleAndOpens
132 |
/// Emit the DFA transition table "trans": one row of encoded uint16 next-state
/// entries per DFA state. The row layout depends on the character domain (see
/// the comments on each branch).
let writeUnicodeTranslationArray dfaNodes domain (writer: Writer) =
    let parseContext =
        {
            unicode =
                match domain with
                | Unicode -> true
                | ASCII -> false
            // Case-insensitivity affects spec parsing only, not table emission.
            caseInsensitive = false
        }

    writer.WriteLine "let trans : uint16[] array = "
    writer.WriteLine " [| "

    match domain with
    | Unicode ->
        let specificUnicodeChars = GetSpecificUnicodeChars()
        // This emits a (numLowUnicodeChars+NumUnicodeCategories+(2*#specificUnicodeChars)+1) * #states array of encoded UInt16 values

        // Each row for the Unicode table has format
        //   128 entries for ASCII characters
        //   A variable number of 2*UInt16 entries for SpecificUnicodeChars
        //   30 entries, one for each UnicodeCategory
        //   1 entry for EOF
        //
        // Each entry is an encoded UInt16 value indicating the next state to transition to for this input.
        //
        // For the SpecificUnicodeChars the entries are char/next-state pairs.
        for state in dfaNodes do
            writer.WriteLine " (* State %d *)" state.Id
            writer.Write " [| "

            // Index this state's transitions by encoded input for O(1) lookup.
            let trans =
                let dict = Dictionary()
                state.Transitions |> List.iter dict.Add
                dict

            // Write the destination state for input n, or sentinel if none.
            let emit n =
                if trans.ContainsKey(n) then
                    writer.WriteUint16 trans.[n].Id
                else
                    writer.WriteUint16 sentinel

            for i = 0 to numLowUnicodeChars - 1 do
                let c = char i
                emit (EncodeChar c parseContext)

            for c in specificUnicodeChars do
                writer.WriteUint16(int c)
                emit (EncodeChar c parseContext)

            for i = 0 to NumUnicodeCategories - 1 do
                emit (EncodeUnicodeCategoryIndex i)

            emit Eof
            writer.WriteLine "|];"

    | ASCII ->
        // Each row for the ASCII table has format
        //   256 entries for ASCII characters
        //   1 entry for EOF
        //
        // Each entry is an encoded UInt16 value indicating the next state to transition to for this input.

        // This emits a (256+1) * #states array of encoded UInt16 values
        for state in dfaNodes do
            writer.WriteLine " (* State %d *)" state.Id
            writer.Write " [|"

            let trans =
                let dict = Dictionary()
                state.Transitions |> List.iter dict.Add
                dict

            let emit n =
                if trans.ContainsKey(n) then
                    writer.WriteUint16 trans.[n].Id
                else
                    writer.WriteUint16 sentinel

            for i = 0 to 255 do
                let c = char i
                emit (EncodeChar c parseContext)

            emit Eof
            writer.WriteLine "|];"

    writer.WriteLine " |] "
220 |
/// Emit the "actions" table: one entry per DFA state holding the id of the
/// accepted action, or sentinel when the state accepts nothing.
let writeUnicodeActionsArray dfaNodes (writer: Writer) =
    writer.Write "let actions : uint16[] = [|"

    for state in dfaNodes do
        match state.Accepted with
        | (_, actionId) :: _ -> writer.WriteUint16 actionId
        | [] -> writer.WriteUint16 sentinel

    writer.WriteLine "|]"
231 |
/// Emit both DFA tables followed by the `_fslex_tables` binding that hands
/// them to the runtime's Unicode/Ascii table factory.
let writeUnicodeTables lexerLibraryName domain dfaNodes (writer: Writer) =
    writeUnicodeTranslationArray dfaNodes domain writer
    writeUnicodeActionsArray dfaNodes writer

    let tableKind =
        match domain with
        | Unicode -> "Unicode"
        | ASCII -> "Ascii"

    writer.WriteLine "let _fslex_tables = %s.%sTables.Create(trans,actions)" lexerLibraryName tableKind
242 |
/// Emit one generated function per lexer rule (plus its interface-file
/// signature). All rules form a single "let rec ... and ..." group so their
/// actions can call one another; startNode.Id selects each rule's DFA entry state.
let writeRules (rules: Rule list) (perRuleData: PerRuleData) outputFileName (writer: Writer) =
    // Dummy first binding so every real rule can uniformly begin with "and".
    writer.WriteLine "let rec _fslex_dummy () = _fslex_dummy() "

    // These actions push the additional start state and come first, because they are then typically inlined into later
    // rules. This means more tailcalls are taken as direct branches, increasing efficiency and
    // improving stack usage on platforms that do not take tailcalls.
    for (startNode, actions), (ident, args, _) in List.zip perRuleData rules do
        writer.WriteLine "// Rule %s" ident
        writer.WriteLineInterface "/// Rule %s" ident

        let arguments =
            args
            |> List.map (function
                | RuleArgument.Ident ident -> ident
                | RuleArgument.Typed(ident, typ) -> sprintf "(%s: %s)" ident typ)
            |> String.concat " "

        writer.WriteLine "and %s %s lexbuf =" ident arguments

        let signature =
            if List.isEmpty args then
                sprintf "val %s: lexbuf: LexBuffer -> token" ident
            else
                args
                |> List.map (function
                    | RuleArgument.Ident ident ->
                        // This is not going to lead to a valid signature file, the only workaround is that the caller will specify the type.
                        sprintf "%s: obj" ident
                    | RuleArgument.Typed(ident, typ) -> sprintf "%s: %s" ident typ)
                |> String.concat " -> "
                |> sprintf "val %s: %s -> lexbuf: LexBuffer -> token" ident

        writer.WriteLineInterface "%s" signature

        writer.WriteLine " match _fslex_tables.Interpret(%d,lexbuf) with" startNode.Id

        // One match case per semantic action, bracketed by line directives so
        // compiler errors in the action point back into the original .fsl file.
        actions
        |> Seq.iteri (fun i (code: string, pos) ->
            writer.WriteLine " | %d -> ( " i
            writer.WriteLine "# %d \"%s\"" pos.Line pos.FileName
            let lines = code.Split([| '\r'; '\n' |], StringSplitOptions.RemoveEmptyEntries)

            for line in lines do
                writer.WriteLine " %s" line

            writer.WriteLine "# %d \"%s\"" writer.LineCount outputFileName
            writer.WriteLine " )")

        writer.WriteLine " | _ -> failwith \"%s\"" ident

    writer.WriteLine ""
294 |
295 | let writeBottomCode code (writer: Writer) = writer.WriteCode code
296 |
// Emit a final line directive pointing back at the generated file.
// NOTE(review): the very large line number 3000000 is presumably a deliberate
// marker distinguishing the generated footer region — confirm intent.
let writeFooter outputFileName (writer: Writer) =
    writer.WriteLine "# 3000000 \"%s\"" outputFileName
299 |
/// Generate the lexer implementation file and its interface file from a parsed
/// spec, the per-rule DFA data and the full DFA node list. The interface file
/// path is the implementation path with a trailing "i" (foo.fs -> foo.fsi).
let writeSpecToFile (state: GeneratorState) (spec: Spec) (perRuleData: PerRuleData) (dfaNodes: DfaNodes) =
    let implFile = state.outputFileName
    let interfaceFile = implFile + "i"
    use writer = new Writer(implFile, interfaceFile)
    writeLightMode state.disableLightMode implFile writer
    writeModuleExpression state.generatedModuleName state.generateInternalModule writer
    writeOpens state.opens writer
    writeTopCode spec.TopCode writer
    writeUnicodeTables state.lexerLibraryName state.domain dfaNodes writer
    writeRules spec.Rules perRuleData implFile writer
    writeBottomCode spec.BottomCode writer
    writeFooter implFile writer
312 |
--------------------------------------------------------------------------------
/FsLexYacc.sln:
--------------------------------------------------------------------------------
1 | Microsoft Visual Studio Solution File, Format Version 12.00
2 | # Visual Studio Version 16
3 | VisualStudioVersion = 16.0.28803.202
4 | MinimumVisualStudioVersion = 10.0.40219.1
5 | Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "project", "project", "{BF60BC93-E09B-4E5F-9D85-95A519479D54}"
6 | ProjectSection(SolutionItems) = preProject
7 | build.fsx = build.fsx
8 | nuget\FsLexYacc.template = nuget\FsLexYacc.template
9 | README.md = README.md
10 | RELEASE_NOTES.md = RELEASE_NOTES.md
11 | EndProjectSection
12 | EndProject
13 | Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "tests", "tests", "{ED8079DD-2B06-4030-9F0F-DC548F98E1C4}"
14 | ProjectSection(SolutionItems) = preProject
15 | tests\fsyacc\OldFsYaccTests.fsx = tests\fsyacc\OldFsYaccTests.fsx
16 | EndProjectSection
17 | EndProject
18 | Project("{F2A71F9B-5D33-465A-A702-920D77279786}") = "JsonLexAndYaccExample", "tests\JsonLexAndYaccExample\JsonLexAndYaccExample.fsproj", "{3A7662D3-A30C-4BD4-BA0A-08A53DC59445}"
19 | EndProject
20 | Project("{F2A71F9B-5D33-465A-A702-920D77279786}") = "LexAndYaccMiniProject", "tests\LexAndYaccMiniProject\LexAndYaccMiniProject.fsproj", "{91D0BE7A-E128-498A-BB68-6ED65A582E04}"
21 | EndProject
22 | Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "docs", "docs", "{4BB66375-380B-4EBD-9BA6-40CE92EB3D98}"
23 | EndProject
24 | Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "src", "src", "{884C599D-FDE2-4AC3-828A-12F6C662F273}"
25 | EndProject
26 | Project("{F2A71F9B-5D33-465A-A702-920D77279786}") = "FsLexYacc.Runtime", "src\FsLexYacc.Runtime\FsLexYacc.Runtime.fsproj", "{31A44BBA-0A6C-48FE-BB45-5BC23190A587}"
27 | EndProject
28 | Project("{F2A71F9B-5D33-465A-A702-920D77279786}") = "FsLex.Core", "src\FsLex.Core\FsLex.Core.fsproj", "{52D108CA-B379-4C30-BD85-0AE8E0C5723B}"
29 | EndProject
30 | Project("{F2A71F9B-5D33-465A-A702-920D77279786}") = "fslex", "src\FsLex\fslex.fsproj", "{D64B2492-43AA-4436-B6D5-6CBFE44989DF}"
31 | EndProject
32 | Project("{F2A71F9B-5D33-465A-A702-920D77279786}") = "FsYacc.Core", "src\FsYacc.Core\FsYacc.Core.fsproj", "{C73D328C-4247-4F99-81BF-2E274410E9C4}"
33 | EndProject
34 | Project("{F2A71F9B-5D33-465A-A702-920D77279786}") = "fsyacc", "src\FsYacc\fsyacc.fsproj", "{FC9E0584-0139-4D02-8017-29AD01282449}"
35 | EndProject
36 | Project("{F2A71F9B-5D33-465A-A702-920D77279786}") = "FsLex.Core.Tests", "tests\FsLex.Core.Tests\FsLex.Core.Tests.fsproj", "{BEC28BC7-9F2E-4B2D-948B-F5E0648FB928}"
37 | EndProject
38 | Project("{F2A71F9B-5D33-465A-A702-920D77279786}") = "FsYacc.Core.Tests", "tests\FsYacc.Core.Tests\FsYacc.Core.Tests.fsproj", "{F66C2590-5FDD-4962-9EEB-AD1B74545EAE}"
39 | EndProject
40 | Global
41 | GlobalSection(SolutionConfigurationPlatforms) = preSolution
42 | Debug|Any CPU = Debug|Any CPU
43 | Debug|Mixed Platforms = Debug|Mixed Platforms
44 | Debug|x86 = Debug|x86
45 | Release|Any CPU = Release|Any CPU
46 | Release|Mixed Platforms = Release|Mixed Platforms
47 | Release|x86 = Release|x86
48 | EndGlobalSection
49 | GlobalSection(ProjectConfigurationPlatforms) = postSolution
50 | {3A7662D3-A30C-4BD4-BA0A-08A53DC59445}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
51 | {3A7662D3-A30C-4BD4-BA0A-08A53DC59445}.Debug|Any CPU.Build.0 = Debug|Any CPU
52 | {3A7662D3-A30C-4BD4-BA0A-08A53DC59445}.Debug|Mixed Platforms.ActiveCfg = Debug|Any CPU
53 | {3A7662D3-A30C-4BD4-BA0A-08A53DC59445}.Debug|Mixed Platforms.Build.0 = Debug|Any CPU
54 | {3A7662D3-A30C-4BD4-BA0A-08A53DC59445}.Debug|x86.ActiveCfg = Debug|Any CPU
55 | {3A7662D3-A30C-4BD4-BA0A-08A53DC59445}.Debug|x86.Build.0 = Debug|Any CPU
56 | {3A7662D3-A30C-4BD4-BA0A-08A53DC59445}.Release|Any CPU.ActiveCfg = Release|Any CPU
57 | {3A7662D3-A30C-4BD4-BA0A-08A53DC59445}.Release|Any CPU.Build.0 = Release|Any CPU
58 | {3A7662D3-A30C-4BD4-BA0A-08A53DC59445}.Release|Mixed Platforms.ActiveCfg = Release|Any CPU
59 | {3A7662D3-A30C-4BD4-BA0A-08A53DC59445}.Release|Mixed Platforms.Build.0 = Release|Any CPU
60 | {3A7662D3-A30C-4BD4-BA0A-08A53DC59445}.Release|x86.ActiveCfg = Release|Any CPU
61 | {3A7662D3-A30C-4BD4-BA0A-08A53DC59445}.Release|x86.Build.0 = Release|Any CPU
62 | {91D0BE7A-E128-498A-BB68-6ED65A582E04}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
63 | {91D0BE7A-E128-498A-BB68-6ED65A582E04}.Debug|Any CPU.Build.0 = Debug|Any CPU
64 | {91D0BE7A-E128-498A-BB68-6ED65A582E04}.Debug|Mixed Platforms.ActiveCfg = Debug|Any CPU
65 | {91D0BE7A-E128-498A-BB68-6ED65A582E04}.Debug|Mixed Platforms.Build.0 = Debug|Any CPU
66 | {91D0BE7A-E128-498A-BB68-6ED65A582E04}.Debug|x86.ActiveCfg = Debug|Any CPU
67 | {91D0BE7A-E128-498A-BB68-6ED65A582E04}.Debug|x86.Build.0 = Debug|Any CPU
68 | {91D0BE7A-E128-498A-BB68-6ED65A582E04}.Release|Any CPU.ActiveCfg = Release|Any CPU
69 | {91D0BE7A-E128-498A-BB68-6ED65A582E04}.Release|Any CPU.Build.0 = Release|Any CPU
70 | {91D0BE7A-E128-498A-BB68-6ED65A582E04}.Release|Mixed Platforms.ActiveCfg = Release|Any CPU
71 | {91D0BE7A-E128-498A-BB68-6ED65A582E04}.Release|Mixed Platforms.Build.0 = Release|Any CPU
72 | {91D0BE7A-E128-498A-BB68-6ED65A582E04}.Release|x86.ActiveCfg = Release|Any CPU
73 | {91D0BE7A-E128-498A-BB68-6ED65A582E04}.Release|x86.Build.0 = Release|Any CPU
74 | {52D108CA-B379-4C30-BD85-0AE8E0C5723B}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
75 | {52D108CA-B379-4C30-BD85-0AE8E0C5723B}.Debug|Any CPU.Build.0 = Debug|Any CPU
76 | {52D108CA-B379-4C30-BD85-0AE8E0C5723B}.Debug|Mixed Platforms.ActiveCfg = Debug|Any CPU
77 | {52D108CA-B379-4C30-BD85-0AE8E0C5723B}.Debug|Mixed Platforms.Build.0 = Debug|Any CPU
78 | {52D108CA-B379-4C30-BD85-0AE8E0C5723B}.Debug|x86.ActiveCfg = Debug|Any CPU
79 | {52D108CA-B379-4C30-BD85-0AE8E0C5723B}.Debug|x86.Build.0 = Debug|Any CPU
80 | {52D108CA-B379-4C30-BD85-0AE8E0C5723B}.Release|Any CPU.ActiveCfg = Release|Any CPU
81 | {52D108CA-B379-4C30-BD85-0AE8E0C5723B}.Release|Any CPU.Build.0 = Release|Any CPU
82 | {52D108CA-B379-4C30-BD85-0AE8E0C5723B}.Release|Mixed Platforms.ActiveCfg = Release|Any CPU
83 | {52D108CA-B379-4C30-BD85-0AE8E0C5723B}.Release|Mixed Platforms.Build.0 = Release|Any CPU
84 | {52D108CA-B379-4C30-BD85-0AE8E0C5723B}.Release|x86.ActiveCfg = Release|Any CPU
85 | {52D108CA-B379-4C30-BD85-0AE8E0C5723B}.Release|x86.Build.0 = Release|Any CPU
86 | {C73D328C-4247-4F99-81BF-2E274410E9C4}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
87 | {C73D328C-4247-4F99-81BF-2E274410E9C4}.Debug|Any CPU.Build.0 = Debug|Any CPU
88 | {C73D328C-4247-4F99-81BF-2E274410E9C4}.Debug|Mixed Platforms.ActiveCfg = Debug|Any CPU
89 | {C73D328C-4247-4F99-81BF-2E274410E9C4}.Debug|Mixed Platforms.Build.0 = Debug|Any CPU
90 | {C73D328C-4247-4F99-81BF-2E274410E9C4}.Debug|x86.ActiveCfg = Debug|Any CPU
91 | {C73D328C-4247-4F99-81BF-2E274410E9C4}.Debug|x86.Build.0 = Debug|Any CPU
92 | {C73D328C-4247-4F99-81BF-2E274410E9C4}.Release|Any CPU.ActiveCfg = Release|Any CPU
93 | {C73D328C-4247-4F99-81BF-2E274410E9C4}.Release|Any CPU.Build.0 = Release|Any CPU
94 | {C73D328C-4247-4F99-81BF-2E274410E9C4}.Release|Mixed Platforms.ActiveCfg = Release|Any CPU
95 | {C73D328C-4247-4F99-81BF-2E274410E9C4}.Release|Mixed Platforms.Build.0 = Release|Any CPU
96 | {C73D328C-4247-4F99-81BF-2E274410E9C4}.Release|x86.ActiveCfg = Release|Any CPU
97 | {C73D328C-4247-4F99-81BF-2E274410E9C4}.Release|x86.Build.0 = Release|Any CPU
98 | {FC9E0584-0139-4D02-8017-29AD01282449}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
99 | {FC9E0584-0139-4D02-8017-29AD01282449}.Debug|Any CPU.Build.0 = Debug|Any CPU
100 | {FC9E0584-0139-4D02-8017-29AD01282449}.Debug|Mixed Platforms.ActiveCfg = Debug|Any CPU
101 | {FC9E0584-0139-4D02-8017-29AD01282449}.Debug|Mixed Platforms.Build.0 = Debug|Any CPU
102 | {FC9E0584-0139-4D02-8017-29AD01282449}.Debug|x86.ActiveCfg = Debug|Any CPU
103 | {FC9E0584-0139-4D02-8017-29AD01282449}.Debug|x86.Build.0 = Debug|Any CPU
104 | {FC9E0584-0139-4D02-8017-29AD01282449}.Release|Any CPU.ActiveCfg = Release|Any CPU
105 | {FC9E0584-0139-4D02-8017-29AD01282449}.Release|Any CPU.Build.0 = Release|Any CPU
106 | {FC9E0584-0139-4D02-8017-29AD01282449}.Release|Mixed Platforms.ActiveCfg = Release|Any CPU
107 | {FC9E0584-0139-4D02-8017-29AD01282449}.Release|Mixed Platforms.Build.0 = Release|Any CPU
108 | {FC9E0584-0139-4D02-8017-29AD01282449}.Release|x86.ActiveCfg = Release|Any CPU
109 | {FC9E0584-0139-4D02-8017-29AD01282449}.Release|x86.Build.0 = Release|Any CPU
110 | {D64B2492-43AA-4436-B6D5-6CBFE44989DF}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
111 | {D64B2492-43AA-4436-B6D5-6CBFE44989DF}.Debug|Any CPU.Build.0 = Debug|Any CPU
112 | {D64B2492-43AA-4436-B6D5-6CBFE44989DF}.Debug|Mixed Platforms.ActiveCfg = Debug|Any CPU
113 | {D64B2492-43AA-4436-B6D5-6CBFE44989DF}.Debug|Mixed Platforms.Build.0 = Debug|Any CPU
114 | {D64B2492-43AA-4436-B6D5-6CBFE44989DF}.Debug|x86.ActiveCfg = Debug|Any CPU
115 | {D64B2492-43AA-4436-B6D5-6CBFE44989DF}.Debug|x86.Build.0 = Debug|Any CPU
116 | {D64B2492-43AA-4436-B6D5-6CBFE44989DF}.Release|Any CPU.ActiveCfg = Release|Any CPU
117 | {D64B2492-43AA-4436-B6D5-6CBFE44989DF}.Release|Any CPU.Build.0 = Release|Any CPU
118 | {D64B2492-43AA-4436-B6D5-6CBFE44989DF}.Release|Mixed Platforms.ActiveCfg = Release|Any CPU
119 | {D64B2492-43AA-4436-B6D5-6CBFE44989DF}.Release|Mixed Platforms.Build.0 = Release|Any CPU
120 | {D64B2492-43AA-4436-B6D5-6CBFE44989DF}.Release|x86.ActiveCfg = Release|Any CPU
121 | {D64B2492-43AA-4436-B6D5-6CBFE44989DF}.Release|x86.Build.0 = Release|Any CPU
122 | {31A44BBA-0A6C-48FE-BB45-5BC23190A587}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
123 | {31A44BBA-0A6C-48FE-BB45-5BC23190A587}.Debug|Any CPU.Build.0 = Debug|Any CPU
124 | {31A44BBA-0A6C-48FE-BB45-5BC23190A587}.Debug|Mixed Platforms.ActiveCfg = Debug|Any CPU
125 | {31A44BBA-0A6C-48FE-BB45-5BC23190A587}.Debug|Mixed Platforms.Build.0 = Debug|Any CPU
126 | {31A44BBA-0A6C-48FE-BB45-5BC23190A587}.Debug|x86.ActiveCfg = Debug|Any CPU
127 | {31A44BBA-0A6C-48FE-BB45-5BC23190A587}.Debug|x86.Build.0 = Debug|Any CPU
128 | {31A44BBA-0A6C-48FE-BB45-5BC23190A587}.Release|Any CPU.ActiveCfg = Release|Any CPU
129 | {31A44BBA-0A6C-48FE-BB45-5BC23190A587}.Release|Any CPU.Build.0 = Release|Any CPU
130 | {31A44BBA-0A6C-48FE-BB45-5BC23190A587}.Release|Mixed Platforms.ActiveCfg = Release|Any CPU
131 | {31A44BBA-0A6C-48FE-BB45-5BC23190A587}.Release|Mixed Platforms.Build.0 = Release|Any CPU
132 | {31A44BBA-0A6C-48FE-BB45-5BC23190A587}.Release|x86.ActiveCfg = Release|Any CPU
133 | {31A44BBA-0A6C-48FE-BB45-5BC23190A587}.Release|x86.Build.0 = Release|Any CPU
134 | {BEC28BC7-9F2E-4B2D-948B-F5E0648FB928}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
135 | {BEC28BC7-9F2E-4B2D-948B-F5E0648FB928}.Debug|Any CPU.Build.0 = Debug|Any CPU
136 | {BEC28BC7-9F2E-4B2D-948B-F5E0648FB928}.Debug|Mixed Platforms.ActiveCfg = Debug|Any CPU
137 | {BEC28BC7-9F2E-4B2D-948B-F5E0648FB928}.Debug|Mixed Platforms.Build.0 = Debug|Any CPU
138 | {BEC28BC7-9F2E-4B2D-948B-F5E0648FB928}.Debug|x86.ActiveCfg = Debug|Any CPU
139 | {BEC28BC7-9F2E-4B2D-948B-F5E0648FB928}.Debug|x86.Build.0 = Debug|Any CPU
140 | {BEC28BC7-9F2E-4B2D-948B-F5E0648FB928}.Release|Any CPU.ActiveCfg = Release|Any CPU
141 | {BEC28BC7-9F2E-4B2D-948B-F5E0648FB928}.Release|Any CPU.Build.0 = Release|Any CPU
142 | {BEC28BC7-9F2E-4B2D-948B-F5E0648FB928}.Release|Mixed Platforms.ActiveCfg = Release|Any CPU
143 | {BEC28BC7-9F2E-4B2D-948B-F5E0648FB928}.Release|Mixed Platforms.Build.0 = Release|Any CPU
144 | {BEC28BC7-9F2E-4B2D-948B-F5E0648FB928}.Release|x86.ActiveCfg = Release|Any CPU
145 | {BEC28BC7-9F2E-4B2D-948B-F5E0648FB928}.Release|x86.Build.0 = Release|Any CPU
146 | {F66C2590-5FDD-4962-9EEB-AD1B74545EAE}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
147 | {F66C2590-5FDD-4962-9EEB-AD1B74545EAE}.Debug|Any CPU.Build.0 = Debug|Any CPU
148 | {F66C2590-5FDD-4962-9EEB-AD1B74545EAE}.Debug|Mixed Platforms.ActiveCfg = Debug|Any CPU
149 | {F66C2590-5FDD-4962-9EEB-AD1B74545EAE}.Debug|Mixed Platforms.Build.0 = Debug|Any CPU
150 | {F66C2590-5FDD-4962-9EEB-AD1B74545EAE}.Debug|x86.ActiveCfg = Debug|Any CPU
151 | {F66C2590-5FDD-4962-9EEB-AD1B74545EAE}.Debug|x86.Build.0 = Debug|Any CPU
152 | {F66C2590-5FDD-4962-9EEB-AD1B74545EAE}.Release|Any CPU.ActiveCfg = Release|Any CPU
153 | {F66C2590-5FDD-4962-9EEB-AD1B74545EAE}.Release|Any CPU.Build.0 = Release|Any CPU
154 | {F66C2590-5FDD-4962-9EEB-AD1B74545EAE}.Release|Mixed Platforms.ActiveCfg = Release|Any CPU
155 | {F66C2590-5FDD-4962-9EEB-AD1B74545EAE}.Release|Mixed Platforms.Build.0 = Release|Any CPU
156 | {F66C2590-5FDD-4962-9EEB-AD1B74545EAE}.Release|x86.ActiveCfg = Release|Any CPU
157 | {F66C2590-5FDD-4962-9EEB-AD1B74545EAE}.Release|x86.Build.0 = Release|Any CPU
158 | EndGlobalSection
159 | GlobalSection(SolutionProperties) = preSolution
160 | HideSolutionNode = FALSE
161 | EndGlobalSection
162 | GlobalSection(NestedProjects) = preSolution
163 | {3A7662D3-A30C-4BD4-BA0A-08A53DC59445} = {ED8079DD-2B06-4030-9F0F-DC548F98E1C4}
164 | {91D0BE7A-E128-498A-BB68-6ED65A582E04} = {ED8079DD-2B06-4030-9F0F-DC548F98E1C4}
165 | {52D108CA-B379-4C30-BD85-0AE8E0C5723B} = {884C599D-FDE2-4AC3-828A-12F6C662F273}
166 | {C73D328C-4247-4F99-81BF-2E274410E9C4} = {884C599D-FDE2-4AC3-828A-12F6C662F273}
167 | {FC9E0584-0139-4D02-8017-29AD01282449} = {884C599D-FDE2-4AC3-828A-12F6C662F273}
168 | {D64B2492-43AA-4436-B6D5-6CBFE44989DF} = {884C599D-FDE2-4AC3-828A-12F6C662F273}
169 | {31A44BBA-0A6C-48FE-BB45-5BC23190A587} = {884C599D-FDE2-4AC3-828A-12F6C662F273}
170 | {BEC28BC7-9F2E-4B2D-948B-F5E0648FB928} = {ED8079DD-2B06-4030-9F0F-DC548F98E1C4}
171 | {F66C2590-5FDD-4962-9EEB-AD1B74545EAE} = {ED8079DD-2B06-4030-9F0F-DC548F98E1C4}
172 | EndGlobalSection
173 | GlobalSection(ExtensibilityGlobals) = postSolution
174 | SolutionGuid = {5F6E586E-166D-4397-A502-18C61E31AA9C}
175 | EndGlobalSection
176 | EndGlobal
177 |
--------------------------------------------------------------------------------
/docs/content/fslex.md:
--------------------------------------------------------------------------------
1 | FsLex Overview
2 | ========
3 |
4 | The `fslex` tool is a lexer generator for byte and Unicode character input.
5 |
6 | Getting Started
7 | ---------------
8 |
9 | Build the tool by cloning this project and running /build.sh or build.cmd
10 |
11 | Add a reference to `FsLexYacc` package via Nuget or paket.
12 |
13 | You can run the lexer directly:
14 | dotnet fslex.dll inputFile -o outputFile
15 |
16 |
17 | Or you can add it to your build project via entries like this:
18 |
19 |
20 | --module Parser
21 |
22 |
23 | --module Lexer --unicode
24 |
25 |
26 |
27 |
28 |
29 | Lexer syntax
30 | ------------
31 |
32 | Define your lexer in the Lexer.fsl file.
33 |
34 | { header }
35 | let ident = regexp ...
36 | rule entrypoint [arg1... argn] =
37 | parse regexp { action }
38 | | ...
39 | | regexp { action }
40 | and entrypoint [arg1… argn] =
41 | parse ...
42 | and ...
43 | { trailer }
44 |
45 | Comments are delimited by (* and *) and line comments // are also supported, as in F#.
46 |
47 | The rule and parse keywords are required.
48 |
49 | The header and trailer sections are arbitrary F# code, which will be written to the beginning and end of the output file (Lexer.fs).
50 | Either or both can be omitted. Headers typically include values and functions used in the rule body actions.
51 |
52 | Following the header and before the rules are named regular expressions for use in the rules.
53 |
54 | let ident = regexp …
55 |
56 | Following this declaration, the identifier ident can be used as shorthand for regexp.
57 |
58 | Entry points
59 | ------------
60 |
61 | Entry points are valid F# identifiers. Similarly, the arguments
62 |
63 | arg1... argn
64 |
65 | must be valid identifiers.
66 | Each entry point becomes a function that takes n+1 arguments, the implicit last argument being of type LexBuffer<'a>.
67 | Characters are read from the LexBuffer<'a> argument and matched against the regular expressions provided in the rule, until a prefix of the input matches one of the rules.
68 | The Lexer then evaluates the action and returns it as the result of the function. Rule entry points can be entered recursively.
69 |
70 | If several regular expressions match a prefix of the input the regular expression that matches the longest prefix of the input is selected.
71 | In case of tie, the regular expression that occurs earlier in the rule is selected.
72 |
73 | Rule regular expressions
74 | ------------------------
75 |
76 | ' regular-char | escape-sequence '
77 |
78 | A character constant, with the same syntax as F# character constants. Match the denoted character.
79 |
80 | _
81 |
82 | (underscore) Match any character.
83 |
84 | eof
85 |
86 | Match the end of the lexer input.
87 |
88 | Note: Fslex will not correctly handle regular expressions that contain eof followed by something else.
89 |
90 | "string"
91 |
92 | A string constant, with the same syntax as F# string constants. Match the corresponding sequence of characters.
93 |
94 | [ character-set ]
95 |
96 | Match any single character belonging to the given character set. Valid character sets are: single character constants ' c '; ranges of characters ' c1 ' - ' c2 ' (all characters between c1 and c2, inclusive); and the union of two or more character sets, denoted by concatenation.
97 |
98 | [ ^ character-set ]
99 |
100 | Match any single character not belonging to the given character set.
101 |
102 | regexp1 # regexp2
103 |
104 | (difference of character sets) Regular expressions regexp1 and regexp2 must be character sets defined with […] (or a single character expression or underscore _). Match the difference of the two specified character sets.
105 |
106 | regexp *
107 |
108 | (repetition) Match the concatenation of zero or more strings that match regexp.
109 |
110 | regexp +
111 |
112 | (strict repetition) Match the concatenation of one or more strings that match regexp.
113 |
114 | regexp ?
115 |
116 | (option) Match the empty string, or a string matching regexp.
117 |
118 | regexp1 | regexp2
119 |
120 | (alternative) Match any string that matches regexp1 or regexp2
121 |
122 | regexp1 regexp2
123 |
124 | (concatenation) Match the concatenation of two strings, the first matching regexp1, the second matching regexp2.
125 |
126 | ( regexp )
127 |
128 | Match the same strings as regexp.
129 |
130 | ident
131 |
132 | Reference the regular expression bound to ident by an earlier let ident = regexp definition.
133 |
134 | Concerning the precedences of operators, # has the highest precedence, followed by *, + and ?, then concatenation, then | (alternation).
135 |
136 | Rule actions
137 | ------------
138 |
139 | The actions are arbitrary F# expressions. Additionally, `lexbuf` is bound to the current lexer buffer.
140 |
141 | Some typical uses for `lexbuf`, in conjunction with the operations on lexer buffers provided by the FSharp.Text.Lexing standard library module, are listed below.
142 |
143 | lexeme lexbuf
144 |
145 | Return the matched string.
146 |
147 | lexbuf.LexemeChar n
148 |
149 | Return the nth character in the matched string. The first character corresponds to n = 0.
150 |
151 | lexbuf.StartPos
152 |
153 | Return the data on the absolute position in the input text of the beginning of the matched string (i.e. the offset of the first character of the matched string) in an object of type Position. The first character read from the input text has offset 0.
154 |
155 | lexbuf.EndPos
156 |
157 | Return the data on absolute position in the input text of the end of the matched string (i.e. the offset of the first character after the matched string) in an object of type Position. The first character read from the input text has offset 0.
158 |
159 | entrypoint [exp1… expn] lexbuf
160 |
161 | (Where entrypoint is the name of another entry point in the same lexer definition.) Recursively call the lexer on the given entry point. Notice that lexbuf is the last argument. Useful for lexing nested comments, for example.
162 |
163 | The Position type
164 | -----------------
165 |
166 | type Position =
167 | { /// The file name for the position
168 | pos_fname: string
169 | /// The line number for the position
170 | pos_lnum: int
171 | /// The absolute offset of the beginning of the line
172 | pos_bol: int
173 | /// The absolute offset of the column for the position
174 | pos_cnum: int }
175 | /// The file name associated with the input stream.
176 | member FileName : string
177 | /// The line number in the input stream, assuming fresh positions have been updated
178 | /// using AsNewLinePos() and by modifying the EndPos property of the LexBuffer.
179 | member Line : int
180 | /// The character number in the input stream
181 | member AbsoluteOffset : int
182 | /// Return absolute offset of the start of the line marked by the position
183 | member StartOfLineAbsoluteOffset : int
184 | /// Return the column number marked by the position, i.e. the difference between the AbsoluteOffset and the StartOfLineAbsoluteOffset
185 | member Column : int
186 | // Given a position just beyond the end of a line, return a position at the start of the next line
187 | member NextLine : Position
188 | /// Given a position at the start of a token of length n, return a position just beyond the end of the token
189 | member EndOfToken: n:int -> Position
190 | /// Gives a position shifted by specified number of characters
191 | member ShiftColumnBy: by:int -> Position
192 |
193 | Sample input
194 | ------------
195 |
196 | This is taken from the `Parsing` sample previously in the F# distribution. See below for information on `newline` and line counting.
197 |
198 | let digit = ['0'-'9']
199 | let whitespace = [' ' '\t' ]
200 | let newline = ('\n' | '\r' '\n')
201 |
202 |
203 | rule token = parse
204 | | whitespace { token lexbuf }
205 | | newline { newline lexbuf; token lexbuf }
206 | | "while" { WHILE }
207 | | "begin" { BEGIN }
208 | | "end" { END }
209 | | "do" { DO }
210 | | "if" { IF }
211 | | "then" { THEN }
212 | | "else" { ELSE }
213 | | "print" { PRINT }
214 | | "decr" { DECR }
215 | | "(" { LPAREN }
216 | | ")" { RPAREN }
217 | | ";" { SEMI }
218 | | ":=" { ASSIGN }
219 | | ['a'-'z']+ { ID(lexeme lexbuf) }
220 | | ['-']?digit+ { INT (Int32.Parse(lexeme lexbuf)) }
221 | | ['-']?digit+('.'digit+)?(['e''E']digit+)? { FLOAT (Double.Parse(lexeme lexbuf)) }
222 | | eof { EOF }
223 |
224 |
225 |
226 | More than one lexer state is permitted - use
227 |
228 | rule state1 =
229 | | "this" { state2 lexbuf }
230 | | ...
231 | and state2 =
232 | | "that" { state1 lexbuf }
233 | | ...
234 |
235 |
236 | States can be passed arguments:
237 |
238 | rule state1 arg1 arg2 = ...
239 | | "this" { state2 (arg1+1) (arg2+2) lexbuf }
240 | | ...
241 | and state2 arg1 arg2 = ...
242 | | ...
243 |
244 |
245 |
246 | **Using a lexer**
247 |
248 | If in the first example above the constructors `INT` etc generate values of type `tok` then the above generates a lexer with a function
249 |
250 | val token : LexBuffer -> tok
251 |
252 | Once you have a lexbuffer you can call the above to generate new tokens. Typically you use some methods from `FSharp.Text.Lexing`
253 | to create lex buffers, either a byte-based `LexBuffer` for ASCII lexing, or a character-based `LexBuffer` for Unicode lexing.
254 |
255 | Some ways of creating lex buffers are by using:
256 |
257 | LexBuffer<_>.FromChars
258 | LexBuffer<_>.FromFunction
259 | LexBuffer<_>.FromStream
260 | LexBuffer<_>.FromTextReader
261 | LexBuffer<_>.FromBytes
262 |
263 | Within lexing actions the variable `lexbuf` is in scope and you may use properties on the `LexBuffer` type such as:
264 |
265 | lexbuf.Lexeme // get the lexeme as an array of characters or bytes
266 | LexBuffer.LexemeString lexbuf // get the lexeme as a string, for Unicode lexing
267 |
268 | Lexing positions give locations in source files (the relevant type is `FSharp.Text.Lexing.Position`).
269 |
270 | Generated lexers are nearly always used in conjunction with parsers generated by `FsYacc` (also documented on this site). See the Parsed Language starter template.
271 |
272 | Command line options
273 |
274 | fslex
275 | -o : Name the output file.
276 |
277 | --module : Define the F# module name to host the generated parser.
278 |
279 | --internal: Generate an internal module
280 |
281 | --codepage : Assume input lexer specification file is encoded with the given codepage.
282 |
283 | --light: (ignored)
284 |
285 | --light-off: Add #light "off" to the top of the generated file
286 |
287 | --lexlib : Specify the namespace for the implementation of the lexer table interpreter (default FSharp.Text.Lexing)
288 |
289 | --unicode: Produce a lexer for use with 16-bit unicode characters.
290 |
291 | --help: display this list of options
292 |
293 | -help: display this list of options
294 |
295 | Positions and line counting in lexers
296 |
297 | Within a lexer lines can in theory be counted simply by incrementing a global variable or a passed line number count:
298 |
299 | rule token line = ...
300 | | "\n" | '\r' '\n' { token (line+1) }
301 | | ...
302 |
303 | However for character positions this is tedious, as it means every action becomes polluted with character counting, and you have to manually attach line numbers to tokens. Also, for error reporting it is useful to have position information held as part of the state in the lexbuffer itself.
304 |
305 | Thus F# follows the `OCamlLex` model where the lexer and parser state carry `position` values that record information for the current match (`lex`) and the `l.h.s`/`r.h.s` of the grammar productions (`yacc`).
306 |
307 | The information carried for each position is:
308 |
309 | * a filename
310 | * a current 'absolute' character number
311 | * a placeholder for a user-tracked beginning-of-line marker
312 | * a placeholder for a user-tracked line number count.
313 |
314 | Passing state through lexers
315 | ---------------------------
316 |
317 | It is sometimes under-appreciated that you can pass arguments around between lexer states. For example, in one example we used imperative state to track a line number.
318 |
319 | let current_line = ref 0
320 | let current_char = ref 0
321 | let set_next_line lexbuf = ..
322 |
323 | ...
324 | rule main = parse
325 | | ...
326 | | "//" [^ '\n']* '\n' {
327 | set_next_line lexbuf; main lexbuf
328 | }
329 |
330 |
331 | This sort of imperative code is better replaced by passing arguments:
332 |
333 | rule main line char = parse
334 | | ...
335 | | "//" [^ '\n']* '\n' {
336 | main (line+1) 0 lexbuf
337 | }
338 |
339 | A good example is that when lexing a comment you want to pass through the start-of-comment position so that you can give a good error message if no end-of-comment is found. Or likewise you may want to pass through the nesting depth of comments.
340 |
--------------------------------------------------------------------------------