├── .gitattributes ├── .github └── workflows │ └── go.yml ├── .gitignore ├── .travis.yml ├── LICENSE ├── README.md ├── cmd └── textmapper │ ├── debug.go │ ├── generate.go │ ├── ls.go │ └── main.go ├── compiler ├── compiler.go ├── compiler_test.go ├── lexer.go ├── options.go ├── resolver.go ├── syntax.go └── testdata │ ├── backtrack.tmerr │ ├── badinput.tmerr │ ├── conflict1.tmerr │ ├── debug.tm │ ├── disabled_syntax.tmerr │ ├── flexmode.tmerr │ ├── greedy.tmerr │ ├── inject.tmerr │ ├── inline_input.tmerr │ ├── lexer.tmerr │ ├── lr0.tmerr │ ├── max_la.tmerr │ ├── model1.tm │ ├── noinput.tmerr │ ├── opts.tmerr │ ├── opts_ok.tmerr │ ├── parser.tmerr │ ├── parser_confl.tmerr │ ├── set.tmerr │ ├── set2.tmerr │ └── templ_input.tmerr ├── gen ├── funcs.go ├── funcs_test.go ├── gen.go ├── gen_test.go ├── post_go.go ├── post_test.go ├── post_ts.go ├── templates.go └── templates │ ├── bison.go.tmpl │ ├── cc_cached.go.tmpl │ ├── cc_lexer_cc.go.tmpl │ ├── cc_lexer_h.go.tmpl │ ├── cc_parser_cc.go.tmpl │ ├── cc_parser_h.go.tmpl │ ├── cc_shared.go.tmpl │ ├── cc_token_codes_inc.go.tmpl │ ├── cc_token_h.go.tmpl │ ├── go_ast.go.tmpl │ ├── go_ast_factory.go.tmpl │ ├── go_ast_parse.go.tmpl │ ├── go_ast_tree.go.tmpl │ ├── go_cached.go.tmpl │ ├── go_lexer.go.tmpl │ ├── go_lexer_tables.go.tmpl │ ├── go_listener.go.tmpl │ ├── go_parser.go.tmpl │ ├── go_parser_tables.go.tmpl │ ├── go_selector.go.tmpl │ ├── go_shared.go.tmpl │ ├── go_stream.go.tmpl │ ├── go_token.go.tmpl │ ├── ts_builder.go.tmpl │ ├── ts_cached.go.tmpl │ ├── ts_common.go.tmpl │ ├── ts_lexer.go.tmpl │ ├── ts_lexer_tables.go.tmpl │ ├── ts_listener.go.tmpl │ ├── ts_parser.go.tmpl │ ├── ts_parser_tables.go.tmpl │ ├── ts_selector.go.tmpl │ ├── ts_shared.go.tmpl │ ├── ts_stream.go.tmpl │ ├── ts_token.go.tmpl │ └── ts_tree.go.tmpl ├── go.mod ├── go.sum ├── grammar ├── debug.go ├── gen.go └── grammar.go ├── lalr ├── compile.go ├── compile_test.go ├── conflict.go ├── debug.go ├── lalr.go ├── lookahead.go ├── lookahead_test.go └── optimize.go ├── lex ├── charset.go ├── charset_test.go ├── compile.go ├── compile_test.go ├── compress.go ├── compress_test.go ├── generator.go ├── lex.go ├── lex_test.go ├── regexp.go ├── regexp_fuzz.go └── regexp_test.go ├── ls ├── server.go └── unimplemented.go ├── parsers ├── js │ ├── ast │ │ ├── ast.go │ │ ├── factory.go │ │ ├── parse.go │ │ └── tree.go │ ├── const.go │ ├── const_test.go │ ├── js.tm │ ├── js.y │ ├── lexer.go │ ├── lexer_impl.go │ ├── lexer_tables.go │ ├── lexer_test.go │ ├── listener.go │ ├── parser.go │ ├── parser_impl.go │ ├── parser_tables.go │ ├── parser_test.go │ ├── selector │ │ └── selector.go │ ├── stream.go │ ├── stream_impl.go │ └── token │ │ └── token.go ├── json │ ├── ast.go │ ├── json.tm │ ├── lexer.go │ ├── lexer_tables.go │ ├── lexer_test.go │ ├── listener.go │ ├── parser.go │ ├── parser_tables.go │ ├── parser_test.go │ └── token │ │ └── token.go ├── parsertest │ ├── parsertest.go │ └── parsertest_test.go ├── simple │ ├── lexer.go │ ├── lexer_tables.go │ ├── lexer_test.go │ ├── listener.go │ ├── parser.go │ ├── parser_tables.go │ ├── simple.tm │ └── token │ │ └── token.go ├── test │ ├── ast │ │ ├── ast.go │ │ └── factory.go │ ├── consts.go │ ├── lexer.go │ ├── lexer_tables.go │ ├── lexer_test.go │ ├── listener.go │ ├── parser.go │ ├── parser_tables.go │ ├── parser_test.go │ ├── selector │ │ └── selector.go │ ├── test.tm │ ├── test.y │ └── token │ │ └── token.go ├── tm │ ├── ast │ │ ├── ast.go │ │ ├── factory.go │ │ ├── parse.go │ │ ├── parser_test.go │ │ ├── tree.go │ │ ├── tree_ext.go │ │ └── 
tree_test.go │ ├── lexer.go │ ├── lexer_actions.go │ ├── lexer_tables.go │ ├── lexer_test.go │ ├── listener.go │ ├── parser.go │ ├── parser_tables.go │ ├── parser_test.go │ ├── selector │ │ └── selector.go │ ├── stream.go │ ├── textmapper.tm │ ├── tm.y │ ├── token │ │ └── token.go │ └── tokens.go └── walker.go ├── regen.sh ├── shiftdfa ├── shiftdfa.go └── shiftdfa_test.go ├── status ├── status.go └── status_test.go ├── syntax ├── expand.go ├── expand_test.go ├── nullable.go ├── nullable_test.go ├── set.go ├── set_test.go ├── syntax.go ├── syntax_test.go ├── templates.go ├── templates_test.go ├── types.go └── types_test.go ├── testing ├── .bazelignore ├── .bazelrc ├── .bazelversion ├── BUILD ├── MODULE.bazel ├── MODULE.bazel.lock ├── clangd.sh ├── cpp │ ├── json │ │ ├── BUILD │ │ ├── benchmark.cc │ │ ├── json.tm │ │ ├── json_lexer.cc │ │ ├── json_lexer.h │ │ ├── json_parser.cc │ │ ├── json_parser.h │ │ ├── json_token.h │ │ ├── lexer_test.cc │ │ └── parser_test.cc │ ├── json_flex │ │ ├── BUILD │ │ ├── json.tm │ │ ├── json_lexer.cc │ │ ├── json_lexer.h │ │ ├── json_parser.cc │ │ ├── json_parser.h │ │ ├── json_token.h │ │ └── json_token_codes.inc │ └── markup │ │ ├── BUILD │ │ ├── markup.cc │ │ ├── markup.h │ │ └── markup_test.cc ├── gen.sh └── ts │ ├── BUILD │ ├── jest.config.js │ ├── json │ ├── BUILD │ ├── builder.ts │ ├── common.ts │ ├── json.tm │ ├── lexer.test.ts │ ├── lexer.ts │ ├── lexer_tables.ts │ ├── listener.ts │ ├── parser.ts │ ├── parser_tables.ts │ ├── selector.ts │ ├── token.ts │ └── tree.ts │ ├── package.json │ ├── pnpm-lock.yaml │ └── tsconfig.json ├── util ├── container │ ├── bitset.go │ ├── bitset_test.go │ ├── intset.go │ ├── intset_test.go │ ├── slicemap.go │ └── slicemap_test.go ├── debug │ └── debug.go ├── diff │ ├── diff.go │ └── diff_test.go ├── dump │ ├── dump.go │ └── dump_test.go ├── graph │ ├── matrix.go │ ├── matrix_test.go │ ├── path.go │ ├── path_test.go │ ├── tarjan.go │ ├── tarjan_test.go │ ├── transpose.go │ └── transpose_test.go ├── ident │ ├── id.go │ └── id_test.go ├── set │ ├── closure.go │ └── closure_test.go └── sparse │ ├── sparse.go │ └── sparse_test.go └── vscode-ext ├── .vscode └── launch.json ├── CHANGELOG.md ├── LICENSE ├── README.md ├── extension.ts ├── language-configuration.json ├── package-lock.json ├── package.json ├── syntaxes ├── cc.injection.json ├── go.injection.json └── textmapper.tmLanguage.json └── tsconfig.json /.gitattributes: -------------------------------------------------------------------------------- 1 | *.xml text 2 | *.java text 3 | *.cpp text 4 | *.sh eol=lf 5 | *.dsp text 6 | *.dsw text 7 | *.txt text 8 | *.gradle text 9 | errors text 10 | tables text 11 | .project text 12 | *.iml text 13 | *.s eol=lf 14 | *.ltp eol=lf 15 | -------------------------------------------------------------------------------- /.github/workflows/go.yml: -------------------------------------------------------------------------------- 1 | # This workflow will build a golang project 2 | # For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-go 3 | 4 | name: Go 5 | 6 | on: 7 | push: 8 | branches: [ "main" ] 9 | pull_request: 10 | branches: [ "main" ] 11 | 12 | jobs: 13 | 14 | build: 15 | runs-on: ubuntu-latest 16 | steps: 17 | - uses: actions/checkout@v4 18 | 19 | - name: Set up Go 20 | uses: actions/setup-go@v4 21 | with: 22 | go-version: '1.23' 23 | 24 | - name: Build 25 | run: go build -v ./... 26 | 27 | - name: Test 28 | run: go test -v ./... 
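      # The two build/test steps above match what you would run locally
      # before sending a change:
      #
      #   go build -v ./...
      #   go test -v ./...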
29 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .DS_Store
2 | /testing/bazel-*
3 | /testing/external
4 | /testing/compile_commands.json
5 | /testing/.cache/
6 | /testing/ts/node_modules/
7 | /vscode-ext/node_modules/
8 | /vscode-ext/out/
9 | .vscode
10 |
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | matrix:
2 |   include:
3 |     - language: go
4 |       go:
5 |         - 1.x
6 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | The MIT License (MIT)
2 |
3 | Copyright (c) 2002 - present Evgeny Gryaznov
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
23 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # TextMapper
2 |
3 | [![Build Status](https://github.com/inspirer/textmapper/actions/workflows/go.yml/badge.svg)](https://github.com/inspirer/textmapper/actions/workflows/go.yml)
4 |
5 | ## Introduction
6 |
7 | TextMapper is a tool for language development. It generates bottom-up parsers with complete infrastructure from a high-level, declarative specification. TextMapper applies the same generative approach to other aspects of language design, producing as much of the derived, boilerplate code as possible. With a little effort you get an abstract syntax tree, code formatters, and even full-featured editor plug-ins for major IDEs.
8 |
9 | TextMapper takes an annotated context-free grammar and outputs a parser for the language the grammar defines. Generated parsers are deterministic and employ LALR(1) parser tables. In its grammar handling it is very similar to Bison, with some additional features on top; if you are familiar with Bison, you won't get lost. TextMapper also includes a sophisticated scanner generator (Unicode-aware, driven by regular expressions, and almost flex/JFlex compatible).
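For a quick taste of the notation, here is a minimal grammar in TextMapper's `.tm` format (an illustrative sketch assembled from the test grammars under `compiler/testdata/`; the `hello` language name and the token names are made up):

```
language hello(go);

:: lexer

id: /[a-zA-Z_]+/
whitespace: /[\n\r\t ]+/ (space)

:: parser

input : id+ ;
```

The `language` header names the grammar and the target language of the generated code, the `:: lexer` section defines tokens via regular expressions, and the `:: parser` section holds the context-free productions.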
10 | -------------------------------------------------------------------------------- /cmd/textmapper/debug.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "os" 7 | "path/filepath" 8 | "time" 9 | 10 | "github.com/inspirer/textmapper/compiler" 11 | "github.com/inspirer/textmapper/lalr" 12 | "github.com/inspirer/textmapper/status" 13 | ) 14 | 15 | var debugCmd = &command{ 16 | Name: "debug", 17 | Title: "print out automaton statistics and parsing tables in a human-readable format", 18 | Usage: " [flags] [grammars...]", 19 | Help: `By default, Textmapper prints out debug info for all grammars in the current directory. 20 | 21 | Flags:`, 22 | } 23 | 24 | var ( 25 | stats = debugCmd.Flags.Bool("stats", false, "output generated table statistics") 26 | tables = debugCmd.Flags.Bool("tables", false, "dump generated tables in a human-readable format") 27 | ) 28 | 29 | func init() { 30 | debugCmd.Run = debug 31 | } 32 | 33 | func debug(ctx context.Context, files []string) error { 34 | if len(files) == 0 { 35 | var err error 36 | files, err = filepath.Glob("*.tm") 37 | if err != nil { 38 | return err 39 | } 40 | 41 | if len(files) == 0 { 42 | return fmt.Errorf("no .tm files found in the current directory") 43 | } 44 | } 45 | 46 | var s status.Status 47 | for _, path := range files { 48 | err := debugFile(ctx, path) 49 | if err != nil { 50 | s.AddError(err) 51 | } 52 | } 53 | return s.Err() 54 | } 55 | 56 | func debugFile(ctx context.Context, path string) error { 57 | content, err := os.ReadFile(path) 58 | if err != nil { 59 | return err 60 | } 61 | 62 | start := time.Now() 63 | params := compiler.Params{CollectStats: true, DebugTables: *tables, CheckOnly: true /*disables optimizations*/} 64 | g, err := compiler.Compile(ctx, path, string(content), params) 65 | if g == nil { 66 | return err 67 | } 68 | if err != nil { 69 | status.Print(os.Stderr, err) 70 | fmt.Fprintln(os.Stderr) 71 | } 72 | 73 | if *stats { 74 | fmt.Printf("Compiled %v in %v\n", path, time.Since(start)) 75 | } 76 | 77 | if *stats && g.Lexer != nil { 78 | fmt.Println() 79 | fmt.Print(g.Lexer.TableStats()) 80 | } 81 | 82 | if *stats && g.Parser != nil && g.Parser.Tables != nil { 83 | fmt.Print(g.Parser.TableStats()) 84 | 85 | start = time.Now() 86 | newEnc := lalr.Optimize(g.Parser.Tables.DefaultEnc, g.NumTokens, len(g.Parser.Tables.RuleLen), g.Options.DefaultReduce) 87 | fmt.Printf("Optimized tables in %v\n", time.Since(start)) 88 | 89 | fmt.Print(newEnc.TableStats()) 90 | } 91 | 92 | if g.Parser != nil && g.Parser.Tables != nil { 93 | for _, info := range g.Parser.Tables.DebugInfo { 94 | fmt.Println(info) 95 | } 96 | } 97 | 98 | return nil 99 | } 100 | -------------------------------------------------------------------------------- /cmd/textmapper/ls.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "context" 5 | "log" 6 | "os" 7 | 8 | "github.com/inspirer/textmapper/ls" 9 | "go.lsp.dev/jsonrpc2" 10 | lsp "go.lsp.dev/protocol" 11 | "go.uber.org/zap" 12 | ) 13 | 14 | var lsCmd = &command{ 15 | Name: "ls", 16 | Title: "start a language server", 17 | Help: `This command starts a language server communicating with Visual Studio 18 | Code (or similar editors) via stdin/stdout and providing coding 19 | assistance for Textmapper grammars. 
20 | `, 21 | } 22 | 23 | func init() { 24 | lsCmd.Run = startLS 25 | } 26 | 27 | func startLS(ctx context.Context, files []string) error { 28 | logger, err := zap.NewDevelopment() 29 | if err != nil { 30 | log.Fatalln(err.Error()) 31 | } 32 | logger.Info("textmapper LS is starting ..") 33 | 34 | server := ls.NewServer(logger) 35 | 36 | conn := jsonrpc2.NewConn(jsonrpc2.NewStream(transport{logger})) 37 | client := lsp.ClientDispatcher(conn, logger.Named("client")) 38 | ctx = lsp.WithClient(ctx, client) 39 | 40 | server.SetClient(client) 41 | 42 | conn.Go(ctx, 43 | lsp.Handlers( 44 | lsp.ServerHandler(server, jsonrpc2.MethodNotFoundHandler), 45 | ), 46 | ) 47 | 48 | logger.Info("listening ..") 49 | <-conn.Done() 50 | logger.Info("done") 51 | return nil 52 | } 53 | 54 | type transport struct { 55 | *zap.Logger 56 | } 57 | 58 | func (transport) Read(p []byte) (int, error) { 59 | return os.Stdin.Read(p) 60 | } 61 | 62 | func (transport) Write(p []byte) (int, error) { 63 | return os.Stdout.Write(p) 64 | } 65 | 66 | func (t transport) Close() error { 67 | t.Logger.Info("closing connections ..") 68 | if err := os.Stdin.Close(); err != nil { 69 | t.Logger.Sugar().Errorf("cannot close stdin: %v", err) 70 | return err 71 | } 72 | if err := os.Stdout.Close(); err != nil { 73 | t.Logger.Sugar().Errorf("cannot close stdout: %v", err) 74 | return err 75 | } 76 | return nil 77 | } 78 | -------------------------------------------------------------------------------- /cmd/textmapper/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "bytes" 5 | "context" 6 | "flag" 7 | "fmt" 8 | "log" 9 | "os" 10 | "text/template" 11 | 12 | "github.com/inspirer/textmapper/status" 13 | ) 14 | 15 | const ( 16 | version = `0.10.1` 17 | usageTemplate = `Textmapper is an LALR parser and lexer generator. 18 | 19 | Usage: 20 | textmapper [command] [flags] [arguments] 21 | 22 | Commands: 23 | {{- range .}}{{if ne .Name "help"}} 24 | {{.Name | printf "%-10s"}} {{.Title}}{{end}}{{end}} 25 | 26 | Use "textmapper help [command]" for more information about a command. 
27 | 28 | ` 29 | ) 30 | 31 | var commands = []*command{ 32 | genCmd, 33 | debugCmd, 34 | versionCmd, 35 | helpCmd, 36 | lsCmd, 37 | } 38 | 39 | func main() { 40 | flag.Usage = usage 41 | flag.Parse() 42 | 43 | ctx := context.Background() 44 | args := flag.Args() 45 | if len(args) == 0 { 46 | flag.Usage() 47 | os.Exit(2) 48 | } 49 | 50 | cmd := findCommand(args[0]) 51 | if cmd == nil { 52 | fmt.Fprintf(os.Stderr, "textmapper: unknown command %q\n", args[0]) 53 | fmt.Fprint(os.Stderr, "Run 'textmapper help' for usage.\n") 54 | os.Exit(2) 55 | } 56 | 57 | cmd.Flags.Usage = func() { 58 | cmd.usage() 59 | os.Exit(2) 60 | } 61 | cmd.Flags.Parse(args[1:]) 62 | args = cmd.Flags.Args() 63 | err := cmd.Run(ctx, args) 64 | if err != nil { 65 | status.Print(os.Stderr, err) 66 | os.Exit(2) 67 | } 68 | } 69 | 70 | func usage() { 71 | var buf bytes.Buffer 72 | t := template.Must(template.New("main").Parse(usageTemplate)) 73 | if err := t.Execute(&buf, commands); err != nil { 74 | log.Fatal(err) 75 | } 76 | 77 | fmt.Fprint(os.Stderr, buf.String()) 78 | } 79 | 80 | var versionCmd = &command{ 81 | Name: "version", 82 | Title: "print the Textmapper version", 83 | Help: `This command prints the Textmapper version.`, 84 | Run: func(ctx context.Context, args []string) error { 85 | fmt.Fprintf(os.Stderr, "textmapper ver %v\n", version) 86 | return nil 87 | }, 88 | } 89 | 90 | var helpCmd = &command{ 91 | Name: "help", 92 | Title: "prints help", 93 | Usage: " [command]", 94 | Help: `Outputs command-specific flags and usage help.`, 95 | } 96 | 97 | func init() { 98 | helpCmd.Run = help 99 | } 100 | 101 | func help(ctx context.Context, args []string) error { 102 | if len(args) == 0 { 103 | flag.Usage() 104 | return nil 105 | } 106 | 107 | cmd := findCommand(args[0]) 108 | if cmd == nil { 109 | return fmt.Errorf("unknown command %q. 
Run 'textmapper help'", args[0]) 110 | } 111 | 112 | cmd.usage() 113 | return nil 114 | } 115 | 116 | type command struct { 117 | Name string 118 | Title string 119 | Usage string 120 | Help string 121 | Run func(ctx context.Context, args []string) error 122 | Flags flag.FlagSet 123 | } 124 | 125 | func (c *command) usage() { 126 | fmt.Fprintf(os.Stderr, "Usage:\n textmapper %s%s\n\n%s\n", c.Name, c.Usage, c.Help) 127 | c.Flags.PrintDefaults() 128 | } 129 | 130 | func findCommand(name string) *command { 131 | for _, cmd := range commands { 132 | if cmd.Name == name { 133 | return cmd 134 | } 135 | } 136 | return nil 137 | } 138 | -------------------------------------------------------------------------------- /compiler/resolver.go: -------------------------------------------------------------------------------- 1 | package compiler 2 | 3 | import ( 4 | "strings" 5 | 6 | "github.com/inspirer/textmapper/grammar" 7 | "github.com/inspirer/textmapper/parsers/tm/ast" 8 | "github.com/inspirer/textmapper/status" 9 | "github.com/inspirer/textmapper/syntax" 10 | "github.com/inspirer/textmapper/util/ident" 11 | ) 12 | 13 | type resolver struct { 14 | *status.Status 15 | Syms []grammar.Symbol 16 | NumTokens int 17 | 18 | syms map[string]int 19 | ids map[string]string // ID -> name 20 | tokID map[string]string // ensures unique token IDs consistency 21 | } 22 | 23 | func newResolver(s *status.Status) *resolver { 24 | return &resolver{ 25 | Status: s, 26 | syms: make(map[string]int), 27 | ids: make(map[string]string), 28 | tokID: make(map[string]string), 29 | } 30 | } 31 | 32 | func (c *resolver) addToken(name, id string, t ast.RawType, space ast.LexemeAttribute, n status.SourceNode) int { 33 | var rawType string 34 | if t.IsValid() { 35 | rawType = strings.TrimSuffix(strings.TrimPrefix(t.Text(), "{"), "}") 36 | } 37 | if i, exists := c.syms[name]; exists { 38 | sym := c.Syms[i] 39 | if sym.Type != rawType { 40 | anchor := n 41 | if t.IsValid() { 42 | anchor = t 43 | } 44 | c.Errorf(anchor, "terminal type redeclaration for %v, was %v", name, sym.PrettyType()) 45 | } 46 | if sym.Space != space.IsValid() { 47 | anchor := n 48 | if space.IsValid() { 49 | anchor = space 50 | } 51 | c.Errorf(anchor, "%v is declared as both a space and non-space terminal", name) 52 | } 53 | if prev := c.tokID[name]; prev != id { 54 | c.Errorf(n, "%v is redeclared with a different ID (%q vs %q)", name, prev, id) 55 | } 56 | return sym.Index 57 | } 58 | c.tokID[name] = id 59 | if id == "" { 60 | id = ident.Produce(name, ident.UpperCase) 61 | } 62 | if prev, exists := c.ids[id]; exists { 63 | c.Errorf(n, "%v and %v get the same ID in generated code", name, prev) 64 | } 65 | 66 | sym := grammar.Symbol{ 67 | Index: len(c.Syms), 68 | ID: id, 69 | Name: name, 70 | Type: rawType, 71 | Space: space.IsValid(), 72 | Origin: n, 73 | } 74 | c.syms[name] = sym.Index 75 | c.ids[id] = name 76 | c.Syms = append(c.Syms, sym) 77 | c.NumTokens++ 78 | return sym.Index 79 | } 80 | 81 | func (c *resolver) addNonterms(m *syntax.Model) { 82 | // TODO error is also nullable - make it so! 
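	// Compute the set of nullable nonterminals up front: the CanBeNull flag
	// on each grammar.Symbol created below is looked up in this set.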
83 | nullable := syntax.Nullable(m) 84 | nonterms := m.Nonterms 85 | 86 | for _, nt := range nonterms { 87 | name := nt.Name 88 | if _, ok := c.syms[name]; ok { 89 | // TODO come up with a better error message 90 | c.Errorf(nt.Origin, "duplicate name %v", name) 91 | } 92 | id := ident.Produce(name, ident.CamelCase) 93 | if prev, exists := c.ids[id]; exists { 94 | c.Errorf(nt.Origin, "%v and %v get the same ID in generated code", name, prev) 95 | } 96 | index := len(c.Syms) 97 | sym := grammar.Symbol{ 98 | Index: index, 99 | ID: id, 100 | Name: name, 101 | Type: nt.Type, 102 | CanBeNull: nullable.Get(index), 103 | Origin: nt.Origin, 104 | } 105 | c.syms[name] = sym.Index 106 | c.ids[id] = name 107 | c.Syms = append(c.Syms, sym) 108 | } 109 | } 110 | -------------------------------------------------------------------------------- /compiler/testdata/backtrack.tmerr: -------------------------------------------------------------------------------- 1 | language backtrack(go); 2 | 3 | nonBacktracking = true 4 | 5 | :: lexer 6 | 7 | commentChars = /([^*]|\*+[^*\/])*\**/ 8 | MultiLineComment: /\/\*{commentChars}\*\// (space) 9 | 10 | # err: Needs backtracking since the following state(s) are prefixes of valid tokens but are not valid tokens themselves: 11 | # err: MultiLineComment: /\/\*{commentChars}\*\// -> /([^*]|\*+[^*\/])*\**/ 12 | # err: MultiLineComment: /\/\*{commentChars}\*\// -> /([^*]|\*+[^*\/])*\**/ 13 | # err: MultiLineComment: /\/\*{commentChars}\*\// -> /([^*]|\*+[^*\/])*\**/ 14 | # err: MultiLineComment: /\/\*{commentChars}\*\// 15 | # err: Consider removing 'nonBacktracking = true' or reporting these states as 'invalid_token' via separate lexer rules. 16 | «'/': /\//» 17 | 18 | 19 | # err: Needs backtracking since the following state(s) are prefixes of valid tokens but are not valid tokens themselves: 20 | # err: '...': /\.\.\./ 21 | # err: Consider removing 'nonBacktracking = true' or reporting these states as 'invalid_token' via separate lexer rules. 22 | «'.': /\./» 23 | '...': /\.\.\./ 24 | 25 | 26 | # err: Needs backtracking since the following state(s) are prefixes of valid tokens but are not valid tokens themselves: 27 | # err: backtrackingToken: /test(foo)?-+>/ 28 | # err: Consider removing 'nonBacktracking = true' or reporting these states as 'invalid_token' via separate lexer rules. 29 | # err: Needs backtracking since the following state(s) are prefixes of valid tokens but are not valid tokens themselves: 30 | # err: backtrackingToken: /test(foo)?-+>/ 31 | # err: backtrackingToken: /test(foo)?-+>/ 32 | # err: Consider removing 'nonBacktracking = true' or reporting these states as 'invalid_token' via separate lexer rules. 33 | ««'test': /test/»» 34 | backtrackingToken: /test(foo)?-+>/ 35 | 36 | # The following resolution with an invalid token is OK. 
37 | 38 | hex = /[0-9a-fA-F]/ 39 | esc = /u{hex}{4}/ 40 | idChar = /[a-zA-Z]|\\{esc}/ 41 | 42 | Identifier: /#@_{idChar}+/ (class) 43 | invalid_token: /#@_{idChar}*\\(u{hex}{0,3})?/ 44 | 45 | sharpFoo: /#@_foo/ -------------------------------------------------------------------------------- /compiler/testdata/badinput.tmerr: -------------------------------------------------------------------------------- 1 | «language parser(go);» 2 | # err: the grammar does not specify an input nonterminal, use '%input' to declare one 3 | 4 | :: lexer 5 | 6 | a: /a/ 7 | 8 | :: parser 9 | 10 | %flag foo; 11 | 12 | «input»: a; 13 | # err: the 'input' nonterminal cannot be parametrized -------------------------------------------------------------------------------- /compiler/testdata/conflict1.tmerr: -------------------------------------------------------------------------------- 1 | «language conflict1(go); 2 | # err: conflicts: 7 shift/reduce and 1 reduce/reduce 3 | 4 | lang = "conflict1" 5 | 6 | :: lexer 7 | 8 | Identifier: /[a-zA-Z_]+/ (class) 9 | 10 | # Keywords. 11 | 'a': /a/ 12 | 'b': /b/ 13 | 'c': /c/ 14 | 15 | '+': /\+/ 16 | '-': /\-/ 17 | 18 | :: parser 19 | 20 | input: 21 | 'a' 22 | | 'b' (f | p) 23 | | 'a' e e 24 | ; 25 | 26 | e : 27 | «'c'+» 28 | # err: input: 'a' ChaRC_list 29 | # err: shift/reduce conflict (next: 'c') 30 | # err: e : ChaRC_list 31 | # err: 32 | ; 33 | 34 | f : 35 | Identifier 36 | | «««f '+' f»»» 37 | # err: input: 'b' f '+' f 38 | # err: reduce/reduce conflict (next: eoi) 39 | # err: p : f '+' f 40 | # err: f : f '+' f 41 | # err: 42 | # err: input: 'b' f '+' f 43 | # err: shift/reduce conflict (next: '+', '-') 44 | # err: f : f '+' f 45 | # err: 46 | # err: input: 'b' f '+' f '+' f 47 | # err: shift/reduce conflict (next: '+', '-') 48 | # err: f : f '+' f 49 | # err: 50 | | «f '-' f» 51 | # err: input: 'b' f '-' f 52 | # err: shift/reduce conflict (next: '+', '-') 53 | # err: f : f '-' f 54 | # err: 55 | ; 56 | 57 | p : 58 | «f '+' f» 59 | # err: input: 'b' f '+' f 60 | # err: reduce/reduce conflict (next: eoi) 61 | # err: p : f '+' f 62 | # err: f : f '+' f 63 | # err: 64 | ;» -------------------------------------------------------------------------------- /compiler/testdata/disabled_syntax.tmerr: -------------------------------------------------------------------------------- 1 | language parser(go); 2 | 3 | disableSyntax = ["Lookahead", "Arrow", "Templates", "NestedChoice"] 4 | 5 | :: lexer 6 | 7 | a: /a/ 8 | b: /b/ 9 | c: /c/ 10 | d: /d/ 11 | 12 | :: parser 13 | 14 | input: A1 B1 C1 D1 E1 F1; 15 | 16 | A1: «(?= laA)» a; 17 | # err: syntax Lookahead is not supported 18 | 19 | laA: a b c d; 20 | 21 | B1: «(?= laB)» b; 22 | # err: syntax Lookahead is not supported 23 | 24 | laB: a b d; 25 | 26 | C1: «(?= laC)» c; 27 | # err: syntax Lookahead is not supported 28 | 29 | laC: laA | laB ; 30 | 31 | # Note: reusing laC again. 
32 | 33 | D1: «(?= laC)» d; 34 | # err: syntax Lookahead is not supported 35 | 36 | E1: «(?= laE)» d; 37 | # err: syntax Lookahead is not supported 38 | 39 | laE: a+ b; 40 | 41 | F1: «(?= laF)» d; 42 | # err: syntax Lookahead is not supported 43 | 44 | laF «-> Thing»: laE b; 45 | # err: syntax Arrow is not supported 46 | 47 | %flag T; 48 | 49 | «g»: F1; 50 | # err: templates are not supported 51 | 52 | h : F1 | (F1 F1) | «(F1 | F1 F1)»; 53 | # err: parenthesized Choice operator is not supported 54 | -------------------------------------------------------------------------------- /compiler/testdata/flexmode.tmerr: -------------------------------------------------------------------------------- 1 | language parser(cc); 2 | 3 | flexMode = true 4 | 5 | :: lexer 6 | 7 | foo {foo}: 8 | comment: (space) 9 | 10 | ',': /,/ 11 | 12 | «foo»: 13 | # err: redeclaration of 'foo' 14 | 15 | abc: «/[a-z]+/» («class») 16 | # err: only individual ASCII characters are allowed as patterns in flex mode 17 | # err: unsupported attribute (flex mode) 18 | 19 | # \n is a control character, so it is not allowed in flex mode 20 | 21 | '\n': «/\n/» 22 | # err: only individual ASCII characters are allowed as patterns in flex mode 23 | 24 | 'n': /n/ «-1» 25 | # err: priorities are not supported in flex mode 26 | 27 | 'm': /m/ «{ abc }» 28 | # err: commands are not supported in flex mode 29 | 30 | «%x a;» 31 | # err: start conditions are not supported in flex mode 32 | 33 | «» f: /f/ 34 | # err: start conditions are not supported in flex mode 35 | 36 | « { 37 | bar: /b/ 38 | }» 39 | # err: start conditions are not supported in flex mode 40 | 41 | :: parser 42 | 43 | %input a; 44 | 45 | a: ','; 46 | 47 | 48 | empty: «»; 49 | # err: empty alternative without an %empty marker 50 | 51 | empty2: ( «»| a); 52 | # err: empty alternative without an %empty marker 53 | 54 | good_rule: %empty; 55 | 56 | hidden_empty: 57 | «»| abc 58 | # err: empty alternative without an %empty marker 59 | | f 60 | ; 61 | 62 | double_empty: 63 | %empty «%empty» ; 64 | # err: duplicate %empty marker 65 | 66 | inner_double_empty: 67 | (%empty «%empty») ; 68 | # err: duplicate %empty marker 69 | 70 | double_prec: 71 | %prec 'n' «%prec 'm'» a ; 72 | # err: duplicate %prec marker 73 | 74 | inner_prec: ( ««%prec 'm'»»| a); 75 | # err: empty alternative without an %empty marker 76 | # err: precedence markers are only allowed in the top level rules 77 | -------------------------------------------------------------------------------- /compiler/testdata/greedy.tmerr: -------------------------------------------------------------------------------- 1 | «language greedy(go); 2 | # err: conflicts: 0 shift/reduce and 1 reduce/reduce 3 | 4 | :: lexer 5 | 6 | a: /a/ 7 | b: /b/ 8 | c: /c/ 9 | 10 | '+': /+/ 11 | 12 | :: parser 13 | 14 | input : N c M c O ; 15 | 16 | N : 17 | a 18 | | a '+' N 19 | | a '+' .greedy a '+' N # all ok 20 | ; 21 | 22 | # M is identical to N but without the .greedy marker. 
23 | 24 | M : 25 | a 26 | | «a '+' M» 27 | # err: input: N c a '+' a '+' M 28 | # err: reduce/reduce conflict (next: c) 29 | # err: M : a '+' a '+' M 30 | # err: M : a '+' M 31 | # err: 32 | | «a '+' a '+' M» 33 | # err: input: N c a '+' a '+' M 34 | # err: reduce/reduce conflict (next: c) 35 | # err: M : a '+' a '+' M 36 | # err: M : a '+' M 37 | # err: 38 | ; 39 | 40 | O : 41 | elem+ ; 42 | 43 | elem: 44 | a | a .greedy b | b ; # prefer shift over reduce 45 | » -------------------------------------------------------------------------------- /compiler/testdata/inject.tmerr: -------------------------------------------------------------------------------- 1 | language parser(go); 2 | 3 | :: lexer 4 | 5 | a: /a/ 6 | b: /b/ 7 | 8 | :: parser 9 | 10 | %input input, «foo»; 11 | # err: input nonterminals cannot have an 'inline' property 12 | 13 | input : d; 14 | inline foo : a; 15 | 16 | d {Foo} -> Foo : /*empty*/ ; 17 | 18 | # err: selector clauses (Aaa) cannot be used with injected terminals 19 | «%inject a -> Aaa/Foo;» 20 | 21 | # err: second %inject directive for 'a' 22 | %inject «a» -> Bar; 23 | 24 | # err: reporting terminals 'as' some category is not supported 25 | %inject b -> Bar «as Foo»; 26 | 27 | # err: unresolved reference 'c' 28 | %inject «c» -> Caa; 29 | 30 | %interface Aaa; 31 | -------------------------------------------------------------------------------- /compiler/testdata/inline_input.tmerr: -------------------------------------------------------------------------------- 1 | «language parser(go);» 2 | # err: the grammar does not specify an input nonterminal, use '%input' to declare one 3 | 4 | :: lexer 5 | 6 | a: /a/ 7 | 8 | :: parser 9 | 10 | inline «input»: a; 11 | # err: the 'input' nonterminal cannot have an 'inline' property -------------------------------------------------------------------------------- /compiler/testdata/lexer.tmerr: -------------------------------------------------------------------------------- 1 | language lexer(go); 2 | 3 | :: lexer 4 | 5 | %s initial; 6 | # err: redeclaration of 'initial' 7 | %x «initial»; 8 | 9 | idChar = /[a-zA-Z_]/ 10 | # err: redeclaration of 'idChar' 11 | «idChar» = /[a-zA-Z_0-9]/ 12 | 13 | # err: unused pattern 'unusedP' 14 | «unusedP» = /useless/ 15 | 16 | id: /{idChar}+/ 17 | scon: /"([^\n\\"]|\\.)*"/ 18 | # err: terminal type redeclaration for id, was 19 | id «{fff}»: /'[a-z]+'/ 20 | 21 | whitespace: /[\n\r\t ]+/ (space) 22 | 23 | # err: syntax error 24 | «%brackets '(' ')';» 25 | 26 | '(': /\(/ 27 | ')': /\)/ 28 | 29 | # err: broken regexp: invalid escape sequence 30 | broken_regex: /asd«\0a»bc/ 31 | # err: broken regexp: unexpected closing parenthesis 32 | broken_regex: /asd«)»/ 33 | # err: broken regexp: missing closing parenthesis 34 | broken_regex: «/(asd/» 35 | 36 | <*> { 37 | «%s foo;» 38 | # err: syntax error 39 | } 40 | 41 | # err: unresolved reference initia 42 | <«initia»> q: /qqq/ 43 | 44 | %x off1; 45 | 46 | a1: /%[a-z]+/ (class) 47 | # err: a2 must be applicable in the same set of start conditions as a1 48 | « a2: /%abc/» 49 | # OK 50 | <*> a3: /%def/ 51 | # OK: order does not matter 52 | a4: /%xyz/ 53 | 54 | # err: two rules are identical: conflict_with_id and id 55 | «conflict_with_id: /ppp/» 56 | no_id_conflict: /ppp/ 57 | 58 | # err: two rules are identical: ppp and no_id_conflict 59 | « ppp: /ppp/» 60 | 61 | # err: class rule without specializations 'noSpecializations' 62 | «noSpecializations: /567/ (class)» 63 | 64 | rrr-ppp: /\$rrrppp/ 65 | # err: rrr--ppp and rrr-ppp get the same ID in generated 
code 66 | «rrr--ppp»: /\$rrrppp2/ 67 | -------------------------------------------------------------------------------- /compiler/testdata/lr0.tmerr: -------------------------------------------------------------------------------- 1 | language lr0(go); 2 | 3 | :: lexer 4 | 5 | a: /a/ 6 | b: /b/ 7 | c: /c/ 8 | 9 | :: parser lalr(«16») 10 | # err: lookahead value of 16 is out of the [1, 8] range 11 | 12 | input : N; 13 | 14 | N : 15 | a .lr0 b a 16 | | «a .lr0 c .lr0» 17 | # err: Found an lr0 marker inside a non-LR0 state (4) 18 | | a .lr0 c a 19 | ; 20 | -------------------------------------------------------------------------------- /compiler/testdata/max_la.tmerr: -------------------------------------------------------------------------------- 1 | language parser(go); 2 | 3 | maxLookahead = 3 4 | 5 | :: lexer 6 | 7 | a: /a/ 8 | b: /b/ 9 | c: /c/ 10 | d: /d/ 11 | 12 | :: parser 13 | 14 | input: A1 B1 C1 D1 E1 F1; 15 | 16 | A1: (?= «laA») a; 17 | # err: lookahead for laA is too long (4 tokens) 18 | 19 | laA: a b c d; 20 | 21 | B1: (?= laB) b; 22 | 23 | laB: a b d; 24 | 25 | C1: (?= «laC») c; 26 | # err: lookahead for laC is too long (4 tokens) 27 | 28 | laC: laA | laB ; 29 | 30 | # Note: reusing laC again. 31 | 32 | D1: (?= «laC») d; 33 | # err: lookahead for laC is too long (4 tokens) 34 | 35 | E1: (?= «laE») d; 36 | # err: lookahead for laE is unbounded 37 | 38 | laE: a+ b; 39 | 40 | F1: (?= «laF») d; 41 | # err: lookahead for laF is unbounded 42 | 43 | laF: laE b; 44 | -------------------------------------------------------------------------------- /compiler/testdata/noinput.tmerr: -------------------------------------------------------------------------------- 1 | «language parser(go);» 2 | # err: the grammar does not specify an input nonterminal, use '%input' to declare one 3 | 4 | :: lexer 5 | 6 | a: /a/ 7 | 8 | f (foo): /foo/ 9 | «f»: /f/ 10 | # err: f is redeclared with a different ID ("FOO" vs "") 11 | 12 | :: parser 13 | 14 | «A»: a; 15 | # err: A and a get the same ID in generated code -------------------------------------------------------------------------------- /compiler/testdata/opts.tmerr: -------------------------------------------------------------------------------- 1 | language lexer(go); 2 | 3 | 4 | # err: unknown option 'a' 5 | «a» = true 6 | 7 | # err: string is expected 8 | package = «true» 9 | 10 | # err: bool is expected 11 | eventBased = «"foo"» 12 | 13 | # err: 123 is not a valid identifier 14 | # err: % is not a valid identifier 15 | extraTypes = ["abc", "eoi", «" 123 -> _tty -> % "», «" a123 -> _tty -> % "»] 16 | 17 | # err: reinitialization of 'extraTypes', previously declared on line 15 18 | # err: list of strings with names is expected. E.g. ["Foo", "Bar -> Expr"] 19 | «extraTypes» = «123» 20 | 21 | # err: reinitialization of 'extraTypes', previously declared on line 19 22 | # err: cannot parse string literal: invalid syntax 23 | «extraTypes» = [«"\x"»] 24 | 25 | # err: cannot parse string literal: invalid syntax 26 | fileNode = «"\xTT"» 27 | 28 | lang = "go" 29 | debugParser = true 30 | 31 | :: lexer 32 | 33 | eoi: -------------------------------------------------------------------------------- /compiler/testdata/opts_ok.tmerr: -------------------------------------------------------------------------------- 1 | language lexer(go); 2 | 3 | 4 | # Options are case-sensitive. 5 | # err: unknown option 'ExtraTypes' 6 | «ExtraTypes» = "" 7 | 8 | # The list of all options. 
9 | package = "github.com/example" 10 | genCopyright = true 11 | tokenLine = false 12 | tokenLineOffset = false 13 | cancellable = true 14 | writeBison = true 15 | recursiveLookaheads = true 16 | eventBased = true 17 | eventFields = true 18 | eventAST = true 19 | fileNode = "File" 20 | extraTypes = ["a", "b"] 21 | 22 | :: lexer 23 | 24 | eoi: 25 | foo: /foo/ 26 | -------------------------------------------------------------------------------- /compiler/testdata/parser_confl.tmerr: -------------------------------------------------------------------------------- 1 | language parser(go); 2 | 3 | eventBased = true 4 | 5 | :: lexer 6 | 7 | a: /a/ 8 | b: /b/ 9 | 10 | :: parser 11 | 12 | input : d | q; 13 | 14 | d {Foo} -> Foo : /*empty*/ ; 15 | 16 | «d_» : /*empty*/ ; 17 | # err: d_ and d get the same ID in generated code 18 | 19 | q : «a (/*empty*/ -> Abc)» ; 20 | # err: reporting empty ranges at the end of a rule is not allowed 21 | -------------------------------------------------------------------------------- /compiler/testdata/set.tmerr: -------------------------------------------------------------------------------- 1 | language parser(go); 2 | 3 | :: lexer 4 | 5 | a: /a/ 6 | b: /b/ 7 | 8 | :: parser 9 | 10 | input : d e; 11 | 12 | d : a set(«~d») b ; 13 | # err: set complement cannot transitively depend on itself 14 | 15 | e : set(f) ; 16 | f : set(«~e») ; 17 | # err: set complement cannot transitively depend on itself 18 | -------------------------------------------------------------------------------- /compiler/testdata/set2.tmerr: -------------------------------------------------------------------------------- 1 | language parser(go); 2 | 3 | :: lexer 4 | 5 | a: /a/ 6 | b: /b/ 7 | 8 | :: parser 9 | 10 | %generate «afterErr» = set(follow input); 11 | # err: 'afterErr' is reserved for built-in error recovery 12 | 13 | input : d p<+f>; 14 | 15 | d : a b ; 16 | 17 | %flag f; 18 | 19 | q: 20 | | [f] a 21 | | [!f] b 22 | ; 23 | 24 | # Note: sets don't inherit their flags from the containing nonterminal. 
25 | 26 | p: set(«q»); 27 | # err: uninitialized parameters: f -------------------------------------------------------------------------------- /compiler/testdata/templ_input.tmerr: -------------------------------------------------------------------------------- 1 | «language parser(go);» 2 | # err: the grammar does not specify an input nonterminal, use '%input' to declare one 3 | 4 | :: lexer 5 | 6 | a: /a/ 7 | b: /b/ 8 | 9 | :: parser 10 | 11 | «input» : d; 12 | # err: the 'input' nonterminal cannot be parametrized 13 | 14 | d {Foo} -> Foo : /*empty*/ ; 15 | -------------------------------------------------------------------------------- /gen/gen_test.go: -------------------------------------------------------------------------------- 1 | package gen_test 2 | 3 | import ( 4 | "context" 5 | "os" 6 | "path/filepath" 7 | "testing" 8 | 9 | "github.com/inspirer/textmapper/gen" 10 | "github.com/inspirer/textmapper/status" 11 | "github.com/inspirer/textmapper/util/diff" 12 | ) 13 | 14 | var grammars = []string{ 15 | "../parsers/json/json.tm", 16 | "../parsers/simple/simple.tm", 17 | "../parsers/test/test.tm", 18 | "../parsers/tm/textmapper.tm", 19 | "../parsers/js/js.tm", 20 | } 21 | 22 | type mapWriter struct { 23 | files []string 24 | content map[string]string 25 | } 26 | 27 | func (w *mapWriter) Write(filename, content string) error { 28 | w.files = append(w.files, filename) 29 | w.content[filename] = content 30 | return nil 31 | } 32 | 33 | func TestGenerate(t *testing.T) { 34 | ctx := context.Background() 35 | for _, filename := range grammars { 36 | filename := filename 37 | t.Run(filename, func(t *testing.T) { 38 | w := &mapWriter{content: make(map[string]string)} 39 | _, err := gen.GenerateFile(ctx, filename, w, gen.Options{}) 40 | if err != nil { 41 | s := status.FromError(err) 42 | s.Sort() 43 | for _, err := range s { 44 | t.Errorf("GenerateFile() failure: %v", err) 45 | } 46 | return 47 | } 48 | 49 | for _, genfile := range w.files { 50 | content := w.content[genfile] 51 | p := filepath.Join(filepath.Dir(filename), genfile) 52 | ondisk, err := os.ReadFile(p) 53 | if err != nil { 54 | t.Errorf("ReadFile(%v) failure: %v", genfile, err) 55 | continue 56 | } 57 | t.Logf("comparing %v", p) 58 | if diff := diff.LineDiff(string(ondisk), content); diff != "" { 59 | t.Errorf("The on-disk content differs from the generated one.\n--- %v\n+++ %v (generated)\n%v", p, genfile, diff) 60 | } 61 | } 62 | }) 63 | } 64 | } 65 | -------------------------------------------------------------------------------- /gen/post_ts.go: -------------------------------------------------------------------------------- 1 | package gen 2 | 3 | import ( 4 | "regexp" 5 | "sort" 6 | "strings" 7 | ) 8 | 9 | var qualifierTsRE = regexp.MustCompile(`("([\w\/\.-]+)")\.(\w+)`) 10 | 11 | // ExtractTsImports rewrites the content of a generated TypeScript file, deriving imports 12 | // from qualified names that can appear anywhere in src, where one can reference a symbol 13 | // from another module. 
The format: 14 | // 15 | // "./foo".Bar 16 | // 17 | // will be transformed into proper TypeScript imports: 18 | // 19 | // import {Bar} from "./foo" 20 | // 21 | // Multiple imports from the same module will be combined: 22 | // 23 | // "./foo".Bar, "./foo".Baz -> import {Bar, Baz} from "./foo" 24 | func ExtractTsImports(src string) string { 25 | var b strings.Builder 26 | byModule := make(map[string]map[string]bool) // module -> set of symbols 27 | 28 | // First pass: collect all imports and transform the source 29 | for { 30 | match := qualifierTsRE.FindStringSubmatchIndex(src) 31 | if match == nil { 32 | break 33 | } 34 | 35 | slice := func(n int) string { 36 | s := match[2*n] 37 | if s == -1 { 38 | return "" 39 | } 40 | return src[s:match[2*n+1]] 41 | } 42 | 43 | modulePath := slice(2) 44 | symbol := slice(3) 45 | 46 | // Add the symbol to the map of imports for this module 47 | if _, ok := byModule[modulePath]; !ok { 48 | byModule[modulePath] = make(map[string]bool) 49 | } 50 | byModule[modulePath][symbol] = true 51 | 52 | // Write everything before the match 53 | b.WriteString(src[:match[0]]) 54 | // Replace the "module".Symbol with just Symbol 55 | b.WriteString(symbol) 56 | 57 | // Move past the match 58 | src = src[match[1]:] 59 | } 60 | 61 | // Add the remaining source 62 | b.WriteString(src) 63 | 64 | // If no imports were found, return the original source 65 | if len(byModule) == 0 { 66 | return b.String() 67 | } 68 | 69 | // Sort modules to ensure consistent output 70 | var modules []string 71 | for m := range byModule { 72 | modules = append(modules, m) 73 | } 74 | sort.Strings(modules) 75 | 76 | // Generate import statements 77 | var header strings.Builder 78 | for _, mod := range modules { 79 | var symbols []string 80 | for sym := range byModule[mod] { 81 | symbols = append(symbols, sym) 82 | } 83 | sort.Strings(symbols) 84 | 85 | header.WriteString("import {") 86 | for i, sym := range symbols { 87 | if i > 0 { 88 | header.WriteString(", ") 89 | } 90 | header.WriteString(sym) 91 | } 92 | header.WriteString("} from \"") 93 | header.WriteString(mod) 94 | header.WriteString("\";\n") 95 | } 96 | 97 | source := b.String() 98 | if header.Len() == 0 { 99 | return source 100 | } 101 | 102 | var insert int 103 | for strings.HasPrefix(source[insert:], "//") { 104 | if nl := strings.Index(source[insert:], "\n"); nl != -1 { 105 | insert += nl + 1 106 | } else { 107 | break 108 | } 109 | } 110 | if strings.HasPrefix(source[insert:], "\n") { 111 | // Skip the blank line after the top-level comments. 
112 | 		insert++
113 | 	}
114 | 	if !strings.HasPrefix(source[insert:], "import ") {
115 | 		header.WriteString("\n")
116 | 	}
117 |
118 | 	// Combine the import statements with the transformed source
119 | 	return source[:insert] + header.String() + source[insert:]
120 | }
121 |
--------------------------------------------------------------------------------
/gen/templates/bison.go.tmpl:
--------------------------------------------------------------------------------
1 | %{
2 | %}
3 | {{range .Parser.Inputs}}
4 | %start {{(index $.Parser.Nonterms .Nonterm).Name}}{{if .NoEoi}} // no-eoi{{end}}
5 | {{- end}}
6 | {{range .Parser.Prec}}
7 | %{{.Associativity}}{{range .Terminals}} {{(index $.Syms .).ID}}{{end}}
8 | {{- end}}
9 | {{- range slice .TokensWithoutPrec 1}}
10 | %token {{.ID}}
11 | {{- end}}
12 |
13 | %%
14 |
15 | {{- range .Parser.RulesByNonterm}}
16 |
17 | {{ if eq .Nonterm.Value.Kind 11 -}}
18 | // lookahead: {{ range $i, $it := .Nonterm.Value.Sub }}{{if gt $i 0}} & {{end}}{{$it}}{{end}}
19 | {{ end -}}
20 | {{ .Nonterm.Name}} :
21 | {{- range $i, $rule := .Rules}}
22 |   {{ if eq $i 0}} {{else}}| {{end}}{{if eq $rule.Value.Kind 11}}%empty{{else}}{{$.ExprString $rule.Value}}{{end}}
23 | {{- $act := index $.Parser.Actions $rule.Action }}
24 | {{- if $act.Code }}
25 |   {{bison_parser_action $act.Code $act.Vars $act.Origin}}
26 | {{- end}}
27 | {{- end}}
28 | ;
29 | {{- end}}
30 |
31 | %%
32 |
33 |
--------------------------------------------------------------------------------
/gen/templates/cc_cached.go.tmpl:
--------------------------------------------------------------------------------
1 | {{ define "node_id" }}{{.Options.NodePrefix}}{{.Name}}{{end -}}
2 |
--------------------------------------------------------------------------------
/gen/templates/cc_shared.go.tmpl:
--------------------------------------------------------------------------------
1 | {{ define "header" -}}
2 | // generated by Textmapper; DO NOT EDIT
3 |
4 | {{ end }}
--------------------------------------------------------------------------------
/gen/templates/cc_token_codes_inc.go.tmpl:
--------------------------------------------------------------------------------
1 | YYEMPTY = -2,
2 | YYEOF = 0,
3 | YYerror = 256, // error
4 | YYUNDEF = 257, // "invalid token"
5 | {{range $i, $tok := .Tokens -}}
6 | {{if ge .FlexID 258 -}}
7 | {{printf "%v = %v," .ID .FlexID | printf "%-30s"}} // {{if .Comment}}{{.Comment}}{{else}}{{.ID}}{{end}}
8 | {{end -}}
9 | {{end -}}
--------------------------------------------------------------------------------
/gen/templates/cc_token_h.go.tmpl:
--------------------------------------------------------------------------------
1 | {{ template "header" . -}}
2 | #ifndef {{.Options.IncludeGuardPrefix}}TOKEN_H_
3 | #define {{.Options.IncludeGuardPrefix}}TOKEN_H_
4 |
5 | {{block "tokenHeaderIncludes" . -}}
6 | #include <array>
7 | #include <cstddef>
8 | #include <ostream>
9 |
10 | #include "{{.Options.AbslIncludePrefix}}/strings/string_view.h"
11 | {{end}}
12 | namespace {{.Options.Namespace}} {
13 |
14 | // Token is an enum of all terminal symbols of the {{.Name}} language.
15 | enum class Token {
16 |   UNAVAILABLE = -1,
17 | {{- range $i, $tok := .Tokens}}
18 |   {{.ID}} = {{$i}},{{if .Comment}} /* {{.Comment}} */{{end}}
19 | {{- end}}
20 |   NumTokens = {{len .Tokens}}
21 | };
22 |
23 | constexpr inline std::array<absl::string_view,
24 |                             static_cast<size_t>(Token::NumTokens)>
25 |     tokenStr = {
26 | {{- range .Tokens}}
27 |   {{str_literal .ID}},{{if .Comment}} /* {{.Comment}} */{{end}}
28 | {{- end}}
29 | };
30 |
31 | constexpr inline std::array<absl::string_view,
32 |                             static_cast<size_t>(Token::NumTokens)>
33 |     tokenName = {
34 | {{- range .Tokens}}
35 |   {{stringify .Name}},{{if .Comment}} /* {{.Comment}} */{{end}}
36 | {{- end}}
37 | };
38 |
39 | inline std::ostream& operator<<(std::ostream& os, Token tok) {
40 |   int t = static_cast<int>(tok);
41 |   if (t >= 0 && t < tokenStr.size()) {
42 |     return os << tokenStr[t];
43 |   }
44 |   return os << "token(" << t << ")";
45 | }
46 |
47 | } // namespace {{.Options.Namespace}}
48 |
49 | #endif // {{.Options.IncludeGuardPrefix}}TOKEN_H_
50 |
--------------------------------------------------------------------------------
/gen/templates/go_ast.go.tmpl:
--------------------------------------------------------------------------------
1 | {{ template "header" . -}}
2 | package ast
3 |
4 | // Interfaces.
5 |
6 | {{$baseNode := concat (title .Name) "Node" -}}
7 | type {{$baseNode}} interface {
8 | 	{{$baseNode}}() *Node
9 | }
10 |
11 | {{if .Parser.MappedTokens -}}
12 | type Token struct {
13 | 	*Node
14 | }
15 |
16 | {{end -}}
17 |
18 | type NilNode struct {}
19 |
20 | var nilInstance = &NilNode{}
21 |
22 | // All types implement {{$baseNode}}.
23 | {{- range .Parser.Types.RangeTypes }}
24 | func (n {{.Name}}) {{$baseNode}}() *Node { return n.Node }
25 | {{- end}}
26 | {{- range .Options.ExtraTypes }}
27 | func (n {{.Name}}) {{$baseNode}}() *Node { return n.Node }
28 | {{- end}}
29 | func (NilNode) {{$baseNode}}() *Node { return nil }
30 |
31 | {{ range .Parser.Types.Categories -}}
32 | {{ $catMethod := concat .Name "Node" | first_lower}}
33 | type {{.Name}} interface {
34 | 	{{$baseNode}}
35 | 	{{$catMethod}}()
36 | }
37 |
38 | // {{$catMethod}}() ensures that only the following types can be
39 | // assigned to {{.Name}}.
40 | //
41 | {{- range .Types }}
42 | func ({{.}}) {{$catMethod}}() {}
43 | {{- end}}
44 | {{- if ne .Name "TokenSet" }}
45 | func (NilNode) {{$catMethod}}() {}
46 | {{- end}}
47 |
48 | {{end -}}
49 |
50 | // Types.
51 |
52 | {{ range .Parser.Types.RangeTypes -}}
53 | {{$name := .Name -}}
54 | {{$rt := . -}}
55 | type {{$name}} struct {
56 | 	*Node
57 | }
58 |
59 | {{ range $i, $f := .Fields -}}
60 | {{if gt (len .Selector) 1 -}}
61 | var selector{{$name}}{{title .Name}} = {{pkg "selector"}}OneOf({{range $ind, $t := expand_selector .Selector}}{{if ne $ind 0}}, {{end}}{{pkg "main"}}{{$t}}{{end}})
62 | {{end -}}
63 |
64 | {{$innerType := unwrap_with_default .Selector $baseNode -}}
65 | func (n {{$name}}) {{title .Name | escape_reserved}}() {{if and (not .IsRequired) (not .IsList)}}({{end -}}
66 | {{if .IsList}}[]{{end -}}
67 | {{$innerType -}}
68 | {{if and (not .IsRequired) (not .IsList)}}, bool){{end -}}
69 | {
70 | 	{{if .IsList }}nodes{{else}}child{{end}} := n{{range $i, $step := $rt.DecodeField $i}}.
71 | {{- if gt $i 0}}{{if .IsList}}NextAll{{else}}Next{{end}}{{else}}{{if .IsList}}Children{{else}}Child{{end}}{{end -}} 72 | ({{if gt (len .Selector) 1 -}}selector{{$name}}{{title .Name}}{{else}}{{pkg "selector"}}{{index .Selector 0}}{{end}}){{end}} 73 | {{if .IsList -}} 74 | var ret = make([]{{$innerType}}, 0, len(nodes)) 75 | for _, node := range nodes { 76 | ret = append(ret, {{if and (eq (len .Selector) 1) (not (is_cat (index .Selector 0)))}}{{index .Selector 0}}{node}{{else}}To{{$baseNode}}(node).({{$innerType}}){{end}}) 77 | } 78 | return ret 79 | {{ else -}} 80 | return {{if and (eq (len .Selector) 1) (not (is_cat (index .Selector 0)))}}{{index .Selector 0}}{child}{{else}}To{{$baseNode}}(child).({{$innerType}}){{end}}{{if not .IsRequired -}}, child.IsValid(){{end}} 81 | {{ end -}} 82 | } 83 | 84 | {{ end -}} 85 | {{ end -}} 86 | {{ range .Options.ExtraTypes -}} 87 | type {{.Name}} struct { 88 | *Node 89 | } 90 | 91 | {{ end}} 92 | -------------------------------------------------------------------------------- /gen/templates/go_ast_factory.go.tmpl: -------------------------------------------------------------------------------- 1 | {{ template "header" . -}} 2 | package ast 3 | 4 | {{$baseNode := concat (title .Name) "Node" -}} 5 | func To{{$baseNode}}(n *Node) {{$baseNode}} { 6 | switch n.Type() { 7 | {{- range .Parser.Types.RangeTypes }} 8 | case {{pkg "main"}}{{.Name}}: 9 | return &{{.Name}}{n} 10 | {{- end}} 11 | {{- range .Options.ExtraTypes }} 12 | case {{pkg "main"}}{{.Name}}: 13 | return &{{.Name}}{n} 14 | {{- end}} 15 | case {{pkg "main"}}NoType: 16 | return nilInstance 17 | } 18 | panic("fmt".Errorf("ast: unknown node type %v", n.Type())) 19 | return nil 20 | } 21 | -------------------------------------------------------------------------------- /gen/templates/go_ast_parse.go.tmpl: -------------------------------------------------------------------------------- 1 | {{ template "header" . -}} 2 | package ast 3 | 4 | // Parse parses a given utf-8 content into an AST. 
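// Rather than returning a tree from the parser directly, Parse wires the
// parser's reporting callback to a builder (see addNode below), which
// assembles the nodes bottom-up on a stack.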
5 | func Parse({{if $.Options.Cancellable}}ctx "context".Context, {{end}}path, content string{{if .Parser.IsRecovering }}, eh {{pkg "main"}}ErrorHandler{{end}}) (*Tree, error) { 6 | b := newBuilder(path, content) 7 | {{ if $.Options.TokenStream -}} 8 | var s {{pkg "main"}}TokenStream 9 | s.Init(content, b.addNode) 10 | {{ else -}} 11 | var l {{pkg "main"}}Lexer 12 | l.Init(content) 13 | {{ end -}} 14 | var p {{pkg "main"}}Parser 15 | p.Init({{if .Parser.IsRecovering }}eh, {{end}}b.addNode) 16 | {{- range $index, $inp := .Parser.Inputs }} 17 | {{- if $inp.Synthetic }}{{continue}}{{end}} 18 | err := p.Parse{{if $.Parser.HasMultipleUserInputs}}{{$.NontermID $inp.Nonterm}}{{end}}({{if $.Options.Cancellable}}ctx, {{end}}&{{if $.Options.TokenStream}}s{{else}}l{{end}}) 19 | {{- break }} 20 | {{- end }} 21 | if err != nil { 22 | return nil, err 23 | } 24 | return b.build() 25 | } 26 | 27 | type builder struct { 28 | tree *Tree 29 | stack []*Node 30 | err error 31 | } 32 | 33 | func newBuilder(path, content string) *builder { 34 | return &builder{ 35 | tree: newTree(path, content), 36 | stack: make([]*Node, 0, 512), 37 | } 38 | } 39 | 40 | func (b *builder) addNode(t {{template "nodeTypeRef" $}}, offset, endoffset int) { 41 | start := len(b.stack) 42 | end := start 43 | for start > 0 && b.stack[start-1].offset >= offset { 44 | start-- 45 | if b.stack[start].offset >= endoffset { 46 | end-- 47 | } 48 | } 49 | out := &Node{ 50 | tree: b.tree, 51 | t: t, 52 | offset: offset, 53 | endoffset: endoffset, 54 | } 55 | if start < end { 56 | out.firstChild = b.stack[start] 57 | var prev *Node 58 | for i := end - 1; i >= start; i-- { 59 | n := b.stack[i] 60 | n.parent = out 61 | n.next = prev 62 | prev = n 63 | } 64 | } 65 | if end == len(b.stack) { 66 | b.stack = append(b.stack[:start], out) 67 | } else if start < end { 68 | b.stack[start] = out 69 | l := copy(b.stack[start+1:], b.stack[end:]) 70 | b.stack = b.stack[:start+1+l] 71 | } else { 72 | b.stack = append(b.stack, nil) 73 | copy(b.stack[start+1:], b.stack[start:]) 74 | b.stack[start] = out 75 | } 76 | } 77 | {{ if not .Options.FileNode }} 78 | var errNumRoots = "errors".New("exactly one root node is expected") 79 | {{ end }} 80 | func (b *builder) build() (*Tree, error) { 81 | if b.err != nil { 82 | return nil, b.err 83 | } 84 | {{- if .Options.FileNode }} 85 | b.addNode({{template "nodeTypePkg" $}}{{.Options.FileNode}}, 0, len(b.tree.content)) 86 | {{- else }} 87 | if len(b.stack) != 1 { 88 | return nil, errNumRoots 89 | } 90 | {{- end }} 91 | b.tree.root = b.stack[0] 92 | return b.tree, nil 93 | } -------------------------------------------------------------------------------- /gen/templates/go_cached.go.tmpl: -------------------------------------------------------------------------------- 1 | {{ define "node_id" }}{{.Options.NodePrefix}}{{.Name}}{{end -}} 2 | -------------------------------------------------------------------------------- /gen/templates/go_lexer_tables.go.tmpl: -------------------------------------------------------------------------------- 1 | {{- template "header" . 
-}} 2 | package {{short_pkg .Options.Package}} 3 | 4 | const tmNumClasses = {{.Lexer.Tables.NumSymbols}} 5 | 6 | {{$runeType := bits .Lexer.Tables.NumSymbols -}} 7 | {{if gt .Lexer.Tables.LastMapEntry.Start 2048 -}} 8 | type mapRange struct { 9 | lo rune 10 | hi rune 11 | defaultVal uint{{$runeType}} 12 | val []uint{{$runeType}} 13 | } 14 | 15 | func mapRune(c rune) int { 16 | lo := 0 17 | hi := len(tmRuneRanges) 18 | for lo < hi { 19 | m := lo + (hi-lo)/2 20 | r := tmRuneRanges[m] 21 | if c < r.lo { 22 | hi = m 23 | } else if c >= r.hi { 24 | lo = m + 1 25 | } else { 26 | i := int(c - r.lo) 27 | if i < len(r.val) { 28 | return int(r.val[i]) 29 | } 30 | return int(r.defaultVal) 31 | } 32 | } 33 | return {{.Lexer.Tables.LastMapEntry.Target}} 34 | } 35 | 36 | // Latin-1 characters. 37 | var tmRuneClass = []uint{{$runeType}}{ 38 | {{- int_array (.Lexer.Tables.SymbolArr 256) "\t" 79 -}} 39 | } 40 | 41 | const tmRuneClassLen = 256 42 | const tmFirstRule = {{.Lexer.Tables.ActionStart}} 43 | 44 | var tmRuneRanges = []mapRange{ 45 | {{range .Lexer.Tables.CompressedMap 256}} { {{- .Lo}}, {{.Hi}}, {{.DefaultVal}}, {{if .Vals}}[]uint{{$runeType}}{ 46 | {{- int_array .Vals "\t\t" 78}} }{{else}}nil{{end -}} }, 47 | {{end -}} 48 | } 49 | 50 | {{else -}} 51 | {{ $runeArr := .Lexer.Tables.SymbolArr 0 -}} 52 | var tmRuneClass = []uint{{$runeType}}{ 53 | {{- int_array $runeArr "\t" 79 -}} 54 | } 55 | 56 | const tmRuneClassLen = {{len $runeArr}} 57 | const tmFirstRule = {{.Lexer.Tables.ActionStart}} 58 | 59 | {{end -}} 60 | {{ if gt (len .Lexer.StartConditions) 1 -}} 61 | var tmStateMap = []int{ 62 | {{- int_array .Lexer.Tables.StateMap "\t" 79 -}} 63 | } 64 | 65 | {{end -}} 66 | {{if .Lexer.RuleToken -}} 67 | var tmToken = []{{template "tokenType" .}}{ 68 | {{- int_array .Lexer.RuleToken "\t" 79 -}} 69 | } 70 | 71 | {{end -}} 72 | var tmLexerAction = []int{{bits_per_element .Lexer.Tables.Dfa}}{ 73 | {{- int_array .Lexer.Tables.Dfa "\t" 79 -}} 74 | } 75 | 76 | {{- if .Lexer.Tables.Backtrack}} 77 | 78 | var tmBacktracking = []int{ 79 | {{- range .Lexer.Tables.Backtrack}} 80 | {{.Action}}, {{.NextState}},{{if .Details}} // {{.Details}}{{end}} 81 | {{- end}} 82 | } 83 | {{- end}} -------------------------------------------------------------------------------- /gen/templates/go_listener.go.tmpl: -------------------------------------------------------------------------------- 1 | {{- template "header" . -}} 2 | package {{short_pkg .Options.Package}} 3 | 4 | {{ block "NodeDecl" . -}} 5 | {{if .Parser.UsedFlags -}} 6 | type NodeType uint16 7 | 8 | type NodeFlags uint16 9 | 10 | type Listener func(t NodeType, flags NodeFlags, offset, endoffset int) 11 | 12 | {{ else -}} 13 | type NodeType int 14 | 15 | type Listener func(t NodeType, offset, endoffset int) 16 | 17 | {{ end -}} 18 | {{ end -}} 19 | {{ block "NodeEnum" . -}} 20 | const ( 21 | NoType {{template "nodeTypeRef" $}} = iota 22 | {{- range .Parser.Types.RangeTypes }} 23 | {{.Name}} {{- if gt (len .Fields) 0}} // {{.Descriptor}}{{end}} 24 | {{- end}} 25 | {{- range .Options.ExtraTypes }} 26 | {{.Name}} 27 | {{- end}} 28 | NodeTypeMax 29 | ) 30 | 31 | {{ end -}} 32 | {{ block "NodeString" . 
-}} 33 | var nodeTypeStr = [...]string{ 34 | "NONE", 35 | {{- range .Parser.Types.RangeTypes }} 36 | "{{.Name}}", 37 | {{- end}} 38 | {{- range .Options.ExtraTypes }} 39 | "{{.Name}}", 40 | {{- end}} 41 | } 42 | 43 | func (t NodeType) String() string { 44 | if t >= 0 && int(t) < len(nodeTypeStr) { 45 | return nodeTypeStr[t] 46 | } 47 | return "fmt".Sprintf("node(%d)", t) 48 | } 49 | 50 | {{ end -}} 51 | {{ block "NodeCategories" . -}} 52 | {{ range .Parser.Types.Categories -}} 53 | var {{.Name}} = []{{template "nodeTypeRef" $}}{ 54 | {{- range .Types }} 55 | {{template "nodeTypePkg" $}}{{node_id .}}, 56 | {{- end}} 57 | } 58 | 59 | {{end -}} 60 | {{end -}} -------------------------------------------------------------------------------- /gen/templates/go_selector.go.tmpl: -------------------------------------------------------------------------------- 1 | {{ template "header" . -}} 2 | package selector 3 | 4 | {{ block "Selector" . -}} 5 | {{ if .Options.IsEnabled "Selector" -}} 6 | type Selector func(nt {{template "nodeTypeRef" $}}) bool 7 | 8 | {{ end -}} 9 | {{ end -}} 10 | var ( 11 | Any = func(t {{template "nodeTypeRef" $}}) bool { return true } 12 | {{- range .Parser.Types.RangeTypes}} 13 | {{.Name}} = func(t {{template "nodeTypeRef" $}}) bool { return t == {{template "nodeTypePkg" $}}{{node_id .Name}} } 14 | {{- end}} 15 | {{- range .Options.ExtraTypes }} 16 | {{.Name}} = func(t {{template "nodeTypeRef" $}}) bool { return t == {{template "nodeTypePkg" $}}{{node_id .Name}} } 17 | {{- end }} 18 | {{- range .Parser.Types.Categories}} 19 | {{.Name}} = {{template "nodeOneOf" $}}({{pkg "main"}}{{.Name}}...) 20 | {{- end}} 21 | {{ block "additionalSelectors" . -}} 22 | {{end -}} 23 | ) 24 | 25 | {{ block "OneOf" . -}} 26 | {{ if .Options.IsEnabled "OneOf" -}} 27 | func OneOf(types ...{{template "nodeTypeRef" $}}) {{template "nodeTypeSel" $}} { 28 | if len(types) == 0 { 29 | return func({{template "nodeTypeRef" $}}) bool { return false } 30 | } 31 | const bits = 32 32 | max := 1 33 | for _, t := range types { 34 | if int(t) > max { 35 | max = int(t) 36 | } 37 | } 38 | size := (max + bits) / bits 39 | bitarr := make([]uint32, size) 40 | for _, t := range types { 41 | bitarr[uint(t)/bits] |= 1 << (uint(t) % bits) 42 | } 43 | return func(t {{template "nodeTypeRef" $}}) bool { 44 | i := uint(t)/bits 45 | return int(i) < len(bitarr) && bitarr[i]&(1<<(uint(t)%bits)) != 0 46 | } 47 | } 48 | {{ end -}} 49 | {{ end -}} -------------------------------------------------------------------------------- /gen/templates/go_shared.go.tmpl: -------------------------------------------------------------------------------- 1 | {{define "header" -}} 2 | // generated by Textmapper; DO NOT EDIT 3 | 4 | {{end}} 5 | 6 | {{ define "tokenType"}}{{pkg "token"}}Type{{end -}} 7 | {{ define "tokenPkg"}}{{pkg "token"}}{{end -}} 8 | 9 | {{ define "nodeTypeRef"}}{{pkg "main"}}NodeType{{end -}} 10 | {{ define "nodeFlagsRef"}}{{pkg "main"}}NodeFlags{{end -}} 11 | {{ define "nodeFlagsPkg"}}{{pkg "main"}}{{end -}} 12 | {{ define "nodeTypePkg"}}{{pkg "main"}}{{end -}} 13 | {{ define "nodeTypeSel"}}{{pkg "selector"}}Selector{{end -}} 14 | {{ define "nodeOneOf"}}{{pkg "selector"}}OneOf{{end -}} 15 | -------------------------------------------------------------------------------- /gen/templates/go_token.go.tmpl: -------------------------------------------------------------------------------- 1 | {{- template "header" . -}} 2 | package token 3 | 4 | // Type is an enum of all terminal symbols of the {{.Name}} language. 
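// Values are assigned from UNAVAILABLE (-1) up to the NumTokens sentinel, so all
// terminals of the generated language can be enumerated; a minimal sketch using
// the names declared below:
//
//	for tok := Type(0); tok < NumTokens; tok++ {
//		_ = tok.String() // the token's literal text or its name
//	}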
5 | type Type int32
6 |
7 | // Token values.
8 | const (
9 | UNAVAILABLE Type = iota - 1
10 | {{- range .Tokens}}
11 | {{.ID}}{{if .Comment}} // {{.Comment}}{{end}}
12 | {{- end}}
13 |
14 | NumTokens
15 | )
16 |
17 | var tokenStr = [...]string{
18 | {{- range .Tokens}}
19 | {{if .Comment}}{{str_literal .Comment}}{{else}}{{str_literal .ID}}{{end}},
20 | {{- end}}
21 | }
22 |
23 | func (tok Type) String() string {
24 | if tok >= 0 && int(tok) < len(tokenStr) {
25 | return tokenStr[tok]
26 | }
27 | return "fmt".Sprintf("token(%d)", tok)
28 | }
29 | -------------------------------------------------------------------------------- /gen/templates/ts_cached.go.tmpl: -------------------------------------------------------------------------------- 1 | {{ define "node_id" }}{{.Options.NodePrefix}}{{.Name}}{{end -}} -------------------------------------------------------------------------------- /gen/templates/ts_common.go.tmpl: --------------------------------------------------------------------------------
1 | {{- template "header" . -}}
2 |
3 | export const debugSyntax = {{ .Options.DebugParser }};
4 |
5 | export function debugLog(...data: any[]) : void {
6 | {{ if .Options.DebugParser -}}
7 | console.log("[DEBUG]", ...data);
8 | {{ else -}}
9 | // No-op. DebugParser == false
10 | {{ end -}}
11 | }
12 |
13 | {{ template "symbol" . -}}
14 | {{ template "stackEntry" . -}}
15 |
16 | {{- define "symbol" -}}
17 | {{ if .Options.IsEnabled "symbol" -}}
18 | export class Symbol {
19 | symbol: {{template "tokenTypeRef" $}};
20 | offset: number;
21 | endoffset: number;
22 |
23 | constructor(symbol: {{template "tokenTypeRef" $}}, offset: number, endoffset: number) {
24 | this.symbol = symbol;
25 | this.offset = offset;
26 | this.endoffset = endoffset;
27 | }
28 |
29 | copy() : Symbol {
30 | let copy = JSON.parse(JSON.stringify(this));
31 | return copy as Symbol;
32 | }
33 | }
34 |
35 | {{ end -}}
36 | {{ end -}}
37 |
38 | {{- define "stackEntry" -}}
39 | {{$stateType := bits_per_element .Parser.Tables.FromTo -}}
40 | {{ if .Options.IsEnabled "stackEntry" -}}
41 | export type StackEntry = {
42 | sym: Symbol;
43 | state: number;
44 | {{ if .Parser.HasAssocValues -}}
45 | value: any;
46 | {{ end -}}
47 | };
48 |
49 | {{ end -}}
50 | {{ end -}}
51 | -------------------------------------------------------------------------------- /gen/templates/ts_lexer_tables.go.tmpl: --------------------------------------------------------------------------------
1 | {{- template "header" . -}}
2 |
3 | export const tmNumClasses = {{.Lexer.Tables.NumSymbols}}
4 |
5 | {{$runeType := bits .Lexer.Tables.NumSymbols -}}
6 | {{if gt .Lexer.Tables.LastMapEntry.Start 2048 -}}
7 | export type mapRange = {
8 | lo: number;
9 | hi: number;
10 | defaultVal: number;
11 | val: number[];
12 | }
13 |
14 | export function mapRune(c: number): number {
15 | let lo = 0;
16 | let hi = tmRuneRanges.length;
17 | while (lo < hi) {
18 | let m = lo + Math.floor((hi - lo) / 2);
19 | let r = tmRuneRanges[m];
20 | if (c < r.lo) {
21 | hi = m;
22 | } else if (c >= r.hi) {
23 | lo = m + 1;
24 | } else {
25 | let i = c - r.lo;
26 | if (i < r.val.length) {
27 | return r.val[i];
28 | }
29 | return r.defaultVal;
30 | }
31 | }
32 | return {{.Lexer.Tables.LastMapEntry.Target}}
33 | }
34 |
35 | // Latin-1 characters.
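// Runes below tmRuneClassLen are mapped by indexing this table directly; anything
// larger is resolved through the mapRune binary search above.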
36 | export const tmRuneClass: number[] = [
37 | {{- int_array (.Lexer.Tables.SymbolArr 256) " " 79 -}}
38 | ];
39 |
40 | export const tmRuneClassLen = 256;
41 | export const tmFirstRule = {{.Lexer.Tables.ActionStart}};
42 |
43 | export const tmRuneRanges: mapRange[] = [
44 | {{range .Lexer.Tables.CompressedMap 256}} { lo: {{- .Lo}}, hi: {{.Hi}}, defaultVal: {{.DefaultVal}}, val: {{if .Vals}}[
45 | {{- int_array .Vals " " 78}} ]{{else}}[]{{end -}} },
46 | {{end -}}
47 | ];
48 |
49 | {{else -}}
50 | {{ $runeArr := .Lexer.Tables.SymbolArr 0 -}}
51 | export const tmRuneClass: number[] = [
52 | {{- int_array $runeArr " " 79 -}}
53 | ];
54 |
55 | export const tmRuneClassLen = {{len $runeArr}};
56 | export const tmFirstRule = {{.Lexer.Tables.ActionStart}};
57 |
58 | {{end -}}
59 | {{ if gt (len .Lexer.StartConditions) 1 -}}
60 | export const tmStateMap: number[] = [
61 | {{- int_array .Lexer.Tables.StateMap " " 79 -}}
62 | ];
63 |
64 | {{end -}}
65 | {{if .Lexer.RuleToken -}}
66 | export const tmToken: {{template "tokenTypeRef" $}}[] = [
67 | {{- int_array .Lexer.RuleToken " " 79 -}}
68 | ];
69 |
70 | {{end -}}
71 | export const tmLexerAction: number[] = [
72 | {{- int_array .Lexer.Tables.Dfa " " 79 -}}
73 | ];
74 |
75 | {{- if .Lexer.Tables.Backtrack}}
76 |
77 | export const tmBacktracking: number[] = [
78 | {{- range .Lexer.Tables.Backtrack}}
79 | {{.Action}}, {{.NextState}},{{if .Details}} // {{.Details}}{{end}}
80 | {{- end}}
81 | ];
82 | {{- end}} -------------------------------------------------------------------------------- /gen/templates/ts_listener.go.tmpl: --------------------------------------------------------------------------------
1 | {{- template "header" . -}}
2 |
3 | export enum NodeType {
4 | NoType,
5 | {{- range .Parser.Types.RangeTypes }}
6 | {{.Name}}, {{- if gt (len .Fields) 0}} // {{.Descriptor}}{{end}}
7 | {{- end}}
8 | {{- range .Options.ExtraTypes }}
9 | {{.Name}},
10 | {{- end}}
11 | NodeTypeMax
12 | };
13 |
14 | {{if .Parser.UsedFlags -}}
15 | {{ $flags := .AllFlags -}}
16 | {{ if $flags -}}
17 | export enum NodeFlags {
18 | None = -1,
19 | {{ range $i, $it := $flags -}}
20 | {{$it}} = {{$i}},
21 | {{ end -}}
22 | };
23 | {{ end -}}
24 |
25 | export type Listener = (t: NodeType, flags: NodeFlags, offset: number, endOffset: number) => void;
26 | {{ else -}}
27 | export type Listener = (t: NodeType, offset: number, endOffset: number) => void;
28 | {{ end -}}
29 |
30 | {{ block "NodeCategories" . -}}
31 | {{ range .Parser.Types.Categories -}}
32 | export const {{.Name}} : NodeType[] = [
33 | {{- range .Types }}
34 | NodeType.{{node_id .}},
35 | {{- end}}
36 | ];
37 | {{end -}}
38 | {{end -}} -------------------------------------------------------------------------------- /gen/templates/ts_selector.go.tmpl: --------------------------------------------------------------------------------
1 | {{ template "header" . -}}
2 |
3 | import * as {{template "listenerPkg"}} from './listener';
4 |
5 | {{ block "Selector" .
-}}
6 | {{ if .Options.IsEnabled "Selector" -}}
7 | export type Selector = (nt : {{template "nodeTypeRef" $}}) => boolean;
8 | {{ end -}}
9 | {{ end -}}
10 |
11 | export const Any : Selector = (nt : {{template "nodeTypeRef" $}}) => true;
12 | {{- range .Parser.Types.RangeTypes}}
13 | export const {{.Name}} : Selector = (nt : {{template "nodeTypeRef" $}}) : boolean => nt === {{template "nodeTypeRef" $}}.{{node_id .Name}};
14 | {{- end}}
15 | {{- range .Options.ExtraTypes }}
16 | export const {{.Name}} : Selector = (nt : {{template "nodeTypeRef" $}}) : boolean => nt === {{template "nodeTypeRef" $}}.{{node_id .Name}};
17 | {{- end }}
18 | {{ block "additionalSelectors" . -}}
19 | {{ end -}}
20 | -------------------------------------------------------------------------------- /gen/templates/ts_shared.go.tmpl: --------------------------------------------------------------------------------
1 | {{define "header" -}}
2 | // generated by Textmapper; DO NOT EDIT
3 | {{end -}}
4 |
5 | {{ define "tokenType"}}TokenType{{end -}}
6 | {{ define "tokenTypeRef"}}"./token".{{template "tokenType" .}}{{end -}}
7 |
8 | {{ define "commonPkg"}}common{{end -}}
9 | {{ define "lexerPkg"}}lexer{{end -}}
10 | {{ define "streamPkg"}}stream{{end -}}
11 |
12 | {{ define "listenerPkg"}}listener{{end -}}
13 | {{ define "listenerTypeRef"}}{{template "listenerPkg"}}.Listener{{end -}}
14 | {{ define "nodeTypeRef"}}{{template "listenerPkg"}}.NodeType{{end -}}
15 | {{ define "nodeFlagsPkg"}}{{template "listenerPkg"}}{{end -}}
16 | {{ define "nodeFlagsRef"}}{{template "nodeFlagsPkg"}}.NodeFlags{{end -}} -------------------------------------------------------------------------------- /gen/templates/ts_token.go.tmpl: --------------------------------------------------------------------------------
1 | {{- template "header" . -}}
2 |
3 | // Token values.
4 | export enum TokenType {
5 | UNAVAILABLE = -1,
6 | {{- range $i, $tok := .Tokens}}
7 | {{$tok.ID}} = {{$i}},{{if .Comment}} // {{.Comment}}{{end}}
8 | {{- end}}
9 |
10 | NumTokens,
11 | }
12 | -------------------------------------------------------------------------------- /gen/templates/ts_tree.go.tmpl: --------------------------------------------------------------------------------
1 | {{ template "header" . -}}
2 |
3 | import { NodeType } from './listener'
4 | import { Selector } from './selector'
5 |
6 | /**
7 | * Tree represents an AST for some parsed content.
8 | */
9 | export interface Tree {
10 |
11 | /**
12 | * Returns the content that was parsed into this tree.
13 | */
14 | text(): string;
15 |
16 | /**
17 | * Returns the root node of the tree.
18 | */
19 | root(): Node;
20 | }
21 |
22 | /**
23 | * Node represents a Node in an AST.
24 | */
25 | export interface Node {
26 |
27 | /**
28 | * The {@link NodeType} represented by this node.
29 | */
30 | type(): NodeType;
31 |
32 | /**
33 | * The starting offset in the parsed content represented by this node.
34 | */
35 | offset(): number;
36 |
37 | /**
38 | * The ending offset (exclusive) in the parsed content represented by this node.
39 | */
40 | endOffset(): number;
41 |
42 | /**
43 | * The parsed content represented by this node.
44 | * It is essentially the tree.text().substring(offset(), endOffset()).
45 | */
46 | text(): string;
47 |
48 | /**
49 | * Returns the start position of the content of this node as 1-based line and column.
50 | */
51 | lineColumn(): { line: number; column: number };
52 |
53 | /**
54 | * Returns all sibling nodes of this node that are accepted by the given {@link Selector}.
55 | *
56 | * @param selector the selector for filtering nodes
57 | * @returns an {@link Iterable} of sibling nodes that are accepted by the selector
58 | */
59 | nextAll(selector: Selector): Iterable<Node>;
60 |
61 | /**
62 | * Returns the first child node of this node that is accepted by the given {@link Selector}.
63 | *
64 | * @param selector the selector for filtering nodes
65 | * @returns the first child node accepted by the selector or null if no such node is found
66 | */
67 | child(selector: Selector): Node | null;
68 |
69 | /**
70 | * Returns all child nodes of this node that are accepted by the given {@link Selector}.
71 | *
72 | * @param selector the selector for filtering nodes
73 | * @returns an {@link Iterable} of child nodes that are accepted by the selector
74 | */
75 | children(selector: Selector): Iterable<Node>;
76 |
77 | /**
78 | * Returns all descendant nodes of this node that are accepted by the given {@link Selector}
79 | * in pre-order traversal.
80 | *
81 | * @param selector the selector for filtering nodes
82 | * @returns an {@link Iterable} of descendant nodes that are accepted by the selector
83 | */
84 | descendants(selector: Selector): Iterable<Node>;
85 | }
86 | -------------------------------------------------------------------------------- /go.mod: --------------------------------------------------------------------------------
1 | module github.com/inspirer/textmapper
2 |
3 | go 1.22
4 |
5 | require (
6 | go.lsp.dev/jsonrpc2 v0.10.0
7 | go.lsp.dev/protocol v0.12.0
8 | go.lsp.dev/uri v0.3.0
9 | go.uber.org/zap v1.27.0
10 | )
11 |
12 | require (
13 | github.com/segmentio/asm v1.2.0 // indirect
14 | github.com/segmentio/encoding v0.4.0 // indirect
15 | go.lsp.dev/pkg v0.0.0-20210717090340-384b27a52fb2 // indirect
16 | go.uber.org/multierr v1.11.0 // indirect
17 | golang.org/x/sys v0.24.0 // indirect
18 | )
19 | -------------------------------------------------------------------------------- /go.sum: --------------------------------------------------------------------------------
1 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
2 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
3 | github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
4 | github.com/google/go-cmp v0.5.6 h1:BKbKCqvP6I+rmFHt06ZmyQtvB8xAkWdhFyr0ZUNZcxQ=
5 | github.com/google/go-cmp v0.5.6/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
6 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
7 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
8 | github.com/segmentio/asm v1.2.0 h1:9BQrFxC+YOHJlTlHGkTrFWf59nbL3XnCoFLTwDCI7ys=
9 | github.com/segmentio/asm v1.2.0/go.mod h1:BqMnlJP91P8d+4ibuonYZw9mfnzI9HfxselHZr5aAcs=
10 | github.com/segmentio/encoding v0.4.0 h1:MEBYvRqiUB2nfR2criEXWqwdY6HJOUrCn5hboVOVmy8=
11 | github.com/segmentio/encoding v0.4.0/go.mod h1:/d03Cd8PoaDeceuhUUUQWjU0KhWjrmYrWPgtJHYZSnI=
12 | github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk=
13 | github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
14 | go.lsp.dev/jsonrpc2 v0.10.0 h1:Pr/YcXJoEOTMc/b6OTmcR1DPJ3mSWl/SWiU1Cct6VmI=
15 | go.lsp.dev/jsonrpc2 v0.10.0/go.mod h1:fmEzIdXPi/rf6d4uFcayi8HpFP1nBF99ERP1htC72Ac=
16 | go.lsp.dev/pkg v0.0.0-20210717090340-384b27a52fb2 h1:hCzQgh6UcwbKgNSRurYWSqh8MufqRRPODRBblutn4TE=
17 | go.lsp.dev/pkg v0.0.0-20210717090340-384b27a52fb2/go.mod
h1:gtSHRuYfbCT0qnbLnovpie/WEmqyJ7T4n6VXiFMBtcw= 18 | go.lsp.dev/protocol v0.12.0 h1:tNprUI9klQW5FAFVM4Sa+AbPFuVQByWhP1ttNUAjIWg= 19 | go.lsp.dev/protocol v0.12.0/go.mod h1:Qb11/HgZQ72qQbeyPfJbu3hZBH23s1sr4st8czGeDMQ= 20 | go.lsp.dev/uri v0.3.0 h1:KcZJmh6nFIBeJzTugn5JTU6OOyG0lDOo3R9KwTxTYbo= 21 | go.lsp.dev/uri v0.3.0/go.mod h1:P5sbO1IQR+qySTWOCnhnK7phBx+W3zbLqSMDJNTw88I= 22 | go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= 23 | go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE= 24 | go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0= 25 | go.uber.org/multierr v1.11.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y= 26 | go.uber.org/zap v1.27.0 h1:aJMhYGrd5QSmlpLMr2MftRKl7t8J8PTZPA732ud/XR8= 27 | go.uber.org/zap v1.27.0/go.mod h1:GB2qFLM7cTU87MWRP2mPIjqfIDnGu+VIO4V/SdhGo2E= 28 | golang.org/x/sys v0.24.0 h1:Twjiwq9dn6R1fQcyiK+wQyHWfaz/BJB+YIpzU/Cv3Xg= 29 | golang.org/x/sys v0.24.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= 30 | golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= 31 | golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 h1:go1bK/D/BFZV2I8cIQd1NKEZ+0owSTG1fDTci4IqFcE= 32 | golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= 33 | gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= 34 | gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 35 | -------------------------------------------------------------------------------- /grammar/debug.go: -------------------------------------------------------------------------------- 1 | package grammar 2 | 3 | import ( 4 | "fmt" 5 | "math" 6 | "strings" 7 | 8 | "github.com/inspirer/textmapper/util/debug" 9 | ) 10 | 11 | // TableStats returns a string with statistics about the generated lexer tables. 
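// The report mirrors the Fprintf calls below and has roughly this shape
// (numbers vary per grammar):
//
//	Lexer (unicode):
//		N states, N symbols, N start conditions, N backtracking checkpoints
//		DFA = ..., Backtracking = ..., StateMap = ..., SymbolMap = ...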
12 | func (l *Lexer) TableStats() string { 13 | var b strings.Builder 14 | 15 | t := l.Tables 16 | if t == nil { 17 | return "No tables\n" 18 | } 19 | 20 | if t.ScanBytes { 21 | fmt.Fprintf(&b, "Lexer (bytes):\n") 22 | } else { 23 | fmt.Fprintf(&b, "Lexer (unicode):\n") 24 | } 25 | 26 | fmt.Fprintf(&b, "\t%v states, %v symbols, %v start conditions, %v backtracking checkpoints\n", len(t.Dfa)/t.NumSymbols, t.NumSymbols, len(t.StateMap), len(t.Backtrack)) 27 | fmt.Fprintf(&b, "\tDFA = %s, Backtracking = %s, ", debug.Size(sizeBytes(t.Dfa)), debug.Size(len(t.Backtrack)*8)) 28 | fmt.Fprintf(&b, "StateMap = %s, SymbolMap = %s\n", debug.Size(len(t.StateMap)*4), debug.Size(len(t.SymbolMap)*8)) 29 | return b.String() 30 | } 31 | 32 | func (p *Parser) TableStats() string { 33 | var b strings.Builder 34 | 35 | t := p.Tables 36 | if t == nil { 37 | return "No tables\n" 38 | } 39 | 40 | fmt.Fprintf(&b, "LALR:\n\t%v terminals, %v nonterminals, %v rules, %v states, %v markers, %v lookaheads\n", p.NumTerminals, len(p.Nonterms), len(t.RuleLen), t.NumStates, len(t.Markers), len(t.Lookaheads)) 41 | fmt.Fprintf(&b, "Action Table:\n\t%d x %d, expanded size = %s (%s in default encoding)\n", t.NumStates, p.NumTerminals, debug.Size(t.NumStates*p.NumTerminals*4), debug.Size((len(t.Action)+len(t.Lalr))*4)) 42 | var lr0, nonZero, total int 43 | for _, val := range t.Action { 44 | if val >= -2 { 45 | lr0++ 46 | continue 47 | } 48 | total += p.NumTerminals 49 | for a := -3 - val; t.Lalr[a] >= 0; a += 2 { 50 | if t.Lalr[a+1] >= 0 { 51 | nonZero++ 52 | } 53 | } 54 | } 55 | fmt.Fprintf(&b, "\tLR0 states: %v (%.2v%%)\n", lr0, float64(lr0*100)/float64(t.NumStates)) 56 | fmt.Fprintf(&b, "\t%.2v%% of the LALR table is reductions (%s)\n", float64(nonZero*100)/float64(total), debug.Size(nonZero*4)) 57 | 58 | syms := p.NumTerminals + len(p.Nonterms) 59 | fmt.Fprintf(&b, "Goto Table:\n\t%d x %d, expanded size = %s (%s in default encoding)\n", t.NumStates, syms, debug.Size(t.NumStates*syms*4), debug.Size(len(t.Goto)*4+sizeBytes(t.FromTo))) 60 | 61 | nonZero = len(t.FromTo) / 2 62 | total = t.NumStates * syms 63 | fmt.Fprintf(&b, "\t%.2v%% of the GOTO table is populated (%s), %v transitions\n", float64(nonZero*100)/float64(total), debug.Size(nonZero*4), nonZero) 64 | 65 | return b.String() 66 | } 67 | 68 | func sizeBytes(arr []int) int { 69 | v := 1 70 | for _, i := range arr { 71 | if i < math.MinInt8 || i > math.MaxInt8 { 72 | if i < math.MinInt16 || i > math.MaxInt16 { 73 | v = 4 74 | break 75 | } 76 | v = 2 77 | } 78 | } 79 | return len(arr) * v 80 | } 81 | -------------------------------------------------------------------------------- /lalr/lookahead_test.go: -------------------------------------------------------------------------------- 1 | package lalr 2 | 3 | import ( 4 | "fmt" 5 | "strings" 6 | "testing" 7 | ) 8 | 9 | func TestLookahead(t *testing.T) { 10 | var tests = []struct { 11 | input [][]int // the last int in each row is the outcome 12 | want string 13 | }{ 14 | {[][]int{ 15 | {1, 7}, 16 | {2, 8}, 17 | }, "ERR: ambiguous order"}, 18 | {[][]int{ 19 | {1, 2, 7}, 20 | {2, 1, 8}, 21 | }, "ERR: inconsistent order"}, 22 | {[][]int{ 23 | {1, 7}, 24 | {-1, 8}, 25 | }, "1 -> 7, default -> 8"}, 26 | {[][]int{ 27 | {-1, 8}, 28 | {1, 7}, 29 | }, "1 -> 7, default -> 8"}, 30 | {[][]int{ 31 | {1, 7}, 32 | {1, -2, 8}, 33 | }, "ERR: cannot decide on the next lookahead"}, 34 | {[][]int{ 35 | {1, -2, 7}, 36 | {1, -2, 8}, 37 | }, "ERR: cannot decide on the next lookahead"}, 38 | {[][]int{ 39 | {1, 2, 7}, 40 | {1, -2, 8}, 41 | }, "2 
-> 7, default -> 8"},
42 | {[][]int{
43 | {1, 2, 7},
44 | {-2, 8},
45 | }, "2 -> 7, default -> 8"},
46 | {[][]int{
47 | {6, 1, 2, 3, 7},
48 | {6, 1, 2, -3, 8},
49 | {6, -1, 2, 9},
50 | {6, -1, -2, 10},
51 | }, "!2 -> 10, !1 -> 9, 3 -> 7, default -> 8"},
52 | {[][]int{
53 | {6, 1, 2, 3, 7},
54 | {-6, 1, 2, -3, 8},
55 | {-6, -1, 2, 9},
56 | {6, -1, -2, 10},
57 | }, "!2 -> 10, 6 -> 7, 1 -> 8, default -> 9"},
58 | {[][]int{
59 | {1, -2, 8, 7},
60 | {1, -2, -8, 8},
61 | }, "8 -> 7, default -> 8"},
62 | {[][]int{
63 | {1, -2, 8, 7},
64 | {1, -2, -9, 8},
65 | }, "ERR: ambiguous order"},
66 | }
67 |
68 | for _, tc := range tests {
69 | var lookaheads []Lookahead
70 | for _, la := range tc.input {
71 | var preds []Predicate
72 | for _, p := range la[:len(la)-1] {
73 | var negated bool
74 | if p < 0 {
75 | p = -p
76 | negated = true
77 | }
78 | preds = append(preds, Predicate{Input: int32(p), Negated: negated})
79 | }
80 | lookaheads = append(lookaheads, Lookahead{Predicates: preds, Nonterminal: Sym(la[len(la)-1])})
81 | }
82 | rule, err := newLookaheadRule(lookaheads)
83 | if err != nil {
84 | if got := "ERR: " + err.Error(); got != tc.want {
85 | t.Errorf("newLookaheadRule(%v) failed with %v, want %v", tc.input, got, tc.want)
86 | }
87 | continue
88 | }
89 | if got := ruleString(rule); got != tc.want {
90 | t.Errorf("newLookaheadRule(%v) = %s, want %s", tc.input, got, tc.want)
91 | }
92 | }
93 | }
94 |
95 | func ruleString(rule LookaheadRule) string {
96 | var b strings.Builder
97 | for _, c := range rule.Cases {
98 | if c.Negated {
99 | b.WriteString("!")
100 | }
101 | fmt.Fprintf(&b, "%d -> %d, ", c.Input, c.Target)
102 | }
103 | fmt.Fprintf(&b, "default -> %d", rule.DefaultTarget)
104 | return b.String()
105 | }
106 | -------------------------------------------------------------------------------- /lex/compress.go: --------------------------------------------------------------------------------
1 | package lex
2 |
3 | import (
4 | "encoding/binary"
5 | "log"
6 | "sort"
7 | )
8 |
9 | // symlist is a sorted list of DFA input symbols.
10 | type symlist []Sym
11 |
12 | func (l symlist) contains(s Sym) bool {
13 | if len(l) < 10 {
14 | for _, elem := range l {
15 | if elem == s {
16 | return true
17 | }
18 | }
19 | return false
20 | }
21 |
22 | n := sort.Search(len(l), func(i int) bool { return l[i] >= s })
23 | return n < len(l) && l[n] == s
24 | }
25 |
26 | // compressCharsets combines unicode runes into equivalence classes that become input symbols for
27 | // the generated DFA. All characters mapped to the same symbol belong to exactly the same subset of
28 | // the given charsets: each class is either fully contained in or fully disjoint from every charset.
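// For example, given the charsets [a-z] and [A-Z], the rune space splits into
// class 1 (all other runes), class 2 (A-Z), and class 3 (a-z), and the two
// charsets compress to the symbol lists [3] and [2] respectively (see the
// table in compress_test.go below).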
29 | func compressCharsets(sets []charset, opts CharsetOptions) (out []symlist, inputMap []RangeEntry) { 30 | type rng struct { 31 | index int 32 | start, end rune // inclusive 33 | delta int 34 | } 35 | 36 | var ranges []rng 37 | for index, cs := range sets { 38 | for i := 0; i < len(cs); i += 2 { 39 | ranges = append(ranges, rng{ 40 | index: index, 41 | start: cs[i], 42 | end: cs[i+1], 43 | delta: 1, 44 | }) 45 | if cs[i+1] > 0xff && opts.ScanBytes { 46 | log.Fatalf("invariant failure: charset %v is not compatible with scanBytes", cs) 47 | } 48 | } 49 | } 50 | 51 | sort.Slice(ranges, func(i, j int) bool { 52 | if ranges[i].start != ranges[j].start { 53 | return ranges[i].start < ranges[j].start 54 | } 55 | if ranges[i].end != ranges[j].end { 56 | return ranges[i].end < ranges[j].end 57 | } 58 | return ranges[i].index < ranges[j].index 59 | }) 60 | 61 | out = make([]symlist, len(sets)) 62 | chunk := make([]int, 0, len(sets)) 63 | b := make([]byte, 4*len(sets)) 64 | 65 | var start rune 66 | var first int 67 | 68 | counter := Sym(1) 69 | m := make(map[string]Sym) 70 | dd := make(map[[2]int]bool) 71 | l := len(ranges) 72 | maxRune := opts.maxRune() 73 | for start <= maxRune { 74 | for first < l && ranges[first].end < start { 75 | first += ranges[first].delta 76 | } 77 | chunk = chunk[:0] 78 | end := maxRune 79 | if first < l { 80 | i := first 81 | prev := &first 82 | for ; i < l && ranges[i].start <= start; i += ranges[i].delta { 83 | if ranges[i].end < start { 84 | *prev += ranges[i].delta 85 | continue 86 | } 87 | if ranges[i].end < end { 88 | end = ranges[i].end 89 | } 90 | chunk = append(chunk, ranges[i].index) 91 | prev = &ranges[i].delta 92 | } 93 | sort.Ints(chunk) 94 | if i < l && ranges[i].start-1 < end { 95 | end = ranges[i].start - 1 96 | } 97 | } 98 | 99 | var size int 100 | for _, index := range chunk { 101 | binary.LittleEndian.PutUint32(b[size:], uint32(index)) 102 | size += 4 103 | } 104 | key := string(b[:size]) // allocates key 105 | id, ok := m[key] 106 | if !ok { 107 | id = counter 108 | counter++ 109 | m[key] = id 110 | } 111 | 112 | for _, index := range chunk { 113 | key := [2]int{index, int(id)} 114 | if dd[key] { 115 | continue 116 | } 117 | out[index] = append(out[index], id) 118 | dd[key] = true 119 | } 120 | inputMap = append(inputMap, RangeEntry{start, id}) 121 | start = end + 1 122 | } 123 | return 124 | } 125 | -------------------------------------------------------------------------------- /lex/compress_test.go: -------------------------------------------------------------------------------- 1 | package lex 2 | 3 | import ( 4 | "fmt" 5 | "strings" 6 | "testing" 7 | ) 8 | 9 | var compressTests = []struct { 10 | input string 11 | wantOut string 12 | wantMap string 13 | }{ 14 | {``, `[]`, `[\x00=>1]`}, 15 | {`[\x00\t]`, `[[1]]`, `[\x00=>1 \x01=>2 \t=>1 \n=>2]`}, 16 | {`[a-z]`, `[[2]]`, `[\x00=>1 a=>2 \{=>1]`}, 17 | {`[a-z][A-Z]`, `[[3] [2]]`, `[\x00=>1 A=>2 \[=>1 a=>3 \{=>1]`}, 18 | {`[a-zA-Z][A-Z]`, `[[2 3] [2]]`, `[\x00=>1 A=>2 \[=>1 a=>3 \{=>1]`}, 19 | {`[A-N][L-Z]`, `[[2 3] [3 4]]`, `[\x00=>1 A=>2 L=>3 O=>4 \[=>1]`}, 20 | {`[A-L][L-Z]`, `[[2 3] [3 4]]`, `[\x00=>1 A=>2 L=>3 M=>4 \[=>1]`}, 21 | {`[A-L][M-Z]`, `[[2] [3]]`, `[\x00=>1 A=>2 M=>3 \[=>1]`}, 22 | {`[A-Z][CX]`, `[[2 3] [3]]`, `[\x00=>1 A=>2 C=>3 D=>2 X=>3 Y=>2 \[=>1]`}, 23 | {`[A-Z][CX][CY-Z]`, `[[2 3 4 5] [3 4] [3 5]]`, `[\x00=>1 A=>2 C=>3 D=>2 X=>4 Y=>5 \[=>1]`}, 24 | {`[CX][CY-Z][A-Z]`, `[[3 4] [3 5] [2 3 4 5]]`, `[\x00=>1 A=>2 C=>3 D=>2 X=>4 Y=>5 \[=>1]`}, 25 | {`[Z][C][A]`, `[[4] [3] [2]]`, 
`[\x00=>1 A=>2 B=>1 C=>3 D=>1 Z=>4 \[=>1]`}, 26 | {`[0-9][0-1][0-7]`, `[[2 3 4] [2] [2 3]]`, `[\x00=>1 0=>2 2=>3 8=>4 \:=>1]`}, 27 | {`[0-9][0-1][0-7][0-9][A-Z]`, `[[2 3 4] [2] [2 3] [2 3 4] [5]]`, `[\x00=>1 0=>2 2=>3 8=>4 \:=>1 A=>5 \[=>1]`}, 28 | {`[^b]`, `[[1]]`, `[\x00=>1 b=>2 c=>1]`}, 29 | {`[^\p{Any}]`, `[[]]`, `[\x00=>1]`}, 30 | {`[\x21-\U0010ffff][a-z]`, `[[2 3] [3]]`, `[\x00=>1 \!=>2 a=>3 \{=>2]`}, 31 | {`[\x21-\U0010fff0]`, `[[2]]`, `[\x00=>1 \!=>2 \U0010fff1=>1]`}, 32 | 33 | // Bytes modes. 34 | {`{#bytes}[\x00\t]`, `[[1]]`, `[\x00=>1 \x01=>2 \t=>1 \n=>2]`}, 35 | {`{#bytes}[\x00-\xff]`, `[[1]]`, `[\x00=>1]`}, // no second class compared to the full Unicode mode 36 | {`[\x00-\xff]`, `[[1]]`, `[\x00=>1 \u0100=>2]`}, 37 | {`{#bytes}[\x00-\xfe]`, `[[1]]`, `[\x00=>1 \u00ff=>2]`}, 38 | } 39 | 40 | func TestCompressCharsets(t *testing.T) { 41 | for _, test := range compressTests { 42 | input := test.input 43 | var opts CharsetOptions 44 | input, opts.ScanBytes = strings.CutPrefix(input, "{#bytes}") 45 | sets, err := parseCharsets(input, opts) 46 | if err != nil { 47 | t.Errorf("parseCharsets(%q) failed with %v", test.input, err) 48 | } 49 | 50 | out, inputMap := compressCharsets(sets, opts) 51 | if outstr := fmt.Sprintf("%v", out); outstr != test.wantOut { 52 | t.Errorf("compressCharsets(%q).out = %v; want: %v", test.input, outstr, test.wantOut) 53 | } 54 | if mapstr := fmt.Sprintf("%v", inputMap); mapstr != test.wantMap { 55 | t.Errorf("compressCharsets(%q).inputMap = %v; want: %v", test.input, mapstr, test.wantMap) 56 | } 57 | } 58 | } 59 | 60 | func parseCharsets(input string, opts CharsetOptions) ([]charset, error) { 61 | var ret []charset 62 | var p parser 63 | p.source = input 64 | p.next() 65 | for p.ch == '[' { 66 | cs := p.parseClass(opts) 67 | ret = append(ret, cs) 68 | } 69 | if p.ch != -1 { 70 | p.error("unexpected end of input", p.offset, p.offset) 71 | } 72 | if p.err.Msg != "" { 73 | return nil, p.err 74 | } 75 | return ret, nil 76 | } 77 | 78 | func TestContains(t *testing.T) { 79 | input := symlist{0, 2, 7, 8, 10, 12, 15, 17, 20, 30, 40, 50, 52, 63} 80 | elements := make(map[Sym]bool) 81 | for _, sym := range input { 82 | elements[sym] = true 83 | } 84 | for sym := Sym(0); sym < 64; sym++ { 85 | want := elements[sym] 86 | if got := input.contains(sym); got != want { 87 | t.Errorf("symlist.contains(%v) = %v, want: %v", sym, got, want) 88 | } 89 | } 90 | } 91 | -------------------------------------------------------------------------------- /lex/regexp_fuzz.go: -------------------------------------------------------------------------------- 1 | //go:build gofuzz 2 | // +build gofuzz 3 | 4 | // go-fuzz-build github.com/inspirer/textmapper/tm-go/lex 5 | // go-fuzz -bin=./lex-fuzz.zip -workdir=. 6 | 7 | package lex 8 | 9 | func Fuzz(data []byte) int { 10 | _, err := ParseRegexp(string(data), true) 11 | if err != nil { 12 | return 0 13 | } 14 | return 1 15 | } 16 | -------------------------------------------------------------------------------- /parsers/js/ast/parse.go: -------------------------------------------------------------------------------- 1 | // generated by Textmapper; DO NOT EDIT 2 | 3 | package ast 4 | 5 | import ( 6 | "context" 7 | 8 | "github.com/inspirer/textmapper/parsers/js" 9 | ) 10 | 11 | // Parse parses a given utf-8 content into an AST. 
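// A typical call site is sketched below; eh is any js.ErrorHandler value and
// determines whether the parser should attempt recovery after a syntax error:
//
//	tree, err := ast.Parse(context.Background(), "a.js", src, eh)
//	if err != nil {
//		return err
//	}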
12 | func Parse(ctx context.Context, path, content string, eh js.ErrorHandler) (*Tree, error) { 13 | b := newBuilder(path, content) 14 | var s js.TokenStream 15 | s.Init(content, b.addNode) 16 | var p js.Parser 17 | p.Init(eh, b.addNode) 18 | err := p.ParseModule(ctx, &s) 19 | if err != nil { 20 | return nil, err 21 | } 22 | return b.build() 23 | } 24 | 25 | type builder struct { 26 | tree *Tree 27 | stack []*Node 28 | err error 29 | } 30 | 31 | func newBuilder(path, content string) *builder { 32 | return &builder{ 33 | tree: newTree(path, content), 34 | stack: make([]*Node, 0, 512), 35 | } 36 | } 37 | 38 | func (b *builder) addNode(t js.NodeType, offset, endoffset int) { 39 | start := len(b.stack) 40 | end := start 41 | for start > 0 && b.stack[start-1].offset >= offset { 42 | start-- 43 | if b.stack[start].offset >= endoffset { 44 | end-- 45 | } 46 | } 47 | out := &Node{ 48 | tree: b.tree, 49 | t: t, 50 | offset: offset, 51 | endoffset: endoffset, 52 | } 53 | if start < end { 54 | out.firstChild = b.stack[start] 55 | var prev *Node 56 | for i := end - 1; i >= start; i-- { 57 | n := b.stack[i] 58 | n.parent = out 59 | n.next = prev 60 | prev = n 61 | } 62 | } 63 | if end == len(b.stack) { 64 | b.stack = append(b.stack[:start], out) 65 | } else if start < end { 66 | b.stack[start] = out 67 | l := copy(b.stack[start+1:], b.stack[end:]) 68 | b.stack = b.stack[:start+1+l] 69 | } else { 70 | b.stack = append(b.stack, nil) 71 | copy(b.stack[start+1:], b.stack[start:]) 72 | b.stack[start] = out 73 | } 74 | } 75 | 76 | func (b *builder) build() (*Tree, error) { 77 | if b.err != nil { 78 | return nil, b.err 79 | } 80 | b.addNode(js.File, 0, len(b.tree.content)) 81 | b.tree.root = b.stack[0] 82 | return b.tree, nil 83 | } 84 | -------------------------------------------------------------------------------- /parsers/js/const.go: -------------------------------------------------------------------------------- 1 | package js 2 | 3 | import ( 4 | "github.com/inspirer/textmapper/parsers/js/token" 5 | ) 6 | 7 | const ( 8 | keywordStart = token.PRIVATEIDENTIFIER + 1 9 | keywordEnd = token.LBRACE 10 | 11 | punctuationStart = token.LBRACE 12 | punctuationEnd = token.NUMERICLITERAL 13 | ) 14 | -------------------------------------------------------------------------------- /parsers/js/const_test.go: -------------------------------------------------------------------------------- 1 | package js 2 | 3 | import ( 4 | "regexp" 5 | "testing" 6 | 7 | "github.com/inspirer/textmapper/parsers/js/token" 8 | ) 9 | 10 | func TestTokenRanges(t *testing.T) { 11 | keywordRE := regexp.MustCompile("^[a-z]+$") 12 | punctRE := regexp.MustCompile("^[^a-zA-Z\x00-\x1f]+$") 13 | for tok := token.EOI; tok < token.NumTokens; tok++ { 14 | val := tok.String() 15 | if keywordRE.MatchString(val) != (tok >= keywordStart && tok < keywordEnd) { 16 | t.Errorf("All keywords must be in the range [keywordStart, keywordEnd): %d, %s", tok, val) 17 | } 18 | if punctRE.MatchString(val) != (tok >= punctuationStart && tok < punctuationEnd) { 19 | t.Errorf("All punctuation tokens must be in the range [punctuationStart, punctuationEnd): %d, %s", tok, val) 20 | } 21 | } 22 | } 23 | 24 | func TestStateValues(t *testing.T) { 25 | if StateDiv&^1 != StateInitial || StateTemplateDiv&^1 != StateTemplate || StateTemplateExprDiv&^1 != StateTemplateExpr { 26 | t.Error("div states must be odd and one greater than non-div states") 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /parsers/js/lexer_impl.go: 
-------------------------------------------------------------------------------- 1 | package js 2 | 3 | // Copy forks the lexer in its current state. 4 | func (l *Lexer) Copy() Lexer { 5 | ret := *l 6 | // Note: empty stack is okay for lookahead purposes, since the stack is 7 | // used for JSX tags and not within TS/JS code. 8 | ret.Stack = nil 9 | return ret 10 | } 11 | -------------------------------------------------------------------------------- /parsers/js/stream.go: -------------------------------------------------------------------------------- 1 | // generated by Textmapper; DO NOT EDIT 2 | 3 | package js 4 | 5 | import ( 6 | "context" 7 | "fmt" 8 | 9 | "github.com/inspirer/textmapper/parsers/js/token" 10 | ) 11 | 12 | // TokenStream post-processes lexer output for consumption by the parser. 13 | type TokenStream struct { 14 | lexer Lexer 15 | listener Listener // for ingesting tokens into the AST, nil during lookaheads 16 | pending []symbol 17 | delayed symbol // by semicolon insertion and for splitting >> into two tokens 18 | recoveryMode bool // forces use of simplified semicolon insertion rules during error recovery 19 | 20 | lastToken token.Type 21 | lastEnd int 22 | lastLine int // 1-based 23 | } 24 | 25 | type symbol struct { 26 | symbol int32 27 | offset int 28 | endoffset int 29 | } 30 | 31 | func (s *TokenStream) Init(content string, l Listener) { 32 | s.lexer.Init(content) 33 | s.listener = l 34 | 35 | if cap(s.pending) < startTokenBufferSize { 36 | s.pending = make([]symbol, 0, startTokenBufferSize) 37 | } 38 | s.pending = s.pending[:0] 39 | s.delayed.symbol = noToken 40 | s.recoveryMode = false 41 | s.lastToken = token.UNAVAILABLE 42 | s.lastLine = 1 43 | } 44 | 45 | func (s *TokenStream) Copy() TokenStream { 46 | ret := *s 47 | ret.lexer = s.lexer.Copy() 48 | ret.listener = nil 49 | ret.pending = nil 50 | return ret 51 | } 52 | 53 | func (s *TokenStream) reportIgnored(ctx context.Context, tok symbol) { 54 | var t NodeType 55 | switch token.Type(tok.symbol) { 56 | case token.MULTILINECOMMENT: 57 | t = MultiLineComment 58 | case token.SINGLELINECOMMENT: 59 | t = SingleLineComment 60 | case token.INVALID_TOKEN: 61 | t = InvalidToken 62 | default: 63 | return 64 | } 65 | if debugSyntax { 66 | fmt.Printf("ignored: %v as %v\n", token.Type(tok.symbol), t) 67 | } 68 | s.listener(t, tok.offset, tok.endoffset) 69 | } 70 | 71 | // flush is called for every "shifted" token to report it together with any pending tokens 72 | // to the listener. 73 | func (s *TokenStream) flush(ctx context.Context, sym symbol) { 74 | if s.listener == nil { 75 | return 76 | } 77 | if len(s.pending) > 0 { 78 | for i, tok := range s.pending { 79 | if tok.endoffset > sym.endoffset { 80 | // Note: this copying should not happen during normal operation, only 81 | // during error recovery. 82 | s.pending = append(s.pending[:0], s.pending[i:]...) 
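// Tokens from index i onwards end after the symbol being shifted; keep them
// queued for a later flush.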
83 | goto flushed 84 | } 85 | s.reportIgnored(ctx, tok) 86 | } 87 | s.pending = s.pending[:0] 88 | flushed: 89 | } 90 | switch token.Type(sym.symbol) { 91 | case token.NOSUBSTITUTIONTEMPLATE: 92 | s.listener(NoSubstitutionTemplate, sym.offset, sym.endoffset) 93 | case token.TEMPLATEHEAD: 94 | s.listener(TemplateHead, sym.offset, sym.endoffset) 95 | case token.TEMPLATEMIDDLE: 96 | s.listener(TemplateMiddle, sym.offset, sym.endoffset) 97 | case token.TEMPLATETAIL: 98 | s.listener(TemplateTail, sym.offset, sym.endoffset) 99 | } 100 | } 101 | 102 | func (s *TokenStream) text(sym symbol) string { 103 | return s.lexer.source[sym.offset:sym.endoffset] 104 | } 105 | 106 | func (s *TokenStream) SetDialect(d Dialect) { 107 | s.lexer.Dialect = d 108 | } 109 | -------------------------------------------------------------------------------- /parsers/json/ast.go: -------------------------------------------------------------------------------- 1 | package json 2 | 3 | type Value interface { 4 | valueType() 5 | } 6 | 7 | type Literal struct { 8 | value string 9 | } 10 | 11 | func (literal Literal) valueType() {} 12 | 13 | type Field struct { 14 | name string 15 | } 16 | -------------------------------------------------------------------------------- /parsers/json/json.tm: -------------------------------------------------------------------------------- 1 | language json(go); 2 | 3 | lang = "json" 4 | package = "github.com/inspirer/textmapper/parsers/json" 5 | eventBased = true 6 | optimizeTables = true 7 | extraTypes = ["NonExistingType"] 8 | 9 | :: lexer 10 | 11 | '{' {int64}: /\{/ { $$ = int64(42); } 12 | '}': /\}/ 13 | '[': /\[/ 14 | ']': /\]/ 15 | ':': /:/ 16 | ',': /,/ 17 | 18 | space: /[\t\r\n ]+/ (space) 19 | 20 | commentChars = /([^*]|\*+[^*\/])*\**/ 21 | MultiLineComment: /\/\*{commentChars}\*\// (space) 22 | 23 | hex = /[0-9a-fA-F]/ 24 | 25 | # TODO 26 | JSONString {string}: /"([^"\\]|\\(["\/\\bfnrt]|u{hex}{4}))*"/ 27 | #JSONString: /"([^"\\\x00-\x1f]|\\(["\/\\bfnrt]|u{hex}{4}))*"/ 28 | 29 | fraction = /\.[0-9]+/ 30 | exp = /[eE][+-]?[0-9]+/ 31 | JSONNumber: /-?(0|[1-9][0-9]*){fraction}?{exp}?/ 32 | 33 | id: /[a-zA-Z][a-zA-Z0-9]*/ (class) 34 | 35 | 'null': /null/ 36 | 'true': /true/ 37 | 'false': /false/ 38 | 39 | 'A': /A/ 40 | 'B': /B/ 41 | 42 | error: 43 | invalid_token: 44 | 45 | :: parser 46 | 47 | %input JSONText; 48 | 49 | %inject MultiLineComment -> MultiLineComment; 50 | %inject invalid_token -> InvalidToken; 51 | %inject JSONString -> JSONString; 52 | 53 | %generate Literals = set(first JSONValue<+A>); 54 | 55 | %flag A; 56 | 57 | JSONText -> JSONText : 58 | JSONValue<+A> ; 59 | 60 | JSONValue {Value} -> JSONValue : 61 | 'null' 62 | | 'true' 63 | | 'false' 64 | | [A] 'A' 65 | | [!A] 'B' 66 | | JSONObject 67 | | EmptyObject 68 | | JSONArray 69 | | JSONString 70 | | JSONNumber 71 | ; 72 | 73 | EmptyObject -> EmptyObject : 74 | (?= EmptyObject) 75 | { /* empty mid-rule */ } 76 | '{'[lparen] { val := $lparen; if val != int64(42) { panic(fmt.Sprintf("got %v %T", val, val)) } } '}' 77 | { val := $lparen; _ = val } 78 | ; 79 | 80 | JSONObject -> JSONObject : 81 | (?= !EmptyObject) '{'[F] JSONMemberList? 
{ /*mid-rule ${F.offset}*/ } '}' { /*starts ${F.offset}*/ } ; 82 | 83 | JSONMember {*Field} -> JSONMember : 84 | JSONString ':' JSONValue<~A> ; 85 | 86 | JSONMemberList : 87 | JSONMember 88 | | JSONMemberList ',' JSONMember 89 | ; 90 | 91 | JSONArray -> JSONArray : 92 | '[' JSONElementListopt ']' ; 93 | 94 | JSONElementList : 95 | JSONValue<+A> 96 | | JSONElementList ',' JSONValue<+A> 97 | ; 98 | -------------------------------------------------------------------------------- /parsers/json/lexer_test.go: -------------------------------------------------------------------------------- 1 | package json_test 2 | 3 | import ( 4 | "fmt" 5 | "regexp" 6 | "strings" 7 | "testing" 8 | 9 | "github.com/inspirer/textmapper/parsers/json" 10 | "github.com/inspirer/textmapper/parsers/json/token" 11 | ) 12 | 13 | const jsonExample = ` 14 | { 15 | "some key": [{ 16 | "title": "example glossary", 17 | "float value": 1e9, 18 | "float value 2": -0.9e-5, 19 | "Gloss \u1234 \nDiv": { 20 | "title": "S", "items": { 21 | "read": { 22 | "ID": "xml", 23 | "SortAs": "price", 24 | "type": "Markup Language", 25 | "Acronym": {}, 26 | "UniqueID": "850257207432", 27 | "def": { 28 | "json": "Lorem ipsum dolor sit amet, ad prima imperdiet sea. Homero reprimique no duo, mundi iriure expetenda ei est. No nec denique efficiantur, pri ad oratio adipisci expetendis.", 29 | "links": ["ABC", "Echo", "a", "b", "c"] 30 | }, 31 | "render as": "markup", "null": null, "true": true, "false": false 32 | } 33 | } 34 | } 35 | }] 36 | } 37 | ` 38 | 39 | func PanicOnError(line, offset, len int, msg string) { 40 | panic(fmt.Sprintf("%d, %d: %s", line, offset, msg)) 41 | } 42 | 43 | func testLexer(input string, t *testing.T) { 44 | l := new(json.Lexer) 45 | l.Init(input) 46 | spacesRE := regexp.MustCompile(`^\s+$`) 47 | 48 | next := l.Next() 49 | var offset int 50 | for next != token.EOI { 51 | s, e := l.Pos() 52 | if s > offset && !spacesRE.MatchString(input[offset:s]) { 53 | t.Errorf("Spaces expected: %s", input[offset:s]) 54 | } 55 | offset = e 56 | tok := input[s:e] 57 | switch next { 58 | case token.LBRACE, token.RBRACE, token.LBRACK, token.RBRACK, token.COLON, token.COMMA, token.NULL, token.TRUE, token.FALSE: 59 | if tok != next.String() { 60 | t.Errorf("Bad token %v: %s", next, tok) 61 | } 62 | case token.JSONSTRING: 63 | if !strings.HasPrefix(tok, `"`) || !strings.HasSuffix(tok, `"`) { 64 | t.Errorf("Bad string literal: %s", tok) 65 | } 66 | } 67 | next = l.Next() 68 | } 69 | } 70 | 71 | func TestLexerExample(t *testing.T) { 72 | testLexer(jsonExample, t) 73 | } 74 | 75 | func BenchmarkLexer(b *testing.B) { 76 | l := new(json.Lexer) 77 | for i := 0; i < b.N; i++ { 78 | l.Init(jsonExample) 79 | next := l.Next() 80 | for next != token.EOI { 81 | next = l.Next() 82 | } 83 | } 84 | b.SetBytes(int64(len(jsonExample))) 85 | } 86 | -------------------------------------------------------------------------------- /parsers/json/listener.go: -------------------------------------------------------------------------------- 1 | // generated by Textmapper; DO NOT EDIT 2 | 3 | package json 4 | 5 | import ( 6 | "fmt" 7 | ) 8 | 9 | type NodeType int 10 | 11 | type Listener func(t NodeType, offset, endoffset int) 12 | 13 | const ( 14 | NoType NodeType = iota 15 | EmptyObject 16 | JSONArray 17 | JSONMember 18 | JSONObject 19 | JSONText 20 | JSONValue 21 | MultiLineComment 22 | InvalidToken 23 | JSONString 24 | NonExistingType 25 | NodeTypeMax 26 | ) 27 | 28 | var nodeTypeStr = [...]string{ 29 | "NONE", 30 | "EmptyObject", 31 | "JSONArray", 32 | 
"JSONMember", 33 | "JSONObject", 34 | "JSONText", 35 | "JSONValue", 36 | "MultiLineComment", 37 | "InvalidToken", 38 | "JSONString", 39 | "NonExistingType", 40 | } 41 | 42 | func (t NodeType) String() string { 43 | if t >= 0 && int(t) < len(nodeTypeStr) { 44 | return nodeTypeStr[t] 45 | } 46 | return fmt.Sprintf("node(%d)", t) 47 | } 48 | -------------------------------------------------------------------------------- /parsers/json/parser_test.go: -------------------------------------------------------------------------------- 1 | package json_test 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/inspirer/textmapper/parsers/json" 7 | "github.com/inspirer/textmapper/parsers/parsertest" 8 | ) 9 | 10 | var jsParseTests = []struct { 11 | nt json.NodeType 12 | inputs []string 13 | }{ 14 | 15 | {json.EmptyObject, []string{ 16 | `«{}»`, 17 | `«{ /* comment */ }»`, 18 | `{"aa": «{}» }`, 19 | }}, 20 | {json.JSONObject, []string{ 21 | `«{ "a" : "b" }»`, 22 | `«{ "a" : ["b"] }»`, 23 | `«{ "a" : {} }»`, 24 | `«{ "a" : «{"q":B}» }»`, 25 | }}, 26 | {json.JSONArray, []string{ 27 | `{ "a" : «["b"]» }`, 28 | ` «[]» `, 29 | }}, 30 | {json.JSONText, []string{ 31 | `«{ "a" : ["b", A] }»`, 32 | ` «"aa"» `, 33 | ` «A» `, 34 | }}, 35 | {json.JSONMember, []string{ 36 | `[{ «"a" : ["b"]», «"q":[]» }]`, 37 | }}, 38 | {json.JSONValue, []string{ 39 | `«{ "a" : «[«"b"»]» }»`, 40 | ` «"aa"» `, 41 | }}, 42 | {json.InvalidToken, []string{ 43 | ` «%» null `, 44 | }}, 45 | {json.NonExistingType, []string{}}, 46 | {json.MultiLineComment, []string{ 47 | `{ "a"«/* abc */» : [] }`, 48 | }}, 49 | {json.JSONString, []string{ 50 | `{ «"a"» : [«"b"»] }`, 51 | }}, 52 | } 53 | 54 | func TestParser(t *testing.T) { 55 | l := new(json.Lexer) 56 | p := new(json.Parser) 57 | 58 | seen := map[json.NodeType]bool{} 59 | for _, tc := range jsParseTests { 60 | seen[tc.nt] = true 61 | for _, input := range tc.inputs { 62 | test := parsertest.New(t, tc.nt.String(), input) 63 | l.Init(test.Source()) 64 | p.Init(func(nt json.NodeType, offset, endoffset int) { 65 | if nt == tc.nt { 66 | test.Consume(t, offset, endoffset) 67 | } 68 | }) 69 | test.Done(t, p.Parse(l)) 70 | } 71 | } 72 | for n := json.NodeType(1); n < json.NodeTypeMax; n++ { 73 | if !seen[n] { 74 | t.Errorf("%v is not tested", n) 75 | } 76 | } 77 | } 78 | 79 | func BenchmarkParser(b *testing.B) { 80 | l := new(json.Lexer) 81 | p := new(json.Parser) 82 | 83 | p.Init(func(t json.NodeType, offset, endoffset int) {}) 84 | for i := 0; i < b.N; i++ { 85 | l.Init(jsonExample) 86 | p.Parse(l) 87 | } 88 | b.SetBytes(int64(len(jsonExample))) 89 | } 90 | -------------------------------------------------------------------------------- /parsers/json/token/token.go: -------------------------------------------------------------------------------- 1 | // generated by Textmapper; DO NOT EDIT 2 | 3 | package token 4 | 5 | import ( 6 | "fmt" 7 | ) 8 | 9 | // Type is an enum of all terminal symbols of the json language. 10 | type Type int32 11 | 12 | // Token values. 
13 | const ( 14 | UNAVAILABLE Type = iota - 1 15 | EOI 16 | INVALID_TOKEN 17 | LBRACE // { 18 | RBRACE // } 19 | LBRACK // [ 20 | RBRACK // ] 21 | COLON // : 22 | COMMA // , 23 | SPACE 24 | MULTILINECOMMENT 25 | JSONSTRING 26 | JSONNUMBER 27 | ID 28 | NULL // null 29 | TRUE // true 30 | FALSE // false 31 | CHAR_A // A 32 | CHAR_B // B 33 | ERROR 34 | 35 | NumTokens 36 | ) 37 | 38 | var tokenStr = [...]string{ 39 | "EOI", 40 | "INVALID_TOKEN", 41 | "{", 42 | "}", 43 | "[", 44 | "]", 45 | ":", 46 | ",", 47 | "SPACE", 48 | "MULTILINECOMMENT", 49 | "JSONSTRING", 50 | "JSONNUMBER", 51 | "ID", 52 | "null", 53 | "true", 54 | "false", 55 | "A", 56 | "B", 57 | "ERROR", 58 | } 59 | 60 | func (tok Type) String() string { 61 | if tok >= 0 && int(tok) < len(tokenStr) { 62 | return tokenStr[tok] 63 | } 64 | return fmt.Sprintf("token(%d)", tok) 65 | } 66 | -------------------------------------------------------------------------------- /parsers/parsertest/parsertest_test.go: -------------------------------------------------------------------------------- 1 | package parsertest 2 | 3 | import ( 4 | "reflect" 5 | "testing" 6 | ) 7 | 8 | func TestSplitInput(t *testing.T) { 9 | res, exp, errors := splitInput(t, "test", `abc«de§f»cdf«q1»q2§`) 10 | if string(res) != `abcdefcdfq1q2` { 11 | t.Errorf("got: %s, want: abcdefcdfq1q2", res) 12 | } 13 | want := map[node]int{{3, 6}: 1, {9, 11}: 1} 14 | if !reflect.DeepEqual(exp, want) { 15 | t.Errorf("got: %v, want: %v", exp, want) 16 | } 17 | if !reflect.DeepEqual(errors, []int{5, 13}) { 18 | t.Errorf("got: %v, want: [5 13]", errors) 19 | } 20 | 21 | res, exp, errors = splitInput(t, "test", `/*no expectations*/`) 22 | if string(res) != `/*no expectations*/` || len(exp) != 0 || len(errors) != 0 { 23 | t.Errorf("got: %s, %v, %v, want: /*no expectations*/, [], []", res, exp, errors) 24 | } 25 | 26 | res, exp, errors = splitInput(t, "test", `«abc» «a«b§«c»»»`) 27 | if string(res) != `abc abc` { 28 | t.Errorf("got: %s, want: abc abc", res) 29 | } 30 | want = map[node]int{{0, 3}: 1, {6, 7}: 1, {5, 7}: 1, {4, 7}: 1} 31 | if !reflect.DeepEqual(exp, want) { 32 | t.Errorf("got: %v, want: %v", exp, want) 33 | } 34 | if !reflect.DeepEqual(errors, []int{6}) { 35 | t.Errorf("got: %v, want: [6]", errors) 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /parsers/simple/lexer_test.go: -------------------------------------------------------------------------------- 1 | package simple_test 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/inspirer/textmapper/parsers/parsertest" 7 | "github.com/inspirer/textmapper/parsers/simple" 8 | "github.com/inspirer/textmapper/parsers/simple/token" 9 | ) 10 | 11 | var lexerTests = []struct { 12 | tok token.Type 13 | inputs []string 14 | }{ 15 | 16 | {token.ID, []string{ 17 | `«\abc» «\brea» break`, 18 | `«\abc123»`, 19 | `«\_abc_»`, 20 | }}, 21 | {token.CHAR_A, []string{`«a»`}}, 22 | {token.CHAR_B, []string{`«b»`}}, 23 | {token.CHAR_C, []string{`«c»`}}, 24 | {token.SIMPLE, []string{`«simple»`}}, 25 | {token.INVALID_TOKEN, []string{`«si»`}}, 26 | } 27 | 28 | func TestLexer(t *testing.T) { 29 | l := new(simple.Lexer) 30 | seen := make(map[token.Type]bool) 31 | seen[token.WHITESPACE] = true 32 | for _, tc := range lexerTests { 33 | seen[tc.tok] = true 34 | for _, input := range tc.inputs { 35 | test := parsertest.New(t, tc.tok.String(), input) 36 | l.Init(test.Source()) 37 | tok := l.Next() 38 | for tok != token.EOI { 39 | if tok == tc.tok { 40 | s, e := l.Pos() 41 | test.Consume(t, s, e) 42 | } 43 | 
tok = l.Next() 44 | } 45 | test.Done(t, nil) 46 | } 47 | } 48 | for tok := token.Type(1); tok < token.NumTokens; tok++ { 49 | if !seen[tok] { 50 | t.Errorf("%v is not tested", tok) 51 | } 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /parsers/simple/listener.go: -------------------------------------------------------------------------------- 1 | // generated by Textmapper; DO NOT EDIT 2 | 3 | package simple 4 | 5 | import ( 6 | "fmt" 7 | ) 8 | 9 | type NodeType int 10 | 11 | type Listener func(t NodeType, offset, endoffset int) 12 | 13 | const ( 14 | NoType NodeType = iota 15 | NodeTypeMax 16 | ) 17 | 18 | var nodeTypeStr = [...]string{ 19 | "NONE", 20 | } 21 | 22 | func (t NodeType) String() string { 23 | if t >= 0 && int(t) < len(nodeTypeStr) { 24 | return nodeTypeStr[t] 25 | } 26 | return fmt.Sprintf("node(%d)", t) 27 | } 28 | -------------------------------------------------------------------------------- /parsers/simple/parser_tables.go: -------------------------------------------------------------------------------- 1 | // generated by Textmapper; DO NOT EDIT 2 | 3 | package simple 4 | 5 | import ( 6 | "fmt" 7 | 8 | "github.com/inspirer/textmapper/parsers/simple/token" 9 | ) 10 | 11 | const atBState = 1 12 | 13 | var afterListStates = map[int]bool{ 14 | 6: true, 15 | 7: true, 16 | } 17 | 18 | var tmNonterminals = [...]string{ 19 | "Bar_list", 20 | "Foo_list", 21 | "Xyz_list", 22 | "input", 23 | "Foo", 24 | "Bar", 25 | "Xyz", 26 | } 27 | 28 | func symbolName(sym int32) string { 29 | if sym == noToken { 30 | return "" 31 | } 32 | if sym < int32(token.NumTokens) { 33 | return token.Type(sym).String() 34 | } 35 | if i := int(sym) - int(token.NumTokens); i < len(tmNonterminals) { 36 | return tmNonterminals[i] 37 | } 38 | return fmt.Sprintf("nonterminal(%d)", sym) 39 | } 40 | 41 | var tmAction = []int32{ 42 | -1, -1, 12, 11, 13, -3, -9, -15, 3, 1, 5, 7, 6, 0, 2, 4, -1, -2, 43 | } 44 | 45 | var tmLalr = []int32{ 46 | 4, -1, 0, 10, -1, -2, 5, -1, 0, 9, -1, -2, 6, -1, 0, 8, -1, -2, 47 | } 48 | 49 | var tmGoto = []int32{ 50 | 0, 2, 2, 2, 4, 8, 14, 20, 20, 22, 24, 26, 28, 32, 36, 42, 51 | } 52 | 53 | var tmFromTo = []int8{ 54 | 16, 17, 0, 1, 0, 2, 5, 2, 0, 3, 1, 11, 6, 3, 0, 4, 1, 4, 7, 4, 0, 5, 0, 6, 0, 55 | 7, 0, 16, 0, 8, 6, 14, 0, 9, 5, 13, 0, 10, 1, 12, 7, 15, 56 | } 57 | 58 | var tmRuleLen = []int8{ 59 | 2, 1, 2, 1, 2, 1, 2, 2, 1, 1, 1, 1, 1, 1, 60 | } 61 | 62 | var tmRuleSymbol = []int32{ 63 | 8, 8, 9, 9, 10, 10, 11, 11, 11, 11, 11, 12, 13, 14, 64 | } 65 | 66 | var tmRuleType = [...]NodeType{ 67 | 0, // Bar_list : Bar_list Bar 68 | 0, // Bar_list : Bar 69 | 0, // Foo_list : Foo_list Foo 70 | 0, // Foo_list : Foo 71 | 0, // Xyz_list : Xyz_list Xyz 72 | 0, // Xyz_list : Xyz 73 | 0, // input : 'simple' Xyz 74 | 0, // input : 'simple' .atB 'b' 75 | 0, // input : Xyz_list .afterList 76 | 0, // input : Foo_list .afterList 77 | 0, // input : Bar_list 78 | 0, // Foo : 'b' 79 | 0, // Bar : 'a' 80 | 0, // Xyz : 'c' 81 | } 82 | 83 | // set(follow SIMPLE) = CHAR_B, CHAR_C 84 | var afterSimple = []token.Type{ 85 | 5, 6, 86 | } 87 | -------------------------------------------------------------------------------- /parsers/simple/simple.tm: -------------------------------------------------------------------------------- 1 | language simple(go); 2 | 3 | lang = "simple" 4 | package = "github.com/inspirer/textmapper/parsers/simple" 5 | eventBased = true 6 | 7 | ::lexer 8 | 9 | WhiteSpace: /[\n\r\x20\t]+/ (space) 10 | 11 | 'simple': /simple/ 12 | 13 | 
'a': /a/ 14 | 'b': /b/ 15 | 'c': /c/ 16 | 17 | # See https://www.unicode.org/reports/tr31/tr31-37.html#Default_Identifier_Syntax 18 | IDStart = /[_\p{L}\p{Nl}\p{Other_ID_Start}-\p{Pattern_Syntax}-\p{Pattern_White_Space}]/ 19 | IDFollow = /{IDStart}|[\p{Mn}\p{Mc}\p{Nd}\p{Pc}\p{Other_ID_Continue}-\p{Pattern_Syntax}-\p{Pattern_White_Space}]/ 20 | 21 | id: /\\{IDStart}{IDFollow}*/ 22 | 23 | ::parser 24 | 25 | %generate afterSimple = set(follow 'simple'); 26 | 27 | input : 'simple' (Xyz | .atB 'b') | Xyz+ .afterList | Foo+ .afterList | Bar+ ; 28 | Foo : 'b' ; 29 | Bar : 'a' ; 30 | Xyz : 'c'; -------------------------------------------------------------------------------- /parsers/simple/token/token.go: -------------------------------------------------------------------------------- 1 | // generated by Textmapper; DO NOT EDIT 2 | 3 | package token 4 | 5 | import ( 6 | "fmt" 7 | ) 8 | 9 | // Type is an enum of all terminal symbols of the simple language. 10 | type Type int32 11 | 12 | // Token values. 13 | const ( 14 | UNAVAILABLE Type = iota - 1 15 | EOI 16 | INVALID_TOKEN 17 | WHITESPACE 18 | SIMPLE // simple 19 | CHAR_A // a 20 | CHAR_B // b 21 | CHAR_C // c 22 | ID 23 | 24 | NumTokens 25 | ) 26 | 27 | var tokenStr = [...]string{ 28 | "EOI", 29 | "INVALID_TOKEN", 30 | "WHITESPACE", 31 | "simple", 32 | "a", 33 | "b", 34 | "c", 35 | "ID", 36 | } 37 | 38 | func (tok Type) String() string { 39 | if tok >= 0 && int(tok) < len(tokenStr) { 40 | return tokenStr[tok] 41 | } 42 | return fmt.Sprintf("token(%d)", tok) 43 | } 44 | -------------------------------------------------------------------------------- /parsers/test/ast/factory.go: -------------------------------------------------------------------------------- 1 | // generated by Textmapper; DO NOT EDIT 2 | 3 | package ast 4 | 5 | import ( 6 | "fmt" 7 | 8 | "github.com/inspirer/textmapper/parsers/test" 9 | ) 10 | 11 | func ToTestNode(n Node) TestNode { 12 | if n == nil { 13 | return nil 14 | } 15 | switch n.Type() { 16 | case test.AsExpr: 17 | return &AsExpr{n} 18 | case test.Block: 19 | return &Block{n} 20 | case test.Decl1: 21 | return &Decl1{n} 22 | case test.Decl2: 23 | return &Decl2{n} 24 | case test.DeclOptQual: 25 | return &DeclOptQual{n} 26 | case test.Empty1: 27 | return &Empty1{n} 28 | case test.EvalEmpty1: 29 | return &EvalEmpty1{n} 30 | case test.EvalFoo: 31 | return &EvalFoo{n} 32 | case test.EvalFoo2: 33 | return &EvalFoo2{n} 34 | case test.Icon: 35 | return &Icon{n} 36 | case test.If: 37 | return &If{n} 38 | case test.Int: 39 | return &Int{n} 40 | case test.IntExpr: 41 | return &IntExpr{n} 42 | case test.LastInt: 43 | return &LastInt{n} 44 | case test.Negation: 45 | return &Negation{n} 46 | case test.PlusExpr: 47 | return &PlusExpr{n} 48 | case test.Test: 49 | return &Test{n} 50 | case test.TestClause: 51 | return &TestClause{n} 52 | case test.TestIntClause: 53 | return &TestIntClause{n} 54 | case test.Int7: 55 | return &Int7{n} 56 | case test.Int9: 57 | return &Int9{n} 58 | case test.SingleLineComment, test.Identifier, test.InvalidToken, test.MultiLineComment: 59 | return &Token{n} 60 | case test.NoType: 61 | return nilInstance 62 | } 63 | panic(fmt.Errorf("ast: unknown node type %v", n.Type())) 64 | return nil 65 | } 66 | -------------------------------------------------------------------------------- /parsers/test/consts.go: -------------------------------------------------------------------------------- 1 | package test 2 | 3 | // Node flags. 
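// Each flag occupies a distinct bit, so a single node can carry several flags;
// listeners test them with a mask (illustrative only):
//
//	if flags&InTest != 0 {
//		// the node was reported inside a test context
//	}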
4 | const ( 5 | InTest NodeFlags = 1 << iota 6 | InFoo 7 | ) 8 | -------------------------------------------------------------------------------- /parsers/test/lexer_test.go: -------------------------------------------------------------------------------- 1 | package test_test 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/inspirer/textmapper/parsers/parsertest" 7 | "github.com/inspirer/textmapper/parsers/test" 8 | "github.com/inspirer/textmapper/parsers/test/token" 9 | ) 10 | 11 | var lexerTests = []struct { 12 | tok token.Type 13 | inputs []string 14 | }{ 15 | 16 | {token.IDENTIFIER, []string{ 17 | `«abc» «brea» «abc-def»`, 18 | `«a-b-c-d»---- `, 19 | ` «a»-`, 20 | ` «a»--`, 21 | `«a»->«b»`, 22 | `«testfoo»----- testfoo----->`, 23 | }}, 24 | {token.IDENTIFIER2, []string{ 25 | `«^a» «^b»`, 26 | "«^\n» «^\x00»", 27 | }}, 28 | 29 | {token.MINUS, []string{ 30 | ` «-» -> a------b«-» «-»«-»`, 31 | }}, 32 | {token.MINUSGT, []string{ 33 | `«->»`, 34 | `abcdef«->»`, 35 | `abcdef«->» `, 36 | `testfoo1----«->»`, 37 | }}, 38 | 39 | {token.BACKTRACKINGTOKEN, []string{ 40 | `«test----->» «test->» «testfoo->» testf->`, 41 | }}, 42 | 43 | {token.TEST, []string{"«test»", "«test»-----"}}, 44 | {token.DECL1, []string{"«decl1»"}}, 45 | {token.DECL2, []string{"«decl2»"}}, 46 | {token.IF, []string{"«if»"}}, 47 | {token.ELSE, []string{"«else»"}}, 48 | {token.EVAL, []string{"«eval»"}}, 49 | {token.AS, []string{"«as»"}}, 50 | {token.INTEGERCONSTANT, []string{"«123» 34\n «0» ", "«123» 0"}}, 51 | {token.LASTINT, []string{"123 «0\n»45 «0»"}}, 52 | 53 | {token.LBRACE, []string{"«{»"}}, 54 | {token.RBRACE, []string{"«}»"}}, 55 | {token.LPAREN, []string{"«(»"}}, 56 | {token.RPAREN, []string{"«)»"}}, 57 | {token.LBRACK, []string{"«[»"}}, 58 | {token.RBRACK, []string{"«]»"}}, 59 | {token.DOT, []string{ 60 | "«.»", 61 | "«.»«.»", 62 | }}, 63 | {token.MULTILINE, []string{ 64 | "% \n «%q\n% q»\n%f", 65 | "«%q\n%q» !", 66 | "«%q\n% q»", 67 | }}, 68 | {token.DOTDOTDOT, []string{"«...»"}}, 69 | {token.COMMA, []string{"«,»"}}, 70 | {token.COLON, []string{"«:»"}}, 71 | {token.PLUS, []string{"«+»"}}, 72 | {token.ESC, []string{`«\»`}}, 73 | {token.CHAR__, []string{`«_»`}}, 74 | {token.FOO_, []string{`«foo_»`}}, 75 | {token.F_A, []string{`«f_a»`}}, 76 | 77 | {token.SINGLELINECOMMENT, []string{" «//abc»\r\n "}}, 78 | {token.MULTILINECOMMENT, []string{ 79 | " «/**/» «/***/» «/*\r\n*/» ", 80 | " «/* /* ****/ */» nested", 81 | }}, 82 | {token.SHARPATID, []string{ 83 | " Zfoo «Zfoob» «Zfo\\u1111ob» ", 84 | }}, 85 | {token.DQUOTE, []string{"«\"»"}}, 86 | {token.SQUOTE, []string{"«'»"}}, 87 | {token.ZFOO, []string{ 88 | " «Zfoo» Zfoob ", 89 | }}, 90 | {token.INVALID_TOKEN, []string{ 91 | " «#» ", 92 | " /**/ «/* /* ****/ * nested»", // unfinished comment 93 | " «Zff\\» ", 94 | " \x00 «\U0001fffe»«#» ", // \x00 is valid whitespace 95 | }}, 96 | } 97 | 98 | func TestLexer(t *testing.T) { 99 | l := new(test.Lexer) 100 | seen := map[token.Type]bool{} 101 | seen[token.WHITESPACE] = true 102 | seen[token.ERROR] = true 103 | for _, tc := range lexerTests { 104 | seen[tc.tok] = true 105 | for _, input := range tc.inputs { 106 | ptest := parsertest.New(t, tc.tok.String(), input) 107 | l.Init(ptest.Source()) 108 | tok := l.Next() 109 | for tok != token.EOI { 110 | if tok == tc.tok { 111 | s, e := l.Pos() 112 | ptest.Consume(t, s, e) 113 | } 114 | tok = l.Next() 115 | } 116 | ptest.Done(t, nil) 117 | } 118 | } 119 | for tok := token.EOI + 1; tok < token.NumTokens; tok++ { 120 | if !seen[tok] { 121 | t.Errorf("%v is not tested", tok) 122 | } 123 
| } 124 | } 125 | -------------------------------------------------------------------------------- /parsers/test/listener.go: -------------------------------------------------------------------------------- 1 | // generated by Textmapper; DO NOT EDIT 2 | 3 | package test 4 | 5 | import ( 6 | "fmt" 7 | ) 8 | 9 | type NodeType uint16 10 | 11 | type NodeFlags uint16 12 | 13 | type Listener func(t NodeType, flags NodeFlags, offset, endoffset int) 14 | 15 | const ( 16 | NoType NodeType = iota 17 | AsExpr // left=Expr right=Expr 18 | Bar 19 | Block // Negation? (Declaration)* 20 | Decl1 // (Identifier)+ 21 | Decl2 22 | DeclOptQual // (Identifier)* 23 | Elem 24 | Empty1 25 | EvalEmpty1 // Expr 26 | EvalFoo // Expr 27 | EvalFoo2 // a=Expr b=Expr 28 | Icon 29 | If // (Elem)+ then=Decl2Interface else=Decl2Interface? 30 | Int 31 | IntExpr // Bar 32 | LastInt 33 | Negation 34 | PlusExpr // left=Expr right=Expr 35 | Test // (Declaration)+ 36 | TestClause 37 | TestIntClause // Icon 38 | SingleLineComment 39 | Identifier 40 | InvalidToken 41 | MultiLineComment 42 | Int7 43 | Int9 44 | NodeTypeMax 45 | ) 46 | 47 | var nodeTypeStr = [...]string{ 48 | "NONE", 49 | "AsExpr", 50 | "Bar", 51 | "Block", 52 | "Decl1", 53 | "Decl2", 54 | "DeclOptQual", 55 | "Elem", 56 | "Empty1", 57 | "EvalEmpty1", 58 | "EvalFoo", 59 | "EvalFoo2", 60 | "Icon", 61 | "If", 62 | "Int", 63 | "IntExpr", 64 | "LastInt", 65 | "Negation", 66 | "PlusExpr", 67 | "Test", 68 | "TestClause", 69 | "TestIntClause", 70 | "SingleLineComment", 71 | "Identifier", 72 | "InvalidToken", 73 | "MultiLineComment", 74 | "Int7", 75 | "Int9", 76 | } 77 | 78 | func (t NodeType) String() string { 79 | if t >= 0 && int(t) < len(nodeTypeStr) { 80 | return nodeTypeStr[t] 81 | } 82 | return fmt.Sprintf("node(%d)", t) 83 | } 84 | 85 | var Decl2Interface = []NodeType{ 86 | Decl2, 87 | If, 88 | } 89 | 90 | var Declaration = []NodeType{ 91 | AsExpr, 92 | Block, 93 | Decl1, 94 | Decl2, 95 | DeclOptQual, 96 | Empty1, 97 | EvalEmpty1, 98 | EvalFoo, 99 | EvalFoo2, 100 | If, 101 | Int, 102 | IntExpr, 103 | LastInt, 104 | PlusExpr, 105 | TestClause, 106 | TestIntClause, 107 | } 108 | 109 | var Expr = []NodeType{ 110 | AsExpr, 111 | Int9, 112 | IntExpr, 113 | PlusExpr, 114 | } 115 | 116 | var TokenSet = []NodeType{ 117 | Identifier, 118 | } 119 | -------------------------------------------------------------------------------- /parsers/test/selector/selector.go: -------------------------------------------------------------------------------- 1 | // generated by Textmapper; DO NOT EDIT 2 | 3 | package selector 4 | 5 | import ( 6 | "github.com/inspirer/textmapper/parsers/test" 7 | ) 8 | 9 | type Selector func(nt test.NodeType) bool 10 | 11 | var ( 12 | Any = func(t test.NodeType) bool { return true } 13 | AsExpr = func(t test.NodeType) bool { return t == test.AsExpr } 14 | Bar = func(t test.NodeType) bool { return t == test.Bar } 15 | Block = func(t test.NodeType) bool { return t == test.Block } 16 | Decl1 = func(t test.NodeType) bool { return t == test.Decl1 } 17 | Decl2 = func(t test.NodeType) bool { return t == test.Decl2 } 18 | DeclOptQual = func(t test.NodeType) bool { return t == test.DeclOptQual } 19 | Elem = func(t test.NodeType) bool { return t == test.Elem } 20 | Empty1 = func(t test.NodeType) bool { return t == test.Empty1 } 21 | EvalEmpty1 = func(t test.NodeType) bool { return t == test.EvalEmpty1 } 22 | EvalFoo = func(t test.NodeType) bool { return t == test.EvalFoo } 23 | EvalFoo2 = func(t test.NodeType) bool { return t == test.EvalFoo2 } 24 | Icon = func(t 
test.NodeType) bool { return t == test.Icon } 25 | If = func(t test.NodeType) bool { return t == test.If } 26 | Int = func(t test.NodeType) bool { return t == test.Int } 27 | IntExpr = func(t test.NodeType) bool { return t == test.IntExpr } 28 | LastInt = func(t test.NodeType) bool { return t == test.LastInt } 29 | Negation = func(t test.NodeType) bool { return t == test.Negation } 30 | PlusExpr = func(t test.NodeType) bool { return t == test.PlusExpr } 31 | Test = func(t test.NodeType) bool { return t == test.Test } 32 | TestClause = func(t test.NodeType) bool { return t == test.TestClause } 33 | TestIntClause = func(t test.NodeType) bool { return t == test.TestIntClause } 34 | SingleLineComment = func(t test.NodeType) bool { return t == test.SingleLineComment } 35 | Identifier = func(t test.NodeType) bool { return t == test.Identifier } 36 | InvalidToken = func(t test.NodeType) bool { return t == test.InvalidToken } 37 | MultiLineComment = func(t test.NodeType) bool { return t == test.MultiLineComment } 38 | Int7 = func(t test.NodeType) bool { return t == test.Int7 } 39 | Int9 = func(t test.NodeType) bool { return t == test.Int9 } 40 | Decl2Interface = OneOf(test.Decl2Interface...) 41 | Declaration = OneOf(test.Declaration...) 42 | Expr = OneOf(test.Expr...) 43 | TokenSet = OneOf(test.TokenSet...) 44 | ) 45 | 46 | func OneOf(types ...test.NodeType) Selector { 47 | if len(types) == 0 { 48 | return func(test.NodeType) bool { return false } 49 | } 50 | const bits = 32 51 | max := 1 52 | for _, t := range types { 53 | if int(t) > max { 54 | max = int(t) 55 | } 56 | } 57 | size := (max + bits) / bits 58 | bitarr := make([]uint32, size) 59 | for _, t := range types { 60 | bitarr[uint(t)/bits] |= 1 << (uint(t) % bits) 61 | } 62 | return func(t test.NodeType) bool { 63 | i := uint(t) / bits 64 | return int(i) < len(bitarr) && bitarr[i]&(1<<(uint(t)%bits)) != 0 65 | } 66 | } 67 | -------------------------------------------------------------------------------- /parsers/test/token/token.go: -------------------------------------------------------------------------------- 1 | // generated by Textmapper; DO NOT EDIT 2 | 3 | package token 4 | 5 | import ( 6 | "fmt" 7 | ) 8 | 9 | // Type is an enum of all terminal symbols of the test language. 10 | type Type int32 11 | 12 | // Token values. 13 | const ( 14 | UNAVAILABLE Type = iota - 1 15 | EOI 16 | INVALID_TOKEN 17 | WHITESPACE 18 | SINGLELINECOMMENT 19 | IDENTIFIER 20 | IDENTIFIER2 21 | INTEGERCONSTANT 22 | LASTINT 23 | TEST // test 24 | DECL1 // decl1 25 | DECL2 // decl2 26 | EVAL // eval 27 | AS // as 28 | IF // if 29 | ELSE // else 30 | LBRACE // { 31 | RBRACE // } 32 | LPAREN // ( 33 | RPAREN // ) 34 | LBRACK // [ 35 | RBRACK // ] 36 | DOT // . 37 | DOTDOTDOT // ... 
38 | COMMA // , 39 | COLON // : 40 | MINUS // - 41 | MINUSGT // -> 42 | PLUS // + 43 | ESC // \ 44 | CHAR__ // _ 45 | FOO_ // foo_ 46 | F_A // f_a 47 | MULTILINE 48 | DQUOTE // " 49 | SQUOTE // ' 50 | SHARPATID 51 | ZFOO // Zfoo 52 | BACKTRACKINGTOKEN 53 | ERROR 54 | MULTILINECOMMENT 55 | 56 | NumTokens 57 | ) 58 | 59 | var tokenStr = [...]string{ 60 | "EOI", 61 | "INVALID_TOKEN", 62 | "WHITESPACE", 63 | "SINGLELINECOMMENT", 64 | "IDENTIFIER", 65 | "IDENTIFIER2", 66 | "INTEGERCONSTANT", 67 | "LASTINT", 68 | "test", 69 | "decl1", 70 | "decl2", 71 | "eval", 72 | "as", 73 | "if", 74 | "else", 75 | "{", 76 | "}", 77 | "(", 78 | ")", 79 | "[", 80 | "]", 81 | ".", 82 | "...", 83 | ",", 84 | ":", 85 | "-", 86 | "->", 87 | "+", 88 | "\\", 89 | "_", 90 | "foo_", 91 | "f_a", 92 | "MULTILINE", 93 | "\"", 94 | "'", 95 | "SHARPATID", 96 | "Zfoo", 97 | "BACKTRACKINGTOKEN", 98 | "ERROR", 99 | "MULTILINECOMMENT", 100 | } 101 | 102 | func (tok Type) String() string { 103 | if tok >= 0 && int(tok) < len(tokenStr) { 104 | return tokenStr[tok] 105 | } 106 | return fmt.Sprintf("token(%d)", tok) 107 | } 108 | -------------------------------------------------------------------------------- /parsers/tm/ast/parse.go: -------------------------------------------------------------------------------- 1 | // generated by Textmapper; DO NOT EDIT 2 | 3 | package ast 4 | 5 | import ( 6 | "context" 7 | 8 | "github.com/inspirer/textmapper/parsers/tm" 9 | ) 10 | 11 | // Parse parses a given utf-8 content into an AST. 12 | func Parse(ctx context.Context, path, content string, eh tm.ErrorHandler) (*Tree, error) { 13 | b := newBuilder(path, content) 14 | var s tm.TokenStream 15 | s.Init(content, b.addNode) 16 | var p tm.Parser 17 | p.Init(eh, b.addNode) 18 | err := p.ParseFile(ctx, &s) 19 | if err != nil { 20 | return nil, err 21 | } 22 | return b.build() 23 | } 24 | 25 | type builder struct { 26 | tree *Tree 27 | stack []*Node 28 | err error 29 | } 30 | 31 | func newBuilder(path, content string) *builder { 32 | return &builder{ 33 | tree: newTree(path, content), 34 | stack: make([]*Node, 0, 512), 35 | } 36 | } 37 | 38 | func (b *builder) addNode(t tm.NodeType, offset, endoffset int) { 39 | start := len(b.stack) 40 | end := start 41 | for start > 0 && b.stack[start-1].offset >= offset { 42 | start-- 43 | if b.stack[start].offset >= endoffset { 44 | end-- 45 | } 46 | } 47 | out := &Node{ 48 | tree: b.tree, 49 | t: t, 50 | offset: offset, 51 | endoffset: endoffset, 52 | } 53 | if start < end { 54 | out.firstChild = b.stack[start] 55 | var prev *Node 56 | for i := end - 1; i >= start; i-- { 57 | n := b.stack[i] 58 | n.parent = out 59 | n.next = prev 60 | prev = n 61 | } 62 | } 63 | if end == len(b.stack) { 64 | b.stack = append(b.stack[:start], out) 65 | } else if start < end { 66 | b.stack[start] = out 67 | l := copy(b.stack[start+1:], b.stack[end:]) 68 | b.stack = b.stack[:start+1+l] 69 | } else { 70 | b.stack = append(b.stack, nil) 71 | copy(b.stack[start+1:], b.stack[start:]) 72 | b.stack[start] = out 73 | } 74 | } 75 | 76 | func (b *builder) build() (*Tree, error) { 77 | if b.err != nil { 78 | return nil, b.err 79 | } 80 | b.addNode(tm.File, 0, len(b.tree.content)) 81 | b.tree.root = b.stack[0] 82 | return b.tree, nil 83 | } 84 | -------------------------------------------------------------------------------- /parsers/tm/ast/parser_test.go: -------------------------------------------------------------------------------- 1 | package ast 2 | 3 | import ( 4 | "bytes" 5 | "context" 6 | "fmt" 7 | "strings" 8 | "testing" 9 | 10 | 
"github.com/inspirer/textmapper/parsers/tm" 11 | "github.com/inspirer/textmapper/parsers/tm/selector" 12 | ) 13 | 14 | const ( 15 | testType = tm.Header 16 | ) 17 | 18 | var builderTests = []struct { 19 | want string 20 | ranges []string 21 | }{ 22 | {"(((1)(2))(3))", []string{ 23 | "1", "2", "12", "3", "123", 24 | }}, 25 | {"(((1)(2))((3))((4)(5)))", []string{ 26 | "1", "2", "12", "3", "3", "4", "5", "45", "12345", 27 | }}, 28 | {"(((1)(2))((3))((4)(5)))", []string{ 29 | "1", "2", "12", "4", "5", "3", "3", "45", "12345", 30 | }}, 31 | } 32 | 33 | func serialize(n *Node, b *bytes.Buffer) { 34 | b.Write([]byte("(")) 35 | defer b.Write([]byte(")")) 36 | 37 | children := n.Children(selector.Any) 38 | text := n.Text() 39 | offset := n.Offset() 40 | for _, c := range children { 41 | b.Write([]byte(text[offset-n.Offset() : c.Offset()-n.Offset()])) 42 | offset = c.Endoffset() 43 | serialize(c, b) 44 | } 45 | b.Write([]byte(text[offset-n.Offset():])) 46 | } 47 | 48 | func TestBuilder(t *testing.T) { 49 | for _, tc := range builderTests { 50 | source := tc.ranges[len(tc.ranges)-1] 51 | b := newBuilder("test", source) 52 | for _, r := range tc.ranges { 53 | i := strings.Index(source, r) 54 | if i == -1 { 55 | t.Fatalf("%v not found in %q", r, source) 56 | } 57 | b.addNode(testType, i, i+len(r)) 58 | } 59 | 60 | tree, err := b.build() 61 | if err != nil { 62 | t.Fatalf("builder failed with %v", b.err) 63 | } 64 | 65 | var buf bytes.Buffer 66 | serialize(tree.Root().Child(selector.Any), &buf) 67 | got := buf.String() 68 | if got != tc.want { 69 | t.Errorf("builder returned %v, want: %v", got, tc.want) 70 | } 71 | } 72 | } 73 | 74 | const testInput = ` 75 | language abc(go); 76 | lang = "abc" 77 | 78 | :: lexer 79 | eoi: /%%.*(\r?\n)?/ 80 | whitespace: /[\n\r\t ]+/ (space) 81 | 82 | qqq = /q1/ 83 | 84 | 'q': /{qqq}/ 85 | 86 | :: parser 87 | 88 | %input a; 89 | 90 | a : 'q'+ ; 91 | ` 92 | 93 | func TestParser(t *testing.T) { 94 | ctx := context.Background() 95 | tree, err := Parse(ctx, "file1", testInput, tm.StopOnFirstError) 96 | if err != nil { 97 | t.Errorf("cannot parse %q: %v", testInput, err) 98 | } 99 | 100 | var buf bytes.Buffer 101 | file := File{tree.Root()} 102 | lexer, _ := file.Lexer() 103 | for _, lp := range lexer.LexerPart() { 104 | switch lp := lp.(type) { 105 | case *Lexeme: 106 | fmt.Fprintf(&buf, "token %v\n", lp.Name().Text()) 107 | } 108 | } 109 | // TODO check "buf" 110 | t.Log(string(buf.Bytes())) 111 | } 112 | -------------------------------------------------------------------------------- /parsers/tm/ast/tree_ext.go: -------------------------------------------------------------------------------- 1 | package ast 2 | 3 | import ( 4 | "github.com/inspirer/textmapper/status" 5 | ) 6 | 7 | // SourceRange returns the full location of the node. 
8 | func (n *Node) SourceRange() status.SourceRange { 9 | if n == nil { 10 | return status.SourceRange{} 11 | } 12 | line, col := n.LineColumn() 13 | return status.SourceRange{ 14 | Filename: n.tree.path, 15 | Offset: n.offset, 16 | EndOffset: n.endoffset, 17 | Line: line, 18 | Column: col, 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /parsers/tm/ast/tree_test.go: -------------------------------------------------------------------------------- 1 | package ast 2 | 3 | import ( 4 | "fmt" 5 | "strings" 6 | "testing" 7 | 8 | "github.com/inspirer/textmapper/status" 9 | ) 10 | 11 | var offsetTests = []struct { 12 | input string 13 | want string 14 | }{ 15 | {"", "[0]"}, 16 | {"abc", "[0]"}, 17 | {"abc\n", "[0 4]"}, 18 | {"\nabc", "[0 1]"}, 19 | {"\n\nabc\n", "[0 1 2 6]"}, 20 | } 21 | 22 | func TestFile(t *testing.T) { 23 | for _, test := range offsetTests { 24 | lo := lineOffsets(test.input) 25 | if got := fmt.Sprintf("%v", lo); got != test.want { 26 | t.Errorf("lineOffsets(%q) = %v, want: %v", test.input, got, test.want) 27 | } 28 | } 29 | } 30 | 31 | const testFile = "abc" 32 | 33 | var rangeTests = []struct { 34 | content string 35 | substr string 36 | want status.SourceRange 37 | }{ 38 | {"a", "a", status.SourceRange{testFile, 0, 1, 1, 1}}, 39 | {"abcdef", "def", status.SourceRange{testFile, 3, 6, 1, 4}}, 40 | {"abc\ndef", "def", status.SourceRange{testFile, 4, 7, 2, 1}}, 41 | {"\n\n\n def", "def", status.SourceRange{testFile, 4, 7, 4, 2}}, 42 | } 43 | 44 | func TestSourceRange(t *testing.T) { 45 | for _, test := range rangeTests { 46 | tree := newTree(testFile, test.content) 47 | i := strings.Index(test.content, test.substr) 48 | if i == -1 { 49 | t.Fatalf("%q is not found in %q", test.substr, test.content) 50 | } 51 | n := &Node{tree: tree, offset: i, endoffset: i + len(test.substr)} 52 | if got := n.SourceRange(); got != test.want { 53 | t.Errorf("sourceRange(%q,%q) = %v, want: %v", test.content, test.substr, got, test.want) 54 | } 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /parsers/tm/lexer_actions.go: -------------------------------------------------------------------------------- 1 | package tm 2 | 3 | import ( 4 | "strings" 5 | "unicode/utf8" 6 | ) 7 | 8 | func (l *Lexer) skipAction() bool { 9 | open := 1 10 | var openQuote rune 11 | for open > 0 { 12 | var skipNext bool 13 | switch l.ch { 14 | case -1: 15 | return false 16 | case '{': 17 | if openQuote == 0 { 18 | open++ 19 | } 20 | case '}': 21 | if openQuote == 0 { 22 | open-- 23 | } 24 | case '\'', '"': 25 | if openQuote == 0 { 26 | openQuote = l.ch 27 | } else if l.ch == openQuote { 28 | openQuote = 0 29 | } 30 | case '\\': 31 | skipNext = openQuote != 0 32 | case '/': 33 | if openQuote != 0 || l.scanOffset >= len(l.source) { 34 | break 35 | } 36 | switch l.source[l.scanOffset] { 37 | case '*': 38 | end := strings.Index(l.source[l.scanOffset+1:], "*/") 39 | if end >= 0 { 40 | end += l.scanOffset + 3 41 | l.rewind(end) 42 | continue 43 | } 44 | case '/': 45 | end := strings.Index(l.source[l.scanOffset+1:], "\n") 46 | if end >= 0 { 47 | end += l.scanOffset + 2 48 | l.rewind(end) 49 | continue 50 | } 51 | } 52 | case '\n': 53 | l.line++ 54 | } 55 | 56 | // Scan the next character. 57 | // Note: the following code is inlined to avoid performance implications. 
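// The loop below mirrors the rune-decoding logic of the generated lexer: it
// takes the single-byte fast path for ASCII, falls back to UTF-8 decoding
// otherwise, and, when skipNext is set (a backslash was seen inside a quoted
// string), jumps back to "next" so the escaped character is consumed as well.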
58 | next: 59 | l.offset = l.scanOffset 60 | if l.offset < len(l.source) { 61 | r, w := rune(l.source[l.offset]), 1 62 | if r >= 0x80 { 63 | // not ASCII 64 | r, w = utf8.DecodeRuneInString(l.source[l.offset:]) 65 | } 66 | l.scanOffset += w 67 | l.ch = r 68 | if skipNext { 69 | skipNext = false 70 | goto next 71 | } 72 | } else { 73 | l.ch = -1 // EOI 74 | } 75 | 76 | } 77 | return true 78 | } 79 | -------------------------------------------------------------------------------- /parsers/tm/parser_test.go: -------------------------------------------------------------------------------- 1 | package tm_test 2 | 3 | import ( 4 | "context" 5 | "testing" 6 | 7 | "github.com/inspirer/textmapper/parsers/parsertest" 8 | "github.com/inspirer/textmapper/parsers/tm" 9 | ) 10 | 11 | var parseTests = []struct { 12 | nt tm.NodeType 13 | inputs []string 14 | }{ 15 | 16 | {tm.Identifier, []string{ 17 | ` language «a»(«b»); :: lexer «error»: `, 18 | }}, 19 | {tm.Option, []string{ 20 | header + ` «a = 5» «list = [5]» «feature = true» `, 21 | }}, 22 | {tm.IntegerLiteral, []string{ 23 | header + ` a = «5» list = [«5»] feature = true `, 24 | }}, 25 | {tm.BooleanLiteral, []string{ 26 | header + ` a = «true»`, 27 | }}, 28 | {tm.Lexeme, []string{ 29 | lexerPre + ` «error:»`, 30 | lexerPre + ` « error:»`, 31 | lexerPre + ` { «error:» }`, 32 | lexerPre + ` «error: /abc/ -1» «def:»`, 33 | lexerPre + ` «error: /abc/ {}»`, 34 | lexerPre + ` <*> { «error: /abc/ {}» }`, 35 | lexerPre + ` «int {Type}: /[0-9]+/ { $$ = parseInt(); }»`, 36 | }}, 37 | {tm.Command, []string{ 38 | lexerPre + ` abc: /abc/ «{}»`, 39 | lexerPre + ` abc: /abc/ «{ printf("}") }»`, 40 | }}, 41 | {tm.Comment, []string{ 42 | parserPre + ` «# abc» 43 | «# abc2» 44 | a : abc ; «# 8» 45 | «# abc2»`, 46 | }}, 47 | {tm.MultilineComment, []string{ 48 | parserPre + `a : «/* te ** / st */» ;`, 49 | parserPre + `«/* abc */» a:b;`, 50 | 51 | // While recovering. 
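// (In these tests, «…» delimits the node range the parser is expected to
// report, and § marks where a syntax error should be raised.)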
52 | parserPre + " a : (§:: a «/*aaa*/» b ) ; ", 53 | }}, 54 | {tm.InvalidToken, []string{ 55 | parserPre + "a : «'»\n ;", 56 | }}, 57 | 58 | {tm.Rule, []string{ 59 | parserPre + " a : /* empty */ «»| «abc» | «abc -> def» ; ", 60 | }}, 61 | {tm.DirectiveExpect, []string{ 62 | parserPre + ` «%expect 0;» `, 63 | }}, 64 | {tm.DirectiveExpectRR, []string{ 65 | parserPre + ` «%expect-rr 8;» `, 66 | }}, 67 | {tm.DirectiveInject, []string{ 68 | parserPre + ` «%inject comment -> Comment/a,b;» `, 69 | }}, 70 | {tm.SyntaxProblem, []string{ 71 | parserPre + " a : (§«:: a /*aaa*/ b» ) ; ", 72 | parserPre + " a : §«+ a» ; ", 73 | header + ` a = 5 «b §a b c = 5» :: lexer a: /a/`, 74 | }}, 75 | 76 | // TODO add tests 77 | } 78 | 79 | func TestParser(t *testing.T) { 80 | var s tm.TokenStream 81 | var p tm.Parser 82 | 83 | ctx := context.Background() 84 | seen := make(map[tm.NodeType]bool) 85 | seen[tm.File] = true 86 | for _, tc := range parseTests { 87 | seen[tc.nt] = true 88 | for _, input := range tc.inputs { 89 | test := parsertest.New(t, tc.nt.String(), input) 90 | listener := func(nt tm.NodeType, offset, endoffset int) { 91 | if nt == tc.nt { 92 | test.Consume(t, offset, endoffset) 93 | } 94 | } 95 | errHandler := func(se tm.SyntaxError) bool { 96 | test.ConsumeError(t, se.Offset, se.Endoffset) 97 | return true 98 | } 99 | s.Init(test.Source(), listener) 100 | p.Init(errHandler, listener) 101 | test.Done(t, p.ParseFile(ctx, &s)) 102 | } 103 | } 104 | for n := tm.NodeType(1); n < tm.NodeTypeMax; n++ { 105 | if !seen[n] { 106 | // TODO t.Errorf("%v is not tested", n) 107 | } 108 | } 109 | } 110 | 111 | const header = "language l(a); " 112 | const lexerPre = "language l(a); :: lexer\n" 113 | const parserPre = "language l(a); :: lexer a = /abc/ :: parser " 114 | -------------------------------------------------------------------------------- /parsers/tm/stream.go: -------------------------------------------------------------------------------- 1 | // generated by Textmapper; DO NOT EDIT 2 | 3 | package tm 4 | 5 | import ( 6 | "context" 7 | "fmt" 8 | 9 | "github.com/inspirer/textmapper/parsers/tm/token" 10 | ) 11 | 12 | // TokenStream post-processes lexer output for consumption by the parser. 13 | type TokenStream struct { 14 | lexer Lexer 15 | listener Listener // for ingesting tokens into the AST, nil during lookaheads 16 | pending []symbol 17 | } 18 | 19 | type symbol struct { 20 | symbol int32 21 | offset int 22 | endoffset int 23 | } 24 | 25 | func (s *TokenStream) Init(content string, l Listener) { 26 | s.lexer.Init(content) 27 | s.listener = l 28 | 29 | if cap(s.pending) < startTokenBufferSize { 30 | s.pending = make([]symbol, 0, startTokenBufferSize) 31 | } 32 | s.pending = s.pending[:0] 33 | } 34 | 35 | func (s *TokenStream) Copy() TokenStream { 36 | ret := *s 37 | ret.lexer = s.lexer.Copy() 38 | ret.listener = nil 39 | ret.pending = nil 40 | return ret 41 | } 42 | 43 | func (s *TokenStream) reportIgnored(ctx context.Context, tok symbol) { 44 | var t NodeType 45 | switch token.Type(tok.symbol) { 46 | case token.INVALID_TOKEN: 47 | t = InvalidToken 48 | case token.MULTILINECOMMENT: 49 | t = MultilineComment 50 | case token.COMMENT: 51 | t = Comment 52 | case token.TEMPLATES: 53 | t = Templates 54 | default: 55 | return 56 | } 57 | if debugSyntax { 58 | fmt.Printf("ignored: %v as %v\n", token.Type(tok.symbol), t) 59 | } 60 | s.listener(t, tok.offset, tok.endoffset) 61 | } 62 | 63 | // flush is called for every "shifted" token to report it together with any pending tokens 64 | // to the listener. 
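// Pending tokens that end beyond the shifted token stay in the queue for a
// later flush; as noted in the body, that only happens during error recovery.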
65 | func (s *TokenStream) flush(ctx context.Context, sym symbol) { 66 | if s.listener == nil { 67 | return 68 | } 69 | if len(s.pending) > 0 { 70 | for i, tok := range s.pending { 71 | if tok.endoffset > sym.endoffset { 72 | // Note: this copying should not happen during normal operation, only 73 | // during error recovery. 74 | s.pending = append(s.pending[:0], s.pending[i:]...) 75 | goto flushed 76 | } 77 | s.reportIgnored(ctx, tok) 78 | } 79 | s.pending = s.pending[:0] 80 | flushed: 81 | } 82 | } 83 | 84 | func (s *TokenStream) text(sym symbol) string { 85 | return s.lexer.source[sym.offset:sym.endoffset] 86 | } 87 | 88 | func (s *TokenStream) line() int { 89 | return s.lexer.tokenLine 90 | } 91 | 92 | // next transforms the lexer stream into a stream of symbols for the parser. 93 | // 94 | // Note: "stack" and "endState" are nil and -1 respectively during lookaheads 95 | // and error recovery. 96 | func (s *TokenStream) next(stack []stackEntry, endState int16) symbol { 97 | restart: 98 | tok := s.lexer.Next() 99 | switch tok { 100 | case token.INVALID_TOKEN, token.MULTILINECOMMENT, token.COMMENT, token.TEMPLATES: 101 | start, end := s.lexer.Pos() 102 | s.pending = append(s.pending, symbol{int32(tok), start, end}) 103 | goto restart 104 | } 105 | start, end := s.lexer.Pos() 106 | return symbol{int32(tok), start, end} 107 | } 108 | -------------------------------------------------------------------------------- /parsers/tm/tokens.go: -------------------------------------------------------------------------------- 1 | package tm 2 | 3 | import ( 4 | "github.com/inspirer/textmapper/parsers/tm/token" 5 | ) 6 | 7 | func IsSoftKeyword(t token.Type) bool { 8 | return t >= token.ASSERT && t <= token.CHAR_X 9 | } 10 | 11 | func IsKeyword(t token.Type) bool { 12 | return t >= token.AS && t <= token.TRUE 13 | } 14 | -------------------------------------------------------------------------------- /regen.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | go install ./cmd/textmapper 4 | 5 | echo 'Regenerating js' 6 | (cd parsers/js; textmapper generate) 7 | echo 'Regenerating test' 8 | (cd parsers/test; textmapper generate) 9 | echo 'Regenerating tm' 10 | (cd parsers/tm; textmapper generate) 11 | echo 'Regenerating json' 12 | (cd parsers/json; textmapper generate) 13 | echo 'Regenerating simple' 14 | (cd parsers/simple; textmapper generate) 15 | 16 | go fmt ./... 17 | find . -type f -name '*.go' | xargs -I '{}' goimports -w -local github.com,go.lsp.dev,go.uber.org '{}' 18 | go build ./... && go test ./... 
19 | -------------------------------------------------------------------------------- /status/status_test.go: -------------------------------------------------------------------------------- 1 | package status 2 | 3 | import ( 4 | "bytes" 5 | "testing" 6 | ) 7 | 8 | var statusTests = []struct { 9 | errs []Error 10 | wantPrinted string 11 | wantErr string 12 | }{ 13 | { 14 | errs: []Error{}, 15 | wantErr: "no errors", 16 | }, 17 | { 18 | errs: []Error{ 19 | {SourceRange{}, "I/O err 2"}, 20 | {SourceRange{}, "I/O err 1"}, 21 | }, 22 | wantErr: "I/O err 1 (and 1 more error(s))", 23 | wantPrinted: "I/O err 1\nI/O err 2\n", 24 | }, 25 | { 26 | errs: []Error{ 27 | {SourceRange{}, "I/O err"}, 28 | {SourceRange{"file2", 0, 100, 1, 1}, "broken file"}, 29 | {SourceRange{"file1", 80, 81, 20, 12}, "invalid utf-8"}, 30 | {SourceRange{"file1", 10, 20, 3, 1}, "invalid identifier"}, 31 | {SourceRange{"file1", 15, 20, 3, 6}, "second error"}, 32 | }, 33 | wantErr: "I/O err (and 3 more error(s))", 34 | wantPrinted: "I/O err\n" + 35 | "file1:3:1: invalid identifier\n" + 36 | "file1:20:12: invalid utf-8\n" + 37 | "file2:1:1: broken file\n", 38 | }, 39 | } 40 | 41 | func TestStatus(t *testing.T) { 42 | for i, test := range statusTests { 43 | var s Status 44 | for _, e := range test.errs { 45 | s.Add(e.Origin, e.Msg) 46 | } 47 | s.Dedupe() 48 | if got := s.Error(); got != test.wantErr { 49 | t.Errorf("Error(%v) = %v; want: %v", i, got, test.wantErr) 50 | } 51 | var b bytes.Buffer 52 | Print(&b, s.Err()) 53 | if got := b.String(); got != test.wantPrinted { 54 | t.Errorf("Print(%v) = %v; want: %v", i, got, test.wantPrinted) 55 | } 56 | s2 := FromError(s.Err()) 57 | b.Reset() 58 | Print(&b, s2.Err()) 59 | if got := b.String(); got != test.wantPrinted { 60 | t.Errorf("Print(FromError(%v.Err())) = %v; want: %v", i, got, test.wantPrinted) 61 | } 62 | } 63 | } 64 | 65 | type testNode SourceRange 66 | 67 | func (tn testNode) SourceRange() SourceRange { 68 | return SourceRange(tn) 69 | } 70 | 71 | func TestErrorf(t *testing.T) { 72 | n := testNode(SourceRange{"file1", 80, 81, 20, 12}) 73 | s := Errorf(n, "failure") 74 | want := "file1:20:12: failure" 75 | if got := s.Error(); got != want { 76 | t.Errorf("Errorf(\"failure\") = %v; want: %v", got, want) 77 | } 78 | } 79 | -------------------------------------------------------------------------------- /syntax/expand_test.go: -------------------------------------------------------------------------------- 1 | package syntax_test 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/inspirer/textmapper/syntax" 7 | ) 8 | 9 | var nameTests = []struct { 10 | input string 11 | want string 12 | }{ 13 | {`%input Z; Z: a+;`, "A_list"}, 14 | {`%input Z; Z: a*;`, "A_optlist"}, 15 | {`%input Z; Z: a* -> Foo;`, "A_optlist"}, 16 | {`%input Z; Z: QQ=a+;`, "A_list"}, 17 | {`%input Z; Z: (a separator b)+;`, "A_list_B_separated"}, 18 | {`%input Z; Z: .foo (a separator b)* .bar;`, "A_optlist_B_separated"}, 19 | {`%input Z; Z: .foo (a separator b c)* .bar;`, "A_optlist_withsep"}, 20 | {`%input Z; Z: a?;`, "Aopt"}, 21 | {`%input Z; Z: B?; B:;`, "Bopt"}, 22 | {`%input Z; Z: (a separator b c)+?;`, "A_list_withsepopt"}, 23 | 24 | // sets 25 | {`%input Z; Z: set(a);`, "setof_a"}, 26 | {`%input Z; Z: set(a | b);`, "setof_a_or_b"}, 27 | {`%input Z; Z: set(a | b)+;`, "setof_a_or_b_list"}, 28 | {`%input Z; Z: set(a | b)+?;`, "setof_a_or_b_listopt"}, 29 | {`%input Z; Z: set(a | b)*;`, "setof_a_or_b_optlist"}, 30 | {`%input Z; Z: set(Q); Q: c;`, "setof_Q"}, 31 | {`%input Z; Z: set(precede Q); Q: c;`, 
"setof_precede_Q"}, 32 | {`%input Z; Z: set(~Q | follow b); Q: c;`, "setof_not_Q_or_follow_b"}, 33 | {`%input Z; Z: set(first A | last B)?; A: a; B: b;`, "setof_first_A_or_last_Bopt"}, 34 | 35 | // lookaheads 36 | {`%input Z; Z: (?= A & !B); A:; B:;`, "lookahead_A_notB"}, 37 | } 38 | 39 | func TestProvisionalName(t *testing.T) { 40 | for _, tc := range nameTests { 41 | model, err := parse(tc.input) 42 | if err != nil { 43 | t.Errorf("cannot parse %q: %v", tc.input, err) 44 | continue 45 | } 46 | 47 | inp := model.Inputs[0].Nonterm 48 | name := syntax.ProvisionalName(model.Nonterms[inp].Value, model) 49 | if name != tc.want { 50 | t.Errorf("ProvisionalName(%v) = %v, want %v", tc.input, name, tc.want) 51 | } 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /syntax/nullable.go: -------------------------------------------------------------------------------- 1 | package syntax 2 | 3 | import ( 4 | "log" 5 | 6 | "github.com/inspirer/textmapper/util/container" 7 | ) 8 | 9 | // Nullable computes and returns the set of nullable symbols in a given model. 10 | // 11 | // Note: this function does not instantiate templates but does some approximation if they are 12 | // present by treating all conditional productions non-nullable. 13 | func Nullable(m *Model) container.BitSet { 14 | ret := container.NewBitSet(len(m.Terminals) + len(m.Nonterms)) 15 | 16 | terms := len(m.Terminals) 17 | for { 18 | var keepGoing bool 19 | for i, nt := range m.Nonterms { 20 | if ret.Get(terms + i) { 21 | continue 22 | } 23 | if isNullable(nt.Value, ret) { 24 | ret.Set(terms + i) 25 | keepGoing = true 26 | } 27 | } 28 | if !keepGoing { 29 | break 30 | } 31 | } 32 | return ret 33 | } 34 | 35 | func isNullable(expr *Expr, nullable container.BitSet) bool { 36 | switch expr.Kind { 37 | case Empty, Optional, StateMarker, Command, Lookahead: 38 | return true 39 | case Set: 40 | return false 41 | case List: 42 | if expr.ListFlags&OneOrMore == 0 { 43 | return true 44 | } 45 | fallthrough 46 | case Assign, Append, Arrow, Prec: 47 | return isNullable(expr.Sub[0], nullable) 48 | case Choice: 49 | for _, c := range expr.Sub { 50 | if isNullable(c, nullable) { 51 | return true 52 | } 53 | } 54 | return len(expr.Sub) == 0 55 | case Sequence: 56 | for _, c := range expr.Sub { 57 | if !isNullable(c, nullable) { 58 | return false 59 | } 60 | } 61 | return true 62 | case Reference: 63 | return nullable.Get(expr.Symbol) 64 | case Conditional, LookaheadNot: 65 | // Note: these are unexpected 66 | return false 67 | default: 68 | log.Fatal("invariant failure") 69 | return false 70 | } 71 | } 72 | -------------------------------------------------------------------------------- /syntax/nullable_test.go: -------------------------------------------------------------------------------- 1 | package syntax_test 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/inspirer/textmapper/syntax" 7 | "github.com/inspirer/textmapper/util/dump" 8 | ) 9 | 10 | var nullableTests = []struct { 11 | input string 12 | want []string 13 | }{ 14 | {`A: a?; B: .foo (a b)?;`, []string{"A", "B"}}, 15 | {`A:; B: b | A; C: c;`, []string{"A", "B"}}, 16 | {`A:; B: b | b A; C: c;`, []string{"A"}}, 17 | {`A: B; B: C D; C:; D: a?; `, []string{"A", "B", "C", "D"}}, 18 | {`A: B; B: C D; C:; D: a A; `, []string{"C"}}, 19 | {`A: set(a); B: A+; C: A*;`, []string{"C"}}, 20 | {`A: a %prec a; B: a? 
%prec a;`, []string{"B"}}, 21 | {`A: set(a); B: (A separator a)+; C: (A separator b)*;`, []string{"C"}}, 22 | // Approximation: conditionals are never nullable. 23 | {`%flag Foo; A: a?; B: [Foo] a?;`, []string{"A"}}, 24 | } 25 | 26 | func TestNullable(t *testing.T) { 27 | for _, tc := range nullableTests { 28 | model, err := parse(tc.input) 29 | if err != nil { 30 | t.Errorf("cannot parse %q: %v", tc.input, err) 31 | continue 32 | } 33 | 34 | var got []string 35 | for _, nt := range syntax.Nullable(model).Slice(nil) { 36 | got = append(got, model.Nonterms[nt-len(model.Terminals)].Name) 37 | } 38 | 39 | if diff := dump.Diff(tc.want, got); diff != "" { 40 | t.Errorf("Nullable(%v) diff (-want +got):\n%s", tc.input, diff) 41 | } 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /testing/.bazelignore: -------------------------------------------------------------------------------- 1 | ts/node_modules 2 | -------------------------------------------------------------------------------- /testing/.bazelrc: -------------------------------------------------------------------------------- 1 | # TypeScript skipLibCheck configuration for faster performance 2 | common --@aspect_rules_ts//ts:skipLibCheck=always 3 | 4 | # C++ toolchain configuration for macOS 5 | build --repo_env=CC=clang 6 | build --repo_env=CXX=clang++ 7 | build --repo_env=BAZEL_CXXOPTS="-std=c++17" 8 | build --macos_sdk_version=14.0 -------------------------------------------------------------------------------- /testing/.bazelversion: -------------------------------------------------------------------------------- 1 | 8.1.1 -------------------------------------------------------------------------------- /testing/BUILD: -------------------------------------------------------------------------------- 1 | load("@hedron_compile_commands//:refresh_compile_commands.bzl", "refresh_compile_commands") 2 | 3 | # Note: run `bazel run :refresh_compile_commands` to set up clangd. 
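# The target below comes from hedron_compile_commands and writes
# compile_commands.json into the workspace so that clangd can index the C++
# targets in this repository.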
4 | refresh_compile_commands( 5 | name = "refresh_compile_commands", 6 | exclude_external_sources = True, 7 | exclude_headers = "external", 8 | targets = "//...", 9 | ) 10 | -------------------------------------------------------------------------------- /testing/MODULE.bazel: -------------------------------------------------------------------------------- 1 | module( 2 | name = "testing", 3 | ) 4 | 5 | # Bazel's canonical dependencies 6 | bazel_dep(name = "bazel_skylib", version = "1.7.1") 7 | bazel_dep(name = "platforms", version = "0.0.11") 8 | 9 | # C++ 10 | bazel_dep(name = "abseil-cpp", version = "20250127.0") 11 | bazel_dep(name = "googletest", version = "1.15.2") 12 | bazel_dep(name = "google_benchmark", version = "1.9.1") 13 | 14 | # C++ toolchain configuration 15 | cc_toolchain_config = use_extension("@bazel_tools//tools/cpp:cc_toolchain_config.bzl", "cc_toolchain_config") 16 | cc_toolchain_config.toolchain( 17 | name = "cc-toolchain", 18 | compiler = "clang", 19 | compiler_version = "14.0", 20 | cxx_standard = "c++17", 21 | target_cpu = "darwin_arm64", 22 | ) 23 | use_repo(cc_toolchain_config) 24 | 25 | # Hedron's Compile Commands Extractor for Bazel 26 | bazel_dep(name = "hedron_compile_commands", version = "0.0.0", dev_dependency = True) 27 | git_override( 28 | module_name = "hedron_compile_commands", 29 | commit = "0e990032f3c5a866e72615cf67e5ce22186dcb97", 30 | remote = "https://github.com/hedronvision/bazel-compile-commands-extractor.git", 31 | ) 32 | 33 | # TypeScript 34 | bazel_dep(name = "aspect_rules_js", version = "2.1.3") 35 | bazel_dep(name = "rules_nodejs", version = "6.3.3") 36 | bazel_dep(name = "aspect_bazel_lib", version = "2.9.3") 37 | 38 | # Jest testing 39 | bazel_dep(name = "aspect_rules_jest", version = "0.22.0") 40 | 41 | node = use_extension("@rules_nodejs//nodejs:extensions.bzl", "node", dev_dependency = True) 42 | node.toolchain(node_version = "20.9.0") 43 | 44 | npm = use_extension("@aspect_rules_js//npm:extensions.bzl", "npm", dev_dependency = True) 45 | npm.npm_translate_lock( 46 | name = "npm", 47 | no_optional = False, 48 | pnpm_lock = "//ts:pnpm-lock.yaml", 49 | verify_node_modules_ignored = "//:.bazelignore", 50 | ) 51 | use_repo(npm, "npm") 52 | 53 | pnpm = use_extension("@aspect_rules_js//npm:extensions.bzl", "pnpm") 54 | pnpm.pnpm( 55 | name = "pnpm", 56 | pnpm_version = "9.12.1", 57 | ) 58 | use_repo(pnpm, "pnpm") 59 | 60 | bazel_dep(name = "aspect_rules_esbuild", version = "0.21.0") 61 | bazel_dep(name = "aspect_rules_ts", version = "3.5.0") 62 | 63 | rules_ts_ext = use_extension("@aspect_rules_ts//ts:extensions.bzl", "ext", dev_dependency = True) 64 | rules_ts_ext.deps( 65 | ts_version_from = "//ts:package.json", 66 | ) 67 | use_repo(rules_ts_ext, "npm_typescript") -------------------------------------------------------------------------------- /testing/clangd.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | bazel run :refresh_compile_commands 4 | -------------------------------------------------------------------------------- /testing/cpp/json/BUILD: -------------------------------------------------------------------------------- 1 | package(default_visibility = ["//visibility:private"]) 2 | 3 | cc_library( 4 | name = "lexer", 5 | srcs = ["json_lexer.cc"], 6 | hdrs = [ 7 | "json_lexer.h", 8 | "json_token.h", 9 | ], 10 | deps = [ 11 | "@abseil-cpp//absl/log", 12 | "@abseil-cpp//absl/strings", 13 | ], 14 | ) 15 | 16 | cc_test( 17 | name = "lexer_test", 18 | size = "small", 19 | srcs = 
["lexer_test.cc"], 20 | deps = [ 21 | ":lexer", 22 | "//cpp/markup", 23 | "@googletest//:gtest_main", 24 | ], 25 | ) 26 | 27 | cc_library( 28 | name = "parser", 29 | srcs = ["json_parser.cc"], 30 | hdrs = ["json_parser.h"], 31 | deps = [ 32 | ":lexer", 33 | "@abseil-cpp//absl/functional:function_ref", 34 | "@abseil-cpp//absl/log", 35 | "@abseil-cpp//absl/status", 36 | "@abseil-cpp//absl/strings", 37 | ], 38 | ) 39 | 40 | cc_test( 41 | name = "parser_test", 42 | size = "small", 43 | srcs = ["parser_test.cc"], 44 | deps = [ 45 | ":lexer", 46 | ":parser", 47 | "//cpp/markup", 48 | "@googletest//:gtest_main", 49 | ], 50 | ) 51 | 52 | cc_binary( 53 | name = "benchmark", 54 | srcs = ["benchmark.cc"], 55 | deps = [ 56 | ":lexer", 57 | "@google_benchmark//:benchmark", 58 | ], 59 | ) 60 | -------------------------------------------------------------------------------- /testing/cpp/json/benchmark.cc: -------------------------------------------------------------------------------- 1 | // Run with: 2 | // 3 | // $ bazel run --compilation_mode=opt //json:benchmark 4 | 5 | #include 6 | 7 | #include "json_lexer.h" 8 | 9 | const std::string input = R"({ 10 | "some key": [{ 11 | "title": "example glossary", 12 | "float value": 1e9, 13 | "float value 2": -0.9e-5, 14 | "Gloss \u1234 \nDiv": { 15 | "title": "S", "items": { 16 | "read": { 17 | "ID": "xml", 18 | "SortAs": "price", 19 | "type": "Markup Language", 20 | "Acronym": {}, 21 | "UniqueID": "850257207432", 22 | "def": { 23 | "json": "Lorem ipsum dolor sit amet, ad prima imperdiet sea. Homero reprimique no duo, mundi iriure expetenda ei est. No nec denique efficiantur, pri ad oratio adipisci expetendis.", 24 | "links": ["ABC", "Echo", "a", "b", "c"] 25 | }, 26 | "render as": "markup", "null": null, "true": true, "false": false 27 | } 28 | } 29 | } 30 | }] 31 | } 32 | )"; 33 | 34 | static void BM_Lexer(benchmark::State& state) { 35 | for (auto _ : state) { 36 | json::Lexer l(input); 37 | json::Token next; 38 | while ((next = l.Next()) != json::Token::EOI) { 39 | } 40 | } 41 | state.SetBytesProcessed( 42 | static_cast(state.iterations())*input.size()); 43 | } 44 | BENCHMARK(BM_Lexer); 45 | 46 | BENCHMARK_MAIN(); 47 | -------------------------------------------------------------------------------- /testing/cpp/json/json.tm: -------------------------------------------------------------------------------- 1 | language json(cc); 2 | 3 | namespace = "json" 4 | includeGuardPrefix = "EXAMPLES_JSON_" 5 | tokenLineOffset = true 6 | tokenColumn = true 7 | filenamePrefix = "json_" 8 | optimizeTables = true 9 | eventBased = true 10 | extraTypes = ["NonExistingType"] 11 | parseParams = ["int a", "bool b"] 12 | debugParser = true 13 | scanBytes = true 14 | 15 | :: lexer 16 | 17 | %s initial, foo; 18 | 19 | '{': /\{/ 20 | '}': /\}/ 21 | '[': /\[/ 22 | ']': /\]/ 23 | ':': /:/ 24 | ',': /,/ 25 | 26 | Foo: /\#/ 27 | 28 | space: /[\t\r\n ]+/ (space) 29 | 30 | commentChars = /([^*]|\*+[^*\/])*\**/ 31 | MultiLineComment: /\/\*{commentChars}\*\// (space) 32 | 33 | hex = /[0-9a-fA-F]/ 34 | 35 | # TODO 36 | JSONString: /"([^"\\]|\\(["\/\\bfnrt]|u{hex}{4}))*"/ 37 | #JSONString: /"([^"\\\x00-\x1f]|\\(["\/\\bfnrt]|u{hex}{4}))*"/ 38 | 39 | fraction = /\.[0-9]+/ 40 | exp = /[eE][+-]?[0-9]+/ 41 | JSONNumber: /-?(0|[1-9][0-9]*){fraction}?{exp}?/ 42 | 43 | id: /[a-zA-Z][a-zA-Z0-9]*/ (class) 44 | 45 | kw_null: /null/ 46 | 'true': /true/ 47 | 'false': /false/ 48 | 49 | 'A': /A/ 50 | 'A': /α/ 51 | 'B': /B/ 52 | 53 | 'A': /A!/ { /*some code */ } 54 | 55 | error: 56 | invalid_token: 57 | 58 | 
:: parser 59 | 60 | %input JSONText; 61 | 62 | %inject MultiLineComment -> MultiLineComment/Bar,Foo; 63 | %inject invalid_token -> InvalidToken; 64 | %inject JSONString -> JsonString; 65 | 66 | %generate Literals = set(first JSONValue<+A>); 67 | 68 | %flag A; 69 | 70 | JSONText {bool b} -> JSONText : 71 | JSONValue<+A>[val] { $$ = $val; } ; 72 | 73 | JSONValue {int a} -> JSONValue : 74 | kw_null 75 | | 'true' 76 | | 'false' { $$ = 5; } 77 | | [A] 'A' 78 | | [!A] 'B' 79 | | JSONObject 80 | | EmptyObject 81 | | JSONArray 82 | | JSONString 83 | | JSONNumber 84 | ; 85 | 86 | EmptyObject -> EmptyObject : (?= EmptyObject) '{' '}' { @$.begin = @1.begin; } ; 87 | 88 | JSONObject -> JSONObject/Foo : 89 | (?= !EmptyObject) '{' JSONMemberList? '}' { @$.begin = @1.begin; } ; 90 | 91 | JSONMember {int c} -> JSONMember/Foo : 92 | JSONString ':'[b] { LOG(INFO) << @b.begin; } JSONValue<~A> { $$ = a; } 93 | | error -> SyntaxProblem 94 | ; 95 | 96 | JSONMemberList {bool d}: 97 | JSONMember { $$ = b; } 98 | | JSONMemberList .foo ',' JSONMember 99 | ; 100 | 101 | JSONArray -> JSONArray/Foo : 102 | .bar '[' JSONElementListopt ']' ; 103 | 104 | JSONElementList : 105 | JSONValue<+A> 106 | | JSONElementList ',' JSONValue<+A> 107 | ; 108 | -------------------------------------------------------------------------------- /testing/cpp/json/json_lexer.h: -------------------------------------------------------------------------------- 1 | // generated by Textmapper; DO NOT EDIT 2 | 3 | #ifndef EXAMPLES_JSON_LEXER_H_ 4 | #define EXAMPLES_JSON_LEXER_H_ 5 | 6 | #include <cstdint> 7 | #include <ostream> 8 | #include <string> 9 | 10 | #include "absl/strings/string_view.h" 11 | #include "json_token.h" 12 | 13 | namespace json { 14 | inline constexpr absl::string_view bomSeq = "\xef\xbb\xbf"; 15 | 16 | class Lexer { 17 | public: 18 | // Lexer states. 19 | enum class State : std::uint32_t { 20 | Initial = 0, 21 | Foo = 1, 22 | }; 23 | 24 | struct Location { 25 | Location(int64_t b = 0, int64_t e = 0) : begin(b), end(e) {} 26 | friend inline std::ostream& operator<<(std::ostream& os, 27 | const Location& l) { 28 | return os << "[" << l.begin << "-" << l.end << "]"; 29 | } 30 | // Byte offsets into input buffer. 31 | int64_t begin; 32 | int64_t end; 33 | }; 34 | 35 | explicit Lexer(absl::string_view input_source ABSL_ATTRIBUTE_LIFETIME_BOUND); 36 | 37 | // Next finds and returns the next token in source. The stream end is 38 | // indicated by Token.EOI. 39 | // 40 | // The token text can be retrieved later by calling the Text() method. 41 | ABSL_MUST_USE_RESULT Token Next(); 42 | 43 | // Location of the last token returned by Next(). 44 | ABSL_MUST_USE_RESULT Location LastTokenLocation() const { 45 | return Location(token_offset_, offset_); 46 | } 47 | // LastTokenLine returns the line number of the last token returned by Next() 48 | // (1-based). 49 | ABSL_MUST_USE_RESULT int64_t LastTokenLine() const { return token_line_; } 50 | // LastTokenColumn returns the column of the last token returned by Next() 51 | // (in bytes, 1-based). 52 | ABSL_MUST_USE_RESULT int64_t LastTokenColumn() const { return token_column_; } 53 | 54 | // Text returns the substring of the input corresponding to the last token.
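// The returned view aliases the buffer given to the constructor and is only
// valid while that buffer is alive (hence ABSL_ATTRIBUTE_LIFETIME_BOUND).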
55 | ABSL_MUST_USE_RESULT absl::string_view Text() const 56 | ABSL_ATTRIBUTE_LIFETIME_BOUND { 57 | return source_.substr(token_offset_, offset_ - token_offset_); 58 | } 59 | 60 | void set_state(State state) { start_state_ = state; } 61 | ABSL_MUST_USE_RESULT State state() { return start_state_; } 62 | 63 | private: 64 | // Rewind can be used in lexer actions to accept a portion of a scanned token, 65 | // or to include more text into it. 66 | void Rewind(int64_t rewind_offset); 67 | 68 | absl::string_view source_; 69 | 70 | int32_t input_rune_ = 0; // current character, -1 means end of input 71 | int64_t offset_ = 0; // character offset 72 | int64_t token_offset_ = 0; // last token byte offset 73 | int64_t line_ = 1; // current line number (1-based) 74 | int64_t token_line_ = 1; // last token line 75 | int64_t line_offset_ = 0; // current line offset 76 | int64_t token_column_ = 1; // last token column (in bytes) 77 | int64_t scan_offset_ = 0; // scanning byte offset 78 | 79 | State start_state_ = State::Initial; // lexer state, modifiable 80 | }; 81 | 82 | inline std::ostream& operator<<(std::ostream& os, const Lexer& lexer) { 83 | return os << "json::Lexer at line " << lexer.LastTokenLine() << " location " 84 | << lexer.LastTokenLocation() << " last token was \"" << lexer.Text() 85 | << "\""; 86 | } 87 | 88 | } // namespace json 89 | 90 | #endif // EXAMPLES_JSON_LEXER_H_ 91 | -------------------------------------------------------------------------------- /testing/cpp/json/json_token.h: -------------------------------------------------------------------------------- 1 | // generated by Textmapper; DO NOT EDIT 2 | 3 | #ifndef EXAMPLES_JSON_TOKEN_H_ 4 | #define EXAMPLES_JSON_TOKEN_H_ 5 | 6 | #include <array> 7 | #include <cstdint> 8 | #include <ostream> 9 | 10 | #include "absl/strings/string_view.h" 11 | 12 | namespace json { 13 | 14 | // Token is an enum of all terminal symbols of the json language.
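// The enumerators are dense and index into the tokenStr/tokenName arrays
// below; UNAVAILABLE and NumTokens are sentinels rather than real tokens.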
15 | enum class Token { 16 | UNAVAILABLE = -1, 17 | EOI = 0, 18 | INVALID_TOKEN = 1, 19 | LBRACE = 2, /* { */ 20 | RBRACE = 3, /* } */ 21 | LBRACK = 4, /* [ */ 22 | RBRACK = 5, /* ] */ 23 | COLON = 6, /* : */ 24 | COMMA = 7, /* , */ 25 | FOO = 8, /* # */ 26 | SPACE = 9, 27 | MULTILINECOMMENT = 10, 28 | JSONSTRING = 11, 29 | JSONNUMBER = 12, 30 | ID = 13, 31 | KW_NULL = 14, /* null */ 32 | TRUE = 15, /* true */ 33 | FALSE = 16, /* false */ 34 | CHAR_A = 17, 35 | CHAR_B = 18, /* B */ 36 | ERROR = 19, 37 | NumTokens = 20 38 | }; 39 | 40 | constexpr inline std::array<absl::string_view, 41 | static_cast<size_t>(Token::NumTokens)> 42 | tokenStr = { 43 | "EOI", 44 | "INVALID_TOKEN", 45 | "LBRACE", /* { */ 46 | "RBRACE", /* } */ 47 | "LBRACK", /* [ */ 48 | "RBRACK", /* ] */ 49 | "COLON", /* : */ 50 | "COMMA", /* , */ 51 | "FOO", /* # */ 52 | "SPACE", 53 | "MULTILINECOMMENT", 54 | "JSONSTRING", 55 | "JSONNUMBER", 56 | "ID", 57 | "KW_NULL", /* null */ 58 | "TRUE", /* true */ 59 | "FALSE", /* false */ 60 | "CHAR_A", 61 | "CHAR_B", /* B */ 62 | "ERROR", 63 | }; 64 | 65 | constexpr inline std::array<absl::string_view, 66 | static_cast<size_t>(Token::NumTokens)> 67 | tokenName = { 68 | "eoi", 69 | "invalid_token", 70 | "'{'", /* { */ 71 | "'}'", /* } */ 72 | "'['", /* [ */ 73 | "']'", /* ] */ 74 | "':'", /* : */ 75 | "','", /* , */ 76 | "Foo", /* # */ 77 | "space", 78 | "MultiLineComment", 79 | "JSONString", 80 | "JSONNumber", 81 | "id", 82 | "kw_null", /* null */ 83 | "'true'", /* true */ 84 | "'false'", /* false */ 85 | "'A'", 86 | "'B'", /* B */ 87 | "error", 88 | }; 89 | 90 | inline std::ostream& operator<<(std::ostream& os, Token tok) { 91 | int t = static_cast<int>(tok); 92 | if (t >= 0 && t < tokenStr.size()) { 93 | return os << tokenStr[t]; 94 | } 95 | return os << "token(" << t << ")"; 96 | } 97 | 98 | } // namespace json 99 | 100 | #endif // EXAMPLES_JSON_TOKEN_H_ 101 | -------------------------------------------------------------------------------- /testing/cpp/json/lexer_test.cc: -------------------------------------------------------------------------------- 1 | #include "json_lexer.h" 2 | 3 | #include <string> 4 | #include <vector> 5 | 6 | #include "gmock/gmock.h" 7 | #include "gtest/gtest.h" 8 | #include "cpp/markup/markup.h" 9 | 10 | namespace json { 11 | namespace { 12 | 13 | struct Test { 14 | std::string name; 15 | Token tok; 16 | std::vector<std::string> cases; 17 | }; 18 | 19 | inline std::ostream &operator<<(std::ostream &os, Test t) { 20 | return os << "{" << t.tok << ", " << t.cases.size() << " cases}"; 21 | } 22 | 23 | const std::vector<Test> tests = { 24 | {"id", 25 | Token::ID, 26 | { 27 | R"( 0«foo» «barB1»)", 28 | }}, 29 | {"string", 30 | Token::JSONSTRING, 31 | { 32 | R"(«"foo"» «"b\nar"» «"α"»)", 33 | }}, 34 | {"number", 35 | Token::JSONNUMBER, 36 | { 37 | "«1» «534»", 38 | "«1e9» «1.2» «1e-2»", 39 | }}, 40 | {"true", 41 | Token::TRUE, 42 | { 43 | "«true»", 44 | "/* true */ «true» ", 45 | }}, 46 | 47 | {"false", Token::FALSE, {" «false» "}}, 48 | {"null", Token::KW_NULL, {" «null» "}}, 49 | 50 | {"lbrace", Token::LBRACE, {"«{»"}}, 51 | {"rbrace", Token::RBRACE, {"«}»"}}, 52 | {"lbrack", Token::LBRACK, {"«[»"}}, 53 | {"rbrack", Token::RBRACK, {"«]»"}}, 54 | {"colon", Token::COLON, {"«:»"}}, 55 | {"comma", Token::COMMA, {"«,»"}}, 56 | 57 | {"comment", Token::MULTILINECOMMENT, {" «/* asda *** */» bar"}}, 58 | {"char_a", Token::CHAR_A, {" «A» «α» «A»"}}, 59 | 60 | // TODO: handle invalid tokens 61 | // 62 | // {"invalid", 63 | // Token::INVALID_TOKEN, 64 | // { 65 | // "«1e» ", 66 | // "abc «/*1e \n»", 67 | // "abc «\" »\n", 68 | // }}, 69 | }; 70 | 71 | class LexerTest : public testing::TestWithParam<Test>
{}; 72 | 73 | TEST_P(LexerTest, Token) { 74 | const auto &param = GetParam(); 75 | for (const auto &input : param.cases) { 76 | std::vector<markup::Range> want; 77 | std::string text; 78 | tie(want, text) = markup::Parse(input); 79 | 80 | Lexer l(text); 81 | std::vector<markup::Range> tokens; 82 | Token next; 83 | while ((next = l.Next()) != Token::EOI) { 84 | if (next == param.tok) { 85 | auto loc = l.LastTokenLocation(); 86 | tokens.push_back( 87 | markup::Range{loc.begin, loc.end}); 88 | } 89 | } 90 | 91 | EXPECT_THAT(tokens, testing::ElementsAreArray(want)) 92 | << "lexer produced " << markup::Create(text, tokens) << " instead of " 93 | << markup::Create(text, want); 94 | } 95 | } 96 | 97 | INSTANTIATE_TEST_SUITE_P(Vals, LexerTest, testing::ValuesIn(tests), 98 | [](const ::testing::TestParamInfo<Test> &info) { 99 | return info.param.name; 100 | }); 101 | 102 | } // namespace 103 | } // namespace json 104 | -------------------------------------------------------------------------------- /testing/cpp/json_flex/BUILD: -------------------------------------------------------------------------------- 1 | package(default_visibility = ["//visibility:private"]) 2 | 3 | cc_library( 4 | name = "lexer", 5 | srcs = ["json_lexer.cc"], 6 | hdrs = [ 7 | "json_lexer.h", 8 | "json_token.h", 9 | ], 10 | deps = [ 11 | "@abseil-cpp//absl/log", 12 | "@abseil-cpp//absl/strings", 13 | ], 14 | ) 15 | 16 | cc_library( 17 | name = "parser", 18 | srcs = ["json_parser.cc"], 19 | hdrs = ["json_parser.h"], 20 | deps = [ 21 | ":lexer", 22 | "@abseil-cpp//absl/functional:function_ref", 23 | "@abseil-cpp//absl/log", 24 | "@abseil-cpp//absl/status", 25 | "@abseil-cpp//absl/strings", 26 | ], 27 | ) 28 | -------------------------------------------------------------------------------- /testing/cpp/json_flex/json.tm: -------------------------------------------------------------------------------- 1 | language json(cc); 2 | 3 | namespace = "json" 4 | includeGuardPrefix = "EXAMPLES_JSON_" 5 | tokenLineOffset = true 6 | tokenColumn = true 7 | filenamePrefix = "json_" 8 | optimizeTables = true 9 | eventBased = true 10 | extraTypes = ["NonExistingType"] 11 | parseParams = ["int a", "bool b"] 12 | debugParser = true 13 | flexMode = true 14 | 15 | :: lexer 16 | 17 | '{': /\{/ 18 | '}': /\}/ 19 | '[': /\[/ 20 | ']': /\]/ 21 | ':': /:/ 22 | ',': /,/ // comma 23 | 24 | MultiLineComment: (space) 25 | // not a trailing comment 26 | 27 | JSONString: // "string literal" 28 | JSONNumber: 29 | 30 | id: 31 | kw_null: 32 | 'true': 33 | 'false': 34 | 35 | :: parser 36 | 37 | %input JSONText; 38 | 39 | %inject MultiLineComment -> MultiLineComment/Bar,Foo; 40 | %inject invalid_token -> InvalidToken; 41 | %inject JSONString -> JsonString; 42 | 43 | %generate Literals = set(first JSONValue<+A>); 44 | 45 | %flag A; 46 | 47 | JSONText {bool b} -> JSONText : 48 | JSONValue<+A>[val] { $$ = $val; } ; 49 | 50 | JSONValue {int a} -> JSONValue : 51 | kw_null 52 | | 'true' 53 | | 'false' { $$ = 5; } 54 | | JSONObject 55 | | EmptyObject 56 | | JSONArray 57 | | JSONString 58 | | JSONNumber 59 | ; 60 | 61 | EmptyObject -> EmptyObject : (?= EmptyObject) '{' '}' { @$.begin = @1.begin; } ; 62 | 63 | JSONObject -> JSONObject/Foo : 64 | (?= !EmptyObject) '{' JSONMemberList?
'}' { @$.begin = @1.begin; } ; 65 | 66 | JSONMember {int c} -> JSONMember/Foo : 67 | JSONString ':' JSONValue<~A> { $$ = a; } 68 | | error -> SyntaxProblem 69 | ; 70 | 71 | JSONMemberList {bool d}: 72 | JSONMember { $$ = b; } 73 | | JSONMemberList .foo ',' JSONMember 74 | ; 75 | 76 | JSONArray -> JSONArray/Foo : 77 | .bar '[' JSONElementListopt ']' ; 78 | 79 | JSONElementList : 80 | JSONValue<+A> 81 | | JSONElementList ',' JSONValue<+A> 82 | ; 83 | -------------------------------------------------------------------------------- /testing/cpp/json_flex/json_lexer.cc: -------------------------------------------------------------------------------- 1 | #include "json_lexer.h" 2 | 3 | #include "absl/log/log.h" 4 | #include "absl/strings/match.h" 5 | 6 | namespace json { 7 | 8 | Lexer::Lexer(absl::string_view input_source) { source_ = input_source; } 9 | 10 | Token Lexer::Next() { 11 | token_line_ = 0; 12 | offset_ = 0; 13 | token_offset_ = 0; 14 | 15 | // TODO: implement 16 | return Token::EOI; 17 | } 18 | 19 | } // namespace json 20 | -------------------------------------------------------------------------------- /testing/cpp/json_flex/json_lexer.h: -------------------------------------------------------------------------------- 1 | #ifndef EXAMPLES_JSON_FLEX_LEXER_H_ 2 | #define EXAMPLES_JSON_FLEX_LEXER_H_ 3 | 4 | #include <cstdint> 5 | #include <ostream> 6 | #include <string> 7 | 8 | #include "absl/strings/string_view.h" 9 | #include "json_token.h" 10 | 11 | namespace json { 12 | 13 | // Lexer is a handwritten adapter that is supposed to wrap a flex-generated 14 | // lexer. 15 | class Lexer { 16 | public: 17 | struct Location { 18 | Location(int64_t b = 0, int64_t e = 0) : begin(b), end(e) {} 19 | friend inline std::ostream& operator<<(std::ostream& os, 20 | const Location& l) { 21 | return os << "[" << l.begin << "-" << l.end << "]"; 22 | } 23 | // Byte offsets into input buffer. 24 | int64_t begin; 25 | int64_t end; 26 | }; 27 | 28 | explicit Lexer(absl::string_view input_source ABSL_ATTRIBUTE_LIFETIME_BOUND); 29 | 30 | // Next finds and returns the next token in source. The stream end is 31 | // indicated by Token.EOI. 32 | // 33 | // The token text can be retrieved later by calling the Text() method. 34 | ABSL_MUST_USE_RESULT Token Next(); 35 | 36 | // Location of the last token returned by Next(). 37 | ABSL_MUST_USE_RESULT Location LastTokenLocation() const { 38 | return Location(token_offset_, offset_); 39 | } 40 | // LastTokenLine returns the line number of the last token returned by Next() 41 | // (1-based). 42 | ABSL_MUST_USE_RESULT int64_t LastTokenLine() const { return token_line_; } 43 | 44 | // Text returns the substring of the input corresponding to the last token.
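// As in the generated lexer, the returned view aliases the input buffer and
// must not outlive it.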
45 |   ABSL_MUST_USE_RESULT absl::string_view Text() const
46 |       ABSL_ATTRIBUTE_LIFETIME_BOUND {
47 |     return source_.substr(token_offset_, offset_ - token_offset_);
48 |   }
49 | 
50 |  private:
51 |   absl::string_view source_;
52 | 
53 |   int64_t offset_ = 0;        // character offset
54 |   int64_t token_offset_ = 0;  // last token byte offset
55 |   int64_t token_line_ = 1;    // last token line
56 | };
57 | 
58 | }  // namespace json
59 | 
60 | #endif  // EXAMPLES_JSON_FLEX_LEXER_H_
61 | 
--------------------------------------------------------------------------------
/testing/cpp/json_flex/json_token.h:
--------------------------------------------------------------------------------
1 | // generated by Textmapper; DO NOT EDIT
2 | 
3 | #ifndef EXAMPLES_JSON_TOKEN_H_
4 | #define EXAMPLES_JSON_TOKEN_H_
5 | 
6 | #include <array>
7 | #include <cstddef>
8 | #include <ostream>
9 | 
10 | #include "absl/strings/string_view.h"
11 | 
12 | namespace json {
13 | 
14 | // Token is an enum of all terminal symbols of the json language.
15 | enum class Token {
16 |   UNAVAILABLE = -1,
17 |   EOI = 0,
18 |   YYerror = 1,
19 |   INVALID_TOKEN = 2,
20 |   LBRACE = 3,
21 |   RBRACE = 4,
22 |   LBRACK = 5,
23 |   RBRACK = 6,
24 |   COLON = 7,
25 |   COMMA = 8, /* comma */
26 |   MULTILINECOMMENT = 9,
27 |   JSONSTRING = 10, /* "string literal" */
28 |   JSONNUMBER = 11,
29 |   ID = 12,
30 |   KW_NULL = 13,
31 |   TRUE = 14,
32 |   FALSE = 15,
33 |   NumTokens = 16
34 | };
35 | 
36 | constexpr inline std::array<absl::string_view,
37 |                             static_cast<size_t>(Token::NumTokens)>
38 |     tokenStr = {
39 |         "EOI",
40 |         "YYerror",
41 |         "INVALID_TOKEN",
42 |         "LBRACE",
43 |         "RBRACE",
44 |         "LBRACK",
45 |         "RBRACK",
46 |         "COLON",
47 |         "COMMA", /* comma */
48 |         "MULTILINECOMMENT",
49 |         "JSONSTRING", /* "string literal" */
50 |         "JSONNUMBER",
51 |         "ID",
52 |         "KW_NULL",
53 |         "TRUE",
54 |         "FALSE",
55 | };
56 | 
57 | constexpr inline std::array<absl::string_view,
58 |                             static_cast<size_t>(Token::NumTokens)>
59 |     tokenName = {
60 |         "eoi",
61 |         "error",
62 |         "invalid_token",
63 |         "'{'",
64 |         "'}'",
65 |         "'['",
66 |         "']'",
67 |         "':'",
68 |         "','", /* comma */
69 |         "MultiLineComment",
70 |         "JSONString", /* "string literal" */
71 |         "JSONNumber",
72 |         "id",
73 |         "kw_null",
74 |         "'true'",
75 |         "'false'",
76 | };
77 | 
78 | inline std::ostream& operator<<(std::ostream& os, Token tok) {
79 |   int t = static_cast<int>(tok);
80 |   if (t >= 0 && t < tokenStr.size()) {
81 |     return os << tokenStr[t];
82 |   }
83 |   return os << "token(" << t << ")";
84 | }
85 | 
86 | }  // namespace json
87 | 
88 | #endif  // EXAMPLES_JSON_TOKEN_H_
89 | 
--------------------------------------------------------------------------------
/testing/cpp/json_flex/json_token_codes.inc:
--------------------------------------------------------------------------------
1 | YYEMPTY = -2,
2 | YYEOF = 0,
3 | YYerror = 256,  // error
4 | YYUNDEF = 257,  // "invalid token"
5 | MULTILINECOMMENT = 258,  // MULTILINECOMMENT
6 | JSONSTRING = 259,  // "string literal"
7 | JSONNUMBER = 260,  // JSONNUMBER
8 | ID = 261,  // ID
9 | KW_NULL = 262,  // KW_NULL
10 | TRUE = 263,  // TRUE
11 | FALSE = 264,  // FALSE
12 | 
--------------------------------------------------------------------------------
/testing/cpp/markup/BUILD:
--------------------------------------------------------------------------------
1 | package(default_visibility = ["//visibility:public"])
2 | 
3 | cc_library(
4 |     name = "markup",
5 |     srcs = ["markup.cc"],
6 |     hdrs = ["markup.h"],
7 |     copts = ["-std=c++17"],
8 |     deps = [
9 |         "@abseil-cpp//absl/log:check",
10 |         "@abseil-cpp//absl/strings",
11 |     ],
12 | )
13 | 
14 | cc_test(
15 |     name = "markup_test",
16 |     size = "small",
17 |     srcs = ["markup_test.cc"],
18 |     copts = ["-std=c++17"],
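    # The test links against gtest_main below, so markup_test.cc only defines test cases.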
19 |     deps = [
20 |         ":markup",
21 |         "@googletest//:gtest_main",
22 |     ],
23 | )
24 | 
--------------------------------------------------------------------------------
/testing/cpp/markup/markup.cc:
--------------------------------------------------------------------------------
1 | #include "markup.h"
2 | 
3 | #include <algorithm>
4 | #include <cstdint>
5 | #include <stack>
6 | #include <string>
7 | #include <tuple>
8 | #include <vector>
9 | 
10 | #include "absl/log/check.h"
11 | #include "absl/strings/match.h"
12 | #include "absl/strings/string_view.h"
13 | 
14 | namespace markup {
15 | 
16 | namespace {
17 | inline constexpr absl::string_view opening = u8"«";
18 | inline constexpr absl::string_view closing = u8"»";
19 | }  // namespace
20 | 
21 | std::tuple<std::vector<Range>, std::string> Parse(absl::string_view text) {
22 |   std::string out;
23 |   std::vector<Range> ranges;
24 |   std::stack<Range> stack;
25 | 
26 |   int pos = 0;
27 |   for (int i = 0; i < text.size(); i++) {
28 |     auto b = text[i];
29 |     if ((b & 0xC0) == 0x80) continue;  // skip continuation bytes
30 |     if (absl::StartsWith(text.substr(i), opening)) {
31 |       out.append(text.substr(pos, i - pos));
32 |       pos = i + opening.size();
33 |       stack.push({static_cast<int64_t>(out.size()), -1});
34 |     } else if (absl::StartsWith(text.substr(i), closing)) {
35 |       out.append(text.substr(pos, i - pos));
36 |       pos = i + closing.size();
37 |       CHECK(!stack.empty()) << "unexpected closing guillemets in " << text;
38 |       auto rng = stack.top();
39 |       stack.pop();
40 |       rng.end = static_cast<int64_t>(out.size());
41 |       ranges.push_back(rng);
42 |     }
43 |   }
44 |   CHECK(stack.empty()) << "missing closing guillemets in " << text;
45 |   out.append(text.substr(pos, text.size() - pos));
46 |   return std::make_tuple(ranges, out);
47 | }
48 | 
49 | std::string Create(absl::string_view text,
50 |                    const std::vector<Range> &ranges) {
51 |   struct Bracket {
52 |     int64_t offset;
53 |     absl::string_view insert;
54 | 
55 |     bool operator<(const Bracket &rhs) const {
56 |       return offset < rhs.offset ||
57 |              (offset == rhs.offset && insert == "»" && rhs.insert != insert);
58 |     }
59 |   };
60 | 
61 |   std::vector<Bracket> brackets;
62 |   for (const auto &r : ranges) {
63 |     CHECK_LE(0, r.start);
64 |     CHECK_LE(r.start, r.end);
65 |     CHECK_LE(r.end, text.size());
66 |     brackets.push_back({r.start, opening});
67 |     brackets.push_back({r.end, closing});
68 |   }
69 |   std::sort(brackets.begin(), brackets.end());
70 | 
71 |   int64_t i = 0;
72 |   std::string out;
73 |   for (const auto &b : brackets) {
74 |     if (i < b.offset) {
75 |       out.append(text.substr(i, b.offset - i));
76 |     }
77 |     out.append(b.insert);
78 |     i = b.offset;
79 |   }
80 |   if (i < text.size()) {
81 |     out.append(text.substr(i, text.size() - i));
82 |   }
83 |   return out;
84 | }
85 | 
86 | }  // namespace markup
87 | 
--------------------------------------------------------------------------------
/testing/cpp/markup/markup.h:
--------------------------------------------------------------------------------
1 | #ifndef TEXTMAPPER_MARKUP_H_
2 | #define TEXTMAPPER_MARKUP_H_
3 | 
4 | #include <cstdint>
5 | #include <ostream>
6 | #include <string>
7 | #include <tuple>
8 | #include <vector>
9 | 
10 | #include "absl/strings/string_view.h"
11 | 
12 | namespace markup {
13 | 
14 | // Range represents a range of byte offsets.
15 | struct Range {
16 |   int64_t start;
17 |   int64_t end;
18 | 
19 |   bool operator==(const Range& rhs) const {
20 |     return start == rhs.start && end == rhs.end;
21 |   }
22 | };
23 | 
24 | inline std::ostream& operator<<(std::ostream& os, Range r) {
25 |   return os << "[" << r.start << "," << r.end << "]";
26 | }
27 | 
28 | // Parse extracts pairs of guillemets from text (sorted by their end offset).
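// The returned string is the input text with all guillemets removed.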
29 | //
30 | // WARNING: the function crashes on invalid input.
31 | std::tuple<std::vector<Range>, std::string> Parse(absl::string_view text);
32 | 
33 | // Create returns a string with guillemets inserted at the given ranges.
34 | //
35 | // WARNING: the function crashes on invalid input.
36 | std::string Create(absl::string_view text,
37 |                    const std::vector<Range>& ranges);
38 | 
39 | }  // namespace markup
40 | 
41 | #endif  // TEXTMAPPER_MARKUP_H_
42 | 
--------------------------------------------------------------------------------
/testing/cpp/markup/markup_test.cc:
--------------------------------------------------------------------------------
1 | #include "markup.h"
2 | 
3 | #include <string>
4 | #include <tuple>
5 | #include <vector>
6 | 
7 | #include "gmock/gmock.h"
8 | #include "gtest/gtest.h"
9 | 
10 | namespace {
11 | 
12 | TEST(MarkupTest, Create) {
13 |   EXPECT_EQ(markup::Create("foo", {}), "foo");
14 |   EXPECT_EQ(markup::Create("foo", {{1, 2}}), "f«o»o");
15 |   EXPECT_EQ(markup::Create("foo", {{1, 2}, {1, 3}}), "f««o»o»");
16 |   EXPECT_EQ(markup::Create("foo", {{0, 3}}), "«foo»");
17 | }
18 | 
19 | TEST(MarkupTest, Parse) {
20 |   EXPECT_EQ(
21 |       markup::Parse("f«o»o"),
22 |       std::make_tuple(std::vector<markup::Range>{{1, 2}}, std::string{"foo"}));
23 |   EXPECT_EQ(markup::Parse("f««o»»o"),
24 |             std::make_tuple(std::vector<markup::Range>{{1, 2}, {1, 2}},
25 |                             std::string{"foo"}));
26 |   EXPECT_EQ(markup::Parse("f««o»o»"),
27 |             std::make_tuple(std::vector<markup::Range>{{1, 2}, {1, 3}},
28 |                             std::string{"foo"}));
29 | }
30 | 
31 | }  // namespace
32 | 
--------------------------------------------------------------------------------
/testing/gen.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | 
3 | # C++
4 | go run ../cmd/textmapper generate -o cpp/json/ cpp/json/json.tm
5 | go run ../cmd/textmapper generate -o cpp/json_flex/ cpp/json_flex/json.tm
6 | clang-format -i --style=google cpp/json/json_lexer* cpp/json/json_parser* cpp/markup/markup* cpp/json_flex/json_lexer*
7 | buildifier cpp/json/BUILD cpp/json_flex/BUILD cpp/markup/BUILD ./BUILD
8 | 
9 | # TypeScript
10 | go run ../cmd/textmapper generate -o ts/json/ ts/json/json.tm
11 | 
--------------------------------------------------------------------------------
/testing/ts/BUILD:
--------------------------------------------------------------------------------
1 | load("@aspect_rules_ts//ts:defs.bzl", "ts_config", "ts_project")
2 | load("@npm//:defs.bzl", "npm_link_all_packages")
3 | load("@aspect_bazel_lib//lib:copy_to_bin.bzl", "copy_to_bin")
4 | 
5 | package(default_visibility = ["//visibility:public"])
6 | 
7 | # Link npm packages at the root level where package.json is located
8 | npm_link_all_packages(name = "node_modules")
9 | 
10 | ts_config(
11 |     name = "tsconfig",
12 |     src = "tsconfig.json",
13 |     visibility = ["//visibility:public"],
14 | )
15 | 
16 | copy_to_bin(
17 |     name = "tsconfig_bin",
18 |     srcs = ["tsconfig.json"],
19 |     visibility = ["//ts/json:__subpackages__"],
20 | )
21 | 
22 | copy_to_bin(
23 |     name = "jestconfig_bin",
24 |     srcs = ["jest.config.js"],
25 | )
26 | 
--------------------------------------------------------------------------------
/testing/ts/jest.config.js:
--------------------------------------------------------------------------------
1 | /** @type {import('ts-jest').JestConfigWithTsJest} */
2 | module.exports = {
3 |   preset: 'ts-jest',
4 |   testEnvironment: 'node',
5 |   transform: {
6 |     '^.+\\.tsx?$': ['ts-jest']
7 |   }
8 | };
9 | 
--------------------------------------------------------------------------------
/testing/ts/json/BUILD:
-------------------------------------------------------------------------------- 1 | load("@aspect_rules_jest//jest:defs.bzl", "jest_test") 2 | load("@aspect_rules_ts//ts:defs.bzl", "ts_project") 3 | 4 | package(default_visibility = ["//visibility:public"]) 5 | 6 | ts_project( 7 | name = "json_ts_lib", 8 | srcs = glob( 9 | ["**/*.ts"], 10 | exclude = ["**/*.test.ts"], 11 | ), 12 | declaration = True, 13 | resolve_json_module = True, 14 | source_map = True, 15 | transpiler = "tsc", 16 | tsconfig = "//ts:tsconfig_bin", 17 | deps = [ 18 | "//ts:node_modules/@types/node", 19 | ], 20 | ) 21 | 22 | ts_project( 23 | name = "json_test_lib", 24 | srcs = glob(["**/*.test.ts"]), 25 | declaration = True, 26 | resolve_json_module = True, 27 | source_map = True, 28 | transpiler = "tsc", 29 | tsconfig = "//ts:tsconfig_bin", 30 | deps = [ 31 | ":json_ts_lib", 32 | "//ts:node_modules/@types/jest", 33 | "//ts:node_modules/@types/node", 34 | ], 35 | ) 36 | 37 | jest_test( 38 | name = "json_test", 39 | config = "//ts:jestconfig_bin", 40 | data = [ 41 | ":json_test_lib", 42 | "//ts:node_modules/jest", 43 | "//ts:node_modules/ts-jest", 44 | ], 45 | node_modules = "//ts:node_modules", 46 | ) 47 | -------------------------------------------------------------------------------- /testing/ts/json/common.ts: -------------------------------------------------------------------------------- 1 | // generated by Textmapper; DO NOT EDIT 2 | 3 | import {TokenType} from "./token"; 4 | 5 | export const debugSyntax = false; 6 | 7 | export function debugLog(...data: any[]) : void { 8 | // No-op. DebugParser == false 9 | } 10 | 11 | export class Symbol { 12 | symbol: TokenType; 13 | offset: number; 14 | endoffset: number; 15 | 16 | constructor(symbol: TokenType, offset: number, endoffset: number) { 17 | this.symbol = symbol; 18 | this.offset = offset; 19 | this.endoffset = endoffset; 20 | } 21 | 22 | copy() : Symbol { 23 | let copy = JSON.parse(JSON.stringify(this)); 24 | return copy as Symbol; 25 | } 26 | } 27 | 28 | export type StackEntry = { 29 | sym: Symbol; 30 | state: number; 31 | }; 32 | 33 | -------------------------------------------------------------------------------- /testing/ts/json/json.tm: -------------------------------------------------------------------------------- 1 | language json(ts); 2 | 3 | tokenLine = true 4 | eventBased = true 5 | eventAST = true 6 | genSelector = true 7 | fixWhitespace = true 8 | extraTypes = ["NonExistingType"] 9 | 10 | :: lexer 11 | 12 | '{': /\{/ 13 | '}': /\}/ 14 | '[': /\[/ 15 | ']': /\]/ 16 | ':': /:/ 17 | ',': /,/ 18 | 19 | space: /[\t\r\n ]+/ (space) 20 | 21 | commentChars = /([^*]|\*+[^*\/])*\**/ 22 | MultiLineComment: /\/\*{commentChars}\*\// (space) 23 | 24 | hex = /[0-9a-fA-F]/ 25 | 26 | # TODO 27 | JSONString {string}: /"([^"\\]|\\(["\/\\bfnrt]|u{hex}{4}))*"/ 28 | #JSONString: /"([^"\\\x00-\x1f]|\\(["\/\\bfnrt]|u{hex}{4}))*"/ 29 | 30 | fraction = /\.[0-9]+/ 31 | exp = /[eE][+-]?[0-9]+/ 32 | JSONNumber: /-?(0|[1-9][0-9]*){fraction}?{exp}?/ 33 | 34 | id: /[a-zA-Z][a-zA-Z0-9]*/ (class) 35 | 36 | 'null': /null/ 37 | 'true': /true/ 38 | 'false': /false/ 39 | 40 | error: 41 | invalid_token: 42 | 43 | :: parser 44 | 45 | %input JSONText; 46 | 47 | %inject MultiLineComment -> MultiLineComment; 48 | %inject invalid_token -> InvalidToken; 49 | %inject JSONString -> JSONString; 50 | 51 | %generate Literals = set(first JSONValue); 52 | 53 | JSONText -> JSONText : 54 | JSONValue ; 55 | 56 | JSONValue -> JSONValue : 57 | 'null' 58 | | 'true' 59 | | 'false' 60 | | JSONObject 61 | | 
JSONArray 62 | | JSONString 63 | | JSONNumber 64 | ; 65 | 66 | JSONObject -> JSONObject : 67 | '{' JSONMemberList? '}' ; 68 | 69 | JSONMember -> JSONMember : 70 | JSONString ':' JSONValue ; 71 | 72 | JSONMemberList : 73 | JSONMember 74 | | JSONMemberList ',' JSONMember 75 | ; 76 | 77 | JSONArray -> JSONArray : 78 | '[' JSONElementListopt ']' ; 79 | 80 | JSONElementList : 81 | JSONValue 82 | | JSONElementList ',' JSONValue 83 | ; 84 | -------------------------------------------------------------------------------- /testing/ts/json/listener.ts: -------------------------------------------------------------------------------- 1 | // generated by Textmapper; DO NOT EDIT 2 | 3 | export enum NodeType { 4 | NoType, 5 | JSONArray, 6 | JSONMember, 7 | JSONObject, 8 | JSONText, 9 | JSONValue, 10 | MultiLineComment, 11 | InvalidToken, 12 | JSONString, 13 | NonExistingType, 14 | NodeTypeMax 15 | }; 16 | 17 | export type Listener = (t: NodeType, offset: number, endOffset: number) => void; 18 | 19 | export const TokenSet : NodeType[] = [ 20 | NodeType.JSONString, 21 | ]; 22 | -------------------------------------------------------------------------------- /testing/ts/json/parser_tables.ts: -------------------------------------------------------------------------------- 1 | // generated by Textmapper; DO NOT EDIT 2 | 3 | import {TokenType} from './token'; 4 | import * as listener from './listener'; 5 | 6 | export const startStackSize = 256; 7 | export const startTokenBufferSize = 16; 8 | export const noToken = TokenType.UNAVAILABLE; 9 | export const eoiToken = TokenType.EOI; 10 | 11 | let tmNonterminals: string[] = [ 12 | "JSONText", 13 | "JSONValue", 14 | "JSONObject", 15 | "JSONMember", 16 | "JSONMemberList", 17 | "JSONArray", 18 | "JSONElementList", 19 | "JSONElementListopt", 20 | ]; 21 | 22 | export function symbolName(sym: TokenType): string { 23 | if (sym === noToken) { 24 | return ""; 25 | } 26 | if (sym < TokenType.NumTokens) { 27 | return TokenType[sym]; 28 | } 29 | let i = sym - TokenType.NumTokens; 30 | if (i < tmNonterminals.length) { 31 | return tmNonterminals[i]; 32 | } 33 | return "nonterminal(" + sym + ")"; 34 | } 35 | 36 | export const tmAction : number[] = [ 37 | -1, -1, -3, 6, 7, 1, 2, 3, 0, 4, 5, 9, -1, 11, -1, 14, -21, -1, -1, 8, -1, 38 | -1, 13, 10, 12, 15, -1, -2, 39 | ]; 40 | export const tmLalr : number[] = [ 41 | 2, -1, 4, -1, 10, -1, 11, -1, 13, -1, 14, -1, 15, -1, 5, 17, -1, -2, 7, -1, 42 | 5, 16, -1, -2, 43 | ]; 44 | 45 | export const tmGoto : number[] = [ 46 | 0, 2, 2, 10, 14, 22, 24, 26, 30, 30, 30, 42, 50, 50, 58, 66, 74, 74, 76, 84, 47 | 92, 96, 98, 106, 108, 110, 48 | ]; 49 | 50 | export const tmFromTo : number[] = [ 51 | 26, 27, 0, 1, 2, 1, 18, 1, 21, 1, 1, 11, 14, 19, 0, 2, 2, 2, 18, 2, 21, 2, 52 | 17, 22, 12, 18, 14, 20, 16, 21, 0, 3, 1, 12, 2, 3, 18, 3, 20, 12, 21, 3, 0, 53 | 4, 2, 4, 18, 4, 21, 4, 0, 5, 2, 5, 18, 5, 21, 5, 0, 6, 2, 6, 18, 6, 21, 6, 54 | 0, 7, 2, 7, 18, 7, 21, 7, 0, 26, 0, 8, 2, 15, 18, 23, 21, 25, 0, 9, 2, 9, 55 | 18, 9, 21, 9, 1, 13, 20, 24, 1, 14, 0, 10, 2, 10, 18, 10, 21, 10, 2, 16, 2, 56 | 17, 57 | ]; 58 | 59 | export const tmRuleLen : number[] = [ 60 | 1, 1, 1, 1, 1, 1, 1, 1, 3, 2, 3, 1, 3, 3, 1, 3, 1, 0, 61 | ]; 62 | 63 | export const tmRuleSymbol : number[] = [ 64 | 17, 18, 18, 18, 18, 18, 18, 18, 19, 19, 20, 21, 21, 22, 23, 23, 24, 24, 65 | ]; 66 | export const tmRuleType : listener.NodeType[] = [ 67 | listener.NodeType.JSONText, // JSONText : JSONValue 68 | listener.NodeType.JSONValue, // JSONValue : 'null' 69 | listener.NodeType.JSONValue, 
// JSONValue : 'true' 70 | listener.NodeType.JSONValue, // JSONValue : 'false' 71 | listener.NodeType.JSONValue, // JSONValue : JSONObject 72 | listener.NodeType.JSONValue, // JSONValue : JSONArray 73 | listener.NodeType.JSONValue, // JSONValue : JSONString 74 | listener.NodeType.JSONValue, // JSONValue : JSONNumber 75 | listener.NodeType.JSONObject, // JSONObject : '{' JSONMemberList '}' 76 | listener.NodeType.JSONObject, // JSONObject : '{' '}' 77 | listener.NodeType.JSONMember, // JSONMember : JSONString ':' JSONValue 78 | 0, // JSONMemberList : JSONMember 79 | 0, // JSONMemberList : JSONMemberList ',' JSONMember 80 | listener.NodeType.JSONArray, // JSONArray : '[' JSONElementListopt ']' 81 | 0, // JSONElementList : JSONValue 82 | 0, // JSONElementList : JSONElementList ',' JSONValue 83 | 0, // JSONElementListopt : JSONElementList 84 | 0, // JSONElementListopt : 85 | ]; 86 | // set(first JSONValue) = LBRACE, LBRACK, JSONSTRING, JSONNUMBER, NULL, TRUE, FALSE 87 | export const Literals : TokenType[] = [ 88 | 2, 4, 10, 11, 13, 14, 15, 89 | ]; 90 | // set(follow ERROR) = 91 | export const afterErr : TokenType[] = []; -------------------------------------------------------------------------------- /testing/ts/json/selector.ts: -------------------------------------------------------------------------------- 1 | // generated by Textmapper; DO NOT EDIT 2 | 3 | import * as listener from './listener'; 4 | 5 | export type Selector = (nt : listener.NodeType) => boolean; 6 | 7 | export const Any : Selector = (nt : listener.NodeType) => true; 8 | export const JSONArray : Selector = (nt : listener.NodeType) : boolean => nt === listener.NodeType.JSONArray; 9 | export const JSONMember : Selector = (nt : listener.NodeType) : boolean => nt === listener.NodeType.JSONMember; 10 | export const JSONObject : Selector = (nt : listener.NodeType) : boolean => nt === listener.NodeType.JSONObject; 11 | export const JSONText : Selector = (nt : listener.NodeType) : boolean => nt === listener.NodeType.JSONText; 12 | export const JSONValue : Selector = (nt : listener.NodeType) : boolean => nt === listener.NodeType.JSONValue; 13 | export const MultiLineComment : Selector = (nt : listener.NodeType) : boolean => nt === listener.NodeType.MultiLineComment; 14 | export const InvalidToken : Selector = (nt : listener.NodeType) : boolean => nt === listener.NodeType.InvalidToken; 15 | export const JSONString : Selector = (nt : listener.NodeType) : boolean => nt === listener.NodeType.JSONString; 16 | export const NonExistingType : Selector = (nt : listener.NodeType) : boolean => nt === listener.NodeType.NonExistingType; 17 | -------------------------------------------------------------------------------- /testing/ts/json/token.ts: -------------------------------------------------------------------------------- 1 | // generated by Textmapper; DO NOT EDIT 2 | 3 | // Token values. 
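// UNAVAILABLE (-1) is a pseudo-value meaning "no token" (see noToken in
// parser_tables.ts); NumTokens counts the declared token types.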
4 | export enum TokenType {
5 |   UNAVAILABLE = -1,
6 |   EOI = 0,
7 |   INVALID_TOKEN = 1,
8 |   LBRACE = 2,  // {
9 |   RBRACE = 3,  // }
10 |   LBRACK = 4,  // [
11 |   RBRACK = 5,  // ]
12 |   COLON = 6,  // :
13 |   COMMA = 7,  // ,
14 |   SPACE = 8,
15 |   MULTILINECOMMENT = 9,
16 |   JSONSTRING = 10,
17 |   JSONNUMBER = 11,
18 |   ID = 12,
19 |   NULL = 13,  // null
20 |   TRUE = 14,  // true
21 |   FALSE = 15,  // false
22 |   ERROR = 16,
23 | 
24 |   NumTokens,
25 | }
26 | 
--------------------------------------------------------------------------------
/testing/ts/json/tree.ts:
--------------------------------------------------------------------------------
1 | // generated by Textmapper; DO NOT EDIT
2 | 
3 | import { NodeType } from './listener'
4 | import { Selector } from './selector'
5 | 
6 | /**
7 |  * Tree represents an AST for some parsed content.
8 |  */
9 | export interface Tree {
10 | 
11 |   /**
12 |    * Returns the content that was parsed into this tree.
13 |    */
14 |   text(): string;
15 | 
16 |   /**
17 |    * Returns the root node of the tree.
18 |    */
19 |   root(): Node;
20 | }
21 | 
22 | /**
23 |  * Node represents a Node in an AST.
24 |  */
25 | export interface Node {
26 | 
27 |   /**
28 |    * The {@link NodeType} represented by this node.
29 |    */
30 |   type(): NodeType;
31 | 
32 |   /**
33 |    * The starting offset in the parsed content represented by this node.
34 |    */
35 |   offset(): number;
36 | 
37 |   /**
38 |    * The ending offset (exclusive) in the parsed content represented by this node.
39 |    */
40 |   endOffset(): number;
41 | 
42 |   /**
43 |    * The parsed content represented by this node.
44 |    * It is essentially tree.text().substring(offset(), endOffset()).
45 |    */
46 |   text(): string;
47 | 
48 |   /**
49 |    * Returns the start position of the content of this node as 1-based line and column.
50 |    */
51 |   lineColumn(): { line: number; column: number };
52 | 
53 |   /**
54 |    * Returns all sibling nodes of this node that are accepted by the given {@link Selector}.
55 |    *
56 |    * @param selector the selector for filtering nodes
57 |    * @returns an {@link Iterable} of sibling nodes that are accepted by the selector
58 |    */
59 |   nextAll(selector: Selector): Iterable<Node>;
60 | 
61 |   /**
62 |    * Returns the first child node of this node that is accepted by the given {@link Selector}.
63 |    *
64 |    * @param selector the selector for filtering nodes
65 |    * @returns the first child node accepted by the selector or null if no such node is found
66 |    */
67 |   child(selector: Selector): Node | null;
68 | 
69 |   /**
70 |    * Returns all child nodes of this node that are accepted by the given {@link Selector}.
71 |    *
72 |    * @param selector the selector for filtering nodes
73 |    * @returns an {@link Iterable} of child nodes that are accepted by the selector
74 |    */
75 |   children(selector: Selector): Iterable<Node>;
76 | 
77 |   /**
78 |    * Returns all descendant nodes of this node that are accepted by the given {@link Selector}
79 |    * in pre-order traversal.
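   * Pre-order means a node is reported before any of its descendants.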
80 |    *
81 |    * @param selector the selector for filtering nodes
82 |    * @returns an {@link Iterable} of descendant nodes that are accepted by the selector
83 |    */
84 |   descendants(selector: Selector): Iterable<Node>;
85 | }
86 | 
--------------------------------------------------------------------------------
/testing/ts/package.json:
--------------------------------------------------------------------------------
1 | {
2 |   "name": "testing",
3 |   "private": true,
4 |   "packageManager": "pnpm@9.12.1",
5 |   "scripts": {
6 |     "test": "jest"
7 |   },
8 |   "pnpm": {
9 |     "onlyBuiltDependencies": [],
10 |     "packageExtensions": {
11 |       "rollup": {
12 |         "dependencies": {
13 |           "@rollup/rollup-darwin-arm64": "*"
14 |         }
15 |       }
16 |     }
17 |   },
18 |   "devDependencies": {
19 |     "@types/jest": "29.5.14",
20 |     "@types/node": "20.9.0",
21 |     "jest": "29.7.0",
22 |     "jest-cli": "^29.7.0",
23 |     "jest-junit": "^16.0.0",
24 |     "ts-jest": "29.2.6",
25 |     "typescript": "5.6.2"
26 |   }
27 | }
--------------------------------------------------------------------------------
/testing/ts/tsconfig.json:
--------------------------------------------------------------------------------
1 | {
2 |   "compilerOptions": {
3 |     "target": "ES2018",
4 |     "module": "CommonJS",
5 |     "moduleResolution": "node",
6 |     "esModuleInterop": true,
7 |     "sourceMap": true,
8 |     "declaration": true,
9 |     "strict": true,
10 |     "skipLibCheck": true,
11 |     "resolveJsonModule": true
12 |   }
13 | }
--------------------------------------------------------------------------------
/util/container/bitset_test.go:
--------------------------------------------------------------------------------
1 | package container_test
2 | 
3 | import (
4 |     "fmt"
5 |     "math/rand"
6 |     "testing"
7 | 
8 |     "github.com/inspirer/textmapper/util/container"
9 | )
10 | 
11 | func TestBitSet(t *testing.T) {
12 |     for i := 0; i < 100; i++ {
13 |         s := container.NewBitSet(i)
14 |         if i > 0 {
15 |             s.Set(i - 1)
16 |         }
17 |     }
18 | 
19 |     s := container.NewBitSet(64)
20 |     s.Set(20)
21 |     s.Set(40)
22 |     s.Set(42)
23 |     s.Set(63)
24 |     s.Clear(42)
25 | 
26 |     var got []int
27 |     for i := 0; i < 64; i++ {
28 |         if s.Get(i) {
29 |             got = append(got, i)
30 |         }
31 |     }
32 |     if gotStr := fmt.Sprintf("%v", got); gotStr != "[20 40 63]" {
33 |         t.Errorf("Found %v set bits, want: [20 40 63]", got)
34 |     }
35 | 
36 |     got = s.Slice(nil)
37 |     if gotStr := fmt.Sprintf("%v", got); gotStr != "[20 40 63]" {
38 |         t.Errorf("Slice() = %v, want: [20 40 63]", got)
39 |     }
40 | 
41 |     s = container.NewBitSet(128)
42 |     s.SetAll(74)
43 |     s.ClearAll(72)
44 | 
45 |     got = s.Slice(nil)
46 |     if gotStr := fmt.Sprintf("%v", got); gotStr != "[72 73]" {
47 |         t.Errorf("Slice() = %v, want: [72 73]", got)
48 |     }
49 | 
50 |     s = container.NewBitSet(32)
51 |     s.SetAll(32)
52 |     if s[0] != ^uint32(0) {
53 |         t.Errorf("SetAll(32) = %b, want: 32x ones", s[0])
54 |     }
55 | 
56 |     s.Complement(30)
57 |     if s[0] != 3<<30 {
58 |         t.Errorf("Complement(30) = %b, want: 110000..00 (30x zeroes)", s[0])
59 |     }
60 | 
61 |     s.ClearAll(32)
62 |     if s[0] != 0 {
63 |         t.Errorf("ClearAll(32) = %b, want: 0", s[0])
64 |     }
65 | }
66 | 
67 | func BenchmarkBitSetSlice(b *testing.B) {
68 |     s := container.NewBitSet(1024)
69 |     r := rand.New(rand.NewSource(99))
70 |     for i := 0; i < 20; i++ {
71 |         s.Set(r.Intn(1024))
72 |     }
73 |     val := s.Slice(nil)
74 | 
75 |     b.ResetTimer()
76 |     for i := 0; i < b.N; i++ {
77 |         s.Slice(val)
78 |     }
79 | }
80 | 
81 | func TestNext(t *testing.T) {
82 |     var tests = [][]int{
83 |         {0, 5},
84 |         {0, 1, 2, 3},
85 |         {15},
86 |         {0, 30, 31},
87 |         {32},
88 |         {32, 33},
89 |         {30, 33},
90 |         {688},
91 |     }
92 | 
for _, tc := range tests { 93 | s := container.NewBitSet(tc[len(tc)-1] + 1) 94 | for _, bit := range tc { 95 | s.Set(bit) 96 | } 97 | for i := 0; i < len(s)*32; i++ { 98 | want := i 99 | for want < len(s)*32 && s.Get(want) { 100 | want++ 101 | } 102 | got := s.NextZero(i) 103 | if got != want { 104 | t.Errorf("NextZero(%v, %v) = %v, want: %v", tc, i, got, want) 105 | } 106 | } 107 | } 108 | } 109 | 110 | func BenchmarkNext(b *testing.B) { 111 | const ( 112 | size = 1024 113 | zeroBits = 100 114 | ) 115 | s := container.NewBitSet(size) 116 | s.Complement(size) 117 | r := rand.New(rand.NewSource(99)) 118 | for i := 0; i < zeroBits; i++ { 119 | s.Clear(r.Intn(size)) 120 | } 121 | 122 | b.Run("NextZero", func(b *testing.B) { 123 | for i := 0; i < b.N; i++ { 124 | n := i % size 125 | s.NextZero(n) 126 | } 127 | }) 128 | 129 | b.Run("NextBruteForce", func(b *testing.B) { 130 | for i := 0; i < b.N; i++ { 131 | for n := i % size; n < size && s.Get(n); n++ { 132 | } 133 | } 134 | }) 135 | } 136 | -------------------------------------------------------------------------------- /util/container/intset.go: -------------------------------------------------------------------------------- 1 | package container 2 | 3 | import ( 4 | "fmt" 5 | ) 6 | 7 | // IntSet efficiently stores, merges and intersects integer sets (represented as sorted arrays). 8 | type IntSet struct { 9 | Inverse bool 10 | Set []int // sorted 11 | } 12 | 13 | // Empty checks if a set is empty. 14 | func (s IntSet) Empty() bool { 15 | return len(s.Set) == 0 && !s.Inverse 16 | } 17 | 18 | // Complement returns a complement set to this one. 19 | func (s IntSet) Complement() IntSet { 20 | return IntSet{!s.Inverse, s.Set} 21 | } 22 | 23 | // BitSet turns this IntSet into a BitSet of the given size. 24 | func (s IntSet) BitSet(size int) BitSet { 25 | ret := NewBitSet(size) 26 | if s.Inverse { 27 | ret.SetAll(size) 28 | for _, v := range s.Set { 29 | ret.Clear(v) 30 | } 31 | } else { 32 | for _, v := range s.Set { 33 | ret.Set(v) 34 | } 35 | } 36 | return ret 37 | } 38 | 39 | func (s IntSet) String() string { 40 | var prefix string 41 | if s.Inverse { 42 | prefix = "~" 43 | } 44 | return fmt.Sprintf("%v%v", prefix, s.Set) 45 | } 46 | 47 | // Equals compares two sets for equality. 48 | func (s IntSet) Equals(oth IntSet) bool { 49 | if s.Inverse != oth.Inverse || len(s.Set) != len(oth.Set) { 50 | return false 51 | } 52 | for i := range s.Set { 53 | if s.Set[i] != oth.Set[i] { 54 | return false 55 | } 56 | } 57 | return true 58 | } 59 | 60 | // Intersect computes an intersection of two sets. 61 | func Intersect(a, b IntSet, reuse []int) IntSet { 62 | switch { 63 | case a.Empty() || b.Empty(): 64 | return IntSet{} 65 | case a.Inverse: 66 | if b.Inverse { 67 | return IntSet{Inverse: true, Set: combine(a.Set, b.Set, reuse)} 68 | } 69 | return IntSet{Set: subtract(b.Set, a.Set, reuse)} 70 | case b.Inverse: 71 | return IntSet{Set: subtract(a.Set, b.Set, reuse)} 72 | } 73 | return IntSet{Set: intersect(a.Set, b.Set, reuse)} 74 | } 75 | 76 | // Merge computes a union of two sets. 
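// Complement sets are handled via De Morgan's laws, mirroring Intersect above.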
77 | func Merge(a, b IntSet, reuse []int) IntSet { 78 | switch { 79 | case a.Empty(): 80 | return b 81 | case b.Empty(): 82 | return a 83 | case a.Inverse: 84 | if b.Inverse { 85 | return IntSet{Inverse: true, Set: intersect(a.Set, b.Set, reuse)} 86 | } 87 | return IntSet{Inverse: true, Set: subtract(a.Set, b.Set, reuse)} 88 | case b.Inverse: 89 | return IntSet{Inverse: true, Set: subtract(b.Set, a.Set, reuse)} 90 | } 91 | return IntSet{Set: combine(a.Set, b.Set, reuse)} 92 | } 93 | 94 | func combine(a, b, reuse []int) []int { 95 | ret := reuse[:0] 96 | var e int 97 | bl := len(b) 98 | for _, v := range a { 99 | for e < bl && b[e] < v { 100 | ret = append(ret, b[e]) 101 | e++ 102 | } 103 | if e < bl && b[e] == v { 104 | e++ 105 | } 106 | ret = append(ret, v) 107 | } 108 | for _, v := range b[e:] { 109 | ret = append(ret, v) 110 | } 111 | return ret 112 | } 113 | 114 | func intersect(a, b, reuse []int) []int { 115 | ret := reuse[:0] 116 | var e int 117 | bl := len(b) 118 | for _, v := range a { 119 | for e < bl && b[e] < v { 120 | e++ 121 | } 122 | if e < bl && b[e] == v { 123 | ret = append(ret, v) 124 | } 125 | } 126 | return ret 127 | } 128 | 129 | func subtract(a, b, reuse []int) []int { 130 | ret := reuse[:0] 131 | var e int 132 | bl := len(b) 133 | for _, v := range a { 134 | for e < bl && b[e] < v { 135 | e++ 136 | } 137 | if e < bl && b[e] == v { 138 | continue 139 | } 140 | ret = append(ret, v) 141 | } 142 | return ret 143 | } 144 | -------------------------------------------------------------------------------- /util/container/intset_test.go: -------------------------------------------------------------------------------- 1 | package container_test 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/inspirer/textmapper/util/container" 7 | ) 8 | 9 | var intersectTests = []struct { 10 | a, b container.IntSet 11 | want string 12 | }{ 13 | {set(), complementOf(), "[]"}, 14 | {complementOf(), complementOf(), "~[]"}, 15 | {set(1, 2, 3), complementOf(), "[1 2 3]"}, 16 | {set(1), set(), "[]"}, 17 | {set(1), set(1), "[1]"}, 18 | {set(1), set(5), "[]"}, 19 | {set(7, 9), set(7, 8, 9), "[7 9]"}, 20 | {set(0, 1, 3), set(1, 2), "[1]"}, 21 | {set(0, 1, 3), complementOf(1), "[0 3]"}, 22 | {complementOf(3), complementOf(1), "~[1 3]"}, 23 | {complementOf(2, 3), complementOf(1, 2), "~[1 2 3]"}, 24 | } 25 | 26 | func TestIntersect(t *testing.T) { 27 | for _, test := range intersectTests { 28 | c := container.Intersect(test.a, test.b, nil) 29 | if got := c.String(); got != test.want { 30 | t.Errorf("Intersect(%v, %v) = %v, want: %v", test.a, test.b, got, test.want) 31 | } 32 | 33 | c = container.Intersect(test.b, test.a, nil) 34 | if got := c.String(); got != test.want { 35 | t.Errorf("Intersect(%v, %v) = %v, want: %v", test.b, test.a, got, test.want) 36 | } 37 | } 38 | } 39 | 40 | var mergeTests = []struct { 41 | a, b container.IntSet 42 | want string 43 | }{ 44 | {set(), set(), "[]"}, 45 | {set(), complementOf(), "~[]"}, 46 | {complementOf(), complementOf(), "~[]"}, 47 | {set(1, 2, 3), complementOf(), "~[]"}, 48 | {set(1), set(), "[1]"}, 49 | {set(1), set(1), "[1]"}, 50 | {set(1), set(5), "[1 5]"}, 51 | {set(7, 9), set(7, 8, 9), "[7 8 9]"}, 52 | {set(0, 1, 3), set(1, 2), "[0 1 2 3]"}, 53 | {set(0, 1, 3), complementOf(1), "~[]"}, 54 | {set(0, 1, 3), complementOf(8), "~[8]"}, 55 | {complementOf(3), complementOf(1), "~[]"}, 56 | {complementOf(2, 3), complementOf(1, 2), "~[2]"}, 57 | } 58 | 59 | func TestMerge(t *testing.T) { 60 | for _, test := range mergeTests { 61 | c := container.Merge(test.a, 
test.b, nil) 62 | if got := c.String(); got != test.want { 63 | t.Errorf("Merge(%v, %v) = %v, want: %v", test.a, test.b, got, test.want) 64 | } 65 | 66 | c = container.Merge(test.b, test.a, nil) 67 | if got := c.String(); got != test.want { 68 | t.Errorf("Merge(%v, %v) = %v, want: %v", test.b, test.a, got, test.want) 69 | } 70 | } 71 | } 72 | 73 | var intBitSetTests = []container.IntSet{ 74 | set(), 75 | set(1), 76 | set(1, 5), 77 | set(0, 1, 2, 99), 78 | complementOf(), 79 | complementOf(0, 1, 2, 99), 80 | } 81 | 82 | func TestIntBitSets(t *testing.T) { 83 | for _, test := range intBitSetTests { 84 | bs := test.BitSet(100) 85 | if test.Inverse { 86 | bs.Complement(100) 87 | } 88 | d := container.IntSet{Set: bs.Slice(nil), Inverse: test.Inverse} 89 | if got := d.String(); got != test.String() { 90 | t.Errorf("%v.BitSet().Slice() = %v, want: %v", test, got, test.String()) 91 | } 92 | } 93 | } 94 | 95 | func set(ints ...int) container.IntSet { 96 | return container.IntSet{Set: ints} 97 | } 98 | 99 | func complementOf(ints ...int) container.IntSet { 100 | return container.IntSet{Inverse: true, Set: ints} 101 | } 102 | -------------------------------------------------------------------------------- /util/container/slicemap.go: -------------------------------------------------------------------------------- 1 | package container 2 | 3 | // allocator is used to allocate IntSliceMap values on the first access. 4 | type allocator[T any] func(key []int) T 5 | 6 | type entry[T any] struct { 7 | key []int 8 | value T 9 | } 10 | 11 | // IntSliceMap maps slices of ints into values that get automatically allocated upon the 12 | // first access. 13 | type IntSliceMap[T any] struct { 14 | allocate allocator[T] 15 | data map[uint64][]entry[T] 16 | } 17 | 18 | // NewIntSliceMap returns an empty map, which will use the provided function to instantiate values. 19 | func NewIntSliceMap[T any](allocate allocator[T]) *IntSliceMap[T] { 20 | return &IntSliceMap[T]{ 21 | allocate: allocate, 22 | data: make(map[uint64][]entry[T]), 23 | } 24 | } 25 | 26 | // Get returns the value corresponding to a given key, instantiating it if needed. 27 | func (m *IntSliceMap[T]) Get(key []int) T { 28 | var hash uint64 29 | for _, i := range key { 30 | hash = hash*31 + uint64(i) 31 | } 32 | 33 | for _, entry := range m.data[hash] { 34 | if SliceEqual(key, entry.key) { 35 | return entry.value 36 | } 37 | } 38 | 39 | keyCopy := make([]int, len(key)) 40 | copy(keyCopy, key) 41 | val := m.allocate(keyCopy) 42 | m.data[hash] = append(m.data[hash], entry[T]{keyCopy, val}) 43 | return val 44 | } 45 | 46 | // SliceEqual compares two slices for equality. 
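// Nil and empty slices compare as equal.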
47 | func SliceEqual(a, b []int) bool { 48 | if len(a) != len(b) { 49 | return false 50 | } 51 | for i, ai := range a { 52 | if ai != b[i] { 53 | return false 54 | } 55 | } 56 | return true 57 | } 58 | -------------------------------------------------------------------------------- /util/container/slicemap_test.go: -------------------------------------------------------------------------------- 1 | package container_test 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/inspirer/textmapper/util/container" 7 | ) 8 | 9 | func TestSliceMap(t *testing.T) { 10 | var counter int 11 | sm := container.NewIntSliceMap(func(key []int) interface{} { 12 | val := counter 13 | counter++ 14 | return val 15 | }) 16 | 17 | for _, tc := range []struct { 18 | input []int 19 | want int 20 | }{ 21 | {[]int{1}, 0}, 22 | {[]int{1}, 0}, 23 | {[]int{1, 2, 3}, 1}, 24 | {[]int{1, 2, 3, 4}, 2}, 25 | {[]int{1, 2, 3}, 1}, 26 | {[]int{1}, 0}, 27 | {[]int{}, 3}, 28 | {[]int{}, 3}, 29 | } { 30 | got := sm.Get(tc.input).(int) 31 | if got != tc.want { 32 | t.Errorf("Get(%v) = %v, want: %v", tc.input, got, tc.want) 33 | } 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /util/debug/debug.go: -------------------------------------------------------------------------------- 1 | // Package debug provides utilities for printing basic data structures for human consumption. 2 | package debug 3 | 4 | import ( 5 | "fmt" 6 | ) 7 | 8 | // Size outputs a rounded size in bytes. 9 | func Size(bytes int) string { 10 | if bytes < 1024 { 11 | return fmt.Sprintf("%v B", bytes) 12 | } 13 | if bytes < 1024*1024 { 14 | return fmt.Sprintf("%.2f KiB", float64(bytes)/1024.) 15 | } 16 | return fmt.Sprintf("%.2f MiB", float64(bytes)/(1024*1024)) 17 | } 18 | -------------------------------------------------------------------------------- /util/graph/matrix.go: -------------------------------------------------------------------------------- 1 | package graph 2 | 3 | import ( 4 | "github.com/inspirer/textmapper/util/container" 5 | ) 6 | 7 | // Matrix stores a simple directed graph as an adjacency matrix in a bit set. 8 | type Matrix struct { 9 | n int 10 | set container.BitSet 11 | } 12 | 13 | // NewMatrix creates a new n x n matrix. 14 | func NewMatrix(n int) Matrix { 15 | return Matrix{n, container.NewBitSet(n * n)} 16 | } 17 | 18 | // AddEdge creates a link from i to e. 19 | func (m Matrix) AddEdge(i, e int) { 20 | m.set.Set(i*m.n + e) 21 | } 22 | 23 | // HasEdge checks if there is a link from i to e. 24 | func (m Matrix) HasEdge(i, e int) bool { 25 | return m.set.Get(i*m.n + e) 26 | } 27 | 28 | // Closure adds links for each path available in the graph. 29 | func (m Matrix) Closure() { 30 | // [j,i] && [i,e] => [j,e] 31 | for i := 0; i < m.n; i++ { 32 | for j := 0; j < m.n; j++ { 33 | if !m.HasEdge(j, i) { 34 | continue 35 | } 36 | for e := 0; e < m.n; e++ { 37 | if m.HasEdge(i, e) { 38 | m.AddEdge(j, e) 39 | } 40 | } 41 | } 42 | } 43 | } 44 | 45 | // Graph returns the adjacency list representation of the graph. 
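// All per-node adjacency lists are sub-slices of a single backing slice
// obtained from set.Slice.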
46 | func (m Matrix) Graph(reuse []int) [][]int { 47 | n := m.n 48 | ret := make([][]int, n) 49 | slice := m.set.Slice(reuse) 50 | var start, index int 51 | for i, val := range slice { 52 | e := val / n 53 | slice[i] = val % n 54 | if e > index { 55 | ret[index] = slice[start:i] 56 | start = i 57 | index = e 58 | } 59 | } 60 | ret[index] = slice[start:] 61 | return ret 62 | } 63 | -------------------------------------------------------------------------------- /util/graph/matrix_test.go: -------------------------------------------------------------------------------- 1 | package graph 2 | 3 | import ( 4 | "fmt" 5 | "testing" 6 | ) 7 | 8 | func TestMatrixClosure(t *testing.T) { 9 | m := NewMatrix(20) 10 | count := func() int { 11 | var ret int 12 | for i := 0; i < 20*20; i++ { 13 | if m.set.Get(i) { 14 | ret++ 15 | } 16 | } 17 | return ret 18 | } 19 | 20 | m.AddEdge(1, 8) 21 | m.AddEdge(8, 3) 22 | m.AddEdge(3, 4) 23 | m.AddEdge(4, 5) 24 | m.AddEdge(5, 0) 25 | m.Closure() // 5 edges + 1+2+3+4 26 | 27 | if got := count(); got != 15 { 28 | t.Errorf("count() = %v, want: 15", got) 29 | } 30 | 31 | m.AddEdge(0, 1) 32 | m.Closure() // 6 nodes, fully connected 33 | if got := count(); got != 36 { 34 | t.Errorf("count() = %v, want: 36", got) 35 | } 36 | } 37 | 38 | func TestGraph(t *testing.T) { 39 | m := NewMatrix(10) 40 | m.AddEdge(1, 5) 41 | m.AddEdge(2, 9) 42 | m.AddEdge(5, 1) 43 | m.AddEdge(0, 0) 44 | m.AddEdge(0, 1) 45 | m.AddEdge(9, 9) 46 | g := m.Graph(nil) 47 | want := "[[0 1] [5] [9] [] [] [1] [] [] [] [9]]" 48 | if got := fmt.Sprintf("%v", g); got != want { 49 | t.Errorf("m.Graph() = %v, want: %v", got, want) 50 | } 51 | 52 | } 53 | -------------------------------------------------------------------------------- /util/graph/path.go: -------------------------------------------------------------------------------- 1 | package graph 2 | 3 | // LongestPath returns the longest path in a given graph, or nil if the graph has cycles. 4 | func LongestPath(graph [][]int) []int { 5 | type node struct{ height, link int } 6 | data := make([]node, len(graph)) 7 | var cycle bool 8 | 9 | var dfs func(i int) 10 | dfs = func(i int) { 11 | if h := data[i].height; h != 0 { 12 | // Do not enter a node twice. 
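// A height of -1 marks a node that is currently on the DFS stack, i.e. a cycle.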
13 | if h == -1 { 14 | cycle = true 15 | } 16 | return 17 | } 18 | data[i].height = -1 19 | ret := node{1, -1} 20 | for _, next := range graph[i] { 21 | dfs(next) 22 | if height := data[next].height; height >= ret.height { 23 | ret = node{height + 1, next} 24 | } 25 | } 26 | data[i] = ret 27 | } 28 | first := -1 29 | for i := 0; i < len(graph); i++ { 30 | dfs(i) 31 | if first == -1 || data[first].height < data[i].height { 32 | first = i 33 | } 34 | } 35 | if cycle { 36 | return nil 37 | } 38 | var ret []int 39 | for i := first; i != -1; i = data[i].link { 40 | ret = append(ret, i) 41 | } 42 | return ret 43 | } 44 | -------------------------------------------------------------------------------- /util/graph/path_test.go: -------------------------------------------------------------------------------- 1 | package graph 2 | 3 | import ( 4 | "fmt" 5 | "testing" 6 | ) 7 | 8 | var pathTests = []struct { 9 | g [][]int 10 | want string 11 | }{ 12 | { 13 | g: [][]int{}, 14 | want: "[]", 15 | }, 16 | { 17 | g: [][]int{ 18 | {}, 19 | }, 20 | want: "[0]", 21 | }, 22 | { 23 | g: [][]int{ 24 | {1}, 25 | {0}, // cycle 26 | }, 27 | want: "[]", 28 | }, 29 | { 30 | g: [][]int{ 31 | {1}, 32 | {}, 33 | {0}, 34 | {}, 35 | }, 36 | want: "[2 0 1]", 37 | }, 38 | { 39 | g: [][]int{ 40 | {1}, 41 | {2, 3}, 42 | {3}, 43 | {}, 44 | }, 45 | want: "[0 1 2 3]", 46 | }, 47 | } 48 | 49 | func TestLongestPath(t *testing.T) { 50 | for _, tc := range pathTests { 51 | path := LongestPath(tc.g) 52 | if got := fmt.Sprintf("%v", path); got != tc.want { 53 | t.Errorf("LongestPath(%v) = %v, want: %v", tc.g, got, tc.want) 54 | } 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /util/graph/tarjan.go: -------------------------------------------------------------------------------- 1 | package graph 2 | 3 | import ( 4 | "github.com/inspirer/textmapper/util/container" 5 | ) 6 | 7 | // Tarjan calls the provided function for each strongly connected component in the graph in the 8 | // topological order. 
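// The onStack bit set passed to f is only valid for the duration of the callback.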
9 | func Tarjan(graph [][]int, f func(vertices []int, onStack container.BitSet)) { 10 | t := tarjan{graph: graph, callback: f} 11 | t.run() 12 | } 13 | 14 | type tarjan struct { 15 | graph [][]int 16 | callback func(vertices []int, onStack container.BitSet) 17 | 18 | stack []int 19 | index []int 20 | lowLink []int 21 | onStack container.BitSet 22 | curr int 23 | } 24 | 25 | func (t *tarjan) run() { 26 | size := len(t.graph) 27 | if size < 2 { 28 | return 29 | } 30 | 31 | t.stack = nil 32 | t.index = make([]int, size) 33 | for i := range t.index { 34 | t.index[i] = -1 35 | } 36 | t.lowLink = make([]int, size) 37 | t.onStack = container.NewBitSet(size) 38 | 39 | t.curr = 0 40 | for i := 0; i < size; i++ { 41 | if t.index[i] == -1 { 42 | t.strongConnect(i) 43 | } 44 | } 45 | } 46 | 47 | func (t *tarjan) strongConnect(v int) { 48 | base := len(t.stack) 49 | t.index[v] = t.curr 50 | t.lowLink[v] = t.curr 51 | t.curr++ 52 | t.stack = append(t.stack, v) 53 | t.onStack.Set(v) 54 | 55 | for _, w := range t.graph[v] { 56 | if t.index[w] == -1 { 57 | t.strongConnect(w) 58 | if t.lowLink[w] < t.lowLink[v] { 59 | t.lowLink[v] = t.lowLink[w] 60 | } 61 | } else if t.onStack.Get(w) && t.index[w] < t.lowLink[v] { 62 | t.lowLink[v] = t.index[w] 63 | } 64 | } 65 | 66 | if t.lowLink[v] == t.index[v] { 67 | t.callback(t.stack[base:], t.onStack) 68 | for _, v := range t.stack[base:] { 69 | t.onStack.Clear(v) 70 | } 71 | t.stack = t.stack[:base] 72 | } 73 | } 74 | -------------------------------------------------------------------------------- /util/graph/tarjan_test.go: -------------------------------------------------------------------------------- 1 | package graph 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/inspirer/textmapper/util/container" 7 | ) 8 | 9 | var tarjanTests = [][][]int{ 10 | {{1}, {}}, // 0 -> 1 11 | {{1}, {0}}, // 0 <-> 1 12 | {{}, {0}}, // 0 <- 1 13 | {{1}, {2}, {0}}, // 0->1->2->0 14 | {{1, 2, 3, 4}, {0}, {0}, {4}, {3}}, // (0<->1,2) -> (3<->4) 15 | {{}, {}, {}, {}, {}}, // 5 separate nodes 16 | {{}, {3}, {4}, {}, {}}, 17 | {{3}, {3}, {3}, {4}, {}}, 18 | {{3}, {3}, {3}, {4}, {0}}, 19 | } 20 | 21 | func TestTarjan(t *testing.T) { 22 | for _, g := range tarjanTests { 23 | seen := container.NewBitSet(len(g)) 24 | Tarjan(g, func(vs []int, onStack container.BitSet) { 25 | stack := onStack.Slice(nil) 26 | for _, n := range stack { 27 | if seen.Get(n) { 28 | t.Fatalf("Tarjan(%v) calls the callback with (%v, %v), seen=%v", g, vs, stack, seen.Slice(nil)) 29 | } 30 | } 31 | for _, n := range vs { 32 | if !onStack.Get(n) { 33 | t.Fatalf("Tarjan(%v) calls the callback with (%v, %v), %v is not onStack", g, vs, stack, n) 34 | } 35 | seen.Set(n) 36 | } 37 | }) 38 | if got := len(seen.Slice(nil)); got != len(g) { 39 | t.Errorf("Tarjan(%v) called the callback for %v nodes, want: %v", g, got, len(g)) 40 | } 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /util/graph/transpose.go: -------------------------------------------------------------------------------- 1 | package graph 2 | 3 | // Transpose reverses all edges in a graph. 
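// It runs in O(V+E) time and stores all adjacency lists in one shared allocation.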
4 | func Transpose(g [][]int) [][]int {
5 |     n := len(g)
6 |     size := make([]int, n)
7 |     var total int
8 |     for _, edges := range g {
9 |         for _, to := range edges {
10 |             size[to]++
11 |         }
12 |         total += len(edges)
13 |     }
14 |     pool := make([]int, total)
15 |     ret := make([][]int, n)
16 |     for i, size := range size {
17 |         ret[i] = pool[:0:size]
18 |         pool = pool[size:]
19 |     }
20 |     for from, edges := range g {
21 |         for _, to := range edges {
22 |             ret[to] = append(ret[to], from)
23 |         }
24 |     }
25 |     return ret
26 | }
27 | 
--------------------------------------------------------------------------------
/util/graph/transpose_test.go:
--------------------------------------------------------------------------------
1 | package graph
2 | 
3 | import (
4 |     "fmt"
5 |     "testing"
6 | )
7 | 
8 | var transposeTests = []struct {
9 |     g    [][]int
10 |     want string
11 | }{
12 |     {[][]int{{0}}, "[[0]]"},                   // 0<->0
13 |     {[][]int{{1}, {}}, "[[] [0]]"},            // 0 <- 1
14 |     {[][]int{{1}, {0}}, "[[1] [0]]"},          // 0 <-> 1
15 |     {[][]int{{1}, {2}, {0}}, "[[2] [0] [1]]"}, // 0<-1<-2<-0
16 |     {[][]int{{0, 1, 2}, {2, 1}, {0}}, "[[0 2] [0 1] [0 1]]"},
17 | }
18 | 
19 | func TestTranspose(t *testing.T) {
20 |     for _, tc := range transposeTests {
21 |         tg := Transpose(tc.g)
22 |         if got := fmt.Sprintf("%v", tg); got != tc.want {
23 |             t.Errorf("Transpose(%v) = %v, want: %v", tc.g, got, tc.want)
24 |         }
25 |     }
26 | }
27 | 
--------------------------------------------------------------------------------
/util/sparse/sparse.go:
--------------------------------------------------------------------------------
1 | // Package sparse introduces a notion of sparse sets.
2 | package sparse
3 | 
4 | import (
5 |     "slices"
6 | 
7 |     "github.com/inspirer/textmapper/util/container"
8 | )
9 | 
10 | // Set is an immutable collection of distinct numbers, typically coming from a certain [0, n) range.
11 | //
12 | // Warning: this list is *unordered*.
13 | type Set []int
14 | 
15 | // Union computes a union of multiple sets in linear time.
16 | //
17 | // "aux" must have enough bits to capture all the values in "sets". It must contain all zeroes on
18 | // entry and will remain zeroed after this function returns. "reuse" can be provided to reduce
19 | // memory allocations.
20 | func Union(sets []Set, aux container.BitSet, reuse []int) Set {
21 |     if len(sets) == 0 {
22 |         return nil
23 |     }
24 | 
25 |     var largest, maxSize int
26 |     ret := Set(reuse[:0])
27 |     for i, set := range sets {
28 |         if len(set) > maxSize {
29 |             largest = i
30 |             maxSize = len(set)
31 |         }
32 |         for _, v := range set {
33 |             if aux.Get(v) {
34 |                 continue
35 |             }
36 |             aux.Set(v)
37 |             ret = append(ret, v)
38 |         }
39 |     }
40 |     for _, v := range ret {
41 |         aux.Clear(v)
42 |     }
43 |     if len(ret) == maxSize {
44 |         return sets[largest]
45 |     }
46 |     if cap(reuse) >= len(ret) { // the result aliases "reuse"; copy it out to keep the buffer reusable
47 |         ret = slices.Clone(ret)
48 |     }
49 |     return ret
50 | }
51 | 
52 | // Builder is a reusable type for easy and safe Set instantiation.
53 | type Builder struct {
54 |     result Set
55 |     seen   container.BitSet
56 | }
57 | 
58 | // NewBuilder creates a new builder for sets containing numbers in the range of [0, n).
59 | func NewBuilder(n int) *Builder {
60 |     return &Builder{
61 |         seen: container.NewBitSet(n),
62 |     }
63 | }
64 | 
65 | // Add inserts a value into the set, deduplicating values as necessary.
66 | func (b *Builder) Add(val int) {
67 |     if b.seen.Get(val) {
68 |         return
69 |     }
70 |     b.result = append(b.result, val)
71 |     b.seen.Set(val)
72 | }
73 | 
74 | // Build returns the constructed set and resets this instance for further use.
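// Values appear in the order in which they were first added.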
75 | func (b *Builder) Build() Set { 76 | for _, v := range b.result { 77 | b.seen.Clear(v) 78 | } 79 | ret := b.result 80 | b.result = nil 81 | return ret 82 | } 83 | -------------------------------------------------------------------------------- /util/sparse/sparse_test.go: -------------------------------------------------------------------------------- 1 | package sparse_test 2 | 3 | import ( 4 | "fmt" 5 | "testing" 6 | 7 | "github.com/inspirer/textmapper/util/container" 8 | "github.com/inspirer/textmapper/util/sparse" 9 | ) 10 | 11 | func TestSparse(t *testing.T) { 12 | tests := []struct { 13 | sets []sparse.Set 14 | want string 15 | wantBuild string 16 | }{ 17 | { 18 | sets: []sparse.Set{}, 19 | want: "[]", 20 | wantBuild: "[]", 21 | }, 22 | { 23 | sets: []sparse.Set{ 24 | {1, 0, 3}, 25 | {2, 1, 0}, 26 | }, 27 | want: "[1 0 3 2]", 28 | wantBuild: "[1 0 3 2]", 29 | }, 30 | { 31 | sets: []sparse.Set{ 32 | {1, 0}, 33 | {2, 1, 0}, 34 | }, 35 | want: "[2 1 0]", // reusing an existing set 36 | wantBuild: "[1 0 2]", 37 | }, 38 | { 39 | sets: []sparse.Set{ 40 | {2, 3, 4}, 41 | {1, 2, 3}, 42 | {3, 4, 5}, 43 | }, 44 | want: "[2 3 4 1 5]", 45 | wantBuild: "[2 3 4 1 5]", 46 | }, 47 | } 48 | 49 | b := sparse.NewBuilder(16) 50 | for _, tc := range tests { 51 | aux := container.NewBitSet(16) 52 | got := sparse.Union(tc.sets, aux, nil /*reuse*/) 53 | if got := fmt.Sprintf("%v", got); got != tc.want { 54 | t.Errorf("Union(%v) = %v, want: %v", tc.sets, got, tc.want) 55 | } 56 | 57 | // Builder should produce (almost) the same result. 58 | for _, set := range tc.sets { 59 | for _, v := range set { 60 | b.Add(v) 61 | } 62 | } 63 | got = b.Build() 64 | if got := fmt.Sprintf("%v", got); got != tc.wantBuild { 65 | t.Errorf("Build(%v) = %v, want: %v", tc.sets, got, tc.wantBuild) 66 | } 67 | } 68 | } 69 | -------------------------------------------------------------------------------- /vscode-ext/.vscode/launch.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "0.1.0", 3 | "configurations": [ 4 | { 5 | "name": "Extension", 6 | "type": "extensionHost", 7 | "request": "launch", 8 | "runtimeExecutable": "${execPath}", 9 | "args": ["--extensionDevelopmentPath=${workspaceRoot}" ] 10 | } 11 | ] 12 | } -------------------------------------------------------------------------------- /vscode-ext/CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Change Log 2 | All notable changes to this extension will be documented in this file. 3 | 4 | ## [0.0.1] - 2017-12-25 5 | ### Added 6 | - Syntax highlighting for Textmapper files (excluding templates). -------------------------------------------------------------------------------- /vscode-ext/LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2019 - present Evgeny Gryaznov 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 
23 | 
--------------------------------------------------------------------------------
/vscode-ext/README.md:
--------------------------------------------------------------------------------
1 | # Textmapper for Visual Studio Code
2 | 
3 | This extension adds support for Textmapper grammars to VS Code, including:
4 | 
5 | * Syntax highlighting.
6 | * Diagnostics (including grammar ambiguities).
7 | * Go to definition with references.
--------------------------------------------------------------------------------
/vscode-ext/extension.ts:
--------------------------------------------------------------------------------
1 | import { ExtensionContext, workspace } from 'vscode';
2 | 
3 | import {
4 |   Executable,
5 |   LanguageClient,
6 |   LanguageClientOptions,
7 | } from 'vscode-languageclient/node';
8 | 
9 | let client: LanguageClient;
10 | 
11 | export async function activate(context: ExtensionContext) {
12 |   const config = workspace.getConfiguration('textmapper');
13 |   const serverOptions: Executable = {
14 |     command: config.get("path", "tmlsp"),
15 |     args: ["ls"],
16 |   };
17 |   const clientOptions: LanguageClientOptions = {
18 |     documentSelector: [{ scheme: 'file', language: 'textmapper' }],
19 |     diagnosticCollectionName: "textmapper",
20 |   };
21 | 
22 |   client = new LanguageClient(
23 |     'tm-lsp',
24 |     'Textmapper',
25 |     serverOptions,
26 |     clientOptions
27 |   );
28 | 
29 |   await client.start();
30 |   context.subscriptions.push(client);
31 | }
32 | 
33 | export async function deactivate() {
34 |   if (client) {
35 |     await client.stop();
36 |   }
37 | }
--------------------------------------------------------------------------------
/vscode-ext/language-configuration.json:
--------------------------------------------------------------------------------
1 | {
2 |   "comments": {
3 |     "lineComment": "#",
4 |     "blockComment": ["/*", "*/"]
5 |   },
6 |   "brackets": [
7 |     ["(", ")"]
8 |   ],
9 |   "autoClosingPairs": [
10 |     ["<", ">"],
11 |     ["[", "]"],
12 |     ["(", ")"],
13 |     ["\"", "\""],
14 |     ["'", "'"]
15 |   ]
16 | }
--------------------------------------------------------------------------------
/vscode-ext/package.json:
--------------------------------------------------------------------------------
1 | {
2 |   "name": "textmapper-support",
3 |   "displayName": "Textmapper Support",
4 |   "description": "Textmapper Language Support",
5 |   "version": "0.1.0",
6 |   "publisher": "inspirer",
7 |   "repository": {
8 |     "type": "git",
9 |     "url": "https://github.com/inspirer/textmapper"
10 |   },
11 |   "engines": {
12 |     "vscode": "^1.92.0"
13 |   },
14 |   "categories": [
15 |     "Programming Languages"
16 |   ],
17 |   "main": "./out/extension",
18 |   "contributes": {
19 |     "languages": [
20 |       {
21 |         "id": "textmapper",
22 |         "aliases": [
23 |           "Textmapper",
24 |           "textmapper"
25 |         ],
26 |         "extensions": [
27 |           ".tm"
28 |         ],
29 |         "configuration": "./language-configuration.json"
30 |       }
31 |     ],
32 |     "grammars": [
33 |       {
34 |         "path": "./syntaxes/cc.injection.json",
35 |         "scopeName": "textmapper.metavars.injection.cc",
36 |         "injectTo": [
37 |           "source.textmapper"
38 |         ]
39 | }, 40 | { 41 | "path": "./syntaxes/go.injection.json", 42 | "scopeName": "textmapper.metavars.injection.go", 43 | "injectTo": [ 44 | "source.textmapper" 45 | ] 46 | }, 47 | { 48 | "language": "textmapper", 49 | "scopeName": "source.textmapper", 50 | "path": "./syntaxes/textmapper.tmLanguage.json", 51 | "embeddedLanguages": { 52 | "entity.code.textmapper.cc": "cpp", 53 | "entity.code.textmapper.go": "go" 54 | } 55 | } 56 | ], 57 | "configuration": { 58 | "title": "Textmapper configuration", 59 | "properties": { 60 | "textmapper.path": { 61 | "type": "string", 62 | "default": "tmlsp", 63 | "description": "The path to the Textmapper language server" 64 | } 65 | } 66 | } 67 | }, 68 | "scripts": { 69 | "vscode:prepublish": "npm run compile", 70 | "compile": "tsc -b", 71 | "watch": "tsc -b -w" 72 | }, 73 | "dependencies": { 74 | "vscode-languageclient": "^9.0.1" 75 | }, 76 | "devDependencies": { 77 | "@types/node": "^22.1.0", 78 | "@types/vscode": "^1.92.0", 79 | "typescript": "^5.5.4" 80 | } 81 | } 82 | -------------------------------------------------------------------------------- /vscode-ext/syntaxes/cc.injection.json: -------------------------------------------------------------------------------- 1 | { 2 | "scopeName": "textmapper.metavars.injection.cc", 3 | "injectionSelector": "L:entity.code.textmapper.cc", 4 | "patterns": [ 5 | { 6 | "include": "#dollars-keyword" 7 | }, 8 | { 9 | "include": "#dollars-expr" 10 | }, 11 | { 12 | "include": "#location-clause" 13 | } 14 | ], 15 | "repository": { 16 | "dollars-keyword": { 17 | "match": "\\$(\\$|[0-9]+|[a-zA-Z_]([a-zA-Z_0-9\\-]*[a-zA-Z_0-9])?)", 18 | "name": "keyword.dollars.tm" 19 | }, 20 | "location-clause": { 21 | "match": "\\@(\\$|[0-9]+|[a-zA-Z_]([a-zA-Z_0-9\\-]*[a-zA-Z_0-9])?)", 22 | "name": "keyword.ampersand.tm" 23 | }, 24 | "dollars-expr": { 25 | "begin": "\\$\\{", 26 | "end": "\\}", 27 | "name": "keyword.dollars.tm" 28 | } 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /vscode-ext/syntaxes/go.injection.json: -------------------------------------------------------------------------------- 1 | { 2 | "scopeName": "textmapper.metavars.injection.go", 3 | "injectionSelector": "L:entity.code.textmapper.go", 4 | "patterns": [ 5 | { 6 | "include": "#dollars-keyword" 7 | }, 8 | { 9 | "include": "#dollars-expr" 10 | }, 11 | { 12 | "include": "#location-clause" 13 | } 14 | ], 15 | "repository": { 16 | "dollars-keyword": { 17 | "match": "\\$(\\$|[0-9]+|[a-zA-Z_]([a-zA-Z_0-9\\-]*[a-zA-Z_0-9])?)", 18 | "name": "keyword.dollars.tm" 19 | }, 20 | "location-clause": { 21 | "match": "\\@(\\$|[0-9]+|[a-zA-Z_]([a-zA-Z_0-9\\-]*[a-zA-Z_0-9])?)", 22 | "name": "keyword.ampersand.tm" 23 | }, 24 | "dollars-expr": { 25 | "begin": "\\$\\{", 26 | "end": "\\}", 27 | "name": "keyword.dollars.tm" 28 | } 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /vscode-ext/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "module": "commonjs", 4 | "target": "ES2022", 5 | "lib": [ 6 | "ES2022" 7 | ], 8 | "outDir": "out", 9 | "rootDir": ".", 10 | "sourceMap": true 11 | }, 12 | "exclude": [ 13 | "node_modules" 14 | ], 15 | } --------------------------------------------------------------------------------