├── examples ├── go │ ├── .gitignore │ ├── Makefile │ └── example2.l ├── numbers │ ├── .gitignore │ ├── Makefile │ └── example.l ├── c-like │ ├── Makefile │ ├── example.l │ └── example.y └── calc │ ├── Makefile │ ├── calc.y │ ├── tokenizer.l │ ├── README │ └── fdl-1.3 ├── README.md ├── README ├── all_test.go ├── CONTRIBUTORS ├── AUTHORS ├── lex ├── dfa ├── all_test.go ├── Makefile ├── doc.go ├── example.l ├── example_test.go └── api.go ├── LICENSE ├── Makefile ├── main.go ├── doc.go └── render.go /examples/go/.gitignore: -------------------------------------------------------------------------------- 1 | example2.go 2 | example2 3 | -------------------------------------------------------------------------------- /examples/numbers/.gitignore: -------------------------------------------------------------------------------- 1 | example 2 | example.go 3 | -------------------------------------------------------------------------------- /examples/c-like/Makefile: -------------------------------------------------------------------------------- 1 | all: 2 | golex *.l 3 | goyacc *.y 4 | go run *.go 5 | 6 | clean: 7 | rm lex.yy *.go y.output -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | `github.com/cznic/golex` has moved to [`modernc.org/golex`](https://godoc.org/modernc.org/golex) ([vcs](https://gitlab.com/cznic/golex)). 2 | 3 | Please update your import paths to `modernc.org/golex`. 4 | 5 | This repo is now archived. 
6 | -------------------------------------------------------------------------------- /README: -------------------------------------------------------------------------------- 1 | This is a goinstall-able mirror of modified code already published at: 2 | http://git.nic.cz/redmine/projects/gogolex/repository 3 | 4 | Online godoc documentation for this tool available at: 5 | http://godoc.org/github.com/cznic/golex 6 | 7 | Installation: 8 | $ go get github.com/cznic/golex 9 | -------------------------------------------------------------------------------- /all_test.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2014 The golex Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | package main 6 | 7 | import ( 8 | "testing" 9 | ) 10 | 11 | func TestPlaceholder(t *testing.T) { 12 | t.Log("TODO") //TODO 13 | } 14 | -------------------------------------------------------------------------------- /CONTRIBUTORS: -------------------------------------------------------------------------------- 1 | # This file lists people who contributed code to this repository. The AUTHORS 2 | # file lists the copyright holders; this file lists people. 3 | # 4 | # Names should be added to this file like so: 5 | # Name 6 | # 7 | # Please keep the list sorted. 8 | 9 | Alexey Neyhdanov 10 | Bill Thiede 11 | Jan Mercl <0xjnml@gmail.com> 12 | -------------------------------------------------------------------------------- /AUTHORS: -------------------------------------------------------------------------------- 1 | # This file lists authors for copyright purposes. This file is distinct from 2 | # the CONTRIBUTORS files. See the latter for an explanation. 3 | # 4 | # Names should be added to this file as: 5 | # Name or Organization 6 | # 7 | # The email address is not required for organizations. 8 | # 9 | # Please keep the list sorted. 
10 | 11 | CZ.NIC z.s.p.o. 12 | Jan Mercl <0xjnml@gmail.com> 13 | -------------------------------------------------------------------------------- /examples/numbers/Makefile: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2011 CZ.NIC z.s.p.o. All rights reserved. 2 | # Use of this source code is governed by a BSD-style 3 | # license that can be found in the LICENSE file. 4 | 5 | # blame: jnml, labs.nic.cz 6 | 7 | all: example.go 8 | go build $< 9 | 10 | run: all 11 | ./example 12 | 13 | example.go: example.l 14 | golex -t $< | gofmt > $@ 15 | 16 | clean: 17 | rm -f example.go lex.yy.go y.output *~ 18 | 19 | nuke: clean 20 | rm -f example 21 | -------------------------------------------------------------------------------- /examples/go/Makefile: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2011 CZ.NIC z.s.p.o. All rights reserved. 2 | # Use of this source code is governed by a BSD-style 3 | # license that can be found in the LICENSE file. 4 | 5 | # blame: jnml, labs.nic.cz 6 | 7 | all: example2.go 8 | go build $< 9 | 10 | run: all 11 | ./example2 12 | 13 | example2.go: example2.l 14 | golex -t $< | gofmt > $@ 15 | 16 | clean: 17 | rm -f example2.go lex.yy.go y.output *~ 18 | 19 | nuke: clean 20 | rm -f example2 *.[568] 21 | -------------------------------------------------------------------------------- /examples/calc/Makefile: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2011 CZ.NIC z.s.p.o. All rights reserved. 2 | # Use of this source code is governed by a BSD-style 3 | # license that can be found in the LICENSE file. 
4 | 5 | # blame: jnml, labs.nic.cz 6 | 7 | all: calc.go tokenizer.go 8 | gofmt -l -s -w *.go 9 | go build $^ 10 | 11 | run: all 12 | ./calc 13 | 14 | tokenizer.go: tokenizer.l 15 | golex -o $@ $< 16 | 17 | calc.go: calc.y 18 | goyacc -o $@ $< 19 | 20 | clean: 21 | go clean 22 | rm -f tokenizer.go calc.go lex.yy.go y.go y.output *~ 23 | 24 | nuke: clean 25 | rm -f calc 26 | -------------------------------------------------------------------------------- /lex/dfa: -------------------------------------------------------------------------------- 1 | $ golex -DFA example.l 2 | StartConditions: 3 | INITIAL, scId:0, stateId:1 4 | DFA: 5 | [1] 6 | "\t"..."\n", "\r", " ", --> 2 7 | "0"..."9", --> 3 8 | "A"..."Z", "_", "a"..."e", "g"..."z", "\u0080", --> 4 9 | "f"--> 5 10 | [2] 11 | "\t"..."\n", "\r", " ", --> 2 12 | [3] 13 | "0"..."9", --> 3 14 | [4] 15 | "0"..."9", "A"..."Z", "_", "a"..."z", "\u0080"..."\u0081", --> 4 16 | [5] 17 | "0"..."9", "A"..."Z", "_", "a"..."t", "v"..."z", "\u0080"..."\u0081", --> 4 18 | "u"--> 6 19 | [6] 20 | "0"..."9", "A"..."Z", "_", "a"..."m", "o"..."z", "\u0080"..."\u0081", --> 4 21 | "n"--> 7 22 | [7] 23 | "0"..."9", "A"..."Z", "_", "a"..."b", "d"..."z", "\u0080"..."\u0081", --> 4 24 | "c"--> 8 25 | [8] 26 | "0"..."9", "A"..."Z", "_", "a"..."z", "\u0080"..."\u0081", --> 4 27 | state 2 accepts rule 1 28 | state 3 accepts rule 4 29 | state 4 accepts rule 3 30 | state 5 accepts rule 3 31 | state 6 accepts rule 3 32 | state 7 accepts rule 3 33 | state 8 accepts rule 2 34 | 35 | $ 36 | -------------------------------------------------------------------------------- /lex/all_test.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2015 The golex Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 
4 | 5 | package lex 6 | 7 | import ( 8 | "fmt" 9 | "os" 10 | "path" 11 | "runtime" 12 | "strings" 13 | "testing" 14 | ) 15 | 16 | func caller(s string, va ...interface{}) { 17 | _, fn, fl, _ := runtime.Caller(2) 18 | fmt.Fprintf(os.Stderr, "caller: %s:%d: ", path.Base(fn), fl) 19 | fmt.Fprintf(os.Stderr, s, va...) 20 | fmt.Fprintln(os.Stderr) 21 | _, fn, fl, _ = runtime.Caller(1) 22 | fmt.Fprintf(os.Stderr, "\tcallee: %s:%d: ", path.Base(fn), fl) 23 | fmt.Fprintln(os.Stderr) 24 | } 25 | 26 | func dbg(s string, va ...interface{}) { 27 | if s == "" { 28 | s = strings.Repeat("%v ", len(va)) 29 | } 30 | _, fn, fl, _ := runtime.Caller(1) 31 | fmt.Fprintf(os.Stderr, "dbg %s:%d: ", path.Base(fn), fl) 32 | fmt.Fprintf(os.Stderr, s, va...) 33 | fmt.Fprintln(os.Stderr) 34 | } 35 | 36 | func TODO(...interface{}) string { 37 | _, fn, fl, _ := runtime.Caller(1) 38 | return fmt.Sprintf("TODO: %s:%d:\n", path.Base(fn), fl) 39 | } 40 | 41 | func use(...interface{}) {} 42 | 43 | // ============================================================================ 44 | 45 | func Test(t *testing.T) { 46 | t.Logf("TODO") 47 | } 48 | -------------------------------------------------------------------------------- /lex/Makefile: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2015 The golex Authors. All rights reserved. 2 | # Use of this source code is governed by a BSD-style 3 | # license that can be found in the LICENSE file. 
4 | 5 | .PHONY: all clean cover cpu editor internalError later mem nuke todo 6 | 7 | grep=--include=*.go --include=*.l --include=*.y 8 | 9 | all: editor 10 | go vet || true 11 | golint || true 12 | make todo 13 | 14 | clean: 15 | go clean 16 | rm -f *~ cpu.test mem.test 17 | 18 | cover: 19 | t=$(shell tempfile) ; go test -coverprofile $$t && go tool cover -html $$t && unlink $$t 20 | 21 | cpu: 22 | go test -c -o cpu.test 23 | ./cpu.test -noerr -test.cpuprofile cpu.out 24 | go tool pprof --lines cpu.test cpu.out 25 | 26 | editor: example_test.go 27 | gofmt -l -s -w *.go 28 | go test 29 | go install 30 | 31 | example_test.go: example.l 32 | golex -o $@ $< 33 | 34 | internalError: 35 | egrep -ho '"internal error.*"' *.go | sort | cat -n 36 | 37 | later: 38 | @grep -n $(grep) LATER * || true 39 | @grep -n $(grep) MAYBE * || true 40 | 41 | mem: 42 | go test -c -o mem.test 43 | ./mem.test -test.bench . -test.memprofile mem.out 44 | go tool pprof --lines --web --alloc_space mem.test mem.out 45 | 46 | nuke: clean 47 | go clean -i 48 | 49 | todo: 50 | @grep -nr $(grep) ^[[:space:]]*_[[:space:]]*=[[:space:]][[:alpha:]][[:alnum:]]* * || true 51 | @grep -nr $(grep) TODO * || true 52 | @grep -nr $(grep) BUG * || true 53 | @grep -nr $(grep) [^[:alpha:]]println * || true 54 | -------------------------------------------------------------------------------- /examples/numbers/example.l: -------------------------------------------------------------------------------- 1 | %{ 2 | // Copyright (c) 2011 CZ.NIC z.s.p.o. All rights reserved. 3 | // Use of this source code is governed by a BSD-style 4 | // license that can be found in the LICENSE file. 
5 | 6 | // blame: jnml, labs.nic.cz 7 | 8 | // +build ignore 9 | 10 | package main 11 | 12 | import ( 13 | "bufio" 14 | "fmt" 15 | "os" 16 | ) 17 | 18 | var ( 19 | src = bufio.NewReader(os.Stdin) 20 | buf []byte 21 | current byte 22 | ) 23 | 24 | func getc() byte { 25 | if current != 0 { 26 | buf = append(buf, current) 27 | } 28 | current = 0 29 | if b, err := src.ReadByte(); err == nil { 30 | current = b 31 | } 32 | return current 33 | } 34 | 35 | func main() { // This left brace is closed by *1 36 | c := getc() // init 37 | %} 38 | 39 | %yyc c 40 | %yyn c = getc() 41 | 42 | D [0-9]+ 43 | 44 | %% 45 | buf = buf[:0] // The code before the first rule is executed before every scan cycle (rule #0 / state 0 action) 46 | 47 | [ \t\n\r]+ 48 | 49 | {D} fmt.Printf("int %q\n", buf) 50 | 51 | {D}\.{D}?|\.{D} fmt.Printf("float %q\n", buf) 52 | 53 | \0 return 54 | 55 | . fmt.Printf("%q\n", buf) 56 | 57 | %% 58 | // The golex generated scanner enters the top of the user code section when 59 | // lexeme recognition fails. In this example it should never happen. 60 | panic("scanner internal error") 61 | 62 | } // *1 this right brace 63 | -------------------------------------------------------------------------------- /examples/calc/calc.y: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | This file is a modified excerpt from the GNU Bison Manual examples originally found here: 4 | http://www.gnu.org/software/bison/manual/html_node/Infix-Calc.html#Infix-Calc 5 | 6 | The Copyright License for the GNU Bison Manual can be found in the "fdl-1.3" file. 7 | 8 | */ 9 | 10 | /* Infix notation calculator. 
*/ 11 | 12 | %{ 13 | 14 | // +build ignore 15 | 16 | package main 17 | 18 | import ( 19 | "bufio" 20 | "fmt" 21 | "math" 22 | "os" 23 | ) 24 | 25 | %} 26 | 27 | %union{ 28 | value float64 29 | } 30 | 31 | %token NUM 32 | 33 | %left '-' '+' 34 | %left '*' '/' 35 | %left NEG /* negation--unary minus */ 36 | %right '^' /* exponentiation */ 37 | 38 | %type NUM, exp 39 | 40 | %% /* The grammar follows. */ 41 | 42 | input: /* empty */ 43 | | input line 44 | ; 45 | 46 | line: '\n' 47 | | exp '\n' { fmt.Printf("\t%.10g\n", $1) } 48 | ; 49 | 50 | exp: NUM { $$ = $1 } 51 | | exp '+' exp { $$ = $1 + $3 } 52 | | exp '-' exp { $$ = $1 - $3 } 53 | | exp '*' exp { $$ = $1 * $3 } 54 | | exp '/' exp { $$ = $1 / $3 } 55 | | '-' exp %prec NEG { $$ = -$2 } 56 | | exp '^' exp { $$ = math.Pow($1, $3) } 57 | | '(' exp ')' { $$ = $2; } 58 | ; 59 | %% 60 | 61 | func main() { 62 | os.Exit(yyParse(newLexer(bufio.NewReader(os.Stdin)))) 63 | } 64 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2014 The golex Authors. All rights reserved. 2 | 3 | Redistribution and use in source and binary forms, with or without 4 | modification, are permitted provided that the following conditions are 5 | met: 6 | 7 | * Redistributions of source code must retain the above copyright 8 | notice, this list of conditions and the following disclaimer. 9 | * Redistributions in binary form must reproduce the above 10 | copyright notice, this list of conditions and the following disclaimer 11 | in the documentation and/or other materials provided with the 12 | distribution. 13 | * Neither the names of the authors nor the names of the 14 | contributors may be used to endorse or promote products derived from 15 | this software without specific prior written permission. 
16 | 17 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 20 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 21 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 22 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 23 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: all clean cover cpu editor internalError later mem nuke todo edit 2 | 3 | grep=--include=*.go --include=*.l --include=*.y --include=*.yy 4 | ngrep='TODOOK\|parser\.go\|scanner\.go\|.*_string\.go' 5 | 6 | all: editor 7 | go vet 2>&1 | grep -v $(ngrep) || true 8 | golint 2>&1 | grep -v $(ngrep) || true 9 | make todo 10 | unused . || true 11 | misspell *.go 12 | gosimple || true 13 | maligned || true 14 | unconvert -apply 15 | 16 | clean: 17 | go clean 18 | rm -f *~ *.test *.out 19 | 20 | cover: 21 | t=$(shell tempfile) ; go test -coverprofile $$t && go tool cover -html $$t && unlink $$t 22 | 23 | cpu: clean 24 | go test -run @ -bench . 
-cpuprofile cpu.out 25 | go tool pprof -lines *.test cpu.out 26 | 27 | edit: 28 | @ 1>/dev/null 2>/dev/null gvim -p Makefile *.go 29 | 30 | editor: 31 | gofmt -l -s -w *.go 32 | go test -i 33 | go test 2>&1 | tee log 34 | go build 35 | 36 | internalError: 37 | egrep -ho '"internal error.*"' *.go | sort | cat -n 38 | 39 | later: 40 | @grep -n $(grep) LATER * || true 41 | @grep -n $(grep) MAYBE * || true 42 | 43 | mem: clean 44 | go test -run @ -bench . -memprofile mem.out -memprofilerate 1 -timeout 24h 45 | go tool pprof -lines -web -alloc_space *.test mem.out 46 | 47 | nuke: clean 48 | go clean -i 49 | 50 | todo: 51 | @grep -nr $(grep) ^[[:space:]]*_[[:space:]]*=[[:space:]][[:alpha:]][[:alnum:]]* * | grep -v $(ngrep) || true 52 | @grep -nr $(grep) TODO * | grep -v $(ngrep) || true 53 | @grep -nr $(grep) BUG * | grep -v $(ngrep) || true 54 | @grep -nr $(grep) [^[:alpha:]]println * | grep -v $(ngrep) || true 55 | -------------------------------------------------------------------------------- /examples/calc/tokenizer.l: -------------------------------------------------------------------------------- 1 | %{ 2 | // Copyright (c) 2011 CZ.NIC z.s.p.o. All rights reserved. 3 | // Use of this source code is governed by a BSD-style 4 | // license that can be found in the LICENSE file. 
5 | 6 | // blame: jnml, labs.nic.cz 7 | 8 | 9 | package main 10 | 11 | import ( 12 | "bufio" 13 | "log" 14 | "strconv" 15 | ) 16 | 17 | type yylexer struct{ 18 | src *bufio.Reader 19 | buf []byte 20 | empty bool 21 | current byte 22 | } 23 | 24 | func newLexer(src *bufio.Reader) (y *yylexer) { 25 | y = &yylexer{src: src} 26 | if b, err := src.ReadByte(); err == nil { 27 | y.current = b 28 | } 29 | return 30 | } 31 | 32 | func (y *yylexer) getc() byte { 33 | if y.current != 0 { 34 | y.buf = append(y.buf, y.current) 35 | } 36 | y.current = 0 37 | if b, err := y.src.ReadByte(); err == nil { 38 | y.current = b 39 | } 40 | return y.current 41 | } 42 | 43 | func (y yylexer) Error(e string) { 44 | log.Fatal(e) 45 | } 46 | 47 | func (y *yylexer) Lex(lval *yySymType) int { 48 | var err error 49 | c := y.current 50 | if y.empty { 51 | c, y.empty = y.getc(), false 52 | } 53 | %} 54 | 55 | %yyc c 56 | %yyn c = y.getc() 57 | 58 | D [0-9]+ 59 | E [eE][-+]?{D} 60 | F {D}"."{D}?{E}?|{D}{E}?|"."{D}{E}? 61 | 62 | %% 63 | y.buf = y.buf[:0] 64 | 65 | [ \t\r]+ 66 | 67 | {F} 68 | if lval.value, err = strconv.ParseFloat(string(y.buf), 64); err != nil { 69 | log.Fatal(err) 70 | } 71 | 72 | return NUM 73 | 74 | %% 75 | y.empty = true 76 | return int(c) 77 | } 78 | -------------------------------------------------------------------------------- /lex/doc.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2015 The golex Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | // Package lex is a Unicode-friendly run time library for golex[0] generated 6 | // lexical analyzers[1]. 7 | // 8 | // Changelog 9 | // 10 | // 2015-04-08: Initial release. 11 | // 12 | // Character classes 13 | // 14 | // Golex internally handles only 8 bit "characters". 
Many Unicode-aware 15 | // tokenizers do not actually need to recognize every Unicode rune, but only 16 | // some particular partitions/subsets. Like, for example, a particular Unicode 17 | // category, say upper case letters: Lu. 18 | // 19 | // The idea is to convert all runes in a particular set into a single 8 bit 20 | // character allocated outside the ASCII range of codes. The token value, a 21 | // string of runes and their exact positions is collected as usual (see the 22 | // Token and TokenBytes methods), but the tokenizer DFA is simpler (and thus 23 | // smaller and perhaps also faster) when this technique is used. In the example 24 | // program (see below), recognizing (and skipping) white space, integer 25 | // literals, one keyword and Go identifiers requires only an 8 state DFA[5]. 26 | // 27 | // To provide the conversion from runes to character classes, "install" your 28 | // converting function using the RuneClass option. 29 | // 30 | // References 31 | // 32 | // - 33 | // 34 | // [0]: http://godoc.org/github.com/cznic/golex 35 | // [1]: http://en.wikipedia.org/wiki/Lexical_analysis 36 | // [2]: http://golang.org/cmd/yacc/ 37 | // [3]: https://github.com/cznic/golex/blob/master/lex/example.l 38 | // [4]: http://golang.org/pkg/io/#RuneReader 39 | // [5]: https://github.com/cznic/golex/blob/master/lex/dfa 40 | package lex 41 | -------------------------------------------------------------------------------- /examples/calc/README: -------------------------------------------------------------------------------- 1 | Example for binding golex and goyacc. 2 | 3 | $ # To run it 4 | $ make run 5 | 6 | ---- 7 | 8 | The calc example is also useful as a demonstration of the performed DFA 9 | optimization. 
10 | 11 | $ golex -DFA -nodfaopt tokenizer.l 12 | StartConditions: 13 | INITIAL, scId:0, stateId:1 14 | DFA: 15 | [1] 16 | "\t", "\r", " ", --> 11 17 | "0"..."9", --> 2 18 | "."--> 12 19 | [2] 20 | "E", "e", --> 8 21 | "."--> 3 22 | "0"..."9", --> 2 23 | [3] 24 | "E", "e", --> 5 25 | "0"..."9", --> 4 26 | [4] 27 | "0"..."9", --> 4 28 | "E", "e", --> 5 29 | [5] 30 | "0"..."9", --> 6 31 | "+", "-", --> 7 32 | [6] 33 | "0"..."9", --> 6 34 | [7] 35 | "0"..."9", --> 6 36 | [8] 37 | "+", "-", --> 9 38 | "0"..."9", --> 10 39 | [9] 40 | "0"..."9", --> 10 41 | [10] 42 | "0"..."9", --> 10 43 | [11] 44 | "\t", "\r", " ", --> 11 45 | [12] 46 | "0"..."9", --> 13 47 | [13] 48 | "0"..."9", --> 13 49 | "E", "e", --> 14 50 | [14] 51 | "+", "-", --> 16 52 | "0"..."9", --> 15 53 | [15] 54 | "0"..."9", --> 15 55 | [16] 56 | "0"..."9", --> 15 57 | state 3 accepts rule 2 58 | state 6 accepts rule 2 59 | state 2 accepts rule 2 60 | state 15 accepts rule 2 61 | state 13 accepts rule 2 62 | state 10 accepts rule 2 63 | state 11 accepts rule 1 64 | state 4 accepts rule 2 65 | 66 | ---- vs ---- 67 | 68 | $ golex -DFA tokenizer.l 69 | StartConditions: 70 | INITIAL, scId:0, stateId:1 71 | DFA: 72 | [1] 73 | "0"..."9", --> 8 74 | "."--> 3 75 | "\t", "\r", " ", --> 2 76 | [2] 77 | "\t", "\r", " ", --> 2 78 | [3] 79 | "0"..."9", --> 4 80 | [4] 81 | "E", "e", --> 5 82 | "0"..."9", --> 4 83 | [5] 84 | "0"..."9", --> 7 85 | "+", "-", --> 6 86 | [6] 87 | "0"..."9", --> 7 88 | [7] 89 | "0"..."9", --> 7 90 | [8] 91 | "."--> 4 92 | "E", "e", --> 5 93 | "0"..."9", --> 8 94 | state 2 accepts rule 1 95 | state 8 accepts rule 2 96 | state 7 accepts rule 2 97 | state 4 accepts rule 2 98 | 99 | $ 100 | -------------------------------------------------------------------------------- /examples/c-like/example.l: -------------------------------------------------------------------------------- 1 | %{ 2 | // Copyright (c) 2015 The golex Authors. All rights reserved. 
3 | // Use of this source code is governed by a BSD-style 4 | // license that can be found in the LICENSE file. 5 | 6 | // This is an example program using golex run time library. 7 | package main 8 | 9 | import ( 10 | "bufio" 11 | "go/token" 12 | "io" 13 | "unicode" 14 | 15 | "github.com/cznic/golex/lex" 16 | ) 17 | 18 | // Allocate Character classes anywhere in [0x80, 0xFF]. 19 | const ( 20 | classUnicodeLeter = iota + 0x80 21 | classUnicodeDigit 22 | classOther 23 | ) 24 | 25 | type lexer struct { 26 | *lex.Lexer 27 | } 28 | 29 | func rune2Class(r rune) int { 30 | if r >= 0 && r < 0x80 { // Keep ASCII as it is. 31 | return int(r) 32 | } 33 | if unicode.IsLetter(r) { 34 | return classUnicodeLeter 35 | } 36 | if unicode.IsDigit(r) { 37 | return classUnicodeDigit 38 | } 39 | return classOther 40 | } 41 | 42 | func newLexer(src io.Reader, dst io.Writer, fName string) *lexer { 43 | file := token.NewFileSet().AddFile(fName, -1, 1<<31-1) 44 | lx, err := lex.New(file, bufio.NewReader(src), lex.RuneClass(rune2Class)) 45 | if (err != nil) { panic(err) } 46 | return &lexer{lx} 47 | } 48 | 49 | func (l *lexer) Lex(lval *yySymType) int { 50 | c := l.Enter() 51 | 52 | %} 53 | 54 | %yyc c 55 | %yyn c = l.Next() 56 | %yym l.Mark() 57 | 58 | digit [0-9]|{unicodeDigit} 59 | identifier {letter}({letter}|{digit})* 60 | int {digit}+ 61 | letter [_a-zA-Z]|{unicodeLetter} 62 | unicodeDigit \x81 63 | unicodeLetter \x80 64 | op [-+*/] 65 | 66 | %% 67 | c = l.Rule0() 68 | 69 | [ \t\r\n]+ 70 | [/][/][^\n]+ lval.token = string(l.TokenBytes(nil)); return COMMENT 71 | func lval.token = string(l.TokenBytes(nil)); return FUNC 72 | {identifier} lval.token = string(l.TokenBytes(nil)); return IDENT 73 | {int} lval.token = string(l.TokenBytes(nil)); return INT 74 | {op} lval.token = string(l.TokenBytes(nil)); return OP 75 | 76 | %% 77 | if c, ok := l.Abort(); ok { return int(c) } 78 | goto yyAction 79 | } 80 | -------------------------------------------------------------------------------- 
/examples/c-like/example.y: -------------------------------------------------------------------------------- 1 | %{ 2 | package main 3 | 4 | import ( 5 | "bytes" 6 | "fmt" 7 | "os" 8 | "io" 9 | ) 10 | 11 | type node struct { 12 | name string 13 | children []node 14 | } 15 | 16 | func (n node) String() string { 17 | buf := new(bytes.Buffer) 18 | n.print(buf, " ") 19 | return buf.String() 20 | } 21 | 22 | func (n node) print(out io.Writer, indent string) { 23 | fmt.Fprintf(out, "\n%v%v", indent, n.name) 24 | for _, nn := range n.children { nn.print(out, indent + " ") } 25 | } 26 | 27 | func Node(name string) node { return node{name: name} } 28 | func (n node) append(nn...node) node { n.children = append(n.children, nn...); return n } 29 | 30 | %} 31 | 32 | %union{ 33 | node node 34 | token string 35 | } 36 | 37 | %token FUNC INT IDENT OP COMMENT 38 | 39 | %type FUNC INT IDENT OP COMMENT 40 | %type Input Func Args Statements Expr Call ExprList Statement 41 | 42 | %% 43 | 44 | Input: /* empty */ { } 45 | | Input Func { fmt.Println($2) } 46 | 47 | Func: FUNC IDENT '(' Args ')' '{' Statements '}' { $$ = Node("func").append(Node("name").append(Node($2))).append($4, $7) } 48 | 49 | Args: /* empty */ { $$ = Node("args") } 50 | | Args ',' IDENT { $$ = $1.append(Node($3)) } 51 | 52 | Statements: /* empty */ { $$ = Node("statements") } 53 | | Statements Statement { $$ = $1.append($2) } 54 | 55 | Statement: IDENT '=' Expr { $$ = Node("assign").append(Node($1), $3) } 56 | | COMMENT { $$ = Node($1) } 57 | 58 | Expr: INT { $$ = Node($1) } 59 | | Call 60 | | Expr OP INT { $$ = Node($2).append($1, Node($3)) } 61 | | Expr OP Call { $$ = Node($2).append($1, $3) } 62 | 63 | Call: IDENT '(' ')' { $$ = Node("call").append(Node("name").append(Node($1))) } 64 | | IDENT '(' ExprList ')' { $$ = Node("call").append(Node("name").append(Node($1))).append($3) } 65 | 66 | ExprList: Expr { $$ = Node("expressions").append($1) } 67 | | ExprList ',' Expr { $$ = $1.append($3) } 68 | 69 | %% 70 | 71 | 
// q returns the code point c spelled as a Go rune literal (for example 'a',
// '\n' or '\u0080') for use in the generated scanner's case expressions.
//
// The single and double quote characters are special-cased because the
// generic path below borrows its escaping from %q applied to a string, which
// escapes '"' but not '\''; a rune literal needs the opposite.
func q(c uint32) string {
	switch c {
	default:
		// Convert through rune explicitly: a direct string(c) on an integer
		// type other than rune/byte is rejected by go vet (stringintconv)
		// even though it produces the same one-rune string.
		s := fmt.Sprintf("%q", string(rune(c)))
		// Swap the surrounding double quotes for single quotes.
		return "'" + s[1:len(s)-1] + "'"
	case '\'':
		return "'\\''"
	case '"':
		return "'\"'"
	}
}
stdout and quit") 74 | flag.BoolVar(&hflag, "h", false, "show help and exit") 75 | flag.StringVar(&oflag, "o", oFile, "lexer output") 76 | flag.BoolVar(&tflag, "t", false, "write scanner on stdout instead of "+oFile) 77 | flag.BoolVar(&vflag, "v", false, "write summary of scanner statistics to stderr") 78 | flag.BoolVar(&nodfaopt, "nodfaopt", false, "disable DFA optimization - don't use this for production code") 79 | //flag.BoolVar(&bits32, "32bit", false, "assume unicode rune lexer (partially implemented)") 80 | flag.Parse() 81 | if hflag || flag.NArg() > 1 { 82 | flag.Usage() 83 | fmt.Fprintf(stderr, "\n%s [-o out_name] [other_options] [in_name]\n", os.Args[0]) 84 | fmt.Fprintln(stderr, " If no in_name is given then read from stdin.") 85 | stderr.Flush() 86 | os.Exit(1) 87 | } 88 | 89 | var ( 90 | lfile *bufio.Reader // source .l 91 | gofile *bufio.Writer // dest .go 92 | ) 93 | 94 | lname := flag.Arg(0) 95 | if lname == "" { 96 | lfile = stdin 97 | } else { 98 | l, err := os.Open(lname) 99 | if err != nil { 100 | log.Fatal(err) 101 | } 102 | 103 | defer l.Close() 104 | lfile = bufio.NewReader(l) 105 | } 106 | 107 | l, err := lex.NewL(lname, lfile, nodfaopt, bits32) 108 | if err != nil { 109 | log.Fatal(err) 110 | } 111 | 112 | if dfaflag { 113 | fmt.Println(l.DfaString()) 114 | os.Exit(1) 115 | } 116 | 117 | if tflag { 118 | gofile = stdout 119 | } else { 120 | if oflag == "" { 121 | oflag = oFile 122 | } 123 | g, err := os.Create(oflag) 124 | if err != nil { 125 | log.Fatal(err) 126 | } 127 | 128 | defer g.Close() 129 | gofile = bufio.NewWriter(g) 130 | } 131 | defer gofile.Flush() 132 | var buf bytes.Buffer 133 | renderGo{noRender{&buf}, map[int]bool{}}.render(lname, l) 134 | dst, err := format.Source(buf.Bytes()) 135 | switch { 136 | case err != nil: 137 | fmt.Fprintf(os.Stderr, "%v\n", err) 138 | if _, err := gofile.Write(buf.Bytes()); err != nil { 139 | log.Fatal(err) 140 | } 141 | default: 142 | if _, err := gofile.Write(dst); err != nil { 143 | 
log.Fatal(err) 144 | } 145 | } 146 | 147 | if vflag { 148 | fmt.Fprintln(os.Stderr, l.String()) 149 | } 150 | } 151 | -------------------------------------------------------------------------------- /lex/example.l: -------------------------------------------------------------------------------- 1 | %yyc c 2 | %yyn c = l.Next() 3 | %yym l.Mark() 4 | 5 | %{ 6 | // Copyright (c) 2015 The golex Authors. All rights reserved. 7 | // Use of this source code is governed by a BSD-style 8 | // license that can be found in the LICENSE file. 9 | 10 | // This is an example program using golex run time library. It is generated by 11 | // 12 | // $ golex -o example_test.go example.l 13 | // 14 | // The complete input file, example.l, is at [3], the scan function excerpt is: 15 | // 16 | // func (l *lexer) scan() lex.Char { 17 | // c := l.Enter() 18 | // %} 19 | // 20 | // digit [0-9]|{unicodeDigit} 21 | // identifier {letter}({letter}|{digit})* 22 | // int [0-9]+ 23 | // letter [_a-zA-Z]|{unicodeLetter} 24 | // unicodeDigit \x81 25 | // unicodeLetter \x80 26 | // 27 | // %% 28 | // 29 | // c = l.Rule0() 30 | // 31 | // [ \t\r\n]+ 32 | // 33 | // func return l.char(FUNC) 34 | // {identifier} return l.char(IDENT) 35 | // {int} return l.char(INT) 36 | // 37 | // 38 | // %% 39 | // if c, ok := l.Abort(); ok { 40 | // return l.char(c) 41 | // } 42 | // 43 | // goto yyAction 44 | // } 45 | package lex_test 46 | 47 | import ( 48 | "bytes" 49 | "fmt" 50 | "go/token" 51 | "unicode" 52 | 53 | "github.com/cznic/golex/lex" 54 | ) 55 | 56 | // Allocate Character classes anywhere in [0x80, 0xFF]. 57 | const ( 58 | classUnicodeLeter = iota + 0x80 59 | classUnicodeDigit 60 | classOther 61 | ) 62 | 63 | // Parser token values. 64 | const ( 65 | FUNC = iota + 0xE002 66 | INT 67 | IDENT 68 | ) 69 | 70 | // For pretty printing. 
71 | func str(r rune) string { 72 | switch r { 73 | case FUNC: 74 | return "FUNC" 75 | case INT: 76 | return "INT" 77 | case IDENT: 78 | return "IDENT" 79 | case lex.RuneEOF: 80 | return "EOF" 81 | } 82 | 83 | return fmt.Sprintf("%q", r) 84 | } 85 | 86 | type lexer struct { 87 | *lex.Lexer 88 | } 89 | 90 | func (l *lexer) char(r int) lex.Char { 91 | return lex.NewChar(l.First.Pos(), rune(r)) 92 | } 93 | 94 | func rune2Class(r rune) int { 95 | if r >= 0 && r < 0x80 { // Keep ASCII as it is. 96 | return int(r) 97 | } 98 | 99 | if unicode.IsLetter(r) { 100 | return classUnicodeLeter 101 | } 102 | 103 | if unicode.IsDigit(r) { 104 | return classUnicodeDigit 105 | } 106 | 107 | return classOther 108 | } 109 | 110 | const src = ` 111 | 112 | func Xφ42() int { return 314 } 113 | 114 | ` 115 | 116 | func Example_completeGeneratedProgram() { // main 117 | fset := token.NewFileSet() 118 | file := fset.AddFile("example.go", -1, len(src)) 119 | src := bytes.NewBufferString(src) 120 | lx, err := lex.New(file, src, lex.RuneClass(rune2Class)) 121 | if err != nil { 122 | panic(err) 123 | } 124 | 125 | l := &lexer{lx} 126 | for { 127 | c := l.scan() 128 | fmt.Printf("%v: %v %q\n", file.Position(c.Pos()), str(c.Rune), l.TokenBytes(nil)) 129 | if c.Rune == lex.RuneEOF { 130 | return 131 | } 132 | } 133 | // Output: 134 | // example.go:3:1: FUNC "func" 135 | // example.go:3:6: IDENT "Xφ42" 136 | // example.go:3:11: '(' "(" 137 | // example.go:3:12: ')' ")" 138 | // example.go:3:14: IDENT "int" 139 | // example.go:3:18: '{' "{" 140 | // example.go:3:20: IDENT "return" 141 | // example.go:3:27: INT "314" 142 | // example.go:3:31: '}' "}" 143 | // example.go:4:2: EOF "\xff" 144 | } 145 | 146 | 147 | func (l *lexer) scan() lex.Char { 148 | c := l.Enter() 149 | %} 150 | 151 | digit [0-9]|{unicodeDigit} 152 | identifier {letter}({letter}|{digit})* 153 | int [0-9]+ 154 | letter [_a-zA-Z]|{unicodeLetter} 155 | unicodeDigit \x81 156 | unicodeLetter \x80 157 | 158 | %% 159 | 160 | c = l.Rule0() 
161 | 162 | [ \t\r\n]+ 163 | 164 | func return l.char(FUNC) 165 | {identifier} return l.char(IDENT) 166 | {int} return l.char(INT) 167 | 168 | 169 | %% 170 | if c, ok := l.Abort(); ok { 171 | return l.char(c) 172 | } 173 | 174 | goto yyAction 175 | } 176 | -------------------------------------------------------------------------------- /lex/example_test.go: -------------------------------------------------------------------------------- 1 | // CAUTION: Generated file - DO NOT EDIT. 2 | 3 | // Copyright (c) 2015 The golex Authors. All rights reserved. 4 | // Use of this source code is governed by a BSD-style 5 | // license that can be found in the LICENSE file. 6 | 7 | // This is an example program using golex run time library. It is generated by 8 | // 9 | // $ golex -o example_test.go example.l 10 | // 11 | // The complete input file, example.l, is at [3], the scan function excerpt is: 12 | // 13 | // func (l *lexer) scan() lex.Char { 14 | // c := l.Enter() 15 | // %} 16 | // 17 | // digit [0-9]|{unicodeDigit} 18 | // identifier {letter}({letter}|{digit})* 19 | // int [0-9]+ 20 | // letter [_a-zA-Z]|{unicodeLetter} 21 | // unicodeDigit \x81 22 | // unicodeLetter \x80 23 | // 24 | // %% 25 | // 26 | // c = l.Rule0() 27 | // 28 | // [ \t\r\n]+ 29 | // 30 | // func return l.char(FUNC) 31 | // {identifier} return l.char(IDENT) 32 | // {int} return l.char(INT) 33 | // 34 | // 35 | // %% 36 | // if c, ok := l.Abort(); ok { 37 | // return l.char(c) 38 | // } 39 | // 40 | // goto yyAction 41 | // } 42 | package lex_test 43 | 44 | import ( 45 | "bytes" 46 | "fmt" 47 | "go/token" 48 | "unicode" 49 | 50 | "github.com/cznic/golex/lex" 51 | ) 52 | 53 | // Allocate Character classes anywhere in [0x80, 0xFF]. 54 | const ( 55 | classUnicodeLeter = iota + 0x80 56 | classUnicodeDigit 57 | classOther 58 | ) 59 | 60 | // Parser token values. 61 | const ( 62 | FUNC = iota + 0xE002 63 | INT 64 | IDENT 65 | ) 66 | 67 | // For pretty printing. 
68 | func str(r rune) string { 69 | switch r { 70 | case FUNC: 71 | return "FUNC" 72 | case INT: 73 | return "INT" 74 | case IDENT: 75 | return "IDENT" 76 | case lex.RuneEOF: 77 | return "EOF" 78 | } 79 | 80 | return fmt.Sprintf("%q", r) 81 | } 82 | 83 | type lexer struct { 84 | *lex.Lexer 85 | } 86 | 87 | func (l *lexer) char(r int) lex.Char { 88 | return lex.NewChar(l.First.Pos(), rune(r)) 89 | } 90 | 91 | func rune2Class(r rune) int { 92 | if r >= 0 && r < 0x80 { // Keep ASCII as it is. 93 | return int(r) 94 | } 95 | 96 | if unicode.IsLetter(r) { 97 | return classUnicodeLeter 98 | } 99 | 100 | if unicode.IsDigit(r) { 101 | return classUnicodeDigit 102 | } 103 | 104 | return classOther 105 | } 106 | 107 | const src = ` 108 | 109 | func Xφ42() int { return 314 } 110 | 111 | ` 112 | 113 | func Example_completeGeneratedProgram() { // main 114 | fset := token.NewFileSet() 115 | file := fset.AddFile("example.go", -1, len(src)) 116 | src := bytes.NewBufferString(src) 117 | lx, err := lex.New(file, src, lex.RuneClass(rune2Class)) 118 | if err != nil { 119 | panic(err) 120 | } 121 | 122 | l := &lexer{lx} 123 | for { 124 | c := l.scan() 125 | fmt.Printf("%v: %v %q\n", file.Position(c.Pos()), str(c.Rune), l.TokenBytes(nil)) 126 | if c.Rune == lex.RuneEOF { 127 | return 128 | } 129 | } 130 | // Output: 131 | // example.go:3:1: FUNC "func" 132 | // example.go:3:6: IDENT "Xφ42" 133 | // example.go:3:11: '(' "(" 134 | // example.go:3:12: ')' ")" 135 | // example.go:3:14: IDENT "int" 136 | // example.go:3:18: '{' "{" 137 | // example.go:3:20: IDENT "return" 138 | // example.go:3:27: INT "314" 139 | // example.go:3:31: '}' "}" 140 | // example.go:4:2: EOF "\xff" 141 | } 142 | 143 | func (l *lexer) scan() lex.Char { 144 | c := l.Enter() 145 | 146 | yystate0: 147 | yyrule := -1 148 | _ = yyrule 149 | c = l.Rule0() 150 | 151 | goto yystart1 152 | 153 | goto yystate0 // silence unused label error 154 | goto yyAction // silence unused label error 155 | yyAction: 156 | switch yyrule { 
157 | case 1: 158 | goto yyrule1 159 | case 2: 160 | goto yyrule2 161 | case 3: 162 | goto yyrule3 163 | case 4: 164 | goto yyrule4 165 | } 166 | goto yystate1 // silence unused label error 167 | yystate1: 168 | c = l.Next() 169 | yystart1: 170 | switch { 171 | default: 172 | goto yyabort 173 | case c == '\t' || c == '\n' || c == '\r' || c == ' ': 174 | goto yystate2 175 | case c == 'f': 176 | goto yystate5 177 | case c >= '0' && c <= '9': 178 | goto yystate3 179 | case c >= 'A' && c <= 'Z' || c == '_' || c >= 'a' && c <= 'e' || c >= 'g' && c <= 'z' || c == '\u0080': 180 | goto yystate4 181 | } 182 | 183 | yystate2: 184 | c = l.Next() 185 | yyrule = 1 186 | l.Mark() 187 | switch { 188 | default: 189 | goto yyrule1 190 | case c == '\t' || c == '\n' || c == '\r' || c == ' ': 191 | goto yystate2 192 | } 193 | 194 | yystate3: 195 | c = l.Next() 196 | yyrule = 4 197 | l.Mark() 198 | switch { 199 | default: 200 | goto yyrule4 201 | case c >= '0' && c <= '9': 202 | goto yystate3 203 | } 204 | 205 | yystate4: 206 | c = l.Next() 207 | yyrule = 3 208 | l.Mark() 209 | switch { 210 | default: 211 | goto yyrule3 212 | case c >= '0' && c <= '9' || c >= 'A' && c <= 'Z' || c == '_' || c >= 'a' && c <= 'z' || c == '\u0080' || c == '\u0081': 213 | goto yystate4 214 | } 215 | 216 | yystate5: 217 | c = l.Next() 218 | yyrule = 3 219 | l.Mark() 220 | switch { 221 | default: 222 | goto yyrule3 223 | case c == 'u': 224 | goto yystate6 225 | case c >= '0' && c <= '9' || c >= 'A' && c <= 'Z' || c == '_' || c >= 'a' && c <= 't' || c >= 'v' && c <= 'z' || c == '\u0080' || c == '\u0081': 226 | goto yystate4 227 | } 228 | 229 | yystate6: 230 | c = l.Next() 231 | yyrule = 3 232 | l.Mark() 233 | switch { 234 | default: 235 | goto yyrule3 236 | case c == 'n': 237 | goto yystate7 238 | case c >= '0' && c <= '9' || c >= 'A' && c <= 'Z' || c == '_' || c >= 'a' && c <= 'm' || c >= 'o' && c <= 'z' || c == '\u0080' || c == '\u0081': 239 | goto yystate4 240 | } 241 | 242 | yystate7: 243 | c = l.Next() 
244 | yyrule = 3 245 | l.Mark() 246 | switch { 247 | default: 248 | goto yyrule3 249 | case c == 'c': 250 | goto yystate8 251 | case c >= '0' && c <= '9' || c >= 'A' && c <= 'Z' || c == '_' || c == 'a' || c == 'b' || c >= 'd' && c <= 'z' || c == '\u0080' || c == '\u0081': 252 | goto yystate4 253 | } 254 | 255 | yystate8: 256 | c = l.Next() 257 | yyrule = 2 258 | l.Mark() 259 | switch { 260 | default: 261 | goto yyrule2 262 | case c >= '0' && c <= '9' || c >= 'A' && c <= 'Z' || c == '_' || c >= 'a' && c <= 'z' || c == '\u0080' || c == '\u0081': 263 | goto yystate4 264 | } 265 | 266 | yyrule1: // [ \t\r\n]+ 267 | 268 | goto yystate0 269 | yyrule2: // func 270 | { 271 | return l.char(FUNC) 272 | } 273 | yyrule3: // {identifier} 274 | { 275 | return l.char(IDENT) 276 | } 277 | yyrule4: // {int} 278 | { 279 | return l.char(INT) 280 | } 281 | panic("unreachable") 282 | 283 | goto yyabort // silence unused label error 284 | 285 | yyabort: // no lexem recognized 286 | if c, ok := l.Abort(); ok { 287 | return l.char(c) 288 | } 289 | 290 | goto yyAction 291 | } 292 | -------------------------------------------------------------------------------- /doc.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2014 The golex Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | // Golex is a lex/flex like (not fully POSIX lex compatible) utility. 6 | // It renders .l formatted data (http://flex.sourceforge.net/manual/Format.html#Format) to Go source code. 7 | // The .l data can come from a file named in a command line argument. 8 | // If no non-opt args are given, golex reads stdin. 
9 | // 10 | // Options: 11 | // -DFA print the DFA to stdout and quit 12 | // -nodfaopt disable DFA optimization - don't use this for production code 13 | // -o fname write to file `fname`, default is `lex.yy.go` 14 | // -t write to stdout 15 | // -v write some scanner statistics to stderr 16 | // -32bit assume unicode rune lexer (partially implemented, disabled) 17 | // 18 | // To get the latest golex version: 19 | // 20 | // $ go get -u github.com/cznic/golex 21 | // 22 | // Run time library 23 | // 24 | // Please see http://godoc.org/github.com/cznic/golex/lex. 25 | // 26 | // Changelog 27 | // 28 | // 2014-11-18: Golex now supports %yym - a hook which can be used to mark an 29 | // accepting state. 30 | // 31 | // Implementing POSIX-like longest match 32 | // 33 | // Consider for example this .l file: 34 | // 35 | // $ cat main.l 36 | // %{ 37 | // package main 38 | // 39 | // import ( 40 | // "flag" 41 | // "fmt" 42 | // ) 43 | // 44 | // var ( 45 | // c byte 46 | // src string 47 | // in []byte 48 | // un []byte 49 | // mark int 50 | // ) 51 | // 52 | // func lex() (s string) { 53 | // %} 54 | // 55 | // %yyn next() 56 | // %yyc c 57 | // %yym fmt.Printf("\tstate accepts: %q\n", in); mark = len(in) 58 | // 59 | // %% 60 | // in = in[:0] 61 | // mark = -1 62 | // 63 | // \0 64 | // return "EOF" 65 | // 66 | // a([bcd]*z([efg]*z)?)? 67 | // return fmt.Sprintf("match(%q)", in) 68 | // 69 | // %% 70 | // if mark >= 0 { 71 | // if len(in) > mark { 72 | // unget(c) 73 | // for i := len(in)-1; i >= mark; i-- { 74 | // unget(in[i]) 75 | // } 76 | // next() 77 | // } 78 | // in = in[:mark] 79 | // goto yyAction // Hook: Execute the semantic action of the last matched rule. 
80 | // } 81 | // 82 | // switch n := len(in); n { 83 | // case 0: // [] z 84 | // s = fmt.Sprintf("%q", c) 85 | // next() 86 | // case 1: // [x] z 87 | // s = fmt.Sprintf("%q", in[0]) 88 | // default: // [x, y, ...], z 89 | // s = fmt.Sprintf("%q", in[0]) 90 | // unget(c) // z 91 | // for i := n - 1; i > 1; i-- { 92 | // unget(in[i]) // ... 93 | // } 94 | // c = in[1] // y 95 | // } 96 | // return s 97 | // } 98 | // 99 | // func next() { 100 | // if len(un) != 0 { 101 | // c = un[len(un)-1] 102 | // un = un[:len(un)-1] 103 | // return 104 | // } 105 | // 106 | // in = append(in, c) 107 | // if len(src) == 0 { 108 | // c = 0 109 | // return 110 | // } 111 | // 112 | // c = src[0] 113 | // fmt.Printf("\tnext: %q\n", c) 114 | // src = src[1:] 115 | // } 116 | // 117 | // func unget(b byte) { 118 | // un = append(un, b) 119 | // } 120 | // 121 | // func main() { 122 | // flag.Parse() 123 | // if flag.NArg() > 0 { 124 | // src = flag.Arg(0) 125 | // } 126 | // next() 127 | // for { 128 | // s := lex() 129 | // fmt.Println(s) 130 | // if s == "EOF" { 131 | // break 132 | // } 133 | // } 134 | // } 135 | // $ 136 | // 137 | // Execution and output: 138 | // 139 | // $ golex -o main.go main.l && go run main.go abzez0abzefgxy 140 | // next: 'a' 141 | // next: 'b' 142 | // state accepts: "a" 143 | // next: 'z' 144 | // next: 'e' 145 | // state accepts: "abz" 146 | // next: 'z' 147 | // next: '0' 148 | // state accepts: "abzez" 149 | // match("abzez") 150 | // next: 'a' 151 | // '0' 152 | // next: 'b' 153 | // state accepts: "a" 154 | // next: 'z' 155 | // next: 'e' 156 | // state accepts: "abz" 157 | // next: 'f' 158 | // next: 'g' 159 | // next: 'x' 160 | // match("abz") 161 | // 'e' 162 | // 'f' 163 | // 'g' 164 | // next: 'y' 165 | // 'x' 166 | // 'y' 167 | // state accepts: "\x00" 168 | // EOF 169 | // $ 170 | // 171 | // 2014-11-15: Golex's output is now gofmt'ed, if possible. 
172 | // 173 | // Missing/differing functionality of the current renderer (compared to flex): 174 | // - No runtime tokenizer package/environment 175 | // (but the freedom to have/write any fitting one's specific task(s)). 176 | // - The generated FSM picks the rules in the order of their appearance in the .l source, 177 | // but "flex picks the rule that matches the most text". 178 | // - And probably more. 179 | // Further limitations on the .l source are listed in the cznic/lex package godocs. 180 | // 181 | // A simple golex program example (make example1 && ./example1): 182 | // 183 | // %{ 184 | // package main 185 | // 186 | // import ( 187 | // "bufio" 188 | // "fmt" 189 | // "log" 190 | // "os" 191 | // ) 192 | // 193 | // var ( 194 | // src = bufio.NewReader(os.Stdin) 195 | // buf []byte 196 | // current byte 197 | // ) 198 | // 199 | // func getc() byte { 200 | // if current != 0 { 201 | // buf = append(buf, current) 202 | // } 203 | // current = 0 204 | // if b, err := src.ReadByte(); err == nil { 205 | // current = b 206 | // } 207 | // return current 208 | // } 209 | // 210 | // // %yyc is a "macro" to access the "current" character. 211 | // // 212 | // // %yyn is a "macro" to move to the "next" character. 213 | // // 214 | // // %yyb is a "macro" to return the beginning-of-line status (a bool typed value). 215 | // // It is used for patterns like `^re`. 216 | // // Example: %yyb prev == 0 || prev == '\n' 217 | // // 218 | // // %yyt is a "macro" to return the top/current start condition (an int typed value). 219 | // // It is used when there are patterns with conditions like `re`. 
220 | // // Example: %yyt startCond 221 | // 222 | // func main() { // This left brace is closed by *1 223 | // c := getc() // init 224 | // %} 225 | // 226 | // %yyc c 227 | // %yyn c = getc() 228 | // 229 | // D [0-9]+ 230 | // 231 | // %% 232 | // buf = buf[:0] // Code before the first rule is executed before every scan cycle (state 0 action) 233 | // 234 | // [ \t\n\r]+ // Ignore whitespace 235 | // 236 | // {D} fmt.Printf("int %q\n", buf) 237 | // 238 | // {D}\.{D}?|\.{D} fmt.Printf("float %q\n", buf) 239 | // 240 | // \0 return // Exit on EOF or any other error 241 | // 242 | // . fmt.Printf("%q\n", buf) // Printout any other unrecognized stuff 243 | // 244 | // %% 245 | // // The rendered scanner enters top of the user code section when 246 | // // lexem recognition fails. In this example it should never happen. 247 | // log.Fatal("scanner internal error") 248 | // 249 | // } // *1 this right brace 250 | package main 251 | -------------------------------------------------------------------------------- /examples/go/example2.l: -------------------------------------------------------------------------------- 1 | %{ 2 | // Copyright (c) 2011 CZ.NIC z.s.p.o. All rights reserved. 3 | // Use of this source code is governed by a BSD-style 4 | // license that can be found in the LICENSE file. 
5 | 6 | // blame: jnml, labs.nic.cz 7 | 8 | // +build ignore 9 | 10 | package main 11 | 12 | import ( 13 | "fmt" 14 | "io/ioutil" 15 | "os" 16 | "path/filepath" 17 | "runtime" 18 | "unicode" 19 | "unicode/utf8" 20 | ) 21 | 22 | var ( 23 | src []byte 24 | srclen int 25 | pos int 26 | pos0 int 27 | line int 28 | column int 29 | current byte 30 | sc int 31 | ) 32 | 33 | const ( 34 | SEP = 0xE000 + iota 35 | PUNCT 36 | KWD 37 | IDENT 38 | STRING 39 | CHAR 40 | INT 41 | FLOAT 42 | IMAG 43 | ) 44 | 45 | func getc() byte { 46 | pos++ 47 | if pos >= srclen { 48 | current = 0 49 | return 0 50 | } 51 | 52 | current = src[pos] 53 | if current == '\n' { 54 | line++ 55 | column = 1 56 | } else { 57 | column++ 58 | } 59 | return current 60 | } 61 | 62 | func begin(cond int) { 63 | sc = cond 64 | } 65 | 66 | func getRune() rune { 67 | if rune, size := utf8.DecodeRune(src[pos:]); size != 0 { 68 | pos += size 69 | return rune 70 | } 71 | 72 | return 0 73 | } 74 | 75 | func scan() (x int) { 76 | const ( 77 | INITIAL = iota 78 | QSTR 79 | CH 80 | ) 81 | 82 | c := current 83 | %} 84 | 85 | %yyt sc 86 | %yyc c 87 | %yyn c = getc() 88 | 89 | %x QSTR CH 90 | 91 | int_lit {decimal_lit}|{octal_lit}|{hex_lit} 92 | decimal_lit [1-9][0-9]* 93 | octal_lit 0[0-7]* 94 | hex_lit 0[xX][0-9a-fA-F]+ 95 | 96 | float_lit {decimals}"."{decimals}?{exponent}?|{decimals}{exponent}|"."{decimals}{exponent}? 
97 | decimals [0-9]+ 98 | exponent [eE][-+]?[0-9]+ 99 | 100 | imaginary_lit ({decimals}|{float_lit})i 101 | 102 | utf8_h2 [\xC2-\xDF] 103 | utf8_h3 [\xE0-\xEF] 104 | utf8_h4 [\xF0-\xF4] 105 | utf8_cont [\x80-\xBF] 106 | utf8_2 {utf8_h2}{utf8_cont} 107 | utf8_3 {utf8_h3}{utf8_cont}{utf8_cont} 108 | utf8_4 {utf8_h4}{utf8_cont}{utf8_cont}{utf8_cont} 109 | non_ascii {utf8_2}|{utf8_3}|{utf8_4} 110 | 111 | interpreted_string_lit ({string_unicode_value}|{byte_value})*"\"" 112 | string_unicode_value {string_unicode_char}|{string_interpreter_value} 113 | string_unicode_char [^"\x80-\xFF\\\n\r]|{non_ascii} 114 | string_interpreter_value {little_u_value}|{big_u_value}|{string_escaped_char} 115 | little_u_value "\u"{hex_digit}{hex_digit}{hex_digit}{hex_digit} 116 | hex_digit [0-9a-fA-F] 117 | big_u_value "\U"{hex_digit}{hex_digit}{hex_digit}{hex_digit}{hex_digit}{hex_digit}{hex_digit}{hex_digit} 118 | string_escaped_char \\(\"|{other_escaped_char}) 119 | other_escaped_char [abfnrtv\\] 120 | byte_value {octal_byte_value}|{hex_byte_value} 121 | octal_byte_value \\{octal_digit}{octal_digit}{octal_digit} 122 | octal_digit [0-7] 123 | hex_byte_value "\x"{hex_digit}{hex_digit} 124 | char_unicode_value [^'\x80-\xFF\\\n\r]|{char_interpreter_value}|{non_ascii} 125 | char_interpreter_value {little_u_value}|{big_u_value}|{char_escaped_char} 126 | char_escaped_char \\('|{other_escaped_char}) 127 | 128 | %% 129 | 130 | pos0 = pos 131 | 132 | <*>\0 return 0 133 | 134 | [ \t\n\r]+ | 135 | \/\/.* | 136 | \/\*([^*]|\*+[^*/])*\*+\/ 137 | 138 | "+" | 139 | "-" | 140 | "*" | 141 | "/" | 142 | "%" | 143 | "&" | 144 | "|" | 145 | "^" | 146 | "<<" | 147 | ">>" | 148 | "&^" | 149 | "+=" | 150 | "-=" | 151 | "*=" | 152 | "/=" | 153 | "%=" | 154 | "&=" | 155 | "|=" | 156 | "^=" | 157 | "<<=" | 158 | ">>=" | 159 | "&^=" | 160 | "&&" | 161 | "||" | 162 | "<-" | 163 | "++" | 164 | "--" | 165 | "==" | 166 | "<" | 167 | ">" | 168 | "=" | 169 | "!" 
| 170 | "!=" | 171 | "<=" | 172 | ">=" | 173 | ":=" | 174 | "..." | 175 | "(" | 176 | "[" | 177 | "{" | 178 | "," | 179 | "." | 180 | ")" | 181 | "]" | 182 | "}" | 183 | ";" | 184 | ":" return PUNCT 185 | 186 | break | 187 | case | 188 | chan | 189 | const | 190 | continue | 191 | default | 192 | defer | 193 | else | 194 | fallthrough | 195 | for | 196 | func | 197 | go | 198 | goto | 199 | if | 200 | import | 201 | interface | 202 | map | 203 | package | 204 | range | 205 | return | 206 | select | 207 | struct | 208 | switch | 209 | type | 210 | var return KWD 211 | 212 | \" begin(QSTR) 213 | 214 | {interpreted_string_lit} 215 | begin(INITIAL) 216 | return STRING 217 | 218 | `[^`]*` return STRING 219 | 220 | ' begin(CH) 221 | 222 | ({char_unicode_value}|{byte_value})' 223 | begin(INITIAL) 224 | return CHAR 225 | 226 | {int_lit} return INT 227 | {float_lit} return FLOAT 228 | {imaginary_lit} return IMAG 229 | 230 | [a-zA-Z_][a-zA-Z_0-9]* 231 | if current >= '\xC2' && current <= '\xF4' { 232 | pos-- 233 | column-- 234 | for { 235 | l, c, runepos := line, column, pos 236 | rune := getRune() 237 | if !(rune == '_' || unicode.IsLetter(rune) || unicode.IsDigit(rune)) { 238 | pos = runepos 239 | current = getc() 240 | line, column = l, c 241 | break 242 | } 243 | } 244 | } 245 | return IDENT 246 | 247 | {non_ascii} 248 | pos = pos0 249 | if rune := getRune(); !unicode.IsLetter(rune) { 250 | panic(fmt.Errorf("expected unicode letter, got %U", rune)) 251 | } 252 | 253 | for { 254 | l, c, runepos := line, column, pos 255 | rune := getRune() 256 | if !(rune == '_' || unicode.IsLetter(rune) || unicode.IsDigit(rune)) { 257 | pos = runepos 258 | current = getc() 259 | line, column = l, c 260 | break 261 | } 262 | } 263 | return IDENT 264 | 265 | %% 266 | return unicode.ReplacementChar 267 | } 268 | 269 | type visitor struct { 270 | count int 271 | tokCount int 272 | size int64 273 | } 274 | 275 | 276 | func (v *visitor) visitFile(path string, f os.FileInfo) { 277 | ok, err := 
filepath.Match("*.go", filepath.Base(path)) 278 | if err != nil { 279 | panic(err) 280 | } 281 | 282 | if !ok { 283 | return 284 | } 285 | 286 | file, err := os.Open(path) 287 | if err != nil { 288 | panic(err) 289 | } 290 | 291 | defer file.Close() 292 | src, err = ioutil.ReadAll(file) 293 | if err != nil { 294 | panic(err) 295 | } 296 | 297 | defer func() { 298 | if e := recover(); e != nil { 299 | e = fmt.Errorf("%s:%d:%d - scan fail(%s)", path, line, column, e) 300 | fmt.Println(e) 301 | os.Exit(1) 302 | } 303 | }() 304 | 305 | srclen = len(src) 306 | pos = 0 307 | line, column = 1, 1 308 | current = src[0] 309 | loop: 310 | for { 311 | switch x := scan(); { 312 | default: 313 | v.tokCount++ 314 | case x == 0: 315 | break loop 316 | case x == unicode.ReplacementChar: 317 | break loop 318 | case x < SEP || x > IMAG: 319 | panic(fmt.Errorf("%s:%d:%d - scan fail", path, line, column)) 320 | } 321 | } 322 | 323 | v.count++ 324 | v.size += f.Size() 325 | } 326 | 327 | 328 | func main() { 329 | 330 | defer func() { 331 | if e := recover(); e != nil { 332 | fmt.Println(e) 333 | os.Exit(1) 334 | } 335 | }() 336 | 337 | v := &visitor{} 338 | if err := filepath.Walk(runtime.GOROOT()+"/src", func(pth string, info os.FileInfo, err error) error { 339 | if err != nil { 340 | return err 341 | } 342 | 343 | if !info.IsDir() { 344 | v.visitFile(pth, info) 345 | } 346 | return nil 347 | }); err != nil { 348 | panic(err) 349 | } 350 | 351 | fmt.Printf("%d .go files, %d bytes, %d tokens\n", v.count, v.size, v.tokCount) 352 | } 353 | -------------------------------------------------------------------------------- /render.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2014 The golex Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 
// isReturn reports whether the last non-blank line of code is a return
// statement: the word "return" either alone on the line or followed by a
// space or a tab. The renderer uses the answer to decide whether a rule
// action still needs a trailing "goto yystate0".
func isReturn(code string) bool {
	const keyword = "return"

	lines := strings.Split(code, "\n")
	for i := len(lines) - 1; i >= 0; i-- {
		line := strings.TrimSpace(lines[i])
		if line == "" {
			continue // skip trailing blank lines
		}

		// Only this last non-blank line decides the result.
		if !strings.HasPrefix(line, keyword) {
			return false
		}

		rest := line[len(keyword):]
		return rest == "" || rest[0] == ' ' || rest[0] == '\t'
	}
	return false
}
|| len(edge.Ranges.R32) > len(defaultEdge.Ranges.R32) { 156 | defaultEdge = edge 157 | } 158 | for _, rng := range edge.Ranges.R32 { 159 | for c := rng.Lo; c <= rng.Hi; c += rng.Stride { 160 | delete(cases, rune(c)) 161 | } 162 | } 163 | } 164 | } 165 | if len(cases) != 0 { 166 | r.wprintf("goto yyabort\n") 167 | return nil 168 | } 169 | 170 | if defaultEdge != nil { 171 | r.wprintf("goto yystate%d // %s\n", defaultEdge.Target().Index, r.rangesEdgeString(defaultEdge, l)) 172 | return 173 | } 174 | 175 | panic("internal error") 176 | } 177 | 178 | func (r *renderGo) rangesEdgeString(edge *lexer.RangesEdge, l *lex.L) string { 179 | a := []string{} 180 | for _, rng := range edge.Ranges.R32 { 181 | if rng.Stride != 1 { 182 | panic("internal error") 183 | } 184 | 185 | if rng.Hi-rng.Lo == 1 { 186 | a = append(a, fmt.Sprintf("%s == %s || %s == %s", l.YYC, q(rng.Lo), l.YYC, q(rng.Hi))) 187 | continue 188 | } 189 | 190 | if rng.Hi-rng.Lo > 0 { 191 | a = append(a, fmt.Sprintf("%s >= %s && %s <= %s", l.YYC, q(rng.Lo), l.YYC, q(rng.Hi))) 192 | continue 193 | } 194 | 195 | // rng.Hi == rng.Lo 196 | a = append(a, fmt.Sprintf("%s == %s", l.YYC, q(rng.Lo))) 197 | } 198 | return strings.Replace(strings.Join(a, " || "), "%", "%%", -1) 199 | } 200 | 201 | func (r *renderGo) transitions(l *lex.L, state *lexer.NfaState) { 202 | r.wprintf("switch {\n") 203 | var defaultEdge lexer.Edger = r.defaultTransition(l, state) 204 | 205 | // Stabilize case order 206 | a := []string{} 207 | m := map[string]uint{} 208 | for _, edge0 := range state.Consuming { 209 | if edge0 == defaultEdge { 210 | continue 211 | } 212 | 213 | s := "" 214 | switch edge := edge0.(type) { 215 | default: 216 | log.Fatalf("unexpected type %T", edge0) 217 | case *lexer.RuneEdge: 218 | s = fmt.Sprintf("%s == %s", l.YYC, q(uint32(edge.Rune))) 219 | case *lexer.RangesEdge: 220 | s = fmt.Sprintf(r.rangesEdgeString(edge, l)) 221 | } 222 | a = append(a, s) 223 | m[s] = edge0.Target().Index 224 | } 225 | sort.Strings(a) 226 | 
for _, s := range a { 227 | r.wprintf("case %s:\ngoto yystate%d\n", s, m[s]) 228 | } 229 | 230 | r.wprintf("}\n\n") 231 | } 232 | 233 | func (r *renderGo) states(l *lex.L) { 234 | yym := l.YYM != "yym" 235 | r.wprintf("goto yystate%d // silence unused label error\n", 0) 236 | if yym { 237 | r.wprintf("goto yyAction // silence unused label error\n") 238 | r.wprintf("yyAction:\n") 239 | r.wprintf("switch yyrule {\n") 240 | for i := range l.Rules[1:] { 241 | r.wprintf("case %d:\ngoto yyrule%d\n", i+1, i+1) 242 | } 243 | r.wprintf("}\n") 244 | } 245 | for _, state := range l.Dfa { 246 | iState := int(state.Index) 247 | if _, ok := r.scStates[iState]; ok { 248 | r.wprintf("goto yystate%d // silence unused label error\n", iState) 249 | } 250 | r.wprintf("yystate%d:\n", iState) 251 | rule, ok := l.Accepts[state] 252 | if !ok || !l.Rules[rule].EOL { 253 | r.wprintf("%s\n", l.YYN) 254 | } 255 | if ok && l.YYM != "yym" { 256 | r.wprintf("yyrule = %d\n", rule) 257 | r.wprintf("%s\n", l.YYM) 258 | } 259 | if _, ok := r.scStates[iState]; ok { 260 | r.wprintf("yystart%d:\n", iState) 261 | } 262 | if len(state.Consuming) != 0 { 263 | r.transitions(l, state) 264 | } else { 265 | if rule, ok := l.Accepts[state]; ok { 266 | r.wprintf("goto yyrule%d\n\n", rule) 267 | } else { 268 | panic("internal error") 269 | } 270 | } 271 | } 272 | } 273 | 274 | func (r renderGo) render(srcname string, l *lex.L) { 275 | r.prolog(l) 276 | r.states(l) 277 | r.rules(l) 278 | r.scanFail(l) 279 | r.userCode(l) 280 | } 281 | -------------------------------------------------------------------------------- /lex/api.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2015 The golex Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 
4 | 5 | package lex 6 | 7 | import ( 8 | "bytes" 9 | "fmt" 10 | "go/token" 11 | "io" 12 | "os" 13 | ) 14 | 15 | // BOM handling modes which can be set by the BOMMode Option. Default is BOMIgnoreFirst. 16 | const ( 17 | BOMError = iota // BOM is an error anywhere. 18 | BOMIgnoreFirst // Skip BOM if at beginning, report as error if anywhere else. 19 | BOMPassAll // No special handling of BOM. 20 | BOMPassFirst // No special handling of BOM if at beginning, report as error if anywhere else. 21 | ) 22 | 23 | const ( 24 | NonASCII = 0x80 // DefaultRuneClass returns NonASCII for non ASCII runes. 25 | RuneEOF = -1 // Distinct from any valid Unicode rune value. 26 | ) 27 | 28 | // DefaultRuneClass returns the character class of r. If r is an ASCII code 29 | // then its class equals the ASCII code. Any other rune is of class NonASCII. 30 | // 31 | // DefaultRuneClass is the default implementation Lexer will use to convert 32 | // runes (21 bit entities) to scanner classes (8 bit entities). 33 | // 34 | // Non ASCII aware lexical analyzers will typically use their own 35 | // categorization function. To assign such custom function use the RuneClass 36 | // option. 37 | func DefaultRuneClass(r rune) int { 38 | if r >= 0 && r < 0x80 { 39 | return int(r) 40 | } 41 | 42 | return NonASCII 43 | } 44 | 45 | // Char represents a rune and its position. 46 | type Char struct { 47 | Rune rune 48 | pos int32 49 | } 50 | 51 | // NewChar returns a new Char value. 52 | func NewChar(pos token.Pos, r rune) Char { return Char{pos: int32(pos), Rune: r} } 53 | 54 | // IsValid reports whether c is not a zero Char. 55 | func (c Char) IsValid() bool { return c.Pos().IsValid() } 56 | 57 | // Pos returns the token.Pos associated with c. 58 | func (c Char) Pos() token.Pos { return token.Pos(c.pos) } 59 | 60 | // CharReader is a RuneReader providing additionally explicit position 61 | // information by returning a Char instead of a rune as its first result. 
62 | type CharReader interface { 63 | ReadChar() (c Char, size int, err error) 64 | } 65 | 66 | // Lexer supports golex[0] generated lexical analyzers. 67 | type Lexer struct { 68 | File *token.File // The *token.File passed to New. 69 | First Char // First remembers the lookahead char when Rule0 was invoked. 70 | Last Char // Last remembers the last Char returned by Next. 71 | Prev Char // Prev remembers the Char previous to Last. 72 | bomMode int // See the BOM* constants. 73 | bytesBuf bytes.Buffer // Used by TokenBytes. 74 | charSrc CharReader // Lexer alternative input. 75 | classf func(rune) int // 76 | errorf func(token.Pos, string) // 77 | lookahead Char // Lookahead if non zero. 78 | mark int // Longest match marker. 79 | off int // Used for File.AddLine. 80 | src io.RuneReader // Lexer input. 81 | tokenBuf []Char // Lexeme collector. 82 | ungetBuf []Char // Unget buffer. 83 | } 84 | 85 | // New returns a new *Lexer. The result can be amended using opts. 86 | // 87 | // Non Unicode Input 88 | // 89 | // To consume sources in other encodings and still have exact position 90 | // information, pass an io.RuneReader which returns the next input character 91 | // reencoded as a Unicode rune but returns the size (number of bytes used to 92 | // encode it) of the original character, not the size of its UTF-8 93 | // representation after being converted to a Unicode rune. Size is the second 94 | // returned value of io.RuneReader.ReadRune method[4]. 95 | // 96 | // When src also implements CharReader, its ReadChar method is used 97 | // instead of ReadRune. 
98 | func New(file *token.File, src io.RuneReader, opts ...Option) (*Lexer, error) { 99 | r := &Lexer{ 100 | File: file, 101 | bomMode: BOMIgnoreFirst, 102 | classf: DefaultRuneClass, 103 | src: src, 104 | } 105 | if x, ok := src.(CharReader); ok { 106 | r.charSrc = x 107 | } 108 | r.errorf = r.defaultErrorf 109 | for _, o := range opts { 110 | if err := o(r); err != nil { 111 | return nil, err 112 | } 113 | } 114 | return r, nil 115 | } 116 | 117 | // Abort handles the situation when the scanner does not successfully recognize 118 | // any token or when an attempt to find the longest match "overruns" from an 119 | // accepting state only to never reach an accepting state again. In the first 120 | // case the scanner was never in an accepting state since the last call to Rule0 121 | // and then (previousLookahead rune, true) is returned, effectively consuming a 122 | // single Char token, avoiding scanner stall. Otherwise there was at least one 123 | // accepting scanner state marked using Mark. In this case Abort rolls back the 124 | // lexer state to the marked state and returns (0, false). The scanner must 125 | // then execute a prescribed goto statement. For example: 126 | // 127 | // %yyc c 128 | // %yyn c = l.Next() 129 | // %yym l.Mark() 130 | // 131 | // %{ 132 | // package foo 133 | // 134 | // import (...) 135 | // 136 | // type lexer struct { 137 | // *lex.Lexer 138 | // ... 139 | // } 140 | // 141 | // func newLexer(...) *lexer { 142 | // return &lexer{ 143 | // lex.NewLexer(...), 144 | // ... 145 | // } 146 | // } 147 | // 148 | // func (l *lexer) scan() int { 149 | // c := l.Enter() 150 | // %} 151 | // 152 | // ... more lex definitions 153 | // 154 | // %% 155 | // 156 | // c = l.Rule0() 157 | // 158 | // ... 
lex rules 159 | // 160 | // %% 161 | // 162 | // if c, ok := l.Abort(); ok { 163 | // return c 164 | // } 165 | // 166 | // goto yyAction 167 | // } 168 | func (l *Lexer) Abort() (int, bool) { 169 | if l.mark >= 0 { 170 | if len(l.tokenBuf) > l.mark { 171 | l.Unget(l.lookahead) 172 | for i := len(l.tokenBuf) - 1; i >= l.mark; i-- { 173 | l.Unget(l.tokenBuf[i]) 174 | } 175 | } 176 | l.tokenBuf = l.tokenBuf[:l.mark] 177 | return 0, false 178 | } 179 | 180 | switch n := len(l.tokenBuf); n { 181 | case 0: // [] z 182 | c := l.lookahead 183 | l.Next() 184 | return int(c.Rune), true 185 | case 1: // [a] z 186 | return int(l.tokenBuf[0].Rune), true 187 | default: // [a, b, ...], z 188 | c := l.tokenBuf[0] // a 189 | l.Unget(l.lookahead) // z 190 | for i := n - 1; i > 1; i-- { 191 | l.Unget(l.tokenBuf[i]) // ... 192 | } 193 | l.lookahead = l.tokenBuf[1] // b 194 | l.tokenBuf = l.tokenBuf[:1] 195 | return int(c.Rune), true 196 | } 197 | } 198 | 199 | func (l *Lexer) class() int { return l.classf(l.lookahead.Rune) } 200 | 201 | func (l *Lexer) defaultErrorf(pos token.Pos, msg string) { 202 | l.Error(fmt.Sprintf("%v: %v", l.File.Position(pos), msg)) 203 | } 204 | 205 | // Enter ensures the lexer has a valid lookahead Char and returns its class. 206 | // Typical use in an .l file: 207 | // 208 | // func (l *lexer) scan() lex.Char { 209 | // c := l.Enter() 210 | // ... 211 | func (l *Lexer) Enter() int { 212 | if !l.lookahead.IsValid() { 213 | l.Next() 214 | } 215 | return l.class() 216 | } 217 | 218 | // Error implements yyLexer[2] by printing the msg to stderr. 219 | func (l *Lexer) Error(msg string) { 220 | fmt.Fprintf(os.Stderr, "%s\n", msg) 221 | } 222 | 223 | // Lookahead returns the current lookahead. 224 | func (l *Lexer) Lookahead() Char { 225 | if !l.lookahead.IsValid() { 226 | l.Next() 227 | } 228 | return l.lookahead 229 | } 230 | 231 | // Mark records the current state of scanner as accepting. It implements the 232 | // golex macro %yym. 
Typical usage in an .l file: 233 | // 234 | // %yym l.Mark() 235 | func (l *Lexer) Mark() { l.mark = len(l.tokenBuf) } 236 | 237 | func (l *Lexer) next() int { 238 | const bom = '\ufeff' 239 | 240 | if c := l.lookahead; c.IsValid() { 241 | l.tokenBuf = append(l.tokenBuf, c) 242 | } 243 | if n := len(l.ungetBuf); n != 0 { 244 | l.lookahead = l.ungetBuf[n-1] 245 | l.ungetBuf = l.ungetBuf[:n-1] 246 | return l.class() 247 | } 248 | 249 | if l.src == nil { 250 | return RuneEOF 251 | } 252 | 253 | var r rune 254 | var sz int 255 | var err error 256 | var pos token.Pos 257 | var c Char 258 | again: 259 | off0 := l.off 260 | switch cs := l.charSrc; { 261 | case cs != nil: 262 | c, sz, err = cs.ReadChar() 263 | r = c.Rune 264 | pos = c.Pos() 265 | default: 266 | r, sz, err = l.src.ReadRune() 267 | pos = l.File.Pos(l.off) 268 | } 269 | l.off += sz 270 | if err != nil { 271 | l.src = nil 272 | r = RuneEOF 273 | if err != io.EOF { 274 | l.errorf(pos, err.Error()) 275 | } 276 | } 277 | 278 | if r == bom { 279 | switch l.bomMode { 280 | default: 281 | fallthrough 282 | case BOMIgnoreFirst: 283 | if off0 != 0 { 284 | l.errorf(pos, "unicode (UTF-8) BOM in middle of file") 285 | } 286 | goto again 287 | case BOMPassAll: 288 | // nop 289 | case BOMPassFirst: 290 | if off0 != 0 { 291 | l.errorf(pos, "unicode (UTF-8) BOM in middle of file") 292 | goto again 293 | } 294 | case BOMError: 295 | switch { 296 | case off0 == 0: 297 | l.errorf(pos, "unicode (UTF-8) BOM at beginnig of file") 298 | default: 299 | l.errorf(pos, "unicode (UTF-8) BOM in middle of file") 300 | } 301 | goto again 302 | } 303 | } 304 | 305 | l.lookahead = NewChar(pos, r) 306 | if r == '\n' { 307 | l.File.AddLine(l.off) 308 | } 309 | return l.class() 310 | } 311 | 312 | // Next advances the scanner for one rune and returns the respective character 313 | // class of the new lookahead. 
Typical usage in an .l file: 314 | // 315 | // %yyn c = l.Next() 316 | func (l *Lexer) Next() int { 317 | l.Prev = l.Last 318 | r := l.next() 319 | l.Last = l.lookahead 320 | return r 321 | } 322 | 323 | // Offset returns the current reading offset of the lexer's source. 324 | func (l *Lexer) Offset() int { return l.off } 325 | 326 | // Rule0 initializes the scanner state before the attempt to recognize a token 327 | // starts. The token collecting buffer is cleared. Rule0 records the current 328 | // lookahead in l.First and returns its class. Typical usage in an .l file: 329 | // 330 | // ... lex definitions 331 | // 332 | // %% 333 | // 334 | // c := l.Rule0() 335 | // 336 | // first-pattern-regexp 337 | func (l *Lexer) Rule0() int { 338 | if !l.lookahead.IsValid() { 339 | l.Next() 340 | } 341 | l.First = l.lookahead 342 | l.mark = -1 343 | if len(l.tokenBuf) > 1<<18 { //DONE constant tuned 344 | l.tokenBuf = nil 345 | } else { 346 | l.tokenBuf = l.tokenBuf[:0] 347 | } 348 | return l.class() 349 | } 350 | 351 | // Token returns the currently collected token chars. The result is R/O. 352 | func (l *Lexer) Token() []Char { return l.tokenBuf } 353 | 354 | // TokenBytes returns the UTF-8 encoding of Token. If builder is not nil then 355 | // it's called instead to build the encoded token byte value into the buffer 356 | // passed to it. 357 | // 358 | // The Result is R/O. 359 | func (l *Lexer) TokenBytes(builder func(*bytes.Buffer)) []byte { 360 | if len(l.bytesBuf.Bytes()) < 1<<18 { //DONE constant tuned 361 | l.bytesBuf.Reset() 362 | } else { 363 | l.bytesBuf = bytes.Buffer{} 364 | } 365 | switch { 366 | case builder != nil: 367 | builder(&l.bytesBuf) 368 | default: 369 | for _, c := range l.Token() { 370 | l.bytesBuf.WriteRune(c.Rune) 371 | } 372 | } 373 | return l.bytesBuf.Bytes() 374 | } 375 | 376 | // Unget unreads all chars in c. 377 | func (l *Lexer) Unget(c ...Char) { 378 | l.ungetBuf = append(l.ungetBuf, c...) 
379 | l.lookahead = Char{} // Must invalidate lookahead. 380 | } 381 | 382 | // Option is a function which can be passed as an optional argument to New. 383 | type Option func(*Lexer) error 384 | 385 | // BOMMode option selects how the lexer handles BOMs. See the BOM* constants for details. 386 | func BOMMode(mode int) Option { 387 | return func(l *Lexer) error { 388 | l.bomMode = mode 389 | return nil 390 | } 391 | } 392 | 393 | // ErrorFunc option sets a function called when an error, for example an I/O 394 | // error, occurs. The default is to call Error with the position and message already 395 | // formatted as a string. 396 | func ErrorFunc(f func(token.Pos, string)) Option { 397 | return func(l *Lexer) error { 398 | l.errorf = f 399 | return nil 400 | } 401 | } 402 | 403 | // RuneClass option sets the function used to convert runes to character 404 | // classes. 405 | func RuneClass(f func(rune) int) Option { 406 | return func(l *Lexer) error { 407 | l.classf = f 408 | return nil 409 | } 410 | } 411 | -------------------------------------------------------------------------------- /examples/calc/fdl-1.3: -------------------------------------------------------------------------------- 1 | 2 | GNU Free Documentation License 3 | Version 1.3, 3 November 2008 4 | 5 | 6 | Copyright (C) 2000, 2001, 2002, 2007, 2008 Free Software Foundation, Inc. 7 | 8 | Everyone is permitted to copy and distribute verbatim copies 9 | of this license document, but changing it is not allowed. 10 | 11 | 0. PREAMBLE 12 | 13 | The purpose of this License is to make a manual, textbook, or other 14 | functional and useful document "free" in the sense of freedom: to 15 | assure everyone the effective freedom to copy and redistribute it, 16 | with or without modifying it, either commercially or noncommercially. 17 | Secondarily, this License preserves for the author and publisher a way 18 | to get credit for their work, while not being considered responsible 19 | for modifications made by others. 
20 | 21 | This License is a kind of "copyleft", which means that derivative 22 | works of the document must themselves be free in the same sense. It 23 | complements the GNU General Public License, which is a copyleft 24 | license designed for free software. 25 | 26 | We have designed this License in order to use it for manuals for free 27 | software, because free software needs free documentation: a free 28 | program should come with manuals providing the same freedoms that the 29 | software does. But this License is not limited to software manuals; 30 | it can be used for any textual work, regardless of subject matter or 31 | whether it is published as a printed book. We recommend this License 32 | principally for works whose purpose is instruction or reference. 33 | 34 | 35 | 1. APPLICABILITY AND DEFINITIONS 36 | 37 | This License applies to any manual or other work, in any medium, that 38 | contains a notice placed by the copyright holder saying it can be 39 | distributed under the terms of this License. Such a notice grants a 40 | world-wide, royalty-free license, unlimited in duration, to use that 41 | work under the conditions stated herein. The "Document", below, 42 | refers to any such manual or work. Any member of the public is a 43 | licensee, and is addressed as "you". You accept the license if you 44 | copy, modify or distribute the work in a way requiring permission 45 | under copyright law. 46 | 47 | A "Modified Version" of the Document means any work containing the 48 | Document or a portion of it, either copied verbatim, or with 49 | modifications and/or translated into another language. 50 | 51 | A "Secondary Section" is a named appendix or a front-matter section of 52 | the Document that deals exclusively with the relationship of the 53 | publishers or authors of the Document to the Document's overall 54 | subject (or to related matters) and contains nothing that could fall 55 | directly within that overall subject. 
(Thus, if the Document is in 56 | part a textbook of mathematics, a Secondary Section may not explain 57 | any mathematics.) The relationship could be a matter of historical 58 | connection with the subject or with related matters, or of legal, 59 | commercial, philosophical, ethical or political position regarding 60 | them. 61 | 62 | The "Invariant Sections" are certain Secondary Sections whose titles 63 | are designated, as being those of Invariant Sections, in the notice 64 | that says that the Document is released under this License. If a 65 | section does not fit the above definition of Secondary then it is not 66 | allowed to be designated as Invariant. The Document may contain zero 67 | Invariant Sections. If the Document does not identify any Invariant 68 | Sections then there are none. 69 | 70 | The "Cover Texts" are certain short passages of text that are listed, 71 | as Front-Cover Texts or Back-Cover Texts, in the notice that says that 72 | the Document is released under this License. A Front-Cover Text may 73 | be at most 5 words, and a Back-Cover Text may be at most 25 words. 74 | 75 | A "Transparent" copy of the Document means a machine-readable copy, 76 | represented in a format whose specification is available to the 77 | general public, that is suitable for revising the document 78 | straightforwardly with generic text editors or (for images composed of 79 | pixels) generic paint programs or (for drawings) some widely available 80 | drawing editor, and that is suitable for input to text formatters or 81 | for automatic translation to a variety of formats suitable for input 82 | to text formatters. A copy made in an otherwise Transparent file 83 | format whose markup, or absence of markup, has been arranged to thwart 84 | or discourage subsequent modification by readers is not Transparent. 85 | An image format is not Transparent if used for any substantial amount 86 | of text. A copy that is not "Transparent" is called "Opaque". 
87 | 88 | Examples of suitable formats for Transparent copies include plain 89 | ASCII without markup, Texinfo input format, LaTeX input format, SGML 90 | or XML using a publicly available DTD, and standard-conforming simple 91 | HTML, PostScript or PDF designed for human modification. Examples of 92 | transparent image formats include PNG, XCF and JPG. Opaque formats 93 | include proprietary formats that can be read and edited only by 94 | proprietary word processors, SGML or XML for which the DTD and/or 95 | processing tools are not generally available, and the 96 | machine-generated HTML, PostScript or PDF produced by some word 97 | processors for output purposes only. 98 | 99 | The "Title Page" means, for a printed book, the title page itself, 100 | plus such following pages as are needed to hold, legibly, the material 101 | this License requires to appear in the title page. For works in 102 | formats which do not have any title page as such, "Title Page" means 103 | the text near the most prominent appearance of the work's title, 104 | preceding the beginning of the body of the text. 105 | 106 | The "publisher" means any person or entity that distributes copies of 107 | the Document to the public. 108 | 109 | A section "Entitled XYZ" means a named subunit of the Document whose 110 | title either is precisely XYZ or contains XYZ in parentheses following 111 | text that translates XYZ in another language. (Here XYZ stands for a 112 | specific section name mentioned below, such as "Acknowledgements", 113 | "Dedications", "Endorsements", or "History".) To "Preserve the Title" 114 | of such a section when you modify the Document means that it remains a 115 | section "Entitled XYZ" according to this definition. 116 | 117 | The Document may include Warranty Disclaimers next to the notice which 118 | states that this License applies to the Document. 
These Warranty 119 | Disclaimers are considered to be included by reference in this 120 | License, but only as regards disclaiming warranties: any other 121 | implication that these Warranty Disclaimers may have is void and has 122 | no effect on the meaning of this License. 123 | 124 | 2. VERBATIM COPYING 125 | 126 | You may copy and distribute the Document in any medium, either 127 | commercially or noncommercially, provided that this License, the 128 | copyright notices, and the license notice saying this License applies 129 | to the Document are reproduced in all copies, and that you add no 130 | other conditions whatsoever to those of this License. You may not use 131 | technical measures to obstruct or control the reading or further 132 | copying of the copies you make or distribute. However, you may accept 133 | compensation in exchange for copies. If you distribute a large enough 134 | number of copies you must also follow the conditions in section 3. 135 | 136 | You may also lend copies, under the same conditions stated above, and 137 | you may publicly display copies. 138 | 139 | 140 | 3. COPYING IN QUANTITY 141 | 142 | If you publish printed copies (or copies in media that commonly have 143 | printed covers) of the Document, numbering more than 100, and the 144 | Document's license notice requires Cover Texts, you must enclose the 145 | copies in covers that carry, clearly and legibly, all these Cover 146 | Texts: Front-Cover Texts on the front cover, and Back-Cover Texts on 147 | the back cover. Both covers must also clearly and legibly identify 148 | you as the publisher of these copies. The front cover must present 149 | the full title with all words of the title equally prominent and 150 | visible. You may add other material on the covers in addition. 151 | Copying with changes limited to the covers, as long as they preserve 152 | the title of the Document and satisfy these conditions, can be treated 153 | as verbatim copying in other respects. 
154 | 155 | If the required texts for either cover are too voluminous to fit 156 | legibly, you should put the first ones listed (as many as fit 157 | reasonably) on the actual cover, and continue the rest onto adjacent 158 | pages. 159 | 160 | If you publish or distribute Opaque copies of the Document numbering 161 | more than 100, you must either include a machine-readable Transparent 162 | copy along with each Opaque copy, or state in or with each Opaque copy 163 | a computer-network location from which the general network-using 164 | public has access to download using public-standard network protocols 165 | a complete Transparent copy of the Document, free of added material. 166 | If you use the latter option, you must take reasonably prudent steps, 167 | when you begin distribution of Opaque copies in quantity, to ensure 168 | that this Transparent copy will remain thus accessible at the stated 169 | location until at least one year after the last time you distribute an 170 | Opaque copy (directly or through your agents or retailers) of that 171 | edition to the public. 172 | 173 | It is requested, but not required, that you contact the authors of the 174 | Document well before redistributing any large number of copies, to 175 | give them a chance to provide you with an updated version of the 176 | Document. 177 | 178 | 179 | 4. MODIFICATIONS 180 | 181 | You may copy and distribute a Modified Version of the Document under 182 | the conditions of sections 2 and 3 above, provided that you release 183 | the Modified Version under precisely this License, with the Modified 184 | Version filling the role of the Document, thus licensing distribution 185 | and modification of the Modified Version to whoever possesses a copy 186 | of it. In addition, you must do these things in the Modified Version: 187 | 188 | A. 
Use in the Title Page (and on the covers, if any) a title distinct 189 | from that of the Document, and from those of previous versions 190 | (which should, if there were any, be listed in the History section 191 | of the Document). You may use the same title as a previous version 192 | if the original publisher of that version gives permission. 193 | B. List on the Title Page, as authors, one or more persons or entities 194 | responsible for authorship of the modifications in the Modified 195 | Version, together with at least five of the principal authors of the 196 | Document (all of its principal authors, if it has fewer than five), 197 | unless they release you from this requirement. 198 | C. State on the Title page the name of the publisher of the 199 | Modified Version, as the publisher. 200 | D. Preserve all the copyright notices of the Document. 201 | E. Add an appropriate copyright notice for your modifications 202 | adjacent to the other copyright notices. 203 | F. Include, immediately after the copyright notices, a license notice 204 | giving the public permission to use the Modified Version under the 205 | terms of this License, in the form shown in the Addendum below. 206 | G. Preserve in that license notice the full lists of Invariant Sections 207 | and required Cover Texts given in the Document's license notice. 208 | H. Include an unaltered copy of this License. 209 | I. Preserve the section Entitled "History", Preserve its Title, and add 210 | to it an item stating at least the title, year, new authors, and 211 | publisher of the Modified Version as given on the Title Page. If 212 | there is no section Entitled "History" in the Document, create one 213 | stating the title, year, authors, and publisher of the Document as 214 | given on its Title Page, then add an item describing the Modified 215 | Version as stated in the previous sentence. 216 | J. 
Preserve the network location, if any, given in the Document for 217 | public access to a Transparent copy of the Document, and likewise 218 | the network locations given in the Document for previous versions 219 | it was based on. These may be placed in the "History" section. 220 | You may omit a network location for a work that was published at 221 | least four years before the Document itself, or if the original 222 | publisher of the version it refers to gives permission. 223 | K. For any section Entitled "Acknowledgements" or "Dedications", 224 | Preserve the Title of the section, and preserve in the section all 225 | the substance and tone of each of the contributor acknowledgements 226 | and/or dedications given therein. 227 | L. Preserve all the Invariant Sections of the Document, 228 | unaltered in their text and in their titles. Section numbers 229 | or the equivalent are not considered part of the section titles. 230 | M. Delete any section Entitled "Endorsements". Such a section 231 | may not be included in the Modified Version. 232 | N. Do not retitle any existing section to be Entitled "Endorsements" 233 | or to conflict in title with any Invariant Section. 234 | O. Preserve any Warranty Disclaimers. 235 | 236 | If the Modified Version includes new front-matter sections or 237 | appendices that qualify as Secondary Sections and contain no material 238 | copied from the Document, you may at your option designate some or all 239 | of these sections as invariant. To do this, add their titles to the 240 | list of Invariant Sections in the Modified Version's license notice. 241 | These titles must be distinct from any other section titles. 242 | 243 | You may add a section Entitled "Endorsements", provided it contains 244 | nothing but endorsements of your Modified Version by various 245 | parties--for example, statements of peer review or that the text has 246 | been approved by an organization as the authoritative definition of a 247 | standard. 
248 | 249 | You may add a passage of up to five words as a Front-Cover Text, and a 250 | passage of up to 25 words as a Back-Cover Text, to the end of the list 251 | of Cover Texts in the Modified Version. Only one passage of 252 | Front-Cover Text and one of Back-Cover Text may be added by (or 253 | through arrangements made by) any one entity. If the Document already 254 | includes a cover text for the same cover, previously added by you or 255 | by arrangement made by the same entity you are acting on behalf of, 256 | you may not add another; but you may replace the old one, on explicit 257 | permission from the previous publisher that added the old one. 258 | 259 | The author(s) and publisher(s) of the Document do not by this License 260 | give permission to use their names for publicity for or to assert or 261 | imply endorsement of any Modified Version. 262 | 263 | 264 | 5. COMBINING DOCUMENTS 265 | 266 | You may combine the Document with other documents released under this 267 | License, under the terms defined in section 4 above for modified 268 | versions, provided that you include in the combination all of the 269 | Invariant Sections of all of the original documents, unmodified, and 270 | list them all as Invariant Sections of your combined work in its 271 | license notice, and that you preserve all their Warranty Disclaimers. 272 | 273 | The combined work need only contain one copy of this License, and 274 | multiple identical Invariant Sections may be replaced with a single 275 | copy. If there are multiple Invariant Sections with the same name but 276 | different contents, make the title of each such section unique by 277 | adding at the end of it, in parentheses, the name of the original 278 | author or publisher of that section if known, or else a unique number. 279 | Make the same adjustment to the section titles in the list of 280 | Invariant Sections in the license notice of the combined work. 
281 | 282 | In the combination, you must combine any sections Entitled "History" 283 | in the various original documents, forming one section Entitled 284 | "History"; likewise combine any sections Entitled "Acknowledgements", 285 | and any sections Entitled "Dedications". You must delete all sections 286 | Entitled "Endorsements". 287 | 288 | 289 | 6. COLLECTIONS OF DOCUMENTS 290 | 291 | You may make a collection consisting of the Document and other 292 | documents released under this License, and replace the individual 293 | copies of this License in the various documents with a single copy 294 | that is included in the collection, provided that you follow the rules 295 | of this License for verbatim copying of each of the documents in all 296 | other respects. 297 | 298 | You may extract a single document from such a collection, and 299 | distribute it individually under this License, provided you insert a 300 | copy of this License into the extracted document, and follow this 301 | License in all other respects regarding verbatim copying of that 302 | document. 303 | 304 | 305 | 7. AGGREGATION WITH INDEPENDENT WORKS 306 | 307 | A compilation of the Document or its derivatives with other separate 308 | and independent documents or works, in or on a volume of a storage or 309 | distribution medium, is called an "aggregate" if the copyright 310 | resulting from the compilation is not used to limit the legal rights 311 | of the compilation's users beyond what the individual works permit. 312 | When the Document is included in an aggregate, this License does not 313 | apply to the other works in the aggregate which are not themselves 314 | derivative works of the Document. 
315 | 316 | If the Cover Text requirement of section 3 is applicable to these 317 | copies of the Document, then if the Document is less than one half of 318 | the entire aggregate, the Document's Cover Texts may be placed on 319 | covers that bracket the Document within the aggregate, or the 320 | electronic equivalent of covers if the Document is in electronic form. 321 | Otherwise they must appear on printed covers that bracket the whole 322 | aggregate. 323 | 324 | 325 | 8. TRANSLATION 326 | 327 | Translation is considered a kind of modification, so you may 328 | distribute translations of the Document under the terms of section 4. 329 | Replacing Invariant Sections with translations requires special 330 | permission from their copyright holders, but you may include 331 | translations of some or all Invariant Sections in addition to the 332 | original versions of these Invariant Sections. You may include a 333 | translation of this License, and all the license notices in the 334 | Document, and any Warranty Disclaimers, provided that you also include 335 | the original English version of this License and the original versions 336 | of those notices and disclaimers. In case of a disagreement between 337 | the translation and the original version of this License or a notice 338 | or disclaimer, the original version will prevail. 339 | 340 | If a section in the Document is Entitled "Acknowledgements", 341 | "Dedications", or "History", the requirement (section 4) to Preserve 342 | its Title (section 1) will typically require changing the actual 343 | title. 344 | 345 | 346 | 9. TERMINATION 347 | 348 | You may not copy, modify, sublicense, or distribute the Document 349 | except as expressly provided under this License. Any attempt 350 | otherwise to copy, modify, sublicense, or distribute it is void, and 351 | will automatically terminate your rights under this License. 
352 | 353 | However, if you cease all violation of this License, then your license 354 | from a particular copyright holder is reinstated (a) provisionally, 355 | unless and until the copyright holder explicitly and finally 356 | terminates your license, and (b) permanently, if the copyright holder 357 | fails to notify you of the violation by some reasonable means prior to 358 | 60 days after the cessation. 359 | 360 | Moreover, your license from a particular copyright holder is 361 | reinstated permanently if the copyright holder notifies you of the 362 | violation by some reasonable means, this is the first time you have 363 | received notice of violation of this License (for any work) from that 364 | copyright holder, and you cure the violation prior to 30 days after 365 | your receipt of the notice. 366 | 367 | Termination of your rights under this section does not terminate the 368 | licenses of parties who have received copies or rights from you under 369 | this License. If your rights have been terminated and not permanently 370 | reinstated, receipt of a copy of some or all of the same material does 371 | not give you any rights to use it. 372 | 373 | 374 | 10. FUTURE REVISIONS OF THIS LICENSE 375 | 376 | The Free Software Foundation may publish new, revised versions of the 377 | GNU Free Documentation License from time to time. Such new versions 378 | will be similar in spirit to the present version, but may differ in 379 | detail to address new problems or concerns. See 380 | http://www.gnu.org/copyleft/. 381 | 382 | Each version of the License is given a distinguishing version number. 383 | If the Document specifies that a particular numbered version of this 384 | License "or any later version" applies to it, you have the option of 385 | following the terms and conditions either of that specified version or 386 | of any later version that has been published (not as a draft) by the 387 | Free Software Foundation. 
If the Document does not specify a version 388 | number of this License, you may choose any version ever published (not 389 | as a draft) by the Free Software Foundation. If the Document 390 | specifies that a proxy can decide which future versions of this 391 | License can be used, that proxy's public statement of acceptance of a 392 | version permanently authorizes you to choose that version for the 393 | Document. 394 | 395 | 11. RELICENSING 396 | 397 | "Massive Multiauthor Collaboration Site" (or "MMC Site") means any 398 | World Wide Web server that publishes copyrightable works and also 399 | provides prominent facilities for anybody to edit those works. A 400 | public wiki that anybody can edit is an example of such a server. A 401 | "Massive Multiauthor Collaboration" (or "MMC") contained in the site 402 | means any set of copyrightable works thus published on the MMC site. 403 | 404 | "CC-BY-SA" means the Creative Commons Attribution-Share Alike 3.0 405 | license published by Creative Commons Corporation, a not-for-profit 406 | corporation with a principal place of business in San Francisco, 407 | California, as well as future copyleft versions of that license 408 | published by that same organization. 409 | 410 | "Incorporate" means to publish or republish a Document, in whole or in 411 | part, as part of another Document. 412 | 413 | An MMC is "eligible for relicensing" if it is licensed under this 414 | License, and if all works that were first published under this License 415 | somewhere other than this MMC, and subsequently incorporated in whole or 416 | in part into the MMC, (1) had no cover texts or invariant sections, and 417 | (2) were thus incorporated prior to November 1, 2008. 418 | 419 | The operator of an MMC Site may republish an MMC contained in the site 420 | under CC-BY-SA on the same site at any time before August 1, 2009, 421 | provided the MMC is eligible for relicensing. 
422 | 423 | 424 | ADDENDUM: How to use this License for your documents 425 | 426 | To use this License in a document you have written, include a copy of 427 | the License in the document and put the following copyright and 428 | license notices just after the title page: 429 | 430 | Copyright (c) YEAR YOUR NAME. 431 | Permission is granted to copy, distribute and/or modify this document 432 | under the terms of the GNU Free Documentation License, Version 1.3 433 | or any later version published by the Free Software Foundation; 434 | with no Invariant Sections, no Front-Cover Texts, and no Back-Cover Texts. 435 | A copy of the license is included in the section entitled "GNU 436 | Free Documentation License". 437 | 438 | If you have Invariant Sections, Front-Cover Texts and Back-Cover Texts, 439 | replace the "with...Texts." line with this: 440 | 441 | with the Invariant Sections being LIST THEIR TITLES, with the 442 | Front-Cover Texts being LIST, and with the Back-Cover Texts being LIST. 443 | 444 | If you have Invariant Sections without Cover Texts, or some other 445 | combination of the three, merge those two alternatives to suit the 446 | situation. 447 | 448 | If your document contains nontrivial examples of program code, we 449 | recommend releasing these examples in parallel under your choice of 450 | free software license, such as the GNU General Public License, 451 | to permit their use in free software. 452 | --------------------------------------------------------------------------------