├── .gitignore ├── LICENSE ├── README.md └── peg ├── cover.out ├── language.go ├── language_test.go ├── lexer.go ├── lexer_test.go ├── parse_tree.go ├── parse_tree_test.go ├── parser.go ├── parser_test.go ├── source.go └── source_test.go /.gitignore: -------------------------------------------------------------------------------- 1 | # Compiled Object files, Static and Dynamic libs (Shared Objects) 2 | *.o 3 | *.a 4 | *.so 5 | 6 | # Folders 7 | _obj 8 | _test 9 | 10 | # Architecture specific extensions/prefixes 11 | *.[568vq] 12 | [568vq].out 13 | 14 | *.cgo1.go 15 | *.cgo2.c 16 | _cgo_defun.c 17 | _cgo_gotypes.go 18 | _cgo_export.* 19 | 20 | _testmain.go 21 | 22 | *.exe 23 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2014 Logiraptor 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of 6 | this software and associated documentation files (the "Software"), to deal in 7 | the Software without restriction, including without limitation the rights to 8 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 9 | the Software, and to permit persons to whom the Software is furnished to do so, 10 | subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 17 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR 18 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 19 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 20 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 21 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | chicken 2 | ========= 3 | 4 | A set of tools for writing programming languages in go. Current work is on a PEG parser which will be eventually used to generate itself. 5 | 6 | The PEG Parser: 7 | ====== 8 | 9 | NOTE: The current implementation is missing several necessary features to be able to support actual languages. 10 | 11 | ### Install: 12 | go get github.com/Logiraptor/chicken/peg 13 | 14 | 15 | ### Defining your language: 16 | The language is expressed as a parsing expression grammar. Rules are expressed like so: 17 | 18 | rule <- partA partB 19 | partA <- 'a' 20 | partB <- ~'\\d+' 21 | ruleA <- partA* 22 | ruleB <- partA+ 23 | ruleC <- partA? 24 | ruleD <- partA / partB 25 | 26 | partA above is a string literal. 27 | partB above is defined to recognize a regular expression denoted with a `~` before the quoted regexp. 28 | 29 | The library takes a peg description like above, and generates a state machine which will both lex and parse a given input into a parse tree. The Parser can and should be generated only once and reused on multiple input strings. 30 | 31 | ### Planned: 32 | The following have yet to be implemented. 
33 | 34 | # Comments 35 | -------------------------------------------------------------------------------- /peg/cover.out: -------------------------------------------------------------------------------- 1 | mode: set 2 | github.com/Logiraptor/chickenVM/peg/language.go:19.49,21.39 2 0 3 | github.com/Logiraptor/chickenVM/peg/language.go:24.2,24.10 1 0 4 | github.com/Logiraptor/chickenVM/peg/language.go:21.39,23.3 1 0 5 | github.com/Logiraptor/chickenVM/peg/language.go:27.34,29.2 1 0 6 | github.com/Logiraptor/chickenVM/peg/language.go:37.67,39.2 1 1 7 | github.com/Logiraptor/chickenVM/peg/language.go:42.64,44.16 2 1 8 | github.com/Logiraptor/chickenVM/peg/language.go:47.2,48.18 2 1 9 | github.com/Logiraptor/chickenVM/peg/language.go:44.16,46.3 1 0 10 | github.com/Logiraptor/chickenVM/peg/language.go:51.49,55.60 2 1 11 | github.com/Logiraptor/chickenVM/peg/language.go:55.60,57.20 2 1 12 | github.com/Logiraptor/chickenVM/peg/language.go:57.20,60.33 3 1 13 | github.com/Logiraptor/chickenVM/peg/language.go:64.5,64.113 1 1 14 | github.com/Logiraptor/chickenVM/peg/language.go:60.33,62.6 1 1 15 | github.com/Logiraptor/chickenVM/peg/language.go:65.6,70.5 1 1 16 | github.com/Logiraptor/chickenVM/peg/language.go:75.63,78.60 1 1 17 | github.com/Logiraptor/chickenVM/peg/language.go:78.60,80.20 2 1 18 | github.com/Logiraptor/chickenVM/peg/language.go:80.20,83.33 3 1 19 | github.com/Logiraptor/chickenVM/peg/language.go:87.5,87.126 1 1 20 | github.com/Logiraptor/chickenVM/peg/language.go:83.33,85.6 1 1 21 | github.com/Logiraptor/chickenVM/peg/language.go:88.6,93.5 1 1 22 | github.com/Logiraptor/chickenVM/peg/language.go:98.40,103.2 1 1 23 | github.com/Logiraptor/chickenVM/peg/language.go:105.58,109.60 1 1 24 | github.com/Logiraptor/chickenVM/peg/language.go:109.60,112.29 3 1 25 | github.com/Logiraptor/chickenVM/peg/language.go:123.4,123.26 1 1 26 | github.com/Logiraptor/chickenVM/peg/language.go:126.4,126.77 1 1 27 | github.com/Logiraptor/chickenVM/peg/language.go:112.29,114.19 2 1 
28 | github.com/Logiraptor/chickenVM/peg/language.go:114.19,116.6 1 1 29 | github.com/Logiraptor/chickenVM/peg/language.go:116.7,117.21 1 1 30 | github.com/Logiraptor/chickenVM/peg/language.go:120.6,120.17 1 1 31 | github.com/Logiraptor/chickenVM/peg/language.go:117.21,119.7 1 1 32 | github.com/Logiraptor/chickenVM/peg/language.go:123.26,125.5 1 1 33 | github.com/Logiraptor/chickenVM/peg/language.go:131.42,135.60 1 1 34 | github.com/Logiraptor/chickenVM/peg/language.go:135.60,139.18 4 1 35 | github.com/Logiraptor/chickenVM/peg/language.go:154.4,154.33 1 1 36 | github.com/Logiraptor/chickenVM/peg/language.go:139.18,141.5 1 0 37 | github.com/Logiraptor/chickenVM/peg/language.go:141.6,144.9 3 1 38 | github.com/Logiraptor/chickenVM/peg/language.go:144.9,146.20 2 1 39 | github.com/Logiraptor/chickenVM/peg/language.go:149.6,150.16 2 1 40 | github.com/Logiraptor/chickenVM/peg/language.go:146.20,147.12 1 1 41 | github.com/Logiraptor/chickenVM/peg/language.go:159.42,163.60 1 1 42 | github.com/Logiraptor/chickenVM/peg/language.go:163.60,169.8 6 1 43 | github.com/Logiraptor/chickenVM/peg/language.go:177.4,177.33 1 1 44 | github.com/Logiraptor/chickenVM/peg/language.go:169.8,171.19 2 1 45 | github.com/Logiraptor/chickenVM/peg/language.go:174.5,175.15 2 1 46 | github.com/Logiraptor/chickenVM/peg/language.go:171.19,172.11 1 1 47 | github.com/Logiraptor/chickenVM/peg/language.go:182.44,186.60 1 1 48 | github.com/Logiraptor/chickenVM/peg/language.go:186.60,189.4 2 1 49 | github.com/Logiraptor/chickenVM/peg/language.go:193.63,197.60 1 1 50 | github.com/Logiraptor/chickenVM/peg/language.go:197.60,199.18 2 1 51 | github.com/Logiraptor/chickenVM/peg/language.go:199.18,201.5 1 1 52 | github.com/Logiraptor/chickenVM/peg/language.go:201.6,203.19 2 1 53 | github.com/Logiraptor/chickenVM/peg/language.go:206.5,206.26 1 1 54 | github.com/Logiraptor/chickenVM/peg/language.go:203.19,205.6 1 1 55 | github.com/Logiraptor/chickenVM/peg/language.go:212.43,216.60 1 1 56 | 
github.com/Logiraptor/chickenVM/peg/language.go:216.60,219.4 2 1 57 | github.com/Logiraptor/chickenVM/peg/lexer.go:19.31,20.9 1 0 58 | github.com/Logiraptor/chickenVM/peg/lexer.go:26.2,26.43 1 0 59 | github.com/Logiraptor/chickenVM/peg/lexer.go:21.2,22.15 1 0 60 | github.com/Logiraptor/chickenVM/peg/lexer.go:23.2,24.15 1 0 61 | github.com/Logiraptor/chickenVM/peg/lexer.go:49.35,50.11 1 0 62 | github.com/Logiraptor/chickenVM/peg/lexer.go:80.2,80.18 1 0 63 | github.com/Logiraptor/chickenVM/peg/lexer.go:51.2,52.21 1 0 64 | github.com/Logiraptor/chickenVM/peg/lexer.go:53.2,54.26 1 0 65 | github.com/Logiraptor/chickenVM/peg/lexer.go:55.2,56.21 1 0 66 | github.com/Logiraptor/chickenVM/peg/lexer.go:57.2,58.23 1 0 67 | github.com/Logiraptor/chickenVM/peg/lexer.go:59.2,60.26 1 0 68 | github.com/Logiraptor/chickenVM/peg/lexer.go:61.2,62.23 1 0 69 | github.com/Logiraptor/chickenVM/peg/lexer.go:63.2,64.26 1 0 70 | github.com/Logiraptor/chickenVM/peg/lexer.go:65.2,66.22 1 0 71 | github.com/Logiraptor/chickenVM/peg/lexer.go:67.2,68.19 1 0 72 | github.com/Logiraptor/chickenVM/peg/lexer.go:69.2,70.23 1 0 73 | github.com/Logiraptor/chickenVM/peg/lexer.go:71.2,72.20 1 0 74 | github.com/Logiraptor/chickenVM/peg/lexer.go:73.2,74.25 1 0 75 | github.com/Logiraptor/chickenVM/peg/lexer.go:75.2,76.24 1 0 76 | github.com/Logiraptor/chickenVM/peg/lexer.go:77.2,78.23 1 0 77 | github.com/Logiraptor/chickenVM/peg/lexer.go:96.33,99.2 2 0 78 | github.com/Logiraptor/chickenVM/peg/lexer.go:101.34,108.2 3 1 79 | github.com/Logiraptor/chickenVM/peg/lexer.go:110.23,111.40 1 1 80 | github.com/Logiraptor/chickenVM/peg/lexer.go:114.2,114.16 1 1 81 | github.com/Logiraptor/chickenVM/peg/lexer.go:111.40,113.3 1 1 82 | github.com/Logiraptor/chickenVM/peg/lexer.go:117.29,119.19 2 1 83 | github.com/Logiraptor/chickenVM/peg/lexer.go:122.2,124.10 3 1 84 | github.com/Logiraptor/chickenVM/peg/lexer.go:119.19,121.3 1 0 85 | github.com/Logiraptor/chickenVM/peg/lexer.go:127.29,129.19 2 1 86 | 
github.com/Logiraptor/chickenVM/peg/lexer.go:136.2,137.19 2 1 87 | github.com/Logiraptor/chickenVM/peg/lexer.go:143.2,144.10 2 1 88 | github.com/Logiraptor/chickenVM/peg/lexer.go:129.19,131.3 1 1 89 | github.com/Logiraptor/chickenVM/peg/lexer.go:131.4,131.23 1 1 90 | github.com/Logiraptor/chickenVM/peg/lexer.go:131.23,134.3 2 0 91 | github.com/Logiraptor/chickenVM/peg/lexer.go:137.19,139.3 1 0 92 | github.com/Logiraptor/chickenVM/peg/lexer.go:139.4,139.23 1 1 93 | github.com/Logiraptor/chickenVM/peg/lexer.go:139.23,142.3 2 0 94 | github.com/Logiraptor/chickenVM/peg/lexer.go:147.29,148.17 1 1 95 | github.com/Logiraptor/chickenVM/peg/lexer.go:148.17,150.3 1 1 96 | github.com/Logiraptor/chickenVM/peg/lexer.go:150.4,150.24 1 0 97 | github.com/Logiraptor/chickenVM/peg/lexer.go:150.24,152.3 1 0 98 | github.com/Logiraptor/chickenVM/peg/lexer.go:152.4,152.24 1 0 99 | github.com/Logiraptor/chickenVM/peg/lexer.go:152.24,154.3 1 0 100 | github.com/Logiraptor/chickenVM/peg/lexer.go:154.4,156.3 1 0 101 | github.com/Logiraptor/chickenVM/peg/lexer.go:159.34,161.2 1 1 102 | github.com/Logiraptor/chickenVM/peg/lexer.go:166.56,171.2 4 1 103 | github.com/Logiraptor/chickenVM/peg/lexer.go:173.43,174.45 1 0 104 | github.com/Logiraptor/chickenVM/peg/lexer.go:178.2,178.14 1 0 105 | github.com/Logiraptor/chickenVM/peg/lexer.go:174.45,177.3 2 0 106 | github.com/Logiraptor/chickenVM/peg/lexer.go:181.41,182.46 1 0 107 | github.com/Logiraptor/chickenVM/peg/lexer.go:182.46,184.3 1 0 108 | github.com/Logiraptor/chickenVM/peg/lexer.go:187.68,190.2 2 0 109 | github.com/Logiraptor/chickenVM/peg/lexer.go:192.47,194.19 2 0 110 | github.com/Logiraptor/chickenVM/peg/lexer.go:200.2,200.28 1 0 111 | github.com/Logiraptor/chickenVM/peg/lexer.go:194.19,196.3 1 0 112 | github.com/Logiraptor/chickenVM/peg/lexer.go:196.4,196.23 1 0 113 | github.com/Logiraptor/chickenVM/peg/lexer.go:196.23,199.3 2 0 114 | github.com/Logiraptor/chickenVM/peg/lexer.go:204.42,205.29 1 0 115 | 
github.com/Logiraptor/chickenVM/peg/lexer.go:205.29,207.3 1 0 116 | github.com/Logiraptor/chickenVM/peg/lexer.go:210.31,212.2 1 1 117 | github.com/Logiraptor/chickenVM/peg/lexer.go:214.31,215.24 1 1 118 | github.com/Logiraptor/chickenVM/peg/lexer.go:243.2,243.12 1 0 119 | github.com/Logiraptor/chickenVM/peg/lexer.go:216.2,217.23 1 1 120 | github.com/Logiraptor/chickenVM/peg/lexer.go:218.2,219.23 1 1 121 | github.com/Logiraptor/chickenVM/peg/lexer.go:220.2,221.20 1 1 122 | github.com/Logiraptor/chickenVM/peg/lexer.go:222.2,223.23 1 1 123 | github.com/Logiraptor/chickenVM/peg/lexer.go:224.2,225.20 1 1 124 | github.com/Logiraptor/chickenVM/peg/lexer.go:226.2,227.18 1 1 125 | github.com/Logiraptor/chickenVM/peg/lexer.go:228.2,229.20 1 1 126 | github.com/Logiraptor/chickenVM/peg/lexer.go:230.2,231.17 1 1 127 | github.com/Logiraptor/chickenVM/peg/lexer.go:232.2,233.22 1 1 128 | github.com/Logiraptor/chickenVM/peg/lexer.go:234.2,235.19 1 1 129 | github.com/Logiraptor/chickenVM/peg/lexer.go:236.2,237.20 1 1 130 | github.com/Logiraptor/chickenVM/peg/lexer.go:238.2,240.13 2 1 131 | github.com/Logiraptor/chickenVM/peg/lexer.go:246.32,250.2 3 1 132 | github.com/Logiraptor/chickenVM/peg/lexer.go:252.37,256.2 3 1 133 | github.com/Logiraptor/chickenVM/peg/lexer.go:258.34,262.2 3 1 134 | github.com/Logiraptor/chickenVM/peg/lexer.go:264.35,268.2 3 1 135 | github.com/Logiraptor/chickenVM/peg/lexer.go:270.35,274.2 3 1 136 | github.com/Logiraptor/chickenVM/peg/lexer.go:276.38,277.28 1 1 137 | github.com/Logiraptor/chickenVM/peg/lexer.go:280.2,281.15 2 1 138 | github.com/Logiraptor/chickenVM/peg/lexer.go:277.28,279.3 1 1 139 | github.com/Logiraptor/chickenVM/peg/lexer.go:284.38,285.6 1 1 140 | github.com/Logiraptor/chickenVM/peg/lexer.go:293.2,294.15 2 1 141 | github.com/Logiraptor/chickenVM/peg/lexer.go:285.6,287.38 2 1 142 | github.com/Logiraptor/chickenVM/peg/lexer.go:287.38,289.4 1 1 143 | github.com/Logiraptor/chickenVM/peg/lexer.go:289.5,290.9 1 1 144 | 
github.com/Logiraptor/chickenVM/peg/lexer.go:297.35,301.2 3 1 145 | github.com/Logiraptor/chickenVM/peg/lexer.go:303.38,305.21 2 1 146 | github.com/Logiraptor/chickenVM/peg/lexer.go:311.2,311.15 1 1 147 | github.com/Logiraptor/chickenVM/peg/lexer.go:305.21,308.3 2 0 148 | github.com/Logiraptor/chickenVM/peg/lexer.go:308.4,310.3 1 1 149 | github.com/Logiraptor/chickenVM/peg/lexer.go:314.35,317.6 2 1 150 | github.com/Logiraptor/chickenVM/peg/lexer.go:317.6,319.36 2 1 151 | github.com/Logiraptor/chickenVM/peg/lexer.go:319.36,321.4 1 1 152 | github.com/Logiraptor/chickenVM/peg/lexer.go:321.5,321.23 1 1 153 | github.com/Logiraptor/chickenVM/peg/lexer.go:321.23,324.4 2 1 154 | github.com/Logiraptor/chickenVM/peg/lexer.go:324.5,324.22 1 1 155 | github.com/Logiraptor/chickenVM/peg/lexer.go:324.22,327.4 2 0 156 | github.com/Logiraptor/chickenVM/peg/lexer.go:331.33,334.22 2 1 157 | github.com/Logiraptor/chickenVM/peg/lexer.go:341.2,341.6 1 1 158 | github.com/Logiraptor/chickenVM/peg/lexer.go:334.22,337.3 2 0 159 | github.com/Logiraptor/chickenVM/peg/lexer.go:337.4,339.3 1 1 160 | github.com/Logiraptor/chickenVM/peg/lexer.go:341.6,343.36 2 1 161 | github.com/Logiraptor/chickenVM/peg/lexer.go:343.36,345.4 1 0 162 | github.com/Logiraptor/chickenVM/peg/lexer.go:345.5,345.23 1 1 163 | github.com/Logiraptor/chickenVM/peg/lexer.go:345.23,348.4 2 1 164 | github.com/Logiraptor/chickenVM/peg/lexer.go:348.5,348.22 1 1 165 | github.com/Logiraptor/chickenVM/peg/lexer.go:348.22,351.4 2 0 166 | github.com/Logiraptor/chickenVM/peg/parse_tree.go:13.55,16.35 3 0 167 | github.com/Logiraptor/chickenVM/peg/parse_tree.go:19.2,19.13 1 0 168 | github.com/Logiraptor/chickenVM/peg/parse_tree.go:16.35,18.3 1 0 169 | github.com/Logiraptor/chickenVM/peg/parse_tree.go:22.37,24.2 1 0 170 | github.com/Logiraptor/chickenVM/peg/parser.go:20.52,24.2 3 1 171 | github.com/Logiraptor/chickenVM/peg/parser.go:26.61,29.2 2 0 172 | github.com/Logiraptor/chickenVM/peg/parser.go:31.47,37.45 5 1 173 | 
github.com/Logiraptor/chickenVM/peg/parser.go:41.2,43.22 2 1 174 | github.com/Logiraptor/chickenVM/peg/parser.go:47.2,47.9 1 1 175 | github.com/Logiraptor/chickenVM/peg/parser.go:37.45,39.3 1 1 176 | github.com/Logiraptor/chickenVM/peg/parser.go:43.22,45.3 1 0 177 | github.com/Logiraptor/chickenVM/peg/parser.go:48.2,49.19 1 1 178 | github.com/Logiraptor/chickenVM/peg/parser.go:50.2,51.18 1 0 179 | github.com/Logiraptor/chickenVM/peg/parser.go:55.88,58.9 3 1 180 | github.com/Logiraptor/chickenVM/peg/parser.go:62.2,63.26 2 1 181 | github.com/Logiraptor/chickenVM/peg/parser.go:67.2,68.16 2 1 182 | github.com/Logiraptor/chickenVM/peg/parser.go:58.9,61.3 2 0 183 | github.com/Logiraptor/chickenVM/peg/parser.go:63.26,65.3 1 1 184 | github.com/Logiraptor/chickenVM/peg/parser.go:68.16,71.3 2 0 185 | github.com/Logiraptor/chickenVM/peg/parser.go:71.4,76.3 2 1 186 | github.com/Logiraptor/chickenVM/peg/parser.go:79.80,80.20 1 1 187 | github.com/Logiraptor/chickenVM/peg/parser.go:83.2,84.22 2 1 188 | github.com/Logiraptor/chickenVM/peg/parser.go:92.2,94.39 2 1 189 | github.com/Logiraptor/chickenVM/peg/parser.go:102.2,104.17 2 1 190 | github.com/Logiraptor/chickenVM/peg/parser.go:80.20,82.3 1 1 191 | github.com/Logiraptor/chickenVM/peg/parser.go:84.22,86.10 2 1 192 | github.com/Logiraptor/chickenVM/peg/parser.go:86.10,88.4 1 0 193 | github.com/Logiraptor/chickenVM/peg/parser.go:88.5,90.4 1 1 194 | github.com/Logiraptor/chickenVM/peg/parser.go:94.39,97.17 3 1 195 | github.com/Logiraptor/chickenVM/peg/parser.go:97.17,99.4 1 0 196 | github.com/Logiraptor/chickenVM/peg/parser.go:107.42,109.9 2 1 197 | github.com/Logiraptor/chickenVM/peg/parser.go:112.2,112.18 1 1 198 | github.com/Logiraptor/chickenVM/peg/parser.go:122.2,122.12 1 0 199 | github.com/Logiraptor/chickenVM/peg/parser.go:109.9,111.3 1 1 200 | github.com/Logiraptor/chickenVM/peg/parser.go:113.2,114.29 1 1 201 | github.com/Logiraptor/chickenVM/peg/parser.go:115.2,116.21 1 1 202 | 
github.com/Logiraptor/chickenVM/peg/parser.go:117.2,118.43 1 0 203 | github.com/Logiraptor/chickenVM/peg/parser.go:119.2,120.27 1 0 204 | github.com/Logiraptor/chickenVM/peg/parser.go:125.42,126.38 1 1 205 | github.com/Logiraptor/chickenVM/peg/parser.go:126.38,128.10 2 1 206 | github.com/Logiraptor/chickenVM/peg/parser.go:132.3,132.19 1 1 207 | github.com/Logiraptor/chickenVM/peg/parser.go:138.3,138.13 1 0 208 | github.com/Logiraptor/chickenVM/peg/parser.go:128.10,131.4 2 0 209 | github.com/Logiraptor/chickenVM/peg/parser.go:133.3,134.26 1 1 210 | github.com/Logiraptor/chickenVM/peg/parser.go:135.3,136.35 1 1 211 | github.com/Logiraptor/chickenVM/peg/parser.go:142.63,144.38 2 1 212 | github.com/Logiraptor/chickenVM/peg/parser.go:144.38,146.10 2 1 213 | github.com/Logiraptor/chickenVM/peg/parser.go:150.3,150.19 1 1 214 | github.com/Logiraptor/chickenVM/peg/parser.go:208.3,208.13 1 0 215 | github.com/Logiraptor/chickenVM/peg/parser.go:146.10,149.4 2 0 216 | github.com/Logiraptor/chickenVM/peg/parser.go:151.3,152.37 1 1 217 | github.com/Logiraptor/chickenVM/peg/parser.go:153.3,155.78 2 1 218 | github.com/Logiraptor/chickenVM/peg/parser.go:156.3,157.97 1 1 219 | github.com/Logiraptor/chickenVM/peg/parser.go:158.3,159.69 1 1 220 | github.com/Logiraptor/chickenVM/peg/parser.go:160.3,161.23 1 1 221 | github.com/Logiraptor/chickenVM/peg/parser.go:165.4,167.66 3 1 222 | github.com/Logiraptor/chickenVM/peg/parser.go:168.3,169.23 1 1 223 | github.com/Logiraptor/chickenVM/peg/parser.go:173.4,175.66 3 1 224 | github.com/Logiraptor/chickenVM/peg/parser.go:176.3,177.23 1 1 225 | github.com/Logiraptor/chickenVM/peg/parser.go:181.4,183.68 3 1 226 | github.com/Logiraptor/chickenVM/peg/parser.go:184.3,185.23 1 1 227 | github.com/Logiraptor/chickenVM/peg/parser.go:189.4,191.67 3 1 228 | github.com/Logiraptor/chickenVM/peg/parser.go:192.3,193.41 1 1 229 | github.com/Logiraptor/chickenVM/peg/parser.go:195.3,196.23 1 1 230 | github.com/Logiraptor/chickenVM/peg/parser.go:203.4,203.22 1 1 
231 | github.com/Logiraptor/chickenVM/peg/parser.go:204.3,206.14 2 0 232 | github.com/Logiraptor/chickenVM/peg/parser.go:161.23,164.5 2 0 233 | github.com/Logiraptor/chickenVM/peg/parser.go:169.23,172.5 2 0 234 | github.com/Logiraptor/chickenVM/peg/parser.go:177.23,180.5 2 0 235 | github.com/Logiraptor/chickenVM/peg/parser.go:185.23,188.5 2 0 236 | github.com/Logiraptor/chickenVM/peg/parser.go:196.23,198.5 1 0 237 | github.com/Logiraptor/chickenVM/peg/parser.go:198.6,198.30 1 1 238 | github.com/Logiraptor/chickenVM/peg/parser.go:198.30,200.5 1 1 239 | github.com/Logiraptor/chickenVM/peg/parser.go:200.6,202.5 1 1 240 | github.com/Logiraptor/chickenVM/peg/parser.go:212.67,213.38 1 1 241 | github.com/Logiraptor/chickenVM/peg/parser.go:213.38,215.10 2 1 242 | github.com/Logiraptor/chickenVM/peg/parser.go:219.3,220.19 2 1 243 | github.com/Logiraptor/chickenVM/peg/parser.go:234.3,237.79 3 1 244 | github.com/Logiraptor/chickenVM/peg/parser.go:215.10,218.4 2 0 245 | github.com/Logiraptor/chickenVM/peg/parser.go:221.3,222.41 1 1 246 | github.com/Logiraptor/chickenVM/peg/parser.go:223.3,224.41 1 0 247 | github.com/Logiraptor/chickenVM/peg/parser.go:225.3,226.60 1 0 248 | github.com/Logiraptor/chickenVM/peg/parser.go:227.3,228.32 1 1 249 | github.com/Logiraptor/chickenVM/peg/parser.go:229.3,231.14 2 0 250 | github.com/Logiraptor/chickenVM/peg/source.go:14.47,16.16 2 1 251 | github.com/Logiraptor/chickenVM/peg/source.go:19.2,21.8 1 1 252 | github.com/Logiraptor/chickenVM/peg/source.go:16.16,18.3 1 0 253 | github.com/Logiraptor/chickenVM/peg/source.go:27.64,29.16 2 1 254 | github.com/Logiraptor/chickenVM/peg/source.go:33.2,33.17 1 1 255 | github.com/Logiraptor/chickenVM/peg/source.go:37.2,37.12 1 1 256 | github.com/Logiraptor/chickenVM/peg/source.go:29.16,31.3 1 1 257 | github.com/Logiraptor/chickenVM/peg/source.go:33.17,35.3 1 1 258 | github.com/Logiraptor/chickenVM/peg/source.go:42.63,43.23 1 1 259 | github.com/Logiraptor/chickenVM/peg/source.go:46.2,46.41 1 1 260 | 
github.com/Logiraptor/chickenVM/peg/source.go:49.2,49.12 1 1 261 | github.com/Logiraptor/chickenVM/peg/source.go:43.23,45.3 1 1 262 | github.com/Logiraptor/chickenVM/peg/source.go:46.41,48.3 1 1 263 | -------------------------------------------------------------------------------- /peg/language.go: -------------------------------------------------------------------------------- 1 | package peg 2 | 3 | import ( 4 | "errors" 5 | "fmt" 6 | "io" 7 | "regexp" 8 | "strings" 9 | ) 10 | 11 | type Lexeme struct { 12 | Name string 13 | Dependencies []*Lexeme 14 | isResolved bool // whether the deps are resolved. 15 | // Lexer returns the parse tree, an error and the number of input bytes consumed. 16 | Lexer func(*Source, int) (*ParseTree, error, int) 17 | } 18 | 19 | func (l *Lexeme) dumpTree(indent string) string { 20 | s := fmt.Sprintln(indent, l.Name, l.isResolved) 21 | for _, child := range l.Dependencies { 22 | s += child.dumpTree(indent + " ") 23 | } 24 | return s 25 | } 26 | 27 | func (l *Lexeme) String() string { 28 | return l.dumpTree("") 29 | } 30 | 31 | // Language defines lexing and parsing capabilities for a peg defined language. 32 | type Language struct { 33 | root *Lexeme 34 | } 35 | 36 | // ParseString is identical to Parse, but operates on string input. 37 | func (l *Language) ParseString(source string) (*ParseTree, error) { 38 | return l.Parse(strings.NewReader(source)) 39 | } 40 | 41 | // Parse attemps to turn the input reader into a valid parse tree. 
42 | func (l *Language) Parse(source io.Reader) (*ParseTree, error) { 43 | s, err := NewSource(source) 44 | if err != nil { 45 | return nil, err 46 | } 47 | tree, err, _ := l.root.Lexer(s, 0) 48 | return tree, err 49 | } 50 | 51 | func NewLiteralLexer(typ, valid string) *Lexeme { 52 | vbytes := []byte(valid) 53 | return &Lexeme{ 54 | Name: typ, 55 | Lexer: func(s *Source, pos int) (*ParseTree, error, int) { 56 | match := s.ConsumeLiteral(vbytes, pos) 57 | if match == nil { 58 | neighborhood := pos 59 | neighborEnd := pos + 10 60 | if neighborEnd > len(s.buf) { 61 | neighborEnd = len(s.buf) 62 | } 63 | 64 | return nil, errors.New(fmt.Sprintf("expected literal: %q at %q", valid, s.buf[neighborhood:neighborEnd])), 0 65 | } else { 66 | return &ParseTree{ 67 | Type: typ, 68 | Data: vbytes, 69 | }, nil, len(match) 70 | } 71 | }, 72 | } 73 | } 74 | 75 | func NewRegexpLexer(typ string, valid *regexp.Regexp) *Lexeme { 76 | return &Lexeme{ 77 | Name: typ, 78 | Lexer: func(s *Source, pos int) (*ParseTree, error, int) { 79 | match := s.Consume(valid, pos) 80 | if match == nil { 81 | neighborhood := pos 82 | neighborEnd := pos + 10 83 | if neighborEnd > len(s.buf) { 84 | neighborEnd = len(s.buf) 85 | } 86 | 87 | return nil, errors.New(fmt.Sprintf("expected regex match: %q at %q", valid.String(), s.buf[neighborhood:neighborEnd])), 0 88 | } else { 89 | return &ParseTree{ 90 | Type: typ, 91 | Data: match, 92 | }, nil, len(match) 93 | } 94 | }, 95 | } 96 | } 97 | 98 | func NewRuleLexer(rule string) *Lexeme { 99 | return &Lexeme{ 100 | Name: "~" + rule, 101 | Lexer: nil, 102 | } 103 | } 104 | 105 | func NewConcatLexer(name string, deps []*Lexeme) *Lexeme { 106 | return &Lexeme{ 107 | Name: name, 108 | Dependencies: deps, 109 | Lexer: func(s *Source, pos int) (*ParseTree, error, int) { 110 | children := make([]*ParseTree, 0, len(deps)) 111 | offset := 0 112 | for _, dep := range deps { 113 | tree, err, l := dep.Lexer(s, pos+offset) 114 | if err != nil { 115 | return nil, err, 0 116 | 
} else { 117 | if tree != nil { 118 | children = append(children, tree) 119 | } 120 | offset += l 121 | } 122 | } 123 | if len(children) == 1 { 124 | return children[0], nil, offset 125 | } 126 | return &ParseTree{Type: name, Data: nil, Children: children}, nil, offset 127 | }, 128 | } 129 | } 130 | 131 | func NewPlusClosure(lex *Lexeme) *Lexeme { 132 | return &Lexeme{ 133 | Name: lex.Name + "+", 134 | Dependencies: []*Lexeme{lex}, 135 | Lexer: func(s *Source, pos int) (*ParseTree, error, int) { 136 | start := pos 137 | resp := &ParseTree{Type: lex.Name + "+"} 138 | next, err, off := lex.Lexer(s, pos) 139 | if err != nil { 140 | return nil, err, 0 141 | } else { 142 | resp.Children = append(resp.Children, next) 143 | pos += off 144 | for { 145 | next, err, off = lex.Lexer(s, pos) 146 | if err != nil { 147 | break 148 | } 149 | resp.Children = append(resp.Children, next) 150 | pos += off 151 | } 152 | } 153 | 154 | return resp, nil, pos - start 155 | }, 156 | } 157 | } 158 | 159 | func NewStarClosure(lex *Lexeme) *Lexeme { 160 | return &Lexeme{ 161 | Name: lex.Name + "*", 162 | Dependencies: []*Lexeme{lex}, 163 | Lexer: func(s *Source, pos int) (*ParseTree, error, int) { 164 | start := pos 165 | resp := &ParseTree{Type: lex.Name + "*"} 166 | var next *ParseTree 167 | var err error 168 | var off int 169 | for { 170 | next, err, off = lex.Lexer(s, pos) 171 | if err != nil { 172 | break 173 | } 174 | resp.Children = append(resp.Children, next) 175 | pos += off 176 | } 177 | return resp, nil, pos - start 178 | }, 179 | } 180 | } 181 | 182 | func NewOptionClosure(lex *Lexeme) *Lexeme { 183 | return &Lexeme{ 184 | Name: lex.Name + "?", 185 | Dependencies: []*Lexeme{lex}, 186 | Lexer: func(s *Source, pos int) (*ParseTree, error, int) { 187 | tree, _, offset := lex.Lexer(s, pos) 188 | return tree, nil, offset 189 | }, 190 | } 191 | } 192 | 193 | func NewAlternateLexer(name string, lhs, rhs *Lexeme) *Lexeme { 194 | return &Lexeme{ 195 | Name: name, 196 | Dependencies: 
[]*Lexeme{lhs, rhs}, 197 | Lexer: func(s *Source, pos int) (*ParseTree, error, int) { 198 | tree, err, off := lhs.Lexer(s, pos) 199 | if err == nil { 200 | return tree, nil, off 201 | } else { 202 | tree, err, off = rhs.Lexer(s, pos) 203 | if err != nil { 204 | return nil, err, 0 205 | } 206 | return tree, nil, off 207 | } 208 | }, 209 | } 210 | } 211 | 212 | func NewDiscardLexer(lex *Lexeme) *Lexeme { 213 | return &Lexeme{ 214 | Name: lex.Name + "^", 215 | Dependencies: []*Lexeme{lex}, 216 | Lexer: func(s *Source, pos int) (*ParseTree, error, int) { 217 | _, _, offset := lex.Lexer(s, pos) 218 | return nil, nil, offset 219 | }, 220 | } 221 | } 222 | -------------------------------------------------------------------------------- /peg/language_test.go: -------------------------------------------------------------------------------- 1 | package peg 2 | 3 | import ( 4 | "testing" 5 | ) 6 | 7 | func TestSimpleLanguage(t *testing.T) { 8 | l := &Language{ 9 | root: NewLiteralLexer("prgm", "source"), 10 | } 11 | 12 | tree, err := l.ParseString("source") 13 | if err != nil { 14 | t.Error(err) 15 | return 16 | } 17 | 18 | if tree.Type != "prgm" { 19 | t.Errorf("Incorrect type parsed: %s", tree.Type) 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /peg/lexer.go: -------------------------------------------------------------------------------- 1 | package peg 2 | 3 | import ( 4 | "bufio" 5 | "bytes" 6 | "fmt" 7 | "io" 8 | "strings" 9 | "unicode" 10 | "unicode/utf8" 11 | ) 12 | 13 | type item struct { 14 | typ itemType 15 | pos int 16 | val string 17 | } 18 | 19 | func (i item) String() string { 20 | switch { 21 | case i.typ == itemEOF: 22 | return "EOF" 23 | case i.typ == itemError: 24 | return i.val 25 | } 26 | return fmt.Sprintf("%s:%q", i.typ, i.val) 27 | } 28 | 29 | type itemType int 30 | 31 | const ( 32 | itemUNKNOWN itemType = iota 33 | itemError itemType = iota 34 | itemAssignment 35 | itemQuote 36 | itemLiteral 37 | 
itemWhitespace 38 | itemNewline 39 | itemIdentifier 40 | itemRegexp 41 | itemClosure 42 | itemPlus 43 | itemAlternate 44 | itemOptional 45 | itemDiscard 46 | itemEOF 47 | ) 48 | 49 | func (i itemType) String() string { 50 | switch i { 51 | case itemError: 52 | return "itemError" 53 | case itemAssignment: 54 | return "itemAssignment" 55 | case itemQuote: 56 | return "itemQuote" 57 | case itemLiteral: 58 | return "itemLiteral" 59 | case itemWhitespace: 60 | return "itemWhitespace" 61 | case itemNewline: 62 | return "itemNewline" 63 | case itemIdentifier: 64 | return "itemIdentifier" 65 | case itemRegexp: 66 | return "itemRegexp" 67 | case itemEOF: 68 | return "itemEOF" 69 | case itemClosure: 70 | return "itemClosure" 71 | case itemPlus: 72 | return "itemPlus" 73 | case itemAlternate: 74 | return "itemAlternate" 75 | case itemOptional: 76 | return "itemOptional" 77 | case itemDiscard: 78 | return "itemDiscard" 79 | } 80 | return "UNKNOWN" 81 | } 82 | 83 | const eof = -1 84 | 85 | type stateFn func(*lexer) stateFn 86 | 87 | type lexer struct { 88 | input *bufio.Reader 89 | buffer bytes.Buffer 90 | state stateFn 91 | pos int 92 | start int 93 | items chan item 94 | } 95 | 96 | func (l *lexer) nextItem() item { 97 | item := <-l.items 98 | return item 99 | } 100 | 101 | func lex(input io.Reader) *lexer { 102 | l := &lexer{ 103 | input: bufio.NewReader(input), 104 | items: make(chan item, 1), 105 | } 106 | go l.run() 107 | return l 108 | } 109 | 110 | func (l *lexer) run() { 111 | for l.state = lexPeg; l.state != nil; { 112 | l.state = l.state(l) 113 | } 114 | close(l.items) 115 | } 116 | 117 | func (l *lexer) next() rune { 118 | r, w, err := l.input.ReadRune() 119 | if err == io.EOF { 120 | return eof 121 | } 122 | l.pos += w 123 | l.buffer.WriteRune(r) 124 | return r 125 | } 126 | 127 | func (l *lexer) peek() rune { 128 | lead, err := l.input.Peek(1) 129 | if err == io.EOF { 130 | return eof 131 | } else if err != nil { 132 | l.errorf("peek: %s", err.Error()) 133 | 
return 0 134 | } 135 | 136 | p, err := l.input.Peek(runeLen(lead[0])) 137 | if err == io.EOF { 138 | return eof 139 | } else if err != nil { 140 | l.errorf("peek: %s", err.Error()) 141 | return 0 142 | } 143 | r, _ := utf8.DecodeRune(p) 144 | return r 145 | } 146 | 147 | func runeLen(lead byte) int { 148 | if lead < 0xC0 { 149 | return 1 150 | } else if lead < 0xE0 { 151 | return 2 152 | } else if lead < 0xF0 { 153 | return 3 154 | } else { 155 | return 4 156 | } 157 | } 158 | 159 | func (l *lexer) emit(t itemType) { 160 | l.emitInner(t, 0, 0) 161 | } 162 | 163 | // emitInner trims left characters from the left, 164 | // right characters from the right side of the token 165 | // and emits that. 166 | func (l *lexer) emitInner(t itemType, left, right int) { 167 | token := l.buffer.String() 168 | l.items <- item{t, l.start + left, token[left : len(token)-right]} 169 | l.start = l.pos 170 | l.buffer.Truncate(0) 171 | } 172 | 173 | func (l *lexer) accept(valid string) bool { 174 | if strings.IndexRune(valid, l.peek()) >= 0 { 175 | l.next() 176 | return true 177 | } 178 | return false 179 | } 180 | 181 | func (l *lexer) acceptRun(valid string) { 182 | for strings.IndexRune(valid, l.peek()) >= 0 { 183 | l.next() 184 | } 185 | } 186 | 187 | func (l *lexer) errorf(format string, args ...interface{}) stateFn { 188 | l.items <- item{itemError, l.start, fmt.Sprintf(format, args...)} 189 | return nil 190 | } 191 | 192 | func (l *lexer) hasPrefix(prefix string) bool { 193 | p, err := l.input.Peek(len(prefix)) 194 | if err == io.EOF { 195 | return false 196 | } else if err != nil { 197 | l.errorf("hasPrefix: %s", err.Error()) 198 | return false 199 | } 200 | return string(p) == prefix 201 | } 202 | 203 | // Accept next count runes. Normally called after hasPrefix(). 
204 | func (l *lexer) nextRuneCount(count int) { 205 | for i := 0; i < count; i++ { 206 | l.next() 207 | } 208 | } 209 | 210 | func isIdentRune(r rune) bool { 211 | return unicode.IsLetter(r) || r == '_' 212 | } 213 | 214 | func lexPeg(l *lexer) stateFn { 215 | switch r := l.peek(); { 216 | case isIdentRune(r): 217 | return lexIdentifier 218 | case unicode.IsSpace(r) && r != '\n': 219 | return lexWhitespace 220 | case r == '\n': 221 | return lexNewline 222 | case r == '<': 223 | return lexAssignment 224 | case r == '\'': 225 | return lexLiteral 226 | case r == '~': 227 | return lexRegex 228 | case r == '*': 229 | return lexClosure 230 | case r == '+': 231 | return lexPlus 232 | case r == '/': 233 | return lexAlternate 234 | case r == '?': 235 | return lexOption 236 | case r == '^': 237 | return lexDiscard 238 | case r == eof: 239 | l.emit(itemEOF) 240 | return nil 241 | } 242 | 243 | return nil 244 | } 245 | 246 | func lexPlus(l *lexer) stateFn { 247 | l.next() 248 | l.emit(itemPlus) 249 | return lexPeg 250 | } 251 | 252 | func lexAlternate(l *lexer) stateFn { 253 | l.next() 254 | l.emit(itemAlternate) 255 | return lexPeg 256 | } 257 | 258 | func lexOption(l *lexer) stateFn { 259 | l.next() 260 | l.emit(itemOptional) 261 | return lexPeg 262 | } 263 | 264 | func lexDiscard(l *lexer) stateFn { 265 | l.next() 266 | l.emit(itemDiscard) 267 | return lexPeg 268 | } 269 | 270 | func lexClosure(l *lexer) stateFn { 271 | l.next() 272 | l.emit(itemClosure) 273 | return lexPeg 274 | } 275 | 276 | func lexIdentifier(l *lexer) stateFn { 277 | for isIdentRune(l.peek()) { 278 | l.next() 279 | } 280 | l.emit(itemIdentifier) 281 | return lexPeg 282 | } 283 | 284 | func lexWhitespace(l *lexer) stateFn { 285 | for { 286 | r := l.peek() 287 | if unicode.IsSpace(r) && r != '\n' { 288 | l.next() 289 | } else { 290 | break 291 | } 292 | } 293 | l.emit(itemWhitespace) 294 | return lexPeg 295 | } 296 | 297 | func lexNewline(l *lexer) stateFn { 298 | l.next() 299 | l.emit(itemNewline) 300 | 
return lexPeg 301 | } 302 | 303 | func lexAssignment(l *lexer) stateFn { 304 | l.next() 305 | if l.next() != '-' { 306 | l.errorf("expected <-") 307 | return nil 308 | } else { 309 | l.emit(itemAssignment) 310 | } 311 | return lexPeg 312 | } 313 | 314 | func lexLiteral(l *lexer) stateFn { 315 | l.next() // consume ' 316 | 317 | for { 318 | r := l.next() 319 | if r == '\\' && l.peek() == '\'' { 320 | l.next() 321 | } else if r == '\'' { 322 | l.emitInner(itemLiteral, 1, 1) 323 | return lexPeg 324 | } else if r == eof { 325 | l.errorf("eof while parsing literal") 326 | return nil 327 | } 328 | } 329 | } 330 | 331 | func lexRegex(l *lexer) stateFn { 332 | l.next() // consume ~ 333 | 334 | if l.peek() != '\'' { 335 | l.errorf("Expected \"'\" after ~") 336 | return nil 337 | } else { 338 | l.next() // consume ' 339 | } 340 | 341 | for { 342 | r := l.next() 343 | if r == '\\' && l.peek() == '\'' { 344 | l.next() 345 | } else if r == '\'' { 346 | l.emitInner(itemRegexp, 2, 1) 347 | return lexPeg 348 | } else if r == eof { 349 | l.errorf("eof while parsing regexp") 350 | return nil 351 | } 352 | } 353 | } 354 | -------------------------------------------------------------------------------- /peg/lexer_test.go: -------------------------------------------------------------------------------- 1 | package peg 2 | 3 | import ( 4 | "strings" 5 | "testing" 6 | ) 7 | 8 | type LexTest struct { 9 | input string 10 | exp []item 11 | } 12 | 13 | var lexTestTable = []LexTest{ 14 | LexTest{ 15 | "prgm <- 'a'", 16 | []item{ 17 | item{typ: itemIdentifier, val: "prgm"}, 18 | item{typ: itemWhitespace, val: " "}, 19 | item{typ: itemAssignment, val: "<-"}, 20 | item{typ: itemWhitespace, val: " "}, 21 | item{typ: itemLiteral, val: "a"}, 22 | item{typ: itemEOF, val: ""}, 23 | }, 24 | }, 25 | LexTest{ 26 | "prgm <- _ a b", 27 | []item{ 28 | item{typ: itemIdentifier, val: "prgm"}, 29 | item{typ: itemWhitespace, val: " "}, 30 | item{typ: itemAssignment, val: "<-"}, 31 | item{typ: itemWhitespace, 
val: " "}, 32 | item{typ: itemIdentifier, val: "_"}, 33 | item{typ: itemWhitespace, val: " "}, 34 | item{typ: itemIdentifier, val: "a"}, 35 | item{typ: itemWhitespace, val: " "}, 36 | item{typ: itemIdentifier, val: "b"}, 37 | item{typ: itemEOF, val: ""}, 38 | }, 39 | }, 40 | LexTest{ 41 | "prgm <- b*+/?", 42 | []item{ 43 | item{typ: itemIdentifier, val: "prgm"}, 44 | item{typ: itemWhitespace, val: " "}, 45 | item{typ: itemAssignment, val: "<-"}, 46 | item{typ: itemWhitespace, val: " "}, 47 | item{typ: itemIdentifier, val: "b"}, 48 | item{typ: itemClosure, val: "*"}, 49 | item{typ: itemPlus, val: "+"}, 50 | item{typ: itemAlternate, val: "/"}, 51 | item{typ: itemOptional, val: "?"}, 52 | item{typ: itemEOF, val: ""}, 53 | }, 54 | }, 55 | LexTest{ 56 | "prgm <- ~'-?\\d+.?\\d*'", 57 | []item{ 58 | item{typ: itemIdentifier, val: "prgm"}, 59 | item{typ: itemWhitespace, val: " "}, 60 | item{typ: itemAssignment, val: "<-"}, 61 | item{typ: itemWhitespace, val: " "}, 62 | item{typ: itemRegexp, val: "-?\\d+.?\\d*"}, 63 | item{typ: itemEOF, val: ""}, 64 | }, 65 | }, 66 | LexTest{ 67 | "prgm <- a b\na <- 'c'\n b <- ~'\\d+'", 68 | []item{ 69 | item{typ: itemIdentifier, val: "prgm"}, 70 | item{typ: itemWhitespace, val: " "}, 71 | item{typ: itemAssignment, val: "<-"}, 72 | item{typ: itemWhitespace, val: " "}, 73 | item{typ: itemIdentifier, val: "a"}, 74 | item{typ: itemWhitespace, val: " "}, 75 | item{typ: itemIdentifier, val: "b"}, 76 | item{typ: itemNewline, val: "\n"}, 77 | item{typ: itemIdentifier, val: "a"}, 78 | item{typ: itemWhitespace, val: " "}, 79 | item{typ: itemAssignment, val: "<-"}, 80 | item{typ: itemWhitespace, val: " "}, 81 | item{typ: itemLiteral, val: "c"}, 82 | item{typ: itemNewline, val: "\n"}, 83 | item{typ: itemWhitespace, val: " "}, 84 | item{typ: itemIdentifier, val: "b"}, 85 | item{typ: itemWhitespace, val: " "}, 86 | item{typ: itemAssignment, val: "<-"}, 87 | item{typ: itemWhitespace, val: " "}, 88 | item{typ: itemRegexp, val: "\\d+"}, 89 | item{typ: 
// ParseTree is one node of a parse result. Type names the node, Data holds
// the bytes attached to it and Children holds its sub-nodes in order.
type ParseTree struct {
	Type     string
	Data     []byte
	Children []*ParseTree
}

// prettyPrint renders the node and all of its descendants as text. Every
// node contributes two lines, both prefixed by indent: its Type, then its
// Data as a quoted string. Each level of nesting extends the prefix.
func (p *ParseTree) prettyPrint(indent string) string {
	header := fmt.Sprintf("%s %s\n", indent, p.Type)
	body := fmt.Sprintf("%s %q\n", indent, string(p.Data))
	out := header + body

	nested := indent + " |"
	for i := range p.Children {
		out += p.Children[i].prettyPrint(nested)
	}
	return out
}

// String returns the pretty-printed tree, starting with an empty indent.
func (p *ParseTree) String() string {
	return p.prettyPrint("")
}
-------------------------------------------------------------------------------- /peg/parse_tree_test.go: -------------------------------------------------------------------------------- 1 | package peg 2 | -------------------------------------------------------------------------------- /peg/parser.go: -------------------------------------------------------------------------------- 1 | package peg 2 | 3 | import ( 4 | "errors" 5 | "fmt" 6 | "io" 7 | "regexp" 8 | "strings" 9 | ) 10 | 11 | type parseStateFn func(*parser) parseStateFn 12 | 13 | type parser struct { 14 | lex *lexer 15 | state parseStateFn 16 | parts chan *Lexeme 17 | lastErr error 18 | } 19 | 20 | func NewParser(input io.Reader) (*Language, error) { 21 | l := lex(input) 22 | p := &parser{lex: l} 23 | return p.prepare() 24 | } 25 | 26 | func (p *parser) Errorf(format string, args ...interface{}) { 27 | s := fmt.Sprintf(format, args...) 28 | p.lastErr = errors.New(s) 29 | } 30 | 31 | func (p *parser) prepare() (*Language, error) { 32 | p.parts = make(chan *Lexeme) 33 | in := make(chan *Language, 1) 34 | err := make(chan error, 1) 35 | go constructLanguage(p.parts, in, err) 36 | 37 | for p.state = parseLexeme; p.state != nil; { 38 | p.state = p.state(p) 39 | } 40 | 41 | close(p.parts) 42 | 43 | if p.lastErr != nil { 44 | return nil, p.lastErr 45 | } 46 | 47 | select { 48 | case lang := <-in: 49 | return lang, nil 50 | case err := <-err: 51 | return nil, err 52 | } 53 | } 54 | 55 | func constructLanguage(parts chan *Lexeme, success chan *Language, failure chan error) { 56 | var lexemes = make(map[string]*Lexeme) 57 | first, ok := <-parts 58 | if !ok { 59 | failure <- errors.New("Parts channel was empty.") 60 | return 61 | } 62 | lexemes[first.Name] = first 63 | for part := range parts { 64 | lexemes[part.Name] = part 65 | } 66 | 67 | lex, err := resolveDependencies(first, lexemes) 68 | if err != nil { 69 | failure <- err 70 | return 71 | } else { 72 | success <- &Language{ 73 | root: lex, 74 | } 75 | 
return 76 | } 77 | } 78 | 79 | func resolveDependencies(lex *Lexeme, env map[string]*Lexeme) (*Lexeme, error) { 80 | if lex.isResolved { 81 | return lex, nil 82 | } 83 | old := lex 84 | if lex.Lexer == nil { 85 | p, ok := env[lex.Name[1:]] 86 | if !ok { 87 | return nil, errors.New(fmt.Sprintf("Cannot resolve dependency %s\n Available are: %v", lex.Name[1:], env)) 88 | } else { 89 | lex = p 90 | } 91 | } 92 | lex.isResolved = true 93 | 94 | for i, dep := range lex.Dependencies { 95 | var err error 96 | lex.Dependencies[i], err = resolveDependencies(dep, env) 97 | if err != nil { 98 | return nil, err 99 | } 100 | } 101 | 102 | (*old) = (*lex) 103 | 104 | return lex, nil 105 | } 106 | 107 | func parseLexeme(p *parser) parseStateFn { 108 | next, ok := <-p.lex.items 109 | if !ok { 110 | return nil 111 | } 112 | switch next.typ { 113 | case itemIdentifier: 114 | return parseRule(next.val) 115 | case itemWhitespace: 116 | return parseLexeme 117 | case itemError: 118 | p.Errorf("lex error: %s", next.String()) 119 | default: 120 | panic(next.typ.String()) 121 | } 122 | return nil 123 | } 124 | 125 | func parseRule(name string) parseStateFn { 126 | return func(p *parser) parseStateFn { 127 | next, ok := <-p.lex.items 128 | if !ok { 129 | p.Errorf("item channel drained unexpectedly in parseRule") 130 | return nil 131 | } 132 | switch next.typ { 133 | case itemWhitespace: 134 | return parseRule(name) 135 | case itemAssignment: 136 | return parseRuleBody(name, nil) 137 | } 138 | return nil 139 | } 140 | } 141 | 142 | func parseRuleBody(name string, parts []*Lexeme) parseStateFn { 143 | quoteResolver := strings.NewReplacer("\\'", "'") 144 | return func(p *parser) parseStateFn { 145 | next, ok := <-p.lex.items 146 | if !ok { 147 | p.Errorf("item channel drained unexpectedly in parseRuleBody") 148 | return nil 149 | } 150 | switch next.typ { 151 | case itemWhitespace: 152 | return parseRuleBody(name, parts) 153 | case itemLiteral: 154 | next.val = quoteResolver.Replace(next.val) 
155 | return parseRuleBody(name, append(parts, NewLiteralLexer(name, next.val))) 156 | case itemRegexp: 157 | return parseRuleBody(name, append(parts, NewRegexpLexer(name, regexp.MustCompile(next.val)))) 158 | case itemIdentifier: 159 | return parseRuleBody(name, append(parts, NewRuleLexer(next.val))) 160 | case itemPlus: 161 | if len(parts) == 0 { 162 | p.Errorf("expected lexeme definition before '+'") 163 | return nil 164 | } 165 | lex := parts[len(parts)-1] 166 | parts := parts[:len(parts)-1] 167 | return parseRuleBody(name, append(parts, NewPlusClosure(lex))) 168 | case itemClosure: 169 | if len(parts) == 0 { 170 | p.Errorf("expected lexeme definition before '*'") 171 | return nil 172 | } 173 | lex := parts[len(parts)-1] 174 | parts := parts[:len(parts)-1] 175 | return parseRuleBody(name, append(parts, NewStarClosure(lex))) 176 | case itemOptional: 177 | if len(parts) == 0 { 178 | p.Errorf("expected lexeme definition before '?'") 179 | return nil 180 | } 181 | lex := parts[len(parts)-1] 182 | parts := parts[:len(parts)-1] 183 | return parseRuleBody(name, append(parts, NewOptionClosure(lex))) 184 | case itemDiscard: 185 | if len(parts) == 0 { 186 | p.Errorf("expected lexeme definition before '^'") 187 | return nil 188 | } 189 | lex := parts[len(parts)-1] 190 | parts := parts[:len(parts)-1] 191 | return parseRuleBody(name, append(parts, NewDiscardLexer(lex))) 192 | case itemAlternate: 193 | return parseAlternateRHS(name, parts) 194 | 195 | case itemNewline, itemEOF: 196 | if len(parts) == 0 { 197 | return nil 198 | } else if len(parts) == 1 { // Prevent single literals from being stuck in an array. 
199 | p.parts <- parts[0] 200 | } else { 201 | p.parts <- NewConcatLexer(name, parts) 202 | } 203 | return parseLexeme 204 | default: 205 | p.Errorf("unexpected token : %v", next) 206 | return nil 207 | } 208 | return nil 209 | } 210 | } 211 | 212 | func parseAlternateRHS(name string, parts []*Lexeme) parseStateFn { 213 | return func(p *parser) parseStateFn { 214 | next, ok := <-p.lex.items 215 | if !ok { 216 | p.Errorf("expected lexeme after '/'") 217 | return nil 218 | } 219 | var rhs *Lexeme 220 | switch next.typ { 221 | case itemWhitespace: 222 | return parseAlternateRHS(name, parts) 223 | case itemLiteral: 224 | rhs = NewLiteralLexer(name, next.val) 225 | case itemRegexp: 226 | rhs = NewRegexpLexer(name, regexp.MustCompile(next.val)) 227 | case itemIdentifier: 228 | rhs = NewRuleLexer(next.val) 229 | default: 230 | p.Errorf("unexpected token : %v", next) 231 | return nil 232 | } 233 | 234 | lhs := parts[len(parts)-1] 235 | parts := parts[:len(parts)-1] 236 | 237 | return parseRuleBody(name, append(parts, NewAlternateLexer(name, lhs, rhs))) 238 | } 239 | } 240 | -------------------------------------------------------------------------------- /peg/parser_test.go: -------------------------------------------------------------------------------- 1 | package peg 2 | 3 | import ( 4 | "bytes" 5 | "errors" 6 | "fmt" 7 | "strings" 8 | "testing" 9 | ) 10 | 11 | type ParseTest struct { 12 | language string 13 | input string 14 | exp *ParseTree 15 | } 16 | 17 | var parseTestTable = []ParseTest{ 18 | ParseTest{ 19 | "prgm <- 'a'", 20 | "a", 21 | &ParseTree{"prgm", []byte("a"), nil}, 22 | }, 23 | ParseTest{ 24 | "prgm <- ~'\\d+'", 25 | "74538", 26 | &ParseTree{"prgm", []byte("74538"), nil}, 27 | }, 28 | ParseTest{ 29 | "prgm <- 'a'_'b' \n _ <- ~'\\s+'", 30 | "a b", 31 | &ParseTree{ 32 | "prgm", 33 | nil, 34 | []*ParseTree{ 35 | &ParseTree{"prgm", []byte("a"), nil}, 36 | &ParseTree{"_", []byte(" "), nil}, 37 | &ParseTree{"prgm", []byte("b"), nil}, 38 | }, 39 | }, 40 | }, 41 | 
ParseTest{ 42 | "prgm <- name '=' number \n name <- ~'[a-zA-Z]+' \n number <- ~'\\d+'", 43 | "variableName=432", 44 | &ParseTree{ 45 | "prgm", 46 | nil, 47 | []*ParseTree{ 48 | &ParseTree{"name", []byte("variableName"), nil}, 49 | &ParseTree{"prgm", []byte("="), nil}, 50 | &ParseTree{"number", []byte("432"), nil}, 51 | }, 52 | }, 53 | }, 54 | ParseTest{ 55 | "prgm <- a+\na <- 'a'", 56 | "aaa", 57 | &ParseTree{ 58 | "a+", 59 | nil, 60 | []*ParseTree{ 61 | &ParseTree{"a", []byte("a"), nil}, 62 | &ParseTree{"a", []byte("a"), nil}, 63 | &ParseTree{"a", []byte("a"), nil}, 64 | }, 65 | }, 66 | }, 67 | ParseTest{ 68 | "prgm <- a+\na <- 'a' _?\n_ <- ~'\\s'", 69 | "aa a", 70 | &ParseTree{ 71 | "a+", 72 | nil, 73 | []*ParseTree{ 74 | &ParseTree{"a", []byte("a"), nil}, 75 | &ParseTree{"a", nil, []*ParseTree{ 76 | &ParseTree{"a", []byte("a"), nil}, 77 | &ParseTree{"_", []byte(" "), nil}, 78 | }}, 79 | &ParseTree{"a", []byte("a"), nil}, 80 | }, 81 | }, 82 | }, 83 | ParseTest{ 84 | "prgm <- a*\na <- 'a' _?^\n_ <- ~'\\s+'", 85 | "aa \ta", 86 | &ParseTree{ 87 | "a*", 88 | nil, 89 | []*ParseTree{ 90 | &ParseTree{"a", []byte("a"), nil}, 91 | &ParseTree{"a", []byte("a"), nil}, 92 | &ParseTree{"a", []byte("a"), nil}, 93 | }, 94 | }, 95 | }, 96 | ParseTest{ 97 | "prgm <- a*\na <- 'a' _?^ '\\''\n_ <- ~'\\s+'", 98 | "a'a \t'a'", 99 | &ParseTree{ 100 | "a*", 101 | nil, 102 | []*ParseTree{ 103 | &ParseTree{"a", nil, []*ParseTree{ 104 | &ParseTree{"a", []byte("a"), nil}, 105 | &ParseTree{"a", []byte("'"), nil}, 106 | }}, 107 | &ParseTree{"a", nil, []*ParseTree{ 108 | &ParseTree{"a", []byte("a"), nil}, 109 | &ParseTree{"a", []byte("'"), nil}, 110 | }}, 111 | &ParseTree{"a", nil, []*ParseTree{ 112 | &ParseTree{"a", []byte("a"), nil}, 113 | &ParseTree{"a", []byte("'"), nil}, 114 | }}, 115 | }, 116 | }, 117 | }, 118 | ParseTest{ 119 | "prgm <- a*\na <- 'a' _?\n_ <- ~'\\s+'", 120 | "aa \ta", 121 | &ParseTree{ 122 | "a*", 123 | nil, 124 | []*ParseTree{ 125 | &ParseTree{"a", []byte("a"), nil}, 126 
| &ParseTree{"a", nil, []*ParseTree{ 127 | &ParseTree{"a", []byte("a"), nil}, 128 | &ParseTree{"_", []byte(" \t"), nil}, 129 | }}, 130 | &ParseTree{"a", []byte("a"), nil}, 131 | }, 132 | }, 133 | }, 134 | ParseTest{ 135 | "prgm <- a* b\na <- 'a'\nb <- 'b'", 136 | "aaab", 137 | &ParseTree{ 138 | "prgm", 139 | nil, 140 | []*ParseTree{ 141 | &ParseTree{"a*", nil, []*ParseTree{ 142 | &ParseTree{"a", []byte("a"), nil}, 143 | &ParseTree{"a", []byte("a"), nil}, 144 | &ParseTree{"a", []byte("a"), nil}, 145 | }}, 146 | &ParseTree{"b", []byte("b"), nil}, 147 | }, 148 | }, 149 | }, 150 | ParseTest{ 151 | "prgm <- a+ b\na <- 'a'\nb <- 'b'", 152 | "aaab", 153 | &ParseTree{ 154 | "prgm", 155 | nil, 156 | []*ParseTree{ 157 | &ParseTree{"a+", nil, []*ParseTree{ 158 | &ParseTree{"a", []byte("a"), nil}, 159 | &ParseTree{"a", []byte("a"), nil}, 160 | &ParseTree{"a", []byte("a"), nil}, 161 | }}, 162 | &ParseTree{"b", []byte("b"), nil}, 163 | }, 164 | }, 165 | }, 166 | ParseTest{ 167 | "prgm <- item+\nitem <- a/ b\na <- 'a'\n b <- 'b'", 168 | "abaabba", 169 | &ParseTree{ 170 | "item+", 171 | nil, 172 | []*ParseTree{ 173 | &ParseTree{"a", []byte("a"), nil}, 174 | &ParseTree{"b", []byte("b"), nil}, 175 | &ParseTree{"a", []byte("a"), nil}, 176 | &ParseTree{"a", []byte("a"), nil}, 177 | &ParseTree{"b", []byte("b"), nil}, 178 | &ParseTree{"b", []byte("b"), nil}, 179 | &ParseTree{"a", []byte("a"), nil}, 180 | }, 181 | }, 182 | }, 183 | ParseTest{ 184 | "prgm <- list+\nlist <- 'c' a+ 'd'\na <- 'a' / list", 185 | "cacaaacaaddd", 186 | &ParseTree{ 187 | "list+", 188 | nil, 189 | []*ParseTree{ 190 | &ParseTree{"list", nil, []*ParseTree{ 191 | &ParseTree{"list", []byte("c"), nil}, 192 | &ParseTree{"a+", nil, []*ParseTree{ 193 | &ParseTree{"a", []byte("a"), nil}, 194 | &ParseTree{"list", nil, []*ParseTree{ 195 | &ParseTree{"list", []byte("c"), nil}, 196 | &ParseTree{"a+", nil, []*ParseTree{ 197 | &ParseTree{"a", []byte("a"), nil}, 198 | &ParseTree{"a", []byte("a"), nil}, 199 | &ParseTree{"a", 
[]byte("a"), nil}, 200 | &ParseTree{"list", nil, []*ParseTree{ 201 | &ParseTree{"list", []byte("c"), nil}, 202 | &ParseTree{"a+", nil, []*ParseTree{ 203 | &ParseTree{"a", []byte("a"), nil}, 204 | &ParseTree{"a", []byte("a"), nil}, 205 | }}, 206 | &ParseTree{"list", []byte("d"), nil}, 207 | }}, 208 | }}, 209 | &ParseTree{"list", []byte("d"), nil}, 210 | }}, 211 | }}, 212 | &ParseTree{"list", []byte("d"), nil}, 213 | }}, 214 | }, 215 | }, 216 | }, 217 | } 218 | 219 | func TestParseTable(t *testing.T) { 220 | for _, tc := range parseTestTable { 221 | parser, err := NewParser(strings.NewReader(tc.language)) 222 | if err != nil { 223 | t.Error(tc.input) 224 | t.Error(err) 225 | return 226 | } 227 | 228 | tree, err := parser.Parse(strings.NewReader(tc.input)) 229 | if err != nil { 230 | t.Error(tc.input) 231 | t.Error(err) 232 | return 233 | } 234 | 235 | if err := treeCompare(tree, tc.exp); err != nil { 236 | t.Error(tc.input) 237 | fmt.Println("Got:") 238 | dumpTree(tree, "") 239 | fmt.Println("Expected:") 240 | dumpTree(tc.exp, "") 241 | t.Error(err) 242 | return 243 | } 244 | } 245 | } 246 | 247 | func treeCompare(a, b *ParseTree) error { 248 | if a == b { 249 | return nil 250 | } else if a == nil || b == nil { 251 | return errors.New(fmt.Sprintf("a or b is nil %v %v", a, b)) 252 | } 253 | if a.Type != b.Type { 254 | return errors.New(fmt.Sprintf("tree type mismatch: %q exp: %q", a.Type, b.Type)) 255 | } 256 | if !bytes.Equal(a.Data, b.Data) { 257 | return errors.New(fmt.Sprintf("tree data mismatch: %q exp: %q", string(a.Data), string(b.Data))) 258 | } 259 | 260 | if len(a.Children) != len(b.Children) { 261 | return errors.New(fmt.Sprintf("trees have different number of chidren: %d exp: %d", len(a.Children), len(b.Children))) 262 | } 263 | 264 | for i, child := range a.Children { 265 | if err := treeCompare(child, b.Children[i]); err != nil { 266 | return err 267 | } 268 | } 269 | 270 | return nil 271 | } 272 | 273 | func dumpTree(tree *ParseTree, indent string) { 
// Source holds the complete input text in memory so that it can be matched
// at arbitrary byte offsets.
type Source struct {
	buf []byte
}

// NewSource reads all of in and returns a Source over the bytes read.
// A read error is returned unchanged, together with a nil Source.
func NewSource(in io.Reader) (*Source, error) {
	buf, err := ioutil.ReadAll(in)
	if err != nil {
		return nil, err
	}
	return &Source{buf: buf}, nil
}

// Consume tries to match regex against the text that starts at byte offset
// pos. It returns the matched bytes (a slice of the underlying buffer) only
// when the match begins exactly at pos, and nil otherwise. A pos outside the
// buffer, that is negative or greater than its length, never matches.
func (s *Source) Consume(regex *regexp.Regexp, pos int) []byte {
	if pos < 0 || pos > len(s.buf) {
		return nil
	}
	loc := regex.FindIndex(s.buf[pos:])
	// FindIndex reports the leftmost match; it only counts when it is
	// anchored at the requested position.
	if loc == nil || loc[0] != 0 {
		return nil
	}
	return s.buf[pos : pos+loc[1]]
}

// ConsumeLiteral tries to match the literal valid at byte offset pos.
// It returns valid when the text at pos starts with it and nil otherwise.
// Nothing matches at or past the end of the buffer or at a negative offset.
func (s *Source) ConsumeLiteral(valid []byte, pos int) []byte {
	if pos < 0 || pos >= len(s.buf) {
		return nil
	}
	if bytes.HasPrefix(s.buf[pos:], valid) {
		return valid
	}
	return nil
}