├── .gitignore ├── LICENSE ├── README.md ├── api.go ├── api_easy.go ├── api_easy_context.go ├── api_test.go ├── cfgrammar.go ├── cfgrammar_comments.go ├── cfgrammar_fmr.go ├── cfgrammar_regex.go ├── cfgrammar_special.go ├── cfgrammar_terminal.go ├── cfgrammar_test.go ├── earley.go ├── earley_ast.go ├── earley_eval.go ├── earley_fmr.go ├── earley_nodeprint.go ├── earley_stringer.go ├── earley_terminal_match.go ├── earley_test.go ├── examples ├── arithmetic │ ├── arithmetic.grammar │ ├── input.txt │ ├── main.go │ └── math.js ├── builtin │ ├── any.grammar │ ├── any.txt │ ├── builtin.grammar │ ├── cn_input.txt │ ├── cn_num.grammar │ ├── company.js │ ├── input.txt │ ├── main.go │ ├── math.js │ └── tianjin.txt └── math │ ├── README.md │ ├── grammars │ ├── latex.math.grammar │ ├── math.en.grammar │ ├── math.grammar │ ├── math.zh.grammar │ ├── number.en.grammar │ └── number.zh.grammar │ ├── input.txt │ ├── main.go │ └── math.js ├── frame_api.go ├── frame_api_test.go ├── funcs.go ├── funcs_test.go ├── go.mod ├── go.sum ├── grammar_index.go ├── grammar_index_test.go ├── grammar_refine.go ├── grammar_tokens.go ├── grammars ├── limit.grammar ├── math.zh.grammar ├── number.zh.grammar ├── order.grammar ├── sql.grammar └── time.grammar ├── list_test.go ├── local_grammar.go ├── local_grammar_test.go ├── math_funcs.go ├── math_funcs_test.go ├── node_methods.go ├── regexp_tagger.go ├── sf.grammar ├── termtype_jsonenums.go ├── termtype_string.go ├── types.go └── types_util.go /.gitignore: -------------------------------------------------------------------------------- 1 | # Binaries for programs and plugins 2 | *.exe 3 | *.dll 4 | *.so 5 | *.dylib 6 | 7 | # Test binary, build with `go test -c` 8 | *.test 9 | 10 | # Output of the go coverage tool, specifically when used with LiteIDE 11 | *.out 12 | 13 | # Project-local glide cache, RE: https://github.com/Masterminds/glide/issues/736 14 | .glide/ 15 | 16 | dict 17 | dicts 18 | 19 | examples/arithmetic/arithmetic 20 | 
examples/builtin/builtin 21 | 22 | *.swp 23 | 24 | .vscode/ 25 | .DS_Store 26 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2018 Zhanliang Liu. All rights reserved. 2 | 3 | Apache License 4 | Version 2.0, January 2004 5 | http://www.apache.org/licenses/ 6 | 7 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 8 | 9 | 1. Definitions. 10 | 11 | "License" shall mean the terms and conditions for use, reproduction, 12 | and distribution as defined by Sections 1 through 9 of this document. 13 | 14 | "Licensor" shall mean the copyright owner or entity authorized by 15 | the copyright owner that is granting the License. 16 | 17 | "Legal Entity" shall mean the union of the acting entity and all 18 | other entities that control, are controlled by, or are under common 19 | control with that entity. For the purposes of this definition, 20 | "control" means (i) the power, direct or indirect, to cause the 21 | direction or management of such entity, whether by contract or 22 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 23 | outstanding shares, or (iii) beneficial ownership of such entity. 24 | 25 | "You" (or "Your") shall mean an individual or Legal Entity 26 | exercising permissions granted by this License. 27 | 28 | "Source" form shall mean the preferred form for making modifications, 29 | including but not limited to software source code, documentation 30 | source, and configuration files. 31 | 32 | "Object" form shall mean any form resulting from mechanical 33 | transformation or translation of a Source form, including but 34 | not limited to compiled object code, generated documentation, 35 | and conversions to other media types. 
36 | 37 | "Work" shall mean the work of authorship, whether in Source or 38 | Object form, made available under the License, as indicated by a 39 | copyright notice that is included in or attached to the work 40 | (an example is provided in the Appendix below). 41 | 42 | "Derivative Works" shall mean any work, whether in Source or Object 43 | form, that is based on (or derived from) the Work and for which the 44 | editorial revisions, annotations, elaborations, or other modifications 45 | represent, as a whole, an original work of authorship. For the purposes 46 | of this License, Derivative Works shall not include works that remain 47 | separable from, or merely link (or bind by name) to the interfaces of, 48 | the Work and Derivative Works thereof. 49 | 50 | "Contribution" shall mean any work of authorship, including 51 | the original version of the Work and any modifications or additions 52 | to that Work or Derivative Works thereof, that is intentionally 53 | submitted to Licensor for inclusion in the Work by the copyright owner 54 | or by an individual or Legal Entity authorized to submit on behalf of 55 | the copyright owner. For the purposes of this definition, "submitted" 56 | means any form of electronic, verbal, or written communication sent 57 | to the Licensor or its representatives, including but not limited to 58 | communication on electronic mailing lists, source code control systems, 59 | and issue tracking systems that are managed by, or on behalf of, the 60 | Licensor for the purpose of discussing and improving the Work, but 61 | excluding communication that is conspicuously marked or otherwise 62 | designated in writing by the copyright owner as "Not a Contribution." 63 | 64 | "Contributor" shall mean Licensor and any individual or Legal Entity 65 | on behalf of whom a Contribution has been received by Licensor and 66 | subsequently incorporated within the Work. 67 | 68 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 69 | this License, each Contributor hereby grants to You a perpetual, 70 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 71 | copyright license to reproduce, prepare Derivative Works of, 72 | publicly display, publicly perform, sublicense, and distribute the 73 | Work and such Derivative Works in Source or Object form. 74 | 75 | 3. Grant of Patent License. Subject to the terms and conditions of 76 | this License, each Contributor hereby grants to You a perpetual, 77 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 78 | (except as stated in this section) patent license to make, have made, 79 | use, offer to sell, sell, import, and otherwise transfer the Work, 80 | where such license applies only to those patent claims licensable 81 | by such Contributor that are necessarily infringed by their 82 | Contribution(s) alone or by combination of their Contribution(s) 83 | with the Work to which such Contribution(s) was submitted. If You 84 | institute patent litigation against any entity (including a 85 | cross-claim or counterclaim in a lawsuit) alleging that the Work 86 | or a Contribution incorporated within the Work constitutes direct 87 | or contributory patent infringement, then any patent licenses 88 | granted to You under this License for that Work shall terminate 89 | as of the date such litigation is filed. 90 | 91 | 4. Redistribution. 
You may reproduce and distribute copies of the 92 | Work or Derivative Works thereof in any medium, with or without 93 | modifications, and in Source or Object form, provided that You 94 | meet the following conditions: 95 | 96 | (a) You must give any other recipients of the Work or 97 | Derivative Works a copy of this License; and 98 | 99 | (b) You must cause any modified files to carry prominent notices 100 | stating that You changed the files; and 101 | 102 | (c) You must retain, in the Source form of any Derivative Works 103 | that You distribute, all copyright, patent, trademark, and 104 | attribution notices from the Source form of the Work, 105 | excluding those notices that do not pertain to any part of 106 | the Derivative Works; and 107 | 108 | (d) If the Work includes a "NOTICE" text file as part of its 109 | distribution, then any Derivative Works that You distribute must 110 | include a readable copy of the attribution notices contained 111 | within such NOTICE file, excluding those notices that do not 112 | pertain to any part of the Derivative Works, in at least one 113 | of the following places: within a NOTICE text file distributed 114 | as part of the Derivative Works; within the Source form or 115 | documentation, if provided along with the Derivative Works; or, 116 | within a display generated by the Derivative Works, if and 117 | wherever such third-party notices normally appear. The contents 118 | of the NOTICE file are for informational purposes only and 119 | do not modify the License. You may add Your own attribution 120 | notices within Derivative Works that You distribute, alongside 121 | or as an addendum to the NOTICE text from the Work, provided 122 | that such additional attribution notices cannot be construed 123 | as modifying the License. 
124 | 125 | You may add Your own copyright statement to Your modifications and 126 | may provide additional or different license terms and conditions 127 | for use, reproduction, or distribution of Your modifications, or 128 | for any such Derivative Works as a whole, provided Your use, 129 | reproduction, and distribution of the Work otherwise complies with 130 | the conditions stated in this License. 131 | 132 | 5. Submission of Contributions. Unless You explicitly state otherwise, 133 | any Contribution intentionally submitted for inclusion in the Work 134 | by You to the Licensor shall be under the terms and conditions of 135 | this License, without any additional terms or conditions. 136 | Notwithstanding the above, nothing herein shall supersede or modify 137 | the terms of any separate license agreement you may have executed 138 | with Licensor regarding such Contributions. 139 | 140 | 6. Trademarks. This License does not grant permission to use the trade 141 | names, trademarks, service marks, or product names of the Licensor, 142 | except as required for reasonable and customary use in describing the 143 | origin of the Work and reproducing the content of the NOTICE file. 144 | 145 | 7. Disclaimer of Warranty. Unless required by applicable law or 146 | agreed to in writing, Licensor provides the Work (and each 147 | Contributor provides its Contributions) on an "AS IS" BASIS, 148 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 149 | implied, including, without limitation, any warranties or conditions 150 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 151 | PARTICULAR PURPOSE. You are solely responsible for determining the 152 | appropriateness of using or redistributing the Work and assume any 153 | risks associated with Your exercise of permissions under this License. 154 | 155 | 8. Limitation of Liability. 
In no event and under no legal theory, 156 | whether in tort (including negligence), contract, or otherwise, 157 | unless required by applicable law (such as deliberate and grossly 158 | negligent acts) or agreed to in writing, shall any Contributor be 159 | liable to You for damages, including any direct, indirect, special, 160 | incidental, or consequential damages of any character arising as a 161 | result of this License or out of the use or inability to use the 162 | Work (including but not limited to damages for loss of goodwill, 163 | work stoppage, computer failure or malfunction, or any and all 164 | other commercial damages or losses), even if such Contributor 165 | has been advised of the possibility of such damages. 166 | 167 | 9. Accepting Warranty or Additional Liability. While redistributing 168 | the Work or Derivative Works thereof, You may choose to offer, 169 | and charge a fee for, acceptance of support, warranty, indemnity, 170 | or other liability obligations and/or rights consistent with this 171 | License. However, in accepting such obligations, You may act only 172 | on Your own behalf and on Your sole responsibility, not on behalf 173 | of any other Contributor, and only if You agree to indemnify, 174 | defend, and hold each Contributor harmless for any liability 175 | incurred by, or claims asserted against, such Contributor by reason 176 | of your accepting any such warranty or additional liability. 177 | 178 | END OF TERMS AND CONDITIONS 179 | 180 | APPENDIX: How to apply the Apache License to your work. 181 | 182 | To apply the Apache License to your work, attach the following 183 | boilerplate notice, with the fields enclosed by brackets "[]" 184 | replaced with your own identifying information. (Don't include 185 | the brackets!) The text should be enclosed in the appropriate 186 | comment syntax for the file format. 
We also recommend that a 187 | file or class name and description of purpose be included on the 188 | same "printed page" as the copyright notice for easier 189 | identification within third-party archives. 190 | 191 | Copyright 2018, Zhanliang Liu. 192 | 193 | Licensed under the Apache License, Version 2.0 (the "License"); 194 | you may not use this file except in compliance with the License. 195 | You may obtain a copy of the License at 196 | 197 | http://www.apache.org/licenses/LICENSE-2.0 198 | 199 | Unless required by applicable law or agreed to in writing, software 200 | distributed under the License is distributed on an "AS IS" BASIS, 201 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 202 | See the License for the specific language governing permissions and 203 | limitations under the License. 204 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # FMR: Functional Meaning Representation & Semantic Parsing Framework 2 | [![GoDoc](https://godoc.org/github.com/liuzl/fmr?status.svg)](https://godoc.org/github.com/liuzl/fmr)[![Go Report Card](https://goreportcard.com/badge/github.com/liuzl/fmr)](https://goreportcard.com/report/github.com/liuzl/fmr) 3 | 4 | ## Projects that use FMR 5 | 6 | ### mathsolver 7 | * codes: https://github.com/liuzl/mathsolver 8 | * demo: https://mathsolver.zliu.org/ 9 | 10 | ## What is semantic parsing? 11 | Semantic parsing is the process of mapping a natural language sentence into an intermediate logical form which is a formal representation of its meaning. 12 | 13 | The formal representation should be a detailed representation of the complete meaning of the natural language sentence in a fully formal language that: 14 | 15 | * Has a rich ontology of types, properties, and relations. 16 | * Supports automated reasoning or execution. 
17 | 18 | ## Representation languages 19 | Early semantic parsers used highly domain-specific meaning representation languages, with later systems using more extensible languages like Prolog, lambda calculus, lambda dependency-based compositional semantics (λ-DCS), SQL, Python, Java, and the Alexa Meaning Representation Language. Some work has used more exotic meaning representations, like query graphs or vector representations. 20 | 21 | ### FMR, a formal meaning representation language 22 | * FMR stands for functional meaning representation 23 | * Context-Free Grammar for bridging NL and FMR 24 | * *[VIM Syntax highlighting for FMR grammar file](https://github.com/liuzl/vim-fmr)* 25 | 26 | ## Tasks 27 | * Grammar checkers 28 | * Dialogue management 29 | * Question answering 30 | * Information extraction 31 | * Machine translation 32 | 33 | ## What can FMR do, a glance overview 34 | ```js 35 | // semantic parsing 36 | "五与5.8的和的平方的1.5次方与two的和减去261.712" => 37 | nf.math.sub( 38 | nf.math.sum( 39 | nf.math.pow( 40 | nf.math.pow( 41 | nf.math.sum( 42 | 5, 43 | nf.math.to_number("5.8") 44 | ), 45 | 2 46 | ), 47 | nf.math.to_number("1.5") 48 | ), 49 | 2 50 | ), 51 | nf.math.to_number("261.712") 52 | ); // denotation: 1000 53 | 54 | // slot filling 55 | "从上海到天津的机票" => nf.flight("上海", "天津"); 56 | "到重庆,明天,从北京" => nf.flight("北京", "重庆"); 57 | "到上海去" => nf.flight(null, "上海"); 58 | ``` 59 | 60 | ## References 61 | * [Semantic Parsing: Past, Present, and Future](http://yoavartzi.com/sp14/slides/mooney.sp14.pdf), Raymond J. 
Mooney, 2014 62 | * [Introduction to semantic parsing](https://github.com/liuzl/fmr-files/blob/master/cs224u-2019-intro-semparse.pdf), Bill MacCartney, 2019 63 | * [Bringing machine learning and compositional semantics together](https://web.stanford.edu/~cgpotts/manuscripts/liang-potts-semantics.pdf), Percy Liang and Christopher Potts, 2014 64 | * [SippyCup: A semantic parsing tutorial](https://github.com/wcmac/sippycup), Bill MacCartney, 2015 65 | * [Semantic parsing in your browser](https://www.cs.toronto.edu/~muuo/writing/semantic-parsing-in-your-browser/), Muuo Wambua, 2018 66 | -------------------------------------------------------------------------------- /api.go: -------------------------------------------------------------------------------- 1 | package fmr 2 | 3 | import ( 4 | "flag" 5 | "fmt" 6 | "net/url" 7 | "strings" 8 | "sync" 9 | 10 | "github.com/liuzl/ling" 11 | ) 12 | 13 | var ( 14 | apiTagger = flag.String("api_tagger", "", "http address of api tagger") 15 | ctxTagger = flag.String("ctx_tagger", "", "http address of context tagger") 16 | ) 17 | 18 | var nlp *ling.Pipeline 19 | var once sync.Once 20 | 21 | // NLP returns handler for the ling nlp toolkit 22 | func NLP() *ling.Pipeline { 23 | once.Do(func() { 24 | var err error 25 | var tagger *ling.DictTagger 26 | if nlp, err = ling.DefaultNLP(); err != nil { 27 | panic(err) 28 | } 29 | if tagger, err = ling.NewDictTagger(); err != nil { 30 | panic(err) 31 | } 32 | if err = nlp.AddTagger(tagger); err != nil { 33 | panic(err) 34 | } 35 | if *apiTagger == "" { 36 | return 37 | } 38 | var tagger1 *ling.APITagger 39 | if tagger1, err = ling.NewAPITagger(*apiTagger); err != nil { 40 | panic(err) 41 | } 42 | if err = nlp.AddTagger(tagger1); err != nil { 43 | panic(err) 44 | } 45 | }) 46 | return nlp 47 | } 48 | 49 | // EarleyParse parses text for rule at beginning 50 | func (g *Grammar) EarleyParse(text string, starts ...string) (*Parse, error) { 51 | return g.EarleyParseWithContext("", text, starts...) 
52 | } 53 | 54 | // EarleyParseWithContext with context information 55 | func (g *Grammar) EarleyParseWithContext( 56 | context, text string, starts ...string) (*Parse, error) { 57 | tokens, l, err := g.process(context, text) 58 | if err != nil { 59 | return nil, err 60 | } 61 | return g.earleyParse(true, text, tokens, l, starts...) 62 | } 63 | 64 | // EarleyParseAny parses text for rule at any position 65 | func (g *Grammar) EarleyParseAny( 66 | text string, starts ...string) (*Parse, error) { 67 | 68 | return g.EarleyParseAnyWithContext("", text, starts...) 69 | } 70 | 71 | //EarleyParseAnyWithContext with context information 72 | func (g *Grammar) EarleyParseAnyWithContext( 73 | context, text string, starts ...string) (*Parse, error) { 74 | 75 | tokens, l, err := g.process(context, text) 76 | if err != nil { 77 | return nil, err 78 | } 79 | var p *Parse 80 | for i := 0; i < len(tokens); i++ { 81 | if p, err = g.earleyParse( 82 | true, text, tokens[i:], l, starts...); err != nil { 83 | return nil, err 84 | } 85 | if p.finalStates != nil { 86 | return p, nil 87 | } 88 | } 89 | return p, nil 90 | } 91 | 92 | // EarleyParseMaxAll extracts all submatches in text for rule 93 | func (g *Grammar) EarleyParseMaxAll( 94 | text string, starts ...string) ([]*Parse, error) { 95 | return g.EarleyParseMaxAllWithContext("", text, starts...) 96 | } 97 | 98 | // EarleyParseMaxAllWithContext with context information 99 | func (g *Grammar) EarleyParseMaxAllWithContext( 100 | context, text string, starts ...string) ([]*Parse, error) { 101 | tokens, l, err := g.process(context, text) 102 | if err != nil { 103 | return nil, err 104 | } 105 | var ret []*Parse 106 | for i := 0; i < len(tokens); { 107 | p, err := g.earleyParse(true, text, tokens[i:], l, starts...) 
108 | if err != nil { 109 | return nil, err 110 | } 111 | if p.finalStates != nil { 112 | ret = append(ret, p) 113 | max := 0 114 | for _, finalState := range p.finalStates { 115 | if finalState.End > max { 116 | max = finalState.End 117 | } 118 | } 119 | i += max 120 | } else { 121 | i++ 122 | } 123 | } 124 | return ret, nil 125 | } 126 | 127 | // EarleyParseAll extracts all submatches in text for rule 128 | func (g *Grammar) EarleyParseAll( 129 | text string, starts ...string) ([]*Parse, error) { 130 | return g.EarleyParseAllWithContext("", text, starts...) 131 | } 132 | 133 | // EarleyParseAllWithContext with context information 134 | func (g *Grammar) EarleyParseAllWithContext( 135 | context, text string, starts ...string) ([]*Parse, error) { 136 | tokens, l, err := g.process(context, text) 137 | if err != nil { 138 | return nil, err 139 | } 140 | var ret []*Parse 141 | for i := 0; i < len(tokens); i++ { 142 | p, err := g.earleyParse(false, text, tokens[i:], l, starts...) 143 | if err != nil { 144 | return nil, err 145 | } 146 | if p.finalStates != nil { 147 | ret = append(ret, p) 148 | //i += p.finalState.End 149 | } 150 | } 151 | return ret, nil 152 | } 153 | 154 | func (g *Grammar) earleyParse(maxFlag bool, text string, 155 | tokens []*ling.Token, l *Grammar, starts ...string) (*Parse, error) { 156 | if len(starts) == 0 { 157 | return nil, fmt.Errorf("no start rules") 158 | } 159 | if len(tokens) == 0 { 160 | return nil, fmt.Errorf("no tokens to parse") 161 | } 162 | 163 | parse := &Parse{grammars: []*Grammar{g}, text: text, starts: starts} 164 | if len(g.includes) > 0 { 165 | parse.grammars = append(parse.grammars, g.includes...) 
166 | } 167 | if l != nil { 168 | parse.grammars = append(parse.grammars, l) 169 | } 170 | parse.columns = append(parse.columns, &TableColumn{index: 0, token: nil}) 171 | for _, token := range tokens { 172 | parse.columns = append(parse.columns, 173 | &TableColumn{index: len(parse.columns), token: token}) 174 | } 175 | parse.parse(maxFlag) 176 | if Debug { 177 | fmt.Println(parse) 178 | } 179 | return parse, nil 180 | } 181 | 182 | func (g *Grammar) process( 183 | context, text string) ([]*ling.Token, *Grammar, error) { 184 | if text = strings.TrimSpace(text); text == "" { 185 | return nil, nil, fmt.Errorf("text is empty") 186 | } 187 | d := ling.NewDocument(text) 188 | if context == "" { 189 | if err := NLP().Annotate(d); err != nil { 190 | return nil, nil, err 191 | } 192 | } else { 193 | if *ctxTagger == "" { 194 | return nil, nil, fmt.Errorf("ctxTagger should be supplied") 195 | } 196 | vurl, err := url.ParseRequestURI(*ctxTagger) 197 | if err != nil { 198 | return nil, nil, err 199 | } 200 | c := vurl.Query() 201 | c.Set("context", context) 202 | vurl.RawQuery = c.Encode() 203 | tagger, err := ling.NewAPITagger(vurl.String()) 204 | if err != nil { 205 | return nil, nil, err 206 | } 207 | if err = NLP().AnnotatePro(d, tagger); err != nil { 208 | return nil, nil, err 209 | } 210 | } 211 | var ret []*ling.Token 212 | for _, token := range d.Tokens { 213 | if token.Type == ling.Space { 214 | continue 215 | } 216 | ret = append(ret, token) 217 | } 218 | if len(ret) == 0 { 219 | return nil, nil, fmt.Errorf("no tokens") 220 | } 221 | l, err := g.localGrammar(d) 222 | if err != nil { 223 | return nil, nil, err 224 | } 225 | return ret, l, nil 226 | } 227 | -------------------------------------------------------------------------------- /api_easy.go: -------------------------------------------------------------------------------- 1 | package fmr 2 | 3 | // Parse returns parse trees for rule at beginning 4 | func (g *Grammar) Parse(text string, starts ...string) 
([]*Node, error) { 5 | return g.extract(func(text string, starts ...string) ([]*Parse, error) { 6 | p, err := g.EarleyParse(text, starts...) 7 | if err != nil { 8 | return nil, err 9 | } 10 | return []*Parse{p}, nil 11 | }, text, starts...) 12 | } 13 | 14 | // ParseAny returns parse trees for rule at any position 15 | func (g *Grammar) ParseAny(text string, starts ...string) ([]*Node, error) { 16 | return g.extract( 17 | func(text string, starts ...string) ([]*Parse, error) { 18 | p, err := g.EarleyParseAny(text, starts...) 19 | if err != nil { 20 | return nil, err 21 | } 22 | return []*Parse{p}, nil 23 | }, text, starts...) 24 | } 25 | 26 | // ExtractMaxAll extracts all parse trees in text for rule 27 | func (g *Grammar) ExtractMaxAll( 28 | text string, starts ...string) ([]*Node, error) { 29 | return g.extract(g.EarleyParseMaxAll, text, starts...) 30 | } 31 | 32 | // ExtractAll extracts all parse trees in text for rule 33 | func (g *Grammar) ExtractAll(text string, starts ...string) ([]*Node, error) { 34 | return g.extract(g.EarleyParseAll, text, starts...) 35 | } 36 | 37 | func (g *Grammar) extract(f func(string, ...string) ([]*Parse, error), 38 | text string, starts ...string) ([]*Node, error) { 39 | ps, err := f(text, starts...) 40 | if err != nil { 41 | return nil, err 42 | } 43 | var ret []*Node 44 | for _, p := range ps { 45 | for _, f := range p.GetFinalStates() { 46 | ret = append(ret, p.GetTrees(f)...) 
47 | } 48 | } 49 | return ret, nil 50 | } 51 | -------------------------------------------------------------------------------- /api_easy_context.go: -------------------------------------------------------------------------------- 1 | package fmr 2 | 3 | // ParseWithContext returns parse trees for rule at beginning 4 | func (g *Grammar) ParseWithContext( 5 | context, text string, starts ...string) ([]*Node, error) { 6 | return g.extractWithContext( 7 | func(context, text string, starts ...string) ([]*Parse, error) { 8 | p, err := g.EarleyParseWithContext(context, text, starts...) 9 | if err != nil { 10 | return nil, err 11 | } 12 | return []*Parse{p}, nil 13 | }, context, text, starts...) 14 | } 15 | 16 | // ParseAnyWithContext returns parse trees for rule at any position 17 | func (g *Grammar) ParseAnyWithContext( 18 | context, text string, starts ...string) ([]*Node, error) { 19 | return g.extractWithContext( 20 | func(context, text string, starts ...string) ([]*Parse, error) { 21 | p, err := g.EarleyParseAnyWithContext(context, text, starts...) 22 | if err != nil { 23 | return nil, err 24 | } 25 | return []*Parse{p}, nil 26 | }, context, text, starts...) 27 | } 28 | 29 | // ExtractMaxAllWithContext extracts all parse trees in text for rule 30 | func (g *Grammar) ExtractMaxAllWithContext( 31 | context, text string, starts ...string) ([]*Node, error) { 32 | return g.extractWithContext( 33 | g.EarleyParseMaxAllWithContext, context, text, starts...) 34 | } 35 | 36 | // ExtractAllWithContext extracts all parse trees in text for rule 37 | func (g *Grammar) ExtractAllWithContext( 38 | context, text string, starts ...string) ([]*Node, error) { 39 | return g.extractWithContext( 40 | g.EarleyParseAllWithContext, context, text, starts...) 41 | } 42 | 43 | func (g *Grammar) extractWithContext( 44 | f func(string, string, ...string) ([]*Parse, error), 45 | context, text string, starts ...string) ([]*Node, error) { 46 | ps, err := f(context, text, starts...) 
47 | if err != nil { 48 | return nil, err 49 | } 50 | var ret []*Node 51 | for _, p := range ps { 52 | for _, f := range p.GetFinalStates() { 53 | ret = append(ret, p.GetTrees(f)...) 54 | } 55 | } 56 | return ret, nil 57 | } 58 | -------------------------------------------------------------------------------- /api_test.go: -------------------------------------------------------------------------------- 1 | package fmr 2 | 3 | import ( 4 | "testing" 5 | ) 6 | 7 | func TestLocalParse(t *testing.T) { 8 | tests := []string{ 9 | `柏乡位于河北省`, 10 | } 11 | g := &Grammar{} 12 | for _, c := range tests { 13 | ps, err := g.EarleyParseMaxAll(c, "loc_province", "loc_county") 14 | if err != nil { 15 | t.Error(err) 16 | } 17 | for _, p := range ps { 18 | for _, f := range p.GetFinalStates() { 19 | t.Log(f) 20 | trees := p.GetTrees(f) 21 | t.Log(trees) 22 | for _, tree := range trees { 23 | sem, err := tree.Semantic() 24 | if err != nil { 25 | t.Error(err) 26 | } 27 | t.Log(sem) 28 | } 29 | } 30 | } 31 | } 32 | } 33 | 34 | func TestGParse(t *testing.T) { 35 | tests := []string{ 36 | `柏乡位于河北省`, 37 | } 38 | grammar := ` = {nf.loc($1)}| {nf.loc($1)};` 39 | g, err := GrammarFromString(grammar, "loc") 40 | if err != nil { 41 | t.Error(err) 42 | } 43 | for _, c := range tests { 44 | ps, err := g.EarleyParseMaxAll(c, "loc") 45 | if err != nil { 46 | t.Error(err) 47 | } 48 | for _, p := range ps { 49 | for _, f := range p.GetFinalStates() { 50 | t.Log(f) 51 | trees := p.GetTrees(f) 52 | t.Log(trees) 53 | for _, tree := range trees { 54 | sem, err := tree.Semantic() 55 | if err != nil { 56 | t.Error(err) 57 | } 58 | t.Log(sem) 59 | } 60 | } 61 | } 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /cfgrammar.go: -------------------------------------------------------------------------------- 1 | package fmr 2 | 3 | import ( 4 | "fmt" 5 | "io/ioutil" 6 | "path/filepath" 7 | "strconv" 8 | "strings" 9 | "unicode" 10 | "unicode/utf8" 11 | 12 | 
"github.com/mitchellh/hashstructure" 13 | ) 14 | 15 | type parser struct { 16 | input string 17 | pos int 18 | width int 19 | current *position 20 | info map[int]*position 21 | fname string 22 | dir string 23 | } 24 | 25 | type position struct { 26 | row, col int 27 | r string 28 | } 29 | 30 | func (p *position) String() string { 31 | return fmt.Sprintf("|row:%d, col:%d, c:%s|", p.row, p.col, strconv.Quote(p.r)) 32 | } 33 | 34 | const eof = -1 35 | 36 | // GrammarFromFile constructs the Context-Free Grammar from file 37 | func GrammarFromFile(file string) (*Grammar, error) { 38 | b, err := ioutil.ReadFile(file) 39 | if err != nil { 40 | return nil, err 41 | } 42 | dir, err := filepath.Abs(filepath.Dir(file)) 43 | if err != nil { 44 | return nil, err 45 | } 46 | return grammarFromString(string(b), file, dir, map[string]int{file: 1}) 47 | } 48 | 49 | func grammarFromFile(ifile string, files map[string]int) (*Grammar, error) { 50 | if files[ifile] >= 2 { 51 | return nil, nil 52 | } 53 | b, err := ioutil.ReadFile(ifile) 54 | if err != nil { 55 | return nil, err 56 | } 57 | dir, err := filepath.Abs(filepath.Dir(ifile)) 58 | if err != nil { 59 | return nil, err 60 | } 61 | return grammarFromString(string(b), ifile, dir, files) 62 | } 63 | 64 | // GrammarFromString constructs the Contex-Free Grammar from string d with name 65 | func GrammarFromString(d, name string) (*Grammar, error) { 66 | return grammarFromString(d, name, ".", make(map[string]int)) 67 | } 68 | 69 | func grammarFromString(d, name, dir string, files map[string]int) (*Grammar, error) { 70 | if files[name] >= 2 { 71 | return nil, nil 72 | } 73 | p := &parser{fname: name, dir: dir, input: d, info: make(map[int]*position)} 74 | if Debug { 75 | fmt.Println("loading ", name, files) 76 | } 77 | g, err := p.grammar(files) 78 | if err != nil { 79 | return nil, err 80 | } 81 | files[name]++ 82 | if Debug { 83 | fmt.Println("loaded ", name, files) 84 | } 85 | return g, nil 86 | } 87 | 88 | func (p *parser) posInfo() 
string { 89 | return fmt.Sprintf("%s%s", p.fname, p.current) 90 | } 91 | 92 | func (p *parser) next() rune { 93 | if p.pos >= len(p.input) { 94 | p.width = 0 95 | return eof 96 | } 97 | r, w := utf8.DecodeRuneInString(p.input[p.pos:]) 98 | if r == utf8.RuneError { 99 | return eof 100 | } 101 | p.width = w 102 | p.pos += w 103 | if p.info[p.pos] == nil { 104 | if p.current == nil { 105 | p.current = &position{1, w, string(r)} 106 | } else { 107 | if r == '\n' { 108 | p.current = &position{p.current.row + 1, w, string(r)} 109 | } else { 110 | p.current = &position{p.current.row, p.current.col + w, string(r)} 111 | } 112 | } 113 | p.info[p.pos] = p.current 114 | } else { 115 | p.current = p.info[p.pos] 116 | } 117 | return r 118 | } 119 | 120 | func (p *parser) eat(expected rune) error { 121 | if r := p.next(); r != expected { 122 | return fmt.Errorf("%s :expected %s, got %s", p.posInfo(), 123 | strconv.Quote(string(expected)), strconv.Quote(string(r))) 124 | } 125 | return nil 126 | } 127 | 128 | func (p *parser) backup() { 129 | p.pos -= p.width 130 | p.current = p.info[p.pos] 131 | } 132 | 133 | func (p *parser) peek() rune { 134 | r := p.next() 135 | p.backup() 136 | return r 137 | } 138 | 139 | func (p *parser) ws() string { 140 | var ret []rune 141 | for r := p.next(); unicode.IsSpace(r); r = p.next() { 142 | ret = append(ret, r) 143 | } 144 | p.backup() 145 | return string(ret) 146 | } 147 | 148 | func (p *parser) text() (string, error) { 149 | var ret []rune 150 | first := true 151 | Loop: 152 | for { 153 | switch r := p.next(); { 154 | case unicode.IsLetter(r) || r == '_': 155 | ret = append(ret, r) 156 | case unicode.IsDigit(r) && !first: 157 | ret = append(ret, r) 158 | default: 159 | p.backup() 160 | break Loop 161 | } 162 | first = false 163 | } 164 | if len(ret) == 0 { 165 | return "", fmt.Errorf("%s : no text", p.posInfo()) 166 | } 167 | return string(ret), nil 168 | } 169 | 170 | func (p *parser) token(begin, end rune) (name string, err error) { 171 | 
if err = p.eat(begin); err != nil { 172 | return 173 | } 174 | if name, err = p.text(); err != nil { 175 | return 176 | } 177 | err = p.eat(end) 178 | return 179 | } 180 | 181 | func (p *parser) nonterminal() (string, error) { 182 | return p.token('<', '>') 183 | } 184 | 185 | func (p *parser) frame() (string, error) { 186 | return p.token('[', ']') 187 | } 188 | 189 | func (p *parser) term(g *Grammar) (*Term, error) { 190 | switch p.peek() { 191 | case '<': 192 | name, err := p.nonterminal() 193 | if err != nil { 194 | return nil, err 195 | } 196 | return &Term{Value: name, Type: Nonterminal}, nil 197 | case '"': 198 | flags, text, err := p.terminal() 199 | if err != nil { 200 | return nil, err 201 | } 202 | if flags == "" { 203 | return &Term{Value: text, Type: Terminal}, nil 204 | } 205 | return &Term{Value: text, Type: Terminal, Meta: flags}, nil 206 | case '(': 207 | return p.special() 208 | case '`': 209 | return p.regex(g) 210 | } 211 | return nil, fmt.Errorf("%s :invalid term char", p.posInfo()) 212 | } 213 | 214 | func (p *parser) getInt() (idx int, err error) { 215 | idx = -1 216 | var n uint64 217 | var r rune 218 | for r = p.next(); unicode.IsDigit(r); r = p.next() { 219 | if n, err = strconv.ParseUint(string(r), 10, 32); err != nil { 220 | return 221 | } 222 | if idx == -1 { 223 | idx = int(n) 224 | } else { 225 | idx = idx*10 + int(n) 226 | } 227 | } 228 | if idx == -1 { 229 | err = fmt.Errorf("%s : number expected", p.posInfo()) 230 | return 231 | } 232 | p.backup() 233 | return 234 | } 235 | 236 | func (p *parser) ruleBody(g *Grammar) (*RuleBody, error) { 237 | t, err := p.term(g) 238 | if err != nil { 239 | return nil, err 240 | } 241 | terms := []*Term{t} 242 | if err = p.comments(); err != nil { 243 | return nil, err 244 | } 245 | for { 246 | if err = p.comments(); err != nil { 247 | return nil, err 248 | } 249 | if !strings.ContainsRune("<\"(`", p.peek()) { 250 | break 251 | } 252 | if t, err = p.term(g); err != nil { 253 | return nil, err 254 | 
} 255 | terms = append(terms, t) 256 | if err = p.comments(); err != nil { 257 | return nil, err 258 | } 259 | } 260 | var f *FMR 261 | if p.peek() == '{' { 262 | p.eat('{') 263 | if f, err = p.semanticFn(); err != nil { 264 | return nil, err 265 | } 266 | if err = p.eat('}'); err != nil { 267 | return nil, err 268 | } 269 | if err = p.comments(); err != nil { 270 | return nil, err 271 | } 272 | } 273 | return &RuleBody{terms, f}, nil 274 | } 275 | 276 | func (p *parser) ruleBodies(g *Grammar) (map[uint64]*RuleBody, error) { 277 | r, err := p.ruleBody(g) 278 | if err != nil { 279 | return nil, err 280 | } 281 | hash, err := hashstructure.Hash(r, nil) 282 | if err != nil { 283 | return nil, err 284 | } 285 | rules := map[uint64]*RuleBody{hash: r} 286 | for { 287 | if p.peek() != '|' { 288 | break 289 | } 290 | p.eat('|') 291 | if err = p.comments(); err != nil { 292 | return nil, err 293 | } 294 | if r, err = p.ruleBody(g); err != nil { 295 | return nil, err 296 | } 297 | if hash, err = hashstructure.Hash(r, nil); err != nil { 298 | return nil, err 299 | } 300 | rules[hash] = r 301 | } 302 | return rules, nil 303 | } 304 | 305 | func (p *parser) rule(c rune, g *Grammar) (*Rule, error) { 306 | var name string 307 | var err error 308 | switch c { 309 | case '<': 310 | if name, err = p.nonterminal(); err != nil { 311 | return nil, err 312 | } 313 | case '[': 314 | if name, err = p.frame(); err != nil { 315 | return nil, err 316 | } 317 | default: 318 | return nil, fmt.Errorf("%s : unexpected char", p.posInfo()) 319 | } 320 | if err = p.comments(); err != nil { 321 | return nil, err 322 | } 323 | if err = p.eat('='); err != nil { 324 | return nil, err 325 | } 326 | if err = p.comments(); err != nil { 327 | return nil, err 328 | } 329 | body, err := p.ruleBodies(g) 330 | if err != nil { 331 | return nil, err 332 | } 333 | if err = p.eat(';'); err != nil { 334 | return nil, err 335 | } 336 | return &Rule{name, body}, nil 337 | } 338 | 339 | func (p *parser) grammar(files 
map[string]int) (*Grammar, error) { 340 | g := &Grammar{ 341 | Name: p.fname, 342 | Rules: make(map[string]*Rule), 343 | Frames: make(map[string]*Rule), 344 | Regexps: make(map[string]string), 345 | } 346 | for { 347 | if err := p.comments(); err != nil { 348 | return nil, err 349 | } 350 | if p.peek() != '#' { 351 | break 352 | } 353 | p.eat('#') 354 | p.ws() 355 | name, err := p.text() 356 | if err != nil { 357 | return nil, err 358 | } 359 | if name != "include" { 360 | return nil, fmt.Errorf( 361 | "%s: directive:(%s) not supported", p.posInfo(), name) 362 | } 363 | p.ws() 364 | _, ifile, err := p.terminal() 365 | if err != nil { 366 | return nil, err 367 | } 368 | ifile = filepath.Join(p.dir, ifile) 369 | files[ifile]++ 370 | ig, err := grammarFromFile(ifile, files) 371 | if err != nil { 372 | return nil, err 373 | } 374 | if ig == nil { 375 | continue 376 | } 377 | g.includes = append(g.includes, ig) 378 | g.includes = append(g.includes, ig.includes...) 379 | for k, v := range ig.Regexps { 380 | g.Regexps[k] = v 381 | } 382 | } 383 | for { 384 | if err := p.comments(); err != nil { 385 | return nil, err 386 | } 387 | 388 | c := p.peek() 389 | if !strings.ContainsRune(`<[`, c) { 390 | break 391 | } 392 | r, err := p.rule(c, g) 393 | if err != nil { 394 | return nil, err 395 | 396 | } 397 | rules := g.Rules 398 | if c == '[' { 399 | rules = g.Frames 400 | } 401 | if _, has := rules[r.Name]; has { 402 | for k, v := range r.Body { 403 | rules[r.Name].Body[k] = v 404 | } 405 | } else { 406 | rules[r.Name] = r 407 | } 408 | } 409 | if p.next() != eof { 410 | return nil, fmt.Errorf("%s : format error", p.posInfo()) 411 | } 412 | if err := g.buildIndex(); err != nil { 413 | return nil, err 414 | } 415 | if err := g.refine("g"); err != nil { 416 | return nil, err 417 | } 418 | return g, nil 419 | } 420 | -------------------------------------------------------------------------------- /cfgrammar_comments.go:
-------------------------------------------------------------------------------- 1 | package fmr 2 | 3 | import ( 4 | "fmt" 5 | ) 6 | 7 | func (p *parser) comments() error { 8 | defer p.ws() 9 | for { 10 | p.ws() 11 | c, err := p.comment() 12 | if err != nil { 13 | return err 14 | } 15 | if len(c) == 0 { 16 | return nil 17 | } 18 | } 19 | } 20 | 21 | func (p *parser) comment() (string, error) { 22 | if p.next() != '/' { 23 | p.backup() 24 | return "", nil 25 | } 26 | switch r := p.peek(); { 27 | case r == '/': 28 | return p.lineComment() 29 | case r == '*': 30 | return p.multiLineComment() 31 | default: 32 | return "", fmt.Errorf("%s : invalid char %s", p.posInfo(), string(r)) 33 | } 34 | } 35 | 36 | func (p *parser) lineComment() (string, error) { 37 | if err := p.eat('/'); err != nil { 38 | return "", err 39 | } 40 | ret := []rune{'/', '/'} 41 | for { 42 | r := p.next() 43 | if r == '\n' { 44 | break 45 | } 46 | ret = append(ret, r) 47 | } 48 | return string(ret), nil 49 | } 50 | 51 | func (p *parser) multiLineComment() (string, error) { 52 | if err := p.eat('*'); err != nil { 53 | return "", err 54 | } 55 | ret := []rune{'/', '*'} 56 | var prev rune 57 | for { 58 | r := p.next() 59 | if r == eof { 60 | return "", fmt.Errorf("%s : unterminated comment", p.posInfo()) 61 | } 62 | if prev == '*' && r == '/' { 63 | break 64 | } 65 | ret = append(ret, r) 66 | prev = r 67 | } 68 | ret = append(ret, '/') 69 | return string(ret), nil 70 | } 71 | -------------------------------------------------------------------------------- /cfgrammar_fmr.go: -------------------------------------------------------------------------------- 1 | package fmr 2 | 3 | import ( 4 | "fmt" 5 | "math/big" 6 | "unicode" 7 | ) 8 | 9 | func (p *parser) semanticFn() (f *FMR, err error) { 10 | p.ws() 11 | f = &FMR{} 12 | if f.Fn, err = p.funcName(); err != nil { 13 | return 14 | } 15 | if f.Args, err = p.funcArgs(); err != nil { 16 | return 17 | } 18 | p.ws() 19 | return 20 | } 21 | 22 | func (p
*parser) funcName() (string, error) { 23 | var ret []rune 24 | var prev rune = eof 25 | var r rune 26 | first := true 27 | Loop: 28 | for { 29 | switch r = p.next(); { 30 | case unicode.IsLetter(r) || r == '_': 31 | ret = append(ret, r) 32 | case unicode.IsDigit(r) && !first: 33 | ret = append(ret, r) 34 | case r == '.' && prev != '.' && !first: 35 | ret = append(ret, r) 36 | default: 37 | p.backup() 38 | break Loop 39 | } 40 | first = false 41 | prev = r 42 | } 43 | if len(ret) == 0 { 44 | return "", fmt.Errorf("%s : no funcName", p.posInfo()) 45 | } 46 | p.ws() 47 | return string(ret), nil 48 | } 49 | 50 | func (p *parser) funcArgs() (args []*Arg, err error) { 51 | if err = p.eat('('); err != nil { 52 | return 53 | } 54 | var r rune 55 | var arg *Arg 56 | for { 57 | p.ws() 58 | switch r = p.peek(); { 59 | case r == '@': 60 | if arg, err = p.contextArg(); err != nil { 61 | return 62 | } 63 | case r == '$': 64 | if arg, err = p.idxArg(); err != nil { 65 | return 66 | } 67 | case r == '"': 68 | if arg, err = p.strArg(); err != nil { 69 | return 70 | } 71 | case unicode.IsDigit(r): 72 | if arg, err = p.numArg(false); err != nil { 73 | return 74 | } 75 | case r == '-': 76 | if err = p.eat('-'); err != nil { 77 | return 78 | } 79 | if arg, err = p.numArg(true); err != nil { 80 | return 81 | } 82 | default: 83 | if arg, err = p.fArg(); err != nil { 84 | return 85 | } 86 | } 87 | args = append(args, arg) 88 | if r == ',' { 89 | continue 90 | } else { 91 | p.ws() 92 | r = p.next() 93 | if r == ',' { 94 | continue 95 | } else if r == ')' { 96 | break 97 | } else { 98 | err = fmt.Errorf("%s : unexpected semantic args", p.posInfo()) 99 | return 100 | } 101 | } 102 | } 103 | return 104 | } 105 | 106 | func (p *parser) contextArg() (arg *Arg, err error) { 107 | if err = p.eat('@'); err != nil { 108 | return 109 | } 110 | arg = &Arg{"context", "@"} 111 | return 112 | } 113 | 114 | func (p *parser) idxArg() (arg *Arg, err error) { 115 | if err = p.eat('$'); err != nil { 116 | 
return 117 | } 118 | var idx int 119 | if idx, err = p.getInt(); err != nil { 120 | return 121 | } 122 | arg = &Arg{"index", idx} 123 | return 124 | } 125 | 126 | func (p *parser) strArg() (*Arg, error) { 127 | var text string 128 | var err error 129 | if _, text, err = p.terminal(); err != nil { 130 | return nil, err 131 | } 132 | return &Arg{"string", text}, nil 133 | } 134 | 135 | func (p *parser) numArg(neg bool) (*Arg, error) { 136 | var ret []rune 137 | hasDot := false 138 | for r := p.next(); ; r = p.next() { 139 | if unicode.IsDigit(r) { 140 | ret = append(ret, r) 141 | } else if r == '.' { 142 | if hasDot { 143 | return nil, fmt.Errorf("%s : unexpected dot", p.posInfo()) 144 | } 145 | hasDot = true 146 | ret = append(ret, r) 147 | } else { 148 | break 149 | } 150 | } 151 | if len(ret) == 0 { 152 | return nil, fmt.Errorf("%s : number expected", p.posInfo()) 153 | } 154 | p.backup() 155 | if neg { 156 | ret = append([]rune{'-'}, ret...) 157 | } 158 | if hasDot { 159 | n := new(big.Float) 160 | if _, err := fmt.Sscan(string(ret), n); err != nil { 161 | return nil, err 162 | } 163 | return &Arg{"float", n}, nil 164 | } 165 | n := new(big.Int) 166 | if _, err := fmt.Sscan(string(ret), n); err != nil { 167 | return nil, err 168 | } 169 | return &Arg{"int", n}, nil 170 | } 171 | 172 | func (p *parser) fArg() (*Arg, error) { 173 | var f *FMR 174 | var err error 175 | if f, err = p.semanticFn(); err != nil { 176 | return nil, err 177 | } 178 | return &Arg{"func", f}, nil 179 | } 180 | -------------------------------------------------------------------------------- /cfgrammar_regex.go: -------------------------------------------------------------------------------- 1 | package fmr 2 | 3 | import ( 4 | "fmt" 5 | 6 | "zliu.org/goutil" 7 | ) 8 | 9 | func (p *parser) regex(g *Grammar) (*Term, error) { 10 | if err := p.eat('`'); err != nil { 11 | return nil, err 12 | } 13 | p.ws() 14 | var ret []rune 15 | OUT: 16 | for { 17 | switch r := p.next(); { 18 | case r == '`': 
19 | break OUT 20 | case r == eof: 21 | return nil, fmt.Errorf("%s : unterminated string", p.posInfo()) 22 | default: 23 | ret = append(ret, r) 24 | } 25 | } 26 | if len(ret) == 0 { 27 | return nil, fmt.Errorf("%s : empty regexp string", p.posInfo()) 28 | } 29 | s := string(ret) 30 | if _, err := goutil.Regexp(s); err != nil { 31 | return nil, fmt.Errorf("%s : `%s` is not a valid regexp", p.posInfo(), s) 32 | } 33 | h := goutil.MD5(s)[:16] 34 | g.Regexps[h] = s 35 | return &Term{Value: h, Type: Nonterminal}, nil 36 | } 37 | -------------------------------------------------------------------------------- /cfgrammar_special.go: -------------------------------------------------------------------------------- 1 | package fmr 2 | 3 | import "fmt" 4 | 5 | func (p *parser) special() (*Term, error) { 6 | if err := p.eat('('); err != nil { 7 | return nil, err 8 | } 9 | p.ws() 10 | name, err := p.text() 11 | if err != nil { 12 | return nil, err 13 | } 14 | p.ws() 15 | switch name { 16 | case "any": 17 | return p.any() 18 | case "list": 19 | return p.list() 20 | default: 21 | return nil, fmt.Errorf( 22 | "%s: special rule:(%s) not supported", p.posInfo(), name) 23 | } 24 | } 25 | 26 | func (p *parser) specialMeta() (map[string]int, error) { 27 | p.ws() 28 | var err error 29 | var meta map[string]int 30 | if p.peek() == '{' { 31 | // contains range 32 | meta = make(map[string]int) 33 | p.eat('{') 34 | p.ws() 35 | if meta["min"], err = p.getInt(); err != nil { 36 | return nil, err 37 | } 38 | p.ws() 39 | if err = p.eat(','); err != nil { 40 | return nil, err 41 | } 42 | p.ws() 43 | if meta["max"], err = p.getInt(); err != nil { 44 | return nil, err 45 | } 46 | if meta["max"] < meta["min"] { 47 | return nil, fmt.Errorf("%s : max:%d less than min:%d", 48 | p.posInfo(), meta["max"], meta["min"]) 49 | } 50 | p.ws() 51 | if err = p.eat('}'); err != nil { 52 | return nil, err 53 | } 54 | } 55 | p.ws() 56 | return meta, nil 57 | } 58 | 59 | func (p *parser) list() (*Term, error) { 60 
| name, err := p.nonterminal() 61 | if err != nil { 62 | return nil, err 63 | } 64 | meta, err := p.specialMeta() 65 | if err != nil { 66 | return nil, err 67 | } 68 | if err = p.eat(')'); err != nil { 69 | return nil, err 70 | } 71 | if len(meta) > 0 { 72 | return &Term{Type: List, Value: name, Meta: meta}, nil 73 | } 74 | return &Term{Type: List, Value: name}, nil 75 | } 76 | 77 | func (p *parser) any() (*Term, error) { 78 | meta, err := p.specialMeta() 79 | if err != nil { 80 | return nil, err 81 | } 82 | if err = p.eat(')'); err != nil { 83 | return nil, err 84 | } 85 | if len(meta) > 0 { 86 | return &Term{Value: "any", Type: Any, Meta: meta}, nil 87 | } 88 | return &Term{Value: "any", Type: Any}, nil 89 | } 90 | -------------------------------------------------------------------------------- /cfgrammar_terminal.go: -------------------------------------------------------------------------------- 1 | package fmr 2 | 3 | import "fmt" 4 | 5 | func (p *parser) terminal() (flags, text string, err error) { 6 | if err = p.eat('"'); err != nil { 7 | return 8 | } 9 | p.ws() 10 | if p.peek() == '(' { 11 | p.eat('(') 12 | p.ws() 13 | if err = p.eat('?'); err != nil { 14 | return 15 | } 16 | p.ws() 17 | if flags, err = p.text(); err != nil { 18 | return 19 | } 20 | if err = p.eat(')'); err != nil { 21 | return 22 | } 23 | } 24 | if text, err = p.terminalText(); err != nil { 25 | return 26 | } 27 | err = p.eat('"') 28 | return 29 | } 30 | 31 | func (p *parser) terminalText() (string, error) { 32 | var ret []rune 33 | var prev rune 34 | for { 35 | switch r := p.next(); { 36 | case r == '"' && prev != '\\': 37 | p.backup() 38 | return string(ret), nil 39 | case r == eof: 40 | return "", fmt.Errorf("%s : unterminated string", p.posInfo()) 41 | case prev == '\\': 42 | switch r { 43 | case '\\': 44 | ret = append(ret, '\\') 45 | case 'n': 46 | ret = append(ret, '\n') 47 | case 't': 48 | ret = append(ret, '\t') 49 | case '"': 50 | ret = append(ret, '"') 51 | case '(': 52 | ret = 
append(ret, '(') 53 | default: 54 | return "", fmt.Errorf("%s : unexpected escape string", p.posInfo()) 55 | } 56 | prev = 0 57 | case r == '\\': 58 | prev = r 59 | default: 60 | ret = append(ret, r) 61 | prev = r 62 | } 63 | } 64 | } 65 | -------------------------------------------------------------------------------- /cfgrammar_test.go: -------------------------------------------------------------------------------- 1 | package fmr 2 | 3 | import ( 4 | //"fmt" 5 | "testing" 6 | 7 | "zliu.org/goutil" 8 | ) 9 | 10 | var tests = []string{ 11 | ` = "<" ">" ; 12 | = " " { nf.math.sum($1,$3)} | ; 13 | = "(?ilfw)f \\uoo\n" | "bar\t" | "baz"|"好吧" ; 14 | = "\(" (any) ")" ; 15 | `, 16 | ` = (list); 17 | ="20181219"|"20181218"; 18 | `, 19 | } 20 | 21 | func TestLex(t *testing.T) { 22 | for _, c := range tests { 23 | g, err := GrammarFromString(c, "test") 24 | if err != nil { 25 | t.Error(err) 26 | } 27 | b, err := goutil.JSONMarshalIndent(g, "", " ") 28 | if err != nil { 29 | t.Error(err) 30 | } 31 | t.Log(string(b)) 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /earley.go: -------------------------------------------------------------------------------- 1 | package fmr 2 | 3 | import ( 4 | "fmt" 5 | 6 | "github.com/liuzl/ling" 7 | ) 8 | 9 | // GammaRule is the name of the special "gamma" rule added by the algorithm 10 | // (this is unicode for 'LATIN SMALL LETTER GAMMA') 11 | const GammaRule = "\u0263" // "\u0194" 12 | 13 | // DOT indicates the current position inside a TableState 14 | const DOT = "\u2022" // "\u00B7" 15 | 16 | // TableState uses Earley's dot notation: given a production X → αβ, 17 | // the notation X → α • β represents a condition in which α has already 18 | // been parsed and β is expected. 
19 | type TableState struct { 20 | Term *Term `json:"term"` 21 | Rb *RuleBody `json:"rb,omitempty"` 22 | Start int `json:"start"` 23 | End int `json:"end"` 24 | Dot int `json:"dot"` 25 | } 26 | 27 | // TableColumn is the TableState set 28 | type TableColumn struct { 29 | token *ling.Token 30 | index int 31 | states []*TableState 32 | } 33 | 34 | // Parse stores a parse chart by grammars 35 | type Parse struct { 36 | grammars []*Grammar 37 | text string 38 | starts []string 39 | columns []*TableColumn 40 | finalStates []*TableState 41 | } 42 | 43 | // Equal func for TableState 44 | func (s *TableState) Equal(ts *TableState) bool { 45 | if s == nil && ts == nil { 46 | return true 47 | } 48 | if s == nil || ts == nil { 49 | if Debug { 50 | fmt.Println("only one is nil:", s, ts) 51 | } 52 | return false 53 | } 54 | if s.Start != ts.Start || s.End != ts.End || s.Dot != ts.Dot || 55 | !s.Rb.Equal(ts.Rb) { 56 | return false 57 | } 58 | return s.Term.Equal(ts.Term) 59 | } 60 | 61 | func (s *TableState) metaEmpty() bool { 62 | if s.Term.Meta == nil { 63 | return true 64 | } 65 | if m, ok := s.Term.Meta.(map[string]int); ok && len(m) == 0 { 66 | return true 67 | } 68 | return false 69 | } 70 | 71 | func (s *TableState) isCompleted() bool { 72 | switch s.Term.Type { 73 | case Any, List: 74 | if !s.metaEmpty() { 75 | if meta, ok := s.Term.Meta.(map[string]int); ok { 76 | if s.Dot >= meta["min"] && s.Dot <= meta["max"] { 77 | return true 78 | } 79 | } 80 | } else if s.Dot > 0 { 81 | return true 82 | } 83 | return false 84 | default: 85 | return s.Dot >= len(s.Rb.Terms) 86 | } 87 | } 88 | 89 | func (s *TableState) getNextTerm() *Term { 90 | switch s.Term.Type { 91 | case Any: 92 | if !s.metaEmpty() { 93 | if meta, ok := s.Term.Meta.(map[string]int); ok && s.Dot >= meta["max"] { 94 | return nil 95 | } 96 | } 97 | return s.Term 98 | case List: 99 | if !s.metaEmpty() { 100 | if meta, ok := s.Term.Meta.(map[string]int); ok && s.Dot >= meta["max"] { 101 | return nil 102 | } 103 | } 
104 | return &Term{Value: s.Term.Value, Type: Nonterminal, Meta: s.Term.Meta} 105 | default: 106 | if s.isCompleted() { 107 | return nil 108 | } 109 | return s.Rb.Terms[s.Dot] 110 | } 111 | } 112 | 113 | func (col *TableColumn) insert(state *TableState) *TableState { 114 | return col.insertToEnd(state, false) 115 | } 116 | 117 | func (col *TableColumn) insertToEnd(state *TableState, end bool) *TableState { 118 | state.End = col.index 119 | if state.Term.Type == Any { 120 | state.Dot = state.End - state.Start 121 | } 122 | for i, s := range col.states { 123 | if s.Equal(state) { 124 | if end { 125 | col.states = append(col.states[:i], col.states[i+1:]...) 126 | col.states = append(col.states, s) 127 | } 128 | return s 129 | } 130 | } 131 | col.states = append(col.states, state) 132 | return col.states[len(col.states)-1] 133 | } 134 | 135 | /* 136 | * the Earley algorithm's core: add gamma rule, fill up table, and check if the 137 | * gamma rule span from the first column to the last one. return the final gamma 138 | * state, or null, if the parse failed. 
139 | */ 140 | func (p *Parse) parse(maxFlag bool) []*TableState { 141 | if len(p.starts) == 0 { 142 | return nil 143 | } 144 | for _, start := range p.starts { 145 | rb := &RuleBody{ 146 | []*Term{{Value: start, Type: Nonterminal}}, 147 | &FMR{"nf.I", []*Arg{{"index", 1}}}, 148 | } 149 | begin := &TableState{&Term{GammaRule, Nonterminal, nil}, rb, 0, 0, 0} 150 | p.columns[0].states = append(p.columns[0].states, begin) 151 | } 152 | for i, col := range p.columns { 153 | if Debug { 154 | fmt.Printf("Column %d[%s]:", i, col.token) 155 | } 156 | for j := 0; j < len(col.states); j++ { 157 | st := col.states[j] 158 | if Debug { 159 | fmt.Printf("\n\tRow %d: %+v, len:%d\n", j, st, len(col.states)) 160 | } 161 | if st.isCompleted() { 162 | p.complete(col, st) 163 | } 164 | term := st.getNextTerm() 165 | if term != nil { 166 | if st.Term.Type == Any { 167 | if i+1 < len(p.columns) { 168 | p.scan(p.columns[i+1], st, term) 169 | } 170 | } else { 171 | switch term.Type { 172 | case Nonterminal, Any, List: 173 | p.predict(col, term) 174 | case Terminal: 175 | if i+1 < len(p.columns) { 176 | p.scan(p.columns[i+1], st, term) 177 | } 178 | } 179 | } 180 | } 181 | } 182 | if Debug { 183 | fmt.Println() 184 | } 185 | //p.handleEpsilons(col) 186 | } 187 | 188 | // find end state (return nil if not found) 189 | /* 190 | lastCol := p.columns[len(p.columns)-1] 191 | for _, state := range lastCol.states { 192 | if state.Name == GAMMA_RULE && state.isCompleted() { 193 | return state 194 | } 195 | } 196 | */ 197 | var ret []*TableState 198 | for i := len(p.columns) - 1; i >= 0; i-- { 199 | for _, state := range p.columns[i].states { 200 | if state.Term.Value == GammaRule && state.isCompleted() { 201 | ret = append(ret, state) 202 | if maxFlag { 203 | p.finalStates = ret 204 | return ret 205 | } 206 | } 207 | } 208 | } 209 | p.finalStates = ret 210 | return ret 211 | } 212 | 213 | func (*Parse) scan(col *TableColumn, st *TableState, term *Term) { 214 | if term.Type == Any { 215 | newSt := 
&TableState{Term: &Term{"any", Any, term.Meta}, Rb: st.Rb, 216 | Dot: st.Dot + 1, Start: st.Start} 217 | col.insert(newSt) 218 | if Debug { 219 | fmt.Println("\tscan Any") 220 | fmt.Printf("\t\tinsert to next: %+v\n", newSt) 221 | } 222 | return 223 | } 224 | if terminalMatch(term, col.token) { 225 | newSt := &TableState{Term: st.Term, Rb: st.Rb, 226 | Dot: st.Dot + 1, Start: st.Start} 227 | col.insert(newSt) 228 | if Debug { 229 | fmt.Println("\tscan", term.Value, col.token) 230 | fmt.Printf("\t\tinsert to next: %+v\n", newSt) 231 | } 232 | } 233 | } 234 | 235 | func predict(g *Grammar, col *TableColumn, term *Term) bool { 236 | r, has := g.Rules[term.Value] 237 | if !has { 238 | return false 239 | } 240 | changed := false 241 | for _, prod := range r.Body { 242 | //st := &TableState{Name: r.Name, Rb: prod, dot: 0, Start: col.index, termType: term.Type} 243 | st := &TableState{Term: &Term{Value: r.Name, Type: Nonterminal}, Rb: prod, 244 | Dot: 0, Start: col.index} 245 | st2 := col.insert(st) 246 | if Debug { 247 | fmt.Printf("\t\t%+v insert: %+v\n", term.Type, st) 248 | } 249 | changed = changed || (st == st2) 250 | } 251 | return changed 252 | } 253 | 254 | func (p *Parse) predict(col *TableColumn, term *Term) bool { 255 | if Debug { 256 | fmt.Println("\tpredict", term.Type, term.Value) 257 | } 258 | switch term.Type { 259 | case Nonterminal: 260 | changed := false 261 | for _, g := range p.grammars { 262 | changed = predict(g, col, term) || changed 263 | } 264 | return changed 265 | case Any, List: 266 | st := &TableState{Term: term, Start: col.index} 267 | st2 := col.insert(st) 268 | if Debug { 269 | fmt.Printf("\t\tinsert: %+v\n", st) 270 | } 271 | return st == st2 272 | } 273 | return false 274 | } 275 | 276 | // Earley complete. 
returns true if the table has been changed, false otherwise 277 | func (p *Parse) complete(col *TableColumn, state *TableState) bool { 278 | if Debug { 279 | fmt.Printf("\tcomplete: %+v\n", state) 280 | } 281 | changed := false 282 | for _, st := range p.columns[state.Start].states { 283 | next := st.getNextTerm() 284 | if next == nil { 285 | continue 286 | } 287 | if (next.Type == Any && state.Term.Type == Any) || 288 | (next.Type == state.Term.Type && next.Value == state.Term.Value) { 289 | st1 := &TableState{Term: &Term{st.Term.Value, st.Term.Type, next.Meta}, 290 | Rb: st.Rb, Dot: st.Dot + 1, Start: st.Start} 291 | //st2 := col.insertToEnd(st1, true) 292 | st2 := col.insertToEnd(st1, false) 293 | if Debug { 294 | fmt.Printf("\t\tinsert: %+v\n", st1) 295 | } 296 | changed = changed || (st1 == st2) 297 | } 298 | } 299 | return changed 300 | } 301 | 302 | func (p *Parse) handleEpsilons(col *TableColumn) { 303 | changed := true 304 | for changed { 305 | changed = false 306 | for _, state := range col.states { 307 | if state.isCompleted() { 308 | changed = p.complete(col, state) || changed 309 | } 310 | term := state.getNextTerm() 311 | if term != nil && term.Type == Nonterminal { 312 | changed = p.predict(col, term) || changed 313 | } 314 | } 315 | } 316 | } 317 | -------------------------------------------------------------------------------- /earley_ast.go: -------------------------------------------------------------------------------- 1 | package fmr 2 | 3 | import "fmt" 4 | 5 | // Debug flag 6 | var Debug = false 7 | 8 | // Node is the AST of tree structure 9 | type Node struct { 10 | Value *TableState `json:"value"` 11 | Children []*Node `json:"children,omitempty"` 12 | 13 | p *Parse 14 | } 15 | 16 | // GetFinalStates returns the final states of p 17 | func (p *Parse) GetFinalStates() []*TableState { 18 | return p.finalStates 19 | } 20 | 21 | // Boundary returns the start, end position in NL for finalState 22 | func (p *Parse) Boundary(finalState *TableState) 
*Pos { 23 | if finalState == nil { 24 | return nil 25 | } 26 | start := p.columns[finalState.Start+1].token.StartByte 27 | end := p.columns[finalState.End].token.EndByte 28 | if end < start { //TODO 29 | end = start 30 | } 31 | return &Pos{start, end} 32 | } 33 | 34 | // Tag returns the Nonterminal name of finalState 35 | func (p *Parse) Tag(finalState *TableState) string { 36 | if finalState == nil { 37 | return "" 38 | } 39 | return finalState.Rb.Terms[0].Value 40 | } 41 | 42 | // GetTrees returns all possible parse results 43 | func (p *Parse) GetTrees(finalState *TableState) []*Node { 44 | if Debug { 45 | fmt.Printf("chart:\n%+v\n", p) 46 | fmt.Println("finalState:\n", finalState) 47 | } 48 | if finalState != nil { 49 | return p.buildTrees(finalState) 50 | } 51 | return nil 52 | } 53 | 54 | func (p *Parse) buildTrees(state *TableState) []*Node { 55 | if state.Term.Type == Any { 56 | n := &TableState{state.Term, nil, state.Start, state.End, state.End} 57 | cld := []*Node{{n, nil, p}} 58 | return cld 59 | } 60 | if state.Term.Type == List { 61 | state.Rb = &RuleBody{} 62 | var args []*Arg 63 | for i := 0; i < state.Dot; i++ { 64 | state.Rb.Terms = append(state.Rb.Terms, &Term{state.Term.Value, Nonterminal, nil}) 65 | args = append(args, &Arg{"index", i + 1}) 66 | } 67 | state.Rb.F = &FMR{"fmr.list", args} 68 | } 69 | return p.buildTreesHelper( 70 | &[]*Node{}, state, len(state.Rb.Terms)-1, state.End) 71 | } 72 | 73 | /* 74 | * How it works: suppose we're trying to match [X -> Y Z W]. We go from finish 75 | * to start, e.g., first we'll try to match W in X.endCol. Let this matching 76 | * state be M1. Next we'll try to match Z in M1.startCol. Let this matching 77 | * state be M2. And finally, we'll try to match Y in M2.startCol, which must 78 | * also start at X.startCol. Let this matching state be M3. 
79 | * 80 | * If we matched M1, M2 and M3, then we've found a parsing for X: 81 | * X-> 82 | * Y -> M3 83 | * Z -> M2 84 | * W -> M1 85 | */ 86 | func (p *Parse) buildTreesHelper(children *[]*Node, state *TableState, 87 | termIndex, end int) []*Node { 88 | // begin with the last --non-terminal-- of the ruleBody of finalState 89 | if Debug { 90 | fmt.Printf("debug: %+v termIndex:%d children:%+v, end:%d\n", 91 | state, termIndex, children, end) 92 | } 93 | var outputs []*Node 94 | var start = -1 95 | if termIndex < 0 { 96 | // this is the base-case for the recursion (we matched the entire rule) 97 | outputs = append(outputs, &Node{state, *children, p}) 98 | return outputs 99 | } else if termIndex == 0 { 100 | // if this is the first rule 101 | start = state.Start 102 | } 103 | term := state.Rb.Terms[termIndex] 104 | 105 | if term.Type == Terminal { 106 | n := &TableState{term, nil, 107 | state.Start + termIndex, state.Start + termIndex + 1, 0} 108 | cld := []*Node{{n, nil, p}} 109 | cld = append(cld, *children...) 
110 | for _, node := range p.buildTreesHelper(&cld, state, termIndex-1, end-1) { 111 | outputs = append(outputs, node) 112 | } 113 | return outputs 114 | } 115 | 116 | if Debug { 117 | fmt.Println("\nend:", end, "term.value:", term.Value, state) 118 | } 119 | for _, st := range p.columns[end].states { 120 | if st == state { 121 | // this prevents an endless recursion: since the states are filled in 122 | // order of completion, we know that X cannot depend on state Y that 123 | // comes after it X in chronological order 124 | if Debug { 125 | fmt.Println("st==state", st, state) 126 | fmt.Println(p.columns[end]) 127 | } 128 | break 129 | } 130 | if !st.isCompleted() || st.Term.Value != term.Value || st.Term.Type != term.Type { 131 | // this state is out of the question -- either not completed or does not 132 | // match the name 133 | continue 134 | } 135 | if start != -1 && st.Start != start { 136 | // if start isn't nil, this state must span from start to end 137 | continue 138 | } 139 | if Debug { 140 | fmt.Printf("\tY st:%+v, term:%+v\n", st, term) 141 | } 142 | 143 | // okay, so `st` matches -- now we need to create a tree for every possible 144 | // sub-match 145 | for _, subTree := range p.buildTrees(st) { 146 | cld := []*Node{subTree} 147 | cld = append(cld, *children...) 
148 | // now try all options 149 | for _, node := range p.buildTreesHelper(&cld, state, termIndex-1, st.Start) { 150 | outputs = append(outputs, node) 151 | } 152 | } 153 | } 154 | return outputs 155 | } 156 | -------------------------------------------------------------------------------- /earley_eval.go: -------------------------------------------------------------------------------- 1 | package fmr 2 | 3 | import ( 4 | "fmt" 5 | "math/big" 6 | ) 7 | 8 | // Eval returns the denotation of Node n 9 | func (n *Node) Eval() (interface{}, error) { 10 | if n.Value.Rb == nil || n.Value.Rb.F == nil { 11 | if n.p == nil { 12 | return "", nil 13 | } 14 | return n.OriginalText(), nil 15 | } 16 | return n.fmrEval(n.Value.Rb.F, n.Children) 17 | } 18 | 19 | func (n *Node) fmrEval(f *FMR, children []*Node) (interface{}, error) { 20 | if f == nil { 21 | return "", nil 22 | } 23 | if f.Fn == "nf.I" { 24 | if len(f.Args) != 1 { 25 | return "", fmt.Errorf("the length of Args of nf.I should be one") 26 | } 27 | s, err := n.semEval(f.Args[0], children) 28 | if err != nil { 29 | return "", err 30 | } 31 | return s, nil 32 | } 33 | 34 | var args []interface{} 35 | for _, arg := range f.Args { 36 | s, err := n.semEval(arg, children) 37 | if err != nil { 38 | return "", err 39 | } 40 | args = append(args, s) 41 | } 42 | if Debug { 43 | fmt.Printf("funcs.Call(%s, %+v)\n", f.Fn, args) 44 | } 45 | return Call(f.Fn, args...) 
46 | } 47 | 48 | func (n *Node) semEval(arg *Arg, nodes []*Node) (interface{}, error) { 49 | if arg == nil { 50 | return "", fmt.Errorf("arg is nil") 51 | } 52 | switch arg.Type { 53 | case "string": 54 | if s, ok := arg.Value.(string); ok { 55 | return s, nil 56 | } 57 | return "", fmt.Errorf("arg.Value: %+v is not string", arg.Value) 58 | case "int": 59 | if i, ok := arg.Value.(*big.Int); ok { 60 | return i.String(), nil 61 | } 62 | return "", fmt.Errorf("arg.Value: %+v is not int", arg.Value) 63 | case "float": 64 | if f, ok := arg.Value.(*big.Float); ok { 65 | return f.String(), nil 66 | } 67 | return "", fmt.Errorf("arg.Value: %+v is not float", arg.Value) 68 | case "func": 69 | if fmr, ok := arg.Value.(*FMR); ok { 70 | return n.fmrEval(fmr, nodes) 71 | } 72 | return "", fmt.Errorf("arg.Value: %+v is not func", arg.Value) 73 | case "index": 74 | i, ok := arg.Value.(int) 75 | if !ok { 76 | return "", fmt.Errorf("arg.Value: %+v is not index", arg.Value) 77 | } 78 | if i < 0 || i > len(nodes) { 79 | return "", fmt.Errorf("i=%d not in range [0, %d]", i, len(nodes)) 80 | } 81 | if i == 0 { 82 | return n.NL(), nil 83 | } 84 | s, err := nodes[i-1].Eval() 85 | if err != nil { 86 | return "", err 87 | } 88 | return s, nil 89 | case "context": 90 | subnodes := []map[string]interface{}{} 91 | for _, node := range nodes { 92 | ni, err := node.Eval() 93 | if err != nil { 94 | ni = node.OriginalText() 95 | } 96 | subnodes = append(subnodes, map[string]interface{}{node.Term().Value: ni}) 97 | } 98 | ret := map[string]interface{}{ 99 | "text": n.OriginalText(), 100 | "pos": n.Pos(), 101 | "nodes": subnodes, 102 | } 103 | if n.Term().Type != Terminal { 104 | ret["type"] = n.Term().Value 105 | } 106 | return ret, nil 107 | default: 108 | return "", fmt.Errorf("arg.Type: %s invalid", arg.Type) 109 | } 110 | } 111 | -------------------------------------------------------------------------------- /earley_fmr.go: 
-------------------------------------------------------------------------------- 1 | package fmr 2 | 3 | import ( 4 | "encoding/json" 5 | "fmt" 6 | "math/big" 7 | "strconv" 8 | "strings" 9 | ) 10 | 11 | // Semantic returns the stringified FMR of Node n 12 | func (n *Node) Semantic() (string, error) { 13 | nl := strconv.Quote(n.NL()) 14 | if n.Value.Rb == nil || n.Value.Rb.F == nil { 15 | if n.p == nil { 16 | return "", nil 17 | } 18 | // by default, returns nf.I($0) 19 | return nl, nil 20 | } 21 | return n.fmrStr(n.Value.Rb.F, n.Children, nl) 22 | } 23 | 24 | func (n *Node) fmrStr(f *FMR, children []*Node, nl string) (string, error) { 25 | if f == nil { 26 | return "", nil 27 | } 28 | if f.Fn == "nf.I" { 29 | if len(f.Args) != 1 { 30 | return "", fmt.Errorf("the length of Args of nf.I should be one") 31 | } 32 | s, err := n.semStr(f.Args[0], children, nl) 33 | if err != nil { 34 | return "", err 35 | } 36 | return s, nil 37 | } 38 | 39 | var args []string 40 | for _, arg := range f.Args { 41 | s, err := n.semStr(arg, children, nl) 42 | if err != nil { 43 | return "", err 44 | } 45 | args = append(args, s) 46 | } 47 | return fmt.Sprintf("%s(%s)", f.Fn, strings.Join(args, ", ")), nil 48 | } 49 | 50 | func (n *Node) semStr(arg *Arg, nodes []*Node, nl string) (string, error) { 51 | if arg == nil { 52 | return "", fmt.Errorf("arg is nil") 53 | } 54 | switch arg.Type { 55 | case "string": 56 | if s, ok := arg.Value.(string); ok { 57 | return strconv.Quote(s), nil 58 | } 59 | return "", fmt.Errorf("arg.Value: %+v is not string", arg.Value) 60 | case "int": 61 | if i, ok := arg.Value.(*big.Int); ok { 62 | return i.String(), nil 63 | } 64 | return "", fmt.Errorf("arg.Value: %+v is not int", arg.Value) 65 | case "float": 66 | if f, ok := arg.Value.(*big.Float); ok { 67 | return f.String(), nil 68 | } 69 | return "", fmt.Errorf("arg.Value: %+v is not float", arg.Value) 70 | case "func": 71 | if fmr, ok := arg.Value.(*FMR); ok { 72 | return n.fmrStr(fmr, nodes, nl) 73 | } 74 | 
return "", fmt.Errorf("arg.Value: %+v is not func", arg.Value) 75 | case "index": 76 | i, ok := arg.Value.(int) 77 | if !ok { 78 | return "", fmt.Errorf("arg.Value: %+v is not index", arg.Value) 79 | } 80 | if i < 0 || i > len(nodes) { 81 | return "", fmt.Errorf("i=%d not in range [0, %d]", i, len(nodes)) 82 | } 83 | if i == 0 { 84 | return nl, nil 85 | } 86 | if nodes[i-1] == nil { 87 | return "null", nil 88 | } 89 | s, err := nodes[i-1].Semantic() 90 | if err != nil { 91 | return "", err 92 | } 93 | return s, nil 94 | case "context": 95 | subnodes := []map[string]interface{}{} 96 | for _, node := range nodes { 97 | ni, err := node.Eval() 98 | if err != nil { 99 | ni = node.OriginalText() 100 | } 101 | subnodes = append(subnodes, map[string]interface{}{node.Term().Value: ni}) 102 | } 103 | ret := map[string]interface{}{ 104 | "text": n.OriginalText(), 105 | "pos": n.Pos(), 106 | "nodes": subnodes, 107 | } 108 | if n.Term().Type != Terminal { 109 | ret["type"] = n.Term().Value 110 | } 111 | s, _ := json.Marshal(ret) 112 | return string(s), nil 113 | default: 114 | return "", fmt.Errorf("arg.Type: %s invalid", arg.Type) 115 | } 116 | } 117 | -------------------------------------------------------------------------------- /earley_nodeprint.go: -------------------------------------------------------------------------------- 1 | package fmr 2 | 3 | import ( 4 | "fmt" 5 | 6 | "github.com/xlab/treeprint" 7 | ) 8 | 9 | // TreePrint to out 10 | func (n *Node) TreePrint() { 11 | tree := treeprint.New() 12 | tree.SetValue(n.Value) 13 | for _, child := range n.Children { 14 | tree.AddNode(child.Value) 15 | } 16 | fmt.Println(tree.String()) 17 | } 18 | -------------------------------------------------------------------------------- /earley_stringer.go: -------------------------------------------------------------------------------- 1 | package fmr 2 | 3 | import ( 4 | "fmt" 5 | "io" 6 | "math/big" 7 | "strconv" 8 | "strings" 9 | ) 10 | 11 | func (ts *TableState) String() 
string { 12 | s := "" 13 | switch ts.Term.Type { 14 | case Nonterminal: 15 | if ts.Rb != nil { 16 | for i, term := range ts.Rb.Terms { 17 | if i == ts.Dot { 18 | s += DOT + " " 19 | } 20 | switch term.Type { 21 | case Nonterminal: 22 | s += "<" + term.Value + "> " 23 | case Terminal: 24 | s += strconv.Quote(term.Value) + " " 25 | case Any: 26 | s += "(any) " 27 | case List: 28 | s += "(list<" + term.Value + ">) " 29 | } 30 | } 31 | if ts.Dot == len(ts.Rb.Terms) { 32 | s += DOT 33 | } 34 | return fmt.Sprintf("<%s> -> %s [%d-%d] {%s}", 35 | ts.Term.Value, s, ts.Start, ts.End, ts.Rb.F) 36 | } 37 | case Any: 38 | for i := ts.Start; i < ts.End; i++ { 39 | s += "# " 40 | } 41 | s += DOT + " * " 42 | return fmt.Sprintf("(any) -> %s [%d-%d]", s, ts.Start, ts.End) 43 | case List: 44 | f := "fmr.list(" 45 | for i := 0; i < ts.Dot; i++ { 46 | s += "<" + ts.Term.Value + "> " 47 | f += fmt.Sprintf("$%d", i+1) 48 | if i != ts.Dot-1 { 49 | f += "," 50 | } 51 | } 52 | f += ")" 53 | s += DOT + " * " 54 | return fmt.Sprintf("(list<%s>) -> %s [%d-%d] {%s}", ts.Term.Value, s, ts.Start, ts.End, f) 55 | } 56 | return fmt.Sprintf("%s [%d-%d]", strconv.Quote(ts.Term.Value), ts.Start, ts.End) 57 | } 58 | 59 | func (tc *TableColumn) String() (out string) { 60 | if tc.index == 0 { 61 | out = "[0] ''\n" 62 | } else { 63 | out = fmt.Sprintf("[%d] '%s' position:[%d-%d]\n", 64 | tc.index, tc.token, tc.token.StartByte, tc.token.EndByte) 65 | } 66 | out += "=======================================\n" 67 | for _, s := range tc.states { 68 | out += s.String() + "\n" 69 | } 70 | return out 71 | } 72 | 73 | func (p *Parse) String() string { 74 | out := "" 75 | for _, c := range p.columns { 76 | out += c.String() + "\n" 77 | } 78 | return out 79 | } 80 | 81 | // Print this tree to out 82 | func (n *Node) Print(out io.Writer) { 83 | n.printLevel(out, 0) 84 | } 85 | 86 | func (n *Node) printLevel(out io.Writer, level int) { 87 | indentation := "" 88 | for i := 0; i < level; i++ { 89 | indentation += " " 
90 | } 91 | fmt.Fprintf(out, "%s%v\n", indentation, n.Value) 92 | for _, child := range n.Children { 93 | child.printLevel(out, level+1) 94 | } 95 | } 96 | 97 | func (n *Node) String() string { 98 | if len(n.Children) > 0 { 99 | return fmt.Sprintf("%+v %+v", n.Value, n.Children) 100 | } 101 | return fmt.Sprintf("%+v", n.Value) 102 | } 103 | 104 | func (f *FMR) String() string { 105 | if f == nil { 106 | return "nf.I($0)" 107 | } 108 | var args []string 109 | invalid := "invalid_fmr" 110 | for _, arg := range f.Args { 111 | switch arg.Type { 112 | case "string": 113 | if s, ok := arg.Value.(string); ok { 114 | args = append(args, strconv.Quote(s)) 115 | } else { 116 | return invalid 117 | } 118 | case "int": 119 | if i, ok := arg.Value.(*big.Int); ok { 120 | args = append(args, i.String()) 121 | } else { 122 | return invalid 123 | } 124 | case "float": 125 | if f, ok := arg.Value.(*big.Float); ok { 126 | args = append(args, f.String()) 127 | } else { 128 | return invalid 129 | } 130 | case "func": 131 | if fmr, ok := arg.Value.(*FMR); ok { 132 | args = append(args, fmr.String()) 133 | } else { 134 | return invalid 135 | } 136 | case "index": 137 | if i, ok := arg.Value.(int); ok { 138 | args = append(args, fmt.Sprintf("$%d", i)) 139 | } else { 140 | return invalid 141 | } 142 | default: 143 | return invalid 144 | } 145 | } 146 | return fmt.Sprintf("%s(%s)", f.Fn, strings.Join(args, ",")) 147 | } 148 | -------------------------------------------------------------------------------- /earley_terminal_match.go: -------------------------------------------------------------------------------- 1 | package fmr 2 | 3 | import ( 4 | "strings" 5 | 6 | "github.com/liuzl/ling" 7 | ) 8 | 9 | func terminalMatch(term *Term, token *ling.Token) bool { 10 | if term == nil || token == nil || term.Type != Terminal { 11 | return false 12 | } 13 | t := gTokens.get(term.Value) 14 | if term.Meta == nil || t == nil { 15 | if term.Value == token.Text { 16 | return true 17 | } 18 | } else { 19 
| flags, _ := term.Meta.(string) 20 | switch { 21 | case strings.Contains(flags, "l"): 22 | if t.Annotations[ling.Lemma] == token.Annotations[ling.Lemma] { 23 | return true 24 | } 25 | case strings.Contains(flags, "i"): 26 | if strings.ToLower(t.Annotations[ling.Norm]) == 27 | strings.ToLower(token.Annotations[ling.Norm]) { 28 | return true 29 | } 30 | } 31 | } 32 | return false 33 | } 34 | -------------------------------------------------------------------------------- /earley_test.go: -------------------------------------------------------------------------------- 1 | package fmr 2 | 3 | import ( 4 | "bytes" 5 | "testing" 6 | 7 | "zliu.org/goutil" 8 | ) 9 | 10 | func TestEarleyParse(t *testing.T) { 11 | var grammar = ` = "a" | "a" "+" {nf.math.sum($1, $3)};` 12 | //grammar = ` = "a";` 13 | strs := []string{ 14 | "a", 15 | "a + a", 16 | //"a + a + a", 17 | //"a + a + a + a", 18 | //"a + a + a + a + a", 19 | //"a + a + a + a + a + a", 20 | //"a + a + a + a + a + a + a", 21 | "+ a", 22 | } 23 | g, err := GrammarFromString(grammar, "a") 24 | if err != nil { 25 | t.Error(err) 26 | } 27 | _, err = goutil.JSONMarshalIndent(g, "", " ") 28 | if err != nil { 29 | t.Error(err) 30 | } 31 | //fmt.Println(string(b)) 32 | for _, text := range strs { 33 | p, err := g.EarleyParse(text, "expr") 34 | if err != nil { 35 | t.Error(err) 36 | } 37 | t.Logf("%+v\n", p) 38 | for _, finalState := range p.finalStates { 39 | trees := p.GetTrees(finalState) 40 | t.Log("tree number:", len(trees)) 41 | for _, tree := range trees { 42 | var buf bytes.Buffer 43 | tree.Print(&buf) 44 | t.Log(buf.String()) 45 | tree.TreePrint() 46 | b, err := goutil.JSONMarshalIndent(tree, "", " ") 47 | if err != nil { 48 | t.Error(err) 49 | } 50 | t.Logf("%+v", string(b)) 51 | } 52 | } 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /examples/arithmetic/arithmetic.grammar: -------------------------------------------------------------------------------- 1 | = 
"one" {nf.I(1)} 2 | | "two" {nf.I(2)} 3 | | "three" {nf.I(3)} 4 | | "four" {nf.I(4)} 5 | | "five" {nf.I(5)} 6 | | "six" {nf.I(6)} 7 | | "seven" {nf.I(7)} 8 | | "eight" {nf.I(8)} 9 | | "nine" {nf.I(9)} 10 | | "ten" {nf.I(10)} 11 | | "一" {nf.I(1)} 12 | | "二" {nf.I(2)} 13 | | "三" {nf.I(3)} 14 | | "四" {nf.I(4)} 15 | | "五" {nf.I(5)} 16 | | "六" {nf.I(6)} 17 | | "七" {nf.I(7)} 18 | | "八" {nf.I(8)} 19 | | "九" {nf.I(9)} 20 | | "十" {nf.I(10)} 21 | | "minus" {nf.math.sub($1, $3)} 22 | | "减" {nf.math.sub($1, $3)} 23 | | "plus" {nf.math.sum($1, $3)} 24 | | "add" {nf.math.sum($1, $3)} 25 | | "加" {nf.math.sum($1, $3)} 26 | | "times" {nf.math.mul($1, $3)} 27 | | "multiply by" {nf.math.mul($1, $3)} 28 | | "乘" {nf.math.mul($1, $3)} 29 | | "minus" {nf.math.neg($2)} 30 | | "负" {nf.math.neg($2)}; 31 | 32 | -------------------------------------------------------------------------------- /examples/arithmetic/input.txt: -------------------------------------------------------------------------------- 1 | minus three minus two 2 | two times two plus three 3 | one add two multiply by two plus three 4 | 二加五减三 5 | 我的二加五减三 6 | three plus three minus two 7 | minus four 8 | 四加七等于几?八减二等于几? 
9 | 十九八七六五四三二一 10 | -------------------------------------------------------------------------------- /examples/arithmetic/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "bufio" 5 | "flag" 6 | "fmt" 7 | "io" 8 | "io/ioutil" 9 | "os" 10 | 11 | "github.com/golang/glog" 12 | "github.com/liuzl/fmr" 13 | "github.com/robertkrimen/otto" 14 | ) 15 | 16 | var ( 17 | grammar = flag.String("g", "arithmetic.grammar", "grammar file") 18 | js = flag.String("js", "math.js", "javascript file") 19 | input = flag.String("i", "", "file of original text to read") 20 | ) 21 | 22 | func main() { 23 | flag.Parse() 24 | //bnf.Debug = true 25 | g, err := fmr.GrammarFromFile(*grammar) 26 | if err != nil { 27 | glog.Fatal(err) 28 | } 29 | 30 | script, err := ioutil.ReadFile(*js) 31 | if err != nil { 32 | glog.Fatal(err) 33 | } 34 | vm := otto.New() 35 | if _, err = vm.Run(script); err != nil { 36 | glog.Fatal(err) 37 | } 38 | 39 | var in *os.File 40 | if *input == "" { 41 | in = os.Stdin 42 | } else { 43 | in, err = os.Open(*input) 44 | if err != nil { 45 | glog.Fatal(err) 46 | } 47 | defer in.Close() 48 | } 49 | br := bufio.NewReader(in) 50 | 51 | for { 52 | line, c := br.ReadString('\n') 53 | if c == io.EOF { 54 | break 55 | } 56 | if c != nil { 57 | glog.Fatal(c) 58 | } 59 | fmt.Println(line) 60 | //p, err := g.EarleyParse("number", line) 61 | ps, err := g.EarleyParseMaxAll(line, "number") 62 | if err != nil { 63 | glog.Fatal(err) 64 | } 65 | for i, p := range ps { 66 | for _, f := range p.GetFinalStates() { 67 | trees := p.GetTrees(f) 68 | //fmt.Printf("%+v\n", p) 69 | fmt.Printf("p%d tree number:%d\n", i, len(trees)) 70 | for _, tree := range trees { 71 | //tree.Print(os.Stdout) 72 | sem, err := tree.Semantic() 73 | if err != nil { 74 | glog.Fatal(err) 75 | } 76 | result, err := vm.Run(sem) 77 | if err != nil { 78 | glog.Fatal(err) 79 | } 80 | fmt.Printf("%s = %v\n", sem, result) 81 | } 82 | } 83 | } 84 | 
fmt.Println() 85 | } 86 | } 87 | -------------------------------------------------------------------------------- /examples/arithmetic/math.js: -------------------------------------------------------------------------------- 1 | var nf = nf || {}; 2 | nf.math = {}; 3 | 4 | nf.math.sum = function(x, y) { 5 | //console.log(typeof(x), typeof(y)); 6 | //console.log(x, y); 7 | return x + y; 8 | }; 9 | nf.math.sub = function(x, y) { 10 | return x - y; 11 | } 12 | nf.math.mul = function(x, y) { 13 | return x * y; 14 | } 15 | nf.math.neg = function(x) { 16 | return -x; 17 | } 18 | 19 | //console.log(nf.math.sum(1,1)); 20 | 21 | -------------------------------------------------------------------------------- /examples/builtin/any.grammar: -------------------------------------------------------------------------------- 1 | = (any) "有限公司" {nf.company($1,$2,$3)} 2 | | (any) "有限公司" {nf.company($1,$2,$4,$3)} 3 | | (any) "(" ")" "有限公司" {nf.company($3,$1,$5)} 4 | | (any) "(" ")" "有限公司" {nf.company($4,$1,$6,$2)} 5 | ; 6 | = "北京" {nf.I("beijing")} 7 | | "天津" {nf.I("tianjin")} 8 | | "上海" {nf.I("shanghai")} 9 | ; 10 | 11 | = "科技" 12 | | "网络技术" 13 | | "计算机技术" 14 | ; 15 | -------------------------------------------------------------------------------- /examples/builtin/any.txt: -------------------------------------------------------------------------------- 1 | 搜索行业的北京搜狗有限公司是一家很有意思的公司 2 | 天津机器智能有限公司,天津有这家公司吗? 
3 | 北京tencent sogou baidu有限公司 4 | 北京Jörgensen and Art有限公司 5 | 北京сайт Московского有限公司 6 | 北京チャンネル有限公司 7 | 北京대통령의有限公司 8 | 北京ทันทุกเหตุการ有限公司 9 | 腾讯科技(北京)有限公司 10 | 北京小熊博望科技有限公司 11 | 百度在线网络技术(北京)有限公司 12 | 携程计算机技术(上海)有限公司 13 | 天津海量计算机技术有限公司 14 | -------------------------------------------------------------------------------- /examples/builtin/builtin.grammar: -------------------------------------------------------------------------------- 1 | = "zero" {nf.I(0)} | "o" {nf.I(0)} 2 | | "one" {nf.I(1)} | "a" {nf.I(1)} | "an" {nf.I(1)} 3 | | "two" {nf.I(2)} 4 | | "three" {nf.I(3)} 5 | | "four" {nf.I(4)} 6 | | "five" {nf.I(5)} 7 | | "six" {nf.I(6)} 8 | | "seven" {nf.I(7)} 9 | | "eight" {nf.I(8)} 10 | | "nine" {nf.I(9)} 11 | ; 12 | 13 | = "ten" {nf.I(10)} 14 | | "eleven" {nf.I(11)} 15 | | "twelve" {nf.I(12)} 16 | | "thirteen" {nf.I(13)} 17 | | "fourteen" {nf.I(14)} 18 | | "fifteen" {nf.I(15)} 19 | | "sixteen" {nf.I(16)} 20 | | "seventeen" {nf.I(17)} 21 | | "eighteen" {nf.I(18)} 22 | | "nineteen" {nf.I(19)} 23 | ; 24 | 25 | = "first" {nf.I(1)} 26 | | "second" {nf.I(2)} 27 | | "third" {nf.I(3)} 28 | | "fourth" {nf.I(4)} 29 | | "fifth" {nf.I(5)} 30 | | "sixth" {nf.I(6)} 31 | | "seventh" {nf.I(7)} 32 | | "eighth" {nf.I(8)} 33 | | "ninth" {nf.I(9)} 34 | ; 35 | 36 | = "tenth" {nf.I(10)} 37 | | "eleventh" {nf.I(11)} 38 | | "twelfth" {nf.I(12)} 39 | | "thirteenth" {nf.I(13)} 40 | | "fourteenth" {nf.I(14)} 41 | | "fifteenth" {nf.I(15)} 42 | | "sixteenth" {nf.I(16)} 43 | | "seventeenth" {nf.I(17)} 44 | | "eighteenth" {nf.I(18)} 45 | | "nineteenth" {nf.I(19)} 46 | ; 47 | 48 | = "twenty" {nf.I(20)} 49 | | "thirty" {nf.I(30)} 50 | | "forty" {nf.I(40)} 51 | | "fifty" {nf.I(50)} 52 | | "sixty" {nf.I(60)} 53 | | "seventy" {nf.I(70)} 54 | | "eighty" {nf.I(80)} 55 | | "ninety" {nf.I(90)} 56 | ; 57 | 58 | = "twentieth" {nf.I(20)} 59 | | "thirtieth" {nf.I(30)} 60 | | "fortieth" {nf.I(40)} 61 | | "fiftieth" {nf.I(50)} 62 | | "sixtieth" {nf.I(60)} 63 | | "seventieth" {nf.I(70)} 64 | | "eightieth" 
{nf.I(80)} 65 | | "ninetieth" {nf.I(90)} 66 | ; 67 | 68 | = "hundred" {nf.I(100)} 69 | | "thousand" {nf.I(1000)} 70 | | "million" {nf.I(1000000)} 71 | | "billion" {nf.I(1000000000)} 72 | | "trillion" {nf.I(1000000000000)} 73 | | "quadrillion" {nf.I(1000000000000000)} 74 | | "quintillion" {nf.I(1000000000000000000)} 75 | | "sextillion" {nf.I(1000000000000000000000)} 76 | | "septillion" {nf.I(1000000000000000000000000)} 77 | | "octillion" {nf.I(1000000000000000000000000000)} 78 | | "nonillion" {nf.I(1000000000000000000000000000000)} 79 | | "decillion" {nf.I(1000000000000000000000000000000000)} 80 | ; 81 | 82 | = {nf.I($1)} 83 | | {nf.I($1)} 84 | | {nf.I($1)} 85 | | {nf.math.sum($1, $2)} 86 | | "and" {nf.math.sum($1, $3)} 87 | | "-" {nf.math.sum($1, $3)} 88 | | "," {nf.math.sum($1, $3)} 89 | ; 90 | 91 | = {nf.math.mul($1, $2)} 92 | | "-" {nf.math.mul($1, $3)} 93 | | {nf.math.mul($1, $2)} 94 | ; 95 | 96 |
= {nf.I($1)} 97 | | {nf.I($1)} 98 | ; 99 | 100 | =
{nf.I($1)} 101 | |
{nf.math.sum($1, $2)} 102 | |
"and" {nf.math.sum($1, $3)} 103 | ; 104 | -------------------------------------------------------------------------------- /examples/builtin/cn_input.txt: -------------------------------------------------------------------------------- 1 | 二千零一十四 2 | 十万 3 | 一万五千 4 | 十万二千 5 | 十万八千六百零四 6 | 二亿二千零二万二千二百二十二 7 | 二十二亿零二万二千二百 8 | 二十二亿二万二千二百 9 | 二百五十一 10 | -------------------------------------------------------------------------------- /examples/builtin/cn_num.grammar: -------------------------------------------------------------------------------- 1 | = "一" {nf.I(1)} 2 | | "二" {nf.I(2)} 3 | | "三" {nf.I(3)} 4 | | "四" {nf.I(4)} 5 | | "五" {nf.I(5)} 6 | | "六" {nf.I(6)} 7 | | "七" {nf.I(7)} 8 | | "八" {nf.I(8)} 9 | | "九" {nf.I(9)} 10 | ; 11 | 12 | = "零" {nf.I(0)} 13 | | "〇" {nf.I(0)} 14 | ; 15 | 16 | = "十" {nf.I(10)}; 17 | = "百" {nf.I(100)}; 18 | = "千" {nf.I(1000)}; 19 | 20 | = "万" {nf.I(10000)}; 21 | = "亿" {nf.I(100000000)}; 22 | 23 | = {nf.I($1)} 24 | | {nf.math.mul($1, $2)} 25 | | {nf.math.sum(nf.math.mul($1, $2), $3)} 26 | | {nf.I($1)} 27 | ; 28 | 29 | = {nf.math.mul($1, $2)} 30 | | {nf.math.sum(nf.math.mul($1, $2), $3)} 31 | | {nf.math.sum(nf.math.mul($1, $2), $4)} 32 | | {nf.I($1)} 33 | ; 34 | 35 | = {nf.math.mul($1, $2)} 36 | | {nf.math.sum(nf.math.mul($1, $2), $3)} 37 | | {nf.math.sum(nf.math.mul($1, $2), $4)} 38 | | {nf.math.sum(nf.math.mul($1, $2), $4)} 39 | | {nf.I($1)} 40 | ; 41 | 42 | = {nf.math.mul($1, $2)} 43 | | {nf.math.sum(nf.math.mul($1, $2), $3)} 44 | | {nf.I($1)} 45 | ; 46 | 47 | = {nf.math.mul($1, $2)} 48 | | {nf.math.sum(nf.math.mul($1, $2), $3)} 49 | | {nf.math.sum(nf.math.mul($1, $2), $4)} 50 | | {nf.I($1)} 51 | ; 52 | 53 | = {nf.I($1)}; 54 | 55 | -------------------------------------------------------------------------------- /examples/builtin/company.js: -------------------------------------------------------------------------------- 1 | var nf = nf || {}; 2 | 3 | nf.company = function(loc, name, type, area) { 4 | return {"loc":loc, "name":name, 
"type":type, "area":area} 5 | } 6 | 7 | //console.log(nf.math.sum(1,1)); 8 | 9 | -------------------------------------------------------------------------------- /examples/builtin/input.txt: -------------------------------------------------------------------------------- 1 | six million five thousand 2 | twenty thousand five hundred and sixty nine 3 | six-million five-thousand and two 4 | six-million 5 | five-thousand 6 | two 7 | -------------------------------------------------------------------------------- /examples/builtin/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "bufio" 5 | "flag" 6 | "fmt" 7 | "io" 8 | "io/ioutil" 9 | "os" 10 | "strings" 11 | 12 | "github.com/golang/glog" 13 | "github.com/liuzl/fmr" 14 | "github.com/robertkrimen/otto" 15 | "zliu.org/goutil" 16 | ) 17 | 18 | var ( 19 | grammar = flag.String("g", "builtin.grammar", "grammar file") 20 | js = flag.String("js", "math.js", "javascript file") 21 | input = flag.String("i", "", "file of original text to read") 22 | debug = flag.Bool("debug", false, "debug mode") 23 | start = flag.String("start", "number", "start rule") 24 | ) 25 | 26 | func main() { 27 | defer func() { 28 | if err := recover(); err != nil { 29 | fmt.Println(err) 30 | } 31 | }() 32 | flag.Parse() 33 | if *debug { 34 | fmr.Debug = true 35 | } 36 | g, err := fmr.GrammarFromFile(*grammar) 37 | if err != nil { 38 | glog.Fatal(err) 39 | } 40 | if *debug { 41 | b, err := goutil.JSONMarshalIndent(g, "", " ") 42 | if err != nil { 43 | glog.Fatal(err) 44 | } 45 | fmt.Printf("%s\n", string(b)) 46 | } 47 | script, err := ioutil.ReadFile(*js) 48 | if err != nil { 49 | glog.Fatal(err) 50 | } 51 | vm := otto.New() 52 | if _, err = vm.Run(script); err != nil { 53 | glog.Fatal(err) 54 | } 55 | 56 | var in *os.File 57 | if *input == "" { 58 | in = os.Stdin 59 | } else { 60 | in, err = os.Open(*input) 61 | if err != nil { 62 | glog.Fatal(err) 63 | } 64 | defer 
in.Close() 65 | } 66 | br := bufio.NewReader(in) 67 | 68 | for { 69 | line, c := br.ReadString('\n') 70 | if c == io.EOF { 71 | break 72 | } 73 | if c != nil { 74 | glog.Fatal(c) 75 | } 76 | line = strings.TrimSpace(line) 77 | fmt.Println(line) 78 | 79 | trees, err := g.ExtractMaxAll(line, *start) 80 | if err != nil { 81 | glog.Fatal(err) 82 | } 83 | for _, tree := range trees { 84 | //tree.Print(os.Stdout) 85 | sem, err := tree.Semantic() 86 | //fmt.Println(sem) 87 | if err != nil { 88 | glog.Fatal(err) 89 | } 90 | if *debug { 91 | fmt.Printf("%s = ?\n", sem) 92 | } 93 | result, err := vm.Run(sem) 94 | if err != nil { 95 | glog.Error(err) 96 | } 97 | rs, _ := result.Export() 98 | fmt.Printf("%s = %+v\n", sem, rs) 99 | //eval, err := tree.Eval() 100 | //fmt.Printf("Eval: %s, Err: %+v\n", eval, err) 101 | } 102 | fmt.Println() 103 | } 104 | } 105 | -------------------------------------------------------------------------------- /examples/builtin/math.js: -------------------------------------------------------------------------------- 1 | var nf = nf || {}; 2 | nf.math = {}; 3 | 4 | nf.math.sum = function(x, y) { 5 | //console.log(typeof(x), typeof(y)); 6 | //console.log(x, y); 7 | return x + y; 8 | }; 9 | nf.math.sub = function(x, y) { 10 | return x - y; 11 | } 12 | nf.math.mul = function(x, y) { 13 | return x * y; 14 | } 15 | nf.math.neg = function(x) { 16 | return -x; 17 | } 18 | 19 | //console.log(nf.math.sum(1,1)); 20 | 21 | -------------------------------------------------------------------------------- /examples/builtin/tianjin.txt: -------------------------------------------------------------------------------- 1 | 天津机器智能有限公司,天津有这家公司吗? 
2 | -------------------------------------------------------------------------------- /examples/math/README.md: -------------------------------------------------------------------------------- 1 | ```sh 2 | go build 3 | ./math -eval -i input.txt 4 | ``` 5 | -------------------------------------------------------------------------------- /examples/math/grammars/latex.math.grammar: -------------------------------------------------------------------------------- 1 | = {nf.I($1)}; 2 | 3 | = "+" {nf.math.sum($1,$3)} 4 | | "-" {nf.math.sub($1,$3)} 5 | | {nf.I($1)} 6 | ; 7 | 8 | = "*" {nf.math.mul($1,$3)} 9 | | "×" {nf.math.mul($1,$3)} 10 | | "\\times" {nf.math.mul($1,$3)} 11 | | {nf.math.mul($1,$2)} 12 | | "/" {nf.math.div($1,$3)} 13 | | "÷" {nf.math.div($1,$3)} 14 | | "\\frac" "{" "}" "{" "}" {nf.math.div($3,$6)} 15 | | {nf.I($1)} 16 | ; 17 | 18 | = "^" {nf.math.pow($1,$3)} 19 | | "**" {nf.math.pow($1,$3)} 20 | | "\\sqrt" "{" "}" {nf.math.pow($3,0.5)} 21 | | {nf.I($1)} 22 | ; 23 | 24 | = "+" {nf.I($2)} 25 | ; 26 | 27 | = "\(" ")" {nf.I($2)} 28 | | "{" "}" {nf.I($2)} 29 | | "|" "|" {nf.math.abs($2)} 30 | | {nf.math.to_number($1)} 31 | | {nf.math.expression($1)} 32 | ; 33 | 34 | = "=" {vf.be.eq($1,$3)} 35 | | "\\neq" {vf.be.neq($1,$3)} 36 | | "≠" {vf.be.neq($1,$3)} 37 | | ">" {vf.be.gt($1,$3)} 38 | | "≥" {vf.be.geq($1,$3)} 39 | | "\\geq" {vf.be.geq($1,$3)} 40 | | "<" {vf.be.lt($1,$3)} 41 | | "≤" {vf.be.leq($1,$3)} 42 | | "\\leq" {vf.be.leq($1,$3)} 43 | | "\\ " {nf.math.op($3,$1,$4)} 44 | ; 45 | 46 | = "f"|"g"; 47 | 48 | = "\(" ")" {nf.math.func($1,$3)}; 49 | 50 | 51 | = {nf.I($1)} 52 | | {nf.I($1)} 53 | ; 54 | -------------------------------------------------------------------------------- /examples/math/grammars/math.en.grammar: -------------------------------------------------------------------------------- 1 | /* 2 | * Author: Zhanliang Liu 3 | */ 4 | 5 | #include "number.en.grammar" 6 | 7 | // for local grammars that come from regex tagger 8 | = 
{nf.math.to_number($1)}; 9 | 10 | = "pair" {nf.I(2)} 11 | | "couple" {nf.I(2)} 12 | | "dozen" {nf.I(12)} 13 | | "percent" {nf.I(0.01)} 14 | ; 15 | = {nf.I($1)} 16 | | {nf.math.mul($1,$2)} 17 | ; 18 | 19 | = "plus" {nf.math.sum($1,$3)} 20 | | "added to" {nf.math.sum($3, $1)} 21 | | "sum of" "and" {nf.math.sum($2,$4)} 22 | | "the sum of" "and" {nf.math.sum($2,$4)} 23 | ; 24 | 25 | = "minus" {nf.math.sub($1,$3)} 26 | | "subtract" "from" {nf.math.sub($4,$2)} 27 | | "deduct" "from" {nf.math.sub($4,$2)} 28 | | "subtracted from" {nf.math.sub($3,$1)} 29 | ; 30 | 31 | = "divides" {nf.math.div($3,$1)} 32 | | "divided by" {nf.math.div($1,$3)} 33 | | "over" {nf.math.div($1,$3)} 34 | ; 35 | 36 | = "times" {nf.math.mul($1,$3)} 37 | | "multiply by" {nf.math.mul($1,$3)} 38 | | "multiply" "by" {nf.math.mul($2,$4)} 39 | | "product of" "and" {nf.math.mul($2,$4)} 40 | | "half of" {nf.math.mul($2,0.5)} 41 | ; 42 | 43 | = "raised to the power of" {nf.math.pow($1,$3)} 44 | | "raised to the exponent of" {nf.math.pow($1,$3)} 45 | | "square of" {nf.math.pow($2,2)} 46 | | "cube of" {nf.math.pow($2,3)} 47 | | "square root of" {nf.math.pow($2,0.5)} 48 | | "cubic root of" {nf.math.pow($2,nf.math.div(1,3))} 49 | ; 50 | 51 | = "be equal to" {vf.be.equ($1,$2)} 52 | | "equal" {vf.be.equ($1,$2)} 53 | | "be" {vf.be.equ($1,$2)} 54 | ; 55 | 56 | -------------------------------------------------------------------------------- /examples/math/grammars/math.grammar: -------------------------------------------------------------------------------- 1 | #include "math.en.grammar" 2 | #include "math.zh.grammar" 3 | 4 | = {nf.I($1)}; 5 | 6 | = "+" {nf.math.sum($1,$3)} 7 | | "-" {nf.math.sub($1,$3)} 8 | | {nf.I($1)} 9 | ; 10 | 11 | = "*" {nf.math.mul($1,$3)} 12 | | "×" {nf.math.mul($1,$3)} 13 | | "/" {nf.math.div($1,$3)} 14 | | "÷" {nf.math.div($1,$3)} 15 | | {nf.I($1)} 16 | ; 17 | 18 | = "^" {nf.math.pow($1,$3)} 19 | | "**" {nf.math.pow($1,$3)} 20 | | {nf.I($1)} 21 | ; 22 | 23 | = "\(" ")" {nf.I($2)} 24 | | "+" 
{nf.I($2)} 25 | | "-" {nf.math.neg($2)} 26 | | {nf.I($1)} 27 | | {nf.math.expression($1)} 28 | ; 29 | 30 | -------------------------------------------------------------------------------- /examples/math/grammars/math.zh.grammar: -------------------------------------------------------------------------------- 1 | /* 2 | * Author: Zhanliang Liu 3 | */ 4 | 5 | #include "number.zh.grammar" 6 | 7 | // for local grammars that come from regex tagger 8 | = {nf.math.to_number($1)}; 9 | 10 | = "打" {nf.math.mul($1, 12)}; 11 | 12 | = "加" {nf.math.sum($1,$3)} 13 | | "加上" {nf.math.sum($1,$3)} 14 | | "与" "的和" {nf.math.sum($1,$3)} 15 | ; 16 | 17 | = "减" {nf.math.sub($1,$3)} 18 | | "减去" {nf.math.sub($1,$3)} 19 | | "与" "的差" {nf.math.sub($1,$3)} 20 | ; 21 | 22 | = "分之" {nf.math.div($3,$1)} 23 | ; 24 | 25 | = "乘" {nf.math.mul($1,$3)} 26 | | "乘以" {nf.math.mul($1,$3)} 27 | | "与" "的积" {nf.math.mul($1,$3)} 28 | | "的" "倍" {nf.math.mul($1,$3)} 29 | | "的一半" {nf.math.mul($1,0.5)} 30 | | "的" {nf.math.mul($1,$3)} 31 | ; 32 | 33 | = "除" {nf.math.div($3,$1)} 34 | | "除以" {nf.math.div($1,$3)} 35 | | {nf.I($1)} 36 | ; 37 | 38 | = "的" "次方" {nf.math.pow($1,$3)} 39 | | "的" "次幂" {nf.math.pow($1,$3)} 40 | | "的平方" {nf.math.pow($1,2)} 41 | | "的立方" {nf.math.pow($1,3)} 42 | | "的平方根" {nf.math.pow($1,0.5)} 43 | | "的立方根" {nf.math.pow($1,nf.math.div(1,3))} 44 | ; 45 | 46 | -------------------------------------------------------------------------------- /examples/math/grammars/number.en.grammar: -------------------------------------------------------------------------------- 1 | = "zero" {nf.I(0)} 2 | | "one" {nf.I(1)} 3 | | "two" {nf.I(2)} 4 | | "three" {nf.I(3)} 5 | | "four" {nf.I(4)} 6 | | "five" {nf.I(5)} 7 | | "six" {nf.I(6)} 8 | | "seven" {nf.I(7)} 9 | | "eight" {nf.I(8)} 10 | | "nine" {nf.I(9)} 11 | ; 12 | 13 | = {nf.I($1)} 14 | | {nf.util.concat($1,$2)} 15 | ; 16 | 17 | = "ten" {nf.I(10)} 18 | | "eleven" {nf.I(11)} 19 | | "twelve" {nf.I(12)} 20 | | "thirteen" {nf.I(13)} 21 | | "fourteen" {nf.I(14)} 22 | | 
"fifteen" {nf.I(15)} 23 | | "sixteen" {nf.I(16)} 24 | | "seventeen" {nf.I(17)} 25 | | "eighteen" {nf.I(18)} 26 | | "nineteen" {nf.I(19)} 27 | ; 28 | 29 | = "twenty" {nf.I(20)} 30 | | "thirty" {nf.I(30)} 31 | | "forty" {nf.I(40)} 32 | | "fifty" {nf.I(50)} 33 | | "sixty" {nf.I(60)} 34 | | "seventy" {nf.I(70)} 35 | | "eighty" {nf.I(80)} 36 | | "ninety" {nf.I(90)} 37 | ; 38 | 39 | = {nf.I($1)} 40 | | {nf.I($1)} 41 | | {nf.I($1)} 42 | | {nf.math.sum($1,$2)} 43 | | "-" {nf.math.sum($1,$3)} 44 | ; 45 | 46 | = "hundred" {nf.I(100)}; 47 | = "thousand" {nf.I(1000)}; 48 | = "million" {nf.I(1000000)}; 49 | = "billion" {nf.I(1000000000)}; 50 | = "trillion" {nf.I(1000000000000)}; 51 | 52 | = {nf.I($1)} 53 | | {nf.I($1)} 54 | | {nf.math.mul($1,$2)} 55 | | {nf.math.sum(nf.math.mul($1,$2),$3)} 56 | | "and" {nf.math.sum(nf.math.mul($1,$2),$4)} 57 | ; 58 | 59 | = {nf.I($1)} 60 | | {nf.I($1)} 61 | | {nf.math.mul($1,$2)} 62 | | {nf.math.sum(nf.math.mul($1,$2),$3)} 63 | | "and" {nf.math.sum(nf.math.mul($1,$2),$4)} 64 | ; 65 | 66 | = {nf.I($1)} 67 | | {nf.I($1)} 68 | | {nf.math.mul($1,$2)} 69 | | {nf.math.sum(nf.math.mul($1,$2),$3)} 70 | | "and" {nf.math.sum(nf.math.mul($1,$2),$4)} 71 | ; 72 | 73 | = {nf.I($1)} 74 | | {nf.I($1)} 75 | | {nf.math.mul($1,$2)} 76 | | {nf.math.sum(nf.math.mul($1,$2),$3)} 77 | | "and" {nf.math.sum(nf.math.mul($1,$2),$4)} 78 | ; 79 | 80 | = {nf.I($1)} 81 | | {nf.I($1)} 82 | | {nf.math.mul($1,$2)} 83 | | {nf.math.sum(nf.math.mul($1,$2),$3)} 84 | | "and" {nf.math.sum(nf.math.mul($1,$2),$4)} 85 | ; 86 | 87 | = "point" {nf.math.decimal($2)}; 88 | 89 | = {nf.I($1)}; 90 | 91 | = {nf.I($1)} 92 | | {nf.I($1)} 93 | | {nf.math.sum($1,$2)} 94 | | "and" {nf.math.sum($1,$3)} 95 | ; 96 | 97 | // = {nf.math.mul($1,$2)}; 98 | // = {nf.math.mul($1,$2)}; 99 | -------------------------------------------------------------------------------- /examples/math/grammars/number.zh.grammar: -------------------------------------------------------------------------------- 1 | = "一" 
{nf.I(1)} 2 | | "二" {nf.I(2)} 3 | | "三" {nf.I(3)} 4 | | "四" {nf.I(4)} 5 | | "五" {nf.I(5)} 6 | | "六" {nf.I(6)} 7 | | "七" {nf.I(7)} 8 | | "八" {nf.I(8)} 9 | | "九" {nf.I(9)} 10 | | "壹" {nf.I(1)} 11 | | "贰" {nf.I(2)} 12 | | "叁" {nf.I(3)} 13 | | "肆" {nf.I(4)} 14 | | "伍" {nf.I(5)} 15 | | "陆" {nf.I(6)} 16 | | "柒" {nf.I(7)} 17 | | "捌" {nf.I(8)} 18 | | "玖" {nf.I(9)} 19 | | "两" {nf.I(2)} 20 | | "俩" {nf.I(2)} 21 | | "仨" {nf.I(3)} 22 | ; 23 | 24 | = "零" {nf.I(0)} 25 | | "〇" {nf.I(0)} 26 | ; 27 | 28 | = {nf.I($1)} | {nf.I($1)} | {nf.I($1)}; 29 | 30 | = {nf.I($1)} 31 | | {nf.util.concat($1, $2)} 32 | ; 33 | 34 | = "十" {nf.I(10)} 35 | | "拾" {nf.I(10)} 36 | ; 37 | 38 | = "百" {nf.I(100)} 39 | | "佰" {nf.I(100)} 40 | ; 41 | 42 | = "千" {nf.I(1000)} 43 | | "仟" {nf.I(1000)} 44 | ; 45 | 46 | = "万" {nf.I(10000)}; 47 | 48 | = "亿" {nf.I(100000000)} 49 | | "万万" {nf.I(100000000)} 50 | ; 51 | 52 | = {nf.I($1)} 53 | | {nf.math.sum($1,$2)} 54 | | {nf.math.mul($1, $2)} 55 | | {nf.math.sum(nf.math.mul($1, $2), $3)} 56 | | {nf.I($1)} 57 | | {nf.I($1)} 58 | ; 59 | 60 | = {nf.math.mul($1, $2)} 61 | | {nf.math.sum(nf.math.mul($1, $2), nf.math.mul(10,$3))} 62 | | {nf.math.sum(nf.math.mul($1, $2), $3)} 63 | | {nf.math.sum(nf.math.mul($1, $2), $4)} 64 | | {nf.I($1)} 65 | ; 66 | 67 | = {nf.math.mul($1, $2)} 68 | | {nf.math.sum(nf.math.mul($1, $2), nf.math.mul(100,$3))} 69 | | {nf.math.sum(nf.math.mul($1, $2), $3)} 70 | | {nf.math.sum(nf.math.mul($1, $2), $4)} 71 | | {nf.I($1)} 72 | ; 73 | 74 | = {nf.math.mul($1, $2)} 75 | | {nf.math.sum(nf.math.mul($1, $2), nf.math.mul(1000,$3))} 76 | | {nf.math.sum(nf.math.mul($1, $2), $3)} 77 | | {nf.math.sum(nf.math.mul($1, $2), $4)} 78 | | {nf.I($1)} 79 | ; 80 | 81 | = {nf.math.mul($1, $2)} 82 | | {nf.math.sum(nf.math.mul($1, $2), nf.math.mul(10000000,$3))} 83 | | {nf.math.sum(nf.math.mul($1, $2), $3)} 84 | | {nf.math.sum(nf.math.mul($1, $2), $4)} 85 | | {nf.I($1)} 86 | ; 87 | 88 | = "点" {nf.math.decimal($2)}; 89 | 90 | = {nf.I($1)} 91 | | {nf.I($1)} 92 | | 
{nf.math.sum($1,$2)} 93 | ; 94 | 95 | = {nf.I($1)} 96 | | {nf.I($1)} 97 | | {nf.I($1)} 98 | | {nf.I($1)} 99 | ; 100 | -------------------------------------------------------------------------------- /examples/math/input.txt: -------------------------------------------------------------------------------- 1 | six million five thousand plus twenty thousand five hundred and sixty nine 2 | six million five thousand and two plus five 3 | six million plus five thousand 4 | two plus one 5 | the sum of 10 and 789 6 | the sum of 10 and 一百八十二 7 | the sum of 十万八千 and 一百八十二 8 | 四十二与八十八亿的和 9 | 四十二与二的十次方的和 10 | one added to 四十raised to the power of 1.5 11 | -------------------------------------------------------------------------------- /examples/math/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "bufio" 5 | "flag" 6 | "fmt" 7 | "io" 8 | "io/ioutil" 9 | "os" 10 | "runtime/pprof" 11 | "strings" 12 | 13 | "github.com/golang/glog" 14 | "github.com/liuzl/fmr" 15 | "github.com/robertkrimen/otto" 16 | ) 17 | 18 | var ( 19 | grammar = flag.String("g", "grammars/math.grammar", "grammar file") 20 | js = flag.String("js", "math.js", "javascript file") 21 | input = flag.String("i", "", "file of original text to read") 22 | start = flag.String("start", "number", "start rule") 23 | eval = flag.Bool("eval", false, "execute flag") 24 | debug = flag.Bool("debug", false, "debug mode") 25 | cpuprofile = flag.String("cpuprofile", "", "write cpu profile to file") 26 | ) 27 | 28 | func main() { 29 | flag.Parse() 30 | if *debug { 31 | fmr.Debug = true 32 | } 33 | 34 | if *cpuprofile != "" { 35 | f, err := os.Create(*cpuprofile) 36 | if err != nil { 37 | glog.Fatal(err) 38 | } 39 | pprof.StartCPUProfile(f) 40 | defer pprof.StopCPUProfile() 41 | } 42 | g, err := fmr.GrammarFromFile(*grammar) 43 | if err != nil { 44 | glog.Fatal(err) 45 | } 46 | script, err := ioutil.ReadFile(*js) 47 | if err != nil { 48 | glog.Fatal(err) 
49 | } 50 | vm := otto.New() 51 | if _, err = vm.Run(script); err != nil { 52 | glog.Fatal(err) 53 | } 54 | 55 | var in *os.File 56 | if *input == "" { 57 | in = os.Stdin 58 | } else { 59 | in, err = os.Open(*input) 60 | if err != nil { 61 | glog.Fatal(err) 62 | } 63 | defer in.Close() 64 | } 65 | br := bufio.NewReader(in) 66 | 67 | for { 68 | line, c := br.ReadString('\n') 69 | if c == io.EOF { 70 | break 71 | } 72 | if c != nil { 73 | glog.Fatal(c) 74 | } 75 | line = strings.TrimSpace(line) 76 | fmt.Println(line) 77 | if len(line) == 0 { 78 | continue 79 | } 80 | 81 | ps, err := g.EarleyParseMaxAll(line, *start) 82 | if err != nil { 83 | glog.Fatal(err) 84 | } 85 | for i, p := range ps { 86 | for _, f := range p.GetFinalStates() { 87 | trees := p.GetTrees(f) 88 | //fmt.Printf("%+v\n", p) 89 | fmt.Printf("p%d tree number:%d\n", i, len(trees)) 90 | for _, tree := range trees { 91 | //tree.Print(os.Stdout) 92 | sem, err := tree.Semantic() 93 | if err != nil { 94 | glog.Fatal(err) 95 | } 96 | 97 | if !*eval { 98 | fmt.Println(sem) 99 | } else { 100 | result, err := vm.Run(sem) 101 | if err != nil { 102 | glog.Error(sem, err) 103 | continue 104 | } 105 | rs, _ := result.Export() 106 | fmt.Printf("%s => %+v\n", sem, rs) 107 | } 108 | //eval, err := tree.Eval() 109 | //fmt.Printf("Eval: %s, Err: %+v\n", eval, err) 110 | } 111 | } 112 | } 113 | fmt.Println() 114 | } 115 | } 116 | -------------------------------------------------------------------------------- /examples/math/math.js: -------------------------------------------------------------------------------- 1 | var nf = nf || {}; 2 | nf.math = {}; 3 | nf.util = {}; 4 | 5 | nf.list = function(type, cnt) { 6 | //TODO 7 | } 8 | 9 | nf.it = function() {} 10 | nf.what = function() {} 11 | 12 | nf.math.expression = function(s) { 13 | return s.split("").join('*'); 14 | } 15 | 16 | nf.math.to_number = function(s) { 17 | return Number(s); 18 | } 19 | 20 | nf.math.decimal = function(s) { 21 | s = s.toString(); 22 | var n = 
Number(s); 23 | return n / Math.pow(10, s.length); 24 | } 25 | 26 | nf.math.sum = function(x, y) { 27 | return x + y; 28 | } 29 | 30 | nf.math.sub = function(x, y) { 31 | return x - y; 32 | } 33 | 34 | nf.math.mul = function(x, y) { 35 | return x * y; 36 | } 37 | 38 | nf.math.div = function(x, y) { 39 | return x / y; 40 | } 41 | 42 | nf.math.neg = function(x) { 43 | return -x; 44 | } 45 | 46 | nf.math.pow = function(x, y) { 47 | return Math.pow(x, y); 48 | } 49 | 50 | nf.util.concat = function(x, y) { 51 | return x.toString() + y.toString(); 52 | } 53 | -------------------------------------------------------------------------------- /frame_api.go: -------------------------------------------------------------------------------- 1 | package fmr 2 | 3 | import ( 4 | "fmt" 5 | ) 6 | 7 | // FrameFMR parses NL text to FMR 8 | func (g *Grammar) FrameFMR(text string) ([]string, error) { 9 | return g.FrameFMRWithContext("", text) 10 | } 11 | 12 | // FrameFMRWithContext parses NL text to FMR 13 | func (g *Grammar) FrameFMRWithContext(context, text string) ([]string, error) { 14 | frames, err := g.MatchFramesWithContext(context, text) 15 | if err != nil { 16 | return nil, err 17 | } 18 | var ret []string 19 | for k, v := range frames { 20 | f := g.Frames[k.RuleName].Body[k.BodyID].F 21 | terms := g.Frames[k.RuleName].Body[k.BodyID].Terms 22 | var children []*Node 23 | for _, term := range terms { 24 | slots := v.Slots[term.Key()] 25 | if slots == nil || len(slots) == 0 || len(slots[0].Trees) == 0 { 26 | children = append(children, nil) 27 | continue 28 | } 29 | children = append(children, slots[0].Trees[0]) 30 | } 31 | n := &Node{} 32 | str, err := n.fmrStr(f, children, "") 33 | if err != nil { 34 | return nil, err 35 | } 36 | ret = append(ret, str) 37 | } 38 | return ret, nil 39 | } 40 | 41 | // MatchFrames returns the matched frames for NL text 42 | func (g *Grammar) MatchFrames(text string) (map[RbKey]*Frame, error) { 43 | return g.MatchFramesWithContext("", text) 44 | } 
// MatchFramesWithContext returns the matched frames for NL text
func (g *Grammar) MatchFramesWithContext(
	context, text string) (map[RbKey]*Frame, error) {
	// Seed frames with terminal hits and collect candidate start rules.
	frames, starts, err := g.getCandidates(text)
	if err != nil {
		return nil, err
	}
	ps, err := g.EarleyParseAllWithContext(context, text, starts...)
	if err != nil {
		return nil, err
	}
	for _, p := range ps {
		for _, finalState := range p.finalStates {
			tag := p.Tag(finalState)
			pos := p.Boundary(finalState)
			trees := p.GetTrees(finalState)

			// A final state must carry both a rule tag and a text span.
			if tag == "" || pos == nil {
				return nil, fmt.Errorf("invalid parse")
			}

			slot := &Slot{*pos, trees}

			// Only tags that some frame body references as a nonterminal
			// contribute slots; others are ignored.
			ret := g.ruleIndex[tag]
			if ret == nil {
				continue
			}
			for rbKey := range ret.Frames {
				if frames[rbKey] == nil {
					frames[rbKey] = &Frame{make(map[uint64][]*Slot), false}
				}
				t := Term{Value: tag, Type: Nonterminal}
				frames[rbKey].Slots[t.Key()] = append(frames[rbKey].Slots[t.Key()], slot)
				// A frame is complete once every term of its rule body has
				// at least one filled slot.
				if len(frames[rbKey].Slots) >=
					len(g.Frames[rbKey.RuleName].Body[rbKey.BodyID].Terms) {
					frames[rbKey].Complete = true
				}
			}
		}
	}
	return frames, nil
}

// getCandidates scans text with the terminal trie and returns (a) partially
// filled frames keyed by rule-body, and (b) the transitive closure of rule
// names that could start a parse over the matched terminals.
func (g *Grammar) getCandidates(text string) (
	map[RbKey]*Frame, []string, error) {

	matches, err := g.trie.MultiMatch(text)
	if err != nil {
		return nil, nil, err
	}
	frames := map[RbKey]*Frame{}
	rules := map[string]bool{}
	for word, hits := range matches {
		v := g.index[word]
		if v == nil {
			// trie and index are built together; divergence is a bug.
			return nil, nil, fmt.Errorf("%s in trie but not in index", word)
		}
		for rbKey := range v.Frames {
			if frames[rbKey] == nil {
				frames[rbKey] = &Frame{make(map[uint64][]*Slot), false}
			}
			t := Term{Value: word, Type: Terminal}
			// Terminal hits fill slots with a byte span but no parse tree.
			for _, hit := range hits {
				frames[rbKey].Slots[t.Key()] = append(frames[rbKey].Slots[t.Key()],
					&Slot{Pos{hit.StartByte, hit.EndByte}, nil})
			}
			if len(frames[rbKey].Slots) >=
				len(g.Frames[rbKey.RuleName].Body[rbKey.BodyID].Terms) {
				frames[rbKey].Complete = true
			}
		}
		for rbKey := range v.Rules {
			rules[rbKey.RuleName] = true
		}
	}
	var ruleList []string
	for k := range rules {
		ruleList = append(ruleList, k)
	}
	// BFS over ruleIndex: pull in every rule that (transitively) references
	// an already-selected rule as a nonterminal.
	for {
		if len(ruleList) == 0 {
			break
		}
		r := ruleList[0]
		ruleList = ruleList[1:]

		ret := g.ruleIndex[r]
		if ret == nil {
			continue
		}
		for rbKey := range ret.Rules {
			if !rules[rbKey.RuleName] {
				ruleList = append(ruleList, rbKey.RuleName)
				rules[rbKey.RuleName] = true
			}
		}
	}
	var starts []string
	for k := range rules {
		starts = append(starts, k)
	}
	return frames, starts, nil
}
// fmrList wraps its arguments into a generic list value.
func fmrList(items ...interface{}) []interface{} {
	return items
}

// fmrEntity builds an entity value keyed by the entity type, which is the
// first argument rendered as a string. It returns nil when no arguments are
// given or the type renders empty; with a single argument the type maps to
// nil, otherwise it maps to the remaining arguments.
func fmrEntity(items ...interface{}) map[string]interface{} {
	if len(items) == 0 {
		return nil
	}
	// fmt.Sprint with a single operand formats with %v, same as Sprintf("%v", x).
	typ := fmt.Sprint(items[0])
	if typ == "" {
		return nil
	}
	rest := items[1:]
	if len(rest) == 0 {
		return map[string]interface{}{typ: nil}
	}
	return map[string]interface{}{typ: rest}
}
v0.0.0-20200509031100-522aef269e3c 9 | github.com/liuzl/unidecode v0.0.0-20170420112940-fd1463e9cd5b 10 | github.com/mitchellh/hashstructure v1.1.0 11 | github.com/robertkrimen/otto v0.0.0-20221011175642-09fc211e5ab1 12 | github.com/xlab/treeprint v1.1.0 13 | zliu.org/goutil v0.0.0-20220902023633-6fcbc3a43c89 14 | ) 15 | 16 | require ( 17 | crawler.club/dl v0.0.0-20200622075740-2bbb15a2cf94 // indirect 18 | github.com/axgle/mahonia v0.0.0-20180208002826-3358181d7394 // indirect 19 | github.com/cheggaaa/pb v1.0.29 // indirect 20 | github.com/eknkc/basex v1.0.1 // indirect 21 | github.com/golang/snappy v0.0.4 // indirect 22 | github.com/juju/errors v1.0.0 // indirect 23 | github.com/justinas/alice v1.2.0 // indirect 24 | github.com/liuzl/cedar-go v0.0.0-20170805034717-80a9c64b256d // indirect 25 | github.com/liuzl/d v0.0.0-20221017065133-150b00c98eb9 // indirect 26 | github.com/liuzl/da v0.0.0-20180704015230-14771aad5b1d // indirect 27 | github.com/liuzl/filestore v0.0.0-20200229104338-5ea723a6a528 // indirect 28 | github.com/liuzl/segment v0.0.0-20160915185041-762005e7a34f // indirect 29 | github.com/liuzl/store v0.0.0-20190530065605-e2dbcd3c77fc // indirect 30 | github.com/liuzl/tokenizer v0.0.0-20181128060327-56c1056833c1 // indirect 31 | github.com/mattn/go-colorable v0.1.13 // indirect 32 | github.com/mattn/go-isatty v0.0.16 // indirect 33 | github.com/mattn/go-runewidth v0.0.14 // indirect 34 | github.com/rivo/uniseg v0.4.2 // indirect 35 | github.com/rs/xid v1.4.0 // indirect 36 | github.com/rs/zerolog v1.28.0 // indirect 37 | github.com/saintfish/chardet v0.0.0-20120816061221-3af4cd4741ca // indirect 38 | github.com/syndtr/goleveldb v1.0.0 // indirect 39 | github.com/torden/go-strutil v0.1.7 // indirect 40 | golang.org/x/net v0.0.0-20220826154423-83b083e8dc8b // indirect 41 | golang.org/x/sys v0.0.0-20221013171732-95e765b1cc43 // indirect 42 | golang.org/x/text v0.3.7 // indirect 43 | gopkg.in/sourcemap.v1 v1.0.5 // indirect 44 | ) 45 | 
-------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | crawler.club/dl v0.0.0-20200622075740-2bbb15a2cf94 h1:8vuyB8j14xIrixp9tWSQ04D/m8fyXzCoim1cUhU3juY= 2 | crawler.club/dl v0.0.0-20200622075740-2bbb15a2cf94/go.mod h1:UnhSaKtYQlEdkutiJ4UIwCNKxiFgs5/4WJgfIijiOUg= 3 | github.com/adamzy/cedar-go v0.0.0-20170805034717-80a9c64b256d h1:ir/IFJU5xbja5UaBEQLjcvn7aAU01nqU/NUyOBEU+ew= 4 | github.com/axgle/mahonia v0.0.0-20180208002826-3358181d7394 h1:OYA+5W64v3OgClL+IrOD63t4i/RW7RqrAVl9LTZ9UqQ= 5 | github.com/axgle/mahonia v0.0.0-20180208002826-3358181d7394/go.mod h1:Q8n74mJTIgjX4RBBcHnJ05h//6/k6foqmgE45jTQtxg= 6 | github.com/cheggaaa/pb v1.0.29 h1:FckUN5ngEk2LpvuG0fw1GEFx6LtyY2pWI/Z2QgCnEYo= 7 | github.com/cheggaaa/pb v1.0.29/go.mod h1:W40334L7FMC5JKWldsTWbdGjLo0RxUKK73K+TuPxX30= 8 | github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI= 9 | github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU= 10 | github.com/coreos/go-systemd/v22 v22.3.3-0.20220203105225-a9a7ef127534/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc= 11 | github.com/crawlerclub/x v0.1.0 h1:XmEcdwprNZ6ltP9VTUJ7h2PJRETt4KKeN8euXER+gPU= 12 | github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 13 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= 14 | github.com/dustin/go-humanize v1.0.0 h1:VSnTsYCnlFHaM2/igO1h6X3HA71jcobQuxemgkq4zYo= 15 | github.com/dustin/go-humanize v1.0.0/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk= 16 | github.com/eknkc/basex v1.0.1 h1:TcyAkqh4oJXgV3WYyL4KEfCMk9W8oJCpmx1bo+jVgKY= 17 | github.com/eknkc/basex v1.0.1/go.mod h1:k/F/exNEHFdbs3ZHuasoP2E7zeWwZblG84Y7Z59vQRo= 18 | github.com/fatih/color v1.9.0 h1:8xPHl4/q1VyqGIPif1F+1V3Y3lSmrq01EabUW3CoW5s= 19 | github.com/fatih/color v1.9.0/go.mod 
h1:eQcE1qtQxscV5RaZvpXrrb8Drkc3/DdQ+uUYCNjL+zU= 20 | github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo= 21 | github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= 22 | github.com/golang/glog v1.0.0 h1:nfP3RFugxnNRyKgeWd4oI1nYvXpxrx8ck8ZrcizshdQ= 23 | github.com/golang/glog v1.0.0/go.mod h1:EWib/APOK0SL3dFbYqvxE3UYd8E6s1ouQ7iEp/0LWV4= 24 | github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= 25 | github.com/golang/snappy v0.0.0-20180518054509-2e65f85255db/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= 26 | github.com/golang/snappy v0.0.4 h1:yAGX7huGHXlcLOEtBnF4w7FQwA26wojNCwOYAEhLjQM= 27 | github.com/golang/snappy v0.0.4/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= 28 | github.com/hpcloud/tail v1.0.0 h1:nfCOvKYfkgYP8hkirhJocXT2+zOD8yUNjXaWfTlyFKI= 29 | github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU= 30 | github.com/juju/errors v1.0.0 h1:yiq7kjCLll1BiaRuNY53MGI0+EQ3rF6GB+wvboZDefM= 31 | github.com/juju/errors v1.0.0/go.mod h1:B5x9thDqx0wIMH3+aLIMP9HjItInYWObRovoCFM5Qe8= 32 | github.com/justinas/alice v1.2.0 h1:+MHSA/vccVCF4Uq37S42jwlkvI2Xzl7zTPCN5BnZNVo= 33 | github.com/justinas/alice v1.2.0/go.mod h1:fN5HRH/reO/zrUflLfTN43t3vXvKzvZIENsNEe7i7qA= 34 | github.com/kr/pretty v0.2.1 h1:Fmg33tUaq4/8ym9TJN1x7sLJnHVwhP33CNkpYV/7rwI= 35 | github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= 36 | github.com/liuzl/cedar-go v0.0.0-20170805034717-80a9c64b256d h1:qSmEGTgjkESUX5kPMSGJ4pcBUtYVDdkNzMrjQyvRvp0= 37 | github.com/liuzl/cedar-go v0.0.0-20170805034717-80a9c64b256d/go.mod h1:x7SghIWwLVcJObXbjK7S2ENsT1cAcdJcPl7dRaSFog0= 38 | github.com/liuzl/d v0.0.0-20221017065133-150b00c98eb9 h1:C+E8rPy8GE8Tlv2STuLJwpIxmrOeMJM3ngbKOgRwyno= 39 | github.com/liuzl/d v0.0.0-20221017065133-150b00c98eb9/go.mod h1:yLLAiymfW6NXB86ABmj24Jw22eayoSZXu9qCzJOUBfM= 40 | github.com/liuzl/da 
v0.0.0-20180704015230-14771aad5b1d h1:hTRDIpJ1FjS9ULJuEzu69n3qTgc18eI+ztw/pJv47hs= 41 | github.com/liuzl/da v0.0.0-20180704015230-14771aad5b1d/go.mod h1:7xD3p0XnHvJFQ3t/stEJd877CSIMkH/fACVWen5pYnc= 42 | github.com/liuzl/dict v0.0.0-20180720023830-2cdb4749152d h1:pAN7QPdrG4eo+6ggl8ZPwF5mUz+d1e6C7aDZbJOWeTk= 43 | github.com/liuzl/dict v0.0.0-20180720023830-2cdb4749152d/go.mod h1:UXlqqhJaw28ned67lp11D9ZBDoFfFBCLhfQVcFjTn4w= 44 | github.com/liuzl/filestore v0.0.0-20200229104338-5ea723a6a528 h1:g+uxFYnxN+bMSgLu+t7k4zzVIUsRhKykir1C4F5Gp2c= 45 | github.com/liuzl/filestore v0.0.0-20200229104338-5ea723a6a528/go.mod h1:aMgfSMkON/7fp+l9vv8w0xq870iSPVrNs7IqEu3xu5Q= 46 | github.com/liuzl/ling v0.0.0-20200509031100-522aef269e3c h1:j3k0k6lfQtPgolRVxyeqK6I5fbLL7vu0Nxzs8toCMW8= 47 | github.com/liuzl/ling v0.0.0-20200509031100-522aef269e3c/go.mod h1:1kEdLCXtzHEn0iVmhN+52m5l9YWMDL0EYch4Xt1su1Y= 48 | github.com/liuzl/segment v0.0.0-20160915185041-762005e7a34f h1:toJ372frwG+oflCG1Ebti4+yr3Pf1DhtIipClmAwTIs= 49 | github.com/liuzl/segment v0.0.0-20160915185041-762005e7a34f/go.mod h1:PfFcixpSUOZCUgVTaF3uNidhAQD0gfobJ4gJe67kC2U= 50 | github.com/liuzl/store v0.0.0-20190530065605-e2dbcd3c77fc h1:mZ1DgWJEXekv8VFCurVYxQdqJ8bgnsx7cFyBAE+ORCE= 51 | github.com/liuzl/store v0.0.0-20190530065605-e2dbcd3c77fc/go.mod h1:oGZDOBSfYkcxlMrnAaf6R2/DgLW56QYm3fJAj/fzODo= 52 | github.com/liuzl/tokenizer v0.0.0-20181128060327-56c1056833c1 h1:AbKGfBWqlLBVPwzaKRSmHOB4T5jsh/OJx3TQ0mINNtc= 53 | github.com/liuzl/tokenizer v0.0.0-20181128060327-56c1056833c1/go.mod h1:phPMsXWmEeSG/RxGFD6qjSVJdVVHv1HkcBHQfNVeibQ= 54 | github.com/liuzl/unidecode v0.0.0-20170420112940-fd1463e9cd5b h1:N3Kwu7n5QYlCiKYEiy7a7bRG3P/Bp/kwI4jES3rbA4g= 55 | github.com/liuzl/unidecode v0.0.0-20170420112940-fd1463e9cd5b/go.mod h1:QnoHoimYwXkYpQfXolZEH+CpPDZMKvZDlMz0fStkbmU= 56 | github.com/mattn/go-colorable v0.1.4/go.mod h1:U0ppj6V5qS13XJ6of8GYAs25YV2eR4EVcfRqFIhoBtE= 57 | github.com/mattn/go-colorable v0.1.12/go.mod 
h1:u5H1YNBxpqRaxsYJYSkiCWKzEfiAb1Gb520KVy5xxl4= 58 | github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA= 59 | github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg= 60 | github.com/mattn/go-isatty v0.0.8/go.mod h1:Iq45c/XA43vh69/j3iqttzPXn0bhXyGjM0Hdxcsrc5s= 61 | github.com/mattn/go-isatty v0.0.11/go.mod h1:PhnuNfih5lzO57/f3n+odYbM4JtupLOxQOAqxQCu2WE= 62 | github.com/mattn/go-isatty v0.0.14/go.mod h1:7GGIvUiUoEMVVmxf/4nioHXj79iQHKdU27kJ6hsGG94= 63 | github.com/mattn/go-isatty v0.0.16 h1:bq3VjFmv/sOjHtdEhmkEV4x1AJtvUvOJ2PFAZ5+peKQ= 64 | github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM= 65 | github.com/mattn/go-runewidth v0.0.4/go.mod h1:LwmH8dsx7+W8Uxz3IHJYH5QSwggIsqBzpuz5H//U1FU= 66 | github.com/mattn/go-runewidth v0.0.14 h1:+xnbZSEeDbOIg5/mE6JF0w6n9duR1l3/WmbinWVwUuU= 67 | github.com/mattn/go-runewidth v0.0.14/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w= 68 | github.com/mitchellh/hashstructure v1.1.0 h1:P6P1hdjqAAknpY/M1CGipelZgp+4y9ja9kmUZPXP+H0= 69 | github.com/mitchellh/hashstructure v1.1.0/go.mod h1:xUDAozZz0Wmdiufv0uyhnHkUTN6/6d8ulp4AwfLKrmA= 70 | github.com/onsi/ginkgo v1.6.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= 71 | github.com/onsi/ginkgo v1.7.0 h1:WSHQ+IS43OoUrWtD1/bbclrwK8TTH5hzp+umCiuxHgs= 72 | github.com/onsi/ginkgo v1.7.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= 73 | github.com/onsi/gomega v1.4.3 h1:RE1xgDvH7imwFD45h+u2SgIfERHlS2yNG4DObb5BSKU= 74 | github.com/onsi/gomega v1.4.3/go.mod h1:ex+gbHU/CVuBBDIJjb2X0qEXbFg53c61hWP/1CpauHY= 75 | github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= 76 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= 77 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 78 | github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc= 79 | 
github.com/rivo/uniseg v0.4.2 h1:YwD0ulJSJytLpiaWua0sBDusfsCZohxjxzVTYjwxfV8= 80 | github.com/rivo/uniseg v0.4.2/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88= 81 | github.com/robertkrimen/otto v0.0.0-20221011175642-09fc211e5ab1 h1:SQiIjmrbwsmwsf68GxOPZa3y2q98Vfo41CT6h7pOMAE= 82 | github.com/robertkrimen/otto v0.0.0-20221011175642-09fc211e5ab1/go.mod h1:DKHCllR988yoiVXPZrLqCjwAKhryyDPNmb9cBVtG/aQ= 83 | github.com/rs/xid v1.4.0 h1:qd7wPTDkN6KQx2VmMBLrpHkiyQwgFXRnkOLacUiaSNY= 84 | github.com/rs/xid v1.4.0/go.mod h1:trrq9SKmegXys3aeAKXMUTdJsYXVwGY3RLcfgqegfbg= 85 | github.com/rs/zerolog v1.28.0 h1:MirSo27VyNi7RJYP3078AA1+Cyzd2GB66qy3aUHvsWY= 86 | github.com/rs/zerolog v1.28.0/go.mod h1:NILgTygv/Uej1ra5XxGf82ZFSLk58MFGAUS2o6usyD0= 87 | github.com/saintfish/chardet v0.0.0-20120816061221-3af4cd4741ca h1:NugYot0LIVPxTvN8n+Kvkn6TrbMyxQiuvKdEwFdR9vI= 88 | github.com/saintfish/chardet v0.0.0-20120816061221-3af4cd4741ca/go.mod h1:uugorj2VCxiV1x+LzaIdVa9b4S4qGAcH6cbhh4qVxOU= 89 | github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= 90 | github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= 91 | github.com/stretchr/testify v1.8.0 h1:pSgiaMZlXftHpm5L7V1+rVB+AZJydKsMxsQBIJw4PKk= 92 | github.com/syndtr/goleveldb v1.0.0 h1:fBdIW9lB4Iz0n9khmH8w27SJ3QEJ7+IgjPEwGSZiFdE= 93 | github.com/syndtr/goleveldb v1.0.0/go.mod h1:ZVVdQEZoIme9iO1Ch2Jdy24qqXrMMOU6lpPAyBWyWuQ= 94 | github.com/torden/go-strutil v0.1.7 h1:6c1WDvEqqueK6qiLPSD8Svq/yRq/npkt9cLtw8khUI4= 95 | github.com/torden/go-strutil v0.1.7/go.mod h1:7cy4xHed8E5wlnGkk+gztMCIiFLxTJWbPAlr4XjwHYA= 96 | github.com/xlab/treeprint v1.1.0 h1:G/1DjNkPpfZCFt9CSh6b5/nY4VimlbHF3Rh4obvtzDk= 97 | github.com/xlab/treeprint v1.1.0/go.mod h1:gj5Gd3gPdKtR1ikdDK6fnFLdmIS0X30kTTuNd/WEJu0= 98 | golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= 99 | golang.org/x/net v0.0.0-20220826154423-83b083e8dc8b 
h1:ZmngSVLe/wycRns9MKikG9OWIEjGcGAkacif7oYQaUY= 100 | golang.org/x/net v0.0.0-20220826154423-83b083e8dc8b/go.mod h1:YDH+HFinaLZZlnHAfSS6ZXJJ9M9t4Dl22yv3iI2vPwk= 101 | golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 102 | golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= 103 | golang.org/x/sys v0.0.0-20190222072716-a9d3bda3a223/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= 104 | golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 105 | golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 106 | golang.org/x/sys v0.0.0-20210927094055-39ccf1dd6fa6/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 107 | golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 108 | golang.org/x/sys v0.0.0-20221013171732-95e765b1cc43 h1:OK7RB6t2WQX54srQQYSXMW8dF5C6/8+oA/s5QBmmto4= 109 | golang.org/x/sys v0.0.0-20221013171732-95e765b1cc43/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 110 | golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= 111 | golang.org/x/text v0.3.7 h1:olpwvP2KacW1ZWvsR7uQhoyTYvKAupfQrRGBFM352Gk= 112 | golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= 113 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 114 | gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= 115 | gopkg.in/fsnotify.v1 v1.4.7 h1:xOHLXZwVvI9hhs+cLKq5+I5onOuwQLhQwiu63xxlHs4= 116 | gopkg.in/fsnotify.v1 v1.4.7/go.mod h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMys= 117 | gopkg.in/readline.v1 v1.0.0-20160726135117-62c6fe619375/go.mod h1:lNEQeAhU009zbRxng+XOj5ITVgY24WcbNnQopyfKoYQ= 118 | gopkg.in/sourcemap.v1 v1.0.5 
h1:inv58fC9f9J3TK2Y2R1NPntXEn3/wjWHkonhIUODNTI= 119 | gopkg.in/sourcemap.v1 v1.0.5/go.mod h1:2RlvNNSMglmRrcvhfuzp4hQHwOtjxlbjX7UPY/GXb78= 120 | gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 h1:uRGJdciOHaEIrze2W8Q3AKkepLTh2hOroT7a+7czfdQ= 121 | gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWDmTeBkI65Dw0HsyUHuEVlX15mw= 122 | gopkg.in/yaml.v2 v2.2.1 h1:mUhvW9EsL+naU5Q3cakzfE91YhliOondGd6ZrsDBHQE= 123 | gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= 124 | gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 125 | gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= 126 | zliu.org/goutil v0.0.0-20220902023633-6fcbc3a43c89 h1:91pOnvABI1firRblKJEF+auogF0NsqSDSefEjrCPd/0= 127 | zliu.org/goutil v0.0.0-20220902023633-6fcbc3a43c89/go.mod h1:Z7oA8jTnXTwZyxCPRxHkYK2oZK8eOH7FbXiW4RYLijI= 128 | -------------------------------------------------------------------------------- /grammar_index.go: -------------------------------------------------------------------------------- 1 | package fmr 2 | 3 | import ( 4 | "fmt" 5 | "strings" 6 | 7 | "github.com/liuzl/dict" 8 | ) 9 | 10 | func updateIndex(index map[string]*Index, k string, cate string, v RbKey) error { 11 | if index == nil { 12 | return fmt.Errorf("nil grammar index") 13 | } 14 | if cate != "frame" && cate != "rule" { 15 | return fmt.Errorf("invalid cate %s", cate) 16 | } 17 | if index[k] == nil { 18 | index[k] = &Index{make(map[RbKey]struct{}), make(map[RbKey]struct{})} 19 | } 20 | switch cate { 21 | case "frame": 22 | index[k].Frames[v] = struct{}{} 23 | case "rule": 24 | index[k].Rules[v] = struct{}{} 25 | } 26 | return nil 27 | } 28 | 29 | func (g *Grammar) indexRules(rules map[string]*Rule, cate string) error { 30 | var err error 31 | for _, rule := range rules { 32 | for id, body := range rule.Body { 33 | for _, term := range body.Terms { 34 | v := RbKey{rule.Name, id} 35 | value 
// refine rewrites every terminal term in the grammar into a generated
// nonterminal rule whose body is the terminal's token sequence, so that
// matching later happens token-by-token. Generated rule names start with
// prefix, identical terminal texts share one rule, and the method is
// idempotent via the Refined flag.
func (g *Grammar) refine(prefix string) error {
	if g.Refined {
		return nil
	}
	var terminalRules []*Rule
	// terminals maps a terminal text to its generated rule name, so the
	// same text is only tokenized and named once.
	var terminals = make(map[string]string)
	var names = make(map[string]bool)
	var n int
	var name string
	for _, rule := range g.Rules {
		for _, body := range rule.Body {
			for _, term := range body.Terms {
				if term.Type != Terminal {
					continue
				}
				// if this is a terminal text inside a ruleBody
				if t, has := terminals[term.Value]; has {
					term.Value = t
				} else {
					// Tokenize the terminal text with the NLP pipeline.
					d := ling.NewDocument(term.Value)
					if err := NLP().Annotate(d); err != nil {
						return err
					}
					tname := prefix + "_t"
					rb := &RuleBody{}
					for _, token := range d.Tokens {
						if token.Type == ling.Space {
							continue
						}
						// Non-punctuation tokens contribute an ASCII
						// transliteration to the generated rule name.
						if token.Type != ling.Punct {
							ascii := unidecode.Unidecode(token.Text)
							ascii = strings.Join(strings.Fields(ascii), "_")
							tname += "_" + ascii
						}
						rb.Terms = append(rb.Terms,
							&Term{Value: token.Text, Type: Terminal, Meta: term.Meta})
						// Cache the token globally for later lookups.
						if gTokens.get(token.Text) == nil {
							gTokens.put(token.Text, token)
						}
					}
					// Probe tname, tname_0, tname_1, ... until a name is
					// free in both existing and freshly generated rules.
					for name, n = tname, 0; ; name, n =
						fmt.Sprintf("%s_%d", tname, n), n+1 {
						if g.Rules[name] == nil && !names[name] {
							break
						}
					}
					names[name] = true
					terminals[term.Value] = name
					hash, err := hashstructure.Hash(rb, nil)
					if err != nil {
						return err
					}
					terminalRules = append(terminalRules,
						&Rule{name, map[uint64]*RuleBody{hash: rb}})
					term.Value = name
				}
				// The term now points at a generated rule.
				term.Type = Nonterminal
			}
		}
	}
	for _, r := range terminalRules {
		g.Rules[r.Name] = r
	}
	g.Refined = true
	return nil
}
} 19 | 20 | func (m *cMap) put(k string, token *ling.Token) { 21 | m.Lock() 22 | defer m.Unlock() 23 | m.tokens[k] = token 24 | } 25 | 26 | var gTokens = &cMap{tokens: make(map[string]*ling.Token)} 27 | -------------------------------------------------------------------------------- /grammars/limit.grammar: -------------------------------------------------------------------------------- 1 | /* 2 | * Author: liang@zliu.org 3 | */ 4 | 5 | #include "math.zh.grammar" 6 | 7 | = "前"; 8 | = "个"; 9 | 10 | = {nf.sql.limit($2)} 11 | | {nf.sql.limit($1)} 12 | ; 13 | -------------------------------------------------------------------------------- /grammars/math.zh.grammar: -------------------------------------------------------------------------------- 1 | /* 2 | * Author: Zhanliang Liu 3 | */ 4 | 5 | #include "number.zh.grammar" 6 | 7 | // for local grammars that come from regex tagger 8 | = {nf.math.to_number($1)}; 9 | 10 | = "打" {nf.math.mul($1, 12)}; 11 | 12 | = "加" {nf.math.sum($1,$3)} 13 | | "加上" {nf.math.sum($1,$3)} 14 | | "与" "的和" {nf.math.sum($1,$3)} 15 | ; 16 | 17 | = "减" {nf.math.sub($1,$3)} 18 | | "减去" {nf.math.sub($1,$3)} 19 | | "与" "的差" {nf.math.sub($1,$3)} 20 | ; 21 | 22 | = "分之" {nf.math.div($3,$1)} 23 | | "%" {nf.math.div($1, 100)} 24 | ; 25 | 26 | = "乘" {nf.math.mul($1,$3)} 27 | | "乘以" {nf.math.mul($1,$3)} 28 | | "与" "的积" {nf.math.mul($1,$3)} 29 | | "的" "倍" {nf.math.mul($1,$3)} 30 | | "的一半" {nf.math.mul($1,0.5)} 31 | | "的" {nf.math.mul($1,$3)} 32 | ; 33 | 34 | = "除" {nf.math.div($3,$1)} 35 | | "除以" {nf.math.div($1,$3)} 36 | | {nf.I($1)} 37 | ; 38 | 39 | = "的" "次方" {nf.math.pow($1,$3)} 40 | | "的" "次幂" {nf.math.pow($1,$3)} 41 | | "的平方" {nf.math.pow($1,2)} 42 | | "的立方" {nf.math.pow($1,3)} 43 | | "的平方根" {nf.math.pow($1,0.5)} 44 | | "的立方根" {nf.math.pow($1,nf.math.div(1,3))} 45 | ; 46 | 47 | -------------------------------------------------------------------------------- /grammars/number.zh.grammar: 
-------------------------------------------------------------------------------- 1 | = "一" {nf.I(1)} 2 | | "二" {nf.I(2)} 3 | | "三" {nf.I(3)} 4 | | "四" {nf.I(4)} 5 | | "五" {nf.I(5)} 6 | | "六" {nf.I(6)} 7 | | "七" {nf.I(7)} 8 | | "八" {nf.I(8)} 9 | | "九" {nf.I(9)} 10 | | "壹" {nf.I(1)} 11 | | "贰" {nf.I(2)} 12 | | "叁" {nf.I(3)} 13 | | "肆" {nf.I(4)} 14 | | "伍" {nf.I(5)} 15 | | "陆" {nf.I(6)} 16 | | "柒" {nf.I(7)} 17 | | "捌" {nf.I(8)} 18 | | "玖" {nf.I(9)} 19 | | "两" {nf.I(2)} 20 | | "俩" {nf.I(2)} 21 | | "仨" {nf.I(3)} 22 | ; 23 | 24 | = "零" {nf.I(0)} 25 | | "〇" {nf.I(0)} 26 | ; 27 | 28 | 29 | = {nf.I($1)} | {nf.I($1)} | {nf.I($1)}; 30 | 31 | = {nf.I($1)} 32 | | {nf.util.concat($1, $2)} 33 | ; 34 | 35 | = "十" {nf.I(10)} 36 | | "拾" {nf.I(10)} 37 | ; 38 | 39 | = "百" {nf.I(100)} 40 | | "佰" {nf.I(100)} 41 | ; 42 | 43 | = "千" {nf.I(1000)} 44 | | "仟" {nf.I(1000)} 45 | ; 46 | 47 | = "万" {nf.I(10000)}; 48 | 49 | = "亿" {nf.I(100000000)} 50 | | "万万" {nf.I(100000000)} 51 | ; 52 | 53 | = {nf.I($1)} 54 | | {nf.math.sum($1,$2)} 55 | | {nf.math.mul($1, $2)} 56 | | {nf.math.sum(nf.math.mul($1, $2), $3)} 57 | | {nf.I($1)} 58 | | {nf.I($1)} 59 | ; 60 | 61 | = {nf.math.mul($1, $2)} 62 | | {nf.math.sum(nf.math.mul($1, $2), nf.math.mul(10,$3))} 63 | | {nf.math.sum(nf.math.mul($1, $2), $3)} 64 | | {nf.math.sum(nf.math.mul($1, $2), $4)} 65 | | {nf.I($1)} 66 | ; 67 | 68 | = {nf.math.mul($1, $2)} 69 | | {nf.math.sum(nf.math.mul($1, $2), nf.math.mul(100,$3))} 70 | | {nf.math.sum(nf.math.mul($1, $2), $3)} 71 | | {nf.math.sum(nf.math.mul($1, $2), $4)} 72 | | {nf.I($1)} 73 | ; 74 | 75 | = {nf.math.mul($1, $2)} 76 | | {nf.math.sum(nf.math.mul($1, $2), nf.math.mul(1000,$3))} 77 | | {nf.math.sum(nf.math.mul($1, $2), $3)} 78 | | {nf.math.sum(nf.math.mul($1, $2), $4)} 79 | | {nf.I($1)} 80 | | {nf.math.mul($1, $2)} 81 | ; 82 | 83 | = {nf.math.mul($1, $2)} 84 | | {nf.math.sum(nf.math.mul($1, $2), nf.math.mul(10000000,$3))} 85 | | {nf.math.sum(nf.math.mul($1, $2), $3)} 86 | | {nf.math.sum(nf.math.mul($1, $2), 
$4)} 87 | | {nf.I($1)} 88 | | {nf.math.mul($1, $2)} 89 | ; 90 | 91 | = "点" {nf.math.decimal($2)}; 92 | 93 | = {nf.I($1)} 94 | | {nf.I($1)} 95 | | {nf.math.sum($1,$2)} 96 | ; 97 | 98 | = {nf.I($1)} 99 | | {nf.I($1)} 100 | | {nf.I($1)} 101 | | {nf.I($1)} 102 | ; 103 | -------------------------------------------------------------------------------- /grammars/order.grammar: -------------------------------------------------------------------------------- 1 | /* 2 | * Author: liang@zliu.org 3 | */ 4 | 5 | // order by clause 6 | 7 | = "从近到远" | "从少到多" | "从小到大" | "从慢到快" | "升序" | "排名最低" 8 | | "最小" | "最少" | "最慢"; 9 | = "从远到近" | "从多到少" | "从大到小" | "从快到慢" | "降序" | "排名最高" 10 | | "最大" | "最多" | "最快"; 11 | = "按" | "获得" | "获得的" | "给出" | "根据" | "按照" | "依据"; 12 | 13 | = (any) {nf.sql.order($2,"asc")} 14 | | (any) {nf.sql.order($2,"desc")} 15 | ; 16 | -------------------------------------------------------------------------------- /grammars/sql.grammar: -------------------------------------------------------------------------------- 1 | #include "order.grammar" 2 | #include "limit.grammar" 3 | 4 | = {nf.I($1)} 5 | | {nf.sql.mix(nf.I($1),nf.sql.limit($2))} 6 | | (any) {nf.sql.mix(nf.I($1),nf.sql.limit($3))} 7 | ; 8 | 9 | [sql] = {nf.sql.gen($1,$2)}; 10 | -------------------------------------------------------------------------------- /grammars/time.grammar: -------------------------------------------------------------------------------- 1 | #include "math.zh.grammar" 2 | 3 | = "年" {nf.datetime.year($1)} 4 | // | "年" {nf.datetime.year($1)} 5 | ; 6 | = "月" {nf.datetime.month($1)}; 7 | = "日" {nf.datetime.month($1)} 8 | | "号" {nf.datetime.month($1)}; 9 | 10 | 11 | = 12 | | 13 | | 14 | | {nf.I($1)} 15 | | {nf.I($1)} 16 | | {nf.I($1)} 17 | ; 18 | 19 |