├── .gitignore ├── LICENSE ├── README.md ├── api.go ├── api_easy.go ├── api_easy_context.go ├── api_test.go ├── cfgrammar.go ├── cfgrammar_comments.go ├── cfgrammar_fmr.go ├── cfgrammar_regex.go ├── cfgrammar_special.go ├── cfgrammar_terminal.go ├── cfgrammar_test.go ├── earley.go ├── earley_ast.go ├── earley_eval.go ├── earley_fmr.go ├── earley_nodeprint.go ├── earley_stringer.go ├── earley_terminal_match.go ├── earley_test.go ├── examples ├── arithmetic │ ├── arithmetic.grammar │ ├── input.txt │ ├── main.go │ └── math.js ├── builtin │ ├── any.grammar │ ├── any.txt │ ├── builtin.grammar │ ├── cn_input.txt │ ├── cn_num.grammar │ ├── company.js │ ├── input.txt │ ├── main.go │ ├── math.js │ └── tianjin.txt └── math │ ├── README.md │ ├── grammars │ ├── latex.math.grammar │ ├── math.en.grammar │ ├── math.grammar │ ├── math.zh.grammar │ ├── number.en.grammar │ └── number.zh.grammar │ ├── input.txt │ ├── main.go │ └── math.js ├── frame_api.go ├── frame_api_test.go ├── funcs.go ├── funcs_test.go ├── go.mod ├── go.sum ├── grammar_index.go ├── grammar_index_test.go ├── grammar_refine.go ├── grammar_tokens.go ├── grammars ├── limit.grammar ├── math.zh.grammar ├── number.zh.grammar ├── order.grammar ├── sql.grammar └── time.grammar ├── list_test.go ├── local_grammar.go ├── local_grammar_test.go ├── math_funcs.go ├── math_funcs_test.go ├── node_methods.go ├── regexp_tagger.go ├── sf.grammar ├── termtype_jsonenums.go ├── termtype_string.go ├── types.go └── types_util.go /.gitignore: -------------------------------------------------------------------------------- 1 | # Binaries for programs and plugins 2 | *.exe 3 | *.dll 4 | *.so 5 | *.dylib 6 | 7 | # Test binary, build with `go test -c` 8 | *.test 9 | 10 | # Output of the go coverage tool, specifically when used with LiteIDE 11 | *.out 12 | 13 | # Project-local glide cache, RE: https://github.com/Masterminds/glide/issues/736 14 | .glide/ 15 | 16 | dict 17 | dicts 18 | 19 | examples/arithmetic/arithmetic 20 | 
examples/builtin/builtin 21 | 22 | *.swp 23 | 24 | .vscode/ 25 | .DS_Store 26 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2018 Zhanliang Liu. All rights reserved. 2 | 3 | Apache License 4 | Version 2.0, January 2004 5 | http://www.apache.org/licenses/ 6 | 7 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 8 | 9 | 1. Definitions. 10 | 11 | "License" shall mean the terms and conditions for use, reproduction, 12 | and distribution as defined by Sections 1 through 9 of this document. 13 | 14 | "Licensor" shall mean the copyright owner or entity authorized by 15 | the copyright owner that is granting the License. 16 | 17 | "Legal Entity" shall mean the union of the acting entity and all 18 | other entities that control, are controlled by, or are under common 19 | control with that entity. For the purposes of this definition, 20 | "control" means (i) the power, direct or indirect, to cause the 21 | direction or management of such entity, whether by contract or 22 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 23 | outstanding shares, or (iii) beneficial ownership of such entity. 24 | 25 | "You" (or "Your") shall mean an individual or Legal Entity 26 | exercising permissions granted by this License. 27 | 28 | "Source" form shall mean the preferred form for making modifications, 29 | including but not limited to software source code, documentation 30 | source, and configuration files. 31 | 32 | "Object" form shall mean any form resulting from mechanical 33 | transformation or translation of a Source form, including but 34 | not limited to compiled object code, generated documentation, 35 | and conversions to other media types. 
36 | 37 | "Work" shall mean the work of authorship, whether in Source or 38 | Object form, made available under the License, as indicated by a 39 | copyright notice that is included in or attached to the work 40 | (an example is provided in the Appendix below). 41 | 42 | "Derivative Works" shall mean any work, whether in Source or Object 43 | form, that is based on (or derived from) the Work and for which the 44 | editorial revisions, annotations, elaborations, or other modifications 45 | represent, as a whole, an original work of authorship. For the purposes 46 | of this License, Derivative Works shall not include works that remain 47 | separable from, or merely link (or bind by name) to the interfaces of, 48 | the Work and Derivative Works thereof. 49 | 50 | "Contribution" shall mean any work of authorship, including 51 | the original version of the Work and any modifications or additions 52 | to that Work or Derivative Works thereof, that is intentionally 53 | submitted to Licensor for inclusion in the Work by the copyright owner 54 | or by an individual or Legal Entity authorized to submit on behalf of 55 | the copyright owner. For the purposes of this definition, "submitted" 56 | means any form of electronic, verbal, or written communication sent 57 | to the Licensor or its representatives, including but not limited to 58 | communication on electronic mailing lists, source code control systems, 59 | and issue tracking systems that are managed by, or on behalf of, the 60 | Licensor for the purpose of discussing and improving the Work, but 61 | excluding communication that is conspicuously marked or otherwise 62 | designated in writing by the copyright owner as "Not a Contribution." 63 | 64 | "Contributor" shall mean Licensor and any individual or Legal Entity 65 | on behalf of whom a Contribution has been received by Licensor and 66 | subsequently incorporated within the Work. 67 | 68 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 69 | this License, each Contributor hereby grants to You a perpetual, 70 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 71 | copyright license to reproduce, prepare Derivative Works of, 72 | publicly display, publicly perform, sublicense, and distribute the 73 | Work and such Derivative Works in Source or Object form. 74 | 75 | 3. Grant of Patent License. Subject to the terms and conditions of 76 | this License, each Contributor hereby grants to You a perpetual, 77 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 78 | (except as stated in this section) patent license to make, have made, 79 | use, offer to sell, sell, import, and otherwise transfer the Work, 80 | where such license applies only to those patent claims licensable 81 | by such Contributor that are necessarily infringed by their 82 | Contribution(s) alone or by combination of their Contribution(s) 83 | with the Work to which such Contribution(s) was submitted. If You 84 | institute patent litigation against any entity (including a 85 | cross-claim or counterclaim in a lawsuit) alleging that the Work 86 | or a Contribution incorporated within the Work constitutes direct 87 | or contributory patent infringement, then any patent licenses 88 | granted to You under this License for that Work shall terminate 89 | as of the date such litigation is filed. 90 | 91 | 4. Redistribution. 
You may reproduce and distribute copies of the 92 | Work or Derivative Works thereof in any medium, with or without 93 | modifications, and in Source or Object form, provided that You 94 | meet the following conditions: 95 | 96 | (a) You must give any other recipients of the Work or 97 | Derivative Works a copy of this License; and 98 | 99 | (b) You must cause any modified files to carry prominent notices 100 | stating that You changed the files; and 101 | 102 | (c) You must retain, in the Source form of any Derivative Works 103 | that You distribute, all copyright, patent, trademark, and 104 | attribution notices from the Source form of the Work, 105 | excluding those notices that do not pertain to any part of 106 | the Derivative Works; and 107 | 108 | (d) If the Work includes a "NOTICE" text file as part of its 109 | distribution, then any Derivative Works that You distribute must 110 | include a readable copy of the attribution notices contained 111 | within such NOTICE file, excluding those notices that do not 112 | pertain to any part of the Derivative Works, in at least one 113 | of the following places: within a NOTICE text file distributed 114 | as part of the Derivative Works; within the Source form or 115 | documentation, if provided along with the Derivative Works; or, 116 | within a display generated by the Derivative Works, if and 117 | wherever such third-party notices normally appear. The contents 118 | of the NOTICE file are for informational purposes only and 119 | do not modify the License. You may add Your own attribution 120 | notices within Derivative Works that You distribute, alongside 121 | or as an addendum to the NOTICE text from the Work, provided 122 | that such additional attribution notices cannot be construed 123 | as modifying the License. 
124 | 125 | You may add Your own copyright statement to Your modifications and 126 | may provide additional or different license terms and conditions 127 | for use, reproduction, or distribution of Your modifications, or 128 | for any such Derivative Works as a whole, provided Your use, 129 | reproduction, and distribution of the Work otherwise complies with 130 | the conditions stated in this License. 131 | 132 | 5. Submission of Contributions. Unless You explicitly state otherwise, 133 | any Contribution intentionally submitted for inclusion in the Work 134 | by You to the Licensor shall be under the terms and conditions of 135 | this License, without any additional terms or conditions. 136 | Notwithstanding the above, nothing herein shall supersede or modify 137 | the terms of any separate license agreement you may have executed 138 | with Licensor regarding such Contributions. 139 | 140 | 6. Trademarks. This License does not grant permission to use the trade 141 | names, trademarks, service marks, or product names of the Licensor, 142 | except as required for reasonable and customary use in describing the 143 | origin of the Work and reproducing the content of the NOTICE file. 144 | 145 | 7. Disclaimer of Warranty. Unless required by applicable law or 146 | agreed to in writing, Licensor provides the Work (and each 147 | Contributor provides its Contributions) on an "AS IS" BASIS, 148 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 149 | implied, including, without limitation, any warranties or conditions 150 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 151 | PARTICULAR PURPOSE. You are solely responsible for determining the 152 | appropriateness of using or redistributing the Work and assume any 153 | risks associated with Your exercise of permissions under this License. 154 | 155 | 8. Limitation of Liability. 
In no event and under no legal theory, 156 | whether in tort (including negligence), contract, or otherwise, 157 | unless required by applicable law (such as deliberate and grossly 158 | negligent acts) or agreed to in writing, shall any Contributor be 159 | liable to You for damages, including any direct, indirect, special, 160 | incidental, or consequential damages of any character arising as a 161 | result of this License or out of the use or inability to use the 162 | Work (including but not limited to damages for loss of goodwill, 163 | work stoppage, computer failure or malfunction, or any and all 164 | other commercial damages or losses), even if such Contributor 165 | has been advised of the possibility of such damages. 166 | 167 | 9. Accepting Warranty or Additional Liability. While redistributing 168 | the Work or Derivative Works thereof, You may choose to offer, 169 | and charge a fee for, acceptance of support, warranty, indemnity, 170 | or other liability obligations and/or rights consistent with this 171 | License. However, in accepting such obligations, You may act only 172 | on Your own behalf and on Your sole responsibility, not on behalf 173 | of any other Contributor, and only if You agree to indemnify, 174 | defend, and hold each Contributor harmless for any liability 175 | incurred by, or claims asserted against, such Contributor by reason 176 | of your accepting any such warranty or additional liability. 177 | 178 | END OF TERMS AND CONDITIONS 179 | 180 | APPENDIX: How to apply the Apache License to your work. 181 | 182 | To apply the Apache License to your work, attach the following 183 | boilerplate notice, with the fields enclosed by brackets "[]" 184 | replaced with your own identifying information. (Don't include 185 | the brackets!) The text should be enclosed in the appropriate 186 | comment syntax for the file format. 
We also recommend that a 187 | file or class name and description of purpose be included on the 188 | same "printed page" as the copyright notice for easier 189 | identification within third-party archives. 190 | 191 | Copyright 2018, Zhanliang Liu. 192 | 193 | Licensed under the Apache License, Version 2.0 (the "License"); 194 | you may not use this file except in compliance with the License. 195 | You may obtain a copy of the License at 196 | 197 | http://www.apache.org/licenses/LICENSE-2.0 198 | 199 | Unless required by applicable law or agreed to in writing, software 200 | distributed under the License is distributed on an "AS IS" BASIS, 201 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 202 | See the License for the specific language governing permissions and 203 | limitations under the License. 204 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # FMR: Functional Meaning Representation & Semantic Parsing Framework 2 | [![GoDoc](https://godoc.org/github.com/liuzl/fmr?status.svg)](https://godoc.org/github.com/liuzl/fmr)[![Go Report Card](https://goreportcard.com/badge/github.com/liuzl/fmr)](https://goreportcard.com/report/github.com/liuzl/fmr) 3 | 4 | ## Projects that use FMR 5 | 6 | ### mathsolver 7 | * codes: https://github.com/liuzl/mathsolver 8 | * demo: https://mathsolver.zliu.org/ 9 | 10 | ## What is semantic parsing? 11 | Semantic parsing is the process of mapping a natural language sentence into an intermediate logical form which is a formal representation of its meaning. 12 | 13 | The formal representation should be a detailed representation of the complete meaning of the natural language sentence in a fully formal language that: 14 | 15 | * Has a rich ontology of types, properties, and relations. 16 | * Supports automated reasoning or execution. 
17 | 18 | ## Representation languages 19 | Early semantic parsers used highly domain-specific meaning representation languages, with later systems using more extensible languages like Prolog, lambda calculus, lambda dependency-based compositional semantics (λ-DCS), SQL, Python, Java, and the Alexa Meaning Representation Language. Some work has used more exotic meaning representations, like query graphs or vector representations. 20 | 21 | ### FMR, a formal meaning representation language 22 | * FMR stands for functional meaning representation 23 | * Context-Free Grammar for bridging NL and FMR 24 | * *[VIM Syntax highlighting for FMR grammar file](https://github.com/liuzl/vim-fmr)* 25 | 26 | ## Tasks 27 | * Grammar checkers 28 | * Dialogue management 29 | * Question answering 30 | * Information extraction 31 | * Machine translation 32 | 33 | ## What can FMR do, a glance overview 34 | ```js 35 | // semantic parsing 36 | "五与5.8的和的平方的1.5次方与two的和减去261.712" => 37 | nf.math.sub( 38 | nf.math.sum( 39 | nf.math.pow( 40 | nf.math.pow( 41 | nf.math.sum( 42 | 5, 43 | nf.math.to_number("5.8") 44 | ), 45 | 2 46 | ), 47 | nf.math.to_number("1.5") 48 | ), 49 | 2 50 | ), 51 | nf.math.to_number("261.712") 52 | ); // denotation: 1000 53 | 54 | // slot filling 55 | "从上海到天津的机票" => nf.flight("上海", "天津"); 56 | "到重庆,明天,从北京" => nf.flight("北京", "重庆"); 57 | "到上海去" => nf.flight(null, "上海"); 58 | ``` 59 | 60 | ## References 61 | * [Semantic Parsing: Past, Present, and Future](http://yoavartzi.com/sp14/slides/mooney.sp14.pdf), Raymond J. 
Mooney, 2014 62 | * [Introduction to semantic parsing](https://github.com/liuzl/fmr-files/blob/master/cs224u-2019-intro-semparse.pdf), Bill MacCartney, 2019 63 | * [Bringing machine learning and compositional semantics together](https://web.stanford.edu/~cgpotts/manuscripts/liang-potts-semantics.pdf), Percy Liang and Christopher Potts, 2014 64 | * [SippyCup: A semantic parsing tutorial](https://github.com/wcmac/sippycup), Bill MacCartney, 2015 65 | * [Semantic parsing in your browser](https://www.cs.toronto.edu/~muuo/writing/semantic-parsing-in-your-browser/), Muuo Wambua, 2018 66 | -------------------------------------------------------------------------------- /api.go: -------------------------------------------------------------------------------- 1 | package fmr 2 | 3 | import ( 4 | "flag" 5 | "fmt" 6 | "net/url" 7 | "strings" 8 | "sync" 9 | 10 | "github.com/liuzl/ling" 11 | ) 12 | 13 | var ( 14 | apiTagger = flag.String("api_tagger", "", "http address of api tagger") 15 | ctxTagger = flag.String("ctx_tagger", "", "http address of context tagger") 16 | ) 17 | 18 | var nlp *ling.Pipeline 19 | var once sync.Once 20 | 21 | // NLP returns handler for the ling nlp toolkit 22 | func NLP() *ling.Pipeline { 23 | once.Do(func() { 24 | var err error 25 | var tagger *ling.DictTagger 26 | if nlp, err = ling.DefaultNLP(); err != nil { 27 | panic(err) 28 | } 29 | if tagger, err = ling.NewDictTagger(); err != nil { 30 | panic(err) 31 | } 32 | if err = nlp.AddTagger(tagger); err != nil { 33 | panic(err) 34 | } 35 | if *apiTagger == "" { 36 | return 37 | } 38 | var tagger1 *ling.APITagger 39 | if tagger1, err = ling.NewAPITagger(*apiTagger); err != nil { 40 | panic(err) 41 | } 42 | if err = nlp.AddTagger(tagger1); err != nil { 43 | panic(err) 44 | } 45 | }) 46 | return nlp 47 | } 48 | 49 | // EarleyParse parses text for rule at beginning 50 | func (g *Grammar) EarleyParse(text string, starts ...string) (*Parse, error) { 51 | return g.EarleyParseWithContext("", text, starts...) 
52 | } 53 | 54 | // EarleyParseWithContext with context information 55 | func (g *Grammar) EarleyParseWithContext( 56 | context, text string, starts ...string) (*Parse, error) { 57 | tokens, l, err := g.process(context, text) 58 | if err != nil { 59 | return nil, err 60 | } 61 | return g.earleyParse(true, text, tokens, l, starts...) 62 | } 63 | 64 | // EarleyParseAny parses text for rule at any position 65 | func (g *Grammar) EarleyParseAny( 66 | text string, starts ...string) (*Parse, error) { 67 | 68 | return g.EarleyParseAnyWithContext("", text, starts...) 69 | } 70 | 71 | //EarleyParseAnyWithContext with context information 72 | func (g *Grammar) EarleyParseAnyWithContext( 73 | context, text string, starts ...string) (*Parse, error) { 74 | 75 | tokens, l, err := g.process(context, text) 76 | if err != nil { 77 | return nil, err 78 | } 79 | var p *Parse 80 | for i := 0; i < len(tokens); i++ { 81 | if p, err = g.earleyParse( 82 | true, text, tokens[i:], l, starts...); err != nil { 83 | return nil, err 84 | } 85 | if p.finalStates != nil { 86 | return p, nil 87 | } 88 | } 89 | return p, nil 90 | } 91 | 92 | // EarleyParseMaxAll extracts all submatches in text for rule 93 | func (g *Grammar) EarleyParseMaxAll( 94 | text string, starts ...string) ([]*Parse, error) { 95 | return g.EarleyParseMaxAllWithContext("", text, starts...) 96 | } 97 | 98 | // EarleyParseMaxAllWithContext with context information 99 | func (g *Grammar) EarleyParseMaxAllWithContext( 100 | context, text string, starts ...string) ([]*Parse, error) { 101 | tokens, l, err := g.process(context, text) 102 | if err != nil { 103 | return nil, err 104 | } 105 | var ret []*Parse 106 | for i := 0; i < len(tokens); { 107 | p, err := g.earleyParse(true, text, tokens[i:], l, starts...) 
108 | if err != nil { 109 | return nil, err 110 | } 111 | if p.finalStates != nil { 112 | ret = append(ret, p) 113 | max := 0 114 | for _, finalState := range p.finalStates { 115 | if finalState.End > max { 116 | max = finalState.End 117 | } 118 | } 119 | i += max 120 | } else { 121 | i++ 122 | } 123 | } 124 | return ret, nil 125 | } 126 | 127 | // EarleyParseAll extracts all submatches in text for rule 128 | func (g *Grammar) EarleyParseAll( 129 | text string, starts ...string) ([]*Parse, error) { 130 | return g.EarleyParseAllWithContext("", text, starts...) 131 | } 132 | 133 | // EarleyParseAllWithContext with context information 134 | func (g *Grammar) EarleyParseAllWithContext( 135 | context, text string, starts ...string) ([]*Parse, error) { 136 | tokens, l, err := g.process(context, text) 137 | if err != nil { 138 | return nil, err 139 | } 140 | var ret []*Parse 141 | for i := 0; i < len(tokens); i++ { 142 | p, err := g.earleyParse(false, text, tokens[i:], l, starts...) 143 | if err != nil { 144 | return nil, err 145 | } 146 | if p.finalStates != nil { 147 | ret = append(ret, p) 148 | //i += p.finalState.End 149 | } 150 | } 151 | return ret, nil 152 | } 153 | 154 | func (g *Grammar) earleyParse(maxFlag bool, text string, 155 | tokens []*ling.Token, l *Grammar, starts ...string) (*Parse, error) { 156 | if len(starts) == 0 { 157 | return nil, fmt.Errorf("no start rules") 158 | } 159 | if len(tokens) == 0 { 160 | return nil, fmt.Errorf("no tokens to parse") 161 | } 162 | 163 | parse := &Parse{grammars: []*Grammar{g}, text: text, starts: starts} 164 | if len(g.includes) > 0 { 165 | parse.grammars = append(parse.grammars, g.includes...) 
166 | } 167 | if l != nil { 168 | parse.grammars = append(parse.grammars, l) 169 | } 170 | parse.columns = append(parse.columns, &TableColumn{index: 0, token: nil}) 171 | for _, token := range tokens { 172 | parse.columns = append(parse.columns, 173 | &TableColumn{index: len(parse.columns), token: token}) 174 | } 175 | parse.parse(maxFlag) 176 | if Debug { 177 | fmt.Println(parse) 178 | } 179 | return parse, nil 180 | } 181 | 182 | func (g *Grammar) process( 183 | context, text string) ([]*ling.Token, *Grammar, error) { 184 | if text = strings.TrimSpace(text); text == "" { 185 | return nil, nil, fmt.Errorf("text is empty") 186 | } 187 | d := ling.NewDocument(text) 188 | if context == "" { 189 | if err := NLP().Annotate(d); err != nil { 190 | return nil, nil, err 191 | } 192 | } else { 193 | if *ctxTagger == "" { 194 | return nil, nil, fmt.Errorf("ctxTagger should be supplied") 195 | } 196 | vurl, err := url.ParseRequestURI(*ctxTagger) 197 | if err != nil { 198 | return nil, nil, err 199 | } 200 | c := vurl.Query() 201 | c.Set("context", context) 202 | vurl.RawQuery = c.Encode() 203 | tagger, err := ling.NewAPITagger(vurl.String()) 204 | if err != nil { 205 | return nil, nil, err 206 | } 207 | if err = NLP().AnnotatePro(d, tagger); err != nil { 208 | return nil, nil, err 209 | } 210 | } 211 | var ret []*ling.Token 212 | for _, token := range d.Tokens { 213 | if token.Type == ling.Space { 214 | continue 215 | } 216 | ret = append(ret, token) 217 | } 218 | if len(ret) == 0 { 219 | return nil, nil, fmt.Errorf("no tokens") 220 | } 221 | l, err := g.localGrammar(d) 222 | if err != nil { 223 | return nil, nil, err 224 | } 225 | return ret, l, nil 226 | } 227 | -------------------------------------------------------------------------------- /api_easy.go: -------------------------------------------------------------------------------- 1 | package fmr 2 | 3 | // Parse returns parse trees for rule at beginning 4 | func (g *Grammar) Parse(text string, starts ...string) 
([]*Node, error) { 5 | return g.extract(func(text string, starts ...string) ([]*Parse, error) { 6 | p, err := g.EarleyParse(text, starts...) 7 | if err != nil { 8 | return nil, err 9 | } 10 | return []*Parse{p}, nil 11 | }, text, starts...) 12 | } 13 | 14 | // ParseAny returns parse trees for rule at any position 15 | func (g *Grammar) ParseAny(text string, starts ...string) ([]*Node, error) { 16 | return g.extract( 17 | func(text string, starts ...string) ([]*Parse, error) { 18 | p, err := g.EarleyParseAny(text, starts...) 19 | if err != nil { 20 | return nil, err 21 | } 22 | return []*Parse{p}, nil 23 | }, text, starts...) 24 | } 25 | 26 | // ExtractMaxAll extracts all parse trees in text for rule 27 | func (g *Grammar) ExtractMaxAll( 28 | text string, starts ...string) ([]*Node, error) { 29 | return g.extract(g.EarleyParseMaxAll, text, starts...) 30 | } 31 | 32 | // ExtractAll extracts all parse trees in text for rule 33 | func (g *Grammar) ExtractAll(text string, starts ...string) ([]*Node, error) { 34 | return g.extract(g.EarleyParseAll, text, starts...) 35 | } 36 | 37 | func (g *Grammar) extract(f func(string, ...string) ([]*Parse, error), 38 | text string, starts ...string) ([]*Node, error) { 39 | ps, err := f(text, starts...) 40 | if err != nil { 41 | return nil, err 42 | } 43 | var ret []*Node 44 | for _, p := range ps { 45 | for _, f := range p.GetFinalStates() { 46 | ret = append(ret, p.GetTrees(f)...) 
47 | } 48 | } 49 | return ret, nil 50 | } 51 | -------------------------------------------------------------------------------- /api_easy_context.go: -------------------------------------------------------------------------------- 1 | package fmr 2 | 3 | // ParseWithContext returns parse trees for rule at beginning 4 | func (g *Grammar) ParseWithContext( 5 | context, text string, starts ...string) ([]*Node, error) { 6 | return g.extractWithContext( 7 | func(context, text string, starts ...string) ([]*Parse, error) { 8 | p, err := g.EarleyParseWithContext(context, text, starts...) 9 | if err != nil { 10 | return nil, err 11 | } 12 | return []*Parse{p}, nil 13 | }, context, text, starts...) 14 | } 15 | 16 | // ParseAnyWithContext returns parse trees for rule at any position 17 | func (g *Grammar) ParseAnyWithContext( 18 | context, text string, starts ...string) ([]*Node, error) { 19 | return g.extractWithContext( 20 | func(context, text string, starts ...string) ([]*Parse, error) { 21 | p, err := g.EarleyParseAnyWithContext(context, text, starts...) 22 | if err != nil { 23 | return nil, err 24 | } 25 | return []*Parse{p}, nil 26 | }, context, text, starts...) 27 | } 28 | 29 | // ExtractMaxAllWithContext extracts all parse trees in text for rule 30 | func (g *Grammar) ExtractMaxAllWithContext( 31 | context, text string, starts ...string) ([]*Node, error) { 32 | return g.extractWithContext( 33 | g.EarleyParseMaxAllWithContext, context, text, starts...) 34 | } 35 | 36 | // ExtractAllWithContext extracts all parse trees in text for rule 37 | func (g *Grammar) ExtractAllWithContext( 38 | context, text string, starts ...string) ([]*Node, error) { 39 | return g.extractWithContext( 40 | g.EarleyParseAllWithContext, context, text, starts...) 41 | } 42 | 43 | func (g *Grammar) extractWithContext( 44 | f func(string, string, ...string) ([]*Parse, error), 45 | context, text string, starts ...string) ([]*Node, error) { 46 | ps, err := f(context, text, starts...) 
47 | if err != nil { 48 | return nil, err 49 | } 50 | var ret []*Node 51 | for _, p := range ps { 52 | for _, f := range p.GetFinalStates() { 53 | ret = append(ret, p.GetTrees(f)...) 54 | } 55 | } 56 | return ret, nil 57 | } 58 | -------------------------------------------------------------------------------- /api_test.go: -------------------------------------------------------------------------------- 1 | package fmr 2 | 3 | import ( 4 | "testing" 5 | ) 6 | 7 | func TestLocalParse(t *testing.T) { 8 | tests := []string{ 9 | `柏乡位于河北省`, 10 | } 11 | g := &Grammar{} 12 | for _, c := range tests { 13 | ps, err := g.EarleyParseMaxAll(c, "loc_province", "loc_county") 14 | if err != nil { 15 | t.Error(err) 16 | } 17 | for _, p := range ps { 18 | for _, f := range p.GetFinalStates() { 19 | t.Log(f) 20 | trees := p.GetTrees(f) 21 | t.Log(trees) 22 | for _, tree := range trees { 23 | sem, err := tree.Semantic() 24 | if err != nil { 25 | t.Error(err) 26 | } 27 | t.Log(sem) 28 | } 29 | } 30 | } 31 | } 32 | } 33 | 34 | func TestGParse(t *testing.T) { 35 | tests := []string{ 36 | `柏乡位于河北省`, 37 | } 38 | grammar := ` = {nf.loc($1)}| {nf.loc($1)};` 39 | g, err := GrammarFromString(grammar, "loc") 40 | if err != nil { 41 | t.Error(err) 42 | } 43 | for _, c := range tests { 44 | ps, err := g.EarleyParseMaxAll(c, "loc") 45 | if err != nil { 46 | t.Error(err) 47 | } 48 | for _, p := range ps { 49 | for _, f := range p.GetFinalStates() { 50 | t.Log(f) 51 | trees := p.GetTrees(f) 52 | t.Log(trees) 53 | for _, tree := range trees { 54 | sem, err := tree.Semantic() 55 | if err != nil { 56 | t.Error(err) 57 | } 58 | t.Log(sem) 59 | } 60 | } 61 | } 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /cfgrammar.go: -------------------------------------------------------------------------------- 1 | package fmr 2 | 3 | import ( 4 | "fmt" 5 | "io/ioutil" 6 | "path/filepath" 7 | "strconv" 8 | "strings" 9 | "unicode" 10 | "unicode/utf8" 11 | 12 | 
"github.com/mitchellh/hashstructure" 13 | ) 14 | 15 | type parser struct { 16 | input string 17 | pos int 18 | width int 19 | current *position 20 | info map[int]*position 21 | fname string 22 | dir string 23 | } 24 | 25 | type position struct { 26 | row, col int 27 | r string 28 | } 29 | 30 | func (p *position) String() string { 31 | return fmt.Sprintf("|row:%d, col:%d, c:%s|", p.row, p.col, strconv.Quote(p.r)) 32 | } 33 | 34 | const eof = -1 35 | 36 | // GrammarFromFile constructs the Context-Free Grammar from file 37 | func GrammarFromFile(file string) (*Grammar, error) { 38 | b, err := ioutil.ReadFile(file) 39 | if err != nil { 40 | return nil, err 41 | } 42 | dir, err := filepath.Abs(filepath.Dir(file)) 43 | if err != nil { 44 | return nil, err 45 | } 46 | return grammarFromString(string(b), file, dir, map[string]int{file: 1}) 47 | } 48 | 49 | func grammarFromFile(ifile string, files map[string]int) (*Grammar, error) { 50 | if files[ifile] >= 2 { 51 | return nil, nil 52 | } 53 | b, err := ioutil.ReadFile(ifile) 54 | if err != nil { 55 | return nil, err 56 | } 57 | dir, err := filepath.Abs(filepath.Dir(ifile)) 58 | if err != nil { 59 | return nil, err 60 | } 61 | return grammarFromString(string(b), ifile, dir, files) 62 | } 63 | 64 | // GrammarFromString constructs the Contex-Free Grammar from string d with name 65 | func GrammarFromString(d, name string) (*Grammar, error) { 66 | return grammarFromString(d, name, ".", make(map[string]int)) 67 | } 68 | 69 | func grammarFromString(d, name, dir string, files map[string]int) (*Grammar, error) { 70 | if files[name] >= 2 { 71 | return nil, nil 72 | } 73 | p := &parser{fname: name, dir: dir, input: d, info: make(map[int]*position)} 74 | if Debug { 75 | fmt.Println("loading ", name, files) 76 | } 77 | g, err := p.grammar(files) 78 | if err != nil { 79 | return nil, err 80 | } 81 | files[name]++ 82 | if Debug { 83 | fmt.Println("loaded ", name, files) 84 | } 85 | return g, nil 86 | } 87 | 88 | func (p *parser) posInfo() 
string { 89 | return fmt.Sprintf("%s%s", p.fname, p.current) 90 | } 91 | 92 | func (p *parser) next() rune { 93 | if p.pos >= len(p.input) { 94 | p.width = 0 95 | return eof 96 | } 97 | r, w := utf8.DecodeRuneInString(p.input[p.pos:]) 98 | if r == utf8.RuneError { 99 | return eof 100 | } 101 | p.width = w 102 | p.pos += w 103 | if p.info[p.pos] == nil { 104 | if p.current == nil { 105 | p.current = &position{1, w, string(r)} 106 | } else { 107 | if r == '\n' { 108 | p.current = &position{p.current.row + 1, w, string(r)} 109 | } else { 110 | p.current = &position{p.current.row, p.current.col + w, string(r)} 111 | } 112 | } 113 | p.info[p.pos] = p.current 114 | } else { 115 | p.current = p.info[p.pos] 116 | } 117 | return r 118 | } 119 | 120 | func (p *parser) eat(expected rune) error { 121 | if r := p.next(); r != expected { 122 | return fmt.Errorf("%s :expected %s, got %s", p.posInfo(), 123 | strconv.Quote(string(expected)), strconv.Quote(string(r))) 124 | } 125 | return nil 126 | } 127 | 128 | func (p *parser) backup() { 129 | p.pos -= p.width 130 | p.current = p.info[p.pos] 131 | } 132 | 133 | func (p *parser) peek() rune { 134 | r := p.next() 135 | p.backup() 136 | return r 137 | } 138 | 139 | func (p *parser) ws() string { 140 | var ret []rune 141 | for r := p.next(); unicode.IsSpace(r); r = p.next() { 142 | ret = append(ret, r) 143 | } 144 | p.backup() 145 | return string(ret) 146 | } 147 | 148 | func (p *parser) text() (string, error) { 149 | var ret []rune 150 | first := true 151 | Loop: 152 | for { 153 | switch r := p.next(); { 154 | case unicode.IsLetter(r) || r == '_': 155 | ret = append(ret, r) 156 | case unicode.IsDigit(r) && !first: 157 | ret = append(ret, r) 158 | default: 159 | p.backup() 160 | break Loop 161 | } 162 | first = false 163 | } 164 | if len(ret) == 0 { 165 | return "", fmt.Errorf("%s : no text", p.posInfo()) 166 | } 167 | return string(ret), nil 168 | } 169 | 170 | func (p *parser) token(begin, end rune) (name string, err error) { 171 | 
if err = p.eat(begin); err != nil { 172 | return 173 | } 174 | if name, err = p.text(); err != nil { 175 | return 176 | } 177 | err = p.eat(end) 178 | return 179 | } 180 | 181 | func (p *parser) nonterminal() (string, error) { 182 | return p.token('<', '>') 183 | } 184 | 185 | func (p *parser) frame() (string, error) { 186 | return p.token('[', ']') 187 | } 188 | 189 | func (p *parser) term(g *Grammar) (*Term, error) { 190 | switch p.peek() { 191 | case '<': 192 | name, err := p.nonterminal() 193 | if err != nil { 194 | return nil, err 195 | } 196 | return &Term{Value: name, Type: Nonterminal}, nil 197 | case '"': 198 | flags, text, err := p.terminal() 199 | if err != nil { 200 | return nil, err 201 | } 202 | if flags == "" { 203 | return &Term{Value: text, Type: Terminal}, nil 204 | } 205 | return &Term{Value: text, Type: Terminal, Meta: flags}, nil 206 | case '(': 207 | return p.special() 208 | case '`': 209 | return p.regex(g) 210 | } 211 | return nil, fmt.Errorf("%s :invalid term char", p.posInfo()) 212 | } 213 | 214 | func (p *parser) getInt() (idx int, err error) { 215 | idx = -1 216 | var n uint64 217 | var r rune 218 | for r = p.next(); unicode.IsDigit(r); r = p.next() { 219 | if n, err = strconv.ParseUint(string(r), 10, 32); err != nil { 220 | return 221 | } 222 | if idx == -1 { 223 | idx = int(n) 224 | } else { 225 | idx = idx*10 + int(n) 226 | } 227 | } 228 | if idx == -1 { 229 | err = fmt.Errorf("%s : number expected", p.posInfo()) 230 | return 231 | } 232 | p.backup() 233 | return 234 | } 235 | 236 | func (p *parser) ruleBody(g *Grammar) (*RuleBody, error) { 237 | t, err := p.term(g) 238 | if err != nil { 239 | return nil, err 240 | } 241 | terms := []*Term{t} 242 | if err = p.comments(); err != nil { 243 | return nil, err 244 | } 245 | for { 246 | if err = p.comments(); err != nil { 247 | return nil, err 248 | } 249 | if !strings.ContainsRune("<\"(`", p.peek()) { 250 | break 251 | } 252 | if t, err = p.term(g); err != nil { 253 | return nil, err 254 | 
} 255 | terms = append(terms, t) 256 | if err = p.comments(); err != nil { 257 | return nil, err 258 | } 259 | } 260 | var f *FMR 261 | if p.peek() == '{' { 262 | p.eat('{') 263 | if f, err = p.semanticFn(); err != nil { 264 | return nil, err 265 | } 266 | if err = p.eat('}'); err != nil { 267 | return nil, err 268 | } 269 | if err = p.comments(); err != nil { 270 | return nil, err 271 | } 272 | } 273 | return &RuleBody{terms, f}, nil 274 | } 275 | 276 | func (p *parser) ruleBodies(g *Grammar) (map[uint64]*RuleBody, error) { 277 | r, err := p.ruleBody(g) 278 | if err != nil { 279 | return nil, err 280 | } 281 | hash, err := hashstructure.Hash(r, nil) 282 | if err != nil { 283 | return nil, err 284 | } 285 | rules := map[uint64]*RuleBody{hash: r} 286 | for { 287 | if p.peek() != '|' { 288 | break 289 | } 290 | p.eat('|') 291 | if err = p.comments(); err != nil { 292 | return nil, err 293 | } 294 | if r, err = p.ruleBody(g); err != nil { 295 | return nil, err 296 | } 297 | if hash, err = hashstructure.Hash(r, nil); err != nil { 298 | return nil, err 299 | } 300 | rules[hash] = r 301 | } 302 | return rules, nil 303 | } 304 | 305 | func (p *parser) rule(c rune, g *Grammar) (*Rule, error) { 306 | var name string 307 | var err error 308 | switch c { 309 | case '<': 310 | if name, err = p.nonterminal(); err != nil { 311 | return nil, err 312 | } 313 | case '[': 314 | if name, err = p.frame(); err != nil { 315 | return nil, err 316 | } 317 | default: 318 | return nil, fmt.Errorf("%s : unexpected char", p.posInfo()) 319 | } 320 | if err = p.comments(); err != nil { 321 | return nil, err 322 | } 323 | if err = p.eat('='); err != nil { 324 | return nil, err 325 | } 326 | if err = p.comments(); err != nil { 327 | return nil, err 328 | } 329 | body, err := p.ruleBodies(g) 330 | if err != nil { 331 | return nil, err 332 | } 333 | if err = p.eat(';'); err != nil { 334 | return nil, err 335 | } 336 | return &Rule{name, body}, nil 337 | } 338 | 339 | func (p *parser) grammar(files 
map[string]int) (*Grammar, error) { 340 | g := &Grammar{ 341 | Name: p.fname, 342 | Rules: make(map[string]*Rule), 343 | Frames: make(map[string]*Rule), 344 | Regexps: make(map[string]string), 345 | } 346 | for { 347 | if err := p.comments(); err != nil { 348 | return nil, err 349 | } 350 | if p.peek() != '#' { 351 | break 352 | } 353 | p.eat('#') 354 | p.ws() 355 | name, err := p.text() 356 | if err != nil { 357 | return nil, err 358 | } 359 | if name != "include" { 360 | return nil, fmt.Errorf( 361 | "%s: directive:(%s) not supported", p.posInfo(), name) 362 | } 363 | p.ws() 364 | _, ifile, err := p.terminal() 365 | if err != nil { 366 | return nil, err 367 | } 368 | ifile = filepath.Join(p.dir, ifile) 369 | files[ifile]++ 370 | ig, err := grammarFromFile(ifile, files) 371 | if err != nil { 372 | return nil, err 373 | } 374 | if ig == nil { 375 | continue 376 | } 377 | g.includes = append(g.includes, ig) 378 | g.includes = append(g.includes, ig.includes...) 379 | for k, v := range ig.Regexps { 380 | g.Regexps[k] = v 381 | } 382 | } 383 | for { 384 | if err := p.comments(); err != nil { 385 | return nil, err 386 | } 387 | 388 | c := p.peek() 389 | if !strings.ContainsRune(`<[`, c) { 390 | break 391 | } 392 | r, err := p.rule(c, g) 393 | if err != nil { 394 | return nil, err 395 | 396 | } 397 | rules := g.Rules 398 | if c == '[' { 399 | rules = g.Frames 400 | } 401 | if _, has := rules[r.Name]; has { 402 | for k, v := range r.Body { 403 | rules[r.Name].Body[k] = v 404 | } 405 | } else { 406 | rules[r.Name] = r 407 | } 408 | } 409 | if p.next() != eof { 410 | return nil, fmt.Errorf("%s : format error", p.posInfo()) 411 | } 412 | if err := g.buildIndex(); err != nil { 413 | return nil, err 414 | } 415 | if err := g.refine("g"); err != nil { 416 | return nil, err 417 | } 418 | return g, nil 419 | } 420 | -------------------------------------------------------------------------------- /cfgrammar_comments.go:
-------------------------------------------------------------------------------- 1 | package fmr 2 | 3 | import ( 4 | "fmt" 5 | ) 6 | 7 | func (p *parser) comments() error { 8 | defer p.ws() 9 | for { 10 | p.ws() 11 | c, err := p.comment() 12 | if err != nil { 13 | return err 14 | } 15 | if len(c) == 0 { 16 | return nil 17 | } 18 | } 19 | } 20 | 21 | func (p *parser) comment() (string, error) { 22 | if p.next() != '/' { 23 | p.backup() 24 | return "", nil 25 | } 26 | switch r := p.peek(); { 27 | case r == '/': 28 | return p.lineComment() 29 | case r == '*': 30 | return p.multiLineComment() 31 | default: 32 | return "", fmt.Errorf("%s : invalid char %s", p.posInfo(), string(r)) 33 | } 34 | } 35 | 36 | func (p *parser) lineComment() (string, error) { 37 | if err := p.eat('/'); err != nil { 38 | return "", err 39 | } 40 | ret := []rune{'/', '/'} 41 | for { 42 | r := p.next() 43 | if r == '\n' { 44 | break 45 | } 46 | ret = append(ret, r) 47 | } 48 | return string(ret), nil 49 | } 50 | 51 | func (p *parser) multiLineComment() (string, error) { 52 | if err := p.eat('*'); err != nil { 53 | return "", err 54 | } 55 | ret := []rune{'/', '*'} 56 | var prev rune 57 | for { 58 | r := p.next() 59 | if r == eof { 60 | return "", fmt.Errorf("%s : unterminated comment", p.posInfo()) 61 | } 62 | if prev == '*' && r == '/' { 63 | break 64 | } 65 | ret = append(ret, r) 66 | prev = r 67 | } 68 | ret = append(ret, '/') 69 | return string(ret), nil 70 | } 71 | -------------------------------------------------------------------------------- /cfgrammar_fmr.go: -------------------------------------------------------------------------------- 1 | package fmr 2 | 3 | import ( 4 | "fmt" 5 | "math/big" 6 | "unicode" 7 | ) 8 | 9 | func (p *parser) semanticFn() (f *FMR, err error) { 10 | p.ws() 11 | f = &FMR{} 12 | if f.Fn, err = p.funcName(); err != nil { 13 | return 14 | } 15 | if f.Args, err = p.funcArgs(); err != nil { 16 | return 17 | } 18 | p.ws() 19 | return 20 | } 21 | 22 | func (p
*parser) funcName() (string, error) { 23 | var ret []rune 24 | var prev rune = eof 25 | var r rune 26 | first := true 27 | Loop: 28 | for { 29 | switch r = p.next(); { 30 | case unicode.IsLetter(r) || r == '_': 31 | ret = append(ret, r) 32 | case unicode.IsDigit(r) && !first: 33 | ret = append(ret, r) 34 | case r == '.' && prev != '.' && !first: 35 | ret = append(ret, r) 36 | default: 37 | p.backup() 38 | break Loop 39 | } 40 | first = false 41 | prev = r 42 | } 43 | if len(ret) == 0 { 44 | return "", fmt.Errorf("%s : no funcName", p.posInfo()) 45 | } 46 | p.ws() 47 | return string(ret), nil 48 | } 49 | 50 | func (p *parser) funcArgs() (args []*Arg, err error) { 51 | if err = p.eat('('); err != nil { 52 | return 53 | } 54 | var r rune 55 | var arg *Arg 56 | for { 57 | p.ws() 58 | switch r = p.peek(); { 59 | case r == '@': 60 | if arg, err = p.contextArg(); err != nil { 61 | return 62 | } 63 | case r == '$': 64 | if arg, err = p.idxArg(); err != nil { 65 | return 66 | } 67 | case r == '"': 68 | if arg, err = p.strArg(); err != nil { 69 | return 70 | } 71 | case unicode.IsDigit(r): 72 | if arg, err = p.numArg(false); err != nil { 73 | return 74 | } 75 | case r == '-': 76 | if err = p.eat('-'); err != nil { 77 | return 78 | } 79 | if arg, err = p.numArg(true); err != nil { 80 | return 81 | } 82 | default: 83 | if arg, err = p.fArg(); err != nil { 84 | return 85 | } 86 | } 87 | args = append(args, arg) 88 | if r == ',' { 89 | continue 90 | } else { 91 | p.ws() 92 | r = p.next() 93 | if r == ',' { 94 | continue 95 | } else if r == ')' { 96 | break 97 | } else { 98 | err = fmt.Errorf("%s : unexpected semantic args", p.posInfo()) 99 | return 100 | } 101 | } 102 | } 103 | return 104 | } 105 | 106 | func (p *parser) contextArg() (arg *Arg, err error) { 107 | if err = p.eat('@'); err != nil { 108 | return 109 | } 110 | arg = &Arg{"context", "@"} 111 | return 112 | } 113 | 114 | func (p *parser) idxArg() (arg *Arg, err error) { 115 | if err = p.eat('$'); err != nil { 116 | 
return 117 | } 118 | var idx int 119 | if idx, err = p.getInt(); err != nil { 120 | return 121 | } 122 | arg = &Arg{"index", idx} 123 | return 124 | } 125 | 126 | func (p *parser) strArg() (*Arg, error) { 127 | var text string 128 | var err error 129 | if _, text, err = p.terminal(); err != nil { 130 | return nil, err 131 | } 132 | return &Arg{"string", text}, nil 133 | } 134 | 135 | func (p *parser) numArg(neg bool) (*Arg, error) { 136 | var ret []rune 137 | hasDot := false 138 | for r := p.next(); ; r = p.next() { 139 | if unicode.IsDigit(r) { 140 | ret = append(ret, r) 141 | } else if r == '.' { 142 | if hasDot { 143 | return nil, fmt.Errorf("%s : unexpected dot", p.posInfo()) 144 | } 145 | hasDot = true 146 | ret = append(ret, r) 147 | } else { 148 | break 149 | } 150 | } 151 | if len(ret) == 0 { 152 | return nil, fmt.Errorf("%s : number expected", p.posInfo()) 153 | } 154 | p.backup() 155 | if neg { 156 | ret = append([]rune{'-'}, ret...) 157 | } 158 | if hasDot { 159 | n := new(big.Float) 160 | if _, err := fmt.Sscan(string(ret), n); err != nil { 161 | return nil, err 162 | } 163 | return &Arg{"float", n}, nil 164 | } 165 | n := new(big.Int) 166 | if _, err := fmt.Sscan(string(ret), n); err != nil { 167 | return nil, err 168 | } 169 | return &Arg{"int", n}, nil 170 | } 171 | 172 | func (p *parser) fArg() (*Arg, error) { 173 | var f *FMR 174 | var err error 175 | if f, err = p.semanticFn(); err != nil { 176 | return nil, err 177 | } 178 | return &Arg{"func", f}, nil 179 | } 180 | -------------------------------------------------------------------------------- /cfgrammar_regex.go: -------------------------------------------------------------------------------- 1 | package fmr 2 | 3 | import ( 4 | "fmt" 5 | 6 | "zliu.org/goutil" 7 | ) 8 | 9 | func (p *parser) regex(g *Grammar) (*Term, error) { 10 | if err := p.eat('`'); err != nil { 11 | return nil, err 12 | } 13 | p.ws() 14 | var ret []rune 15 | OUT: 16 | for { 17 | switch r := p.next(); { 18 | case r == '`': 
19 | break OUT 20 | case r == eof: 21 | return nil, fmt.Errorf("%s : unterminated string", p.posInfo()) 22 | default: 23 | ret = append(ret, r) 24 | } 25 | } 26 | if len(ret) == 0 { 27 | return nil, fmt.Errorf("%s : empty regexp string", p.posInfo()) 28 | } 29 | s := string(ret) 30 | if _, err := goutil.Regexp(s); err != nil { 31 | return nil, fmt.Errorf("%s : `%s` is not a valid regexp", p.posInfo(), s) 32 | } 33 | h := goutil.MD5(s)[:16] 34 | g.Regexps[h] = s 35 | return &Term{Value: h, Type: Nonterminal}, nil 36 | } 37 | -------------------------------------------------------------------------------- /cfgrammar_special.go: -------------------------------------------------------------------------------- 1 | package fmr 2 | 3 | import "fmt" 4 | 5 | func (p *parser) special() (*Term, error) { 6 | if err := p.eat('('); err != nil { 7 | return nil, err 8 | } 9 | p.ws() 10 | name, err := p.text() 11 | if err != nil { 12 | return nil, err 13 | } 14 | p.ws() 15 | switch name { 16 | case "any": 17 | return p.any() 18 | case "list": 19 | return p.list() 20 | default: 21 | return nil, fmt.Errorf( 22 | "%s: special rule:(%s) not supported", p.posInfo(), name) 23 | } 24 | } 25 | 26 | func (p *parser) specialMeta() (map[string]int, error) { 27 | p.ws() 28 | var err error 29 | var meta map[string]int 30 | if p.peek() == '{' { 31 | // contains range 32 | meta = make(map[string]int) 33 | p.eat('{') 34 | p.ws() 35 | if meta["min"], err = p.getInt(); err != nil { 36 | return nil, err 37 | } 38 | p.ws() 39 | if err = p.eat(','); err != nil { 40 | return nil, err 41 | } 42 | p.ws() 43 | if meta["max"], err = p.getInt(); err != nil { 44 | return nil, err 45 | } 46 | if meta["max"] < meta["min"] { 47 | return nil, fmt.Errorf("%s : max:%d less than min:%d", 48 | p.posInfo(), meta["max"], meta["min"]) 49 | } 50 | p.ws() 51 | if err = p.eat('}'); err != nil { 52 | return nil, err 53 | } 54 | } 55 | p.ws() 56 | return meta, nil 57 | } 58 | 59 | func (p *parser) list() (*Term, error) { 60 
| name, err := p.nonterminal() 61 | if err != nil { 62 | return nil, err 63 | } 64 | meta, err := p.specialMeta() 65 | if err != nil { 66 | return nil, err 67 | } 68 | if err = p.eat(')'); err != nil { 69 | return nil, err 70 | } 71 | if len(meta) > 0 { 72 | return &Term{Type: List, Value: name, Meta: meta}, nil 73 | } 74 | return &Term{Type: List, Value: name}, nil 75 | } 76 | 77 | func (p *parser) any() (*Term, error) { 78 | meta, err := p.specialMeta() 79 | if err != nil { 80 | return nil, err 81 | } 82 | if err = p.eat(')'); err != nil { 83 | return nil, err 84 | } 85 | if len(meta) > 0 { 86 | return &Term{Value: "any", Type: Any, Meta: meta}, nil 87 | } 88 | return &Term{Value: "any", Type: Any}, nil 89 | } 90 | -------------------------------------------------------------------------------- /cfgrammar_terminal.go: -------------------------------------------------------------------------------- 1 | package fmr 2 | 3 | import "fmt" 4 | 5 | func (p *parser) terminal() (flags, text string, err error) { 6 | if err = p.eat('"'); err != nil { 7 | return 8 | } 9 | p.ws() 10 | if p.peek() == '(' { 11 | p.eat('(') 12 | p.ws() 13 | if err = p.eat('?'); err != nil { 14 | return 15 | } 16 | p.ws() 17 | if flags, err = p.text(); err != nil { 18 | return 19 | } 20 | if err = p.eat(')'); err != nil { 21 | return 22 | } 23 | } 24 | if text, err = p.terminalText(); err != nil { 25 | return 26 | } 27 | err = p.eat('"') 28 | return 29 | } 30 | 31 | func (p *parser) terminalText() (string, error) { 32 | var ret []rune 33 | var prev rune 34 | for { 35 | switch r := p.next(); { 36 | case r == '"' && prev != '\\': 37 | p.backup() 38 | return string(ret), nil 39 | case r == eof: 40 | return "", fmt.Errorf("%s : unterminated string", p.posInfo()) 41 | case prev == '\\': 42 | switch r { 43 | case '\\': 44 | ret = append(ret, '\\') 45 | case 'n': 46 | ret = append(ret, '\n') 47 | case 't': 48 | ret = append(ret, '\t') 49 | case '"': 50 | ret = append(ret, '"') 51 | case '(': 52 | ret = 
append(ret, '(') 53 | default: 54 | return "", fmt.Errorf("%s : unexpected escape string", p.posInfo()) 55 | } 56 | prev = 0 57 | case r == '\\': 58 | prev = r 59 | default: 60 | ret = append(ret, r) 61 | prev = r 62 | } 63 | } 64 | } 65 | -------------------------------------------------------------------------------- /cfgrammar_test.go: -------------------------------------------------------------------------------- 1 | package fmr 2 | 3 | import ( 4 | //"fmt" 5 | "testing" 6 | 7 | "zliu.org/goutil" 8 | ) 9 | 10 | var tests = []string{ 11 | ` = "<" ">" ; 12 | = " " { nf.math.sum($1,$3)} | ; 13 | = "(?ilfw)f \\uoo\n" | "bar\t" | "baz"|"好吧" ; 14 | = "\(" (any) ")" ; 15 | `, 16 | ` = (list); 17 | ="20181219"|"20181218"; 18 | `, 19 | } 20 | 21 | func TestLex(t *testing.T) { 22 | for _, c := range tests { 23 | g, err := GrammarFromString(c, "test") 24 | if err != nil { 25 | t.Error(err) 26 | } 27 | b, err := goutil.JSONMarshalIndent(g, "", " ") 28 | if err != nil { 29 | t.Error(err) 30 | } 31 | t.Log(string(b)) 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /earley.go: -------------------------------------------------------------------------------- 1 | package fmr 2 | 3 | import ( 4 | "fmt" 5 | 6 | "github.com/liuzl/ling" 7 | ) 8 | 9 | // GammaRule is the name of the special "gamma" rule added by the algorithm 10 | // (this is unicode for 'LATIN SMALL LETTER GAMMA') 11 | const GammaRule = "\u0263" // "\u0194" 12 | 13 | // DOT indicates the current position inside a TableState 14 | const DOT = "\u2022" // "\u00B7" 15 | 16 | // TableState uses Earley's dot notation: given a production X → αβ, 17 | // the notation X → α • β represents a condition in which α has already 18 | // been parsed and β is expected. 
19 | type TableState struct { 20 | Term *Term `json:"term"` 21 | Rb *RuleBody `json:"rb,omitempty"` 22 | Start int `json:"start"` 23 | End int `json:"end"` 24 | Dot int `json:"dot"` 25 | } 26 | 27 | // TableColumn is the TableState set 28 | type TableColumn struct { 29 | token *ling.Token 30 | index int 31 | states []*TableState 32 | } 33 | 34 | // Parse stores a parse chart by grammars 35 | type Parse struct { 36 | grammars []*Grammar 37 | text string 38 | starts []string 39 | columns []*TableColumn 40 | finalStates []*TableState 41 | } 42 | 43 | // Equal func for TableState 44 | func (s *TableState) Equal(ts *TableState) bool { 45 | if s == nil && ts == nil { 46 | return true 47 | } 48 | if s == nil || ts == nil { 49 | if Debug { 50 | fmt.Println("only one is nil:", s, ts) 51 | } 52 | return false 53 | } 54 | if s.Start != ts.Start || s.End != ts.End || s.Dot != ts.Dot || 55 | !s.Rb.Equal(ts.Rb) { 56 | return false 57 | } 58 | return s.Term.Equal(ts.Term) 59 | } 60 | 61 | func (s *TableState) metaEmpty() bool { 62 | if s.Term.Meta == nil { 63 | return true 64 | } 65 | if m, ok := s.Term.Meta.(map[string]int); ok && len(m) == 0 { 66 | return true 67 | } 68 | return false 69 | } 70 | 71 | func (s *TableState) isCompleted() bool { 72 | switch s.Term.Type { 73 | case Any, List: 74 | if !s.metaEmpty() { 75 | if meta, ok := s.Term.Meta.(map[string]int); ok { 76 | if s.Dot >= meta["min"] && s.Dot <= meta["max"] { 77 | return true 78 | } 79 | } 80 | } else if s.Dot > 0 { 81 | return true 82 | } 83 | return false 84 | default: 85 | return s.Dot >= len(s.Rb.Terms) 86 | } 87 | } 88 | 89 | func (s *TableState) getNextTerm() *Term { 90 | switch s.Term.Type { 91 | case Any: 92 | if !s.metaEmpty() { 93 | if meta, ok := s.Term.Meta.(map[string]int); ok && s.Dot >= meta["max"] { 94 | return nil 95 | } 96 | } 97 | return s.Term 98 | case List: 99 | if !s.metaEmpty() { 100 | if meta, ok := s.Term.Meta.(map[string]int); ok && s.Dot >= meta["max"] { 101 | return nil 102 | } 103 | } 
104 | return &Term{Value: s.Term.Value, Type: Nonterminal, Meta: s.Term.Meta} 105 | default: 106 | if s.isCompleted() { 107 | return nil 108 | } 109 | return s.Rb.Terms[s.Dot] 110 | } 111 | } 112 | 113 | func (col *TableColumn) insert(state *TableState) *TableState { 114 | return col.insertToEnd(state, false) 115 | } 116 | 117 | func (col *TableColumn) insertToEnd(state *TableState, end bool) *TableState { 118 | state.End = col.index 119 | if state.Term.Type == Any { 120 | state.Dot = state.End - state.Start 121 | } 122 | for i, s := range col.states { 123 | if s.Equal(state) { 124 | if end { 125 | col.states = append(col.states[:i], col.states[i+1:]...) 126 | col.states = append(col.states, s) 127 | } 128 | return s 129 | } 130 | } 131 | col.states = append(col.states, state) 132 | return col.states[len(col.states)-1] 133 | } 134 | 135 | /* 136 | * the Earley algorithm's core: add gamma rule, fill up table, and check if the 137 | * gamma rule span from the first column to the last one. return the final gamma 138 | * state, or null, if the parse failed. 
139 | */ 140 | func (p *Parse) parse(maxFlag bool) []*TableState { 141 | if len(p.starts) == 0 { 142 | return nil 143 | } 144 | for _, start := range p.starts { 145 | rb := &RuleBody{ 146 | []*Term{{Value: start, Type: Nonterminal}}, 147 | &FMR{"nf.I", []*Arg{{"index", 1}}}, 148 | } 149 | begin := &TableState{&Term{GammaRule, Nonterminal, nil}, rb, 0, 0, 0} 150 | p.columns[0].states = append(p.columns[0].states, begin) 151 | } 152 | for i, col := range p.columns { 153 | if Debug { 154 | fmt.Printf("Column %d[%s]:", i, col.token) 155 | } 156 | for j := 0; j < len(col.states); j++ { 157 | st := col.states[j] 158 | if Debug { 159 | fmt.Printf("\n\tRow %d: %+v, len:%d\n", j, st, len(col.states)) 160 | } 161 | if st.isCompleted() { 162 | p.complete(col, st) 163 | } 164 | term := st.getNextTerm() 165 | if term != nil { 166 | if st.Term.Type == Any { 167 | if i+1 < len(p.columns) { 168 | p.scan(p.columns[i+1], st, term) 169 | } 170 | } else { 171 | switch term.Type { 172 | case Nonterminal, Any, List: 173 | p.predict(col, term) 174 | case Terminal: 175 | if i+1 < len(p.columns) { 176 | p.scan(p.columns[i+1], st, term) 177 | } 178 | } 179 | } 180 | } 181 | } 182 | if Debug { 183 | fmt.Println() 184 | } 185 | //p.handleEpsilons(col) 186 | } 187 | 188 | // find end state (return nil if not found) 189 | /* 190 | lastCol := p.columns[len(p.columns)-1] 191 | for _, state := range lastCol.states { 192 | if state.Name == GAMMA_RULE && state.isCompleted() { 193 | return state 194 | } 195 | } 196 | */ 197 | var ret []*TableState 198 | for i := len(p.columns) - 1; i >= 0; i-- { 199 | for _, state := range p.columns[i].states { 200 | if state.Term.Value == GammaRule && state.isCompleted() { 201 | ret = append(ret, state) 202 | if maxFlag { 203 | p.finalStates = ret 204 | return ret 205 | } 206 | } 207 | } 208 | } 209 | p.finalStates = ret 210 | return ret 211 | } 212 | 213 | func (*Parse) scan(col *TableColumn, st *TableState, term *Term) { 214 | if term.Type == Any { 215 | newSt := 
&TableState{Term: &Term{"any", Any, term.Meta}, Rb: st.Rb, 216 | Dot: st.Dot + 1, Start: st.Start} 217 | col.insert(newSt) 218 | if Debug { 219 | fmt.Println("\tscan Any") 220 | fmt.Printf("\t\tinsert to next: %+v\n", newSt) 221 | } 222 | return 223 | } 224 | if terminalMatch(term, col.token) { 225 | newSt := &TableState{Term: st.Term, Rb: st.Rb, 226 | Dot: st.Dot + 1, Start: st.Start} 227 | col.insert(newSt) 228 | if Debug { 229 | fmt.Println("\tscan", term.Value, col.token) 230 | fmt.Printf("\t\tinsert to next: %+v\n", newSt) 231 | } 232 | } 233 | } 234 | 235 | func predict(g *Grammar, col *TableColumn, term *Term) bool { 236 | r, has := g.Rules[term.Value] 237 | if !has { 238 | return false 239 | } 240 | changed := false 241 | for _, prod := range r.Body { 242 | //st := &TableState{Name: r.Name, Rb: prod, dot: 0, Start: col.index, termType: term.Type} 243 | st := &TableState{Term: &Term{Value: r.Name, Type: Nonterminal}, Rb: prod, 244 | Dot: 0, Start: col.index} 245 | st2 := col.insert(st) 246 | if Debug { 247 | fmt.Printf("\t\t%+v insert: %+v\n", term.Type, st) 248 | } 249 | changed = changed || (st == st2) 250 | } 251 | return changed 252 | } 253 | 254 | func (p *Parse) predict(col *TableColumn, term *Term) bool { 255 | if Debug { 256 | fmt.Println("\tpredict", term.Type, term.Value) 257 | } 258 | switch term.Type { 259 | case Nonterminal: 260 | changed := false 261 | for _, g := range p.grammars { 262 | changed = predict(g, col, term) || changed 263 | } 264 | return changed 265 | case Any, List: 266 | st := &TableState{Term: term, Start: col.index} 267 | st2 := col.insert(st) 268 | if Debug { 269 | fmt.Printf("\t\tinsert: %+v\n", st) 270 | } 271 | return st == st2 272 | } 273 | return false 274 | } 275 | 276 | // Earley complete. 
returns true if the table has been changed, false otherwise 277 | func (p *Parse) complete(col *TableColumn, state *TableState) bool { 278 | if Debug { 279 | fmt.Printf("\tcomplete: %+v\n", state) 280 | } 281 | changed := false 282 | for _, st := range p.columns[state.Start].states { 283 | next := st.getNextTerm() 284 | if next == nil { 285 | continue 286 | } 287 | if (next.Type == Any && state.Term.Type == Any) || 288 | (next.Type == state.Term.Type && next.Value == state.Term.Value) { 289 | st1 := &TableState{Term: &Term{st.Term.Value, st.Term.Type, next.Meta}, 290 | Rb: st.Rb, Dot: st.Dot + 1, Start: st.Start} 291 | //st2 := col.insertToEnd(st1, true) 292 | st2 := col.insertToEnd(st1, false) 293 | if Debug { 294 | fmt.Printf("\t\tinsert: %+v\n", st1) 295 | } 296 | changed = changed || (st1 == st2) 297 | } 298 | } 299 | return changed 300 | } 301 | 302 | func (p *Parse) handleEpsilons(col *TableColumn) { 303 | changed := true 304 | for changed { 305 | changed = false 306 | for _, state := range col.states { 307 | if state.isCompleted() { 308 | changed = p.complete(col, state) || changed 309 | } 310 | term := state.getNextTerm() 311 | if term != nil && term.Type == Nonterminal { 312 | changed = p.predict(col, term) || changed 313 | } 314 | } 315 | } 316 | } 317 | -------------------------------------------------------------------------------- /earley_ast.go: -------------------------------------------------------------------------------- 1 | package fmr 2 | 3 | import "fmt" 4 | 5 | // Debug flag 6 | var Debug = false 7 | 8 | // Node is the AST of tree structure 9 | type Node struct { 10 | Value *TableState `json:"value"` 11 | Children []*Node `json:"children,omitempty"` 12 | 13 | p *Parse 14 | } 15 | 16 | // GetFinalStates returns the final states of p 17 | func (p *Parse) GetFinalStates() []*TableState { 18 | return p.finalStates 19 | } 20 | 21 | // Boundary returns the start, end position in NL for finalState 22 | func (p *Parse) Boundary(finalState *TableState) 
*Pos { 23 | if finalState == nil { 24 | return nil 25 | } 26 | start := p.columns[finalState.Start+1].token.StartByte 27 | end := p.columns[finalState.End].token.EndByte 28 | if end < start { //TODO 29 | end = start 30 | } 31 | return &Pos{start, end} 32 | } 33 | 34 | // Tag returns the Nonterminal name of finalState 35 | func (p *Parse) Tag(finalState *TableState) string { 36 | if finalState == nil { 37 | return "" 38 | } 39 | return finalState.Rb.Terms[0].Value 40 | } 41 | 42 | // GetTrees returns all possible parse results 43 | func (p *Parse) GetTrees(finalState *TableState) []*Node { 44 | if Debug { 45 | fmt.Printf("chart:\n%+v\n", p) 46 | fmt.Println("finalState:\n", finalState) 47 | } 48 | if finalState != nil { 49 | return p.buildTrees(finalState) 50 | } 51 | return nil 52 | } 53 | 54 | func (p *Parse) buildTrees(state *TableState) []*Node { 55 | if state.Term.Type == Any { 56 | n := &TableState{state.Term, nil, state.Start, state.End, state.End} 57 | cld := []*Node{{n, nil, p}} 58 | return cld 59 | } 60 | if state.Term.Type == List { 61 | state.Rb = &RuleBody{} 62 | var args []*Arg 63 | for i := 0; i < state.Dot; i++ { 64 | state.Rb.Terms = append(state.Rb.Terms, &Term{state.Term.Value, Nonterminal, nil}) 65 | args = append(args, &Arg{"index", i + 1}) 66 | } 67 | state.Rb.F = &FMR{"fmr.list", args} 68 | } 69 | return p.buildTreesHelper( 70 | &[]*Node{}, state, len(state.Rb.Terms)-1, state.End) 71 | } 72 | 73 | /* 74 | * How it works: suppose we're trying to match [X -> Y Z W]. We go from finish 75 | * to start, e.g., first we'll try to match W in X.endCol. Let this matching 76 | * state be M1. Next we'll try to match Z in M1.startCol. Let this matching 77 | * state be M2. And finally, we'll try to match Y in M2.startCol, which must 78 | * also start at X.startCol. Let this matching state be M3. 
79 | * 80 | * If we matched M1, M2 and M3, then we've found a parsing for X: 81 | * X-> 82 | * Y -> M3 83 | * Z -> M2 84 | * W -> M1 85 | */ 86 | func (p *Parse) buildTreesHelper(children *[]*Node, state *TableState, 87 | termIndex, end int) []*Node { 88 | // begin with the last --non-terminal-- of the ruleBody of finalState 89 | if Debug { 90 | fmt.Printf("debug: %+v termIndex:%d children:%+v, end:%d\n", 91 | state, termIndex, children, end) 92 | } 93 | var outputs []*Node 94 | var start = -1 95 | if termIndex < 0 { 96 | // this is the base-case for the recursion (we matched the entire rule) 97 | outputs = append(outputs, &Node{state, *children, p}) 98 | return outputs 99 | } else if termIndex == 0 { 100 | // if this is the first rule 101 | start = state.Start 102 | } 103 | term := state.Rb.Terms[termIndex] 104 | 105 | if term.Type == Terminal { 106 | n := &TableState{term, nil, 107 | state.Start + termIndex, state.Start + termIndex + 1, 0} 108 | cld := []*Node{{n, nil, p}} 109 | cld = append(cld, *children...) 
110 | for _, node := range p.buildTreesHelper(&cld, state, termIndex-1, end-1) { 111 | outputs = append(outputs, node) 112 | } 113 | return outputs 114 | } 115 | 116 | if Debug { 117 | fmt.Println("\nend:", end, "term.value:", term.Value, state) 118 | } 119 | for _, st := range p.columns[end].states { 120 | if st == state { 121 | // this prevents an endless recursion: since the states are filled in 122 | // order of completion, we know that X cannot depend on state Y that 123 | // comes after it X in chronological order 124 | if Debug { 125 | fmt.Println("st==state", st, state) 126 | fmt.Println(p.columns[end]) 127 | } 128 | break 129 | } 130 | if !st.isCompleted() || st.Term.Value != term.Value || st.Term.Type != term.Type { 131 | // this state is out of the question -- either not completed or does not 132 | // match the name 133 | continue 134 | } 135 | if start != -1 && st.Start != start { 136 | // if start isn't nil, this state must span from start to end 137 | continue 138 | } 139 | if Debug { 140 | fmt.Printf("\tY st:%+v, term:%+v\n", st, term) 141 | } 142 | 143 | // okay, so `st` matches -- now we need to create a tree for every possible 144 | // sub-match 145 | for _, subTree := range p.buildTrees(st) { 146 | cld := []*Node{subTree} 147 | cld = append(cld, *children...) 
148 | // now try all options 149 | for _, node := range p.buildTreesHelper(&cld, state, termIndex-1, st.Start) { 150 | outputs = append(outputs, node) 151 | } 152 | } 153 | } 154 | return outputs 155 | } 156 | -------------------------------------------------------------------------------- /earley_eval.go: -------------------------------------------------------------------------------- 1 | package fmr 2 | 3 | import ( 4 | "fmt" 5 | "math/big" 6 | ) 7 | 8 | // Eval returns the denotation of Node n 9 | func (n *Node) Eval() (interface{}, error) { 10 | if n.Value.Rb == nil || n.Value.Rb.F == nil { 11 | if n.p == nil { 12 | return "", nil 13 | } 14 | return n.OriginalText(), nil 15 | } 16 | return n.fmrEval(n.Value.Rb.F, n.Children) 17 | } 18 | 19 | func (n *Node) fmrEval(f *FMR, children []*Node) (interface{}, error) { 20 | if f == nil { 21 | return "", nil 22 | } 23 | if f.Fn == "nf.I" { 24 | if len(f.Args) != 1 { 25 | return "", fmt.Errorf("the length of Args of nf.I should be one") 26 | } 27 | s, err := n.semEval(f.Args[0], children) 28 | if err != nil { 29 | return "", err 30 | } 31 | return s, nil 32 | } 33 | 34 | var args []interface{} 35 | for _, arg := range f.Args { 36 | s, err := n.semEval(arg, children) 37 | if err != nil { 38 | return "", err 39 | } 40 | args = append(args, s) 41 | } 42 | if Debug { 43 | fmt.Printf("funcs.Call(%s, %+v)\n", f.Fn, args) 44 | } 45 | return Call(f.Fn, args...) 
46 | } 47 | 48 | func (n *Node) semEval(arg *Arg, nodes []*Node) (interface{}, error) { 49 | if arg == nil { 50 | return "", fmt.Errorf("arg is nil") 51 | } 52 | switch arg.Type { 53 | case "string": 54 | if s, ok := arg.Value.(string); ok { 55 | return s, nil 56 | } 57 | return "", fmt.Errorf("arg.Value: %+v is not string", arg.Value) 58 | case "int": 59 | if i, ok := arg.Value.(*big.Int); ok { 60 | return i.String(), nil 61 | } 62 | return "", fmt.Errorf("arg.Value: %+v is not int", arg.Value) 63 | case "float": 64 | if f, ok := arg.Value.(*big.Float); ok { 65 | return f.String(), nil 66 | } 67 | return "", fmt.Errorf("arg.Value: %+v is not float", arg.Value) 68 | case "func": 69 | if fmr, ok := arg.Value.(*FMR); ok { 70 | return n.fmrEval(fmr, nodes) 71 | } 72 | return "", fmt.Errorf("arg.Value: %+v is not func", arg.Value) 73 | case "index": 74 | i, ok := arg.Value.(int) 75 | if !ok { 76 | return "", fmt.Errorf("arg.Value: %+v is not index", arg.Value) 77 | } 78 | if i < 0 || i > len(nodes) { 79 | return "", fmt.Errorf("i=%d not in range [0, %d]", i, len(nodes)) 80 | } 81 | if i == 0 { 82 | return n.NL(), nil 83 | } 84 | s, err := nodes[i-1].Eval() 85 | if err != nil { 86 | return "", err 87 | } 88 | return s, nil 89 | case "context": 90 | subnodes := []map[string]interface{}{} 91 | for _, node := range nodes { 92 | ni, err := node.Eval() 93 | if err != nil { 94 | ni = node.OriginalText() 95 | } 96 | subnodes = append(subnodes, map[string]interface{}{node.Term().Value: ni}) 97 | } 98 | ret := map[string]interface{}{ 99 | "text": n.OriginalText(), 100 | "pos": n.Pos(), 101 | "nodes": subnodes, 102 | } 103 | if n.Term().Type != Terminal { 104 | ret["type"] = n.Term().Value 105 | } 106 | return ret, nil 107 | default: 108 | return "", fmt.Errorf("arg.Type: %s invalid", arg.Type) 109 | } 110 | } 111 | -------------------------------------------------------------------------------- /earley_fmr.go: 
-------------------------------------------------------------------------------- 1 | package fmr 2 | 3 | import ( 4 | "encoding/json" 5 | "fmt" 6 | "math/big" 7 | "strconv" 8 | "strings" 9 | ) 10 | 11 | // Semantic returns the stringified FMR of Node n 12 | func (n *Node) Semantic() (string, error) { 13 | nl := strconv.Quote(n.NL()) 14 | if n.Value.Rb == nil || n.Value.Rb.F == nil { 15 | if n.p == nil { 16 | return "", nil 17 | } 18 | // by default, returns nf.I($0) 19 | return nl, nil 20 | } 21 | return n.fmrStr(n.Value.Rb.F, n.Children, nl) 22 | } 23 | 24 | func (n *Node) fmrStr(f *FMR, children []*Node, nl string) (string, error) { 25 | if f == nil { 26 | return "", nil 27 | } 28 | if f.Fn == "nf.I" { 29 | if len(f.Args) != 1 { 30 | return "", fmt.Errorf("the length of Args of nf.I should be one") 31 | } 32 | s, err := n.semStr(f.Args[0], children, nl) 33 | if err != nil { 34 | return "", err 35 | } 36 | return s, nil 37 | } 38 | 39 | var args []string 40 | for _, arg := range f.Args { 41 | s, err := n.semStr(arg, children, nl) 42 | if err != nil { 43 | return "", err 44 | } 45 | args = append(args, s) 46 | } 47 | return fmt.Sprintf("%s(%s)", f.Fn, strings.Join(args, ", ")), nil 48 | } 49 | 50 | func (n *Node) semStr(arg *Arg, nodes []*Node, nl string) (string, error) { 51 | if arg == nil { 52 | return "", fmt.Errorf("arg is nil") 53 | } 54 | switch arg.Type { 55 | case "string": 56 | if s, ok := arg.Value.(string); ok { 57 | return strconv.Quote(s), nil 58 | } 59 | return "", fmt.Errorf("arg.Value: %+v is not string", arg.Value) 60 | case "int": 61 | if i, ok := arg.Value.(*big.Int); ok { 62 | return i.String(), nil 63 | } 64 | return "", fmt.Errorf("arg.Value: %+v is not int", arg.Value) 65 | case "float": 66 | if f, ok := arg.Value.(*big.Float); ok { 67 | return f.String(), nil 68 | } 69 | return "", fmt.Errorf("arg.Value: %+v is not float", arg.Value) 70 | case "func": 71 | if fmr, ok := arg.Value.(*FMR); ok { 72 | return n.fmrStr(fmr, nodes, nl) 73 | } 74 | 
return "", fmt.Errorf("arg.Value: %+v is not func", arg.Value) 75 | case "index": 76 | i, ok := arg.Value.(int) 77 | if !ok { 78 | return "", fmt.Errorf("arg.Value: %+v is not index", arg.Value) 79 | } 80 | if i < 0 || i > len(nodes) { 81 | return "", fmt.Errorf("i=%d not in range [0, %d]", i, len(nodes)) 82 | } 83 | if i == 0 { 84 | return nl, nil 85 | } 86 | if nodes[i-1] == nil { 87 | return "null", nil 88 | } 89 | s, err := nodes[i-1].Semantic() 90 | if err != nil { 91 | return "", err 92 | } 93 | return s, nil 94 | case "context": 95 | subnodes := []map[string]interface{}{} 96 | for _, node := range nodes { 97 | ni, err := node.Eval() 98 | if err != nil { 99 | ni = node.OriginalText() 100 | } 101 | subnodes = append(subnodes, map[string]interface{}{node.Term().Value: ni}) 102 | } 103 | ret := map[string]interface{}{ 104 | "text": n.OriginalText(), 105 | "pos": n.Pos(), 106 | "nodes": subnodes, 107 | } 108 | if n.Term().Type != Terminal { 109 | ret["type"] = n.Term().Value 110 | } 111 | s, _ := json.Marshal(ret) 112 | return string(s), nil 113 | default: 114 | return "", fmt.Errorf("arg.Type: %s invalid", arg.Type) 115 | } 116 | } 117 | -------------------------------------------------------------------------------- /earley_nodeprint.go: -------------------------------------------------------------------------------- 1 | package fmr 2 | 3 | import ( 4 | "fmt" 5 | 6 | "github.com/xlab/treeprint" 7 | ) 8 | 9 | // TreePrint to out 10 | func (n *Node) TreePrint() { 11 | tree := treeprint.New() 12 | tree.SetValue(n.Value) 13 | for _, child := range n.Children { 14 | tree.AddNode(child.Value) 15 | } 16 | fmt.Println(tree.String()) 17 | } 18 | -------------------------------------------------------------------------------- /earley_stringer.go: -------------------------------------------------------------------------------- 1 | package fmr 2 | 3 | import ( 4 | "fmt" 5 | "io" 6 | "math/big" 7 | "strconv" 8 | "strings" 9 | ) 10 | 11 | func (ts *TableState) String() 
string { 12 | s := "" 13 | switch ts.Term.Type { 14 | case Nonterminal: 15 | if ts.Rb != nil { 16 | for i, term := range ts.Rb.Terms { 17 | if i == ts.Dot { 18 | s += DOT + " " 19 | } 20 | switch term.Type { 21 | case Nonterminal: 22 | s += "<" + term.Value + "> " 23 | case Terminal: 24 | s += strconv.Quote(term.Value) + " " 25 | case Any: 26 | s += "(any) " 27 | case List: 28 | s += "(list<" + term.Value + ">) " 29 | } 30 | } 31 | if ts.Dot == len(ts.Rb.Terms) { 32 | s += DOT 33 | } 34 | return fmt.Sprintf("<%s> -> %s [%d-%d] {%s}", 35 | ts.Term.Value, s, ts.Start, ts.End, ts.Rb.F) 36 | } 37 | case Any: 38 | for i := ts.Start; i < ts.End; i++ { 39 | s += "# " 40 | } 41 | s += DOT + " * " 42 | return fmt.Sprintf("(any) -> %s [%d-%d]", s, ts.Start, ts.End) 43 | case List: 44 | f := "fmr.list(" 45 | for i := 0; i < ts.Dot; i++ { 46 | s += "<" + ts.Term.Value + "> " 47 | f += fmt.Sprintf("$%d", i+1) 48 | if i != ts.Dot-1 { 49 | f += "," 50 | } 51 | } 52 | f += ")" 53 | s += DOT + " * " 54 | return fmt.Sprintf("(list<%s>) -> %s [%d-%d] {%s}", ts.Term.Value, s, ts.Start, ts.End, f) 55 | } 56 | return fmt.Sprintf("%s [%d-%d]", strconv.Quote(ts.Term.Value), ts.Start, ts.End) 57 | } 58 | 59 | func (tc *TableColumn) String() (out string) { 60 | if tc.index == 0 { 61 | out = "[0] ''\n" 62 | } else { 63 | out = fmt.Sprintf("[%d] '%s' position:[%d-%d]\n", 64 | tc.index, tc.token, tc.token.StartByte, tc.token.EndByte) 65 | } 66 | out += "=======================================\n" 67 | for _, s := range tc.states { 68 | out += s.String() + "\n" 69 | } 70 | return out 71 | } 72 | 73 | func (p *Parse) String() string { 74 | out := "" 75 | for _, c := range p.columns { 76 | out += c.String() + "\n" 77 | } 78 | return out 79 | } 80 | 81 | // Print this tree to out 82 | func (n *Node) Print(out io.Writer) { 83 | n.printLevel(out, 0) 84 | } 85 | 86 | func (n *Node) printLevel(out io.Writer, level int) { 87 | indentation := "" 88 | for i := 0; i < level; i++ { 89 | indentation += " " 
90 | } 91 | fmt.Fprintf(out, "%s%v\n", indentation, n.Value) 92 | for _, child := range n.Children { 93 | child.printLevel(out, level+1) 94 | } 95 | } 96 | 97 | func (n *Node) String() string { 98 | if len(n.Children) > 0 { 99 | return fmt.Sprintf("%+v %+v", n.Value, n.Children) 100 | } 101 | return fmt.Sprintf("%+v", n.Value) 102 | } 103 | 104 | func (f *FMR) String() string { 105 | if f == nil { 106 | return "nf.I($0)" 107 | } 108 | var args []string 109 | invalid := "invalid_fmr" 110 | for _, arg := range f.Args { 111 | switch arg.Type { 112 | case "string": 113 | if s, ok := arg.Value.(string); ok { 114 | args = append(args, strconv.Quote(s)) 115 | } else { 116 | return invalid 117 | } 118 | case "int": 119 | if i, ok := arg.Value.(*big.Int); ok { 120 | args = append(args, i.String()) 121 | } else { 122 | return invalid 123 | } 124 | case "float": 125 | if f, ok := arg.Value.(*big.Float); ok { 126 | args = append(args, f.String()) 127 | } else { 128 | return invalid 129 | } 130 | case "func": 131 | if fmr, ok := arg.Value.(*FMR); ok { 132 | args = append(args, fmr.String()) 133 | } else { 134 | return invalid 135 | } 136 | case "index": 137 | if i, ok := arg.Value.(int); ok { 138 | args = append(args, fmt.Sprintf("$%d", i)) 139 | } else { 140 | return invalid 141 | } 142 | default: 143 | return invalid 144 | } 145 | } 146 | return fmt.Sprintf("%s(%s)", f.Fn, strings.Join(args, ",")) 147 | } 148 | -------------------------------------------------------------------------------- /earley_terminal_match.go: -------------------------------------------------------------------------------- 1 | package fmr 2 | 3 | import ( 4 | "strings" 5 | 6 | "github.com/liuzl/ling" 7 | ) 8 | 9 | func terminalMatch(term *Term, token *ling.Token) bool { 10 | if term == nil || token == nil || term.Type != Terminal { 11 | return false 12 | } 13 | t := gTokens.get(term.Value) 14 | if term.Meta == nil || t == nil { 15 | if term.Value == token.Text { 16 | return true 17 | } 18 | } else { 19 
| flags, _ := term.Meta.(string) 20 | switch { 21 | case strings.Contains(flags, "l"): 22 | if t.Annotations[ling.Lemma] == token.Annotations[ling.Lemma] { 23 | return true 24 | } 25 | case strings.Contains(flags, "i"): 26 | if strings.ToLower(t.Annotations[ling.Norm]) == 27 | strings.ToLower(token.Annotations[ling.Norm]) { 28 | return true 29 | } 30 | } 31 | } 32 | return false 33 | } 34 | -------------------------------------------------------------------------------- /earley_test.go: -------------------------------------------------------------------------------- 1 | package fmr 2 | 3 | import ( 4 | "bytes" 5 | "testing" 6 | 7 | "zliu.org/goutil" 8 | ) 9 | 10 | func TestEarleyParse(t *testing.T) { 11 | var grammar = ` = "a" | "a" "+" {nf.math.sum($1, $3)};` 12 | //grammar = ` = "a";` 13 | strs := []string{ 14 | "a", 15 | "a + a", 16 | //"a + a + a", 17 | //"a + a + a + a", 18 | //"a + a + a + a + a", 19 | //"a + a + a + a + a + a", 20 | //"a + a + a + a + a + a + a", 21 | "+ a", 22 | } 23 | g, err := GrammarFromString(grammar, "a") 24 | if err != nil { 25 | t.Error(err) 26 | } 27 | _, err = goutil.JSONMarshalIndent(g, "", " ") 28 | if err != nil { 29 | t.Error(err) 30 | } 31 | //fmt.Println(string(b)) 32 | for _, text := range strs { 33 | p, err := g.EarleyParse(text, "expr") 34 | if err != nil { 35 | t.Error(err) 36 | } 37 | t.Logf("%+v\n", p) 38 | for _, finalState := range p.finalStates { 39 | trees := p.GetTrees(finalState) 40 | t.Log("tree number:", len(trees)) 41 | for _, tree := range trees { 42 | var buf bytes.Buffer 43 | tree.Print(&buf) 44 | t.Log(buf.String()) 45 | tree.TreePrint() 46 | b, err := goutil.JSONMarshalIndent(tree, "", " ") 47 | if err != nil { 48 | t.Error(err) 49 | } 50 | t.Logf("%+v", string(b)) 51 | } 52 | } 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /examples/arithmetic/arithmetic.grammar: -------------------------------------------------------------------------------- 1 | = 
"one" {nf.I(1)} 2 | | "two" {nf.I(2)} 3 | | "three" {nf.I(3)} 4 | | "four" {nf.I(4)} 5 | | "five" {nf.I(5)} 6 | | "six" {nf.I(6)} 7 | | "seven" {nf.I(7)} 8 | | "eight" {nf.I(8)} 9 | | "nine" {nf.I(9)} 10 | | "ten" {nf.I(10)} 11 | | "一" {nf.I(1)} 12 | | "二" {nf.I(2)} 13 | | "三" {nf.I(3)} 14 | | "四" {nf.I(4)} 15 | | "五" {nf.I(5)} 16 | | "六" {nf.I(6)} 17 | | "七" {nf.I(7)} 18 | | "八" {nf.I(8)} 19 | | "九" {nf.I(9)} 20 | | "十" {nf.I(10)} 21 | | "minus" {nf.math.sub($1, $3)} 22 | | "减" {nf.math.sub($1, $3)} 23 | | "plus" {nf.math.sum($1, $3)} 24 | | "add" {nf.math.sum($1, $3)} 25 | | "加" {nf.math.sum($1, $3)} 26 | | "times" {nf.math.mul($1, $3)} 27 | | "multiply by" {nf.math.mul($1, $3)} 28 | | "乘" {nf.math.mul($1, $3)} 29 | | "minus" {nf.math.neg($2)} 30 | | "负" {nf.math.neg($2)}; 31 | 32 | -------------------------------------------------------------------------------- /examples/arithmetic/input.txt: -------------------------------------------------------------------------------- 1 | minus three minus two 2 | two times two plus three 3 | one add two multiply by two plus three 4 | 二加五减三 5 | 我的二加五减三 6 | three plus three minus two 7 | minus four 8 | 四加七等于几?八减二等于几? 
9 | 十九八七六五四三二一 10 | -------------------------------------------------------------------------------- /examples/arithmetic/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "bufio" 5 | "flag" 6 | "fmt" 7 | "io" 8 | "io/ioutil" 9 | "os" 10 | 11 | "github.com/golang/glog" 12 | "github.com/liuzl/fmr" 13 | "github.com/robertkrimen/otto" 14 | ) 15 | 16 | var ( 17 | grammar = flag.String("g", "arithmetic.grammar", "grammar file") 18 | js = flag.String("js", "math.js", "javascript file") 19 | input = flag.String("i", "", "file of original text to read") 20 | ) 21 | 22 | func main() { 23 | flag.Parse() 24 | //bnf.Debug = true 25 | g, err := fmr.GrammarFromFile(*grammar) 26 | if err != nil { 27 | glog.Fatal(err) 28 | } 29 | 30 | script, err := ioutil.ReadFile(*js) 31 | if err != nil { 32 | glog.Fatal(err) 33 | } 34 | vm := otto.New() 35 | if _, err = vm.Run(script); err != nil { 36 | glog.Fatal(err) 37 | } 38 | 39 | var in *os.File 40 | if *input == "" { 41 | in = os.Stdin 42 | } else { 43 | in, err = os.Open(*input) 44 | if err != nil { 45 | glog.Fatal(err) 46 | } 47 | defer in.Close() 48 | } 49 | br := bufio.NewReader(in) 50 | 51 | for { 52 | line, c := br.ReadString('\n') 53 | if c == io.EOF { 54 | break 55 | } 56 | if c != nil { 57 | glog.Fatal(c) 58 | } 59 | fmt.Println(line) 60 | //p, err := g.EarleyParse("number", line) 61 | ps, err := g.EarleyParseMaxAll(line, "number") 62 | if err != nil { 63 | glog.Fatal(err) 64 | } 65 | for i, p := range ps { 66 | for _, f := range p.GetFinalStates() { 67 | trees := p.GetTrees(f) 68 | //fmt.Printf("%+v\n", p) 69 | fmt.Printf("p%d tree number:%d\n", i, len(trees)) 70 | for _, tree := range trees { 71 | //tree.Print(os.Stdout) 72 | sem, err := tree.Semantic() 73 | if err != nil { 74 | glog.Fatal(err) 75 | } 76 | result, err := vm.Run(sem) 77 | if err != nil { 78 | glog.Fatal(err) 79 | } 80 | fmt.Printf("%s = %v\n", sem, result) 81 | } 82 | } 83 | } 84 | 
fmt.Println() 85 | } 86 | } 87 | -------------------------------------------------------------------------------- /examples/arithmetic/math.js: -------------------------------------------------------------------------------- 1 | var nf = nf || {}; 2 | nf.math = {}; 3 | 4 | nf.math.sum = function(x, y) { 5 | //console.log(typeof(x), typeof(y)); 6 | //console.log(x, y); 7 | return x + y; 8 | }; 9 | nf.math.sub = function(x, y) { 10 | return x - y; 11 | } 12 | nf.math.mul = function(x, y) { 13 | return x * y; 14 | } 15 | nf.math.neg = function(x) { 16 | return -x; 17 | } 18 | 19 | //console.log(nf.math.sum(1,1)); 20 | 21 | -------------------------------------------------------------------------------- /examples/builtin/any.grammar: -------------------------------------------------------------------------------- 1 | = (any) "有限公司" {nf.company($1,$2,$3)} 2 | | (any) "有限公司" {nf.company($1,$2,$4,$3)} 3 | | (any) "(" ")" "有限公司" {nf.company($3,$1,$5)} 4 | | (any) "(" ")" "有限公司" {nf.company($4,$1,$6,$2)} 5 | ; 6 | = "北京" {nf.I("beijing")} 7 | | "天津" {nf.I("tianjin")} 8 | | "上海" {nf.I("shanghai")} 9 | ; 10 | 11 | = "科技" 12 | | "网络技术" 13 | | "计算机技术" 14 | ; 15 | -------------------------------------------------------------------------------- /examples/builtin/any.txt: -------------------------------------------------------------------------------- 1 | 搜索行业的北京搜狗有限公司是一家很有意思的公司 2 | 天津机器智能有限公司,天津有这家公司吗? 
3 | 北京tencent sogou baidu有限公司 4 | 北京Jörgensen and Art有限公司 5 | 北京сайт Московского有限公司 6 | 北京チャンネル有限公司 7 | 北京대통령의有限公司 8 | 北京ทันทุกเหตุการ有限公司 9 | 腾讯科技(北京)有限公司 10 | 北京小熊博望科技有限公司 11 | 百度在线网络技术(北京)有限公司 12 | 携程计算机技术(上海)有限公司 13 | 天津海量计算机技术有限公司 14 | -------------------------------------------------------------------------------- /examples/builtin/builtin.grammar: -------------------------------------------------------------------------------- 1 | = "zero" {nf.I(0)} | "o" {nf.I(0)} 2 | | "one" {nf.I(1)} | "a" {nf.I(1)} | "an" {nf.I(1)} 3 | | "two" {nf.I(2)} 4 | | "three" {nf.I(3)} 5 | | "four" {nf.I(4)} 6 | | "five" {nf.I(5)} 7 | | "six" {nf.I(6)} 8 | | "seven" {nf.I(7)} 9 | | "eight" {nf.I(8)} 10 | | "nine" {nf.I(9)} 11 | ; 12 | 13 | = "ten" {nf.I(10)} 14 | | "eleven" {nf.I(11)} 15 | | "twelve" {nf.I(12)} 16 | | "thirteen" {nf.I(13)} 17 | | "fourteen" {nf.I(14)} 18 | | "fifteen" {nf.I(15)} 19 | | "sixteen" {nf.I(16)} 20 | | "seventeen" {nf.I(17)} 21 | | "eighteen" {nf.I(18)} 22 | | "nineteen" {nf.I(19)} 23 | ; 24 | 25 | = "first" {nf.I(1)} 26 | | "second" {nf.I(2)} 27 | | "third" {nf.I(3)} 28 | | "fourth" {nf.I(4)} 29 | | "fifth" {nf.I(5)} 30 | | "sixth" {nf.I(6)} 31 | | "seventh" {nf.I(7)} 32 | | "eighth" {nf.I(8)} 33 | | "ninth" {nf.I(9)} 34 | ; 35 | 36 | = "tenth" {nf.I(10)} 37 | | "eleventh" {nf.I(11)} 38 | | "twelfth" {nf.I(12)} 39 | | "thirteenth" {nf.I(13)} 40 | | "fourteenth" {nf.I(14)} 41 | | "fifteenth" {nf.I(15)} 42 | | "sixteenth" {nf.I(16)} 43 | | "seventeenth" {nf.I(17)} 44 | | "eighteenth" {nf.I(18)} 45 | | "nineteenth" {nf.I(19)} 46 | ; 47 | 48 | = "twenty" {nf.I(20)} 49 | | "thirty" {nf.I(30)} 50 | | "forty" {nf.I(40)} 51 | | "fifty" {nf.I(50)} 52 | | "sixty" {nf.I(60)} 53 | | "seventy" {nf.I(70)} 54 | | "eighty" {nf.I(80)} 55 | | "ninety" {nf.I(90)} 56 | ; 57 | 58 | = "twentieth" {nf.I(20)} 59 | | "thirtieth" {nf.I(30)} 60 | | "fortieth" {nf.I(40)} 61 | | "fiftieth" {nf.I(50)} 62 | | "sixtieth" {nf.I(60)} 63 | | "seventieth" {nf.I(70)} 64 | | "eightieth" 
{nf.I(80)} 65 | | "ninetieth" {nf.I(90)} 66 | ; 67 | 68 | = "hundred" {nf.I(100)} 69 | | "thousand" {nf.I(1000)} 70 | | "million" {nf.I(1000000)} 71 | | "billion" {nf.I(1000000000)} 72 | | "trillion" {nf.I(1000000000000)} 73 | | "quadrillion" {nf.I(1000000000000000)} 74 | | "quintillion" {nf.I(1000000000000000000)} 75 | | "sextillion" {nf.I(1000000000000000000000)} 76 | | "septillion" {nf.I(1000000000000000000000000)} 77 | | "octillion" {nf.I(1000000000000000000000000000)} 78 | | "nonillion" {nf.I(1000000000000000000000000000000)} 79 | | "decillion" {nf.I(1000000000000000000000000000000000)} 80 | ; 81 | 82 | = {nf.I($1)} 83 | | {nf.I($1)} 84 | | {nf.I($1)} 85 | | {nf.math.sum($1, $2)} 86 | | "and" {nf.math.sum($1, $3)} 87 | | "-" {nf.math.sum($1, $3)} 88 | | "," {nf.math.sum($1, $3)} 89 | ; 90 | 91 | = {nf.math.mul($1, $2)} 92 | | "-" {nf.math.mul($1, $3)} 93 | | {nf.math.mul($1, $2)} 94 | ; 95 | 96 |
= {nf.I($1)} 97 | | {nf.I($1)} 98 | ; 99 | 100 | =
{nf.I($1)} 101 | |
{nf.math.sum($1, $2)} 102 | |
"and" {nf.math.sum($1, $3)} 103 | ; 104 | -------------------------------------------------------------------------------- /examples/builtin/cn_input.txt: -------------------------------------------------------------------------------- 1 | 二千零一十四 2 | 十万 3 | 一万五千 4 | 十万二千 5 | 十万八千六百零四 6 | 二亿二千零二万二千二百二十二 7 | 二十二亿零二万二千二百 8 | 二十二亿二万二千二百 9 | 二百五十一 10 | -------------------------------------------------------------------------------- /examples/builtin/cn_num.grammar: -------------------------------------------------------------------------------- 1 | = "一" {nf.I(1)} 2 | | "二" {nf.I(2)} 3 | | "三" {nf.I(3)} 4 | | "四" {nf.I(4)} 5 | | "五" {nf.I(5)} 6 | | "六" {nf.I(6)} 7 | | "七" {nf.I(7)} 8 | | "八" {nf.I(8)} 9 | | "九" {nf.I(9)} 10 | ; 11 | 12 | = "零" {nf.I(0)} 13 | | "〇" {nf.I(0)} 14 | ; 15 | 16 | = "十" {nf.I(10)}; 17 | = "百" {nf.I(100)}; 18 | = "千" {nf.I(1000)}; 19 | 20 | = "万" {nf.I(10000)}; 21 | = "亿" {nf.I(100000000)}; 22 | 23 | = {nf.I($1)} 24 | | {nf.math.mul($1, $2)} 25 | | {nf.math.sum(nf.math.mul($1, $2), $3)} 26 | | {nf.I($1)} 27 | ; 28 | 29 | = {nf.math.mul($1, $2)} 30 | | {nf.math.sum(nf.math.mul($1, $2), $3)} 31 | | {nf.math.sum(nf.math.mul($1, $2), $4)} 32 | | {nf.I($1)} 33 | ; 34 | 35 | = {nf.math.mul($1, $2)} 36 | | {nf.math.sum(nf.math.mul($1, $2), $3)} 37 | | {nf.math.sum(nf.math.mul($1, $2), $4)} 38 | | {nf.math.sum(nf.math.mul($1, $2), $4)} 39 | | {nf.I($1)} 40 | ; 41 | 42 | = {nf.math.mul($1, $2)} 43 | | {nf.math.sum(nf.math.mul($1, $2), $3)} 44 | | {nf.I($1)} 45 | ; 46 | 47 | = {nf.math.mul($1, $2)} 48 | | {nf.math.sum(nf.math.mul($1, $2), $3)} 49 | | {nf.math.sum(nf.math.mul($1, $2), $4)} 50 | | {nf.I($1)} 51 | ; 52 | 53 | = {nf.I($1)}; 54 | 55 | -------------------------------------------------------------------------------- /examples/builtin/company.js: -------------------------------------------------------------------------------- 1 | var nf = nf || {}; 2 | 3 | nf.company = function(loc, name, type, area) { 4 | return {"loc":loc, "name":name, 
"type":type, "area":area} 5 | } 6 | 7 | //console.log(nf.math.sum(1,1)); 8 | 9 | -------------------------------------------------------------------------------- /examples/builtin/input.txt: -------------------------------------------------------------------------------- 1 | six million five thousand 2 | twenty thousand five hundred and sixty nine 3 | six-million five-thousand and two 4 | six-million 5 | five-thousand 6 | two 7 | -------------------------------------------------------------------------------- /examples/builtin/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "bufio" 5 | "flag" 6 | "fmt" 7 | "io" 8 | "io/ioutil" 9 | "os" 10 | "strings" 11 | 12 | "github.com/golang/glog" 13 | "github.com/liuzl/fmr" 14 | "github.com/robertkrimen/otto" 15 | "zliu.org/goutil" 16 | ) 17 | 18 | var ( 19 | grammar = flag.String("g", "builtin.grammar", "grammar file") 20 | js = flag.String("js", "math.js", "javascript file") 21 | input = flag.String("i", "", "file of original text to read") 22 | debug = flag.Bool("debug", false, "debug mode") 23 | start = flag.String("start", "number", "start rule") 24 | ) 25 | 26 | func main() { 27 | defer func() { 28 | if err := recover(); err != nil { 29 | fmt.Println(err) 30 | } 31 | }() 32 | flag.Parse() 33 | if *debug { 34 | fmr.Debug = true 35 | } 36 | g, err := fmr.GrammarFromFile(*grammar) 37 | if err != nil { 38 | glog.Fatal(err) 39 | } 40 | if *debug { 41 | b, err := goutil.JSONMarshalIndent(g, "", " ") 42 | if err != nil { 43 | glog.Fatal(err) 44 | } 45 | fmt.Printf("%s\n", string(b)) 46 | } 47 | script, err := ioutil.ReadFile(*js) 48 | if err != nil { 49 | glog.Fatal(err) 50 | } 51 | vm := otto.New() 52 | if _, err = vm.Run(script); err != nil { 53 | glog.Fatal(err) 54 | } 55 | 56 | var in *os.File 57 | if *input == "" { 58 | in = os.Stdin 59 | } else { 60 | in, err = os.Open(*input) 61 | if err != nil { 62 | glog.Fatal(err) 63 | } 64 | defer 
in.Close() 65 | } 66 | br := bufio.NewReader(in) 67 | 68 | for { 69 | line, c := br.ReadString('\n') 70 | if c == io.EOF { 71 | break 72 | } 73 | if c != nil { 74 | glog.Fatal(c) 75 | } 76 | line = strings.TrimSpace(line) 77 | fmt.Println(line) 78 | 79 | trees, err := g.ExtractMaxAll(line, *start) 80 | if err != nil { 81 | glog.Fatal(err) 82 | } 83 | for _, tree := range trees { 84 | //tree.Print(os.Stdout) 85 | sem, err := tree.Semantic() 86 | //fmt.Println(sem) 87 | if err != nil { 88 | glog.Fatal(err) 89 | } 90 | if *debug { 91 | fmt.Printf("%s = ?\n", sem) 92 | } 93 | result, err := vm.Run(sem) 94 | if err != nil { 95 | glog.Error(err) 96 | } 97 | rs, _ := result.Export() 98 | fmt.Printf("%s = %+v\n", sem, rs) 99 | //eval, err := tree.Eval() 100 | //fmt.Printf("Eval: %s, Err: %+v\n", eval, err) 101 | } 102 | fmt.Println() 103 | } 104 | } 105 | -------------------------------------------------------------------------------- /examples/builtin/math.js: -------------------------------------------------------------------------------- 1 | var nf = nf || {}; 2 | nf.math = {}; 3 | 4 | nf.math.sum = function(x, y) { 5 | //console.log(typeof(x), typeof(y)); 6 | //console.log(x, y); 7 | return x + y; 8 | }; 9 | nf.math.sub = function(x, y) { 10 | return x - y; 11 | } 12 | nf.math.mul = function(x, y) { 13 | return x * y; 14 | } 15 | nf.math.neg = function(x) { 16 | return -x; 17 | } 18 | 19 | //console.log(nf.math.sum(1,1)); 20 | 21 | -------------------------------------------------------------------------------- /examples/builtin/tianjin.txt: -------------------------------------------------------------------------------- 1 | 天津机器智能有限公司,天津有这家公司吗? 
2 | -------------------------------------------------------------------------------- /examples/math/README.md: -------------------------------------------------------------------------------- 1 | ```sh 2 | go build 3 | ./math -eval -i input.txt 4 | ``` 5 | -------------------------------------------------------------------------------- /examples/math/grammars/latex.math.grammar: -------------------------------------------------------------------------------- 1 | = {nf.I($1)}; 2 | 3 | = "+" {nf.math.sum($1,$3)} 4 | | "-" {nf.math.sub($1,$3)} 5 | | {nf.I($1)} 6 | ; 7 | 8 | = "*" {nf.math.mul($1,$3)} 9 | | "×" {nf.math.mul($1,$3)} 10 | | "\\times" {nf.math.mul($1,$3)} 11 | | {nf.math.mul($1,$2)} 12 | | "/" {nf.math.div($1,$3)} 13 | | "÷" {nf.math.div($1,$3)} 14 | | "\\frac" "{" "}" "{" "}" {nf.math.div($3,$6)} 15 | | {nf.I($1)} 16 | ; 17 | 18 | = "^" {nf.math.pow($1,$3)} 19 | | "**" {nf.math.pow($1,$3)} 20 | | "\\sqrt" "{" "}" {nf.math.pow($3,0.5)} 21 | | {nf.I($1)} 22 | ; 23 | 24 | = "+" {nf.I($2)} 25 | ; 26 | 27 | = "\(" ")" {nf.I($2)} 28 | | "{" "}" {nf.I($2)} 29 | | "|" "|" {nf.math.abs($2)} 30 | | {nf.math.to_number($1)} 31 | | {nf.math.expression($1)} 32 | ; 33 | 34 | = "=" {vf.be.eq($1,$3)} 35 | | "\\neq" {vf.be.neq($1,$3)} 36 | | "≠" {vf.be.neq($1,$3)} 37 | | ">" {vf.be.gt($1,$3)} 38 | | "≥" {vf.be.geq($1,$3)} 39 | | "\\geq" {vf.be.geq($1,$3)} 40 | | "<" {vf.be.lt($1,$3)} 41 | | "≤" {vf.be.leq($1,$3)} 42 | | "\\leq" {vf.be.leq($1,$3)} 43 | | "\\ " {nf.math.op($3,$1,$4)} 44 | ; 45 | 46 | = "f"|"g"; 47 | 48 | = "\(" ")" {nf.math.func($1,$3)}; 49 | 50 | 51 | = {nf.I($1)} 52 | | {nf.I($1)} 53 | ; 54 | -------------------------------------------------------------------------------- /examples/math/grammars/math.en.grammar: -------------------------------------------------------------------------------- 1 | /* 2 | * Author: Zhanliang Liu 3 | */ 4 | 5 | #include "number.en.grammar" 6 | 7 | // for local grammars that come from regex tagger 8 | = 
{nf.math.to_number($1)}; 9 | 10 | = "pair" {nf.I(2)} 11 | | "couple" {nf.I(2)} 12 | | "dozen" {nf.I(12)} 13 | | "percent" {nf.I(0.01)} 14 | ; 15 | = {nf.I($1)} 16 | | {nf.math.mul($1,$2)} 17 | ; 18 | 19 | = "plus" {nf.math.sum($1,$3)} 20 | | "added to" {nf.math.sum($3, $1)} 21 | | "sum of" "and" {nf.math.sum($2,$4)} 22 | | "the sum of" "and" {nf.math.sum($2,$4)} 23 | ; 24 | 25 | = "minus" {nf.math.sub($1,$3)} 26 | | "subtract" "from" {nf.math.sub($4,$2)} 27 | | "deduct" "from" {nf.math.sub($4,$2)} 28 | | "subtracted from" {nf.math.sub($3,$1)} 29 | ; 30 | 31 | = "divides" {nf.math.div($3,$1)} 32 | | "divided by" {nf.math.div($1,$3)} 33 | | "over" {nf.math.div($1,$3)} 34 | ; 35 | 36 | = "times" {nf.math.mul($1,$3)} 37 | | "multiply by" {nf.math.mul($1,$3)} 38 | | "multiply" "by" {nf.math.mul($2,$4)} 39 | | "product of" "and" {nf.math.mul($2,$4)} 40 | | "half of" {nf.math.mul($2,0.5)} 41 | ; 42 | 43 | = "raised to the power of" {nf.math.pow($1,$3)} 44 | | "raised to the exponent of" {nf.math.pow($1,$3)} 45 | | "square of" {nf.math.pow($2,2)} 46 | | "cube of" {nf.math.pow($2,3)} 47 | | "square root of" {nf.math.pow($2,0.5)} 48 | | "cubic root of" {nf.math.pow($2,nf.math.div(1,3))} 49 | ; 50 | 51 | = "be equal to" {vf.be.equ($1,$2)} 52 | | "equal" {vf.be.equ($1,$2)} 53 | | "be" {vf.be.equ($1,$2)} 54 | ; 55 | 56 | -------------------------------------------------------------------------------- /examples/math/grammars/math.grammar: -------------------------------------------------------------------------------- 1 | #include "math.en.grammar" 2 | #include "math.zh.grammar" 3 | 4 | = {nf.I($1)}; 5 | 6 | = "+" {nf.math.sum($1,$3)} 7 | | "-" {nf.math.sub($1,$3)} 8 | | {nf.I($1)} 9 | ; 10 | 11 | = "*" {nf.math.mul($1,$3)} 12 | | "×" {nf.math.mul($1,$3)} 13 | | "/" {nf.math.div($1,$3)} 14 | | "÷" {nf.math.div($1,$3)} 15 | | {nf.I($1)} 16 | ; 17 | 18 | = "^" {nf.math.pow($1,$3)} 19 | | "**" {nf.math.pow($1,$3)} 20 | | {nf.I($1)} 21 | ; 22 | 23 | = "\(" ")" {nf.I($2)} 24 | | "+" 
{nf.I($2)} 25 | | "-" {nf.math.neg($2)} 26 | | {nf.I($1)} 27 | | {nf.math.expression($1)} 28 | ; 29 | 30 | -------------------------------------------------------------------------------- /examples/math/grammars/math.zh.grammar: -------------------------------------------------------------------------------- 1 | /* 2 | * Author: Zhanliang Liu 3 | */ 4 | 5 | #include "number.zh.grammar" 6 | 7 | // for local grammars that come from regex tagger 8 | = {nf.math.to_number($1)}; 9 | 10 | = "打" {nf.math.mul($1, 12)}; 11 | 12 | = "加" {nf.math.sum($1,$3)} 13 | | "加上" {nf.math.sum($1,$3)} 14 | | "与" "的和" {nf.math.sum($1,$3)} 15 | ; 16 | 17 | = "减" {nf.math.sub($1,$3)} 18 | | "减去" {nf.math.sub($1,$3)} 19 | | "与" "的差" {nf.math.sub($1,$3)} 20 | ; 21 | 22 | = "分之" {nf.math.div($3,$1)} 23 | ; 24 | 25 | = "乘" {nf.math.mul($1,$3)} 26 | | "乘以" {nf.math.mul($1,$3)} 27 | | "与" "的积" {nf.math.mul($1,$3)} 28 | | "的" "倍" {nf.math.mul($1,$3)} 29 | | "的一半" {nf.math.mul($1,0.5)} 30 | | "的" {nf.math.mul($1,$3)} 31 | ; 32 | 33 | = "除" {nf.math.div($3,$1)} 34 | | "除以" {nf.math.div($1,$3)} 35 | | {nf.I($1)} 36 | ; 37 | 38 | = "的" "次方" {nf.math.pow($1,$3)} 39 | | "的" "次幂" {nf.math.pow($1,$3)} 40 | | "的平方" {nf.math.pow($1,2)} 41 | | "的立方" {nf.math.pow($1,3)} 42 | | "的平方根" {nf.math.pow($1,0.5)} 43 | | "的立方根" {nf.math.pow($1,nf.math.div(1,3))} 44 | ; 45 | 46 | -------------------------------------------------------------------------------- /examples/math/grammars/number.en.grammar: -------------------------------------------------------------------------------- 1 | = "zero" {nf.I(0)} 2 | | "one" {nf.I(1)} 3 | | "two" {nf.I(2)} 4 | | "three" {nf.I(3)} 5 | | "four" {nf.I(4)} 6 | | "five" {nf.I(5)} 7 | | "six" {nf.I(6)} 8 | | "seven" {nf.I(7)} 9 | | "eight" {nf.I(8)} 10 | | "nine" {nf.I(9)} 11 | ; 12 | 13 | = {nf.I($1)} 14 | | {nf.util.concat($1,$2)} 15 | ; 16 | 17 | = "ten" {nf.I(10)} 18 | | "eleven" {nf.I(11)} 19 | | "twelve" {nf.I(12)} 20 | | "thirteen" {nf.I(13)} 21 | | "fourteen" {nf.I(14)} 22 | | 
"fifteen" {nf.I(15)} 23 | | "sixteen" {nf.I(16)} 24 | | "seventeen" {nf.I(17)} 25 | | "eighteen" {nf.I(18)} 26 | | "nineteen" {nf.I(19)} 27 | ; 28 | 29 | = "twenty" {nf.I(20)} 30 | | "thirty" {nf.I(30)} 31 | | "forty" {nf.I(40)} 32 | | "fifty" {nf.I(50)} 33 | | "sixty" {nf.I(60)} 34 | | "seventy" {nf.I(70)} 35 | | "eighty" {nf.I(80)} 36 | | "ninety" {nf.I(90)} 37 | ; 38 | 39 | = {nf.I($1)} 40 | | {nf.I($1)} 41 | | {nf.I($1)} 42 | | {nf.math.sum($1,$2)} 43 | | "-" {nf.math.sum($1,$3)} 44 | ; 45 | 46 | = "hundred" {nf.I(100)}; 47 | = "thousand" {nf.I(1000)}; 48 | = "million" {nf.I(1000000)}; 49 | = "billion" {nf.I(1000000000)}; 50 | = "trillion" {nf.I(1000000000000)}; 51 | 52 | = {nf.I($1)} 53 | | {nf.I($1)} 54 | | {nf.math.mul($1,$2)} 55 | | {nf.math.sum(nf.math.mul($1,$2),$3)} 56 | | "and" {nf.math.sum(nf.math.mul($1,$2),$4)} 57 | ; 58 | 59 | = {nf.I($1)} 60 | | {nf.I($1)} 61 | | {nf.math.mul($1,$2)} 62 | | {nf.math.sum(nf.math.mul($1,$2),$3)} 63 | | "and" {nf.math.sum(nf.math.mul($1,$2),$4)} 64 | ; 65 | 66 | = {nf.I($1)} 67 | | {nf.I($1)} 68 | | {nf.math.mul($1,$2)} 69 | | {nf.math.sum(nf.math.mul($1,$2),$3)} 70 | | "and" {nf.math.sum(nf.math.mul($1,$2),$4)} 71 | ; 72 | 73 | = {nf.I($1)} 74 | | {nf.I($1)} 75 | | {nf.math.mul($1,$2)} 76 | | {nf.math.sum(nf.math.mul($1,$2),$3)} 77 | | "and" {nf.math.sum(nf.math.mul($1,$2),$4)} 78 | ; 79 | 80 | = {nf.I($1)} 81 | | {nf.I($1)} 82 | | {nf.math.mul($1,$2)} 83 | | {nf.math.sum(nf.math.mul($1,$2),$3)} 84 | | "and" {nf.math.sum(nf.math.mul($1,$2),$4)} 85 | ; 86 | 87 | = "point" {nf.math.decimal($2)}; 88 | 89 | = {nf.I($1)}; 90 | 91 | = {nf.I($1)} 92 | | {nf.I($1)} 93 | | {nf.math.sum($1,$2)} 94 | | "and" {nf.math.sum($1,$3)} 95 | ; 96 | 97 | // = {nf.math.mul($1,$2)}; 98 | // = {nf.math.mul($1,$2)}; 99 | -------------------------------------------------------------------------------- /examples/math/grammars/number.zh.grammar: -------------------------------------------------------------------------------- 1 | = "一" 
{nf.I(1)} 2 | | "二" {nf.I(2)} 3 | | "三" {nf.I(3)} 4 | | "四" {nf.I(4)} 5 | | "五" {nf.I(5)} 6 | | "六" {nf.I(6)} 7 | | "七" {nf.I(7)} 8 | | "八" {nf.I(8)} 9 | | "九" {nf.I(9)} 10 | | "壹" {nf.I(1)} 11 | | "贰" {nf.I(2)} 12 | | "叁" {nf.I(3)} 13 | | "肆" {nf.I(4)} 14 | | "伍" {nf.I(5)} 15 | | "陆" {nf.I(6)} 16 | | "柒" {nf.I(7)} 17 | | "捌" {nf.I(8)} 18 | | "玖" {nf.I(9)} 19 | | "两" {nf.I(2)} 20 | | "俩" {nf.I(2)} 21 | | "仨" {nf.I(3)} 22 | ; 23 | 24 | = "零" {nf.I(0)} 25 | | "〇" {nf.I(0)} 26 | ; 27 | 28 | = {nf.I($1)} | {nf.I($1)} | {nf.I($1)}; 29 | 30 | = {nf.I($1)} 31 | | {nf.util.concat($1, $2)} 32 | ; 33 | 34 | = "十" {nf.I(10)} 35 | | "拾" {nf.I(10)} 36 | ; 37 | 38 | = "百" {nf.I(100)} 39 | | "佰" {nf.I(100)} 40 | ; 41 | 42 | = "千" {nf.I(1000)} 43 | | "仟" {nf.I(1000)} 44 | ; 45 | 46 | = "万" {nf.I(10000)}; 47 | 48 | = "亿" {nf.I(100000000)} 49 | | "万万" {nf.I(100000000)} 50 | ; 51 | 52 | = {nf.I($1)} 53 | | {nf.math.sum($1,$2)} 54 | | {nf.math.mul($1, $2)} 55 | | {nf.math.sum(nf.math.mul($1, $2), $3)} 56 | | {nf.I($1)} 57 | | {nf.I($1)} 58 | ; 59 | 60 | = {nf.math.mul($1, $2)} 61 | | {nf.math.sum(nf.math.mul($1, $2), nf.math.mul(10,$3))} 62 | | {nf.math.sum(nf.math.mul($1, $2), $3)} 63 | | {nf.math.sum(nf.math.mul($1, $2), $4)} 64 | | {nf.I($1)} 65 | ; 66 | 67 | = {nf.math.mul($1, $2)} 68 | | {nf.math.sum(nf.math.mul($1, $2), nf.math.mul(100,$3))} 69 | | {nf.math.sum(nf.math.mul($1, $2), $3)} 70 | | {nf.math.sum(nf.math.mul($1, $2), $4)} 71 | | {nf.I($1)} 72 | ; 73 | 74 | = {nf.math.mul($1, $2)} 75 | | {nf.math.sum(nf.math.mul($1, $2), nf.math.mul(1000,$3))} 76 | | {nf.math.sum(nf.math.mul($1, $2), $3)} 77 | | {nf.math.sum(nf.math.mul($1, $2), $4)} 78 | | {nf.I($1)} 79 | ; 80 | 81 | = {nf.math.mul($1, $2)} 82 | | {nf.math.sum(nf.math.mul($1, $2), nf.math.mul(10000000,$3))} 83 | | {nf.math.sum(nf.math.mul($1, $2), $3)} 84 | | {nf.math.sum(nf.math.mul($1, $2), $4)} 85 | | {nf.I($1)} 86 | ; 87 | 88 | = "点" {nf.math.decimal($2)}; 89 | 90 | = {nf.I($1)} 91 | | {nf.I($1)} 92 | | 
{nf.math.sum($1,$2)} 93 | ; 94 | 95 | = {nf.I($1)} 96 | | {nf.I($1)} 97 | | {nf.I($1)} 98 | | {nf.I($1)} 99 | ; 100 | -------------------------------------------------------------------------------- /examples/math/input.txt: -------------------------------------------------------------------------------- 1 | six million five thousand plus twenty thousand five hundred and sixty nine 2 | six million five thousand and two plus five 3 | six million plus five thousand 4 | two plus one 5 | the sum of 10 and 789 6 | the sum of 10 and 一百八十二 7 | the sum of 十万八千 and 一百八十二 8 | 四十二与八十八亿的和 9 | 四十二与二的十次方的和 10 | one added to 四十raised to the power of 1.5 11 | -------------------------------------------------------------------------------- /examples/math/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "bufio" 5 | "flag" 6 | "fmt" 7 | "io" 8 | "io/ioutil" 9 | "os" 10 | "runtime/pprof" 11 | "strings" 12 | 13 | "github.com/golang/glog" 14 | "github.com/liuzl/fmr" 15 | "github.com/robertkrimen/otto" 16 | ) 17 | 18 | var ( 19 | grammar = flag.String("g", "grammars/math.grammar", "grammar file") 20 | js = flag.String("js", "math.js", "javascript file") 21 | input = flag.String("i", "", "file of original text to read") 22 | start = flag.String("start", "number", "start rule") 23 | eval = flag.Bool("eval", false, "execute flag") 24 | debug = flag.Bool("debug", false, "debug mode") 25 | cpuprofile = flag.String("cpuprofile", "", "write cpu profile to file") 26 | ) 27 | 28 | func main() { 29 | flag.Parse() 30 | if *debug { 31 | fmr.Debug = true 32 | } 33 | 34 | if *cpuprofile != "" { 35 | f, err := os.Create(*cpuprofile) 36 | if err != nil { 37 | glog.Fatal(err) 38 | } 39 | pprof.StartCPUProfile(f) 40 | defer pprof.StopCPUProfile() 41 | } 42 | g, err := fmr.GrammarFromFile(*grammar) 43 | if err != nil { 44 | glog.Fatal(err) 45 | } 46 | script, err := ioutil.ReadFile(*js) 47 | if err != nil { 48 | glog.Fatal(err) 
49 | } 50 | vm := otto.New() 51 | if _, err = vm.Run(script); err != nil { 52 | glog.Fatal(err) 53 | } 54 | 55 | var in *os.File 56 | if *input == "" { 57 | in = os.Stdin 58 | } else { 59 | in, err = os.Open(*input) 60 | if err != nil { 61 | glog.Fatal(err) 62 | } 63 | defer in.Close() 64 | } 65 | br := bufio.NewReader(in) 66 | 67 | for { 68 | line, c := br.ReadString('\n') 69 | if c == io.EOF { 70 | break 71 | } 72 | if c != nil { 73 | glog.Fatal(c) 74 | } 75 | line = strings.TrimSpace(line) 76 | fmt.Println(line) 77 | if len(line) == 0 { 78 | continue 79 | } 80 | 81 | ps, err := g.EarleyParseMaxAll(line, *start) 82 | if err != nil { 83 | glog.Fatal(err) 84 | } 85 | for i, p := range ps { 86 | for _, f := range p.GetFinalStates() { 87 | trees := p.GetTrees(f) 88 | //fmt.Printf("%+v\n", p) 89 | fmt.Printf("p%d tree number:%d\n", i, len(trees)) 90 | for _, tree := range trees { 91 | //tree.Print(os.Stdout) 92 | sem, err := tree.Semantic() 93 | if err != nil { 94 | glog.Fatal(err) 95 | } 96 | 97 | if !*eval { 98 | fmt.Println(sem) 99 | } else { 100 | result, err := vm.Run(sem) 101 | if err != nil { 102 | glog.Error(sem, err) 103 | continue 104 | } 105 | rs, _ := result.Export() 106 | fmt.Printf("%s => %+v\n", sem, rs) 107 | } 108 | //eval, err := tree.Eval() 109 | //fmt.Printf("Eval: %s, Err: %+v\n", eval, err) 110 | } 111 | } 112 | } 113 | fmt.Println() 114 | } 115 | } 116 | -------------------------------------------------------------------------------- /examples/math/math.js: -------------------------------------------------------------------------------- 1 | var nf = nf || {}; 2 | nf.math = {}; 3 | nf.util = {}; 4 | 5 | nf.list = function(type, cnt) { 6 | //TODO 7 | } 8 | 9 | nf.it = function() {} 10 | nf.what = function() {} 11 | 12 | nf.math.expression = function(s) { 13 | return s.split("").join('*'); 14 | } 15 | 16 | nf.math.to_number = function(s) { 17 | return Number(s); 18 | } 19 | 20 | nf.math.decimal = function(s) { 21 | s = s.toString(); 22 | var n = 
Number(s); 23 | return n / Math.pow(10, s.length); 24 | } 25 | 26 | nf.math.sum = function(x, y) { 27 | return x + y; 28 | } 29 | 30 | nf.math.sub = function(x, y) { 31 | return x - y; 32 | } 33 | 34 | nf.math.mul = function(x, y) { 35 | return x * y; 36 | } 37 | 38 | nf.math.div = function(x, y) { 39 | return x / y; 40 | } 41 | 42 | nf.math.neg = function(x) { 43 | return -x; 44 | } 45 | 46 | nf.math.pow = function(x, y) { 47 | return Math.pow(x, y); 48 | } 49 | 50 | nf.util.concat = function(x, y) { 51 | return x.toString() + y.toString(); 52 | } 53 | -------------------------------------------------------------------------------- /frame_api.go: -------------------------------------------------------------------------------- 1 | package fmr 2 | 3 | import ( 4 | "fmt" 5 | ) 6 | 7 | // FrameFMR parses NL text to FMR 8 | func (g *Grammar) FrameFMR(text string) ([]string, error) { 9 | return g.FrameFMRWithContext("", text) 10 | } 11 | 12 | // FrameFMRWithContext parses NL text to FMR 13 | func (g *Grammar) FrameFMRWithContext(context, text string) ([]string, error) { 14 | frames, err := g.MatchFramesWithContext(context, text) 15 | if err != nil { 16 | return nil, err 17 | } 18 | var ret []string 19 | for k, v := range frames { 20 | f := g.Frames[k.RuleName].Body[k.BodyID].F 21 | terms := g.Frames[k.RuleName].Body[k.BodyID].Terms 22 | var children []*Node 23 | for _, term := range terms { 24 | slots := v.Slots[term.Key()] 25 | if slots == nil || len(slots) == 0 || len(slots[0].Trees) == 0 { 26 | children = append(children, nil) 27 | continue 28 | } 29 | children = append(children, slots[0].Trees[0]) 30 | } 31 | n := &Node{} 32 | str, err := n.fmrStr(f, children, "") 33 | if err != nil { 34 | return nil, err 35 | } 36 | ret = append(ret, str) 37 | } 38 | return ret, nil 39 | } 40 | 41 | // MatchFrames returns the matched frames for NL text 42 | func (g *Grammar) MatchFrames(text string) (map[RbKey]*Frame, error) { 43 | return g.MatchFramesWithContext("", text) 44 | } 
// MatchFramesWithContext returns the matched frames for NL text
func (g *Grammar) MatchFramesWithContext(
	context, text string) (map[RbKey]*Frame, error) {
	// Seed frames with terminal hits and collect candidate start rules.
	frames, starts, err := g.getCandidates(text)
	if err != nil {
		return nil, err
	}
	ps, err := g.EarleyParseAllWithContext(context, text, starts...)
	if err != nil {
		return nil, err
	}
	for _, p := range ps {
		for _, finalState := range p.finalStates {
			tag := p.Tag(finalState)
			pos := p.Boundary(finalState)
			trees := p.GetTrees(finalState)

			// A final state must carry both a rule tag and a text span.
			if tag == "" || pos == nil {
				return nil, fmt.Errorf("invalid parse")
			}

			slot := &Slot{*pos, trees}

			// Only tags that some frame body references as a nonterminal
			// contribute slots; others are ignored.
			ret := g.ruleIndex[tag]
			if ret == nil {
				continue
			}
			for rbKey := range ret.Frames {
				if frames[rbKey] == nil {
					frames[rbKey] = &Frame{make(map[uint64][]*Slot), false}
				}
				t := Term{Value: tag, Type: Nonterminal}
				frames[rbKey].Slots[t.Key()] = append(frames[rbKey].Slots[t.Key()], slot)
				// A frame is complete once every term of its rule body has
				// at least one filled slot.
				if len(frames[rbKey].Slots) >=
					len(g.Frames[rbKey.RuleName].Body[rbKey.BodyID].Terms) {
					frames[rbKey].Complete = true
				}
			}
		}
	}
	return frames, nil
}

// getCandidates scans text with the terminal trie and returns (a) partially
// filled frames keyed by rule-body, and (b) the transitive closure of rule
// names that could start a parse over the matched terminals.
func (g *Grammar) getCandidates(text string) (
	map[RbKey]*Frame, []string, error) {

	matches, err := g.trie.MultiMatch(text)
	if err != nil {
		return nil, nil, err
	}
	frames := map[RbKey]*Frame{}
	rules := map[string]bool{}
	for word, hits := range matches {
		v := g.index[word]
		if v == nil {
			// trie and index are built together; divergence is a bug.
			return nil, nil, fmt.Errorf("%s in trie but not in index", word)
		}
		for rbKey := range v.Frames {
			if frames[rbKey] == nil {
				frames[rbKey] = &Frame{make(map[uint64][]*Slot), false}
			}
			t := Term{Value: word, Type: Terminal}
			// Terminal hits fill slots with a byte span but no parse tree.
			for _, hit := range hits {
				frames[rbKey].Slots[t.Key()] = append(frames[rbKey].Slots[t.Key()],
					&Slot{Pos{hit.StartByte, hit.EndByte}, nil})
			}
			if len(frames[rbKey].Slots) >=
				len(g.Frames[rbKey.RuleName].Body[rbKey.BodyID].Terms) {
				frames[rbKey].Complete = true
			}
		}
		for rbKey := range v.Rules {
			rules[rbKey.RuleName] = true
		}
	}
	var ruleList []string
	for k := range rules {
		ruleList = append(ruleList, k)
	}
	// BFS over ruleIndex: pull in every rule that (transitively) references
	// an already-selected rule as a nonterminal.
	for {
		if len(ruleList) == 0 {
			break
		}
		r := ruleList[0]
		ruleList = ruleList[1:]

		ret := g.ruleIndex[r]
		if ret == nil {
			continue
		}
		for rbKey := range ret.Rules {
			if !rules[rbKey.RuleName] {
				ruleList = append(ruleList, rbKey.RuleName)
				rules[rbKey.RuleName] = true
			}
		}
	}
	var starts []string
	for k := range rules {
		starts = append(starts, k)
	}
	return frames, starts, nil
}
// fmrList wraps its arguments into a generic list value.
func fmrList(items ...interface{}) []interface{} {
	return items
}

// fmrEntity builds an entity value keyed by the entity type, which is the
// first argument rendered as a string. It returns nil when no arguments are
// given or the type renders empty; with a single argument the type maps to
// nil, otherwise it maps to the remaining arguments.
func fmrEntity(items ...interface{}) map[string]interface{} {
	if len(items) == 0 {
		return nil
	}
	// fmt.Sprint with a single operand formats with %v, same as Sprintf("%v", x).
	typ := fmt.Sprint(items[0])
	if typ == "" {
		return nil
	}
	rest := items[1:]
	if len(rest) == 0 {
		return map[string]interface{}{typ: nil}
	}
	return map[string]interface{}{typ: rest}
}
v0.0.0-20200509031100-522aef269e3c 9 | github.com/liuzl/unidecode v0.0.0-20170420112940-fd1463e9cd5b 10 | github.com/mitchellh/hashstructure v1.1.0 11 | github.com/robertkrimen/otto v0.0.0-20221011175642-09fc211e5ab1 12 | github.com/xlab/treeprint v1.1.0 13 | zliu.org/goutil v0.0.0-20220902023633-6fcbc3a43c89 14 | ) 15 | 16 | require ( 17 | crawler.club/dl v0.0.0-20200622075740-2bbb15a2cf94 // indirect 18 | github.com/axgle/mahonia v0.0.0-20180208002826-3358181d7394 // indirect 19 | github.com/cheggaaa/pb v1.0.29 // indirect 20 | github.com/eknkc/basex v1.0.1 // indirect 21 | github.com/golang/snappy v0.0.4 // indirect 22 | github.com/juju/errors v1.0.0 // indirect 23 | github.com/justinas/alice v1.2.0 // indirect 24 | github.com/liuzl/cedar-go v0.0.0-20170805034717-80a9c64b256d // indirect 25 | github.com/liuzl/d v0.0.0-20221017065133-150b00c98eb9 // indirect 26 | github.com/liuzl/da v0.0.0-20180704015230-14771aad5b1d // indirect 27 | github.com/liuzl/filestore v0.0.0-20200229104338-5ea723a6a528 // indirect 28 | github.com/liuzl/segment v0.0.0-20160915185041-762005e7a34f // indirect 29 | github.com/liuzl/store v0.0.0-20190530065605-e2dbcd3c77fc // indirect 30 | github.com/liuzl/tokenizer v0.0.0-20181128060327-56c1056833c1 // indirect 31 | github.com/mattn/go-colorable v0.1.13 // indirect 32 | github.com/mattn/go-isatty v0.0.16 // indirect 33 | github.com/mattn/go-runewidth v0.0.14 // indirect 34 | github.com/rivo/uniseg v0.4.2 // indirect 35 | github.com/rs/xid v1.4.0 // indirect 36 | github.com/rs/zerolog v1.28.0 // indirect 37 | github.com/saintfish/chardet v0.0.0-20120816061221-3af4cd4741ca // indirect 38 | github.com/syndtr/goleveldb v1.0.0 // indirect 39 | github.com/torden/go-strutil v0.1.7 // indirect 40 | golang.org/x/net v0.0.0-20220826154423-83b083e8dc8b // indirect 41 | golang.org/x/sys v0.0.0-20221013171732-95e765b1cc43 // indirect 42 | golang.org/x/text v0.3.7 // indirect 43 | gopkg.in/sourcemap.v1 v1.0.5 // indirect 44 | ) 45 | 
-------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | crawler.club/dl v0.0.0-20200622075740-2bbb15a2cf94 h1:8vuyB8j14xIrixp9tWSQ04D/m8fyXzCoim1cUhU3juY= 2 | crawler.club/dl v0.0.0-20200622075740-2bbb15a2cf94/go.mod h1:UnhSaKtYQlEdkutiJ4UIwCNKxiFgs5/4WJgfIijiOUg= 3 | github.com/adamzy/cedar-go v0.0.0-20170805034717-80a9c64b256d h1:ir/IFJU5xbja5UaBEQLjcvn7aAU01nqU/NUyOBEU+ew= 4 | github.com/axgle/mahonia v0.0.0-20180208002826-3358181d7394 h1:OYA+5W64v3OgClL+IrOD63t4i/RW7RqrAVl9LTZ9UqQ= 5 | github.com/axgle/mahonia v0.0.0-20180208002826-3358181d7394/go.mod h1:Q8n74mJTIgjX4RBBcHnJ05h//6/k6foqmgE45jTQtxg= 6 | github.com/cheggaaa/pb v1.0.29 h1:FckUN5ngEk2LpvuG0fw1GEFx6LtyY2pWI/Z2QgCnEYo= 7 | github.com/cheggaaa/pb v1.0.29/go.mod h1:W40334L7FMC5JKWldsTWbdGjLo0RxUKK73K+TuPxX30= 8 | github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI= 9 | github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU= 10 | github.com/coreos/go-systemd/v22 v22.3.3-0.20220203105225-a9a7ef127534/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc= 11 | github.com/crawlerclub/x v0.1.0 h1:XmEcdwprNZ6ltP9VTUJ7h2PJRETt4KKeN8euXER+gPU= 12 | github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 13 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= 14 | github.com/dustin/go-humanize v1.0.0 h1:VSnTsYCnlFHaM2/igO1h6X3HA71jcobQuxemgkq4zYo= 15 | github.com/dustin/go-humanize v1.0.0/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk= 16 | github.com/eknkc/basex v1.0.1 h1:TcyAkqh4oJXgV3WYyL4KEfCMk9W8oJCpmx1bo+jVgKY= 17 | github.com/eknkc/basex v1.0.1/go.mod h1:k/F/exNEHFdbs3ZHuasoP2E7zeWwZblG84Y7Z59vQRo= 18 | github.com/fatih/color v1.9.0 h1:8xPHl4/q1VyqGIPif1F+1V3Y3lSmrq01EabUW3CoW5s= 19 | github.com/fatih/color v1.9.0/go.mod 
h1:eQcE1qtQxscV5RaZvpXrrb8Drkc3/DdQ+uUYCNjL+zU= 20 | github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo= 21 | github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= 22 | github.com/golang/glog v1.0.0 h1:nfP3RFugxnNRyKgeWd4oI1nYvXpxrx8ck8ZrcizshdQ= 23 | github.com/golang/glog v1.0.0/go.mod h1:EWib/APOK0SL3dFbYqvxE3UYd8E6s1ouQ7iEp/0LWV4= 24 | github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= 25 | github.com/golang/snappy v0.0.0-20180518054509-2e65f85255db/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= 26 | github.com/golang/snappy v0.0.4 h1:yAGX7huGHXlcLOEtBnF4w7FQwA26wojNCwOYAEhLjQM= 27 | github.com/golang/snappy v0.0.4/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= 28 | github.com/hpcloud/tail v1.0.0 h1:nfCOvKYfkgYP8hkirhJocXT2+zOD8yUNjXaWfTlyFKI= 29 | github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU= 30 | github.com/juju/errors v1.0.0 h1:yiq7kjCLll1BiaRuNY53MGI0+EQ3rF6GB+wvboZDefM= 31 | github.com/juju/errors v1.0.0/go.mod h1:B5x9thDqx0wIMH3+aLIMP9HjItInYWObRovoCFM5Qe8= 32 | github.com/justinas/alice v1.2.0 h1:+MHSA/vccVCF4Uq37S42jwlkvI2Xzl7zTPCN5BnZNVo= 33 | github.com/justinas/alice v1.2.0/go.mod h1:fN5HRH/reO/zrUflLfTN43t3vXvKzvZIENsNEe7i7qA= 34 | github.com/kr/pretty v0.2.1 h1:Fmg33tUaq4/8ym9TJN1x7sLJnHVwhP33CNkpYV/7rwI= 35 | github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= 36 | github.com/liuzl/cedar-go v0.0.0-20170805034717-80a9c64b256d h1:qSmEGTgjkESUX5kPMSGJ4pcBUtYVDdkNzMrjQyvRvp0= 37 | github.com/liuzl/cedar-go v0.0.0-20170805034717-80a9c64b256d/go.mod h1:x7SghIWwLVcJObXbjK7S2ENsT1cAcdJcPl7dRaSFog0= 38 | github.com/liuzl/d v0.0.0-20221017065133-150b00c98eb9 h1:C+E8rPy8GE8Tlv2STuLJwpIxmrOeMJM3ngbKOgRwyno= 39 | github.com/liuzl/d v0.0.0-20221017065133-150b00c98eb9/go.mod h1:yLLAiymfW6NXB86ABmj24Jw22eayoSZXu9qCzJOUBfM= 40 | github.com/liuzl/da 
v0.0.0-20180704015230-14771aad5b1d h1:hTRDIpJ1FjS9ULJuEzu69n3qTgc18eI+ztw/pJv47hs= 41 | github.com/liuzl/da v0.0.0-20180704015230-14771aad5b1d/go.mod h1:7xD3p0XnHvJFQ3t/stEJd877CSIMkH/fACVWen5pYnc= 42 | github.com/liuzl/dict v0.0.0-20180720023830-2cdb4749152d h1:pAN7QPdrG4eo+6ggl8ZPwF5mUz+d1e6C7aDZbJOWeTk= 43 | github.com/liuzl/dict v0.0.0-20180720023830-2cdb4749152d/go.mod h1:UXlqqhJaw28ned67lp11D9ZBDoFfFBCLhfQVcFjTn4w= 44 | github.com/liuzl/filestore v0.0.0-20200229104338-5ea723a6a528 h1:g+uxFYnxN+bMSgLu+t7k4zzVIUsRhKykir1C4F5Gp2c= 45 | github.com/liuzl/filestore v0.0.0-20200229104338-5ea723a6a528/go.mod h1:aMgfSMkON/7fp+l9vv8w0xq870iSPVrNs7IqEu3xu5Q= 46 | github.com/liuzl/ling v0.0.0-20200509031100-522aef269e3c h1:j3k0k6lfQtPgolRVxyeqK6I5fbLL7vu0Nxzs8toCMW8= 47 | github.com/liuzl/ling v0.0.0-20200509031100-522aef269e3c/go.mod h1:1kEdLCXtzHEn0iVmhN+52m5l9YWMDL0EYch4Xt1su1Y= 48 | github.com/liuzl/segment v0.0.0-20160915185041-762005e7a34f h1:toJ372frwG+oflCG1Ebti4+yr3Pf1DhtIipClmAwTIs= 49 | github.com/liuzl/segment v0.0.0-20160915185041-762005e7a34f/go.mod h1:PfFcixpSUOZCUgVTaF3uNidhAQD0gfobJ4gJe67kC2U= 50 | github.com/liuzl/store v0.0.0-20190530065605-e2dbcd3c77fc h1:mZ1DgWJEXekv8VFCurVYxQdqJ8bgnsx7cFyBAE+ORCE= 51 | github.com/liuzl/store v0.0.0-20190530065605-e2dbcd3c77fc/go.mod h1:oGZDOBSfYkcxlMrnAaf6R2/DgLW56QYm3fJAj/fzODo= 52 | github.com/liuzl/tokenizer v0.0.0-20181128060327-56c1056833c1 h1:AbKGfBWqlLBVPwzaKRSmHOB4T5jsh/OJx3TQ0mINNtc= 53 | github.com/liuzl/tokenizer v0.0.0-20181128060327-56c1056833c1/go.mod h1:phPMsXWmEeSG/RxGFD6qjSVJdVVHv1HkcBHQfNVeibQ= 54 | github.com/liuzl/unidecode v0.0.0-20170420112940-fd1463e9cd5b h1:N3Kwu7n5QYlCiKYEiy7a7bRG3P/Bp/kwI4jES3rbA4g= 55 | github.com/liuzl/unidecode v0.0.0-20170420112940-fd1463e9cd5b/go.mod h1:QnoHoimYwXkYpQfXolZEH+CpPDZMKvZDlMz0fStkbmU= 56 | github.com/mattn/go-colorable v0.1.4/go.mod h1:U0ppj6V5qS13XJ6of8GYAs25YV2eR4EVcfRqFIhoBtE= 57 | github.com/mattn/go-colorable v0.1.12/go.mod 
h1:u5H1YNBxpqRaxsYJYSkiCWKzEfiAb1Gb520KVy5xxl4= 58 | github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA= 59 | github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg= 60 | github.com/mattn/go-isatty v0.0.8/go.mod h1:Iq45c/XA43vh69/j3iqttzPXn0bhXyGjM0Hdxcsrc5s= 61 | github.com/mattn/go-isatty v0.0.11/go.mod h1:PhnuNfih5lzO57/f3n+odYbM4JtupLOxQOAqxQCu2WE= 62 | github.com/mattn/go-isatty v0.0.14/go.mod h1:7GGIvUiUoEMVVmxf/4nioHXj79iQHKdU27kJ6hsGG94= 63 | github.com/mattn/go-isatty v0.0.16 h1:bq3VjFmv/sOjHtdEhmkEV4x1AJtvUvOJ2PFAZ5+peKQ= 64 | github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM= 65 | github.com/mattn/go-runewidth v0.0.4/go.mod h1:LwmH8dsx7+W8Uxz3IHJYH5QSwggIsqBzpuz5H//U1FU= 66 | github.com/mattn/go-runewidth v0.0.14 h1:+xnbZSEeDbOIg5/mE6JF0w6n9duR1l3/WmbinWVwUuU= 67 | github.com/mattn/go-runewidth v0.0.14/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w= 68 | github.com/mitchellh/hashstructure v1.1.0 h1:P6P1hdjqAAknpY/M1CGipelZgp+4y9ja9kmUZPXP+H0= 69 | github.com/mitchellh/hashstructure v1.1.0/go.mod h1:xUDAozZz0Wmdiufv0uyhnHkUTN6/6d8ulp4AwfLKrmA= 70 | github.com/onsi/ginkgo v1.6.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= 71 | github.com/onsi/ginkgo v1.7.0 h1:WSHQ+IS43OoUrWtD1/bbclrwK8TTH5hzp+umCiuxHgs= 72 | github.com/onsi/ginkgo v1.7.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= 73 | github.com/onsi/gomega v1.4.3 h1:RE1xgDvH7imwFD45h+u2SgIfERHlS2yNG4DObb5BSKU= 74 | github.com/onsi/gomega v1.4.3/go.mod h1:ex+gbHU/CVuBBDIJjb2X0qEXbFg53c61hWP/1CpauHY= 75 | github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= 76 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= 77 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 78 | github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc= 79 | 
github.com/rivo/uniseg v0.4.2 h1:YwD0ulJSJytLpiaWua0sBDusfsCZohxjxzVTYjwxfV8= 80 | github.com/rivo/uniseg v0.4.2/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88= 81 | github.com/robertkrimen/otto v0.0.0-20221011175642-09fc211e5ab1 h1:SQiIjmrbwsmwsf68GxOPZa3y2q98Vfo41CT6h7pOMAE= 82 | github.com/robertkrimen/otto v0.0.0-20221011175642-09fc211e5ab1/go.mod h1:DKHCllR988yoiVXPZrLqCjwAKhryyDPNmb9cBVtG/aQ= 83 | github.com/rs/xid v1.4.0 h1:qd7wPTDkN6KQx2VmMBLrpHkiyQwgFXRnkOLacUiaSNY= 84 | github.com/rs/xid v1.4.0/go.mod h1:trrq9SKmegXys3aeAKXMUTdJsYXVwGY3RLcfgqegfbg= 85 | github.com/rs/zerolog v1.28.0 h1:MirSo27VyNi7RJYP3078AA1+Cyzd2GB66qy3aUHvsWY= 86 | github.com/rs/zerolog v1.28.0/go.mod h1:NILgTygv/Uej1ra5XxGf82ZFSLk58MFGAUS2o6usyD0= 87 | github.com/saintfish/chardet v0.0.0-20120816061221-3af4cd4741ca h1:NugYot0LIVPxTvN8n+Kvkn6TrbMyxQiuvKdEwFdR9vI= 88 | github.com/saintfish/chardet v0.0.0-20120816061221-3af4cd4741ca/go.mod h1:uugorj2VCxiV1x+LzaIdVa9b4S4qGAcH6cbhh4qVxOU= 89 | github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= 90 | github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= 91 | github.com/stretchr/testify v1.8.0 h1:pSgiaMZlXftHpm5L7V1+rVB+AZJydKsMxsQBIJw4PKk= 92 | github.com/syndtr/goleveldb v1.0.0 h1:fBdIW9lB4Iz0n9khmH8w27SJ3QEJ7+IgjPEwGSZiFdE= 93 | github.com/syndtr/goleveldb v1.0.0/go.mod h1:ZVVdQEZoIme9iO1Ch2Jdy24qqXrMMOU6lpPAyBWyWuQ= 94 | github.com/torden/go-strutil v0.1.7 h1:6c1WDvEqqueK6qiLPSD8Svq/yRq/npkt9cLtw8khUI4= 95 | github.com/torden/go-strutil v0.1.7/go.mod h1:7cy4xHed8E5wlnGkk+gztMCIiFLxTJWbPAlr4XjwHYA= 96 | github.com/xlab/treeprint v1.1.0 h1:G/1DjNkPpfZCFt9CSh6b5/nY4VimlbHF3Rh4obvtzDk= 97 | github.com/xlab/treeprint v1.1.0/go.mod h1:gj5Gd3gPdKtR1ikdDK6fnFLdmIS0X30kTTuNd/WEJu0= 98 | golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= 99 | golang.org/x/net v0.0.0-20220826154423-83b083e8dc8b 
h1:ZmngSVLe/wycRns9MKikG9OWIEjGcGAkacif7oYQaUY= 100 | golang.org/x/net v0.0.0-20220826154423-83b083e8dc8b/go.mod h1:YDH+HFinaLZZlnHAfSS6ZXJJ9M9t4Dl22yv3iI2vPwk= 101 | golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 102 | golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= 103 | golang.org/x/sys v0.0.0-20190222072716-a9d3bda3a223/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= 104 | golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 105 | golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 106 | golang.org/x/sys v0.0.0-20210927094055-39ccf1dd6fa6/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 107 | golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 108 | golang.org/x/sys v0.0.0-20221013171732-95e765b1cc43 h1:OK7RB6t2WQX54srQQYSXMW8dF5C6/8+oA/s5QBmmto4= 109 | golang.org/x/sys v0.0.0-20221013171732-95e765b1cc43/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 110 | golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= 111 | golang.org/x/text v0.3.7 h1:olpwvP2KacW1ZWvsR7uQhoyTYvKAupfQrRGBFM352Gk= 112 | golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= 113 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 114 | gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= 115 | gopkg.in/fsnotify.v1 v1.4.7 h1:xOHLXZwVvI9hhs+cLKq5+I5onOuwQLhQwiu63xxlHs4= 116 | gopkg.in/fsnotify.v1 v1.4.7/go.mod h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMys= 117 | gopkg.in/readline.v1 v1.0.0-20160726135117-62c6fe619375/go.mod h1:lNEQeAhU009zbRxng+XOj5ITVgY24WcbNnQopyfKoYQ= 118 | gopkg.in/sourcemap.v1 v1.0.5 
h1:inv58fC9f9J3TK2Y2R1NPntXEn3/wjWHkonhIUODNTI= 119 | gopkg.in/sourcemap.v1 v1.0.5/go.mod h1:2RlvNNSMglmRrcvhfuzp4hQHwOtjxlbjX7UPY/GXb78= 120 | gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 h1:uRGJdciOHaEIrze2W8Q3AKkepLTh2hOroT7a+7czfdQ= 121 | gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWDmTeBkI65Dw0HsyUHuEVlX15mw= 122 | gopkg.in/yaml.v2 v2.2.1 h1:mUhvW9EsL+naU5Q3cakzfE91YhliOondGd6ZrsDBHQE= 123 | gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= 124 | gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 125 | gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= 126 | zliu.org/goutil v0.0.0-20220902023633-6fcbc3a43c89 h1:91pOnvABI1firRblKJEF+auogF0NsqSDSefEjrCPd/0= 127 | zliu.org/goutil v0.0.0-20220902023633-6fcbc3a43c89/go.mod h1:Z7oA8jTnXTwZyxCPRxHkYK2oZK8eOH7FbXiW4RYLijI= 128 | -------------------------------------------------------------------------------- /grammar_index.go: -------------------------------------------------------------------------------- 1 | package fmr 2 | 3 | import ( 4 | "fmt" 5 | "strings" 6 | 7 | "github.com/liuzl/dict" 8 | ) 9 | 10 | func updateIndex(index map[string]*Index, k string, cate string, v RbKey) error { 11 | if index == nil { 12 | return fmt.Errorf("nil grammar index") 13 | } 14 | if cate != "frame" && cate != "rule" { 15 | return fmt.Errorf("invalid cate %s", cate) 16 | } 17 | if index[k] == nil { 18 | index[k] = &Index{make(map[RbKey]struct{}), make(map[RbKey]struct{})} 19 | } 20 | switch cate { 21 | case "frame": 22 | index[k].Frames[v] = struct{}{} 23 | case "rule": 24 | index[k].Rules[v] = struct{}{} 25 | } 26 | return nil 27 | } 28 | 29 | func (g *Grammar) indexRules(rules map[string]*Rule, cate string) error { 30 | var err error 31 | for _, rule := range rules { 32 | for id, body := range rule.Body { 33 | for _, term := range body.Terms { 34 | v := RbKey{rule.Name, id} 35 | value 
// refine rewrites every terminal term in the grammar into a generated
// nonterminal rule whose body is the terminal's token sequence, so that
// matching later happens token-by-token. Generated rule names start with
// prefix, identical terminal texts share one rule, and the method is
// idempotent via the Refined flag.
func (g *Grammar) refine(prefix string) error {
	if g.Refined {
		return nil
	}
	var terminalRules []*Rule
	// terminals maps a terminal text to its generated rule name, so the
	// same text is only tokenized and named once.
	var terminals = make(map[string]string)
	var names = make(map[string]bool)
	var n int
	var name string
	for _, rule := range g.Rules {
		for _, body := range rule.Body {
			for _, term := range body.Terms {
				if term.Type != Terminal {
					continue
				}
				// if this is a terminal text inside a ruleBody
				if t, has := terminals[term.Value]; has {
					term.Value = t
				} else {
					// Tokenize the terminal text with the NLP pipeline.
					d := ling.NewDocument(term.Value)
					if err := NLP().Annotate(d); err != nil {
						return err
					}
					tname := prefix + "_t"
					rb := &RuleBody{}
					for _, token := range d.Tokens {
						if token.Type == ling.Space {
							continue
						}
						// Non-punctuation tokens contribute an ASCII
						// transliteration to the generated rule name.
						if token.Type != ling.Punct {
							ascii := unidecode.Unidecode(token.Text)
							ascii = strings.Join(strings.Fields(ascii), "_")
							tname += "_" + ascii
						}
						rb.Terms = append(rb.Terms,
							&Term{Value: token.Text, Type: Terminal, Meta: term.Meta})
						// Cache the token globally for later lookups.
						if gTokens.get(token.Text) == nil {
							gTokens.put(token.Text, token)
						}
					}
					// Probe tname, tname_0, tname_1, ... until a name is
					// free in both existing and freshly generated rules.
					for name, n = tname, 0; ; name, n =
						fmt.Sprintf("%s_%d", tname, n), n+1 {
						if g.Rules[name] == nil && !names[name] {
							break
						}
					}
					names[name] = true
					terminals[term.Value] = name
					hash, err := hashstructure.Hash(rb, nil)
					if err != nil {
						return err
					}
					terminalRules = append(terminalRules,
						&Rule{name, map[uint64]*RuleBody{hash: rb}})
					term.Value = name
				}
				// The term now points at a generated rule.
				term.Type = Nonterminal
			}
		}
	}
	for _, r := range terminalRules {
		g.Rules[r.Name] = r
	}
	g.Refined = true
	return nil
}
} 19 | 20 | func (m *cMap) put(k string, token *ling.Token) { 21 | m.Lock() 22 | defer m.Unlock() 23 | m.tokens[k] = token 24 | } 25 | 26 | var gTokens = &cMap{tokens: make(map[string]*ling.Token)} 27 | -------------------------------------------------------------------------------- /grammars/limit.grammar: -------------------------------------------------------------------------------- 1 | /* 2 | * Author: liang@zliu.org 3 | */ 4 | 5 | #include "math.zh.grammar" 6 | 7 | = "前"; 8 | = "个"; 9 | 10 | = {nf.sql.limit($2)} 11 | | {nf.sql.limit($1)} 12 | ; 13 | -------------------------------------------------------------------------------- /grammars/math.zh.grammar: -------------------------------------------------------------------------------- 1 | /* 2 | * Author: Zhanliang Liu 3 | */ 4 | 5 | #include "number.zh.grammar" 6 | 7 | // for local grammars that come from regex tagger 8 | = {nf.math.to_number($1)}; 9 | 10 | = "打" {nf.math.mul($1, 12)}; 11 | 12 | = "加" {nf.math.sum($1,$3)} 13 | | "加上" {nf.math.sum($1,$3)} 14 | | "与" "的和" {nf.math.sum($1,$3)} 15 | ; 16 | 17 | = "减" {nf.math.sub($1,$3)} 18 | | "减去" {nf.math.sub($1,$3)} 19 | | "与" "的差" {nf.math.sub($1,$3)} 20 | ; 21 | 22 | = "分之" {nf.math.div($3,$1)} 23 | | "%" {nf.math.div($1, 100)} 24 | ; 25 | 26 | = "乘" {nf.math.mul($1,$3)} 27 | | "乘以" {nf.math.mul($1,$3)} 28 | | "与" "的积" {nf.math.mul($1,$3)} 29 | | "的" "倍" {nf.math.mul($1,$3)} 30 | | "的一半" {nf.math.mul($1,0.5)} 31 | | "的" {nf.math.mul($1,$3)} 32 | ; 33 | 34 | = "除" {nf.math.div($3,$1)} 35 | | "除以" {nf.math.div($1,$3)} 36 | | {nf.I($1)} 37 | ; 38 | 39 | = "的" "次方" {nf.math.pow($1,$3)} 40 | | "的" "次幂" {nf.math.pow($1,$3)} 41 | | "的平方" {nf.math.pow($1,2)} 42 | | "的立方" {nf.math.pow($1,3)} 43 | | "的平方根" {nf.math.pow($1,0.5)} 44 | | "的立方根" {nf.math.pow($1,nf.math.div(1,3))} 45 | ; 46 | 47 | -------------------------------------------------------------------------------- /grammars/number.zh.grammar: 
-------------------------------------------------------------------------------- 1 | = "一" {nf.I(1)} 2 | | "二" {nf.I(2)} 3 | | "三" {nf.I(3)} 4 | | "四" {nf.I(4)} 5 | | "五" {nf.I(5)} 6 | | "六" {nf.I(6)} 7 | | "七" {nf.I(7)} 8 | | "八" {nf.I(8)} 9 | | "九" {nf.I(9)} 10 | | "壹" {nf.I(1)} 11 | | "贰" {nf.I(2)} 12 | | "叁" {nf.I(3)} 13 | | "肆" {nf.I(4)} 14 | | "伍" {nf.I(5)} 15 | | "陆" {nf.I(6)} 16 | | "柒" {nf.I(7)} 17 | | "捌" {nf.I(8)} 18 | | "玖" {nf.I(9)} 19 | | "两" {nf.I(2)} 20 | | "俩" {nf.I(2)} 21 | | "仨" {nf.I(3)} 22 | ; 23 | 24 | = "零" {nf.I(0)} 25 | | "〇" {nf.I(0)} 26 | ; 27 | 28 | 29 | = {nf.I($1)} | {nf.I($1)} | {nf.I($1)}; 30 | 31 | = {nf.I($1)} 32 | | {nf.util.concat($1, $2)} 33 | ; 34 | 35 | = "十" {nf.I(10)} 36 | | "拾" {nf.I(10)} 37 | ; 38 | 39 | = "百" {nf.I(100)} 40 | | "佰" {nf.I(100)} 41 | ; 42 | 43 | = "千" {nf.I(1000)} 44 | | "仟" {nf.I(1000)} 45 | ; 46 | 47 | = "万" {nf.I(10000)}; 48 | 49 | = "亿" {nf.I(100000000)} 50 | | "万万" {nf.I(100000000)} 51 | ; 52 | 53 | = {nf.I($1)} 54 | | {nf.math.sum($1,$2)} 55 | | {nf.math.mul($1, $2)} 56 | | {nf.math.sum(nf.math.mul($1, $2), $3)} 57 | | {nf.I($1)} 58 | | {nf.I($1)} 59 | ; 60 | 61 | = {nf.math.mul($1, $2)} 62 | | {nf.math.sum(nf.math.mul($1, $2), nf.math.mul(10,$3))} 63 | | {nf.math.sum(nf.math.mul($1, $2), $3)} 64 | | {nf.math.sum(nf.math.mul($1, $2), $4)} 65 | | {nf.I($1)} 66 | ; 67 | 68 | = {nf.math.mul($1, $2)} 69 | | {nf.math.sum(nf.math.mul($1, $2), nf.math.mul(100,$3))} 70 | | {nf.math.sum(nf.math.mul($1, $2), $3)} 71 | | {nf.math.sum(nf.math.mul($1, $2), $4)} 72 | | {nf.I($1)} 73 | ; 74 | 75 | = {nf.math.mul($1, $2)} 76 | | {nf.math.sum(nf.math.mul($1, $2), nf.math.mul(1000,$3))} 77 | | {nf.math.sum(nf.math.mul($1, $2), $3)} 78 | | {nf.math.sum(nf.math.mul($1, $2), $4)} 79 | | {nf.I($1)} 80 | | {nf.math.mul($1, $2)} 81 | ; 82 | 83 | = {nf.math.mul($1, $2)} 84 | | {nf.math.sum(nf.math.mul($1, $2), nf.math.mul(10000000,$3))} 85 | | {nf.math.sum(nf.math.mul($1, $2), $3)} 86 | | {nf.math.sum(nf.math.mul($1, $2), 
$4)} 87 | | {nf.I($1)} 88 | | {nf.math.mul($1, $2)} 89 | ; 90 | 91 | = "点" {nf.math.decimal($2)}; 92 | 93 | = {nf.I($1)} 94 | | {nf.I($1)} 95 | | {nf.math.sum($1,$2)} 96 | ; 97 | 98 | = {nf.I($1)} 99 | | {nf.I($1)} 100 | | {nf.I($1)} 101 | | {nf.I($1)} 102 | ; 103 | -------------------------------------------------------------------------------- /grammars/order.grammar: -------------------------------------------------------------------------------- 1 | /* 2 | * Author: liang@zliu.org 3 | */ 4 | 5 | // order by clause 6 | 7 | = "从近到远" | "从少到多" | "从小到大" | "从慢到快" | "升序" | "排名最低" 8 | | "最小" | "最少" | "最慢"; 9 | = "从远到近" | "从多到少" | "从大到小" | "从快到慢" | "降序" | "排名最高" 10 | | "最大" | "最多" | "最快"; 11 | = "按" | "获得" | "获得的" | "给出" | "根据" | "按照" | "依据"; 12 | 13 | = (any) {nf.sql.order($2,"asc")} 14 | | (any) {nf.sql.order($2,"desc")} 15 | ; 16 | -------------------------------------------------------------------------------- /grammars/sql.grammar: -------------------------------------------------------------------------------- 1 | #include "order.grammar" 2 | #include "limit.grammar" 3 | 4 | = {nf.I($1)} 5 | | {nf.sql.mix(nf.I($1),nf.sql.limit($2))} 6 | | (any) {nf.sql.mix(nf.I($1),nf.sql.limit($3))} 7 | ; 8 | 9 | [sql] = {nf.sql.gen($1,$2)}; 10 | -------------------------------------------------------------------------------- /grammars/time.grammar: -------------------------------------------------------------------------------- 1 | #include "math.zh.grammar" 2 | 3 | = "年" {nf.datetime.year($1)} 4 | // | "年" {nf.datetime.year($1)} 5 | ; 6 | = "月" {nf.datetime.month($1)}; 7 | = "日" {nf.datetime.month($1)} 8 | | "号" {nf.datetime.month($1)}; 9 | 10 | 11 | = 12 | | 13 | | 14 | | {nf.I($1)} 15 | | {nf.I($1)} 16 | | {nf.I($1)} 17 | ; 18 | 19 |