├── .babelrc
├── .gitignore
├── .npmignore
├── .travis.yml
├── LICENSE
├── README.md
├── index.ts
├── package-lock.json
├── package.json
├── src
    ├── earley
    │   ├── chart
    │   │   ├── addable-expressions-container.ts
    │   │   ├── chart.ts
    │   │   ├── state-index.ts
    │   │   ├── state-to-object-map.ts
    │   │   ├── state.ts
    │   │   └── viterbi-score.ts
    │   ├── complete.ts
    │   ├── expression
    │   │   └── value.ts
    │   ├── parser.ts
    │   ├── parsetree.ts
    │   ├── predict.ts
    │   └── scan.ts
    ├── grammar
    │   ├── category.ts
    │   ├── grammar.ts
    │   ├── left-corner.ts
    │   ├── rule.ts
    │   └── token.ts
    ├── index.ts
    └── util.ts
├── test
    ├── earley
    │   ├── chart.spec.ts
    │   ├── earley.spec.ts
    │   └── parser.spec.ts
    ├── grammar
    │   └── grammar.spec.ts
    └── sample-grammar.ts
├── tsconfig.json
├── tslint.json
├── version.js
└── webpack.config.js


/.babelrc:
--------------------------------------------------------------------------------
 1 | {
 2 |   "plugins": [
 3 |     "transform-class-properties"
 4 |   ],
 5 |   "presets": [
 6 |     ["es2015"]
 7 |   ],
 8 |   "env": {
 9 |     "development": {
10 |       "presets": [
11 |       ]
12 |     }
13 |   }
14 | }
15 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # Created by .ignore support plugin (hsz.mobi)
 2 | ### Node template
 3 | # Logs
 4 | logs
 5 | *.log
 6 | npm-debug.log*
 7 | 
 8 | # Runtime data
 9 | pids
10 | *.pid
11 | *.seed
12 | *.pid.lock
13 | 
14 | # Directory for instrumented libs generated by jscoverage/JSCover
15 | lib-cov
16 | 
17 | # Coverage directory used by tools like istanbul
18 | coverage
19 | 
20 | # nyc test coverage
21 | .nyc_output
22 | 
23 | # Grunt intermediate storage (http://gruntjs.com/creating-plugins#storing-task-files)
24 | .grunt
25 | 
26 | # node-waf configuration
27 | .lock-wscript
28 | 
29 | # Compiled binary addons (http://nodejs.org/api/addons.html)
30 | build/Release
31 | 
32 | # Dependency directories
33 | node_modules
34 | jspm_packages
35 | 
36 | # Optional npm cache directory
37 | .npm
38 | 
39 | # Optional eslint cache
40 | .eslintcache
41 | 
42 | # Optional REPL history
43 | .node_repl_history
44 | 
45 | # Output of 'npm pack'
46 | *.tgz
47 | 
48 | # Yarn Integrity file
49 | .yarn-integrity
50 | 
51 | 
52 | .idea/
53 | 
54 | /probabilistic-earley-parser.*.*.*.min.*
55 | /dist


--------------------------------------------------------------------------------
/.npmignore:
--------------------------------------------------------------------------------
1 | /node_modules
2 | /.idea
3 | /src
4 | /test
5 | /.yarn.lock
6 | /*.tgz
7 | /.travis.yml
8 | 


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | language: node_js
2 | sudo: false
3 | node_js:
4 |   - stable
5 |   - 5
6 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2017 Maarten Trompper
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | ![Build Status](https://travis-ci.org/digitalheir/probabilistic-earley-parser-javascript.svg?branch=master)
  2 | [![npm version](https://badge.fury.io/js/probabilistic-earley-parser.svg)](https://www.npmjs.com/package/probabilistic-earley-parser)
  3 | [![License](https://img.shields.io/npm/l/probabilistic-earley-parser.svg)](https://github.com/digitalheir/probabilistic-earley-parser-javascript/blob/master/LICENSE)
  4 | 
  5 | # Probabilistic Earley parser
  6 | 
  7 | ## ⚠️ Warning
  8 | This code is currently broken. It has a subtle bug which produces invalid results, can make your code run in exponential time & could use exponential memory.
  9 | 
 10 | **DO NOT USE THIS LIBRARY!!!**
 11 | 
 12 | Pull requests are welcome, but seeing as this is GitHub, nobody will care & the project is thus effectively abandoned. Contact maartentrompper@freedom.nl if you really need a functioning Probabilistic Earley Parser enough so that you are willing to fund it.  
 13 | 
 14 | ---
 15 | 
 16 | 
 17 | This is a library for parsing a sequence of tokens (like words) into tree structures, along with the probability that the particular sequence generates that tree structure. This is mainly useful for linguistic purposes, such as morphological parsing, speech recognition and generally information extraction. It also finds applications in computational biology. 
 18 | 
 19 | For example:
 20 | 
 21 | * As a computational linguist, you want to [derive all ways to interpret an English sentence along with probabilities](https://web.stanford.edu/~jurafsky/icassp95-tc.pdf)
 22 | 
 23 | |tokens|parse tree|
 24 | |---|---|
 25 | |[i, want, british, food]|![i want british food](https://cloud.githubusercontent.com/assets/178797/21772897/64838a1e-d68d-11e6-9a9d-11c7c17cb996.png)|
 26 | 
 27 | * As a computational biologist, you want to [predict the secondary structure for an RNA sequence](https://en.wikipedia.org/wiki/Stochastic_context-free_grammar#RNA_structure_prediction)
 28 | 
 29 | |tokens|parse tree|
 30 | |---|---|
 31 | |`GGGC``UAUU``AGCU``CAGU`<br>`UGGU``UAGA``GCGC``ACCC`<br>`CUGA``UAAG``GGUG``AGGU`<br>`CGCU``GAUU``CGAA``UUCA`<br>`GCAU``AGCC``CA` |![rna secondary structure](https://cloud.githubusercontent.com/assets/178797/21773797/af94f972-d690-11e6-97b4-0aad06071634.jpg)|
 32 | 
 33 | * As a computational linguist, [you want to know the most likely table of contents structure for a list of paragraphs](https://digitalheir.github.io/java-rechtspraak-library/document-structure/)
 34 | 
 35 | 
 36 | 
 37 | 
 38 | 
 39 | 
 40 | This library allows you to do these things [efficiently](https://github.com/digitalheir/probabilistic-earley-parser-javascript#runtime-complexity), as long as you can describe the rules as a [Context-free Grammar](https://en.wikipedia.org/wiki/Context-free_grammar) (CFG).
 41 | 
 42 | The innovation of this library with respect to the many other parsing libraries is that this one allows the production rules in your grammar to have a probability attached to them. That is: it parses [Stochastic Context-free Grammars](https://en.wikipedia.org/wiki/Stochastic_context-free_grammar). This allows us to make better choices in case of ambiguous sentences: we can order them by probability. If you do not need probabilities attached to your parse trees, you are probably better off using [nearley](http://nearley.js.org) instead.
 43 | 
 44 | For a theoretical grounding of this work, refer to [*Stolcke; An Efficient Probabilistic Context-Free
 45 |            Parsing Algorithm that Computes Prefix
 46 |            Probabilities*](http://www.aclweb.org/anthology/J95-2002).
 47 |   
 48 | ## Motivation
 49 | While libraries for nondeterministic grammars abound, I could not find an existing JavaScript
 50 | implementation of the Probabilistic Earley Parser. I have made a stochastic CYK parser before, but I wanted something
 51 | more top down that makes it easier to intervene in the parsing process,
 52 | for instance when an unexpected token is encountered. 
 53 | In many cases Earley also parses faster than CYK (sparse grammars) and it doesn't require the grammar to be 
 54 | rewritten in any normal form.
 55 |    
 56 | ## Usage
 57 | 
 58 | Get the most likely parse tree (the *Viterbi parse*) for the sentence "the man chased the man with a stick":
 59 | 
 60 | ````javascript
 61 | import {getViterbiParse, Grammar} from 'probabilistic-earley-parser';
 62 | import treeify from 'treeify';
 63 | 
 64 | // Nonterminals are strings
 65 | const S = "S"; // : NonTerminal 
 66 | const NP = "NP"; // : NonTerminal 
 67 | const VP = "VP"; // : NonTerminal 
 68 | const TV = "TV"; // : NonTerminal 
 69 | const Det = "Det"; // : NonTerminal 
 70 | const N = "N"; // : NonTerminal 
 71 | const Mod = "Mod"; // : NonTerminal 
 72 | 
 73 | // Terminals are functions that should return true when the parameter is of given type
 74 | const transitiveVerb = (token) => !!token.match(/(hit|chased)/); // : Terminal<string>
 75 | const the = (token) => !!token.match(/the/i);// : Terminal<string> 
 76 | const a = (token) => !!token.match(/a/i);// : Terminal<string> 
 77 | const man = (token) => !!token.match(/man/);// : Terminal<string> 
 78 | const stick = (token) => !!token.match(/stick/);// : Terminal<string> 
 79 | const with_ = (token) => !!token.match(/with/);// : Terminal<string> 
 80 | 
 81 | const grammar = Grammar.builder("test") //: Grammar<string,number> 
 82 |     .addNewRule(
 83 |         1.0,   // Probability between 0.0 and 1.0, defaults to 1.0. The builder takes care of converting it to the semiring element
 84 |         S,     // Left hand side of the rule
 85 |         [NP, VP] // Right hand side of the rule
 86 |     )
 87 |     // NP -> Det N (1.0)
 88 |     .addNewRule(
 89 |         1.0,
 90 |         NP,
 91 |         [Det, N] // eg. The man
 92 |     )
 93 |     // NP -> Det N Mod (1.0)
 94 |     .addNewRule(
 95 |         1.0,
 96 |         NP,
 97 |         [Det, N, Mod] // eg. The man (with a stick)
 98 |     )
 99 |     // VP -> TV NP Mod (0.4)
100 |     .addNewRule(
101 |         0.4,
102 |         VP,
103 |         [TV, NP, Mod] // eg. (chased) (the man) (with a stick)
104 |     )
105 |     // VP -> TV NP (0.6)
106 |     .addNewRule(
107 |         0.6,
108 |         VP,
109 |         [TV, NP] // eg. (chased) (the man with a stick)
110 |     )
111 |     .addNewRule(1.0, Det, [a])
112 |     .addNewRule(1.0, Det, [the])
113 |     .addNewRule(1.0, N, [man])
114 |     .addNewRule(1.0, N, [stick])
115 |     .addNewRule(1.0, TV, [transitiveVerb])
116 |     .addNewRule(1.0, Mod, [with_, NP]) // eg. with a stick
117 |     .build();
118 | 
119 | const tokens = ["The", "man", "chased", "the", "man", "with", "a", "stick"];
120 | const viterbi = getViterbiParse(
121 |     S,
122 |     grammar,
123 |     tokens
124 | ); // : ParseTreeWithScore<string>
125 | 
126 | console.log(viterbi.probability); // 0.6
127 | 
128 | /*
129 | 0.6
130 | └─ S
131 |    ├─ NP
132 |    │  ├─ Det
133 |    │  │  └─ The
134 |    │  └─ N
135 |    │     └─ man
136 |    └─ VP
137 |       ├─ TV
138 |       │  └─ chased
139 |       └─ NP
140 |          ├─ Det
141 |          │  └─ the
142 |          ├─ N
143 |          │  └─ man
144 |          └─ Mod
145 |             ├─ with
146 |             └─ NP
147 |                ├─ Det
148 |                │  └─ a
149 |                └─ N
150 |                   └─ stick
151 | */
152 | function printTree(tree) {
153 |   function makeTree(o){if(o.children && o.children.length > 0){const obj = {};
154 |         for(var i=0;i<o.children.length;i++){
155 |             const name = o.children[i].token?o.children[i].token:o.children[i].category;
156 |             obj[name] = makeTree(o.children[i]);
157 |         }
158 |         return obj;
159 |     }else {if(o.token) {return o.token;}
160 |     else {return o.category;}}
161 |   }
162 |   console.log(treeify.asTree(makeTree(tree)));
163 | }
164 | 
165 | printTree(viterbi.parseTree);
166 | 
167 | ````
168 | 
169 | You may pass a function to the parser with an additional probability multiplier for parsed tokens, to express logic that is hard to capture in a grammar. It should also be possible to define `predict`, `scan` and `complete` callbacks, but this is not currently implemented. (Pull requests welcome!)
170 | 
171 | ## Some notes on implementation
172 | 
173 | Written in TypeScript, published as a [commonjs module on NPM](https://www.npmjs.com/package/probabilistic-earley-parser) (ES6; use `--harmony_collections` flag if your Node version is < 6) and a [single-file minified UMD module on Github](https://github.com/digitalheir/probabilistic-earley-parser-javascript/releases) in vulgar ES5.
174 | 
175 | This is an implementation of a probabilistic Earley parsing algorithm, which can parse any Probabilistic Context Free Grammar (PCFG) (also
176 | known as Stochastic Context Free Grammar (SCFG)),
177 | or equivalently any language described in Backus-Naur Form (BNF). In these grammars, 
178 | rewrite rules may be non-deterministic and have a probability attached to them.
179 | 
180 | The probability of a parse is defined as the product of the probabilities of all the applied rules. Usually,
181 | we define probability as a number between 0 and 1 inclusive, and use common algebraic notions of addition and
182 | multiplication.
183 | 
184 | This code makes it possible to use *any* [semiring](https://en.wikipedia.org/wiki/Semiring) for computing
185 | scores. My use for this is to avoid arithmetic underflow: imagine a computation like 0.1 * 0.1 * ... * 0.1.
186 | At some point, floating point arithmetic will be unable to represent a number so small. To counter, we use the Log
187 | semiring which holds the minus log of the probability. This maps the numbers between 0 and 1 to the numbers
188 | between infinity and zero, skewed towards lower probabilities:
189 | 
190 | #### Graph plot of f(x) = -log(x)
191 | 
192 | ![Graph for f(x) = -log x](https://leibniz.cloudant.com/assets/_design/ddoc/graph%20for%20-log%20x.PNG)
193 | 
194 | 
195 | ### Runtime complexity
196 | The Earley algorithm has nice complexity properties. In particular, it can
197 | parse:
198 | 
199 | * any CFG in O(n³), 
200 | * unambiguous CFGs in O(n²)
201 | * left-recursive unambiguous grammars in O(n)
202 | 
203 | Note that this implementation does not apply innovations such as [Joop Leo's improvement](http://www.sciencedirect.com/science/article/pii/030439759190180A) to run linearly on right-recursive grammars as well. It might be complicated to implement this: making the parser stochastic is not as easy for Earley as it is for CYK.
204 | 
205 | For a faster parser that works on non-probabilistic grammars, look into [nearley](http://nearley.js.org).
206 | 
207 | ### Limitations
208 | * I have not provisioned for ε-rules (rules with an empty right hand side)
209 | * Rule probability estimation may be performed using the inside-outside algorithm, but is not currently implemented
210 | * Higher level concepts such as wildcards, * and + are not implemented
211 | * Viterbi parsing (querying the most likely parse tree) only returns one single parse. In the case of an ambiguous sentence in which multiple derivations have the highest probability, the returned parse is not guaranteed to be the left-most parse (I think).
212 | 
213 | ## License
214 | This software is licensed under a permissive [MIT license](https://opensource.org/licenses/MIT).
215 | 
216 | ## References
217 | [Stolcke, Andreas. "An efficient probabilistic context-free parsing algorithm that computes prefix probabilities." *Computational linguistics* 21.2 (1995): 165-201.
218 | APA](http://www.aclweb.org/anthology/J95-2002)
219 | 
220 | 
221 | ## Contact
222 | Inquiries go to maarten.trompper@gmail.com
223 | 


--------------------------------------------------------------------------------
/index.ts:
--------------------------------------------------------------------------------
1 | // Public entry point: re-exports the compiled modules from ./dist.
2 | // Each path mirrors the layout of src/ (viterbi-score lives under earley/chart; there is no earley/state directory).
3 | export * from './dist/earley/parser';
4 | export * from './dist/earley/parsetree';
5 | export * from './dist/earley/chart/viterbi-score';
6 | export * from './dist/grammar/grammar';
7 | export * from './dist/grammar/category';
8 | export * from './dist/grammar/rule';


--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "name": "probabilistic-earley-parser",
 3 |   "version": "0.9.5",
 4 |   "description": "A parser for parsing Probabilistic Context Free Grammars",
 5 |   "main": "dist/index.js",
 6 |   "author": "Maarten Trompper",
 7 |   "license": "MIT",
 8 |   "repository": "git@github.com:digitalheir/probabilistic-earley-parser-javascript.git",
 9 |   "scripts": {
10 |     "build": "npm run build:clean && npm run build:npm && npm run build:cp && npm run build:min",
11 |     "build:clean": "rimraf dist",
12 |     "build:min": "webpack -p",
13 |     "build:npm": "tsc",
14 |     "build:cp": "copyfiles .npmignore README.md package.json dist/",
15 |     "test": "mocha  --require ts-node/register test/**/*.spec.ts",
16 |     "test:chart": "mocha  --require ts-node/register test/**/chart.spec.ts",
17 |     "test:grammar": "mocha  --require ts-node/register test/**/grammar.spec.ts",
18 |     "test:earley": "mocha  --require ts-node/register test/**/earley.spec.ts"
19 |   },
20 |   "dependencies": {
21 |     "semiring": "^1.3.0"
22 |   },
23 |   "devDependencies": {
24 |     "@types/chai": "^4.3.0",
25 |     "@types/mocha": "^9.0.0",
26 |     "babel-core": "^6.26.3",
27 |     "babel-eslint": "^10.1.0",
28 |     "babel-loader": "^8.2.3",
29 |     "babel-plugin-transform-class-properties": "^6.24.1",
30 |     "babel-plugin-transform-runtime": "^6.23.0",
31 |     "babel-preset-es2015": "^6.24.1",
32 |     "babel-preset-es2016": "^6.24.1",
33 |     "babel-register": "^6.26.0",
34 |     "chai": "^4.3.4",
35 |     "convert-source-map": "^1.8.0",
36 |     "copyfiles": "^2.4.1",
37 |     "mocha": "^9.1.3",
38 |     "rimraf": "^3.0.2",
39 |     "ts-babel-node": "^1.1.1",
40 |     "ts-loader": "^9.2.6",
41 |     "ts-node": "^10.4.0",
42 |     "typescript": "^4.5.3",
43 |     "webpack": "^5.65.0",
44 |     "webpack-cli": "^4.9.1",
45 |     "yargs": "^17.3.0"
46 |   }
47 | }
48 | 


--------------------------------------------------------------------------------
/src/earley/chart/addable-expressions-container.ts:
--------------------------------------------------------------------------------
 1 | import { AtomicValue, Semiring } from "semiring";
 2 | import { Expression } from "semiring";
 3 | import { State } from "./state";
 4 | import { Rule } from "../../grammar/rule";
 5 | 
 6 | import { StateToObjectMap } from "./state-to-object-map";
 7 | import { DeferredValue } from "../expression/value";
 8 | 
 9 | /**
10 |  * Registry of deferred score computations, keyed by Earley state.
11 |  * Addition is the only supported operation.
12 |  * Used in the completion stage.
13 |  */
14 | export class DeferredStateScoreComputations<SemiringType, TokenType> {
15 |     readonly semiring: Semiring<Expression<SemiringType>>;
16 | 
17 |     private states: StateToObjectMap<TokenType, DeferredValue<SemiringType>>;
18 |     private readonly ZERO: Expression<SemiringType>;
19 | 
20 |     constructor(semiring: Semiring<Expression<SemiringType>>) {
21 |         this.semiring = semiring;
22 |         this.states = new StateToObjectMap<TokenType, DeferredValue<SemiringType>>();
23 |         // Additive identity of the semiring, resolved once and wrapped in an AtomicValue.
24 |         this.ZERO = new AtomicValue<SemiringType>(semiring.additiveIdentity.resolve());
25 |     }
26 | 
27 |     // getExpression(rule: Rule<TokenType>, index: number, ruleStart: number, dot: number): Expression<SemiringType> {
28 |     //     return this.states.get(rule, index, ruleStart, dot).expression;
29 |     // }
30 | 
31 |     /** Looks up the deferred value stored for `state`; if none exists, stores and returns one wrapping `defaultValue`. */
32 |     getOrCreateByState(state: State<SemiringType, TokenType>,
33 |                        defaultValue: Expression<SemiringType>): DeferredValue<SemiringType> {
34 |         if (this.states.hasByState(state)) {
35 |             return this.states.getByState(state);
36 |         }
37 |         const created = new DeferredValue(defaultValue);
38 |         this.states.putByState(state, created);
39 |         return created;
40 |     }
41 | 
42 |     /** Same lookup-or-insert as `getOrCreateByState`, with the state identified by its rule and positions. */
43 |     getOrCreate(rule: Rule<TokenType>,
44 |                 index: number,
45 |                 ruleStart: number,
46 |                 dotPosition: number,
47 |                 defaultValue: Expression<SemiringType>): DeferredValue<SemiringType> {
48 |         if (this.states.has(rule, index, ruleStart, dotPosition)) {
49 |             return this.states.get(rule, index, ruleStart, dotPosition);
50 |         }
51 |         const created = new DeferredValue(defaultValue);
52 |         this.states.put(rule, index, ruleStart, dotPosition, created);
53 |         return created;
54 |     }
55 | 
56 |     /** Returns the expression currently stored for the given state, or `undefined` when there is none. */
57 |     get(rule: Rule<TokenType>,
58 |         index: number,
59 |         ruleStart: number,
60 |         dotPosition: number): Expression<SemiringType> {
61 |         return this.states.has(rule, index, ruleStart, dotPosition)
62 |             ? this.states.get(rule, index, ruleStart, dotPosition).expression
63 |             : undefined;
64 |     }
65 | 
66 |     /** Adds `addValue` (semiring plus) to the expression stored for the state, starting from ZERO if absent. */
67 |     plus(rule: Rule<TokenType>,
68 |          index: number,
69 |          ruleStart: number,
70 |          dotPosition: number,
71 |          addValue: Expression<SemiringType>): void {
72 |         const target = this.getOrCreate(rule, index, ruleStart, dotPosition, this.ZERO);
73 |         target.expression = this.semiring.plus(addValue, target.expression);
74 |         this.states.put(rule, index, ruleStart, dotPosition, target);
75 |     }
76 | 
77 |     /** Forwards every entry visited by `states.forEach` to `f`, passing the entry's expression instead of the deferred value. */
78 |     forEach(f: (index: number, ruleStart: number, dot: number, rule: Rule<TokenType>, score: Expression<SemiringType>) => any) {
79 |         this.states.forEach((position, start, dotPos, rule, deferred) =>
80 |             f(position, start, dotPos, rule, deferred.expression));
81 |     }
82 | }
83 | 


--------------------------------------------------------------------------------
/src/earley/chart/chart.ts:
--------------------------------------------------------------------------------
  1 | //noinspection ES6UnusedImports
  2 | import { StateIndex } from "./state-index";
  3 | import { Grammar } from "../../grammar/grammar";
  4 | import { State, isCompleted, isActive, getActiveCategory } from "./state";
  5 | import { NonTerminal, Terminal, isNonTerminal } from "../../grammar/category";
  6 | import { Semiring } from "semiring";
  7 | import { getOrCreateSet, getOrCreateMap } from "../../util";
  8 | import { isUnitProduction, Rule, invalidDotPosition } from "../../grammar/rule";
  9 | import { ViterbiScore } from "./viterbi-score";
 10 | import { StateToObjectMap } from "./state-to-object-map";
 11 | 
 12 | export class Chart<T, S> {
 13 |     readonly grammar: Grammar<T, S>;
 14 | 
 15 |     private states = new StateIndex<S, T>();
 16 |     private byIndex = new Map<number, Set<State<S, T>>>();
 17 | 
 18 |     /**
 19 |      * The forward probability <code>α_i</code> of a chart is
 20 |      * the sum of the probabilities of
 21 |      * all constrained paths of length i that end in that chart, i.e. all
 22 |      * paths from start to position i. So this includes multiple
 23 |      * instances of the same history, which may happen because of recursion.
 24 |      */
 25 |     private forwardScores = new StateToObjectMap<T, S>();
 26 | 
 27 |     /**
 28 |      * The inner probability <code>γ_{i}</code> of a chart
 29 |      * is the sum of the probabilities of all
 30 |      * paths of length (i - k) that start at position k (the rule's start position),
 31 |      * and end at the current chart and generate the input symbols from position k up to i.
 32 |      * Note that this is conditional on the chart happening at position k with
 33 |      * a certain non-terminal X
 34 |      */
 35 |     private innerScores = new StateToObjectMap<T, S>();
 36 |     private viterbiScores = new StateToObjectMap<T, ViterbiScore<S, T>>();
 37 | 
 38 |     completedStates = new Map<number, Set<State<S, T>>>();
 39 |     completedStatesFor = new Map<number, Map<NonTerminal, Set<State<S, T>>>>();
 40 |     completedStatesThatAreNotUnitProductions = new Map<number, Set<State<S, T>>>();
 41 |     statesActiveOnNonTerminals = new Map<number, Set<State<S, T>>>();
 42 | 
 43 |     nonTerminalActiveAtIWithNonZeroUnitStarToY = new Map<number, Map<NonTerminal, Set<State<S, T>>>>();
 44 |     statesActiveOnTerminals = new Map<number, Map<Terminal<T>, Set<State<S, T>>>>();
 45 |     statesActiveOnNonTerminal = new Map<NonTerminal, Map<number, Set<State<S, T>>>>();
 46 |     private EMPTY_SET: Set<State<S, T>> = new Set<State<S, T>>();
 47 | 
 48 | 
 49 |     constructor(grammar: Grammar<T, S>) {
 50 |         this.grammar = grammar;
 51 |     }
 52 | 
 53 | // getCompletedStates(int i, NonTerminal s):Set<State<SemiringType, T>> {
 54 | //     Multimap<NonTerminal, State> m = this.completedStatesFor.get(i);
 55 | //     if (m != null && m.containsKey(s)) return m.get(s);
 56 | //     return Collections.emptySet();
 57 | // }
 58 | //
 59 | // public Set<State> getCompletedStates(int index) {
 60 | //     return getCompletedStates(index, true);
 61 | // }
 62 | //
 63 | // public Set<State> getCompletedStatesThatAreNotUnitProductions(int index) {
 64 | //     return getCompletedStates(index, false);
 65 | // }
 66 | //
 67 | // public Set<State> getCompletedStates(int index, boolean allowUnitProductions) {
 68 | //     if (allowUnitProductions) {
 69 | //         if (!completedStates.containsKey(index))
 70 | //             completedStates.put(index, new HashSet<>());
 71 | //         return completedStates.get(index);
 72 | //     } else {
 73 | //         if (!completedStatesThatAreNotUnitProductions.containsKey(index))
 74 | //             completedStatesThatAreNotUnitProductions.put(index, new HashSet<>());
 75 | //         return completedStatesThatAreNotUnitProductions.get(index);
 76 | //     }
 77 | // }
 78 | //
 79 | 
 80 |     getStatesActiveOnNonTerminalWithNonZeroUnitStarScoreToY(index: number, Y: NonTerminal): Set<State<S, T>> {
 81 |         return getOrCreateSet(getOrCreateMap(this.nonTerminalActiveAtIWithNonZeroUnitStarToY, index), Y);
 82 |     }
 83 | 
 84 |     getStatesActiveOnNonTerminal(y: NonTerminal, position: number, beforeOrOnPosition: number): Set<State<S, T>> {
 85 |         // Guard: only positions at or before `beforeOrOnPosition` may be queried.
 86 |         if (!(position <= beforeOrOnPosition)) throw new Error("Querying position after what we're on?");
 87 |         const byPosition = getOrCreateMap(this.statesActiveOnNonTerminal, y);
 88 |         return getOrCreateSet(byPosition, position);
 89 |     }
 90 | 
 91 |     /**
 92 |      * Looks up the forward score recorded for a state, passing the probability mapping's ZERO as the default.
 93 |      * @param s state whose score is requested
 94 |      * @return forward score so far
 95 |      */
 96 |     public getForwardScore(s: State<S, T>): S {
 97 |         const zero = this.grammar.probabilityMapping.ZERO;
 98 |         return this.forwardScores.getByStateOrDefault(s, zero);
 99 |     }
100 | 
101 | 
102 |     /** Adds `increment` (semiring plus) to the state's forward score, stores the sum and returns it. */
103 |     addForwardScore(state: State<S, T>, increment: S, semiring: Semiring<S>): S {
104 |         // getForwardScore supplies the mapping's ZERO as default for states without a score.
105 |         const previous = this.getForwardScore(state);
106 |         const updated = semiring.plus(previous, increment);
107 |         this.setForwardScore(state, updated);
108 |         return updated;
109 |     }
110 | 
111 |     setForwardScore(s: State<S, T>, probability: S) {
112 |         return this.forwardScores.putByState(s, probability);
113 |     }
114 | 
115 |     //noinspection JSUnusedLocalSymbols
116 |     private hasForwardScore(s: State<S, T>): boolean {
117 |         return this.forwardScores.hasByState(s);
118 |     }
119 | 
120 |     public getState(rule: Rule<T>,
121 |                     positionInInput: number,
122 |                     ruleStartPosition: number,
123 |                     ruleDotPosition: number): State<S, T> {
124 |         return this.states.getState(rule, positionInInput, ruleStartPosition, ruleDotPosition);
125 |     }
126 | 
127 |     /**
128 |      * Returns the state with the given coordinates, creating and adding it to the chart if it does not exist yet.
129 |      *
130 |      * @param positionInInput     State position
131 |      * @param ruleStartPosition    Rule start position
132 |      * @param ruleDotPosition  Rule dot position
133 |      * @param rule         State rule
134 |      * @param scannedToken The token that was scanned to create this state (optional)
135 |      * @return State specified by parameter. May or may not be in the chart table. If not, it is added.
136 |      */
137 |     public getOrCreate(positionInInput: number,
138 |                        ruleStartPosition: number,
139 |                        ruleDotPosition: number,
140 |                        rule: Rule<T>,
141 |                        scannedToken?: T): State<S, T> {
142 |         if (this.states.has(rule, positionInInput, ruleStartPosition, ruleDotPosition)) {
143 |             return this.states.getState(rule, positionInInput, ruleStartPosition, ruleDotPosition);
144 |         }
145 |         // Test for presence explicitly: a truthiness check would treat valid falsy
146 |         // tokens (0, "", false) as "nothing scanned" and leave them without a category.
147 |         const wasScanned = scannedToken !== undefined && scannedToken !== null;
148 |         const scannedCategory: Terminal<T> = wasScanned ? <Terminal<T>>rule.right[ruleDotPosition - 1] : undefined;
149 |         const state: State<S, T> = {
150 |             rule,
151 |             position: positionInInput,
152 |             ruleStartPosition,
153 |             ruleDotPosition,
154 |             scannedToken: scannedToken,
155 |             scannedCategory
156 |         };
157 |         // addState checks the dot position and registers the state in the chart's indexes.
158 |         this.addState(state);
159 |         return state;
160 |     }
161 | 
162 |     hasState(state: State<S, T>): boolean {
163 |         return this.states.has(state.rule, state.position, state.ruleStartPosition, state.ruleDotPosition);
164 |     }
165 | 
166 |     has(rule: Rule<T>, index: number, ruleStart: number, ruleDot: number): boolean {
167 |         return this.states.has(rule, index, ruleStart, ruleDot);
168 |     }
169 | 
170 |     /**
171 |      * Validates the dot position, then registers `state` in the chart's lookup indexes.
172 |      */
173 |     addState(state: State<S, T>): void {
174 |         const dot = state.ruleDotPosition;
175 |         if (dot < 0 || dot > state.rule.right.length) {
176 |             invalidDotPosition(dot, state);
177 |         }
178 |         this.states.addState(state);
179 |         const position = state.position;
180 |         getOrCreateSet(this.byIndex, position).add(state);
181 | 
182 |         // Completed states: indexed by position, and by position + left-hand side.
183 |         if (isCompleted(state)) {
184 |             getOrCreateSet(this.completedStates, position).add(state);
185 |             if (!isUnitProduction(state.rule)) {
186 |                 getOrCreateSet(this.completedStatesThatAreNotUnitProductions, position).add(state);
187 |             }
188 |             const completedByCategory = getOrCreateMap(this.completedStatesFor, position);
189 |             getOrCreateSet(completedByCategory, state.rule.left).add(state);
190 |         }
191 | 
192 |         if (!isActive(state)) return;
193 | 
194 |         const activeCategory = getActiveCategory(state);
195 |         if (!isNonTerminal(activeCategory)) {
196 |             // activeCategory MUST be terminal
197 |             getOrCreateSet(getOrCreateMap(this.statesActiveOnTerminals, position), activeCategory).add(state);
198 |             return;
199 |         }
200 | 
201 |         // Active on a non-terminal: index by category + position, and by position alone.
202 |         getOrCreateSet(getOrCreateMap(this.statesActiveOnNonTerminal, activeCategory), position).add(state);
203 |         getOrCreateSet(this.statesActiveOnNonTerminals, position).add(state);
204 | 
205 |         // Also index the state under every category the grammar's unit-star lookup yields for activeCategory.
206 |         const unitStarTargets = this.grammar.unitStarScores.getNonZeroScoresToNonTerminals(activeCategory);
207 |         unitStarTargets.forEach((FromNonTerminal: NonTerminal) => {
208 |             const byCategory = getOrCreateMap(this.nonTerminalActiveAtIWithNonZeroUnitStarToY, position);
209 |             getOrCreateSet(byCategory, FromNonTerminal).add(state);
210 |         });
211 |     }
212 | 
213 |     setInnerScore(s: State<S, T>, probability: S) { // stores probability as the inner score kept for s
214 |         this.innerScores.putByState(s, probability);
215 |     }
216 | 
217 |     /** Stores the given Viterbi score, filed under its own resulting state.
218 |      *  @param v viterbi score */
219 |     setViterbiScore(v: ViterbiScore<S, T>) {
220 |         const {resultingState} = v;
221 |         this.viterbiScores.putByState(resultingState, v);
222 |     }
223 | 
224 |     getViterbiScore(s: State<S, T>): ViterbiScore<S, T> {
225 |         // Lenient lookup: no hasViterbiScore(s) guard is applied here (a throwing
226 |         // variant was written but left disabled). Callers such as setViterbiScores
227 |         // test the result for falsiness, so a state without a recorded score is
228 |         // answered with whatever the score map returns for a missing entry.
229 |         const {viterbiScores} = this;
230 |         const recorded: ViterbiScore<S, T> = viterbiScores.getByState(s);
231 |         return recorded;
232 |     }
233 | 
234 |     hasViterbiScore(s: State<S, T>): boolean { // has a Viterbi score been recorded for s?
235 |         return this.viterbiScores.hasByState(s);
236 |     }
237 | 
238 |     /**
239 |      * Inner score recorded for a state; the probability mapping's ZERO when none is stored.
240 |      * @param s chart
241 |      * @return inner score so far
242 |      */
243 |     public getInnerScore(s: State<S, T>): S {
244 |         const fallback = this.grammar.probabilityMapping.ZERO;
245 |         return this.innerScores.getByStateOrDefault(s, fallback);
246 |     }
247 | 
248 |     public getCompletedStatesThatAreNotUnitProductions(position: number) { // no EMPTY_SET fallback, unlike getCompletedStates
249 |         return this.completedStatesThatAreNotUnitProductions.get(position); // callers (e.g. complete) test the result before use
250 |     }
251 | 
252 |     public getCompletedStates(position: number) {
253 |         // Positions without any completed state are answered with this chart's EMPTY_SET.
254 |         const known = this.completedStates.has(position);
255 |         return known ? this.completedStates.get(position) : this.EMPTY_SET;
256 |     }
257 | 
258 |     public getStatesActiveOnNonTerminals(index: number) { // states at index whose active category is a non-terminal
259 |         return this.statesActiveOnNonTerminals.get(index); // plain lookup, no fallback value is substituted
260 |     }
261 | 
262 |     public getStatesActiveOnTerminals(index: number, terminal: Terminal<T>) {
263 |         // Nothing indexed at this position: answer undefined rather than an empty set.
264 |         if (!this.statesActiveOnTerminals.has(index)) return undefined;
265 | 
266 |         return this.statesActiveOnTerminals.get(index).get(terminal);
267 |     }
268 | 
269 |     // public hasInnerScore(s: State<S, T>): boolean {
270 |     //     let ruleMap = getOrCreateMap(this.innerScores, s.rule);
271 |     //     let posMap = getOrCreateMap(ruleMap, s.position);
272 |     //     let dotMAp = getOrCreateMap(posMap, s.ruleStartPosition);
273 |     //     return dotMAp.has(s.ruleDotPosition);
274 |     // }
275 | 
276 | // public Set<State> getStatesByIndex(int index) {
277 | //     return byIndex.get(index);
278 | // }
279 | //
280 | //
281 | // public void plus(State chart) {
282 | //     Rule rule = chart.getRule();
283 | //     int ruleStart = chart.getRuleStartPosition();
284 | //     int index = chart.getPosition();
285 | //
286 | //     TIntObjectMap<TIntObjectMap<State>> forRuleStart = states.getRuleStartToDotToState(rule, index);
287 | //     if (!forRuleStart.containsKey(ruleStart)) forRuleStart.put(ruleStart, new TIntObjectHashMap<>(50));
288 | //     TIntObjectMap<State> dotToState = forRuleStart.get(ruleStart);
289 | //
290 | //     addState(dotToState, chart);
291 | // }
292 | //
293 | // public synchronized State getSynchronized(int index, int ruleStart, int ruleDot, Rule rule) {
294 | //     return states.getState(rule, index, ruleStart, ruleDot);
295 | // }
296 | //
297 | // public State get(int index, int ruleStart, int ruleDot, Rule rule) {
298 | //     return states.getState(rule, index, ruleStart, ruleDot);
299 | // }
300 | //
301 | // public countStates():number {
302 | //         return this.states.count();
303 | // }
304 | 
305 | }


--------------------------------------------------------------------------------
/src/earley/chart/state-index.ts:
--------------------------------------------------------------------------------
 1 | import { Rule } from "../../grammar/rule";
 2 | import { State } from "./state";
 3 | import { StateToObjectMap } from "./state-to-object-map";
 4 | 
 5 | /**
 6 |  * Index of chart states keyed by (rule, position, rule start, dot position).
 7 |  * Adding a key twice and fetching a missing key are both treated as bugs
 8 |  * and reported by throwing.
 9 |  */
10 | export class StateIndex<SemiringType, TokenType> {
11 |     private states: StateToObjectMap<TokenType, State<SemiringType, TokenType>>;
12 | 
13 |     constructor() {
14 |         this.states = new StateToObjectMap<TokenType, State<SemiringType, TokenType>>();
15 |     }
16 | 
17 |     /**
18 |      * Registers a state under its own key.
19 |      * @throws Error when a state with the same key is already present
20 |      */
21 |     public addState(state: State<SemiringType, TokenType>) {
22 |         const duplicate: boolean = this.states.hasByState(state);
23 |         if (duplicate) {
24 |             throw new Error("State set already contained chart. This is a bug.");
25 |         }
26 |         this.states.putByState(state, state);
27 |     }
28 | 
29 |     /**
30 |      * Fetches the state stored under the given key.
31 |      * @throws Error when no such state has been added
32 |      */
33 |     public getState(rule: Rule<TokenType>, index: number, ruleStart: number, ruleDot: number): State<SemiringType, TokenType> {
34 |         if (this.states.has(rule, index, ruleStart, ruleDot)) {
35 |             return this.states.get(rule, index, ruleStart, ruleDot);
36 |         }
37 |         throw new Error("State did not exist. This is a bug.");
38 |     }
39 | 
40 |     /** Whether a state is stored under the given key. */
41 |     public has(rule: Rule<TokenType>, index: number, ruleStart: number, ruleDot: number): boolean {
42 |         return this.states.has(rule, index, ruleStart, ruleDot);
43 |     }
44 | 
45 |     // /**
46 |     //  * Runs in O(1)
47 |     //  * @returns {number}
48 |     //  */
49 |     // public size(): number {
50 |     //     this.states.size();
51 |     // }
52 | }


--------------------------------------------------------------------------------
/src/earley/chart/state-to-object-map.ts:
--------------------------------------------------------------------------------
 1 | import { Rule } from "../../grammar/rule";
 2 | import { getOrCreateMap } from "../../util";
 3 | import { State } from "./state";
 4 | 
 5 | /**
 6 |  * Map from (rule, position, rule start position, dot position) to a value,
 7 |  * stored as four nested Maps. Only put() creates intermediate levels;
 8 |  * lookups leave the structure untouched.
 9 |  */
10 | export class StateToObjectMap<T, O> {
11 |     private map: Map<Rule<T>,
12 |         /* index */Map<number,
13 |         /*rule start*/Map<number,
14 |         /*dot position*/Map<number,
15 |         O>>>> = new Map<Rule<T>,
16 |         /*index*/Map<number,
17 |         /*rule start*/Map<number,
18 |         /*dot position*/Map<number, O>>>>();
19 | 
20 |     // private _size: number = 0;
21 | 
22 |     constructor() {
23 |     }
24 | 
25 |     /** Stores value under the key, creating missing levels and replacing any earlier value. */
26 |     put(rule: Rule<T>, position: number, ruleStartPosition: number, ruleDotPosition: number, value: O) {
27 |         getOrCreateMap(getOrCreateMap(getOrCreateMap(this.map, rule), position), ruleStartPosition).set(ruleDotPosition, value);
28 |     }
29 | 
30 |     /** Whether a value is stored under the key. Does not create intermediate maps. */
31 |     has(rule: Rule<T>, position: number, ruleStartPosition: number, ruleDotPosition: number): boolean {
32 |         const byDot = this.findDotMap(rule, position, ruleStartPosition);
33 |         return byDot !== undefined && byDot.has(ruleDotPosition);
34 |     }
35 | 
36 |     /** Value stored under the key, or undefined. Does not create intermediate maps. */
37 |     get(rule: Rule<T>, position: number, ruleStartPosition: number, ruleDotPosition: number): O {
38 |         const byDot = this.findDotMap(rule, position, ruleStartPosition);
39 |         return byDot === undefined ? undefined : byDot.get(ruleDotPosition);
40 |     }
41 | 
42 |     /** put() keyed by the state's rule, position, rule start and dot position. */
43 |     putByState<S>(state: State<S, T>, value: O): void {
44 |         this.put(state.rule, state.position, state.ruleStartPosition, state.ruleDotPosition, value);
45 |     }
46 | 
47 |     /** Value stored under the key, or _default when the key is absent. */
48 |     getOrDefault<S>(rule: Rule<T>, position: number, ruleStartPosition: number, ruleDotPosition: number, _default: O): O {
49 |         if (this.has(rule, position, ruleStartPosition, ruleDotPosition))
50 |             return this.get(rule, position, ruleStartPosition, ruleDotPosition);
51 |         else
52 |             return _default;
53 |     }
54 | 
55 |     /** getOrDefault() keyed by the state's rule, position, rule start and dot position. */
56 |     getByStateOrDefault<S>(state: State<S, T>, _default: O): O {
57 |         return this.getOrDefault(state.rule, state.position, state.ruleStartPosition, state.ruleDotPosition, _default);
58 |     }
59 | 
60 |     /** get() keyed by the state's rule, position, rule start and dot position. */
61 |     getByState<S>(state: State<S, T>): O {
62 |         return this.get(state.rule, state.position, state.ruleStartPosition, state.ruleDotPosition);
63 |     }
64 | 
65 |     /** has() keyed by the state's rule, position, rule start and dot position. */
66 |     hasByState<S>(state: State<S, T>): boolean {
67 |         return this.has(state.rule, state.position, state.ruleStartPosition, state.ruleDotPosition);
68 |     }
69 | 
70 |     /** Calls f once per stored value; note the argument order (index, ruleStart, dot, rule, value). */
71 |     forEach(f: (index: number, ruleStart: number, dot: number, rule: Rule<T>, score: O) => any) {
72 |         this.map.forEach(
73 |             (val, rule) => {
74 |                 val.forEach(
75 |                     (val2, position) => {
76 |                         val2.forEach(
77 |                             (val3, start) => {
78 |                                 val3.forEach(
79 |                                     (object: O, dot: number) => f(position, start, dot, rule, object)
80 |                                 );
81 |                             });
82 |                     });
83 |             });
84 |     }
85 | 
86 |     /**
87 |      * Innermost (dot position -> value) map for the given key prefix, or
88 |      * undefined when any level is missing. Read-only: unlike getOrCreateMap it
89 |      * never inserts empty maps, so queries cannot grow the structure.
90 |      */
91 |     private findDotMap(rule: Rule<T>, position: number, ruleStartPosition: number): Map<number, O> | undefined {
92 |         const byPosition = this.map.get(rule);
93 |         if (byPosition === undefined) return undefined;
94 |         const byRuleStart = byPosition.get(position);
95 |         if (byRuleStart === undefined) return undefined;
96 |         return byRuleStart.get(ruleStartPosition);
97 |     }
98 | 
99 |     // size(): number {
100 |     //     return this._size;
101 |     // }
102 | }


--------------------------------------------------------------------------------
/src/earley/chart/state.ts:
--------------------------------------------------------------------------------
 1 | import { Rule, getActiveCategory as getActiveCategoryFromRule, invalidDotPosition } from "../../grammar/rule";
 2 | import { Category } from "../../grammar/category";
 3 | 
 4 | /**
 5 |  * A chart state, describing a pending derivation.
 6 |  * <p/>
 7 |  * A state is of the form <code>i: X<sub>k</sub> → λ·μ</code>
 8 |  * where X is a nonterminal of the grammar, λ and μ are strings of nonterminals and/or
 9 |  * terminals, and i and k are indices into the input string. States are derived from productions
10 |  * in the grammar. The above state is derived from a corresponding production
11 |  * X → λμ
12 |  * with the following semantics:
13 |  * <ul>
14 |  * <li>The current position in the input is <code>i</code>, i.e., <code>x<sub>0</sub>...x<sub>i-1</sub></code>
15 |  * have been processed
16 |  * so far. The states describing the parser at position <code>i</code> are collectively
17 |  * called state set <code>i</code>. Note that there is one more state set than input
18 |  * symbols: set <code>0</code> describes the parser before any input is processed,
19 |  * while set <code>|x|</code> contains the states after all input symbols have been
20 |  * processed.</li>
21 |  * <li>Nonterminal <code>X</code> was expanded starting at position <code>k</code> in
22 |  * the input, i.e., <code>X</code>
23 |  * generates some substring starting at position <code>k</code>.</li>
24 |  * <li>The expansion of X proceeded using the production <code>X → λμ</code>, and has
25 |  * expanded the right-hand side (RHS) <code>λμ</code> up to the position indicated by
26 |  * the dot. The dot thus refers to the current position <code>i</code>.</li>
27 |  * </ul>
28 |  *
29 |  * A state with the dot to the right of the entire RHS is called a completed state, since
30 |  * it indicates that the left-hand side (LHS) non-terminal has been fully expanded.
31 |  *
32 |  */
33 | export interface State<SemiringType, TokenType> {
34 |     rule: Rule<TokenType>; // the production X → λμ
35 |     ruleStartPosition: number; // k in the notation above
36 |     ruleDotPosition: number; // number of right-hand side symbols to the left of the dot
37 |     position: number; // i in the notation above
38 |     scannedToken?: TokenType; // optional; presumably filled in by the scanner — confirm in scan.ts
39 |     scannedCategory?: Category<TokenType>; // optional; presumably the category the token was scanned as
40 | }
41 | 
42 | //noinspection JSUnusedGlobalSymbols
43 | export interface StateWithScore<SemiringType, TokenType> { // record type only; nothing in this file creates or reads it
44 |     forwardScore: SemiringType;
45 |     innerScore: SemiringType;
46 |     state: State<SemiringType, TokenType>; // presumably the state the two scores belong to — TODO confirm with users
47 |     origin: State<SemiringType, TokenType>;
48 | }
49 | 
50 | 
51 | export function isCompleted<T, E>(state: State<T, E>): boolean { // true when the state's dot is at the end of its rule
52 |     return isPassive(state.rule, state.ruleDotPosition); // isPassive also validates the dot position
53 | }
54 | 
55 | export function isActive<T, E>(state: State<T, E>): boolean { // exact negation of isCompleted
56 |     return !isCompleted(state);
57 | }
58 | 
59 | /** @return Active category for this chart state, as reported by the rule for the state's dot. May be null. */
60 | export function getActiveCategory<Semi, Token>(state: State<Semi, Token>): Category<Token> {
61 |     const {rule, ruleDotPosition} = state;
62 |     const active: Category<Token> = getActiveCategoryFromRule(rule, ruleDotPosition);
63 |     return active;
64 | }
65 | 
66 | /**
67 |  * Tests whether a dotted rule is passive (completed), i.e. whether the dot
68 |  * sits at position == |RHS| so that no active category is left.
69 |  * A dot outside [0, |RHS|] is handed to invalidDotPosition first.
70 |  *
71 |  * @return <code>true</code> iff the dot is at the end of the rule's right side.
72 |  */
73 | export function isPassive<T>(rule: Rule<T>, dotPosition: number) {
74 |     const outOfRange = dotPosition < 0 || dotPosition > rule.right.length;
75 |     if (outOfRange) {
76 |         invalidDotPosition(dotPosition, rule);
77 |     }
78 |     return dotPosition === rule.right.length;
79 | }
80 | 
81 | /**
82 |  * Return dot position advanced by <code>1</code>, or errors if out of bounds.
83 |  *
84 |  * @throws Error If the dot position is negative, or is already at (or beyond)
85 |  *               the end of the rule's right side, so that it cannot be advanced.
86 |  */
87 | export function advanceDot<SemiringType, TokenType>(s: State<SemiringType, TokenType>) {
88 |     const position = s.ruleDotPosition;
89 |     if (position < 0 || position >= s.rule.right.length) throw new Error(
90 |         "illegal position: " + position + ", " + s.rule);
91 |     return position + 1;
92 | }


--------------------------------------------------------------------------------
/src/earley/chart/viterbi-score.ts:
--------------------------------------------------------------------------------
  1 | import { State, advanceDot, isCompleted } from "./state";
  2 | import { ProbabilitySemiringMapping } from "../../grammar/grammar";
  3 | import { Rule } from "../../grammar/rule";
  4 | import { Chart } from "./chart";
  5 | import { NonTerminal } from "../../grammar/category";
  6 | 
  7 | /**
  8 |  * Represents a Viterbi score coming from a certain (origin) state,
  9 |  * transitioning to a resulting state, computed
 10 |  * using a certain semiring
 11 |  */
 12 | export interface ViterbiScore<S, T> {
 13 |     origin: State<S, T>; // setViterbiScores stores the completed state that caused the transition here
 14 |     resultingState: State<S, T>; // state the score belongs to; Chart.setViterbiScore files the score under it
 15 |     innerScore: S; // the score itself, expressed in the semiring's value type
 16 | }
 17 | 
 18 | 
 19 | /**
 20 |  * For finding the Viterbi path, we can't conflate production recursions (ie can't use the left star corner),
 21 |  * exactly because we need it to find the unique Viterbi path.
 22 |  * Luckily, we can avoid looping over unit productions because it only ever lowers probability
 23 |  * (assuming p = [0,1] and Occam's razor).
 24 |  * ~This method does not guarantee a left most parse.~
 25 |  *
 26 |  * Resulting states that do not exist yet are obtained through the chart's getOrCreate.
 27 |  *
 28 |  * @param stateSets      Chart holding the states and their Viterbi scores
 29 |  * @param completedState Completed state to propagate the Viterbi score from
 30 |  * @param originPathTo   Completed states already visited on the current recursion path
 31 |  * @param m              Probability mapping; supplies the semiring and the conversion used to compare scores
 32 |  * @throws Error when completedState has no Viterbi score, when the chart index returns a state at an
 33 |  *               unexpected position, or when a resulting state is already contained in originPathTo
 34 |  */
 35 | export function setViterbiScores<S, T>(stateSets: Chart<T, S>,
 36 |                                        completedState: State<S, T>,
 37 |                                        originPathTo: Set<State<S, T>>,
 38 |                                        m: ProbabilitySemiringMapping<S>): void {
 39 |     const sr = m.semiring;
 40 |     let newCompletedStates: State<S, T>[] = undefined; // created lazily to avoid array creation
 41 | 
 42 |     if (!stateSets.hasViterbiScore(completedState))
 43 |         throw new Error("Expected Viterbi score to be set on completed chart. This is a bug.");
 44 | 
 45 |     const completedViterbi: S = stateSets
 46 |         .getViterbiScore(completedState)
 47 |         .innerScore;
 48 | 
 49 |     // noinspection JSSuspiciousNameCombination
 50 |     const Y: NonTerminal = completedState.rule.left;
 51 | 
 52 |     // Get all states in j <= i, such that <code>j: X<sub>k</sub> →  λ·Yμ</code>
 53 |     const pos: number = completedState.position;
 54 |     stateSets.getStatesActiveOnNonTerminal(
 55 |         Y, completedState.ruleStartPosition, pos
 56 |     ).forEach((stateToAdvance) => {
 57 |         if (stateToAdvance.position > pos || stateToAdvance.position !== completedState.ruleStartPosition)
 58 |             throw new Error("Index failed. This is a bug.");
 59 | 
 60 |         const ruleStart: number = stateToAdvance.ruleStartPosition;
 61 |         const nextDot: number = advanceDot(stateToAdvance);
 62 |         const rule: Rule<T> = stateToAdvance.rule;
 63 | 
 64 |         // getOrCreate already registers the states it creates in the chart, so they
 65 |         // must not be handed to addState again afterwards (duplicates are rejected).
 66 |         let resultingState = stateSets.getState(rule, pos, ruleStart, nextDot);
 67 |         if (!resultingState) {
 68 |             resultingState = stateSets.getOrCreate(pos, ruleStart, nextDot, rule);
 69 |         }
 70 | 
 71 |         if (originPathTo.has(resultingState))
 72 |             throw new Error("This is a bug: Already went past " + resultingState);
 73 | 
 74 |         const viterbiScore: ViterbiScore<S, T> = stateSets.getViterbiScore(resultingState);
 75 |         const prevViterbi: ViterbiScore<S, T> = stateSets.getViterbiScore(stateToAdvance);
 76 | 
 77 |         // A predecessor without a recorded score contributes the multiplicative identity.
 78 |         const prev: S = !!prevViterbi ? prevViterbi.innerScore : sr.multiplicativeIdentity;
 79 |         const newViterbiScore: ViterbiScore<S, T> = {
 80 |             innerScore: sr.times(completedViterbi, prev),
 81 |             origin: completedState,
 82 |             resultingState
 83 |         };
 84 | 
 85 |         // Keep the new score only if none exists yet or it is strictly more probable.
 86 |         if (!viterbiScore
 87 |             ||
 88 |             m.toProbability(viterbiScore.innerScore) < m.toProbability(newViterbiScore.innerScore)
 89 |         ) {
 90 |             stateSets.setViterbiScore(newViterbiScore);
 91 |             if (isCompleted(resultingState)) {
 92 |                 if (!newCompletedStates) newCompletedStates = [];
 93 |                 newCompletedStates.push(resultingState);
 94 |             }
 95 |         }
 96 |     });
 97 | 
 98 |     // Recurse with the improved states that are completed; every branch works on its
 99 |     // own copy of the path, so sibling branches do not see each other's states.
100 |     if (!!newCompletedStates) newCompletedStates.forEach(resultingState => {
101 |         const path: Set<State<S, T>> = new Set<State<S, T>>(originPathTo);
102 |         path.add(resultingState);
103 |         setViterbiScores(stateSets, resultingState, path, m);
104 |     });
105 | }
106 | 
107 | 


--------------------------------------------------------------------------------
/src/earley/complete.ts:
--------------------------------------------------------------------------------
  1 | import { State, getActiveCategory, advanceDot, isPassive, isCompleted } from "./chart/state";
  2 | import { Chart } from "./chart/chart";
  3 | import { Grammar } from "../grammar/grammar";
  4 | import { NonTerminal, Category } from "../grammar/category";
  5 | import { Rule, isUnitProduction } from "../grammar/rule";
  6 | import { Expression } from "semiring";
  7 | import { DeferredStateScoreComputations } from "./chart/addable-expressions-container";
  8 | import { AtomicValue } from "semiring";
  9 | import { DeferredValue } from "./expression/value";
 10 | 
 11 | /**
 12 |  * Completes states exhaustively and makes resolvable expressions for the forward and inner scores.
 13 |  * Note that these expressions can only be resolved to actual values after finishing completion, because they may depend on one another.
 14 |  *
 15 |  * @param position         State position
 16 |  * @param states           Completed states to use for deducing what states to proceed
 17 |  * @param addForwardScores Container / helper for adding to forward score expressions
 18 |  * @param addInnerScores   Container / helper for adding to inner score expressions
 19 |  * @param grammar          Grammar; supplies the probability mapping, unit-star scores and deferrable semiring
 20 |  * @param stateSets        Chart that is queried for scores and states, and in which new completed states are created
 21 |  */
 22 | function completeNoViterbi<S, T>(position: number,
 23 |                                  states: Set<State<S, T>>,
 24 |                                  addForwardScores: DeferredStateScoreComputations<S, T>,
 25 |                                  addInnerScores: DeferredStateScoreComputations<S, T>,
 26 |                                  grammar: Grammar<T, S>,
 27 |                                  stateSets: Chart<T, S>) {
 28 |     let definitelyNewStates: DeferredStateScoreComputations<S, T>; // created lazily, only if a new completed state turns up
 29 | 
 30 |     // For all states
 31 |     //      i: Y<sub>j</sub> → v·    [a",y"]
 32 |     //      j: X<sub>k</sub> → l·Zm  [a',y']
 33 |     //
 34 |     //  such that the R*(Z =*> Y) is nonzero
 35 |     //  and Y → v is not a unit production
 36 |     states.forEach((completedState: State<S, T>) => {
 37 |         const j: number = completedState.ruleStartPosition;
 38 |         //noinspection JSSuspiciousNameCombination
 39 |         const Y: NonTerminal = completedState.rule.left;
 40 |         const probM = grammar.probabilityMapping;
 41 | 
 42 | 
 43 |         const innerScore: S = stateSets.getInnerScore(completedState);
 44 |         // TODO pre-create atom?
 45 |         const unresolvedCompletedInner: DeferredValue<S> = addInnerScores.getOrCreateByState(
 46 |             completedState,
 47 |             new AtomicValue(innerScore)
 48 |         );
 49 | 
 50 | 
 51 |         stateSets.getStatesActiveOnNonTerminalWithNonZeroUnitStarScoreToY(j, Y).forEach((stateToAdvance: State<S, T>) => {
 52 |             if (j !== stateToAdvance.position) throw new Error("Index failed. This is a bug.");
 53 |             // Make i: X_k → lZ·m
 54 |             const innerScore2 = stateSets.getInnerScore(stateToAdvance);
 55 |             // TODO pre-create atom?
 56 |             const prevInner: DeferredValue<S> = addInnerScores.getOrCreateByState(stateToAdvance,
 57 |                 new AtomicValue(innerScore2)
 58 |             );
 59 |             const forwardScore = stateSets.getForwardScore(stateToAdvance);
 60 |             // TODO pre-create atom?
 61 |             const prevForward: DeferredValue<S> = addForwardScores.getOrCreateByState(stateToAdvance,
 62 |                 new AtomicValue(forwardScore)
 63 |             );
 64 | 
 65 |             const Z: Category<T> = getActiveCategory(stateToAdvance);
 66 | 
 67 |             // TODO pre-create atom?
 68 |             const unitStarScore: Expression<S> = new AtomicValue(
 69 |                 probM.fromProbability(
 70 |                     grammar.getUnitStarScore(Z, Y)
 71 |                 )
 72 |             );
 73 |             const sr = grammar.deferrableSemiring;
 74 |             const fw: Expression<S> = sr.times( // unit-star score × previous forward × completed inner
 75 |                 unitStarScore,
 76 |                 sr.times(prevForward, unresolvedCompletedInner)
 77 |             );
 78 |             const inner: Expression<S> = sr.times( // unit-star score × previous inner × completed inner
 79 |                 unitStarScore,
 80 |                 sr.times(prevInner, unresolvedCompletedInner)
 81 |             );
 82 | 
 83 |             const newStateRule: Rule<T> = stateToAdvance.rule;
 84 |             const newStateDotPosition: number = advanceDot(stateToAdvance);
 85 |             const newStateRuleStart: number = stateToAdvance.ruleStartPosition;
 86 | 
 87 | 
 88 |             addForwardScores.plus(
 89 |                 newStateRule,
 90 |                 position,
 91 |                 newStateRuleStart,
 92 |                 newStateDotPosition,
 93 |                 fw
 94 |             );
 95 | 
 96 | 
 97 |             // If this is a new completed state that is not a unit production,
 98 |             // make a note of it because we want to recursively call *complete* on these states
 99 |             if (
100 |                 isPassive(newStateRule, newStateDotPosition)
101 |                 && !isUnitProduction(newStateRule)
102 |                 && !stateSets.has(newStateRule, position, newStateRuleStart, newStateDotPosition)) {
103 |                 if (!definitelyNewStates) definitelyNewStates = new DeferredStateScoreComputations<S, T>(sr);
104 |                 definitelyNewStates.plus(
105 |                     newStateRule,
106 |                     position,
107 |                     newStateRuleStart,
108 |                     newStateDotPosition,
109 |                     fw
110 |                 );
111 |             }
112 | 
113 |             addInnerScores.plus(
114 |                 newStateRule,
115 |                 position,
116 |                 newStateRuleStart,
117 |                 newStateDotPosition,
118 |                 inner
119 |             );
120 |         });
121 |     });
122 | 
123 | 
124 |     if (!!definitelyNewStates) {
125 |         const newCompletedStates: Set<State<S, T>> = new Set<State<S, T>>();
126 |         definitelyNewStates.forEach(
127 |             (index: number,
128 |              ruleStart: number,
129 |              dot: number,
130 |              rule: Rule<T>,
131 |              ignored: Expression<S>) => { // only the keys are needed here; the stored expression is not used
132 |                 // const isNew: boolean = !stateSets.has(index, ruleStart, dot, rule);
133 |                 if (stateSets.has(rule, index, ruleStart, dot)) {
134 |                     throw new Error("State wasn't new");
135 |                 }
136 | 
137 |                 const state: State<S, T> = stateSets.getOrCreate(index, ruleStart, dot, rule);
138 |                 if (!isCompleted(state) || isUnitProduction(state.rule))
139 |                     throw new Error("Unexpected chart found in possible new states. This is a bug.");
140 | 
141 |                 newCompletedStates.add(state);
142 |             });
143 |         if (!!newCompletedStates && newCompletedStates.size > 0) {
144 |             completeNoViterbi(position, // recurse on the states created above, sharing the same score containers
145 |                 newCompletedStates,
146 |                 addForwardScores,
147 |                 addInnerScores,
148 |                 grammar, stateSets
149 |             );
150 |         }
151 |     }
152 | }
153 | 
154 | /**
155 |  * Makes completions in the specified chart at the given index, then resolves
156 |  * the collected forward and inner score expressions and stores them on the chart.
157 |  *
158 |  * @param i The index to make completions at.
159 |  * @param stateSets Chart that is read for completed states and updated with the resolved scores
160 |  * @param grammar Grammar supplying the deferrable semiring
161 |  */
162 | export function complete<S, T>(i: number,
163 |                                stateSets: Chart<T, S>,
164 |                                grammar: Grammar<T, S>) {
165 |     const forwardAdditions = new DeferredStateScoreComputations(grammar.deferrableSemiring);
166 |     const innerAdditions = new DeferredStateScoreComputations(grammar.deferrableSemiring);
167 | 
168 |     // The chart may have nothing to complete on at i; completion is skipped in that case.
169 |     const seedStates = stateSets.getCompletedStatesThatAreNotUnitProductions(i);
170 |     if (seedStates) {
171 |         completeNoViterbi(
172 |             i, seedStates,
173 |             forwardAdditions, innerAdditions,
174 |             grammar, stateSets
175 |         );
176 |     }
177 | 
178 |     // Resolve and set forward score (TODO dont getorcreate chart)
179 |     forwardAdditions.forEach((position, ruleStart, dot, rule, score) => {
180 |         const target: State<S, T> = stateSets.getOrCreate(position, ruleStart, dot, rule);
181 |         const resolved = score.resolve();
182 |         stateSets.setForwardScore(target, resolved);
183 |     });
184 | 
185 |     // Resolve and set inner score (TODO dont getorcreate chart)
186 |     innerAdditions.forEach((position, ruleStart, dot, rule, score) => {
187 |         const target: State<S, T> = stateSets.getOrCreate(position, ruleStart, dot, rule);
188 |         const resolved = score.resolve();
189 |         stateSets.setInnerScore(target, resolved);
190 |     });
191 | }


--------------------------------------------------------------------------------
/src/earley/expression/value.ts:
--------------------------------------------------------------------------------
 1 | import { Expression } from "semiring";
 2 | 
 3 | /**
 4 |  * Expression that forwards resolution to another expression held in a public,
 5 |  * reassignable field.
 6 |  */
 7 | export class DeferredValue<T> implements Expression<T> {
 8 |     public expression: Expression<T>;
 9 | 
10 |     constructor(e: Expression<T>) {
11 |         this.expression = e;
12 |     }
13 | 
14 |     /** Resolves whatever expression is held at the time of the call. */
15 |     resolve() {
16 |         const {expression} = this;
17 |         return expression.resolve();
18 |     }
19 | }


--------------------------------------------------------------------------------
/src/earley/parser.ts:
--------------------------------------------------------------------------------
  1 | import { Grammar } from "../grammar/grammar";
  2 | import { NonTerminal, Category, isNonTerminal, Terminal, WordWithTypes } from "../grammar/category";
  3 | import { Rule } from "../grammar/rule";
  4 | import { State } from "./chart/state";
  5 | import { setViterbiScores, ViterbiScore } from "./chart/viterbi-score";
  6 | import { Chart } from "./chart/chart";
  7 | import { scan } from "./scan";
  8 | import { predict } from "./predict";
  9 | import { complete } from "./complete";
 10 | import { ParseTree, addRightMost } from "./parsetree";
 11 | 
 12 | /**
 13 |  * Gets or creates the state for the given coordinates, stores its inner and
 14 |  * forward scores on the chart, and returns it. No Viterbi score is assigned here.
 15 |  *
 16 |  * @throws Error when a Viterbi score is already recorded for the state
 17 |  */
 18 | export function addState<S, T>(stateSets: Chart<T, S>,
 19 |                                index: number,
 20 |                                ruleStartPosition: number,
 21 |                                ruleDotPosition: number,
 22 |                                rule: Rule<T>,
 23 |                                forward: S,
 24 |                                inner: S): State<S, T> {
 25 |     const fresh: State<S, T> = stateSets.getOrCreate(index, ruleStartPosition, ruleDotPosition, rule);
 26 |     stateSets.setInnerScore(fresh, inner);
 27 |     stateSets.setForwardScore(fresh, forward);
 28 | 
 29 |     // The state is expected to be new, so a Viterbi score that already exists is an error.
 30 |     const alreadyScored: boolean = stateSets.hasViterbiScore(fresh);
 31 |     if (alreadyScored) {
 32 |         throw new Error("Viterbi score was already set for new chart?!");
 33 |     }
 34 | 
 35 |     return fresh;
 36 | }
 37 | 
 38 | /**
 39 |  * Reconstructs the Viterbi parse tree for a state by walking the chart backwards,
 40 |  * one right-hand side symbol at a time.
 41 |  *
 42 |  * @param state State to build the tree for
 43 |  * @param chart Chart holding the states and their Viterbi scores
 44 |  * @return Tree whose root category is the left-hand side of the state's rule
 45 |  * @throws Error when a state that follows a terminal carries no scanned token
 46 |  */
 47 | export function getViterbiParseFromChart<S, T>(state: State<S, T>, chart: Chart<T, S>): ParseTree<T> {
 48 |     if (state.ruleDotPosition === 0) {
 49 |         // Prediction state: nothing has been recognised yet, so there are no children
 50 |         return {category: state.rule.left, children: []};
 51 |     }
 52 | 
 53 |     // The predecessor is the same rule with the dot one symbol further to the left.
 54 |     const previousDot: number = state.ruleDotPosition - 1;
 55 |     const prefixEnd: Category<T> = state.rule.right[previousDot];
 56 | 
 57 |     if (!isNonTerminal(prefixEnd)) {
 58 |         // Scanned terminal state. Test for absence explicitly so that falsy tokens
 59 |         // such as 0 or "" are not mistaken for a missing token.
 60 |         if (state.scannedToken === undefined || state.scannedToken === null)
 61 |             throw new Error("Expected chart to be a scanned chart. This is a bug.");
 62 | 
 63 |         const scannedFrom = chart.getOrCreate(state.position - 1, state.ruleStartPosition, previousDot, state.rule);
 64 |         const prefixTree: ParseTree<T> = getViterbiParseFromChart(scannedFrom, chart);
 65 |         addRightMost(prefixTree, {token: state.scannedToken, category: state.scannedCategory, children: []});
 66 |         return prefixTree;
 67 |     }
 68 | 
 69 |     // Completed non-terminal state
 70 |     const viterbi: ViterbiScore<S, T> = chart.getViterbiScore(state); // must exist
 71 | 
 72 |     // Completed state that led to the current state
 73 |     const origin: State<S, T> = viterbi.origin;
 74 | 
 75 |     // Recurse for predecessor state (before the completion happened); it ends where the origin started
 76 |     const predecessor = chart.getOrCreate(
 77 |         origin.ruleStartPosition,
 78 |         state.ruleStartPosition,
 79 |         previousDot,
 80 |         state.rule
 81 |     );
 82 |     const tree: ParseTree<T> = getViterbiParseFromChart(predecessor, chart);
 83 | 
 84 |     // Recurse for completed state
 85 |     const completedSubtree: ParseTree<T> = getViterbiParseFromChart(origin, chart);
 86 | 
 87 |     addRightMost(tree, completedSubtree);
 88 |     return tree;
 89 | }
 90 | 
 91 | 
 92 | /**
 93 |  * Builds the chart for `tokens` by running predict, scan and complete once per token.
 94 |  *
 95 |  * Before the loop each token is paired with the terminals of the grammar that accept it,
 96 |  * and the chart is seeded with a state for the synthetic rule `<start> -> Start`.
 97 |  * After each token's complete step, Viterbi scores are set for every state completed
 98 |  * at the next position.
 99 |  *
100 |  * @param Start           Non-terminal on the right-hand side of the synthetic `<start>` rule.
101 |  * @param grammar         Grammar supplying the terminals and the probability mapping.
102 |  * @param tokens          Input tokens, in order.
103 |  * @param scanProbability Optional scoring function, handed through to `scan` unchanged.
104 |  * @return A triple: the chart, the number of tokens processed, and the seeded `<start>` state.
105 |  */
106 | export function parseSentenceIntoChart<S, T>(Start: NonTerminal,
107 |                                              grammar: Grammar<T, S>,
108 |                                              tokens: T[],
109 |                                              scanProbability?: (x: T, t: Terminal<T>[]) => S): [Chart<T, S>, number, State<S, T>] {
110 |     const stateSets: Chart<T, S> = new Chart(grammar);
111 | 
112 |     // Index words to their applicable terminals; a repeated word reuses the first lookup
113 |     const typesByWord = new Map<T, Terminal<T>[]>();
114 |     const tokensWithWords: WordWithTypes<T>[] = tokens.map(word => {
115 |         if (!typesByWord.has(word)) {
116 |             typesByWord.set(word, grammar.terminals.filter((isOfType: Terminal<T>) => isOfType(word)));
117 |         }
118 |         return {types: typesByWord.get(word), word};
119 |     });
120 | 
121 |     // Initial state for `<start> -> Start`: all three position arguments 0, both score arguments ONE
122 |     const one = grammar.probabilityMapping.ONE;
123 |     const init = addState(
124 |         stateSets, 0, 0, 0,
125 |         {left: "<start>", right: [Start], probability: 1.0},
126 |         one, one
127 |     );
128 | 
129 |     // Cycle through the input; `i` is the position of the token being processed
130 |     let i = 0;
131 |     tokensWithWords.forEach((token: WordWithTypes<T>) => {
132 |         const next = i + 1;
133 |         predict(i, grammar, stateSets);
134 |         scan(i, token, grammar.probabilityMapping.semiring, stateSets, scanProbability);
135 |         complete(next, stateSets, grammar);
136 | 
137 |         // Copy the completed states into an array before setting their Viterbi scores
138 |         const completedStates: State<S, T>[] = [];
139 |         const completedAtNext = stateSets.getCompletedStates(next);
140 |         if (completedAtNext) {
141 |             completedAtNext.forEach(s => completedStates.push(s));
142 |         }
143 |         completedStates.forEach(s => setViterbiScores(stateSets, s, new Set<State<S, T>>(), grammar.probabilityMapping));
144 |         i = next;
145 |     });
146 | 
147 |     return [stateSets, i, init];
148 | }
149 | 
150 | export interface ParseTreeWithScore<T> { // result type of getViterbiParse
151 |     parseTree: ParseTree<T>; // tree reconstructed from the final chart state
152 |     probability: number; // inner Viterbi score of the final state, converted with toProbability
153 | }
154 | 
155 | /**
156 |  * Parses `tokens` and returns the Viterbi parse tree for `Start` with its probability.
157 |  *
158 |  * @param Start           Non-terminal the whole input has to derive.
159 |  * @param grammar         Grammar to parse with.
160 |  * @param tokens          Input tokens, in order.
161 |  * @param scanProbability Optional scoring function, handed through to `parseSentenceIntoChart`.
162 |  * @return The parse tree and the inner Viterbi score of the final state, converted with `toProbability`.
163 |  * @throws Error "Could not parse sentence." when the chart lacks the start rule's state that begins
164 |  *         at 0, ends at `tokens.length` and has the dot behind the last symbol.
165 |  */
166 | export function getViterbiParse<S, T>(Start: NonTerminal,
167 |                                       grammar: Grammar<T, S>,
168 |                                       tokens: T[],
169 |                                       scanProbability?: (x: T, t: Terminal<T>[]) => S): ParseTreeWithScore<T> {
170 |     const parsed = parseSentenceIntoChart(Start, grammar, tokens, scanProbability);
171 |     const chart = parsed[0];
172 |     const startRule = parsed[2].rule;
173 |     const dotAtEnd = startRule.right.length;
174 | 
175 |     if (!chart.has(startRule, tokens.length, 0, dotAtEnd)) {
176 |         throw new Error("Could not parse sentence.");
177 |     }
178 | 
179 |     const finalState = chart.getOrCreate(tokens.length, 0, dotAtEnd, startRule);
180 |     const parseTree: ParseTree<T> = getViterbiParseFromChart(finalState, chart);
181 |     const toProbability = grammar.probabilityMapping.toProbability;
182 |     const finalScore = chart.getViterbiScore(finalState).innerScore;
183 | 
184 |     return {parseTree, probability: toProbability(finalScore)};
185 | }


--------------------------------------------------------------------------------
/src/earley/parsetree.ts:
--------------------------------------------------------------------------------
 1 | import { Category } from "../grammar/category";
 2 | 
 3 | /**
 4 |  * A parse tree that represents the derivation of a string based on the
 5 |  * rules in a {@link Grammar}. Parse trees recursively contain
 6 |  * other parse trees, so they can be iterated through to
 7 |  * find the entire derivation of a category.
 8 |  * <p>
 9 |  * Parse trees are essentially partial views of a Chart from a
10 |  * given {@link State} or {@link Category}. They represent the completed
11 |  * category at a given string index and origin position.
12 |  */
13 | export interface ParseTree<T> {
14 |     category: Category<T>; // category of this node
15 |     children: ParseTree<T>[]; // child trees, in their linear order
16 |     token?: T; // scanned input token; the parser sets it on the leaves it creates for scanned terminals
17 | }
18 | 
19 | //noinspection JSUnusedGlobalSymbols
20 | /**
21 |  * Builds a parse tree node from a category and its child trees.
22 |  *
23 |  * @param category The category this node stands for.
24 |  * @param children The children of this node, in their linear order. The
25 |  *                 array is stored as-is; a new empty one is used when omitted.
26 |  * @return A node with the given category and children, and no token.
27 |  */
28 | export function createParseTree<T>(category: Category<T>, children: ParseTree<T>[] = []): ParseTree<T> {
29 |     const node: ParseTree<T> = {category, children};
30 |     return node;
31 | }
32 | 
33 | /** Appends `addMe` as the last (right-most) child of `addTo`, modifying `addTo.children` in place. */
34 | export function addRightMost<T>(addTo: ParseTree<T>, addMe: ParseTree<T>) {
35 |     addTo.children[addTo.children.length] = addMe;
36 | }
37 | 
38 | // public static class Token<E> extends ParseTree {
39 | //     public final org.leibnizcenter.cfg.token.Token<E> token;
40 | //
41 | //     public Token(org.leibnizcenter.cfg.token.Token<E> scannedToken, Category category) {
42 | //     super(category, null);
43 | //     this.token = scannedToken;
44 | // }
45 | //
46 | // public Token(ScannedTokenState<E> scannedState) {
47 | //     this(scannedState.scannedToken, scannedState.scannedCategory);
48 | // }
49 | //
50 | //
51 | // @Override
52 | // public int hashCode() {
53 | //     return super.hashCode() + token.hashCode();
54 | // }
55 | //
56 | // @Override
57 | // public boolean equals(Object o) {
58 | //     return o instanceof Token && super.equals(o) && token.equals(((Token) o).token);
59 | // }
60 | // }
61 | //
62 | // public static class NonToken extends ParseTree {
63 | //     public NonToken(Category node) {
64 | //     super(node);
65 | // }
66 | //
67 | // public NonToken(Category node, LinkedList<ParseTree> children) {
68 | //     super(node, children);
69 | // }
70 | //
71 | // @Override
72 | // public boolean equals(Object o) {
73 | //     return o instanceof NonToken && super.equals(o);
74 | // }
75 | // }
76 | // }


--------------------------------------------------------------------------------
/src/earley/predict.ts:
--------------------------------------------------------------------------------
 1 | import {Grammar} from "../grammar/grammar";
 2 | import {Chart} from "./chart/chart";
 3 | import {State, getActiveCategory} from "./chart/state";
 4 | import {Category, isNonTerminal, NonTerminal} from "../grammar/category";
 5 | import {Rule} from "../grammar/rule";
 6 | 
 7 | /**
 8 |  * Makes predictions in the specified chart at the given index.
 9 |  *
10 |  * For each state at position `index` that is active on a non-terminal Z, and for
11 |  * every rule Y → v of each non-terminal Y with a non-zero left*-corner score from Z,
12 |  * the state for Y → v at position `index` with the dot at 0 gets its scores updated.
13 |  *
14 |  * @param index     The string index to make predictions at.
15 |  * @param grammar   Grammar supplying the rules, left*-corner scores and probability mapping.
16 |  * @param stateSets Chart to read the active states from and to record the predictions in.
17 |  * @return One record per prediction: the state, its inner, forward and Viterbi score, and its origin.
18 |  * @throws Error when a predicted state already has an inner score other than ZERO or P(Y → v).
19 |  */
20 | export function predict<S, T>(index: number,
21 |                               grammar: Grammar<T, S>,
22 |                               stateSets: Chart<T, S>) {
23 |     const changes: any[] = [];
24 | 
25 |     const activeStates: Set<State<S, T>> = stateSets.getStatesActiveOnNonTerminals(index);
26 |     if (!activeStates) {
27 |         return changes;
28 |     }
29 | 
30 |     const probMap = grammar.probabilityMapping;
31 |     const sr = probMap.semiring;
32 |     const fromProb = probMap.fromProbability;
33 | 
34 |     // States that were not in the chart yet; getOrCreate is called for them after the loop
35 |     const pending = new Set<State<S, T>>();
36 | 
37 |     // O(|stateset(i)|) = O(|grammar|): for all states i: X_k → λ·Zμ ...
38 |     activeStates.forEach((origin: State<S, T>) => {
39 |         const Z: Category<T> = getActiveCategory(origin);
40 |         const originForward: S = stateSets.getForwardScore(origin);
41 | 
42 |         // ... and all productions Y → v such that R(Z =*L> Y) is non-zero
43 |         grammar.leftStarCorners.getNonZeroScores(Z).forEach((Y: Category<T>) => {
44 |             if (!isNonTerminal(Y) || !grammar.ruleMap.has(Y)) {
45 |                 return;
46 |             }
47 | 
48 |             grammar.ruleMap.get(Y).forEach((rule: Rule<T>) => {
49 |                 // Left-hand side as stated by the rule itself
50 |                 const ruleLeft: NonTerminal = rule.left;
51 | 
52 |                 // γ' = P(Y → v)
53 |                 const ruleScore: S = fromProb(rule.probability);
54 | 
55 |                 // α' = α * R(Z =*L> Y) * P(Y → v)
56 |                 const leftStarScore: S = fromProb(grammar.getLeftStarScore(Z, ruleLeft));
57 |                 const forward: S = sr.times(originForward, sr.times(leftStarScore, ruleScore));
58 | 
59 |                 // Reuse the state if the chart already has it, so that its scores are added to
60 |                 const known = stateSets.has(rule, index, index, 0);
61 |                 const predicted: State<S, T> = known
62 |                     ? stateSets.getOrCreate(index, index, 0, rule)
63 |                     : {position: index, ruleStartPosition: index, ruleDotPosition: 0, rule};
64 |                 if (!known) {
65 |                     pending.add(predicted);
66 |                 }
67 | 
68 |                 // The stored inner score of a prediction must equal P(Y → v) unless it is ZERO
69 |                 const currentInner: S = stateSets.getInnerScore(predicted);
70 |                 if (ruleScore !== currentInner && probMap.ZERO !== currentInner) {
71 |                     throw new Error(ruleScore + " != " + currentInner);
72 |                 }
73 | 
74 |                 const viterbi = {
75 |                     origin,
76 |                     resultingState: predicted,
77 |                     innerScore: ruleScore,
78 |                 };
79 | 
80 |                 stateSets.addForwardScore(predicted, forward, sr);
81 |                 stateSets.setInnerScore(predicted, ruleScore);
82 |                 stateSets.setViterbiScore(viterbi);
83 | 
84 |                 changes.push({
85 |                     state: predicted,
86 |                     innerScore: ruleScore,
87 |                     forwardScore: forward,
88 |                     viterbiScore: viterbi,
89 |                     origin
90 |                 });
91 |             });
92 |         });
93 |     });
94 | 
95 |     pending.forEach(ss => stateSets.getOrCreate(ss.position, ss.ruleStartPosition, ss.ruleDotPosition, ss.rule));
96 |     return changes;
97 | }
98 | 


--------------------------------------------------------------------------------
/src/earley/scan.ts:
--------------------------------------------------------------------------------
  1 | import { isNonTerminal, WordWithTypes, Terminal } from "../grammar/category";
  2 | import { Semiring } from "semiring";
  3 | import { Chart } from "./chart/chart";
  4 | import { getActiveCategory, State, advanceDot } from "./chart/state";
  5 | 
  6 | 
  7 | /**
  8 |  * Handles a token scanned from the input string.
  9 |  *
 10 |  * For every terminal in `types`, each state at `tokenPosition` that is active on that terminal
 11 |  * is advanced over the token: the state at `tokenPosition + 1` whose dot position comes from
 12 |  * `advanceDot` gets forward, inner and Viterbi scores derived from the state it was advanced from.
 13 |  *
 14 |  * @param tokenPosition   The start index of the scan.
 15 |  * @param wordWithTypes   Destructured into `word`, the scanned token, and `types`, the terminals to scan for.
 16 |  * @param sr              Semiring used to combine scores.
 17 |  * @param stateSets       Chart that is read from and updated.
 18 |  * @param scanProbability Optional function giving the score of scanning `word`. When omitted, the forward
 19 |  *                        and inner score of the pre-scan state are carried over unchanged.
 20 |  * @return One record per advanced state: the state, its Viterbi score, inner score and forward score.
 21 |  * @throws Error "this is a bug" if a returned state is active on a non-terminal, or "Index failed"
 22 |  *         if its active terminal does not accept `word`.
 23 |  */
 24 | export function scan<S, T>(tokenPosition: number,
 25 |                            {word, types}: WordWithTypes<T>,
 26 |                            sr: Semiring<S>,
 27 |                            stateSets: Chart<T, S>,
 28 |                            scanProbability?: (x: T, t: Terminal<T>[]) => S) {
 29 |     const changes: any[] = [];
 30 | 
 31 |     // Evaluated once per call, no matter how many states end up being advanced
 32 |     const scanProb: S = scanProbability ? scanProbability(word, types) : undefined;
 33 | 
 34 |     types.forEach(terminal => {
 35 |         // O(|stateset(i)|) = O(|grammar|): all states i: X_k → λ·tμ, where t is this terminal
 36 |         const candidates: Set<State<S, T>> = stateSets.getStatesActiveOnTerminals(tokenPosition, terminal);
 37 |         if (!candidates) {
 38 |             return;
 39 |         }
 40 | 
 41 |         candidates.forEach((preScanState: State<S, T>) => {
 42 |             const activeCategory = getActiveCategory(preScanState);
 43 |             if (isNonTerminal(activeCategory)) {
 44 |                 throw new Error("this is a bug");
 45 |             }
 46 |             if (!activeCategory(word)) {
 47 |                 throw new Error("Index failed");
 48 |             }
 49 | 
 50 |             const preScanForward: S = stateSets.getForwardScore(preScanState);
 51 |             const preScanInner: S = stateSets.getInnerScore(preScanState);
 52 | 
 53 |             // Create the state i+1: X_k → λt·μ; it is unique for each pre-scan state
 54 |             const postScanState: State<S, T> = stateSets.getOrCreate(
 55 |                 tokenPosition + 1,
 56 |                 preScanState.ruleStartPosition,
 57 |                 advanceDot(preScanState),
 58 |                 preScanState.rule,
 59 |                 word
 60 |             );
 61 | 
 62 |             // Forward score
 63 |             const forward = calculateForwardScore(sr, preScanForward, scanProb);
 64 |             stateSets.setForwardScore(postScanState, forward);
 65 | 
 66 |             // Inner score
 67 |             const inner: S = calculateInnerScore(sr, preScanInner, scanProb);
 68 |             stateSets.setInnerScore(postScanState, inner);
 69 | 
 70 |             // Viterbi score, pointing back at the state the token was scanned from
 71 |             const viterbi = {
 72 |                 origin: preScanState,
 73 |                 resultingState: postScanState,
 74 |                 innerScore: inner
 75 |             };
 76 |             stateSets.setViterbiScore(viterbi);
 77 | 
 78 |             changes.push({state: postScanState, viterbi, inner, forward});
 79 |         });
 80 |     });
 81 | 
 82 |     return changes;
 83 | }
 84 | 
 85 | /**
 86 |  * Function to calculate the new inner score from given values
 87 |  *
 88 |  * @param sr              The semiring to calculate with
 89 |  * @param previousInner   The previous inner score
 90 |  * @param scanProbability The score of scanning this particular token; only `undefined`/`null` mean
 91 |  *                        "not given". Falsy scores such as `0` are still multiplied in.
 92 |  * @return `previousInner` if no scan score was given, otherwise `sr.times(previousInner, scanProbability)`
 93 |  */
 94 | function calculateInnerScore<S>(sr: Semiring<S>, previousInner: S, scanProbability?: S): S {
 95 |     // Not a truthiness test: a falsy semiring element (e.g. the number 0) is still a real score
 96 |     if (scanProbability === undefined || scanProbability === null) return previousInner;
 97 |     return sr.times(previousInner, scanProbability);
 98 | }
 99 | 
100 | /**
101 |  * Function to compute the forward score for the new chart after scanning the given token.
102 |  *
103 |  * @param sr                        The semiring to calculate with
104 |  * @param previousStateForwardScore The previous forward score
105 |  * @param scanProbability           The score of scanning this particular token; only `undefined`/`null`
106 |  *                                  mean "not given". Falsy scores such as `0` are still multiplied in.
107 |  * @return `previousStateForwardScore` if no scan score was given, otherwise
108 |  *         `sr.times(previousStateForwardScore, scanProbability)`
109 |  */
110 | function calculateForwardScore<S>(sr: Semiring<S>, previousStateForwardScore: S, scanProbability?: S): S {
111 |     // Not a truthiness test: a falsy semiring element (e.g. the number 0) is still a real score
112 |     if (scanProbability === undefined || scanProbability === null) return previousStateForwardScore;
113 |     return sr.times(previousStateForwardScore, scanProbability);
114 | }


--------------------------------------------------------------------------------
/src/grammar/category.ts:
--------------------------------------------------------------------------------
 1 | export type Category<T> = Terminal<T> | NonTerminal; // a grammar symbol: either a terminal or a non-terminal
 2 | export type Terminal<T> = (t: T) => boolean; // a terminal is a predicate over tokens of type T
 3 | export type NonTerminal = string; // a non-terminal is represented by a string
 4 | 
 5 | export interface WordWithTypes<T> { // an input token paired with a list of terminals
 6 |     types: Terminal<T>[]; // the parser fills this with the grammar terminals that return true for `word`
 7 |     word: T; // the input token itself
 8 | }
 9 | 
10 | /**
11 |  * Type guard telling non-terminals apart from terminals: a value counts as a
12 |  * non-terminal exactly when it is a primitive string.
13 |  *
14 |  * @param element Value to inspect.
15 |  * @return `true` if `typeof element` is `"string"`.
16 |  */
17 | export function isNonTerminal(element: any): element is NonTerminal {
18 |     const kind = typeof element;
19 |     return kind === "string";
20 | }


--------------------------------------------------------------------------------
/src/grammar/grammar.ts:
--------------------------------------------------------------------------------
  1 | import { NonTerminal, Category, isNonTerminal, Terminal } from "./category";
  2 | import { Rule } from "./rule";
  3 | import {
  4 |     getLeftCorners,
  5 |     getUnitStarCorners,
  6 |     getReflexiveTransitiveClosure,
  7 |     LeftCorners
  8 | } from "./left-corner";
  9 | 
 10 | import { Semiring, LogSemiring, makeDeferrable } from "semiring";
 11 | import { Expression } from "semiring";
 12 | 
 13 | // Returns the set stored under `x`, first storing a new empty set when `map` has no entry for `x`.
 14 | function getOrCreateSet<X, Y>(map: Map<X, Set<Y>>, x: X): Set<Y> {
 15 |     if (!map.has(x)) {
 16 |         const created: Set<Y> = new Set<Y>();
 17 |         map.set(x, created);
 18 |         return created;
 19 |     }
 20 |     return map.get(x);
 21 | }
 22 | 
 23 | export interface ProbabilitySemiringMapping<Y> {
 24 |     semiring: Semiring<Y>;
 25 |     // Converts a plain probability into an element of the semiring
 26 |     fromProbability(p: number): Y;
 27 |     // Converts an element of the semiring back into a plain probability
 28 |     toProbability(p: Y): number;
 29 |     // LOG_SEMIRING fills these with the semiring's additive and multiplicative identity
 30 |     ZERO: Y;
 31 |     ONE: Y;
 32 | }
 33 | 
 34 | /**
 35 |  * A set of weighted rules together with the scores that are pre-computed from them
 36 |  * for parsing. {@link GrammarBuilder} creates instances in its `build()` method.
 37 |  */
 38 | export class Grammar<T, SemiringType> {
 39 |     readonly name: string;
 40 |     readonly ruleMap: Map<NonTerminal, Set<Rule<T>>>;
 41 |     readonly rules: Set<Rule<T>> = new Set<Rule<T>>();
 42 |     readonly nonTerminals: Set<NonTerminal> = new Set<NonTerminal>();
 43 |     readonly terminals: Terminal<T>[];
 44 | 
 45 |     // Scores that are pre-computed for efficient earley parsing
 46 |     private readonly leftCorners: LeftCorners<T>;
 47 |     readonly leftStarCorners: LeftCorners<T>;
 48 |     readonly unitStarScores: LeftCorners<T>;
 49 |     readonly probabilityMapping: ProbabilitySemiringMapping<SemiringType>;
 50 |     readonly deferrableSemiring: Semiring<Expression<SemiringType>>;
 51 | 
 52 |     /**
 53 |      * @param name               Name of the grammar.
 54 |      * @param ruleMap            Rules of the grammar, keyed by non-terminal.
 55 |      * @param probabilityMapping Semiring to calculate in, with the conversions from and to probabilities.
 56 |      */
 57 |     constructor(name: string,
 58 |                 ruleMap: Map<NonTerminal, Set<Rule<T>>>,
 59 |                 probabilityMapping: ProbabilitySemiringMapping<SemiringType>) {
 60 |         this.name = name;
 61 |         this.ruleMap = ruleMap;
 62 |         this.probabilityMapping = probabilityMapping;
 63 |         this.deferrableSemiring = makeDeferrable(probabilityMapping.semiring);
 64 | 
 65 |         // Gather every rule, and every symbol that occurs on either side of one
 66 |         const terminals = new Set<Terminal<T>>();
 67 |         ruleMap.forEach((ruleSet: Set<Rule<T>>) => {
 68 |             ruleSet.forEach((rule: Rule<T>) => {
 69 |                 this.rules.add(rule);
 70 |                 this.nonTerminals.add(rule.left);
 71 |                 rule.right.forEach((symbol: Category<T>) => {
 72 |                     if (isNonTerminal(symbol)) {
 73 |                         this.nonTerminals.add(symbol);
 74 |                     } else {
 75 |                         terminals.add(symbol);
 76 |                     }
 77 |                 });
 78 |             });
 79 |         });
 80 |         this.terminals = Array.from(terminals);
 81 | 
 82 |         const zero = 0.0;
 83 |         this.leftCorners = getLeftCorners(this.rules, zero);
 84 |         this.leftStarCorners = getReflexiveTransitiveClosure(this.nonTerminals, this.leftCorners, zero);
 85 |         this.unitStarScores = getUnitStarCorners(this.rules, this.nonTerminals, zero);
 86 |     }
 87 | 
 88 |     /** Looks up the pair (from, to) in the left*-corner scores. */
 89 |     getLeftStarScore(from: Category<T>, to: Category<T>): number {
 90 |         return this.leftStarCorners.get(from, to);
 91 |     }
 92 | 
 93 |     /** Looks up the pair (from, to) in the left-corner scores. */
 94 |     getLeftScore(from: Category<T>, to: Category<T>): number {
 95 |         return this.leftCorners.get(from, to);
 96 |     }
 97 | 
 98 |     /** Looks up the pair (from, to) in the unit*-scores. */
 99 |     getUnitStarScore(from: Category<T>, to: Category<T>): number {
100 |         return this.unitStarScores.get(from, to);
101 |     }
102 | 
103 |     // noinspection JSUnusedGlobalSymbols
104 |     /** Starts a builder for a grammar that calculates with the given semiring mapping. */
105 |     static withSemiring<T, Y>(semiringMapping: ProbabilitySemiringMapping<Y>, name?: string): GrammarBuilder<T, Y> {
106 |         return new GrammarBuilder<T, Y>(semiringMapping, name);
107 |     }
108 | 
109 |     /** Starts a builder for a grammar that calculates with LOG_SEMIRING. */
110 |     static builder<T>(name?: string): GrammarBuilder<T, number> {
111 |         return new GrammarBuilder<T, number>(LOG_SEMIRING, name);
112 |     }
113 | }
114 | 
115 | 
116 | const LOG_SEMIRING: ProbabilitySemiringMapping<number> = { // mapping that Grammar.builder() passes to the builder
117 |     semiring: LogSemiring,
118 |     fromProbability: (x) => -Math.log(x), // probability -> negative natural logarithm
119 |     toProbability: (x) => Math.exp(-x), // inverse of fromProbability
120 |     ZERO: LogSemiring.additiveIdentity,
121 |     ONE: LogSemiring.multiplicativeIdentity
122 | };
123 | 
124 | /** Collects rules one at a time and turns them into a {@link Grammar} with `build()`. */
125 | export class GrammarBuilder<T, SemiringType> {
126 |     private readonly ruleMap: Map<NonTerminal, Set<Rule<T>>>;
127 |     private readonly name: string;
128 |     private semiringMapping: ProbabilitySemiringMapping<SemiringType>;
129 | 
130 |     constructor(semiringMapping: ProbabilitySemiringMapping<SemiringType>, name?: string) {
131 |         this.ruleMap = new Map<NonTerminal, Set<Rule<T>>>();
132 |         this.name = name;
133 |         this.semiringMapping = semiringMapping;
134 |     }
135 | 
136 |     //noinspection JSUnusedGlobalSymbols
137 |     /** Replaces the semiring mapping that `build()` hands to the grammar. */
138 |     setSemiringMapping(semiringMapping: ProbabilitySemiringMapping<SemiringType>) {
139 |         this.semiringMapping = semiringMapping;
140 |         return this;
141 |     }
142 | 
143 |     /** Wraps the arguments in a rule object, passes it to `addRule` and returns this builder. */
144 |     addNewRule(probability: number, left: NonTerminal, right: Category<T>[]): GrammarBuilder<T, SemiringType> {
145 |         this.addRule({left, right, probability});
146 |         return this;
147 |     }
148 | 
149 |     /**
150 |      * Adds `rule`, throwing an Error if its probability is falsy or not a number, its left-hand side is falsy,
151 |      * its right-hand side is missing or empty, or an equal left- and right-hand side were added before.
152 |      */
153 |     addRule(rule: Rule<T>): GrammarBuilder<T, SemiringType> {
154 |         if (!(rule.probability && typeof rule.probability === "number"))
155 |             throw new Error("Probability not defined: " + rule.probability);
156 |         if (!rule.left) throw new Error("Left hand side not defined: " + rule.left);
157 |         if (!rule.right || !rule.right.length || typeof rule.right.length !== "number" || rule.right.length <= 0)
158 |             throw new Error("Right hand side not defined: " + rule.right);
159 | 
160 |         // Compare against every rule already stored under the same left-hand side
161 |         if (this.ruleMap.has(rule.left)) {
162 |             this.ruleMap.get(rule.left).forEach(known => {
163 |                 if (known.right.length !== rule.right.length) return;
164 |                 let i = 0;
165 |                 while (i < rule.right.length && rule.right[i] === known.right[i]) i++;
166 |                 if (i === rule.right.length) throw new Error("Already added rule " + rule.left + " -> " + rule.right.toString());
167 |             });
168 |         }
169 | 
170 |         getOrCreateSet(this.ruleMap, rule.left).add(rule);
171 |         return this;
172 |     }
173 | 
174 |     /** Creates a grammar from the name, the rules added so far and the current semiring mapping. */
175 |     build(): Grammar<T, SemiringType> {
176 |         return new Grammar(this.name, this.ruleMap, this.semiringMapping);
177 |     }
178 | }
179 | 


--------------------------------------------------------------------------------
/src/grammar/left-corner.ts:
--------------------------------------------------------------------------------
  1 | import { NonTerminal, Category, isNonTerminal } from "./category";
  2 | import { Rule, isUnitProduction } from "./rule";
  3 | import { getOrCreateMap, getOrCreateSet } from "../util";
  4 | 
  5 | /**
  6 |  * Returns the inverse of matrix `M`, leaving `M` itself untouched.
  7 |  * Uses Gaussian elimination to calculate the inverse:
  8 |  * (1) 'augment' the matrix (left) by the identity (on the right)
  9 |  * (2) turn the matrix on the left into the identity by elementary row ops
 10 |  * (3) the matrix on the right is the inverse (it started out as the identity)
 11 |  *
 12 |  * There are 3 elementary row ops (b and c are combined in the code):
 13 |  * (a) swap 2 rows
 14 |  * (b) multiply a row by a scalar
 15 |  * (c) add 2 rows
 16 |  *
 17 |  * A row swap only happens when the pivot is exactly 0; there is no pivoting for
 18 |  * numerical stability.
 19 |  *
 20 |  * @param M Square matrix as an array of rows. May be empty.
 21 |  * @return A new matrix holding the inverse of `M`; `[]` when `M` is empty.
 22 |  * @throws Error "Matrix must be square" if any row is missing or its length
 23 |  *         differs from the number of rows.
 24 |  * @throws Error "Matrix was not invertable" if a pivot is 0 and no lower row
 25 |  *         has a non-zero entry in that column.
 26 |  */
 27 | function invert(M: number[][]) {
 28 |     const dim = M.length;
 29 | 
 30 |     // Check every row rather than only the first: a ragged matrix used to slip
 31 |     // through and produce NaN entries, and an empty matrix crashed on M[0]
 32 |     for (let i = 0; i < dim; i++) {
 33 |         if (!M[i] || M[i].length !== dim) {
 34 |             throw new Error("Matrix must be square");
 35 |         }
 36 |     }
 37 | 
 38 |     // Create the identity matrix (I), and a copy (C) of the original
 39 |     const I: number[][] = [];
 40 |     const C: number[][] = [];
 41 |     for (let i = 0; i < dim; i++) {
 42 |         I.push([]);
 43 |         C.push([]);
 44 |         for (let j = 0; j < dim; j++) {
 45 |             // 1 on the diagonal, 0 everywhere else
 46 |             I[i][j] = i === j ? 1 : 0;
 47 |             C[i][j] = M[i][j];
 48 |         }
 49 |     }
 50 | 
 51 |     // Perform elementary row operations, one column at a time
 52 |     for (let i = 0; i < dim; i++) {
 53 |         // If we have a 0 on the diagonal we need to swap with a lower row
 54 |         if (C[i][i] === 0) {
 55 |             // Look through every row below the i'th row
 56 |             for (let ii = i + 1; ii < dim; ii++) {
 57 |                 // If the ii'th row has a non-0 in the i'th column, swapping
 58 |                 // it in makes the diagonal non-0
 59 |                 if (C[ii][i] !== 0) {
 60 |                     const rowOfC = C[i];
 61 |                     C[i] = C[ii];
 62 |                     C[ii] = rowOfC;
 63 | 
 64 |                     const rowOfI = I[i];
 65 |                     I[i] = I[ii];
 66 |                     I[ii] = rowOfI;
 67 | 
 68 |                     // Don't bother checking other rows since we've swapped
 69 |                     break;
 70 |                 }
 71 |             }
 72 | 
 73 |             // If the diagonal is still 0, the matrix is not invertible
 74 |             if (C[i][i] === 0) {
 75 |                 throw new Error("Matrix was not invertable");
 76 |             }
 77 |         }
 78 | 
 79 |         // Scale this row down by the pivot (so we have a 1 on the diagonal)
 80 |         const pivot = C[i][i];
 81 |         for (let j = 0; j < dim; j++) {
 82 |             C[i][j] = C[i][j] / pivot; // apply to original matrix
 83 |             I[i][j] = I[i][j] / pivot; // apply to identity
 84 |         }
 85 | 
 86 |         // Subtract this row (scaled appropriately for each row) from ALL of
 87 |         // the other rows so that there will be 0's in this column in the
 88 |         // rows above and below this one
 89 |         for (let ii = 0; ii < dim; ii++) {
 90 |             // Only apply to other rows (we want a 1 on the diagonal)
 91 |             if (ii === i) {
 92 |                 continue;
 93 |             }
 94 | 
 95 |             // We want to change this element to 0
 96 |             const factor = C[ii][i];
 97 | 
 98 |             // Subtract (row i scaled by factor) from (the current row); the whole
 99 |             // row is processed, including the columns left of the diagonal
100 |             for (let j = 0; j < dim; j++) {
101 |                 C[ii][j] -= factor * C[i][j]; // apply to original matrix
102 |                 I[ii][j] -= factor * I[i][j]; // apply to identity
103 |             }
104 |         }
105 |     }
106 | 
107 |     // We've done all operations: C should be the identity
108 |     // and matrix I should be the inverse
109 |     return I;
110 | }
111 | 
112 | 
113 | /**
114 |  * Sparse table of left-corner (or left*-corner) scores: a two-level map from a left-hand {@link Category} X
115 |  * to a right-hand {@link Category} Y, plus indexes of the Y's for which a non-ZERO score was set.
116 |  */
117 | export class LeftCorners<T> {
118 |     /**
119 |      * X -L> Y score; pairs whose score equals ZERO are never stored
120 |      */
121 |     private readonly map: Map<Category<T>, Map<Category<T>, number>>;
122 |     /**
123 |      * For each X: every Y for which a non-ZERO score was set
124 |      */
125 |     private nonZeroScores: Map<Category<T>, Set<Category<T>>>;
126 |     /**
127 |      * For each X: every non-terminal Y for which a non-ZERO score was set
128 |      */
129 |     private nonZeroScoresToNonTerminals: Map<Category<T>, Set<NonTerminal>>;
130 | 
131 |     readonly ZERO: number;
132 | 
133 |     /**
134 |      * Creates an empty table: no scores are stored, so every lookup yields ZERO until {@link set} or
135 |      * {@link add} is called.
136 |      * @param ZERO Default value if there is no chance; usually 0
137 |      */
138 |     constructor(ZERO = 0) {
139 |         this.ZERO = ZERO;
140 | 
141 |         this.map = new Map<Category<T>, Map<Category<T>, number>>();
142 |         this.nonZeroScores = new Map<Category<T>, Set<Category<T>>>();
143 |         this.nonZeroScoresToNonTerminals = new Map<Category<T>, Set<NonTerminal>>();
144 |     }
145 | 
146 | 
147 |     /**
148 |      * Adds the given number to the current value of [X, Y], using standard +
149 |      *
150 |      * @param x           Left hand side
151 |      * @param y           Right hand side
152 |      * @param probability number to add; an Error is thrown when the resulting sum is NaN or infinite
153 |      */
154 |     public add(x: Category<T>, y: Category<T>, probability: number) {
155 |         const newProbability = this.get(x, y) /* defaults to zero */ + probability;
156 |         if (!isFinite(newProbability)) {
157 |             throw new Error("Invalid left-[*]-corner probability: " + newProbability + " for " + x + " -L> " + y + " ... ");
158 |         }
159 |         this.set(x, y, newProbability);
160 |     }
161 | 
162 |     /**
163 |      * @return stored value in left-corner relationship. this.ZERO by default; looking up never modifies the table
164 |      */
165 |     public get(x: Category<T>, y: Category<T>): number {
166 |         if (!this.map) throw new Error("Map was not defined");
167 |         const yToP = this.map.get(x); // plain lookup: a read must not insert an empty row for an unknown x
168 |         if (!yToP) return this.ZERO;
169 |         else return yToP.get(y) || this.ZERO; // a missing (or falsy) entry reads as ZERO
170 |     }
171 | 
172 | 
173 |     /**
174 |      * Sets table entry to a given probability. Will instantiate empty map if it does not exist yet.
175 |      * A value equal to ZERO is ignored: nothing is stored and an already existing entry is left untouched.
176 |      * @param x   LHS
177 |      * @param y   RHS
178 |      * @param val number to set table entry to
179 |      */
180 |     public set(x: Category<T>, y: Category<T>, val: number): void {
181 |         if (val !== this.ZERO) {
182 |             // Set map
183 |             const yToProb = getOrCreateMap(this.map, x);
184 |             yToProb.set(y, val);
185 | 
186 |             // Set non-zero scores
187 |             // (the non-terminal index below is only updated when y is a non-terminal)
188 |             getOrCreateSet(this.nonZeroScores, x).add(y);
189 |             if (isNonTerminal(y))
190 |                 getOrCreateSet(this.nonZeroScoresToNonTerminals, x).add(y);
191 |         }
192 |     }
193 | 
194 |     public getNonZeroScores(x: Category<T>): Set<Category<T>> {
195 |         return this.nonZeroScores.get(x); // undefined when no non-ZERO score was ever set for x
196 |     }
197 | 
198 |     public getNonZeroScoresToNonTerminals(x: Category<T>): Set<NonTerminal> {
199 |         return this.nonZeroScoresToNonTerminals.get(x); // undefined when x has no non-ZERO score to a non-terminal
200 |     }
201 | }
202 | 
203 | /**
204 |  * Computes left*-corners (R_L), the reflexive transitive closure of the left-corner relation P_L, in closed form:
205 |  *
206 |  * ~~ P must have its scores defined as ordinary probabilities between 0 and 1 ~~
207 |  *
208 |  * <code>R_L = I + P_L R_L = (I - P_L)^-1</code>
209 |  *
210 |  * @param nonTerminals Non-terminals spanning the rows and columns of the matrices
211 |  * @param P            Scores P_L; pairs without a stored score are read as P.ZERO
212 |  * @param zero         ZERO value of the returned table
213 |  */
214 | export function getReflexiveTransitiveClosure<T>(nonTerminals: Set<NonTerminal>,
215 |                                                  P: LeftCorners<T>,
216 |                                                  zero = 0.0): LeftCorners<T> {
217 |     // Fix an ordering of the non-terminals: position k is row k and column k of the matrices below
218 |     const symbols: NonTerminal[] = Array.from(nonTerminals);
219 | 
220 |     // Build I - P_L row by row
221 |     const identityMinusP: number[][] = symbols.map(
222 |         (X: NonTerminal, row: number) => symbols.map(
223 |             (Y: NonTerminal, col: number) => (row === col ? 1 : 0) - P.get(X, Y)
224 |         )
225 |     );
226 | 
227 |     // R_L = (I - P_L)^-1
228 |     const closure: number[][] = invert(identityMinusP);
229 | 
230 |     // Copy all matrix values into a new LeftCorners object;
231 |     // LeftCorners.set ignores values equal to its ZERO, so those cells stay unset
232 |     const result: LeftCorners<T> = new LeftCorners<T>(zero);
233 |     symbols.forEach((X: NonTerminal, row: number) => {
234 |         symbols.forEach((Y: NonTerminal, col: number) => {
235 |             result.set(X, Y, closure[row][col]);
236 |         });
237 |     });
238 | 
239 |     return result;
240 | }
241 | 
242 | /** Sums the probabilities of all unit productions X -> Y into P_U and returns the closure R_U = (I - P_U)^-1. */
243 | export function getUnitStarCorners<T>(rules: Set<Rule<T>>,
244 |                                       nonTerminals: Set<NonTerminal>,
245 |                                       zero = 0.0): LeftCorners<T> {
246 |     const unitScores: LeftCorners<T> = new LeftCorners<T>(zero);
247 |     rules.forEach((rule: Rule<T>) => {
248 |         if (!isUnitProduction(rule)) {
249 |             return; // only unit productions contribute to P_U
250 |         }
251 |         unitScores.add(rule.left, rule.right[0], rule.probability);
252 |     });
253 |     return getReflexiveTransitiveClosure(nonTerminals, unitScores, zero);
254 | }
255 | 
256 | 
257 | /**
258 |  * Compute left corner relations: for every rule whose right-hand side starts with a non-terminal Y,
259 |  * the rule's probability is added to the score of (rule.left -L> Y).
260 |  */
261 | export function getLeftCorners<T>(rules: Set<Rule<T>>, ZERO = 0.0): LeftCorners<T> {
262 |     const table = new LeftCorners<T>(ZERO);
263 | 
264 |     rules.forEach((rule: Rule<T>) => {
265 |         const first = rule.right[0];
266 |         if (rule.right.length === 0 || !isNonTerminal(first)) return; // empty or terminal-initial right side
267 |         table.add(rule.left, first, rule.probability);
268 |     });
269 |     return table;
270 | }


--------------------------------------------------------------------------------
/src/grammar/rule.ts:
--------------------------------------------------------------------------------
 1 | import { Category, NonTerminal, isNonTerminal } from "./category";
 2 | 
 3 | export interface Rule<T> { // a single production: left -> right[0] right[1] ...
 4 |     left: NonTerminal; // left-hand side of the production
 5 |     right: Category<T>[]; // right-hand side: ordered sequence of categories
 6 |     probability: number; // score attached to the rule; NOTE(review): plain probability or semiring value - confirm
 7 | }
 8 | 
 9 | export function invalidDotPosition<T>(dotPosition: number, rule: any) { // always throws; never returns normally
10 |     throw new Error(`Invalid dot position: ${dotPosition}, ${JSON.stringify(rule)}`); // `rule` is serialized as JSON
11 | }
12 | 
13 | export function isUnitProduction<T>(rule: Rule<T>): boolean { // true for rules of the form X -> Y
14 |     return rule.right.length === 1 && isNonTerminal(rule.right[0]); // exactly one symbol, and it is a non-terminal
15 | }
16 | 
17 | /**
18 |  * Looks up the category directly after the dot of a dotted rule.
19 |  *
20 |  * @param rule        Rule whose right side is inspected
21 |  * @param dotPosition Index of the dot; valid values run from 0 up to and including rule.right.length
22 |  * @return The category at dotPosition in the rule's right side, or <code>undefined</code> when the dot is at
23 |  * the end of the right side. Throws when dotPosition is out of range or no category is stored at that index.
24 |  */
25 | export function getActiveCategory<T>(rule: Rule<T>, dotPosition: number): Category<T> {
26 |     const rightSide = rule.right;
27 |     if (dotPosition < 0 || dotPosition > rightSide.length) {
28 |         invalidDotPosition(dotPosition, rightSide); // always throws
29 |     }
30 |     // Negated comparison on purpose: a NaN position passes the range check above and must yield undefined
31 |     if (!(dotPosition < rightSide.length)) {
32 |         return undefined;
33 |     }
34 |     const active: Category<T> = rightSide[dotPosition];
35 |     if (!active) {
36 |         throw new Error(`category did not exist at position ${dotPosition}: ${active}`);
37 |     }
38 |     return active;
39 | }
40 | 
41 | 


--------------------------------------------------------------------------------
/src/grammar/token.ts:
--------------------------------------------------------------------------------
 1 | export type Token = any;
 2 | 
 3 | //noinspection JSUnusedGlobalSymbols
 4 | /** Wraps `source` in a `{source}` token object; throws an Error when `source` is null or undefined. */
 5 | export function wrapped<T>(source: T): Token {
 6 |     if (source === null || source === undefined) // 0, "" and false are legitimate sources and must be accepted
 7 |         throw new Error("Source object can't be null for an instantiated token.");
 8 |     return {source};
 9 | }
10 | 
11 | export default Token;


--------------------------------------------------------------------------------
/src/index.ts:
--------------------------------------------------------------------------------
1 | export * from "./earley/parser";
2 | export * from "./earley/parsetree";
3 | export * from "./earley/chart/viterbi-score";
4 | export * from "./grammar/grammar";
5 | export * from "./grammar/category";
6 | export * from "./grammar/rule";


--------------------------------------------------------------------------------
/src/util.ts:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Returns the inner map stored under `key`; when `key` is absent, a new empty map is stored there first.
 3 |  */
 4 | export function getOrCreateMap<X, Y, Z>(map: Map<X, Map<Y, Z>>, key: X): Map<Y, Z> {
 5 |     if (!map.has(key)) {
 6 |         map.set(key, new Map<Y, Z>());
 7 |     }
 8 |     return map.get(key);
 9 | }
10 | 
11 | /**
12 |  * Returns the set stored under `key`; when `key` is absent, a new empty set is stored there first.
13 |  */
14 | export function getOrCreateSet<X, Y>(map: Map<X, Set<Y>>, key: X): Set<Y> {
15 |     if (!map.has(key)) {
16 |         map.set(key, new Set<Y>());
17 |     }
18 |     return map.get(key);
19 | }


--------------------------------------------------------------------------------
/test/earley/chart.spec.ts:
--------------------------------------------------------------------------------
  1 | import {NonTerminal, isNonTerminal, Category} from "../../src";
  2 | import {Rule, isUnitProduction} from "../../src";
  3 | 
  4 | import {expect} from "chai";
  5 | import {Chart} from "../../src/earley/chart/chart";
  6 | import {g} from "../sample-grammar";
  7 | import {isPassive, isCompleted, State, isActive, getActiveCategory} from "../../src/earley/chart/state";
  8 | import {getOrCreateSet, getOrCreateMap} from "../../src/util";
  9 | 
 10 | describe("Chart", () => {
 11 |     // ss.getStatesActiveOnNonTerminalWithNonZeroUnitStarScoreToY();
 12 |     // ss.getStatesActiveOnNonTerminal();
 13 |     // ss.getState();
 14 |     // ss.getOrCreate();
 15 |     // ss.hasState();
 16 |     // ss.has();
 17 |     // ss.addState();
 18 |     // ss.getCompletedStatesThatAreNotUnitProductions();
 19 |     // ss.getCompletedStates();
 20 |     // ss.getStatesActiveOnNonTerminals();
 21 |     // ss.getStatesActiveOnTerminals();
 22 | 
 23 |     it("should index new states correctly", () => {
 24 |         // ss.addState()
 25 |         // expect(ss.states).to.exist;
 26 |         g.rules.forEach((r: Rule<string>, i) => {
 27 |             const s: State< number, string> = {
 28 |                 rule: r,
 29 |                 ruleStartPosition: 1,
 30 |                 ruleDotPosition: 1,
 31 |                 position: 2,
 32 |                 scannedToken: "state " + i
 33 |             };
 34 |             expect(ss.has(r, 2, 1, 1)).to.equal(false);
 35 |             expect(ss.hasState(s)).to.equal(false);
 36 | 
 37 |             const state: State<number, string> =
 38 |                 ss.getOrCreate(2, 1, 1, r, "state " + i);
 39 |             expect(state).to.exist;
 40 |             expect(ss.has(r, 2, 1, 1)).to.equal(true);
 41 |             expect(ss.hasState(s)).to.equal(true);
 42 | 
 43 |             expect(isCompleted(s)).to.equal(r.right.length === 1);
 44 |             expect(ss.getCompletedStates(s.position).has(state)).to.equal(isCompleted(state));
 45 |             expect(getOrCreateSet(getOrCreateMap(ss.completedStatesFor, (state.position)), r.left).has(state)).to.equal(isCompleted(state));
 46 |             expect(getOrCreateSet(ss.completedStatesThatAreNotUnitProductions, (state.position)).has(state)).to.equal(isCompleted(state) && !isUnitProduction(state.rule));
 47 | 
 48 |             const activeCategory: Category<string> = getActiveCategory(state);
 49 |             expect((ss.getStatesActiveOnNonTerminals(state.position)).has(state)).to.equal(isActive(state) && isNonTerminal(activeCategory));
 50 | 
 51 |             const nonZeroScoresToNonTerminals = g.unitStarScores.getNonZeroScoresToNonTerminals(activeCategory);
 52 |             if (!!nonZeroScoresToNonTerminals) nonZeroScoresToNonTerminals.forEach((FromNonTerminal: NonTerminal) => expect(getOrCreateSet(getOrCreateMap(ss.nonTerminalActiveAtIWithNonZeroUnitStarToY, (state.position)), FromNonTerminal).has(state)).to.equal(true));
 53 | 
 54 |             expect(
 55 |                 getOrCreateSet(getOrCreateMap(ss.statesActiveOnNonTerminal, activeCategory), state.position)
 56 |                     .has(state)
 57 |             )
 58 |                 .to.equal(isActive(state) && isNonTerminal(activeCategory));
 59 |             // TODO: also check the index of states that are active on terminals (draft below)
 60 |             // expect(
 61 |             //     getOrCreateMap(ss.statesActiveOnTerminals, state.position)
 62 |             //         .has(state)
 63 |             // ).to.equal(isActive(state) && !isNonTerminal(activeCategory));
 64 |         });
 65 | 
 66 |         // readonly byIndex: Map<number, Set<State<S, T>>>;
 67 |         // readonly forwardScores: Map<State<S, T>, S>;
 68 |         // readonly innerScores: Map<State<S, T>, S>;
 69 |         // readonly viterbiScores: Map<State<S, T>, ViterbiScore<T,S>>;
 70 | 
 71 |         // console.log(ss);
 72 |         // ss.getForwardScore()
 73 |     });
 74 |     const ss = new Chart(g); // one chart shared by every test in this suite, so scores set in one test remain visible later
 75 |     const ZERO = g.probabilityMapping.ZERO;
 76 |     const ONE = g.probabilityMapping.ONE;
 77 |     const plus = g.probabilityMapping.semiring.plus;
 78 |     const rulesIterator = g.rules.values();
 79 |     const r0: Rule<string> = rulesIterator.next().value;
 80 |     // const r1: Rule<string> = rulesIterator.next().value;
 81 |     it("should handle forward scores correctly", () => {
 82 |         const s1: State< number, string> = {
 83 |             rule: r0,
 84 |             ruleStartPosition: 1,
 85 |             ruleDotPosition: 2,
 86 |             position: 3,
 87 |             scannedToken: "a",
 88 |             scannedCategory: r0.right[0]
 89 |         };
 90 | 
 91 |         const s2: State< number, string> = {
 92 |             rule: r0,
 93 |             ruleStartPosition: 1,
 94 |             ruleDotPosition: 2,
 95 |             position: 3,
 96 |             scannedToken: "a",
 97 |             scannedCategory: r0.right[0]
 98 |         };
 99 | 
100 |         // expect(ss.hasForwardScore(s1)).to.equal(false);
101 |         expect(ss.getForwardScore(s2)).to.equal(ZERO);
102 |         ss.addForwardScore(s1, ONE, g.probabilityMapping.semiring);
103 |         // expect(ss.hasForwardScore(s1)).to.equal(true);
104 |         ss.addForwardScore(s2, ONE, g.probabilityMapping.semiring);
105 |         expect(ss.getForwardScore(s1)).to.equal(plus(plus(ZERO, ONE), ONE));
106 |         // expect(ss.hasForwardScore(s1)).to.equal(true);
107 |         ss.setForwardScore(s1, ONE);
108 |         expect(ss.getForwardScore(s2)).to.equal(ONE);
109 |         // expect(ss.hasForwardScore(s1)).to.equal(true);
110 |     });
111 |     it("should handle inner scores correctly", () => {
112 |         const s1: State< number, string> = {
113 |             rule: r0,
114 |             ruleStartPosition: 1,
115 |             ruleDotPosition: 2,
116 |             position: 3,
117 |             scannedToken: "a",
118 |             scannedCategory: r0.right[0]
119 |         };
120 |         const s2: State< number, string> = {
121 |             rule: r0,
122 |             ruleStartPosition: 1,
123 |             ruleDotPosition: 2,
124 |             position: 3,
125 |             scannedToken: "a",
126 |             scannedCategory: r0.right[0]
127 |         };
128 | 
129 |         // No inner score has been stored for this state yet, so ZERO is expected
130 |         expect(ss.getInnerScore(s1)).to.equal(ZERO);
131 |         ss.setInnerScore(s2, ONE);
132 |         // s1 and s2 are distinct objects with identical fields: the inner score set via s2 is read back via s1
133 |         expect(ss.getInnerScore(s1)).to.equal(ONE);
134 | 
135 |     });
136 | 
137 |     it("should handle viterbi scores correctly", () => {
138 |         const s1: State< number, string> = {
139 |             rule: r0,
140 |             ruleStartPosition: 0,
141 |             ruleDotPosition: 0,
142 |             position: 0,
143 |             scannedToken: "b",
144 |             scannedCategory: r0.right[1]
145 |         };
146 |         const s2: State< number, string> = {
147 |             rule: r0,
148 |             ruleStartPosition: 1,
149 |             ruleDotPosition: 2,
150 |             position: 3,
151 |             scannedToken: "a",
152 |             scannedCategory: r0.right[0]
153 |         };
154 | 
155 |         const viterbiScore = {
156 |             origin: s1,
157 |             resultingState: s2,
158 |             innerScore: ONE
159 |         };
160 | 
161 |         expect(ss.hasViterbiScore(s1)).to.equal(false);
162 |         expect(ss.hasViterbiScore(s2)).to.equal(false);
163 |         expect(ss.getViterbiScore(s1)).to.equal(undefined);
164 |         ss.setViterbiScore(viterbiScore); // TODO check if viterbiscore is valid?
165 |         expect(ss.getViterbiScore(s2)).to.equal(viterbiScore);
166 | 
167 |     });
168 | 
169 | });
170 | 
171 | describe("State", () => {
172 |     it("isUnitProduction should behave correctly", () => {
173 |         g.rules.forEach((rule: Rule<string>) => {
174 |             const expected = rule.right.length === 1 && isNonTerminal(rule.right[0]); // definition of a unit production
175 |             expect(isUnitProduction(rule)).to.equal(expected); // assert for every rule, unit production or not
176 |         });
177 |     });
178 | 
179 |     it("isPassive should behave correctly", () => {
180 |         g.rules.forEach((r: Rule<string>) => {
181 |             expect(isPassive(r, r.right.length)).to.equal(true);
182 |             expect(isPassive(r, r.right.length - 1)).to.equal(false);
183 |             expect(isPassive(r, 0)).to.equal(false);
184 |         });
185 |     });
186 | 
187 |     it("isCompleted should behave correctly", () => {
188 |         g.rules.forEach((r: Rule<string>) => {
189 |             expect(isCompleted({
190 |                 rule: r,
191 |                 ruleStartPosition: 0,
192 |                 ruleDotPosition: r.right.length,
193 |                 position: 0
194 |             })).to.equal(true);
195 |             expect(isCompleted({
196 |                 rule: r,
197 |                 ruleStartPosition: 0,
198 |                 ruleDotPosition: 0,
199 |                 position: 0
200 |             })).to.equal(false);
201 |             expect(isCompleted({
202 |                 rule: r,
203 |                 ruleStartPosition: 0,
204 |                 ruleDotPosition: r.right.length - 1,
205 |                 position: 0
206 |             })).to.equal(false);
207 |         });
208 |     });
209 | });
210 | 


--------------------------------------------------------------------------------
/test/earley/earley.spec.ts:
--------------------------------------------------------------------------------
 1 | // import {LogSemiring} from "semiring";
 2 | import {expect} from "chai";
 3 | import {Chart} from "../../src/earley/chart/chart";
 4 | import {simpleRecursiveGrammar as g, S} from "../sample-grammar";
 5 | import {addState} from "../../src";
 6 | // import {scan} from "../../src/earley/scan";
 7 | import {predict} from "../../src/earley/predict";
 8 | // import {complete} from "../../src/earley/complete";
 9 | 
10 | // TODO: despite its title, the test below only checks the predict step; the scan and complete calls are commented out
11 | describe("parser", () => {
12 |     it("should scan correctly", () => {
13 |         const ss = new Chart(g);
14 |         // noinspection JSUnusedLocalSymbols
15 |         const init = addState(
16 |             ss, 0, 0, 0,
17 |             {left: "<start>", right: [S], probability: 1.0},
18 |             g.probabilityMapping.ONE,
19 |             g.probabilityMapping.ONE
20 |         );
21 | 
22 |         const predict0 = predict(0, g, ss); // every result is expected to have its dot, start and position at 0
23 |         predict0.forEach(
24 |             p => {
25 |                 expect(p.state.ruleDotPosition).to.equal(0);
26 |                 expect(p.state.ruleStartPosition).to.equal(0);
27 |                 expect(p.state.position).to.equal(0);
28 |             }
29 |         );
30 |         // const scan0 = scan(
31 |         //     0,
32 |         //     {word: "a", types: [a]},
33 |         //     LogSemiring,
34 |         //     ss
35 |         // );
36 |         // const complete0 = complete(0, ss, g);
37 |         // const predict1 = predict(1, g, ss);
38 |         // const scan1 = scan(1, {word: "a", types: [a]}, LogSemiring, ss);
39 |         // const complete1 = complete(1, ss, g);
40 |         // const predict2 = predict(2, g, ss);
41 |         // const scan2 = scan(2, {word: "a", types: [a]}, LogSemiring, ss);
42 |         // const complete2 = complete(2, ss, g);
43 |         // const predict3 = predict(3, g, ss);
44 |         // const scan3 = scan(3, {word: "a", types: [a]}, LogSemiring, ss);
45 |         // const complete3 = complete(3, ss, g);
46 | 
47 |     });
48 | });


--------------------------------------------------------------------------------
/test/earley/parser.spec.ts:
--------------------------------------------------------------------------------
  1 | import {NonTerminal, Terminal} from "../../src";
  2 | import {getViterbiParse, ParseTreeWithScore, Grammar} from "../../src";
  3 | 
  4 | import {expect} from "chai";
  5 | import {g, A} from "../sample-grammar";
  6 | import {parseSentenceIntoChart} from "../../src";
  7 | 
  8 | // TODO: "should complete correctly" and "should predict correctly" are empty placeholders without assertions
  9 | describe("parser", () => {
 10 | 
 11 | 
 12 |     it("should complete correctly", () => {
 13 |         // complete(
 14 |         //     0,
 15 |         //     "e",
 16 |         //     LogSemiring,
 17 |         //     ss
 18 |         // )
 19 |     });
 20 |     it("should predict correctly", () => {
 21 |         // complete(
 22 |         //     0,
 23 |         //     "e",
 24 |         //     LogSemiring,
 25 |         //     ss
 26 |         // )
 27 |     });
 28 |     it("should parse the man chase the man with a stick", () => {
 29 |         const S: NonTerminal = "S";
 30 |         const NP: NonTerminal = "NP";
 31 |         const VP: NonTerminal = "VP";
 32 |         const TV: NonTerminal = "TV";
 33 |         const Det: NonTerminal = "Det";
 34 |         const N: NonTerminal = "N";
 35 |         const Mod: NonTerminal = "Mod";
 36 | 
 37 |         // Token types (terminals) are functions that should return true when the parameter is of given type.
 38 |         const transitiveVerb: Terminal<string> = (token) => !!token.match(/(hit|chased)/);
 39 |         // Terminals for the individual words of the test sentence (regular expressions are not anchored):
 40 |         const the: Terminal<string> = (token) => !!token.match(/the/i);
 41 |         const a: Terminal<string> = (token) => !!token.match(/a/i); // NOTE(review): also matches "man" and "chased" - confirm intended
 42 |         const man: Terminal<string> = (token) => !!token.match(/man/);
 43 |         const stick: Terminal<string> = (token) => !!token.match(/stick/);
 44 |         const with_: Terminal<string> = (token) => !!token.match(/with/);
 45 | 
 46 |         const grammar: Grammar<string, number> = Grammar.builder("test")
 47 |         // .setSemiring(new LogSemiring()) // If not set, defaults to Log semiring which is probably what you want
 48 |             .addNewRule(
 49 |                 1.0,   // Probability between 0.0 and 1.0, defaults to 1.0. The builder takes care of converting it to the semiring element
 50 |                 S,     // Left hand side of the rule
 51 |                 [NP, VP] // Right hand side of the rule
 52 |             )
 53 |             .addNewRule(
 54 |                 1.0,
 55 |                 NP,
 56 |                 [Det, N] // eg. The man
 57 |             )
 58 |             .addNewRule(
 59 |                 1.0,
 60 |                 NP,
 61 |                 [Det, N, Mod] // eg. The man (with a stick)
 62 |             )
 63 |             .addNewRule(
 64 |                 0.4,
 65 |                 VP,
 66 |                 [TV, NP, Mod] // eg. (chased) (the man) (with a stick)
 67 |             )
 68 |             .addNewRule(
 69 |                 0.6,
 70 |                 VP,
 71 |                 [TV, NP] // eg. (chased) (the man with a stick)
 72 |             )
 73 |             .addNewRule(1.0, Det, [a])
 74 |             .addNewRule(1.0, Det, [the])
 75 |             .addNewRule(1.0, N, [man])
 76 |             .addNewRule(1.0, N, [stick])
 77 |             .addNewRule(1.0, TV, [transitiveVerb])
 78 |             .addNewRule(1.0, Mod, [with_, NP]) // eg. with a stick
 79 |             .build();
 80 | 
 81 |         const tokens = ["The", "man", "chased", "the", "man", "with", "a", "stick"];
 82 |         // noinspection JSUnusedLocalSymbols
 83 |         const viterbi: ParseTreeWithScore<string> = getViterbiParse(
 84 |             S,
 85 |             grammar,
 86 |             tokens
 87 |         );
 88 |         // console.log(JSON.stringify(viterbi.parseTree)); // {"category":"<start>","children":[{"category":"S","children":[{"category":"NP","children":[{"category":"Det","children":[{"token":"The","children":[]}]},{"category":"N","children":[{"token":"man","children":[]}]}]},{"category":"VP","children":[{"category":"TV","children":[{"token":"chased","children":[]}]},{"category":"NP","children":[{"category":"Det","children":[{"token":"the","children":[]}]},{"category":"N","children":[{"token":"man","children":[]}]},{"category":"Mod","children":[{"token":"with","children":[]},{"category":"NP","children":[{"category":"Det","children":[{"token":"a","children":[]}]},{"category":"N","children":[{"token":"stick","children":[]}]}]}]}]}]}]}]}
 89 |         // console.log(viterbi.probability); // 0.6
 90 |         // Parser.recognize(S, grammar, Tokens.tokenize("the", "stick", "chased", "the", "man"))
 91 |     });
 92 | 
 93 | 
 94 |     const tokens = ["a", "a", "a", "e"];
 95 |     it("should deal with scan probability correctly", () => {
 96 |         const p1 = getViterbiParse(
 97 |             A,
 98 |             g,
 99 |             tokens,
100 |             (ignore, ignored) => {
101 |                 return g.probabilityMapping.fromProbability(1.0);
102 |             }
103 |         ).probability;
104 | 
105 |         const p2 = getViterbiParse(
106 |             A,
107 |             g,
108 |             tokens,
109 |             (word, ignored) => {
110 |                 return word === "a" ? g.probabilityMapping.fromProbability(0.5) : undefined;
111 |             }
112 |         ).probability;
113 | 
114 |         const eq = p2 * 2 * 2 * 2; // p2 scans each of the three "a" tokens at 0.5, so p1 is expected to be 2^3 times p2
115 |         const epsilon = 0.0000000000000001; // tolerance for floating-point rounding
116 |         expect(p1).to.be.above(eq - epsilon).and.below(eq + epsilon);
117 |     });
118 | 
119 |     it("should parse aaae", () => {
120 |         // noinspection JSUnusedLocalSymbols
121 |         const [chart, ignored, init] = parseSentenceIntoChart(
122 |             A,
123 |             g,
124 |             tokens,
125 |             (word, terminalTypes) => {
126 |                 return g.probabilityMapping.fromProbability(1.0);
127 |             }
128 |         );
129 | 
130 |         expect(chart.getCompletedStates(tokens.length).has(
131 |             chart.getOrCreate(
132 |                 tokens.length, 0, init.rule.right.length, init.rule
133 |             )
134 |         )).to.equal(true);
135 | 
136 |     });
137 | });
138 | 


--------------------------------------------------------------------------------
/test/grammar/grammar.spec.ts:
--------------------------------------------------------------------------------
  1 | import { parseSentenceIntoChart, getViterbiParseFromChart } from "../../src";
  2 | import { ParseTree } from "../../src";
  3 | import { expect } from 'chai';
  4 | 
  5 | import { g, A, B, C, D, X, simpleRecursiveGrammar, S2a, S2SS, p, q, a, S } from "../sample-grammar";
  6 | 
  7 | describe('examples from paper', () => {
  8 |     const tokens = ["a", "a", "a"];
  9 | 
 10 |     // noinspection JSUnusedLocalSymbols
 11 |     const [chart, i, init] = parseSentenceIntoChart(S, simpleRecursiveGrammar, tokens);
 12 |     const finalState = chart.getOrCreate(
 13 |         tokens.length,
 14 |         0,
 15 |         init.rule.right.length,
 16 |         init.rule
 17 |     );
 18 |     // noinspection JSUnusedLocalSymbols
 19 |     const parseTree: ParseTree<string> = getViterbiParseFromChart(finalState, chart);
 20 | 
 21 |     const prob = simpleRecursiveGrammar.probabilityMapping.toProbability;
 22 | 
 23 |     const alpha = chart.getForwardScore.bind(chart);
 24 |     const gamma = chart.getInnerScore.bind(chart);
 25 | 
 26 |     it('State set 0', () => {
 27 |         const s00Sa = chart.getState(S2a, 0, 0, 0);
 28 | 
 29 |         expect(prob(alpha(s00Sa))).to.equal(1.0);
 30 |         expect(prob(gamma(s00Sa))).to.equal(p);
 31 | 
 32 | 
 33 |         const s00SSS = chart.getState(S2SS, 0, 0, 0);
 34 | 
 35 |         expect(prob(alpha(s00SSS))).to.equal(q / p);
 36 |         expect(prob(gamma(s00SSS))).to.equal(q);
 37 | 
 38 | 
 39 |     });
 40 | 
 41 |     it('State set 1', () => {
 42 | 
 43 |         // scanned
 44 |         const s01Sa1 = chart.getState(S2a, 1, 0, 1);
 45 |         expect(prob(alpha(s01Sa1))).to.equal(1);
 46 |         expect(prob(gamma(s01Sa1))).to.equal(p);
 47 | 
 48 |         // completed
 49 |         const s01SSS1 = chart.getState(S2SS, 1, 0, 1);
 50 | 
 51 |         expect(prob(alpha(s01SSS1))).to.equal(q);
 52 |         expect(prob(gamma(s01SSS1))).to.be.above((p * q) - 0.000001).and.below((p * q) + 0.000001);
 53 | 
 54 |         // predicted
 55 |         const s11Sa0 = chart.getState(S2a, 1, 1, 0);
 56 |         expect(prob(alpha(s11Sa0))).to.equal(q);
 57 |         expect(prob(gamma(s11Sa0))).to.equal(p);
 58 | 
 59 |         const s11SSS0 = chart.getState(S2SS, 1, 1, 0);
 60 |         expect(prob(alpha(s11SSS0))).to.be.above((Math.pow(q, 2) / p) - 0.0001).and.below((Math.pow(q, 2) / p) + 0.0001);
 61 |         expect(prob(gamma(s11SSS0))).to.equal(q);
 62 | 
 63 |     });
 64 |     it('State set 2', () => {
 65 |         // scanned
 66 |         const s12Sa1 = chart.getState(S2a, 2, 1, 1);
 67 |         expect(prob(alpha(s12Sa1))).to.equal(q);
 68 |         expect(prob(gamma(s12Sa1))).to.equal(p);
 69 | 
 70 |         // completed
 71 |         const s12SSS1 = chart.getState(S2SS, 2, 1, 1);
 72 |         expect(prob(alpha(s12SSS1))).to.equal(q * q);
 73 |         expect(prob(gamma(s12SSS1))).to.be.above((p * q) - 0.000001).and.below((p * q) + 0.000001);
 74 | 
 75 |         const s02SSS2 = chart.getState(S2SS, 2, 0, 2);
 76 |         expect(prob(alpha(s02SSS2))).to.be.above((p * q) - 0.000001).and.below((p * q) + 0.000001);
 77 |         expect(prob(gamma(s02SSS2))).to.be.above((p * p * q) - 0.000001).and.below((p * p * q) + 0.000001);
 78 | 
 79 |         const s02SSS1 = chart.getState(S2SS, 2, 0, 1);
 80 |         expect(prob(alpha(s02SSS1))).to.be.above((p * q * q) - 0.0001).and.below(((p * q * q) + 0.0001));
 81 |         expect(prob(gamma(s02SSS1))).to.be.above((p * p * q * q) - 0.0001).and.below(((p * p * q * q) + 0.0001));
 82 | 
 83 |         const s02S1 = chart.getState(init.rule, 2, 0, 1);
 84 |         expect(prob(alpha(s02S1))).to.be.above((p * p * q) - 0.0001).and.below(((p * p * q) + 0.0001));
 85 |         expect(prob(gamma(s02S1))).to.be.above((p * p * q) - 0.0001).and.below(((p * p * q) + 0.0001));
 86 | 
 87 |         // predicted
 88 |         const s22S0 = chart.getState(S2a, 2, 2, 0);
 89 | 
 90 |         expect(prob(gamma(s22S0))).to.equal(p);
 91 |         expect(prob(alpha(s22S0))).to.be.above(((1 + p) * q * q) - 0.00000001).and.below(((1 + p) * q * q) + 0.000000000000001);
 92 | 
 93 |         const s22SS0 = chart.getState(S2SS, 2, 2, 0);
 94 |         expect(prob(alpha(s22SS0))).to.be.above(((1 + (1 / p)) * q * q * q) - 0.0001).and.below(((1 + 1 / p) * q * q * q) + 0.0001);
 95 |         expect(prob(gamma(s22SS0))).to.equal(q);
 96 | 
 97 |     });
 98 |     it('State set 3', () => {
 99 |         // expected alpha/gamma probabilities for the states in state set 3 -- scanned state first
100 |         const s23Sa1 = chart.getState(S2a, 3, 2, 1);
101 |         expect(prob(alpha(s23Sa1))).to.be.below(((1 + p) * q * q) + 0.0001).and.above(((1 + p) * q * q) - 0.000001);
102 |         expect(prob(gamma(s23Sa1))).to.equal(p);
103 | 
104 |         // completed
105 |         const s23S1 = chart.getState(S2SS, 3, 2, 1);
106 |         expect(prob(alpha(s23S1))).to.be.below(((1 + p) * q * q * q) + 0.0001).and.above(((1 + p) * q * q * q) - 0.0001);
107 |         expect(prob(gamma(s23S1))).to.be.below((p * q) + 0.0001).and.above((p * q) - 0.0001);
108 | 
109 |         const s13S2 = chart.getState(S2SS, 3, 1, 2);
110 |         expect(prob(alpha(s13S2))).to.be.below((p * q * q) + 0.0001).and.above((p * q * q) - 0.0001);
111 |         expect(prob(gamma(s13S2))).to.be.below((p * p * q) + 0.0001).and.above((p * p * q) - 0.0001);
112 | 
113 |         const s13S1 = chart.getState(S2SS, 3, 1, 1);
114 |         expect(prob(alpha(s13S1))).to.be.above((p * q * q * q) - 0.0001).and.below((p * q * q * q) + 0.0001);
115 |         expect(prob(gamma(s13S1))).to.be.above((p * p * q * q) - 0.0001).and.below((p * p * q * q) + 0.0001);
116 | 
117 |         const s03S2 = chart.getState(S2SS, 3, 0, 2);
118 |         expect(prob(alpha(s03S2))).to.be.above((2 * p * p * q * q) - 0.0001).and.below(((2 * p * p * q * q) + 0.0001));
119 |         expect(prob(gamma(s03S2))).to.be.above((2 * p * p * p * q * q) - 0.0001).and.below(((2 * p * p * p * q * q) + 0.0001));
120 | 
121 |         const s03S1 = chart.getState(S2SS, 3, 0, 1);
122 |         expect(prob(alpha(s03S1))).to.be.above((2 * p * p * q * q * q) - 0.0001).and.below((2 * p * p * q * q * q) + 0.0001);
123 |         expect(prob(gamma(s03S1))).to.be.above((2 * p * p * p * q * q * q) - 0.0001).and.below((2 * p * p * p * q * q * q) + 0.0001);
124 | 
125 |         // state of init.rule in this set, looked up with the same trailing arguments as s02S1 in 'State set 2'; needs a lower AND an upper bound
126 |         const s03Init1 = chart.getState(init.rule, 3, 0, 1);
127 |         expect(prob(alpha(s03Init1))).to.be.above((2 * (Math.pow(p, 3) * Math.pow(q, 2))) - 0.0001).and.below((2 * (Math.pow(p, 3) * Math.pow(q, 2))) + 0.0001);
128 |         expect(prob(gamma(s03Init1))).to.be.above((2 * (Math.pow(p, 3) * Math.pow(q, 2))) - 0.0001).and.below((2 * (Math.pow(p, 3) * Math.pow(q, 2))) + 0.0001);
129 | 
130 |         /* Java-syntax debug loop (not ported): computes forward, inner and Viterbi probability of every state; printing is commented out
131 |                 for (int j = 0; j <= tokens.size(); j++) {
132 |                     chart.getStates(j).forEach(s -> {
133 |                         double probFw = sr.toProbability(chart.getForwardScore(s));
134 |                         double probInn = sr.toProbability(chart.getInnerScore(s));
135 |                         double v = 0.0;
136 |                         if (chart.getViterbiScore(s) == null) {
137 |                             //System.out.println();
138 |                         } else
139 |                             v = sr.toProbability(chart.getViterbiScore(s).getScore());
140 |                         //System.out.println(s + "[" + probFw + "]" + "[" + probInn + "] v: " + v);
141 |                     });
142 |                 }
143 |                     */
144 |     });
145 | 
146 | });
147 | 
148 | 
149 | describe('grammar', () => {
150 |     it('should calculate all left star values', () => {
151 |         // scores are bracketed by a lower and an upper bound rather than compared exactly
152 |         const leftStarAB = g.getLeftStarScore(A, B);
153 |         expect(leftStarAB).to.be.above(0.999).and.below(1.00001);
154 | 
155 |         const leftStarBC = g.getLeftStarScore(B, C);
156 |         expect(leftStarBC).to.be.above(0.4999).and.below(0.500001);
157 | 
158 |         const leftStarBD = g.getLeftStarScore(B, D);
159 |         expect(leftStarBD).to.be.above(0.24999).and.below(0.2500001);
160 | 
161 |         const leftStarAD = g.getLeftStarScore(A, D);
162 |         expect(leftStarAD).to.be.above(0.24999).and.below(0.2500001);
163 | 
164 |         // the only exact comparison in this spec
165 |         expect(g.getLeftStarScore(A, X)).to.equal(0.0);
166 |     });
167 | 
168 |     it('should calculate all left values', () => {
169 |         // each score must fall inside a narrow window around its expected value
170 |         expect(g.getLeftScore(A, B)).to.be.above(0.9999999).and.below(1.00001);
171 |         expect(g.getLeftScore(A, D)).to.be.above(-0.000001).and.below(0.00001);
172 |         expect(g.getLeftScore(A, X)).to.be.above(-0.000001).and.below(0.00001);
173 |         expect(g.getLeftScore(B, C)).to.be.above(0.4999999).and.below(0.50001);
174 |     });
175 | 
176 |     it('should calculate unit star values', () => {
177 |         // TODO: not written yet; this spec currently contains no assertions
178 |     });
179 | 
180 |     it('should get rules', () => {
181 |         // TODO: not written yet; Java-style assertions that still have to be ported:
182 |         // Set<Rule> setOfrules = new HashSet<>();
183 |         // setOfrules.plus(rule1);
184 |         // setOfrules.plus(rule2);
185 |         // Assert.assertEquals(setOfrules, new HashSet<>(g.getRules(rule1.left)));
186 |         // Assert.assertEquals(setOfrules, new HashSet<>(g.getRules(rule2.left)));
187 |         // setOfrules.clear();
188 |         // setOfrules.plus(rule3);
189 |         // Assert.assertEquals(setOfrules, new HashSet<>(g.getRules(rule3.left)));
190 |     });
191 | 
192 |     it('should contain rules', () => {
193 |         // TODO: not written yet; Java-style assertions that still have to be ported:
194 |         // Assert.assertTrue(g.containsRules(rule1.left));
195 |         // Assert.assertTrue(g.getRules(rule2.left).contains(rule2));
196 |         // Assert.assertFalse(g.getRules(rule3.left).contains(rule2));
197 | 
198 |         // Assert.assertEquals(ruleB, Rule.create(sr, 0.5, B, C));
199 |         // Assert.assertEquals(ruleC, Rule.create(sr, 0.5, C, D));
200 |         // Assert.assertEquals(ruleD, ruleD);
201 |         // Assert.assertEquals(ruleE, ruleE);
202 |         // Assert.assertEquals(rule1, rule1);
203 |         // Assert.assertEquals(rule2, rule2);
204 |         // Assert.assertEquals(rule3, rule3);
205 | 
206 |         // Assert.assertNotEquals(Rule.create(sr, 1.0, X, e), Rule.create(sr, 1.0, A, e));
207 |         // Assert.assertNotEquals(Rule.create(sr, 1.0, X, e), Rule.create(sr, 0.5, X, e));
208 |         // Assert.assertEquals(Rule.create(sr, 1.0, X, e), Rule.create(sr, 1.0, X, e));
209 |     });
210 | });
211 | 


--------------------------------------------------------------------------------
/test/sample-grammar.ts:
--------------------------------------------------------------------------------
 1 | import { Terminal, NonTerminal } from "../src";
 2 | import { Grammar } from "../src";
 3 | 
 4 | // Non-terminal symbols used by the sample grammars.
 5 | export const A: NonTerminal = "A";
 6 | export const B: NonTerminal = "B";
 7 | export const C: NonTerminal = "C";
 8 | export const D: NonTerminal = "D";
 9 | export const E: NonTerminal = "E";
10 | export const X: NonTerminal = "X";
11 | export const Y: NonTerminal = "Y";
12 | export const Z: NonTerminal = "Z";
13 | 
14 | // Terminals are predicates over a single token: `e` accepts exactly "e".
15 | export const e: Terminal<string> = (token) => token === "e";
16 | // `a` accepts every token that contains the letter "a", ignoring case.
17 | export const a = (token: string) => token.match(/a/i) !== null;
18 | 
19 | // Grammar named "test", assembled from the symbols above; rules are added in the order listed.
20 | export const g: Grammar<string, number> = Grammar.builder("test")
21 |     .addNewRule(1.0, A, [B, C, D, E])
22 |     .addNewRule(1.0, A, [e])
23 |     .addNewRule(1.0, X, [Y, Z])
24 |     .addNewRule(0.5, B, [C])
25 |     .addNewRule(0.5, C, [D])
26 |     .addNewRule(0.5, D, [E])
27 |     .addNewRule(0.5, D, [a])
28 |     .addNewRule(0.5, E, [E, E])
29 |     .addNewRule(0.5, E, [e])
30 |     // .addRule(0.1, E, [C])
31 |     .build();
32 | 
33 | // Rule probabilities and start symbol of the simple recursive grammar below.
34 | export const p: number = 0.6;
35 | export const q: number = 0.4;
36 | export const S = "S";
37 | 
38 | // S -> a with probability p
39 | export const S2a = {
40 |     left: S,
41 |     right: [a],
42 |     probability: p
43 | };
44 | // S -> S S with probability q
45 | export const S2SS = {
46 |     left: S,
47 |     right: [S, S],
48 |     probability: q
49 | };
50 | 
51 | export const simpleRecursiveGrammar: Grammar<string, number> = Grammar
52 |     .builder("simple-recursive-grammar")
53 |     .addRule(S2a)
54 |     .addRule(S2SS)
55 |     .build();


--------------------------------------------------------------------------------
/tsconfig.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "compileOnSave": false,
 3 |   "compilerOptions": {
 4 |     "declaration": true,
 5 |     "module": "commonjs",
 6 |     "moduleResolution": "node",
 7 |     "noImplicitAny": true,
 8 |     "preserveConstEnums": true,
 9 |     "removeComments": true,
10 |     "sourceMap": false,
11 |     "target": "es2015",
12 |     "outDir": "dist"
13 |   },
14 |   "include": [
15 |     "src/**/*.ts"
16 |   ],
17 |   "exclude": [
18 |   ]
19 | }


--------------------------------------------------------------------------------
/tslint.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "rules": {
 3 |     "class-name": true,
 4 |     "comment-format": [true,
 5 |       "check-space"
 6 |     ],
 7 |     "indent": [true,
 8 |       "spaces"
 9 |     ],
10 |     "one-line": [true,
11 |       "check-open-brace",
12 |       "check-whitespace"
13 |     ],
14 |     "no-var-keyword": true,
15 |     "quotemark": [true,
16 |       "double",
17 |       "avoid-escape"
18 |     ],
19 |     "semicolon": true,
20 |     "whitespace": [true,
21 |       "check-branch",
22 |       "check-decl",
23 |       "check-operator",
24 |       "check-module",
25 |       "check-separator",
26 |       "check-type"
27 |     ],
28 |     "typedef-whitespace": [true, {
29 |       "call-signature": "nospace",
30 |       "index-signature": "nospace",
31 |       "parameter": "nospace",
32 |       "property-declaration": "nospace",
33 |       "variable-declaration": "nospace"
34 |     }],
35 |     "no-internal-module": true,
36 |     "no-trailing-whitespace": true,
37 |     "no-inferrable-types": true,
38 |     "no-null-keyword": true,
39 |     "prefer-const": true
40 |   }
41 | }


--------------------------------------------------------------------------------
/version.js:
--------------------------------------------------------------------------------
1 | exports.default = "0.9.6";


--------------------------------------------------------------------------------
/webpack.config.js:
--------------------------------------------------------------------------------
 1 | const webpack = require('webpack');
 2 | const path = require('path');
 3 | const yargs = require('yargs');
 4 | 
 5 | const libraryName = 'probabilistic-earley-parser',
 6 |     plugins = [
 7 |         new webpack.LoaderOptionsPlugin({
 8 |             options: {
 9 |                 tslint: {
10 |                     emitErrors: true,
11 |                     failOnHint: true
12 |                 }
13 |             }
14 |         })
15 |     ];
16 | 
17 | let outputFile;
18 | const VERSION = require('./version').default;
19 | if (yargs.argv.p) {
20 |     outputFile = `${libraryName}.${VERSION}.min.js`;
21 | } else {
22 |     outputFile = `${libraryName}.${VERSION}.js`;
23 | }
24 | 
25 | const config = {
26 |     entry: [
27 |         __dirname + '/src/index.ts'
28 |     ],
29 |     devtool: 'source-map',
30 |     output: {
31 |         path: path.join(__dirname, '/'),
32 |         filename: outputFile,
33 |         library: libraryName,
34 | 
35 |         libraryTarget: "umd",
36 |         umdNamedDefine: true
37 |     },
38 |     module: {
39 |         rules: [
40 |             // {
41 |             //     enforce: 'pre',
42 |             //     test: /\.tsx?$/,
43 |             //     loader: 'tslint-loader',
44 |             //     exclude: /node_modules/
45 |             // },
46 |             {
47 |                 test: /\.tsx?$/,
48 |                 loader: ['babel-loader', 'ts-loader'],
49 |                 exclude: /node_modules/
50 |             }
51 |         ],
52 |         loaders: []
53 |     },
54 |     resolve: {
55 |         extensions: ['.js', '.ts', '.jsx', '.tsx']
56 |     },
57 |     plugins: plugins
58 | };
59 | 
60 | module.exports = config;


--------------------------------------------------------------------------------