├── .babelrc ├── .gitignore ├── .npmignore ├── .travis.yml ├── LICENSE ├── README.md ├── index.ts ├── package-lock.json ├── package.json ├── src ├── earley │ ├── chart │ │ ├── addable-expressions-container.ts │ │ ├── chart.ts │ │ ├── state-index.ts │ │ ├── state-to-object-map.ts │ │ ├── state.ts │ │ └── viterbi-score.ts │ ├── complete.ts │ ├── expression │ │ └── value.ts │ ├── parser.ts │ ├── parsetree.ts │ ├── predict.ts │ └── scan.ts ├── grammar │ ├── category.ts │ ├── grammar.ts │ ├── left-corner.ts │ ├── rule.ts │ └── token.ts ├── index.ts └── util.ts ├── test ├── earley │ ├── chart.spec.ts │ ├── earley.spec.ts │ └── parser.spec.ts ├── grammar │ └── grammar.spec.ts └── sample-grammar.ts ├── tsconfig.json ├── tslint.json ├── version.js └── webpack.config.js /.babelrc: -------------------------------------------------------------------------------- 1 | { 2 | "plugins": [ 3 | "transform-class-properties" 4 | ], 5 | "presets": [ 6 | ["es2015"] 7 | ], 8 | "env": { 9 | "development": { 10 | "presets": [ 11 | ] 12 | } 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Created by .ignore support plugin (hsz.mobi) 2 | ### Node template 3 | # Logs 4 | logs 5 | *.log 6 | npm-debug.log* 7 | 8 | # Runtime data 9 | pids 10 | *.pid 11 | *.seed 12 | *.pid.lock 13 | 14 | # Directory for instrumented libs generated by jscoverage/JSCover 15 | lib-cov 16 | 17 | # Coverage directory used by tools like istanbul 18 | coverage 19 | 20 | # nyc test coverage 21 | .nyc_output 22 | 23 | # Grunt intermediate storage (http://gruntjs.com/creating-plugins#storing-task-files) 24 | .grunt 25 | 26 | # node-waf configuration 27 | .lock-wscript 28 | 29 | # Compiled binary addons (http://nodejs.org/api/addons.html) 30 | build/Release 31 | 32 | # Dependency directories 33 | node_modules 34 | jspm_packages 35 | 36 | # Optional npm cache directory 37 | 
.npm 38 | 39 | # Optional eslint cache 40 | .eslintcache 41 | 42 | # Optional REPL history 43 | .node_repl_history 44 | 45 | # Output of 'npm pack' 46 | *.tgz 47 | 48 | # Yarn Integrity file 49 | .yarn-integrity 50 | 51 | 52 | .idea/ 53 | 54 | /probabilistic-earley-parser.*.*.*.min.* 55 | /dist -------------------------------------------------------------------------------- /.npmignore: -------------------------------------------------------------------------------- 1 | /node_modules 2 | /.idea 3 | /src 4 | /test 5 | /.yarn.lock 6 | /*.tgz 7 | /.travis.yml 8 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: node_js 2 | sudo: false 3 | node_js: 4 | - stable 5 | - 5 6 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Maarten Trompper 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ![Build Status](https://travis-ci.org/digitalheir/probabilistic-earley-parser-javascript.svg?branch=master) 2 | [![npm version](https://badge.fury.io/js/probabilistic-earley-parser.svg)](https://www.npmjs.com/package/probabilistic-earley-parser) 3 | [![License](https://img.shields.io/npm/l/probabilistic-earley-parser.svg)](https://github.com/digitalheir/probabilistic-earley-parser-javascript/blob/master/LICENSE) 4 | 5 | # Probabilistic Earley parser 6 | 7 | ## ⚠️ Warning 8 | This code is currently broken. It has a subtle bug which produces invalid results, can make your code run in exponential time & could use exponential memory. 9 | 10 | **DO NOT USE THIS LIBRARY!!!** 11 | 12 | Pull requests are welcome, but seeing as this is GitHub, nobody will care & the project is thus effectively abandoned. Contact maartentrompper@freedom.nl if you really need a functioning Probabilistic Earley Parser enough so that you are willing to fund it. 13 | 14 | --- 15 | 16 | 17 | This is a library for parsing a sequence of tokens (like words) into tree structures, along with the probability that the particular sequence generates that tree structure. This is mainly useful for linguistic purposes, such as morphological parsing, speech recognition and information extraction in general. It also finds applications in computational biology. 
18 | 19 | For example: 20 | 21 | * As a computational linguist, you want to [derive all ways to interpret an English sentence along with probabilities](https://web.stanford.edu/~jurafsky/icassp95-tc.pdf) 22 | 23 | |tokens|parse tree| 24 | |---|---| 25 | |[i, want, british, food]|![i want british food](https://cloud.githubusercontent.com/assets/178797/21772897/64838a1e-d68d-11e6-9a9d-11c7c17cb996.png)| 26 | 27 | * As a computational biologist, you want to [predict the secondary structure for an RNA sequence](https://en.wikipedia.org/wiki/Stochastic_context-free_grammar#RNA_structure_prediction) 28 | 29 | |tokens|parse tree| 30 | |---|---| 31 | |`GGGC``UAUU``AGCU``CAGU`
`UGGU``UAGA``GCGC``ACCC`
`CUGA``UAAG``GGUG``AGGU`
`CGCU``GAUU``CGAA``UUCA`
`GCAU``AGCC``CA` |![rna secondary structure](https://cloud.githubusercontent.com/assets/178797/21773797/af94f972-d690-11e6-97b4-0aad06071634.jpg)| 32 | 33 | * As a computational linguist, [you want to know the most likely table of contents structure for a list of paragraphs](https://digitalheir.github.io/java-rechtspraak-library/document-structure/) 34 | 35 | 36 | 37 | 38 | 39 | 40 | This library allows you to do these things [efficiently](https://github.com/digitalheir/probabilistic-earley-parser-javascript#runtime-complexity), as long as you can describe the rules as a [Context-free Grammar](https://en.wikipedia.org/wiki/Context-free_grammar) (CFG). 41 | 42 | The innovation of this library with respect to the many other parsing libraries is that this one allows the production rules in your grammar to have a probability attached to them. That is: it parses [Stochastic Context-free Grammars](https://en.wikipedia.org/wiki/Stochastic_context-free_grammar). This allows us to make better choices in case of ambiguous sentences: we can order them by probability. If you do not need probabilities attached to your parse trees, you are probably better off using [nearley](http://nearley.js.org) instead. 43 | 44 | For a theoretical grounding of this work, refer to [*Stolcke; An Efficient Probabilistic Context-Free 45 | Parsing Algorithm that Computes Prefix 46 | Probabilities*](http://www.aclweb.org/anthology/J95-2002). 47 | 48 | ## Motivation 49 | While libraries for nondeterministic grammars abound, I could not find an existing JavaScript 50 | implementation of the Probabilistic Earley Parser. I have made a stochastic CYK parser before, but I wanted something 51 | more top down that makes it easier to intervene in the parsing process, 52 | for instance when an unexpected token is encountered. 53 | In many cases Earley also parses faster than CYK (sparse grammars) and it doesn't require the grammar to be 54 | rewritten in any normal form. 
55 | 56 | ## Usage 57 | 58 | Get the most likely parse tree (the *Viterbi parse*) for the sentence "the man chases the man with a stick": 59 | 60 | ````javascript 61 | import {getViterbiParse, Grammar} from 'probabilistic-earley-parser'; 62 | import treeify from 'treeify'; 63 | 64 | // Nonterminals are string 65 | const S = "S"; // : NonTerminal 66 | const NP = "NP"; // : NonTerminal 67 | const VP = "VP"; // : NonTerminal 68 | const TV = "TV"; // : NonTerminal 69 | const Det = "Det"; // : NonTerminal 70 | const N = "N"; // : NonTerminal 71 | const Mod = "Mod"; // : NonTerminal 72 | 73 | // Terminals are functions that should return true when the parameter is of given type 74 | const transitiveVerb = (token) => !!token.match(/(hit|chased)/); // : Terminal 75 | const the = (token) => !!token.match(/the/i);// : Terminal 76 | const a = (token) => !!token.match(/a/i);// : Terminal 77 | const man = (token) => !!token.match(/man/);// : Terminal 78 | const stick = (token) => !!token.match(/stick/);// : Terminal 79 | const with_ = (token) => !!token.match(/with/);// : Terminal 80 | 81 | const grammar = Grammar.builder("test") //: Grammar 82 | .addNewRule( 83 | 1.0, // Probability between 0.0 and 1.0, defaults to 1.0. The builder takes care of converting it to the semiring element 84 | S, // Left hand side of the rule 85 | [NP, VP] // Right hand side of the rule 86 | ) 87 | // NP -> Det N (1.0) 88 | .addNewRule( 89 | 1.0, 90 | NP, 91 | [Det, N] // eg. The man 92 | ) 93 | // NP -> Det N Mod (1.0) 94 | .addNewRule( 95 | 1.0, 96 | NP, 97 | [Det, N, Mod] // eg. The man (with a stick) 98 | ) 99 | // VP -> TV NP Mod (0.4) 100 | .addNewRule( 101 | 0.4, 102 | VP, 103 | [TV, NP, Mod] // eg. (chased) (the man) (with a stick) 104 | ) 105 | // VP -> TV NP (0.6) 106 | .addNewRule( 107 | 0.6, 108 | VP, 109 | [TV, NP] // eg. 
(chased) (the man with a stick) 110 | ) 111 | .addNewRule(1.0, Det, [a]) 112 | .addNewRule(1.0, Det, [the]) 113 | .addNewRule(1.0, N, [man]) 114 | .addNewRule(1.0, N, [stick]) 115 | .addNewRule(1.0, TV, [transitiveVerb]) 116 | .addNewRule(1.0, Mod, [with_, NP]) // eg. with a stick 117 | .build(); 118 | 119 | const tokens = ["The", "man", "chased", "the", "man", "with", "a", "stick"]; 120 | const viterbi = getViterbiParse( 121 | S, 122 | grammar, 123 | tokens 124 | ); // : ParseTreeWithScore 125 | 126 | console.log(viterbi.probability); // 0.6 127 | 128 | /* 129 | 0.6 130 | └─ S 131 | ├─ NP 132 | │ ├─ Det 133 | │ │ └─ The 134 | │ └─ N 135 | │ └─ man 136 | └─ VP 137 | ├─ TV 138 | │ └─ chased 139 | └─ NP 140 | ├─ Det 141 | │ └─ the 142 | ├─ N 143 | │ └─ man 144 | └─ Mod 145 | ├─ with 146 | └─ NP 147 | ├─ Det 148 | │ └─ a 149 | └─ N 150 | └─ stick 151 | */ 152 | function printTree(tree) { 153 | function makeTree(o){if(o.children && o.children.length > 0){const obj = {}; 154 | for(var i=0;i { 13 | readonly semiring: Semiring>; 14 | 15 | private states: StateToObjectMap>; 16 | private readonly ZERO: Expression; 17 | 18 | constructor(semiring: Semiring>) { 19 | this.states = new StateToObjectMap>(); 20 | this.semiring = semiring; 21 | this.ZERO = new AtomicValue(this.semiring.additiveIdentity.resolve()); 22 | } 23 | 24 | 25 | // getExpression(rule: Rule, index: number, ruleStart: number, dot: number): Expression { 26 | // return this.states.get(rule, index, ruleStart, dot).expression; 27 | // } 28 | 29 | getOrCreateByState(state: State, 30 | defaultValue: Expression): DeferredValue { 31 | if (this.states.hasByState(state)) { 32 | return this.states.getByState(state); 33 | } else { 34 | const deferredValue = new DeferredValue(defaultValue); 35 | this.states.putByState(state, deferredValue); 36 | return deferredValue; 37 | } 38 | } 39 | 40 | getOrCreate(rule: Rule, 41 | index: number, 42 | ruleStart: number, 43 | dotPosition: number, 44 | defaultValue: Expression): 
DeferredValue { 45 | if (this.states.has(rule, index, ruleStart, dotPosition)) { 46 | return this.states.get(rule, index, ruleStart, dotPosition); 47 | } else { 48 | const deferredValue = new DeferredValue(defaultValue); 49 | this.states.put(rule, index, ruleStart, dotPosition, deferredValue); 50 | return deferredValue; 51 | } 52 | } 53 | 54 | get(rule: Rule, 55 | index: number, 56 | ruleStart: number, 57 | dotPosition: number): Expression { 58 | if (this.states.has(rule, index, ruleStart, dotPosition)) { 59 | return this.states.get(rule, index, ruleStart, dotPosition).expression; 60 | } else { 61 | return undefined; 62 | } 63 | } 64 | 65 | 66 | plus(rule: Rule, 67 | index: number, 68 | ruleStart: number, 69 | dotPosition: number, 70 | addValue: Expression): void { 71 | const current: DeferredValue = this.getOrCreate( 72 | rule, index, ruleStart, dotPosition, 73 | this.ZERO 74 | ); 75 | current.expression = this.semiring.plus(addValue, current.expression); 76 | this.states.put(rule, index, ruleStart, dotPosition, current); 77 | } 78 | 79 | forEach(f: (index: number, ruleStart: number, dot: number, rule: Rule, score: Expression) => any) { 80 | this.states.forEach((i, r, d, rr, v) => f(i, r, d, rr, v.expression)); 81 | } 82 | } 83 | -------------------------------------------------------------------------------- /src/earley/chart/chart.ts: -------------------------------------------------------------------------------- 1 | //noinspection ES6UnusedImports 2 | import { StateIndex } from "./state-index"; 3 | import { Grammar } from "../../grammar/grammar"; 4 | import { State, isCompleted, isActive, getActiveCategory } from "./state"; 5 | import { NonTerminal, Terminal, isNonTerminal } from "../../grammar/category"; 6 | import { Semiring } from "semiring"; 7 | import { getOrCreateSet, getOrCreateMap } from "../../util"; 8 | import { isUnitProduction, Rule, invalidDotPosition } from "../../grammar/rule"; 9 | import { ViterbiScore } from "./viterbi-score"; 10 | import { 
StateToObjectMap } from "./state-to-object-map"; 11 | 12 | export class Chart { 13 | readonly grammar: Grammar; 14 | 15 | private states = new StateIndex(); 16 | private byIndex = new Map>>(); 17 | 18 | /** 19 | * The forward probability α_i of a state is 20 | * the sum of the probabilities of 21 | * all constrained paths of length i that end in that state, i.e. all 22 | * paths from start to position i. So this includes multiple 23 | * instances of the same history, which may happen because of recursion. 24 | */ 25 | private forwardScores = new StateToObjectMap(); 26 | 27 | /** 28 | * The inner probability γ_{i} of a state 29 | * is the sum of the probabilities of all 30 | * paths of length (i - k) that start at position k (the rule's start position), 31 | * and end at the current state and generate the input symbols up to k. 32 | * Note that this is conditional on the state happening at position k with 33 | * a certain non-terminal X 34 | */ 35 | private innerScores = new StateToObjectMap(); 36 | private viterbiScores = new StateToObjectMap>(); 37 | 38 | completedStates = new Map>>(); 39 | completedStatesFor = new Map>>>(); 40 | completedStatesThatAreNotUnitProductions = new Map>>(); 41 | statesActiveOnNonTerminals = new Map>>(); 42 | 43 | nonTerminalActiveAtIWithNonZeroUnitStarToY = new Map>>>(); 44 | statesActiveOnTerminals = new Map, Set>>>(); 45 | statesActiveOnNonTerminal = new Map>>>(); 46 | private EMPTY_SET: Set> = new Set>(); 47 | 48 | 49 | constructor(grammar: Grammar) { 50 | this.grammar = grammar; 51 | } 52 | 53 | // getCompletedStates(int i, NonTerminal s):Set> { 54 | // Multimap m = this.completedStatesFor.get(i); 55 | // if (m != null && m.containsKey(s)) return m.get(s); 56 | // return Collections.emptySet(); 57 | // } 58 | // 59 | // public Set getCompletedStates(int index) { 60 | // return getCompletedStates(index, true); 61 | // } 62 | // 63 | // public Set getCompletedStatesThatAreNotUnitProductions(int index) { 64 | // return 
getCompletedStates(index, false); 65 | // } 66 | // 67 | // public Set getCompletedStates(int index, boolean allowUnitProductions) { 68 | // if (allowUnitProductions) { 69 | // if (!completedStates.containsKey(index)) 70 | // completedStates.put(index, new HashSet<>()); 71 | // return completedStates.get(index); 72 | // } else { 73 | // if (!completedStatesThatAreNotUnitProductions.containsKey(index)) 74 | // completedStatesThatAreNotUnitProductions.put(index, new HashSet<>()); 75 | // return completedStatesThatAreNotUnitProductions.get(index); 76 | // } 77 | // } 78 | // 79 | 80 | getStatesActiveOnNonTerminalWithNonZeroUnitStarScoreToY(index: number, Y: NonTerminal): Set> { 81 | return getOrCreateSet(getOrCreateMap(this.nonTerminalActiveAtIWithNonZeroUnitStarToY, index), Y); 82 | } 83 | 84 | getStatesActiveOnNonTerminal(y: NonTerminal, position: number, beforeOrOnPosition: number): Set> { 85 | if (position <= beforeOrOnPosition) 86 | return getOrCreateSet(getOrCreateMap(this.statesActiveOnNonTerminal, y), position); 87 | else 88 | throw new Error("Querying position after what we're on?"); 89 | } 90 | 91 | /** 92 | * Default zero 93 | * 94 | * @param s chart 95 | * @return forward score so far 96 | */ 97 | public getForwardScore(s: State): S { 98 | return this.forwardScores.getByStateOrDefault(s, this.grammar.probabilityMapping.ZERO); 99 | } 100 | 101 | 102 | addForwardScore(state: State, increment: S, semiring: Semiring): S { 103 | const fw = semiring.plus(this.getForwardScore(state)/*default zero*/, increment); 104 | this.setForwardScore( 105 | state, 106 | fw 107 | ); 108 | return fw; 109 | } 110 | 111 | setForwardScore(s: State, probability: S) { 112 | return this.forwardScores.putByState(s, probability); 113 | } 114 | 115 | //noinspection JSUnusedLocalSymbols 116 | private hasForwardScore(s: State): boolean { 117 | return this.forwardScores.hasByState(s); 118 | } 119 | 120 | public getState(rule: Rule, 121 | positionInInput: number, 122 | ruleStartPosition: 
number, 123 | ruleDotPosition: number): State { 124 | return this.states.getState(rule, positionInInput, ruleStartPosition, ruleDotPosition); 125 | } 126 | 127 | /** 128 | * Adds chart if it does not exist yet 129 | * 130 | * @param positionInInput State position 131 | * @param ruleStartPosition Rule start position 132 | * @param ruleDotPosition Rule dot position 133 | * @param rule State rule 134 | * @param scannedToken The token that was scanned to create this chart 135 | * @return State specified by parameter. May or may not be in the chart table. If not, it is added. 136 | */ 137 | public getOrCreate(positionInInput: number, 138 | ruleStartPosition: number, 139 | ruleDotPosition: number, 140 | rule: Rule, 141 | scannedToken?: T): State { 142 | if (this.states.has(rule, positionInInput, ruleStartPosition, ruleDotPosition)) { 143 | return this.states.getState(rule, positionInInput, ruleStartPosition, ruleDotPosition); 144 | } else { 145 | // Add chart if it does not exist yet 146 | const scannedCategory: Terminal = scannedToken 147 | ? 
>rule.right[ruleDotPosition - 1] 148 | : undefined; 149 | const state: State = { 150 | rule, 151 | position: positionInInput, 152 | ruleStartPosition, 153 | ruleDotPosition, 154 | scannedToken: scannedToken, 155 | scannedCategory 156 | }; 157 | this.addState(state); 158 | return state; 159 | } 160 | } 161 | 162 | hasState(state: State): boolean { 163 | return this.states.has(state.rule, state.position, state.ruleStartPosition, state.ruleDotPosition); 164 | } 165 | 166 | has(rule: Rule, index: number, ruleStart: number, ruleDot: number): boolean { 167 | return this.states.has(rule, index, ruleStart, ruleDot); 168 | } 169 | 170 | addState(state: State): void { 171 | if (state.ruleDotPosition < 0 || state.ruleDotPosition > state.rule.right.length) 172 | invalidDotPosition(state.ruleDotPosition, state); 173 | 174 | this.states.addState(state); 175 | 176 | const position = state.position; 177 | 178 | getOrCreateSet(this.byIndex, position).add(state); 179 | 180 | if (isCompleted(state)) { 181 | getOrCreateSet(this.completedStates, position).add(state); 182 | if (!isUnitProduction(state.rule)) 183 | getOrCreateSet(this.completedStatesThatAreNotUnitProductions, position).add(state); 184 | 185 | getOrCreateSet(getOrCreateMap(this.completedStatesFor, 186 | state.position), state.rule.left) 187 | .add(state); 188 | } 189 | if (isActive(state)) { 190 | const activeCategory = getActiveCategory(state); 191 | if (isNonTerminal(activeCategory)) { 192 | getOrCreateSet(getOrCreateMap(this.statesActiveOnNonTerminal, 193 | activeCategory), state.position) 194 | .add(state); 195 | getOrCreateSet(this.statesActiveOnNonTerminals, 196 | state.position) 197 | .add(state); 198 | 199 | this.grammar.unitStarScores 200 | .getNonZeroScoresToNonTerminals(activeCategory) 201 | .forEach((FromNonTerminal: NonTerminal) => { 202 | getOrCreateSet(getOrCreateMap( 203 | this.nonTerminalActiveAtIWithNonZeroUnitStarToY, 204 | position), FromNonTerminal).add(state); 205 | }); 206 | } else { 207 | // 
activeCategory MUST be terminal 208 | getOrCreateSet(getOrCreateMap(this.statesActiveOnTerminals, position), activeCategory).add(state); 209 | } 210 | } 211 | } 212 | 213 | setInnerScore(s: State, probability: S) { 214 | this.innerScores.putByState(s, probability); 215 | } 216 | 217 | /** 218 | * @param v viterbi score 219 | */ 220 | setViterbiScore(v: ViterbiScore) { 221 | this.viterbiScores.putByState(v.resultingState, v); 222 | } 223 | 224 | getViterbiScore(s: State): ViterbiScore { 225 | /*if (!this.hasViterbiScore(s)) 226 | throw new Error( 227 | "Viterbi not available for chart (" 228 | + s.position + ", " + s.ruleStartPosition + ", " + s.ruleDotPosition 229 | + ") " + s.rule.left + " -> " + s.rule.right.map(f => f.toString())); 230 | else */ 231 | return this.viterbiScores.getByState(s); 232 | } 233 | 234 | hasViterbiScore(s: State): boolean { 235 | return this.viterbiScores.hasByState(s); 236 | } 237 | 238 | /** 239 | * Default zero 240 | * 241 | * @param s chart 242 | * @return inner score so far 243 | */ 244 | public getInnerScore(s: State): S { 245 | return this.innerScores.getByStateOrDefault(s, this.grammar.probabilityMapping.ZERO); 246 | } 247 | 248 | public getCompletedStatesThatAreNotUnitProductions(position: number) { 249 | return this.completedStatesThatAreNotUnitProductions.get(position); 250 | } 251 | 252 | public getCompletedStates(position: number) { 253 | if (this.completedStates.has(position)) 254 | return this.completedStates.get(position); 255 | else return this.EMPTY_SET; 256 | } 257 | 258 | public getStatesActiveOnNonTerminals(index: number) { 259 | return this.statesActiveOnNonTerminals.get(index); 260 | } 261 | 262 | public getStatesActiveOnTerminals(index: number, terminal: Terminal) { 263 | if (this.statesActiveOnTerminals.has(index)) 264 | return this.statesActiveOnTerminals.get(index).get(terminal); 265 | else 266 | return undefined; 267 | } 268 | 269 | // public hasInnerScore(s: State): boolean { 270 | // let ruleMap = 
getOrCreateMap(this.innerScores, s.rule); 271 | // let posMap = getOrCreateMap(ruleMap, s.position); 272 | // let dotMAp = getOrCreateMap(posMap, s.ruleStartPosition); 273 | // return dotMAp.has(s.ruleDotPosition); 274 | // } 275 | 276 | // public Set getStatesByIndex(int index) { 277 | // return byIndex.get(index); 278 | // } 279 | // 280 | // 281 | // public void plus(State chart) { 282 | // Rule rule = chart.getRule(); 283 | // int ruleStart = chart.getRuleStartPosition(); 284 | // int index = chart.getPosition(); 285 | // 286 | // TIntObjectMap> forRuleStart = states.getRuleStartToDotToState(rule, index); 287 | // if (!forRuleStart.containsKey(ruleStart)) forRuleStart.put(ruleStart, new TIntObjectHashMap<>(50)); 288 | // TIntObjectMap dotToState = forRuleStart.get(ruleStart); 289 | // 290 | // addState(dotToState, chart); 291 | // } 292 | // 293 | // public synchronized State getSynchronized(int index, int ruleStart, int ruleDot, Rule rule) { 294 | // return states.getState(rule, index, ruleStart, ruleDot); 295 | // } 296 | // 297 | // public State get(int index, int ruleStart, int ruleDot, Rule rule) { 298 | // return states.getState(rule, index, ruleStart, ruleDot); 299 | // } 300 | // 301 | // public countStates():number { 302 | // return this.states.count(); 303 | // } 304 | 305 | } -------------------------------------------------------------------------------- /src/earley/chart/state-index.ts: -------------------------------------------------------------------------------- 1 | import { Rule } from "../../grammar/rule"; 2 | import { State } from "./state"; 3 | import { StateToObjectMap } from "./state-to-object-map"; 4 | 5 | export class StateIndex { 6 | private states: StateToObjectMap>; 7 | 8 | constructor() { 9 | this.states = new StateToObjectMap>(); 10 | } 11 | 12 | public addState(state: State) { 13 | if (this.states.hasByState(state)) 14 | throw new Error("State set already contained chart. 
This is a bug."); 15 | else 16 | this.states.putByState(state, state); 17 | } 18 | 19 | public getState(rule: Rule, index: number, ruleStart: number, ruleDot: number): State { 20 | if (!this.states.has(rule, index, ruleStart, ruleDot)) 21 | throw new Error("State did not exist. This is a bug."); 22 | else 23 | return this.states.get(rule, index, ruleStart, ruleDot); 24 | } 25 | 26 | public has(rule: Rule, index: number, ruleStart: number, ruleDot: number): boolean { 27 | return this.states.has(rule, index, ruleStart, ruleDot); 28 | } 29 | 30 | // /** 31 | // * Runs in O(1) 32 | // * @returns {number} 33 | // */ 34 | // public size(): number { 35 | // this.states.size(); 36 | // } 37 | } -------------------------------------------------------------------------------- /src/earley/chart/state-to-object-map.ts: -------------------------------------------------------------------------------- 1 | import { Rule } from "../../grammar/rule"; 2 | import { getOrCreateMap } from "../../util"; 3 | import { State } from "./state"; 4 | 5 | export class StateToObjectMap { 6 | private map: Map, 7 | /* index */Map>>> = new Map, 11 | /*index*/Map>>>(); 14 | 15 | // private _size: number = 0; 16 | 17 | constructor() { 18 | } 19 | 20 | put(rule: Rule, position: number, ruleStartPosition: number, ruleDotPosition: number, value: O) { 21 | getOrCreateMap(getOrCreateMap(getOrCreateMap(this.map, rule), position), ruleStartPosition).set(ruleDotPosition, value); 22 | } 23 | 24 | has(rule: Rule, position: number, ruleStartPosition: number, ruleDotPosition: number): boolean { 25 | return getOrCreateMap(getOrCreateMap(getOrCreateMap(this.map, rule), position), ruleStartPosition).has(ruleDotPosition); 26 | } 27 | 28 | get(rule: Rule, position: number, ruleStartPosition: number, ruleDotPosition: number): O { 29 | return getOrCreateMap(getOrCreateMap(getOrCreateMap(this.map, rule), position), ruleStartPosition).get(ruleDotPosition); 30 | } 31 | 32 | putByState(state: State, value: O): void { 33 | 
this.put(state.rule, state.position, state.ruleStartPosition, state.ruleDotPosition, value); 34 | } 35 | 36 | getOrDefault(rule: Rule, position: number, ruleStartPosition: number, ruleDotPosition: number, _default: O): O { 37 | if (this.has(rule, position, ruleStartPosition, ruleDotPosition)) 38 | return this.get(rule, position, ruleStartPosition, ruleDotPosition); 39 | else 40 | return _default; 41 | } 42 | 43 | getByStateOrDefault(state: State, _default: O): O { 44 | return this.getOrDefault(state.rule, state.position, state.ruleStartPosition, state.ruleDotPosition, _default); 45 | } 46 | 47 | getByState(state: State): O { 48 | return this.get(state.rule, state.position, state.ruleStartPosition, state.ruleDotPosition); 49 | } 50 | 51 | hasByState(state: State): boolean { 52 | return this.has(state.rule, state.position, state.ruleStartPosition, state.ruleDotPosition); 53 | } 54 | 55 | forEach(f: (index: number, ruleStart: number, dot: number, rule: Rule, score: O) => any) { 56 | this.map.forEach( 57 | (val, rule) => { 58 | val.forEach( 59 | (val2, position) => { 60 | val2.forEach( 61 | (val3, start) => { 62 | val3.forEach( 63 | (object: O, dot: number) => f(position, start, dot, rule, object) 64 | ); 65 | }); 66 | }); 67 | }); 68 | } 69 | 70 | // size(): number { 71 | // return this._size; 72 | // } 73 | } -------------------------------------------------------------------------------- /src/earley/chart/state.ts: -------------------------------------------------------------------------------- 1 | import { Rule, getActiveCategory as getActiveCategoryFromRule, invalidDotPosition } from "../../grammar/rule"; 2 | import { Category } from "../../grammar/category"; 3 | 4 | /** 5 | * A chart chart, describing a pending derivation. 6 | *

7 | * A state is of the form i: X_k → λ·μ 8 | * where X is a nonterminal of the grammar, λ and μ are strings of nonterminals and/or 9 | * terminals, and i and k are indices into the input string. States are derived from productions 10 | * in the grammar. The above state is derived from a corresponding production 11 | * X → λμ 12 | * with the following semantics: 13 | *

    14 | *
  • The current position in the input is i, i.e., x0...xi-1 15 | * have been processed 16 | * so far. The states describing the parser chart at position i are collectively 17 | * called chart set i. Note that there is one more chart set than input 18 | * symbols: set 0 describes the parser chart before any input is processed, 19 | * while set |x| contains the states after all input symbols have been 20 | * processed.
  • 21 | *
  • Nonterminal X was expanded starting at position k in 22 | * the input, i.e., X 23 | * generates some substring starting at position k.
  • 24 | *
  • The expansion of X proceeded using the production X → λμ, and has 25 | * expanded the right-hand side (RHS) λμ up to the position indicated by 26 | * the dot. The dot thus refers to the current position i.
  • 27 | *
28 | * 29 | * A chart with the dot to the right of the entire RHS is called a completed chart, since 30 | * it indicates that the left-hand side (LHS) non-terminal has been fully expanded. 31 | * 32 | */ 33 | export interface State { 34 | rule: Rule; 35 | ruleStartPosition: number; 36 | ruleDotPosition: number; 37 | position: number; 38 | scannedToken?: TokenType; 39 | scannedCategory?: Category; 40 | } 41 | 42 | //noinspection JSUnusedGlobalSymbols 43 | export interface StateWithScore { 44 | forwardScore: SemiringType; 45 | innerScore: SemiringType; 46 | state: State; 47 | origin: State; 48 | } 49 | 50 | 51 | export function isCompleted(state: State): boolean { 52 | return isPassive(state.rule, state.ruleDotPosition); 53 | } 54 | 55 | export function isActive(state: State): boolean { 56 | return !isCompleted(state); 57 | } 58 | 59 | /** 60 | * @return Active category for this state. May be null. 61 | */ 62 | export function getActiveCategory(state: State): Category { 63 | return getActiveCategoryFromRule(state.rule, state.ruleDotPosition); 64 | } 65 | 66 | /** 67 | * Tests whether this is a completed edge or not. An edge is completed when 68 | * its dotted rule contains no 69 | * {@link #getActiveCategory(int) active category}, or equivalently the dot is at position == |RHS|. 70 | * Runs in O(1) 71 | * 72 | * @return true iff the active category of this edge's dotted 73 | * rule is null. 74 | */ 75 | export function isPassive(rule: Rule, dotPosition: number) { 76 | if (dotPosition < 0 || dotPosition > rule.right.length) 77 | invalidDotPosition(dotPosition, rule); 78 | return dotPosition === rule.right.length; 79 | } 80 | 81 | /** 82 | * Return dot position advanced by 1, or errors if out of bounds. 83 | * 84 | * @throws Error If the state's current dot position is negative or greater than 85 | * the length of the rule's right side. NOTE(review): a dot that is already at the end (position === length) is not rejected by the guard, so the result can be one past the right side - confirm this is intended. 
86 | */ 87 | export function advanceDot(s: State) { 88 | const position = s.ruleDotPosition; 89 | if (position < 0 || position > s.rule.right.length) throw new Error( 90 | "illegal position: " + position + ", " + s.rule); 91 | return position + 1; 92 | } -------------------------------------------------------------------------------- /src/earley/chart/viterbi-score.ts: -------------------------------------------------------------------------------- 1 | import { State, advanceDot, isCompleted } from "./state"; 2 | import { ProbabilitySemiringMapping } from "../../grammar/grammar"; 3 | import { Rule } from "../../grammar/rule"; 4 | import { Chart } from "./chart"; 5 | import { NonTerminal } from "../../grammar/category"; 6 | 7 | /** 8 | * Representing a Viterbi score coming from a certain chart, 9 | * transition to a result chart computing 10 | * using a certain semiring 11 | */ 12 | export interface ViterbiScore { 13 | origin: State; 14 | resultingState: State; 15 | innerScore: S; 16 | } 17 | 18 | 19 | /** 20 | * For finding the Viterbi path, we can't conflate production recursions (ie can't use the left star corner), 21 | * exactly because we need it to find the unique Viterbi path. 22 | * Luckily, we can avoid looping over unit productions because it only ever lowers probability 23 | * (assuming p = [0,1] and Occam's razor). 
/**
 * Propagates the Viterbi score of a completed state to the states that were
 * waiting on its left-hand side category, then recurses into every state that
 * became completed with an improved score.
 *
 * For finding the Viterbi path, we can't conflate production recursions
 * (ie can't use the left star corner), exactly because we need it to find the
 * unique Viterbi path. Luckily, we can avoid looping over unit productions
 * because it only ever lowers probability (assuming p = [0,1] and Occam's razor).
 *
 * @param stateSets Chart that is read from and updated
 * @param completedState Completed state to propagate the Viterbi score of;
 * it must already have a Viterbi score in the chart
 * @param originPathTo Completed states already visited on this recursion path
 * @param m Mapping between probabilities and semiring values
 * @throws Error If `completedState` has no Viterbi score, if the chart index
 * returns a state at an unexpected position, or if a resulting state was
 * already visited on the current path
 */
export function setViterbiScores<S, T>(stateSets: Chart<T, S>,
                                       completedState: State<S, T>,
                                       originPathTo: Set<State<S, T>>,
                                       m: ProbabilitySemiringMapping<S>): void {
    const sr = m.semiring;
    // Both lists are allocated lazily to avoid creating arrays when nothing changes
    let newStates: State<S, T>[] | undefined;
    let newCompletedStates: State<S, T>[] | undefined;

    if (!stateSets.hasViterbiScore(completedState))
        throw new Error("Expected Viterbi score to be set on completed chart. This is a bug.");

    const completedViterbi: S = stateSets
        .getViterbiScore(completedState)
        .innerScore;

    // noinspection JSSuspiciousNameCombination
    const Y: NonTerminal = completedState.rule.left;

    // Get all states in j <= i, such that j: Xk → λ·Yμ
    const pos: number = completedState.position;
    const completedStart: number = completedState.ruleStartPosition;
    stateSets.getStatesActiveOnNonTerminal(
        Y, completedStart, pos
    ).forEach((stateToAdvance) => {
        // Positions are numbers, so compare strictly
        if (stateToAdvance.position > pos || stateToAdvance.position !== completedStart)
            throw new Error("Index failed. This is a bug.");

        const ruleStart: number = stateToAdvance.ruleStartPosition;
        const nextDot: number = advanceDot(stateToAdvance);
        const rule: Rule<T> = stateToAdvance.rule;

        let resultingState = stateSets.getState(rule, pos, ruleStart, nextDot);
        if (!resultingState) {
            resultingState = stateSets.getOrCreate(pos, ruleStart, nextDot, rule);
            if (!newStates) newStates = [];
            newStates.push(resultingState);
        }

        if (originPathTo.has(resultingState))
            throw new Error("This is a bug: Already went past " + resultingState);

        const viterbiScore: ViterbiScore<S, T> = stateSets.getViterbiScore(resultingState);
        const prevViterbi: ViterbiScore<S, T> = stateSets.getViterbiScore(stateToAdvance);

        // A state without a Viterbi score contributes the multiplicative identity
        const prev: S = prevViterbi ? prevViterbi.innerScore : sr.multiplicativeIdentity;
        const newViterbiScore: ViterbiScore<S, T> = {
            innerScore: sr.times(completedViterbi, prev),
            origin: completedState,
            resultingState
        };

        // Only overwrite when there is no score yet or the new path is strictly more probable
        if (!viterbiScore
            || m.toProbability(viterbiScore.innerScore) < m.toProbability(newViterbiScore.innerScore)
        ) {
            stateSets.setViterbiScore(newViterbiScore);
            if (isCompleted(resultingState)) {
                if (!newCompletedStates) newCompletedStates = [];
                newCompletedStates.push(resultingState);
            }
        }
    });

    // Add new states to chart
    if (newStates)
        newStates.forEach(a => stateSets.addState(a));

    // Recurse with new states that are completed; each branch gets its own copy of the path
    if (newCompletedStates) newCompletedStates.forEach(resultingState => {
        const path: Set<State<S, T>> = new Set<State<S, T>>(originPathTo);
        path.add(resultingState);
        setViterbiScores(stateSets, resultingState, path, m);
    });
}
/**
 * Exhaustively completes the given states, recording deferred expressions for
 * the forward and inner scores of every state that gets advanced.
 * The expressions can only be resolved after completion has finished, because
 * they may depend on one another.
 *
 * @param position State position at which completion takes place
 * @param states Completed states used to find the states to advance
 * @param addForwardScores Container that accumulates forward score expressions
 * @param addInnerScores Container that accumulates inner score expressions
 * @param grammar Grammar providing unit-star scores and the deferrable semiring
 * @param stateSets Chart that is queried and extended with newly completed states
 */
function completeNoViterbi<S, T>(position: number,
                                 states: Set<State<S, T>>,
                                 addForwardScores: DeferredStateScoreComputations<S, T>,
                                 addInnerScores: DeferredStateScoreComputations<S, T>,
                                 grammar: Grammar<T, S>,
                                 stateSets: Chart<T, S>) {
    // Created on first use; holds advanced states that are completed, are not
    // unit productions and are not yet in the chart.
    let pendingCompletions: DeferredStateScoreComputations<S, T>;

    // For all states
    //      i: Yj → v·    [a",y"]
    //      j: Xk → l·Zm  [a',y']
    //
    //  such that the R*(Z =*> Y) is nonzero
    //  and Y → v is not a unit production
    states.forEach((completedState: State<S, T>) => {
        const origin: number = completedState.ruleStartPosition;
        //noinspection JSSuspiciousNameCombination
        const completedCategory: NonTerminal = completedState.rule.left;
        const mapping = grammar.probabilityMapping;

        const completedInner: DeferredValue<S> = addInnerScores.getOrCreateByState(
            completedState,
            new AtomicValue(stateSets.getInnerScore(completedState))
        );

        stateSets
            .getStatesActiveOnNonTerminalWithNonZeroUnitStarScoreToY(origin, completedCategory)
            .forEach((waiting: State<S, T>) => {
                if (origin !== waiting.position) throw new Error("Index failed. This is a bug.");

                // Make i: X_k → lZ·m
                const waitingInner: DeferredValue<S> = addInnerScores.getOrCreateByState(
                    waiting,
                    new AtomicValue(stateSets.getInnerScore(waiting))
                );
                const waitingForward: DeferredValue<S> = addForwardScores.getOrCreateByState(
                    waiting,
                    new AtomicValue(stateSets.getForwardScore(waiting))
                );

                const activeCategory: Category<T> = getActiveCategory(waiting);
                const unitStar: Expression<S> = new AtomicValue(
                    mapping.fromProbability(
                        grammar.getUnitStarScore(activeCategory, completedCategory)
                    )
                );

                const sr = grammar.deferrableSemiring;
                const forward: Expression<S> = sr.times(
                    unitStar,
                    sr.times(waitingForward, completedInner)
                );
                const inner: Expression<S> = sr.times(
                    unitStar,
                    sr.times(waitingInner, completedInner)
                );

                const rule: Rule<T> = waiting.rule;
                const dot: number = advanceDot(waiting);
                const ruleStart: number = waiting.ruleStartPosition;

                addForwardScores.plus(rule, position, ruleStart, dot, forward);

                // A new completed state that is no unit production must be
                // completed on recursively, so remember it.
                const needsCompletion =
                    isPassive(rule, dot)
                    && !isUnitProduction(rule)
                    && !stateSets.has(rule, position, ruleStart, dot);
                if (needsCompletion) {
                    if (!pendingCompletions) {
                        pendingCompletions = new DeferredStateScoreComputations(sr);
                    }
                    pendingCompletions.plus(rule, position, ruleStart, dot, forward);
                }

                addInnerScores.plus(rule, position, ruleStart, dot, inner);
            });
    });

    if (!pendingCompletions) {
        return;
    }

    const newlyCompleted: Set<State<S, T>> = new Set<State<S, T>>();
    pendingCompletions.forEach(
        (index: number,
         ruleStart: number,
         dot: number,
         rule: Rule<T>,
         ignored: Expression<S>) => {
            if (stateSets.has(rule, index, ruleStart, dot)) {
                throw new Error("State wasn't new");
            }

            const state: State<S, T> = stateSets.getOrCreate(index, ruleStart, dot, rule);
            if (!isCompleted(state) || isUnitProduction(state.rule))
                throw new Error("Unexpected chart found in possible new states. This is a bug.");

            newlyCompleted.add(state);
        });

    if (newlyCompleted.size > 0) {
        completeNoViterbi(
            position,
            newlyCompleted,
            addForwardScores,
            addInnerScores,
            grammar,
            stateSets
        );
    }
}
/**
 * Expression that delegates resolution to another, replaceable expression.
 *
 * The wrapped expression is a public, mutable field, so it can be swapped
 * after construction; `resolve` always uses whichever expression is current.
 */
export class DeferredValue<T> implements Expression<T> {
    /**
     * @param expression Expression that `resolve` delegates to
     */
    constructor(public expression: Expression<T>) {
    }

    /**
     * @return The result of resolving the currently wrapped expression
     */
    resolve(): T {
        const current = this.expression;
        return current.resolve();
    }
}
/**
 * Reconstructs the Viterbi parse tree for the given state by walking back
 * through the chart (the backward part of the forward-backward algorithm).
 *
 * - A state with the dot at position 0 is a prediction and yields a leaf for
 *   the rule's left-hand side.
 * - A state whose dot follows a terminal was produced by a scan; the tree of
 *   the pre-scan state is extended with the scanned token.
 * - A state whose dot follows a non-terminal was produced by a completion; the
 *   tree of the predecessor state is extended with the tree of the completed
 *   state stored as origin of the Viterbi score.
 *
 * @param state State to build the parse tree for
 * @param chart Chart holding the states and their Viterbi scores
 * @return Parse tree rooted at the left-hand side of the state's rule
 * @throws Error If a state that should have been scanned carries no token, or
 * if a completed state has no Viterbi score
 */
export function getViterbiParseFromChart<S, T>(state: State<S, T>, chart: Chart<T, S>): ParseTree<T> {
    // Prediction state: nothing has been recognised yet
    if (state.ruleDotPosition === 0) {
        return {category: state.rule.left, children: []};
    }

    const prefixEnd: Category<T> = state.rule.right[state.ruleDotPosition - 1];

    if (!isNonTerminal(prefixEnd)) {
        // Scanned terminal state.
        // Only a missing token is an error: tokens such as "" or 0 are valid input.
        if (state.scannedToken === undefined || state.scannedToken === null)
            throw new Error("Expected chart to be a scanned chart. This is a bug.");

        // Recurse for the state before the scan, one position to the left
        const T: ParseTree<T> = getViterbiParseFromChart(
            chart.getOrCreate(
                state.position - 1,
                state.ruleStartPosition,
                state.ruleDotPosition - 1,
                state.rule
            ),
            chart
        );
        addRightMost(T, {token: state.scannedToken, category: state.scannedCategory, children: []});
        return T;
    }

    // Completed non-terminal state
    const viterbi: ViterbiScore<S, T> = chart.getViterbiScore(state);
    if (!viterbi)
        throw new Error("Expected Viterbi score to be set on completed chart. This is a bug.");

    // Completed state that led to the current state
    const origin: State<S, T> = viterbi.origin;

    // Recurse for predecessor state (before the completion happened);
    // it ends where the completed state started.
    const T: ParseTree<T> = getViterbiParseFromChart(
        chart.getOrCreate(
            origin.ruleStartPosition,
            state.ruleStartPosition,
            state.ruleDotPosition - 1,
            state.rule
        ),
        chart
    );

    // Recurse for completed state
    const Tprime: ParseTree<T> = getViterbiParseFromChart(origin, chart);

    addRightMost(T, Tprime);
    return T;
}
/**
 * Parses the given tokens and returns the most probable (Viterbi) parse tree
 * together with its probability.
 *
 * @param Start Start category of the grammar
 * @param grammar Grammar to parse with
 * @param tokens Input tokens
 * @param scanProbability Optional function giving the score of scanning a
 * token given its applicable terminals
 * @return The Viterbi parse tree and its probability
 * @throws Error ("Could not parse sentence.") if the chart contains no
 * completed goal state spanning all tokens
 */
export function getViterbiParse<S, T>(Start: NonTerminal,
                                      grammar: Grammar<T, S>,
                                      tokens: T[],
                                      scanProbability?: (x: T, t: Terminal<T>[]) => S): ParseTreeWithScore<T> {
    // The second tuple element (number of processed tokens) is not needed here
    const [chart, , init] = parseSentenceIntoChart(Start, grammar, tokens, scanProbability);

    const goalRule = init.rule;
    const goalDot = goalRule.right.length;

    if (!chart.has(goalRule, tokens.length, 0, goalDot))
        throw new Error("Could not parse sentence.");

    const finalState = chart.getOrCreate(
        tokens.length,
        0,
        goalDot,
        goalRule
    );

    const parseTree: ParseTree<T> = getViterbiParseFromChart(finalState, chart);
    const finalScore = chart.getViterbiScore(finalState).innerScore;

    return {
        parseTree,
        // Call through the mapping object so that implementations relying on `this` keep working
        probability: grammar.probabilityMapping.toProbability(finalScore)
    };
}

//noinspection JSUnusedGlobalSymbols
/**
 * Builds a parse tree node from a category and its child trees.
 *
 * @param category The category of the new node
 * @param children The child trees of the node, in their linear order;
 * defaults to a fresh empty list. The given array is stored as-is, not copied.
 * @return The new parse tree node
 */
export function createParseTree<T>(category: Category<T>, children: ParseTree<T>[] = []): ParseTree<T> {
    const tree: ParseTree<T> = {category: category, children: children};
    return tree;
}
/**
 * Makes predictions in the specified chart at the given index.
 *
 * For each state at position `index` that is active on a non-terminal Z, and
 * for each rule Y → v such that the left-star score R(Z =*L> Y) is nonzero,
 * a state `index: Y → ·v` is predicted: its forward score is increased, and
 * its inner and Viterbi scores are set to P(Y → v).
 *
 * @param index The string index to make predictions at.
 * @param grammar Grammar providing rules, left-star scores and the semiring mapping
 * @param stateSets Chart to read from and add predictions to
 * @return One record per prediction made (state, scores and origin)
 * @throws Error If a predicted state already has an inner score that is
 * neither P(Y → v) nor the semiring zero
 */
export function predict<S, T>(index: number,
                              grammar: Grammar<T, S>,
                              stateSets: Chart<T, S>) {
    const changes: any[] = [];
    const statesToPredictOn: Set<State<S, T>> = stateSets.getStatesActiveOnNonTerminals(index);
    if (!statesToPredictOn) {
        return changes;
    }

    const newStates = new Set<State<S, T>>();
    const probMap = grammar.probabilityMapping;
    const sr = probMap.semiring;

    // O(|stateset(i)|) = O(|grammar|): For all states i: Xk → λ·Zμ...
    statesToPredictOn.forEach((statePredecessor: State<S, T>) => {
        const Z: Category<T> = getActiveCategory(statePredecessor);
        const prevForward: S = stateSets.getForwardScore(statePredecessor);

        // For all productions Y → v such that R(Z =*L> Y) is nonzero
        grammar.leftStarCorners
            .getNonZeroScores(Z)
            .forEach((corner: Category<T>) => {
                if (!isNonTerminal(corner) || !grammar.ruleMap.has(corner)) {
                    return;
                }

                grammar.ruleMap.get(corner).forEach((Y_to_v: Rule<T>) => {
                    // we predict state i: Yi → ·v
                    // noinspection JSSuspiciousNameCombination
                    const Y: NonTerminal = Y_to_v.left;

                    // γ' = P(Y → v)
                    // (called through probMap so mappings that rely on `this` keep working)
                    const Y_to_vScore: S = probMap.fromProbability(Y_to_v.probability);

                    // α' = α * R(Z =*L> Y) * P(Y → v)
                    const fw: S = sr.times(
                        prevForward,
                        sr.times(
                            probMap.fromProbability(grammar.getLeftStarScore(Z, Y)),
                            Y_to_vScore
                        )
                    );

                    // We might want to increment the probability of an existing state
                    const isNew = !stateSets.has(Y_to_v, index, index, 0);
                    const predicted: State<S, T> = isNew ? {
                        position: index,
                        ruleStartPosition: index,
                        ruleDotPosition: 0,
                        rule: Y_to_v
                    } : stateSets.getOrCreate(index, index, 0, Y_to_v);
                    if (isNew) // save for later
                        newStates.add(predicted);

                    const innerScore: S = stateSets.getInnerScore(predicted);
                    if (!(Y_to_vScore === innerScore || probMap.ZERO === innerScore))
                        throw new Error(Y_to_vScore + " != " + innerScore);

                    const viterbi = {
                        origin: statePredecessor,
                        resultingState: predicted,
                        innerScore: Y_to_vScore,
                    };

                    stateSets.addForwardScore(predicted, fw, sr);
                    stateSets.setInnerScore(predicted, Y_to_vScore);
                    stateSets.setViterbiScore(viterbi);

                    changes.push({
                        state: predicted,
                        innerScore: Y_to_vScore,
                        forwardScore: fw,
                        viterbiScore: viterbi,
                        origin: statePredecessor
                    });
                });
            });
    });

    // Register the states that did not exist before this round of predictions
    newStates.forEach(ss => stateSets.getOrCreate(ss.position, ss.ruleStartPosition, ss.ruleDotPosition, ss.rule));

    return changes;
}
WordWithTypes, Terminal } from "../grammar/category"; 2 | import { Semiring } from "semiring"; 3 | import { Chart } from "./chart/chart"; 4 | import { getActiveCategory, State, advanceDot } from "./chart/state"; 5 | 6 | 7 | /** 8 | * Handles a token scanned from the input string. 9 | * 10 | * @param tokenPosition The start index of the scan. 11 | * @param word 12 | * @param types 13 | * @param scanProbability Function that provides the probability of scanning the given token at this position. Might be null for a probability of 1.0. 14 | * @param sr 15 | * @param stateSets 16 | */ 17 | export function scan(tokenPosition: number, 18 | {word, types}: WordWithTypes, 19 | sr: Semiring, 20 | stateSets: Chart, 21 | scanProbability?: (x: T, t: Terminal[]) => S) { 22 | const changes: any[] = []; 23 | 24 | const scanProb: S = !!scanProbability ? scanProbability(word, types) : undefined; 25 | 26 | /* 27 | * Get all states that are active on a terminal 28 | * O(|stateset(i)|) = O(|grammar|): For all states i: Xk → λ·tμ, where t is a terminal that matches the given token... 29 | */ 30 | types.forEach(terminal => { 31 | const statesActiveOnTerminals: Set> = stateSets.getStatesActiveOnTerminals(tokenPosition, terminal); 32 | if (statesActiveOnTerminals) statesActiveOnTerminals.forEach((preScanState: State) => { 33 | const activeCategory = getActiveCategory(preScanState); 34 | if (isNonTerminal(activeCategory)) throw new Error("this is a bug"); 35 | else { 36 | if (!activeCategory(word)) throw new Error("Index failed"); 37 | // TODO can this be more efficient, ie have tokens make their category be explicit? (Do we want to maintain the possibility of such "fluid" categories?) 
/**
 * Calculates the inner score of the state that results from scanning a token.
 *
 * @param sr The semiring to calculate with
 * @param previousInner The inner score of the state before the scan
 * @param scanProbability The score of scanning this particular token; when
 * omitted (`undefined` or `null`) the previous score is returned unchanged
 * @return The inner score for the new state
 */
function calculateInnerScore<S>(sr: Semiring<S>, previousInner: S, scanProbability?: S): S {
    // Test for absence explicitly: a score of 0 is a legitimate semiring value
    // and must still be multiplied in.
    if (scanProbability === undefined || scanProbability === null)
        return previousInner;
    return sr.times(previousInner, scanProbability);
}
/**
 * Computes the forward score of the state that results from scanning a token.
 *
 * @param sr The semiring to calculate with
 * @param previousStateForwardScore The forward score of the state before the scan
 * @param scanProbability The score of scanning this particular token; when
 * omitted (`undefined` or `null`) the previous score is returned unchanged
 * @return Computed forward score for the new state
 */
function calculateForwardScore<S>(sr: Semiring<S>, previousStateForwardScore: S, scanProbability?: S): S {
    // Test for absence explicitly: a score of 0 is a legitimate semiring value
    // and must still be multiplied in.
    if (scanProbability === undefined || scanProbability === null) {
        return previousStateForwardScore;
    }
    return sr.times(previousStateForwardScore, scanProbability);
}
number; 29 | 30 | ZERO: Y; 31 | ONE: Y; 32 | } 33 | 34 | export class Grammar { 35 | readonly name: string; 36 | readonly ruleMap: Map>>; 37 | readonly rules: Set> = new Set>(); 38 | readonly nonTerminals: Set = new Set(); 39 | readonly terminals: Terminal[]; 40 | 41 | // 42 | // pre-compute some scores for efficient earley parsing 43 | // 44 | private readonly leftCorners: LeftCorners; 45 | readonly leftStarCorners: LeftCorners; 46 | readonly unitStarScores: LeftCorners; 47 | readonly probabilityMapping: ProbabilitySemiringMapping; 48 | readonly deferrableSemiring: Semiring>; 49 | 50 | 51 | constructor(name: string, 52 | ruleMap: Map>>, 53 | probabilityMapping: ProbabilitySemiringMapping) { 54 | this.name = name; 55 | this.ruleMap = ruleMap; 56 | 57 | this.probabilityMapping = probabilityMapping; 58 | this.deferrableSemiring = makeDeferrable(probabilityMapping.semiring); 59 | 60 | const values: IterableIterator>> = ruleMap.values(); 61 | 62 | 63 | let done = false; 64 | const terminals = new Set>(); 65 | while (!done) { 66 | const next: IteratorResult>> = values.next(); 67 | done = next.done; 68 | if (!done) { 69 | const rulez = next.value; 70 | rulez.forEach((rule: Rule) => { 71 | this.rules.add(rule); 72 | this.nonTerminals.add(rule.left); 73 | rule.right.forEach((a: Category) => { 74 | if (isNonTerminal(a)) 75 | this.nonTerminals.add(a); 76 | else 77 | terminals.add(a); 78 | }); 79 | } 80 | ); 81 | } 82 | } 83 | this.terminals = Array.from(terminals); 84 | 85 | const zero = 0.0; 86 | this.leftCorners = getLeftCorners(this.rules, zero); 87 | this.leftStarCorners = getReflexiveTransitiveClosure(this.nonTerminals, this.leftCorners, zero); 88 | this.unitStarScores = getUnitStarCorners(this.rules, this.nonTerminals, zero); 89 | 90 | } 91 | 92 | getLeftStarScore(from: Category, to: Category): number { 93 | return this.leftStarCorners.get(from, to); 94 | } 95 | 96 | getLeftScore(from: Category, to: Category): number { 97 | return this.leftCorners.get(from, to); 98 | } 
/**
 * Fluent builder that collects rules, grouped by their left-hand side, and
 * creates a {@link Grammar} from them.
 */
export class GrammarBuilder<T, SemiringType> {

    private readonly ruleMap: Map<NonTerminal, Set<Rule<T>>>;
    private readonly name: string;
    private semiringMapping: ProbabilitySemiringMapping<SemiringType>;

    /**
     * @param semiringMapping Mapping between probabilities and semiring values, handed to the built grammar
     * @param name Optional name for the grammar
     */
    constructor(semiringMapping: ProbabilitySemiringMapping<SemiringType>, name?: string) {
        this.ruleMap = new Map<NonTerminal, Set<Rule<T>>>();
        this.name = name;
        this.semiringMapping = semiringMapping;
    }

    //noinspection JSUnusedGlobalSymbols
    /**
     * Replaces the semiring mapping used when the grammar is built.
     *
     * @return This builder, for chaining
     */
    setSemiringMapping(semiringMapping: ProbabilitySemiringMapping<SemiringType>) {
        this.semiringMapping = semiringMapping;
        return this;
    }

    /**
     * Creates a rule from its parts and adds it; see {@link #addRule}.
     *
     * @return This builder, for chaining
     */
    addNewRule(probability: number, left: NonTerminal, right: Category<T>[]): GrammarBuilder<T, SemiringType> {
        this.addRule({
            left,
            right,
            probability
        });

        return this;
    }

    /**
     * Validates the given rule and adds it to the builder.
     *
     * @param rule Rule to add
     * @return This builder, for chaining
     * @throws Error If the probability is falsy or not a number, if the
     * left-hand side is falsy, if the right-hand side is missing or empty, or
     * if a rule with the same left-hand side and identical right-hand side
     * elements was already added
     */
    addRule(rule: Rule<T>): GrammarBuilder<T, SemiringType> {
        if (!rule.probability || typeof rule.probability !== "number")
            throw new Error("Probability not defined: " + rule.probability);
        if (!rule.left) throw new Error("Left hand side not defined: " + rule.left);
        if (!rule.right || !rule.right.length || typeof rule.right.length !== "number" || rule.right.length <= 0)
            throw new Error("Right hand side not defined: " + rule.right);

        // Right-hand sides are equal when they have the same length and the
        // same elements (compared by identity) at every index.
        const hasSameRightHandSide = (other: Rule<T>): boolean => {
            if (rule.right.length !== other.right.length) return false;
            for (let i = 0; i < rule.right.length; i++) {
                if (rule.right[i] !== other.right[i]) return false;
            }
            return true;
        };

        const rulesForLeft = this.ruleMap.get(rule.left);
        if (rulesForLeft) {
            rulesForLeft.forEach(existing => {
                if (hasSameRightHandSide(existing))
                    throw new Error("Already added rule " + rule.left + " -> " + rule.right.toString());
            });
        }

        getOrCreateSet(this.ruleMap, rule.left).add(rule);

        return this;
    }

    /**
     * @return A new grammar containing the rules added so far
     */
    build(): Grammar<T, SemiringType> {
        return new Grammar(this.name, this.ruleMap, this.semiringMapping);
    }
}
7 | * Use Gaussian Elimination to calculate the inverse: 8 | * (1) 'augment' the matrix (left) by the identity (on the right) 9 | * (2) Turn the matrix on the left into the identity by elemetry row ops 10 | * (3) The matrix on the right is the inverse (was the identity matrix) 11 | * 12 | * There are 3 elementary row ops: (I combine b and c in my code) 13 | * (a) Swap 2 rows 14 | * (b) Multiply a row by a scalar 15 | * (c) Add 2 rows 16 | */ 17 | function invert(M: number[][]) { 18 | // if the matrix isn't square 19 | if (M.length !== M[0].length) { 20 | throw new Error("Matrix must be square"); 21 | } 22 | 23 | // create the identity matrix (I), and a copy (C) of the original 24 | 25 | const dim = M.length; 26 | const I: number[][] = []; 27 | const C: number[][] = []; 28 | for (let i = 0; i < dim; i += 1) { 29 | // Create the row 30 | I[I.length] = []; 31 | C[C.length] = []; 32 | for (let j = 0; j < dim; j += 1) { 33 | 34 | // if we're on the diagonal, put a 1 (for identity) 35 | if (i == j) { 36 | I[i][j] = 1; 37 | } else { 38 | I[i][j] = 0; 39 | } 40 | 41 | // Also, make the copy of the original 42 | C[i][j] = M[i][j]; 43 | } 44 | } 45 | 46 | // Perform elementary row operations 47 | for (let i = 0; i < dim; i += 1) { 48 | // get the element e on the diagonal 49 | let e: number = C[i][i]; 50 | 51 | // if we have a 0 on the diagonal (we'll need to swap with a lower row) 52 | if (e === 0) { 53 | // look through every row below the i'th row 54 | for (let ii = i + 1; ii < dim; ii += 1) { 55 | // if the ii'th row has a non-0 in the i'th col 56 | if (C[ii][i] !== 0) { 57 | // it would make the diagonal have a non-0 so swap it 58 | for (let j = 0; j < dim; j++) { 59 | e = C[i][j]; // temp store i'th row 60 | C[i][j] = C[ii][j]; // replace i'th row by ii'th 61 | C[ii][j] = e; // repace ii'th by temp 62 | e = I[i][j]; // temp store i'th row 63 | I[i][j] = I[ii][j]; // replace i'th row by ii'th 64 | I[ii][j] = e; // repace ii'th by temp 65 | } 66 | // don't bother 
checking other rows since we've swapped 67 | break; 68 | } 69 | } 70 | // get the new diagonal 71 | e = C[i][i]; 72 | // if it's still 0, not invertable (error) 73 | if (e == 0) { 74 | throw new Error("Matrix was not invertable"); 75 | } 76 | } 77 | 78 | // Scale this row down by e (so we have a 1 on the diagonal) 79 | for (let j = 0; j < dim; j++) { 80 | C[i][j] = C[i][j] / e; // apply to original matrix 81 | I[i][j] = I[i][j] / e; // apply to identity 82 | } 83 | 84 | // Subtract this row (scaled appropriately for each row) from ALL of 85 | // the other rows so that there will be 0's in this column in the 86 | // rows above and below this one 87 | for (let ii = 0; ii < dim; ii++) { 88 | // Only apply to other rows (we want a 1 on the diagonal) 89 | if (ii == i) { 90 | continue; 91 | } 92 | 93 | // We want to change this element to 0 94 | e = C[ii][i]; 95 | 96 | // Subtract (the row above(or below) scaled by e) from (the 97 | // current row) but start at the i'th column and assume all the 98 | // stuff left of diagonal is 0 (which it should be if we made this 99 | // algorithm correctly) 100 | for (let j = 0; j < dim; j++) { 101 | C[ii][j] -= e * C[i][j]; // apply to original matrix 102 | I[ii][j] -= e * I[i][j]; // apply to identity 103 | } 104 | } 105 | } 106 | 107 | // we've done all operations, C should be the identity 108 | // matrix I should be the inverse: 109 | return I; 110 | } 111 | 112 | 113 | /** 114 | * Information holder for left-corner relations and left*-corner relations. Essentially a map from {@link Category} 115 | * to {@link Category} with some indexing. 
116 | */ 117 | export class LeftCorners { 118 | /** 119 | * X -L> Y probability, undefined for 0.0 120 | */ 121 | private readonly map: Map, Map, number>>; 122 | /** 123 | * X -L> Y is greater than 0.0 124 | */ 125 | private nonZeroScores: Map, Set>>; 126 | /** 127 | * X -L> Y is greater than 0.0, and Y is a non-terminal 128 | */ 129 | private nonZeroScoresToNonTerminals: Map, Set>; 130 | 131 | readonly ZERO: number; 132 | 133 | /** 134 | * Information holder for left-corner relations and left*-corner relations. Essentially a map from {@link Category} 135 | * to {@link Category} with some utility functions to deal with probabilities. 136 | * @param ZERO Default value if there is no chance; usually 0 137 | */ 138 | constructor(ZERO = 0) { 139 | this.ZERO = ZERO; 140 | 141 | this.map = new Map, Map, number>>(); 142 | this.nonZeroScores = new Map>>(); 143 | this.nonZeroScoresToNonTerminals = new Map>(); 144 | } 145 | 146 | 147 | /** 148 | * Adds the given number to the current value of [X, Y], using standard + 149 | * 150 | * @param x Left hand side 151 | * @param y Right hand side 152 | * @param probability number to plus 153 | */ 154 | public add(x: Category, y: Category, probability: number) { 155 | const newProbability = this.get(x, y) /* defaults to zero */ + probability; 156 | if (!isFinite(newProbability)) { 157 | throw new Error("Invalid left-[*]-corner probability: " + newProbability + " for " + x + " -L> " + y + " ... "); 158 | } 159 | this.set(x, y, newProbability); 160 | } 161 | 162 | /** 163 | * @return stored value in left-corner relationship. this.ZERO by default 164 | */ 165 | public get(x: Category, y: Category): number { 166 | if (!this.map) throw new Error("Map was not defined"); 167 | const yToP = getOrCreateMap(this.map, x); 168 | if (!yToP) return this.ZERO; 169 | else return yToP.get(y) || this.ZERO; 170 | } 171 | 172 | 173 | /** 174 | * Sets table entry to a given probability. Will instantiate empty map if it does not exist yet. 
175 | * 176 | * @param x LHS 177 | * @param y RHS 178 | * @param val number to set table entry to 179 | */ 180 | public set(x: Category, y: Category, val: number): void { 181 | if (val !== this.ZERO) { 182 | // Set map 183 | const yToProb = getOrCreateMap(this.map, x); 184 | yToProb.set(y, val); 185 | 186 | // Set non-zero scores 187 | 188 | getOrCreateSet(this.nonZeroScores, x).add(y); 189 | if (isNonTerminal(y)) 190 | getOrCreateSet(this.nonZeroScoresToNonTerminals, x).add(y); 191 | } 192 | } 193 | 194 | public getNonZeroScores(x: Category): Set> { 195 | return this.nonZeroScores.get(x); 196 | } 197 | 198 | public getNonZeroScoresToNonTerminals(x: Category): Set { 199 | return this.nonZeroScoresToNonTerminals.get(x); 200 | } 201 | } 202 | 203 | /** 204 | * Uses a trick to compute left*Corners (R_L), the reflexive transitive closure of leftCorners: 205 | * 206 | * ~~ P must have its scores defines as ordinary probabilities between 0 and 1 ~~ 207 | * 208 | * R_L = I + P_L R_L = (I - P_L)^-1 209 | */ 210 | export function getReflexiveTransitiveClosure(nonTerminals: Set, 211 | P: LeftCorners, 212 | zero = 0.0): LeftCorners { 213 | const nonterminalz: NonTerminal[] = []; 214 | nonTerminals.forEach(a => nonterminalz.push(a)); 215 | 216 | // Create matrix of value I - P_L 217 | const R_L_inverse: number[][] = []; 218 | for (let row = 0; row < nonterminalz.length; row++) { 219 | const X: NonTerminal = nonterminalz[row]; 220 | R_L_inverse[row] = []; 221 | for (let col = 0; col < nonterminalz.length; col++) { 222 | const Y: NonTerminal = nonterminalz[col]; 223 | const prob: number = P.get(X, Y); 224 | // I - P_L 225 | R_L_inverse[row][col] = (row === col ? 
1 : 0) - prob; 226 | } 227 | } 228 | const R_L: number[][] = invert(R_L_inverse); 229 | 230 | const m: LeftCorners = new LeftCorners(zero); 231 | /** 232 | * Copy all matrix values into our {@link LeftCorners} object 233 | */ 234 | for (let roww = 0; roww < nonterminalz.length; roww++) { 235 | for (let coll = 0; coll < nonterminalz.length; coll++) { 236 | m.set(nonterminalz[roww], nonterminalz[coll], R_L[roww][coll]); 237 | } 238 | } 239 | return m; 240 | } 241 | 242 | export function getUnitStarCorners(rules: Set>, 243 | nonTerminals: Set, 244 | zero = 0.0): LeftCorners { 245 | // Sum all probabilities for unit relations 246 | const P_U: LeftCorners = new LeftCorners(zero); 247 | rules.forEach((rule: Rule) => { 248 | if (isUnitProduction(rule)) 249 | P_U.add(rule.left, rule.right[0], rule.probability); 250 | }); 251 | 252 | // R_U = (I - P_U) 253 | return getReflexiveTransitiveClosure(nonTerminals, P_U, zero); 254 | } 255 | 256 | 257 | /** 258 | * Compute left corner relations 259 | */ 260 | export function getLeftCorners(rules: Set>, ZERO = 0.0): LeftCorners { 261 | const leftCorners = new LeftCorners(ZERO); 262 | 263 | // Sum all probabilities for left corners 264 | rules.forEach((rule: Rule) => { 265 | if (rule.right.length > 0 && isNonTerminal(rule.right[0])) { 266 | leftCorners.add(rule.left, rule.right[0], rule.probability); 267 | } 268 | }); 269 | return leftCorners; 270 | } -------------------------------------------------------------------------------- /src/grammar/rule.ts: -------------------------------------------------------------------------------- 1 | import { Category, NonTerminal, isNonTerminal } from "./category"; 2 | 3 | export interface Rule { 4 | left: NonTerminal; 5 | right: Category[]; 6 | probability: number; 7 | } 8 | 9 | export function invalidDotPosition(dotPosition: number, rule: any) { 10 | throw new Error(`Invalid dot position: ${dotPosition}, ${JSON.stringify(rule)}`); 11 | } 12 | 13 | export function isUnitProduction(rule: Rule): 
boolean { 14 | return rule.right.length === 1 && isNonTerminal(rule.right[0]); 15 | } 16 | 17 | /** 18 | * Gets the active category in the underlying rule, if any. 19 | * 20 | * @return The category at this dotted rule's 21 | * dot position in the underlying rule's 22 | * right side category sequence. If this rule's 23 | * dot position is already at the end of the right side category sequence, 24 | * returns null. 25 | */ 26 | export function getActiveCategory(rule: Rule, dotPosition: number): Category { 27 | if (dotPosition < 0 || dotPosition > rule.right.length) { 28 | invalidDotPosition(dotPosition, rule.right); 29 | } else if (dotPosition < rule.right.length) { 30 | const returnValue: Category = rule.right[dotPosition]; 31 | if (!returnValue) { 32 | throw new Error(`category did not exist at position ${dotPosition}: ${returnValue}`); 33 | } else { 34 | return returnValue; 35 | } 36 | } else { 37 | return undefined; 38 | } 39 | } 40 | 41 | -------------------------------------------------------------------------------- /src/grammar/token.ts: -------------------------------------------------------------------------------- 1 | export type Token = any; 2 | 3 | //noinspection JSUnusedGlobalSymbols 4 | export function wrapped(source: T): Token { 5 | if (!source) { 6 | throw new Error("Source object can't be null for an instantiated token."); 7 | } 8 | return {source}; 9 | } 10 | 11 | export default Token; -------------------------------------------------------------------------------- /src/index.ts: -------------------------------------------------------------------------------- 1 | export * from "./earley/parser"; 2 | export * from "./earley/parsetree"; 3 | export * from "./earley/chart/viterbi-score"; 4 | export * from "./grammar/grammar"; 5 | export * from "./grammar/category"; 6 | export * from "./grammar/rule"; -------------------------------------------------------------------------------- /src/util.ts: 
--------------------------------------------------------------------------------
// NOTE(review): the type parameters of both helpers were lost in extraction and have been reconstructed -- confirm.

/**
 * Looks up the inner map stored under `key`. If `map` has no entry for `key`, a new empty map is stored
 * under it first.
 *
 * @return the (possibly freshly created) value stored under `key`
 */
export function getOrCreateMap<X, Y, Z>(map: Map<X, Map<Y, Z>>, key: X): Map<Y, Z> {
    if (!map.has(key)) {
        map.set(key, new Map<Y, Z>());
    }
    return map.get(key);
}

/**
 * Looks up the set stored under `key`. If `map` has no entry for `key`, a new empty set is stored
 * under it first.
 *
 * @return the (possibly freshly created) value stored under `key`
 */
export function getOrCreateSet<X, Y>(map: Map<X, Set<Y>>, key: X): Set<Y> {
    if (!map.has(key)) {
        map.set(key, new Set<Y>());
    }
    return map.get(key);
}
--------------------------------------------------------------------------------
/test/earley/chart.spec.ts:
--------------------------------------------------------------------------------
import {NonTerminal, isNonTerminal, Category} from "../../src";
import {Rule, isUnitProduction} from "../../src";

import {expect} from "chai";
import {Chart} from "../../src/earley/chart/chart";
import {g} from "../sample-grammar";
import {isPassive, isCompleted, State, isActive, getActiveCategory} from "../../src/earley/chart/state";
import {getOrCreateSet, getOrCreateMap} from "../../src/util";

// NOTE(review): the generic type arguments in this spec (`Rule<string>`, `Category<string>`,
// `State<number, string>`) were reconstructed after being lost in extraction -- confirm.

describe("Chart", () => {
    // Still untested:
    // ss.getStatesActiveOnNonTerminalWithNonZeroUnitStarScoreToY();
    // ss.getStatesActiveOnNonTerminal();
    // ss.getState();
    // ss.getOrCreate();
    // ss.hasState();
    // ss.has();
    // ss.addState();
    // ss.getCompletedStatesThatAreNotUnitProductions();
    // ss.getCompletedStates();
    // ss.getStatesActiveOnNonTerminals();
    // ss.getStatesActiveOnTerminals();

    // One chart is shared by all tests in this suite, so the tests depend on the order they run in.
    const ss = new Chart(g);
    const ZERO = g.probabilityMapping.ZERO;
    const ONE = g.probabilityMapping.ONE;
    const plus = g.probabilityMapping.semiring.plus;
    const rulesIterator = g.rules.values();
    const r0: Rule<string> = rulesIterator.next().value;
    // const r1: Rule = rulesIterator.next().value;

    /** A fresh state object for rule r0, spanning 1..3 with the dot at 2, that has scanned "a". */
    const scannedA = (): State< number, string> => ({
        rule: r0,
        ruleStartPosition: 1,
        ruleDotPosition: 2,
        position: 3,
        scannedToken: "a",
        scannedCategory: r0.right[0]
    });

    it("should index new states correctly", () => {
        // ss.addState()
        // expect(ss.states).to.exist;
        g.rules.forEach((r: Rule<string>, i) => {
            const s: State< number, string> = {
                rule: r,
                ruleStartPosition: 1,
                ruleDotPosition: 1,
                position: 2,
                scannedToken: "state " + i
            };
            expect(ss.has(r, 2, 1, 1)).to.equal(false);
            expect(ss.hasState(s)).to.equal(false);

            const state: State< number, string> =
                ss.getOrCreate(2, 1, 1, r, "state " + i);
            expect(state).to.exist;
            expect(ss.has(r, 2, 1, 1)).to.equal(true);
            expect(ss.hasState(s)).to.equal(true);

            const completed = isCompleted(state);
            expect(isCompleted(s)).to.equal(r.right.length === 1);
            expect(ss.getCompletedStates(s.position).has(state)).to.equal(completed);
            expect(
                getOrCreateSet(getOrCreateMap(ss.completedStatesFor, (state.position)), r.left).has(state)
            ).to.equal(completed);
            expect(
                getOrCreateSet(ss.completedStatesThatAreNotUnitProductions, (state.position)).has(state)
            ).to.equal(completed && !isUnitProduction(state.rule));

            const activeCategory: Category<string> = getActiveCategory(state);
            const activeOnNonTerminal = isActive(state) && isNonTerminal(activeCategory);
            expect((ss.getStatesActiveOnNonTerminals(state.position)).has(state)).to.equal(activeOnNonTerminal);

            const nonZeroScoresToNonTerminals = g.unitStarScores.getNonZeroScoresToNonTerminals(activeCategory);
            if (!!nonZeroScoresToNonTerminals) {
                nonZeroScoresToNonTerminals.forEach((FromNonTerminal: NonTerminal) =>
                    expect(
                        getOrCreateSet(
                            getOrCreateMap(ss.nonTerminalActiveAtIWithNonZeroUnitStarToY, (state.position)),
                            FromNonTerminal
                        ).has(state)
                    ).to.equal(true)
                );
            }

            expect(
                getOrCreateSet(getOrCreateMap(ss.statesActiveOnNonTerminal, activeCategory), state.position)
                    .has(state)
            )
                .to.equal(activeOnNonTerminal);
            // TODO
            // expect(
            //     getOrCreateMap(ss.statesActiveOnTerminals, state.position)
            //         .has(state)
            // ).to.equal(isActive(state) && !isNonTerminal(activeCategory));
        });

        // readonly byIndex: Map>>;
        // readonly forwardScores: Map, S>;
        // readonly innerScores: Map, S>;
        // readonly viterbiScores: Map, ViterbiScore>;

        // console.log(ss);
        // ss.getForwardScore()
    });

    it("should handle forward scores correctly", () => {
        // s1 and s2 are distinct objects describing the same state
        const s1 = scannedA();
        const s2 = scannedA();

        // expect(ss.hasForwardScore(s1)).to.equal(false);
        expect(ss.getForwardScore(s2)).to.equal(ZERO);
        ss.addForwardScore(s1, ONE, g.probabilityMapping.semiring);
        // expect(ss.hasForwardScore(s1)).to.equal(true);
        ss.addForwardScore(s2, ONE, g.probabilityMapping.semiring);
        expect(ss.getForwardScore(s1)).to.equal(plus(plus(ZERO, ONE), ONE));
        // expect(ss.hasForwardScore(s1)).to.equal(true);
        ss.setForwardScore(s1, ONE);
        expect(ss.getForwardScore(s2)).to.equal(ONE);
        // expect(ss.hasForwardScore(s1)).to.equal(true);
    });

    it("should handle inner scores correctly", () => {
        const s1 = scannedA();
        const s2 = scannedA();

        expect(ss.getInnerScore(s1)).to.equal(ZERO);
        ss.setInnerScore(s2, ONE);
        // This used to read the *forward* score, which only held because the previous test left it at ONE.
        // NOTE(review): assumes inner scores, like forward scores, are looked up by state value -- confirm.
        expect(ss.getInnerScore(s1)).to.equal(ONE);
    });

    it("should handle viterbi scores correctly", () => {
        const s1: State< number, string> = {
            rule: r0,
            ruleStartPosition: 0,
            ruleDotPosition: 0,
            position: 0,
            scannedToken: "b",
            scannedCategory: r0.right[1]
        };
        const s2 = scannedA();

        const viterbiScore = {
            origin: s1,
            resultingState: s2,
            innerScore: ONE
        };

        expect(ss.hasViterbiScore(s1)).to.equal(false);
        expect(ss.hasViterbiScore(s2)).to.equal(false);
        expect(ss.getViterbiScore(s1)).to.equal(undefined);
        ss.setViterbiScore(viterbiScore); // TODO check if viterbiscore is valid?
        expect(ss.getViterbiScore(s2)).to.equal(viterbiScore);
    });

});

describe("State", () => {
    it("isUnitProduction should behave correctly", () => {
        g.rules.forEach((rule: Rule<string>) => {
            // the result used to be computed but never asserted
            if (rule.right.length === 1 && isNonTerminal(rule.right[0]))
                expect(isUnitProduction(rule)).to.equal(true);
        });
    });

    it("isPassive should behave correctly", () => {
        g.rules.forEach((r: Rule<string>) => {
            expect(isPassive(r, r.right.length)).to.equal(true);
            expect(isPassive(r, r.right.length - 1)).to.equal(false);
            expect(isPassive(r, 0)).to.equal(false);
        });
    });

    it("isCompleted should behave correctly", () => {
        g.rules.forEach((r: Rule<string>) => {
            expect(isCompleted({
                rule: r,
                ruleStartPosition: 0,
                ruleDotPosition: r.right.length,
                position: 0
            })).to.equal(true);
            expect(isCompleted({
                rule: r,
                ruleStartPosition: 0,
                ruleDotPosition: 0,
                position: 0
            })).to.equal(false);
            expect(isCompleted({
                rule: r,
                ruleStartPosition: 0,
                ruleDotPosition: r.right.length - 1,
                position: 0
            })).to.equal(false);
        });
    });
});

--------------------------------------------------------------------------------
/test/earley/earley.spec.ts:
--------------------------------------------------------------------------------
// import {LogSemiring} from "semiring";
import {expect} from "chai";
import {Chart} from "../../src/earley/chart/chart";
import {simpleRecursiveGrammar as g, S} from "../sample-grammar";
import {addState} from "../../src";
// import {scan} from "../../src/earley/scan";
import {predict} from "../../src/earley/predict";
// import {complete} from "../../src/earley/complete";

// TODO
describe("parser", () => {
    it("should scan correctly", () => {
        const ss = new Chart(g);
        // Seed the chart with the initial state `"" -> . S`
        // noinspection JSUnusedLocalSymbols
        const init = addState(
            ss, 0, 0, 0,
            {left: "", right: [S], probability: 1.0},
            g.probabilityMapping.ONE,
            g.probabilityMapping.ONE
        );

        // Everything predicted at position 0 must start and end at 0, with the dot in front
        const predict0 = predict(0, g, ss);
        predict0.forEach(
            p => {
                expect(p.state.ruleDotPosition).to.equal(0);
                expect(p.state.ruleStartPosition).to.equal(0);
                expect(p.state.position).to.equal(0);
            }
        );
        // const scan0 = scan(
        //     0,
        //     {word: "a", types: [a]},
        //     LogSemiring,
        //     ss
        // );
        // const complete0 = complete(0, ss, g);
        // const predict1 = predict(1, g, ss);
        // const scan1 = scan(1, {word: "a", types: [a]}, LogSemiring, ss);
        // const complete1 = complete(1, ss, g);
        // const predict2 = predict(2, g, ss);
        // const scan2 = scan(2, {word: "a", types: [a]}, LogSemiring, ss);
        // const complete2 = complete(2, ss, g);
        // const predict3 = predict(3, g, ss);
        // const scan3 = scan(3, {word: "a", types: [a]}, LogSemiring, ss);
        // const complete3 = complete(3, ss, g);

    });
});
--------------------------------------------------------------------------------
/test/earley/parser.spec.ts:
--------------------------------------------------------------------------------
import {NonTerminal, Terminal} from "../../src";
import {getViterbiParse, ParseTreeWithScore, Grammar} from "../../src";

import {expect} from "chai";
import {g, A} from "../sample-grammar";
import {parseSentenceIntoChart} from "../../src";

// NOTE(review): the generic type arguments in this spec (`Terminal<string>`, `Grammar<string, number>`,
// `ParseTreeWithScore<string>`) were reconstructed after being lost in extraction -- confirm.

// TODO
describe("parser", () => {


    it("should complete correctly", () => {
        // complete(
        //     0,
        //     "e",
        //     LogSemiring,
        //     ss
        // )
    });
    it("should predict correctly", () => {
        // complete(
        //     0,
        //     "e",
        //     LogSemiring,
        //     ss
        // )
    });
    it("should parse the man chase the man with a stick", () => {
        const S: NonTerminal = "S";
        const NP: NonTerminal = "NP";
        const VP: NonTerminal = "VP";
        const TV: NonTerminal = "TV";
        const Det: NonTerminal = "Det";
        const N: NonTerminal = "N";
        const Mod: NonTerminal = "Mod";

        // Token types (terminals) are functions that should return true when the parameter is of given type.
        const transitiveVerb: Terminal<string> = (token) => !!token.match(/(hit|chased)/);
        const the: Terminal<string> = (token) => !!token.match(/the/i);
        const a: Terminal<string> = (token) => !!token.match(/a/i);
        const man: Terminal<string> = (token) => !!token.match(/man/);
        const stick: Terminal<string> = (token) => !!token.match(/stick/);
        const with_: Terminal<string> = (token) => !!token.match(/with/);

        const grammar: Grammar<string, number> = Grammar.builder<string>("test")
        // .setSemiring(new LogSemiring()) // If not set, defaults to Log semiring which is probably what you want
            .addNewRule(
                1.0,     // Probability between 0.0 and 1.0
                S,       // Left hand side of the rule
                [NP, VP] // Right hand side of the rule
            )
            .addNewRule(
                1.0,
                NP,
                [Det, N] // eg. The man
            )
            .addNewRule(
                1.0,
                NP,
                [Det, N, Mod] // eg. The man (with a stick)
            )
            .addNewRule(
                0.4,
                VP,
                [TV, NP, Mod] // eg. (chased) (the man) (with a stick)
            )
            .addNewRule(
                0.6,
                VP,
                [TV, NP] // eg. (chased) (the man with a stick)
            )
            .addNewRule(1.0, Det, [a])
            .addNewRule(1.0, Det, [the])
            .addNewRule(1.0, N, [man])
            .addNewRule(1.0, N, [stick])
            .addNewRule(1.0, TV, [transitiveVerb])
            .addNewRule(1.0, Mod, [with_, NP]) // eg. with a stick
            .build();

        const tokens = ["The", "man", "chased", "the", "man", "with", "a", "stick"];
        // The parse is only required to run without throwing; its result is not inspected yet.
        // noinspection JSUnusedLocalSymbols
        const viterbi: ParseTreeWithScore<string> = getViterbiParse(
            S,
            grammar,
            tokens
        );
        // console.log(JSON.stringify(viterbi.parseTree)); // S(NP(Det The, N man), VP(TV chased, NP(Det the, N man, Mod(with, NP(Det a, N stick)))))
        // console.log(viterbi.probability); // 0.6
        // Parser.recognize(S, grammar, Tokens.tokenize("the", "stick", "chased", "the", "man"))
    });


    const tokens = ["a", "a", "a", "e"];
    it("should deal with scan probability correctly", () => {
        // every token is scanned with probability 1
        const p1 = getViterbiParse(
            A,
            g,
            tokens,
            (ignore, ignored) => g.probabilityMapping.fromProbability(1.0)
        ).probability;

        // each of the three "a" tokens is scanned with probability 0.5
        const p2 = getViterbiParse(
            A,
            g,
            tokens,
            (word, ignored) => word === "a" ? g.probabilityMapping.fromProbability(0.5) : undefined
        ).probability;

        // ... so the second parse must be 2^3 times less likely than the first one
        const eq = p2 * 2 * 2 * 2;
        const epsilon = 0.0000000000000001;
        expect(p1).to.be.above(eq - epsilon).and.below(eq + epsilon);
    });

    it("should parse aaae", () => {
        // noinspection JSUnusedLocalSymbols
        const [chart, ignored, init] = parseSentenceIntoChart(
            A,
            g,
            tokens,
            (word, terminalTypes) => g.probabilityMapping.fromProbability(1.0)
        );

        // the initial rule, completed over the whole input, must be among the completed states
        const finalState = chart.getOrCreate(
            tokens.length, 0, init.rule.right.length, init.rule
        );
        expect(chart.getCompletedStates(tokens.length).has(finalState)).to.equal(true);

    });
});

--------------------------------------------------------------------------------
/test/grammar/grammar.spec.ts:
--------------------------------------------------------------------------------
import { parseSentenceIntoChart, getViterbiParseFromChart } from "../../src";
import { ParseTree } from "../../src";
import { expect } from 'chai';

import { g, A, B, C, D, X, simpleRecursiveGrammar, S2a, S2SS, p, q, a, S } from "../sample-grammar";

// NOTE(review): `ParseTree<string>` below was reconstructed after its type argument was lost in extraction -- confirm.

describe('examples from paper', () => {
    const tokens = ["a", "a", "a"];

    // noinspection JSUnusedLocalSymbols
    const [chart, i, init] = parseSentenceIntoChart(S, simpleRecursiveGrammar, tokens);
    const finalState = chart.getOrCreate(
        tokens.length,
        0,
        init.rule.right.length,
        init.rule
    );
    // noinspection JSUnusedLocalSymbols
    const parseTree: ParseTree<string> = getViterbiParseFromChart(finalState, chart);

    const prob = simpleRecursiveGrammar.probabilityMapping.toProbability;

    // alpha: forward score of a state, gamma: inner score of a state
    const alpha = chart.getForwardScore.bind(chart);
    const gamma = chart.getInnerScore.bind(chart);

    /** Asserts `lower < actual < upper`. */
    const expectBetween = (actual: number, lower: number, upper: number) =>
        expect(actual).to.be.above(lower).and.below(upper);

    it('State set 0', () => {
        const s00Sa = chart.getState(S2a, 0, 0, 0);

        expect(prob(alpha(s00Sa))).to.equal(1.0);
        expect(prob(gamma(s00Sa))).to.equal(p);


        const s00SSS = chart.getState(S2SS, 0, 0, 0);

        expect(prob(alpha(s00SSS))).to.equal(q / p);
        expect(prob(gamma(s00SSS))).to.equal(q);
    });

    it('State set 1', () => {

        // scanned
        const s01Sa1 = chart.getState(S2a, 1, 0, 1);
        expect(prob(alpha(s01Sa1))).to.equal(1);
        expect(prob(gamma(s01Sa1))).to.equal(p);

        // completed
        const s01SSS1 = chart.getState(S2SS, 1, 0, 1);

        expect(prob(alpha(s01SSS1))).to.equal(q);
        expectBetween(prob(gamma(s01SSS1)), (p * q) - 0.000001, (p * q) + 0.000001);

        // predicted
        const s11Sa0 = chart.getState(S2a, 1, 1, 0);
        expect(prob(alpha(s11Sa0))).to.equal(q);
        expect(prob(gamma(s11Sa0))).to.equal(p);

        const s11SSS0 = chart.getState(S2SS, 1, 1, 0);
        expectBetween(prob(alpha(s11SSS0)), (Math.pow(q, 2) / p) - 0.0001, (Math.pow(q, 2) / p) + 0.0001);
        expect(prob(gamma(s11SSS0))).to.equal(q);

    });
    it('State set 2', () => {
        // scanned
        const s12Sa1 = chart.getState(S2a, 2, 1, 1);
        expect(prob(alpha(s12Sa1))).to.equal(q);
        expect(prob(gamma(s12Sa1))).to.equal(p);

        // completed
        const s12SSS1 = chart.getState(S2SS, 2, 1, 1);
        expect(prob(alpha(s12SSS1))).to.equal(q * q);
        expectBetween(prob(gamma(s12SSS1)), (p * q) - 0.000001, (p * q) + 0.000001);

        const s02SSS2 = chart.getState(S2SS, 2, 0, 2);
        expectBetween(prob(alpha(s02SSS2)), (p * q) - 0.000001, (p * q) + 0.000001);
        expectBetween(prob(gamma(s02SSS2)), (p * p * q) - 0.000001, (p * p * q) + 0.000001);

        const s02SSS1 = chart.getState(S2SS, 2, 0, 1);
        expectBetween(prob(alpha(s02SSS1)), (p * q * q) - 0.0001, (p * q * q) + 0.0001);
        expectBetween(prob(gamma(s02SSS1)), (p * p * q * q) - 0.0001, (p * p * q * q) + 0.0001);

        const s02S1 = chart.getState(init.rule, 2, 0, 1);
        expectBetween(prob(alpha(s02S1)), (p * p * q) - 0.0001, (p * p * q) + 0.0001);
        expectBetween(prob(gamma(s02S1)), (p * p * q) - 0.0001, (p * p * q) + 0.0001);

        // predicted
        const s22S0 = chart.getState(S2a, 2, 2, 0);

        expect(prob(gamma(s22S0))).to.equal(p);
        expectBetween(prob(alpha(s22S0)), ((1 + p) * q * q) - 0.00000001, ((1 + p) * q * q) + 0.000000000000001);

        const s22SS0 = chart.getState(S2SS, 2, 2, 0);
        expectBetween(prob(alpha(s22SS0)), ((1 + (1 / p)) * q * q * q) - 0.0001, ((1 + 1 / p) * q * q * q) + 0.0001);
        expect(prob(gamma(s22SS0))).to.equal(q);

    });
    it('State set 3', () => {
        // scanned
        const s23Sa1 = chart.getState(S2a, 3, 2, 1);
        expectBetween(prob(alpha(s23Sa1)), ((1 + p) * q * q) - 0.000001, ((1 + p) * q * q) + 0.0001);
        expect(prob(gamma(s23Sa1))).to.equal(p);

        // completed
        const s23S1 = chart.getState(S2SS, 3, 2, 1);
        expectBetween(prob(alpha(s23S1)), ((1 + p) * q * q * q) - 0.0001, ((1 + p) * q * q * q) + 0.0001);
        expectBetween(prob(gamma(s23S1)), (p * q) - 0.0001, (p * q) + 0.0001);

        const s13S2 = chart.getState(S2SS, 3, 1, 2);
        expectBetween(prob(alpha(s13S2)), (p * q * q) - 0.0001, (p * q * q) + 0.0001);
        expectBetween(prob(gamma(s13S2)), (p * p * q) - 0.0001, (p * p * q) + 0.0001);

        const s13S1 = chart.getState(S2SS, 3, 1, 1);
        expectBetween(prob(alpha(s13S1)), (p * q * q * q) - 0.0001, (p * q * q * q) + 0.0001);
        expectBetween(prob(gamma(s13S1)), (p * p * q * q) - 0.0001, (p * p * q * q) + 0.0001);

        const s03S2 = chart.getState(S2SS, 3, 0, 2);
        expectBetween(prob(alpha(s03S2)), (2 * p * p * q * q) - 0.0001, (2 * p * p * q * q) + 0.0001);
        expectBetween(prob(gamma(s03S2)), (2 * p * p * p * q * q) - 0.0001, (2 * p * p * p * q * q) + 0.0001);

        const s03S1 = chart.getState(S2SS, 3, 0, 1);
        expectBetween(prob(alpha(s03S1)), (2 * p * p * q * q * q) - 0.0001, (2 * p * p * q * q * q) + 0.0001);
        expectBetween(prob(gamma(s03S1)), (2 * p * p * p * q * q * q) - 0.0001, (2 * p * p * p * q * q * q) + 0.0001);

        // NOTE(review): both chains below end in `.above(...)` twice where every other check in this file uses
        // `.above(...).and.below(...)`, and they look at `init` rather than at `finalState`. This looks
        // unintended, but is left untouched because the intended expectation cannot be confirmed from here.
        expect(prob(alpha(init))).to.be.above((2 * (Math.pow(p, 3) * Math.pow(q, 2))) - 0.0001).and.above((2 * (Math.pow(p, 3) * Math.pow(q, 2))) + 0.0001);
        expect(prob(gamma(init))).to.be.above((2 * (Math.pow(p, 3) * Math.pow(q, 2))) - 0.0001).and.above((2 * (Math.pow(p, 3) * Math.pow(q, 2))) + 0.0001);

        /*

         for (int j = 0; j <= tokens.size(); j++) {
         chart.getStates(j).forEach(s -> {
         double probFw = sr.toProbability(chart.getForwardScore(s));
         double probInn = sr.toProbability(chart.getInnerScore(s));
         double v = 0.0;
         if (chart.getViterbiScore(s) == null) {
         //System.out.println();
         } else
         v = sr.toProbability(chart.getViterbiScore(s).getScore());

         //System.out.println(s + "[" + probFw + "]" + "[" + probInn + "] v: " + v);
         });
         }
         */
    });

});


describe('grammar', () => {
    it('should calculate all left star values', () => {
        expect(
            g.getLeftStarScore(A, B)
        ).to.be.above(0.999).and.below(1.00001);
        expect(
            g.getLeftStarScore(B, C)
        ).to.be.above(0.4999).and.below(0.500001);
        expect(
            g.getLeftStarScore(B, D)
).to.be.above(0.24999).and.below(0.2500001); 160 | expect( 161 | g.getLeftStarScore(A, D) 162 | ).to.be.above(0.24999).and.below(0.2500001); 163 | expect( 164 | g.getLeftStarScore(A, X) 165 | ).to.equal(0.0); 166 | }); 167 | 168 | it('should calculate all left values', () => { 169 | expect(g.getLeftScore(A, B)).to.be.above(0.9999999).and.below(1.00001); 170 | expect(g.getLeftScore(A, D)).to.be.above(-0.000001).and.below(0.00001); 171 | expect(g.getLeftScore(A, X)).to.be.above(-0.000001).and.below(0.00001); 172 | expect(g.getLeftScore(B, C)).to.be.above(0.4999999).and.below(0.50001); 173 | }); 174 | 175 | it('should calculate unit star values', () => { 176 | //TODO 177 | }); 178 | 179 | it('should get rules', () => { 180 | //TODO 181 | // Set setOfrules = new HashSet<>(); 182 | // setOfrules.plus(rule1); 183 | // setOfrules.plus(rule2); 184 | // Assert.assertEquals(setOfrules, new HashSet<>(g.getRules(rule1.left))); 185 | // Assert.assertEquals(setOfrules, new HashSet<>(g.getRules(rule2.left))); 186 | // 187 | // setOfrules.clear(); 188 | // setOfrules.plus(rule3); 189 | // Assert.assertEquals(setOfrules, new HashSet<>(g.getRules(rule3.left))); 190 | }); 191 | 192 | it('should contain rules', () => { 193 | // TODO 194 | // Assert.assertTrue(g.containsRules(rule1.left)); 195 | // Assert.assertTrue(g.getRules(rule2.left).contains(rule2)); 196 | // Assert.assertFalse(g.getRules(rule3.left).contains(rule2)); 197 | 198 | // Assert.assertEquals(ruleB, Rule.create(sr, 0.5, B, C)); 199 | // Assert.assertEquals(ruleC, Rule.create(sr, 0.5, C, D)); 200 | // Assert.assertEquals(ruleD, ruleD); 201 | // Assert.assertEquals(ruleE, ruleE); 202 | // Assert.assertEquals(rule1, rule1); 203 | // Assert.assertEquals(rule2, rule2); 204 | // Assert.assertEquals(rule3, rule3); 205 | 206 | // Assert.assertNotEquals(Rule.create(sr, 1.0, X, e), Rule.create(sr, 1.0, A, e)); 207 | // Assert.assertNotEquals(Rule.create(sr, 1.0, X, e), Rule.create(sr, 0.5, X, e)); 208 | // 
Assert.assertEquals(Rule.create(sr, 1.0, X, e), Rule.create(sr, 1.0, X, e)); 209 | }); 210 | }); 211 | -------------------------------------------------------------------------------- /test/sample-grammar.ts: -------------------------------------------------------------------------------- 1 | import { Terminal, NonTerminal } from "../src"; 2 | import { Grammar } from "../src"; 3 | 4 | export const A: NonTerminal = "A"; 5 | export const B: NonTerminal = "B"; 6 | export const C: NonTerminal = "C"; 7 | export const D: NonTerminal = "D"; 8 | export const E: NonTerminal = "E"; 9 | export const X: NonTerminal = "X"; 10 | export const Y: NonTerminal = "Y"; 11 | export const Z: NonTerminal = "Z"; 12 | export const e: Terminal = (s) => s === "e"; 13 | export const a = (t: string) => !!t.match(/a/i); 14 | 15 | const builder = Grammar.builder("test"); 16 | export const g: Grammar = builder 17 | .addNewRule(1.0, A, [B, C, D, E]) 18 | .addNewRule(1.0, A, [e]) 19 | .addNewRule(1.0, X, [Y, Z]) 20 | .addNewRule(0.5, B, [C]) 21 | .addNewRule(0.5, C, [D]) 22 | .addNewRule(0.5, D, [E]) 23 | .addNewRule(0.5, D, [a]) 24 | .addNewRule(0.5, E, [E, E]) 25 | .addNewRule(0.5, E, [e]) 26 | // .addRule(0.1, E, [C]) 27 | .build(); 28 | 29 | export const p: number = (0.6); 30 | export const q: number = (0.4); 31 | export const S = "S"; 32 | 33 | export const S2a = {left: S, right: [a], probability: p}; 34 | export const S2SS = {left: S, right: [S, S], probability: q}; 35 | 36 | export const simpleRecursiveGrammar: Grammar = Grammar.builder("simple-recursive-grammar") 37 | .addRule(S2a) 38 | .addRule(S2SS) 39 | .build(); -------------------------------------------------------------------------------- /tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compileOnSave": false, 3 | "compilerOptions": { 4 | "declaration": true, 5 | "module": "commonjs", 6 | "moduleResolution": "node", 7 | "noImplicitAny": true, 8 | "preserveConstEnums": true, 9 
| "removeComments": true, 10 | "sourceMap": false, 11 | "target": "es2015", 12 | "outDir": "dist" 13 | }, 14 | "include": [ 15 | "src/**/*.ts" 16 | ], 17 | "exclude": [ 18 | ] 19 | } -------------------------------------------------------------------------------- /tslint.json: -------------------------------------------------------------------------------- 1 | { 2 | "rules": { 3 | "class-name": true, 4 | "comment-format": [true, 5 | "check-space" 6 | ], 7 | "indent": [true, 8 | "spaces" 9 | ], 10 | "one-line": [true, 11 | "check-open-brace", 12 | "check-whitespace" 13 | ], 14 | "no-var-keyword": true, 15 | "quotemark": [true, 16 | "double", 17 | "avoid-escape" 18 | ], 19 | "semicolon": true, 20 | "whitespace": [true, 21 | "check-branch", 22 | "check-decl", 23 | "check-operator", 24 | "check-module", 25 | "check-separator", 26 | "check-type" 27 | ], 28 | "typedef-whitespace": [true, { 29 | "call-signature": "nospace", 30 | "index-signature": "nospace", 31 | "parameter": "nospace", 32 | "property-declaration": "nospace", 33 | "variable-declaration": "nospace" 34 | }], 35 | "no-internal-module": true, 36 | "no-trailing-whitespace": true, 37 | "no-inferrable-types": true, 38 | "no-null-keyword": true, 39 | "prefer-const": true 40 | } 41 | } -------------------------------------------------------------------------------- /version.js: -------------------------------------------------------------------------------- 1 | exports.default = "0.9.6"; -------------------------------------------------------------------------------- /webpack.config.js: -------------------------------------------------------------------------------- 1 | const webpack = require('webpack'); 2 | const path = require('path'); 3 | const yargs = require('yargs'); 4 | 5 | const libraryName = 'probabilistic-earley-parser', 6 | plugins = [ 7 | new webpack.LoaderOptionsPlugin({ 8 | options: { 9 | tslint: { 10 | emitErrors: true, 11 | failOnHint: true 12 | } 13 | } 14 | }) 15 | ]; 16 | 17 | let outputFile; 
// The bundle file name embeds the package version exported by ./version.
const { default: VERSION } = require('./version');

// `yargs.argv.p` is truthy when the build is invoked with `-p`;
// in that case the output gets a `.min.js` suffix instead of `.js`.
const bundleSuffix = yargs.argv.p ? 'min.js' : 'js';
outputFile = `${libraryName}.${VERSION}.${bundleSuffix}`;

// TypeScript sources go through ts-loader, then babel-loader
// (webpack applies the loaders of a rule from right to left).
const typescriptRule = {
    test: /\.tsx?$/,
    loader: ['babel-loader', 'ts-loader'],
    exclude: /node_modules/
};

/**
 * Webpack configuration: bundles src/index.ts into a single UMD library
 * file, written next to this config, with a source map.
 */
const config = {
    entry: [
        `${__dirname}/src/index.ts`
    ],
    devtool: 'source-map',
    output: {
        path: path.join(__dirname, '/'),
        filename: outputFile,
        library: libraryName,

        libraryTarget: "umd",
        umdNamedDefine: true
    },
    module: {
        rules: [
            // Linting as a pre-loader is currently switched off:
            // {
            //     enforce: 'pre',
            //     test: /\.tsx?$/,
            //     loader: 'tslint-loader',
            //     exclude: /node_modules/
            // },
            typescriptRule
        ],
        // NOTE(review): looks like a leftover webpack 1 key; presumably ignored
        // while `rules` is set. Confirm against the installed webpack version
        // before removing.
        loaders: []
    },
    resolve: {
        extensions: ['.js', '.ts', '.jsx', '.tsx']
    },
    plugins
};

module.exports = config;