α_i
of a chart is
20 | * the sum of the probabilities of
21 | * all constrained paths of length i that end in that chart, i.e., all
22 | * paths from the start to position i. So this includes multiple
23 | * instances of the same history, which may happen because of recursion.
24 | */
25 | private forwardScores = new StateToObjectMapγ_{i}
of a chart
29 | * is the sum of the probabilities of all
30 | * paths of length (i - k) that start at position k (the rule's start position),
31 | * and end at the current chart and generate the input symbols from position k up to position i.
32 | * Note that this is conditional on the chart happening at position k with
33 | * a certain non-terminal X
34 | */
35 | private innerScores = new StateToObjectMapi: Xk → λ·μ
8 | * where X is a nonterminal of the grammar, λ and μ are strings of nonterminals and/or
9 | * terminals, and i and k are indices into the input string. States are derived from productions
10 | * in the grammar. The above chart is derived from a corresponding production
11 | * X → λμ
12 | * with the following semantics:
13 | *
14 | * - The current position in the input is i, i.e., x_0 ... x_{i-1}
15 | * have been processed
16 | * so far. The states describing the parser chart at position i are collectively
17 | * called chart set i. Note that there is one more chart set than input
18 | * symbols: set 0 describes the parser chart before any input is processed,
19 | * while set |x| contains the states after all input symbols have been
20 | * processed.
21 | * - Nonterminal X was expanded starting at position k in
22 | * the input, i.e., X
23 | * generates some substring starting at position k.
24 | * - The expansion of X proceeded using the production X → λμ, and has
25 | * expanded the right-hand side (RHS) λμ up to the position indicated by
26 | * the dot. The dot thus refers to the current position i
.true
iff the active category of this edge's dotted
73 | * rule is null
.
74 | */
75 | export function isPassive1
, or errors if out of bounds.
83 | *
84 | * @throws IndexOutOfBoundsException If the dotted rule's dot position
85 | * is already at the end of its right side.
86 | */
87 | export function advanceDotj: Xk → λ·Yμ
51 | const pos: number = completedState.position;
52 | stateSets.getStatesActiveOnNonTerminal(
53 | Y, completedState.ruleStartPosition, pos
54 | ).forEach((stateToAdvance) => {
55 | if (stateToAdvance.position > pos || stateToAdvance.position != completedState.ruleStartPosition)
56 | throw new Error("Index failed. This is a bug.");
57 |
58 | const ruleStart: number = stateToAdvance.ruleStartPosition;
59 | const nextDot: number = advanceDot(stateToAdvance);
60 | const rule: Rule
9 | * Parse trees are essentially partial views of a Chart from a
10 | * given {@link State} or {@link Category}. They represent the completed
11 | * category at a given string index and origin position.
12 | */
13 | export interface ParseTree(index: number,
18 | grammar: Grammari: Xk → λ·Zμ
...
28 | statesToPredictOn.forEach((statePredecessor: State) => {
29 | const Z: Categoryi: Yi → ·v
40 | // noinspection JSSuspiciousNameCombination
41 | const Y: NonTerminal = Y_to_v.left;
42 |
43 |
44 | // γ' = P(Y → v)
45 | const Y_to_vScore: S = fromProb(Y_to_v.probability);
46 |
47 | // α' = α * R(Z =*L> Y) * P(Y → v)
48 | const fw: S = sr.times(
49 | prevForward,
50 | sr.times(
51 | fromProb(grammar.getLeftStarScore(Z, Y)),
52 | Y_to_vScore
53 | )
54 | );
55 |
56 | let predicted: State;
57 |
58 | // We might want to increment the probability of an existing chart
59 | const isNew = !stateSets.has(Y_to_v, index, index, 0);
60 | predicted = isNew ? {
61 | position: index,
62 | ruleStartPosition: index,
63 | ruleDotPosition: 0,
64 | rule: Y_to_v
65 | } : stateSets.getOrCreate(index, index, 0, Y_to_v);
66 | if (isNew) // save for later
67 | newStates.add(predicted);
68 |
69 | const innerScore: S = stateSets.getInnerScore(predicted);
70 | if (!(Y_to_vScore === innerScore || probMap.ZERO === innerScore))throw new Error(Y_to_vScore + " != " + innerScore);
71 |
72 | const viterbi = {
73 | origin: statePredecessor,
74 | resultingState: predicted,
75 | innerScore: Y_to_vScore,
76 | };
77 |
78 | stateSets.addForwardScore(predicted, fw, sr);
79 | stateSets.setInnerScore(predicted, Y_to_vScore);
80 | stateSets.setViterbiScore(viterbi);
81 |
82 |
83 | const change = {
84 | state: predicted,
85 | innerScore: Y_to_vScore,
86 | forwardScore: fw,
87 | viterbiScore: viterbi,
88 | origin: statePredecessor
89 | };
90 | changes.push(change);
91 | });
92 | });
93 | });
94 | newStates.forEach(ss => stateSets.getOrCreate(ss.position, ss.ruleStartPosition, ss.ruleDotPosition, ss.rule));
95 | }
96 | return changes;
97 | }
98 |
--------------------------------------------------------------------------------
/src/earley/scan.ts:
--------------------------------------------------------------------------------
1 | import { isNonTerminal, WordWithTypes, Terminal } from "../grammar/category";
2 | import { Semiring } from "semiring";
3 | import { Chart } from "./chart/chart";
4 | import { getActiveCategory, State, advanceDot } from "./chart/state";
5 |
6 |
7 | /**
8 | * Handles a token scanned from the input string.
9 | *
10 | * @param tokenPosition The start index of the scan.
11 | * @param word
12 | * @param types
13 | * @param scanProbability Function that provides the probability of scanning the given token at this position. Might be null for a probability of 1.0.
14 | * @param sr
15 | * @param stateSets
16 | */
17 | export function scan(tokenPosition: number,
18 | {word, types}: WordWithTypes,
20 | stateSets: Charti: Xk → λ·tμ
, where t is a terminal that matches the given token...
29 | */
30 | types.forEach(terminal => {
31 | const statesActiveOnTerminals: Set) => {
33 | const activeCategory = getActiveCategory(preScanState);
34 | if (isNonTerminal(activeCategory)) throw new Error("this is a bug");
35 | else {
36 | if (!activeCategory(word)) throw new Error("Index failed");
37 | // TODO can this be more efficient, ie have tokens make their category be explicit? (Do we want to maintain the possibility of such "fluid" categories?)
38 | // Create the chart i+1: Xk → λt·μ
39 | const preScanForward: S = stateSets.getForwardScore(preScanState);
40 | const preScanInner: S = stateSets.getInnerScore(preScanState);
41 | // Note that this chart is unique for each preScanState
42 | const postScanState: State = stateSets.getOrCreate(
43 | tokenPosition + 1, preScanState.ruleStartPosition,
44 | advanceDot(preScanState),
45 | preScanState.rule,
46 | word
47 | );
48 |
49 | const postScanForward = calculateForwardScore(sr, preScanForward, scanProb);
50 | // Set forward score
51 | stateSets.setForwardScore(
52 | postScanState,
53 | postScanForward
54 | );
55 |
56 | // Get inner score (no side effects)
57 | const postScanInner: S = calculateInnerScore(sr, preScanInner, scanProb);
58 |
59 | // Set inner score
60 | stateSets.setInnerScore(
61 | postScanState,
62 | postScanInner
63 | );
64 |
65 | // Set Viterbi score
66 | const viterbiScore = {
67 | origin: preScanState,
68 | resultingState: postScanState,
69 | innerScore: postScanInner
70 | };
71 | stateSets.setViterbiScore(viterbiScore);
72 |
73 | changes.push({
74 | state: postScanState,
75 | viterbi: viterbiScore,
76 | inner: postScanInner,
77 | forward: postScanForward
78 | });
79 | }
80 | });
81 | });
82 | return changes;
83 | }
84 |
/**
 * Computes the inner score of the chart created by scanning a token.
 *
 * A missing scan probability (`undefined` or `null`) stands for a probability
 * of 1.0, so the previous inner score is returned untouched. Any other value,
 * including falsy ones such as `0` or `false`, is treated as a real semiring
 * element and is multiplied in.
 *
 * @param sr The semiring to calculate with
 * @param previousInner The inner score of the chart before the scan
 * @param scanProbability The probability of scanning this particular token, if any
 * @return The inner score for the new chart
 */
function calculateInnerScore<S>(sr: Semiring<S>, previousInner: S, scanProbability?: S): S {
    // Compare against null/undefined explicitly: a truthiness test would take
    // a legitimate score of 0 for "no probability given" and skip the product.
    if (scanProbability == null) {
        return previousInner;
    }
    return sr.times(previousInner, scanProbability);
}
99 |
/**
 * Computes the forward score of the chart created by scanning a token.
 *
 * A missing scan probability (`undefined` or `null`) stands for a probability
 * of 1.0, so the previous forward score is returned untouched. Any other
 * value, including falsy ones such as `0` or `false`, is treated as a real
 * semiring element and is multiplied in.
 *
 * @param sr The semiring to calculate with
 * @param previousStateForwardScore The forward score of the chart before the scan
 * @param scanProbability The probability of scanning this particular token, if any
 * @return Computed forward score for the new chart
 */
function calculateForwardScore<S>(sr: Semiring<S>, previousStateForwardScore: S, scanProbability?: S): S {
    // Compare against null/undefined explicitly: a truthiness test would take
    // a legitimate score of 0 for "no probability given" and skip the product.
    if (scanProbability == null) {
        return previousStateForwardScore;
    }
    return sr.times(previousStateForwardScore, scanProbability);
}
--------------------------------------------------------------------------------
/src/grammar/category.ts:
--------------------------------------------------------------------------------
1 | export type Category