├── README.md ├── project.clj └── src └── parsnip ├── asm.clj ├── core.clj ├── samples.clj └── vm.clj /README.md: -------------------------------------------------------------------------------- 1 | # Parsnip 2 | 3 | A parsing library which is the spiritual successor of [parsley](http://github.com/cgrand/parsley) and technical descendant of [seqexp](http://github.com/cgrand/seqexp). 4 | 5 | ## Design goals 6 | 7 | * General: accepts any CFG. 8 | * Deterministic: always select the same tree out of the parse forest. 9 | * Total: for unacceptable inputs, yields a parse for one of the closest acceptable input (distance being character deleted from the original input). 10 | * Scannerless 11 | * Incremental 12 | 13 | ## Implementation details 14 | 15 | Parsnip uses a VM with 6 opcodes (`PRED`, `PEEK`, `JUMP`, `FORK`, `CALL` and `RET`). 16 | 17 | The VM is multithreaded (these are not real threads) and all threads are run in lockstep. 18 | 19 | The state of a thread consists only of its PC (program counter) and its stack. At any time there can't be more than one thread with the same PC and same stack. Each threads also has an error count and a priority; they are both used when deduplicating threads with identical PC and stack. 20 | 21 | The threads are stored in a structure resembling a lazy [graph-structured stack](http://en.wikipedia.org/wiki/Graph-structured_stack). 22 | 23 | ### PRED pred 24 | `pred` is a predicate whose type is determined by the VM (the naive VM expects functions). 25 | 26 | The predicate is applied to the current element. 27 | 28 | If the predicate succeeds, the thread continues to the next instruction and to the next element of the input sequence. When the predicate fails the thread stays at the same PC (thus waits for the next character), increments its error count. 29 | 30 | ### PEEK pred 31 | `pred` is a predicate whose type is determined by the VM (the naive VM expectes functions). 32 | 33 | The predicate is applied to the current element. 34 | 35 | If the predicate succeeds, the thread continues to the next instruction but, unlike `PRED` doesn't advance to the new input. When the predicate fails the thread stays at the same PC (thus waits for the next character), increments its error count. 36 | 37 | ### JUMP address 38 | Performs a jump, `address` is an absolute address. 39 | 40 | ### FORK address 41 | Forks the current thread in two threads, one thread will continue to the next instruction while the other will performs a relative jump to the specified `address`. 42 | 43 | Priority is given to the continuing thread. 44 | 45 | It should be noted that the only effect of this *priority* is parse-tree selection: selecting one parse tree out of the parse forest. 46 | 47 | ### CALL address 48 | Pushes the return address on the stack and jump to `address`. 49 | 50 | ### RET tag 51 | Pops an address from the stack and jumps to it. (`tag` is an arbitrary value) 52 | 53 | ## License 54 | 55 | Copyright © 2014 Christophe Grand 56 | 57 | Distributed under the Eclipse Public License either version 1.0 or (at 58 | your option) any later version. 59 | -------------------------------------------------------------------------------- /project.clj: -------------------------------------------------------------------------------- 1 | (defproject parsnip "0.1.0-SNAPSHOT" 2 | :description "parsley is dead, long live parsnip!" 3 | :url "http://github.com/cgrand/parsnip" 4 | :license {:name "Eclipse Public License" 5 | :url "http://www.eclipse.org/legal/epl-v10.html"} 6 | :dependencies [[org.clojure/clojure "1.6.0"]]) 7 | -------------------------------------------------------------------------------- /src/parsnip/asm.clj: -------------------------------------------------------------------------------- 1 | (ns parsnip.asm) 2 | 3 | (defn- map-targets-drop-labels [f pgm] 4 | (vec (mapcat (fn [[op x]] 5 | (case op 6 | (:CALL :JUMP :FORK) [op (f x)] 7 | :LABEL nil 8 | [op x])) (partition 2 pgm)))) 9 | 10 | (defn link [pgm] 11 | (let [labels (reduce (fn [labels pc] 12 | (let [label (nth pgm (inc pc)) 13 | pc (- pc (* 2 (count labels)))] 14 | (when-some [pc' (labels label)] 15 | (throw (ex-info "Label used twice." {:label label :pcs [pc' pc]}))) 16 | (assoc labels label pc))) 17 | {} 18 | (filter #(= :LABEL (nth pgm %)) (range 0 (count pgm) 2)))] 19 | (map-targets-drop-labels #(or (labels %) (throw (ex-info "Label not found." {:label (labels %)}))) pgm))) 20 | 21 | (defn unlink [pgm] 22 | (let [labels (into (sorted-map) (keep (fn [[op arg]] (case op (:FORK :JUMP :CALL) [arg (gensym :label_)] nil)) (partition 2 pgm))) 23 | slice (fn [from to] 24 | (map-targets-drop-labels labels (subvec pgm from to)))] 25 | (reduce (fn [unlinked-pgm [[from label] [to]]] 26 | (-> unlinked-pgm 27 | (conj :LABEL label) 28 | (into (slice from to)))) 29 | (slice 0 (first (keys labels))) 30 | (partition 2 1 [[(count pgm)]] labels)))) -------------------------------------------------------------------------------- /src/parsnip/core.clj: -------------------------------------------------------------------------------- 1 | (ns parsnip.core 2 | (:refer-clojure :exclude [+ *]) 3 | (:require [parsnip.asm :as asm] 4 | [parsnip.vm :as vm])) 5 | 6 | (defprotocol Asmable 7 | (asm [x])) 8 | 9 | (defrecord InlineAsm [ops] 10 | Asmable 11 | (asm [_] ops)) 12 | 13 | (extend-protocol Asmable 14 | clojure.lang.APersistentVector 15 | (asm [x] (mapcat asm x)) 16 | clojure.lang.AMapEntry 17 | (asm [[k v]] 18 | (let [[k r] (if-let [[_ pre] (re-matches #"(.*)-" (name k))] 19 | [(keyword (namespace k) pre) nil] 20 | [k k])] 21 | (concat [:LABEL k] (asm v) [:RET r]))) 22 | clojure.lang.APersistentMap 23 | (asm [x] (mapcat asm x)) 24 | String 25 | (asm [x] (mapcat asm x)) 26 | Character 27 | (asm [c] [:PRED #(= c %)]) 28 | clojure.lang.Keyword 29 | (asm [k] [:CALL k])) 30 | 31 | (defn alt [& choices] 32 | (let [end (gensym :altend_) 33 | emit (fn emit [choices] 34 | (when-let [[choice & choices] (seq choices)] 35 | (if choices 36 | (let [addr (gensym :alt_)] (concat [:FORK addr] (asm choice) [:JUMP end :LABEL addr] (emit choices))) 37 | (asm choice))))] 38 | (->InlineAsm (concat (emit choices) [:LABEL end])))) 39 | 40 | (defn * [& xs] 41 | (let [start (gensym :star_) 42 | end (gensym :starend_)] 43 | (->InlineAsm (concat [:LABEL start :FORK end] (mapcat asm xs) [:JUMP start :LABEL end])))) 44 | 45 | (defn + [& xs] 46 | (let [start (gensym :plus_) 47 | end (gensym :plusend_)] 48 | (->InlineAsm (concat [:LABEL start] (mapcat asm xs) [:FORK end :JUMP start :LABEL end])))) 49 | 50 | (defn ? [& xs] 51 | (let [end (gensym :questionmarkend_)] 52 | (->InlineAsm (concat [:FORK end] (mapcat asm xs) [:LABEL end])))) 53 | 54 | (defn as [tag & xs] 55 | (let [start (gensym :as_) 56 | end (gensym :asend_)] 57 | (->InlineAsm (concat [:CALL start :JUMP end :LABEL start] (mapcat asm xs) [:RET tag :LABEL end])))) 58 | 59 | (defn grammar 60 | [start prods] 61 | (list* :JUMP start (asm prods))) 62 | 63 | (defn tree-builder [mk-node s] 64 | (letfn [(flush-skip [state] 65 | (if-let [from (:skip-from state)] 66 | (let [to (:skip-to state)] 67 | (-> state 68 | (assoc :current (conj (:current state) (mk-node :skip [(subs s from to)])) 69 | :offset to) 70 | (dissoc :skip-from))) 71 | state))] 72 | (fn 73 | ([] {:offset 0 :stack (list []) :current []}) 74 | ([state] (:current state)) 75 | ([state [op pos tag]] 76 | (case op 77 | :push (let [state (flush-skip state) 78 | from (:offset state) 79 | current (:current state) 80 | current (if (< from pos) (conj current (subs s from pos)) current)] 81 | (assoc state :offset pos :stack (conj (:stack state) current) :current [])) 82 | :pop (let [state (flush-skip state) 83 | from (:offset state) 84 | current (:current state) 85 | current (if (< from pos) (conj current (subs s from pos)) current) 86 | stack (:stack state) 87 | current (if tag 88 | (conj (peek stack) (mk-node tag current)) 89 | (into (peek stack) current))] 90 | (assoc state :offset pos :stack (pop stack) :current current)) 91 | :skip (if (:skip-from state) 92 | (assoc state :skip-to (inc pos)) 93 | (assoc state 94 | :current (conj (:current state) (subs s (:offset state) pos)) 95 | :skip-from pos 96 | :skip-to (inc pos)))))))) 97 | 98 | (def vector-tree-builder (partial tree-builder (fn [tag content] (into [tag] content)))) 99 | 100 | (def xml-tree-builder (partial tree-builder (fn [tag content] {:tag tag :content content}))) 101 | 102 | (defn parser [start prods] 103 | (let [step (vm/stepper (asm/link (grammar start prods)))] 104 | (fn self 105 | ([input] (self input (vector-tree-builder input))) 106 | ([input builder] 107 | (builder (reduce builder (builder) (:events (:carry (get (reduce-kv step (step) (vec input)) -2))))))))) -------------------------------------------------------------------------------- /src/parsnip/samples.clj: -------------------------------------------------------------------------------- 1 | (ns parsnip.samples 2 | (:refer-clojure :exclude [+ *]) 3 | (:require [parsnip.core :refer :all] 4 | [parsnip.vm :as vm] 5 | [parsnip.asm :as asm])) 6 | 7 | (def pgm 8 | (asm/link 9 | (grammar :E 10 | {:E (alt "x" ["(" (* :E) ")"])}))) 11 | 12 | (def pgm2 13 | (asm/link 14 | (grammar :E 15 | {:E- (alt :X (as "X+E" [:X "+" :E])) 16 | :X- (alt (as "X" "x") (as "(E)" "(" :E ")"))}))) 17 | 18 | (def pgm3 19 | (asm/link 20 | (grammar :E 21 | {:E (alt :XX [:X :X]) 22 | :XX "xx" 23 | :X "x"}))) 24 | 25 | #_#_#_ 26 | => ((parser :E 27 | {:E- (alt :X (as "X+E" [:X "+" :E])) 28 | :X- (alt (as "X" "x") (as "(E)" "(" :E ")"))} 29 | )"x+x") 30 | [{:tag "X+E", :content [{:tag "X", :content ["x"]} "+" {:tag "X", :content ["x"]}]}] 31 | 32 | 33 | #_(let [step (vm/stepper pgm2)] 34 | (:carry (get (reduce-kv step (step) (vec "(x+(x+x)+x)")) -1))) 35 | 36 | #_(let [step (vm/stepper pgm2)] 37 | (reduce-kv step (step) (vec "(x+x"))) 38 | 39 | #_(dotimes [_ 10] (time (let [step (vm/stepper pgm2)] 40 | (:carry (get (reduce-kv step (step) (vec (cons \x (take 2048 (cycle "+x"))))) -1)) 41 | ))) 42 | 43 | #_(dotimes [_ 10] (time (let [step (vm/stepper pgm2)] 44 | (get (reduce-kv step (step) (into (vec (repeat 1024 \()) (cons \x (repeat 1024 \))))) -1) 45 | nil))) 46 | 47 | 48 | 49 | -------------------------------------------------------------------------------- /src/parsnip/vm.clj: -------------------------------------------------------------------------------- 1 | (ns parsnip.vm) 2 | 3 | (defn has-priority? [a b] 4 | (if-let [[a & as] (seq a)] 5 | (if-let [[b & bs] (seq b)] 6 | (if (= a b) 7 | (recur as bs) 8 | (< a b)) 9 | false) 10 | true)) 11 | 12 | (defn combine [a b] 13 | {:error (+ (:error a 0) (:error b 0)) 14 | :events (concat (:events a nil) (:events b nil)) 15 | :priority (concat (:priority a nil) (:priority b nil))}) 16 | 17 | (defprotocol Stacks 18 | (stacks-map [stacks] "Returns a map of pc to stacks.") 19 | (add [stacks carry]) 20 | (fathomings [stacks] "Returns a set of stacks fathomings.")) 21 | 22 | (defrecord Carried [stacks carry] 23 | Stacks 24 | (stacks-map [c] 25 | (reduce-kv (fn [m pc stacks] (assoc m pc (add stacks carry))) {} (stacks-map stacks))) 26 | (add [c carry'] 27 | (Carried. stacks (combine carry carry'))) 28 | (fathomings [c] (fathomings stacks))) 29 | 30 | (defrecord Delayed [d fathomings] 31 | Stacks 32 | (stacks-map [delayed] 33 | (loop [r @d] 34 | (if (delay? r) 35 | (recur @r) 36 | (stacks-map r)))) 37 | (add [delayed carry] 38 | (->Carried delayed carry)) 39 | (fathomings [delayed] fathomings)) 40 | 41 | (defrecord FSMStacks [pcs transitions path] 42 | Stacks 43 | (stacks-map [fsm-stacks] 44 | (reduce (fn [m pc] (assoc m pc (FSMStacks. (transitions pc #{}) transitions (conj path pc)))) {} pcs)) 45 | (add [fsm-stacks carry] 46 | (->Carried fsm-stacks carry)) 47 | (fathomings [m] 48 | #{path})) 49 | 50 | (defrecord ConsStacks [pc stacks] 51 | Stacks 52 | (stacks-map [cons-stacks] {pc stacks}) 53 | (add [cons-stacks carry] 54 | (->Carried cons-stacks carry)) 55 | (fathomings [m] 56 | (fathomings stacks))) 57 | 58 | (extend-protocol Stacks 59 | clojure.lang.APersistentMap 60 | (stacks-map [m] m) 61 | (add [m carry] 62 | (->Carried m carry)) 63 | (fathomings [m] 64 | (reduce #(into %1 (fathomings %2)) #{} m))) 65 | 66 | (def bottom (->FSMStacks #{-2} {} [])) 67 | 68 | (defn merge-stacks [a b] 69 | (cond 70 | (= a b) a 71 | (and (instance? Carried a) (instance? Carried b) (= (:stacks a) (:stacks b))) 72 | (cond 73 | (< (:error (:carry a)) (:error (:carry b))) a 74 | (> (:error (:carry a)) (:error (:carry b))) b 75 | (has-priority? (:priority (:carry a)) (:priority (:carry b))) a 76 | :else b) 77 | :else 78 | (->Delayed (delay (merge-with merge-stacks (stacks-map a) (stacks-map b))) 79 | (into (fathomings a) (fathomings b))))) 80 | 81 | (defn plus [m pc tails] 82 | (assoc m pc (if-some [tails' (m pc)] 83 | (merge-stacks tails' tails) 84 | tails))) 85 | 86 | (defn push [stacks pc] (->ConsStacks pc stacks)) 87 | 88 | (defn stepper 89 | [pgm] 90 | (letfn [(flow [m pos pc tails has-c c spc stails] 91 | (if (neg? pc) 92 | (if has-c 93 | (plus m spc (add stails {:error 1 :events [[:skip pos]]})) 94 | (plus m pc tails)) 95 | (case (nth pgm pc) 96 | :FORK (-> m (flow pos (+ pc 2) (add tails {:priority [0]}) has-c c spc stails) 97 | (recur pos (nth pgm (inc pc)) (add tails {:priority [1]}) has-c c spc stails)) 98 | :JUMP (recur m pos (nth pgm (inc pc)) tails has-c c spc stails) 99 | :CALL (recur m pos (nth pgm (inc pc)) (push (add tails {:events [[:push pos]]}) (+ pc 2)) has-c c spc stails) 100 | :RET (reduce-kv #(flow %1 pos %2 %3 has-c c spc stails) m (stacks-map (add tails {:events [[:pop pos (nth pgm (inc pc))]]}))) 101 | :PEEK (if has-c 102 | (if ((nth pgm (inc pc)) c) 103 | (recur m pos (+ pc 2) tails true c spc stails) 104 | (plus m spc (add stails {:error 1 :events [[:skip pos]]}))) 105 | (plus m pc tails)) 106 | :PRED (if has-c 107 | (if ((nth pgm (inc pc)) c) 108 | (recur m (inc pos) (+ pc 2) tails false c spc stails) 109 | (plus m spc (add stails {:error 1 :events [[:skip pos]]}))) 110 | (plus m pc tails))))) 111 | (step [stacks pos c m] 112 | (reduce-kv (fn [m pc tails] 113 | (flow m pos pc tails true c pc tails)) 114 | m (stacks-map stacks)))] 115 | (let [init-stacks (flow {} 0 0 bottom false nil 0 bottom)] 116 | (fn 117 | ([] init-stacks) 118 | ([stacks pos c] 119 | (step stacks pos c {})))))) 120 | 121 | ; knowing i'm at a given pc, what where the previous 122 | 123 | (defn- pred-pcs [pgm] (filter #(= (nth pgm %) :PRED) (range 0 (count pgm) 2))) 124 | 125 | (defn- call-targets 126 | "Returs the set of pcs that appear as the argument of a call." 127 | [pgm] 128 | (set (keep #(when (= (nth pgm %) :CALL) (nth pgm (inc %))) (range 0 (count pgm) 2)))) 129 | 130 | (defn- call-sites [pgm] 131 | (reduce (fn [call-sites pc] 132 | (if (= (nth pgm pc) :CALL) 133 | (let [target (nth pgm (inc pc))] 134 | (assoc call-sites target (conj (call-sites target #{}) pc))) 135 | call-sites)) {} (range 0 (count pgm) 2))) 136 | 137 | (defn- successors [pgm pc] 138 | (let [op (nth pgm pc)] 139 | (case op 140 | :RET nil 141 | :FORK [(nth pgm (inc pc)) (+ pc 2)] 142 | :JUMP [(nth pgm (inc pc))] 143 | [(+ pc 2)]))) 144 | 145 | (defn- rets 146 | "Returns a map of pcs to set of reachable ret pcs," 147 | [pgm] 148 | (letfn [(rets [known-rets pc] 149 | (if (contains? known-rets pc) 150 | known-rets 151 | (let [op (nth pgm pc)] 152 | (assoc known-rets pc 153 | (if (= :RET op) 154 | #{pc} 155 | (let [succs (successors pgm pc) 156 | known-rets (dissoc (reduce rets (assoc known-rets pc #{}) succs) pc)] 157 | (reduce #(into %1 (known-rets %2)) #{} succs)))))))] 158 | (reduce rets {} (range 0 (count pgm) 2)))) 159 | 160 | (defn- return-targets 161 | "Returns a map from ret pcs to sets of return targets." 162 | [pgm] 163 | (let [call-sites (call-sites pgm) 164 | rets (rets pgm)] 165 | (reduce (fn [return-targets [ret caller+2]] 166 | (assoc return-targets ret (conj (return-targets ret #{}) caller+2))) 167 | {} (for [[callee callers] call-sites 168 | ret (rets callee) 169 | caller callers] 170 | [ret (+ caller 2)])))) 171 | 172 | (defn all-stacks [stacks] 173 | (let [m (stacks-map stacks)] 174 | (if (= m {}) [[() (:carry stacks)]] 175 | (for [[pc stacks] m 176 | [stack carry] (all-stacks stacks)] 177 | [(cons pc stack) carry])))) --------------------------------------------------------------------------------