├── .github ├── FUNDING.yml └── workflows │ └── build.yml ├── logo.png ├── bench ├── diff-size-bench.png ├── diff-time-bench.png ├── roundtrip-time.csv ├── diff-time.csv ├── diff-sizes.csv ├── project.clj └── bench.clj ├── .gitignore ├── package.json ├── deps-clr.edn ├── test └── editscript │ ├── test.cljs │ ├── util │ └── pairing_test.cljc │ ├── edit_test.cljc │ ├── diff │ ├── quick_test.cljc │ └── a_star_test.cljc │ └── core_test.cljc ├── resources ├── drawing4.edn ├── drawing2.edn ├── drawing1.edn └── drawing3.edn ├── project.clj ├── src └── editscript │ ├── diff │ ├── quick.cljc │ └── a_star.cljc │ ├── patch.cljc │ ├── util │ ├── index.cljc │ ├── pairing.cljc │ └── common.cljc │ ├── core.cljc │ └── edit.cljc ├── CHANGELOG.md ├── LICENSE └── README.md /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | github: huahaiy 2 | -------------------------------------------------------------------------------- /logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/juji-io/editscript/HEAD/logo.png -------------------------------------------------------------------------------- /bench/diff-size-bench.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/juji-io/editscript/HEAD/bench/diff-size-bench.png -------------------------------------------------------------------------------- /bench/diff-time-bench.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/juji-io/editscript/HEAD/bench/diff-time-bench.png -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | package-lock.json 2 | /out 3 | /test/out 4 | nashorn_code_cache 5 | /target 6 | /classes 7 | /checkouts 8 | pom.xml 9 | pom.xml.asc 10 | 
*.jar 11 | *.class 12 | /.lein-* 13 | /.nrepl-port 14 | .hgignore 15 | .hg/ 16 | /node_modules 17 | .cljs_nashorn_repl 18 | .cljs_node_repl 19 | */.lein-env 20 | */.nrepl-port 21 | */target/ 22 | -------------------------------------------------------------------------------- /bench/roundtrip-time.csv: -------------------------------------------------------------------------------- 1 | Data Set,Editscript A*,Editscript Quick,differ 2 | diff1-2,409,48,121 3 | diff2-1,407,49,123 4 | diff1-3,463,62,123 5 | diff3-1,458,63,130 6 | diff1-4,2956,183,64 7 | diff4-1,1288,220,64 8 | diff2-3,451,59,126 9 | diff3-2,457,58,127 10 | diff2-4,2876,184,64 11 | diff4-2,1273,215,63 12 | diff3-4,2884,187,65 13 | diff4-3,1273,221,63 14 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "clj-editscript", 3 | "version": "0.6.6", 4 | "license": "EPL-1.0", 5 | "homepage": "https://github.com/juji-io/editscript", 6 | "repository": { 7 | "type": "git", 8 | "url": "https://github.com/juji-io/editscript" 9 | }, 10 | "author": { 11 | "name" : "Huahai Yang" 12 | }, 13 | "files": [ 14 | "src/*" 15 | ], 16 | "directories": { 17 | "lib": "src" 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /deps-clr.edn: -------------------------------------------------------------------------------- 1 | {:paths ["src"] 2 | :deps 3 | {io.github.clojure/clr.data.generators {:git/tag "v1.1.0" :git/sha "d25d292"} 4 | } 5 | 6 | :aliases 7 | {:test 8 | {:extra-paths ["test"] 9 | :extra-deps {io.github.dmiller/test-runner {:git/tag "v0.5.1clr" :git/sha "814e06f"} 10 | io.github.clojure/clr.test.check {:git/tag "v1.1.2" :git/sha "26f34e6"}} 11 | :exec-fn cognitect.test-runner.api/test 12 | :exec-args {:dirs ["test"]}}}} -------------------------------------------------------------------------------- /test/editscript/test.cljs: 
-------------------------------------------------------------------------------- 1 | (ns editscript.test 2 | (:require [doo.runner :refer-macros [doo-tests]] 3 | [editscript.core-test] 4 | [editscript.util.pairing-test] 5 | [editscript.diff.a-star-test] 6 | [editscript.diff.quick-test])) 7 | 8 | (doo-tests 'editscript.util.pairing-test 9 | 'editscript.diff.a-star-test 10 | 'editscript.diff.quick-test 11 | 'editscript.core-test) 12 | -------------------------------------------------------------------------------- /bench/diff-time.csv: -------------------------------------------------------------------------------- 1 | Data Set,Editscript A*,Editscript Quick,differ,clojure.data,deep-diff2 2 | diff1-2,404,45,119,170,414 3 | diff2-1,416,43,118,176,414 4 | diff1-3,445,49,118,436,410 5 | diff3-1,449,48,118,442,413 6 | diff1-4,2827,115,49,1165,1219 7 | diff4-1,1272,151,48,1203,531 8 | diff2-3,435,47,120,360,422 9 | diff3-2,433,47,117,352,412 10 | diff2-4,2824,116,49,1101,1217 11 | diff4-2,1272,152,48,1192,530 12 | diff3-4,2830,116,49,1169,1224 13 | diff4-3,1275,152,48,1178,537 14 | -------------------------------------------------------------------------------- /bench/diff-sizes.csv: -------------------------------------------------------------------------------- 1 | Data Set,Editscript A*,Editscript Quick,differ,clojure.data,deep-diff2 2 | diff1-2,24,24,22,1026,1061 3 | diff2-1,24,24,22,1026,1061 4 | diff1-3,91,91,61,1065,1292 5 | diff3-1,79,79,49,1065,1292 6 | diff1-4,177,620,259,1233,2477 7 | diff4-1,1007,1129,759,1233,2352 8 | diff2-3,69,69,46,1050,1231 9 | diff3-2,57,57,34,1050,1231 10 | diff2-4,177,620,259,1233,2477 11 | diff4-2,1007,1129,759,1233,2352 12 | diff3-4,177,620,259,1245,2489 13 | diff4-3,1019,1141,771,1245,2364 14 | -------------------------------------------------------------------------------- /bench/project.clj: -------------------------------------------------------------------------------- 1 | (defproject bench "0.2.0" 2 | :description "Compare diff 
library alternatives" 3 | :url "https://github.com/juji-io/editscript" 4 | :lein-release {:deploy-via :clojars} 5 | :license {:name "Eclipse Public License" 6 | :url "http://www.eclipse.org/legal/epl-v10.html"} 7 | :dependencies [[org.clojure/clojure "1.10.1"] 8 | [org.clojure/math.combinatorics "0.1.6"] 9 | [org.clojure/test.check "1.1.0"] 10 | [org.clojure/data.csv "1.0.0"] 11 | [criterium "0.4.6"] 12 | [com.taoensso/nippy "3.1.1"] 13 | [differ "0.3.3"] 14 | [lambdaisland/deep-diff2 "2.0.108"] 15 | [juji/editscript "0.5.4"]]) 16 | -------------------------------------------------------------------------------- /.github/workflows/build.yml: -------------------------------------------------------------------------------- 1 | name: build 2 | 3 | on: 4 | push: 5 | paths-ignore: 6 | - '*.md' 7 | pull_request: 8 | paths-ignore: 9 | - '*.md' 10 | 11 | jobs: 12 | 13 | test: 14 | runs-on: ubuntu-latest 15 | steps: 16 | - name: Git checkout 17 | uses: actions/checkout@v1 18 | 19 | - name: Prepare java 20 | uses: actions/setup-java@v3 21 | with: 22 | distribution: 'temurin' 23 | java-version: '8' 24 | 25 | - name: Install clojure tools 26 | uses: DeLaGuardo/setup-clojure@13.0 27 | with: 28 | lein: 2.9.1 29 | 30 | - uses: actions/setup-node@v4 31 | with: 32 | node-version: '20' 33 | 34 | - name: Run JVM tests 35 | run: lein test 36 | 37 | - name: Run node tests 38 | run: lein doo node once 39 | -------------------------------------------------------------------------------- /resources/drawing4.edn: -------------------------------------------------------------------------------- 1 | [{:y 13, 2 | :r 0, 3 | :color "#000000", 4 | :fill "#CCCCCC", 5 | :width 262, 6 | :type "rect", 7 | :cap "round", 8 | :borderWidth 1, 9 | :style "Solid", 10 | :x 19, 11 | :height 101} 12 | {:y 122, 13 | :family "sans-serif", 14 | :color "#0000FF", 15 | :fill {:r 256, :g 0, :b 0, :a 0.5}, 16 | :width 10, 17 | :type "textBlock", 18 | :cap "round", 19 | :borderWidth 1, 20 | :size "24px", 21 | :style 
"Solid", 22 | :pad 3, 23 | :weight "bold", 24 | :x 12, 25 | :height 25.19999999999999, 26 | :text "DojoX Drawing Rocks"} 27 | {:y 208, 28 | :family "sans-serif", 29 | :color "#000000", 30 | :fill "#CCCCCC", 31 | :width 200, 32 | :type "text", 33 | :cap "round", 34 | :borderWidth 1, 35 | :size "18px", 36 | :style "Solid", 37 | :pad 3, 38 | :weight "normal", 39 | :x 12, 40 | :height 25.19999999999999, 41 | :text "This is just text"}] 42 | -------------------------------------------------------------------------------- /test/editscript/util/pairing_test.cljc: -------------------------------------------------------------------------------- 1 | ;; 2 | ;; Copyright (c) Huahai Yang. All rights reserved. 3 | ;; The use and distribution terms for this software are covered by the 4 | ;; Eclipse Public License 1.0 (http://opensource.org/licenses/eclipse-1.0.php) 5 | ;; which can be found in the file LICENSE at the root of this distribution. 6 | ;; By using this software in any fashion, you are agreeing to be bound by 7 | ;; the terms of this license. 8 | ;; You must not remove this notice, or any other, from this software. 9 | ;; 10 | 11 | (ns editscript.util.pairing-test 12 | (:require 13 | [editscript.util.pairing :refer [priority-map]] 14 | [clojure.test :refer [are deftest testing]])) 15 | 16 | (deftest test-priority-map 17 | (let [a (priority-map :a 1 :b 2 :c 3 :d 4 :e 5 :f 6)] 18 | (testing "Basic priority-map operations that we use in A*" 19 | (are [x y] (= x y) 20 | (empty? 
a) false 21 | (peek a) [:a 1] 22 | (peek (pop a)) [:b 2] 23 | (peek (assoc a :z 0)) [:z 0] 24 | (peek (conj a [:a -1])) [:a -1])))) 25 | -------------------------------------------------------------------------------- /resources/drawing2.edn: -------------------------------------------------------------------------------- 1 | [{:y 27, 2 | :r 0, 3 | :color "#000000", 4 | :fill "#CCCCCC", 5 | :width 100, 6 | :type "rect", 7 | :cap "round", 8 | :borderWidth 1, 9 | :style "Solid", 10 | :x 50, 11 | :height 100} 12 | {:y 30, 13 | :family "sans-serif", 14 | :color "#0000FF", 15 | :fill {:r 256, :g 0, :b 0, :a 0.5}, 16 | :width 10, 17 | :type "textBlock", 18 | :cap "round", 19 | :borderWidth 1, 20 | :size "24px", 21 | :style "Solid", 22 | :pad 3, 23 | :weight "bold", 24 | :x 20, 25 | :height 25.200000000000003, 26 | :text "DojoX Drawing Rocks"} 27 | {:rx 150, 28 | :color "#0000FF", 29 | :fill "#0000ff", 30 | :type "ellipse", 31 | :cap "round", 32 | :borderWidth 1, 33 | :style "Solid", 34 | :cx 150, 35 | :cy 185, 36 | :ry 100} 37 | {:color "#000000", 38 | :fill "#CCCCCC", 39 | :y1 20, 40 | :type "arrow", 41 | :cap "round", 42 | :borderWidth 3, 43 | :style "Solid", 44 | :label "My Arrow", 45 | :x1 40, 46 | :y2 120.00000000000003, 47 | :x2 -133.20508075688772} 48 | {:y 26, 49 | :family "sans-serif", 50 | :color "#000000", 51 | :fill "#CCCCCC", 52 | :width 200, 53 | :type "text", 54 | :cap "round", 55 | :borderWidth 1, 56 | :size "18px", 57 | :style "Solid", 58 | :pad 3, 59 | :weight "normal", 60 | :x 30, 61 | :height 25.200000000000003, 62 | :text "This is just text"} 63 | {:color "#000000", 64 | :style "Solid", 65 | :cap "round", 66 | :fill "#CCCCCC", 67 | :borderWidth 1, 68 | :points 69 | [{:x 70, :y 20} 70 | {:x 65, :y 15} 71 | {:x 75, :y 15} 72 | {:t "Z", :x 70, :y 20} 73 | {:t "M", :x 70, :y 40} 74 | {:x 68, :y 12} 75 | {:x 72, :y 12}], 76 | :type "path"}] 77 | -------------------------------------------------------------------------------- /resources/drawing1.edn: 
-------------------------------------------------------------------------------- 1 | [{:y 27, 2 | :r 0, 3 | :color "#000000", 4 | :fill "#CCCCCC", 5 | :width 100, 6 | :type "rect", 7 | :cap "round", 8 | :borderWidth 1, 9 | :style "Solid", 10 | :x 50, 11 | :height 100} 12 | {:y 30, 13 | :family "sans-serif", 14 | :color "#0000FF", 15 | :fill {:r 256, :g 0, :b 0, :a 0.5}, 16 | :width 10, 17 | :type "textBlock", 18 | :cap "round", 19 | :borderWidth 1, 20 | :size "24px", 21 | :style "Solid", 22 | :pad 3, 23 | :weight "bold", 24 | :x 20, 25 | :height 25.200000000000003, 26 | :text "DojoX Drawing Rocks"} 27 | {:rx 150, 28 | :color "#0000FF", 29 | :fill "#ffff00", 30 | :type "ellipse", 31 | :cap "round", 32 | :borderWidth 1, 33 | :style "Solid", 34 | :cx 150, 35 | :cy 185, 36 | :ry 100} 37 | {:color "#000000", 38 | :fill "#CCCCCC", 39 | :y1 20, 40 | :type "arrow", 41 | :cap "round", 42 | :borderWidth 3, 43 | :style "Solid", 44 | :label "My Arrow", 45 | :x1 40, 46 | :y2 120.00000000000003, 47 | :x2 -133.20508075688772} 48 | {:y 26, 49 | :family "sans-serif", 50 | :color "#000000", 51 | :fill "#CCCCCC", 52 | :width 200, 53 | :type "text", 54 | :cap "round", 55 | :borderWidth 1, 56 | :size "18px", 57 | :style "Solid", 58 | :pad 3, 59 | :weight "normal", 60 | :x 30, 61 | :height 25.200000000000003, 62 | :text "This is just text"} 63 | {:color "#000000", 64 | :style "Solid", 65 | :cap "round", 66 | :fill "#CCCCCC", 67 | :borderWidth 1, 68 | :points 69 | [{:x 70, :y 20} 70 | {:x 65, :y 15} 71 | {:x 75, :y 15} 72 | {:t "Z", :x 70, :y 20} 73 | {:t "M", :x 70, :y 40} 74 | {:x 68, :y 12} 75 | {:x 72, :y 12}], 76 | :type "path"}] 77 | -------------------------------------------------------------------------------- /resources/drawing3.edn: -------------------------------------------------------------------------------- 1 | [{:y 27, 2 | :r 0, 3 | :color "#000000", 4 | :fill "#CCCCCC", 5 | :width 100, 6 | :type "rect", 7 | :cap "round", 8 | :borderWidth 1, 9 | :style "Solid", 10 | :x 
50, 11 | :height 100} 12 | {:y 30, 13 | :family "sans-serif", 14 | :color "#0000FF", 15 | :fill {:r 256, :g 0, :b 0, :a 0.5}, 16 | :width 10, 17 | :type "textBlock", 18 | :cap "round", 19 | :borderWidth 1, 20 | :size "24px", 21 | :style "Solid", 22 | :pad 3, 23 | :weight "bold", 24 | :x 20, 25 | :height 25.200000000000003, 26 | :text "DojoX Drawing Rocks"} 27 | {:rx 69.5, 28 | :color "#0000FF", 29 | :fill "#0000ff", 30 | :type "ellipse", 31 | :cap "round", 32 | :borderWidth 1, 33 | :style "Solid", 34 | :cx 230.5, 35 | :cy 228, 36 | :ry 57} 37 | {:color "#000000", 38 | :fill "#CCCCCC", 39 | :y1 20, 40 | :type "arrow", 41 | :cap "round", 42 | :borderWidth 3, 43 | :style "Solid", 44 | :label "My Arrow", 45 | :x1 40, 46 | :y2 120.00000000000003, 47 | :x2 -133.20508075688772} 48 | {:y 26, 49 | :family "sans-serif", 50 | :color "#000000", 51 | :fill "#CCCCCC", 52 | :width 200, 53 | :type "text", 54 | :cap "round", 55 | :borderWidth 1, 56 | :size "18px", 57 | :style "Solid", 58 | :pad 3, 59 | :weight "normal", 60 | :x 30, 61 | :height 25.200000000000003, 62 | :text "This is just text"} 63 | {:color "#000000", 64 | :style "Solid", 65 | :cap "round", 66 | :fill "#CCCCCC", 67 | :borderWidth 1, 68 | :points 69 | [{:x 70, :y 20} 70 | {:x 65, :y 15} 71 | {:x 75, :y 15} 72 | {:t "Z", :x 70, :y 20} 73 | {:t "M", :x 70, :y 40} 74 | {:x 68, :y 12} 75 | {:x 72, :y 12}], 76 | :type "path"}] 77 | -------------------------------------------------------------------------------- /test/editscript/edit_test.cljc: -------------------------------------------------------------------------------- 1 | ;; 2 | ;; Copyright (c) Huahai Yang. All rights reserved. 3 | ;; The use and distribution terms for this software are covered by the 4 | ;; Eclipse Public License 1.0 (http://opensource.org/licenses/eclipse-1.0.php) 5 | ;; which can be found in the file LICENSE at the root of this distribution. 
6 | ;; By using this software in any fashion, you are agreeing to be bound by 7 | ;; the terms of this license. 8 | ;; You must not remove this notice, or any other, from this software. 9 | ;; 10 | 11 | (ns editscript.edit-test 12 | (:require [editscript.edit :as e] 13 | [editscript.core :as c] 14 | #?(:clj [clojure.test :refer [is are deftest ]] 15 | :cljr [clojure.test :refer [is are deftest ]] 16 | :cljs [cljs.test :refer [is are deftest] :include-macros true]))) 17 | 18 | (deftest edits-equality-test 19 | (are [d1 d2] (= (e/get-edits d1) (e/get-edits d2)) 20 | (c/diff {} {}) 21 | (c/diff {} {}) 22 | 23 | (c/diff 1 2) 24 | (c/diff 3 2))) 25 | 26 | (deftest valid-edits-test 27 | (are [edits] (e/valid-edits? edits) 28 | [] 29 | [[[0] :-]] 30 | [[[1 2] :r 32]] 31 | [[[:b 2] :+ '()] 32 | [[:a] :-]] 33 | [[[] :s [32 [:- 10] 2 [:r "ab"] 11 [:+ "old"]]]])) 34 | 35 | (deftest invalid-edits-test 36 | (are [edits] (not (e/valid-edits? edits)) 37 | nil 38 | '() 39 | [1] 40 | [0 1] 41 | ['()] 42 | [[]] 43 | [[1 2 3]] 44 | [[[1] :+ 3 4]] 45 | [[[1] :- 3]] 46 | [[[1] :r]] 47 | [[[1] :s [1 [:- "ab"] [:+ "cd"]]]] 48 | [[[1] :s [[:r 10] 2]]])) 49 | 50 | (deftest sizing-test 51 | (are [diff size] (= size (e/get-size diff)) 52 | (e/edits->script []) 1 53 | (c/diff [:a :b] [:a]) 5)) 54 | 55 | (deftest edits->script-test 56 | (are [a b edits] (= b (c/patch a (e/edits->script edits))) 57 | ["abc" 24 22 {:a [1 2 3]} 1 3 #{1 2}] 58 | [24 23 {:a [2 3]} 1 3 #{1 2 3}] 59 | [[[0] :-] 60 | [[1] :r 23] 61 | [[2 :a 0] :-] 62 | [[5 3] :+ 3]] 63 | 64 | {} 65 | {:x :hello-world} 66 | [[[] :r {:x :hello-world}]])) 67 | 68 | (deftest combine-test 69 | (let [a ["abc" 24 22 {:a [1 2 3]} 1 3 #{1 2}] 70 | b [24 23 {:a [2 3]} 1 3 #{1 2 3}] 71 | c [24 {:a [1 2 3]} 1 #{1 2}] 72 | d-ab (c/diff a b) 73 | d-bc (c/diff b c) 74 | d-ac (c/diff a c) 75 | comb-d (e/combine d-ab d-bc)] 76 | (is (= c (c/patch a comb-d))) 77 | (is (= c (c/patch a d-ac))) 78 | (is (= c (c/patch a (e/edits->script 79 | (into 
(e/get-edits d-ab) (e/get-edits d-bc)))))))) 80 | -------------------------------------------------------------------------------- /project.clj: -------------------------------------------------------------------------------- 1 | (defproject juji/editscript "0.6.6" 2 | :description "A diff library for Clojure/ClojureScript data structures" 3 | :url "https://github.com/juji-io/editscript" 4 | :lein-release {:deploy-via :clojars} 5 | :deploy-repositories [["clojars" {:url "https://repo.clojars.org" 6 | :username :env/clojars_username 7 | :password :env/clojars_password 8 | :sign-releases false}]] 9 | :license {:name "Eclipse Public License" 10 | :url "http://www.eclipse.org/legal/epl-v10.html"} 11 | :dependencies [[org.clojure/clojure "1.12.0"]] 12 | :plugins [[lein-cljsbuild "1.1.7"] 13 | [lein-doo "0.1.10"]] 14 | :doo {:build "node" 15 | :paths {:karma "./node_modules/karma/bin/karma"} 16 | :karma {:config {"browserDisconnectTimeout" 30000 17 | "browserNoActivityTimeout" 90000}}} 18 | :clean-targets ^{:protect false} [:target-path "out" "target"] 19 | :jar-exclusions [#"\.edn"] 20 | :cljsbuild {:builds 21 | {:dev 22 | {:source-paths ["src" "test" "dev"] 23 | :compiler {:output-to "target/editscript.js" 24 | :output-dir "target" 25 | :optimizations :none 26 | :source-map true 27 | :cache-analysis true 28 | :checked-arrays :warn 29 | :parallel-build true}} 30 | :node 31 | {:source-paths ["src" "test"] 32 | :compiler {:output-to "out/node/editscript.js" 33 | :output-dir "out/node" 34 | :optimizations :advanced 35 | :main "editscript.test" 36 | :source-map "out/node/editscript.js.map" 37 | :target :nodejs 38 | :cache-analysis true 39 | :checked-arrays :warn 40 | :parallel-build true}} 41 | :browser 42 | {:source-paths ["src" "test"] 43 | :compiler {:output-to "out/browser/editscript.js" 44 | :output-dir "out/browser" 45 | :optimizations :advanced 46 | :main "editscript.test" 47 | :source-map "out/browser/editscript.js.map" 48 | :cache-analysis true 49 | :checked-arrays 
:warn 50 | :parallel-build true}}}} 51 | :profiles {:deploy 52 | {:aot [#"editscript\.*"] 53 | :jvm-opts ["-Dclojure.compiler.direct-linking=true"] } 54 | :dev 55 | {:dependencies [[org.clojure/clojurescript "1.11.132" 56 | :exclusions [org.clojure/core.rrb-vector]] 57 | ;;see https://github.com/emezeske/lein-cljsbuild/issues/469 58 | [quantum/org.clojure.core.rrb-vector "0.0.12"] 59 | [criterium "0.4.6"] 60 | [doo "0.1.11"] 61 | [org.clojure/test.check "1.1.1"] 62 | ;; [cider/piggieback "0.5.2"] 63 | ] 64 | :source-paths ["src" "test" "dev"] 65 | ;; :repl-options {:nrepl-middleware [cider.piggieback/wrap-cljs-repl]} 66 | }}) 67 | -------------------------------------------------------------------------------- /src/editscript/diff/quick.cljc: -------------------------------------------------------------------------------- 1 | ;; 2 | ;; Copyright (c) Huahai Yang. All rights reserved. 3 | ;; The use and distribution terms for this software are covered by the 4 | ;; Eclipse Public License 1.0 (http://opensource.org/licenses/eclipse-1.0.php) 5 | ;; which can be found in the file LICENSE at the root of this distribution. 6 | ;; By using this software in any fashion, you are agreeing to be bound by 7 | ;; the terms of this license. 8 | ;; You must not remove this notice, or any other, from this software. 9 | ;; 10 | 11 | (ns ^:no-doc editscript.diff.quick 12 | (:require [clojure.set :as set] 13 | [editscript.edit :as e] 14 | [editscript.util.common :as c 15 | #?@(:cljs [:include-macros true])])) 16 | 17 | #?(:clj (set! *warn-on-reflection* true)) 18 | #?(:clj (set! *unchecked-math* :warn-on-boxed)) 19 | #?(:cljr (set! *warn-on-reflection* true)) 20 | 21 | (declare diff*) 22 | 23 | (defn- diff-map 24 | [script path a b opts] 25 | (reduce-kv 26 | (fn [_ ka va] 27 | (let [path' (conj path ka)] 28 | (if (contains? 
b ka) 29 | (diff* script path' va (get b ka) opts) 30 | (diff* script path' va (e/nada) opts)))) 31 | nil 32 | a) 33 | (reduce-kv 34 | (fn [_ kb vb] 35 | (when-not (contains? a kb) 36 | (diff* script (conj path kb) (e/nada) vb opts))) 37 | nil 38 | b)) 39 | 40 | (defn- diff-vec 41 | "Adjust the indices to have a correct editscript" 42 | [script path a b opts] 43 | (let [edits (c/vec-edits a b opts)] 44 | (if (= edits :timeout) 45 | (e/replace-data script path b) 46 | (reduce 47 | (fn [[^long ia ^long ia' ^long ib] op] 48 | (case op 49 | :- (do (diff* script (conj path ia') (get a ia) (e/nada) opts) 50 | [(inc ia) ia' ib]) 51 | :+ (do (diff* script (conj path ia') (e/nada) (get b ib) opts) 52 | [ia (inc ia') (inc ib)]) 53 | :r (do (diff* script (conj path ia') (get a ia) (get b ib) opts) 54 | [(inc ia) (inc ia') (inc ib)]) 55 | [(+ ia ^long op) (+ ia' ^long op) (+ ib ^long op)])) 56 | (transient [0 0 0]) 57 | edits)))) 58 | 59 | (defn- diff-set 60 | [script path a b opts] 61 | (doseq [va (set/difference a b)] 62 | (diff* script (conj path va) va (e/nada) opts)) 63 | (doseq [vb (set/difference b a)] 64 | (diff* script (conj path vb) (e/nada) vb opts))) 65 | 66 | (defn- diff-lst 67 | [script path a b opts] 68 | (diff-vec script path (vec a) (vec b) opts)) 69 | 70 | (defn- diff-val 71 | [script path _ b] 72 | (if (= (e/get-type b) :nil) 73 | (e/delete-data script path) 74 | (e/replace-data script path b))) 75 | 76 | (defn diff* 77 | [script path a b {:keys [str-diff] 78 | :or {str-diff :none} 79 | :as opts}] 80 | (when-not (= a b) 81 | (case (e/get-type a) 82 | :nil (e/add-data script path b) 83 | :map (c/coll-case a b script path :map #'diff-map opts) 84 | :vec (c/coll-case a b script path :vec #'diff-vec opts) 85 | :set (c/coll-case a b script path :set #'diff-set opts) 86 | :lst (c/coll-case a b script path :lst #'diff-lst opts) 87 | :str (if (= str-diff :none) 88 | (diff-val script path a b) 89 | (c/coll-case a b script path :str 90 | 
#'editscript.util.common/diff-str opts)) 91 | :val (diff-val script path a b)))) 92 | 93 | (defn diff 94 | "Create an EditScript that represents the difference between `b` and `a` 95 | This algorithm is fast, but it does not attempt to generate an EditScript 96 | that is minimal in size" 97 | ([a b] 98 | (diff a b nil)) 99 | ([a b opts] 100 | (let [script (e/edits->script [])] 101 | (diff* script [] a b opts) 102 | script))) 103 | -------------------------------------------------------------------------------- /src/editscript/patch.cljc: -------------------------------------------------------------------------------- 1 | ;; 2 | ;; Copyright (c) Huahai Yang. All rights reserved. 3 | ;; The use and distribution terms for this software are covered by the 4 | ;; Eclipse Public License 1.0 (http://opensource.org/licenses/eclipse-1.0.php) 5 | ;; which can be found in the file LICENSE at the root of this distribution. 6 | ;; By using this software in any fashion, you are agreeing to be bound by 7 | ;; the terms of this license. 8 | ;; You must not remove this notice, or any other, from this software. 9 | ;; 10 | 11 | (ns ^:no-doc editscript.patch 12 | (:require [clojure.set :as set] 13 | [editscript.edit :as e] 14 | [editscript.util.common :as c] 15 | [clojure.string :as s])) 16 | 17 | #?(:clj (set! *warn-on-reflection* true)) 18 | #?(:cljr (set! *warn-on-reflection* true)) 19 | #?(:clj (set! *unchecked-math* :warn-on-boxed)) 20 | 21 | (defn vget 22 | [x p] 23 | (case (e/get-type x) 24 | (:map :vec :set) (get x p) 25 | :lst (nth x p))) 26 | 27 | (defn- vdelete 28 | [x p] 29 | (case (e/get-type x) 30 | ;;NB, there is a special case where dissoc has no effect: 31 | ;;if p is ##NaN, then p cannot be found in x, for (= ##NaN ##NaN) is false! 
32 | :map (dissoc x p) 33 | :vec (into (subvec x 0 p) (subvec x (inc ^long p))) 34 | :set (set/difference x #{p}) 35 | :lst (->> (split-at p x) 36 | (#(concat (nth % 0) (next (nth % 1)))) 37 | (apply list)))) 38 | 39 | (defn- vadd 40 | [x p v] 41 | (case (e/get-type x) 42 | :map (assoc x p v) 43 | :vec (into (conj (subvec x 0 p) v) (subvec x p)) 44 | :set (conj x v) 45 | :lst (->> (split-at p x) 46 | (#(concat (nth % 0) (conj (nth % 1) v))) 47 | (apply list)))) 48 | 49 | (defn- sreplace 50 | [x edits level] 51 | (let [x (c/transform-str x level) 52 | sf (if (= level :character) subs subvec) 53 | i (volatile! 0) 54 | ss (persistent! 55 | (reduce 56 | (fn [ss e] 57 | (cond 58 | (integer? e) (let [s (sf x @i (+ ^long @i ^long e))] 59 | (vswap! i (partial + e)) 60 | (conj! ss s)) 61 | (= (nth e 0) :-) (do (vswap! i (partial + (nth e 1))) ss) 62 | (= (nth e 0) :r) (let [s (nth e 1)] 63 | (vswap! i (partial + (count s))) 64 | (conj! ss s)) 65 | (= (nth e 0) :+) (conj! ss (nth e 1)))) 66 | (transient []) 67 | edits))] 68 | (case level 69 | :character (apply str ss) 70 | :word (s/join " " (flatten ss)) 71 | :line (s/join "\n" (flatten ss))))) 72 | 73 | (defn- vreplace 74 | [x p v] 75 | (case (e/get-type x) 76 | :map (assoc x p v) 77 | :vec (into (conj (subvec x 0 p) v) (subvec x (inc ^long p))) 78 | :set (-> x (set/difference #{p}) (conj v)) 79 | :lst (->> (split-at p x) 80 | (#(concat (nth % 0) (conj (rest (nth % 1)) v))) 81 | (apply list)))) 82 | 83 | (defn- valter 84 | [x p o v] 85 | (case o 86 | :- (vdelete x p) 87 | :+ (vadd x p v) 88 | :r (vreplace x p v) 89 | :s (vreplace x p (sreplace (vget x p) v :character)) 90 | :sw (vreplace x p (sreplace (vget x p) v :word)) 91 | :sl (vreplace x p (sreplace (vget x p) v :line)))) 92 | 93 | (defn patch* 94 | [old [path op value]] 95 | (letfn [(up [x p o v] 96 | (let [[f & r] p] 97 | (if r 98 | (valter x f :r (up (vget x f) r o v)) 99 | (if (seq p) 100 | (valter x f o v) 101 | (case o 102 | :s (sreplace x v :character) 103 | :sw 
(sreplace x v :word) 104 | :sl (sreplace x v :line) 105 | v)))))] 106 | (up old path op value))) 107 | -------------------------------------------------------------------------------- /src/editscript/util/index.cljc: -------------------------------------------------------------------------------- 1 | ;; 2 | ;; Copyright (c) Huahai Yang. All rights reserved. 3 | ;; The use and distribution terms for this software are covered by the 4 | ;; Eclipse Public License 1.0 (http://opensource.org/licenses/eclipse-1.0.php) 5 | ;; which can be found in the file LICENSE at the root of this distribution. 6 | ;; By using this software in any fashion, you are agreeing to be bound by 7 | ;; the terms of this license. 8 | ;; You must not remove this notice, or any other, from this software. 9 | ;; 10 | 11 | (ns editscript.util.index 12 | (:require [editscript.edit :as e] 13 | #?(:cljs [goog.math.Long :refer [getMaxValue]])) 14 | #?(:clj (:import [clojure.lang PersistentVector] 15 | [java.io Writer]) 16 | :cljr (:import [clojure.lang PersistentVector])) ) 17 | 18 | ;; indexing 19 | 20 | (defprotocol INode 21 | (get-path [this] "Get the path to the node from root") 22 | (get-value [this] "Get the actual data") 23 | (get-children [this] "Get all children node in a map") 24 | (add-child [this node] "Add a child node") 25 | (get-key [this] "Get the key of this node") 26 | (get-parent [this] "Get the parent node") 27 | (get-first [this] "Get the first child node") 28 | (get-last [this] "Get the last child node") 29 | (get-next [this] "Get the next sibling node") 30 | (set-next [this node] "Set the next sibling node") 31 | (set-order [this o] "Set the traversal order of this node") 32 | (get-order [this] "Get the order of this node in traversal") 33 | (get-size [this] "Get the size of sub-tree, used to estimate cost") 34 | (set-size [this s] "Set the size of sub-tree")) 35 | 36 | (deftype Node [^PersistentVector path 37 | value 38 | parent 39 | ^:unsynchronized-mutable children 40 | 
^:unsynchronized-mutable first 41 | ^:unsynchronized-mutable last 42 | ^:unsynchronized-mutable next 43 | ^:unsynchronized-mutable index 44 | ^:unsynchronized-mutable ^long order 45 | ^:unsynchronized-mutable ^long size] 46 | INode 47 | (get-path [_] path) 48 | (get-key [this] (-> this get-path peek)) 49 | (get-value [_] value) 50 | (get-parent [_] parent) 51 | (get-children [_] children) 52 | (get-first [_] first) 53 | (get-last [_] last) 54 | (get-next [_] next) 55 | (set-next [_ n] (set! next n)) 56 | (get-order [_] order) 57 | (set-order [this o] (set! order (long o)) this) 58 | (get-size [_] size) 59 | (set-size [this s] (set! size (long s)) this) 60 | (add-child [_ node] 61 | (set! children (assoc children (get-key node) node)) 62 | (when last (set-next last node)) 63 | (when-not first (set! first node)) 64 | (set! last node) 65 | node)) 66 | 67 | #?(:clj 68 | (defmethod print-method Node 69 | [x ^Writer writer] 70 | (print-method {:value (get-value x) 71 | :order (get-order x) 72 | :children (get-children x)} 73 | writer))) 74 | 75 | (declare index*) 76 | 77 | (defn- associative-children 78 | "map and vector are associative" 79 | [order path data parent] 80 | (reduce-kv 81 | (fn [_ k v] 82 | (index* order (conj path k) v parent)) 83 | nil 84 | data)) 85 | 86 | (defn- set-children 87 | "set is a map of keys to themselves" 88 | [order path data parent] 89 | (doseq [x data] 90 | (index* order (conj path x) x parent))) 91 | 92 | (defn- list-children 93 | "add index as key" 94 | [order path data parent] 95 | (reduce 96 | (fn [i x] 97 | (index* order (conj path i) x parent) 98 | (inc ^long i)) 99 | 0 100 | data)) 101 | 102 | (defn- inc-order 103 | "order value reflects the size of elements" 104 | [order ^long size] 105 | (vswap! 
order (fn [o] (+ size ^long o)))) 106 | 107 | (defn- index-collection 108 | [type order path data parent] 109 | (let [node (->Node path data parent {} nil nil nil 0 0 1)] 110 | (add-child parent node) 111 | (case type 112 | (:map :vec) (associative-children order path data node) 113 | :set (set-children order path data node) 114 | :lst (list-children order path data node)) 115 | (let [^long cs (->> (get-children node) vals (map get-size) (reduce +)) 116 | size (+ (long (get-size node)) cs)] 117 | (doto node 118 | (set-order @order) 119 | (set-size size)) 120 | (inc-order order size)) 121 | node)) 122 | 123 | (defn- index-value 124 | [order path data parent] 125 | (let [node (->Node path data parent nil nil nil nil 0 @order 1)] 126 | (add-child parent node) 127 | (inc-order order 1) 128 | node)) 129 | 130 | (defn- index* 131 | [order path data parent] 132 | (let [type (e/get-type data)] 133 | (if (or (= type :val) (= type :str)) 134 | (index-value order path data parent) 135 | (index-collection type order path data parent)))) 136 | 137 | (defn index 138 | "Traverse data to build an indexing tree of Nodes, 139 | compute path, sizes of sub-trees, siblings, etc. for each Node. 140 | This takes little time" 141 | [data] 142 | (let [order (volatile! 0)] 143 | (index* order [] data (->Node [] ::dummy nil {} nil nil nil 0 -1 0)))) 144 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Change Log 2 | 3 | ## [0.6.6] - 2025-01-24 4 | ### Added 5 | - `data-nodes` to return the number of nodes of a piece of data. 6 | - `change-ratio` to return an approximation of an editscript's ratio of change 7 | in terms of the original data. 8 | 9 | ## [0.6.4] - 2024-09-24 10 | ### Improved 11 | - Fix boxed math warning. 
[thx @tonsky] 12 | - Update deps 13 | 14 | ## [0.6.3] - 2023-03-14 15 | ### Improved 16 | - Simplify equality test for A* algorithm, increasing diff speed up to 60% for 17 | some data sets. 18 | 19 | ## [0.6.2] - 2022-08-25 20 | ### Fixed 21 | - A couple of typos in code and documentation. 22 | 23 | ## [0.6.1] - 2022-08-24 24 | ### Changed 25 | - **Breaking** Change `:str-diff?` option for `diff` to `:str-diff`, whose value 26 | could be `:none` (default, no diff inside string), `:line` (diff by line), 27 | `:word` (diff by word) or `:character` (diff by character, original string 28 | diff, very expensive). 29 | ### Added 30 | - `:vec-timeout` option for `diff`, to specify a timeout in milliseconds 31 | (default 1000), for it is sometimes too expensive to diff vectors. It is 32 | O(n^2), after all. When timed-out, a replacement will be used. 33 | - `:str-change-limit` option for `diff`, a less than `1.0` and greater than 34 | `0.0` double, representing percentage (default `0.2`). Only diff string when 35 | less than given percentage is changed, otherwise replace the string. Because 36 | string diff is expensive, it may be cheaper to replace the whole string. 37 | ### Improved 38 | - Speed up string diff by coercing into vector first. 
 39 | - Bump Clojure version 40 | 41 | ## [0.5.8] - 2021-12-30 42 | ### Improved 43 | - Remove a warning in certain cljs environment, and it's an error in latest cljs 44 | [Thanks @Outrovurt] 45 | 46 | ## [0.5.7] - 2021-03-03 47 | ### Fixed 48 | - Correct conversion of edits to editscript for string diffs 49 | ### Improved 50 | - Consolidate all public functions in core 51 | 52 | ## [0.5.6] - 2021-03-02 53 | ### Improved 54 | - Better handling of MapEntry [Thanks @lnostdal] 55 | 56 | ## [0.5.5] - 2021-03-01 57 | ### Fixed 58 | - handle MapEntry [#18] 59 | 60 | ## [0.5.4] - 2020-12-29 61 | ### Improved 62 | - Enhanced A\* diff algorithm speed for cases of increased data size 63 | 64 | ## [0.5.3] - 2020-12-28 65 | ### Improved 66 | - Slightly better heuristic for A\* diff algorithm 67 | 68 | ## [0.5.2] - 2020-12-24 69 | ### Fixed 70 | - consistent use of keywords 71 | - correct `patch` with string diff inside 72 | 73 | ## [0.5.1] - 2020-12-22 74 | ### Fixed 75 | - require both algorithms in core 76 | 77 | ## [0.5.0] - 2020-12-22 78 | ### Added 79 | - `:str-diff? 
`option to determine if strings need to be diffed, if so, `:s` 80 | operator is used to represent the diff of two strings 81 | ### Improved 82 | - Better heuristic for A\* diff algorithm, more than 2X speed improvement for some data sets 83 | 84 | ## [0.4.6] - 2020-08-09 85 | ### Changed 86 | - Documentation improvement and dependency bump 87 | 88 | ## [0.4.5] - 2020-08-09 89 | ### Fixed 90 | - Fix suboptimality for vectors and lists too 91 | 92 | ## [0.4.4] - 2020-08-08 93 | ### Fixed 94 | - Fix A\* optimality for special cases of smaller `a` 95 | 96 | ## [0.4.3] - 2020-04-29 97 | ### Changed 98 | - Change A\* algorithm equality handling to improve speed for very small diffs 99 | 100 | ## [0.4.2] - 2019-09-24 101 | ### Changed 102 | - Change equality handling to accommodate older versions of Clojure (1.9.0 and older) 103 | 104 | ## [0.4.1] - 2019-09-20 105 | ### Fixed 106 | - Relax `valid-edits?` to accept more valid edits 107 | 108 | ## [0.4.0] - 2019-07-15 109 | ### Added 110 | - `edits->script` function to convert a vector of edits to an EditScript 111 | - `valid-edits?` function to validate the edits vector 112 | - link to cljdoc documentation 113 | ### Changed 114 | - Instead of using cost, use a more accurate size for `get-size` of an EditScript 115 | 116 | ## [0.3.3] - 2019-04-04 117 | ### Changed 118 | - Minor dependency bump 119 | 120 | ## [0.3.2] - 2018-05-31 121 | ### Added 122 | - `combine` function to combine two EditScripts 123 | - package.json for npm publishing [Andrea Richiardi] 124 | ### Fixed 125 | - Fix a cljs warning 126 | - Minor speed improvement 127 | 128 | ## [0.3.1] - 2018-05-09 129 | ### Fixed 130 | - Revert heuristic change in 0.3.0, which breaks optimality 131 | 132 | ## [0.3.0] - 2018-05-09 133 | ### Changed 134 | - cljc version 135 | - Simplify heuristic 136 | - Use defn in place of declare, see http://dev.clojure.org/jira/browse/CLJS-1871 137 | 138 | ## [0.2.4] - 2018-05-05 139 | ### Changed 140 | - Expand the use of quick algorithm 
in `A*` to cases where one party contains only leaves 141 | - Implements pairing heap for priority queue 142 | 143 | ### Removed 144 | - java.util.PriorityQueue and HashMap as dependency 145 | 146 | ### Fixed 147 | - Wrong test ns declaration preventing `lein test` 148 | 149 | ## [0.2.3] - 2018-05-03 150 | ### Changed 151 | - `A*` uses quick algorithm for all leaves list/vector comparison 152 | - Quick algorithm aggressively converts replacement 153 | 154 | ## [0.2.2] - 2018-05-02 155 | ### Changed 156 | - `A*` uses global order number for heuristic 157 | 158 | ## [0.2.1] - 2018-04-30 159 | ### Changed 160 | - Developed an `A*` algorithm for diffing 161 | 162 | ### Removed 163 | - clojure.data.priority-map as a dependency 164 | 165 | ### Fixed 166 | - all tests passing 167 | 168 | ## 0.1.1 - 2018-03-04 169 | ### Added 170 | - Initial commits 171 | -------------------------------------------------------------------------------- /src/editscript/core.cljc: -------------------------------------------------------------------------------- 1 | ;; 2 | ;; Copyright (c) Huahai Yang. All rights reserved. 3 | ;; The use and distribution terms for this software are covered by the 4 | ;; Eclipse Public License 1.0 (http://opensource.org/licenses/eclipse-1.0.php) 5 | ;; which can be found in the file LICENSE at the root of this distribution. 6 | ;; By using this software in any fashion, you are agreeing to be bound by 7 | ;; the terms of this license. 8 | ;; You must not remove this notice, or any other, from this software. 
(defn diff
  "Compute an editscript describing the transformations needed to turn the
  Clojure data structure `a` into the Clojure data structure `b`.

  Any nested Clojure data structure is accepted. In Clojure, values that
  implement `IPersistentVector`, `IPersistentMap`, `IPersistentList` or
  `IPersistentSet` are treated as collections. The same is true for the
  corresponding deftypes in ClojureScript, such as `PersistentVector`,
  `PersistentMap`, and so on. Everything else is treated as an atomic value.

  The editscript is a vector of basic operations: add `:+`, delete `:-`,
  and replace `:r`. Each operation carries a path to the location it applies
  to, similar to the path vector of `update-in`, except that an editscript
  path works for all four collection types above, not only the associative
  ones. `:+` and `:r` additionally carry the new value.

  The optional last argument is an option map supporting these keys:

  * `:algo` selects the diff algorithm, either `:a-star` (default) or
  `:quick`. `:a-star` minimizes the size of the resulting editscript;
  `:quick` is much faster but does not produce a diff of minimal size.

  * `:str-diff` sets the granularity of string diffing, one of:
    - `:none` (default), do not diff inside strings, the fastest.
    - `:line`, diff by line.
    - `:word`, diff by word.
    - `:character`, diff by character, the slowest.

  * `:str-change-limit`, a double greater than `0.0` and less than `1.0`
  representing a percentage (default `0.2`). A string is diffed only when
  less than the given percentage has changed; otherwise it is replaced.

  * `:vec-timeout`, a timeout in milliseconds (default `1000`) for diffing
  vectors, lists or strings, which has O(n^2) running time. On timeout a
  replacement operation is used instead."
  ([a b]
   (diff a b nil))
  ([a b {:keys [algo]
         :or   {algo :a-star}
         :as   opts}]
   ;; anything other than :a-star falls through to the quick algorithm
   (case algo
     :a-star (a/diff a b opts)
     (q/diff a b opts))))
(defn- get-data
  "Return the value found inside `data` at `path`, a sequence of keys or
  indices that is resolved one step at a time with `p/vget`.

  An empty `path` refers to `data` itself. The previous loop-based version
  always performed at least one lookup, so an empty path (as carried by a
  root-level edit) looked up a `nil` key instead of returning `data`."
  [data path]
  ;; `reduce` with an init value returns it untouched for an empty path
  (reduce (fn [v k] (p/vget v k)) data path))
;; Round-trip checks for the quick algorithm. Naming convention: `y-x` is
;; `(diff x y)`, i.e. the script that turns `x` into `y`, so every patch
;; assertion has the form `(= y (patch x y-x))`.
(deftest diff-patch-test
  (testing "Diffing and patching some nested data structures"
    (let [a   {:a {:o 4} :b 'b}
          b   {:a {:o 3} :b 'c :c 42}
          b-a (diff a b)
          a-b (diff b a)
          c   [nil 3 'c {:a 3} 4]
          d   [3 'c {:b 3} 4]
          d-c (diff c d)
          c-d (diff d c)
          e   ["abc" 24 23 {:a [1 2 3]} 1 3 #{1 2}]
          f   [24 23 {:a [2 3]} 1 3 #{1 2 3}]
          f-e (diff e f)
          e-f (diff f e)
          g   {nil 1}
          h   {nil 2}
          h-g (diff g h)
          g-h (diff h g)
          i   {nil 3}
          j   '()
          j-i (diff i j)
          i-j (diff j i)
          k   {1 3}
          l   {1 nil}
          l-k (diff k l)
          k-l (diff l k)
          m   "hello world, this is our first visit to your planet. we come in peace."
          n   "hello worldhis is our first visit to your planet. We come in peace. haha"
          n-m (diff m n {:str-diff :character})
          m-n (diff n m {:str-diff :character})]
      (is (= (get-edits b-a)
             [[[:a :o] :r 3]
              [[:b] :r 'c]
              [[:c] :+ 42]]))
      (is (= (get-edits d-c)
             [[[0] :-]
              [[2 :a] :-]
              [[2 :b] :+ 3]]))
      (is (= (get-edits f-e)
             [[[0] :-]
              [[2 :a 0] :-]
              [[5 3] :+ 3]]))
      (is (= (get-edits n-m)
             [[[] :s [11 [:- 3] 39 [:r "W"] 16 [:+ " haha"]]]]))
      (is (= a (patch b a-b)))
      (is (= b (patch a b-a)))
      (is (= c (patch d c-d)))
      (is (= d (patch c d-c)))
      (is (= e (patch f e-f)))
      (is (= f (patch e f-e)))
      (is (= g (patch h g-h)))
      (is (= h (patch g h-g)))
      (is (= i (patch j i-j)))
      ;; `j-i` was computed from `i`, so it must be applied to `i`
      ;; (this previously patched the unrelated value `e`)
      (is (= j (patch i j-i)))
      (is (= k (patch l k-l)))
      (is (= l (patch k l-k)))
      (is (= m (patch n m-n)))
      (is (= n (patch m n-m))))))
123 | ;; Execution time mean : 65.203017 ms 124 | ;; Execution time std-deviation : 583.138552 µs 125 | ;; Execution time lower quantile : 64.500410 ms ( 2.5%) 126 | ;; Execution time upper quantile : 66.464167 ms (97.5%) 127 | ;; Overhead used : 9.792106 ns 128 | 129 | ;; Found 4 outliers in 60 samples (6.6667 %) 130 | ;; low-severe 3 (5.0000 %) 131 | ;; low-mild 1 (1.6667 %) 132 | ;; Variance from outliers : 1.6389 % Variance is slightly inflated by outliers 133 | 134 | (c/bench (vec-edits as bs nil)) 135 | ;; ==> 136 | ;; Evaluation count : 1920 in 60 samples of 32 calls. 137 | ;; Execution time mean : 32.714460 ms 138 | ;; Execution time std-deviation : 997.703094 µs 139 | ;; Execution time lower quantile : 32.008704 ms ( 2.5%) 140 | ;; Execution time upper quantile : 35.291895 ms (97.5%) 141 | ;; Overhead used : 9.788943 ns 142 | 143 | ;; Found 9 outliers in 60 samples (15.0000 %) 144 | ;; low-severe 9 (15.0000 %) 145 | ;; Variance from outliers : 17.3922 % Variance is moderately inflated by outliers 146 | 147 | (c/bench (diff as bs)) 148 | ;; ==> 149 | ;; Evaluation count : 1800 in 60 samples of 30 calls. 150 | ;; Execution time mean : 34.128722 ms 151 | ;; Execution time std-deviation : 1.284325 ms 152 | ;; Execution time lower quantile : 33.047449 ms ( 2.5%) 153 | ;; Execution time upper quantile : 37.014303 ms (97.5%) 154 | ;; Overhead used : 9.788943 ns 155 | 156 | ;; Found 3 outliers in 60 samples (5.0000 %) 157 | ;; low-severe 3 (5.0000 %) 158 | ;; Variance from outliers : 23.8507 % Variance is moderately inflated by outliers 159 | 160 | (c/bench (diffit.vec/diff as bs)) 161 | ;; ==> 162 | ;; Evaluation count : 1500 in 60 samples of 25 calls. 
163 | ;; Execution time mean : 42.089736 ms 164 | ;; Execution time std-deviation : 1.517260 ms 165 | ;; Execution time lower quantile : 40.642024 ms ( 2.5%) 166 | ;; Execution time upper quantile : 45.306760 ms (97.5%) 167 | ;; Overhead used : 9.788943 ns 168 | 169 | ) 170 | -------------------------------------------------------------------------------- /bench/bench.clj: -------------------------------------------------------------------------------- 1 | (ns bench 2 | (:require [clojure.data :as clj] 3 | [clojure.data.csv :as csv] 4 | [clojure.java.io :as io] 5 | [clojure.test.check.properties :as prop] 6 | [clojure.test.check.generators :as gen] 7 | [clojure.test.check.clojure-test :as test] 8 | [clojure.math.combinatorics :as combo] 9 | [criterium.core :as c] 10 | [differ.core :as differ] 11 | [editscript.core :as editscript] 12 | [editscript.edit] 13 | [lambdaisland.deep-diff2 :as deep] 14 | [taoensso.nippy :as nippy])) 15 | 16 | (def data1 (-> "../resources/drawing1.edn" 17 | slurp 18 | read-string)) 19 | (def data2 (-> "../resources/drawing2.edn" 20 | slurp 21 | read-string)) 22 | (def data3 (-> "../resources/drawing3.edn" 23 | slurp 24 | read-string)) 25 | (def data4 (-> "../resources/drawing4.edn" 26 | slurp 27 | read-string)) 28 | 29 | ;; original data size 30 | 31 | (def size1 (count (nippy/freeze data1))) ;1004 32 | (def size2 (count (nippy/freeze data2))) ;1004 33 | (def size3 (count (nippy/freeze data3))) ;1016 34 | (def size4 (count (nippy/freeze data4))) ;555 35 | 36 | ;; diff function candidates 37 | (def diffs {"Editscript A*" #'editscript/diff 38 | "Editscript Quick" #(editscript/diff %1 %2 {:algo :quick}) 39 | "differ" #'differ/diff 40 | "clojure.data" #'clj/diff 41 | "deep-diff2" #'deep/diff}) 42 | 43 | (def ids (combo/permuted-combinations [1 2 3 4] 2)) 44 | (def datas (combo/permuted-combinations [data1 data2 data3 data4] 2)) 45 | 46 | ;; write diff sizes in file diff-sizes.csv 47 | 48 | (defn diff-size 49 | [k [d1 d2]] 50 | (count 
(defn roundtrip-time
  "Benchmark a diff-then-patch round trip for the candidate named `k` on
  the data pair `[d1 d2]`.

  The round trip is first checked for correctness: patching `d1` with the
  diff of `d1` and `d2` must give back `d2`. If it does not, a failure
  message string is returned instead of a timing. Otherwise the first
  element of the benchmark's `:mean` is multiplied by 1000000 and rounded
  to a long (presumably seconds to microseconds; confirm against the
  criterium documentation)."
  [k [d1 d2]]
  (let [diff-fn  (diffs k)
        patch-fn (patches k)]
    (if (= d2 (patch-fn d1 (diff-fn d1 d2)))
      (let [stats (c/quick-benchmark (patch-fn d1 (diff-fn d1 d2)) {})
            mean  (first (:mean stats))]
        (Math/round (double (* 1000000 mean))))
      "Round trip failed!")))
160 | 2000 161 | (prop/for-all [a (gen/recursive-gen compound scalars) 162 | b (gen/recursive-gen compound scalars)] 163 | (let [d (editscript/diff a b {:algo :quick})] 164 | (= b (editscript/patch a d))))) 165 | ;;==> success 166 | 167 | (test/defspec editscript-a*-roundtrip-generative-test 168 | 2000 169 | (prop/for-all [a (gen/recursive-gen compound scalars) 170 | b (gen/recursive-gen compound scalars)] 171 | (let [d (editscript/diff a b)] 172 | (= b (editscript/patch a d))))) 173 | ;;==> success 174 | -------------------------------------------------------------------------------- /src/editscript/util/pairing.cljc: -------------------------------------------------------------------------------- 1 | ;; 2 | ;; Copyright (c) Huahai Yang. All rights reserved. 3 | ;; The use and distribution terms for this software are covered by the 4 | ;; Eclipse Public License 1.0 (http://opensource.org/licenses/eclipse-1.0.php) 5 | ;; which can be found in the file LICENSE at the root of this distribution. 6 | ;; By using this software in any fashion, you are agreeing to be bound by 7 | ;; the terms of this license. 8 | ;; You must not remove this notice, or any other, from this software. 9 | ;; 10 | 11 | (ns ^:no-doc editscript.util.pairing 12 | #?(:clj 13 | (:import [clojure.lang IPersistentStack IPersistentMap IPersistentCollection] 14 | [java.io Writer]) 15 | :cljr 16 | (:import [clojure.lang IPersistentStack IPersistentMap IPersistentCollection] 17 | ))) 18 | 19 | #?(:clj (set! *warn-on-reflection* true)) 20 | 21 | (defprotocol IHeapNode 22 | (get-left [this] "Get the left child node") 23 | (get-right [this] "Get the right sibling node") 24 | (set-right [this right] "Set the right sibling") 25 | (add-child [this node] "Add a child to a node")) 26 | 27 | (deftype HeapNode [item 28 | priority 29 | ^:unsynchronized-mutable left 30 | ^:unsynchronized-mutable right] 31 | IHeapNode 32 | (get-left [_] left) 33 | (get-right [_] right) 34 | (set-right [_ r] (set! 
;; PriorityMap pairs a pairing heap (`heap`, ordered by priority) with a
;; regular map (`map`, item -> priority). It is mutable: every operation
;; updates the fields in place and returns `this`.
;;
;; Removing an item with `dissoc` only removes it from `map`; the pairing
;; heap has no arbitrary-delete operation here, so a stale heap node for
;; that item remains until it is popped. This mirrors how re-inserting an
;; item with a new priority already leaves the old heap node in place.
#?(:clj
   (deftype PriorityMap [^:unsynchronized-mutable ^HeapNode heap
                         ^:unsynchronized-mutable map]
     IPersistentCollection
     (count [_] (count map))
     (cons [this e]
       (let [[item priority] e]
         (set! map (assoc map item priority))
         (set! heap (insert heap item priority))
         this))
     (empty [this]
       (set! heap nil)
       (set! map {})
       this)
     (equiv [this o] (identical? this o))

     IPersistentMap
     (assoc [this item priority]
       (set! map (assoc map item priority))
       (set! heap (insert heap item priority))
       this)
     (hashCode [_] (hash map))
     (equals [this o] (identical? this o))
     (containsKey [_ item] (contains? map item))
     (entryAt [_ k] (find map k))
     (seq [_] (seq map))
     (without [this item]
       ;; store the result: `dissoc` is pure, so discarding it made this a no-op
       (set! map (dissoc map item))
       this)

     IPersistentStack
     (peek [_] [(.-item heap) (.-priority heap)])
     (pop [this]
       (let [n (two-pass (get-left heap))]
         (set! map (dissoc map (.-item heap)))
         (set! heap n)
         this)))
   :cljr
   (deftype PriorityMap [^:unsynchronized-mutable ^HeapNode heap
                         ^:unsynchronized-mutable map]
     IPersistentCollection
     (count [_] (count map))
     (^IPersistentCollection cons [this e]
      (let [[item priority] e]
        (set! map (assoc map item priority))
        (set! heap (insert heap item priority))
        this))
     (empty [this]
       (set! heap nil)
       (set! map {})
       this)
     (equiv [this o] (identical? this o))

     IPersistentMap
     (^IPersistentMap assoc [this item priority]
      (set! map (assoc map item priority))
      (set! heap (insert heap item priority))
      this)
     (^clojure.lang.Associative assoc [this item priority]
      (set! map (assoc map item priority))
      (set! heap (insert heap item priority))
      this)
     (GetHashCode [_] (hash map))
     (Equals [this o] (identical? this o))
     (containsKey [_ item] (contains? map item))
     (entryAt [_ k] (find map k))
     (seq [_] (seq map))
     (without [this item]
       ;; store the result: `dissoc` is pure, so discarding it made this a no-op
       (set! map (dissoc map item))
       this)

     IPersistentStack
     (peek [_] [(.-item heap) (.-priority heap)])
     (pop [this]
       (let [n (two-pass (get-left heap))]
         (set! map (dissoc map (.-item heap)))
         (set! heap n)
         this)))
   :cljs
   (deftype PriorityMap [^:mutable ^HeapNode heap
                         ^:mutable map]

     ISeqable
     (-seq [_] (seq map))

     ICollection
     (-conj [this e]
       (let [[item priority] e]
         (set! map (assoc map item priority))
         (set! heap (insert heap item priority))
         this))

     ;; the protocol name was listed twice here; once is enough
     IAssociative
     (-assoc [this item priority]
       (set! map (assoc map item priority))
       (set! heap (insert heap item priority))
       this)
     (-contains-key? [_ item] (contains? map item))

     IMap
     (-dissoc [this item]
       ;; store the result: `dissoc` is pure, so discarding it made this a no-op
       (set! map (dissoc map item))
       this)

     IStack
     (-peek [_] [(.-item heap) (.-priority heap)])
     (-pop [this]
       (let [n (two-pass (get-left heap))]
         (set! map (dissoc map (.-item heap)))
         (set! heap n)
         this))))
(defn szudzik
  "Szudzik's pairing function: combine the two longs `x` and `y` into a
  single long. Returns `x + y*y` when `y` is greater than `x`, and
  `x*x + x + y` otherwise."
  [^long x ^long y]
  (if (<= y x)
    (+ x y (* x x))
    (+ x (* y y))))
(defn min+plus->replace
  "Aggressively fold adjacent `:-` and `:+` operations into `:r`
  replacements.

  `v` is a vector of edit operations mixed with integers. Each maximal run
  of non-integer operations is rewritten: the leading operations equal to
  the first one are paired off against the remaining ones, every pair
  becomes one `:r`, and whatever is left over stays as the original
  operation. Integers and single-operation runs pass through unchanged."
  [v]
  (let [collapse
        (fn [run]
          (let [head (nth run 0)]
            (if (or (integer? head) (= 1 (count run)))
              run
              (let [other         (if (= head :-) :+ :-)
                    [same others] (split-with #(= % head) run)
                    n-same        (count same)
                    n-others      (count others)
                    ;; one :r per pair, the surplus keeps its own op
                    n-pairs       (min n-same n-others)
                    surplus       (- (max n-same n-others) n-pairs)
                    reps          (repeat n-pairs :r)]
                (cond
                  (< n-same n-others) (concat reps (repeat surplus other))
                  (> n-same n-others) (concat (repeat surplus head) reps)
                  :else               reps)))))]
    (into []
          (comp (partition-by integer?)
                (mapcat collapse))
          v)))
i (partial + x)) coll) 146 | (= :- x) [[x c]] 147 | (= :r x) (let [s (sf b @i (+ ^long @i c))] 148 | (vswap! i (partial + c)) 149 | [[x s]]) 150 | (= :+ x) (let [s (sf b @i (+ ^long @i c))] 151 | (vswap! i (partial + c)) 152 | [[x s]])))))) 153 | edits))) 154 | 155 | (defn transform-str "Prepare string s for diffing at the given level: returned as-is for :character, split on single spaces into a vector for :word, split into a vector of lines for :line. Throws ex-info for any other level." 156 | [s level] 157 | (case level 158 | :character s 159 | :word (vec (s/split s #" ")) 160 | :line (vec (s/split-lines s)) 161 | (throw (ex-info "Unknown string diff level" {:str-diff level})))) 162 | 163 | (defn diff-str "Record into script, at path, the change from string a to string b. Both strings are transformed per :str-diff and diffed with vec-edits. A fine-grained string edit is recorded only when :str-change-limit is strictly between 0 and 1 and the unchanged portion exceeds (1 - limit) of a's units; otherwise, or when the diff times out, the value is replaced wholesale with the original string b (never its transformed form, which is a vector for :word and :line)." 164 | [script path a b {:keys [str-change-limit str-diff] 165 | :or {str-change-limit 0.2} 166 | :as opts}] 167 | (let [a' (transform-str a str-diff) 168 | b' (transform-str b str-diff) 169 | edits (vec-edits a' b' opts)] 170 | (if (= edits :timeout) 171 | (e/replace-data script path b) 172 | (let [ca (count a') 173 | unchanged (double (transduce (filter integer?) + edits))] 174 | (if (and (< 0 str-change-limit 1.0) 175 | (< (* ca (- 1.0 ^double str-change-limit)) unchanged)) 176 | (let [edits' (group-strs edits b' str-diff)] 177 | (e/replace-str script path edits' str-diff)) 178 | (e/replace-data script path b)))))) 179 | 180 | #?(:clj (defmacro vslurp 181 | [file] 182 | (clojure.core/slurp file))) 183 | -------------------------------------------------------------------------------- /test/editscript/diff/a_star_test.cljc: -------------------------------------------------------------------------------- 1 | ;; 2 | ;; Copyright (c) Huahai Yang. All rights reserved. 3 | ;; The use and distribution terms for this software are covered by the 4 | ;; Eclipse Public License 1.0 (http://opensource.org/licenses/eclipse-1.0.php) 5 | ;; which can be found in the file LICENSE at the root of this distribution. 6 | ;; By using this software in any fashion, you are agreeing to be bound by 7 | ;; the terms of this license. 8 | ;; You must not remove this notice, or any other, from this software.
9 | ;; 10 | 11 | (ns editscript.diff.a-star-test 12 | (:require [clojure.test :refer [is testing deftest]] 13 | [editscript.edit :refer [get-edits]] 14 | [editscript.diff.a-star :refer [diff]])) 15 | 16 | (deftest vec-diff-test 17 | (testing "Testing A* with some nested vector data" 18 | (is (= (get-edits (diff (vec (seq "ab")) 19 | (vec (seq "bc")))) 20 | [[[0] :-] 21 | [[1] :+ \c]])) 22 | (is (= (get-edits (diff (vec (seq "abd")) 23 | (vec (seq "bc")))) 24 | [[[0] :-] 25 | [[1] :r \c]])) 26 | (is (#{[[[0] :r [-1]] 27 | [[1] :+ 1]] 28 | [[[0] :+ [-1]] 29 | [[1] :r 1]]} 30 | (get-edits (diff [[0 0 0]] 31 | [[-1] 1])))) 32 | (is (= (get-edits (diff [:a nil [:b :c]] 33 | [:d :a :b nil])) 34 | [[[0] :+ :d] 35 | [[2] :+ :b] 36 | [[4] :-]])) 37 | (is (#{[[[0] :-] 38 | [[1] :+ [:f]] 39 | [[2] :r :d]] 40 | [[[0] :-] 41 | [[1] :r [:f]] 42 | [[2] :+ :d]] 43 | [[[0 0] :r :e] 44 | [[1 0] :r :f] 45 | [[2] :r :d]] } 46 | (get-edits (diff [[:d] [:e] :f] 47 | [[:e] [:f] :d])))) 48 | (is (#{[[[0 0] :r :b] 49 | [[1] :-] 50 | [[1 1 0] :r :e] 51 | [[1 2 0] :r :f] 52 | [[1 3] :r :d]] 53 | [[[0 0] :r :b] 54 | [[1] :-] 55 | [[1 1] :-] 56 | [[1 2] :+ [:f]] 57 | [[1 3] :r :d]] 58 | [[[0 0] :r :b] 59 | [[1] :-] 60 | [[1 1] :-] 61 | [[1 2] :r [:f]] 62 | [[1 3] :+ :d]]} 63 | (get-edits (diff [[:a] :b [:c [:d] [:e] :f]] 64 | [[:b] [:c [:e] [:f] :d]])))) 65 | (is (= (get-edits (diff [:a [:b :c :d] :e :f] 66 | [[:b :c :d :e] [:f]])) 67 | [[[0] :-] 68 | [[0 3] :+ :e] 69 | [[1] :-] 70 | [[1] :r [:f]]])) 71 | (is (= (get-edits (diff [:e [:a :b] :c] 72 | [:a [:b :c] :d])) 73 | [[[0] :r :a] 74 | [[1 0] :-] 75 | [[1 1] :+ :c] 76 | [[2] :r :d]])) 77 | (is (#{[[[0] :+ :s] 78 | [[1] :r :t]] 79 | [[[0] :r :s] 80 | [[1] :+ :t]]} 81 | (get-edits (diff [[:u]] 82 | [:s :t])))) 83 | (is (= [[[] :r [:b :c [:e] :f :g]]] 84 | (get-edits (diff [[:a [:b :c] :d] :e :f] 85 | [:b :c [:e] :f :g])))) 86 | (is (= (get-edits (diff [[:a :b] :c [:d]] 87 | [:c [:d] [:a :b]])) 88 | [[[0] :-] 89 | [[2] :+ [:a :b]]])) 90 | (is 
(#{[[[0 1] :-] 91 | [[1] :+ :t] 92 | [[2] :r :s]] 93 | [[[0 1] :-] 94 | [[1] :r :t] 95 | [[2] :+ :s]]} 96 | (get-edits (diff [[:s :t] [:u]] 97 | [[:s] :t :s])))) 98 | (is (= (get-edits (diff [:a [:s :t] :u] 99 | [[:b] [:s :t :u]])) 100 | [[[0] :r [:b]] 101 | [[1 2] :+ :u] 102 | [[2] :-]])) 103 | (is (#{[[[0] :-] 104 | [[0 1] :r :u] 105 | [[1] :+ :t] 106 | [[2] :r :s]] 107 | [[[0] :-] 108 | [[0 1] :r :u] 109 | [[1] :r :t] 110 | [[2] :+ :s]]} 111 | (get-edits (diff [:a [:s :t] [:u]] 112 | [[:s :u] :t :s])))) 113 | (is (#{[[[1 1] :+ :c] 114 | [[1 2] :r :d] 115 | [[2] :+ :e]] 116 | [[[1 1] :r :c] 117 | [[1 2] :+ :d] 118 | [[2] :+ :e]]} 119 | (get-edits (diff [:a [:b [:c [:d :e] :f]]] 120 | [:a [:b :c :d] :e])))))) 121 | 122 | (deftest mix-diff-test 123 | (testing "Testing diff with some mixed data structures" 124 | (is (= (get-edits (diff 1 125 | 2)) 126 | [[[] :r 2]])) 127 | (is (= (get-edits (diff [0 -1] 128 | [1])) 129 | [[[0] :-] 130 | [[0] :r 1]])) 131 | (is (= (get-edits (diff [{} {0 0}] 132 | [{() ()}])) 133 | [[[0] :-] 134 | [[0 0] :-] 135 | [[0 ()] :+ ()]])) 136 | (is (#{[[[] :r #{1}]] 137 | [[[0] :-] 138 | [[-1] :-] 139 | [[1] :+ 1]]} 140 | (get-edits (diff #{0 -1} 141 | #{1})))) 142 | (is (= (get-edits (diff [] 143 | [[{-1 3}]])) 144 | [[[] :r [[{-1 3}]]]])) 145 | (is (= (get-edits (diff #{nil -30} 146 | #{[()] {}})) 147 | [[[] :r #{[()] {}}]])) 148 | (is (= (get-edits (diff #{0 15 ""} 149 | #{nil 0 15})) 150 | [[[""] :-] 151 | [[nil] :+ nil]])) 152 | (is (= (get-edits (diff {-37 0} 153 | {"" 5 2 nil -37 1})) 154 | [[[] :r {"" 5 2 nil -37 1}]])) 155 | (is (= [[[] :r '(0 0 1)]] 156 | (get-edits (diff '(()) 157 | '(0 0 1))))) 158 | (is (#{[[[0] :+ [-1]] 159 | [[1] :r '()]] 160 | [[[0] :r [-1]] 161 | [[1] :+ '()]]} 162 | (get-edits (diff '([0 0 0]) 163 | '([-1] ()))))) 164 | (is (= (get-edits (diff {:a {:o 4} :b 'b} 165 | {:a {:o 3} :b 'c :c 42})) 166 | [[[:a :o] :r 3] 167 | [[:b] :r 'c] 168 | [[:c] :+ 42]])) 169 | (is (= (get-edits (diff {:a [3 4] :b [1 2]} 
170 | {:a [3] :b {:a 3} :c 42})) 171 | [[[:a 1] :-] 172 | [[:b] :r {:a 3}] 173 | [[:c] :+ 42]])) 174 | (is (= (get-edits (diff [:zero {:x :y}] 175 | [:zero {:a "a" :b "b"}])) 176 | [[[1] :r {:a "a" :b "b"}]])) 177 | (is (= (get-edits (diff [:zero :one] 178 | [:zero {:a "a" :b "b"}])) 179 | [[[1] :r {:a "a" :b "b"}]])) 180 | (is (= (get-edits (diff [:zero {:a "a" :b "b"}] 181 | [:zero :one])) 182 | [[[1] :r :one]])) 183 | (is (= (get-edits (diff [:zero [:a :b :c :d :e :f]] 184 | [:zero [:a]])) 185 | [[[1] :r [:a]]])) 186 | (is 187 | (= (get-edits 188 | (diff 189 | "hello world, this is our first visit to your planet. we come in peace." 190 | "hello worldhis is our first visit to your planet. We come in peace. haha" 191 | {:str-diff :character})) 192 | [[[] :s [11 [:- 3] 39 [:r "W"] 16 [:+ " haha"]]]])) 193 | )) 194 | -------------------------------------------------------------------------------- /src/editscript/edit.cljc: -------------------------------------------------------------------------------- 1 | ;; 2 | ;; Copyright (c) Huahai Yang. All rights reserved. 3 | ;; The use and distribution terms for this software are covered by the 4 | ;; Eclipse Public License 1.0 (http://opensource.org/licenses/eclipse-1.0.php) 5 | ;; which can be found in the file LICENSE at the root of this distribution. 6 | ;; By using this software in any fashion, you are agreeing to be bound by 7 | ;; the terms of this license. 8 | ;; You must not remove this notice, or any other, from this software. 
9 | ;; 10 | 11 | (ns ^:no-doc editscript.edit 12 | #?(:clj (:import [clojure.lang PersistentVector IPersistentList IPersistentMap 13 | IPersistentSet IPersistentVector MapEntry] 14 | [java.util Map$Entry]) 15 | :cljr (:import [clojure.lang PersistentVector IPersistentList IPersistentMap 16 | IPersistentSet IPersistentVector MapEntry] 17 | ))) 18 | 19 | (defprotocol IEdit 20 | (auto-sizing [this path value] "Account for an edit of value at path in the script size (when auto-sizing is on), return the script") 21 | (add-data [this path value] "Record the addition of value at path") 22 | (delete-data [this path] "Record the deletion of the data at path") 23 | (replace-data [this path value] "Record the replacement of the data at path with value") 24 | (replace-str [this path ops level] "Record the string edit ops at path; level is :character, :word or :line")) 25 | 26 | (defprotocol IEditScript 27 | (combine [this that] 28 | "Concatenate that editscript onto this editscript, return the new editscript") 29 | (get-size [this] "Report the size of the editscript") 30 | (set-size [this size] "Set the size, return the script") 31 | (edit-distance [this] "Report the edit distance, i.e. the number of operations") 32 | (get-edits [this] "Report the edits as a vector") 33 | (get-adds-num [this] "Report the number of additions") 34 | (get-dels-num [this] "Report the number of deletions") 35 | (get-reps-num [this] "Report the number of replacements")) 36 | 37 | (defprotocol IType 38 | (get-type [this] "Return a type keyword, :val, :map, :lst, etc.")) 39 | 40 | (defn nada 41 | "A special type that means 'not present'; its type keyword is :nil" 42 | [] 43 | (reify IType 44 | (get-type [_] :nil))) 45 | 46 | #?(:clj 47 | (extend-protocol IType 48 | IPersistentList 49 | (get-type [_] :lst) 50 | 51 | IPersistentMap 52 | (get-type [_] :map) 53 | 54 | IPersistentVector 55 | (get-type [_] :vec) 56 | 57 | IPersistentSet 58 | (get-type [_] :set) 59 | 60 | Map$Entry 61 | (get-type [_] :val) 62 | 63 | MapEntry 64 | (get-type [_] :val) 65 | 66 | nil 67 | (get-type [_] :val) 68 | 69 | String 70 | (get-type [_] :str) 71 | 72 | Object 73 | (get-type [_] :val)) 74 | 75 | :cljr 76 | (extend-protocol IType 77 | IPersistentList 78 | (get-type [_] :lst) 79 | 80 | IPersistentMap 81 | (get-type [_] :map) 82 | 83 | IPersistentVector 84 | (get-type [_]
:vec) 85 | 86 | IPersistentSet 87 | (get-type [_] :set) 88 | 89 | MapEntry 90 | (get-type [_] :val) 91 | 92 | nil 93 | (get-type [_] :val) 94 | 95 | String 96 | (get-type [_] :str) 97 | 98 | Object 99 | (get-type [_] :val)) 100 | 101 | :cljs 102 | (extend-protocol IType 103 | List 104 | (get-type [_] :lst) 105 | 106 | EmptyList 107 | (get-type [_] :lst) 108 | 109 | Cons 110 | (get-type [_] :lst) 111 | 112 | PersistentArrayMap 113 | (get-type [_] :map) 114 | 115 | PersistentHashMap 116 | (get-type [_] :map) 117 | 118 | PersistentTreeMap 119 | (get-type [_] :map) 120 | 121 | PersistentVector 122 | (get-type [_] :vec) 123 | 124 | Subvec 125 | (get-type [_] :vec) 126 | 127 | MapEntry 128 | (get-type [_] :val) 129 | 130 | PersistentHashSet 131 | (get-type [_] :set) 132 | 133 | PersistentTreeSet 134 | (get-type [_] :set) 135 | 136 | nil 137 | (get-type [_] :val) 138 | 139 | string 140 | (get-type [_] :str) 141 | 142 | default 143 | (get-type [_] :val))) 144 | 145 | (defn- sizing* 146 | [data size] 147 | (let [up (fn [s] (inc ^long s))] 148 | (if (#{:vec :lst :map :set} (get-type data)) 149 | (do (vswap! size up) 150 | (doseq [child data] 151 | (sizing* child size))) 152 | (vswap! size up)))) 153 | 154 | (defn- sizing 155 | [data] 156 | (let [size (volatile! 0) ] 157 | (sizing* data size) 158 | @size)) 159 | 160 | (deftype ^:no-doc EditScript [^:unsynchronized-mutable ^PersistentVector edits 161 | ^boolean auto-sizing? 162 | ^:unsynchronized-mutable ^long size 163 | ^:unsynchronized-mutable ^long adds-num 164 | ^:unsynchronized-mutable ^long dels-num 165 | ^:unsynchronized-mutable ^long reps-num] 166 | 167 | IEdit 168 | (auto-sizing [this path value] 169 | (when auto-sizing? 170 | (set! size (long (+ 2 size (sizing path) (if value (sizing value) 0))))) 171 | this) 172 | (add-data [this path value] 173 | (locking this 174 | (set! adds-num (inc adds-num)) 175 | (set! 
edits (conj edits [path :+ value])) 176 | (auto-sizing this path value))) 177 | (delete-data [this path] 178 | (locking this 179 | (set! dels-num (inc dels-num)) 180 | (set! edits (conj edits [path :-])) 181 | (auto-sizing this path nil))) 182 | (replace-data [this path value] 183 | (locking this 184 | (set! reps-num (inc reps-num)) 185 | (set! edits (conj edits [path :r value])) 186 | (auto-sizing this path value))) 187 | (replace-str [this path ops level] 188 | (locking this 189 | (set! reps-num (inc reps-num)) 190 | (set! edits (conj edits [path 191 | (case level 192 | :character :s 193 | :word :sw 194 | :line :sl) 195 | ops])) 196 | (auto-sizing this path ""))) 197 | 198 | IEditScript 199 | (combine [_ that] 200 | (EditScript. (into edits (get-edits that)) 201 | auto-sizing? 202 | (+ size (get-size that)) 203 | (+ adds-num (get-adds-num that)) 204 | (+ dels-num (get-dels-num that)) 205 | (+ reps-num (get-reps-num that)))) 206 | (get-size [_] size) 207 | (set-size [this s] (set! size (long s)) this) 208 | (get-edits [_] edits) 209 | (get-adds-num [_] adds-num) 210 | (get-dels-num [_] dels-num) 211 | (get-reps-num [_] reps-num) 212 | (edit-distance [_] (+ adds-num dels-num reps-num))) 213 | 214 | (defn- valid-str-edits? 215 | [data level] 216 | (and (vector? data) 217 | (every? (fn [x] 218 | (or (nat-int? x) 219 | (and (vector? x) 220 | (= 2 (count x)) 221 | (let [[op y] x] 222 | (and 223 | (#{:- :r :+} op) 224 | (case op 225 | :- (nat-int? y) 226 | (:+ :r) (case level 227 | :s (string? y) 228 | (:sl :sw) (vector? y)))))))) 229 | data))) 230 | 231 | (defn- valid-edit? 232 | [edit] 233 | (when (vector? edit) 234 | (let [c (count edit)] 235 | (when (< 1 c 4) 236 | (let [[path op data] edit] 237 | (and (vector? path) 238 | (#{:- :r :+ :s :sw :sl} op) 239 | (if (= :- op) (nil? data) (= c 3)) 240 | (if (#{:s :sw :sl} op) 241 | (valid-str-edits? data op) 242 | true))))))) 243 | 244 | (defn valid-edits? 245 | [edits] 246 | (when (vector? 
edits) 247 | (if (seq edits) 248 | (every? valid-edit? edits) 249 | true))) 250 | 251 | (defn- count-str-ops 252 | [data adds dels reps] 253 | (doseq [d data 254 | :when (vector? d)] 255 | (case (nth d 0) 256 | :+ (vswap! adds inc) 257 | :- (vswap! dels inc) 258 | :r (vswap! reps inc)))) 259 | 260 | (defn- count-ops 261 | [edits] 262 | (let [adds (volatile! 0) 263 | dels (volatile! 0) 264 | reps (volatile! 0)] 265 | (doseq [[_ op data] edits] 266 | (case op 267 | :+ (vswap! adds inc) 268 | :- (vswap! dels inc) 269 | :r (vswap! reps inc) 270 | (:s :sw :sl) (count-str-ops data adds dels reps))) 271 | [@adds @dels @reps])) 272 | 273 | (defn edits->script 274 | "Create an EditScript instance from a vector of edits, like those obtained 275 | through calling `get-edits` on an EditScript" 276 | [edits] 277 | (assert (valid-edits? edits) "Not a vector of valid edits") 278 | (let [[adds dels reps] (count-ops edits)] 279 | (->EditScript edits true (sizing edits) adds dels reps))) 280 | 281 | 282 | #?(:clj (defmethod print-method EditScript 283 | [x ^java.io.Writer writer] 284 | (print-method (get-edits x) writer)) 285 | :cljs (extend-protocol IPrintWithWriter 286 | EditScript 287 | (-pr-writer [o writer opts] 288 | (write-all writer (str (get-edits o)))))) 289 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | THE ACCOMPANYING PROGRAM IS PROVIDED UNDER THE TERMS OF THIS ECLIPSE PUBLIC 2 | LICENSE ("AGREEMENT"). ANY USE, REPRODUCTION OR DISTRIBUTION OF THE PROGRAM 3 | CONSTITUTES RECIPIENT'S ACCEPTANCE OF THIS AGREEMENT. 4 | 5 | 1. 
DEFINITIONS 6 | 7 | "Contribution" means: 8 | 9 | a) in the case of the initial Contributor, the initial code and 10 | documentation distributed under this Agreement, and 11 | 12 | b) in the case of each subsequent Contributor: 13 | 14 | i) changes to the Program, and 15 | 16 | ii) additions to the Program; 17 | 18 | where such changes and/or additions to the Program originate from and are 19 | distributed by that particular Contributor. A Contribution 'originates' from 20 | a Contributor if it was added to the Program by such Contributor itself or 21 | anyone acting on such Contributor's behalf. Contributions do not include 22 | additions to the Program which: (i) are separate modules of software 23 | distributed in conjunction with the Program under their own license 24 | agreement, and (ii) are not derivative works of the Program. 25 | 26 | "Contributor" means any person or entity that distributes the Program. 27 | 28 | "Licensed Patents" mean patent claims licensable by a Contributor which are 29 | necessarily infringed by the use or sale of its Contribution alone or when 30 | combined with the Program. 31 | 32 | "Program" means the Contributions distributed in accordance with this 33 | Agreement. 34 | 35 | "Recipient" means anyone who receives the Program under this Agreement, 36 | including all Contributors. 37 | 38 | 2. GRANT OF RIGHTS 39 | 40 | a) Subject to the terms of this Agreement, each Contributor hereby grants 41 | Recipient a non-exclusive, worldwide, royalty-free copyright license to 42 | reproduce, prepare derivative works of, publicly display, publicly perform, 43 | distribute and sublicense the Contribution of such Contributor, if any, and 44 | such derivative works, in source code and object code form. 
45 | 46 | b) Subject to the terms of this Agreement, each Contributor hereby grants 47 | Recipient a non-exclusive, worldwide, royalty-free patent license under 48 | Licensed Patents to make, use, sell, offer to sell, import and otherwise 49 | transfer the Contribution of such Contributor, if any, in source code and 50 | object code form. This patent license shall apply to the combination of the 51 | Contribution and the Program if, at the time the Contribution is added by the 52 | Contributor, such addition of the Contribution causes such combination to be 53 | covered by the Licensed Patents. The patent license shall not apply to any 54 | other combinations which include the Contribution. No hardware per se is 55 | licensed hereunder. 56 | 57 | c) Recipient understands that although each Contributor grants the licenses 58 | to its Contributions set forth herein, no assurances are provided by any 59 | Contributor that the Program does not infringe the patent or other 60 | intellectual property rights of any other entity. Each Contributor disclaims 61 | any liability to Recipient for claims brought by any other entity based on 62 | infringement of intellectual property rights or otherwise. As a condition to 63 | exercising the rights and licenses granted hereunder, each Recipient hereby 64 | assumes sole responsibility to secure any other intellectual property rights 65 | needed, if any. For example, if a third party patent license is required to 66 | allow Recipient to distribute the Program, it is Recipient's responsibility 67 | to acquire that license before distributing the Program. 68 | 69 | d) Each Contributor represents that to its knowledge it has sufficient 70 | copyright rights in its Contribution, if any, to grant the copyright license 71 | set forth in this Agreement. 72 | 73 | 3. 
REQUIREMENTS 74 | 75 | A Contributor may choose to distribute the Program in object code form under 76 | its own license agreement, provided that: 77 | 78 | a) it complies with the terms and conditions of this Agreement; and 79 | 80 | b) its license agreement: 81 | 82 | i) effectively disclaims on behalf of all Contributors all warranties and 83 | conditions, express and implied, including warranties or conditions of title 84 | and non-infringement, and implied warranties or conditions of merchantability 85 | and fitness for a particular purpose; 86 | 87 | ii) effectively excludes on behalf of all Contributors all liability for 88 | damages, including direct, indirect, special, incidental and consequential 89 | damages, such as lost profits; 90 | 91 | iii) states that any provisions which differ from this Agreement are offered 92 | by that Contributor alone and not by any other party; and 93 | 94 | iv) states that source code for the Program is available from such 95 | Contributor, and informs licensees how to obtain it in a reasonable manner on 96 | or through a medium customarily used for software exchange. 97 | 98 | When the Program is made available in source code form: 99 | 100 | a) it must be made available under this Agreement; and 101 | 102 | b) a copy of this Agreement must be included with each copy of the Program. 103 | 104 | Contributors may not remove or alter any copyright notices contained within 105 | the Program. 106 | 107 | Each Contributor must identify itself as the originator of its Contribution, 108 | if any, in a manner that reasonably allows subsequent Recipients to identify 109 | the originator of the Contribution. 110 | 111 | 4. COMMERCIAL DISTRIBUTION 112 | 113 | Commercial distributors of software may accept certain responsibilities with 114 | respect to end users, business partners and the like. 
While this license is 115 | intended to facilitate the commercial use of the Program, the Contributor who 116 | includes the Program in a commercial product offering should do so in a 117 | manner which does not create potential liability for other Contributors. 118 | Therefore, if a Contributor includes the Program in a commercial product 119 | offering, such Contributor ("Commercial Contributor") hereby agrees to defend 120 | and indemnify every other Contributor ("Indemnified Contributor") against any 121 | losses, damages and costs (collectively "Losses") arising from claims, 122 | lawsuits and other legal actions brought by a third party against the 123 | Indemnified Contributor to the extent caused by the acts or omissions of such 124 | Commercial Contributor in connection with its distribution of the Program in 125 | a commercial product offering. The obligations in this section do not apply 126 | to any claims or Losses relating to any actual or alleged intellectual 127 | property infringement. In order to qualify, an Indemnified Contributor must: 128 | a) promptly notify the Commercial Contributor in writing of such claim, and 129 | b) allow the Commercial Contributor to control, and cooperate with the 130 | Commercial Contributor in, the defense and any related settlement 131 | negotiations. The Indemnified Contributor may participate in any such claim 132 | at its own expense. 133 | 134 | For example, a Contributor might include the Program in a commercial product 135 | offering, Product X. That Contributor is then a Commercial Contributor. If 136 | that Commercial Contributor then makes performance claims, or offers 137 | warranties related to Product X, those performance claims and warranties are 138 | such Commercial Contributor's responsibility alone. 
Under this section, the 139 | Commercial Contributor would have to defend claims against the other 140 | Contributors related to those performance claims and warranties, and if a 141 | court requires any other Contributor to pay any damages as a result, the 142 | Commercial Contributor must pay those damages. 143 | 144 | 5. NO WARRANTY 145 | 146 | EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, THE PROGRAM IS PROVIDED ON 147 | AN "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, EITHER 148 | EXPRESS OR IMPLIED INCLUDING, WITHOUT LIMITATION, ANY WARRANTIES OR 149 | CONDITIONS OF TITLE, NON-INFRINGEMENT, MERCHANTABILITY OR FITNESS FOR A 150 | PARTICULAR PURPOSE. Each Recipient is solely responsible for determining the 151 | appropriateness of using and distributing the Program and assumes all risks 152 | associated with its exercise of rights under this Agreement , including but 153 | not limited to the risks and costs of program errors, compliance with 154 | applicable laws, damage to or loss of data, programs or equipment, and 155 | unavailability or interruption of operations. 156 | 157 | 6. DISCLAIMER OF LIABILITY 158 | 159 | EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR ANY 160 | CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT, INCIDENTAL, 161 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING WITHOUT LIMITATION 162 | LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 163 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 164 | ARISING IN ANY WAY OUT OF THE USE OR DISTRIBUTION OF THE PROGRAM OR THE 165 | EXERCISE OF ANY RIGHTS GRANTED HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY 166 | OF SUCH DAMAGES. 167 | 168 | 7. 
GENERAL 169 | 170 | If any provision of this Agreement is invalid or unenforceable under 171 | applicable law, it shall not affect the validity or enforceability of the 172 | remainder of the terms of this Agreement, and without further action by the 173 | parties hereto, such provision shall be reformed to the minimum extent 174 | necessary to make such provision valid and enforceable. 175 | 176 | If Recipient institutes patent litigation against any entity (including a 177 | cross-claim or counterclaim in a lawsuit) alleging that the Program itself 178 | (excluding combinations of the Program with other software or hardware) 179 | infringes such Recipient's patent(s), then such Recipient's rights granted 180 | under Section 2(b) shall terminate as of the date such litigation is filed. 181 | 182 | All Recipient's rights under this Agreement shall terminate if it fails to 183 | comply with any of the material terms or conditions of this Agreement and 184 | does not cure such failure in a reasonable period of time after becoming 185 | aware of such noncompliance. If all Recipient's rights under this Agreement 186 | terminate, Recipient agrees to cease use and distribution of the Program as 187 | soon as reasonably practicable. However, Recipient's obligations under this 188 | Agreement and any licenses granted by Recipient relating to the Program shall 189 | continue and survive. 190 | 191 | Everyone is permitted to copy and distribute copies of this Agreement, but in 192 | order to avoid inconsistency the Agreement is copyrighted and may only be 193 | modified in the following manner. The Agreement Steward reserves the right to 194 | publish new versions (including revisions) of this Agreement from time to 195 | time. No one other than the Agreement Steward has the right to modify this 196 | Agreement. The Eclipse Foundation is the initial Agreement Steward. 
The 197 | Eclipse Foundation may assign the responsibility to serve as the Agreement 198 | Steward to a suitable separate entity. Each new version of the Agreement will 199 | be given a distinguishing version number. The Program (including 200 | Contributions) may always be distributed subject to the version of the 201 | Agreement under which it was received. In addition, after a new version of 202 | the Agreement is published, Contributor may elect to distribute the Program 203 | (including its Contributions) under the new version. Except as expressly 204 | stated in Sections 2(a) and 2(b) above, Recipient receives no rights or 205 | licenses to the intellectual property of any Contributor under this 206 | Agreement, whether expressly, by implication, estoppel or otherwise. All 207 | rights in the Program not expressly granted under this Agreement are 208 | reserved. 209 | 210 | This Agreement is governed by the laws of the State of New York and the 211 | intellectual property laws of the United States of America. No party to this 212 | Agreement will bring a legal action under this Agreement more than one year 213 | after the cause of action arose. Each party waives its rights to a jury trial 214 | in any resulting litigation. 215 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |

🔦 Diff and patch for Clojure/Clojurescript data. 🧩
4 | 5 | 11 | 12 | ## :hear_no_evil: What is it? 13 | 14 | Editscript is a library designed to extract the differences between two 15 | Clojure/Clojurescript data structures as an "editscript", which represents the 16 | minimal modification necessary to transform one into the other. 17 | 18 | Currently, this library can diff and patch any nested Clojure/Clojurescript data 19 | structures consisting of regular maps, vectors, lists, sets and values. Custom 20 | data can also be handled if you implement our protocols. 21 | 22 | ## :satisfied: Status 23 | 24 | This library is stable and has been in production use to power the core product 25 | of [Juji](https://juji.io) for several years now. If you are also using 26 | Editscript, please drop a line at issue 27 | [#17](https://github.com/juji-io/editscript/issues/17) so we may make a list of 28 | users here: 29 | 30 | * [clerk](https://github.com/nextjournal/clerk) uses Editscript to improve the 31 | usability of synchronised atoms by sending a minimal diff from the JVM to the 32 | browser, achieving 60fps sync for updates from the browser to the JVM and 33 | back. 34 | * [Evident Systems](https://www.evidentsystems.com/) uses Editscript as the main 35 | way of evaluating changes within the convergent reference type in their CRDT 36 | library, [Converge](https://github.com/evidentsystems/converge). 37 | * [microdata.no](https://microdata.no) uses Editscript to sync client state to 38 | server so users can pick up their work where they left it. 39 | * [Oche](https://oche.com) uses Editscript to sync game state between client and server. 40 | * [Streetlinx](https://streetlinx.com) uses Editscript to capture deltas to 41 | drive a newsfeed and generate alerts. 42 | 43 | ## :tada: Usage 44 | 45 | See my [Clojure/north 2020 Talk](https://youtu.be/n-avEZHEHg8): Data Diffing 46 | Based Software Architecture Patterns.
47 | 48 | ```Clojure 49 | (require '[editscript.core :as e]) 50 | 51 | ;; Here are two pieces of data, a and b 52 | (def a ["Hello word" 24 22 {:a [1 2 3]} 1 3 #{1 2}]) 53 | (def b ["Hello world" 24 23 {:a [2 3]} 1 3 #{1 2 3}]) 54 | 55 | ;; compute the editscript between a and b using the default options 56 | (def d (e/diff a b)) 57 | 58 | ;; look at the editscript 59 | (e/get-edits d) 60 | ;;==> 61 | ;; [[[0] :r "Hello world"] [[2] :r 23] [[3 :a 0] :-] [[6 3] :+ 3]] 62 | 63 | ;; diff using the quick algorithm and diff the strings by character 64 | ;; there are other string diff levels: :word, :line, or :none (default) 65 | (def d-q (e/diff a b {:algo :quick :str-diff :character})) 66 | 67 | (e/get-edits d-q) 68 | ;;==> 69 | ;; [[[0] :s [9 [:+ "l"] 1]] [[2] :r 23] [[3 :a 0] :-] [[6 3] :+ 3]] 70 | 71 | ;; get the edit distance, i.e. number of edits 72 | (e/edit-distance d) 73 | ;;==> 4 74 | 75 | ;; get the size of the editscript, i.e. number of nodes 76 | (e/get-size d) 77 | ;;==> 23 78 | 79 | ;; patch a with the editscript to get back b, so that 80 | (= b (e/patch a d)) 81 | ;;==> true 82 | (= b (e/patch a d-q)) 83 | ;;==> true 84 | 85 | ``` 86 | 87 | An Editscript contains a vector of edits, where each edit is a vector of two or 88 | three elements. 89 | 90 | The first element of an edit is the path, similar to the path vector in the 91 | function call `update-in`. However, `update-in` only works for associative data 92 | structures (map and vector), whereas the editscript works for map, vector, list 93 | and set alike. 94 | 95 | The second element of an edit is a keyword representing the edit operation, 96 | which is one of `:-` (deletion), `:+` (addition), `:r` (data replacement) or 97 | `:s` (string edit). 98 | 99 | For addition and replacement operations, the third element is the value of the new data.
100 | 101 | ```Clojure 102 | 103 | ;; get the edits as a plain Clojure vector 104 | (def v (e/get-edits d)) 105 | 106 | v 107 | ;;==> 108 | ;;[[[0] :r "Hello world"] [[2] :r 23] [[3 :a 0] :-] [[6 3] :+ 3]] 109 | 110 | ;; the plain Clojure vector can be passed around, stored, or modified as usual, 111 | ;; then be loaded back as a new EditScript 112 | (def d' (e/edits->script v)) 113 | 114 | ;; the new EditScript works the same as the old one 115 | (= b (e/patch a d')) 116 | ;;==> true 117 | 118 | ``` 119 | 120 | ## :green_book: Documentation 121 | 122 | Please see [API Documentation](https://cljdoc.org/d/juji/editscript/CURRENT) for 123 | more details. 124 | 125 | ## :shopping: Alternatives 126 | 127 | Depending on your use cases, different libraries in this space may suit your 128 | needs better. The `/bench` folder of this repo contains a benchmark comparing 129 | the alternatives. The resulting charts of running [the benchmark](https://juji.io/blog/comparing-clojure-diff-libraries/) are included below: 130 | 131 |  132 |  133 | 134 | [deep-diff2](https://github.com/lambdaisland/deep-diff2) applies the Wu et al. 1990 135 | [3] algorithm by first converting trees into linear structures. It is only 136 | faster than the A\* algorithm of Editscript. Its results are the largest in size. 137 | Although unable to achieve optimal tree diffing with this approach, it has some 138 | interesting uses, e.g. visualization. So if you want to visualize the 139 | differences, use deep-diff2. This library does not do patch. 140 | 141 | [clojure.data/diff](https://clojuredocs.org/clojure.data/diff) and 142 | [differ](https://github.com/Skinney/differ) are similar to the quick algorithm 143 | of Editscript, in that they all do a naive walk-through of the data, so the 144 | generated diff is not going to be optimal. 145 | 146 | clojure.data/diff is good for detecting which parts of the data have changed 147 | and how. But it is slow and the results are also large.
It does not do patch 148 | either. 149 | 150 | differ looks very good by the numbers in the benchmark. It does patch, is fast 151 | and the results are the smallest (for it doesn't record editing operators). 152 | Unfortunately, it cuts corners. It fails all the property-based tests, even if 153 | the tests considered only vectors and maps. Use it if you understand its failing 154 | patterns and are able to avoid them in your data. 155 | 156 | Editscript is designed for data diffing, e.g. data preservation and recovery, 157 | not for being looked at by humans. If speed is your primary concern, the quick 158 | algorithm of Editscript is the fastest among all the alternatives, and its diff 159 | size is reasonably small for the benchmarked data sets. If the diff size is your 160 | primary concern, the A\* algorithm is the only available option that guarantees 161 | optimal data size, but it is also the slowest. 162 | 163 | ## :zap: Diffing Algorithms 164 | 165 | As mentioned, the library currently implements two diffing algorithms. The 166 | default algorithm produces diffs that are optimal in the number of editing 167 | operations and the resulting script size. A quick algorithm is also provided, 168 | which does not guarantee optimal results but is very fast. 169 | 170 | ### A\* diffing 171 | 172 | This A\* algorithm aims to achieve optimal diffing in terms of minimal size of 173 | the resulting editscript, useful for storage, query and restoration. This is an 174 | original algorithm that has some unique properties: unlike many other general 175 | tree diffing algorithms such as Zhang & Shasha 1989 [4], our algorithm is 176 | structure preserving. 177 | 178 | Roughly speaking, the edit distance is defined on sub-trees rather than nodes, 179 | such that the ancestor-descendant relationship and tree traversal order are 180 | preserved, and nodes in the original tree do not split or merge.
These 181 | properties are useful for diffing and patching Clojure's immutable data 182 | structures because we want to leverage structure sharing and use `identical?` 183 | reference checks. The additional constraints also yield algorithms with better 184 | run time performance than the general ones. Finally, these constraints feel 185 | natural for a Clojure programmer. 186 | 187 | The structure preserving properties were proposed in Lu 1979 [1] and Tanaka 1995 188 | [2]. These papers describe diffing algorithms with O(|a||b|) time and space 189 | complexity. We designed an A\* based algorithm to achieve some speedup. Instead 190 | of searching the whole editing graph, we typically search a portion of it along 191 | the diagonal. 192 | 193 | The implementation is optimized for speed. Currently the algorithm spends most of 194 | its running time calculating the cost of next steps, perhaps due to the use of a 195 | very generic heuristic. A more specialized heuristic for our case should reduce 196 | the number of steps considered. For special cases of vectors and lists 197 | consisting of leaves only, we also use the quick algorithm below to enhance the 198 | speed. 199 | 200 | Although much slower than the non-optimizing quick algorithm below, the 201 | algorithm is practical for common Clojure data that include lots of maps. Maps 202 | and sets do not incur the penalty of a large search space that arises with 203 | vectors and lists. For a [drawing data 204 | set](https://github.com/justsml/json-diff-performance), the diffing time is less 205 | than 3ms on a 2014 2.8 GHz Core i5 16GB MacBook Pro. 206 | 207 | ### Quick diffing 208 | 209 | This quick diffing algorithm simply does a one-pass comparison of two trees so 210 | it is very fast. For sequence (vector and list) comparison, we implement Wu et 211 | al. 1990, an algorithm with O(NP) time complexity, where P is the number of 212 | deletions if `b` is longer than `a`.
The same sequence diffing algorithm is 213 | also implemented in [diffit](https://github.com/friemen/diffit). Using their 214 | benchmark, our implementation has slightly better performance due to more 215 | optimizations. Keep in mind that our algorithm also handles nested Clojure data 216 | structures. Compared with our A\* algorithm, our quick algorithm can be up to 217 | two orders of magnitude faster. 218 | 219 | The Wu algorithm does not have replacement operations, and assumes each edit has 220 | a unit cost. These do not work well for tree diffing. Consequently, the quick 221 | algorithm does not produce optimal results in terms of script size. In principle, 222 | simply changing a pointer to point to `b` instead of `a` produces the fastest 223 | "diffing" algorithm in the world, but that is not very useful. The quick 224 | algorithm has a similar problem. 225 | 226 | For instance, when consecutive deletions involving nested elements occur in a 227 | sequence, the generated editscript can be large. For example: 228 | 229 | ```Clojure 230 | (def a [2 {:a 42} 3 {:b 4} {:c 29}]) 231 | (def b [{:a 5} {:b 5}]) 232 | 233 | (diff a b {:algo :quick}) 234 | ;;==> 235 | ;;[[[0] :-] 236 | ;; [[0] :-] 237 | ;; [[0] :-] 238 | ;; [[0 :b] :-] 239 | ;; [[0 :a] :+ 5] 240 | ;; [[1 :c] :-] 241 | ;; [[1 :b] :+ 5]] 242 | 243 | (diff a b) 244 | ;;==> 245 | ;; [[[] :r [{:a 5} {:b 5}]]] 246 | 247 | ``` 248 | In this case, the quick algorithm seems to delete the original and then add 249 | new ones back. The reason is that the quick algorithm does not drill down 250 | (i.e. do replacement) at the correct places. It currently drills down wherever it 251 | can. In this particular case, replacing the whole thing produces a smaller diff. 252 | An optimizing algorithm is needed if minimal diffs are desired. 253 | 254 | ## :station: Platform 255 | 256 | The library supports JVM Clojure and Clojurescript.
The latter has been tested 257 | with node, nashorn, chrome, safari, firefox and lumo. E.g. run our test suite: 258 | 259 | ```bash 260 | # Run Clojure tests 261 | lein test 262 | 263 | # Run Clojurescript tests on node.js 264 | lein doo node 265 | 266 | # Run Clojurescript tests on chrome 267 | lein doo chrome browser once 268 | 269 | ``` 270 | 271 | ## :bulb: Rationale 272 | 273 | At Juji, we send changes of UI states back to the server for persistence [see blog 274 | post](https://juji.io/blog/this-is-how-we-revamped-the-ui-in-less-than-a-month/). 275 | Such a use case requires a good diffing library for nested Clojure data 276 | structures to avoid overwhelming our storage systems. I have not found such a 277 | library in the Clojure ecosystem, so I implemented my own. Hopefully this little 278 | library could be of some use to further enhance Clojure's unique strength of 279 | [Data-Oriented 280 | Programming](https://livebook.manning.com/#!/book/the-joy-of-clojure-second-edition/chapter-14/1). 281 | 282 | Editscript is designed with stream processing in mind. An editscript should be 283 | conceptualized as a chunk in a potentially endless stream of changes. Individual 284 | editscripts can combine (concatenate) into a larger editscript. I consider 285 | editscript as a part of a larger data-oriented effort that tries to elevate 286 | the level of abstraction of data from the granularity of characters, bytes or 287 | lines to that of maps, sets, vectors, and lists. So instead of talking about 288 | change streams in bytes, we can talk about change streams in terms of higher 289 | level data structures. 290 | 291 | ## :roller_coaster: Roadmap 292 | 293 | There are a few things I have some interest in exploring with this library. Of 294 | course, ideas, suggestions and contributions are very welcome. 295 | 296 | * Further speed up of the algorithms, e.g. better heuristic, hashing, and so on. 297 | * Globally optimize an editscript stream.
298 | 299 | ## :green_book: References 300 | 301 | [1] Lu, S. 1979, A Tree-to-tree distance and its application to cluster 302 | analysis. IEEE Transactions on Pattern Analysis and Machine Intelligence. Vol. 303 | PAMI-1 No.2. p219-224 304 | 305 | [2] Tanaka, E., 1995, A note on a tree-to-tree editing problem. International 306 | Journal of Pattern Recognition and Artificial Intelligence. p167-172 307 | 308 | [3] Wu, S. et al., 1990, An O(NP) Sequence Comparison Algorithm, Information 309 | Processing Letters, 35:6, p317-23. 310 | 311 | [4] Zhang, K. and Shasha, D. 1989, Simple fast algorithms for the editing 312 | distance between trees and related problems. SIAM Journal of Computing, 313 | 18:1245–1262 314 | 315 | 316 | ## License 317 | 318 | Copyright © 2018-2025 [Juji, Inc.](https://juji.io) 319 | 320 | Distributed under the Eclipse Public License either version 1.0 or (at 321 | your option) any later version. 322 | -------------------------------------------------------------------------------- /test/editscript/core_test.cljc: -------------------------------------------------------------------------------- 1 | ;; 2 | ;; Copyright (c) Huahai Yang. All rights reserved. 3 | ;; The use and distribution terms for this software are covered by the 4 | ;; Eclipse Public License 1.0 (http://opensource.org/licenses/eclipse-1.0.php) 5 | ;; which can be found in the file LICENSE at the root of this distribution. 6 | ;; By using this software in any fashion, you are agreeing to be bound by 7 | ;; the terms of this license. 8 | ;; You must not remove this notice, or any other, from this software. 
9 | ;; 10 | 11 | (ns editscript.core-test 12 | (:require [clojure.test :refer [is are testing deftest]] 13 | [editscript.core :refer [patch diff get-edits edits->script 14 | edit-distance get-size change-ratio]] 15 | [editscript.edit :as e] 16 | ;; [editscript.diff.quick :as q] 17 | ;; [editscript.diff.a-star :as a] 18 | [editscript.util.common :as com 19 | #?@(:cljs [:include-macros true])] 20 | [clojure.test.check.generators :as gen] 21 | #?(:cljs [clojure.test.check :refer [quick-check]]) 22 | #?(:cljs [cljs.reader :as reader]) 23 | [clojure.test.check.clojure-test :as test 24 | #?@(:cljs [:refer-macros [defspec] :include-macros true])] 25 | [clojure.test.check.properties :as prop 26 | #?@(:cljs [:include-macros true])] 27 | [editscript.util.index :as i])) 28 | 29 | (deftest readme-test 30 | (let [a ["Hello word" 24 22 {:a [1 2 3]} 1 3 #{1 2}] 31 | b ["Hello world" 24 23 {:a [2 3]} 1 3 #{1 2 3}] 32 | d (diff a b) 33 | d-q (diff a b {:algo :quick :str-diff :character}) 34 | v (get-edits d) 35 | ds (edits->script v)] 36 | (is (= (get-edits d) [[[0] :r "Hello world"] 37 | [[2] :r 23] 38 | [[3 :a 0] :-] 39 | [[6 3] :+ 3]])) 40 | (is (= (get-edits d-q) 41 | [[[0] :s [9 [:+ "l"] 1]] 42 | [[2] :r 23] 43 | [[3 :a 0] :-] 44 | [[6 3] :+ 3]])) 45 | (is (= 4 (edit-distance d))) 46 | (is (= 23 (get-size d))) 47 | (is (= 4 (edit-distance d-q))) 48 | (is (= 23 (get-size d-q))) 49 | (is (= b (patch a d))) 50 | (is (= b (patch a d-q))) 51 | (is (= b (patch a ds)))) 52 | (let [a [2 {:a 42} 3 {:b 4} {:c 29}] 53 | b [{:a 5} {:b 5}] 54 | d (diff a b) 55 | d-q (diff a b {:algo :quick})] 56 | (is (= 1 (edit-distance d))) 57 | (is (= 9 (get-size d))) 58 | (is (= (get-edits d) [[[] :r [{:a 5} {:b 5}]]])) 59 | (is (= 7 (edit-distance d-q))) 60 | (is (= 35 (get-size d-q))) 61 | (is (= (get-edits d-q) [[[0] :-] 62 | [[0] :-] 63 | [[0] :-] 64 | [[0 :b] :-] 65 | [[0 :a] :+ 5] 66 | [[1 :c] :-] 67 | [[1 :b] :+ 5]])))) 68 | 69 | (deftest char-str-diff-test 70 | (let [a {:data ["hello word" 24 
22 {:a [1 2 3]} 1 3 #{1 2 3}]} 71 | b {:data ["Hello world!" 42 22 {:a [1 3]} 1 3 #{1 2 3}]} 72 | d-q (diff a b {:algo :quick :str-diff :character}) 73 | d-a (diff a b {:algo :a-star :str-diff :character}) 74 | e-q (e/get-edits d-q) 75 | e-a (e/get-edits d-a) 76 | d-q1 (e/edits->script e-q) 77 | d-a1 (e/edits->script e-a)] 78 | (is (e/valid-edits? e-q)) 79 | (is (e/valid-edits? e-a)) 80 | (is (= e-q 81 | e-a 82 | [[[:data 0] :s [[:r "H"] 8 [:+ "l"] 1 [:+ "!"]]] 83 | [[:data 1] :r 42] 84 | [[:data 3 :a 1] :-]])) 85 | (is (= b (patch a d-q))) 86 | (is (= b (patch a d-a))) 87 | (is (= b (patch a d-q1))) 88 | (is (= b (patch a d-a1))))) 89 | 90 | (deftest word-str-diff-test 91 | (let [a "You know, it does not matter how slowly you go as long as you do not stop." 92 | b "Hey, do you know, it does not matter how fast you go as long as you don't stop." 93 | d (diff a b {:str-diff :word :str-change-limit 0.5}) 94 | e (e/get-edits d) 95 | ds (e/edits->script e)] 96 | (is (e/valid-edits? e)) 97 | (is (= e [[[] 98 | :sw 99 | [[:+ ["Hey," "do"]] 100 | [:r ["you"]] 101 | 6 102 | [:r ["fast"]] 103 | 6 104 | [:r ["don't"]] 105 | [:- 1] 106 | 1]]])) 107 | (is (= b (patch a d))) 108 | (is (= b (patch a ds))))) 109 | 110 | (deftest line-str-diff-test 111 | (let [a 112 | "侠客行 113 | 唐 李白 114 | 赵客缦胡缨,吴钩霜雪明。 115 | 银鞍照白马,飒沓如流星。 116 | 十步杀一人,千里不留行。 117 | 事了拂衣去,深藏身与名。 118 | 闲过信陵饮,脱剑膝前横。 119 | 将炙啖朱亥,持觞劝侯嬴。 120 | 三杯吐然诺,五岳倒为轻。 121 | 眼花耳热后,意气素霓生。 122 | 救赵挥金槌,邯郸先震惊。 123 | 千秋二壮士,烜赫大梁城。 124 | 纵死侠骨香,不惭世上英。 125 | 谁能书阁下,白首太玄经。" 126 | b 127 | "侠客行 128 | 赵客缦胡缨,吴钩霜雪明。 129 | 银鞍照白马,飒沓如流星。 130 | 十步杀百人,千里不留行。 131 | 事了拂衣去,深藏身与名。 132 | 闲过信陵饮,脱剑膝前横。 133 | 将炙啖朱亥,持觞劝侯嬴。 134 | 三杯吐然诺,五岳倒为轻。 135 | 眼花耳热后,意气素霓生。 136 | 救赵挥金槌,邯郸先震惊。 137 | 千秋二壮士,烜赫大梁城。 138 | 纵死侠骨香,不惭世上英。 139 | 谁能书阁下,白首太玄经。" 140 | d (diff a b {:str-diff :line :str-change-limit 0.5}) 141 | e (e/get-edits d) 142 | ds (e/edits->script e)] 143 | (is (e/valid-edits? 
e)) 144 | (is (= e [[[] :sl [1 [:- 1] 145 | 2 [:r [" 十步杀百人,千里不留行。"]] 9]]])) 146 | (is (= b (patch a d))) 147 | (is (= b (patch a ds))))) 148 | 149 | (deftest map-entry-test 150 | (let [a (first {:a :c}) 151 | b (first {:a :b}) 152 | d-a (diff a b {:algo :a-star}) 153 | d-q (diff a b {:algo :quick})] 154 | (is (= b (patch a d-a))) 155 | (is (= b (patch a d-q))))) 156 | 157 | (deftest vec-timeout-test 158 | (let [a (vec (range 30000)) 159 | b (vec (concat (range 100) [213 222 223 224 123] (range 300 800) 160 | [100 950 221 897 1232] (range 990 2810) (range 2810 30000))) 161 | d (diff a b) 162 | d-o (diff a b {:vec-timeout 1}) 163 | d-q (diff a b {:vec-timeout 1 :algo :quick}) 164 | ] 165 | (is (= b (patch a d))) 166 | (is (< 1 (e/edit-distance d))) 167 | (is (= b (patch a d-o))) 168 | (is (= (e/edit-distance d-o) 1)) 169 | (is (= b (patch a d-q))) 170 | (is (= (e/edit-distance d-q) 1)) 171 | )) 172 | 173 | (deftest change-ratio-test 174 | (are [a b r] (is (= (change-ratio a (diff a b)) r)) 175 | {:a {:b 2 :c 3}} {:a {:b 3 :c 2}} 0.5 176 | 1 2 1.0 177 | [:a :b] [:a :c] (double (/ 1 3)) 178 | [:a] [:b] 0.5 179 | [:a] [:a :b] 0.5 180 | [] [:a] 2.0 181 | [:a] [] 0.5 182 | [:a] nil 0.5)) 183 | 184 | ;; generative tests 185 | 186 | (def compound (fn [inner-gen] 187 | (gen/one-of [(gen/list inner-gen) 188 | (gen/vector inner-gen) 189 | (gen/set inner-gen) 190 | (gen/map inner-gen inner-gen)]))) 191 | 192 | (def scalars (gen/frequency [[19 (gen/one-of [gen/int 193 | gen/string])] 194 | [1 (gen/return nil)]])) 195 | 196 | (test/defspec quick-end-2-end-generative-test 197 | 2000 198 | (prop/for-all [a (gen/recursive-gen compound scalars) 199 | b (gen/recursive-gen compound scalars)] 200 | (let [s (diff a b {:algo :quick}) 201 | e (e/get-edits s) 202 | s' (e/edits->script e)] 203 | (and (= b (patch a s)) 204 | (= b (patch a s')))))) 205 | 206 | 207 | (test/defspec a-star-end-2-end-generative-test 208 | 2000 209 | (prop/for-all [a (gen/recursive-gen compound scalars) 210 | b 
(gen/recursive-gen compound scalars)] 211 | (let [s (diff a b) 212 | e (e/get-edits s) 213 | s' (e/edits->script e)] 214 | (and (= b (patch a s)) 215 | (= b (patch a s')))))) 216 | 217 | (test/defspec combine-edits-generative-test 218 | 2000 219 | (prop/for-all [a (gen/recursive-gen compound scalars) 220 | b (gen/recursive-gen compound scalars) 221 | c (gen/recursive-gen compound scalars)] 222 | (let [d-ab (diff a b {:algo :quick}) 223 | d-bc (diff b c {:algo :quick}) 224 | d-ac (diff a c {:algo :quick})] 225 | (and (= c (patch a d-ac)) 226 | (= c (patch a (e/combine d-ab d-bc))) 227 | (= c (patch a (e/edits->script 228 | (into (e/get-edits d-ab) (e/get-edits d-bc))))))))) 229 | 230 | ;; sample data tests 231 | 232 | (def data1 (-> "resources/drawing1.edn" 233 | #?(:default slurp :cljs com/vslurp) 234 | #?(:default read-string :cljs reader/read-string))) 235 | (def data2 (-> "resources/drawing2.edn" 236 | #?(:default slurp :cljs com/vslurp) 237 | #?(:default read-string :cljs reader/read-string))) 238 | (def data3 (-> "resources/drawing3.edn" 239 | #?(:default slurp :cljs com/vslurp) 240 | #?(:default read-string :cljs reader/read-string))) 241 | (def data4 (-> "resources/drawing4.edn" 242 | #?(:default slurp :cljs com/vslurp) 243 | #?(:default read-string :cljs reader/read-string))) 244 | 245 | (deftest drawing-sample-test 246 | (testing "A sample JSON data of a drawing program from https://github.com/justsml/json-diff-performance, converted to edn using https://github.com/peterschwarz/json-to-edn" 247 | (let [diff12 (diff data1 data2) 248 | diff13 (diff data1 data3) 249 | diff14 (diff data1 data4)] 250 | (is (= data2 (patch data1 diff12))) 251 | (is (= 1 (e/edit-distance diff12))) 252 | (is (= 7 (e/get-size diff12))) 253 | (is (= (e/get-edits diff12) 254 | [[[2 :fill] :r "#0000ff"]])) 255 | (is (= data3 (patch data1 diff13))) 256 | (is (= 5 (e/edit-distance diff13))) 257 | (is (= 31 (e/get-size diff13))) 258 | (is (= (e/get-edits diff13) 259 | [[[2 :rx] :r 69.5] 
260 | [[2 :fill] :r "#0000ff"] 261 | [[2 :cx] :r 230.5] 262 | [[2 :cy] :r 228] 263 | [[2 :ry] :r 57]])) 264 | (is (= data4 (patch data1 diff14))) 265 | (is (= 13 (e/edit-distance diff14))) 266 | (is (= 73 (e/get-size diff14))) 267 | (is (= (e/get-edits diff14) 268 | [[[0 :y] :r 13] 269 | [[0 :width] :r 262] 270 | [[0 :x] :r 19] 271 | [[0 :height] :r 101] 272 | [[1 :y] :r 122] 273 | [[1 :x] :r 12] 274 | [[1 :height] :r 25.19999999999999] 275 | [[2] :-] 276 | [[2] :-] 277 | [[2 :y] :r 208] 278 | [[2 :x] :r 12] 279 | [[2 :height] :r 25.19999999999999] 280 | [[3] :-]]))))) 281 | 282 | (comment 283 | 284 | 285 | (require '[criterium.core :as c]) 286 | 287 | ;; benchmarks on Intel i7-6850K CPU @ 3.60GHz 288 | 289 | ;; default A* algorithm 290 | 291 | (c/quick-bench (diff data1 data2)) 292 | ;; ==> 293 | ;; Evaluation count : 1752 in 6 samples of 292 calls. 294 | ;; Execution time mean : 358.745200 µs 295 | ;; Execution time std-deviation : 20.783677 µs 296 | ;; Execution time lower quantile : 344.041945 µs ( 2.5%) 297 | ;; Execution time upper quantile : 383.072120 µs (97.5%) 298 | ;; Overhead used : 14.553786 ns 299 | 300 | (c/quick-bench (diff data1 data3)) 301 | ;; ==> 302 | ;; Evaluation count : 1638 in 6 samples of 273 calls. 303 | ;; Execution time mean : 391.184907 µs 304 | ;; Execution time std-deviation : 23.452044 µs 305 | ;; Execution time lower quantile : 371.116374 µs ( 2.5%) 306 | ;; Execution time upper quantile : 423.216158 µs (97.5%) 307 | ;; Overhead used : 14.553786 ns 308 | 309 | (c/quick-bench (diff data1 data4)) 310 | ;; ==> 311 | ;; Evaluation count : 246 in 6 samples of 41 calls. 312 | ;; Execution time mean : 2.147089 ms 313 | ;; Execution time std-deviation : 18.701984 µs 314 | ;; Execution time lower quantile : 2.132857 ms ( 2.5%) 315 | ;; Execution time upper quantile : 2.179175 ms (97.5%) 316 | ;; Overhead used : 14.553786 ns 317 | 318 | (c/quick-bench (diff data4 data1)) 319 | ;; ==> 320 | ;; Evaluation count : 624 in 6 samples of 104 calls. 
321 | ;; Execution time mean : 1.044390 ms 322 | ;; Execution time std-deviation : 35.679732 µs 323 | ;; Execution time lower quantile : 1.006069 ms ( 2.5%) 324 | ;; Execution time upper quantile : 1.097140 ms (97.5%) 325 | ;; Overhead used : 14.553786 ns 326 | 327 | (e/edit-distance (diff data1 data4)) 328 | ;; ==> 13 329 | (e/get-size (diff data1 data4)) 330 | ;; ==> 73 331 | (diff data1 data4) 332 | ;; ==> 333 | ;; [[[0 :y] :r 13] [[0 :width] :r 262] [[0 :x] :r 19] [[0 :height] :r 101] [[1 :y] :r 122] [[1 :x] :r 12] [[1 :height] :r 25.19999999999999] [[2] :-] [[2] :-] [[2 :y] :r 208] [[2 :x] :r 12] [[2 :height] :r 25.19999999999999] [[3] :-]] 334 | 335 | ;; quick algorithm 336 | 337 | (c/quick-bench (diff data1 data2 {:algo :quick})) 338 | ;; ==> 339 | ;; Evaluation count : 14100 in 6 samples of 2350 calls. 340 | ;; Execution time mean : 41.946587 µs 341 | ;; Execution time std-deviation : 3.521578 µs 342 | ;; Execution time lower quantile : 37.960159 µs ( 2.5%) 343 | ;; Execution time upper quantile : 45.623306 µs (97.5%) 344 | ;; Overhead used : 9.966537 ns 345 | 346 | (c/quick-bench (diff data1 data3 {:algo :quick})) 347 | ;; ==> 348 | ;; Evaluation count : 13794 in 6 samples of 2299 calls. 349 | ;; Execution time mean : 45.373427 µs 350 | ;; Execution time std-deviation : 2.745173 µs 351 | ;; Execution time lower quantile : 42.548519 µs ( 2.5%) 352 | ;; Execution time upper quantile : 49.367947 µs (97.5%) 353 | ;; Overhead used : 9.966537 ns 354 | 355 | (c/quick-bench (diff data1 data4 {:algo :quick})) 356 | ;; ==> 357 | ;; Evaluation count : 4674 in 6 samples of 779 calls. 
358 | ;; Execution time mean : 135.947273 µs 359 | ;; Execution time std-deviation : 10.746898 µs 360 | ;; Execution time lower quantile : 124.835175 µs ( 2.5%) 361 | ;; Execution time upper quantile : 150.795063 µs (97.5%) 362 | ;; Overhead used : 9.966537 ns 363 | (e/edit-distance (diff data1 data4 {:algo :quick})) 364 | ;; ==> 36 365 | (e/get-size (diff data1 data4)) 366 | ;; ==> 217 367 | (diff data1 data4) 368 | ;; [[[0] :-] [[0] :-] [[0] :-] [[0 :y1] :-] [[0 :type] :r "rect"] [[0 :borderWidth] :r 1] [[0 :label] :-] [[0 :x1] :-] [[0 :y2] :-] [[0 :x2] :-] [[0 :y] :+ 13] [[0 :r] :+ 0] [[0 :width] :+ 262] [[0 :x] :+ 19] [[0 :height] :+ 101] [[1 :y] :r 122] [[1 :color] :r "#0000FF"] [[1 :fill] :r {:r 256, :g 0, :b 0, :a 0.5}] [[1 :width] :r 10] [[1 :type] :r "textBlock"] [[1 :size] :r "24px"] [[1 :weight] :r "bold"] [[1 :x] :r 12] [[1 :height] :r 25.19999999999999] [[1 :text] :r "DojoX Drawing Rocks"] [[2 :points] :-] [[2 :type] :r "text"] [[2 :y] :+ 208] [[2 :family] :+ "sans-serif"] [[2 :width] :+ 200] [[2 :size] :+ "18px"] [[2 :pad] :+ 3] [[2 :weight] :+ "normal"] [[2 :x] :+ 12] [[2 :height] :+ 25.19999999999999] [[2 :text] :+ "This is just text"]] 369 | 370 | 371 | (def old (-> "resources/old.edn" 372 | #?(:clj slurp :cljs com/vslurp) 373 | #?(:clj read-string :cljs reader/read-string))) 374 | (def new (-> "resources/new.edn" 375 | #?(:clj slurp :cljs com/vslurp) 376 | #?(:clj read-string :cljs reader/read-string))) 377 | (diff old new {:str-diff :word :algo :quick}) 378 | 379 | 380 | 381 | 382 | ) 383 | -------------------------------------------------------------------------------- /src/editscript/diff/a_star.cljc: -------------------------------------------------------------------------------- 1 | ;; 2 | ;; Copyright (c) Huahai Yang. All rights reserved. 
3 | ;; The use and distribution terms for this software are covered by the 4 | ;; Eclipse Public License 1.0 (http://opensource.org/licenses/eclipse-1.0.php) 5 | ;; which can be found in the file LICENSE at the root of this distribution. 6 | ;; By using this software in any fashion, you are agreeing to be bound by 7 | ;; the terms of this license. 8 | ;; You must not remove this notice, or any other, from this software. 9 | ;; 10 | 11 | (ns ^:no-doc editscript.diff.a-star 12 | (:require [editscript.edit :as e] 13 | [editscript.util.pairing :as pa] 14 | [editscript.util.index :as i] 15 | [editscript.util.common :as co] 16 | #?(:cljs [goog.math.Long :refer [getMaxValue]])) 17 | #?(:clj (:import [clojure.lang Keyword] 18 | [java.io Writer] 19 | [java.lang Comparable] 20 | [editscript.util.index Node]) 21 | :cljr (:import [clojure.lang Keyword] 22 | [editscript.util.index Node]))) 23 | 24 | #?(:clj (set! *warn-on-reflection* true)) 25 | #?(:cljr (set! *warn-on-reflection* true)) 26 | #?(:clj (set! 
*unchecked-math* :warn-on-boxed)) 27 | 28 | ;; diffing 29 | 30 | (defn- coord-hash [a b] (co/szudzik (i/get-order a) (i/get-order b))) 31 | 32 | #?(:clj 33 | (deftype Coord [^Node a 34 | ^Node b] 35 | ;; Java's native hash is too slow, 36 | ;; overriding hashCode significantly speeds things up 37 | Object 38 | (hashCode [_] (coord-hash a b)) 39 | (equals [_ that] 40 | (and (= (i/get-order a) (i/get-order (.-a ^Coord that))) 41 | (= (i/get-order b) (i/get-order (.-b ^Coord that))))) 42 | (toString [_] 43 | (str "[" (i/get-value a) "," (i/get-value b) "]")) 44 | 45 | Comparable 46 | (compareTo [this that] 47 | (- (.hashCode this) (.hashCode that)))) 48 | :cljr 49 | (deftype Coord [^Node a 50 | ^Node b] 51 | ;; Java's native hash is too slow, 52 | ;; overriding hashCode significantly speeds things up 53 | Object 54 | (GetHashCode [_] (coord-hash a b)) 55 | (Equals [_ that] 56 | (and (= (i/get-order a) (i/get-order (.-a ^Coord that))) 57 | (= (i/get-order b) (i/get-order (.-b ^Coord that))))) 58 | (ToString [_] 59 | (str "[" (i/get-value a) "," (i/get-value b) "]")) 60 | 61 | IComparable 62 | (CompareTo [this that] 63 | (- (.GetHashCode this) (.GetHashCode that)))) 64 | 65 | :cljs 66 | (deftype Coord [^Node a 67 | ^Node b] 68 | IHash 69 | (-hash [_] (coord-hash a b)) 70 | 71 | IEquiv 72 | (-equiv [_ that] 73 | (and (= (i/get-order a) (i/get-order (.-a ^Coord that))) 74 | (= (i/get-order b) (i/get-order (.-b ^Coord that))))) 75 | 76 | IComparable 77 | (-compare [this that] 78 | (- (-hash this) (-hash that))))) 79 | 80 | (defn- get-coord [^Coord coord] [(.-a coord) (.-b coord)]) 81 | 82 | (defprotocol IStep 83 | (operator [this] "Operator to try") 84 | (current [this] "Starting pair of nodes") 85 | (neighbor [this] "Destination pair of nodes")) 86 | 87 | (deftype Step [^Keyword op 88 | ^Coord cur 89 | ^Coord nbr] 90 | IStep 91 | (operator [_] op) 92 | (current [_] cur) 93 | (neighbor [_] nbr)) 94 | 95 | #?(:clj 96 | (defmethod print-method Step 97 | [x ^Writer writer] 98 
| (print-method {:op (operator x) 99 | :cur (current x) 100 | :nbr (neighbor x)} 101 | writer))) 102 | 103 | (defn- get-step 104 | [step] 105 | ((juxt operator current neighbor) step)) 106 | 107 | (defprotocol IState 108 | (get-came [this] "Get the local succession map") 109 | (set-came [this came] "Set the local succession map") 110 | (get-open [this] "Get the open priority queue") 111 | (set-open [this open] "Set the open priority queue") 112 | (get-g [this] "Get the g cost map") 113 | (set-g [this g] "Set the g cost map")) 114 | 115 | (deftype State [^:unsynchronized-mutable came 116 | ^:unsynchronized-mutable open 117 | ^:unsynchronized-mutable g] 118 | IState 119 | (get-came [_] came) 120 | (set-came [this c] (set! came c) this) 121 | (get-open [_] open) 122 | (set-open [this o] (set! open o) this) 123 | (get-g [_] g) 124 | (set-g [this got] (set! g got) this)) 125 | 126 | (defn- get-state 127 | [state] 128 | ((juxt get-came get-open get-g) state)) 129 | 130 | (defn- access-g 131 | [g cur] 132 | (get g cur #?(:clj Long/MAX_VALUE 133 | :cljr Int64/MaxValue 134 | :cljs (getMaxValue)))) 135 | 136 | (declare diff*) 137 | 138 | (defn- compute-cost 139 | [^Coord cur came g op opts] 140 | (let [^long gc (access-g g cur)] 141 | (case op 142 | := gc 143 | ;; delete only cost 1, for not including deleted data in script 144 | :- (inc gc) 145 | ;; these cost the size of included data, plus 1 146 | (:a :i) (let [sb (i/get-size (.-b cur))] 147 | (+ gc (inc ^long sb))) 148 | :r (+ gc ^long (diff* (.-a cur) (.-b cur) came opts))))) 149 | 150 | (defn- heuristic 151 | "A simplistic but optimistic estimate of the cost to reach goal when at (x y). 152 | 153 | For nested structure, multiple deletion may be merged into one. 154 | Also, because addition/replacement requires new value to be present in 155 | editscript, whereas deletion does not, we assign estimate differently. 
" 156 | ^long [type cur end [gx gy]] 157 | (case type 158 | (:map :set) 0 159 | (:vec :lst) (let [[na nb] (get-coord cur) 160 | [ra rb] (get-coord end) 161 | x (if (identical? ra na) gx (i/get-order na)) 162 | y (if (identical? rb nb) gy (i/get-order nb)) 163 | dy (- ^long gy ^long y) 164 | dx (- ^long gx ^long x)] 165 | (cond 166 | (== dx 0) dy 167 | (== dy 0) 1 168 | (> dx dy) 3 169 | (< dx dy) (- dy dx) 170 | :else 2)))) 171 | 172 | (defn- explore 173 | [type end came goal state step opts] 174 | (let [[came' open g] (get-state state) 175 | [op cur nbr] (get-step step) 176 | tmp-g (compute-cost cur came g op opts)] 177 | (if (>= ^long tmp-g ^long (access-g g nbr)) 178 | state 179 | (doto state 180 | (set-came (assoc! came' nbr [cur op])) 181 | (set-open (assoc open nbr 182 | (+ ^long tmp-g ^long (heuristic type nbr end goal)))) 183 | (set-g (assoc! g nbr tmp-g)))))) 184 | 185 | (defn- next-node 186 | [na ra] 187 | (or (i/get-next na) ra)) 188 | 189 | (defn- vec-frontier 190 | [end cur] 191 | (let [[ra rb] (get-coord end) 192 | [na nb] (get-coord cur) 193 | a=b (= (i/get-value na) (i/get-value nb)) 194 | x=gx (identical? na ra) 195 | x