├── .gitignore
├── LICENSE
├── README.md
├── project.clj
├── src
└── lambda_ml
│ ├── clustering
│ ├── dbscan.clj
│ ├── hierarchical.clj
│ └── k_means.clj
│ ├── core.clj
│ ├── data
│ ├── binary_tree.clj
│ └── kd_tree.clj
│ ├── decision_tree.clj
│ ├── distance.clj
│ ├── ensemble.clj
│ ├── examples
│ ├── kaggle
│ │ ├── digit_recognizer.clj
│ │ ├── march_madness.clj
│ │ └── titanic.clj
│ └── worksheets
│ │ ├── dbscan.clj
│ │ ├── decision_tree.clj
│ │ ├── hierarchical.clj
│ │ └── k_means.clj
│ ├── factorization.clj
│ ├── metrics.clj
│ ├── naive_bayes.clj
│ ├── nearest_neighbors.clj
│ ├── neural_network.clj
│ ├── random_forest.clj
│ ├── regression.clj
│ └── util.clj
└── test
└── lambda_ml
├── clustering
├── dbscan_test.clj
├── hierarchical_test.clj
└── k_means_test.clj
├── core_test.clj
├── data
├── binary_tree_test.clj
└── kd_tree_test.clj
├── decision_tree_test.clj
├── distance_test.clj
├── ensemble_test.clj
├── factorization_test.clj
├── metrics_test.clj
├── naive_bayes_test.clj
├── nearest_neighbors_test.clj
├── neural_network_test.clj
├── random_forest_test.clj
└── regression_test.clj
/.gitignore:
--------------------------------------------------------------------------------
1 | /target
2 | /classes
3 | /checkouts
4 | pom.xml
5 | pom.xml.asc
6 | *.jar
7 | *.class
8 | /.lein-*
9 | /.nrepl-port
10 | .hgignore
11 | .hg/
12 | *~
13 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright (c) 2015-2018 Kelvin Jiang
2 |
3 | Permission is hereby granted, free of charge, to any person obtaining a copy
4 | of this software and associated documentation files (the "Software"), to deal
5 | in the Software without restriction, including without limitation the rights
6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 | copies of the Software, and to permit persons to whom the Software is
8 | furnished to do so, subject to the following conditions:
9 |
10 | The above copyright notice and this permission notice shall be included in all
11 | copies or substantial portions of the Software.
12 |
13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
19 | SOFTWARE.
20 |
21 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # lambda-ml
2 |
3 | A small machine learning library aimed at providing simple, concise
4 | implementations of machine learning techniques and utilities. It is written in
5 | Lisp (using the implementation du jour, Clojure) to maximize expressiveness and
6 | enjoyment.
7 |
8 | ## Installation
9 |
10 | Add the following dependency to your project:
11 |
12 | [![Clojars Project](https://img.shields.io/clojars/v/lambda-ml.svg)](https://clojars.org/lambda-ml)
13 |
14 | ## Documentation
15 |
16 | * [API Docs](https://cloudkj.github.io/lambda-ml/)
17 |
18 | ### Supervised Learning Algorithms
19 |
20 | * [Artificial neural network](https://cloudkj.github.io/lambda-ml/lambda-ml.neural-network.html)
21 | * [Decision tree](https://cloudkj.github.io/lambda-ml/lambda-ml.decision-tree.html)
22 | * [Ensemble methods](https://cloudkj.github.io/lambda-ml/lambda-ml.ensemble.html)
23 | * [K-nearest neighbors](https://cloudkj.github.io/lambda-ml/lambda-ml.nearest-neighbors.html)
24 | * [Linear regression](https://cloudkj.github.io/lambda-ml/lambda-ml.regression.html)
25 | * [Logistic regression](https://cloudkj.github.io/lambda-ml/lambda-ml.regression.html)
26 | * [Naive Bayes](https://cloudkj.github.io/lambda-ml/lambda-ml.naive-bayes.html)
27 | * [Random forest](https://cloudkj.github.io/lambda-ml/lambda-ml.random-forest.html)
28 |
29 | ### Unsupervised Learning Algorithms
30 |
31 | * [DBSCAN](https://cloudkj.github.io/lambda-ml/lambda-ml.clustering.dbscan.html)
32 | * [Hierarchical agglomerative clustering](https://cloudkj.github.io/lambda-ml/lambda-ml.clustering.hierarchical.html)
33 | * [K-means](https://cloudkj.github.io/lambda-ml/lambda-ml.clustering.k-means.html)
34 | * [Non-negative matrix factorization](https://cloudkj.github.io/lambda-ml/lambda-ml.factorization.html)
35 |
36 | ## Examples
37 |
38 | * [Classifying handwritten digits with an artificial neural network](http://viewer.gorilla-repl.org/view.html?source=github&user=cloudkj&repo=lambda-ml&path=src/lambda_ml/examples/kaggle/digit_recognizer.clj)
39 | * [DBSCAN example](http://viewer.gorilla-repl.org/view.html?source=github&user=cloudkj&repo=lambda-ml&path=src/lambda_ml/examples/worksheets/dbscan.clj)
40 | * [Decision tree example](http://viewer.gorilla-repl.org/view.html?source=github&user=cloudkj&repo=lambda-ml&path=src/lambda_ml/examples/worksheets/decision_tree.clj)
41 | * [Hierarchical agglomerative clustering example](http://viewer.gorilla-repl.org/view.html?source=github&user=cloudkj&repo=lambda-ml&path=src/lambda_ml/examples/worksheets/hierarchical.clj)
42 | * [K-means example](http://viewer.gorilla-repl.org/view.html?source=github&user=cloudkj&repo=lambda-ml&path=src/lambda_ml/examples/worksheets/k_means.clj)
43 | * [Predicting survival on the Titanic with logistic regression](http://viewer.gorilla-repl.org/view.html?source=github&user=cloudkj&repo=lambda-ml&path=src/lambda_ml/examples/kaggle/titanic.clj)
44 |
45 | ## License
46 |
47 | Copyright © 2015-2018
48 |
--------------------------------------------------------------------------------
/project.clj:
--------------------------------------------------------------------------------
1 | (defproject lambda-ml "0.1.1"
2 | :description "A small machine learning library aimed at providing simple, concise implementations of machine learning techniques and utilities."
3 | :url "http://github.com/cloudkj/lambda-ml"
4 | :license {:name "MIT License"
5 | :url "http://opensource.org/licenses/MIT"}
6 | :plugins [[lein-ancient "0.6.15"]
7 | [lein-codox "0.10.1"]
8 | [lein-exec "0.3.6"]
9 | [lein-gorilla "0.4.0"]]
10 | :codox {:metadata {:doc/format :markdown}
11 | :namespaces [#"^lambda-ml\.(?!examples)"]
12 | :source-uri "https://github.com/cloudkj/lambda-ml/blob/master/{filepath}#L{line}"}
13 | :jvm-opts ["-Xmx8g"]
14 | :dependencies [[org.clojure/clojure "1.8.0"]
15 | [org.clojure/data.csv "0.1.4"]
16 | [org.clojure/data.priority-map "0.0.10"]
17 | [org.clojure/math.numeric-tower "0.0.4"]
18 | [gorilla-plot "0.1.4"]
19 | [net.mikera/core.matrix "0.62.0"]
20 | [net.mikera/vectorz-clj "0.47.0"]])
21 |
--------------------------------------------------------------------------------
/src/lambda_ml/clustering/dbscan.clj:
--------------------------------------------------------------------------------
1 | (ns lambda-ml.clustering.dbscan
2 | "Density-based clustering with DBSCAN.
3 |
4 | Example usage:
5 | ```
6 | (def data [[2 10] [2 5] [8 4] [5 8] [7 5] [6 4] [1 2] [4 9]])
7 | (let [epsilon 4.0
8 | min-pts 2]
9 | (dbscan lambda-ml.distance/euclidean epsilon min-pts data))
10 | ;;=> {[8 4] 1, [6 4] 1, [7 5] 1, [5 8] 2, [4 9] 2}
11 | ```"
12 | (:require [clojure.set :as set]
13 | [lambda-ml.data.binary-tree :as bt]
14 | [lambda-ml.data.kd-tree :as kd]))
15 |
(defn make-proximity-search
  "Given a distance function f and a coll of points, returns a function that,
  given a distance and a query point, returns a sequence of all points whose
  distance to the query point is less than or equal to the given distance.

  f must support two arities: (f a b) for the full distance and (f a b dim)
  for the distance restricted to a single dimension. The latter is used as a
  lower bound to decide whether the far side of a splitting plane can be
  skipped."
  [f points]
  (let [dims (count (first points))
        t    (kd/make-tree dims points)]
    (fn search
      ([dist query]
       (search dist query t 0 (list)))
      ([dist query tree depth cand]
       (if (nil? tree)
         cand
         (let [node       (bt/get-value tree)
               dim        (mod depth dims)
               [near far] (if (<= (nth query dim) (nth node dim))
                            [(bt/get-left tree) (bt/get-right tree)]
                            [(bt/get-right tree) (bt/get-left tree)])]
           (cond->> cand
             ;; Add current node if it's within proximity (inclusive bound)
             (<= (f query node) dist)
             (cons node)
             ;; Always explore the branch on the query's side of the plane
             true
             (search dist query near (inc depth))
             ;; Explore the far branch unless the splitting plane is strictly
             ;; farther away than dist. The comparison must be inclusive to
             ;; match the inclusion test above: a point lying on the plane at
             ;; exactly dist from the query still qualifies.
             (<= (f query node dim) dist)
             (search dist query far (inc depth)))))))))
42 |
(defn dbscan
  "Returns a clustering of points represented as a map from each clustered
  point to its (1-based) cluster id. Points classified as noise are absent from
  the map. f is the distance function, epsilon the neighborhood radius (in the
  units of f) and min-pts the minimum neighborhood size, the point itself
  included, for a point to be a core point.

  A cluster is grown from a core point by repeatedly visiting every point that
  is density-reachable from it, not just the neighbors of its neighbors, so
  elongated clusters are not split into several pieces."
  [f epsilon min-pts points]
  (let [search (make-proximity-search f points)]
    (loop [unvisited  points
           cluster-id 0
           visited    #{}
           clusters   {}]
      (let [point (first unvisited)]
        (cond
          ;; No more points
          (nil? point)
          clusters
          ;; Already visited
          (visited point)
          (recur (rest unvisited) cluster-id visited clusters)
          ;; Visit point
          :else
          (let [visited   (conj visited point)
                neighbors (search epsilon point)]
            (if (< (count neighbors) min-pts)
              ;; Noise (may still be claimed later as a border point)
              (recur (rest unvisited) cluster-id visited clusters)
              ;; Expand cluster with a worklist of candidate points
              (let [cluster-id (inc cluster-id)
                    [visited clusters]
                    (loop [frontier neighbors
                           visited  visited
                           clusters (assoc clusters point cluster-id)]
                      (if (empty? frontier)
                        [visited clusters]
                        (let [q        (first frontier)
                              frontier (rest frontier)
                              ;; First cluster to reach a point keeps it
                              clusters (if (contains? clusters q)
                                         clusters
                                         (assoc clusters q cluster-id))]
                          (if (visited q)
                            (recur frontier visited clusters)
                            (let [nn (search epsilon q)]
                              (recur (if (< (count nn) min-pts)
                                       ;; Border point: belongs to the cluster
                                       ;; but does not extend it
                                       frontier
                                       ;; Core point: its neighbors are
                                       ;; density-reachable too
                                       (into frontier nn))
                                     (conj visited q)
                                     clusters))))))]
                (recur (rest unvisited) cluster-id visited clusters)))))))))
90 |
--------------------------------------------------------------------------------
/src/lambda_ml/clustering/hierarchical.clj:
--------------------------------------------------------------------------------
1 | (ns lambda-ml.clustering.hierarchical
2 | "Hierarchical agglomerative clustering.
3 |
4 | Example usage:
5 | ```
6 | (def data [[1 1 1 0 1 0 0 1 1 1]
7 | [1 1 0 1 1 0 0 0 0 1]
8 | [0 1 1 0 1 0 0 1 0 0]
9 | [0 0 0 1 0 1 0 0 0 0]
10 | [1 1 1 0 1 0 1 1 1 0]
11 | [0 1 0 1 1 0 0 0 0 1]
12 | [0 1 1 0 1 1 0 1 1 0]])
13 | (agglomerative-clustering single-link lambda-ml.distance/euclidean data)
14 | ;;=> [[1 5] [0 4] [2 6] [0 2] [0 1] [0 3]]
15 | ```"
16 | (:require [clojure.data.priority-map :as pmap]))
17 |
(defn pairwise-distances
  "Builds the distance matrix for points as a nested map keyed by point index:
  the entry at [i j] is {:distance (f pi pj) :index j}. Every ordered pair of
  indexes is present, including i = j."
  [f points]
  (let [indexed (map-indexed vector points)]
    (reduce (fn [matrix [[i pi] [j pj]]]
              (assoc-in matrix [i j] {:distance (f pi pj) :index j}))
            {}
            (for [row indexed
                  col indexed]
              [row col]))))
28 |
(defn distance-queues
  "Given a distance matrix as produced by pairwise-distances, returns a map
  from each index to a priority map (keyed on :distance) holding the matrix
  entries for every other index. The entry for the index itself is left out."
  [distances]
  (reduce-kv (fn [queues i row]
               (assoc queues i
                      (reduce (fn [queue entry]
                                (assoc queue (:index entry) entry))
                              (pmap/priority-map-keyfn :distance)
                              (vals (dissoc row i)))))
             {}
             distances))
40 |
(defn single-link
  "Single-link distance from point x to the cluster formed by merging y and z:
  the smaller of the recorded distances from x to y and from x to z."
  [distances x y z]
  (let [to-y (get-in distances [x y :distance])
        to-z (get-in distances [x z :distance])]
    (min to-y to-z)))
48 |
(defn complete-link
  "Complete-link distance from point x to the cluster formed by merging y and
  z: the larger of the recorded distances from x to y and from x to z."
  [distances x y z]
  (let [to-y (get-in distances [x y :distance])
        to-z (get-in distances [x z :distance])]
    (max to-y to-z)))
56 |
(defn agglomerative-clustering
  "Returns a clustering of points represented as a seq of merges, where each
  merge is a pair of indexes indicating the two points to be merged at each
  step, using the linkage function link and distance function f."
  [link f points]
  ;; Loop state:
  ;;   distances - nested map [i j] -> {:distance .. :index ..}
  ;;   queues    - per-index priority map of the other indexes by :distance
  ;;   active    - map whose keys are the indexes of clusters not yet merged away
  ;;   merges    - vector of [k1 k2] pairs accumulated so far
  (loop [distances (pairwise-distances f points)
         queues (distance-queues distances)
         active (reduce #(assoc %1 %2 true) {} (range (count points)))
         merges []]
    ;; Stop once at most one active cluster remains
    (if (<= (count active) 1)
      merges
      (let [;; Find the two most similar clusters: scan the head of every
            ;; active cluster's queue and keep the pair with the smallest
            ;; distance. Only a strictly smaller distance replaces the current
            ;; best, so the first minimum encountered wins ties.
            [_ k1 k2] (->> (keys active)
                           (reduce (fn [[min-dist k1 k2] i]
                                     (let [[k {dist :distance}] (peek (get queues i))]
                                       (if (< dist min-dist)
                                         [dist i k]
                                         [min-dist k1 k2])))
                                   [Double/MAX_VALUE nil nil]))
            ;; Clear queue for k1; it is rebuilt below from the merged distances
            queues (assoc queues k1 (pmap/priority-map-keyfn :distance))
            ;; Update distances for every other active cluster i. Note that
            ;; link reads the distances from the start of this iteration, not
            ;; the partially updated accumulator d.
            [distances queues] (->> (keys active)
                                    (filter #(and (not (= k1 %)) (not (= k2 %))))
                                    (reduce (fn [[d q] i]
                                              (let [dist (link distances i k1 k2)
                                                    ;; Store the merged distance symmetrically
                                                    d (-> (assoc-in d [i k1 :distance] dist)
                                                          (assoc-in [k1 i :distance] dist))
                                                    ;; Drop the stale k1/k2 entries from i's queue,
                                                    ;; then re-insert k1 (now the merged cluster)
                                                    ;; into i's queue and i into k1's queue
                                                    q (-> (update q i #(dissoc % k1))
                                                          (update i #(dissoc % k2))
                                                          (update i #(assoc % k1 (get-in d [i k1])))
                                                          (update k1 #(assoc % i (get-in d [k1 i]))))]
                                                [d q]))
                                            [distances queues]))]
        (recur distances
               queues
               (dissoc active k2)            ;; k2 is absorbed into k1 and deactivated
               (conj merges [k1 k2])))))) ;; Record the merge of k1 and k2
95 |
--------------------------------------------------------------------------------
/src/lambda_ml/clustering/k_means.clj:
--------------------------------------------------------------------------------
1 | (ns lambda-ml.clustering.k-means
2 | "K-means clustering.
3 |
4 | Example usage:
5 | ```
6 | (def data [[1 1] [1.5 2] [3 4] [5 7] [3.5 5] [4.5 5] [3.5 4.5]])
7 | (let [k 2]
8 | (-> (k-means k lambda-ml.distance/euclidean data)
9 | (nth 100)))
10 | ;;=> {0 ([3.5 4.5] [4.5 5] [3.5 5] [5 7] [3 4]), 1 ([1.5 2] [1 1])}
11 | ```"
12 | (:require [lambda-ml.core :as c]))
13 |
(defn assign-clusters
  "Assigns every point in x to the centroid in mu that is closest under the
  distance function f. Returns a map from centroid index to a list of the
  points assigned to it; centroids that attract no points have no entry."
  [f mu x]
  (let [mu-indexed (map-indexed vector mu)
        closest    (fn [xi]
                     ;; Index of the centroid nearest to xi
                     (first (apply min-key (comp (partial f xi) second) mu-indexed)))]
    (reduce (fn [clusters xi]
              (let [index   (closest xi)
                    members (or (clusters index) (list))]
                (assoc clusters index (conj members xi))))
            {}
            x)))
28 |
(defn update-centroids
  "For each cluster index from 0 to k-1, returns the component-wise mean of the
  points assigned to that index in clusters."
  [k clusters]
  (for [index (range k)
        :let [members (clusters index)
              size    (count members)]]
    (map #(/ % size) (apply map + members))))
37 |
(defn k-means-seq
  "Returns a lazy, infinite sequence of cluster assignments, one per k-means
  iteration, starting from the given centroids. Each element is a map from
  centroid index to the points assigned to it, computed with the distance
  function f.

  A centroid that attracts no points in an iteration keeps its previous
  position. Without this, the mean of an empty cluster is undefined and the
  following iteration fails."
  [k f points centroids]
  (lazy-seq
    (let [clusters (assign-clusters f centroids points)
          recenter (fn [index previous]
                     (if-let [members (seq (get clusters index))]
                       ;; Reuse update-centroids on this single cluster
                       (first (update-centroids 1 {0 members}))
                       previous))
          ;; At most k centroids are carried forward
          updated  (map-indexed recenter (take k centroids))]
      (cons clusters
            (k-means-seq k f points updated)))))
43 |
(defn k-means
  "Runs k-means on points with distance function f, seeding the centroids with
  k points sampled without replacement. Returns a lazy sequence whose n-th
  element is the clustering (a map from cluster id to a collection of points)
  after n iterations."
  [k f points]
  (let [initial-centroids (c/sample-without-replacement points k)]
    (k-means-seq k f points initial-centroids)))
50 |
--------------------------------------------------------------------------------
/src/lambda_ml/core.clj:
--------------------------------------------------------------------------------
1 | (ns lambda-ml.core
2 | (:require [clojure.math.numeric-tower :refer :all]))
3 |
(defn vector-with-intercept
  "Returns a vector consisting of 1.0 followed by the elements of features."
  [features]
  (into [1.0] features))
5 |
(defn dot-product
  "Returns the sum of the pairwise products of a and b. Extra elements of the
  longer sequence are ignored."
  [a b]
  (apply + (map * a b)))
9 |
(defn l2-norm
  "Returns the Euclidean (L2) norm of a: the square root of its dot product
  with itself."
  [a]
  (-> (dot-product a a)
      sqrt))
13 |
(defn mean
  "Returns the arithmetic mean of the numbers in coll."
  [coll]
  (let [total (reduce + coll)
        n     (count coll)]
    (/ total n)))
17 |
(defn median
  "Returns the median of coll: the middle element of the sorted values, or the
  average of the two middle elements when the count is even."
  [coll]
  (let [ordered (sort coll)
        n       (count coll)
        half    (quot n 2)]
    (if (even? n)
      (/ (+ (nth ordered (dec half)) (nth ordered half)) 2)
      (nth ordered half))))
26 |
(defn mode
  "Returns the most frequently occurring element of coll."
  [coll]
  (->> (frequencies coll)
       (apply max-key second)
       first))
30 |
(defn random-partition
  "Returns exactly n partitions of elements randomly selected from coll. Every
  element of coll appears in exactly one partition and partition sizes differ
  by at most one. When n exceeds the number of elements, the surplus partitions
  are empty. n is expected to be a positive integer."
  [n coll]
  (let [shuffled (shuffle coll)
        total    (count shuffled)
        base     (quot total n)
        extra    (rem total n)
        ;; The first `extra` partitions absorb the leftover elements, so the
        ;; result never has an (n+1)-th partial partition.
        sizes    (map #(if (< % extra) (inc base) base) (range n))
        offsets  (reductions + 0 sizes)]
    (map (fn [start size]
           (sequence (subvec shuffled start (+ start size))))
         offsets
         sizes)))
37 |
(defn sample-with-replacement
  "Returns n randomly selected elements, with replacement, from coll. When
  given, s is the collection the selected elements are conj'ed onto (defaults
  to an empty list). A fractional n is rounded up, since elements are drawn
  until the remaining count is no longer positive.

  Implemented as a loop so that large sample sizes do not exhaust the stack."
  ([coll n]
   (sample-with-replacement coll n (list)))
  ([coll n s]
   (if (<= n 0)
     s
     ;; Convert once up front so that every draw is an indexed lookup
     (let [pool (if (vector? coll) coll (vec coll))
           size (count pool)]
       (loop [remaining n
              picked    s]
         (if (<= remaining 0)
           picked
           (recur (dec remaining)
                  (conj picked (nth pool (rand-int size))))))))))
50 |
(defn sample-without-replacement
  "Returns n randomly selected elements, without replacement, from coll. When
  n is at least the size of coll, coll itself is returned. When given, s is the
  collection the selected elements are conj'ed onto (defaults to an empty
  list)."
  ([coll n]
   (sample-without-replacement coll n (list)))
  ([coll n s]
   (loop [pool      coll
          remaining n
          picked    s]
     (cond
       (<= remaining 0)            picked
       (empty? pool)               picked
       (>= remaining (count pool)) pool
       (not (vector? pool))        (recur (vec pool) remaining picked)
       :else
       (let [index  (rand-int (count pool))
             chosen (nth pool index)
             ;; Move the head into the chosen slot, then drop the head
             shrunk (subvec (assoc pool index (first pool)) 1)]
         (recur shrunk (dec remaining) (conj picked chosen)))))))
65 |
66 | ;; Common functions
67 |
(defn relu
  "Rectified linear unit: returns the larger of 0 and z."
  [z]
  (let [floor 0]
    (max floor z)))
71 |
(defn relu'
  "Derivative of relu: 1 when z is strictly positive, 0 otherwise."
  [z]
  (if (pos? z)
    1
    0))
75 |
(defn sigmoid
  "Logistic sigmoid of z: 1 / (1 + e^-z).

  Uses Math/exp directly rather than raising the rounded constant Math/E to a
  power, which is both more accurate and needs nothing outside the JVM
  standard library."
  [z]
  (/ 1 (+ 1 (Math/exp (- (double z))))))
79 |
(defn sigmoid'
  "Returns z * (1 - z)."
  [z]
  (let [complement (- 1 z)]
    (* z complement)))
83 |
(defn derivative
  "Returns the derivative function paired with the activation function f:
  relu' for relu and sigmoid' for sigmoid. Returns nil for any other function."
  [f]
  (condp = f
    relu    relu'
    sigmoid sigmoid'
    nil))
89 |
--------------------------------------------------------------------------------
/src/lambda_ml/data/binary_tree.clj:
--------------------------------------------------------------------------------
1 | (ns lambda-ml.data.binary-tree)
2 |
(defn make-tree
  "Creates a binary tree node, represented as the vector [val left right].
  With a single argument the node has no children."
  ([val]
   (make-tree val nil nil))
  ([val left right]
   [val left right]))
8 |
(defn get-value
  "Returns the value stored at the root of tree (position 0 of the node)."
  [tree]
  (let [value-slot 0]
    (nth tree value-slot)))
12 |
(defn get-left
  "Returns the left subtree of tree (position 1 of the node)."
  [tree]
  (let [left-slot 1]
    (nth tree left-slot)))
16 |
(defn get-right
  "Returns the right subtree of tree (position 2 of the node)."
  [tree]
  (let [right-slot 2]
    (nth tree right-slot)))
20 |
(defn get-path
  "Follows a sequence of :left / :right steps from the root of tree and returns
  the subtree reached. Throws IllegalArgumentException for any other step."
  [tree paths]
  (let [step->slot (fn [step]
                     (case step
                       :left  1
                       :right 2
                       (throw (IllegalArgumentException. "Invalid tree path"))))]
    (get-in tree (map step->slot paths))))
29 |
(defn leaf?
  "Returns true when tree has neither a left nor a right subtree."
  [tree]
  (not (or (some? (get-left tree))
           (some? (get-right tree)))))
33 |
(defn print-tree
  "Prints tree to *out*, one node per line in pre-order, indenting each node by
  its depth. A node is shown as its value's metadata when it has any, otherwise
  as the value itself."
  ([tree]
   (print-tree tree 0))
  ([tree level]
   (let [node   (get-value tree)
         label  (or (meta node) node)
         indent (apply str (repeat level " "))]
     (println (str indent label)))
   (doseq [child [(get-left tree) (get-right tree)]
           :when (some? child)]
     (print-tree child (inc level)))))
45 |
(defn adjacency-matrix
  "Returns an adjacency representation of a binary tree: a map from node id to
  {:edges [child ids] :value node value}. Ids are assigned in post-order, so
  children are numbered before their parent and the root gets the highest id."
  ([tree]
   (adjacency-matrix tree {}))
  ([tree matrix]
   (let [[with-children child-ids]
         (reduce (fn [[m ids] child]
                   (if (nil? child)
                     [m ids]
                     ;; The child's own id is the last one added by the
                     ;; recursive call
                     (let [extended (adjacency-matrix child m)]
                       [extended (conj ids (dec (count extended)))])))
                 [matrix []]
                 [(get-left tree) (get-right tree)])]
     (assoc with-children
            (count with-children)
            {:edges child-ids :value (get-value tree)}))))
58 |
--------------------------------------------------------------------------------
/src/lambda_ml/data/kd_tree.clj:
--------------------------------------------------------------------------------
1 | (ns lambda-ml.data.kd-tree
2 | (:require [lambda-ml.data.binary-tree :as bt]))
3 |
4 | ;; K-d tree
5 |
(defn make-tree
  "Builds a k-d tree over nodes, where dims is the number of dimensions. f maps
  a node to its k-dimensional point and defaults to identity. At each depth
  the nodes are sorted along dimension (depth mod dims); the middle node
  becomes the root and the nodes before and after it form the left and right
  subtrees. Returns nil for an empty collection of nodes."
  ([dims nodes]
   (make-tree dims nodes identity))
  ([dims nodes f]
   (make-tree dims nodes f 0))
  ([dims nodes f depth]
   (when (seq nodes)
     (let [axis                    (mod depth dims)
           ordered                 (sort-by #(nth (f %) axis) nodes)
           middle                  (quot (count ordered) 2)
           [below [pivot & above]] (split-at middle ordered)
           deeper                  (inc depth)]
       (bt/make-tree pivot
                     (make-tree dims below f deeper)
                     (make-tree dims above f deeper))))))
24 |
--------------------------------------------------------------------------------
/src/lambda_ml/decision_tree.clj:
--------------------------------------------------------------------------------
1 | (ns lambda-ml.decision-tree
2 | "Decision tree learning using the Classification and Regression Trees (CART)
3 | algorithm.
4 |
5 | Example usage:
6 | ```
7 | (def data [[0 0 0] [0 1 1] [1 0 1] [1 1 0]])
8 | (def fit
9 | (let [min-split 2
10 | min-leaf 1
11 | max-features 2]
12 | (-> (make-classification-tree gini-impurity min-split min-leaf max-features)
13 | (decision-tree-fit data))))
14 | (decision-tree-predict fit (map butlast data))
15 | ;;=> (0 1 1 0)
16 | ```"
17 | (:require [lambda-ml.core :as c]
18 | [lambda-ml.data.binary-tree :as bt]))
19 |
20 | ;; Cost functions
21 |
(defn gini-impurity
  "Returns the Gini impurity of a seq of labels: the sum of p * (1 - p) over
  the relative frequency p of each distinct label."
  [labels]
  (let [total (count labels)]
    (reduce +
            (for [occurrences (vals (frequencies labels))
                  :let [p (/ occurrences total)]]
              (* p (- 1 p))))))
30 |
(defn mean-squared-error
  "Returns the mean of the squared differences between labels and predictions,
  averaged over the number of predictions."
  [labels predictions]
  (let [squared-errors (map (fn [label predicted]
                              (let [err (- label predicted)]
                                (* err err)))
                            labels
                            predictions)
        weight         (/ 1 (count predictions))]
    (* weight (reduce + squared-errors))))
38 |
(defn classification-weighted-cost
  "Returns the cost of splitting labels into y1 and y2: the cost f of each
  non-empty side, weighted by that side's share of the observations. The
  prediction function g is accepted for signature parity with the regression
  variant but is not used, since classification cost doesn't take the
  prediction value into account."
  [y1 y2 f g]
  (let [n1    (count y1)
        n2    (count y2)
        total (+ n1 n2)]
    (reduce (fn [acc [n ys]]
              (if (> n 0)
                (+ acc (* (/ n total) (f ys)))
                acc))
            0
            [[n1 y1] [n2 y2]])))
47 |
(defn regression-weighted-cost
  "Returns the cost of splitting labels into y1 and y2. For each non-empty
  side, the cost f is evaluated between the side's labels and a constant
  prediction (g applied to those labels, repeated once per label), and is
  weighted by that side's share of the observations."
  [y1 y2 f g]
  (let [n1    (count y1)
        n2    (count y2)
        total (+ n1 n2)]
    (reduce (fn [acc [n ys]]
              (if (> n 0)
                (+ acc (* (/ n total)
                          (f ys (repeat n (g ys)))))
                acc))
            0
            [[n1 y1] [n2 y2]])))
57 |
58 | ;; Tree splitting
59 |
(defn categorical-partitions
  "Given a seq of k distinct values, returns the 2^{k-1}-1 possible binary
  partitions of the values, each as a pair of sets. Returns an empty vector for
  fewer than two values."
  [vals]
  (if (<= (count vals) 1)
    []
    (let [head (first vals)
          tail (rest vals)]
      ;; Start with {head} against everything else, then add head to either
      ;; side of every partition of the remaining values.
      (into [[(hash-set head) (set tail)]]
            (mapcat (fn [[s1 s2]]
                      [[(conj s1 head) s2]
                       [(conj s2 head) s1]]))
            (categorical-partitions tail)))))
72 |
(defn numeric-partitions
  "Given a seq of k distinct numeric values, returns a vector of the k-1
  candidate split points: the averages of consecutive elements of the sorted
  values."
  [vals]
  (let [ordered (sort vals)]
    (mapv (fn [lower upper] (/ (+ lower upper) 2))
          ordered
          (rest ordered))))
84 |
(defn splitters
  "Returns a seq of all candidate splitters for feature i of the examples x. A
  splitter is a predicate that is true when an example belongs in the left
  subtree and false when it belongs in the right subtree. Numeric features
  split on a threshold (value <= threshold goes left); keyword and string
  features split on membership in the first set of a categorical partition.
  Each splitter carries its criterion as :decision metadata. Throws
  IllegalArgumentException for any other feature type."
  [x i]
  (let [domain         (distinct (map #(nth % i) x))
        representative (first domain)
        threshold-split (fn [s]
                          (with-meta
                            (fn [x] (<= (nth x i) s))
                            {:decision (float s)}))
        membership-split (fn [[s1 s2]]
                           (with-meta
                             (fn [x] (contains? s1 (nth x i)))
                             {:decision [s1 s2]}))]
    (cond
      (number? representative)
      (map threshold-split (numeric-partitions domain))

      (or (keyword? representative) (string? representative))
      (map membership-split (categorical-partitions domain))

      :else
      (throw (IllegalArgumentException. "Invalid feature type")))))
105 |
(defn best-splitter
  "Returns the splitter for the given data that minimizes a weighted cost
  function, or returns nil if no splitter exists."
  [model x y]
  (let [{cost :cost prediction :prediction weighted :weighted
         min-leaf :min-leaf max-features :max-features} model
        ;; Feature bagging - sample a subset of features to split on
        features (-> (range (count (first x)))
                     (c/sample-without-replacement max-features))
        ;; Each row is the feature vector with its label appended as the last
        ;; element, so (map last rows) recovers the labels of a group
        data (map #(conj (vec %1) %2) x y)]
    ;; Every candidate is a triple [splitter cost feature-index]
    (->> (for [i features]
           ;; Sentinel for "feature i has no usable split": nil splitter with
           ;; the maximum possible cost, so any real split beats it
           (let [no-splitter [nil Double/MAX_VALUE i]]
             ;; Find best splitter for feature i
             (->> (splitters x i)
                  (map (fn [splitter]
                         ;; The two groups come back in group-by's key order,
                         ;; so `left`/`right` are just the two sides of the
                         ;; split, not necessarily true/false respectively
                         (let [[left right] (vals (group-by splitter data))]
                           ;; Either split would have fewer observations than required
                           (cond (< (count left) min-leaf) no-splitter
                                 (< (count right) min-leaf) no-splitter
                                 :else (let [cost (weighted (map last left) (map last right) cost prediction)
                                             ;; Add metadata to splitter
                                             splitter (vary-meta splitter merge {:cost (float cost) :feature i})]
                                         [splitter cost i])))))
                  ;; A feature with no candidate splitters still contributes
                  ;; the sentinel, so min-key always has an argument
                  (#(if (empty? %) (list no-splitter) %))
                  (apply min-key second))))
         ;; Find best splitter amongst all features
         (reduce (fn [a b]
                   (let [[_ c1 i1] a [_ c2 i2] b]
                     (cond (< c1 c2) a
                           ;; To match the CART algorithm, break ties in cost by
                           ;; choosing splitter for feature with lower index
                           (= c1 c2) (if (< i1 i2) a b)
                           :else b))))
         ;; Keep only the splitter (nil when the sentinel won)
         (first))))
140 |
141 | ;; API
142 |
(defn decision-tree-fit
  "Fits a decision tree to the given training data and returns the model with
  the fitted tree stored under :parameters. With two arguments, each row of
  data holds the features followed by the label as its last element; with
  three, x holds the feature rows and y the matching labels.

  A node becomes a leaf holding the model's prediction for its labels when it
  has fewer than :min-split observations, when all its labels are equal, or
  when no splitter can be found. Otherwise the best splitter is stored at the
  node and both sides are fitted recursively."
  ([model data]
   (decision-tree-fit model (map butlast data) (map last data)))
  ([model x y]
   (let [{:keys [prediction min-split]} model
         leaf  (fn [] (bt/make-tree (prediction y)))
         grow  (fn []
                 (if-let [splitter (best-splitter model x y)]
                   (let [rows                    (map #(conj (vec %1) %2) x y)
                         {left true right false} (group-by splitter rows)]
                     (bt/make-tree splitter
                                   (:parameters (decision-tree-fit model left))
                                   (:parameters (decision-tree-fit model right))))
                   (leaf)))
         tree  (cond
                 ;; Fewer observations than required to split a node
                 (< (count y) min-split) (leaf)
                 ;; All observed labels are equivalent
                 (apply = y)             (leaf)
                 :else                   (grow))]
     (assoc model :parameters tree))))
168 |
(defn decision-tree-predict
  "Predicts a value for each example in x using the fitted tree stored under
  :parameters in model. Starting at the root, each internal node's splitter is
  applied to the example: a truthy result descends left, otherwise right, and
  the value of the leaf reached is the prediction. Returns nil when the model
  has not been fitted."
  [model x]
  (when-some [tree (:parameters model)]
    (let [predict (fn [xi]
                    (loop [node tree]
                      (let [val (bt/get-value node)]
                        (if (bt/leaf? node)
                          val
                          (recur (if (val xi)
                                   (bt/get-left node)
                                   (bt/get-right node)))))))]
      (map predict x))))
180 |
(defn print-decision-tree
  "Prints the model's settings (everything except :parameters), followed by the
  fitted tree when the model has a :parameters entry."
  [model]
  (let [settings (dissoc model :parameters)]
    (println settings))
  (when-let [fitted (find model :parameters)]
    (bt/print-tree (val fitted))))
187 |
(defn make-classification-tree
  "Returns an unfitted classification tree model using the given cost function.
  Leaves predict the mode of their labels, and splits are scored with
  classification-weighted-cost. min-split, min-leaf and max-features are stored
  on the model for use when fitting."
  [cost min-split min-leaf max-features]
  (merge {:prediction c/mode
          :weighted   classification-weighted-cost}
         {:cost         cost
          :min-split    min-split
          :min-leaf     min-leaf
          :max-features max-features}))
195 |
(defn make-regression-tree
  "Returns an unfitted regression tree model using the given cost function.
  Leaves predict the mean of their labels, and splits are scored with
  regression-weighted-cost. min-split, min-leaf and max-features are stored on
  the model for use when fitting."
  [cost min-split min-leaf max-features]
  (merge {:prediction c/mean
          :weighted   regression-weighted-cost}
         {:cost         cost
          :min-split    min-split
          :min-leaf     min-leaf
          :max-features max-features}))
203 |
--------------------------------------------------------------------------------
/src/lambda_ml/distance.clj:
--------------------------------------------------------------------------------
1 | (ns lambda-ml.distance
2 | "Functions that compute measures of distance between values."
3 | (:require [lambda-ml.core :as c]))
4 |
(defn cosine
  "Returns the cosine distance between two points: 1 minus their cosine
  similarity. Assumes that both points are represented as sequences of the
  same dimension. Given a dimension d, returns the distance between the two
  points using only their values in that dimension."
  ([x y]
   (let [magnitudes (* (Math/sqrt (c/dot-product x x))
                       (Math/sqrt (c/dot-product y y)))
         similarity (/ (c/dot-product x y) magnitudes)]
     (- 1 similarity)))
  ([x y d]
   (cosine [(nth x d)] [(nth y d)])))
16 |
(defn euclidean
  "Returns the squared Euclidean distance between two points, i.e. the sum of
  squared coordinate differences with no square root taken. Assumes that both
  points are represented as sequences of the same dimension. Given a dimension
  d, returns the distance between the two points using only their values in
  that dimension."
  ([x y]
   (reduce + (map (fn [xi yi]
                    (let [delta (- xi yi)]
                      (* delta delta)))
                  x
                  y)))
  ([x y d]
   (euclidean [(nth x d)] [(nth y d)])))
28 |
(defn haversine
  "Returns the great-circle distance, in miles, between two points given as
  [latitude longitude] pairs in degrees. Given a dimension d, returns the
  distance between the two points with the value of the other dimension set to
  zero in both."
  ([[lat1 lng1] [lat2 lng2]]
   (let [radius    3959.9 ; miles; km = 6372.8
         half-dlat (/ (Math/toRadians (- lat2 lat1)) 2)
         half-dlng (/ (Math/toRadians (- lng2 lng1)) 2)
         phi1      (Math/toRadians lat1)
         phi2      (Math/toRadians lat2)
         sin-lat   (Math/sin half-dlat)
         sin-lng   (Math/sin half-dlng)
         a         (+ (* sin-lat sin-lat)
                      (* sin-lng sin-lng (Math/cos phi1) (Math/cos phi2)))]
     (* radius 2 (Math/asin (Math/sqrt a)))))
  ([x y d]
   (let [other (mod (inc d) 2)]
     (haversine (assoc x other 0) (assoc y other 0)))))
45 |
(defn jaccard
  "Returns the Jaccard distance between two points: 1 minus the ratio of the
  sum of element-wise minimums to the sum of element-wise maximums. Assumes
  that both points are represented as sequences of the same dimension. Given a
  dimension d, returns the distance between the two points using only their
  values in that dimension."
  ([x y]
   (let [overlap (reduce + (map min x y))
         extent  (reduce + (map max x y))]
     (- 1 (/ overlap extent))))
  ([x y d]
   (jaccard [(nth x d)] [(nth y d)])))
56 |
--------------------------------------------------------------------------------
/src/lambda_ml/ensemble.clj:
--------------------------------------------------------------------------------
1 | (ns lambda-ml.ensemble
2 | "Ensemble learning methods.
3 |
4 | Example usage:
5 | ```
6 | (def data [[0 0 0] [0 1 1] [1 0 1] [1 1 0]])
7 | (def tree
8 | (let [min-split 2
9 | min-leaf 1
10 | max-features 2]
11 | (make-classification-tree gini-impurity min-split min-leaf max-features)))
12 | (def fit
13 | (let [rate 1.0]
14 | (-> (iterate #(add-bagging-estimator % tree decision-tree-fit decision-tree-predict)
15 | (make-bagging-classifier rate))
16 | (nth 1001)
17 | (bagging-ensemble-fit data))))
18 | (bagging-ensemble-predict fit (map butlast data))
19 | ;;=> (0 1 1 0)
20 | ```"
21 | (:require [lambda-ml.core :refer :all]))
22 |
(defn bagging-ensemble-fit
  "Fits every base estimator of the ensemble on its own bootstrap sample of the
  training data and returns the ensemble with the results stored under :fits.
  Each sample is drawn with replacement and holds (:rate ensemble) times the
  number of training rows. With three arguments, x holds the feature rows and
  y the matching labels, which are appended to their rows before fitting."
  ([ensemble data]
   (let [sample-size (* (:rate ensemble) (count data))
         fit-one     (fn [[model fit _]]
                       (fit model (sample-with-replacement data sample-size)))]
     (assoc ensemble :fits (map fit-one (:estimators ensemble)))))
  ([ensemble x y]
   (let [rows (map (fn [xi yi] (concat xi (list yi))) x y)]
     (bagging-ensemble-fit ensemble rows))))
33 |
(defn bagging-ensemble-predict
  "Predicts the values of example data using a fitted bagging ensemble. Every
  estimator's predict function is applied to its own fit and x, and the
  predictions made for each example are combined with the ensemble's
  :aggregation function."
  [ensemble x]
  (let [{:keys [fits estimators aggregation]} ensemble
        predictors    (map last estimators)
        per-estimator (map (fn [predict fit] (predict fit x)) predictors fits)
        ;; Transpose: one vector of predictions per example
        per-example   (apply map vector per-estimator)]
    (map aggregation per-example)))
41 |
42 | (defn add-bagging-estimator
43 |   "Returns the ensemble with one more base estimator appended. An estimator is
44 |   the triple of a model, the function that fits it to training data, and the
45 |   function that predicts from the resulting fit."
46 |   [ensemble model fit predict]
47 |   (let [existing (get ensemble :estimators [])
48 |         estimator [model fit predict]]
49 |     (assoc ensemble :estimators (conj existing estimator))))
50 |
51 | (defn make-bagging-classifier
52 |   "Creates an empty bagging classifier. rate is the fraction of the training
53 |   data size (1.0 = as many rows as the data itself) drawn for each estimator's
54 |   bootstrap sample; the ensemble's prediction is the mode of the individual
55 |   classifiers' predictions."
56 |   [rate]
57 |   (assoc {:aggregation mode}
58 |          :rate rate))
59 |
60 | (defn make-bagging-regressor
61 |   "Creates an empty bagging regressor. rate is the fraction of the training
62 |   data size (1.0 = as many rows as the data itself) drawn for each estimator's
63 |   bootstrap sample; the ensemble's prediction is the mean of the individual
64 |   regressors' predictions."
65 |   [rate]
66 |   (assoc {:aggregation mean}
67 |          :rate rate))
68 |
--------------------------------------------------------------------------------
/src/lambda_ml/examples/kaggle/march_madness.clj:
--------------------------------------------------------------------------------
1 | ;; gorilla-repl.fileformat = 1
2 |
3 | ;; **
4 | ;;; # Lambda ML Example: Kaggle March Machine Learning Mania 2016
5 | ;;;
6 | ;;; An example of using a couple of different classification techniques on the data from the [March Machine Learning Mania 2016](https://www.kaggle.com/c/march-machine-learning-mania-2016) competition from Kaggle.
7 | ;;;
8 | ;;; First, let's set up our namespace.
9 | ;; **
10 |
11 | ;; @@
12 | (ns lambda-ml.examples.kaggle.march-madness
13 |   (:require [lambda-ml.core :refer :all] ; keyword clause: a bare `require` symbol fails the ns spec on Clojure 1.9+
14 |             [lambda-ml.metrics :refer :all]
15 |             [lambda-ml.regression :refer :all]
16 |             [clojure.data.csv :as csv]
17 |             [gorilla-plot.core :as plot]))
18 | ;; @@
19 | ;; =>
20 | ;;; {"type":"html","content":"nil","value":"nil"}
21 | ;; <=
22 |
23 | ;; @@
24 | ;; Map of numeric team id -> team name, read from Teams.csv (header row skipped).
25 | (def teams
26 |   (with-open [rdr (clojure.java.io/reader "resources/examples/kaggle/march-machine-learning-mania-2016-v2/Teams.csv")]
27 |     (into {} (for [[id team-name] (rest (csv/read-csv rdr))]
28 |                [(read-string id) team-name]))))
29 |
30 | ;; Nested map of season -> slot -> [hi lo], built from the first four columns
31 | ;; of each data row of TourneySlots.csv (the header row is skipped).
32 | (def slots
33 |   (with-open [rdr (clojure.java.io/reader "resources/examples/kaggle/march-machine-learning-mania-2016-v2/TourneySlots.csv")]
34 |     (let [rows (rest (csv/read-csv rdr))
35 |           add-slot (fn [acc [season slot hi lo]]
36 |                      (let [path [(read-string season) slot]]
37 |                        (assoc-in acc path [hi lo])))]
38 |       (reduce add-slot {} rows))))
39 |
40 | ;; Nested map of season -> seed string -> numeric team id, from TourneySeeds.csv.
41 | (def seeds
42 |   (with-open [rdr (clojure.java.io/reader "resources/examples/kaggle/march-machine-learning-mania-2016-v2/TourneySeeds.csv")]
43 |     (reduce (fn [acc [season seed team]]
44 |               (let [path [(read-string season) seed]]
45 |                 (assoc-in acc path (read-string team))))
46 |             {} (rest (csv/read-csv rdr)))))
47 |
48 | (defn parse-seed
49 |   "Parses the two characters at indices 1 and 2 of seed string s as an integer."
50 |   [s] (-> s (subs 1 3) Integer/parseInt))
51 |
52 | ;; Nested map of season -> numeric team id -> numeric seed, from TourneySeeds.csv.
53 | (def seeds-index
54 |   (with-open [rdr (clojure.java.io/reader "resources/examples/kaggle/march-machine-learning-mania-2016-v2/TourneySeeds.csv")]
55 |     (reduce (fn [acc [season seed team]]
56 |               (let [path [(read-string season) (read-string team)]]
57 |                 (assoc-in acc path (parse-seed seed))))
58 |             {} (rest (csv/read-csv rdr)))))
59 |
60 | ;; Nested map of season -> team -> opponent -> {:score :oppscore}. Every game in
61 | ;; TourneyCompactResults.csv is stored twice, once from each team's side.
62 | (def tourney-results
63 |   (with-open [rdr (clojure.java.io/reader "resources/examples/kaggle/march-machine-learning-mania-2016-v2/TourneyCompactResults.csv")]
64 |     (let [record (fn [acc season team opp pts opp-pts]
65 |                    (assoc-in acc [season team opp] {:score pts :oppscore opp-pts}))]
66 |       (reduce (fn [acc row]
67 |                 (let [[season _ wteam wscore lteam lscore] (map read-string row)]
68 |                   (-> acc (record season wteam lteam wscore lscore) (record season lteam wteam lscore wscore))))
69 |               {} (rest (csv/read-csv rdr))))))
70 | ;; @@
71 | ;; =>
72 | ;;; {"type":"html","content":"#'lambda-ml.examples.kaggle.march-madness/tourney-results","value":"#'lambda-ml.examples.kaggle.march-madness/tourney-results"}
73 | ;; <=
74 |
75 | ;; @@
76 | ;; Regular-season aggregates keyed season -> team: :w wins, :l losses, :pf points
77 | ;; scored, :pa points allowed, :streak (>0 current win run, <0 current loss run).
78 | ;; NOTE(review): :g is initialised to 0 and never updated here.
79 | (def stats
80 |   (with-open [rdr (clojure.java.io/reader "resources/examples/kaggle/march-machine-learning-mania-2016-v2/RegularSeasonCompactResults.csv")]
81 |     (let [blank {:g 0 :w 0 :l 0 :pf 0 :pa 0 :streak 0}
82 |           tally (fn [team-stats result-key next-streak scored allowed]
83 |                   (-> (or team-stats blank)
84 |                       (update-in [result-key] inc)
85 |                       (update-in [:streak] next-streak)
86 |                       (update-in [:pf] (partial + scored))
87 |                       (update-in [:pa] (partial + allowed))))
88 |           won (fn [run] (if (> run 0) (inc run) 1))
89 |           lost (fn [run] (if (< run 0) (dec run) -1))]
90 |       (reduce (fn [acc row]
91 |                 (let [[season _ wteam wscore lteam lscore] (map read-string row)]
92 |                   (-> acc
93 |                       (update-in [season wteam] tally :w won wscore lscore)
94 |                       (update-in [season lteam] tally :l lost lscore wscore))))
95 |               {} (rest (csv/read-csv rdr))))))
96 |
97 | (defn win-percentage
98 |   "Fraction of its recorded games in stats that team won in year, as a float."
99 |   [year team]
100 |   (let [{wins :w losses :l} (get-in stats [year team])]
101 |     (float (/ wins (+ wins losses)))))
102 |
103 | (defn points-ratio
104 |   "Ratio of points scored (:pf) to points allowed (:pa) by team in year, as a float."
105 |   [year team]
106 |   (let [{scored :pf allowed :pa} (get-in stats [year team])]
107 |     (float (/ scored allowed))))
108 |
109 | (defn streak
110 |   "Looks up the :streak value recorded in stats for team in year."
111 |   [year team] (:streak (get-in stats [year team])))
112 | ;; @@
113 | ;; =>
114 | ;;; {"type":"html","content":"#'lambda-ml.examples.kaggle.march-madness/streak","value":"#'lambda-ml.examples.kaggle.march-madness/streak"}
115 | ;; <=
116 |
117 | ;; @@
118 | (defn generate-index
119 |   "Builds a map from seed or slot label to team id for the given year. Starts
120 |   from the tournament seeds and repeatedly adds the winner of every slot whose
121 |   two feeder labels are already resolved and whose game has a recorded result,
122 |   stopping once a pass does not increase the number of entries."
123 |   ([year]
124 |    (generate-index year (seeds year) {}))
125 |   ([year index prev]
126 |    (if (> (count index) (count prev))
127 |      (let [resolve (fn [acc [slot [hi lo]]]
128 |                      (let [hiteam (get index hi)
129 |                            loteam (get index lo)
130 |                            scores (get-in tourney-results [year hiteam loteam])]
131 |                        (if-not (and hiteam loteam scores)
132 |                          acc
133 |                          (assoc acc slot (if (> (:score scores) (:oppscore scores)) hiteam loteam)))))]
134 |        ;; Lookups read index (the state before this pass); acc collects this pass.
135 |        (generate-index year (reduce resolve index (slots year)) index))
136 |      index)))
137 |
138 | (defn parse-round
139 |   "Round number encoded in a slot label: the digit after a leading \"R\", or 0
140 |   when the label does not start with \"R\"."
141 |   [slot]
142 |   (if-not (= "R" (subs slot 0 1)) 0 (Integer/parseInt (subs slot 1 2))))
143 |
144 | (defn generate-matchups
145 |   "Returns one feature row per slot of the given year whose two teams are both
146 |   known: [year round hi lo hi-name lo-name hi-win% lo-win% hi-pts-ratio
147 |   lo-pts-ratio hi-streak lo-streak hi-seed lo-seed]. When the game has a
148 |   recorded result a label is appended: 0 if hi outscored lo, otherwise 1."
149 |   [year]
150 |   (let [index (generate-index year)]
151 |     (for [[slot feeders] (slots year)
152 |           :let [[hi lo] (map index feeders)]
153 |           :when (and hi lo)]
154 |       (let [;; Applies one per-team lookup to the hi team, then the lo team.
155 |             both (fn [lookup] [(lookup hi) (lookup lo)])
156 |             ; TODO: features go here
157 |             lookups [teams
158 |                      (partial win-percentage year)
159 |                      (partial points-ratio year)
160 |                      (partial streak year)
161 |                      (fn [team] (get-in seeds-index [year team]))]
162 |             matchup (into [year (parse-round slot) hi lo]
163 |                           (mapcat both lookups))
164 |             scores (get-in tourney-results [year hi lo])]
165 |         (if-not scores
166 |           matchup
167 |           ;; Label: 0 when the hi team outscored the lo team, else 1.
168 |           (conj matchup
169 |                 (if (> (:score scores) (:oppscore scores)) 0 1)))))))
170 | ;; @@
171 | ;; =>
172 | ;;; {"type":"html","content":"#'lambda-ml.examples.kaggle.march-madness/generate-matchups","value":"#'lambda-ml.examples.kaggle.march-madness/generate-matchups"}
173 | ;; <=
174 |
175 | ;; @@
176 | ;; Matchup rows for every year in (range 1985 2012), i.e. 1985 through 2011,
177 | ;; concatenated into one sequence.
178 | (def training-set
179 |   (mapcat generate-matchups (range 1985 2012)))
180 |
181 | ;; Matchup rows for every year in (range 2012 2016), i.e. 2012 through 2015,
182 | ;; concatenated into one sequence.
183 | (def test-set
184 |   (mapcat generate-matchups (range 2012 2016)))
185 | ;; @@
186 | ;; =>
187 | ;;; {"type":"html","content":"#'lambda-ml.examples.kaggle.march-madness/test-set","value":"#'lambda-ml.examples.kaggle.march-madness/test-set"}
188 | ;; <=
189 |
190 | ;; **
191 | ;;; Baseline model that always picks the high seed.
192 | ;; **
193 |
194 | ;; @@
195 | ;; Baseline prediction per test matchup: 0 (hi team) when its seed number is
196 | ;; less than or equal to the lo team's, otherwise 1. Reads the seeds as the
197 | ;; third-from-last and second-from-last elements of each row.
198 | (def baseline-predictions
199 |   (for [matchup test-set :let [[hiseed loseed] (take-last 3 matchup)]]
200 |     (if (<= hiseed loseed) 0 1)))
201 |
202 | ;; Share of test matchups whose last element equals the baseline prediction.
203 | (def baseline-accuracy
204 |   (let [hits (filter true? (map = (map last test-set) baseline-predictions))]
205 |     (float (/ (count hits) (count test-set)))))
206 | ;(def baseline-roc (roc-curve (map last test-set) baseline-predictions))
207 |
208 | (println "baseline accuracy =" baseline-accuracy)
209 | ;; @@
210 | ;; ->
211 | ;;; baseline accuracy = 0.6865672
212 | ;;;
213 | ;; <-
214 | ;; =>
215 | ;;; {"type":"html","content":"nil","value":"nil"}
216 | ;; <=
217 |
218 | ;; @@
219 | (defn encode-features
220 |   "Selects the model inputs from a matchup row: the win percentages, points
221 |   ratios and streaks of the hi and lo teams (positions 6-11), followed by the
222 |   winner label (position 14, nil when the row is too short to have one). The
223 |   seeds (positions 12-13) are currently excluded."
224 |   [matchup]
225 |   (let [feature-positions [6 7      ; hiwinpct lowinpct
226 |                            8 9      ; hiptsratio loptsratio
227 |                            10 11    ; histreak lostreak
228 |                            ;12 13   ; hiseed loseed
229 |                            ]
230 |         winner-position 14]
231 |     (mapv #(nth matchup % nil) (conj feature-positions winner-position))))
232 |
233 | ;; alpha and iters are passed to make-logistic-regression; lambda is swept below.
234 | (def alpha 0.03)
235 | (def iters 2000)
236 | (def threshold 0.5)  ; predictions at or below this value are labelled 0, others 1
237 |
238 | (let [features (map (comp butlast encode-features) test-set)
239 |       labels (map last test-set)]
240 |   (doseq [lambda [0 0.1 1.0]]
241 |     (let [model (regression-fit (make-logistic-regression alpha lambda iters)
242 |                                 (map encode-features training-set))
243 |           picks (for [p (regression-predict model features)] (if (<= p threshold) 0 1))
244 |           hits (count (filter true? (map = labels picks)))
245 |           accuracy (float (/ hits (count test-set)))]
246 |       (println (l2-norm (:parameters model)))
247 |       (println "lambda =" lambda "accuracy =" accuracy))))
248 | ;; @@
249 | ;; ->
250 | ;;; 1.0195345016026474
251 | ;;; lambda = 0 accuracy = 0.6865672
252 | ;;; 1.0140108570708222
253 | ;;; lambda = 0.1 accuracy = 0.6865672
254 | ;;; 1.1385485393391344
255 | ;;; lambda = 1.0 accuracy = 0.6865672
256 | ;;;
257 | ;; <-
258 | ;; =>
259 | ;;; {"type":"html","content":"nil","value":"nil"}
260 | ;; <=
261 |
262 | ;; @@
263 |
264 | ;; @@
265 |
--------------------------------------------------------------------------------
/src/lambda_ml/examples/kaggle/titanic.clj:
--------------------------------------------------------------------------------
1 | ;; gorilla-repl.fileformat = 1
2 |
3 | ;; **
4 | ;;; # Lambda ML Example: Kaggle Titanic with Logistic Regression
5 | ;;;
6 | ;;; An example of applying logistic regression to the data from the [Titanic: Machine Learning from Disaster](https://www.kaggle.com/c/titanic) competition from Kaggle.
7 | ;;;
8 | ;;; First, let's set up our namespace and define a helper function for sanitizing data.
9 | ;; **
10 |
11 | ;; @@
12 | (ns lambda-ml.examples.kaggle.titanic
13 |   (:require [lambda-ml.core :refer :all] ; keyword clause: a bare `require` symbol fails the ns spec on Clojure 1.9+
14 |             [lambda-ml.metrics :refer :all]
15 |             [lambda-ml.regression :refer :all]
16 |             [clojure.data.csv :as csv]
17 |             [gorilla-plot.core :as plot]))
18 |
19 | (defn sanitize
20 |   "Passes numbers through unchanged, maps blank strings to nil and parses any
21 |   other string with read-string."
22 |   [s]
23 |   (if (number? s) s (when-not (clojure.string/blank? s) (read-string s))))
24 | ;; @@
25 | ;; =>
26 | ;;; {"type":"html","content":"#'lambda-ml.examples.kaggle.titanic/sanitize","value":"#'lambda-ml.examples.kaggle.titanic/sanitize"}
27 | ;; <=
28 |
29 | ;; **
30 | ;;; Load the training data. Note that we're being arbitrarily selective about columns since we'll be using only a subset of the features.
31 | ;;;
32 | ;;; The categorical features also need to be converted into indicator variables. For example, the passenger class feature ("pclass") becomes three features ("pclass1", "pclass2", and "pclass3"), where the value for "pclass1" is 1 if the passenger was in 1st class and 0 otherwise, and so on.
33 | ;; **
34 |
35 | ;; @@
36 | ;; Training rows as (pclass1 pclass2 pclass3 male sibsp parch survival), where
37 | ;; the first four are 0/1 indicator variables and survival comes last.
38 | (def train
39 |   (with-open [rdr (clojure.java.io/reader "resources/examples/kaggle/titanic/train.csv")]
40 |     (let [flag (fn [actual expected] (if (= actual expected) 1 0))
41 |           encode (fn [[_ survival pclass _ sex _ sibsp parch]]
42 |                    (map sanitize [(flag pclass "1") (flag pclass "2") (flag pclass "3")
43 |                                   (flag sex "male") sibsp parch survival]))]
44 |       ;; doall forces the lazy CSV rows to be read before the reader closes.
45 |       (doall (map encode (rest (csv/read-csv rdr)))))))
46 | ;; @@
47 | ;; =>
48 | ;;; {"type":"html","content":"#'lambda-ml.examples.kaggle.titanic/train","value":"#'lambda-ml.examples.kaggle.titanic/train"}
49 | ;; <=
50 |
51 | ;; **
52 | ;;; Visualize the categorical features in the training data.
53 | ;; **
54 |
55 | ;; @@
56 | ;; One bar chart of value counts for each of the four indicator columns (0-3).
57 | (for [column (range 4) :let [counts (frequencies (map #(nth % column) train))]]
58 |   (plot/bar-chart (keys counts) (vals counts)))
59 | ;; @@
60 | ;; =>
61 | ;;; {"type":"list-like","open":"(","close":")","separator":" ","items":[{"type":"vega","content":{"width":400,"height":247.2187957763672,"padding":{"top":10,"left":55,"bottom":40,"right":10},"data":[{"name":"c78229a5-ee84-4434-89ba-eeb6ed9177e6","values":[{"x":0,"y":675},{"x":1,"y":216}]}],"marks":[{"type":"rect","from":{"data":"c78229a5-ee84-4434-89ba-eeb6ed9177e6"},"properties":{"enter":{"x":{"scale":"x","field":"data.x"},"width":{"scale":"x","band":true,"offset":-1},"y":{"scale":"y","field":"data.y"},"y2":{"scale":"y","value":0}},"update":{"fill":{"value":"steelblue"},"opacity":{"value":1}},"hover":{"fill":{"value":"#FF29D2"}}}}],"scales":[{"name":"x","type":"ordinal","range":"width","domain":{"data":"c78229a5-ee84-4434-89ba-eeb6ed9177e6","field":"data.x"}},{"name":"y","range":"height","nice":true,"domain":{"data":"c78229a5-ee84-4434-89ba-eeb6ed9177e6","field":"data.y"}}],"axes":[{"type":"x","scale":"x"},{"type":"y","scale":"y"}]},"value":"#gorilla_repl.vega.VegaView{:content {:width 400, :height 247.2188, :padding {:top 10, :left 55, :bottom 40, :right 10}, :data [{:name \"c78229a5-ee84-4434-89ba-eeb6ed9177e6\", :values ({:x 0, :y 675} {:x 1, :y 216})}], :marks [{:type \"rect\", :from {:data \"c78229a5-ee84-4434-89ba-eeb6ed9177e6\"}, :properties {:enter {:x {:scale \"x\", :field \"data.x\"}, :width {:scale \"x\", :band true, :offset -1}, :y {:scale \"y\", :field \"data.y\"}, :y2 {:scale \"y\", :value 0}}, :update {:fill {:value \"steelblue\"}, :opacity {:value 1}}, :hover {:fill {:value \"#FF29D2\"}}}}], :scales [{:name \"x\", :type \"ordinal\", :range \"width\", :domain {:data \"c78229a5-ee84-4434-89ba-eeb6ed9177e6\", :field \"data.x\"}} {:name \"y\", :range \"height\", :nice true, :domain {:data \"c78229a5-ee84-4434-89ba-eeb6ed9177e6\", :field \"data.y\"}}], :axes [{:type \"x\", :scale \"x\"} {:type \"y\", :scale 
\"y\"}]}}"},{"type":"vega","content":{"width":400,"height":247.2187957763672,"padding":{"top":10,"left":55,"bottom":40,"right":10},"data":[{"name":"26b2827c-5029-4b91-b2e2-51366411c66e","values":[{"x":0,"y":707},{"x":1,"y":184}]}],"marks":[{"type":"rect","from":{"data":"26b2827c-5029-4b91-b2e2-51366411c66e"},"properties":{"enter":{"x":{"scale":"x","field":"data.x"},"width":{"scale":"x","band":true,"offset":-1},"y":{"scale":"y","field":"data.y"},"y2":{"scale":"y","value":0}},"update":{"fill":{"value":"steelblue"},"opacity":{"value":1}},"hover":{"fill":{"value":"#FF29D2"}}}}],"scales":[{"name":"x","type":"ordinal","range":"width","domain":{"data":"26b2827c-5029-4b91-b2e2-51366411c66e","field":"data.x"}},{"name":"y","range":"height","nice":true,"domain":{"data":"26b2827c-5029-4b91-b2e2-51366411c66e","field":"data.y"}}],"axes":[{"type":"x","scale":"x"},{"type":"y","scale":"y"}]},"value":"#gorilla_repl.vega.VegaView{:content {:width 400, :height 247.2188, :padding {:top 10, :left 55, :bottom 40, :right 10}, :data [{:name \"26b2827c-5029-4b91-b2e2-51366411c66e\", :values ({:x 0, :y 707} {:x 1, :y 184})}], :marks [{:type \"rect\", :from {:data \"26b2827c-5029-4b91-b2e2-51366411c66e\"}, :properties {:enter {:x {:scale \"x\", :field \"data.x\"}, :width {:scale \"x\", :band true, :offset -1}, :y {:scale \"y\", :field \"data.y\"}, :y2 {:scale \"y\", :value 0}}, :update {:fill {:value \"steelblue\"}, :opacity {:value 1}}, :hover {:fill {:value \"#FF29D2\"}}}}], :scales [{:name \"x\", :type \"ordinal\", :range \"width\", :domain {:data \"26b2827c-5029-4b91-b2e2-51366411c66e\", :field \"data.x\"}} {:name \"y\", :range \"height\", :nice true, :domain {:data \"26b2827c-5029-4b91-b2e2-51366411c66e\", :field \"data.y\"}}], :axes [{:type \"x\", :scale \"x\"} {:type \"y\", :scale 
\"y\"}]}}"},{"type":"vega","content":{"width":400,"height":247.2187957763672,"padding":{"top":10,"left":55,"bottom":40,"right":10},"data":[{"name":"4f5e40bc-dc70-4bb8-8700-7c71212858b0","values":[{"x":1,"y":491},{"x":0,"y":400}]}],"marks":[{"type":"rect","from":{"data":"4f5e40bc-dc70-4bb8-8700-7c71212858b0"},"properties":{"enter":{"x":{"scale":"x","field":"data.x"},"width":{"scale":"x","band":true,"offset":-1},"y":{"scale":"y","field":"data.y"},"y2":{"scale":"y","value":0}},"update":{"fill":{"value":"steelblue"},"opacity":{"value":1}},"hover":{"fill":{"value":"#FF29D2"}}}}],"scales":[{"name":"x","type":"ordinal","range":"width","domain":{"data":"4f5e40bc-dc70-4bb8-8700-7c71212858b0","field":"data.x"}},{"name":"y","range":"height","nice":true,"domain":{"data":"4f5e40bc-dc70-4bb8-8700-7c71212858b0","field":"data.y"}}],"axes":[{"type":"x","scale":"x"},{"type":"y","scale":"y"}]},"value":"#gorilla_repl.vega.VegaView{:content {:width 400, :height 247.2188, :padding {:top 10, :left 55, :bottom 40, :right 10}, :data [{:name \"4f5e40bc-dc70-4bb8-8700-7c71212858b0\", :values ({:x 1, :y 491} {:x 0, :y 400})}], :marks [{:type \"rect\", :from {:data \"4f5e40bc-dc70-4bb8-8700-7c71212858b0\"}, :properties {:enter {:x {:scale \"x\", :field \"data.x\"}, :width {:scale \"x\", :band true, :offset -1}, :y {:scale \"y\", :field \"data.y\"}, :y2 {:scale \"y\", :value 0}}, :update {:fill {:value \"steelblue\"}, :opacity {:value 1}}, :hover {:fill {:value \"#FF29D2\"}}}}], :scales [{:name \"x\", :type \"ordinal\", :range \"width\", :domain {:data \"4f5e40bc-dc70-4bb8-8700-7c71212858b0\", :field \"data.x\"}} {:name \"y\", :range \"height\", :nice true, :domain {:data \"4f5e40bc-dc70-4bb8-8700-7c71212858b0\", :field \"data.y\"}}], :axes [{:type \"x\", :scale \"x\"} {:type \"y\", :scale 
\"y\"}]}}"},{"type":"vega","content":{"width":400,"height":247.2187957763672,"padding":{"top":10,"left":55,"bottom":40,"right":10},"data":[{"name":"34492d64-58ea-4f4f-826c-2196dfad4bea","values":[{"x":1,"y":577},{"x":0,"y":314}]}],"marks":[{"type":"rect","from":{"data":"34492d64-58ea-4f4f-826c-2196dfad4bea"},"properties":{"enter":{"x":{"scale":"x","field":"data.x"},"width":{"scale":"x","band":true,"offset":-1},"y":{"scale":"y","field":"data.y"},"y2":{"scale":"y","value":0}},"update":{"fill":{"value":"steelblue"},"opacity":{"value":1}},"hover":{"fill":{"value":"#FF29D2"}}}}],"scales":[{"name":"x","type":"ordinal","range":"width","domain":{"data":"34492d64-58ea-4f4f-826c-2196dfad4bea","field":"data.x"}},{"name":"y","range":"height","nice":true,"domain":{"data":"34492d64-58ea-4f4f-826c-2196dfad4bea","field":"data.y"}}],"axes":[{"type":"x","scale":"x"},{"type":"y","scale":"y"}]},"value":"#gorilla_repl.vega.VegaView{:content {:width 400, :height 247.2188, :padding {:top 10, :left 55, :bottom 40, :right 10}, :data [{:name \"34492d64-58ea-4f4f-826c-2196dfad4bea\", :values ({:x 1, :y 577} {:x 0, :y 314})}], :marks [{:type \"rect\", :from {:data \"34492d64-58ea-4f4f-826c-2196dfad4bea\"}, :properties {:enter {:x {:scale \"x\", :field \"data.x\"}, :width {:scale \"x\", :band true, :offset -1}, :y {:scale \"y\", :field \"data.y\"}, :y2 {:scale \"y\", :value 0}}, :update {:fill {:value \"steelblue\"}, :opacity {:value 1}}, :hover {:fill {:value \"#FF29D2\"}}}}], :scales [{:name \"x\", :type \"ordinal\", :range \"width\", :domain {:data \"34492d64-58ea-4f4f-826c-2196dfad4bea\", :field \"data.x\"}} {:name \"y\", :range \"height\", :nice true, :domain {:data \"34492d64-58ea-4f4f-826c-2196dfad4bea\", :field \"data.y\"}}], :axes [{:type \"x\", :scale \"x\"} {:type \"y\", :scale \"y\"}]}}"}],"value":"(#gorilla_repl.vega.VegaView{:content {:width 400, :height 247.2188, :padding {:top 10, :left 55, :bottom 40, :right 10}, :data [{:name \"c78229a5-ee84-4434-89ba-eeb6ed9177e6\", :values 
({:x 0, :y 675} {:x 1, :y 216})}], :marks [{:type \"rect\", :from {:data \"c78229a5-ee84-4434-89ba-eeb6ed9177e6\"}, :properties {:enter {:x {:scale \"x\", :field \"data.x\"}, :width {:scale \"x\", :band true, :offset -1}, :y {:scale \"y\", :field \"data.y\"}, :y2 {:scale \"y\", :value 0}}, :update {:fill {:value \"steelblue\"}, :opacity {:value 1}}, :hover {:fill {:value \"#FF29D2\"}}}}], :scales [{:name \"x\", :type \"ordinal\", :range \"width\", :domain {:data \"c78229a5-ee84-4434-89ba-eeb6ed9177e6\", :field \"data.x\"}} {:name \"y\", :range \"height\", :nice true, :domain {:data \"c78229a5-ee84-4434-89ba-eeb6ed9177e6\", :field \"data.y\"}}], :axes [{:type \"x\", :scale \"x\"} {:type \"y\", :scale \"y\"}]}} #gorilla_repl.vega.VegaView{:content {:width 400, :height 247.2188, :padding {:top 10, :left 55, :bottom 40, :right 10}, :data [{:name \"26b2827c-5029-4b91-b2e2-51366411c66e\", :values ({:x 0, :y 707} {:x 1, :y 184})}], :marks [{:type \"rect\", :from {:data \"26b2827c-5029-4b91-b2e2-51366411c66e\"}, :properties {:enter {:x {:scale \"x\", :field \"data.x\"}, :width {:scale \"x\", :band true, :offset -1}, :y {:scale \"y\", :field \"data.y\"}, :y2 {:scale \"y\", :value 0}}, :update {:fill {:value \"steelblue\"}, :opacity {:value 1}}, :hover {:fill {:value \"#FF29D2\"}}}}], :scales [{:name \"x\", :type \"ordinal\", :range \"width\", :domain {:data \"26b2827c-5029-4b91-b2e2-51366411c66e\", :field \"data.x\"}} {:name \"y\", :range \"height\", :nice true, :domain {:data \"26b2827c-5029-4b91-b2e2-51366411c66e\", :field \"data.y\"}}], :axes [{:type \"x\", :scale \"x\"} {:type \"y\", :scale \"y\"}]}} #gorilla_repl.vega.VegaView{:content {:width 400, :height 247.2188, :padding {:top 10, :left 55, :bottom 40, :right 10}, :data [{:name \"4f5e40bc-dc70-4bb8-8700-7c71212858b0\", :values ({:x 1, :y 491} {:x 0, :y 400})}], :marks [{:type \"rect\", :from {:data \"4f5e40bc-dc70-4bb8-8700-7c71212858b0\"}, :properties {:enter {:x {:scale \"x\", :field \"data.x\"}, :width {:scale 
\"x\", :band true, :offset -1}, :y {:scale \"y\", :field \"data.y\"}, :y2 {:scale \"y\", :value 0}}, :update {:fill {:value \"steelblue\"}, :opacity {:value 1}}, :hover {:fill {:value \"#FF29D2\"}}}}], :scales [{:name \"x\", :type \"ordinal\", :range \"width\", :domain {:data \"4f5e40bc-dc70-4bb8-8700-7c71212858b0\", :field \"data.x\"}} {:name \"y\", :range \"height\", :nice true, :domain {:data \"4f5e40bc-dc70-4bb8-8700-7c71212858b0\", :field \"data.y\"}}], :axes [{:type \"x\", :scale \"x\"} {:type \"y\", :scale \"y\"}]}} #gorilla_repl.vega.VegaView{:content {:width 400, :height 247.2188, :padding {:top 10, :left 55, :bottom 40, :right 10}, :data [{:name \"34492d64-58ea-4f4f-826c-2196dfad4bea\", :values ({:x 1, :y 577} {:x 0, :y 314})}], :marks [{:type \"rect\", :from {:data \"34492d64-58ea-4f4f-826c-2196dfad4bea\"}, :properties {:enter {:x {:scale \"x\", :field \"data.x\"}, :width {:scale \"x\", :band true, :offset -1}, :y {:scale \"y\", :field \"data.y\"}, :y2 {:scale \"y\", :value 0}}, :update {:fill {:value \"steelblue\"}, :opacity {:value 1}}, :hover {:fill {:value \"#FF29D2\"}}}}], :scales [{:name \"x\", :type \"ordinal\", :range \"width\", :domain {:data \"34492d64-58ea-4f4f-826c-2196dfad4bea\", :field \"data.x\"}} {:name \"y\", :range \"height\", :nice true, :domain {:data \"34492d64-58ea-4f4f-826c-2196dfad4bea\", :field \"data.y\"}}], :axes [{:type \"x\", :scale \"x\"} {:type \"y\", :scale \"y\"}]}})"}
62 | ;; <=
63 |
64 | ;; **
65 | ;;; Visualize the numerical features in the training data.
66 | ;; **
67 |
68 | ;; @@
69 | ;; Histograms of the sibsp (column 4) and parch (column 5) values.
70 | (map (fn [column] (plot/histogram (map #(nth % column) train))) [4 5])
71 | ;; @@
72 | ;; =>
73 | ;;; {"type":"list-like","open":"(","close":")","separator":" ","items":[{"type":"vega","content":{"width":400,"height":247.2187957763672,"padding":{"top":10,"left":55,"bottom":40,"right":10},"data":[{"name":"22467d50-0f32-44e2-afe5-e710e41f63e0","values":[{"x":0.0,"y":0},{"x":0.7272727272727274,"y":608.0},{"x":1.4545454545454548,"y":209.0},{"x":2.181818181818182,"y":28.0},{"x":2.9090909090909096,"y":0.0},{"x":3.636363636363637,"y":16.0},{"x":4.363636363636364,"y":18.0},{"x":5.090909090909092,"y":5.0},{"x":5.818181818181819,"y":0.0},{"x":6.545454545454547,"y":0.0},{"x":7.272727272727274,"y":0.0},{"x":8.000000000000002,"y":7.0},{"x":8.727272727272728,"y":0}]}],"marks":[{"type":"line","from":{"data":"22467d50-0f32-44e2-afe5-e710e41f63e0"},"properties":{"enter":{"x":{"scale":"x","field":"data.x"},"y":{"scale":"y","field":"data.y"},"interpolate":{"value":"step-before"},"fill":{"value":"steelblue"},"fillOpacity":{"value":0.4},"stroke":{"value":"steelblue"},"strokeWidth":{"value":2},"strokeOpacity":{"value":1}}}}],"scales":[{"name":"x","type":"linear","range":"width","zero":false,"domain":{"data":"22467d50-0f32-44e2-afe5-e710e41f63e0","field":"data.x"}},{"name":"y","type":"linear","range":"height","nice":true,"zero":false,"domain":{"data":"22467d50-0f32-44e2-afe5-e710e41f63e0","field":"data.y"}}],"axes":[{"type":"x","scale":"x"},{"type":"y","scale":"y"}]},"value":"#gorilla_repl.vega.VegaView{:content {:width 400, :height 247.2188, :padding {:top 10, :left 55, :bottom 40, :right 10}, :data [{:name \"22467d50-0f32-44e2-afe5-e710e41f63e0\", :values ({:x 0.0, :y 0} {:x 0.7272727272727274, :y 608.0} {:x 1.4545454545454548, :y 209.0} {:x 2.181818181818182, :y 28.0} {:x 2.9090909090909096, :y 0.0} {:x 3.636363636363637, :y 16.0} {:x 4.363636363636364, :y 18.0} {:x 5.090909090909092, :y 5.0} {:x 5.818181818181819, :y 0.0} {:x 6.545454545454547, :y 0.0} {:x 7.272727272727274, :y 0.0} {:x 8.000000000000002, :y 7.0} {:x 8.727272727272728, :y 0})}], :marks [{:type \"line\", 
:from {:data \"22467d50-0f32-44e2-afe5-e710e41f63e0\"}, :properties {:enter {:x {:scale \"x\", :field \"data.x\"}, :y {:scale \"y\", :field \"data.y\"}, :interpolate {:value \"step-before\"}, :fill {:value \"steelblue\"}, :fillOpacity {:value 0.4}, :stroke {:value \"steelblue\"}, :strokeWidth {:value 2}, :strokeOpacity {:value 1}}}}], :scales [{:name \"x\", :type \"linear\", :range \"width\", :zero false, :domain {:data \"22467d50-0f32-44e2-afe5-e710e41f63e0\", :field \"data.x\"}} {:name \"y\", :type \"linear\", :range \"height\", :nice true, :zero false, :domain {:data \"22467d50-0f32-44e2-afe5-e710e41f63e0\", :field \"data.y\"}}], :axes [{:type \"x\", :scale \"x\"} {:type \"y\", :scale \"y\"}]}}"},{"type":"vega","content":{"width":400,"height":247.2187957763672,"padding":{"top":10,"left":55,"bottom":40,"right":10},"data":[{"name":"9b0a6172-a144-4771-b161-ba32e93f6ae0","values":[{"x":0.0,"y":0},{"x":0.5454545454545455,"y":678.0},{"x":1.090909090909091,"y":118.0},{"x":1.6363636363636367,"y":0.0},{"x":2.181818181818182,"y":80.0},{"x":2.7272727272727275,"y":0.0},{"x":3.272727272727273,"y":5.0},{"x":3.8181818181818183,"y":0.0},{"x":4.363636363636364,"y":4.0},{"x":4.90909090909091,"y":0.0},{"x":5.454545454545456,"y":5.0},{"x":6.000000000000002,"y":1.0},{"x":6.545454545454548,"y":0}]}],"marks":[{"type":"line","from":{"data":"9b0a6172-a144-4771-b161-ba32e93f6ae0"},"properties":{"enter":{"x":{"scale":"x","field":"data.x"},"y":{"scale":"y","field":"data.y"},"interpolate":{"value":"step-before"},"fill":{"value":"steelblue"},"fillOpacity":{"value":0.4},"stroke":{"value":"steelblue"},"strokeWidth":{"value":2},"strokeOpacity":{"value":1}}}}],"scales":[{"name":"x","type":"linear","range":"width","zero":false,"domain":{"data":"9b0a6172-a144-4771-b161-ba32e93f6ae0","field":"data.x"}},{"name":"y","type":"linear","range":"height","nice":true,"zero":false,"domain":{"data":"9b0a6172-a144-4771-b161-ba32e93f6ae0","field":"data.y"}}],"axes":[{"type":"x","scale":"x"},{"type":"y","scale":"
y"}]},"value":"#gorilla_repl.vega.VegaView{:content {:width 400, :height 247.2188, :padding {:top 10, :left 55, :bottom 40, :right 10}, :data [{:name \"9b0a6172-a144-4771-b161-ba32e93f6ae0\", :values ({:x 0.0, :y 0} {:x 0.5454545454545455, :y 678.0} {:x 1.090909090909091, :y 118.0} {:x 1.6363636363636367, :y 0.0} {:x 2.181818181818182, :y 80.0} {:x 2.7272727272727275, :y 0.0} {:x 3.272727272727273, :y 5.0} {:x 3.8181818181818183, :y 0.0} {:x 4.363636363636364, :y 4.0} {:x 4.90909090909091, :y 0.0} {:x 5.454545454545456, :y 5.0} {:x 6.000000000000002, :y 1.0} {:x 6.545454545454548, :y 0})}], :marks [{:type \"line\", :from {:data \"9b0a6172-a144-4771-b161-ba32e93f6ae0\"}, :properties {:enter {:x {:scale \"x\", :field \"data.x\"}, :y {:scale \"y\", :field \"data.y\"}, :interpolate {:value \"step-before\"}, :fill {:value \"steelblue\"}, :fillOpacity {:value 0.4}, :stroke {:value \"steelblue\"}, :strokeWidth {:value 2}, :strokeOpacity {:value 1}}}}], :scales [{:name \"x\", :type \"linear\", :range \"width\", :zero false, :domain {:data \"9b0a6172-a144-4771-b161-ba32e93f6ae0\", :field \"data.x\"}} {:name \"y\", :type \"linear\", :range \"height\", :nice true, :zero false, :domain {:data \"9b0a6172-a144-4771-b161-ba32e93f6ae0\", :field \"data.y\"}}], :axes [{:type \"x\", :scale \"x\"} {:type \"y\", :scale \"y\"}]}}"}],"value":"(#gorilla_repl.vega.VegaView{:content {:width 400, :height 247.2188, :padding {:top 10, :left 55, :bottom 40, :right 10}, :data [{:name \"22467d50-0f32-44e2-afe5-e710e41f63e0\", :values ({:x 0.0, :y 0} {:x 0.7272727272727274, :y 608.0} {:x 1.4545454545454548, :y 209.0} {:x 2.181818181818182, :y 28.0} {:x 2.9090909090909096, :y 0.0} {:x 3.636363636363637, :y 16.0} {:x 4.363636363636364, :y 18.0} {:x 5.090909090909092, :y 5.0} {:x 5.818181818181819, :y 0.0} {:x 6.545454545454547, :y 0.0} {:x 7.272727272727274, :y 0.0} {:x 8.000000000000002, :y 7.0} {:x 8.727272727272728, :y 0})}], :marks [{:type \"line\", :from {:data 
\"22467d50-0f32-44e2-afe5-e710e41f63e0\"}, :properties {:enter {:x {:scale \"x\", :field \"data.x\"}, :y {:scale \"y\", :field \"data.y\"}, :interpolate {:value \"step-before\"}, :fill {:value \"steelblue\"}, :fillOpacity {:value 0.4}, :stroke {:value \"steelblue\"}, :strokeWidth {:value 2}, :strokeOpacity {:value 1}}}}], :scales [{:name \"x\", :type \"linear\", :range \"width\", :zero false, :domain {:data \"22467d50-0f32-44e2-afe5-e710e41f63e0\", :field \"data.x\"}} {:name \"y\", :type \"linear\", :range \"height\", :nice true, :zero false, :domain {:data \"22467d50-0f32-44e2-afe5-e710e41f63e0\", :field \"data.y\"}}], :axes [{:type \"x\", :scale \"x\"} {:type \"y\", :scale \"y\"}]}} #gorilla_repl.vega.VegaView{:content {:width 400, :height 247.2188, :padding {:top 10, :left 55, :bottom 40, :right 10}, :data [{:name \"9b0a6172-a144-4771-b161-ba32e93f6ae0\", :values ({:x 0.0, :y 0} {:x 0.5454545454545455, :y 678.0} {:x 1.090909090909091, :y 118.0} {:x 1.6363636363636367, :y 0.0} {:x 2.181818181818182, :y 80.0} {:x 2.7272727272727275, :y 0.0} {:x 3.272727272727273, :y 5.0} {:x 3.8181818181818183, :y 0.0} {:x 4.363636363636364, :y 4.0} {:x 4.90909090909091, :y 0.0} {:x 5.454545454545456, :y 5.0} {:x 6.000000000000002, :y 1.0} {:x 6.545454545454548, :y 0})}], :marks [{:type \"line\", :from {:data \"9b0a6172-a144-4771-b161-ba32e93f6ae0\"}, :properties {:enter {:x {:scale \"x\", :field \"data.x\"}, :y {:scale \"y\", :field \"data.y\"}, :interpolate {:value \"step-before\"}, :fill {:value \"steelblue\"}, :fillOpacity {:value 0.4}, :stroke {:value \"steelblue\"}, :strokeWidth {:value 2}, :strokeOpacity {:value 1}}}}], :scales [{:name \"x\", :type \"linear\", :range \"width\", :zero false, :domain {:data \"9b0a6172-a144-4771-b161-ba32e93f6ae0\", :field \"data.x\"}} {:name \"y\", :type \"linear\", :range \"height\", :nice true, :zero false, :domain {:data \"9b0a6172-a144-4771-b161-ba32e93f6ae0\", :field \"data.y\"}}], :axes [{:type \"x\", :scale \"x\"} {:type \"y\", :scale 
\"y\"}]}})"}
74 | ;; <=
75 |
76 | ;; **
77 | ;;; Load the test data, which is structured slightly differently since we'll be making predictions on this data.
78 | ;; **
79 |
80 | ;; @@
81 | ;; Test rows as (id pclass1 pclass2 pclass3 male sibsp parch); id is kept as the
82 | ;; raw string from the CSV and no survival value is read.
83 | (def test
84 |   (with-open [rdr (clojure.java.io/reader "resources/examples/kaggle/titanic/test.csv")]
85 |     (let [flag (fn [actual expected] (if (= actual expected) 1 0))
86 |           encode (fn [[id pclass _ sex _ sibsp parch]]
87 |                    (cons id (map sanitize [(flag pclass "1") (flag pclass "2") (flag pclass "3")
88 |                                            (flag sex "male") sibsp parch])))]
89 |       ;; doall forces the lazy CSV rows to be read before the reader closes.
90 |       (doall (map encode (rest (csv/read-csv rdr)))))))
91 | ;; @@
92 | ;; =>
93 | ;;; {"type":"html","content":"#'lambda-ml.examples.kaggle.titanic/test","value":"#'lambda-ml.examples.kaggle.titanic/test"}
94 | ;; <=
95 |
96 | ;; **
97 | ;;; Define the parameters, then use k-fold cross-validation to train a logistic regression model on each fold, printing the AUC and plotting the ROC curve for every held-out partition.
98 | ;; **
99 |
100 | ;; @@
101 | (def alpha 0.01)   ; 1st argument to make-logistic-regression; presumably the learning rate (confirm in regression.clj)
102 | (def lambda 0.1)   ; 2nd argument to make-logistic-regression; presumably the regularization strength (confirm)
103 | (def iters 200)    ; 3rd argument to make-logistic-regression; presumably the number of training iterations (confirm)
104 |
105 | (def k 4)   ; number of partitions requested from random-partition and number of folds iterated over
106 | (def colors ["red" "green" "blue" "black"])   ; paired positionally with the per-fold ROC curves when plotting; only 4 entries, so with k > 4 the extra curves would not be drawn
107 | (def partitions (random-partition k train))   ; result of (random-partition k train); indexed by fold number during cross-validation
108 |
109 | (let [curve-for (fn [held-out]
110 |                   (let [validation (nth partitions held-out)
111 |                         training (apply concat (keep-indexed (fn [i part] (when (not= i held-out) part)) partitions))
112 |                         fitted (regression-fit (make-logistic-regression alpha lambda iters) training)
113 |                         scores (regression-predict fitted (map butlast validation))]
114 |                     (roc-curve (map last validation) scores)))
115 |       rocs (map curve-for (range k))
116 |       draw (fn [roc color] (plot/list-plot roc :joined true :color color))]
117 |   ;; println walks the whole seq, so it prints one AUC per fold
118 |   (println (map (comp float auc) rocs))
119 |   (apply plot/compose (map draw rocs colors)))
120 | ;; @@
121 | ;; ->
122 | ;;; (0.7198606 0.77715176 0.57431024 0.7750871)
123 | ;;;
124 | ;; <-
125 | ;; =>
126 | ;;; {"type":"vega","content":{"width":400,"height":247.2187957763672,"padding":{"top":10,"left":55,"bottom":40,"right":10},"scales":[{"name":"x","type":"linear","range":"width","zero":false,"domain":{"data":"3727b48d-eeb7-4450-b28a-760c1c221378","field":"data.x"}},{"name":"y","type":"linear","range":"height","nice":true,"zero":false,"domain":{"data":"3727b48d-eeb7-4450-b28a-760c1c221378","field":"data.y"}}],"axes":[{"type":"x","scale":"x"},{"type":"y","scale":"y"}],"data":[{"name":"3727b48d-eeb7-4450-b28a-760c1c221378","values":[{"x":0.007142857142857143,"y":0},{"x":0.007142857142857143,"y":0.01219512195121951},{"x":0.01428571428571429,"y":0.01219512195121951},{"x":0.02142857142857143,"y":0.01219512195121951},{"x":0.02142857142857143,"y":0.02439024390243902},{"x":0.02142857142857143,"y":0.03658536585365854},{"x":0.02142857142857143,"y":0.04878048780487805},{"x":0.02142857142857143,"y":0.06097560975609756},{"x":0.02142857142857143,"y":0.07317073170731707},{"x":0.02857142857142857,"y":0.07317073170731707},{"x":0.02857142857142857,"y":0.08536585365853659},{"x":0.02857142857142857,"y":0.0975609756097561},{"x":0.02857142857142857,"y":0.1097560975609756},{"x":0.02857142857142857,"y":0.1219512195121951},{"x":0.02857142857142857,"y":0.1341463414634146},{"x":0.02857142857142857,"y":0.1463414634146341},{"x":0.02857142857142857,"y":0.1585365853658537},{"x":0.03571428571428571,"y":0.1585365853658537},{"x":0.04285714285714286,"y":0.1585365853658537},{"x":0.05,"y":0.1585365853658537},{"x":0.05714285714285714,"y":0.1585365853658537},{"x":0.0642857142857143,"y":0.1585365853658537},{"x":0.0642857142857143,"y":0.1707317073170732},{"x":0.0642857142857143,"y":0.1829268292682927},{"x":0.0642857142857143,"y":0.1951219512195122},{"x":0.0642857142857143,"y":0.2073170731707317},{"x":0.0642857142857143,"y":0.2195121951219512},{"x":0.07142857142857142,"y":0.2195121951219512},{"x":0.07857142857142857,"y":0.2195121951219512},{"x":0.07857142857142857,"y":0.2317073170731707},{"x":0.07857142
857142857,"y":0.2439024390243902},{"x":0.07857142857142857,"y":0.2560975609756098},{"x":0.07857142857142857,"y":0.2682926829268293},{"x":0.07857142857142857,"y":0.2804878048780488},{"x":0.07857142857142857,"y":0.2926829268292683},{"x":0.07857142857142857,"y":0.3048780487804878},{"x":0.07857142857142857,"y":0.3170731707317073},{"x":0.07857142857142857,"y":0.3292682926829268},{"x":0.07857142857142857,"y":0.3414634146341463},{"x":0.07857142857142857,"y":0.3536585365853659},{"x":0.07857142857142857,"y":0.3658536585365854},{"x":0.07857142857142857,"y":0.3780487804878049},{"x":0.08571428571428572,"y":0.3780487804878049},{"x":0.08571428571428572,"y":0.3902439024390244},{"x":0.08571428571428572,"y":0.4024390243902439},{"x":0.08571428571428572,"y":0.4146341463414634},{"x":0.09285714285714286,"y":0.4146341463414634},{"x":0.1,"y":0.4146341463414634},{"x":0.1071428571428571,"y":0.4146341463414634},{"x":0.1142857142857143,"y":0.4146341463414634},{"x":0.1214285714285714,"y":0.4146341463414634},{"x":0.1214285714285714,"y":0.4268292682926829},{"x":0.1285714285714286,"y":0.4268292682926829},{"x":0.1285714285714286,"y":0.4390243902439024},{"x":0.1357142857142857,"y":0.4390243902439024},{"x":0.1428571428571429,"y":0.4390243902439024},{"x":0.15,"y":0.4390243902439024},{"x":0.15,"y":0.451219512195122},{"x":0.15,"y":0.4634146341463415},{"x":0.15,"y":0.475609756097561},{"x":0.15,"y":0.4878048780487805},{"x":0.15,"y":0.5},{"x":0.15,"y":0.5121951219512195},{"x":0.15,"y":0.524390243902439},{"x":0.15,"y":0.5365853658536585},{"x":0.1571428571428571,"y":0.5365853658536585},{"x":0.1642857142857143,"y":0.5365853658536585},{"x":0.1714285714285714,"y":0.5365853658536585},{"x":0.1785714285714286,"y":0.5365853658536585},{"x":0.1785714285714286,"y":0.548780487804878},{"x":0.1857142857142857,"y":0.548780487804878},{"x":0.1857142857142857,"y":0.5609756097560976},{"x":0.1928571428571429,"y":0.5609756097560976},{"x":0.2,"y":0.5609756097560976},{"x":0.2,"y":0.5731707317073171},{"x":0.2,"y":0.58536585365853
66},{"x":0.2071428571428571,"y":0.5853658536585366},{"x":0.2071428571428571,"y":0.5975609756097561},{"x":0.2142857142857143,"y":0.5975609756097561},{"x":0.2214285714285714,"y":0.5975609756097561},{"x":0.2285714285714286,"y":0.5975609756097561},{"x":0.2357142857142857,"y":0.5975609756097561},{"x":0.2428571428571429,"y":0.5975609756097561},{"x":0.25,"y":0.5975609756097561},{"x":0.2571428571428571,"y":0.5975609756097561},{"x":0.2642857142857143,"y":0.5975609756097561},{"x":0.2642857142857143,"y":0.6097560975609756},{"x":0.2714285714285714,"y":0.6097560975609756},{"x":0.2785714285714286,"y":0.6097560975609756},{"x":0.2785714285714286,"y":0.6219512195121951},{"x":0.2857142857142857,"y":0.6219512195121951},{"x":0.2928571428571429,"y":0.6219512195121951},{"x":0.3,"y":0.6219512195121951},{"x":0.3071428571428571,"y":0.6219512195121951},{"x":0.3142857142857143,"y":0.6219512195121951},{"x":0.3214285714285714,"y":0.6219512195121951},{"x":0.3285714285714286,"y":0.6219512195121951},{"x":0.3285714285714286,"y":0.6341463414634146},{"x":0.3285714285714286,"y":0.646341463414634},{"x":0.3357142857142857,"y":0.646341463414634},{"x":0.3428571428571429,"y":0.646341463414634},{"x":0.3428571428571429,"y":0.6585365853658537},{"x":0.3428571428571429,"y":0.6707317073170732},{"x":0.35,"y":0.6707317073170732},{"x":0.3571428571428571,"y":0.6707317073170732},{"x":0.3642857142857143,"y":0.6707317073170732},{"x":0.3714285714285714,"y":0.6707317073170732},{"x":0.3785714285714286,"y":0.6707317073170732},{"x":0.3785714285714286,"y":0.6829268292682927},{"x":0.3857142857142857,"y":0.6829268292682927},{"x":0.3928571428571429,"y":0.6829268292682927},{"x":0.4,"y":0.6829268292682927},{"x":0.4071428571428571,"y":0.6829268292682927},{"x":0.4142857142857143,"y":0.6829268292682927},{"x":0.4214285714285714,"y":0.6829268292682927},{"x":0.4285714285714286,"y":0.6829268292682927},{"x":0.4285714285714286,"y":0.6951219512195121},{"x":0.4357142857142857,"y":0.6951219512195121},{"x":0.4428571428571429,"y":0.69512195121
95121},{"x":0.4428571428571429,"y":0.7073170731707317},{"x":0.45,"y":0.7073170731707317},{"x":0.4571428571428571,"y":0.7073170731707317},{"x":0.4642857142857143,"y":0.7073170731707317},{"x":0.4642857142857143,"y":0.7195121951219512},{"x":0.4642857142857143,"y":0.7317073170731707},{"x":0.4642857142857143,"y":0.7439024390243902},{"x":0.4714285714285714,"y":0.7439024390243902},{"x":0.4714285714285714,"y":0.7560975609756098},{"x":0.4785714285714286,"y":0.7560975609756098},{"x":0.4785714285714286,"y":0.7682926829268293},{"x":0.4857142857142857,"y":0.7682926829268293},{"x":0.4857142857142857,"y":0.7804878048780488},{"x":0.4928571428571429,"y":0.7804878048780488},{"x":0.4928571428571429,"y":0.7926829268292683},{"x":0.5,"y":0.7926829268292683},{"x":0.5,"y":0.8048780487804879},{"x":0.5071428571428571,"y":0.8048780487804879},{"x":0.5142857142857143,"y":0.8048780487804879},{"x":0.5214285714285714,"y":0.8048780487804879},{"x":0.5285714285714286,"y":0.8048780487804879},{"x":0.5357142857142857,"y":0.8048780487804879},{"x":0.5357142857142857,"y":0.8170731707317073},{"x":0.5357142857142857,"y":0.8292682926829268},{"x":0.5428571428571429,"y":0.8292682926829268},{"x":0.5428571428571429,"y":0.8414634146341463},{"x":0.55,"y":0.8414634146341463},{"x":0.55,"y":0.853658536585366},{"x":0.557142857142857,"y":0.853658536585366},{"x":0.557142857142857,"y":0.8658536585365854},{"x":0.557142857142857,"y":0.8780487804878049},{"x":0.5642857142857143,"y":0.8780487804878049},{"x":0.5642857142857143,"y":0.8902439024390244},{"x":0.5714285714285714,"y":0.8902439024390244},{"x":0.5785714285714286,"y":0.8902439024390244},{"x":0.5857142857142857,"y":0.8902439024390244},{"x":0.5928571428571429,"y":0.8902439024390244},{"x":0.6,"y":0.8902439024390244},{"x":0.6071428571428571,"y":0.8902439024390244},{"x":0.6142857142857143,"y":0.8902439024390244},{"x":0.6214285714285714,"y":0.8902439024390244},{"x":0.6285714285714286,"y":0.8902439024390244},{"x":0.6357142857142857,"y":0.8902439024390244},{"x":0.64285714285714
29,"y":0.8902439024390244},{"x":0.65,"y":0.8902439024390244},{"x":0.6571428571428571,"y":0.8902439024390244},{"x":0.6642857142857143,"y":0.8902439024390244},{"x":0.6714285714285714,"y":0.8902439024390244},{"x":0.6785714285714286,"y":0.8902439024390244},{"x":0.6857142857142857,"y":0.8902439024390244},{"x":0.692857142857143,"y":0.8902439024390244},{"x":0.7,"y":0.8902439024390244},{"x":0.7,"y":0.9024390243902439},{"x":0.7071428571428571,"y":0.9024390243902439},{"x":0.7142857142857143,"y":0.9024390243902439},{"x":0.7214285714285714,"y":0.9024390243902439},{"x":0.7285714285714286,"y":0.9024390243902439},{"x":0.7357142857142857,"y":0.9024390243902439},{"x":0.7428571428571429,"y":0.9024390243902439},{"x":0.7428571428571429,"y":0.9146341463414634},{"x":0.75,"y":0.9146341463414634},{"x":0.7571428571428571,"y":0.9146341463414634},{"x":0.7642857142857143,"y":0.9146341463414634},{"x":0.7714285714285714,"y":0.9146341463414634},{"x":0.7785714285714286,"y":0.9146341463414634},{"x":0.7857142857142857,"y":0.9146341463414634},{"x":0.7928571428571429,"y":0.9146341463414634},{"x":0.8,"y":0.9146341463414634},{"x":0.807142857142857,"y":0.9146341463414634},{"x":0.8142857142857143,"y":0.9146341463414634},{"x":0.8214285714285714,"y":0.9146341463414634},{"x":0.8285714285714286,"y":0.9146341463414634},{"x":0.8357142857142857,"y":0.9146341463414634},{"x":0.8357142857142857,"y":0.9268292682926829},{"x":0.8428571428571429,"y":0.9268292682926829},{"x":0.85,"y":0.9268292682926829},{"x":0.85,"y":0.9390243902439024},{"x":0.8571428571428571,"y":0.9390243902439024},{"x":0.8642857142857143,"y":0.9390243902439024},{"x":0.8714285714285714,"y":0.9390243902439024},{"x":0.8785714285714286,"y":0.9390243902439024},{"x":0.8857142857142857,"y":0.9390243902439024},{"x":0.8928571428571429,"y":0.9390243902439024},{"x":0.8928571428571429,"y":0.951219512195122},{"x":0.9,"y":0.951219512195122},{"x":0.9071428571428571,"y":0.951219512195122},{"x":0.9142857142857143,"y":0.951219512195122},{"x":0.9214285714285714,"y":0.9
51219512195122},{"x":0.9285714285714286,"y":0.951219512195122},{"x":0.9357142857142857,"y":0.951219512195122},{"x":0.9357142857142857,"y":0.9634146341463415},{"x":0.942857142857143,"y":0.9634146341463415},{"x":0.942857142857143,"y":0.975609756097561},{"x":0.95,"y":0.975609756097561},{"x":0.9571428571428571,"y":0.975609756097561},{"x":0.9571428571428571,"y":0.9878048780487805},{"x":0.9642857142857143,"y":0.9878048780487805},{"x":0.9714285714285714,"y":0.9878048780487805},{"x":0.9785714285714286,"y":0.9878048780487805},{"x":0.9785714285714286,"y":1},{"x":0.9857142857142857,"y":1},{"x":0.9928571428571429,"y":1},{"x":1,"y":1}]},{"name":"0502fc6a-c9c5-4064-bd08-84a227b4178d","values":[{"x":0.007194244604316547,"y":0},{"x":0.01438848920863309,"y":0},{"x":0.01438848920863309,"y":0.01204819277108434},{"x":0.02158273381294964,"y":0.01204819277108434},{"x":0.02158273381294964,"y":0.02409638554216867},{"x":0.02158273381294964,"y":0.03614457831325301},{"x":0.02158273381294964,"y":0.04819277108433735},{"x":0.02158273381294964,"y":0.06024096385542169},{"x":0.02158273381294964,"y":0.07228915662650602},{"x":0.02158273381294964,"y":0.08433734939759036},{"x":0.02158273381294964,"y":0.0963855421686747},{"x":0.02158273381294964,"y":0.108433734939759},{"x":0.02158273381294964,"y":0.1204819277108434},{"x":0.02877697841726619,"y":0.1204819277108434},{"x":0.02877697841726619,"y":0.1325301204819277},{"x":0.02877697841726619,"y":0.144578313253012},{"x":0.02877697841726619,"y":0.1566265060240964},{"x":0.03597122302158273,"y":0.1566265060240964},{"x":0.03597122302158273,"y":0.1686746987951807},{"x":0.04316546762589928,"y":0.1686746987951807},{"x":0.04316546762589928,"y":0.1807228915662651},{"x":0.04316546762589928,"y":0.1927710843373494},{"x":0.04316546762589928,"y":0.2048192771084337},{"x":0.05035971223021583,"y":0.2048192771084337},{"x":0.05035971223021583,"y":0.2168674698795181},{"x":0.05035971223021583,"y":0.2289156626506024},{"x":0.05035971223021583,"y":0.2409638554216867},{"x":0.05035971
223021583,"y":0.2530120481927711},{"x":0.05035971223021583,"y":0.2650602409638554},{"x":0.05755395683453237,"y":0.2650602409638554},{"x":0.05755395683453237,"y":0.2771084337349398},{"x":0.05755395683453237,"y":0.2891566265060241},{"x":0.05755395683453237,"y":0.3012048192771084},{"x":0.06474820143884892,"y":0.3012048192771084},{"x":0.06474820143884892,"y":0.3132530120481928},{"x":0.06474820143884892,"y":0.3253012048192771},{"x":0.06474820143884892,"y":0.3373493975903614},{"x":0.06474820143884892,"y":0.3493975903614458},{"x":0.06474820143884892,"y":0.3614457831325301},{"x":0.06474820143884892,"y":0.3734939759036145},{"x":0.06474820143884892,"y":0.3855421686746988},{"x":0.06474820143884892,"y":0.3975903614457831},{"x":0.06474820143884892,"y":0.4096385542168675},{"x":0.06474820143884892,"y":0.4216867469879518},{"x":0.06474820143884892,"y":0.4337349397590361},{"x":0.06474820143884892,"y":0.4457831325301205},{"x":0.06474820143884892,"y":0.4578313253012048},{"x":0.06474820143884892,"y":0.4698795180722892},{"x":0.06474820143884892,"y":0.4819277108433735},{"x":0.06474820143884892,"y":0.4939759036144578},{"x":0.06474820143884892,"y":0.5060240963855422},{"x":0.07194244604316546,"y":0.5060240963855422},{"x":0.07194244604316546,"y":0.5180722891566265},{"x":0.07913669064748201,"y":0.5180722891566265},{"x":0.08633093525179857,"y":0.5180722891566265},{"x":0.08633093525179857,"y":0.5301204819277108},{"x":0.08633093525179857,"y":0.5421686746987951},{"x":0.09352517985611511,"y":0.5421686746987951},{"x":0.09352517985611511,"y":0.5542168674698795},{"x":0.1007194244604317,"y":0.5542168674698795},{"x":0.1079136690647482,"y":0.5542168674698795},{"x":0.1151079136690647,"y":0.5542168674698795},{"x":0.1151079136690647,"y":0.5662650602409639},{"x":0.1151079136690647,"y":0.5783132530120482},{"x":0.1151079136690647,"y":0.5903614457831325},{"x":0.1223021582733813,"y":0.5903614457831325},{"x":0.1294964028776978,"y":0.5903614457831325},{"x":0.1366906474820144,"y":0.5903614457831325},{"x":0.14388489
20863309,"y":0.5903614457831325},{"x":0.1510791366906475,"y":0.5903614457831325},{"x":0.158273381294964,"y":0.5903614457831325},{"x":0.1654676258992806,"y":0.5903614457831325},{"x":0.1654676258992806,"y":0.6024096385542169},{"x":0.1726618705035971,"y":0.6024096385542169},{"x":0.1726618705035971,"y":0.6144578313253012},{"x":0.1726618705035971,"y":0.6265060240963854},{"x":0.1726618705035971,"y":0.6385542168674699},{"x":0.1726618705035971,"y":0.6506024096385542},{"x":0.1726618705035971,"y":0.6626506024096386},{"x":0.1798561151079137,"y":0.6626506024096386},{"x":0.1870503597122302,"y":0.6626506024096386},{"x":0.1870503597122302,"y":0.674698795180723},{"x":0.1942446043165468,"y":0.674698795180723},{"x":0.1942446043165468,"y":0.6867469879518072},{"x":0.2014388489208633,"y":0.6867469879518072},{"x":0.2086330935251799,"y":0.6867469879518072},{"x":0.2158273381294964,"y":0.6867469879518072},{"x":0.2230215827338129,"y":0.6867469879518072},{"x":0.2302158273381295,"y":0.6867469879518072},{"x":0.237410071942446,"y":0.6867469879518072},{"x":0.2446043165467626,"y":0.6867469879518072},{"x":0.2517985611510791,"y":0.6867469879518072},{"x":0.2589928057553957,"y":0.6867469879518072},{"x":0.2661870503597122,"y":0.6867469879518072},{"x":0.2733812949640288,"y":0.6867469879518072},{"x":0.2733812949640288,"y":0.6987951807228916},{"x":0.2805755395683453,"y":0.6987951807228916},{"x":0.2877697841726619,"y":0.6987951807228916},{"x":0.2949640287769784,"y":0.6987951807228916},{"x":0.302158273381295,"y":0.6987951807228916},{"x":0.3093525179856115,"y":0.6987951807228916},{"x":0.3165467625899281,"y":0.6987951807228916},{"x":0.3165467625899281,"y":0.7108433734939759},{"x":0.3165467625899281,"y":0.7228915662650602},{"x":0.3165467625899281,"y":0.7349397590361446},{"x":0.3237410071942446,"y":0.7349397590361446},{"x":0.3309352517985612,"y":0.7349397590361446},{"x":0.3381294964028777,"y":0.7349397590361446},{"x":0.3381294964028777,"y":0.7469879518072289},{"x":0.3453237410071942,"y":0.7469879518072289},{"x"
:0.3453237410071942,"y":0.7590361445783133},{"x":0.3525179856115108,"y":0.7590361445783133},{"x":0.3597122302158273,"y":0.7590361445783133},{"x":0.3597122302158273,"y":0.7710843373493976},{"x":0.3597122302158273,"y":0.7831325301204819},{"x":0.3597122302158273,"y":0.7951807228915663},{"x":0.3597122302158273,"y":0.8072289156626506},{"x":0.3669064748201439,"y":0.8072289156626506},{"x":0.3741007194244604,"y":0.8072289156626506},{"x":0.381294964028777,"y":0.8072289156626506},{"x":0.3884892086330935,"y":0.8072289156626506},{"x":0.3956834532374101,"y":0.8072289156626506},{"x":0.3956834532374101,"y":0.8192771084337349},{"x":0.3956834532374101,"y":0.8313253012048193},{"x":0.4028776978417266,"y":0.8313253012048193},{"x":0.4028776978417266,"y":0.8433734939759036},{"x":0.4100719424460432,"y":0.8433734939759036},{"x":0.4172661870503597,"y":0.8433734939759036},{"x":0.4172661870503597,"y":0.855421686746988},{"x":0.4244604316546763,"y":0.855421686746988},{"x":0.4316546762589928,"y":0.855421686746988},{"x":0.4388489208633094,"y":0.855421686746988},{"x":0.4460431654676259,"y":0.855421686746988},{"x":0.4532374100719424,"y":0.855421686746988},{"x":0.460431654676259,"y":0.855421686746988},{"x":0.4676258992805755,"y":0.855421686746988},{"x":0.4748201438848921,"y":0.855421686746988},{"x":0.4820143884892086,"y":0.855421686746988},{"x":0.4892086330935252,"y":0.855421686746988},{"x":0.4964028776978417,"y":0.855421686746988},{"x":0.5035971223021583,"y":0.855421686746988},{"x":0.5107913669064748,"y":0.855421686746988},{"x":0.5179856115107914,"y":0.855421686746988},{"x":0.5251798561151079,"y":0.855421686746988},{"x":0.5323741007194245,"y":0.855421686746988},{"x":0.539568345323741,"y":0.855421686746988},{"x":0.5467625899280576,"y":0.855421686746988},{"x":0.5539568345323741,"y":0.855421686746988},{"x":0.5611510791366906,"y":0.855421686746988},{"x":0.5683453237410072,"y":0.855421686746988},{"x":0.5755395683453237,"y":0.855421686746988},{"x":0.5827338129496403,"y":0.855421686746988},{"x":0.58273381
29496403,"y":0.8674698795180723},{"x":0.5899280575539568,"y":0.8674698795180723},{"x":0.5899280575539568,"y":0.8795180722891566},{"x":0.5971223021582734,"y":0.8795180722891566},{"x":0.60431654676259,"y":0.8795180722891566},{"x":0.6115107913669064,"y":0.8795180722891566},{"x":0.618705035971223,"y":0.8795180722891566},{"x":0.6258992805755396,"y":0.8795180722891566},{"x":0.6330935251798561,"y":0.8795180722891566},{"x":0.6402877697841727,"y":0.8795180722891566},{"x":0.6474820143884892,"y":0.8795180722891566},{"x":0.6546762589928058,"y":0.8795180722891566},{"x":0.6618705035971223,"y":0.8795180722891566},{"x":0.6690647482014388,"y":0.8795180722891566},{"x":0.6762589928057554,"y":0.8795180722891566},{"x":0.6834532374100719,"y":0.8795180722891566},{"x":0.6906474820143885,"y":0.8795180722891566},{"x":0.697841726618705,"y":0.8795180722891566},{"x":0.7050359712230216,"y":0.8795180722891566},{"x":0.7050359712230216,"y":0.891566265060241},{"x":0.7122302158273381,"y":0.891566265060241},{"x":0.7194244604316548,"y":0.891566265060241},{"x":0.7194244604316548,"y":0.9036144578313253},{"x":0.7266187050359711,"y":0.9036144578313253},{"x":0.7338129496402878,"y":0.9036144578313253},{"x":0.7410071942446043,"y":0.9036144578313253},{"x":0.7482014388489209,"y":0.9036144578313253},{"x":0.7553956834532374,"y":0.9036144578313253},{"x":0.7553956834532374,"y":0.9156626506024096},{"x":0.7553956834532374,"y":0.927710843373494},{"x":0.762589928057554,"y":0.927710843373494},{"x":0.7697841726618705,"y":0.927710843373494},{"x":0.7769784172661871,"y":0.927710843373494},{"x":0.7841726618705036,"y":0.927710843373494},{"x":0.7913669064748201,"y":0.927710843373494},{"x":0.7985611510791367,"y":0.927710843373494},{"x":0.8057553956834532,"y":0.927710843373494},{"x":0.8129496402877698,"y":0.927710843373494},{"x":0.8201438848920863,"y":0.927710843373494},{"x":0.827338129496403,"y":0.927710843373494},{"x":0.8345323741007195,"y":0.927710843373494},{"x":0.841726618705036,"y":0.927710843373494},{"x":0.848920863309352
5,"y":0.927710843373494},{"x":0.8489208633093525,"y":0.9397590361445783},{"x":0.8561151079136691,"y":0.9397590361445783},{"x":0.8561151079136691,"y":0.9518072289156627},{"x":0.8633093525179856,"y":0.9518072289156627},{"x":0.8633093525179856,"y":0.963855421686747},{"x":0.8705035971223022,"y":0.963855421686747},{"x":0.8776978417266187,"y":0.963855421686747},{"x":0.8776978417266187,"y":0.9759036144578312},{"x":0.8848920863309353,"y":0.9759036144578312},{"x":0.8920863309352518,"y":0.9759036144578312},{"x":0.8992805755395683,"y":0.9759036144578312},{"x":0.9064748201438849,"y":0.9759036144578312},{"x":0.9136690647482014,"y":0.9759036144578312},{"x":0.920863309352518,"y":0.9759036144578312},{"x":0.9280575539568345,"y":0.9759036144578312},{"x":0.9352517985611511,"y":0.9759036144578312},{"x":0.9352517985611511,"y":0.9879518072289157},{"x":0.9424460431654677,"y":0.9879518072289157},{"x":0.9496402877697842,"y":0.9879518072289157},{"x":0.9568345323741007,"y":0.9879518072289157},{"x":0.9640287769784173,"y":0.9879518072289157},{"x":0.9712230215827338,"y":0.9879518072289157},{"x":0.9712230215827338,"y":1},{"x":0.9784172661870504,"y":1},{"x":0.9856115107913669,"y":1},{"x":0.9928057553956835,"y":1},{"x":1,"y":1}]},{"name":"a3035d60-74ea-49f9-93b1-f358ec1ada53","values":[{"x":0.007751937984496124,"y":0},{"x":0.007751937984496124,"y":0.01075268817204301},{"x":0.01550387596899225,"y":0.01075268817204301},{"x":0.02325581395348837,"y":0.01075268817204301},{"x":0.02325581395348837,"y":0.02150537634408602},{"x":0.02325581395348837,"y":0.03225806451612903},{"x":0.02325581395348837,"y":0.04301075268817204},{"x":0.02325581395348837,"y":0.05376344086021505},{"x":0.02325581395348837,"y":0.06451612903225806},{"x":0.0310077519379845,"y":0.06451612903225806},{"x":0.03875968992248062,"y":0.06451612903225806},{"x":0.03875968992248062,"y":0.07526881720430108},{"x":0.04651162790697674,"y":0.07526881720430108},{"x":0.05426356589147287,"y":0.07526881720430108},{"x":0.05426356589147287,"y":0.086021505376
34409},{"x":0.05426356589147287,"y":0.0967741935483871},{"x":0.06201550387596899,"y":0.0967741935483871},{"x":0.06976744186046512,"y":0.0967741935483871},{"x":0.07751937984496124,"y":0.0967741935483871},{"x":0.07751937984496124,"y":0.1075268817204301},{"x":0.08527131782945736,"y":0.1075268817204301},{"x":0.09302325581395349,"y":0.1075268817204301},{"x":0.09302325581395349,"y":0.1182795698924731},{"x":0.1007751937984496,"y":0.1182795698924731},{"x":0.1085271317829457,"y":0.1182795698924731},{"x":0.1085271317829457,"y":0.1290322580645161},{"x":0.1162790697674419,"y":0.1290322580645161},{"x":0.1162790697674419,"y":0.1397849462365591},{"x":0.1162790697674419,"y":0.1505376344086022},{"x":0.1162790697674419,"y":0.1612903225806452},{"x":0.1162790697674419,"y":0.1720430107526882},{"x":0.1162790697674419,"y":0.1827956989247312},{"x":0.1162790697674419,"y":0.1935483870967742},{"x":0.1162790697674419,"y":0.2043010752688172},{"x":0.1162790697674419,"y":0.2150537634408602},{"x":0.124031007751938,"y":0.2150537634408602},{"x":0.124031007751938,"y":0.2258064516129032},{"x":0.124031007751938,"y":0.2365591397849462},{"x":0.124031007751938,"y":0.2473118279569892},{"x":0.124031007751938,"y":0.2580645161290323},{"x":0.124031007751938,"y":0.2688172043010753},{"x":0.124031007751938,"y":0.2795698924731183},{"x":0.124031007751938,"y":0.2903225806451613},{"x":0.124031007751938,"y":0.3010752688172043},{"x":0.124031007751938,"y":0.3118279569892473},{"x":0.124031007751938,"y":0.3225806451612903},{"x":0.1317829457364341,"y":0.3225806451612903},{"x":0.1395348837209302,"y":0.3225806451612903},{"x":0.1472868217054264,"y":0.3225806451612903},{"x":0.1472868217054264,"y":0.3333333333333333},{"x":0.1472868217054264,"y":0.3440860215053763},{"x":0.1472868217054264,"y":0.3548387096774194},{"x":0.1472868217054264,"y":0.3655913978494624},{"x":0.1472868217054264,"y":0.3763440860215054},{"x":0.1550387596899225,"y":0.3763440860215054},{"x":0.1550387596899225,"y":0.3870967741935484},{"x":0.1627906976744186,"y":
0.3870967741935484},{"x":0.1627906976744186,"y":0.3978494623655914},{"x":0.1705426356589147,"y":0.3978494623655914},{"x":0.1782945736434109,"y":0.3978494623655914},{"x":0.186046511627907,"y":0.3978494623655914},{"x":0.186046511627907,"y":0.4086021505376344},{"x":0.1937984496124031,"y":0.4086021505376344},{"x":0.1937984496124031,"y":0.4193548387096774},{"x":0.1937984496124031,"y":0.4301075268817204},{"x":0.2015503875968992,"y":0.4301075268817204},{"x":0.2093023255813953,"y":0.4301075268817204},{"x":0.2170542635658915,"y":0.4301075268817204},{"x":0.2248062015503876,"y":0.4301075268817204},{"x":0.2325581395348837,"y":0.4301075268817204},{"x":0.2403100775193798,"y":0.4301075268817204},{"x":0.248062015503876,"y":0.4301075268817204},{"x":0.2558139534883721,"y":0.4301075268817204},{"x":0.2635658914728682,"y":0.4301075268817204},{"x":0.2713178294573643,"y":0.4301075268817204},{"x":0.2790697674418605,"y":0.4301075268817204},{"x":0.2868217054263566,"y":0.4301075268817204},{"x":0.2945736434108527,"y":0.4301075268817204},{"x":0.3023255813953488,"y":0.4301075268817204},{"x":0.310077519379845,"y":0.4301075268817204},{"x":0.3178294573643411,"y":0.4301075268817204},{"x":0.3178294573643411,"y":0.4408602150537634},{"x":0.3178294573643411,"y":0.4516129032258065},{"x":0.3178294573643411,"y":0.4623655913978495},{"x":0.3178294573643411,"y":0.4731182795698925},{"x":0.3178294573643411,"y":0.4838709677419355},{"x":0.3178294573643411,"y":0.4946236559139785},{"x":0.3178294573643411,"y":0.5053763440860215},{"x":0.3255813953488372,"y":0.5053763440860215},{"x":0.3255813953488372,"y":0.5161290322580645},{"x":0.3255813953488372,"y":0.5268817204301075},{"x":0.3255813953488372,"y":0.5376344086021505},{"x":0.3255813953488372,"y":0.5483870967741935},{"x":0.3255813953488372,"y":0.5591397849462366},{"x":0.3333333333333333,"y":0.5591397849462366},{"x":0.3333333333333333,"y":0.5698924731182796},{"x":0.3333333333333333,"y":0.5806451612903226},{"x":0.3333333333333333,"y":0.5913978494623656},{"x":0.333333333
3333333,"y":0.6021505376344086},{"x":0.3410852713178295,"y":0.6021505376344086},{"x":0.3410852713178295,"y":0.6129032258064516},{"x":0.3488372093023256,"y":0.6129032258064516},{"x":0.3565891472868217,"y":0.6129032258064516},{"x":0.3643410852713178,"y":0.6129032258064516},{"x":0.372093023255814,"y":0.6129032258064516},{"x":0.3798449612403101,"y":0.6129032258064516},{"x":0.3875968992248062,"y":0.6129032258064516},{"x":0.3953488372093023,"y":0.6129032258064516},{"x":0.4031007751937984,"y":0.6129032258064516},{"x":0.4108527131782946,"y":0.6129032258064516},{"x":0.4186046511627907,"y":0.6129032258064516},{"x":0.4263565891472868,"y":0.6129032258064516},{"x":0.4341085271317829,"y":0.6129032258064516},{"x":0.4341085271317829,"y":0.6236559139784946},{"x":0.4418604651162791,"y":0.6236559139784946},{"x":0.4418604651162791,"y":0.6344086021505376},{"x":0.4496124031007752,"y":0.6344086021505376},{"x":0.4573643410852713,"y":0.6344086021505376},{"x":0.4573643410852713,"y":0.6451612903225806},{"x":0.4651162790697674,"y":0.6451612903225806},{"x":0.4728682170542636,"y":0.6451612903225806},{"x":0.4806201550387597,"y":0.6451612903225806},{"x":0.4883720930232558,"y":0.6451612903225806},{"x":0.4961240310077519,"y":0.6451612903225806},{"x":0.5038759689922481,"y":0.6451612903225806},{"x":0.5116279069767442,"y":0.6451612903225806},{"x":0.5193798449612403,"y":0.6451612903225806},{"x":0.5271317829457364,"y":0.6451612903225806},{"x":0.5348837209302326,"y":0.6451612903225806},{"x":0.5426356589147286,"y":0.6451612903225806},{"x":0.5503875968992248,"y":0.6451612903225806},{"x":0.5581395348837209,"y":0.6451612903225806},{"x":0.5658914728682171,"y":0.6451612903225806},{"x":0.5736434108527132,"y":0.6451612903225806},{"x":0.5813953488372093,"y":0.6451612903225806},{"x":0.5891472868217054,"y":0.6451612903225806},{"x":0.5968992248062016,"y":0.6451612903225806},{"x":0.6046511627906977,"y":0.6451612903225806},{"x":0.6124031007751938,"y":0.6451612903225806},{"x":0.6201550387596899,"y":0.6451612903225806},{
"x":0.627906976744186,"y":0.6451612903225806},{"x":0.6356589147286822,"y":0.6451612903225806},{"x":0.6356589147286822,"y":0.6559139784946237},{"x":0.6434108527131783,"y":0.6559139784946237},{"x":0.6511627906976744,"y":0.6559139784946237},{"x":0.6589147286821705,"y":0.6559139784946237},{"x":0.6666666666666667,"y":0.6559139784946237},{"x":0.6744186046511628,"y":0.6559139784946237},{"x":0.6821705426356589,"y":0.6559139784946237},{"x":0.689922480620155,"y":0.6559139784946237},{"x":0.6976744186046512,"y":0.6559139784946237},{"x":0.7054263565891473,"y":0.6559139784946237},{"x":0.7054263565891473,"y":0.6666666666666667},{"x":0.7131782945736433,"y":0.6666666666666667},{"x":0.7209302325581395,"y":0.6666666666666667},{"x":0.7286821705426357,"y":0.6666666666666667},{"x":0.7364341085271318,"y":0.6666666666666667},{"x":0.7441860465116279,"y":0.6666666666666667},{"x":0.751937984496124,"y":0.6666666666666667},{"x":0.7596899224806202,"y":0.6666666666666667},{"x":0.7674418604651163,"y":0.6666666666666667},{"x":0.7674418604651163,"y":0.6774193548387097},{"x":0.7751937984496124,"y":0.6774193548387097},{"x":0.7829457364341085,"y":0.6774193548387097},{"x":0.7906976744186047,"y":0.6774193548387097},{"x":0.7984496124031008,"y":0.6774193548387097},{"x":0.8062015503875969,"y":0.6774193548387097},{"x":0.813953488372093,"y":0.6774193548387097},{"x":0.813953488372093,"y":0.6881720430107527},{"x":0.813953488372093,"y":0.6989247311827957},{"x":0.821705426356589,"y":0.6989247311827957},{"x":0.8294573643410853,"y":0.6989247311827957},{"x":0.8294573643410853,"y":0.7096774193548387},{"x":0.8294573643410853,"y":0.7204301075268817},{"x":0.8372093023255814,"y":0.7204301075268817},{"x":0.8372093023255814,"y":0.7311827956989247},{"x":0.8372093023255814,"y":0.7419354838709677},{"x":0.8449612403100775,"y":0.7419354838709677},{"x":0.8527131782945736,"y":0.7419354838709677},{"x":0.8527131782945736,"y":0.7526881720430108},{"x":0.8527131782945736,"y":0.7634408602150538},{"x":0.8527131782945736,"y":0.7741935483
870968},{"x":0.8527131782945736,"y":0.7849462365591398},{"x":0.8527131782945736,"y":0.7956989247311828},{"x":0.8527131782945736,"y":0.8064516129032258},{"x":0.8527131782945736,"y":0.8172043010752688},{"x":0.8527131782945736,"y":0.8279569892473118},{"x":0.8527131782945736,"y":0.8387096774193548},{"x":0.8604651162790699,"y":0.8387096774193548},{"x":0.8604651162790699,"y":0.8494623655913978},{"x":0.8604651162790699,"y":0.8602150537634409},{"x":0.8682170542635659,"y":0.8602150537634409},{"x":0.8682170542635659,"y":0.8709677419354839},{"x":0.8682170542635659,"y":0.8817204301075269},{"x":0.8682170542635659,"y":0.8924731182795699},{"x":0.8682170542635659,"y":0.9032258064516129},{"x":0.875968992248062,"y":0.9032258064516129},{"x":0.8837209302325582,"y":0.9032258064516129},{"x":0.8837209302325582,"y":0.9139784946236559},{"x":0.8914728682170543,"y":0.9139784946236559},{"x":0.8914728682170543,"y":0.9247311827956989},{"x":0.8914728682170543,"y":0.9354838709677419},{"x":0.8992248062015504,"y":0.9354838709677419},{"x":0.9069767441860465,"y":0.9354838709677419},{"x":0.9147286821705426,"y":0.9354838709677419},{"x":0.9147286821705426,"y":0.9462365591397849},{"x":0.9224806201550388,"y":0.9462365591397849},{"x":0.9302325581395349,"y":0.9462365591397849},{"x":0.937984496124031,"y":0.9462365591397849},{"x":0.9457364341085271,"y":0.9462365591397849},{"x":0.9457364341085271,"y":0.956989247311828},{"x":0.9534883720930233,"y":0.956989247311828},{"x":0.9612403100775194,"y":0.956989247311828},{"x":0.9612403100775194,"y":0.967741935483871},{"x":0.9689922480620154,"y":0.967741935483871},{"x":0.9767441860465116,"y":0.967741935483871},{"x":0.9767441860465116,"y":0.978494623655914},{"x":0.9844961240310078,"y":0.978494623655914},{"x":0.9844961240310078,"y":0.989247311827957},{"x":0.9922480620155039,"y":0.989247311827957},{"x":0.9922480620155039,"y":1},{"x":1,"y":1}]},{"name":"89cb3c24-8a7d-4f91-836f-a483e2deeb89","values":[{"x":0.007142857142857143,"y":0},{"x":0.007142857142857143,"y":0.01219512195
121951},{"x":0.007142857142857143,"y":0.02439024390243902},{"x":0.007142857142857143,"y":0.03658536585365854},{"x":0.007142857142857143,"y":0.04878048780487805},{"x":0.01428571428571429,"y":0.04878048780487805},{"x":0.02142857142857143,"y":0.04878048780487805},{"x":0.02857142857142857,"y":0.04878048780487805},{"x":0.02857142857142857,"y":0.06097560975609756},{"x":0.03571428571428571,"y":0.06097560975609756},{"x":0.03571428571428571,"y":0.07317073170731707},{"x":0.03571428571428571,"y":0.08536585365853659},{"x":0.04285714285714286,"y":0.08536585365853659},{"x":0.05,"y":0.08536585365853659},{"x":0.05,"y":0.0975609756097561},{"x":0.05,"y":0.1097560975609756},{"x":0.05,"y":0.1219512195121951},{"x":0.05,"y":0.1341463414634146},{"x":0.05,"y":0.1463414634146341},{"x":0.05714285714285714,"y":0.1463414634146341},{"x":0.0642857142857143,"y":0.1463414634146341},{"x":0.07142857142857142,"y":0.1463414634146341},{"x":0.07142857142857142,"y":0.1585365853658537},{"x":0.07142857142857142,"y":0.1707317073170732},{"x":0.07142857142857142,"y":0.1829268292682927},{"x":0.07857142857142857,"y":0.1829268292682927},{"x":0.07857142857142857,"y":0.1951219512195122},{"x":0.08571428571428572,"y":0.1951219512195122},{"x":0.09285714285714286,"y":0.1951219512195122},{"x":0.09285714285714286,"y":0.2073170731707317},{"x":0.09285714285714286,"y":0.2195121951219512},{"x":0.09285714285714286,"y":0.2317073170731707},{"x":0.09285714285714286,"y":0.2439024390243902},{"x":0.09285714285714286,"y":0.2560975609756098},{"x":0.09285714285714286,"y":0.2682926829268293},{"x":0.09285714285714286,"y":0.2804878048780488},{"x":0.09285714285714286,"y":0.2926829268292683},{"x":0.09285714285714286,"y":0.3048780487804878},{"x":0.09285714285714286,"y":0.3170731707317073},{"x":0.09285714285714286,"y":0.3292682926829268},{"x":0.09285714285714286,"y":0.3414634146341463},{"x":0.09285714285714286,"y":0.3536585365853659},{"x":0.09285714285714286,"y":0.3658536585365854},{"x":0.1,"y":0.3658536585365854},{"x":0.1071428571428571,"y
":0.3658536585365854},{"x":0.1071428571428571,"y":0.3780487804878049},{"x":0.1071428571428571,"y":0.3902439024390244},{"x":0.1071428571428571,"y":0.4024390243902439},{"x":0.1071428571428571,"y":0.4146341463414634},{"x":0.1071428571428571,"y":0.4268292682926829},{"x":0.1071428571428571,"y":0.4390243902439024},{"x":0.1071428571428571,"y":0.451219512195122},{"x":0.1071428571428571,"y":0.4634146341463415},{"x":0.1071428571428571,"y":0.475609756097561},{"x":0.1071428571428571,"y":0.4878048780487805},{"x":0.1071428571428571,"y":0.5},{"x":0.1071428571428571,"y":0.5121951219512195},{"x":0.1071428571428571,"y":0.524390243902439},{"x":0.1071428571428571,"y":0.5365853658536585},{"x":0.1071428571428571,"y":0.548780487804878},{"x":0.1071428571428571,"y":0.5609756097560976},{"x":0.1142857142857143,"y":0.5609756097560976},{"x":0.1214285714285714,"y":0.5609756097560976},{"x":0.1285714285714286,"y":0.5609756097560976},{"x":0.1357142857142857,"y":0.5609756097560976},{"x":0.1357142857142857,"y":0.5731707317073171},{"x":0.1428571428571429,"y":0.5731707317073171},{"x":0.15,"y":0.5731707317073171},{"x":0.1571428571428571,"y":0.5731707317073171},{"x":0.1571428571428571,"y":0.5853658536585366},{"x":0.1642857142857143,"y":0.5853658536585366},{"x":0.1642857142857143,"y":0.5975609756097561},{"x":0.1714285714285714,"y":0.5975609756097561},{"x":0.1785714285714286,"y":0.5975609756097561},{"x":0.1785714285714286,"y":0.6097560975609756},{"x":0.1857142857142857,"y":0.6097560975609756},{"x":0.1857142857142857,"y":0.6219512195121951},{"x":0.1928571428571429,"y":0.6219512195121951},{"x":0.1928571428571429,"y":0.6341463414634146},{"x":0.2,"y":0.6341463414634146},{"x":0.2071428571428571,"y":0.6341463414634146},{"x":0.2071428571428571,"y":0.646341463414634},{"x":0.2142857142857143,"y":0.646341463414634},{"x":0.2214285714285714,"y":0.646341463414634},{"x":0.2214285714285714,"y":0.6585365853658537},{"x":0.2285714285714286,"y":0.6585365853658537},{"x":0.2357142857142857,"y":0.6585365853658537},{"x":0.242857
1428571429,"y":0.6585365853658537},{"x":0.25,"y":0.6585365853658537},{"x":0.25,"y":0.6707317073170732},{"x":0.2571428571428571,"y":0.6707317073170732},{"x":0.2642857142857143,"y":0.6707317073170732},{"x":0.2714285714285714,"y":0.6707317073170732},{"x":0.2785714285714286,"y":0.6707317073170732},{"x":0.2857142857142857,"y":0.6707317073170732},{"x":0.2857142857142857,"y":0.6829268292682927},{"x":0.2928571428571429,"y":0.6829268292682927},{"x":0.2928571428571429,"y":0.6951219512195121},{"x":0.3,"y":0.6951219512195121},{"x":0.3,"y":0.7073170731707317},{"x":0.3,"y":0.7195121951219512},{"x":0.3,"y":0.7317073170731707},{"x":0.3,"y":0.7439024390243902},{"x":0.3071428571428571,"y":0.7439024390243902},{"x":0.3071428571428571,"y":0.7560975609756098},{"x":0.3142857142857143,"y":0.7560975609756098},{"x":0.3214285714285714,"y":0.7560975609756098},{"x":0.3214285714285714,"y":0.7682926829268293},{"x":0.3285714285714286,"y":0.7682926829268293},{"x":0.3357142857142857,"y":0.7682926829268293},{"x":0.3428571428571429,"y":0.7682926829268293},{"x":0.35,"y":0.7682926829268293},{"x":0.35,"y":0.7804878048780488},{"x":0.35,"y":0.7926829268292683},{"x":0.35,"y":0.8048780487804879},{"x":0.35,"y":0.8170731707317073},{"x":0.35,"y":0.8292682926829268},{"x":0.35,"y":0.8414634146341463},{"x":0.3571428571428571,"y":0.8414634146341463},{"x":0.3642857142857143,"y":0.8414634146341463},{"x":0.3642857142857143,"y":0.853658536585366},{"x":0.3642857142857143,"y":0.8658536585365854},{"x":0.3714285714285714,"y":0.8658536585365854},{"x":0.3785714285714286,"y":0.8658536585365854},{"x":0.3857142857142857,"y":0.8658536585365854},{"x":0.3928571428571429,"y":0.8658536585365854},{"x":0.3928571428571429,"y":0.8780487804878049},{"x":0.3928571428571429,"y":0.8902439024390244},{"x":0.4,"y":0.8902439024390244},{"x":0.4071428571428571,"y":0.8902439024390244},{"x":0.4142857142857143,"y":0.8902439024390244},{"x":0.4214285714285714,"y":0.8902439024390244},{"x":0.4285714285714286,"y":0.8902439024390244},{"x":0.435714285714285
7,"y":0.8902439024390244},{"x":0.4428571428571429,"y":0.8902439024390244},{"x":0.45,"y":0.8902439024390244},{"x":0.4571428571428571,"y":0.8902439024390244},{"x":0.4642857142857143,"y":0.8902439024390244},{"x":0.4642857142857143,"y":0.9024390243902439},{"x":0.4714285714285714,"y":0.9024390243902439},{"x":0.4785714285714286,"y":0.9024390243902439},{"x":0.4857142857142857,"y":0.9024390243902439},{"x":0.4928571428571429,"y":0.9024390243902439},{"x":0.5,"y":0.9024390243902439},{"x":0.5071428571428571,"y":0.9024390243902439},{"x":0.5142857142857143,"y":0.9024390243902439},{"x":0.5214285714285714,"y":0.9024390243902439},{"x":0.5285714285714286,"y":0.9024390243902439},{"x":0.5357142857142857,"y":0.9024390243902439},{"x":0.5428571428571429,"y":0.9024390243902439},{"x":0.55,"y":0.9024390243902439},{"x":0.557142857142857,"y":0.9024390243902439},{"x":0.5642857142857143,"y":0.9024390243902439},{"x":0.5714285714285714,"y":0.9024390243902439},{"x":0.5785714285714286,"y":0.9024390243902439},{"x":0.5857142857142857,"y":0.9024390243902439},{"x":0.5928571428571429,"y":0.9024390243902439},{"x":0.5928571428571429,"y":0.9146341463414634},{"x":0.6,"y":0.9146341463414634},{"x":0.6071428571428571,"y":0.9146341463414634},{"x":0.6142857142857143,"y":0.9146341463414634},{"x":0.6214285714285714,"y":0.9146341463414634},{"x":0.6285714285714286,"y":0.9146341463414634},{"x":0.6285714285714286,"y":0.9268292682926829},{"x":0.6357142857142857,"y":0.9268292682926829},{"x":0.6428571428571429,"y":0.9268292682926829},{"x":0.6428571428571429,"y":0.9390243902439024},{"x":0.65,"y":0.9390243902439024},{"x":0.6571428571428571,"y":0.9390243902439024},{"x":0.6642857142857143,"y":0.9390243902439024},{"x":0.6714285714285714,"y":0.9390243902439024},{"x":0.6785714285714286,"y":0.9390243902439024},{"x":0.6857142857142857,"y":0.9390243902439024},{"x":0.692857142857143,"y":0.9390243902439024},{"x":0.7,"y":0.9390243902439024},{"x":0.7071428571428571,"y":0.9390243902439024},{"x":0.7142857142857143,"y":0.9390243902439024}
,{"x":0.7214285714285714,"y":0.9390243902439024},{"x":0.7285714285714286,"y":0.9390243902439024},{"x":0.7357142857142857,"y":0.9390243902439024},{"x":0.7428571428571429,"y":0.9390243902439024},{"x":0.75,"y":0.9390243902439024},{"x":0.7571428571428571,"y":0.9390243902439024},{"x":0.7642857142857143,"y":0.9390243902439024},{"x":0.7714285714285714,"y":0.9390243902439024},{"x":0.7785714285714286,"y":0.9390243902439024},{"x":0.7857142857142857,"y":0.9390243902439024},{"x":0.7928571428571429,"y":0.9390243902439024},{"x":0.8,"y":0.9390243902439024},{"x":0.807142857142857,"y":0.9390243902439024},{"x":0.8142857142857143,"y":0.9390243902439024},{"x":0.8214285714285714,"y":0.9390243902439024},{"x":0.8285714285714286,"y":0.9390243902439024},{"x":0.8357142857142857,"y":0.9390243902439024},{"x":0.8428571428571429,"y":0.9390243902439024},{"x":0.85,"y":0.9390243902439024},{"x":0.8571428571428571,"y":0.9390243902439024},{"x":0.8642857142857143,"y":0.9390243902439024},{"x":0.8714285714285714,"y":0.9390243902439024},{"x":0.8785714285714286,"y":0.9390243902439024},{"x":0.8785714285714286,"y":0.951219512195122},{"x":0.8857142857142857,"y":0.951219512195122},{"x":0.8928571428571429,"y":0.951219512195122},{"x":0.9,"y":0.951219512195122},{"x":0.9071428571428571,"y":0.951219512195122},{"x":0.9071428571428571,"y":0.9634146341463415},{"x":0.9142857142857143,"y":0.9634146341463415},{"x":0.9214285714285714,"y":0.9634146341463415},{"x":0.9285714285714286,"y":0.9634146341463415},{"x":0.9357142857142857,"y":0.9634146341463415},{"x":0.942857142857143,"y":0.9634146341463415},{"x":0.942857142857143,"y":0.975609756097561},{"x":0.95,"y":0.975609756097561},{"x":0.9571428571428571,"y":0.975609756097561},{"x":0.9642857142857143,"y":0.975609756097561},{"x":0.9714285714285714,"y":0.975609756097561},{"x":0.9785714285714286,"y":0.975609756097561},{"x":0.9857142857142857,"y":0.975609756097561},{"x":0.9857142857142857,"y":0.9878048780487805},{"x":0.9928571428571429,"y":0.9878048780487805},{"x":1,"y":0.987804878
0487805},{"x":1,"y":1}]}],"marks":[{"type":"line","from":{"data":"3727b48d-eeb7-4450-b28a-760c1c221378"},"properties":{"enter":{"x":{"scale":"x","field":"data.x"},"y":{"scale":"y","field":"data.y"},"stroke":{"value":"red"},"strokeWidth":{"value":2},"strokeOpacity":{"value":1}}}},{"type":"line","from":{"data":"0502fc6a-c9c5-4064-bd08-84a227b4178d"},"properties":{"enter":{"x":{"scale":"x","field":"data.x"},"y":{"scale":"y","field":"data.y"},"stroke":{"value":"green"},"strokeWidth":{"value":2},"strokeOpacity":{"value":1}}}},{"type":"line","from":{"data":"a3035d60-74ea-49f9-93b1-f358ec1ada53"},"properties":{"enter":{"x":{"scale":"x","field":"data.x"},"y":{"scale":"y","field":"data.y"},"stroke":{"value":"blue"},"strokeWidth":{"value":2},"strokeOpacity":{"value":1}}}},{"type":"line","from":{"data":"89cb3c24-8a7d-4f91-836f-a483e2deeb89"},"properties":{"enter":{"x":{"scale":"x","field":"data.x"},"y":{"scale":"y","field":"data.y"},"stroke":{"value":"black"},"strokeWidth":{"value":2},"strokeOpacity":{"value":1}}}}]},"value":"#gorilla_repl.vega.VegaView{:content {:width 400, :height 247.2188, :padding {:top 10, :left 55, :bottom 40, :right 10}, :scales [{:name \"x\", :type \"linear\", :range \"width\", :zero false, :domain {:data \"3727b48d-eeb7-4450-b28a-760c1c221378\", :field \"data.x\"}} {:name \"y\", :type \"linear\", :range \"height\", :nice true, :zero false, :domain {:data \"3727b48d-eeb7-4450-b28a-760c1c221378\", :field \"data.y\"}}], :axes [{:type \"x\", :scale \"x\"} {:type \"y\", :scale \"y\"}], :data ({:name \"3727b48d-eeb7-4450-b28a-760c1c221378\", :values ({:x 1/140, :y 0} {:x 1/140, :y 1/82} {:x 1/70, :y 1/82} {:x 3/140, :y 1/82} {:x 3/140, :y 1/41} {:x 3/140, :y 3/82} {:x 3/140, :y 2/41} {:x 3/140, :y 5/82} {:x 3/140, :y 3/41} {:x 1/35, :y 3/41} {:x 1/35, :y 7/82} {:x 1/35, :y 4/41} {:x 1/35, :y 9/82} {:x 1/35, :y 5/41} {:x 1/35, :y 11/82} {:x 1/35, :y 6/41} {:x 1/35, :y 13/82} {:x 1/28, :y 13/82} {:x 3/70, :y 13/82} {:x 1/20, :y 13/82} {:x 2/35, :y 13/82} {:x 
9/140, :y 13/82} {:x 9/140, :y 7/41} {:x 9/140, :y 15/82} {:x 9/140, :y 8/41} {:x 9/140, :y 17/82} {:x 9/140, :y 9/41} {:x 1/14, :y 9/41} {:x 11/140, :y 9/41} {:x 11/140, :y 19/82} {:x 11/140, :y 10/41} {:x 11/140, :y 21/82} {:x 11/140, :y 11/41} {:x 11/140, :y 23/82} {:x 11/140, :y 12/41} {:x 11/140, :y 25/82} {:x 11/140, :y 13/41} {:x 11/140, :y 27/82} {:x 11/140, :y 14/41} {:x 11/140, :y 29/82} {:x 11/140, :y 15/41} {:x 11/140, :y 31/82} {:x 3/35, :y 31/82} {:x 3/35, :y 16/41} {:x 3/35, :y 33/82} {:x 3/35, :y 17/41} {:x 13/140, :y 17/41} {:x 1/10, :y 17/41} {:x 3/28, :y 17/41} {:x 4/35, :y 17/41} {:x 17/140, :y 17/41} {:x 17/140, :y 35/82} {:x 9/70, :y 35/82} {:x 9/70, :y 18/41} {:x 19/140, :y 18/41} {:x 1/7, :y 18/41} {:x 3/20, :y 18/41} {:x 3/20, :y 37/82} {:x 3/20, :y 19/41} {:x 3/20, :y 39/82} {:x 3/20, :y 20/41} {:x 3/20, :y 1/2} {:x 3/20, :y 21/41} {:x 3/20, :y 43/82} {:x 3/20, :y 22/41} {:x 11/70, :y 22/41} {:x 23/140, :y 22/41} {:x 6/35, :y 22/41} {:x 5/28, :y 22/41} {:x 5/28, :y 45/82} {:x 13/70, :y 45/82} {:x 13/70, :y 23/41} {:x 27/140, :y 23/41} {:x 1/5, :y 23/41} {:x 1/5, :y 47/82} {:x 1/5, :y 24/41} {:x 29/140, :y 24/41} {:x 29/140, :y 49/82} {:x 3/14, :y 49/82} {:x 31/140, :y 49/82} {:x 8/35, :y 49/82} {:x 33/140, :y 49/82} {:x 17/70, :y 49/82} {:x 1/4, :y 49/82} {:x 9/35, :y 49/82} {:x 37/140, :y 49/82} {:x 37/140, :y 25/41} {:x 19/70, :y 25/41} {:x 39/140, :y 25/41} {:x 39/140, :y 51/82} {:x 2/7, :y 51/82} {:x 41/140, :y 51/82} {:x 3/10, :y 51/82} {:x 43/140, :y 51/82} {:x 11/35, :y 51/82} {:x 9/28, :y 51/82} {:x 23/70, :y 51/82} {:x 23/70, :y 26/41} {:x 23/70, :y 53/82} {:x 47/140, :y 53/82} {:x 12/35, :y 53/82} {:x 12/35, :y 27/41} {:x 12/35, :y 55/82} {:x 7/20, :y 55/82} {:x 5/14, :y 55/82} {:x 51/140, :y 55/82} {:x 13/35, :y 55/82} {:x 53/140, :y 55/82} {:x 53/140, :y 28/41} {:x 27/70, :y 28/41} {:x 11/28, :y 28/41} {:x 2/5, :y 28/41} {:x 57/140, :y 28/41} {:x 29/70, :y 28/41} {:x 59/140, :y 28/41} {:x 3/7, :y 28/41} {:x 3/7, :y 57/82} {:x 
61/140, :y 57/82} {:x 31/70, :y 57/82} {:x 31/70, :y 29/41} {:x 9/20, :y 29/41} {:x 16/35, :y 29/41} {:x 13/28, :y 29/41} {:x 13/28, :y 59/82} {:x 13/28, :y 30/41} {:x 13/28, :y 61/82} {:x 33/70, :y 61/82} {:x 33/70, :y 31/41} {:x 67/140, :y 31/41} {:x 67/140, :y 63/82} {:x 17/35, :y 63/82} {:x 17/35, :y 32/41} {:x 69/140, :y 32/41} {:x 69/140, :y 65/82} {:x 1/2, :y 65/82} {:x 1/2, :y 33/41} {:x 71/140, :y 33/41} {:x 18/35, :y 33/41} {:x 73/140, :y 33/41} {:x 37/70, :y 33/41} {:x 15/28, :y 33/41} {:x 15/28, :y 67/82} {:x 15/28, :y 34/41} {:x 19/35, :y 34/41} {:x 19/35, :y 69/82} {:x 11/20, :y 69/82} {:x 11/20, :y 35/41} {:x 39/70, :y 35/41} {:x 39/70, :y 71/82} {:x 39/70, :y 36/41} {:x 79/140, :y 36/41} {:x 79/140, :y 73/82} {:x 4/7, :y 73/82} {:x 81/140, :y 73/82} {:x 41/70, :y 73/82} {:x 83/140, :y 73/82} {:x 3/5, :y 73/82} {:x 17/28, :y 73/82} {:x 43/70, :y 73/82} {:x 87/140, :y 73/82} {:x 22/35, :y 73/82} {:x 89/140, :y 73/82} {:x 9/14, :y 73/82} {:x 13/20, :y 73/82} {:x 23/35, :y 73/82} {:x 93/140, :y 73/82} {:x 47/70, :y 73/82} {:x 19/28, :y 73/82} {:x 24/35, :y 73/82} {:x 97/140, :y 73/82} {:x 7/10, :y 73/82} {:x 7/10, :y 37/41} {:x 99/140, :y 37/41} {:x 5/7, :y 37/41} {:x 101/140, :y 37/41} {:x 51/70, :y 37/41} {:x 103/140, :y 37/41} {:x 26/35, :y 37/41} {:x 26/35, :y 75/82} {:x 3/4, :y 75/82} {:x 53/70, :y 75/82} {:x 107/140, :y 75/82} {:x 27/35, :y 75/82} {:x 109/140, :y 75/82} {:x 11/14, :y 75/82} {:x 111/140, :y 75/82} {:x 4/5, :y 75/82} {:x 113/140, :y 75/82} {:x 57/70, :y 75/82} {:x 23/28, :y 75/82} {:x 29/35, :y 75/82} {:x 117/140, :y 75/82} {:x 117/140, :y 38/41} {:x 59/70, :y 38/41} {:x 17/20, :y 38/41} {:x 17/20, :y 77/82} {:x 6/7, :y 77/82} {:x 121/140, :y 77/82} {:x 61/70, :y 77/82} {:x 123/140, :y 77/82} {:x 31/35, :y 77/82} {:x 25/28, :y 77/82} {:x 25/28, :y 39/41} {:x 9/10, :y 39/41} {:x 127/140, :y 39/41} {:x 32/35, :y 39/41} {:x 129/140, :y 39/41} {:x 13/14, :y 39/41} {:x 131/140, :y 39/41} {:x 131/140, :y 79/82} {:x 33/35, :y 79/82} {:x 
33/35, :y 40/41} {:x 19/20, :y 40/41} {:x 67/70, :y 40/41} {:x 67/70, :y 81/82} {:x 27/28, :y 81/82} {:x 34/35, :y 81/82} {:x 137/140, :y 81/82} {:x 137/140, :y 1} {:x 69/70, :y 1} {:x 139/140, :y 1} {:x 1, :y 1})} {:name \"0502fc6a-c9c5-4064-bd08-84a227b4178d\", :values ({:x 1/139, :y 0} {:x 2/139, :y 0} {:x 2/139, :y 1/83} {:x 3/139, :y 1/83} {:x 3/139, :y 2/83} {:x 3/139, :y 3/83} {:x 3/139, :y 4/83} {:x 3/139, :y 5/83} {:x 3/139, :y 6/83} {:x 3/139, :y 7/83} {:x 3/139, :y 8/83} {:x 3/139, :y 9/83} {:x 3/139, :y 10/83} {:x 4/139, :y 10/83} {:x 4/139, :y 11/83} {:x 4/139, :y 12/83} {:x 4/139, :y 13/83} {:x 5/139, :y 13/83} {:x 5/139, :y 14/83} {:x 6/139, :y 14/83} {:x 6/139, :y 15/83} {:x 6/139, :y 16/83} {:x 6/139, :y 17/83} {:x 7/139, :y 17/83} {:x 7/139, :y 18/83} {:x 7/139, :y 19/83} {:x 7/139, :y 20/83} {:x 7/139, :y 21/83} {:x 7/139, :y 22/83} {:x 8/139, :y 22/83} {:x 8/139, :y 23/83} {:x 8/139, :y 24/83} {:x 8/139, :y 25/83} {:x 9/139, :y 25/83} {:x 9/139, :y 26/83} {:x 9/139, :y 27/83} {:x 9/139, :y 28/83} {:x 9/139, :y 29/83} {:x 9/139, :y 30/83} {:x 9/139, :y 31/83} {:x 9/139, :y 32/83} {:x 9/139, :y 33/83} {:x 9/139, :y 34/83} {:x 9/139, :y 35/83} {:x 9/139, :y 36/83} {:x 9/139, :y 37/83} {:x 9/139, :y 38/83} {:x 9/139, :y 39/83} {:x 9/139, :y 40/83} {:x 9/139, :y 41/83} {:x 9/139, :y 42/83} {:x 10/139, :y 42/83} {:x 10/139, :y 43/83} {:x 11/139, :y 43/83} {:x 12/139, :y 43/83} {:x 12/139, :y 44/83} {:x 12/139, :y 45/83} {:x 13/139, :y 45/83} {:x 13/139, :y 46/83} {:x 14/139, :y 46/83} {:x 15/139, :y 46/83} {:x 16/139, :y 46/83} {:x 16/139, :y 47/83} {:x 16/139, :y 48/83} {:x 16/139, :y 49/83} {:x 17/139, :y 49/83} {:x 18/139, :y 49/83} {:x 19/139, :y 49/83} {:x 20/139, :y 49/83} {:x 21/139, :y 49/83} {:x 22/139, :y 49/83} {:x 23/139, :y 49/83} {:x 23/139, :y 50/83} {:x 24/139, :y 50/83} {:x 24/139, :y 51/83} {:x 24/139, :y 52/83} {:x 24/139, :y 53/83} {:x 24/139, :y 54/83} {:x 24/139, :y 55/83} {:x 25/139, :y 55/83} {:x 26/139, :y 55/83} {:x 26/139, 
:y 56/83} {:x 27/139, :y 56/83} {:x 27/139, :y 57/83} {:x 28/139, :y 57/83} {:x 29/139, :y 57/83} {:x 30/139, :y 57/83} {:x 31/139, :y 57/83} {:x 32/139, :y 57/83} {:x 33/139, :y 57/83} {:x 34/139, :y 57/83} {:x 35/139, :y 57/83} {:x 36/139, :y 57/83} {:x 37/139, :y 57/83} {:x 38/139, :y 57/83} {:x 38/139, :y 58/83} {:x 39/139, :y 58/83} {:x 40/139, :y 58/83} {:x 41/139, :y 58/83} {:x 42/139, :y 58/83} {:x 43/139, :y 58/83} {:x 44/139, :y 58/83} {:x 44/139, :y 59/83} {:x 44/139, :y 60/83} {:x 44/139, :y 61/83} {:x 45/139, :y 61/83} {:x 46/139, :y 61/83} {:x 47/139, :y 61/83} {:x 47/139, :y 62/83} {:x 48/139, :y 62/83} {:x 48/139, :y 63/83} {:x 49/139, :y 63/83} {:x 50/139, :y 63/83} {:x 50/139, :y 64/83} {:x 50/139, :y 65/83} {:x 50/139, :y 66/83} {:x 50/139, :y 67/83} {:x 51/139, :y 67/83} {:x 52/139, :y 67/83} {:x 53/139, :y 67/83} {:x 54/139, :y 67/83} {:x 55/139, :y 67/83} {:x 55/139, :y 68/83} {:x 55/139, :y 69/83} {:x 56/139, :y 69/83} {:x 56/139, :y 70/83} {:x 57/139, :y 70/83} {:x 58/139, :y 70/83} {:x 58/139, :y 71/83} {:x 59/139, :y 71/83} {:x 60/139, :y 71/83} {:x 61/139, :y 71/83} {:x 62/139, :y 71/83} {:x 63/139, :y 71/83} {:x 64/139, :y 71/83} {:x 65/139, :y 71/83} {:x 66/139, :y 71/83} {:x 67/139, :y 71/83} {:x 68/139, :y 71/83} {:x 69/139, :y 71/83} {:x 70/139, :y 71/83} {:x 71/139, :y 71/83} {:x 72/139, :y 71/83} {:x 73/139, :y 71/83} {:x 74/139, :y 71/83} {:x 75/139, :y 71/83} {:x 76/139, :y 71/83} {:x 77/139, :y 71/83} {:x 78/139, :y 71/83} {:x 79/139, :y 71/83} {:x 80/139, :y 71/83} {:x 81/139, :y 71/83} {:x 81/139, :y 72/83} {:x 82/139, :y 72/83} {:x 82/139, :y 73/83} {:x 83/139, :y 73/83} {:x 84/139, :y 73/83} {:x 85/139, :y 73/83} {:x 86/139, :y 73/83} {:x 87/139, :y 73/83} {:x 88/139, :y 73/83} {:x 89/139, :y 73/83} {:x 90/139, :y 73/83} {:x 91/139, :y 73/83} {:x 92/139, :y 73/83} {:x 93/139, :y 73/83} {:x 94/139, :y 73/83} {:x 95/139, :y 73/83} {:x 96/139, :y 73/83} {:x 97/139, :y 73/83} {:x 98/139, :y 73/83} {:x 98/139, :y 74/83} {:x 
99/139, :y 74/83} {:x 100/139, :y 74/83} {:x 100/139, :y 75/83} {:x 101/139, :y 75/83} {:x 102/139, :y 75/83} {:x 103/139, :y 75/83} {:x 104/139, :y 75/83} {:x 105/139, :y 75/83} {:x 105/139, :y 76/83} {:x 105/139, :y 77/83} {:x 106/139, :y 77/83} {:x 107/139, :y 77/83} {:x 108/139, :y 77/83} {:x 109/139, :y 77/83} {:x 110/139, :y 77/83} {:x 111/139, :y 77/83} {:x 112/139, :y 77/83} {:x 113/139, :y 77/83} {:x 114/139, :y 77/83} {:x 115/139, :y 77/83} {:x 116/139, :y 77/83} {:x 117/139, :y 77/83} {:x 118/139, :y 77/83} {:x 118/139, :y 78/83} {:x 119/139, :y 78/83} {:x 119/139, :y 79/83} {:x 120/139, :y 79/83} {:x 120/139, :y 80/83} {:x 121/139, :y 80/83} {:x 122/139, :y 80/83} {:x 122/139, :y 81/83} {:x 123/139, :y 81/83} {:x 124/139, :y 81/83} {:x 125/139, :y 81/83} {:x 126/139, :y 81/83} {:x 127/139, :y 81/83} {:x 128/139, :y 81/83} {:x 129/139, :y 81/83} {:x 130/139, :y 81/83} {:x 130/139, :y 82/83} {:x 131/139, :y 82/83} {:x 132/139, :y 82/83} {:x 133/139, :y 82/83} {:x 134/139, :y 82/83} {:x 135/139, :y 82/83} {:x 135/139, :y 1} {:x 136/139, :y 1} {:x 137/139, :y 1} {:x 138/139, :y 1} {:x 1, :y 1})} {:name \"a3035d60-74ea-49f9-93b1-f358ec1ada53\", :values ({:x 1/129, :y 0} {:x 1/129, :y 1/93} {:x 2/129, :y 1/93} {:x 1/43, :y 1/93} {:x 1/43, :y 2/93} {:x 1/43, :y 1/31} {:x 1/43, :y 4/93} {:x 1/43, :y 5/93} {:x 1/43, :y 2/31} {:x 4/129, :y 2/31} {:x 5/129, :y 2/31} {:x 5/129, :y 7/93} {:x 2/43, :y 7/93} {:x 7/129, :y 7/93} {:x 7/129, :y 8/93} {:x 7/129, :y 3/31} {:x 8/129, :y 3/31} {:x 3/43, :y 3/31} {:x 10/129, :y 3/31} {:x 10/129, :y 10/93} {:x 11/129, :y 10/93} {:x 4/43, :y 10/93} {:x 4/43, :y 11/93} {:x 13/129, :y 11/93} {:x 14/129, :y 11/93} {:x 14/129, :y 4/31} {:x 5/43, :y 4/31} {:x 5/43, :y 13/93} {:x 5/43, :y 14/93} {:x 5/43, :y 5/31} {:x 5/43, :y 16/93} {:x 5/43, :y 17/93} {:x 5/43, :y 6/31} {:x 5/43, :y 19/93} {:x 5/43, :y 20/93} {:x 16/129, :y 20/93} {:x 16/129, :y 7/31} {:x 16/129, :y 22/93} {:x 16/129, :y 23/93} {:x 16/129, :y 8/31} {:x 16/129, :y 
25/93} {:x 16/129, :y 26/93} {:x 16/129, :y 9/31} {:x 16/129, :y 28/93} {:x 16/129, :y 29/93} {:x 16/129, :y 10/31} {:x 17/129, :y 10/31} {:x 6/43, :y 10/31} {:x 19/129, :y 10/31} {:x 19/129, :y 1/3} {:x 19/129, :y 32/93} {:x 19/129, :y 11/31} {:x 19/129, :y 34/93} {:x 19/129, :y 35/93} {:x 20/129, :y 35/93} {:x 20/129, :y 12/31} {:x 7/43, :y 12/31} {:x 7/43, :y 37/93} {:x 22/129, :y 37/93} {:x 23/129, :y 37/93} {:x 8/43, :y 37/93} {:x 8/43, :y 38/93} {:x 25/129, :y 38/93} {:x 25/129, :y 13/31} {:x 25/129, :y 40/93} {:x 26/129, :y 40/93} {:x 9/43, :y 40/93} {:x 28/129, :y 40/93} {:x 29/129, :y 40/93} {:x 10/43, :y 40/93} {:x 31/129, :y 40/93} {:x 32/129, :y 40/93} {:x 11/43, :y 40/93} {:x 34/129, :y 40/93} {:x 35/129, :y 40/93} {:x 12/43, :y 40/93} {:x 37/129, :y 40/93} {:x 38/129, :y 40/93} {:x 13/43, :y 40/93} {:x 40/129, :y 40/93} {:x 41/129, :y 40/93} {:x 41/129, :y 41/93} {:x 41/129, :y 14/31} {:x 41/129, :y 43/93} {:x 41/129, :y 44/93} {:x 41/129, :y 15/31} {:x 41/129, :y 46/93} {:x 41/129, :y 47/93} {:x 14/43, :y 47/93} {:x 14/43, :y 16/31} {:x 14/43, :y 49/93} {:x 14/43, :y 50/93} {:x 14/43, :y 17/31} {:x 14/43, :y 52/93} {:x 1/3, :y 52/93} {:x 1/3, :y 53/93} {:x 1/3, :y 18/31} {:x 1/3, :y 55/93} {:x 1/3, :y 56/93} {:x 44/129, :y 56/93} {:x 44/129, :y 19/31} {:x 15/43, :y 19/31} {:x 46/129, :y 19/31} {:x 47/129, :y 19/31} {:x 16/43, :y 19/31} {:x 49/129, :y 19/31} {:x 50/129, :y 19/31} {:x 17/43, :y 19/31} {:x 52/129, :y 19/31} {:x 53/129, :y 19/31} {:x 18/43, :y 19/31} {:x 55/129, :y 19/31} {:x 56/129, :y 19/31} {:x 56/129, :y 58/93} {:x 19/43, :y 58/93} {:x 19/43, :y 59/93} {:x 58/129, :y 59/93} {:x 59/129, :y 59/93} {:x 59/129, :y 20/31} {:x 20/43, :y 20/31} {:x 61/129, :y 20/31} {:x 62/129, :y 20/31} {:x 21/43, :y 20/31} {:x 64/129, :y 20/31} {:x 65/129, :y 20/31} {:x 22/43, :y 20/31} {:x 67/129, :y 20/31} {:x 68/129, :y 20/31} {:x 23/43, :y 20/31} {:x 70/129, :y 20/31} {:x 71/129, :y 20/31} {:x 24/43, :y 20/31} {:x 73/129, :y 20/31} {:x 74/129, :y 
20/31} {:x 25/43, :y 20/31} {:x 76/129, :y 20/31} {:x 77/129, :y 20/31} {:x 26/43, :y 20/31} {:x 79/129, :y 20/31} {:x 80/129, :y 20/31} {:x 27/43, :y 20/31} {:x 82/129, :y 20/31} {:x 82/129, :y 61/93} {:x 83/129, :y 61/93} {:x 28/43, :y 61/93} {:x 85/129, :y 61/93} {:x 2/3, :y 61/93} {:x 29/43, :y 61/93} {:x 88/129, :y 61/93} {:x 89/129, :y 61/93} {:x 30/43, :y 61/93} {:x 91/129, :y 61/93} {:x 91/129, :y 2/3} {:x 92/129, :y 2/3} {:x 31/43, :y 2/3} {:x 94/129, :y 2/3} {:x 95/129, :y 2/3} {:x 32/43, :y 2/3} {:x 97/129, :y 2/3} {:x 98/129, :y 2/3} {:x 33/43, :y 2/3} {:x 33/43, :y 21/31} {:x 100/129, :y 21/31} {:x 101/129, :y 21/31} {:x 34/43, :y 21/31} {:x 103/129, :y 21/31} {:x 104/129, :y 21/31} {:x 35/43, :y 21/31} {:x 35/43, :y 64/93} {:x 35/43, :y 65/93} {:x 106/129, :y 65/93} {:x 107/129, :y 65/93} {:x 107/129, :y 22/31} {:x 107/129, :y 67/93} {:x 36/43, :y 67/93} {:x 36/43, :y 68/93} {:x 36/43, :y 23/31} {:x 109/129, :y 23/31} {:x 110/129, :y 23/31} {:x 110/129, :y 70/93} {:x 110/129, :y 71/93} {:x 110/129, :y 24/31} {:x 110/129, :y 73/93} {:x 110/129, :y 74/93} {:x 110/129, :y 25/31} {:x 110/129, :y 76/93} {:x 110/129, :y 77/93} {:x 110/129, :y 26/31} {:x 37/43, :y 26/31} {:x 37/43, :y 79/93} {:x 37/43, :y 80/93} {:x 112/129, :y 80/93} {:x 112/129, :y 27/31} {:x 112/129, :y 82/93} {:x 112/129, :y 83/93} {:x 112/129, :y 28/31} {:x 113/129, :y 28/31} {:x 38/43, :y 28/31} {:x 38/43, :y 85/93} {:x 115/129, :y 85/93} {:x 115/129, :y 86/93} {:x 115/129, :y 29/31} {:x 116/129, :y 29/31} {:x 39/43, :y 29/31} {:x 118/129, :y 29/31} {:x 118/129, :y 88/93} {:x 119/129, :y 88/93} {:x 40/43, :y 88/93} {:x 121/129, :y 88/93} {:x 122/129, :y 88/93} {:x 122/129, :y 89/93} {:x 41/43, :y 89/93} {:x 124/129, :y 89/93} {:x 124/129, :y 30/31} {:x 125/129, :y 30/31} {:x 42/43, :y 30/31} {:x 42/43, :y 91/93} {:x 127/129, :y 91/93} {:x 127/129, :y 92/93} {:x 128/129, :y 92/93} {:x 128/129, :y 1} {:x 1, :y 1})} {:name \"89cb3c24-8a7d-4f91-836f-a483e2deeb89\", :values ({:x 1/140, :y 
0} {:x 1/140, :y 1/82} {:x 1/140, :y 1/41} {:x 1/140, :y 3/82} {:x 1/140, :y 2/41} {:x 1/70, :y 2/41} {:x 3/140, :y 2/41} {:x 1/35, :y 2/41} {:x 1/35, :y 5/82} {:x 1/28, :y 5/82} {:x 1/28, :y 3/41} {:x 1/28, :y 7/82} {:x 3/70, :y 7/82} {:x 1/20, :y 7/82} {:x 1/20, :y 4/41} {:x 1/20, :y 9/82} {:x 1/20, :y 5/41} {:x 1/20, :y 11/82} {:x 1/20, :y 6/41} {:x 2/35, :y 6/41} {:x 9/140, :y 6/41} {:x 1/14, :y 6/41} {:x 1/14, :y 13/82} {:x 1/14, :y 7/41} {:x 1/14, :y 15/82} {:x 11/140, :y 15/82} {:x 11/140, :y 8/41} {:x 3/35, :y 8/41} {:x 13/140, :y 8/41} {:x 13/140, :y 17/82} {:x 13/140, :y 9/41} {:x 13/140, :y 19/82} {:x 13/140, :y 10/41} {:x 13/140, :y 21/82} {:x 13/140, :y 11/41} {:x 13/140, :y 23/82} {:x 13/140, :y 12/41} {:x 13/140, :y 25/82} {:x 13/140, :y 13/41} {:x 13/140, :y 27/82} {:x 13/140, :y 14/41} {:x 13/140, :y 29/82} {:x 13/140, :y 15/41} {:x 1/10, :y 15/41} {:x 3/28, :y 15/41} {:x 3/28, :y 31/82} {:x 3/28, :y 16/41} {:x 3/28, :y 33/82} {:x 3/28, :y 17/41} {:x 3/28, :y 35/82} {:x 3/28, :y 18/41} {:x 3/28, :y 37/82} {:x 3/28, :y 19/41} {:x 3/28, :y 39/82} {:x 3/28, :y 20/41} {:x 3/28, :y 1/2} {:x 3/28, :y 21/41} {:x 3/28, :y 43/82} {:x 3/28, :y 22/41} {:x 3/28, :y 45/82} {:x 3/28, :y 23/41} {:x 4/35, :y 23/41} {:x 17/140, :y 23/41} {:x 9/70, :y 23/41} {:x 19/140, :y 23/41} {:x 19/140, :y 47/82} {:x 1/7, :y 47/82} {:x 3/20, :y 47/82} {:x 11/70, :y 47/82} {:x 11/70, :y 24/41} {:x 23/140, :y 24/41} {:x 23/140, :y 49/82} {:x 6/35, :y 49/82} {:x 5/28, :y 49/82} {:x 5/28, :y 25/41} {:x 13/70, :y 25/41} {:x 13/70, :y 51/82} {:x 27/140, :y 51/82} {:x 27/140, :y 26/41} {:x 1/5, :y 26/41} {:x 29/140, :y 26/41} {:x 29/140, :y 53/82} {:x 3/14, :y 53/82} {:x 31/140, :y 53/82} {:x 31/140, :y 27/41} {:x 8/35, :y 27/41} {:x 33/140, :y 27/41} {:x 17/70, :y 27/41} {:x 1/4, :y 27/41} {:x 1/4, :y 55/82} {:x 9/35, :y 55/82} {:x 37/140, :y 55/82} {:x 19/70, :y 55/82} {:x 39/140, :y 55/82} {:x 2/7, :y 55/82} {:x 2/7, :y 28/41} {:x 41/140, :y 28/41} {:x 41/140, :y 57/82} {:x 3/10, 
:y 57/82} {:x 3/10, :y 29/41} {:x 3/10, :y 59/82} {:x 3/10, :y 30/41} {:x 3/10, :y 61/82} {:x 43/140, :y 61/82} {:x 43/140, :y 31/41} {:x 11/35, :y 31/41} {:x 9/28, :y 31/41} {:x 9/28, :y 63/82} {:x 23/70, :y 63/82} {:x 47/140, :y 63/82} {:x 12/35, :y 63/82} {:x 7/20, :y 63/82} {:x 7/20, :y 32/41} {:x 7/20, :y 65/82} {:x 7/20, :y 33/41} {:x 7/20, :y 67/82} {:x 7/20, :y 34/41} {:x 7/20, :y 69/82} {:x 5/14, :y 69/82} {:x 51/140, :y 69/82} {:x 51/140, :y 35/41} {:x 51/140, :y 71/82} {:x 13/35, :y 71/82} {:x 53/140, :y 71/82} {:x 27/70, :y 71/82} {:x 11/28, :y 71/82} {:x 11/28, :y 36/41} {:x 11/28, :y 73/82} {:x 2/5, :y 73/82} {:x 57/140, :y 73/82} {:x 29/70, :y 73/82} {:x 59/140, :y 73/82} {:x 3/7, :y 73/82} {:x 61/140, :y 73/82} {:x 31/70, :y 73/82} {:x 9/20, :y 73/82} {:x 16/35, :y 73/82} {:x 13/28, :y 73/82} {:x 13/28, :y 37/41} {:x 33/70, :y 37/41} {:x 67/140, :y 37/41} {:x 17/35, :y 37/41} {:x 69/140, :y 37/41} {:x 1/2, :y 37/41} {:x 71/140, :y 37/41} {:x 18/35, :y 37/41} {:x 73/140, :y 37/41} {:x 37/70, :y 37/41} {:x 15/28, :y 37/41} {:x 19/35, :y 37/41} {:x 11/20, :y 37/41} {:x 39/70, :y 37/41} {:x 79/140, :y 37/41} {:x 4/7, :y 37/41} {:x 81/140, :y 37/41} {:x 41/70, :y 37/41} {:x 83/140, :y 37/41} {:x 83/140, :y 75/82} {:x 3/5, :y 75/82} {:x 17/28, :y 75/82} {:x 43/70, :y 75/82} {:x 87/140, :y 75/82} {:x 22/35, :y 75/82} {:x 22/35, :y 38/41} {:x 89/140, :y 38/41} {:x 9/14, :y 38/41} {:x 9/14, :y 77/82} {:x 13/20, :y 77/82} {:x 23/35, :y 77/82} {:x 93/140, :y 77/82} {:x 47/70, :y 77/82} {:x 19/28, :y 77/82} {:x 24/35, :y 77/82} {:x 97/140, :y 77/82} {:x 7/10, :y 77/82} {:x 99/140, :y 77/82} {:x 5/7, :y 77/82} {:x 101/140, :y 77/82} {:x 51/70, :y 77/82} {:x 103/140, :y 77/82} {:x 26/35, :y 77/82} {:x 3/4, :y 77/82} {:x 53/70, :y 77/82} {:x 107/140, :y 77/82} {:x 27/35, :y 77/82} {:x 109/140, :y 77/82} {:x 11/14, :y 77/82} {:x 111/140, :y 77/82} {:x 4/5, :y 77/82} {:x 113/140, :y 77/82} {:x 57/70, :y 77/82} {:x 23/28, :y 77/82} {:x 29/35, :y 77/82} {:x 117/140, 
:y 77/82} {:x 59/70, :y 77/82} {:x 17/20, :y 77/82} {:x 6/7, :y 77/82} {:x 121/140, :y 77/82} {:x 61/70, :y 77/82} {:x 123/140, :y 77/82} {:x 123/140, :y 39/41} {:x 31/35, :y 39/41} {:x 25/28, :y 39/41} {:x 9/10, :y 39/41} {:x 127/140, :y 39/41} {:x 127/140, :y 79/82} {:x 32/35, :y 79/82} {:x 129/140, :y 79/82} {:x 13/14, :y 79/82} {:x 131/140, :y 79/82} {:x 33/35, :y 79/82} {:x 33/35, :y 40/41} {:x 19/20, :y 40/41} {:x 67/70, :y 40/41} {:x 27/28, :y 40/41} {:x 34/35, :y 40/41} {:x 137/140, :y 40/41} {:x 69/70, :y 40/41} {:x 69/70, :y 81/82} {:x 139/140, :y 81/82} {:x 1, :y 81/82} {:x 1, :y 1})}), :marks ({:type \"line\", :from {:data \"3727b48d-eeb7-4450-b28a-760c1c221378\"}, :properties {:enter {:x {:scale \"x\", :field \"data.x\"}, :y {:scale \"y\", :field \"data.y\"}, :stroke {:value \"red\"}, :strokeWidth {:value 2}, :strokeOpacity {:value 1}}}} {:type \"line\", :from {:data \"0502fc6a-c9c5-4064-bd08-84a227b4178d\"}, :properties {:enter {:x {:scale \"x\", :field \"data.x\"}, :y {:scale \"y\", :field \"data.y\"}, :stroke {:value \"green\"}, :strokeWidth {:value 2}, :strokeOpacity {:value 1}}}} {:type \"line\", :from {:data \"a3035d60-74ea-49f9-93b1-f358ec1ada53\"}, :properties {:enter {:x {:scale \"x\", :field \"data.x\"}, :y {:scale \"y\", :field \"data.y\"}, :stroke {:value \"blue\"}, :strokeWidth {:value 2}, :strokeOpacity {:value 1}}}} {:type \"line\", :from {:data \"89cb3c24-8a7d-4f91-836f-a483e2deeb89\"}, :properties {:enter {:x {:scale \"x\", :field \"data.x\"}, :y {:scale \"y\", :field \"data.y\"}, :stroke {:value \"black\"}, :strokeWidth {:value 2}, :strokeOpacity {:value 1}}}})}}"}
127 | ;; <=
128 |
129 | ;; **
130 | ;;; Generate predictions using the model. Optionally, print out the predictions in a submission-ready format.
131 | ;; **
132 |
133 | ;; @@
134 | ;(def predictions (regression-predict model (map rest test)))
135 |
136 | ;(println "PassengerId,Survived")
137 | ;(doseq [[id survival] (map (fn [t p] [(first t) p]) test predictions)]
138 | ; (let [survival (if (> survival threshold) 1 0)]
139 | ; (println (str id "," survival))))
140 | ;; @@
141 | ;; =>
142 | ;;; {"type":"html","content":"#'lambda-ml.examples.kaggle.titanic.worksheet/predictions","value":"#'lambda-ml.examples.kaggle.titanic.worksheet/predictions"}
143 | ;; <=
144 |
145 | ;; @@
146 |
147 | ;; @@
148 |
--------------------------------------------------------------------------------
/src/lambda_ml/factorization.clj:
--------------------------------------------------------------------------------
1 | (ns lambda-ml.factorization
2 | "Unsupervised learning with non-negative matrix factorization.
3 |
4 | Example usage:
5 | ```
6 | (def data [[1 2 3] [4 5 6]])
7 | (let [dims 2]
8 | (-> (factorizations data dims)
9 | (nth 300)
10 | ((fn [x] (map #(mapv vec %) x)))))
11 | ;;=> ([[0.20900693256125408 0.2000948450048419]
12 | ;;=> [0.8547267961216941 0.32426625588317753]]
13 | ;;=> [[4.601094573778913 3.4274218917618486 2.1966425686791777]
14 | ;;=> [0.20523936453382804 6.391048036139935 12.709895897835892]])
15 | ```"
16 | (:require [clojure.core.matrix :as m]))
17 |
18 | (m/set-current-implementation :vectorz)
19 |
(defn init-factors
  "Returns a rows-by-cols matrix whose entries are each produced by a separate
  call to `rand`."
  [rows cols]
  (letfn [(random-row [] (repeatedly cols rand))]
    (m/matrix (repeatedly rows random-row))))
23 |
(defn cost
  "Returns the sum of the squared element-wise differences between a and b."
  [a b]
  (let [residual (m/sub a b)]
    (m/esum (m/pow residual 2))))
27 |
(defn factorizations
  "Returns a lazy seq of factorizations of the input matrix v. For an m-by-n
  input matrix, each factorization is a pair [w h] of latent matrices with
  dimensions m-by-dims and dims-by-n.

  With [v dims], the factors start from random matrices. With [v w h], the
  sequence continues from the supplied factors. Every element is obtained from
  the previous one by rescaling h element-wise and then rescaling w
  element-wise using the freshly updated h."
  ([v dims]
   (let [target (m/matrix v)
         w0 (init-factors (m/row-count v) dims)
         h0 (init-factors dims (m/column-count v))]
     (factorizations target w0 h0)))
  ([v w h]
   (lazy-seq
     (let [wt (m/transpose w)
           h' (m/emul h (m/div (m/mmul wt v)
                               (m/mmul wt w h)))
           ;; `h` is updated before `w`, so `w` is rescaled against the new `h`
           ht (m/transpose h')
           w' (m/emul w (m/div (m/mmul v ht)
                               (m/mmul w h' ht)))]
       (cons [w' h'] (factorizations v w' h'))))))
43 |
--------------------------------------------------------------------------------
/src/lambda_ml/metrics.clj:
--------------------------------------------------------------------------------
1 | (ns lambda-ml.metrics
2 | "Functions that compute measures of cost or gain.")
3 |
(defn auc
  "Returns the area under the curve described by points, a collection of
  [x y] pairs, using the trapezoidal rule. Consecutive points are joined in
  the order given; fewer than two points yield 0."
  [points]
  (reduce (fn [area [[x0 y0] [x1 y1]]]
            (+ area (* (- x1 x0) (/ (+ y1 y0) 2))))
          0
          (partition 2 1 points)))
18 |
(defn roc-curve
  "Returns a vector of [false positive rate, true positive rate] tuples that
  represent the ROC curve of a classifier. labels holds the true classes
  (1 for positive, 0 for negative) and predictions the matching scores.
  Examples are visited from the highest score to the lowest, and one point is
  emitted after each example."
  [labels predictions]
  (let [positives (reduce + (filter #(= % 1) labels))
        negatives (- (count labels) positives)
        ;; Labels ordered by descending prediction
        ranked (reverse (map first (sort-by second (map vector labels predictions))))
        tally (fn [[fp tp] label]
                [(if (= label 0) (inc fp) fp)
                 (if (= label 1) (inc tp) tp)])]
    (mapv (fn [[fp tp]] [(/ fp negatives) (/ tp positives)])
          ;; Skip the initial [0 0] tally so that one point is produced per example
          (rest (reductions tally [0 0] ranked)))))
40 |
--------------------------------------------------------------------------------
/src/lambda_ml/naive_bayes.clj:
--------------------------------------------------------------------------------
(ns lambda-ml.naive-bayes
  "Naive Bayes probabilistic model learning.

  Example usage:
  ```
  (def data [[6.0 180 12 :male] [5.92 190 11 :male] [5.58 170 12 :male]
             [5.92 165 10 :male] [5.0 100 6 :female] [5.5 150 8 :female]
             [5.42 130 7 :female] [5.75 150 9 :female]])
  (def fit
    (-> (make-naive-bayes)
        (naive-bayes-fit data)))
  (naive-bayes-predict fit [[6.0 130 8]])
  ;;=> (:female)
  ```"
  ;; Reference clauses must be keyword-headed (`:require`); the bare symbol
  ;; form is rejected by the `ns` spec on Clojure 1.9 and later.
  (:require [clojure.math.numeric-tower :refer :all]))
16 |
(defn gaussian
  "Returns the density at x of a normal distribution with the given mean and
  variance var."
  [x mean var]
  (let [normalizer (/ 1 (sqrt (* 2 Math/PI var)))
        exponent (- (/ (expt (- x mean) 2) (* 2 var)))]
    (* normalizer (expt Math/E exponent))))
21 |
(defn posterior
  "Returns the score of label for example xi: a uniform prior over the labels
  in distributions multiplied by the Gaussian density of every feature of xi
  under that label's per-feature [mean variance] entry."
  [distributions xi label]
  (let [features (if (vector? xi) xi (vec xi))
        prior (/ 1 (count distributions))]
    (reduce (fn [prob index]
              (let [[mean var] (get-in distributions [label index])]
                (* prob (gaussian (nth features index) mean var))))
            prior
            (range (count features)))))
33 |
(defn naive-bayes-fit
  "Returns a naive Bayes model fit to the given training data. With
  [model data], the last element of each example is its label and the
  remaining elements are its features. With [model x y], features and labels
  are given separately. The fitted model stores, under :distributions, a map
  from label to feature index to [mean variance] of that feature among the
  examples carrying that label."
  ([model data]
   (let [features (map butlast data)
         labels (map last data)]
     (naive-bayes-fit model features labels)))
  ([model x y]
   (let [x (if (every? vector? x) x (map vec x))
         y (if (vector? y) y (vec y))
         n (count (first x))
         feature-stats
         (fn [label index]
           (let [;; Feature values for examples with the given label
                 vals (keep-indexed (fn [i v] (when (= (nth y i) label) v))
                                    (map #(nth % index) x))
                 mean (/ (apply + vals) (count vals))
                 ;; Unbiased sample variance
                 var (/ (apply + (map #(expt (- % mean) 2) vals))
                        (dec (count vals)))]
             [mean var]))]
     (->> (for [label (distinct y)
                index (range n)]
            [label index])
          (reduce (fn [m [label index]]
                    (assoc-in m [label index] (feature-stats label index)))
                  {})
          (assoc model :distributions)))))
60 |
(defn naive-bayes-predict
  "Predicts the values of example data using a naive Bayes model. Returns a
  lazy seq holding, for each example in x, the label with the greatest
  posterior score."
  [model x]
  (let [distributions (:distributions model)
        labels (keys distributions)
        classify (fn [xi]
                   (apply max-key
                          (fn [label] (posterior distributions xi label))
                          labels))]
    (map classify x)))
67 |
(defn make-naive-bayes
  "Returns a new, unfitted naive Bayes model, represented as an empty map."
  []
  (array-map))
72 |
--------------------------------------------------------------------------------
/src/lambda_ml/nearest_neighbors.clj:
--------------------------------------------------------------------------------
1 | (ns lambda-ml.nearest-neighbors
2 | "Classification and regression using the k-nearest neighbors algorithm.
3 |
4 | Example usage:
5 | ```
6 | (def data [[2 3] [5 4] [9 6] [4 7] [8 1] [7 2]])
7 | (def fit
8 | (let [k 1]
9 | (-> (make-nearest-neighbors-regressor k lambda-ml.distance/euclidean)
10 | (nearest-neighbors-fit data))))
11 | (nearest-neighbors-predict fit (map butlast data))
12 | ```"
13 | (:require [lambda-ml.core :refer :all]
14 | [lambda-ml.data.binary-tree :as bt]
15 | [lambda-ml.data.kd-tree :as kd]))
16 |
(defn make-item
  "Returns a queue item for value with the given priority, stored as the
  vector [priority value]."
  [value priority]
  [priority value])

(defn item-priority
  "Returns the priority of a queue item."
  [item]
  (nth item 0))

(defn item-value
  "Returns the value of a queue item."
  [item]
  (nth item 1))
28 |
(defn insert
  "Returns v, a vector of items sorted by ascending priority, with a new item
  for value and priority added in sorted position. The new item is placed
  after any existing items of equal priority. When v already holds bound or
  more items, the new item is ignored if its priority is not lower than the
  last item's priority; otherwise the last item is dropped to make room."
  [v value priority bound]
  (let [size (count v)
        full (>= size bound)
        item (make-item value priority)]
    (cond
      ;; Empty vector
      (empty? v)
      [item]
      ;; Full vector and item priority is too high
      (and full (>= priority (item-priority (peek v))))
      v
      :else
      ;; Binary search for the first position whose priority exceeds the new one
      (let [index (loop [lo 0
                         hi size]
                    (if (< lo hi)
                      (let [mid (quot (+ lo hi) 2)]
                        (if (< priority (item-priority (nth v mid)))
                          (recur lo mid)
                          (recur (inc mid) hi)))
                      lo))
            end (if full (dec size) size)]
        (into (conj (subvec v 0 index) item)
              (subvec v index end))))))
55 |
(defn make-nearest-neighbor-search
  "Given a distance function f and a coll of items, each of which have an
  associated dimensional point, returns a function that, given k and a query
  item, returns a priority queue of the k nearest neighboring items. Optionally,
  a function g can be supplied and used to return the dimensional point for an
  item. Otherwise, the item itself is assumed to be the point. Assumes that all
  points are represented as sequences of the same dimension.

  f is called with two points to measure their distance, and with two points
  plus a dimension index when deciding whether the far branch of the tree
  needs to be explored."
  ([f items]
   (make-nearest-neighbor-search f identity items))
  ([f g items]
   (let [dims (count (g (first items)))
         root (kd/make-tree dims items g)]
     (letfn [(search [k query tree depth cand]
               (if (nil? tree)
                 cand
                 (let [node (bt/get-value tree)
                       left (bt/get-left tree)
                       right (bt/get-right tree)
                       dim (mod depth dims)
                       node-point (g node)
                       query-point (g query)
                       ;; Determine near and far branches
                       [near far] (if (<= (nth query-point dim) (nth node-point dim))
                                    [left right]
                                    [right left])
                       deeper (inc depth)
                       ;; Try to add current node to candidates, then explore near branch
                       cand (search k query near deeper
                                    (insert cand node (f query-point node-point) k))]
                   ;; Optionally, explore far branch
                   (if (or (< (count cand) k)
                           (< (f query-point node-point dim)
                              (item-priority (peek cand))))
                     (search k query far deeper cand)
                     cand))))]
       (fn knn
         ([k query]
          (search k query root 0 []))
         ([k query tree depth cand]
          (search k query tree depth cand)))))))
91 |
(defn nearest-neighbors-fit
  "Fits a k-nearest neighbors model to the given training data. With
  [model data], the last element of each example is its target value. With
  [model x y], each target in y is appended to the matching example in x
  first. The fitted model carries the neighbor search function under :lookup."
  ([model data]
   (let [search (make-nearest-neighbor-search (:dist model) butlast data)]
     (assoc model :lookup search)))
  ([model x y]
   (nearest-neighbors-fit model
                          (map (fn [xi yi] (concat xi (list yi))) x y))))
98 |
(defn nearest-neighbors-predict
  "Predicts the values of example data using a k-nearest neighbors model.
  Returns a lazy seq with one aggregated value per example in x, or nil when
  the model has no :lookup function (i.e. it has not been fit)."
  [model x]
  (let [{:keys [k lookup]} model
        agg (:aggregation model)]
    (when (some? lookup)
      (map (fn [xi]
             ;; Append dummy coordinate value to account for assumption of
             ;; target values in last position in training data examples
             (let [query (conj (vec xi) nil)
                   neighbors (lookup k query)]
               (agg (map (comp last item-value) neighbors))))
           x))))
110 |
(defn make-nearest-neighbors-classifier
  "Returns a k-nearest neighbor classification model using the given distance
  function. The targets of the k neighbors are combined with `mode`."
  [k dist]
  (array-map :k k
             :dist dist
             :aggregation mode))
118 |
(defn make-nearest-neighbors-regressor
  "Returns a k-nearest neighbor regression model using the given distance
  function. The targets of the k neighbors are combined with `mean`."
  [k dist]
  (array-map :k k
             :dist dist
             :aggregation mean))
126 |
--------------------------------------------------------------------------------
/src/lambda_ml/neural_network.clj:
--------------------------------------------------------------------------------
1 | (ns lambda-ml.neural-network
2 | "Multilayer perceptron neural network learning using backpropagation.
3 |
4 | Example usage:
5 | ```
6 | (def data [[0 0 [0]] [0 1 [1]] [1 0 [1]] [1 1 [0]]])
7 | (def fit
8 | (let [alpha 0.5
9 | lambda 0.001
10 | model (-> (make-neural-network alpha lambda)
11 | (add-neural-network-layer 2 sigmoid) ;; input layer
12 | (add-neural-network-layer 3 sigmoid) ;; hidden layer
13 | (add-neural-network-layer 1 sigmoid))] ;; output layer
14 | (-> (iterate #(neural-network-fit % data) model)
15 | (nth 5000))))
16 | (neural-network-predict fit (map butlast data))
17 | ;;=> [[0.04262340225834812] [0.9582632706756758] [0.9581124103456861] [0.04103544440312673]]
18 | ```"
19 | (:require [lambda-ml.core :as c]
20 | [clojure.core.matrix :as m]))
21 |
22 | (m/set-current-implementation :vectorz)
23 |
24 | (def bias (m/matrix [1.0]))
25 | (def epsilon 0.0001)
26 |
(defn drop-bias
  "Returns the part of matrix m that remains after removing its first column."
  [m]
  (let [remaining (dec (m/column-count m))]
    (m/submatrix m 1 [1 remaining])))
30 |
(defn feed-forward
  "Returns the activation values for nodes in a neural network after forward
  propagating the values of a single input example x through the network. The
  result is a vector with one entry per weight matrix in theta, in layer
  order; fns supplies the activation function paired with each weight matrix."
  [x theta fns]
  (reduce (fn [activations [weights f]]
            (let [inputs (if (seq activations)
                           (peek activations)
                           (m/matrix x))]
              (->> (m/join bias inputs)
                   (m/mmul weights)
                   (m/emap f)
                   (conj activations))))
          []
          (map vector theta fns)))
42 |
(defn feed-forward-batch
  "Returns the activation values of the final layer of a neural network after
  forward propagating a collection of input examples x through the network.
  Examples are rows of x and of the returned matrix; internally they are
  processed as columns."
  [x theta fns]
  (let [propagate (fn [inputs [weights f]]
                    ;; One bias entry per example column
                    (let [ones (m/broadcast 1.0 [1 (m/column-count inputs)])]
                      (->> (m/join ones inputs)
                           (m/mmul weights)
                           (m/emap f))))
        outputs (reduce propagate
                        (m/transpose (m/matrix x))
                        (map vector theta fns))]
    (m/transpose outputs)))
55 |
(defn back-propagate
  "Returns the errors of each node in a neural network after propagating the
  errors at the output nodes, computed against a single target value y,
  backwards through the network. The result is a vector ordered from the
  first computed layer to the output layer. fns' holds the derivatives of the
  activation functions and output-error computes the error at the output
  layer."
  [y theta fns' activations output-error]
  (let [output-delta (output-error y (last activations) (last fns'))
        ;; Walk the remaining layers from last to first
        earlier-layers (map vector
                            (reverse (rest theta))
                            (reverse (butlast activations))
                            (reverse (butlast fns')))
        push-back (fn [errors [w a f]]
                    (cons (m/mul (m/emap f a)
                                 (m/mmul (first errors) (drop-bias w)))
                          errors))]
    (vec (reduce push-back (list output-delta) earlier-layers))))
70 |
(defn compute-gradients
  "Returns a vector of gradients, one per weight matrix, given the activation
  values and errors obtained for the input values of a single example x. Each
  gradient is the outer product of a layer's errors with that layer's inputs
  prefixed by the bias."
  [x activations errors]
  (let [layer-inputs (cons (m/matrix x) (butlast activations))]
    (mapv (fn [e a] (m/outer-product e (m/join bias a)))
          errors
          layer-inputs)))
80 |
(defn numeric-gradients
  "Returns the numeric approximations of the gradients for each weight given the
  input values of a single example x and label y. Used for debugging by checking
  against the computed gradients during backpropagation. Each entry is the
  central difference of cost when one weight is moved by epsilon in both
  directions."
  [x y theta fns cost]
  (let [xs (list x)
        ys (list y)
        ;; Cost when weight (i, j) of the k-th matrix is replaced by value
        cost-with (fn [k weights i j value]
                    (cost xs ys
                          (assoc theta k (m/set-selection weights i j value))
                          fns))]
    (vec
      (map-indexed
        (fn [k weights]
          (m/matrix
            (for [i (range (m/row-count weights))]
              (for [j (range (m/column-count weights))]
                (let [w (m/select weights i j)
                      raised (cost-with k weights i j (+ w epsilon))
                      lowered (cost-with k weights i j (- w epsilon))]
                  (/ (- raised lowered)
                     (* 2 epsilon)))))))
        theta))))
97 |
(defn regularize
  "Returns, for each weight matrix in theta, the regularization term
  alpha * lambda * w with its first column set to zero."
  [theta alpha lambda]
  (for [w theta]
    (let [zeros (m/matrix (repeat (m/row-count w) 0))]
      (m/set-column (m/mul alpha lambda w) 0 zeros))))
105 |
(defn gradient-descent-step
  "Performs a single gradient step on the input and target values of a single
  example x and label y, and returns the updated weights as a vector. Each
  weight matrix in theta has its alpha-scaled gradient and its regularization
  term subtracted."
  [model x y theta]
  (let [{:keys [alpha lambda cost output-error]} model
        fns (:activation-fns model)
        activations (feed-forward x theta fns)
        errors (back-propagate y theta (map c/derivative fns) activations output-error)
        gradients (compute-gradients x activations errors)
        penalties (regularize theta alpha lambda)
        steps (map #(m/mul % alpha) gradients)]
    ;; Numeric gradient checking
    ;;(println (map (comp #(/ (m/esum %) (m/ecount %)) m/abs m/sub) gradients (numeric-gradients x y theta fns cost)))
    (mapv m/sub theta steps penalties)))
119 |
(defn gradient-descent
  "Performs gradient descent on input and target values of all examples x and
  y, and returns the updated weights. Starting from the model's :parameters,
  one gradient step is taken per example, in order."
  [model x y]
  (loop [weights (:parameters model)
         examples (map vector x y)]
    (if-let [[[xi yi] & remaining] (seq examples)]
      (recur (gradient-descent-step model xi yi weights) remaining)
      weights)))
127 |
(defn init-parameters
  "Returns a vector of randomly initialized weight matrices, one for each pair
  of consecutive layers in the model's :layers. Values are drawn from
  java.util.Random's nextGaussian; the generator is seeded with the model's
  :seed when one is present."
  [model]
  (let [{:keys [layers seed]} model
        rng (if seed (java.util.Random. seed) (java.util.Random.))
        gaussian-row (fn [n] (vec (repeatedly n #(.nextGaussian rng))))]
    (mapv (fn [[n-in n-out]]
            ;; n-out rows, each with one weight per input node plus the bias node
            (m/matrix (vec (repeatedly n-out #(gaussian-row (inc n-in))))))
          (partition 2 1 layers))))
139 |
140 | ;; Cost functions
141 |
(defn cross-entropy-cost
  "Returns the cross-entropy cost of the network with weights theta and
  activation functions fns on examples x with targets y, averaged over the
  number of examples."
  [x y theta fns]
  (let [a (feed-forward-batch x theta fns)
        target-term (m/mul y (m/log a))
        complement-term (m/mul (m/sub 1 y) (m/log (m/sub 1 a)))
        total (m/esum (m/add target-term complement-term))]
    (/ total (- (count x)))))
148 |
(defn cross-entropy-output-error
  "Returns the output layer error for the cross-entropy cost, which is the
  difference between the output activations and the target y. The derivative
  f' of the output activation is accepted but not used, because this error is
  independent of it."
  [y activations f']
  (m/sub activations y))
153 |
(defn quadratic-cost
  "Returns half the sum of squared differences between the network outputs for
  examples x and the targets y."
  [x y theta fns]
  (let [residuals (m/sub (feed-forward-batch x theta fns) y)]
    (/ (m/esum (m/square residuals))
       2)))
158 |
(defn quadratic-output-error
  "Returns the output layer error for the quadratic cost: the difference
  between the output activations and the target y, multiplied element-wise by
  f' applied to the activations."
  [y activations f']
  (let [difference (m/sub activations y)
        slope (m/emap f' activations)]
    (m/mul difference slope)))
162 |
163 | ;; API
164 |
(defn neural-network-fit
  "Trains a neural network model for the given training data. For new models,
  parameters are initialized as random values from a normal distribution.
  With [model data], the last element of each example is its target and the
  remaining elements are its inputs. Returns the model with updated
  :parameters after one pass over the examples."
  ([model data]
   (let [x (map (comp vec butlast) data)
         y (map (comp vec last) data)]
     (neural-network-fit model x y)))
  ([model x y]
   (let [ready (if (:parameters model)
                 model
                 (assoc model :parameters (init-parameters model)))]
     (assoc ready :parameters (gradient-descent ready x y)))))
175 |
(defn neural-network-predict
  "Predicts the values of example data using a neural network model. Returns a
  vector with one output vector per example in x, or nil when the model has no
  :parameters."
  [model x]
  (let [{:keys [parameters activation-fns]} model]
    (when (some? parameters)
      (mapv vec (feed-forward-batch x parameters activation-fns)))))
182 |
(defn neural-network-cost
  "Returns the value of the model's :cost function on the given data, or nil
  when the model has no :parameters. With [model data], the last element of
  each example is its target and the remaining elements are its inputs."
  ([model data]
   (let [x (map (comp vec butlast) data)
         y (map (comp vec last) data)]
     (neural-network-cost model x y)))
  ([model x y]
   (let [{:keys [parameters activation-fns cost]} model]
     (when (some? parameters)
       (cost x y parameters activation-fns)))))
190 |
(defn print-neural-network
  "Prints information about a given neural network. The model map is printed
  as is, except that :parameters, when present, is replaced by a summary
  string with one \"<inputs> x <outputs>\" entry per weight matrix (inputs
  excluding the bias column), joined by \" -> \". Returns nil."
  [model]
  (println
    (cond-> model
      (contains? model :parameters)
      (assoc :parameters
             ;; Built with clojure.core only: this namespace does not require
             ;; clojure.string, so clojure.string/join may not be loaded.
             (->> (:parameters model)
                  (map (fn [thetai]
                         (str (dec (count (first thetai))) " x " (count thetai))))
                  (interpose " -> ")
                  (apply str))))))
200 |
(defn make-neural-network
  "Returns a neural network model where alpha is the learning rate and lambda
  scales the regularization term. cost defaults to cross-entropy-cost. seed,
  when given, seeds the random initialization of the parameters. The
  :output-error function is chosen to match cost and is nil for any cost other
  than cross-entropy-cost and quadratic-cost."
  ([alpha lambda]
   (make-neural-network alpha lambda cross-entropy-cost))
  ([alpha lambda cost]
   (make-neural-network alpha lambda cost nil))
  ([alpha lambda cost seed]
   (let [output-error (condp = cost
                        cross-entropy-cost cross-entropy-output-error
                        quadratic-cost quadratic-output-error
                        nil)]
     {:alpha alpha
      :lambda lambda
      :layers []
      :activation-fns []
      :cost cost
      :seed seed
      :output-error output-error})))
217 |
(defn add-neural-network-layer
  "Adds a layer to a neural network model with n nodes and an activation
  function f, by appending n to :layers and f to :activation-fns."
  [model n f]
  (-> model
      (update :layers conj n)
      (update :activation-fns conj f)))
225 |
--------------------------------------------------------------------------------
/src/lambda_ml/random_forest.clj:
--------------------------------------------------------------------------------
1 | (ns lambda-ml.random-forest
2 | "Random forest classification and regression learning.
3 |
4 | Example usage:
5 | ```
6 | (def data [[0 0 0] [0 1 1] [1 0 1] [1 1 0]])
7 | (def fit
8 | (let [n 1001
9 | min-split 2
10 | min-leaf 1
11 | max-features 2]
12 | (-> (make-random-forest-classifier n min-split min-leaf max-features)
13 | (random-forest-fit data))))
14 | (random-forest-predict fit (map butlast data))
15 | ;;=> (0 1 1 0)
16 | ```"
17 | (:require [lambda-ml.decision-tree :as dt]
18 | [lambda-ml.ensemble :as e]))
19 |
20 | (def random-forest-fit e/bagging-ensemble-fit)
21 |
22 | (def random-forest-predict e/bagging-ensemble-predict)
23 |
(defn make-random-forest-classifier
  "Returns a bagging classifier to which the same classification tree
  estimator (Gini impurity, with the given min-split, min-leaf and
  max-features) has been added n times."
  [n min-split min-leaf max-features]
  (let [rate 1.0
        tree (dt/make-classification-tree dt/gini-impurity min-split min-leaf max-features)
        add-tree (fn [ensemble]
                   (e/add-bagging-estimator ensemble
                                            tree
                                            dt/decision-tree-fit
                                            dt/decision-tree-predict))]
    (nth (iterate add-tree (e/make-bagging-classifier rate)) n)))
31 |
(defn make-random-forest-regressor
  "Returns a bagging regressor to which the same regression tree estimator
  (mean squared error, with the given min-split, min-leaf and max-features)
  has been added n times."
  [n min-split min-leaf max-features]
  (let [rate 1.0
        tree (dt/make-regression-tree dt/mean-squared-error min-split min-leaf max-features)
        add-tree (fn [ensemble]
                   (e/add-bagging-estimator ensemble
                                            tree
                                            dt/decision-tree-fit
                                            dt/decision-tree-predict))]
    (nth (iterate add-tree (e/make-bagging-regressor rate)) n)))
39 |
--------------------------------------------------------------------------------
/src/lambda_ml/regression.clj:
--------------------------------------------------------------------------------
1 | (ns lambda-ml.regression
2 | "Generalized linear model learning for two of the more popular techniques,
3 | linear regression and logistic regression.
4 |
5 | Linear regression example usage:
6 | ```
7 | (def data [[-2 -1] [1 1] [3 2]])
8 | (def fit
9 | (let [alpha 0.01
10 | lambda 0
11 | iters 5000]
12 | (-> (make-linear-regression alpha lambda iters)
13 | (regression-fit data))))
14 | (regression-predict fit (map butlast data))
15 | ;;=> (-0.9473684210526243 0.8684210526315812 2.0789473684210513)
16 | ```
17 |
18 | Logistic regression example usage:
19 | ```
20 | (def data [[4.0 1] [1.75 0] [4.25 1] [2.75 1] [5.0 1] [0.5 0] [1.0 0] [1.5 0]
21 | [5.5 1] [2.5 0] [2.0 0] [3.5 0] [1.75 1] [3.0 0] [4.75 1] [1.25 0]
22 | [4.5 1] [0.75 0] [3.25 1] [2.25 1]])
23 | (def fit
24 | (let [alpha 0.1
25 | lambda 0
26 | iters 10000]
27 | (-> (make-logistic-regression alpha lambda iters)
28 | (regression-fit data))))
29 | (regression-predict fit (take 3 (map butlast data)))
30 | ;;=> (0.8744474608195764 0.19083657134699333 0.9102776017566352)
31 | ```"
32 | (:require [lambda-ml.core :as c]
33 | [clojure.math.numeric-tower :refer :all]))
34 |
(defn gradient-descent-step
  "Performs a single gradient step on the model coefficients and returns the
  updated coefficients as a lazy seq. h is the hypothesis function, called as
  (h xi theta); x holds the examples as vectors and y the targets; alpha is the
  step size and lambda the regularization strength. The coefficient at index 0
  is not regularized."
  [h x y alpha lambda theta]
  (let [m (count y)
        n+1 (count (first x))
        ;; Gradient of the cost with respect to coefficient j
        gradient (fn [j]
                   (* (/ 1 m)
                      (apply + (map (fn [xi yi]
                                      (* (- (h xi theta) yi)
                                         (xi j)))
                                    x y))))
        gradients (map gradient (range n+1))]
    ;; Every update reads the old theta, so all coefficients move simultaneously
    (map (fn [i t g]
           (if (zero? i)
             ;; Non-regularized intercept parameter
             (- t (* alpha g))
             ;; Regularized parameters
             (- (* t (- 1 (/ (* alpha lambda) m)))
                (* alpha g))))
         (range)
         theta
         gradients)))
56 |
(defn gradient-descent
  "Returns a lazy sequence of estimates of the model coefficients, along with
  the cost, at each iteration of gradient descent. Takes a hypothesis function
  h, which returns a predicted value given an example and parameters, and a cost
  function j, which computes the cost of applying the current model on all
  training examples. Each element is a pair [theta cost]. When no starting
  theta is given, one random value per column of x is used."
  ([h j x y alpha lambda]
   (let [initial (repeatedly (count (first x)) rand)]
     (gradient-descent h j x y alpha lambda initial)))
  ([h j x y alpha lambda theta]
   (letfn [(estimates [current]
             (lazy-seq
               (let [updated (gradient-descent-step h x y alpha lambda current)]
                 (cons [updated (j x y updated)]
                       (estimates updated)))))]
     (estimates theta))))
70 |
(defn regression-fit
  "Fits a regression model to the given training data. With [model data], the
  last element of each example is its target. The returned model holds the
  estimate found at index :iterations of the gradient descent sequence under
  :parameters, and the costs of the first :iterations estimates under :costs."
  ([model data]
   (let [x (map butlast data)
         y (map last data)]
     (regression-fit model x y)))
  ([model x y]
   (let [{:keys [alpha lambda iterations hypothesis cost]} model
         x+intercepts (map c/vector-with-intercept x)
         estimates (gradient-descent hypothesis cost x+intercepts y alpha lambda)
         theta (first (nth estimates iterations))]
     (assoc model
            :parameters theta
            :costs (map second (take iterations estimates))))))
83 |
(defn regression-predict
  "Predicts the values of example data using a regression model. Returns a
  lazy seq with one prediction per example in x, or nil when the model has no
  :parameters (i.e. it has not been fit)."
  [model x]
  (let [{theta :parameters h :hypothesis} model]
    (when (not (nil? theta))
      (->> x
           (map c/vector-with-intercept)
           ;; Call the hypothesis as (h xi theta), the argument order used by
           ;; the hypothesis functions and by gradient descent during fitting.
           (map (fn [xi] (h xi theta)))))))
92 |
93 | ;; Linear regression
94 |
(defn linear-regression-hypothesis
  "Returns the linear regression prediction for example xi under coefficients
  theta: the dot product of the two."
  [xi theta]
  (c/dot-product xi theta))
98 |
(defn linear-regression-cost
  "Returns the linear regression cost of coefficients theta on examples x with
  targets y: the sum of squared prediction errors divided by twice the number
  of targets."
  [x y theta]
  (let [m (count y)
        squared-error (fn [xi yi]
                        (expt (- (linear-regression-hypothesis xi theta) yi) 2))]
    (/ (apply + (map squared-error x y))
       (* 2 m))))
106 |
(defn make-linear-regression
  "Returns a linear regression model with the given parameters: step size
  alpha, regularization strength lambda and number of iterations iters."
  [alpha lambda iters]
  (array-map :alpha alpha
             :lambda lambda
             :iterations iters
             :hypothesis linear-regression-hypothesis
             :cost linear-regression-cost))
115 |
116 | ;; Logistic regression
117 |
(defn logistic-regression-hypothesis
  "Returns the logistic regression prediction for example xi under
  coefficients theta: the sigmoid of their dot product."
  [xi theta]
  (let [score (c/dot-product xi theta)]
    (c/sigmoid score)))
121 |
(defn logistic-regression-cost
  "Returns the logistic regression cost of coefficients theta on examples x
  with targets y: the negated mean, over the targets, of
  yi * log(hi) + (1 - yi) * log(1 - hi), where hi is the prediction for xi."
  [x y theta]
  (let [m (count y)
        log-likelihood (fn [xi yi]
                         (let [hi (logistic-regression-hypothesis xi theta)]
                           (+ (* yi (Math/log hi))
                              (* (- 1 yi) (Math/log (- 1 hi))))))]
    (/ (apply + (map log-likelihood x y))
       (- m))))
133 |
(defn make-logistic-regression
  "Returns a logistic regression model with the given parameters: step size
  alpha, regularization strength lambda and number of iterations iters."
  [alpha lambda iters]
  (array-map :alpha alpha
             :lambda lambda
             :iterations iters
             :hypothesis logistic-regression-hypothesis
             :cost logistic-regression-cost))
142 |
--------------------------------------------------------------------------------
/src/lambda_ml/util.clj:
--------------------------------------------------------------------------------
1 | (ns lambda-ml.util
2 | (:import [java.awt Color]
3 | [java.awt.image BufferedImage]))
4 |
(defn pixels->image
  "Returns a BufferedImage (TYPE_INT_ARGB) drawn from pixels, a collection of
  rows of numbers. Each number is multiplied by 255, truncated to an int and
  used for the red, green and blue components of the pixel at that column and
  row. The image width is the length of the first row."
  [pixels]
  (let [height (count pixels)
        width (count (first pixels))
        image (BufferedImage. width height BufferedImage/TYPE_INT_ARGB)]
    (doseq [[y row] (map-indexed vector pixels)
            [x pixel] (map-indexed vector row)]
      (let [level (int (* pixel 255))]
        (.setRGB image x y (.getRGB (Color. level level level)))))
    image))
18 |
--------------------------------------------------------------------------------
/test/lambda_ml/clustering/dbscan_test.clj:
--------------------------------------------------------------------------------
1 | (ns lambda-ml.clustering.dbscan-test
2 | (:require [clojure.test :refer :all]
3 | [clojure.set :refer :all]
4 | [lambda-ml.clustering.dbscan :refer :all]
5 | [lambda-ml.distance :as d]))
6 |
;; Searches around Mountain View with growing distance arguments and checks
;; which of the six locations are returned at each step.
(deftest test-proximity-search
  (let [points {:SanFrancisco [37.759859 -122.437134]
                :Berkeley     [37.864012 -122.277832]
                :PaloAlto     [37.444335 -122.156982]
                :MountainView [37.387617 -122.060852]
                :SanJose      [37.330857 -121.887817]
                :SantaCruz    [36.971838 -122.019653]}
        locations (map-invert points)
        search (make-proximity-search d/haversine (vals points))
        ;; Names of the locations found for the given distance argument
        found (fn [distance]
                (set (map locations (search distance (points :MountainView)))))]
    (are [distance expected] (= (found distance) expected)
      1  #{:MountainView}
      2  #{:MountainView}
      4  #{:MountainView}
      8  #{:MountainView :PaloAlto}
      16 #{:MountainView :PaloAlto :SanJose}
      30 #{:MountainView :PaloAlto :SanJose :SantaCruz}
      35 #{:MountainView :PaloAlto :SanJose :SantaCruz :SanFrancisco}
      50 #{:MountainView :PaloAlto :SanJose :SantaCruz :SanFrancisco :Berkeley})))
32 |
;; Clusters the same eight points with two different settings and checks both
;; the number of distinct clusters and which points share a cluster.
(deftest test-dbscan
  (let [points [[2 10] [2 5] [8 4] [5 8] [7 5] [6 4] [1 2] [4 9]]
        cluster-count (fn [clustering]
                        (count (distinct (vals clustering))))
        ;; True when every given point is assigned the same cluster
        together? (fn [clustering & pts]
                    (apply = (map clustering pts)))]
    (testing "dbscan called with 4 and 2"
      (let [clustering (dbscan d/euclidean 4 2 points)]
        (is (= 2 (cluster-count clustering)))
        (is (together? clustering [5 8] [4 9]))
        (is (together? clustering [8 4] [7 5] [6 4]))))
    (testing "dbscan called with 10 and 2"
      (let [clustering (dbscan d/euclidean 10 2 points)]
        (is (= 3 (cluster-count clustering)))
        (is (together? clustering [2 10] [5 8] [4 9]))
        (is (together? clustering [8 4] [7 5] [6 4]))
        (is (together? clustering [2 5] [1 2]))))))
59 |
;; Fourteen points forming two well separated groups: expects exactly two
;; distinct clusters and checks the membership of each group.
(deftest test-dbscan2
  (let [points [[0 100] [0 200] [0 275] [100 150] [200 100] [250 200] [0 300]
                [100 200] [600 700] [650 700] [675 700] [675 710] [675 720]
                [50 400]]
        clustering (dbscan d/euclidean 10000 3 points)
        ;; True when every given point is assigned the same cluster
        together? (fn [& pts] (apply = (map clustering pts)))]
    (is (= 2 (count (distinct (vals clustering)))))
    (is (together? [0 100] [0 200] [0 275] [100 150] [0 300] [100 200]))
    (is (together? [600 700] [650 700] [675 700] [675 710] [675 720]))))
88 |
;; Floating point coordinates: the points at indices 4-9, 10-18 and 19-21 are
;; each expected to share a single cluster.
(deftest test-dbscan3
  (let [points [[64.22906466107816 21.979356040013954]
                [9.502019068226218 73.5146190142259]
                [73.467643359676 49.11882050731219]
                [43.89991499437019 30.877086140215397]
                [31.66112900408916 62.006799353519455]
                [32.96188162290491 62.647924402495846]
                [29.860327935311943 61.4603339463938]
                [30.252436050213873 61.628358017420396]
                [29.478669344214723 63.34734829352237]
                [31.20809231721796 60.62778950878419]
                [56.719979556510225 12.79888943536207]
                [58.65873904968612 12.760463243583217]
                [56.961131111718714 13.99614970618859]
                [56.45650068600662 14.442072300706059]
                [56.4971734713763 15.955536322668195]
                [56.84485225030221 14.559824497206137]
                [55.51035064924508 11.432733331574127]
                [56.13687033439055 10.726739906473085]
                [55.76618270971812 11.259738458553329]
                [18.786322022089735 39.41377491992898]
                [17.121476165484637 39.838845615973696]
                [19.683527131564915 39.34956996375695]]
        clustering (dbscan d/euclidean 4 2 points)
        ;; True when the points at indices [from, to) share one cluster
        together? (fn [from to]
                    (apply = (map clustering (subvec points from to))))]
    (is (together? 4 10))
    (is (together? 10 19))
    (is (together? 19 22))))
131 |
--------------------------------------------------------------------------------
/test/lambda_ml/clustering/hierarchical_test.clj:
--------------------------------------------------------------------------------
1 | (ns lambda-ml.clustering.hierarchical-test
2 | (:require [clojure.test :refer :all]
3 | [lambda-ml.clustering.hierarchical :refer :all]
4 | [lambda-ml.distance :as d]))
5 |
;; Verifies all 49 entries of the pairwise Jaccard distance table built from
;; seven binary vectors against a hand-written expected matrix.
(deftest test-pairwise-distances
  (let [points    [[1 1 1 0 1 0 0 1 1 1]
                   [1 1 0 1 1 0 0 0 0 1]
                   [0 1 1 0 1 0 0 1 0 0]
                   [0 0 0 1 0 1 0 0 0 0]
                   [1 1 1 0 1 0 1 1 1 0]
                   [0 1 0 1 1 0 0 0 0 1]
                   [0 1 1 0 1 1 0 1 1 0]]
        distances (pairwise-distances d/jaccard points)
        ;; Row i, column j is the value expected at (get-in distances [i j]).
        expected  [[0   1/2 3/7 1   1/4 5/8 3/8]
                   [1/2 0   5/7 5/6 2/3 1/5 7/9]
                   [3/7 5/7 0   1   3/7 2/3 1/3]
                   [1   5/6 1   0   1   4/5 6/7]
                   [1/4 2/3 3/7 1   0   7/9 3/8]
                   [5/8 1/5 2/3 4/5 7/9 0   3/4]
                   [3/8 7/9 1/3 6/7 3/8 3/4 0]]]
    (doseq [[i row]  (map-indexed vector expected)
            [j want] (map-indexed vector row)]
      (is (= want (:distance (get-in distances [i j])))))))
64 |
;; Builds distance queues from a 3x3 table of {:distance :index} entries and
;; checks the leading element of the first two items in each row's queue.
;; The expectations are consistent with ordering the *other* indices by
;; ascending distance (row 0: index 2 at distance 3, then index 1 at 7).
(deftest test-distance-queues
  (let [entry     (fn [dist idx] {:distance dist :index idx})
        distances {0 {0 (entry 0 0) 1 (entry 7 1) 2 (entry 3 2)}
                   1 {0 (entry 7 0) 1 (entry 0 1) 2 (entry 5 2)}
                   2 {0 (entry 3 0) 1 (entry 5 1) 2 (entry 0 2)}}
        queues    (distance-queues distances)]
    (doseq [[row head runner-up] [[0 2 1]
                                  [1 2 0]
                                  [2 0 1]]]
      (let [queue (get queues row)]
        (is (= head (ffirst queue)))
        (is (= runner-up (first (second queue))))))))
82 |
;; Single-link clustering over a lookup table of distances between six
;; string-labelled items; checks the first five merges in order.
;; The first expected merge [2 5] matches the key positions of "MI" and "TO",
;; the closest pair in the table (138).
(deftest test-agglomerative-clustering
  (let [distances {"BA" {"BA" 0   "FI" 662 "MI" 877 "NA" 255 "RM" 412 "TO" 996}
                   "FI" {"BA" 662 "FI" 0   "MI" 295 "NA" 468 "RM" 268 "TO" 400}
                   "MI" {"BA" 877 "FI" 295 "MI" 0   "NA" 754 "RM" 564 "TO" 138}
                   "NA" {"BA" 255 "FI" 468 "MI" 754 "NA" 0   "RM" 219 "TO" 869}
                   "RM" {"BA" 412 "FI" 268 "MI" 564 "NA" 219 "RM" 0   "TO" 669}
                   "TO" {"BA" 996 "FI" 400 "MI" 138 "NA" 869 "RM" 669 "TO" 0}}
        lookup    (fn [a b] (get-in distances [a b]))
        merges    (agglomerative-clustering single-link lookup (keys distances))
        expected  [[2 5] [3 4] [0 3] [0 1] [0 2]]]
    (doseq [[step pair] (map-indexed vector expected)]
      (is (= pair (nth merges step))))))
97 |
;; Single-link clustering of seven binary vectors under Jaccard distance;
;; checks the first six merges in order.
(deftest test-agglomerative-clustering2
  (let [points [[1 1 1 0 1 0 0 1 1 1]
                [1 1 0 1 1 0 0 0 0 1]
                [0 1 1 0 1 0 0 1 0 0]
                [0 0 0 1 0 1 0 0 0 0]
                [1 1 1 0 1 0 1 1 1 0]
                [0 1 0 1 1 0 0 0 0 1]
                [0 1 1 0 1 1 0 1 1 0]]
        merges (agglomerative-clustering single-link d/jaccard points)]
    (are [step pair] (= pair (nth merges step))
      0 [1 5]
      1 [0 4]
      2 [2 6]
      3 [0 2]
      4 [0 1]
      5 [0 3])))
113 |
;; Single-link clustering over a table of floating-point distances between
;; six labelled items; checks the first five merges in order.
;; The first expected merge [3 5] matches the key positions of "D" and "F",
;; the closest pair in the table (0.50).
(deftest test-agglomerative-clustering3
  (let [distances {"A" {"A" 0.00 "B" 0.71 "C" 5.66 "D" 3.61 "E" 4.24 "F" 3.20}
                   "B" {"A" 0.71 "B" 0.00 "C" 4.95 "D" 2.92 "E" 3.54 "F" 2.50}
                   "C" {"A" 5.66 "B" 4.95 "C" 0.00 "D" 2.24 "E" 1.41 "F" 2.50}
                   "D" {"A" 3.61 "B" 2.92 "C" 2.24 "D" 0.00 "E" 1.00 "F" 0.50}
                   "E" {"A" 4.24 "B" 3.54 "C" 1.41 "D" 1.00 "E" 0.00 "F" 1.12}
                   "F" {"A" 3.20 "B" 2.50 "C" 2.50 "D" 0.50 "E" 1.12 "F" 0.00}}
        lookup    (fn [from to] (get-in distances [from to]))
        merges    (agglomerative-clustering single-link lookup (keys distances))
        expected  [[3 5] [0 1] [3 4] [2 3] [0 2]]]
    (doseq [[step pair] (map-indexed vector expected)]
      (is (= pair (nth merges step))))))
128 |
--------------------------------------------------------------------------------
/test/lambda_ml/clustering/k_means_test.clj:
--------------------------------------------------------------------------------
1 | (ns lambda-ml.clustering.k-means-test
2 | (:require [clojure.test :refer :all]
3 | [clojure.set :refer :all]
4 | [lambda-ml.clustering.k-means :refer :all]
5 | [lambda-ml.distance :as d]))
6 |
;; Runs k-means with k = 2 on seven 2-D points, takes the clustering at
;; iteration 100, and checks that the five upper-right points share a cluster
;; and the two lower-left points share a cluster.
;;
;; Each grouping gets its own `is`. `is` accepts an optional second argument
;; that is treated as a failure message, so a second `(= ...)` placed inside
;; the same `is` would never be asserted.
;;
;; NOTE(review): `index` is (map-invert clustering). If the clustering's
;; values are collections of points rather than single points, every lookup
;; below returns nil and these equalities hold vacuously -- confirm against
;; the k-means return shape.
(deftest test-k-means
  (let [points     [[1 1] [1.5 2] [3 4] [5 7] [3.5 5] [4.5 5] [3.5 4.5]]
        clustering (nth (k-means 2 d/euclidean points) 100)
        index      (map-invert clustering)]
    (is (= 2 (count clustering)))
    (is (= (index [3.5 4.5])
           (index [4.5 5])
           (index [3.5 5])
           (index [5 7])
           (index [3 4])))
    (is (= (index [1.5 2])
           (index [1 1])))))
19 |
--------------------------------------------------------------------------------
/test/lambda_ml/core_test.clj:
--------------------------------------------------------------------------------
1 | (ns lambda-ml.core-test
2 | (:require [clojure.test :refer :all]
3 | [lambda-ml.core :refer :all]))
4 |
;; Median of an odd-length and an even-length collection; the even case
;; expects the exact ratio 5/2.
(deftest test-median
  (are [xs expected] (= (median xs) expected)
    [5 2 4 1 3] 3
    [7 0 2 3]   5/2))
8 |
;; For sample sizes 5 through 10, the sample has exactly the requested size
;; and every drawn value is below 10 (the population is (range 10)).
(deftest test-sample-with-replacement
  (let [population (range 10)]
    (doseq [size  (range 5 11)
            :let  [drawn (sample-with-replacement population size)]]
      (is (= size (count drawn)))
      (is (every? #(< % 10) drawn)))))
14 |
;; Sampling without replacement from (range 10):
;; - requesting 10, 100 or 1000 items always yields 10 items;
;; - for sizes 5 through 10 the sample has the requested size, contains no
;;   duplicates, and every value is below 10.
(deftest test-sample-without-replacement
  (let [population (range 10)]
    (doseq [requested [10 100 1000]]
      (is (= 10 (count (sample-without-replacement population requested)))))
    (doseq [size  (range 5 11)
            :let  [drawn (sample-without-replacement population size)]]
      (is (= size (count drawn)))
      (is (= size (count (distinct drawn))))
      (is (every? #(< % 10) drawn)))))
24 |
--------------------------------------------------------------------------------
/test/lambda_ml/data/binary_tree_test.clj:
--------------------------------------------------------------------------------
1 | (ns lambda-ml.data.binary-tree-test
2 | (:require [clojure.test :refer :all]
3 | [lambda-ml.data.binary-tree :refer :all]))
4 |
;; A tree built from a single value holds that value, has no left or right
;; child, and is reported as a leaf.
(deftest test-binary-tree-leaf
  (let [node (make-tree 42)]
    (is (= (get-value node) 42))
    (doseq [child-of [get-left get-right]]
      (is (nil? (child-of node))))
    (is (leaf? node))))
11 |
;; Builds a three-level tree with some missing children and checks that
;; get-path agrees with get-left/get-right and reaches the expected values.
(deftest test-binary-tree
  (let [left-branch  (make-tree 7
                                (make-tree 2)
                                (make-tree 6 (make-tree 5) (make-tree 11)))
        right-branch (make-tree 5
                                nil
                                (make-tree 9 (make-tree 4) nil))
        root         (make-tree 2 left-branch right-branch)
        value-at     (fn [path] (get-value (get-path root path)))]
    (is (= (get-value root) 2))
    (is (= (get-path root [:left]) (get-left root)))
    (is (= (get-path root [:right]) (get-right root)))
    (is (= (value-at [:left :right :left]) 5))
    (is (= (value-at [:right :right :left]) 4))))
25 |
;; Converts a nine-node tree to an adjacency matrix and checks the node count
;; plus the number of edges recorded for each node: none for the childless
;; nodes, two for nodes with both children, one for nodes with a single child.
(deftest test-adjacency-matrix
  (let [tree     (make-tree :a
                            (make-tree :b
                                       (make-tree :c)
                                       (make-tree :d (make-tree :e) (make-tree :f)))
                            (make-tree :g
                                       nil
                                       (make-tree :h (make-tree :i) nil)))
        matrix   (adjacency-matrix tree)
        ;; Edges of the first matrix entry whose :value equals v.
        edges-of (fn [v]
                   (:edges (first (filter #(= v (:value %)) (vals matrix)))))]
    (is (= (count matrix) 9))
    (doseq [childless [:c :e :f :i]]
      (is (empty? (edges-of childless))))
    (doseq [[node edge-count] [[:a 2] [:b 2] [:d 2] [:g 1] [:h 1]]]
      (is (= (count (edges-of node)) edge-count)))))
45 |
--------------------------------------------------------------------------------
/test/lambda_ml/data/kd_tree_test.clj:
--------------------------------------------------------------------------------
1 | (ns lambda-ml.data.kd-tree-test
2 | (:require [clojure.test :refer :all]
3 | [lambda-ml.data.binary-tree :as bt]
4 | [lambda-ml.data.kd-tree :refer :all]))
5 |
;; Builds a 2-dimensional k-d tree from six points and checks the root value,
;; that get-path agrees with chained get-left/get-right, the point stored at
;; each occupied position, and that the remaining positions are nil.
(deftest test-kd-tree
  (let [tree (make-tree 2 [[2 3] [5 4] [9 6] [4 7] [8 1] [7 2]])
        at   (fn [path] (bt/get-path tree path))]
    (is (= (bt/get-value tree) [7 2]))
    (is (= (at [:left]) (bt/get-left tree)))
    (is (= (at [:right]) (bt/get-right tree)))
    (is (= (at [:left :left]) (-> tree bt/get-left bt/get-left)))
    (is (= (at [:left :right]) (-> tree bt/get-left bt/get-right)))
    (is (= (at [:right :left]) (-> tree bt/get-right bt/get-left)))
    (doseq [[path point] [[[:left]        [5 4]]
                          [[:right]       [9 6]]
                          [[:left :left]  [2 3]]
                          [[:left :right] [4 7]]
                          [[:right :left] [8 1]]]]
      (is (= (bt/get-value (at path)) point)))
    (doseq [vacant [[:left :left :left]
                    [:left :left :right]
                    [:left :right :left]
                    [:left :right :right]
                    [:right :left :left]
                    [:right :left :right]
                    [:right :right]]]
      (is (nil? (at vacant))))))
26 |
--------------------------------------------------------------------------------
/test/lambda_ml/decision_tree_test.clj:
--------------------------------------------------------------------------------
1 | (ns lambda-ml.decision-tree-test
2 | (:require [clojure.test :refer :all]
3 | [lambda-ml.core :refer :all]
4 | [lambda-ml.decision-tree :refer :all]))
5 |
;; Gini impurity for a pure label set, a 1-vs-5 split and an even 3-vs-3
;; split, each compared within 1E-6.
(deftest test-gini-impurity
  (are [labels expected] (< (Math/abs (- (gini-impurity labels) expected)) 1E-6)
    [:b :b :b :b :b :b] 0
    [:a :b :b :b :b :b] 0.277778
    [:a :a :a :b :b :b] 0.5))
10 |
;; The element-wise squared differences of the two sequences are
;; 0.25, 0.25, 0 and 1, whose average is the expected 0.375.
(deftest test-mean-squared-error
  (let [a [3 -0.5 2 7]
        b [2.5 0.0 2 8]]
    (is (= 0.375 (mean-squared-error a b)))))
13 |
;; Weighted Gini cost of two equally sized label groups, compared within 1E-6.
;; The expected 0.388889 equals the average of the two groups' Gini impurities
;; (0.5 and 0.277778).
(deftest test-classification-weighted-cost
  (let [left  [:a :a :a :b :b :b]
        right [:a :b :b :b :b :b]
        cost  (classification-weighted-cost left right gini-impurity mode)]
    (is (< (Math/abs (- cost 0.388889)) 1E-6))))
19 |
;; Weighted mean-squared-error cost of two numeric groups (using `mean` as the
;; fourth argument), compared against 5.916667 within 1E-6.
(deftest test-regression-weighted-cost
  (let [left  [-1 0 3 5]
        right [-2 0 4]
        cost  (regression-weighted-cost left right mean-squared-error mean)]
    (is (< (Math/abs (- cost 5.916667)) 1E-6))))
24 |
;; Checks how many two-way partitions are produced for 1, 2, 3 and 4
;; categories (0, 1, 3, 7) and that all seven expected splits of four
;; categories are present, with either side listed first.
(deftest test-categorical-partitions
  (let [of-one      (categorical-partitions [:foo])
        of-two      (categorical-partitions [:high :normal])
        of-three    (categorical-partitions [:sunny :overcast :rain])
        of-four     (categorical-partitions [:A :B :C :D])
        ;; A split matches regardless of which half comes first.
        same-split? (fn [found wanted]
                      (or (= found wanted)
                          (= found (reverse wanted))))]
    (is (empty? of-one))
    (is (= (count of-two) 1))
    (is (= (count of-three) 3))
    (is (= (count of-four) 7))
    (doseq [wanted [[#{:A} #{:B :C :D}]
                    [#{:A :B} #{:C :D}]
                    [#{:A :C} #{:B :D}]
                    [#{:B :C} #{:A :D}]
                    [#{:B} #{:A :C :D}]
                    [#{:C} #{:A :B :D}]
                    [#{:D} #{:A :B :C}]]]
      (is (some #(same-split? % wanted) of-four)))))
44 |
;; Numeric split points for sorted and unsorted inputs, compared element-wise
;; within 1E-6. Inputs with a single value yield no split points.
(deftest test-numeric-partitions
  (let [;; Two sequences match when they have the same length and every
        ;; paired element differs by less than 1E-6. The length check matters:
        ;; `map` stops at the shorter sequence, so without it a result with
        ;; missing or extra split points (even an empty one) would pass.
        eq? (fn [a b]
              (and (= (count a) (count b))
                   (->> (map (fn [x y] (Math/abs (- x y))) a b)
                        (every? #(< % 1E-6)))))]
    (is (empty? (numeric-partitions [42])))
    (is (empty? (numeric-partitions (range 1))))
    (is (eq? (numeric-partitions (range 4)) [0.5 1.5 2.5]))
    (is (eq? (numeric-partitions (range 5)) [0.5 1.5 2.5 3.5]))
    (is (eq? (numeric-partitions [1 0]) [0.5]))
    (is (eq? (numeric-partitions [2 1 0 3]) [0.5 1.5 2.5]))
    (is (eq? (numeric-partitions [3 4 1 2 0]) [0.5 1.5 2.5 3.5]))))
56 |
;; Splitters on a categorical column: none when the column holds a single
;; distinct value, and 1, 3, 7 for 2, 3, 4 distinct values.
(deftest test-categorical-splitters
  (doseq [constant-rows [[[:foo]]
                         [[:foo] [:foo] [:foo]]]]
    (is (empty? (splitters constant-rows 0))))
  (are [rows n] (= (count (splitters rows 0)) n)
    [[:foo] [:bar]]               1
    [[:foo] [:bar] [:baz]]        3
    [[:foo] [:bar] [:baz] [:zap]] 7))
63 |
;; Splitters on numeric columns: none for a constant column. Column 0 of the
;; data has 12 distinct values and column 1 has 14, and the expected counts
;; (11 and 13) are one fewer than that.
(deftest test-numeric-splitters
  (let [rows [[64 177] [65 255] [85 125] [80 60]
              [72 56]  [75 120] [75 100] [68 220]
              [71 90]  [83 95]  [69 52]  [70 70]
              [72 85]  [81 75]]]
    (is (empty? (splitters [[42] [42] [42]] 0)))
    (are [column n] (= (count (splitters rows column)) n)
      0 11
      1 13)))
82 |
;; When both rows have identical feature values there is nothing to split on,
;; so best-splitter is expected to return nil for a classification tree and
;; for a regression tree alike.
(deftest test-best-splitter
  (let [categorical [["foo" "bar" "baz"]
                     ["foo" "bar" "baz"]]
        numeric     [[1.0 2.0 3.14]
                     [1.0 2.0 2.71]]
        ;; The last column is split off from the preceding feature columns.
        features    (fn [rows] (map butlast rows))
        targets     (fn [rows] (map last rows))
        width       (fn [rows] (dec (count (first rows))))]
    (is (nil? (best-splitter (make-classification-tree gini-impurity 2 1 (width categorical))
                             (features categorical)
                             (targets categorical))))
    (is (nil? (best-splitter (make-regression-tree mean-squared-error 2 1 (width numeric))
                             (features numeric)
                             (targets numeric))))))
92 |
;; Finds the best splitter for a 14-row categorical data set (last column is
;; the label) and checks that grouping the rows by it yields groups of sizes
;; 10 and 4, in either order.
(deftest test-best-splitter-categorical
  (let [rows         [["Sunny"    "Hot"  "High"   "Weak"   "No"]
                      ["Sunny"    "Hot"  "High"   "Strong" "No"]
                      ["Overcast" "Hot"  "High"   "Weak"   "Yes"]
                      ["Rain"     "Mild" "High"   "Weak"   "Yes"]
                      ["Rain"     "Cool" "Normal" "Weak"   "Yes"]
                      ["Rain"     "Cool" "Normal" "Strong" "No"]
                      ["Overcast" "Cool" "Normal" "Strong" "Yes"]
                      ["Sunny"    "Mild" "High"   "Weak"   "No"]
                      ["Sunny"    "Cool" "Normal" "Weak"   "Yes"]
                      ["Rain"     "Mild" "Normal" "Weak"   "Yes"]
                      ["Sunny"    "Mild" "Normal" "Strong" "Yes"]
                      ["Overcast" "Mild" "High"   "Strong" "Yes"]
                      ["Overcast" "Hot"  "Normal" "Weak"   "Yes"]
                      ["Rain"     "Mild" "High"   "Strong" "No"]]
        tree         (make-classification-tree gini-impurity 2 1 (dec (count (first rows))))
        splitter     (best-splitter tree (map butlast rows) (map last rows))
        [left right] (vals (group-by splitter rows))]
    (is (contains? #{[10 4] [4 10]}
                   [(count left) (count right)]))))
113 |
;; Fits a classification tree to the four-row XOR truth table (two inputs,
;; label last) and checks that each input row is predicted as its own label.
(deftest test-classification-tree
  (let [rows    [[0 0 0] [0 1 1] [1 0 1] [1 1 0]]
        model   (make-classification-tree gini-impurity 2 1 (dec (count (first rows))))
        fit     (decision-tree-fit model rows)
        predict (fn [x] (first (decision-tree-predict fit [x])))]
    (are [x label] (= (predict x) label)
      [0 0] 0
      [0 1] 1
      [1 0] 1
      [1 1] 0)))
122 |
--------------------------------------------------------------------------------
/test/lambda_ml/distance_test.clj:
--------------------------------------------------------------------------------
1 | (ns lambda-ml.distance-test
2 | (:require [clojure.test :refer :all]
3 | [lambda-ml.distance :refer :all]))
4 |
;; Cosine distance for two vector pairs, each compared within 1E-6.
;; Each comparison is its own top-level `is`; `is` treats a second argument as
;; a failure message, so the checks must not be nested inside one another.
(deftest test-cosine
  (is (< (Math/abs (- (cosine [1 2 0] [0 4 1])
                      0.132278))
         1E-6))
  (is (< (Math/abs (- (cosine [0 3 4 5] [7 6 3 1])
                      0.492167))
         1E-6)))
12 |
;; The expected values equal the sum of squared coordinate differences
;; (16 + 9 = 25 and 49 + 9 + 1 + 36 = 95), i.e. no square root is applied.
(deftest test-euclidean
  (are [expected a b] (= expected (euclidean a b))
    25 [2 -1]    [-2 2]
    95 [0 3 4 5] [7 6 3 -1]))
16 |
;; Two-argument haversine between a pair of coordinate vectors, compared with
;; a reference value within 1E-6.
(deftest test-haversine
  (let [from     [36.12 -86.67]
        to       [33.94 -118.40]
        distance (haversine from to)]
    (is (< (Math/abs (- distance 1794.0717860923137)) 1E-6))))
21 |
;; Three-argument haversine for the same coordinate pair with the third
;; argument set to 0 and to 1, each compared with a reference within 1E-6.
;; NOTE(review): the meaning of the third argument is not visible from this
;; test -- see the haversine definition.
(deftest test-haversine2
  (let [from [36.12 -86.67]
        to   [33.94 -118.40]]
    (are [extra expected] (< (Math/abs (- (haversine from to extra) expected)) 1E-6)
      0 150.66697884839715
      1 2192.964788467725)))
29 |
;; Jaccard distance for five vector pairs, compared as exact ratios.
(deftest test-jaccard
  (are [expected a b] (= expected (jaccard a b))
    3/5 [1 1 0 1]             [2 0 1 1]
    3/7 [1 1 1 0 1 0 0 1 1 1] [0 1 1 0 1 0 0 1 0 0]
    5/7 [1 1 0 1 1 0 0 0 0 1] [0 1 1 0 1 0 0 1 0 0]
    3/7 [0 1 1 0 1 0 0 1 0 0] [1 1 1 0 1 0 1 1 1 0]
    6/7 [0 0 0 1 0 1 0 0 0 0] [0 1 1 0 1 1 0 1 1 0]))
36 |
--------------------------------------------------------------------------------
/test/lambda_ml/ensemble_test.clj:
--------------------------------------------------------------------------------
1 | (ns lambda-ml.ensemble-test
2 | (:require [clojure.test :refer :all]
3 | [lambda-ml.ensemble :refer :all]
4 | [lambda-ml.decision-tree :refer :all]))
5 |
;; Adds the same classification-tree estimator to a bagging classifier 1013
;; times, fits it to the XOR truth table, and checks the prediction for each
;; input row.
(deftest test-bagging-classifier
  (let [rows     [[0 0 0] [0 1 1] [1 0 1] [1 1 0]]
        tree     (make-classification-tree gini-impurity 2 1 (dec (count (first rows))))
        grow     (fn [ensemble]
                   (add-bagging-estimator ensemble tree decision-tree-fit decision-tree-predict))
        model    (nth (iterate grow (make-bagging-classifier 1.0)) 1013)
        fit      (bagging-ensemble-fit model rows)
        predict  (fn [x] (first (bagging-ensemble-predict fit [x])))]
    (are [x label] (= (predict x) label)
      [0 0] 0
      [0 1] 1
      [1 0] 1
      [1 1] 0)))
17 |
;; Adds the same regression-tree estimator to a bagging regressor 1003 times,
;; fits it to the XOR truth table, and checks that each prediction falls on
;; the expected side of 0.5.
(deftest test-bagging-regressor
  (let [rows    [[0 0 0] [0 1 1] [1 0 1] [1 1 0]]
        tree    (make-regression-tree mean-squared-error 2 1 (dec (count (first rows))))
        grow    (fn [ensemble]
                  (add-bagging-estimator ensemble tree decision-tree-fit decision-tree-predict))
        model   (nth (iterate grow (make-bagging-regressor 1.0)) 1003)
        fit     (bagging-ensemble-fit model rows)
        predict (fn [x] (first (bagging-ensemble-predict fit [x])))]
    (doseq [[x side] [[[0 0] <]
                      [[0 1] >]
                      [[1 0] >]
                      [[1 1] <]]]
      (is (side (predict x) 0.5)))))
29 |
--------------------------------------------------------------------------------
/test/lambda_ml/factorization_test.clj:
--------------------------------------------------------------------------------
1 | (ns lambda-ml.factorization-test
2 | (:require [clojure.test :refer :all]
3 | [clojure.core.matrix :refer :all]
4 | [lambda-ml.factorization :refer :all]))
5 |
;; Takes the factor pair at position 200 of the factorization sequence for a
;; 2x3 matrix and checks that the cost of the product w*h against the data is
;; below 1E-6.
(deftest test-factorization
  (let [data           [[1 2 3] [4 5 6]]
        [w h]          (nth (factorizations data 2) 200)
        reconstruction (mmul w h)]
    (is (< (cost data reconstruction) 1E-6))))
11 |
--------------------------------------------------------------------------------
/test/lambda_ml/metrics_test.clj:
--------------------------------------------------------------------------------
1 | (ns lambda-ml.metrics-test
2 | (:require [clojure.test :refer :all]
3 | [lambda-ml.metrics :refer :all]))
4 |
;; Area under a curve given as [x y] points. Both expected values match the
;; trapezoid sums of the points (0.25 + 0 + 0.5 = 0.75 for the first curve).
;; Results are compared within a small tolerance rather than with `=`, since
;; the second sum involves values (e.g. 3.13, 1.56) that are not exactly
;; representable as doubles and so depends on summation order.
(deftest test-auc
  (let [close? (fn [expected actual]
                 (< (Math/abs (- expected actual)) 1E-9))]
    (is (close? 0.75
                (auc [[0 0.5] [0.5 0.5] [0.5 1] [1 1]])))
    (is (close? 147.66
                (auc [[0 100] [1 50] [2 25] [3 12.5] [4 6.25] [5 3.13] [6 1.56]])))))
8 |
--------------------------------------------------------------------------------
/test/lambda_ml/naive_bayes_test.clj:
--------------------------------------------------------------------------------
1 | (ns lambda-ml.naive-bayes-test
2 | (:require [clojure.test :refer :all]
3 | [lambda-ml.naive-bayes :refer :all]))
4 |
;; Fits naive Bayes to eight rows (label first, then three numeric features),
;; checks selected [mean variance] pairs in the fitted :distributions, and
;; checks the predicted label for one new sample.
(deftest test-naive-bayes
  (let [data   [[:male 6.00 180 12]
                [:male 5.92 190 11]
                [:male 5.58 170 12]
                [:male 5.92 165 10]
                [:female 5.00 100 6]
                [:female 5.50 150 8]
                [:female 5.42 130 7]
                [:female 5.75 150 9]]
        model  (make-naive-bayes)
        fit    (naive-bayes-fit model (map #(subvec % 1) data) (map first data))
        ;; Compares the magnitude of the difference, so an estimate that
        ;; overshoots the reference is rejected just like one that undershoots.
        ;; The tolerance scales with the reference (floor of 1E-5) because the
        ;; reference values are rounded to five significant digits, e.g.
        ;; 5.5833E+2.
        close? (fn [expected actual]
                 (<= (Math/abs (- expected actual))
                     (* 1E-5 (max 1.0 (Math/abs (double expected))))))]
    (let [[mean variance] (get-in (:distributions fit) [:male 0])]
      (is (close? 5.855 mean))
      (is (close? 3.5033E-2 variance)))
    (let [[mean variance] (get-in (:distributions fit) [:female 1])]
      (is (close? 132.5 mean))
      (is (close? 5.5833E+2 variance)))
    (let [[mean variance] (get-in (:distributions fit) [:male 2])]
      (is (close? 11.25 mean))
      (is (close? 9.1667E-1 variance)))
    (is (= :female (first (naive-bayes-predict fit [[6.0 130 8]]))))))
26 |
--------------------------------------------------------------------------------
/test/lambda_ml/nearest_neighbors_test.clj:
--------------------------------------------------------------------------------
1 | (ns lambda-ml.nearest-neighbors-test
2 | (:require [clojure.test :refer :all]
3 | [clojure.set :refer :all]
4 | [lambda-ml.nearest-neighbors :refer :all]
5 | [lambda-ml.distance :as d]))
6 |
;; Searches six 2-D points. For queries that are themselves in the set, the
;; second of two results is checked; result counts are checked for k = 3, 6
;; and 9 (the last is capped at the six available points).
(deftest test-nearest-neighbor-search
  (let [search      (make-nearest-neighbor-search d/euclidean [[2 3] [5 4] [9 6] [4 7] [8 1] [7 2]])
        second-hit  (fn [query] (item-value (second (search 2 query))))]
    (are [query neighbor] (= neighbor (second-hit query))
      [8 1] [7 2]
      [2 3] [5 4]
      [7 2] [8 1]
      [4 7] [5 4])
    (are [k n] (= n (count (search k [2 3])))
      3 3
      6 6
      9 6)))
16 |
;; For a query point that is not in the set, the first of five results is
;; expected to be [4 8].
(deftest test-nearest-neighbor-search2
  (let [points [[1 11] [2 5] [4 8] [6 4] [5 0] [7 9] [8 2]]
        search (make-nearest-neighbor-search d/euclidean points)
        hits   (search 5 [3 9])]
    (is (= [4 8] (item-value (first hits))))))
20 |
;; Five labelled 2-D points. For each point used as the query, the labels of
;; the four results are checked in order; each list starts with the query's
;; own label.
(deftest test-nearest-neighbor-search3
  (let [points    {[0.0 0.0] 1
                   [10.1 -10.1] 2
                   [-12.2 12.2] 3
                   [38.3 38.3] 4
                   [79.99 179.99] 5}
        search    (make-nearest-neighbor-search d/euclidean (keys points))
        ;; Labels of the four results for the point carrying the given label.
        hit-labels (fn [label]
                     (map (comp points second)
                          (search 4 ((map-invert points) label))))]
    (are [label expected] (= expected (hit-labels label))
      1 [1 2 3 4]
      2 [2 1 3 4]
      3 [3 1 2 4]
      4 [4 1 2 3]
      5 [5 4 3 1])))
38 |
;; Ten labelled points in the unit square. For each point used as the query,
;; the labels of the four results are checked in order; each list starts with
;; the query's own label.
(deftest test-nearest-neighbor-search4
  (let [points     {[0.436697697345292 0.492281587956396] 1
                    [0.318000697283004 0.302602867518914] 2
                    [0.268674100320323 0.684132163547525] 3
                    [0.347190228888873 0.959920716313895] 4
                    [0.539212291014011 0.187100169547265] 5
                    [0.964631186098456 0.129079314315528] 6
                    [0.171792010609788 0.795749621321345] 7
                    [0.910157297130659 0.437962722965556] 8
                    [0.847975159955406 0.169625495659256] 9
                    [0.793504465072615 0.121750314432942] 10}
        search     (make-nearest-neighbor-search d/euclidean (keys points))
        ;; Labels of the four results for the point carrying the given label.
        hit-labels (fn [label]
                     (map (comp points second)
                          (search 4 ((map-invert points) label))))]
    (doseq [[label expected] [[1  [1 2 3 5]]
                              [2  [2 1 5 3]]
                              [3  [3 7 1 4]]
                              [4  [4 7 3 1]]
                              [5  [5 2 10 9]]
                              [6  [6 9 10 8]]
                              [7  [7 3 4 1]]
                              [8  [8 9 6 10]]
                              [9  [9 10 6 8]]
                              [10 [10 9 6 5]]]]
      (is (= expected (hit-labels label))))))
71 |
;; Six named coordinate pairs. For three of them used as the query, the name
;; attached to the second of two results is checked.
(deftest test-nearest-neighbor-search5
  (let [points     {[36.971838 -122.019653] :SantaCruz,
                    [37.864012 -122.277832] :Berkeley,
                    [37.330857 -121.887817] :SanJose,
                    [37.444335 -122.156982] :PaloAlto,
                    [37.387617 -122.060852] :MountainView,
                    [37.759859 -122.437134] :SanFrancisco}
        search     (make-nearest-neighbor-search d/euclidean (keys points))
        second-hit (fn [city]
                     (-> (search 2 ((map-invert points) city))
                         second
                         item-value
                         points))]
    (are [city neighbor] (= neighbor (second-hit city))
      :SantaCruz :SanJose
      :Berkeley  :SanFrancisco
      :PaloAlto  :MountainView)))
86 |
;; Items carry a leading keyword tag; the search is constructed with `rest` as
;; an extra argument (presumably to extract the coordinates from each item --
;; confirm against make-nearest-neighbor-search). The tag of the second of two
;; results is checked for four queries.
(deftest test-nearest-neighbor-search-metadata
  (let [items      [[:a 2 3]
                    [:b 5 4]
                    [:c 9 6]
                    [:d 4 7]
                    [:e 8 1]
                    [:f 7 2]]
        search     (make-nearest-neighbor-search d/euclidean rest items)
        second-tag (fn [query] (first (item-value (second (search 2 query)))))]
    (are [query tag] (= tag (second-tag query))
      [:e 8 1] :f
      [:a 2 3] :b
      [:f 7 2] :e
      [:d 4 7] :b)))
99 |
;; Fits nearest-neighbor classifiers with k = 1 and k = 3 to eleven rows
;; (two numeric features, label last) and checks that both predict :yes for
;; the same query row.
(deftest test-nearest-neighbors-classifier
  (let [rows  [[25 40000 :no]
               [35 60000 :no]
               [45 80000 :no]
               [20 20000 :no]
               [35 120000 :no]
               [52 18000 :no]
               [23 95000 :yes]
               [40 62000 :yes]
               [60 100000 :yes]
               [48 220000 :yes]
               [33 150000 :yes]]
        fit-k (fn [k]
                (nearest-neighbors-fit (make-nearest-neighbors-classifier k d/euclidean) rows))
        fit1  (fit-k 1)
        fit3  (fit-k 3)
        query [[48 142000]]]
    (doseq [fit [fit1 fit3]]
      (is (= (first (nearest-neighbors-predict fit query)) :yes)))))
118 |
;; Fits nearest-neighbor regressors with k = 1 and k = 3 to eleven rows
;; (two numeric features, target last) and checks the prediction for one
;; query row: 264 for k = 1 and the exact ratio 542/3 for k = 3.
(deftest test-nearest-neighbors-regressor
  (let [rows  [[25 40000 135]
               [35 60000 256]
               [45 80000 231]
               [20 20000 267]
               [35 120000 139]
               [52 18000 150]
               [23 95000 127]
               [40 62000 216]
               [60 100000 139]
               [48 220000 250]
               [33 150000 264]]
        fit-k (fn [k]
                (nearest-neighbors-fit (make-nearest-neighbors-regressor k d/euclidean) rows))
        fit1  (fit-k 1)
        fit3  (fit-k 3)
        query [[48 142000]]]
    (are [fit expected] (= (first (nearest-neighbors-predict fit query)) expected)
      fit1 264
      fit3 542/3)))
137 |
--------------------------------------------------------------------------------
/test/lambda_ml/neural_network_test.clj:
--------------------------------------------------------------------------------
1 | (ns lambda-ml.neural-network-test
2 | (:require [clojure.test :refer :all]
3 | [clojure.core.matrix :as m]
4 | [lambda-ml.core :refer :all]
5 | [lambda-ml.neural-network :refer :all]))
6 |
;; Feeds one input through two sigmoid layers with fixed weights and checks
;; the two hidden and two output activations within 1E-6.
(deftest test-feed-forward
  (let [weights         [[[0.35 0.15 0.20]
                          [0.35 0.25 0.30]]
                         [[0.60 0.40 0.45]
                          [0.60 0.50 0.55]]]
        activation-fns  [sigmoid sigmoid]
        input           [0.05 0.1]
        [hidden output] (feed-forward input weights activation-fns)]
    (are [expected actual] (< (Math/abs (- expected actual)) 1E-6)
      0.593269920 (first hidden)
      0.596884378 (second hidden)
      0.751365070 (first output)
      0.772928465 (second output))))
19 |
;; Feeds one input through a 3-unit and then a 2-unit sigmoid layer with fixed
;; weights and checks every activation within 1E-6.
(deftest test-feed-forward2
  (let [weights         [[[ 0.1  0.1 -0.2]
                          [ 0.2  0    0.2]
                          [ 0.5  0.3 -0.4]]
                         [[-0.1 -0.4  0.1  0.6]
                          [ 0.6  0.2 -0.1 -0.2]]]
        activation-fns  [sigmoid sigmoid]
        input           [0.6 0.1]
        [hidden output] (feed-forward input weights activation-fns)]
    (doseq [[layer expected] [[hidden [0.53494294 0.55477923 0.65475346]]
                              [output [0.53353777 0.62727869]]]
            [i want]         (map-indexed vector expected)]
      (is (< (Math/abs (- want (nth layer i))) 1E-6)))))
34 |
;; Back-propagates the quadratic output error for a fixed target, weights and
;; activations, and checks the two error terms of each layer within 1E-6.
(deftest test-back-propagate
  (let [weights           [[[0.35 0.15 0.20]
                            [0.35 0.25 0.30]]
                           [[0.60 0.40 0.45]
                            [0.60 0.50 0.55]]]
        derivatives       [sigmoid' sigmoid']
        target            [0.01 0.99]
        activations       [[0.593269920 0.596884378] [0.751365070 0.772928465]]
        [errors1 errors2] (back-propagate target weights derivatives activations quadratic-output-error)]
    (are [expected actual] (< (Math/abs (- expected actual)) 1E-6)
      0.00877136  (first errors1)
      0.00995425  (second errors1)
      0.13849856  (first errors2)
      -0.03809824 (second errors2))))
48 |
;; Computes per-layer gradients from an input, activations and error terms,
;; and checks every entry of both 2x3 gradient matrices within 1E-6.
(deftest test-compute-gradients
  (let [input       [0.05 0.1]
        activations [[0.593269920 0.596884378] [0.751365070 0.772928465]]
        errors      [[0.00877136 0.00995425] [0.13849856 -0.03809824]]
        [g0 g1]     (compute-gradients input activations errors)
        expected-g0 [[0.00877136 0.00043857 0.00087713]
                     [0.00995425 0.00049771 0.00099543]]
        expected-g1 [[0.13849856  0.08216703  0.08266763]
                     [-0.03809824 -0.02260254 -0.02274024]]]
    (doseq [[gradient expected] [[g0 expected-g0] [g1 expected-g1]]
            [r row]             (map-indexed vector expected)
            [c want]            (map-indexed vector row)]
      (is (< (Math/abs (- want (nth (nth gradient r) c))) 1E-6)))))
66 |
;; Checks the regularization terms for fixed weights with alpha = 0.5 and
;; lambda = 0.1. The expected values are 0 in the first column of each row and
;; alpha * lambda * weight (0.05 * weight) in the other columns.
(deftest test-regularize
  (let [weights     [[[0.35 0.15 0.20]
                      [0.35 0.25 0.30]]
                     [[0.60 0.40 0.45]
                      [0.60 0.50 0.55]]]
        alpha       0.5
        lambda      0.1
        [r0 r1]     (regularize weights alpha lambda)
        expected-r0 [[0.0 0.0075 0.01]
                     [0.0 0.0125 0.015]]
        expected-r1 [[0.0 0.02  0.0225]
                     [0.0 0.025 0.0275]]]
    (doseq [[terms expected] [[r0 expected-r0] [r1 expected-r1]]
            [r row]          (map-indexed vector expected)
            [c want]         (map-indexed vector row)]
      (is (< (Math/abs (- want (nth (nth terms r) c))) 1E-6)))))
87 |
;; Applies one gradient-descent step to fixed weights for a single (x, y)
;; pair, using a network built with 0.5 and 0 as its first two numeric
;; arguments and quadratic cost, and checks the updated weights in columns 1
;; and 2 of every row within 1E-6. Column 0 of each row is not checked here.
(deftest test-gradient-descent-step
  (let [weights [[[0.35 0.15 0.20]
                  [0.35 0.25 0.30]]
                 [[0.60 0.40 0.45]
                  [0.60 0.50 0.55]]]
        model   (-> (make-neural-network 0.5 0 quadratic-cost)
                    (add-neural-network-layer 2 sigmoid)
                    (add-neural-network-layer 2 sigmoid)
                    (add-neural-network-layer 2 sigmoid))
        x       [0.05 0.1]
        y       [0.01 0.99]
        [w0 w1] (gradient-descent-step model x y weights)]
    (are [expected w r c] (< (Math/abs (- expected (nth (nth w r) c))) 1E-6)
      0.149780716 w0 0 1
      0.19956143  w0 0 2
      0.24975114  w0 1 1
      0.29950229  w0 1 2
      0.35891648  w1 0 1
      0.408666186 w1 0 2
      0.51130127  w1 1 1
      0.561370121 w1 1 2)))
109 |
;; Initializes parameters for layers [2 3 1] with seed 12345 and checks the
;; shapes ([3 3] and [1 4]) and every entry of both weight matrices within
;; 1E-6.
(deftest test-init-parameters
  (let [model       {:layers [2 3 1] :seed 12345}
        [w0 w1]     (init-parameters model)
        expected-w0 [[-0.18780898  0.58843630  0.94880478]
                     [-0.49428072 -1.22341193 -0.69796098]
                     [-0.77722490  2.06800870 -0.58734674]]
        expected-w1 [[0.46214534 1.37458180 -0.09785321 -1.07643638]]]
    (is (= [3 3] (m/shape w0)))
    (is (= [1 4] (m/shape w1)))
    (doseq [[w expected] [[w0 expected-w0] [w1 expected-w1]]
            [r row]      (map-indexed vector expected)
            [c want]     (map-indexed vector row)]
      (is (< (Math/abs (- want (m/mget w r c))) 1E-6)))))
128 |
129 | (deftest test-neural-network
130 |   ;; Fits a 2-3-1 sigmoid network on the XOR truth table for 5000 rounds and
131 |   ;; requires each prediction to land beyond the matching 0.1 / 0.9 bound.
132 |   (let [xor-rows [[0 0 [0]] [0 1 [1]] [1 0 [1]] [1 1 [0]]]
133 |         untrained (reduce (fn [net size] (add-neural-network-layer net size sigmoid))
134 |                           (make-neural-network 0.5 0.0 cross-entropy-cost 54321)
135 |                           [2 3 1])
136 |         ;; Feed each fitted model back into neural-network-fit, 5000 times.
137 |         trained (reduce (fn [net _] (neural-network-fit net xor-rows))
138 |                         untrained (range 5000))
139 |         predictions (map first (neural-network-predict trained (map butlast xor-rows)))]
140 |     (is (> 0.1 (nth predictions 0)))
141 |     (is (< 0.9 (nth predictions 1)))
142 |     (is (< 0.9 (nth predictions 2)))
143 |     (is (> 0.1 (nth predictions 3)))))
144 |
--------------------------------------------------------------------------------
/test/lambda_ml/random_forest_test.clj:
--------------------------------------------------------------------------------
1 | (ns lambda-ml.random-forest-test
2 | (:require [clojure.test :refer :all]
3 | [lambda-ml.random-forest :refer :all]))
4 |
5 | (deftest test-random-forest-classifier
6 |   ;; Fits a classifier forest on the four XOR rows and expects every point
7 |   ;; to be predicted with its own label.
8 |   (let [xor-rows [[0 0 0] [0 1 1] [1 0 1] [1 1 0]]
9 |         forest (random-forest-fit (make-random-forest-classifier 1001 2 1 2) xor-rows)]
10 |     (are [point label] (= (first (random-forest-predict forest [point])) label)
11 |       [0 0] 0  [0 1] 1
12 |       [1 0] 1  [1 1] 0)))
13 |
14 | (deftest test-random-forest-regressor
15 |   ;; Fits a regressor forest on the four XOR rows; predictions must fall
16 |   ;; below 0.5 for target 0 and above 0.5 for target 1.
17 |   (let [xor-rows [[0 0 0] [0 1 1] [1 0 1] [1 1 0]]
18 |         forest (random-forest-fit (make-random-forest-regressor 1001 2 1 2) xor-rows)]
19 |     (are [cmp point] (cmp (first (random-forest-predict forest [point])) 0.5)
20 |       < [0 0]  > [0 1]
21 |       > [1 0]  < [1 1])))
22 |
--------------------------------------------------------------------------------
/test/lambda_ml/regression_test.clj:
--------------------------------------------------------------------------------
1 | (ns lambda-ml.regression-test
2 | (:require [clojure.test :refer :all]
3 | [lambda-ml.core :refer :all]
4 | [lambda-ml.regression :refer :all]))
5 |
6 | (deftest test-linear-regression
7 |   ;; Fitting three points must give parameters within 1E-6 of 5/19 and 23/38.
8 |   (let [points [[-2 -1] [1 1] [3 2]]
9 |         estimator (make-linear-regression 0.01 0.0 5000)
10 |         params (:parameters (regression-fit estimator points))]
11 |     (are [expected actual] (< (Math/abs (- expected actual)) 1E-6)
12 |       (/ 5 19) (first params)
13 |       (/ 23 38) (second params))))
14 |
15 | (deftest test-linear-regression-regularization
16 |   ;; Each sample x is expanded to the features x, x^2, ..., x^5; the L2 norm of
17 |   ;; the fitted parameters must shrink as the second constructor argument grows.
18 |   (let [samples [[-0.99768 2.0885]
19 |                  [-0.69574 1.1646]
20 |                  [-0.40373 0.3287]
21 |                  [-0.10236 0.46013]
22 |                  [0.22024 0.44808]
23 |                  [0.47742 0.10013]
24 |                  [0.82229 -0.32952]]
25 |         data (for [[x y] samples] [x (* x x) (* x x x) (* x x x x) (* x x x x x) y])
26 |         fit-with (fn [lambda] (regression-fit (make-linear-regression 0.1 lambda 10000) data))
27 |         [fit-lambda0 fit-lambda1 fit-lambda10] (mapv fit-with [0 1 10])]
28 |     (are [weaker stronger]
29 |       (> (l2-norm (:parameters weaker)) (l2-norm (:parameters stronger)))
30 |       fit-lambda0 fit-lambda1
31 |       fit-lambda0 fit-lambda10
32 |       fit-lambda1 fit-lambda10)))
33 |
34 | (deftest test-linear-regression2
35 |   ;; Four points; the fitted parameters must be within 1E-6 of 1.9
36 |   ;; (first) and 1.7 (second).
37 |   (let [points [[-1 0] [0 2] [1 4] [2 5]]
38 |         estimator (make-linear-regression 0.01 0.0 5000)
39 |         params (:parameters (regression-fit estimator points))]
40 |     (are [expected actual] (< (Math/abs (- expected actual)) 1E-6)
41 |       1.9 (first params)
42 |       1.7 (second params))))
43 |
44 | (deftest test-linear-regression3
45 |   ;; Twenty two-column rows; parameters must match the reference fit within 1E-6.
46 |   (let [rows [[4 390]
47 |               [9 580]
48 |               [10 650]
49 |               [14 730]
50 |               [4 410]
51 |               [7 530]
52 |               [12 600]
53 |               [22 790]
54 |               [1 350]
55 |               [3 400]
56 |               [8 590]
57 |               [11 640]
58 |               [5 450]
59 |               [6 520]
60 |               [10 690]
61 |               [11 690]
62 |               [16 770]
63 |               [13 700]
64 |               [13 730]
65 |               [10 640]]
66 |         params (:parameters (regression-fit (make-linear-regression 0.01 0.0 10000) rows))]
67 |     (are [expected actual] (< (Math/abs (- expected actual)) 1E-6)
68 |       353.16487949889 (first params)  25.326467777896 (second params))))
69 |
70 | (deftest test-logistic-regression
71 |   ;; Twenty rows with a 0/1 last column; parameters must match the reference within 1E-6.
72 |   (let [rows [[0.50 0]
73 |               [0.75 0]
74 |               [1.00 0]
75 |               [1.25 0]
76 |               [1.50 0]
77 |               [1.75 0]
78 |               [1.75 1]
79 |               [2.00 0]
80 |               [2.25 1]
81 |               [2.50 0]
82 |               [2.75 1]
83 |               [3.00 0]
84 |               [3.25 1]
85 |               [3.50 0]
86 |               [4.00 1]
87 |               [4.25 1]
88 |               [4.50 1]
89 |               [4.75 1]
90 |               [5.00 1]
91 |               [5.50 1]]
92 |         params (:parameters (regression-fit (make-logistic-regression 0.1 0 10000) rows))]
93 |     (are [expected actual] (< (Math/abs (- expected actual)) 1E-6)
94 |       -4.077713 (first params)  1.504645 (second params))))
95 |
--------------------------------------------------------------------------------