├── .gitignore ├── LICENSE ├── README.md ├── project.clj ├── src └── lambda_ml │ ├── clustering │ ├── dbscan.clj │ ├── hierarchical.clj │ └── k_means.clj │ ├── core.clj │ ├── data │ ├── binary_tree.clj │ └── kd_tree.clj │ ├── decision_tree.clj │ ├── distance.clj │ ├── ensemble.clj │ ├── examples │ ├── kaggle │ │ ├── digit_recognizer.clj │ │ ├── march_madness.clj │ │ └── titanic.clj │ └── worksheets │ │ ├── dbscan.clj │ │ ├── decision_tree.clj │ │ ├── hierarchical.clj │ │ └── k_means.clj │ ├── factorization.clj │ ├── metrics.clj │ ├── naive_bayes.clj │ ├── nearest_neighbors.clj │ ├── neural_network.clj │ ├── random_forest.clj │ ├── regression.clj │ └── util.clj └── test └── lambda_ml ├── clustering ├── dbscan_test.clj ├── hierarchical_test.clj └── k_means_test.clj ├── core_test.clj ├── data ├── binary_tree_test.clj └── kd_tree_test.clj ├── decision_tree_test.clj ├── distance_test.clj ├── ensemble_test.clj ├── factorization_test.clj ├── metrics_test.clj ├── naive_bayes_test.clj ├── nearest_neighbors_test.clj ├── neural_network_test.clj ├── random_forest_test.clj └── regression_test.clj /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | /classes 3 | /checkouts 4 | pom.xml 5 | pom.xml.asc 6 | *.jar 7 | *.class 8 | /.lein-* 9 | /.nrepl-port 10 | .hgignore 11 | .hg/ 12 | *~ 13 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2015-2018 Kelvin Jiang 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject 
to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all 11 | copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 19 | SOFTWARE. 20 | 21 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # lambda-ml 2 | 3 | A small machine learning library aimed at providing simple, concise 4 | implementations of machine learning techniques and utilities. It is written in 5 | Lisp (using the implementation du jour, Clojure) to maximize expressiveness and 6 | enjoyment. 
7 | 8 | ## Installation 9 | 10 | Add the following dependency to your project: 11 | 12 | [![Clojars Project](https://img.shields.io/clojars/v/lambda-ml.svg)](https://clojars.org/lambda-ml) 13 | 14 | ## Documentation 15 | 16 | * [API Docs](https://cloudkj.github.io/lambda-ml/) 17 | 18 | ### Supervised Learning Algorithms 19 | 20 | * [Artificial neural network](https://cloudkj.github.io/lambda-ml/lambda-ml.neural-network.html) 21 | * [Decision tree](https://cloudkj.github.io/lambda-ml/lambda-ml.decision-tree.html) 22 | * [Ensemble methods](https://cloudkj.github.io/lambda-ml/lambda-ml.ensemble.html) 23 | * [K-nearest neighbors](https://cloudkj.github.io/lambda-ml/lambda-ml.nearest-neighbors.html) 24 | * [Linear regression](https://cloudkj.github.io/lambda-ml/lambda-ml.regression.html) 25 | * [Logistic regression](https://cloudkj.github.io/lambda-ml/lambda-ml.regression.html) 26 | * [Naive Bayes](https://cloudkj.github.io/lambda-ml/lambda-ml.naive-bayes.html) 27 | * [Random forest](https://cloudkj.github.io/lambda-ml/lambda-ml.random-forest.html) 28 | 29 | ### Unsupervised Learning Algorithms 30 | 31 | * [DBSCAN](https://cloudkj.github.io/lambda-ml/lambda-ml.clustering.dbscan.html) 32 | * [Hierarchical agglomerative clustering](https://cloudkj.github.io/lambda-ml/lambda-ml.clustering.hierarchical.html) 33 | * [K-means](https://cloudkj.github.io/lambda-ml/lambda-ml.clustering.k-means.html) 34 | * [Non-negative matrix factorization](https://cloudkj.github.io/lambda-ml/lambda-ml.factorization.html) 35 | 36 | ## Examples 37 | 38 | * [Classifying handwritten digits with an artificial neural network](http://viewer.gorilla-repl.org/view.html?source=github&user=cloudkj&repo=lambda-ml&path=src/lambda_ml/examples/kaggle/digit_recognizer.clj) 39 | * [DBSCAN example](http://viewer.gorilla-repl.org/view.html?source=github&user=cloudkj&repo=lambda-ml&path=src/lambda_ml/examples/worksheets/dbscan.clj) 40 | * [Decision tree 
example](http://viewer.gorilla-repl.org/view.html?source=github&user=cloudkj&repo=lambda-ml&path=src/lambda_ml/examples/worksheets/decision_tree.clj) 41 | * [Hierarchical agglomerative clustering example](http://viewer.gorilla-repl.org/view.html?source=github&user=cloudkj&repo=lambda-ml&path=src/lambda_ml/examples/worksheets/hierarchical.clj) 42 | * [K-means example](http://viewer.gorilla-repl.org/view.html?source=github&user=cloudkj&repo=lambda-ml&path=src/lambda_ml/examples/worksheets/k_means.clj) 43 | * [Predicting survival on the Titanic with logistic regression](http://viewer.gorilla-repl.org/view.html?source=github&user=cloudkj&repo=lambda-ml&path=src/lambda_ml/examples/kaggle/titanic.clj) 44 | 45 | ## License 46 | 47 | Copyright © 2015-2018 48 | -------------------------------------------------------------------------------- /project.clj: -------------------------------------------------------------------------------- 1 | (defproject lambda-ml "0.1.1" 2 | :description "A small machine learning library aimed at providing simple, concise implementations of machine learning techniques and utilities." 
3 | :url "http://github.com/cloudkj/lambda-ml" 4 | :license {:name "MIT License" 5 | :url "http://opensource.org/licenses/MIT"} 6 | :plugins [[lein-ancient "0.6.15"] 7 | [lein-codox "0.10.1"] 8 | [lein-exec "0.3.6"] 9 | [lein-gorilla "0.4.0"]] 10 | :codox {:metadata {:doc/format :markdown} 11 | :namespaces [#"^lambda-ml\.(?!examples)"] 12 | :source-uri "https://github.com/cloudkj/lambda-ml/blob/master/{filepath}#L{line}"} 13 | :jvm-opts ["-Xmx8g"] 14 | :dependencies [[org.clojure/clojure "1.8.0"] 15 | [org.clojure/data.csv "0.1.4"] 16 | [org.clojure/data.priority-map "0.0.10"] 17 | [org.clojure/math.numeric-tower "0.0.4"] 18 | [gorilla-plot "0.1.4"] 19 | [net.mikera/core.matrix "0.62.0"] 20 | [net.mikera/vectorz-clj "0.47.0"]]) 21 | -------------------------------------------------------------------------------- /src/lambda_ml/clustering/dbscan.clj: -------------------------------------------------------------------------------- 1 | (ns lambda-ml.clustering.dbscan 2 | "Density-based clustering with DBSCAN. 3 | 4 | Example usage: 5 | ``` 6 | (def data [[2 10] [2 5] [8 4] [5 8] [7 5] [6 4] [1 2] [4 9]]) 7 | (let [epsilon 4.0 8 | min-pts 2] 9 | (dbscan lambda-ml.distance/euclidean epsilon min-pts data)) 10 | ;;=> {[8 4] 1, [6 4] 1, [7 5] 1, [5 8] 2, [4 9] 2} 11 | ```" 12 | (:require [clojure.set :as set] 13 | [lambda-ml.data.binary-tree :as bt] 14 | [lambda-ml.data.kd-tree :as kd])) 15 | 16 | (defn make-proximity-search 17 | "Given a distance function f and a coll of points, returns a function that, 18 | given a distance and a query point, returns a sequence of all points that are 19 | within the given distance of the query point." 20 | [f points] 21 | (let [dims (count (first points)) 22 | t (kd/make-tree dims points)] 23 | (fn search 24 | ([dist query] 25 | (search dist query t 0 (list))) 26 | ([dist query tree depth cand] 27 | (if (nil? 
tree) 28 | cand 29 | (let [[node left right] ((juxt bt/get-value bt/get-left bt/get-right) tree) 30 | dim (mod depth dims) 31 | [near far] (if (<= (nth query dim) (nth node dim)) [left right] [right left])] 32 | (cond->> cand 33 | ;; Add current node if it's within proximity 34 | (<= (f query node) dist) 35 | (cons node) 36 | ;; Explore near branch 37 | true 38 | (search dist query near (inc depth)) 39 | ;; Explore far branch when the splitting plane is within dist (inclusive, like the node test above) 40 | (<= (f query node dim) dist) 41 | (search dist query far (inc depth))))))))) 42 | 43 | (defn dbscan 44 | "Returns a clustering of points represented as a map from each clustered point 45 | to its cluster id (points left as noise are omitted), using the epsilon parameter 46 | for neighborhood lookups and forming clusters with at least min-pts density." 47 | [f epsilon min-pts points] 48 | (let [search (make-proximity-search f points)] 49 | (loop [unvisited points 50 | cluster-id 0 51 | visited #{} 52 | clusters {}] 53 | (let [point (first unvisited)] 54 | (cond 55 | ;; No more points 56 | (nil? 
point) 57 | clusters 58 | ;; Already visited 59 | (visited point) 60 | (recur (rest unvisited) cluster-id visited clusters) 61 | ;; Visit point 62 | :else 63 | (let [visited (conj visited point) 64 | neighbors (search epsilon point)] 65 | (if (< (count neighbors) min-pts) 66 | ;; Noise 67 | (recur (rest unvisited) cluster-id visited clusters) 68 | ;; Expand cluster 69 | (let [cluster-id (+ 1 cluster-id) 70 | ;; Assign point to cluster 71 | clusters (assoc clusters point cluster-id) 72 | ;; Find all neighbors-of-neighbors 73 | expanded (reduce (fn [n i] 74 | (if (visited i) 75 | (conj n i) 76 | (let [nn (search epsilon i)] 77 | (if (< (count nn) min-pts) 78 | (conj n i) 79 | (set/union n (set nn)))))) 80 | #{} 81 | neighbors)] 82 | (recur (rest unvisited) 83 | cluster-id 84 | ;; Mark expanded neighbors as visited 85 | (reduce conj visited expanded) 86 | ;; Assign expanded neighbors to clusters 87 | (reduce (fn [c i] (if (c i) c (assoc c i cluster-id))) 88 | clusters 89 | expanded)))))))))) 90 | -------------------------------------------------------------------------------- /src/lambda_ml/clustering/hierarchical.clj: -------------------------------------------------------------------------------- 1 | (ns lambda-ml.clustering.hierarchical 2 | "Hierarchical agglomerative clustering. 3 | 4 | Example usage: 5 | ``` 6 | (def data [[1 1 1 0 1 0 0 1 1 1] 7 | [1 1 0 1 1 0 0 0 0 1] 8 | [0 1 1 0 1 0 0 1 0 0] 9 | [0 0 0 1 0 1 0 0 0 0] 10 | [1 1 1 0 1 0 1 1 1 0] 11 | [0 1 0 1 1 0 0 0 0 1] 12 | [0 1 1 0 1 1 0 1 1 0]]) 13 | (agglomerative-clustering single-link lambda-ml.distance/euclidean data) 14 | ;;=> [[1 5] [0 4] [2 6] [0 2] [0 1] [0 3]] 15 | ```" 16 | (:require [clojure.data.priority-map :as pmap])) 17 | 18 | (defn pairwise-distances 19 | "Returns a map representing the distance matrix between all points." 
20 | [f points] 21 | (->> (map-indexed vector points) 22 | (reduce (fn [distances [i pi]] 23 | (->> (map-indexed vector points) 24 | (reduce (fn [d [j pj]] 25 | (assoc-in d [i j] {:distance (f pi pj) :index j})) 26 | distances))) 27 | {}))) 28 | 29 | (defn distance-queues 30 | "Returns a map of each point to a priority queue of all other points sorted by 31 | increasing distance." 32 | [distances] 33 | (->> (keys distances) 34 | (reduce (fn [queues i] 35 | (->> (vals (dissoc (get distances i) i)) 36 | (reduce (fn [q ci] (assoc q (:index ci) ci)) 37 | (pmap/priority-map-keyfn :distance)) 38 | (assoc queues i))) 39 | {}))) 40 | 41 | (defn single-link 42 | "Returns the single-link distance between point x and the merged cluster 43 | containing points y and z, which is the distance between x and the closest 44 | point in the cluster." 45 | [distances x y z] 46 | (min (get-in distances [x y :distance]) 47 | (get-in distances [x z :distance]))) 48 | 49 | (defn complete-link 50 | "Returns the complete-link distance between point x and the merged cluster 51 | containing points y and z, which is the distance between x and the farthest 52 | point in the cluster." 53 | [distances x y z] 54 | (max (get-in distances [x y :distance]) 55 | (get-in distances [x z :distance]))) 56 | 57 | (defn agglomerative-clustering 58 | "Returns a clustering of points represented as a seq of merges, where each 59 | merge is a pair of indexes indicating the two points to be merged at each 60 | step, using the linkage function link and distance function f." 
61 | [link f points] 62 | (loop [distances (pairwise-distances f points) 63 | queues (distance-queues distances) 64 | active (reduce #(assoc %1 %2 true) {} (range (count points))) 65 | merges []] 66 | (if (<= (count active) 1) 67 | merges 68 | (let [;; Find the two most similar clusters 69 | [_ k1 k2] (->> (keys active) 70 | (reduce (fn [[min-dist k1 k2] i] 71 | (let [[k {dist :distance}] (peek (get queues i))] 72 | (if (< dist min-dist) 73 | [dist i k] 74 | [min-dist k1 k2]))) 75 | [Double/MAX_VALUE nil nil])) 76 | ;; Clear queue for k1 77 | queues (assoc queues k1 (pmap/priority-map-keyfn :distance)) 78 | ;; Update distances 79 | [distances queues] (->> (keys active) 80 | (filter #(and (not (= k1 %)) (not (= k2 %)))) 81 | (reduce (fn [[d q] i] 82 | (let [dist (link distances i k1 k2) 83 | d (-> (assoc-in d [i k1 :distance] dist) 84 | (assoc-in [k1 i :distance] dist)) 85 | q (-> (update q i #(dissoc % k1)) 86 | (update i #(dissoc % k2)) 87 | (update i #(assoc % k1 (get-in d [i k1]))) 88 | (update k1 #(assoc % i (get-in d [k1 i]))))] 89 | [d q])) 90 | [distances queues]))] 91 | (recur distances 92 | queues 93 | (dissoc active k2) ;; Deactivate cluster k2 94 | (conj merges [k1 k2])))))) ;; Merge k1 and k2 95 | -------------------------------------------------------------------------------- /src/lambda_ml/clustering/k_means.clj: -------------------------------------------------------------------------------- 1 | (ns lambda-ml.clustering.k-means 2 | "K-means clustering. 3 | 4 | Example usage: 5 | ``` 6 | (def data [[1 1] [1.5 2] [3 4] [5 7] [3.5 5] [4.5 5] [3.5 4.5]]) 7 | (let [k 2] 8 | (-> (k-means k lambda-ml.distance/euclidean data) 9 | (nth 100))) 10 | ;;=> {0 ([3.5 4.5] [4.5 5] [3.5 5] [5 7] [3 4]), 1 ([1.5 2] [1 1])} 11 | ```" 12 | (:require [lambda-ml.core :as c])) 13 | 14 | (defn assign-clusters 15 | "Returns cluster assignments based on the closest centroid to each point." 
16 | [f mu x] 17 | (let [mu-indexed (map-indexed vector mu)] 18 | (loop [points x 19 | clusters {}] 20 | (if (empty? points) 21 | clusters 22 | (let [xi (first points) 23 | ;; Find the index of the closest centroid 24 | index (first (apply min-key (comp (partial f xi) second) mu-indexed)) 25 | cluster (or (clusters index) (list))] 26 | (recur (rest points) 27 | (assoc clusters index (conj cluster xi)))))))) 28 | 29 | (defn update-centroids 30 | "Returns updated centroids based on the average of points in each cluster." 31 | [k clusters] 32 | (map (fn [index] 33 | (->> (clusters index) 34 | (apply map +) 35 | (map #(/ % (count (clusters index)))))) 36 | (range k))) 37 | 38 | (defn k-means-seq 39 | [k f points centroids] 40 | (lazy-seq (let [clusters (assign-clusters f centroids points)] 41 | (cons clusters 42 | (k-means-seq k f points (update-centroids k clusters)))))) 43 | 44 | (defn k-means 45 | "Returns a lazy sequence of a clustering of points using the distance function 46 | f, represented as a map from cluster id to a collection of points, at each 47 | iteration of k-means." 48 | [k f points] 49 | (k-means-seq k f points (c/sample-without-replacement points k))) 50 | -------------------------------------------------------------------------------- /src/lambda_ml/core.clj: -------------------------------------------------------------------------------- 1 | (ns lambda-ml.core 2 | (:require [clojure.math.numeric-tower :refer :all])) 3 | 4 | (def vector-with-intercept (comp vec (partial cons 1.0))) 5 | 6 | (defn dot-product 7 | [a b] 8 | (reduce + (map * a b))) 9 | 10 | (defn l2-norm 11 | [a] 12 | (sqrt (dot-product a a))) 13 | 14 | (defn mean 15 | [coll] 16 | (/ (reduce + coll) (count coll))) 17 | 18 | (defn median 19 | [coll] 20 | (let [sorted (sort coll) 21 | c (count coll) 22 | mid (quot c 2)] 23 | (if (odd? 
c) 24 | (nth sorted mid) 25 | (/ (+ (nth sorted (dec mid)) (nth sorted mid)) 2)))) 26 | 27 | (defn mode 28 | [coll] 29 | (first (apply max-key second (frequencies coll)))) 30 | 31 | (defn random-partition 32 | "Randomly shuffles coll and splits it into consecutive partitions of (quot (count coll) n) elements each, with the partition size raised to 1 when n exceeds (count coll). When (count coll) is not a multiple of n, the leftover elements spill into extra partitions, so the result may hold more than n partitions." 33 | [n coll] 34 | (let [size (max 1 (quot (count coll) n)) ; a size of 0 would make partition yield empty lists forever 35 | coll (shuffle coll)] 36 | (partition size size [] coll))) 37 | 38 | (defn sample-with-replacement 39 | "Returns n randomly selected elements, with replacement, from coll." 40 | ([coll n] 41 | (sample-with-replacement coll n (list))) 42 | ([coll n s] 43 | (cond (<= n 0) s 44 | (not (vector? coll)) (sample-with-replacement (vec coll) n s) 45 | :else 46 | (let [index (rand-int (count coll))] 47 | (sample-with-replacement coll 48 | (dec n) 49 | (conj s (nth coll index))))))) 50 | 51 | (defn sample-without-replacement 52 | "Returns n randomly selected elements, without replacement, from coll." 53 | ([coll n] 54 | (sample-without-replacement coll n (list))) 55 | ([coll n s] 56 | (cond (<= n 0) s 57 | (empty? coll) s 58 | (>= n (count coll)) coll 59 | (not (vector? 
coll)) (sample-without-replacement (vec coll) n s) 60 | :else 61 | (let [index (rand-int (count coll))] 62 | (sample-without-replacement (subvec (assoc coll index (first coll)) 1) 63 | (dec n) 64 | (conj s (nth coll index))))))) 65 | 66 | ;; Common functions 67 | 68 | (defn relu 69 | [z] 70 | (max 0 z)) 71 | 72 | (defn relu' 73 | [z] 74 | (if (> z 0) 1 0)) 75 | 76 | (defn sigmoid 77 | [z] 78 | (/ 1 (+ 1 (expt Math/E (- z))))) 79 | 80 | (defn sigmoid' 81 | [z] 82 | (* z (- 1 z))) 83 | 84 | (defn derivative 85 | [f] 86 | (cond 87 | (= f relu) relu' 88 | (= f sigmoid) sigmoid')) 89 | -------------------------------------------------------------------------------- /src/lambda_ml/data/binary_tree.clj: -------------------------------------------------------------------------------- 1 | (ns lambda-ml.data.binary-tree) 2 | 3 | (defn make-tree 4 | ([val] 5 | (make-tree val nil nil)) 6 | ([val left right] 7 | (vector val left right))) 8 | 9 | (defn get-value 10 | [tree] 11 | (nth tree 0)) 12 | 13 | (defn get-left 14 | [tree] 15 | (nth tree 1)) 16 | 17 | (defn get-right 18 | [tree] 19 | (nth tree 2)) 20 | 21 | (defn get-path 22 | [tree paths] 23 | (->> paths 24 | (map (fn [path] 25 | (cond (= path :left) 1 26 | (= path :right) 2 27 | :else (throw (IllegalArgumentException. "Invalid tree path"))))) 28 | (get-in tree))) 29 | 30 | (defn leaf? 31 | [tree] 32 | (and (nil? (get-left tree)) (nil? (get-right tree)))) 33 | 34 | (defn print-tree 35 | ([tree] 36 | (print-tree tree 0)) 37 | ([tree level] 38 | (println (str (apply str (repeat level " ")) 39 | (let [val (get-value tree)] 40 | (or (meta val) val)))) 41 | (when (not (nil? (get-left tree))) 42 | (print-tree (get-left tree) (inc level))) 43 | (when (not (nil? (get-right tree))) 44 | (print-tree (get-right tree) (inc level))))) 45 | 46 | (defn adjacency-matrix 47 | "Returns an adjacency matrix representation of a binary tree." 
48 | ([tree] 49 | (adjacency-matrix tree {})) 50 | ([tree matrix] 51 | (let [left (get-left tree) 52 | matrix (if (nil? left) matrix (adjacency-matrix left matrix)) 53 | edges (if (nil? left) [] [(dec (count matrix))]) 54 | right (get-right tree) 55 | matrix (if (nil? right) matrix (adjacency-matrix right matrix)) 56 | edges (if (nil? right) edges (conj edges (dec (count matrix))))] 57 | (assoc matrix (count matrix) {:edges edges :value (get-value tree)})))) 58 | -------------------------------------------------------------------------------- /src/lambda_ml/data/kd_tree.clj: -------------------------------------------------------------------------------- 1 | (ns lambda-ml.data.kd-tree 2 | (:require [lambda-ml.data.binary-tree :as bt])) 3 | 4 | ;; K-d tree 5 | 6 | (defn make-tree 7 | "Returns a k-d tree, with dims as the number of dimensions, for the given 8 | nodes. Optionally, a function f can be supplied and used to return the 9 | k-dimensional point for a given node. Otherwise, the node itself is assumed to 10 | be the k-dimensional point." 11 | ([dims nodes] 12 | (make-tree dims nodes identity)) 13 | ([dims nodes f] 14 | (make-tree dims nodes f 0)) 15 | ([dims nodes f depth] 16 | (if (empty? nodes) 17 | nil 18 | (let [dim (fn [node] (nth (f node) (mod depth dims))) 19 | sorted (sort-by dim nodes) 20 | median (quot (count sorted) 2)] 21 | (bt/make-tree (nth sorted median) 22 | (make-tree dims (take median sorted) f (inc depth)) 23 | (make-tree dims (drop (+ median 1) sorted) f (inc depth))))))) 24 | -------------------------------------------------------------------------------- /src/lambda_ml/decision_tree.clj: -------------------------------------------------------------------------------- 1 | (ns lambda-ml.decision-tree 2 | "Decision tree learning using the Classification and Regression Trees (CART) 3 | algorithm. 
4 | 5 | Example usage: 6 | ``` 7 | (def data [[0 0 0] [0 1 1] [1 0 1] [1 1 0]]) 8 | (def fit 9 | (let [min-split 2 10 | min-leaf 1 11 | max-features 2] 12 | (-> (make-classification-tree gini-impurity min-split min-leaf max-features) 13 | (decision-tree-fit data)))) 14 | (decision-tree-predict fit (map butlast data)) 15 | ;;=> (0 1 1 0) 16 | ```" 17 | (:require [lambda-ml.core :as c] 18 | [lambda-ml.data.binary-tree :as bt])) 19 | 20 | ;; Cost functions 21 | 22 | (defn gini-impurity 23 | "Returns the Gini impurity of a seq of labels, i.e. the sum of p * (1 - p) over the relative frequency p of each distinct label." 24 | [labels] 25 | (let [total (count labels)] 26 | (->> (vals (frequencies labels)) 27 | (map #(/ % total)) 28 | (map #(* % (- 1 %))) 29 | (reduce +)))) 30 | 31 | (defn mean-squared-error 32 | "Returns the mean squared error between a seq of labels and a seq of predictions." 33 | [labels predictions] 34 | (->> (map - labels predictions) 35 | (map #(* % %)) 36 | (reduce +) 37 | (* (/ 1 (count predictions))))) 38 | 39 | (defn classification-weighted-cost 40 | [y1 y2 f g] 41 | (let [n1 (count y1) 42 | n2 (count y2)] 43 | ;; Classification cost doesn't take the prediction value into account, so g is unused 44 | (cond-> 0 45 | (> n1 0) (+ (* (/ n1 (+ n1 n2)) (f y1))) 46 | (> n2 0) (+ (* (/ n2 (+ n1 n2)) (f y2)))))) 47 | ;; Regression cost scores each non-empty side with f against a constant prediction, (g y), repeated once per label 48 | (defn regression-weighted-cost 49 | [y1 y2 f g] 50 | (let [n1 (count y1) 51 | n2 (count y2)] 52 | (cond-> 0 53 | (> n1 0) (+ (* (/ n1 (+ n1 n2)) 54 | (f y1 (repeat n1 (g y1))))) 55 | (> n2 0) (+ (* (/ n2 (+ n1 n2)) 56 | (f y2 (repeat n2 (g y2)))))))) 57 | 58 | ;; Tree splitting 59 | 60 | (defn categorical-partitions 61 | "Given a seq of k distinct values, returns the 2^{k-1}-1 possible binary 62 | partitions of the values into sets." 
63 | [vals] 64 | (if (<= (count vals) 1) 65 | [] 66 | (reduce (fn [p [s1 s2]] 67 | (conj p 68 | [(conj s1 (first vals)) s2] 69 | [(conj s2 (first vals)) s1])) 70 | (vector [(hash-set (first vals)) (set (rest vals))]) 71 | (categorical-partitions (rest vals))))) 72 | 73 | (defn numeric-partitions 74 | "Given a seq of k distinct numeric values, returns k-1 possible binary 75 | partitions of the values by taking the average of consecutive elements in the 76 | sorted seq of values." 77 | [vals] 78 | (loop [partitions [] 79 | v (sort vals)] 80 | (if (<= (count v) 1) 81 | partitions 82 | (recur (conj partitions (/ (+ (first v) (second v)) 2)) 83 | (rest v))))) 84 | 85 | (defn splitters 86 | "Returns a seq of all possible splitters for feature i. A splitter is a 87 | predicate function that evaluates to true if an example belongs in the left 88 | subtree, or false if an example belongs in the right subtree, based on the 89 | splitting criterion." 90 | [x i] 91 | (let [domain (distinct (map #(nth % i) x)) 92 | val (first domain)] 93 | (cond (number? val) (->> (numeric-partitions domain) 94 | (map (fn [s] 95 | (with-meta 96 | (fn [x] (<= (nth x i) s)) 97 | {:decision (float s)})))) 98 | (or (keyword? val) 99 | (string? val)) (->> (categorical-partitions domain) 100 | (map (fn [[s1 s2]] 101 | (with-meta 102 | (fn [x] (contains? s1 (nth x i))) 103 | {:decision [s1 s2]})))) 104 | :else (throw (IllegalArgumentException. "Invalid feature type"))))) 105 | 106 | (defn best-splitter 107 | "Returns the splitter for the given data that minimizes a weighted cost 108 | function, or returns nil if no splitter exists." 
109 | [model x y] 110 | (let [{cost :cost prediction :prediction weighted :weighted 111 | min-leaf :min-leaf max-features :max-features} model 112 | ;; Feature bagging - sample a subset of features to split on 113 | features (-> (range (count (first x))) 114 | (c/sample-without-replacement max-features)) 115 | data (map #(conj (vec %1) %2) x y)] 116 | (->> (for [i features] 117 | (let [no-splitter [nil Double/MAX_VALUE i]] 118 | ;; Find best splitter for feature i 119 | (->> (splitters x i) 120 | (map (fn [splitter] 121 | (let [[left right] (vals (group-by splitter data))] 122 | ;; Either split would have fewer observations than required 123 | (cond (< (count left) min-leaf) no-splitter 124 | (< (count right) min-leaf) no-splitter 125 | :else (let [cost (weighted (map last left) (map last right) cost prediction) 126 | ;; Add metadata to splitter 127 | splitter (vary-meta splitter merge {:cost (float cost) :feature i})] 128 | [splitter cost i]))))) 129 | (#(if (empty? %) (list no-splitter) %)) 130 | (apply min-key second)))) 131 | ;; Find best splitter amongst all features 132 | (reduce (fn [a b] 133 | (let [[_ c1 i1] a [_ c2 i2] b] 134 | (cond (< c1 c2) a 135 | ;; To match the CART algorithm, break ties in cost by 136 | ;; choosing splitter for feature with lower index 137 | (= c1 c2) (if (< i1 i2) a b) 138 | :else b)))) 139 | (first)))) 140 | 141 | ;; API 142 | 143 | (defn decision-tree-fit 144 | "Fits a decision tree to the given training data." 
145 | ([model data] 146 | (decision-tree-fit model (map butlast data) (map last data))) 147 | ([model x y] 148 | (let [{cost :cost prediction :prediction weighted :weighted 149 | min-split :min-split min-leaf :min-leaf max-features :max-features} model 150 | weighted (fn [left right] (weighted left right cost prediction))] 151 | (->> (cond 152 | ;; Fewer observations than required to split a node 153 | (< (count y) min-split) (bt/make-tree (prediction y)) 154 | ;; All observed labels are equivalent 155 | (apply = y) (bt/make-tree (prediction y)) 156 | :else 157 | (let [splitter (best-splitter model x y)] 158 | (if (nil? splitter) 159 | (bt/make-tree (prediction y)) 160 | (let [data (map #(conj (vec %1) %2) x y) 161 | split (group-by splitter data) 162 | left (get split true) 163 | right (get split false)] 164 | (bt/make-tree splitter 165 | (:parameters (decision-tree-fit model left)) 166 | (:parameters (decision-tree-fit model right))))))) 167 | (assoc model :parameters))))) 168 | 169 | (defn decision-tree-predict 170 | "Predicts the values of example data using a decision tree." 171 | [model x] 172 | (let [{tree :parameters} model] 173 | (when (not (nil? tree)) 174 | (letfn [(predict [t xi] 175 | (let [val (bt/get-value t)] 176 | (cond (bt/leaf? t) val 177 | (val xi) (predict (bt/get-left t) xi) 178 | :else (predict (bt/get-right t) xi))))] 179 | (map #(predict tree %) x))))) 180 | 181 | (defn print-decision-tree 182 | "Prints information about a given decision tree." 183 | [model] 184 | (println (dissoc model :parameters)) 185 | (when (contains? model :parameters) 186 | (bt/print-tree (:parameters model)))) 187 | 188 | (defn make-classification-tree 189 | "Returns a classification decision tree model using the given cost function." 
190 | [cost min-split min-leaf max-features] 191 | {:cost cost :prediction c/mode :weighted classification-weighted-cost 192 | :min-split min-split 193 | :min-leaf min-leaf 194 | :max-features max-features}) 195 | 196 | (defn make-regression-tree 197 | "Returns a regression decision tree model using the given cost function." 198 | [cost min-split min-leaf max-features] 199 | {:cost cost :prediction c/mean :weighted regression-weighted-cost 200 | :min-split min-split 201 | :min-leaf min-leaf 202 | :max-features max-features}) 203 | -------------------------------------------------------------------------------- /src/lambda_ml/distance.clj: -------------------------------------------------------------------------------- 1 | (ns lambda-ml.distance 2 | "Functions that compute measures of distance between values." 3 | (:require [lambda-ml.core :as c])) 4 | 5 | (defn cosine 6 | "Returns the cosine distance between two points by subtracting the cosine 7 | similarity from 1. Assumes that both points are represented as sequences of 8 | the same dimension. Given a dimension d, returns the distance between two 9 | points as if the values for all other dimensions were set to zero." 10 | ([x y] 11 | (- 1 (/ (c/dot-product x y) 12 | (* (Math/sqrt (c/dot-product x x)) 13 | (Math/sqrt (c/dot-product y y)))))) 14 | ([x y d] 15 | (cosine (vector (nth x d)) (vector (nth y d))))) 16 | 17 | (defn euclidean 18 | "Returns the Euclidean distance (squared) between two points. Assumes that 19 | both points are represented as sequences of the same dimension. Given a 20 | dimension d, returns the distance between two points as if the values for all 21 | other dimensions were set to zero." 22 | ([x y] 23 | (->> (map - x y) 24 | (map #(* % %)) 25 | (reduce +))) 26 | ([x y d] 27 | (euclidean (vector (nth x d)) (vector (nth y d))))) 28 | 29 | (defn haversine 30 | "Returns the great-circle distance between two points represented as 31 | geographic coordinates. 
Given a dimension d, returns the distance between the 32 | two points as if the value for the other dimension was set to zero." 33 | ([[lat1 lng1] [lat2 lng2]] 34 | (let [r 3959.9 ; miles; km = 6372.8 35 | dlat (Math/toRadians (- lat2 lat1)) 36 | dlng (Math/toRadians (- lng2 lng1)) 37 | lat1 (Math/toRadians lat1) 38 | lat2 (Math/toRadians lat2) 39 | a (+ (* (Math/sin (/ dlat 2)) (Math/sin (/ dlat 2))) 40 | (* (Math/sin (/ dlng 2)) (Math/sin (/ dlng 2)) (Math/cos lat1) (Math/cos lat2)))] 41 | (* r 2 (Math/asin (Math/sqrt a))))) 42 | ([x y d] 43 | (let [other (mod (inc d) 2)] 44 | (haversine (assoc x other 0) (assoc y other 0))))) 45 | 46 | (defn jaccard 47 | "Returns the Jaccard distance between two points by subtracting the Jaccard 48 | similarity coefficient from 1. Assumes that both points are represented as 49 | sequences of the same dimension. Given a dimension d, returns the distance 50 | between two points as if the values for all other dimensions were set to zero." 51 | ([x y] 52 | (- 1 (/ (reduce + (map min x y)) 53 | (reduce + (map max x y))))) 54 | ([x y d] 55 | (jaccard (vector (nth x d)) (vector (nth y d))))) 56 | -------------------------------------------------------------------------------- /src/lambda_ml/ensemble.clj: -------------------------------------------------------------------------------- 1 | (ns lambda-ml.ensemble 2 | "Ensemble learning methods. 
3 | 4 | Example usage: 5 | ``` 6 | (def data [[0 0 0] [0 1 1] [1 0 1] [1 1 0]]) 7 | (def tree 8 | (let [min-split 2 9 | min-leaf 1 10 | max-features 2] 11 | (make-classification-tree gini-impurity min-split min-leaf max-features))) 12 | (def fit 13 | (let [rate 1.0] 14 | (-> (iterate #(add-bagging-estimator % tree decision-tree-fit decision-tree-predict) 15 | (make-bagging-classifier rate)) 16 | (nth 1001) 17 | (bagging-ensemble-fit data)))) 18 | (bagging-ensemble-predict fit (map butlast data)) 19 | ;;=> (0 1 1 0) 20 | ```" 21 | (:require [lambda-ml.core :refer :all])) 22 | 23 | (defn bagging-ensemble-fit 24 | "Fits an ensemble of estimators using bootstrap samples of the training data 25 | for each base estimator. The size of each sample is the ensemble's :rate multiplied by the number of training examples." 26 | ([ensemble data] 27 | (let [n (* (:rate ensemble) (count data))] 28 | (->> (:estimators ensemble) 29 | (map (fn [[m f p]] (f m (sample-with-replacement data n)))) 30 | (assoc ensemble :fits)))) 31 | ([ensemble x y] 32 | (bagging-ensemble-fit ensemble (map concat x (map list y))))) 33 | 34 | (defn bagging-ensemble-predict 35 | "Predicts the values of example data using a bagging ensemble." 36 | [ensemble x] 37 | (->> (:fits ensemble) 38 | (map #(%1 %2 x) (map last (:estimators ensemble))) 39 | (apply map vector) 40 | (map (:aggregation ensemble)))) 41 | 42 | (defn add-bagging-estimator 43 | "Adds a base estimator to an ensemble, where each estimator is defined by fit 44 | and predict functions used for training on then predicting from the provided 45 | model, respectively." 46 | [ensemble model fit predict] 47 | (->> [model fit predict] 48 | (conj (get ensemble :estimators [])) 49 | (assoc ensemble :estimators))) 50 | 51 | (defn make-bagging-classifier 52 | "Returns a classifier based on an ensemble of classifiers to be fit to random 53 | samples of training data, where rate is the fraction of the data size (e.g. 1.0 for 100%) used to create 54 | each bootstrap sample. Predictions are aggregated across classifiers by taking 55 | the mode of predicted values." 
56 | [rate] 57 | {:rate rate 58 | :aggregation mode}) 59 | 60 | (defn make-bagging-regressor 61 | "Returns a regressor based on an ensemble of regressors to be fit to random 62 | samples of training data, where rate is the percent of data used to create 63 | each bootstrap sample. Predictions are aggregated across regressors by taking 64 | the mean of predicted values." 65 | [rate] 66 | {:rate rate 67 | :aggregation mean}) 68 | -------------------------------------------------------------------------------- /src/lambda_ml/examples/kaggle/march_madness.clj: -------------------------------------------------------------------------------- 1 | ;; gorilla-repl.fileformat = 1 2 | 3 | ;; ** 4 | ;;; # Lambda ML Example: Kaggle March Machine Learning Mania 2016 5 | ;;; 6 | ;;; An example of using a couple different classification techniques on the data from the [March Machine Learning Mania 2016](https://www.kaggle.com/c/march-machine-learning-mania-2016) competition from Kaggle. 7 | ;;; 8 | ;;; First, lets set up our namespace. 
;; **

;; @@
(ns lambda-ml.examples.kaggle.march-madness
  (:require [lambda-ml.core :refer :all]
            [lambda-ml.metrics :refer :all]
            [lambda-ml.regression :refer :all]
            [clojure.data.csv :as csv]
            [clojure.java.io :as io]
            [gorilla-plot.core :as plot]))
;; @@
;; =>
;;; {"type":"html","content":"nil","value":"nil"}
;; <=

;; @@
(def ^:private data-dir
  "resources/examples/kaggle/march-machine-learning-mania-2016-v2/")

(defn- csv-rows
  "Eagerly reads the CSV file with the given name from data-dir and returns its
  rows (vectors of strings) without the header row. The rows are realized
  before the reader is closed."
  [filename]
  (with-open [in (io/reader (str data-dir filename))]
    (doall (rest (csv/read-csv in)))))

; NOTE(review): the cells below parse CSV fields with read-string, which can
; evaluate reader macros. That is acceptable for trusted local files only;
; consider clojure.edn/read-string if the data source is not trusted.

(def teams
  "Map of team id (number) to team name."
  (reduce (fn [m [id name]] (assoc m (read-string id) name))
          {}
          (csv-rows "Teams.csv")))

(def slots
  "Nested map of season -> slot label -> [hi lo], where hi and lo are the
  labels (seeds or earlier slots) of the two participants of that slot."
  (reduce (fn [m [season slot hi lo]]
            (assoc-in m [(read-string season) slot] [hi lo]))
          {}
          (csv-rows "TourneySlots.csv")))

(def seeds
  "Nested map of season -> seed label -> team id."
  (reduce (fn [m [season seed team]]
            (assoc-in m [(read-string season) seed] (read-string team)))
          {}
          (csv-rows "TourneySeeds.csv")))

(defn parse-seed
  "Returns the integer formed by the second and third characters of a seed
  label."
  [s]
  (Integer/parseInt (subs s 1 3)))

(def seeds-index
  "Nested map of season -> team id -> numeric seed, as parsed by parse-seed."
  (reduce (fn [m [season seed team]]
            (assoc-in m [(read-string season) (read-string team)] (parse-seed seed)))
          {}
          (csv-rows "TourneySeeds.csv")))

(def tourney-results
  "Nested map of season -> team -> opponent -> {:score :oppscore}. Every game is
  stored twice, once from the perspective of each team."
  (reduce (fn [m row]
            (let [[season daynum wteam wscore lteam lscore wloc numot] (map read-string row)]
              (-> m
                  (assoc-in [season wteam lteam] {:score wscore :oppscore lscore})
                  (assoc-in [season lteam wteam] {:score lscore :oppscore wscore}))))
          {}
          (csv-rows "TourneyCompactResults.csv")))
;; @@
;; =>
;;; {"type":"html","content":"#'lambda-ml.examples.kaggle.march-madness/tourney-results","value":"#'lambda-ml.examples.kaggle.march-madness/tourney-results"}
;; <=

;; @@
(def ^:private blank-record
  {:g 0 :w 0 :l 0 :pf 0 :pa 0 :streak 0})

(defn- update-record
  "Returns the season record of a team after one more game. record may be nil
  for a team without games so far. :g counts games, :w and :l count wins and
  losses, :pf and :pa accumulate points for and against, and :streak is the
  length of the current run, positive for wins and negative for losses."
  [record won? points-for points-against]
  (-> (or record blank-record)
      (update-in [:g] inc)
      (update-in [(if won? :w :l)] inc)
      (update-in [:streak] (fn [x]
                             (if won?
                               (if (> x 0) (inc x) 1)
                               (if (< x 0) (dec x) -1))))
      (update-in [:pf] + points-for)
      (update-in [:pa] + points-against)))

(def stats
  "Nested map of season -> team -> regular season record (see update-record),
  built from the regular season results in file order."
  (reduce (fn [m row]
            (let [[season daynum wteam wscore lteam lscore wloc numot] (map read-string row)]
              (-> m
                  (update-in [season wteam] update-record true wscore lscore)
                  (update-in [season lteam] update-record false lscore wscore))))
          {}
          (csv-rows "RegularSeasonCompactResults.csv")))

(defn win-percentage
  "Returns the fraction of regular season games won by team in year, as a
  float."
  [year team]
  (let [w (get-in stats [year team :w])
        l (get-in stats [year team :l])]
    (float (/ w (+ w l)))))

(defn points-ratio
  "Returns the ratio of points scored to points allowed by team in year, as a
  float."
  [year team]
  (let [pf (get-in stats [year team :pf])
        pa (get-in stats [year team :pa])]
    (float (/ pf pa))))

(defn streak
  "Returns the streak recorded for team in year: positive for consecutive wins,
  negative for consecutive losses."
  [year team]
  (get-in stats [year team :streak]))
;; @@
;; =>
;;; {"type":"html","content":"#'lambda-ml.examples.kaggle.march-madness/streak","value":"#'lambda-ml.examples.kaggle.march-madness/streak"}
;; <=

;; @@
(defn generate-index
  "Returns a map from seed or slot label to team id for the tournament of the
  given year. Starting from the seeds of that year, every pass resolves the
  slots whose two participants are already in the index and whose game appears
  in tourney-results, adding slot -> winner. Passes repeat until one adds no
  new entries."
  ([year]
   (generate-index year (seeds year) {}))
  ([year index prev]
   (if (<= (count index) (count prev))
     index
     (let [updated (reduce (fn [result [slot [hi lo]]]
                             ;; Participants are looked up in the index from the
                             ;; previous pass, not in the map being built.
                             (let [hiteam (get index hi)
                                   loteam (get index lo)
                                   scores (get-in tourney-results [year hiteam loteam])]
                               (if (and hiteam loteam scores)
                                 (assoc result slot (if (> (:score scores) (:oppscore scores))
                                                      hiteam
                                                      loteam))
                                 result)))
                           index
                           (slots year))]
       (generate-index year updated index)))))

(defn parse-round
  "Returns the integer given by the second character of a slot label that
  starts with \"R\", and 0 for any other slot label."
  [slot]
  (if (= "R" (subs slot 0 1))
    (Integer/parseInt (subs slot 1 2))
    0))

(defn generate-matchups
  "Returns one vector per slot of the given year whose two participants are
  known:

    [year round hi lo hi-name lo-name hi-win-pct lo-win-pct
     hi-points-ratio lo-points-ratio hi-streak lo-streak hi-seed lo-seed]

  When the game appears in tourney-results a label is appended: 0 if the hi
  team scored more points, 1 otherwise."
  [year]
  (let [index (generate-index year)]
    (for [[slot t] (slots year)
          :let [[hi lo] (map index t)]
          :when (and hi lo)]
      (let [round (parse-round slot)
            hiseed (get-in seeds-index [year hi])
            loseed (get-in seeds-index [year lo])
            scores (get-in tourney-results [year hi lo])
            ; TODO: features go here
            matchup [year round hi lo
                     (teams hi)
                     (teams lo)
                     (win-percentage year hi)
                     (win-percentage year lo)
                     (points-ratio year hi)
                     (points-ratio year lo)
                     (streak year hi)
                     (streak year lo)
                     hiseed
                     loseed]]
        (if scores
          (let [winner (if (> (:score scores) (:oppscore scores)) 0 1)]
            (conj matchup winner))
          matchup)))))
;; @@
;; =>
;;; {"type":"html","content":"#'lambda-ml.examples.kaggle.march-madness/generate-matchups","value":"#'lambda-ml.examples.kaggle.march-madness/generate-matchups"}
;; <=

;; @@
(def training-set
  (mapcat generate-matchups (range 1985 2012)))

(def test-set
  (mapcat generate-matchups (range 2012 2016)))
;; @@
;; =>
;;; {"type":"html","content":"#'lambda-ml.examples.kaggle.march-madness/test-set","value":"#'lambda-ml.examples.kaggle.march-madness/test-set"}
;; <=

;; **
;;; Baseline model that always picks the high seed.
;; **

;; @@
(def baseline-predictions
  (map (fn [matchup]
         ; The seeds are the two values in front of the trailing winner label.
         (let [[hiseed loseed] (take-last 3 matchup)]
           (if (<= hiseed loseed) 0 1)))
       test-set))

(def baseline-accuracy
  (float (/ (->> (map = (map last test-set) baseline-predictions)
                 (filter identity)
                 (count))
            (count test-set))))

;(def baseline-roc (roc-curve (map last test-set) baseline-predictions))

(println "baseline accuracy =" baseline-accuracy)
;; @@
;; ->
;;; baseline accuracy = 0.6865672
;;;
;; <-
;; =>
;;; {"type":"html","content":"nil","value":"nil"}
;; <=

;; @@
(defn encode-features
  "Selects the model inputs from a matchup vector: the win percentages, points
  ratios and streaks of both teams, followed by the winner label."
  [matchup]
  (let [[year round hi lo
         hiteam loteam
         hiwinpct lowinpct
         hiptsratio loptsratio
         histreak lostreak
         hiseed loseed winner] matchup]
    [hiwinpct lowinpct
     hiptsratio loptsratio
     histreak lostreak
     ;hiseed loseed
     winner]))

(def alpha 0.03)
(def iters 2000)
(def threshold 0.5)

(doseq [lambda [0 0.1 1.0]]
  (let [model (-> (make-logistic-regression alpha lambda iters)
                  (regression-fit (map encode-features training-set)))
        predictions (regression-predict model (map (comp butlast encode-features) test-set))
        accuracy (float (/ (->> (map #(if (<= % threshold) 0 1) predictions)
                                (map = (map last test-set))
                                (filter identity)
                                (count))
                           (count test-set)))]
    (println (l2-norm (:parameters model)))
    (println "lambda =" lambda "accuracy =" accuracy)))
;; @@
;; ->
;;; 1.0195345016026474
;;; lambda = 0 accuracy = 0.6865672
;;; 1.0140108570708222
;;; lambda = 0.1 accuracy = 0.6865672
;;; 1.1385485393391344
;;; lambda = 1.0 accuracy = 0.6865672
;;;
;; <-
;; =>
;;; {"type":"html","content":"nil","value":"nil"}
;; <=

;; @@

;; @@

--------------------------------------------------------------------------------
/src/lambda_ml/examples/kaggle/titanic.clj:
--------------------------------------------------------------------------------
;; gorilla-repl.fileformat = 1

;; **
;;; # Lambda ML Example: Kaggle Titanic with Logistic Regression
;;;
;;; An example of applying logistic regression to the data from the [Titanic: Machine Learning from Disaster](https://www.kaggle.com/c/titanic) competition from Kaggle.
;;;
;;; First, let's set up our namespace and define a helper function for sanitizing data.
;; **

;; @@
(ns lambda-ml.examples.kaggle.titanic
  (:require [lambda-ml.core :refer :all]
            [lambda-ml.metrics :refer :all]
            [lambda-ml.regression :refer :all]
            [clojure.data.csv :as csv]
            [clojure.java.io]
            [clojure.string]
            [gorilla-plot.core :as plot]))

(defn sanitize
  "Converts a raw field into a value: numbers are returned unchanged, blank
  strings become nil, and any other string is parsed with read-string."
  [s]
  (cond (number? s) s
        (clojure.string/blank? s) nil
        ; NOTE(review): read-string can evaluate reader macros; acceptable for
        ; trusted local files only, otherwise prefer clojure.edn/read-string.
        :else (read-string s)))
;; @@
;; =>
;;; {"type":"html","content":"#'lambda-ml.examples.kaggle.titanic/sanitize","value":"#'lambda-ml.examples.kaggle.titanic/sanitize"}
;; <=

;; **
;;; Load the training data. Note that we're being arbitrarily selective about columns since we'll be using only a subset of the features.
;;;
;;; The categorical features also need to be converted into indicator variables. For example, the passenger class feature ("pclass") becomes three features ("pclass1", "pclass2", and "pclass3"), where the value for "pclass1" is 1 if the passenger was in 1st class and 0 otherwise, and so on.
33 | ;; ** 34 | 35 | ;; @@ 36 | (def train 37 | (with-open [in (clojure.java.io/reader "resources/examples/kaggle/titanic/train.csv")] 38 | (doall 39 | (->> (rest (csv/read-csv in)) 40 | (map (fn [[id survival pclass name sex age sibsp parch ticket fare cabin embarked]] 41 | (let [pclass1 (if (= pclass "1") 1 0) 42 | pclass2 (if (= pclass "2") 1 0) 43 | pclass3 (if (= pclass "3") 1 0) 44 | male (if (= sex "male") 1 0)] 45 | (map sanitize [pclass1 pclass2 pclass3 male sibsp parch survival])))))))) 46 | ;; @@ 47 | ;; => 48 | ;;; {"type":"html","content":"#'lambda-ml.examples.kaggle.titanic/train","value":"#'lambda-ml.examples.kaggle.titanic/train"} 49 | ;; <= 50 | 51 | ;; ** 52 | ;;; Visualize the categorical features in the training data. 53 | ;; ** 54 | 55 | ;; @@ 56 | (for [i [0 1 2 3]] 57 | (let [data (frequencies (map #(nth % i) train))] 58 | (plot/bar-chart (keys data) (vals data)))) 59 | ;; @@ 60 | ;; => 61 | ;;; {"type":"list-like","open":"(","close":")","separator":" ","items":[{"type":"vega","content":{"width":400,"height":247.2187957763672,"padding":{"top":10,"left":55,"bottom":40,"right":10},"data":[{"name":"c78229a5-ee84-4434-89ba-eeb6ed9177e6","values":[{"x":0,"y":675},{"x":1,"y":216}]}],"marks":[{"type":"rect","from":{"data":"c78229a5-ee84-4434-89ba-eeb6ed9177e6"},"properties":{"enter":{"x":{"scale":"x","field":"data.x"},"width":{"scale":"x","band":true,"offset":-1},"y":{"scale":"y","field":"data.y"},"y2":{"scale":"y","value":0}},"update":{"fill":{"value":"steelblue"},"opacity":{"value":1}},"hover":{"fill":{"value":"#FF29D2"}}}}],"scales":[{"name":"x","type":"ordinal","range":"width","domain":{"data":"c78229a5-ee84-4434-89ba-eeb6ed9177e6","field":"data.x"}},{"name":"y","range":"height","nice":true,"domain":{"data":"c78229a5-ee84-4434-89ba-eeb6ed9177e6","field":"data.y"}}],"axes":[{"type":"x","scale":"x"},{"type":"y","scale":"y"}]},"value":"#gorilla_repl.vega.VegaView{:content {:width 400, :height 247.2188, :padding {:top 10, :left 55, :bottom 40, 
:right 10}, :data [{:name \"c78229a5-ee84-4434-89ba-eeb6ed9177e6\", :values ({:x 0, :y 675} {:x 1, :y 216})}], :marks [{:type \"rect\", :from {:data \"c78229a5-ee84-4434-89ba-eeb6ed9177e6\"}, :properties {:enter {:x {:scale \"x\", :field \"data.x\"}, :width {:scale \"x\", :band true, :offset -1}, :y {:scale \"y\", :field \"data.y\"}, :y2 {:scale \"y\", :value 0}}, :update {:fill {:value \"steelblue\"}, :opacity {:value 1}}, :hover {:fill {:value \"#FF29D2\"}}}}], :scales [{:name \"x\", :type \"ordinal\", :range \"width\", :domain {:data \"c78229a5-ee84-4434-89ba-eeb6ed9177e6\", :field \"data.x\"}} {:name \"y\", :range \"height\", :nice true, :domain {:data \"c78229a5-ee84-4434-89ba-eeb6ed9177e6\", :field \"data.y\"}}], :axes [{:type \"x\", :scale \"x\"} {:type \"y\", :scale \"y\"}]}}"},{"type":"vega","content":{"width":400,"height":247.2187957763672,"padding":{"top":10,"left":55,"bottom":40,"right":10},"data":[{"name":"26b2827c-5029-4b91-b2e2-51366411c66e","values":[{"x":0,"y":707},{"x":1,"y":184}]}],"marks":[{"type":"rect","from":{"data":"26b2827c-5029-4b91-b2e2-51366411c66e"},"properties":{"enter":{"x":{"scale":"x","field":"data.x"},"width":{"scale":"x","band":true,"offset":-1},"y":{"scale":"y","field":"data.y"},"y2":{"scale":"y","value":0}},"update":{"fill":{"value":"steelblue"},"opacity":{"value":1}},"hover":{"fill":{"value":"#FF29D2"}}}}],"scales":[{"name":"x","type":"ordinal","range":"width","domain":{"data":"26b2827c-5029-4b91-b2e2-51366411c66e","field":"data.x"}},{"name":"y","range":"height","nice":true,"domain":{"data":"26b2827c-5029-4b91-b2e2-51366411c66e","field":"data.y"}}],"axes":[{"type":"x","scale":"x"},{"type":"y","scale":"y"}]},"value":"#gorilla_repl.vega.VegaView{:content {:width 400, :height 247.2188, :padding {:top 10, :left 55, :bottom 40, :right 10}, :data [{:name \"26b2827c-5029-4b91-b2e2-51366411c66e\", :values ({:x 0, :y 707} {:x 1, :y 184})}], :marks [{:type \"rect\", :from {:data \"26b2827c-5029-4b91-b2e2-51366411c66e\"}, :properties 
{:enter {:x {:scale \"x\", :field \"data.x\"}, :width {:scale \"x\", :band true, :offset -1}, :y {:scale \"y\", :field \"data.y\"}, :y2 {:scale \"y\", :value 0}}, :update {:fill {:value \"steelblue\"}, :opacity {:value 1}}, :hover {:fill {:value \"#FF29D2\"}}}}], :scales [{:name \"x\", :type \"ordinal\", :range \"width\", :domain {:data \"26b2827c-5029-4b91-b2e2-51366411c66e\", :field \"data.x\"}} {:name \"y\", :range \"height\", :nice true, :domain {:data \"26b2827c-5029-4b91-b2e2-51366411c66e\", :field \"data.y\"}}], :axes [{:type \"x\", :scale \"x\"} {:type \"y\", :scale \"y\"}]}}"},{"type":"vega","content":{"width":400,"height":247.2187957763672,"padding":{"top":10,"left":55,"bottom":40,"right":10},"data":[{"name":"4f5e40bc-dc70-4bb8-8700-7c71212858b0","values":[{"x":1,"y":491},{"x":0,"y":400}]}],"marks":[{"type":"rect","from":{"data":"4f5e40bc-dc70-4bb8-8700-7c71212858b0"},"properties":{"enter":{"x":{"scale":"x","field":"data.x"},"width":{"scale":"x","band":true,"offset":-1},"y":{"scale":"y","field":"data.y"},"y2":{"scale":"y","value":0}},"update":{"fill":{"value":"steelblue"},"opacity":{"value":1}},"hover":{"fill":{"value":"#FF29D2"}}}}],"scales":[{"name":"x","type":"ordinal","range":"width","domain":{"data":"4f5e40bc-dc70-4bb8-8700-7c71212858b0","field":"data.x"}},{"name":"y","range":"height","nice":true,"domain":{"data":"4f5e40bc-dc70-4bb8-8700-7c71212858b0","field":"data.y"}}],"axes":[{"type":"x","scale":"x"},{"type":"y","scale":"y"}]},"value":"#gorilla_repl.vega.VegaView{:content {:width 400, :height 247.2188, :padding {:top 10, :left 55, :bottom 40, :right 10}, :data [{:name \"4f5e40bc-dc70-4bb8-8700-7c71212858b0\", :values ({:x 1, :y 491} {:x 0, :y 400})}], :marks [{:type \"rect\", :from {:data \"4f5e40bc-dc70-4bb8-8700-7c71212858b0\"}, :properties {:enter {:x {:scale \"x\", :field \"data.x\"}, :width {:scale \"x\", :band true, :offset -1}, :y {:scale \"y\", :field \"data.y\"}, :y2 {:scale \"y\", :value 0}}, :update {:fill {:value \"steelblue\"}, 
:opacity {:value 1}}, :hover {:fill {:value \"#FF29D2\"}}}}], :scales [{:name \"x\", :type \"ordinal\", :range \"width\", :domain {:data \"4f5e40bc-dc70-4bb8-8700-7c71212858b0\", :field \"data.x\"}} {:name \"y\", :range \"height\", :nice true, :domain {:data \"4f5e40bc-dc70-4bb8-8700-7c71212858b0\", :field \"data.y\"}}], :axes [{:type \"x\", :scale \"x\"} {:type \"y\", :scale \"y\"}]}}"},{"type":"vega","content":{"width":400,"height":247.2187957763672,"padding":{"top":10,"left":55,"bottom":40,"right":10},"data":[{"name":"34492d64-58ea-4f4f-826c-2196dfad4bea","values":[{"x":1,"y":577},{"x":0,"y":314}]}],"marks":[{"type":"rect","from":{"data":"34492d64-58ea-4f4f-826c-2196dfad4bea"},"properties":{"enter":{"x":{"scale":"x","field":"data.x"},"width":{"scale":"x","band":true,"offset":-1},"y":{"scale":"y","field":"data.y"},"y2":{"scale":"y","value":0}},"update":{"fill":{"value":"steelblue"},"opacity":{"value":1}},"hover":{"fill":{"value":"#FF29D2"}}}}],"scales":[{"name":"x","type":"ordinal","range":"width","domain":{"data":"34492d64-58ea-4f4f-826c-2196dfad4bea","field":"data.x"}},{"name":"y","range":"height","nice":true,"domain":{"data":"34492d64-58ea-4f4f-826c-2196dfad4bea","field":"data.y"}}],"axes":[{"type":"x","scale":"x"},{"type":"y","scale":"y"}]},"value":"#gorilla_repl.vega.VegaView{:content {:width 400, :height 247.2188, :padding {:top 10, :left 55, :bottom 40, :right 10}, :data [{:name \"34492d64-58ea-4f4f-826c-2196dfad4bea\", :values ({:x 1, :y 577} {:x 0, :y 314})}], :marks [{:type \"rect\", :from {:data \"34492d64-58ea-4f4f-826c-2196dfad4bea\"}, :properties {:enter {:x {:scale \"x\", :field \"data.x\"}, :width {:scale \"x\", :band true, :offset -1}, :y {:scale \"y\", :field \"data.y\"}, :y2 {:scale \"y\", :value 0}}, :update {:fill {:value \"steelblue\"}, :opacity {:value 1}}, :hover {:fill {:value \"#FF29D2\"}}}}], :scales [{:name \"x\", :type \"ordinal\", :range \"width\", :domain {:data \"34492d64-58ea-4f4f-826c-2196dfad4bea\", :field \"data.x\"}} {:name 
\"y\", :range \"height\", :nice true, :domain {:data \"34492d64-58ea-4f4f-826c-2196dfad4bea\", :field \"data.y\"}}], :axes [{:type \"x\", :scale \"x\"} {:type \"y\", :scale \"y\"}]}}"}],"value":"(#gorilla_repl.vega.VegaView{:content {:width 400, :height 247.2188, :padding {:top 10, :left 55, :bottom 40, :right 10}, :data [{:name \"c78229a5-ee84-4434-89ba-eeb6ed9177e6\", :values ({:x 0, :y 675} {:x 1, :y 216})}], :marks [{:type \"rect\", :from {:data \"c78229a5-ee84-4434-89ba-eeb6ed9177e6\"}, :properties {:enter {:x {:scale \"x\", :field \"data.x\"}, :width {:scale \"x\", :band true, :offset -1}, :y {:scale \"y\", :field \"data.y\"}, :y2 {:scale \"y\", :value 0}}, :update {:fill {:value \"steelblue\"}, :opacity {:value 1}}, :hover {:fill {:value \"#FF29D2\"}}}}], :scales [{:name \"x\", :type \"ordinal\", :range \"width\", :domain {:data \"c78229a5-ee84-4434-89ba-eeb6ed9177e6\", :field \"data.x\"}} {:name \"y\", :range \"height\", :nice true, :domain {:data \"c78229a5-ee84-4434-89ba-eeb6ed9177e6\", :field \"data.y\"}}], :axes [{:type \"x\", :scale \"x\"} {:type \"y\", :scale \"y\"}]}} #gorilla_repl.vega.VegaView{:content {:width 400, :height 247.2188, :padding {:top 10, :left 55, :bottom 40, :right 10}, :data [{:name \"26b2827c-5029-4b91-b2e2-51366411c66e\", :values ({:x 0, :y 707} {:x 1, :y 184})}], :marks [{:type \"rect\", :from {:data \"26b2827c-5029-4b91-b2e2-51366411c66e\"}, :properties {:enter {:x {:scale \"x\", :field \"data.x\"}, :width {:scale \"x\", :band true, :offset -1}, :y {:scale \"y\", :field \"data.y\"}, :y2 {:scale \"y\", :value 0}}, :update {:fill {:value \"steelblue\"}, :opacity {:value 1}}, :hover {:fill {:value \"#FF29D2\"}}}}], :scales [{:name \"x\", :type \"ordinal\", :range \"width\", :domain {:data \"26b2827c-5029-4b91-b2e2-51366411c66e\", :field \"data.x\"}} {:name \"y\", :range \"height\", :nice true, :domain {:data \"26b2827c-5029-4b91-b2e2-51366411c66e\", :field \"data.y\"}}], :axes [{:type \"x\", :scale \"x\"} {:type \"y\", :scale 
\"y\"}]}} #gorilla_repl.vega.VegaView{:content {:width 400, :height 247.2188, :padding {:top 10, :left 55, :bottom 40, :right 10}, :data [{:name \"4f5e40bc-dc70-4bb8-8700-7c71212858b0\", :values ({:x 1, :y 491} {:x 0, :y 400})}], :marks [{:type \"rect\", :from {:data \"4f5e40bc-dc70-4bb8-8700-7c71212858b0\"}, :properties {:enter {:x {:scale \"x\", :field \"data.x\"}, :width {:scale \"x\", :band true, :offset -1}, :y {:scale \"y\", :field \"data.y\"}, :y2 {:scale \"y\", :value 0}}, :update {:fill {:value \"steelblue\"}, :opacity {:value 1}}, :hover {:fill {:value \"#FF29D2\"}}}}], :scales [{:name \"x\", :type \"ordinal\", :range \"width\", :domain {:data \"4f5e40bc-dc70-4bb8-8700-7c71212858b0\", :field \"data.x\"}} {:name \"y\", :range \"height\", :nice true, :domain {:data \"4f5e40bc-dc70-4bb8-8700-7c71212858b0\", :field \"data.y\"}}], :axes [{:type \"x\", :scale \"x\"} {:type \"y\", :scale \"y\"}]}} #gorilla_repl.vega.VegaView{:content {:width 400, :height 247.2188, :padding {:top 10, :left 55, :bottom 40, :right 10}, :data [{:name \"34492d64-58ea-4f4f-826c-2196dfad4bea\", :values ({:x 1, :y 577} {:x 0, :y 314})}], :marks [{:type \"rect\", :from {:data \"34492d64-58ea-4f4f-826c-2196dfad4bea\"}, :properties {:enter {:x {:scale \"x\", :field \"data.x\"}, :width {:scale \"x\", :band true, :offset -1}, :y {:scale \"y\", :field \"data.y\"}, :y2 {:scale \"y\", :value 0}}, :update {:fill {:value \"steelblue\"}, :opacity {:value 1}}, :hover {:fill {:value \"#FF29D2\"}}}}], :scales [{:name \"x\", :type \"ordinal\", :range \"width\", :domain {:data \"34492d64-58ea-4f4f-826c-2196dfad4bea\", :field \"data.x\"}} {:name \"y\", :range \"height\", :nice true, :domain {:data \"34492d64-58ea-4f4f-826c-2196dfad4bea\", :field \"data.y\"}}], :axes [{:type \"x\", :scale \"x\"} {:type \"y\", :scale \"y\"}]}})"} 62 | ;; <= 63 | 64 | ;; ** 65 | ;;; Visualize the numerical features in the training data. 
66 | ;; ** 67 | 68 | ;; @@ 69 | (for [i [4 5]] 70 | (plot/histogram (map #(nth % i) train))) 71 | ;; @@ 72 | ;; => 73 | ;;; {"type":"list-like","open":"(","close":")","separator":" ","items":[{"type":"vega","content":{"width":400,"height":247.2187957763672,"padding":{"top":10,"left":55,"bottom":40,"right":10},"data":[{"name":"22467d50-0f32-44e2-afe5-e710e41f63e0","values":[{"x":0.0,"y":0},{"x":0.7272727272727274,"y":608.0},{"x":1.4545454545454548,"y":209.0},{"x":2.181818181818182,"y":28.0},{"x":2.9090909090909096,"y":0.0},{"x":3.636363636363637,"y":16.0},{"x":4.363636363636364,"y":18.0},{"x":5.090909090909092,"y":5.0},{"x":5.818181818181819,"y":0.0},{"x":6.545454545454547,"y":0.0},{"x":7.272727272727274,"y":0.0},{"x":8.000000000000002,"y":7.0},{"x":8.727272727272728,"y":0}]}],"marks":[{"type":"line","from":{"data":"22467d50-0f32-44e2-afe5-e710e41f63e0"},"properties":{"enter":{"x":{"scale":"x","field":"data.x"},"y":{"scale":"y","field":"data.y"},"interpolate":{"value":"step-before"},"fill":{"value":"steelblue"},"fillOpacity":{"value":0.4},"stroke":{"value":"steelblue"},"strokeWidth":{"value":2},"strokeOpacity":{"value":1}}}}],"scales":[{"name":"x","type":"linear","range":"width","zero":false,"domain":{"data":"22467d50-0f32-44e2-afe5-e710e41f63e0","field":"data.x"}},{"name":"y","type":"linear","range":"height","nice":true,"zero":false,"domain":{"data":"22467d50-0f32-44e2-afe5-e710e41f63e0","field":"data.y"}}],"axes":[{"type":"x","scale":"x"},{"type":"y","scale":"y"}]},"value":"#gorilla_repl.vega.VegaView{:content {:width 400, :height 247.2188, :padding {:top 10, :left 55, :bottom 40, :right 10}, :data [{:name \"22467d50-0f32-44e2-afe5-e710e41f63e0\", :values ({:x 0.0, :y 0} {:x 0.7272727272727274, :y 608.0} {:x 1.4545454545454548, :y 209.0} {:x 2.181818181818182, :y 28.0} {:x 2.9090909090909096, :y 0.0} {:x 3.636363636363637, :y 16.0} {:x 4.363636363636364, :y 18.0} {:x 5.090909090909092, :y 5.0} {:x 5.818181818181819, :y 0.0} {:x 6.545454545454547, :y 0.0} {:x 
7.272727272727274, :y 0.0} {:x 8.000000000000002, :y 7.0} {:x 8.727272727272728, :y 0})}], :marks [{:type \"line\", :from {:data \"22467d50-0f32-44e2-afe5-e710e41f63e0\"}, :properties {:enter {:x {:scale \"x\", :field \"data.x\"}, :y {:scale \"y\", :field \"data.y\"}, :interpolate {:value \"step-before\"}, :fill {:value \"steelblue\"}, :fillOpacity {:value 0.4}, :stroke {:value \"steelblue\"}, :strokeWidth {:value 2}, :strokeOpacity {:value 1}}}}], :scales [{:name \"x\", :type \"linear\", :range \"width\", :zero false, :domain {:data \"22467d50-0f32-44e2-afe5-e710e41f63e0\", :field \"data.x\"}} {:name \"y\", :type \"linear\", :range \"height\", :nice true, :zero false, :domain {:data \"22467d50-0f32-44e2-afe5-e710e41f63e0\", :field \"data.y\"}}], :axes [{:type \"x\", :scale \"x\"} {:type \"y\", :scale \"y\"}]}}"},{"type":"vega","content":{"width":400,"height":247.2187957763672,"padding":{"top":10,"left":55,"bottom":40,"right":10},"data":[{"name":"9b0a6172-a144-4771-b161-ba32e93f6ae0","values":[{"x":0.0,"y":0},{"x":0.5454545454545455,"y":678.0},{"x":1.090909090909091,"y":118.0},{"x":1.6363636363636367,"y":0.0},{"x":2.181818181818182,"y":80.0},{"x":2.7272727272727275,"y":0.0},{"x":3.272727272727273,"y":5.0},{"x":3.8181818181818183,"y":0.0},{"x":4.363636363636364,"y":4.0},{"x":4.90909090909091,"y":0.0},{"x":5.454545454545456,"y":5.0},{"x":6.000000000000002,"y":1.0},{"x":6.545454545454548,"y":0}]}],"marks":[{"type":"line","from":{"data":"9b0a6172-a144-4771-b161-ba32e93f6ae0"},"properties":{"enter":{"x":{"scale":"x","field":"data.x"},"y":{"scale":"y","field":"data.y"},"interpolate":{"value":"step-before"},"fill":{"value":"steelblue"},"fillOpacity":{"value":0.4},"stroke":{"value":"steelblue"},"strokeWidth":{"value":2},"strokeOpacity":{"value":1}}}}],"scales":[{"name":"x","type":"linear","range":"width","zero":false,"domain":{"data":"9b0a6172-a144-4771-b161-ba32e93f6ae0","field":"data.x"}},{"name":"y","type":"linear","range":"height","nice":true,"zero":false,"domain":{"dat
a":"9b0a6172-a144-4771-b161-ba32e93f6ae0","field":"data.y"}}],"axes":[{"type":"x","scale":"x"},{"type":"y","scale":"y"}]},"value":"#gorilla_repl.vega.VegaView{:content {:width 400, :height 247.2188, :padding {:top 10, :left 55, :bottom 40, :right 10}, :data [{:name \"9b0a6172-a144-4771-b161-ba32e93f6ae0\", :values ({:x 0.0, :y 0} {:x 0.5454545454545455, :y 678.0} {:x 1.090909090909091, :y 118.0} {:x 1.6363636363636367, :y 0.0} {:x 2.181818181818182, :y 80.0} {:x 2.7272727272727275, :y 0.0} {:x 3.272727272727273, :y 5.0} {:x 3.8181818181818183, :y 0.0} {:x 4.363636363636364, :y 4.0} {:x 4.90909090909091, :y 0.0} {:x 5.454545454545456, :y 5.0} {:x 6.000000000000002, :y 1.0} {:x 6.545454545454548, :y 0})}], :marks [{:type \"line\", :from {:data \"9b0a6172-a144-4771-b161-ba32e93f6ae0\"}, :properties {:enter {:x {:scale \"x\", :field \"data.x\"}, :y {:scale \"y\", :field \"data.y\"}, :interpolate {:value \"step-before\"}, :fill {:value \"steelblue\"}, :fillOpacity {:value 0.4}, :stroke {:value \"steelblue\"}, :strokeWidth {:value 2}, :strokeOpacity {:value 1}}}}], :scales [{:name \"x\", :type \"linear\", :range \"width\", :zero false, :domain {:data \"9b0a6172-a144-4771-b161-ba32e93f6ae0\", :field \"data.x\"}} {:name \"y\", :type \"linear\", :range \"height\", :nice true, :zero false, :domain {:data \"9b0a6172-a144-4771-b161-ba32e93f6ae0\", :field \"data.y\"}}], :axes [{:type \"x\", :scale \"x\"} {:type \"y\", :scale \"y\"}]}}"}],"value":"(#gorilla_repl.vega.VegaView{:content {:width 400, :height 247.2188, :padding {:top 10, :left 55, :bottom 40, :right 10}, :data [{:name \"22467d50-0f32-44e2-afe5-e710e41f63e0\", :values ({:x 0.0, :y 0} {:x 0.7272727272727274, :y 608.0} {:x 1.4545454545454548, :y 209.0} {:x 2.181818181818182, :y 28.0} {:x 2.9090909090909096, :y 0.0} {:x 3.636363636363637, :y 16.0} {:x 4.363636363636364, :y 18.0} {:x 5.090909090909092, :y 5.0} {:x 5.818181818181819, :y 0.0} {:x 6.545454545454547, :y 0.0} {:x 7.272727272727274, :y 0.0} {:x 
8.000000000000002, :y 7.0} {:x 8.727272727272728, :y 0})}], :marks [{:type \"line\", :from {:data \"22467d50-0f32-44e2-afe5-e710e41f63e0\"}, :properties {:enter {:x {:scale \"x\", :field \"data.x\"}, :y {:scale \"y\", :field \"data.y\"}, :interpolate {:value \"step-before\"}, :fill {:value \"steelblue\"}, :fillOpacity {:value 0.4}, :stroke {:value \"steelblue\"}, :strokeWidth {:value 2}, :strokeOpacity {:value 1}}}}], :scales [{:name \"x\", :type \"linear\", :range \"width\", :zero false, :domain {:data \"22467d50-0f32-44e2-afe5-e710e41f63e0\", :field \"data.x\"}} {:name \"y\", :type \"linear\", :range \"height\", :nice true, :zero false, :domain {:data \"22467d50-0f32-44e2-afe5-e710e41f63e0\", :field \"data.y\"}}], :axes [{:type \"x\", :scale \"x\"} {:type \"y\", :scale \"y\"}]}} #gorilla_repl.vega.VegaView{:content {:width 400, :height 247.2188, :padding {:top 10, :left 55, :bottom 40, :right 10}, :data [{:name \"9b0a6172-a144-4771-b161-ba32e93f6ae0\", :values ({:x 0.0, :y 0} {:x 0.5454545454545455, :y 678.0} {:x 1.090909090909091, :y 118.0} {:x 1.6363636363636367, :y 0.0} {:x 2.181818181818182, :y 80.0} {:x 2.7272727272727275, :y 0.0} {:x 3.272727272727273, :y 5.0} {:x 3.8181818181818183, :y 0.0} {:x 4.363636363636364, :y 4.0} {:x 4.90909090909091, :y 0.0} {:x 5.454545454545456, :y 5.0} {:x 6.000000000000002, :y 1.0} {:x 6.545454545454548, :y 0})}], :marks [{:type \"line\", :from {:data \"9b0a6172-a144-4771-b161-ba32e93f6ae0\"}, :properties {:enter {:x {:scale \"x\", :field \"data.x\"}, :y {:scale \"y\", :field \"data.y\"}, :interpolate {:value \"step-before\"}, :fill {:value \"steelblue\"}, :fillOpacity {:value 0.4}, :stroke {:value \"steelblue\"}, :strokeWidth {:value 2}, :strokeOpacity {:value 1}}}}], :scales [{:name \"x\", :type \"linear\", :range \"width\", :zero false, :domain {:data \"9b0a6172-a144-4771-b161-ba32e93f6ae0\", :field \"data.x\"}} {:name \"y\", :type \"linear\", :range \"height\", :nice true, :zero false, :domain {:data 
\"9b0a6172-a144-4771-b161-ba32e93f6ae0\", :field \"data.y\"}}], :axes [{:type \"x\", :scale \"x\"} {:type \"y\", :scale \"y\"}]}})"} 74 | ;; <= 75 | 76 | ;; ** 77 | ;;; Load the test data, which is structured slightly differently since we'll be making predictions on this data 78 | ;; ** 79 | 80 | ;; @@ 81 | (def test 82 | (with-open [in (clojure.java.io/reader "resources/examples/kaggle/titanic/test.csv")] 83 | (doall 84 | (->> (rest (csv/read-csv in)) 85 | (map (fn [[id pclass name sex age sibsp parch ticket fare cabin embarked]] 86 | (let [pclass1 (if (= pclass "1") 1 0) 87 | pclass2 (if (= pclass "2") 1 0) 88 | pclass3 (if (= pclass "3") 1 0) 89 | male (if (= sex "male") 1 0)] 90 | (cons id (map sanitize [pclass1 pclass2 pclass3 male sibsp parch]))))))))) 91 | ;; @@ 92 | ;; => 93 | ;;; {"type":"html","content":"#'lambda-ml.examples.kaggle.titanic/test","value":"#'lambda-ml.examples.kaggle.titanic/test"} 94 | ;; <= 95 | 96 | ;; ** 97 | ;;; Define parameters and use k-fold cross validation to train logistic regression models. 
98 | ;; ** 99 | 100 | ;; @@ 101 | (def alpha 0.01) 102 | (def lambda 0.1) 103 | (def iters 200) 104 | 105 | (def k 4) 106 | (def colors ["red" "green" "blue" "black"]) 107 | (def partitions (random-partition k train)) 108 | 109 | (let [rocs (for [fold (range k)] 110 | (let [train-sample (->> partitions 111 | (keep-indexed #(if (not (= fold %1)) %2)) 112 | (apply concat)) 113 | test-sample (nth partitions fold) 114 | model (regression-fit (make-logistic-regression alpha lambda iters) train-sample) 115 | predictions (regression-predict model (map butlast test-sample))] 116 | (roc-curve (map last test-sample) predictions)))] 117 | (println (map (comp float auc) rocs)) 118 | (->> (map (fn [roc color] (plot/list-plot roc :joined true :color color)) rocs colors) 119 | (apply plot/compose))) 120 | ;; @@ 121 | ;; -> 122 | ;;; (0.7198606 0.77715176 0.57431024 0.7750871) 123 | ;;; 124 | ;; <- 125 | ;; => 126 | ;;; {"type":"vega","content":{"width":400,"height":247.2187957763672,"padding":{"top":10,"left":55,"bottom":40,"right":10},"scales":[{"name":"x","type":"linear","range":"width","zero":false,"domain":{"data":"3727b48d-eeb7-4450-b28a-760c1c221378","field":"data.x"}},{"name":"y","type":"linear","range":"height","nice":true,"zero":false,"domain":{"data":"3727b48d-eeb7-4450-b28a-760c1c221378","field":"data.y"}}],"axes":[{"type":"x","scale":"x"},{"type":"y","scale":"y"}],"data":[{"name":"3727b48d-eeb7-4450-b28a-760c1c221378","values":[{"x":0.007142857142857143,"y":0},{"x":0.007142857142857143,"y":0.01219512195121951},{"x":0.01428571428571429,"y":0.01219512195121951},{"x":0.02142857142857143,"y":0.01219512195121951},{"x":0.02142857142857143,"y":0.02439024390243902},{"x":0.02142857142857143,"y":0.03658536585365854},{"x":0.02142857142857143,"y":0.04878048780487805},{"x":0.02142857142857143,"y":0.06097560975609756},{"x":0.02142857142857143,"y":0.07317073170731707},{"x":0.02857142857142857,"y":0.07317073170731707},{"x":0.02857142857142857,"y":0.08536585365853659},{"x":0.0285714285
7142857,"y":0.0975609756097561},{"x":0.02857142857142857,"y":0.1097560975609756},{"x":0.02857142857142857,"y":0.1219512195121951},{"x":0.02857142857142857,"y":0.1341463414634146},{"x":0.02857142857142857,"y":0.1463414634146341},{"x":0.02857142857142857,"y":0.1585365853658537},{"x":0.03571428571428571,"y":0.1585365853658537},{"x":0.04285714285714286,"y":0.1585365853658537},{"x":0.05,"y":0.1585365853658537},{"x":0.05714285714285714,"y":0.1585365853658537},{"x":0.0642857142857143,"y":0.1585365853658537},{"x":0.0642857142857143,"y":0.1707317073170732},{"x":0.0642857142857143,"y":0.1829268292682927},{"x":0.0642857142857143,"y":0.1951219512195122},{"x":0.0642857142857143,"y":0.2073170731707317},{"x":0.0642857142857143,"y":0.2195121951219512},{"x":0.07142857142857142,"y":0.2195121951219512},{"x":0.07857142857142857,"y":0.2195121951219512},{"x":0.07857142857142857,"y":0.2317073170731707},{"x":0.07857142857142857,"y":0.2439024390243902},{"x":0.07857142857142857,"y":0.2560975609756098},{"x":0.07857142857142857,"y":0.2682926829268293},{"x":0.07857142857142857,"y":0.2804878048780488},{"x":0.07857142857142857,"y":0.2926829268292683},{"x":0.07857142857142857,"y":0.3048780487804878},{"x":0.07857142857142857,"y":0.3170731707317073},{"x":0.07857142857142857,"y":0.3292682926829268},{"x":0.07857142857142857,"y":0.3414634146341463},{"x":0.07857142857142857,"y":0.3536585365853659},{"x":0.07857142857142857,"y":0.3658536585365854},{"x":0.07857142857142857,"y":0.3780487804878049},{"x":0.08571428571428572,"y":0.3780487804878049},{"x":0.08571428571428572,"y":0.3902439024390244},{"x":0.08571428571428572,"y":0.4024390243902439},{"x":0.08571428571428572,"y":0.4146341463414634},{"x":0.09285714285714286,"y":0.4146341463414634},{"x":0.1,"y":0.4146341463414634},{"x":0.1071428571428571,"y":0.4146341463414634},{"x":0.1142857142857143,"y":0.4146341463414634},{"x":0.1214285714285714,"y":0.4146341463414634},{"x":0.1214285714285714,"y":0.4268292682926829},{"x":0.1285714285714286,"y":0.4268292682926829},{
"x":0.1285714285714286,"y":0.4390243902439024},{"x":0.1357142857142857,"y":0.4390243902439024},{"x":0.1428571428571429,"y":0.4390243902439024},{"x":0.15,"y":0.4390243902439024},{"x":0.15,"y":0.451219512195122},{"x":0.15,"y":0.4634146341463415},{"x":0.15,"y":0.475609756097561},{"x":0.15,"y":0.4878048780487805},{"x":0.15,"y":0.5},{"x":0.15,"y":0.5121951219512195},{"x":0.15,"y":0.524390243902439},{"x":0.15,"y":0.5365853658536585},{"x":0.1571428571428571,"y":0.5365853658536585},{"x":0.1642857142857143,"y":0.5365853658536585},{"x":0.1714285714285714,"y":0.5365853658536585},{"x":0.1785714285714286,"y":0.5365853658536585},{"x":0.1785714285714286,"y":0.548780487804878},{"x":0.1857142857142857,"y":0.548780487804878},{"x":0.1857142857142857,"y":0.5609756097560976},{"x":0.1928571428571429,"y":0.5609756097560976},{"x":0.2,"y":0.5609756097560976},{"x":0.2,"y":0.5731707317073171},{"x":0.2,"y":0.5853658536585366},{"x":0.2071428571428571,"y":0.5853658536585366},{"x":0.2071428571428571,"y":0.5975609756097561},{"x":0.2142857142857143,"y":0.5975609756097561},{"x":0.2214285714285714,"y":0.5975609756097561},{"x":0.2285714285714286,"y":0.5975609756097561},{"x":0.2357142857142857,"y":0.5975609756097561},{"x":0.2428571428571429,"y":0.5975609756097561},{"x":0.25,"y":0.5975609756097561},{"x":0.2571428571428571,"y":0.5975609756097561},{"x":0.2642857142857143,"y":0.5975609756097561},{"x":0.2642857142857143,"y":0.6097560975609756},{"x":0.2714285714285714,"y":0.6097560975609756},{"x":0.2785714285714286,"y":0.6097560975609756},{"x":0.2785714285714286,"y":0.6219512195121951},{"x":0.2857142857142857,"y":0.6219512195121951},{"x":0.2928571428571429,"y":0.6219512195121951},{"x":0.3,"y":0.6219512195121951},{"x":0.3071428571428571,"y":0.6219512195121951},{"x":0.3142857142857143,"y":0.6219512195121951},{"x":0.3214285714285714,"y":0.6219512195121951},{"x":0.3285714285714286,"y":0.6219512195121951},{"x":0.3285714285714286,"y":0.6341463414634146},{"x":0.3285714285714286,"y":0.646341463414634},{"x":0.3357142
857142857,"y":0.646341463414634},{"x":0.3428571428571429,"y":0.646341463414634},{"x":0.3428571428571429,"y":0.6585365853658537},{"x":0.3428571428571429,"y":0.6707317073170732},{"x":0.35,"y":0.6707317073170732},{"x":0.3571428571428571,"y":0.6707317073170732},{"x":0.3642857142857143,"y":0.6707317073170732},{"x":0.3714285714285714,"y":0.6707317073170732},{"x":0.3785714285714286,"y":0.6707317073170732},{"x":0.3785714285714286,"y":0.6829268292682927},{"x":0.3857142857142857,"y":0.6829268292682927},{"x":0.3928571428571429,"y":0.6829268292682927},{"x":0.4,"y":0.6829268292682927},{"x":0.4071428571428571,"y":0.6829268292682927},{"x":0.4142857142857143,"y":0.6829268292682927},{"x":0.4214285714285714,"y":0.6829268292682927},{"x":0.4285714285714286,"y":0.6829268292682927},{"x":0.4285714285714286,"y":0.6951219512195121},{"x":0.4357142857142857,"y":0.6951219512195121},{"x":0.4428571428571429,"y":0.6951219512195121},{"x":0.4428571428571429,"y":0.7073170731707317},{"x":0.45,"y":0.7073170731707317},{"x":0.4571428571428571,"y":0.7073170731707317},{"x":0.4642857142857143,"y":0.7073170731707317},{"x":0.4642857142857143,"y":0.7195121951219512},{"x":0.4642857142857143,"y":0.7317073170731707},{"x":0.4642857142857143,"y":0.7439024390243902},{"x":0.4714285714285714,"y":0.7439024390243902},{"x":0.4714285714285714,"y":0.7560975609756098},{"x":0.4785714285714286,"y":0.7560975609756098},{"x":0.4785714285714286,"y":0.7682926829268293},{"x":0.4857142857142857,"y":0.7682926829268293},{"x":0.4857142857142857,"y":0.7804878048780488},{"x":0.4928571428571429,"y":0.7804878048780488},{"x":0.4928571428571429,"y":0.7926829268292683},{"x":0.5,"y":0.7926829268292683},{"x":0.5,"y":0.8048780487804879},{"x":0.5071428571428571,"y":0.8048780487804879},{"x":0.5142857142857143,"y":0.8048780487804879},{"x":0.5214285714285714,"y":0.8048780487804879},{"x":0.5285714285714286,"y":0.8048780487804879},{"x":0.5357142857142857,"y":0.8048780487804879},{"x":0.5357142857142857,"y":0.8170731707317073},{"x":0.5357142857142857,"
y":0.8292682926829268},{"x":0.5428571428571429,"y":0.8292682926829268},{"x":0.5428571428571429,"y":0.8414634146341463},{"x":0.55,"y":0.8414634146341463},{"x":0.55,"y":0.853658536585366},{"x":0.557142857142857,"y":0.853658536585366},{"x":0.557142857142857,"y":0.8658536585365854},{"x":0.557142857142857,"y":0.8780487804878049},{"x":0.5642857142857143,"y":0.8780487804878049},{"x":0.5642857142857143,"y":0.8902439024390244},{"x":0.5714285714285714,"y":0.8902439024390244},{"x":0.5785714285714286,"y":0.8902439024390244},{"x":0.5857142857142857,"y":0.8902439024390244},{"x":0.5928571428571429,"y":0.8902439024390244},{"x":0.6,"y":0.8902439024390244},{"x":0.6071428571428571,"y":0.8902439024390244},{"x":0.6142857142857143,"y":0.8902439024390244},{"x":0.6214285714285714,"y":0.8902439024390244},{"x":0.6285714285714286,"y":0.8902439024390244},{"x":0.6357142857142857,"y":0.8902439024390244},{"x":0.6428571428571429,"y":0.8902439024390244},{"x":0.65,"y":0.8902439024390244},{"x":0.6571428571428571,"y":0.8902439024390244},{"x":0.6642857142857143,"y":0.8902439024390244},{"x":0.6714285714285714,"y":0.8902439024390244},{"x":0.6785714285714286,"y":0.8902439024390244},{"x":0.6857142857142857,"y":0.8902439024390244},{"x":0.692857142857143,"y":0.8902439024390244},{"x":0.7,"y":0.8902439024390244},{"x":0.7,"y":0.9024390243902439},{"x":0.7071428571428571,"y":0.9024390243902439},{"x":0.7142857142857143,"y":0.9024390243902439},{"x":0.7214285714285714,"y":0.9024390243902439},{"x":0.7285714285714286,"y":0.9024390243902439},{"x":0.7357142857142857,"y":0.9024390243902439},{"x":0.7428571428571429,"y":0.9024390243902439},{"x":0.7428571428571429,"y":0.9146341463414634},{"x":0.75,"y":0.9146341463414634},{"x":0.7571428571428571,"y":0.9146341463414634},{"x":0.7642857142857143,"y":0.9146341463414634},{"x":0.7714285714285714,"y":0.9146341463414634},{"x":0.7785714285714286,"y":0.9146341463414634},{"x":0.7857142857142857,"y":0.9146341463414634},{"x":0.7928571428571429,"y":0.9146341463414634},{"x":0.8,"y":0.91463
41463414634},{"x":0.807142857142857,"y":0.9146341463414634},{"x":0.8142857142857143,"y":0.9146341463414634},{"x":0.8214285714285714,"y":0.9146341463414634},{"x":0.8285714285714286,"y":0.9146341463414634},{"x":0.8357142857142857,"y":0.9146341463414634},{"x":0.8357142857142857,"y":0.9268292682926829},{"x":0.8428571428571429,"y":0.9268292682926829},{"x":0.85,"y":0.9268292682926829},{"x":0.85,"y":0.9390243902439024},{"x":0.8571428571428571,"y":0.9390243902439024},{"x":0.8642857142857143,"y":0.9390243902439024},{"x":0.8714285714285714,"y":0.9390243902439024},{"x":0.8785714285714286,"y":0.9390243902439024},{"x":0.8857142857142857,"y":0.9390243902439024},{"x":0.8928571428571429,"y":0.9390243902439024},{"x":0.8928571428571429,"y":0.951219512195122},{"x":0.9,"y":0.951219512195122},{"x":0.9071428571428571,"y":0.951219512195122},{"x":0.9142857142857143,"y":0.951219512195122},{"x":0.9214285714285714,"y":0.951219512195122},{"x":0.9285714285714286,"y":0.951219512195122},{"x":0.9357142857142857,"y":0.951219512195122},{"x":0.9357142857142857,"y":0.9634146341463415},{"x":0.942857142857143,"y":0.9634146341463415},{"x":0.942857142857143,"y":0.975609756097561},{"x":0.95,"y":0.975609756097561},{"x":0.9571428571428571,"y":0.975609756097561},{"x":0.9571428571428571,"y":0.9878048780487805},{"x":0.9642857142857143,"y":0.9878048780487805},{"x":0.9714285714285714,"y":0.9878048780487805},{"x":0.9785714285714286,"y":0.9878048780487805},{"x":0.9785714285714286,"y":1},{"x":0.9857142857142857,"y":1},{"x":0.9928571428571429,"y":1},{"x":1,"y":1}]},{"name":"0502fc6a-c9c5-4064-bd08-84a227b4178d","values":[{"x":0.007194244604316547,"y":0},{"x":0.01438848920863309,"y":0},{"x":0.01438848920863309,"y":0.01204819277108434},{"x":0.02158273381294964,"y":0.01204819277108434},{"x":0.02158273381294964,"y":0.02409638554216867},{"x":0.02158273381294964,"y":0.03614457831325301},{"x":0.02158273381294964,"y":0.04819277108433735},{"x":0.02158273381294964,"y":0.06024096385542169},{"x":0.02158273381294964,"y":0.0722891
5662650602},{"x":0.02158273381294964,"y":0.08433734939759036},{"x":0.02158273381294964,"y":0.0963855421686747},{"x":0.02158273381294964,"y":0.108433734939759},{"x":0.02158273381294964,"y":0.1204819277108434},{"x":0.02877697841726619,"y":0.1204819277108434},{"x":0.02877697841726619,"y":0.1325301204819277},{"x":0.02877697841726619,"y":0.144578313253012},{"x":0.02877697841726619,"y":0.1566265060240964},{"x":0.03597122302158273,"y":0.1566265060240964},{"x":0.03597122302158273,"y":0.1686746987951807},{"x":0.04316546762589928,"y":0.1686746987951807},{"x":0.04316546762589928,"y":0.1807228915662651},{"x":0.04316546762589928,"y":0.1927710843373494},{"x":0.04316546762589928,"y":0.2048192771084337},{"x":0.05035971223021583,"y":0.2048192771084337},{"x":0.05035971223021583,"y":0.2168674698795181},{"x":0.05035971223021583,"y":0.2289156626506024},{"x":0.05035971223021583,"y":0.2409638554216867},{"x":0.05035971223021583,"y":0.2530120481927711},{"x":0.05035971223021583,"y":0.2650602409638554},{"x":0.05755395683453237,"y":0.2650602409638554},{"x":0.05755395683453237,"y":0.2771084337349398},{"x":0.05755395683453237,"y":0.2891566265060241},{"x":0.05755395683453237,"y":0.3012048192771084},{"x":0.06474820143884892,"y":0.3012048192771084},{"x":0.06474820143884892,"y":0.3132530120481928},{"x":0.06474820143884892,"y":0.3253012048192771},{"x":0.06474820143884892,"y":0.3373493975903614},{"x":0.06474820143884892,"y":0.3493975903614458},{"x":0.06474820143884892,"y":0.3614457831325301},{"x":0.06474820143884892,"y":0.3734939759036145},{"x":0.06474820143884892,"y":0.3855421686746988},{"x":0.06474820143884892,"y":0.3975903614457831},{"x":0.06474820143884892,"y":0.4096385542168675},{"x":0.06474820143884892,"y":0.4216867469879518},{"x":0.06474820143884892,"y":0.4337349397590361},{"x":0.06474820143884892,"y":0.4457831325301205},{"x":0.06474820143884892,"y":0.4578313253012048},{"x":0.06474820143884892,"y":0.4698795180722892},{"x":0.06474820143884892,"y":0.4819277108433735},{"x":0.06474820143884892,"y":
0.4939759036144578},{"x":0.06474820143884892,"y":0.5060240963855422},{"x":0.07194244604316546,"y":0.5060240963855422},{"x":0.07194244604316546,"y":0.5180722891566265},{"x":0.07913669064748201,"y":0.5180722891566265},{"x":0.08633093525179857,"y":0.5180722891566265},{"x":0.08633093525179857,"y":0.5301204819277108},{"x":0.08633093525179857,"y":0.5421686746987951},{"x":0.09352517985611511,"y":0.5421686746987951},{"x":0.09352517985611511,"y":0.5542168674698795},{"x":0.1007194244604317,"y":0.5542168674698795},{"x":0.1079136690647482,"y":0.5542168674698795},{"x":0.1151079136690647,"y":0.5542168674698795},{"x":0.1151079136690647,"y":0.5662650602409639},{"x":0.1151079136690647,"y":0.5783132530120482},{"x":0.1151079136690647,"y":0.5903614457831325},{"x":0.1223021582733813,"y":0.5903614457831325},{"x":0.1294964028776978,"y":0.5903614457831325},{"x":0.1366906474820144,"y":0.5903614457831325},{"x":0.1438848920863309,"y":0.5903614457831325},{"x":0.1510791366906475,"y":0.5903614457831325},{"x":0.158273381294964,"y":0.5903614457831325},{"x":0.1654676258992806,"y":0.5903614457831325},{"x":0.1654676258992806,"y":0.6024096385542169},{"x":0.1726618705035971,"y":0.6024096385542169},{"x":0.1726618705035971,"y":0.6144578313253012},{"x":0.1726618705035971,"y":0.6265060240963854},{"x":0.1726618705035971,"y":0.6385542168674699},{"x":0.1726618705035971,"y":0.6506024096385542},{"x":0.1726618705035971,"y":0.6626506024096386},{"x":0.1798561151079137,"y":0.6626506024096386},{"x":0.1870503597122302,"y":0.6626506024096386},{"x":0.1870503597122302,"y":0.674698795180723},{"x":0.1942446043165468,"y":0.674698795180723},{"x":0.1942446043165468,"y":0.6867469879518072},{"x":0.2014388489208633,"y":0.6867469879518072},{"x":0.2086330935251799,"y":0.6867469879518072},{"x":0.2158273381294964,"y":0.6867469879518072},{"x":0.2230215827338129,"y":0.6867469879518072},{"x":0.2302158273381295,"y":0.6867469879518072},{"x":0.237410071942446,"y":0.6867469879518072},{"x":0.2446043165467626,"y":0.6867469879518072},{"x":0.
2517985611510791,"y":0.6867469879518072},{"x":0.2589928057553957,"y":0.6867469879518072},{"x":0.2661870503597122,"y":0.6867469879518072},{"x":0.2733812949640288,"y":0.6867469879518072},{"x":0.2733812949640288,"y":0.6987951807228916},{"x":0.2805755395683453,"y":0.6987951807228916},{"x":0.2877697841726619,"y":0.6987951807228916},{"x":0.2949640287769784,"y":0.6987951807228916},{"x":0.302158273381295,"y":0.6987951807228916},{"x":0.3093525179856115,"y":0.6987951807228916},{"x":0.3165467625899281,"y":0.6987951807228916},{"x":0.3165467625899281,"y":0.7108433734939759},{"x":0.3165467625899281,"y":0.7228915662650602},{"x":0.3165467625899281,"y":0.7349397590361446},{"x":0.3237410071942446,"y":0.7349397590361446},{"x":0.3309352517985612,"y":0.7349397590361446},{"x":0.3381294964028777,"y":0.7349397590361446},{"x":0.3381294964028777,"y":0.7469879518072289},{"x":0.3453237410071942,"y":0.7469879518072289},{"x":0.3453237410071942,"y":0.7590361445783133},{"x":0.3525179856115108,"y":0.7590361445783133},{"x":0.3597122302158273,"y":0.7590361445783133},{"x":0.3597122302158273,"y":0.7710843373493976},{"x":0.3597122302158273,"y":0.7831325301204819},{"x":0.3597122302158273,"y":0.7951807228915663},{"x":0.3597122302158273,"y":0.8072289156626506},{"x":0.3669064748201439,"y":0.8072289156626506},{"x":0.3741007194244604,"y":0.8072289156626506},{"x":0.381294964028777,"y":0.8072289156626506},{"x":0.3884892086330935,"y":0.8072289156626506},{"x":0.3956834532374101,"y":0.8072289156626506},{"x":0.3956834532374101,"y":0.8192771084337349},{"x":0.3956834532374101,"y":0.8313253012048193},{"x":0.4028776978417266,"y":0.8313253012048193},{"x":0.4028776978417266,"y":0.8433734939759036},{"x":0.4100719424460432,"y":0.8433734939759036},{"x":0.4172661870503597,"y":0.8433734939759036},{"x":0.4172661870503597,"y":0.855421686746988},{"x":0.4244604316546763,"y":0.855421686746988},{"x":0.4316546762589928,"y":0.855421686746988},{"x":0.4388489208633094,"y":0.855421686746988},{"x":0.4460431654676259,"y":0.855421686746988
},{"x":0.4532374100719424,"y":0.855421686746988},{"x":0.460431654676259,"y":0.855421686746988},{"x":0.4676258992805755,"y":0.855421686746988},{"x":0.4748201438848921,"y":0.855421686746988},{"x":0.4820143884892086,"y":0.855421686746988},{"x":0.4892086330935252,"y":0.855421686746988},{"x":0.4964028776978417,"y":0.855421686746988},{"x":0.5035971223021583,"y":0.855421686746988},{"x":0.5107913669064748,"y":0.855421686746988},{"x":0.5179856115107914,"y":0.855421686746988},{"x":0.5251798561151079,"y":0.855421686746988},{"x":0.5323741007194245,"y":0.855421686746988},{"x":0.539568345323741,"y":0.855421686746988},{"x":0.5467625899280576,"y":0.855421686746988},{"x":0.5539568345323741,"y":0.855421686746988},{"x":0.5611510791366906,"y":0.855421686746988},{"x":0.5683453237410072,"y":0.855421686746988},{"x":0.5755395683453237,"y":0.855421686746988},{"x":0.5827338129496403,"y":0.855421686746988},{"x":0.5827338129496403,"y":0.8674698795180723},{"x":0.5899280575539568,"y":0.8674698795180723},{"x":0.5899280575539568,"y":0.8795180722891566},{"x":0.5971223021582734,"y":0.8795180722891566},{"x":0.60431654676259,"y":0.8795180722891566},{"x":0.6115107913669064,"y":0.8795180722891566},{"x":0.618705035971223,"y":0.8795180722891566},{"x":0.6258992805755396,"y":0.8795180722891566},{"x":0.6330935251798561,"y":0.8795180722891566},{"x":0.6402877697841727,"y":0.8795180722891566},{"x":0.6474820143884892,"y":0.8795180722891566},{"x":0.6546762589928058,"y":0.8795180722891566},{"x":0.6618705035971223,"y":0.8795180722891566},{"x":0.6690647482014388,"y":0.8795180722891566},{"x":0.6762589928057554,"y":0.8795180722891566},{"x":0.6834532374100719,"y":0.8795180722891566},{"x":0.6906474820143885,"y":0.8795180722891566},{"x":0.697841726618705,"y":0.8795180722891566},{"x":0.7050359712230216,"y":0.8795180722891566},{"x":0.7050359712230216,"y":0.891566265060241},{"x":0.7122302158273381,"y":0.891566265060241},{"x":0.7194244604316548,"y":0.891566265060241},{"x":0.7194244604316548,"y":0.9036144578313253},{"x":0.726
6187050359711,"y":0.9036144578313253},{"x":0.7338129496402878,"y":0.9036144578313253},{"x":0.7410071942446043,"y":0.9036144578313253},{"x":0.7482014388489209,"y":0.9036144578313253},{"x":0.7553956834532374,"y":0.9036144578313253},{"x":0.7553956834532374,"y":0.9156626506024096},{"x":0.7553956834532374,"y":0.927710843373494},{"x":0.762589928057554,"y":0.927710843373494},{"x":0.7697841726618705,"y":0.927710843373494},{"x":0.7769784172661871,"y":0.927710843373494},{"x":0.7841726618705036,"y":0.927710843373494},{"x":0.7913669064748201,"y":0.927710843373494},{"x":0.7985611510791367,"y":0.927710843373494},{"x":0.8057553956834532,"y":0.927710843373494},{"x":0.8129496402877698,"y":0.927710843373494},{"x":0.8201438848920863,"y":0.927710843373494},{"x":0.827338129496403,"y":0.927710843373494},{"x":0.8345323741007195,"y":0.927710843373494},{"x":0.841726618705036,"y":0.927710843373494},{"x":0.8489208633093525,"y":0.927710843373494},{"x":0.8489208633093525,"y":0.9397590361445783},{"x":0.8561151079136691,"y":0.9397590361445783},{"x":0.8561151079136691,"y":0.9518072289156627},{"x":0.8633093525179856,"y":0.9518072289156627},{"x":0.8633093525179856,"y":0.963855421686747},{"x":0.8705035971223022,"y":0.963855421686747},{"x":0.8776978417266187,"y":0.963855421686747},{"x":0.8776978417266187,"y":0.9759036144578312},{"x":0.8848920863309353,"y":0.9759036144578312},{"x":0.8920863309352518,"y":0.9759036144578312},{"x":0.8992805755395683,"y":0.9759036144578312},{"x":0.9064748201438849,"y":0.9759036144578312},{"x":0.9136690647482014,"y":0.9759036144578312},{"x":0.920863309352518,"y":0.9759036144578312},{"x":0.9280575539568345,"y":0.9759036144578312},{"x":0.9352517985611511,"y":0.9759036144578312},{"x":0.9352517985611511,"y":0.9879518072289157},{"x":0.9424460431654677,"y":0.9879518072289157},{"x":0.9496402877697842,"y":0.9879518072289157},{"x":0.9568345323741007,"y":0.9879518072289157},{"x":0.9640287769784173,"y":0.9879518072289157},{"x":0.9712230215827338,"y":0.9879518072289157},{"x":0.97122302
15827338,"y":1},{"x":0.9784172661870504,"y":1},{"x":0.9856115107913669,"y":1},{"x":0.9928057553956835,"y":1},{"x":1,"y":1}]},{"name":"a3035d60-74ea-49f9-93b1-f358ec1ada53","values":[{"x":0.007751937984496124,"y":0},{"x":0.007751937984496124,"y":0.01075268817204301},{"x":0.01550387596899225,"y":0.01075268817204301},{"x":0.02325581395348837,"y":0.01075268817204301},{"x":0.02325581395348837,"y":0.02150537634408602},{"x":0.02325581395348837,"y":0.03225806451612903},{"x":0.02325581395348837,"y":0.04301075268817204},{"x":0.02325581395348837,"y":0.05376344086021505},{"x":0.02325581395348837,"y":0.06451612903225806},{"x":0.0310077519379845,"y":0.06451612903225806},{"x":0.03875968992248062,"y":0.06451612903225806},{"x":0.03875968992248062,"y":0.07526881720430108},{"x":0.04651162790697674,"y":0.07526881720430108},{"x":0.05426356589147287,"y":0.07526881720430108},{"x":0.05426356589147287,"y":0.08602150537634409},{"x":0.05426356589147287,"y":0.0967741935483871},{"x":0.06201550387596899,"y":0.0967741935483871},{"x":0.06976744186046512,"y":0.0967741935483871},{"x":0.07751937984496124,"y":0.0967741935483871},{"x":0.07751937984496124,"y":0.1075268817204301},{"x":0.08527131782945736,"y":0.1075268817204301},{"x":0.09302325581395349,"y":0.1075268817204301},{"x":0.09302325581395349,"y":0.1182795698924731},{"x":0.1007751937984496,"y":0.1182795698924731},{"x":0.1085271317829457,"y":0.1182795698924731},{"x":0.1085271317829457,"y":0.1290322580645161},{"x":0.1162790697674419,"y":0.1290322580645161},{"x":0.1162790697674419,"y":0.1397849462365591},{"x":0.1162790697674419,"y":0.1505376344086022},{"x":0.1162790697674419,"y":0.1612903225806452},{"x":0.1162790697674419,"y":0.1720430107526882},{"x":0.1162790697674419,"y":0.1827956989247312},{"x":0.1162790697674419,"y":0.1935483870967742},{"x":0.1162790697674419,"y":0.2043010752688172},{"x":0.1162790697674419,"y":0.2150537634408602},{"x":0.124031007751938,"y":0.2150537634408602},{"x":0.124031007751938,"y":0.2258064516129032},{"x":0.124031007751938,
"y":0.2365591397849462},{"x":0.124031007751938,"y":0.2473118279569892},{"x":0.124031007751938,"y":0.2580645161290323},{"x":0.124031007751938,"y":0.2688172043010753},{"x":0.124031007751938,"y":0.2795698924731183},{"x":0.124031007751938,"y":0.2903225806451613},{"x":0.124031007751938,"y":0.3010752688172043},{"x":0.124031007751938,"y":0.3118279569892473},{"x":0.124031007751938,"y":0.3225806451612903},{"x":0.1317829457364341,"y":0.3225806451612903},{"x":0.1395348837209302,"y":0.3225806451612903},{"x":0.1472868217054264,"y":0.3225806451612903},{"x":0.1472868217054264,"y":0.3333333333333333},{"x":0.1472868217054264,"y":0.3440860215053763},{"x":0.1472868217054264,"y":0.3548387096774194},{"x":0.1472868217054264,"y":0.3655913978494624},{"x":0.1472868217054264,"y":0.3763440860215054},{"x":0.1550387596899225,"y":0.3763440860215054},{"x":0.1550387596899225,"y":0.3870967741935484},{"x":0.1627906976744186,"y":0.3870967741935484},{"x":0.1627906976744186,"y":0.3978494623655914},{"x":0.1705426356589147,"y":0.3978494623655914},{"x":0.1782945736434109,"y":0.3978494623655914},{"x":0.186046511627907,"y":0.3978494623655914},{"x":0.186046511627907,"y":0.4086021505376344},{"x":0.1937984496124031,"y":0.4086021505376344},{"x":0.1937984496124031,"y":0.4193548387096774},{"x":0.1937984496124031,"y":0.4301075268817204},{"x":0.2015503875968992,"y":0.4301075268817204},{"x":0.2093023255813953,"y":0.4301075268817204},{"x":0.2170542635658915,"y":0.4301075268817204},{"x":0.2248062015503876,"y":0.4301075268817204},{"x":0.2325581395348837,"y":0.4301075268817204},{"x":0.2403100775193798,"y":0.4301075268817204},{"x":0.248062015503876,"y":0.4301075268817204},{"x":0.2558139534883721,"y":0.4301075268817204},{"x":0.2635658914728682,"y":0.4301075268817204},{"x":0.2713178294573643,"y":0.4301075268817204},{"x":0.2790697674418605,"y":0.4301075268817204},{"x":0.2868217054263566,"y":0.4301075268817204},{"x":0.2945736434108527,"y":0.4301075268817204},{"x":0.3023255813953488,"y":0.4301075268817204},{"x":0.310077519379
845,"y":0.4301075268817204},{"x":0.3178294573643411,"y":0.4301075268817204},{"x":0.3178294573643411,"y":0.4408602150537634},{"x":0.3178294573643411,"y":0.4516129032258065},{"x":0.3178294573643411,"y":0.4623655913978495},{"x":0.3178294573643411,"y":0.4731182795698925},{"x":0.3178294573643411,"y":0.4838709677419355},{"x":0.3178294573643411,"y":0.4946236559139785},{"x":0.3178294573643411,"y":0.5053763440860215},{"x":0.3255813953488372,"y":0.5053763440860215},{"x":0.3255813953488372,"y":0.5161290322580645},{"x":0.3255813953488372,"y":0.5268817204301075},{"x":0.3255813953488372,"y":0.5376344086021505},{"x":0.3255813953488372,"y":0.5483870967741935},{"x":0.3255813953488372,"y":0.5591397849462366},{"x":0.3333333333333333,"y":0.5591397849462366},{"x":0.3333333333333333,"y":0.5698924731182796},{"x":0.3333333333333333,"y":0.5806451612903226},{"x":0.3333333333333333,"y":0.5913978494623656},{"x":0.3333333333333333,"y":0.6021505376344086},{"x":0.3410852713178295,"y":0.6021505376344086},{"x":0.3410852713178295,"y":0.6129032258064516},{"x":0.3488372093023256,"y":0.6129032258064516},{"x":0.3565891472868217,"y":0.6129032258064516},{"x":0.3643410852713178,"y":0.6129032258064516},{"x":0.372093023255814,"y":0.6129032258064516},{"x":0.3798449612403101,"y":0.6129032258064516},{"x":0.3875968992248062,"y":0.6129032258064516},{"x":0.3953488372093023,"y":0.6129032258064516},{"x":0.4031007751937984,"y":0.6129032258064516},{"x":0.4108527131782946,"y":0.6129032258064516},{"x":0.4186046511627907,"y":0.6129032258064516},{"x":0.4263565891472868,"y":0.6129032258064516},{"x":0.4341085271317829,"y":0.6129032258064516},{"x":0.4341085271317829,"y":0.6236559139784946},{"x":0.4418604651162791,"y":0.6236559139784946},{"x":0.4418604651162791,"y":0.6344086021505376},{"x":0.4496124031007752,"y":0.6344086021505376},{"x":0.4573643410852713,"y":0.6344086021505376},{"x":0.4573643410852713,"y":0.6451612903225806},{"x":0.4651162790697674,"y":0.6451612903225806},{"x":0.4728682170542636,"y":0.6451612903225806},{"x":
0.4806201550387597,"y":0.6451612903225806},{"x":0.4883720930232558,"y":0.6451612903225806},{"x":0.4961240310077519,"y":0.6451612903225806},{"x":0.5038759689922481,"y":0.6451612903225806},{"x":0.5116279069767442,"y":0.6451612903225806},{"x":0.5193798449612403,"y":0.6451612903225806},{"x":0.5271317829457364,"y":0.6451612903225806},{"x":0.5348837209302326,"y":0.6451612903225806},{"x":0.5426356589147286,"y":0.6451612903225806},{"x":0.5503875968992248,"y":0.6451612903225806},{"x":0.5581395348837209,"y":0.6451612903225806},{"x":0.5658914728682171,"y":0.6451612903225806},{"x":0.5736434108527132,"y":0.6451612903225806},{"x":0.5813953488372093,"y":0.6451612903225806},{"x":0.5891472868217054,"y":0.6451612903225806},{"x":0.5968992248062016,"y":0.6451612903225806},{"x":0.6046511627906977,"y":0.6451612903225806},{"x":0.6124031007751938,"y":0.6451612903225806},{"x":0.6201550387596899,"y":0.6451612903225806},{"x":0.627906976744186,"y":0.6451612903225806},{"x":0.6356589147286822,"y":0.6451612903225806},{"x":0.6356589147286822,"y":0.6559139784946237},{"x":0.6434108527131783,"y":0.6559139784946237},{"x":0.6511627906976744,"y":0.6559139784946237},{"x":0.6589147286821705,"y":0.6559139784946237},{"x":0.6666666666666667,"y":0.6559139784946237},{"x":0.6744186046511628,"y":0.6559139784946237},{"x":0.6821705426356589,"y":0.6559139784946237},{"x":0.689922480620155,"y":0.6559139784946237},{"x":0.6976744186046512,"y":0.6559139784946237},{"x":0.7054263565891473,"y":0.6559139784946237},{"x":0.7054263565891473,"y":0.6666666666666667},{"x":0.7131782945736433,"y":0.6666666666666667},{"x":0.7209302325581395,"y":0.6666666666666667},{"x":0.7286821705426357,"y":0.6666666666666667},{"x":0.7364341085271318,"y":0.6666666666666667},{"x":0.7441860465116279,"y":0.6666666666666667},{"x":0.751937984496124,"y":0.6666666666666667},{"x":0.7596899224806202,"y":0.6666666666666667},{"x":0.7674418604651163,"y":0.6666666666666667},{"x":0.7674418604651163,"y":0.6774193548387097},{"x":0.7751937984496124,"y":0.6774193548
387097},{"x":0.7829457364341085,"y":0.6774193548387097},{"x":0.7906976744186047,"y":0.6774193548387097},{"x":0.7984496124031008,"y":0.6774193548387097},{"x":0.8062015503875969,"y":0.6774193548387097},{"x":0.813953488372093,"y":0.6774193548387097},{"x":0.813953488372093,"y":0.6881720430107527},{"x":0.813953488372093,"y":0.6989247311827957},{"x":0.821705426356589,"y":0.6989247311827957},{"x":0.8294573643410853,"y":0.6989247311827957},{"x":0.8294573643410853,"y":0.7096774193548387},{"x":0.8294573643410853,"y":0.7204301075268817},{"x":0.8372093023255814,"y":0.7204301075268817},{"x":0.8372093023255814,"y":0.7311827956989247},{"x":0.8372093023255814,"y":0.7419354838709677},{"x":0.8449612403100775,"y":0.7419354838709677},{"x":0.8527131782945736,"y":0.7419354838709677},{"x":0.8527131782945736,"y":0.7526881720430108},{"x":0.8527131782945736,"y":0.7634408602150538},{"x":0.8527131782945736,"y":0.7741935483870968},{"x":0.8527131782945736,"y":0.7849462365591398},{"x":0.8527131782945736,"y":0.7956989247311828},{"x":0.8527131782945736,"y":0.8064516129032258},{"x":0.8527131782945736,"y":0.8172043010752688},{"x":0.8527131782945736,"y":0.8279569892473118},{"x":0.8527131782945736,"y":0.8387096774193548},{"x":0.8604651162790699,"y":0.8387096774193548},{"x":0.8604651162790699,"y":0.8494623655913978},{"x":0.8604651162790699,"y":0.8602150537634409},{"x":0.8682170542635659,"y":0.8602150537634409},{"x":0.8682170542635659,"y":0.8709677419354839},{"x":0.8682170542635659,"y":0.8817204301075269},{"x":0.8682170542635659,"y":0.8924731182795699},{"x":0.8682170542635659,"y":0.9032258064516129},{"x":0.875968992248062,"y":0.9032258064516129},{"x":0.8837209302325582,"y":0.9032258064516129},{"x":0.8837209302325582,"y":0.9139784946236559},{"x":0.8914728682170543,"y":0.9139784946236559},{"x":0.8914728682170543,"y":0.9247311827956989},{"x":0.8914728682170543,"y":0.9354838709677419},{"x":0.8992248062015504,"y":0.9354838709677419},{"x":0.9069767441860465,"y":0.9354838709677419},{"x":0.9147286821705426,"y":0
.9354838709677419},{"x":0.9147286821705426,"y":0.9462365591397849},{"x":0.9224806201550388,"y":0.9462365591397849},{"x":0.9302325581395349,"y":0.9462365591397849},{"x":0.937984496124031,"y":0.9462365591397849},{"x":0.9457364341085271,"y":0.9462365591397849},{"x":0.9457364341085271,"y":0.956989247311828},{"x":0.9534883720930233,"y":0.956989247311828},{"x":0.9612403100775194,"y":0.956989247311828},{"x":0.9612403100775194,"y":0.967741935483871},{"x":0.9689922480620154,"y":0.967741935483871},{"x":0.9767441860465116,"y":0.967741935483871},{"x":0.9767441860465116,"y":0.978494623655914},{"x":0.9844961240310078,"y":0.978494623655914},{"x":0.9844961240310078,"y":0.989247311827957},{"x":0.9922480620155039,"y":0.989247311827957},{"x":0.9922480620155039,"y":1},{"x":1,"y":1}]},{"name":"89cb3c24-8a7d-4f91-836f-a483e2deeb89","values":[{"x":0.007142857142857143,"y":0},{"x":0.007142857142857143,"y":0.01219512195121951},{"x":0.007142857142857143,"y":0.02439024390243902},{"x":0.007142857142857143,"y":0.03658536585365854},{"x":0.007142857142857143,"y":0.04878048780487805},{"x":0.01428571428571429,"y":0.04878048780487805},{"x":0.02142857142857143,"y":0.04878048780487805},{"x":0.02857142857142857,"y":0.04878048780487805},{"x":0.02857142857142857,"y":0.06097560975609756},{"x":0.03571428571428571,"y":0.06097560975609756},{"x":0.03571428571428571,"y":0.07317073170731707},{"x":0.03571428571428571,"y":0.08536585365853659},{"x":0.04285714285714286,"y":0.08536585365853659},{"x":0.05,"y":0.08536585365853659},{"x":0.05,"y":0.0975609756097561},{"x":0.05,"y":0.1097560975609756},{"x":0.05,"y":0.1219512195121951},{"x":0.05,"y":0.1341463414634146},{"x":0.05,"y":0.1463414634146341},{"x":0.05714285714285714,"y":0.1463414634146341},{"x":0.0642857142857143,"y":0.1463414634146341},{"x":0.07142857142857142,"y":0.1463414634146341},{"x":0.07142857142857142,"y":0.1585365853658537},{"x":0.07142857142857142,"y":0.1707317073170732},{"x":0.07142857142857142,"y":0.1829268292682927},{"x":0.07857142857142857,"y":0.18
29268292682927},{"x":0.07857142857142857,"y":0.1951219512195122},{"x":0.08571428571428572,"y":0.1951219512195122},{"x":0.09285714285714286,"y":0.1951219512195122},{"x":0.09285714285714286,"y":0.2073170731707317},{"x":0.09285714285714286,"y":0.2195121951219512},{"x":0.09285714285714286,"y":0.2317073170731707},{"x":0.09285714285714286,"y":0.2439024390243902},{"x":0.09285714285714286,"y":0.2560975609756098},{"x":0.09285714285714286,"y":0.2682926829268293},{"x":0.09285714285714286,"y":0.2804878048780488},{"x":0.09285714285714286,"y":0.2926829268292683},{"x":0.09285714285714286,"y":0.3048780487804878},{"x":0.09285714285714286,"y":0.3170731707317073},{"x":0.09285714285714286,"y":0.3292682926829268},{"x":0.09285714285714286,"y":0.3414634146341463},{"x":0.09285714285714286,"y":0.3536585365853659},{"x":0.09285714285714286,"y":0.3658536585365854},{"x":0.1,"y":0.3658536585365854},{"x":0.1071428571428571,"y":0.3658536585365854},{"x":0.1071428571428571,"y":0.3780487804878049},{"x":0.1071428571428571,"y":0.3902439024390244},{"x":0.1071428571428571,"y":0.4024390243902439},{"x":0.1071428571428571,"y":0.4146341463414634},{"x":0.1071428571428571,"y":0.4268292682926829},{"x":0.1071428571428571,"y":0.4390243902439024},{"x":0.1071428571428571,"y":0.451219512195122},{"x":0.1071428571428571,"y":0.4634146341463415},{"x":0.1071428571428571,"y":0.475609756097561},{"x":0.1071428571428571,"y":0.4878048780487805},{"x":0.1071428571428571,"y":0.5},{"x":0.1071428571428571,"y":0.5121951219512195},{"x":0.1071428571428571,"y":0.524390243902439},{"x":0.1071428571428571,"y":0.5365853658536585},{"x":0.1071428571428571,"y":0.548780487804878},{"x":0.1071428571428571,"y":0.5609756097560976},{"x":0.1142857142857143,"y":0.5609756097560976},{"x":0.1214285714285714,"y":0.5609756097560976},{"x":0.1285714285714286,"y":0.5609756097560976},{"x":0.1357142857142857,"y":0.5609756097560976},{"x":0.1357142857142857,"y":0.5731707317073171},{"x":0.1428571428571429,"y":0.5731707317073171},{"x":0.15,"y":0.5731707317073171}
,{"x":0.1571428571428571,"y":0.5731707317073171},{"x":0.1571428571428571,"y":0.5853658536585366},{"x":0.1642857142857143,"y":0.5853658536585366},{"x":0.1642857142857143,"y":0.5975609756097561},{"x":0.1714285714285714,"y":0.5975609756097561},{"x":0.1785714285714286,"y":0.5975609756097561},{"x":0.1785714285714286,"y":0.6097560975609756},{"x":0.1857142857142857,"y":0.6097560975609756},{"x":0.1857142857142857,"y":0.6219512195121951},{"x":0.1928571428571429,"y":0.6219512195121951},{"x":0.1928571428571429,"y":0.6341463414634146},{"x":0.2,"y":0.6341463414634146},{"x":0.2071428571428571,"y":0.6341463414634146},{"x":0.2071428571428571,"y":0.646341463414634},{"x":0.2142857142857143,"y":0.646341463414634},{"x":0.2214285714285714,"y":0.646341463414634},{"x":0.2214285714285714,"y":0.6585365853658537},{"x":0.2285714285714286,"y":0.6585365853658537},{"x":0.2357142857142857,"y":0.6585365853658537},{"x":0.2428571428571429,"y":0.6585365853658537},{"x":0.25,"y":0.6585365853658537},{"x":0.25,"y":0.6707317073170732},{"x":0.2571428571428571,"y":0.6707317073170732},{"x":0.2642857142857143,"y":0.6707317073170732},{"x":0.2714285714285714,"y":0.6707317073170732},{"x":0.2785714285714286,"y":0.6707317073170732},{"x":0.2857142857142857,"y":0.6707317073170732},{"x":0.2857142857142857,"y":0.6829268292682927},{"x":0.2928571428571429,"y":0.6829268292682927},{"x":0.2928571428571429,"y":0.6951219512195121},{"x":0.3,"y":0.6951219512195121},{"x":0.3,"y":0.7073170731707317},{"x":0.3,"y":0.7195121951219512},{"x":0.3,"y":0.7317073170731707},{"x":0.3,"y":0.7439024390243902},{"x":0.3071428571428571,"y":0.7439024390243902},{"x":0.3071428571428571,"y":0.7560975609756098},{"x":0.3142857142857143,"y":0.7560975609756098},{"x":0.3214285714285714,"y":0.7560975609756098},{"x":0.3214285714285714,"y":0.7682926829268293},{"x":0.3285714285714286,"y":0.7682926829268293},{"x":0.3357142857142857,"y":0.7682926829268293},{"x":0.3428571428571429,"y":0.7682926829268293},{"x":0.35,"y":0.7682926829268293},{"x":0.35,"y":0.780487
8048780488},{"x":0.35,"y":0.7926829268292683},{"x":0.35,"y":0.8048780487804879},{"x":0.35,"y":0.8170731707317073},{"x":0.35,"y":0.8292682926829268},{"x":0.35,"y":0.8414634146341463},{"x":0.3571428571428571,"y":0.8414634146341463},{"x":0.3642857142857143,"y":0.8414634146341463},{"x":0.3642857142857143,"y":0.853658536585366},{"x":0.3642857142857143,"y":0.8658536585365854},{"x":0.3714285714285714,"y":0.8658536585365854},{"x":0.3785714285714286,"y":0.8658536585365854},{"x":0.3857142857142857,"y":0.8658536585365854},{"x":0.3928571428571429,"y":0.8658536585365854},{"x":0.3928571428571429,"y":0.8780487804878049},{"x":0.3928571428571429,"y":0.8902439024390244},{"x":0.4,"y":0.8902439024390244},{"x":0.4071428571428571,"y":0.8902439024390244},{"x":0.4142857142857143,"y":0.8902439024390244},{"x":0.4214285714285714,"y":0.8902439024390244},{"x":0.4285714285714286,"y":0.8902439024390244},{"x":0.4357142857142857,"y":0.8902439024390244},{"x":0.4428571428571429,"y":0.8902439024390244},{"x":0.45,"y":0.8902439024390244},{"x":0.4571428571428571,"y":0.8902439024390244},{"x":0.4642857142857143,"y":0.8902439024390244},{"x":0.4642857142857143,"y":0.9024390243902439},{"x":0.4714285714285714,"y":0.9024390243902439},{"x":0.4785714285714286,"y":0.9024390243902439},{"x":0.4857142857142857,"y":0.9024390243902439},{"x":0.4928571428571429,"y":0.9024390243902439},{"x":0.5,"y":0.9024390243902439},{"x":0.5071428571428571,"y":0.9024390243902439},{"x":0.5142857142857143,"y":0.9024390243902439},{"x":0.5214285714285714,"y":0.9024390243902439},{"x":0.5285714285714286,"y":0.9024390243902439},{"x":0.5357142857142857,"y":0.9024390243902439},{"x":0.5428571428571429,"y":0.9024390243902439},{"x":0.55,"y":0.9024390243902439},{"x":0.557142857142857,"y":0.9024390243902439},{"x":0.5642857142857143,"y":0.9024390243902439},{"x":0.5714285714285714,"y":0.9024390243902439},{"x":0.5785714285714286,"y":0.9024390243902439},{"x":0.5857142857142857,"y":0.9024390243902439},{"x":0.5928571428571429,"y":0.9024390243902439},{"x":0
.5928571428571429,"y":0.9146341463414634},{"x":0.6,"y":0.9146341463414634},{"x":0.6071428571428571,"y":0.9146341463414634},{"x":0.6142857142857143,"y":0.9146341463414634},{"x":0.6214285714285714,"y":0.9146341463414634},{"x":0.6285714285714286,"y":0.9146341463414634},{"x":0.6285714285714286,"y":0.9268292682926829},{"x":0.6357142857142857,"y":0.9268292682926829},{"x":0.6428571428571429,"y":0.9268292682926829},{"x":0.6428571428571429,"y":0.9390243902439024},{"x":0.65,"y":0.9390243902439024},{"x":0.6571428571428571,"y":0.9390243902439024},{"x":0.6642857142857143,"y":0.9390243902439024},{"x":0.6714285714285714,"y":0.9390243902439024},{"x":0.6785714285714286,"y":0.9390243902439024},{"x":0.6857142857142857,"y":0.9390243902439024},{"x":0.692857142857143,"y":0.9390243902439024},{"x":0.7,"y":0.9390243902439024},{"x":0.7071428571428571,"y":0.9390243902439024},{"x":0.7142857142857143,"y":0.9390243902439024},{"x":0.7214285714285714,"y":0.9390243902439024},{"x":0.7285714285714286,"y":0.9390243902439024},{"x":0.7357142857142857,"y":0.9390243902439024},{"x":0.7428571428571429,"y":0.9390243902439024},{"x":0.75,"y":0.9390243902439024},{"x":0.7571428571428571,"y":0.9390243902439024},{"x":0.7642857142857143,"y":0.9390243902439024},{"x":0.7714285714285714,"y":0.9390243902439024},{"x":0.7785714285714286,"y":0.9390243902439024},{"x":0.7857142857142857,"y":0.9390243902439024},{"x":0.7928571428571429,"y":0.9390243902439024},{"x":0.8,"y":0.9390243902439024},{"x":0.807142857142857,"y":0.9390243902439024},{"x":0.8142857142857143,"y":0.9390243902439024},{"x":0.8214285714285714,"y":0.9390243902439024},{"x":0.8285714285714286,"y":0.9390243902439024},{"x":0.8357142857142857,"y":0.9390243902439024},{"x":0.8428571428571429,"y":0.9390243902439024},{"x":0.85,"y":0.9390243902439024},{"x":0.8571428571428571,"y":0.9390243902439024},{"x":0.8642857142857143,"y":0.9390243902439024},{"x":0.8714285714285714,"y":0.9390243902439024},{"x":0.8785714285714286,"y":0.9390243902439024},{"x":0.8785714285714286,"y":0.9
51219512195122},{"x":0.8857142857142857,"y":0.951219512195122},{"x":0.8928571428571429,"y":0.951219512195122},{"x":0.9,"y":0.951219512195122},{"x":0.9071428571428571,"y":0.951219512195122},{"x":0.9071428571428571,"y":0.9634146341463415},{"x":0.9142857142857143,"y":0.9634146341463415},{"x":0.9214285714285714,"y":0.9634146341463415},{"x":0.9285714285714286,"y":0.9634146341463415},{"x":0.9357142857142857,"y":0.9634146341463415},{"x":0.942857142857143,"y":0.9634146341463415},{"x":0.942857142857143,"y":0.975609756097561},{"x":0.95,"y":0.975609756097561},{"x":0.9571428571428571,"y":0.975609756097561},{"x":0.9642857142857143,"y":0.975609756097561},{"x":0.9714285714285714,"y":0.975609756097561},{"x":0.9785714285714286,"y":0.975609756097561},{"x":0.9857142857142857,"y":0.975609756097561},{"x":0.9857142857142857,"y":0.9878048780487805},{"x":0.9928571428571429,"y":0.9878048780487805},{"x":1,"y":0.9878048780487805},{"x":1,"y":1}]}],"marks":[{"type":"line","from":{"data":"3727b48d-eeb7-4450-b28a-760c1c221378"},"properties":{"enter":{"x":{"scale":"x","field":"data.x"},"y":{"scale":"y","field":"data.y"},"stroke":{"value":"red"},"strokeWidth":{"value":2},"strokeOpacity":{"value":1}}}},{"type":"line","from":{"data":"0502fc6a-c9c5-4064-bd08-84a227b4178d"},"properties":{"enter":{"x":{"scale":"x","field":"data.x"},"y":{"scale":"y","field":"data.y"},"stroke":{"value":"green"},"strokeWidth":{"value":2},"strokeOpacity":{"value":1}}}},{"type":"line","from":{"data":"a3035d60-74ea-49f9-93b1-f358ec1ada53"},"properties":{"enter":{"x":{"scale":"x","field":"data.x"},"y":{"scale":"y","field":"data.y"},"stroke":{"value":"blue"},"strokeWidth":{"value":2},"strokeOpacity":{"value":1}}}},{"type":"line","from":{"data":"89cb3c24-8a7d-4f91-836f-a483e2deeb89"},"properties":{"enter":{"x":{"scale":"x","field":"data.x"},"y":{"scale":"y","field":"data.y"},"stroke":{"value":"black"},"strokeWidth":{"value":2},"strokeOpacity":{"value":1}}}}]},"value":"#gorilla_repl.vega.VegaView{:content {:width 400, :height 
247.2188, :padding {:top 10, :left 55, :bottom 40, :right 10}, :scales [{:name \"x\", :type \"linear\", :range \"width\", :zero false, :domain {:data \"3727b48d-eeb7-4450-b28a-760c1c221378\", :field \"data.x\"}} {:name \"y\", :type \"linear\", :range \"height\", :nice true, :zero false, :domain {:data \"3727b48d-eeb7-4450-b28a-760c1c221378\", :field \"data.y\"}}], :axes [{:type \"x\", :scale \"x\"} {:type \"y\", :scale \"y\"}], :data ({:name \"3727b48d-eeb7-4450-b28a-760c1c221378\", :values ({:x 1/140, :y 0} {:x 1/140, :y 1/82} {:x 1/70, :y 1/82} {:x 3/140, :y 1/82} {:x 3/140, :y 1/41} {:x 3/140, :y 3/82} {:x 3/140, :y 2/41} {:x 3/140, :y 5/82} {:x 3/140, :y 3/41} {:x 1/35, :y 3/41} {:x 1/35, :y 7/82} {:x 1/35, :y 4/41} {:x 1/35, :y 9/82} {:x 1/35, :y 5/41} {:x 1/35, :y 11/82} {:x 1/35, :y 6/41} {:x 1/35, :y 13/82} {:x 1/28, :y 13/82} {:x 3/70, :y 13/82} {:x 1/20, :y 13/82} {:x 2/35, :y 13/82} {:x 9/140, :y 13/82} {:x 9/140, :y 7/41} {:x 9/140, :y 15/82} {:x 9/140, :y 8/41} {:x 9/140, :y 17/82} {:x 9/140, :y 9/41} {:x 1/14, :y 9/41} {:x 11/140, :y 9/41} {:x 11/140, :y 19/82} {:x 11/140, :y 10/41} {:x 11/140, :y 21/82} {:x 11/140, :y 11/41} {:x 11/140, :y 23/82} {:x 11/140, :y 12/41} {:x 11/140, :y 25/82} {:x 11/140, :y 13/41} {:x 11/140, :y 27/82} {:x 11/140, :y 14/41} {:x 11/140, :y 29/82} {:x 11/140, :y 15/41} {:x 11/140, :y 31/82} {:x 3/35, :y 31/82} {:x 3/35, :y 16/41} {:x 3/35, :y 33/82} {:x 3/35, :y 17/41} {:x 13/140, :y 17/41} {:x 1/10, :y 17/41} {:x 3/28, :y 17/41} {:x 4/35, :y 17/41} {:x 17/140, :y 17/41} {:x 17/140, :y 35/82} {:x 9/70, :y 35/82} {:x 9/70, :y 18/41} {:x 19/140, :y 18/41} {:x 1/7, :y 18/41} {:x 3/20, :y 18/41} {:x 3/20, :y 37/82} {:x 3/20, :y 19/41} {:x 3/20, :y 39/82} {:x 3/20, :y 20/41} {:x 3/20, :y 1/2} {:x 3/20, :y 21/41} {:x 3/20, :y 43/82} {:x 3/20, :y 22/41} {:x 11/70, :y 22/41} {:x 23/140, :y 22/41} {:x 6/35, :y 22/41} {:x 5/28, :y 22/41} {:x 5/28, :y 45/82} {:x 13/70, :y 45/82} {:x 13/70, :y 23/41} {:x 27/140, :y 23/41} {:x 1/5, :y 
23/41} {:x 1/5, :y 47/82} {:x 1/5, :y 24/41} {:x 29/140, :y 24/41} {:x 29/140, :y 49/82} {:x 3/14, :y 49/82} {:x 31/140, :y 49/82} {:x 8/35, :y 49/82} {:x 33/140, :y 49/82} {:x 17/70, :y 49/82} {:x 1/4, :y 49/82} {:x 9/35, :y 49/82} {:x 37/140, :y 49/82} {:x 37/140, :y 25/41} {:x 19/70, :y 25/41} {:x 39/140, :y 25/41} {:x 39/140, :y 51/82} {:x 2/7, :y 51/82} {:x 41/140, :y 51/82} {:x 3/10, :y 51/82} {:x 43/140, :y 51/82} {:x 11/35, :y 51/82} {:x 9/28, :y 51/82} {:x 23/70, :y 51/82} {:x 23/70, :y 26/41} {:x 23/70, :y 53/82} {:x 47/140, :y 53/82} {:x 12/35, :y 53/82} {:x 12/35, :y 27/41} {:x 12/35, :y 55/82} {:x 7/20, :y 55/82} {:x 5/14, :y 55/82} {:x 51/140, :y 55/82} {:x 13/35, :y 55/82} {:x 53/140, :y 55/82} {:x 53/140, :y 28/41} {:x 27/70, :y 28/41} {:x 11/28, :y 28/41} {:x 2/5, :y 28/41} {:x 57/140, :y 28/41} {:x 29/70, :y 28/41} {:x 59/140, :y 28/41} {:x 3/7, :y 28/41} {:x 3/7, :y 57/82} {:x 61/140, :y 57/82} {:x 31/70, :y 57/82} {:x 31/70, :y 29/41} {:x 9/20, :y 29/41} {:x 16/35, :y 29/41} {:x 13/28, :y 29/41} {:x 13/28, :y 59/82} {:x 13/28, :y 30/41} {:x 13/28, :y 61/82} {:x 33/70, :y 61/82} {:x 33/70, :y 31/41} {:x 67/140, :y 31/41} {:x 67/140, :y 63/82} {:x 17/35, :y 63/82} {:x 17/35, :y 32/41} {:x 69/140, :y 32/41} {:x 69/140, :y 65/82} {:x 1/2, :y 65/82} {:x 1/2, :y 33/41} {:x 71/140, :y 33/41} {:x 18/35, :y 33/41} {:x 73/140, :y 33/41} {:x 37/70, :y 33/41} {:x 15/28, :y 33/41} {:x 15/28, :y 67/82} {:x 15/28, :y 34/41} {:x 19/35, :y 34/41} {:x 19/35, :y 69/82} {:x 11/20, :y 69/82} {:x 11/20, :y 35/41} {:x 39/70, :y 35/41} {:x 39/70, :y 71/82} {:x 39/70, :y 36/41} {:x 79/140, :y 36/41} {:x 79/140, :y 73/82} {:x 4/7, :y 73/82} {:x 81/140, :y 73/82} {:x 41/70, :y 73/82} {:x 83/140, :y 73/82} {:x 3/5, :y 73/82} {:x 17/28, :y 73/82} {:x 43/70, :y 73/82} {:x 87/140, :y 73/82} {:x 22/35, :y 73/82} {:x 89/140, :y 73/82} {:x 9/14, :y 73/82} {:x 13/20, :y 73/82} {:x 23/35, :y 73/82} {:x 93/140, :y 73/82} {:x 47/70, :y 73/82} {:x 19/28, :y 73/82} {:x 24/35, :y 
73/82} {:x 97/140, :y 73/82} {:x 7/10, :y 73/82} {:x 7/10, :y 37/41} {:x 99/140, :y 37/41} {:x 5/7, :y 37/41} {:x 101/140, :y 37/41} {:x 51/70, :y 37/41} {:x 103/140, :y 37/41} {:x 26/35, :y 37/41} {:x 26/35, :y 75/82} {:x 3/4, :y 75/82} {:x 53/70, :y 75/82} {:x 107/140, :y 75/82} {:x 27/35, :y 75/82} {:x 109/140, :y 75/82} {:x 11/14, :y 75/82} {:x 111/140, :y 75/82} {:x 4/5, :y 75/82} {:x 113/140, :y 75/82} {:x 57/70, :y 75/82} {:x 23/28, :y 75/82} {:x 29/35, :y 75/82} {:x 117/140, :y 75/82} {:x 117/140, :y 38/41} {:x 59/70, :y 38/41} {:x 17/20, :y 38/41} {:x 17/20, :y 77/82} {:x 6/7, :y 77/82} {:x 121/140, :y 77/82} {:x 61/70, :y 77/82} {:x 123/140, :y 77/82} {:x 31/35, :y 77/82} {:x 25/28, :y 77/82} {:x 25/28, :y 39/41} {:x 9/10, :y 39/41} {:x 127/140, :y 39/41} {:x 32/35, :y 39/41} {:x 129/140, :y 39/41} {:x 13/14, :y 39/41} {:x 131/140, :y 39/41} {:x 131/140, :y 79/82} {:x 33/35, :y 79/82} {:x 33/35, :y 40/41} {:x 19/20, :y 40/41} {:x 67/70, :y 40/41} {:x 67/70, :y 81/82} {:x 27/28, :y 81/82} {:x 34/35, :y 81/82} {:x 137/140, :y 81/82} {:x 137/140, :y 1} {:x 69/70, :y 1} {:x 139/140, :y 1} {:x 1, :y 1})} {:name \"0502fc6a-c9c5-4064-bd08-84a227b4178d\", :values ({:x 1/139, :y 0} {:x 2/139, :y 0} {:x 2/139, :y 1/83} {:x 3/139, :y 1/83} {:x 3/139, :y 2/83} {:x 3/139, :y 3/83} {:x 3/139, :y 4/83} {:x 3/139, :y 5/83} {:x 3/139, :y 6/83} {:x 3/139, :y 7/83} {:x 3/139, :y 8/83} {:x 3/139, :y 9/83} {:x 3/139, :y 10/83} {:x 4/139, :y 10/83} {:x 4/139, :y 11/83} {:x 4/139, :y 12/83} {:x 4/139, :y 13/83} {:x 5/139, :y 13/83} {:x 5/139, :y 14/83} {:x 6/139, :y 14/83} {:x 6/139, :y 15/83} {:x 6/139, :y 16/83} {:x 6/139, :y 17/83} {:x 7/139, :y 17/83} {:x 7/139, :y 18/83} {:x 7/139, :y 19/83} {:x 7/139, :y 20/83} {:x 7/139, :y 21/83} {:x 7/139, :y 22/83} {:x 8/139, :y 22/83} {:x 8/139, :y 23/83} {:x 8/139, :y 24/83} {:x 8/139, :y 25/83} {:x 9/139, :y 25/83} {:x 9/139, :y 26/83} {:x 9/139, :y 27/83} {:x 9/139, :y 28/83} {:x 9/139, :y 29/83} {:x 9/139, :y 30/83} {:x 9/139, :y 
31/83} {:x 9/139, :y 32/83} {:x 9/139, :y 33/83} {:x 9/139, :y 34/83} {:x 9/139, :y 35/83} {:x 9/139, :y 36/83} {:x 9/139, :y 37/83} {:x 9/139, :y 38/83} {:x 9/139, :y 39/83} {:x 9/139, :y 40/83} {:x 9/139, :y 41/83} {:x 9/139, :y 42/83} {:x 10/139, :y 42/83} {:x 10/139, :y 43/83} {:x 11/139, :y 43/83} {:x 12/139, :y 43/83} {:x 12/139, :y 44/83} {:x 12/139, :y 45/83} {:x 13/139, :y 45/83} {:x 13/139, :y 46/83} {:x 14/139, :y 46/83} {:x 15/139, :y 46/83} {:x 16/139, :y 46/83} {:x 16/139, :y 47/83} {:x 16/139, :y 48/83} {:x 16/139, :y 49/83} {:x 17/139, :y 49/83} {:x 18/139, :y 49/83} {:x 19/139, :y 49/83} {:x 20/139, :y 49/83} {:x 21/139, :y 49/83} {:x 22/139, :y 49/83} {:x 23/139, :y 49/83} {:x 23/139, :y 50/83} {:x 24/139, :y 50/83} {:x 24/139, :y 51/83} {:x 24/139, :y 52/83} {:x 24/139, :y 53/83} {:x 24/139, :y 54/83} {:x 24/139, :y 55/83} {:x 25/139, :y 55/83} {:x 26/139, :y 55/83} {:x 26/139, :y 56/83} {:x 27/139, :y 56/83} {:x 27/139, :y 57/83} {:x 28/139, :y 57/83} {:x 29/139, :y 57/83} {:x 30/139, :y 57/83} {:x 31/139, :y 57/83} {:x 32/139, :y 57/83} {:x 33/139, :y 57/83} {:x 34/139, :y 57/83} {:x 35/139, :y 57/83} {:x 36/139, :y 57/83} {:x 37/139, :y 57/83} {:x 38/139, :y 57/83} {:x 38/139, :y 58/83} {:x 39/139, :y 58/83} {:x 40/139, :y 58/83} {:x 41/139, :y 58/83} {:x 42/139, :y 58/83} {:x 43/139, :y 58/83} {:x 44/139, :y 58/83} {:x 44/139, :y 59/83} {:x 44/139, :y 60/83} {:x 44/139, :y 61/83} {:x 45/139, :y 61/83} {:x 46/139, :y 61/83} {:x 47/139, :y 61/83} {:x 47/139, :y 62/83} {:x 48/139, :y 62/83} {:x 48/139, :y 63/83} {:x 49/139, :y 63/83} {:x 50/139, :y 63/83} {:x 50/139, :y 64/83} {:x 50/139, :y 65/83} {:x 50/139, :y 66/83} {:x 50/139, :y 67/83} {:x 51/139, :y 67/83} {:x 52/139, :y 67/83} {:x 53/139, :y 67/83} {:x 54/139, :y 67/83} {:x 55/139, :y 67/83} {:x 55/139, :y 68/83} {:x 55/139, :y 69/83} {:x 56/139, :y 69/83} {:x 56/139, :y 70/83} {:x 57/139, :y 70/83} {:x 58/139, :y 70/83} {:x 58/139, :y 71/83} {:x 59/139, :y 71/83} {:x 60/139, :y 71/83} 
{:x 61/139, :y 71/83} {:x 62/139, :y 71/83} {:x 63/139, :y 71/83} {:x 64/139, :y 71/83} {:x 65/139, :y 71/83} {:x 66/139, :y 71/83} {:x 67/139, :y 71/83} {:x 68/139, :y 71/83} {:x 69/139, :y 71/83} {:x 70/139, :y 71/83} {:x 71/139, :y 71/83} {:x 72/139, :y 71/83} {:x 73/139, :y 71/83} {:x 74/139, :y 71/83} {:x 75/139, :y 71/83} {:x 76/139, :y 71/83} {:x 77/139, :y 71/83} {:x 78/139, :y 71/83} {:x 79/139, :y 71/83} {:x 80/139, :y 71/83} {:x 81/139, :y 71/83} {:x 81/139, :y 72/83} {:x 82/139, :y 72/83} {:x 82/139, :y 73/83} {:x 83/139, :y 73/83} {:x 84/139, :y 73/83} {:x 85/139, :y 73/83} {:x 86/139, :y 73/83} {:x 87/139, :y 73/83} {:x 88/139, :y 73/83} {:x 89/139, :y 73/83} {:x 90/139, :y 73/83} {:x 91/139, :y 73/83} {:x 92/139, :y 73/83} {:x 93/139, :y 73/83} {:x 94/139, :y 73/83} {:x 95/139, :y 73/83} {:x 96/139, :y 73/83} {:x 97/139, :y 73/83} {:x 98/139, :y 73/83} {:x 98/139, :y 74/83} {:x 99/139, :y 74/83} {:x 100/139, :y 74/83} {:x 100/139, :y 75/83} {:x 101/139, :y 75/83} {:x 102/139, :y 75/83} {:x 103/139, :y 75/83} {:x 104/139, :y 75/83} {:x 105/139, :y 75/83} {:x 105/139, :y 76/83} {:x 105/139, :y 77/83} {:x 106/139, :y 77/83} {:x 107/139, :y 77/83} {:x 108/139, :y 77/83} {:x 109/139, :y 77/83} {:x 110/139, :y 77/83} {:x 111/139, :y 77/83} {:x 112/139, :y 77/83} {:x 113/139, :y 77/83} {:x 114/139, :y 77/83} {:x 115/139, :y 77/83} {:x 116/139, :y 77/83} {:x 117/139, :y 77/83} {:x 118/139, :y 77/83} {:x 118/139, :y 78/83} {:x 119/139, :y 78/83} {:x 119/139, :y 79/83} {:x 120/139, :y 79/83} {:x 120/139, :y 80/83} {:x 121/139, :y 80/83} {:x 122/139, :y 80/83} {:x 122/139, :y 81/83} {:x 123/139, :y 81/83} {:x 124/139, :y 81/83} {:x 125/139, :y 81/83} {:x 126/139, :y 81/83} {:x 127/139, :y 81/83} {:x 128/139, :y 81/83} {:x 129/139, :y 81/83} {:x 130/139, :y 81/83} {:x 130/139, :y 82/83} {:x 131/139, :y 82/83} {:x 132/139, :y 82/83} {:x 133/139, :y 82/83} {:x 134/139, :y 82/83} {:x 135/139, :y 82/83} {:x 135/139, :y 1} {:x 136/139, :y 1} {:x 137/139, :y 1} {:x 
138/139, :y 1} {:x 1, :y 1})} {:name \"a3035d60-74ea-49f9-93b1-f358ec1ada53\", :values ({:x 1/129, :y 0} {:x 1/129, :y 1/93} {:x 2/129, :y 1/93} {:x 1/43, :y 1/93} {:x 1/43, :y 2/93} {:x 1/43, :y 1/31} {:x 1/43, :y 4/93} {:x 1/43, :y 5/93} {:x 1/43, :y 2/31} {:x 4/129, :y 2/31} {:x 5/129, :y 2/31} {:x 5/129, :y 7/93} {:x 2/43, :y 7/93} {:x 7/129, :y 7/93} {:x 7/129, :y 8/93} {:x 7/129, :y 3/31} {:x 8/129, :y 3/31} {:x 3/43, :y 3/31} {:x 10/129, :y 3/31} {:x 10/129, :y 10/93} {:x 11/129, :y 10/93} {:x 4/43, :y 10/93} {:x 4/43, :y 11/93} {:x 13/129, :y 11/93} {:x 14/129, :y 11/93} {:x 14/129, :y 4/31} {:x 5/43, :y 4/31} {:x 5/43, :y 13/93} {:x 5/43, :y 14/93} {:x 5/43, :y 5/31} {:x 5/43, :y 16/93} {:x 5/43, :y 17/93} {:x 5/43, :y 6/31} {:x 5/43, :y 19/93} {:x 5/43, :y 20/93} {:x 16/129, :y 20/93} {:x 16/129, :y 7/31} {:x 16/129, :y 22/93} {:x 16/129, :y 23/93} {:x 16/129, :y 8/31} {:x 16/129, :y 25/93} {:x 16/129, :y 26/93} {:x 16/129, :y 9/31} {:x 16/129, :y 28/93} {:x 16/129, :y 29/93} {:x 16/129, :y 10/31} {:x 17/129, :y 10/31} {:x 6/43, :y 10/31} {:x 19/129, :y 10/31} {:x 19/129, :y 1/3} {:x 19/129, :y 32/93} {:x 19/129, :y 11/31} {:x 19/129, :y 34/93} {:x 19/129, :y 35/93} {:x 20/129, :y 35/93} {:x 20/129, :y 12/31} {:x 7/43, :y 12/31} {:x 7/43, :y 37/93} {:x 22/129, :y 37/93} {:x 23/129, :y 37/93} {:x 8/43, :y 37/93} {:x 8/43, :y 38/93} {:x 25/129, :y 38/93} {:x 25/129, :y 13/31} {:x 25/129, :y 40/93} {:x 26/129, :y 40/93} {:x 9/43, :y 40/93} {:x 28/129, :y 40/93} {:x 29/129, :y 40/93} {:x 10/43, :y 40/93} {:x 31/129, :y 40/93} {:x 32/129, :y 40/93} {:x 11/43, :y 40/93} {:x 34/129, :y 40/93} {:x 35/129, :y 40/93} {:x 12/43, :y 40/93} {:x 37/129, :y 40/93} {:x 38/129, :y 40/93} {:x 13/43, :y 40/93} {:x 40/129, :y 40/93} {:x 41/129, :y 40/93} {:x 41/129, :y 41/93} {:x 41/129, :y 14/31} {:x 41/129, :y 43/93} {:x 41/129, :y 44/93} {:x 41/129, :y 15/31} {:x 41/129, :y 46/93} {:x 41/129, :y 47/93} {:x 14/43, :y 47/93} {:x 14/43, :y 16/31} {:x 14/43, :y 49/93} {:x 
14/43, :y 50/93} {:x 14/43, :y 17/31} {:x 14/43, :y 52/93} {:x 1/3, :y 52/93} {:x 1/3, :y 53/93} {:x 1/3, :y 18/31} {:x 1/3, :y 55/93} {:x 1/3, :y 56/93} {:x 44/129, :y 56/93} {:x 44/129, :y 19/31} {:x 15/43, :y 19/31} {:x 46/129, :y 19/31} {:x 47/129, :y 19/31} {:x 16/43, :y 19/31} {:x 49/129, :y 19/31} {:x 50/129, :y 19/31} {:x 17/43, :y 19/31} {:x 52/129, :y 19/31} {:x 53/129, :y 19/31} {:x 18/43, :y 19/31} {:x 55/129, :y 19/31} {:x 56/129, :y 19/31} {:x 56/129, :y 58/93} {:x 19/43, :y 58/93} {:x 19/43, :y 59/93} {:x 58/129, :y 59/93} {:x 59/129, :y 59/93} {:x 59/129, :y 20/31} {:x 20/43, :y 20/31} {:x 61/129, :y 20/31} {:x 62/129, :y 20/31} {:x 21/43, :y 20/31} {:x 64/129, :y 20/31} {:x 65/129, :y 20/31} {:x 22/43, :y 20/31} {:x 67/129, :y 20/31} {:x 68/129, :y 20/31} {:x 23/43, :y 20/31} {:x 70/129, :y 20/31} {:x 71/129, :y 20/31} {:x 24/43, :y 20/31} {:x 73/129, :y 20/31} {:x 74/129, :y 20/31} {:x 25/43, :y 20/31} {:x 76/129, :y 20/31} {:x 77/129, :y 20/31} {:x 26/43, :y 20/31} {:x 79/129, :y 20/31} {:x 80/129, :y 20/31} {:x 27/43, :y 20/31} {:x 82/129, :y 20/31} {:x 82/129, :y 61/93} {:x 83/129, :y 61/93} {:x 28/43, :y 61/93} {:x 85/129, :y 61/93} {:x 2/3, :y 61/93} {:x 29/43, :y 61/93} {:x 88/129, :y 61/93} {:x 89/129, :y 61/93} {:x 30/43, :y 61/93} {:x 91/129, :y 61/93} {:x 91/129, :y 2/3} {:x 92/129, :y 2/3} {:x 31/43, :y 2/3} {:x 94/129, :y 2/3} {:x 95/129, :y 2/3} {:x 32/43, :y 2/3} {:x 97/129, :y 2/3} {:x 98/129, :y 2/3} {:x 33/43, :y 2/3} {:x 33/43, :y 21/31} {:x 100/129, :y 21/31} {:x 101/129, :y 21/31} {:x 34/43, :y 21/31} {:x 103/129, :y 21/31} {:x 104/129, :y 21/31} {:x 35/43, :y 21/31} {:x 35/43, :y 64/93} {:x 35/43, :y 65/93} {:x 106/129, :y 65/93} {:x 107/129, :y 65/93} {:x 107/129, :y 22/31} {:x 107/129, :y 67/93} {:x 36/43, :y 67/93} {:x 36/43, :y 68/93} {:x 36/43, :y 23/31} {:x 109/129, :y 23/31} {:x 110/129, :y 23/31} {:x 110/129, :y 70/93} {:x 110/129, :y 71/93} {:x 110/129, :y 24/31} {:x 110/129, :y 73/93} {:x 110/129, :y 74/93} {:x 
110/129, :y 25/31} {:x 110/129, :y 76/93} {:x 110/129, :y 77/93} {:x 110/129, :y 26/31} {:x 37/43, :y 26/31} {:x 37/43, :y 79/93} {:x 37/43, :y 80/93} {:x 112/129, :y 80/93} {:x 112/129, :y 27/31} {:x 112/129, :y 82/93} {:x 112/129, :y 83/93} {:x 112/129, :y 28/31} {:x 113/129, :y 28/31} {:x 38/43, :y 28/31} {:x 38/43, :y 85/93} {:x 115/129, :y 85/93} {:x 115/129, :y 86/93} {:x 115/129, :y 29/31} {:x 116/129, :y 29/31} {:x 39/43, :y 29/31} {:x 118/129, :y 29/31} {:x 118/129, :y 88/93} {:x 119/129, :y 88/93} {:x 40/43, :y 88/93} {:x 121/129, :y 88/93} {:x 122/129, :y 88/93} {:x 122/129, :y 89/93} {:x 41/43, :y 89/93} {:x 124/129, :y 89/93} {:x 124/129, :y 30/31} {:x 125/129, :y 30/31} {:x 42/43, :y 30/31} {:x 42/43, :y 91/93} {:x 127/129, :y 91/93} {:x 127/129, :y 92/93} {:x 128/129, :y 92/93} {:x 128/129, :y 1} {:x 1, :y 1})} {:name \"89cb3c24-8a7d-4f91-836f-a483e2deeb89\", :values ({:x 1/140, :y 0} {:x 1/140, :y 1/82} {:x 1/140, :y 1/41} {:x 1/140, :y 3/82} {:x 1/140, :y 2/41} {:x 1/70, :y 2/41} {:x 3/140, :y 2/41} {:x 1/35, :y 2/41} {:x 1/35, :y 5/82} {:x 1/28, :y 5/82} {:x 1/28, :y 3/41} {:x 1/28, :y 7/82} {:x 3/70, :y 7/82} {:x 1/20, :y 7/82} {:x 1/20, :y 4/41} {:x 1/20, :y 9/82} {:x 1/20, :y 5/41} {:x 1/20, :y 11/82} {:x 1/20, :y 6/41} {:x 2/35, :y 6/41} {:x 9/140, :y 6/41} {:x 1/14, :y 6/41} {:x 1/14, :y 13/82} {:x 1/14, :y 7/41} {:x 1/14, :y 15/82} {:x 11/140, :y 15/82} {:x 11/140, :y 8/41} {:x 3/35, :y 8/41} {:x 13/140, :y 8/41} {:x 13/140, :y 17/82} {:x 13/140, :y 9/41} {:x 13/140, :y 19/82} {:x 13/140, :y 10/41} {:x 13/140, :y 21/82} {:x 13/140, :y 11/41} {:x 13/140, :y 23/82} {:x 13/140, :y 12/41} {:x 13/140, :y 25/82} {:x 13/140, :y 13/41} {:x 13/140, :y 27/82} {:x 13/140, :y 14/41} {:x 13/140, :y 29/82} {:x 13/140, :y 15/41} {:x 1/10, :y 15/41} {:x 3/28, :y 15/41} {:x 3/28, :y 31/82} {:x 3/28, :y 16/41} {:x 3/28, :y 33/82} {:x 3/28, :y 17/41} {:x 3/28, :y 35/82} {:x 3/28, :y 18/41} {:x 3/28, :y 37/82} {:x 3/28, :y 19/41} {:x 3/28, :y 39/82} {:x 3/28, 
:y 20/41} {:x 3/28, :y 1/2} {:x 3/28, :y 21/41} {:x 3/28, :y 43/82} {:x 3/28, :y 22/41} {:x 3/28, :y 45/82} {:x 3/28, :y 23/41} {:x 4/35, :y 23/41} {:x 17/140, :y 23/41} {:x 9/70, :y 23/41} {:x 19/140, :y 23/41} {:x 19/140, :y 47/82} {:x 1/7, :y 47/82} {:x 3/20, :y 47/82} {:x 11/70, :y 47/82} {:x 11/70, :y 24/41} {:x 23/140, :y 24/41} {:x 23/140, :y 49/82} {:x 6/35, :y 49/82} {:x 5/28, :y 49/82} {:x 5/28, :y 25/41} {:x 13/70, :y 25/41} {:x 13/70, :y 51/82} {:x 27/140, :y 51/82} {:x 27/140, :y 26/41} {:x 1/5, :y 26/41} {:x 29/140, :y 26/41} {:x 29/140, :y 53/82} {:x 3/14, :y 53/82} {:x 31/140, :y 53/82} {:x 31/140, :y 27/41} {:x 8/35, :y 27/41} {:x 33/140, :y 27/41} {:x 17/70, :y 27/41} {:x 1/4, :y 27/41} {:x 1/4, :y 55/82} {:x 9/35, :y 55/82} {:x 37/140, :y 55/82} {:x 19/70, :y 55/82} {:x 39/140, :y 55/82} {:x 2/7, :y 55/82} {:x 2/7, :y 28/41} {:x 41/140, :y 28/41} {:x 41/140, :y 57/82} {:x 3/10, :y 57/82} {:x 3/10, :y 29/41} {:x 3/10, :y 59/82} {:x 3/10, :y 30/41} {:x 3/10, :y 61/82} {:x 43/140, :y 61/82} {:x 43/140, :y 31/41} {:x 11/35, :y 31/41} {:x 9/28, :y 31/41} {:x 9/28, :y 63/82} {:x 23/70, :y 63/82} {:x 47/140, :y 63/82} {:x 12/35, :y 63/82} {:x 7/20, :y 63/82} {:x 7/20, :y 32/41} {:x 7/20, :y 65/82} {:x 7/20, :y 33/41} {:x 7/20, :y 67/82} {:x 7/20, :y 34/41} {:x 7/20, :y 69/82} {:x 5/14, :y 69/82} {:x 51/140, :y 69/82} {:x 51/140, :y 35/41} {:x 51/140, :y 71/82} {:x 13/35, :y 71/82} {:x 53/140, :y 71/82} {:x 27/70, :y 71/82} {:x 11/28, :y 71/82} {:x 11/28, :y 36/41} {:x 11/28, :y 73/82} {:x 2/5, :y 73/82} {:x 57/140, :y 73/82} {:x 29/70, :y 73/82} {:x 59/140, :y 73/82} {:x 3/7, :y 73/82} {:x 61/140, :y 73/82} {:x 31/70, :y 73/82} {:x 9/20, :y 73/82} {:x 16/35, :y 73/82} {:x 13/28, :y 73/82} {:x 13/28, :y 37/41} {:x 33/70, :y 37/41} {:x 67/140, :y 37/41} {:x 17/35, :y 37/41} {:x 69/140, :y 37/41} {:x 1/2, :y 37/41} {:x 71/140, :y 37/41} {:x 18/35, :y 37/41} {:x 73/140, :y 37/41} {:x 37/70, :y 37/41} {:x 15/28, :y 37/41} {:x 19/35, :y 37/41} {:x 11/20, :y 
37/41} {:x 39/70, :y 37/41} {:x 79/140, :y 37/41} {:x 4/7, :y 37/41} {:x 81/140, :y 37/41} {:x 41/70, :y 37/41} {:x 83/140, :y 37/41} {:x 83/140, :y 75/82} {:x 3/5, :y 75/82} {:x 17/28, :y 75/82} {:x 43/70, :y 75/82} {:x 87/140, :y 75/82} {:x 22/35, :y 75/82} {:x 22/35, :y 38/41} {:x 89/140, :y 38/41} {:x 9/14, :y 38/41} {:x 9/14, :y 77/82} {:x 13/20, :y 77/82} {:x 23/35, :y 77/82} {:x 93/140, :y 77/82} {:x 47/70, :y 77/82} {:x 19/28, :y 77/82} {:x 24/35, :y 77/82} {:x 97/140, :y 77/82} {:x 7/10, :y 77/82} {:x 99/140, :y 77/82} {:x 5/7, :y 77/82} {:x 101/140, :y 77/82} {:x 51/70, :y 77/82} {:x 103/140, :y 77/82} {:x 26/35, :y 77/82} {:x 3/4, :y 77/82} {:x 53/70, :y 77/82} {:x 107/140, :y 77/82} {:x 27/35, :y 77/82} {:x 109/140, :y 77/82} {:x 11/14, :y 77/82} {:x 111/140, :y 77/82} {:x 4/5, :y 77/82} {:x 113/140, :y 77/82} {:x 57/70, :y 77/82} {:x 23/28, :y 77/82} {:x 29/35, :y 77/82} {:x 117/140, :y 77/82} {:x 59/70, :y 77/82} {:x 17/20, :y 77/82} {:x 6/7, :y 77/82} {:x 121/140, :y 77/82} {:x 61/70, :y 77/82} {:x 123/140, :y 77/82} {:x 123/140, :y 39/41} {:x 31/35, :y 39/41} {:x 25/28, :y 39/41} {:x 9/10, :y 39/41} {:x 127/140, :y 39/41} {:x 127/140, :y 79/82} {:x 32/35, :y 79/82} {:x 129/140, :y 79/82} {:x 13/14, :y 79/82} {:x 131/140, :y 79/82} {:x 33/35, :y 79/82} {:x 33/35, :y 40/41} {:x 19/20, :y 40/41} {:x 67/70, :y 40/41} {:x 27/28, :y 40/41} {:x 34/35, :y 40/41} {:x 137/140, :y 40/41} {:x 69/70, :y 40/41} {:x 69/70, :y 81/82} {:x 139/140, :y 81/82} {:x 1, :y 81/82} {:x 1, :y 1})}), :marks ({:type \"line\", :from {:data \"3727b48d-eeb7-4450-b28a-760c1c221378\"}, :properties {:enter {:x {:scale \"x\", :field \"data.x\"}, :y {:scale \"y\", :field \"data.y\"}, :stroke {:value \"red\"}, :strokeWidth {:value 2}, :strokeOpacity {:value 1}}}} {:type \"line\", :from {:data \"0502fc6a-c9c5-4064-bd08-84a227b4178d\"}, :properties {:enter {:x {:scale \"x\", :field \"data.x\"}, :y {:scale \"y\", :field \"data.y\"}, :stroke {:value \"green\"}, :strokeWidth {:value 2}, 
:strokeOpacity {:value 1}}}} {:type \"line\", :from {:data \"a3035d60-74ea-49f9-93b1-f358ec1ada53\"}, :properties {:enter {:x {:scale \"x\", :field \"data.x\"}, :y {:scale \"y\", :field \"data.y\"}, :stroke {:value \"blue\"}, :strokeWidth {:value 2}, :strokeOpacity {:value 1}}}} {:type \"line\", :from {:data \"89cb3c24-8a7d-4f91-836f-a483e2deeb89\"}, :properties {:enter {:x {:scale \"x\", :field \"data.x\"}, :y {:scale \"y\", :field \"data.y\"}, :stroke {:value \"black\"}, :strokeWidth {:value 2}, :strokeOpacity {:value 1}}}})}}"} 127 | ;; <= 128 | 129 | ;; ** 130 | ;;; Generate predictions using the model. Optionally, print out the predictions in a submission-ready format. 131 | ;; ** 132 | 133 | ;; @@ 134 | ;(def predictions (regression-predict model (map rest test))) 135 | 136 | ;(println "PassengerId,Survived") 137 | ;(doseq [[id survival] (map (fn [t p] [(first t) p]) test predictions)] 138 | ; (let [survival (if (> survival threshold) 1 0)] 139 | ; (println (str id "," survival)))) 140 | ;; @@ 141 | ;; => 142 | ;;; {"type":"html","content":"#'lambda-ml.examples.kaggle.titanic.worksheet/predictions","value":"#'lambda-ml.examples.kaggle.titanic.worksheet/predictions"} 143 | ;; <= 144 | 145 | ;; @@ 146 | 147 | ;; @@ 148 | -------------------------------------------------------------------------------- /src/lambda_ml/factorization.clj: -------------------------------------------------------------------------------- 1 | (ns lambda-ml.factorization 2 | "Unsupervised learning with non-negative matrix factorization. 
3 | 
4 |   Example usage:
5 |   ```
6 |   (def data [[1 2 3] [4 5 6]])
7 |   (let [dims 2]
8 |     (-> (factorizations data dims)
9 |         (nth 300)
10 |         ((fn [x] (map #(mapv vec %) x)))))
11 |   ;;=> ([[0.20900693256125408 0.2000948450048419]
12 |   ;;=> [0.8547267961216941 0.32426625588317753]]
13 |   ;;=> [[4.601094573778913 3.4274218917618486 2.1966425686791777]
14 |   ;;=> [0.20523936453382804 6.391048036139935 12.709895897835892]])
15 |   ```"
16 |   (:require [clojure.core.matrix :as m]))
17 | 
18 | (m/set-current-implementation :vectorz)
19 | 
20 | (defn init-factors
21 |   "Returns a rows-by-cols matrix in which every entry comes from its own call
22 |   to `rand`. `factorizations` uses it to build its starting factors."
23 |   [rows cols]
24 |   (let [random-row (fn [] (repeatedly cols rand))]
25 |     (m/matrix (repeatedly rows random-row))))
26 | 
27 | (defn cost
28 |   "Returns the sum, over all elements, of the squared difference between the
29 |   matrices a and b."
30 |   [a b]
31 |   (let [residual (m/sub a b)]
32 |     (m/esum (m/pow residual 2))))
33 | 
34 | (defn factorizations
35 |   "Returns a lazy seq of successive factorizations of the input matrix v. For
36 |   an m-by-n input, every element is a pair [w h] of latent matrices with
37 |   dimensions m-by-dims and dims-by-n. Called with [v dims], w and h start from
38 |   matrices built by `init-factors`; called with [v w h], they start from the
39 |   given factors. The seq never ends, so callers select an element themselves
40 |   (for example with `nth`)."
41 |   ([v dims]
42 |    (let [w0 (init-factors (m/row-count v) dims)
43 |          h0 (init-factors dims (m/column-count v))]
44 |      (factorizations (m/matrix v) w0 h0)))
45 |   ([v w h]
46 |    (lazy-seq
47 |      (let [wt     (m/transpose w)
48 |            ;; `h` is refreshed first; the `w` update below uses the new `h`
49 |            next-h (m/emul h (m/div (m/mmul wt v)
50 |                                    (m/mmul wt w h)))
51 |            ht     (m/transpose next-h)
52 |            next-w (m/emul w (m/div (m/mmul v ht)
53 |                                    (m/mmul w next-h ht)))]
54 |        (cons [next-w next-h] (factorizations v next-w next-h))))))
55 | 
--------------------------------------------------------------------------------
/src/lambda_ml/metrics.clj:
--------------------------------------------------------------------------------
1 | (ns lambda-ml.metrics
2 |   "Functions that compute measures of cost or gain.")
3 | 
4 | (defn auc
5 |   "Returns the area under the curve of a given collection of points, using the
6 |   trapezoidal rule."
7 |   [points]
8 |   (loop [area 0
9 |          [x0 y0] (first points)
10 |          points (rest points)]
11 |     (if (empty? 
points) 12 | area 13 | (let [[x1 y1] (first points) 14 | dx (- x1 x0)] 15 | (recur (+ area (* dx (/ (+ y1 y0) 2))) 16 | [x1 y1] 17 | (rest points)))))) 18 | 19 | (defn roc-curve 20 | "Returns a sequence of [false positive rate, true positive rate] tuples that 21 | represent the ROC curve of a classifier." 22 | [labels predictions] 23 | (let [p (reduce + (filter (fn [x] (= x 1)) labels)) 24 | n (- (count labels) p) 25 | ranked (->> (map vector labels predictions) 26 | (sort-by second) 27 | (map first) 28 | (reverse))] 29 | (loop [ys ranked 30 | fp 0 31 | tp 0 32 | points []] 33 | (if (empty? ys) 34 | points 35 | (let [fp (if (= (first ys) 0) (inc fp) fp) 36 | tp (if (= (first ys) 1) (inc tp) tp) 37 | fpr (/ fp n) 38 | tpr (/ tp p)] 39 | (recur (rest ys) fp tp (conj points [fpr tpr]))))))) 40 | -------------------------------------------------------------------------------- /src/lambda_ml/naive_bayes.clj: -------------------------------------------------------------------------------- 1 | (ns lambda-ml.naive-bayes 2 | "Naive Bayes probabilistic model learning. 3 | 4 | Example usage: 5 | ``` 6 | (def data [[6.0 180 12 :male] [5.92 190 11 :male] [5.58 170 12 :male] 7 | [5.92 165 10 :male] [5.0 100 6 :female] [5.5 150 8 :female] 8 | [5.42 130 7 :female] [5.75 150 9 :female]]) 9 | (def fit 10 | (-> (make-naive-bayes) 11 | (naive-bayes-fit data))) 12 | (naive-bayes-predict fit [[6.0 130 8]]) 13 | ;;=> (:female) 14 | ```" 15 | (require [clojure.math.numeric-tower :refer :all])) 16 | 17 | (defn gaussian 18 | [x mean var] 19 | (* (/ 1 (sqrt (* 2 Math/PI var))) 20 | (expt Math/E (- (/ (expt (- x mean) 2) (* 2 var)))))) 21 | 22 | (defn posterior 23 | [distributions xi label] 24 | (if-not (vector? 
xi) 25 | (posterior distributions (vec xi) label) 26 | (loop [index 0 27 | prob (/ 1 (count distributions))] 28 | (if (>= index (count xi)) 29 | prob 30 | (let [[mean var] (get-in distributions [label index])] 31 | (recur (inc index) 32 | (* prob (gaussian (nth xi index) mean var)))))))) 33 | 34 | (defn naive-bayes-fit 35 | "Returns a naive Bayes model fit to the given training data." 36 | ([model data] 37 | (naive-bayes-fit model (map butlast data) (map last data))) 38 | ([model x y] 39 | (cond 40 | (not-every? vector? x) (naive-bayes-fit model (map vec x) y) 41 | (not (vector? y)) (naive-bayes-fit model x (vec y)) 42 | :else 43 | (let [n (count (first x))] 44 | (loop [index 0 45 | labels (distinct y) 46 | m {}] 47 | (cond (empty? labels) (assoc model :distributions m) 48 | (>= index n) (recur 0 (rest labels) m) 49 | :else 50 | (let [label (first labels) 51 | ;; Feature values for examples with the current label 52 | vals (->> (map #(nth % index) x) 53 | (keep-indexed (fn [i xi] (when (= (nth y i) label) xi)))) 54 | mean (/ (apply + vals) (count vals)) 55 | ;; Unbiased sample variance 56 | var (/ (apply + (map #(expt (- % mean) 2) vals)) (dec (count vals)))] 57 | (recur (inc index) 58 | labels 59 | (assoc-in m [label index] [mean var]))))))))) 60 | 61 | (defn naive-bayes-predict 62 | "Predicts the values of example data using a naive Bayes model." 63 | [model x] 64 | (let [{distributions :distributions} model 65 | labels (keys distributions)] 66 | (map (fn [xi] (apply max-key #(posterior distributions xi %) labels)) x))) 67 | 68 | (defn make-naive-bayes 69 | "Returns a naive Bayes model." 70 | [] 71 | {}) 72 | -------------------------------------------------------------------------------- /src/lambda_ml/nearest_neighbors.clj: -------------------------------------------------------------------------------- 1 | (ns lambda-ml.nearest-neighbors 2 | "Classification and regression using the k-nearest neighbors algorithm. 
3 | 4 | Example usage: 5 | ``` 6 | (def data [[2 3] [5 4] [9 6] [4 7] [8 1] [7 2]]) 7 | (def fit 8 | (let [k 1] 9 | (-> (make-nearest-neighbors-regressor k lambda-ml.distance/euclidean) 10 | (nearest-neighbors-fit data)))) 11 | (nearest-neighbors-predict fit (map butlast data)) 12 | ```" 13 | (:require [lambda-ml.core :refer :all] 14 | [lambda-ml.data.binary-tree :as bt] 15 | [lambda-ml.data.kd-tree :as kd])) 16 | 17 | (defn make-item 18 | [value priority] 19 | (vector priority value)) 20 | 21 | (defn item-priority 22 | [item] 23 | (nth item 0)) 24 | 25 | (defn item-value 26 | [item] 27 | (nth item 1)) 28 | 29 | (defn insert 30 | [v value priority bound] 31 | (let [full (>= (count v) bound)] 32 | (cond 33 | ;; Empty vector 34 | (empty? v) 35 | (vector (make-item value priority)) 36 | ;; Full vector and item priority is too high 37 | (and full (>= priority (item-priority (peek v)))) 38 | v 39 | :else 40 | ;; Find position and insert item 41 | (let [index (loop [lo 0 42 | hi (count v)] 43 | (if (>= lo hi) 44 | lo 45 | (let [mid (quot (+ lo hi) 2)] 46 | (if (< priority (item-priority (nth v mid))) 47 | (recur lo mid) 48 | (recur (+ mid 1) hi))))) 49 | item (make-item value priority) 50 | end (if full (dec (count v)) (count v))] 51 | (apply conj 52 | (subvec v 0 index) 53 | item 54 | (subvec v index end)))))) 55 | 56 | (defn make-nearest-neighbor-search 57 | "Given a distance function f and a coll of items, each of which have an 58 | associated dimensional point, returns a function that, given k and a query 59 | item, returns a priority queue of the k nearest neighboring items. Optionally, 60 | a function g can be supplied and used to return the dimensional point for an 61 | item. Otherwise, the item itself is assumed to be the point. Assumes that all 62 | points are represented as sequences of the same dimension." 
63 | ([f items] 64 | (make-nearest-neighbor-search f identity items)) 65 | ([f g items] 66 | (let [dims (count (g (first items))) 67 | t (kd/make-tree dims items g)] 68 | (fn knn 69 | ([k query] 70 | (knn k query t 0 (vector))) 71 | ([k query tree depth cand] 72 | (if (nil? tree) 73 | cand 74 | (let [[node left right] ((juxt bt/get-value bt/get-left bt/get-right) tree) 75 | dim (mod depth dims) 76 | node-point (g node) 77 | query-point (g query) 78 | ;; Determine near and far branches 79 | [near far] (if (<= (nth query-point dim) (nth node-point dim)) [left right] [right left]) 80 | cand (->> 81 | ;; Try to add current node to candidates 82 | (insert cand node (f query-point node-point) k) 83 | ;; Explore near branch 84 | (knn k query near (inc depth)))] 85 | ;; Optionally, explore far branch 86 | (if (or (< (count cand) k) 87 | (< (f query-point node-point dim) 88 | (item-priority (peek cand)))) 89 | (knn k query far (inc depth) cand) 90 | cand)))))))) 91 | 92 | (defn nearest-neighbors-fit 93 | "Fits a k-nearest neighbors model to the given training data." 94 | ([model data] 95 | (assoc model :lookup (make-nearest-neighbor-search (:dist model) butlast data))) 96 | ([model x y] 97 | (nearest-neighbors-fit model (map concat x (map list y))))) 98 | 99 | (defn nearest-neighbors-predict 100 | "Predicts the values of example data using a k-nearest neighbors model." 101 | [model x] 102 | (let [{k :k lookup :lookup agg :aggregation} model] 103 | (when (not (nil? lookup)) 104 | ;; Append dummy coordinate value to account for assumption of target 105 | ;; values in last position in training data examples 106 | (->> (map #(conj (vec %) nil) x) 107 | (map #(lookup k %)) 108 | (map #(map (comp last item-value) %)) 109 | (map agg))))) 110 | 111 | (defn make-nearest-neighbors-classifier 112 | "Returns a k-nearest neighbor classification model using the given distance 113 | function." 
114 | [k dist] 115 | {:k k 116 | :dist dist 117 | :aggregation mode}) 118 | 119 | (defn make-nearest-neighbors-regressor 120 | "Returns a k-nearest neighbor regression model using the given distance 121 | function." 122 | [k dist] 123 | {:k k 124 | :dist dist 125 | :aggregation mean}) 126 | -------------------------------------------------------------------------------- /src/lambda_ml/neural_network.clj: -------------------------------------------------------------------------------- 1 | (ns lambda-ml.neural-network 2 | "Multilayer perceptron neural network learning using backpropagation. 3 | 4 | Example usage: 5 | ``` 6 | (def data [[0 0 [0]] [0 1 [1]] [1 0 [1]] [1 1 [0]]]) 7 | (def fit 8 | (let [alpha 0.5 9 | lambda 0.001 10 | model (-> (make-neural-network alpha lambda) 11 | (add-neural-network-layer 2 sigmoid) ;; input layer 12 | (add-neural-network-layer 3 sigmoid) ;; hidden layer 13 | (add-neural-network-layer 1 sigmoid))] ;; output layer 14 | (-> (iterate #(neural-network-fit % data) model) 15 | (nth 5000)))) 16 | (neural-network-predict fit (map butlast data)) 17 | ;;=> [[0.04262340225834812] [0.9582632706756758] [0.9581124103456861] [0.04103544440312673]] 18 | ```" 19 | (:require [lambda-ml.core :as c] 20 | [clojure.core.matrix :as m])) 21 | 22 | (m/set-current-implementation :vectorz) 23 | 24 | (def bias (m/matrix [1.0])) 25 | (def epsilon 0.0001) 26 | 27 | (defn drop-bias 28 | [m] 29 | (m/submatrix m 1 [1 (dec (m/column-count m))])) 30 | 31 | (defn feed-forward 32 | "Returns the activation values for nodes in a neural network after forward 33 | propagating the values of a single input example x through the network." 34 | [x theta fns] 35 | (reduce (fn [activations [weights f]] 36 | (let [inputs (if (empty? 
activations) (m/matrix x) (last activations)) 37 | inputs+bias (m/join bias inputs) 38 | outputs (m/emap f (m/mmul weights inputs+bias))] 39 | (conj activations outputs))) 40 | [] 41 | (map vector theta fns))) 42 | 43 | (defn feed-forward-batch 44 | "Returns the activation values for nodes in a neural network after forward 45 | propagating a collection of input examples x through the network." 46 | [x theta fns] 47 | (-> (reduce (fn [inputs [weights f]] 48 | (let [bias (m/broadcast 1.0 [1 (m/column-count inputs)]) 49 | inputs+bias (m/join bias inputs) 50 | outputs (m/emap f (m/mmul weights inputs+bias))] 51 | outputs)) 52 | (m/transpose (m/matrix x)) 53 | (map vector theta fns)) 54 | (m/transpose))) 55 | 56 | (defn back-propagate 57 | "Returns the errors of each node in a neural network after propagating the 58 | the errors at the output nodes, computed against a single target value y, 59 | backwards through the network." 60 | [y theta fns' activations output-error] 61 | (->> (map vector 62 | (reverse (rest theta)) 63 | (reverse (butlast activations)) 64 | (reverse (butlast fns'))) 65 | (reduce (fn [errors [w a f]] 66 | (cons (m/mul (m/emap f a) (m/mmul (first errors) (drop-bias w))) 67 | errors)) 68 | (list (output-error y (last activations) (last fns')))) 69 | (vec))) 70 | 71 | (defn compute-gradients 72 | "Returns the gradients for each weight given activation values and errors on 73 | a input values of a single example x." 74 | [x activations errors] 75 | (->> (map vector errors (cons (m/matrix x) (butlast activations))) 76 | (reduce (fn [gradients [e a]] 77 | (let [a (m/join bias a)] 78 | (conj gradients (m/outer-product e a)))) 79 | []))) 80 | 81 | (defn numeric-gradients 82 | "Returns the numeric approximations of the gradients for each weight given the 83 | input values of a single example x and label y. Used for debugging by checking 84 | against the computed gradients during backpropagation." 
85 | [x y theta fns cost] 86 | (mapv (fn [k weights] 87 | (m/matrix (for [i (range (m/row-count weights))] 88 | (for [j (range (m/column-count weights))] 89 | (let [w (m/select weights i j) 90 | theta+ (assoc theta k (m/set-selection weights i j (+ w epsilon))) 91 | theta- (assoc theta k (m/set-selection weights i j (- w epsilon)))] 92 | (/ (- (cost (list x) (list y) theta+ fns) 93 | (cost (list x) (list y) theta- fns)) 94 | (* 2 epsilon))))))) 95 | (range) 96 | theta)) 97 | 98 | (defn regularize 99 | "Returns regularized weights." 100 | [theta alpha lambda] 101 | (map (fn [w] 102 | (-> (m/mul alpha lambda w) 103 | (m/set-column 0 (m/matrix (repeat (m/row-count w) 0))))) 104 | theta)) 105 | 106 | (defn gradient-descent-step 107 | "Performs a single gradient step on the input and target values of a single 108 | example x and label y, and returns the updated weights." 109 | [model x y theta] 110 | (let [{fns :activation-fns alpha :alpha lambda :lambda 111 | cost :cost output-error :output-error} model 112 | activations (feed-forward x theta fns) 113 | errors (back-propagate y theta (map c/derivative fns) activations output-error) 114 | gradients (compute-gradients x activations errors) 115 | regularization (regularize theta alpha lambda)] 116 | ;; Numeric gradient checking 117 | ;;(println (map (comp #(/ (m/esum %) (m/ecount %)) m/abs m/sub) gradients (numeric-gradients x y theta fns cost))) 118 | (mapv m/sub theta (map #(m/mul % alpha) gradients) regularization))) 119 | 120 | (defn gradient-descent 121 | "Performs gradient descent on input and target values of all examples x and 122 | y, and returns the updated weights." 123 | [model x y] 124 | (reduce (fn [weights [xi yi]] (gradient-descent-step model xi yi weights)) 125 | (:parameters model) 126 | (map vector x y))) 127 | 128 | (defn init-parameters 129 | [model] 130 | (let [{layers :layers seed :seed} model 131 | r (if seed (java.util.Random. 
seed) (java.util.Random.)) 132 | rand (fn [] (.nextGaussian r))] 133 | (->> (for [i (range (dec (count layers)))] 134 | (let [ni (inc (nth layers i)) ;; number of nodes at layer i (+ bias node) 135 | ni+1 (nth layers (inc i))] ;; number of nodes at layer i+1 136 | ;; initialize random values as parameters 137 | (vec (repeatedly ni+1 #(vec (repeatedly ni rand)))))) 138 | (mapv m/matrix)))) 139 | 140 | ;; Cost functions 141 | 142 | (defn cross-entropy-cost 143 | [x y theta fns] 144 | (let [a (feed-forward-batch x theta fns)] 145 | (/ (m/esum (m/add (m/mul y (m/log a)) 146 | (m/mul (m/sub 1 y) (m/log (m/sub 1 a))))) 147 | (- (count x))))) 148 | 149 | (defn cross-entropy-output-error 150 | [y activations f'] 151 | ;; Cross entropy error is independent of the derivative of output activation 152 | (m/sub activations y)) 153 | 154 | (defn quadratic-cost 155 | [x y theta fns] 156 | (/ (m/esum (m/square (m/sub (feed-forward-batch x theta fns) y))) 157 | 2)) 158 | 159 | (defn quadratic-output-error 160 | [y activations f'] 161 | (m/mul (m/sub activations y) (m/emap f' activations))) 162 | 163 | ;; API 164 | 165 | (defn neural-network-fit 166 | "Trains a neural network model for the given training data. For new models, 167 | parameters are initialized as random values from a normal distribution." 168 | ([model data] 169 | (neural-network-fit model (map (comp vec butlast) data) (map (comp vec last) data))) 170 | ([model x y] 171 | (let [{theta :parameters} model 172 | model (-> model 173 | (assoc :parameters (or theta (init-parameters model))))] 174 | (assoc model :parameters (gradient-descent model x y))))) 175 | 176 | (defn neural-network-predict 177 | "Predicts the values of example data using a neural network model." 178 | [model x] 179 | (let [{theta :parameters fns :activation-fns} model] 180 | (when (not (nil? 
theta)) 181 | (mapv vec (feed-forward-batch x theta fns))))) 182 | 183 | (defn neural-network-cost 184 | ([model data] 185 | (neural-network-cost model (map (comp vec butlast) data) (map (comp vec last) data))) 186 | ([model x y] 187 | (let [{theta :parameters fns :activation-fns cost :cost} model] 188 | (when (not (nil? theta)) 189 | (cost x y theta fns))))) 190 | 191 | (defn print-neural-network 192 | "Prints information about a given neural network." 193 | [model] 194 | (println 195 | (cond-> model 196 | (contains? model :parameters) 197 | (assoc :parameters (clojure.string/join " -> " 198 | (for [thetai (:parameters model)] 199 | (str (dec (count (first thetai))) " x " (count thetai)))))))) 200 | 201 | (defn make-neural-network 202 | "Returns a neural network model where alpha is the learning rate." 203 | ([alpha lambda] 204 | (make-neural-network alpha lambda cross-entropy-cost)) 205 | ([alpha lambda cost] 206 | (make-neural-network alpha lambda cost nil)) 207 | ([alpha lambda cost seed] 208 | {:alpha alpha 209 | :lambda lambda 210 | :layers [] 211 | :activation-fns [] 212 | :cost cost 213 | :seed seed 214 | :output-error (cond 215 | (= cost cross-entropy-cost) cross-entropy-output-error 216 | (= cost quadratic-cost) quadratic-output-error)})) 217 | 218 | (defn add-neural-network-layer 219 | "Adds a layer to a neural network model with n nodes and an activation 220 | function f." 221 | [model n f] 222 | (-> model 223 | (update :layers #(conj % n)) 224 | (update :activation-fns #(conj % f)))) 225 | -------------------------------------------------------------------------------- /src/lambda_ml/random_forest.clj: -------------------------------------------------------------------------------- 1 | (ns lambda-ml.random-forest 2 | "Random forest classification and regression learning. 
3 | 4 | Example usage: 5 | ``` 6 | (def data [[0 0 0] [0 1 1] [1 0 1] [1 1 0]]) 7 | (def fit 8 | (let [n 1001 9 | min-split 2 10 | min-leaf 1 11 | max-features 2] 12 | (-> (make-random-forest-classifier n min-split min-leaf max-features) 13 | (random-forest-fit data)))) 14 | (random-forest-predict fit (map butlast data)) 15 | ;;=> (0 1 1 0) 16 | ```" 17 | (:require [lambda-ml.decision-tree :as dt] 18 | [lambda-ml.ensemble :as e])) 19 | 20 | (def random-forest-fit e/bagging-ensemble-fit) 21 | 22 | (def random-forest-predict e/bagging-ensemble-predict) 23 | 24 | (defn make-random-forest-classifier 25 | [n min-split min-leaf max-features] 26 | (let [rate 1.0 27 | estimator (dt/make-classification-tree dt/gini-impurity min-split min-leaf max-features)] 28 | (-> #(e/add-bagging-estimator % estimator dt/decision-tree-fit dt/decision-tree-predict) 29 | (iterate (e/make-bagging-classifier rate)) 30 | (nth n)))) 31 | 32 | (defn make-random-forest-regressor 33 | [n min-split min-leaf max-features] 34 | (let [rate 1.0 35 | estimator (dt/make-regression-tree dt/mean-squared-error min-split min-leaf max-features)] 36 | (-> #(e/add-bagging-estimator % estimator dt/decision-tree-fit dt/decision-tree-predict) 37 | (iterate (e/make-bagging-regressor rate)) 38 | (nth n)))) 39 | -------------------------------------------------------------------------------- /src/lambda_ml/regression.clj: -------------------------------------------------------------------------------- 1 | (ns lambda-ml.regression 2 | "Generalized linear model learning for two of the more popular techniques, 3 | linear regression and logistic regression. 
4 | 5 | Linear regression example usage: 6 | ``` 7 | (def data [[-2 -1] [1 1] [3 2]]) 8 | (def fit 9 | (let [alpha 0.01 10 | lambda 0 11 | iters 5000] 12 | (-> (make-linear-regression alpha lambda iters) 13 | (regression-fit data)))) 14 | (regression-predict fit (map butlast data)) 15 | ;;=> (-0.9473684210526243 0.8684210526315812 2.0789473684210513) 16 | ``` 17 | 18 | Logistic regression example usage: 19 | ``` 20 | (def data [[4.0 1] [1.75 0] [4.25 1] [2.75 1] [5.0 1] [0.5 0] [1.0 0] [1.5 0] 21 | [5.5 1] [2.5 0] [2.0 0] [3.5 0] [1.75 1] [3.0 0] [4.75 1] [1.25 0] 22 | [4.5 1] [0.75 0] [3.25 1] [2.25 1]]) 23 | (def fit 24 | (let [alpha 0.1 25 | lambda 0 26 | iters 10000] 27 | (-> (make-logistic-regression alpha lambda iters) 28 | (regression-fit data)))) 29 | (regression-predict fit (take 3 (map butlast data))) 30 | ;;=> (0.8744474608195764 0.19083657134699333 0.9102776017566352) 31 | ```" 32 | (:require [lambda-ml.core :as c] 33 | [clojure.math.numeric-tower :refer :all])) 34 | 35 | (defn gradient-descent-step 36 | "Performs a single gradient step on the model coefficients." 37 | [h x y alpha lambda theta] 38 | (let [m (count y) 39 | n+1 (count (first x)) 40 | ;; Compute gradients 41 | gradients (for [j (range n+1)] 42 | (* (/ 1 m) 43 | (apply + (map (fn [xi yi] 44 | (* (- (h xi theta) yi) 45 | (xi j))) 46 | x y))))] 47 | ;; Simultaneously update all thetas 48 | (map-indexed (fn [i [t g]] 49 | (if (= i 0) 50 | ;; Non-regularized intercept parameter 51 | (- t (* alpha g)) 52 | ;; Regularized parameters 53 | (- (* t (- 1 (/ (* alpha lambda) m))) 54 | (* alpha g)))) 55 | (map vector theta gradients)))) 56 | 57 | (defn gradient-descent 58 | "Returns a lazy sequence of estimates of the model coefficients, along with 59 | the cost, at each iteration of gradient descent. 
Takes a hypothesis function 60 | h, which returns a predicted value given an example and parameters, and a cost 61 | function j, which computes the cost of applying the current model on all 62 | training examples." 63 | ([h j x y alpha lambda] 64 | (let [n+1 (count (first x))] 65 | (gradient-descent h j x y alpha lambda (repeatedly n+1 rand)))) 66 | ([h j x y alpha lambda theta] 67 | (lazy-seq (let [theta (gradient-descent-step h x y alpha lambda theta) 68 | cost (j x y theta)] 69 | (cons [theta cost] (gradient-descent h j x y alpha lambda theta)))))) 70 | 71 | (defn regression-fit 72 | "Fits a regression model to the given training data." 73 | ([model data] 74 | (regression-fit model (map butlast data) (map last data))) 75 | ([model x y] 76 | (let [{alpha :alpha lambda :lambda iters :iterations h :hypothesis j :cost} model 77 | x+intercepts (map c/vector-with-intercept x) 78 | estimates (gradient-descent h j x+intercepts y alpha lambda) 79 | [theta cost] (nth estimates iters)] 80 | (-> model 81 | (assoc :parameters theta) 82 | (assoc :costs (map second (take iters estimates))))))) 83 | 84 | (defn regression-predict 85 | "Predicts the values of example data using a regression model." 86 | [model x] 87 | (let [{theta :parameters h :hypothesis} model] 88 | (when (not (nil? theta)) 89 | (->> x 90 | (map c/vector-with-intercept) 91 | (map (partial h theta)))))) 92 | 93 | ;; Linear regression 94 | 95 | (defn linear-regression-hypothesis 96 | [xi theta] 97 | (c/dot-product xi theta)) 98 | 99 | (defn linear-regression-cost 100 | [x y theta] 101 | (let [m (count y)] 102 | (/ (apply + (map (fn [xi yi] 103 | (expt (- (linear-regression-hypothesis xi theta) yi) 2)) 104 | x y)) 105 | (* 2 m)))) 106 | 107 | (defn make-linear-regression 108 | "Returns a linear regression model with the given parameters." 
109 | [alpha lambda iters] 110 | {:alpha alpha 111 | :lambda lambda 112 | :iterations iters 113 | :hypothesis linear-regression-hypothesis 114 | :cost linear-regression-cost}) 115 | 116 | ;; Logistic regression 117 | 118 | (defn logistic-regression-hypothesis 119 | [xi theta] 120 | (c/sigmoid (c/dot-product xi theta))) 121 | 122 | (defn logistic-regression-cost 123 | [x y theta] 124 | (let [m (count y)] 125 | (/ (apply + (map (fn [xi yi] 126 | (let [hi (logistic-regression-hypothesis xi theta)] 127 | (+ (* yi 128 | (Math/log hi)) 129 | (* (- 1 yi) 130 | (Math/log (- 1 hi)))))) 131 | x y)) 132 | (- m)))) 133 | 134 | (defn make-logistic-regression 135 | "Returns a logistic regression model with the given parameters." 136 | [alpha lambda iters] 137 | {:alpha alpha 138 | :lambda lambda 139 | :iterations iters 140 | :hypothesis logistic-regression-hypothesis 141 | :cost logistic-regression-cost}) 142 | -------------------------------------------------------------------------------- /src/lambda_ml/util.clj: -------------------------------------------------------------------------------- 1 | (ns lambda-ml.util 2 | (:import [java.awt Color] 3 | [java.awt.image BufferedImage])) 4 | 5 | (defn pixels->image 6 | [pixels] 7 | (let [w (count (first pixels)) 8 | h (count pixels)] 9 | (reduce (fn [image [y row]] 10 | (reduce (fn [image [x pixel]] 11 | (let [[r g b] (repeat 3 (int (* pixel 255)))] 12 | (doto image 13 | (.setRGB x y (.getRGB (Color. r g b)))))) 14 | image 15 | (map-indexed vector row))) 16 | (BufferedImage. 
w h BufferedImage/TYPE_INT_ARGB) 17 | (map-indexed vector pixels)))) 18 | -------------------------------------------------------------------------------- /test/lambda_ml/clustering/dbscan_test.clj: -------------------------------------------------------------------------------- 1 | (ns lambda-ml.clustering.dbscan-test 2 | (:require [clojure.test :refer :all] 3 | [clojure.set :refer :all] 4 | [lambda-ml.clustering.dbscan :refer :all] 5 | [lambda-ml.distance :as d])) 6 | 7 | (deftest test-proximity-search 8 | (let [points {:SanFrancisco [37.759859 -122.437134] 9 | :Berkeley [37.864012 -122.277832] 10 | :PaloAlto [37.444335 -122.156982] 11 | :MountainView [37.387617 -122.060852] 12 | :SanJose [37.330857 -121.887817] 13 | :SantaCruz [36.971838 -122.019653]} 14 | locations (map-invert points) 15 | search (make-proximity-search d/haversine (vals points))] 16 | (is (= (set (map locations (search 1 (points :MountainView)))) 17 | #{:MountainView})) 18 | (is (= (set (map locations (search 2 (points :MountainView)))) 19 | #{:MountainView})) 20 | (is (= (set (map locations (search 4 (points :MountainView)))) 21 | #{:MountainView})) 22 | (is (= (set (map locations (search 8 (points :MountainView)))) 23 | #{:MountainView :PaloAlto})) 24 | (is (= (set (map locations (search 16 (points :MountainView)))) 25 | #{:MountainView :PaloAlto :SanJose})) 26 | (is (= (set (map locations (search 30 (points :MountainView)))) 27 | #{:MountainView :PaloAlto :SanJose :SantaCruz})) 28 | (is (= (set (map locations (search 35 (points :MountainView)))) 29 | #{:MountainView :PaloAlto :SanJose :SantaCruz :SanFrancisco})) 30 | (is (= (set (map locations (search 50 (points :MountainView)))) 31 | #{:MountainView :PaloAlto :SanJose :SantaCruz :SanFrancisco :Berkeley})))) 32 | 33 | (deftest test-dbscan 34 | (let [points [[2 10] 35 | [2 5] 36 | [8 4] 37 | [5 8] 38 | [7 5] 39 | [6 4] 40 | [1 2] 41 | [4 9]]] 42 | (let [clustering (dbscan d/euclidean 4 2 points)] 43 | (is (= 2 (count (distinct (vals 
clustering))))) 44 | (is (= (clustering [5 8]) 45 | (clustering [4 9]))) 46 | (is (= (clustering [8 4]) 47 | (clustering [7 5]) 48 | (clustering [6 4])))) 49 | (let [clustering (dbscan d/euclidean 10 2 points)] 50 | (is (= 3 (count (distinct (vals clustering))))) 51 | (is (= (clustering [2 10]) 52 | (clustering [5 8]) 53 | (clustering [4 9]))) 54 | (is (= (clustering [8 4]) 55 | (clustering [7 5]) 56 | (clustering [6 4]))) 57 | (is (= (clustering [2 5]) 58 | (clustering [1 2])))))) 59 | 60 | (deftest test-dbscan2 61 | (let [points [[0 100] 62 | [0 200] 63 | [0 275] 64 | [100 150] 65 | [200 100] 66 | [250 200] 67 | [0 300] 68 | [100 200] 69 | [600 700] 70 | [650 700] 71 | [675 700] 72 | [675 710] 73 | [675 720] 74 | [50 400]] 75 | clustering (dbscan d/euclidean 10000 3 points)] 76 | (is (= 2 (count (distinct (vals clustering))))) 77 | (is (= (clustering [0 100]) 78 | (clustering [0 200]) 79 | (clustering [0 275]) 80 | (clustering [100 150]) 81 | (clustering [0 300]) 82 | (clustering [100 200]))) 83 | (is (= (clustering [600 700]) 84 | (clustering [650 700]) 85 | (clustering [675 700]) 86 | (clustering [675 710]) 87 | (clustering [675 720]))))) 88 | 89 | (deftest test-dbscan3 90 | (let [points [[64.22906466107816 21.979356040013954] 91 | [9.502019068226218 73.5146190142259] 92 | [73.467643359676 49.11882050731219] 93 | [43.89991499437019 30.877086140215397] 94 | [31.66112900408916 62.006799353519455] 95 | [32.96188162290491 62.647924402495846] 96 | [29.860327935311943 61.4603339463938] 97 | [30.252436050213873 61.628358017420396] 98 | [29.478669344214723 63.34734829352237] 99 | [31.20809231721796 60.62778950878419] 100 | [56.719979556510225 12.79888943536207] 101 | [58.65873904968612 12.760463243583217] 102 | [56.961131111718714 13.99614970618859] 103 | [56.45650068600662 14.442072300706059] 104 | [56.4971734713763 15.955536322668195] 105 | [56.84485225030221 14.559824497206137] 106 | [55.51035064924508 11.432733331574127] 107 | [56.13687033439055 10.726739906473085] 
108 | [55.76618270971812 11.259738458553329] 109 | [18.786322022089735 39.41377491992898] 110 | [17.121476165484637 39.838845615973696] 111 | [19.683527131564915 39.34956996375695]] 112 | clustering (dbscan d/euclidean 4 2 points)] 113 | (is (= (clustering (nth points 4)) 114 | (clustering (nth points 5)) 115 | (clustering (nth points 6)) 116 | (clustering (nth points 7)) 117 | (clustering (nth points 8)) 118 | (clustering (nth points 9)))) 119 | (is (= (clustering (nth points 10)) 120 | (clustering (nth points 11)) 121 | (clustering (nth points 12)) 122 | (clustering (nth points 13)) 123 | (clustering (nth points 14)) 124 | (clustering (nth points 15)) 125 | (clustering (nth points 16)) 126 | (clustering (nth points 17)) 127 | (clustering (nth points 18)))) 128 | (is (= (clustering (nth points 19)) 129 | (clustering (nth points 20)) 130 | (clustering (nth points 21)))))) 131 | -------------------------------------------------------------------------------- /test/lambda_ml/clustering/hierarchical_test.clj: -------------------------------------------------------------------------------- 1 | (ns lambda-ml.clustering.hierarchical-test 2 | (:require [clojure.test :refer :all] 3 | [lambda-ml.clustering.hierarchical :refer :all] 4 | [lambda-ml.distance :as d])) 5 | 6 | (deftest test-pairwise-distances 7 | (let [points [[1 1 1 0 1 0 0 1 1 1] 8 | [1 1 0 1 1 0 0 0 0 1] 9 | [0 1 1 0 1 0 0 1 0 0] 10 | [0 0 0 1 0 1 0 0 0 0] 11 | [1 1 1 0 1 0 1 1 1 0] 12 | [0 1 0 1 1 0 0 0 0 1] 13 | [0 1 1 0 1 1 0 1 1 0]] 14 | distances (pairwise-distances d/jaccard points)] 15 | (is (= 0 (:distance (get-in distances [0 0])))) 16 | (is (= (/ 1 2) (:distance (get-in distances [0 1])))) 17 | (is (= (/ 3 7) (:distance (get-in distances [0 2])))) 18 | (is (= 1 (:distance (get-in distances [0 3])))) 19 | (is (= (/ 1 4) (:distance (get-in distances [0 4])))) 20 | (is (= (/ 5 8) (:distance (get-in distances [0 5])))) 21 | (is (= (/ 3 8) (:distance (get-in distances [0 6])))) 22 | (is (= (/ 1 2) 
(:distance (get-in distances [1 0])))) 23 | (is (= 0 (:distance (get-in distances [1 1])))) 24 | (is (= (/ 5 7) (:distance (get-in distances [1 2])))) 25 | (is (= (/ 5 6) (:distance (get-in distances [1 3])))) 26 | (is (= (/ 2 3) (:distance (get-in distances [1 4])))) 27 | (is (= (/ 1 5) (:distance (get-in distances [1 5])))) 28 | (is (= (/ 7 9) (:distance (get-in distances [1 6])))) 29 | (is (= (/ 3 7) (:distance (get-in distances [2 0])))) 30 | (is (= (/ 5 7) (:distance (get-in distances [2 1])))) 31 | (is (= 0 (:distance (get-in distances [2 2])))) 32 | (is (= 1 (:distance (get-in distances [2 3])))) 33 | (is (= (/ 3 7) (:distance (get-in distances [2 4])))) 34 | (is (= (/ 2 3) (:distance (get-in distances [2 5])))) 35 | (is (= (/ 1 3) (:distance (get-in distances [2 6])))) 36 | (is (= 1 (:distance (get-in distances [3 0])))) 37 | (is (= (/ 5 6) (:distance (get-in distances [3 1])))) 38 | (is (= 1 (:distance (get-in distances [3 2])))) 39 | (is (= 0 (:distance (get-in distances [3 3])))) 40 | (is (= 1 (:distance (get-in distances [3 4])))) 41 | (is (= (/ 4 5) (:distance (get-in distances [3 5])))) 42 | (is (= (/ 6 7) (:distance (get-in distances [3 6])))) 43 | (is (= (/ 1 4) (:distance (get-in distances [4 0])))) 44 | (is (= (/ 2 3) (:distance (get-in distances [4 1])))) 45 | (is (= (/ 3 7) (:distance (get-in distances [4 2])))) 46 | (is (= 1 (:distance (get-in distances [4 3])))) 47 | (is (= 0 (:distance (get-in distances [4 4])))) 48 | (is (= (/ 7 9) (:distance (get-in distances [4 5])))) 49 | (is (= (/ 3 8) (:distance (get-in distances [4 6])))) 50 | (is (= (/ 5 8) (:distance (get-in distances [5 0])))) 51 | (is (= (/ 1 5) (:distance (get-in distances [5 1])))) 52 | (is (= (/ 2 3) (:distance (get-in distances [5 2])))) 53 | (is (= (/ 4 5) (:distance (get-in distances [5 3])))) 54 | (is (= (/ 7 9) (:distance (get-in distances [5 4])))) 55 | (is (= 0 (:distance (get-in distances [5 5])))) 56 | (is (= (/ 3 4) (:distance (get-in distances [5 6])))) 57 | (is (= (/ 
3 8) (:distance (get-in distances [6 0]))))
    (is (= (/ 7 9) (:distance (get-in distances [6 1]))))
    (is (= (/ 1 3) (:distance (get-in distances [6 2]))))
    (is (= (/ 6 7) (:distance (get-in distances [6 3]))))
    (is (= (/ 3 8) (:distance (get-in distances [6 4]))))
    (is (= (/ 3 4) (:distance (get-in distances [6 5]))))
    (is (= 0 (:distance (get-in distances [6 6]))))))

;; For every point, the first two queue entries must be keyed by the nearest
;; and then the next-nearest *other* point (e.g. from point 0: 2 at distance 3,
;; then 1 at distance 7).
(deftest test-distance-queues
  (let [distances {0 {0 {:distance 0 :index 0}
                      1 {:distance 7 :index 1}
                      2 {:distance 3 :index 2}}
                   1 {0 {:distance 7 :index 0}
                      1 {:distance 0 :index 1}
                      2 {:distance 5 :index 2}}
                   2 {0 {:distance 3 :index 0}
                      1 {:distance 5 :index 1}
                      2 {:distance 0 :index 2}}}
        queues (distance-queues distances)]
    (is (= 2 (first (first (get queues 0)))))
    (is (= 1 (first (second (get queues 0)))))
    (is (= 2 (first (first (get queues 1)))))
    (is (= 0 (first (second (get queues 1)))))
    (is (= 0 (first (first (get queues 2)))))
    (is (= 1 (first (second (get queues 2)))))))

;; Single-link clustering driven by a hand-written distance table. The expected
;; pairs appear to be positions in `(keys distances)`: the first merge [2 5] is
;; MI/TO, the smallest entry (138) -- TODO confirm against the implementation.
(deftest test-agglomerative-clustering
  (let [distances {"BA" {"BA" 0   "FI" 662 "MI" 877 "NA" 255 "RM" 412 "TO" 996}
                   "FI" {"BA" 662 "FI" 0   "MI" 295 "NA" 468 "RM" 268 "TO" 400}
                   "MI" {"BA" 877 "FI" 295 "MI" 0   "NA" 754 "RM" 564 "TO" 138}
                   "NA" {"BA" 255 "FI" 468 "MI" 754 "NA" 0   "RM" 219 "TO" 869}
                   "RM" {"BA" 412 "FI" 268 "MI" 564 "NA" 219 "RM" 0   "TO" 669}
                   "TO" {"BA" 996 "FI" 400 "MI" 138 "NA" 869 "RM" 669 "TO" 0}}
        f (fn [a b] (get-in distances [a b]))
        merges (agglomerative-clustering single-link f (keys distances))]
    (is (= [2 5] (nth merges 0)))
    (is (= [3 4] (nth merges 1)))
    (is (= [0 3] (nth merges 2)))
    (is (= [0 1] (nth merges 3)))
    (is (= [0 2] (nth merges 4)))))

;; Single-link clustering of seven binary vectors under Jaccard distance.
(deftest test-agglomerative-clustering2
  (let [points [[1 1 1 0 1 0 0 1 1 1]
                [1 1 0 1 1 0 0 0 0 1]
                [0 1 1 0 1 0 0 1 0 0]
                [0 0 0 1 0 1 0 0 0 0]
                [1 1 1 0 1 0 1 1 1 0]
                [0 1 0 1 1 0 0 0 0 1]
                [0 1 1 0 1 1 0 1 1 0]]
        merges (agglomerative-clustering single-link d/jaccard points)]
    (is (= [1 5] (nth merges 0)))
    (is (= [0 4] (nth merges 1)))
    (is (= [2 6] (nth merges 2)))
    (is (= [0 2] (nth merges 3)))
    (is (= [0 1] (nth merges 4)))
    (is (= [0 3] (nth merges 5)))))

;; Same as test-agglomerative-clustering, with a floating-point distance table.
(deftest test-agglomerative-clustering3
  (let [distances {"A" {"A" 0.00 "B" 0.71 "C" 5.66 "D" 3.61 "E" 4.24 "F" 3.20}
                   "B" {"A" 0.71 "B" 0.00 "C" 4.95 "D" 2.92 "E" 3.54 "F" 2.50}
                   "C" {"A" 5.66 "B" 4.95 "C" 0.00 "D" 2.24 "E" 1.41 "F" 2.50}
                   "D" {"A" 3.61 "B" 2.92 "C" 2.24 "D" 0.00 "E" 1.00 "F" 0.50}
                   "E" {"A" 4.24 "B" 3.54 "C" 1.41 "D" 1.00 "E" 0.00 "F" 1.12}
                   "F" {"A" 3.20 "B" 2.50 "C" 2.50 "D" 0.50 "E" 1.12 "F" 0.00}}
        f (fn [a b] (get-in distances [a b]))
        merges (agglomerative-clustering single-link f (keys distances))]
    (is (= [3 5] (nth merges 0)))
    (is (= [0 1] (nth merges 1)))
    (is (= [3 4] (nth merges 2)))
    (is (= [2 3] (nth merges 3)))
    (is (= [0 2] (nth merges 4)))))

--------------------------------------------------------------------------------
/test/lambda_ml/clustering/k_means_test.clj:
--------------------------------------------------------------------------------
(ns lambda-ml.clustering.k-means-test
  (:require [clojure.test :refer :all]
            [clojure.set :refer :all]
            [lambda-ml.clustering.k-means :refer :all]
            [lambda-ml.distance :as d]))

;; Takes the 100th element of the `k-means` sequence for k = 2 and checks that
;; the five upper-right points share one lookup value in the inverted
;; clustering and that the two lower-left points share one as well.
(deftest test-k-means
  (let [points     [[1 1] [1.5 2] [3 4] [5 7] [3.5 5] [4.5 5] [3.5 4.5]]
        clustering (nth (k-means 2 d/euclidean points) 100)
        index      (map-invert clustering)]
    (is (= 2 (count clustering)))
    (is (= (index [3.5 4.5])
           (index [4.5 5])
           (index [3.5 5])
           (index [5 7])
           (index [3 4])))
    ;; This comparison used to be the second argument of the `is` above, where
    ;; clojure.test treats it as the failure *message* and never asserts it.
    (is (= (index [1.5 2])
           (index [1 1])))))
--------------------------------------------------------------------------------
/test/lambda_ml/core_test.clj:
--------------------------------------------------------------------------------
(ns lambda-ml.core-test
  (:require [clojure.test :refer :all]
            [lambda-ml.core :refer :all]))

;; Median of an odd-length and of an even-length collection (the latter is the
;; exact ratio 5/2).
(deftest test-median
  (is (= (median [5 2 4 1 3]) 3))
  (is (= (median [7 0 2 3]) (/ 5 2))))

;; Every sample has the requested size and only contains values below 10.
(deftest test-sample-with-replacement
  (doseq [k (range 5 11)]
    (let [s (sample-with-replacement (range 10) k)]
      (is (= k (count s)))
      (is (every? #(< % 10) s)))))

;; Asking for more items than exist yields all 10; otherwise the sample has
;; exactly k distinct values below 10.
(deftest test-sample-without-replacement
  (is (= 10 (count (sample-without-replacement (range 10) 10))))
  (is (= 10 (count (sample-without-replacement (range 10) 100))))
  (is (= 10 (count (sample-without-replacement (range 10) 1000))))
  (doseq [k (range 5 11)]
    (let [s (sample-without-replacement (range 10) k)]
      (is (= k (count s)))
      (is (= k (count (distinct s))))
      (is (every? #(< % 10) s)))))

--------------------------------------------------------------------------------
/test/lambda_ml/data/binary_tree_test.clj:
--------------------------------------------------------------------------------
(ns lambda-ml.data.binary-tree-test
  (:require [clojure.test :refer :all]
            [lambda-ml.data.binary-tree :refer :all]))

;; A tree built from a value alone has no children and is a leaf.
(deftest test-binary-tree-leaf
  (let [tree (make-tree 42)]
    (is (= (get-value tree) 42))
    (is (nil? (get-left tree)))
    (is (nil? (get-right tree)))
    (is (leaf? tree))))

;; `get-path` with a vector of :left/:right steps reaches the same nodes as
;; chained `get-left`/`get-right` calls.
(deftest test-binary-tree
  (let [tree (make-tree 2
                        (make-tree 7
                                   (make-tree 2)
                                   (make-tree 6 (make-tree 5) (make-tree 11)))
                        (make-tree 5
                                   nil
                                   (make-tree 9 (make-tree 4) nil)))]
    (is (= (get-value tree) 2))
    (is (= (get-path tree [:left]) (get-left tree)))
    (is (= (get-path tree [:right]) (get-right tree)))
    (is (= (get-value (get-path tree [:left :right :left])) 5))
    (is (= (get-value (get-path tree [:right :right :left])) 4))))

;; The adjacency matrix has one entry per node (9); leaves have no edges and
;; inner nodes have one edge per child.
(deftest test-adjacency-matrix
  (let [tree (make-tree :a
                        (make-tree :b
                                   (make-tree :c)
                                   (make-tree :d (make-tree :e) (make-tree :f)))
                        (make-tree :g
                                   nil
                                   (make-tree :h (make-tree :i) nil)))
        matrix (adjacency-matrix tree)
        ;; :edges of the first matrix entry whose :value is v.
        edges-of (fn [v]
                   (:edges (first (filter #(= v (:value %)) (vals matrix)))))]
    (is (= (count matrix) 9))
    (is (empty? (edges-of :c)))
    (is (empty? (edges-of :e)))
    (is (empty? (edges-of :f)))
    (is (empty? (edges-of :i)))
    (is (= (count (edges-of :a)) 2))
    (is (= (count (edges-of :b)) 2))
    (is (= (count (edges-of :d)) 2))
    (is (= (count (edges-of :g)) 1))
    (is (= (count (edges-of :h)) 1))))

--------------------------------------------------------------------------------
/test/lambda_ml/data/kd_tree_test.clj:
--------------------------------------------------------------------------------
(ns lambda-ml.data.kd-tree-test
  (:require [clojure.test :refer :all]
            [lambda-ml.data.binary-tree :as bt]
            [lambda-ml.data.kd-tree :refer :all]))

;; Builds a 2-dimensional k-d tree from six points and checks the value at
;; every node position, plus that all positions below the leaves are nil.
(deftest test-kd-tree
  (let [tree (make-tree 2 [[2 3] [5 4] [9 6] [4 7] [8 1] [7 2]])]
    (is (= (bt/get-value tree) [7 2]))
    (is (= (bt/get-path tree [:left]) (bt/get-left tree)))
    (is (= (bt/get-path tree [:right]) (bt/get-right tree)))
    (is (= (bt/get-path tree [:left :left]) (-> tree bt/get-left bt/get-left)))
    (is (= (bt/get-path tree [:left :right]) (-> tree bt/get-left bt/get-right)))
    (is (= (bt/get-path tree [:right :left]) (-> tree bt/get-right bt/get-left)))
    (is (= (bt/get-value (bt/get-path tree [:left])) [5 4]))
    (is (= (bt/get-value (bt/get-path tree [:right])) [9 6]))
    (is (= (bt/get-value (bt/get-path tree [:left :left])) [2 3]))
    (is (= (bt/get-value (bt/get-path tree [:left :right])) [4 7]))
    (is (= (bt/get-value (bt/get-path tree [:right :left])) [8 1]))
    (is (nil? (bt/get-path tree [:left :left :left])))
    (is (nil? (bt/get-path tree [:left :left :right])))
    (is (nil? (bt/get-path tree [:left :right :left])))
    (is (nil? (bt/get-path tree [:left :right :right])))
    (is (nil? (bt/get-path tree [:right :left :left])))
    (is (nil? (bt/get-path tree [:right :left :right])))
    (is (nil? (bt/get-path tree [:right :right])))))

--------------------------------------------------------------------------------
/test/lambda_ml/decision_tree_test.clj:
--------------------------------------------------------------------------------
(ns lambda-ml.decision-tree-test
  (:require [clojure.test :refer :all]
            [lambda-ml.core :refer :all]
            [lambda-ml.decision-tree :refer :all]))

;; Gini impurity for a pure, a 1:5 and a 3:3 label distribution.
(deftest test-gini-impurity
  (is (< (Math/abs (- (gini-impurity [:b :b :b :b :b :b]) 0)) 1E-6))
  (is (< (Math/abs (- (gini-impurity [:a :b :b :b :b :b]) 0.277778)) 1E-6))
  (is (< (Math/abs (- (gini-impurity [:a :a :a :b :b :b]) 0.5)) 1E-6)))

(deftest test-mean-squared-error
  (is (= 0.375 (mean-squared-error [3 -0.5 2 7] [2.5 0.0 2 8]))))

(deftest test-classification-weighted-cost
  (is (< (Math/abs (- (classification-weighted-cost [:a :a :a :b :b :b] [:a :b :b :b :b :b]
                                                    gini-impurity mode)
                      0.388889))
         1E-6)))

(deftest test-regression-weighted-cost
  (is (< (Math/abs (- (regression-weighted-cost [-1 0 3 5] [-2 0 4] mean-squared-error mean)
                      5.916667))
         1E-6)))

;; n categories yield 2^(n-1) - 1 two-way partitions (0, 1, 3, 7); for four
;; categories every expected split must be present in either order.
(deftest test-categorical-partitions
  (let [p0 (categorical-partitions [:foo])
        p1 (categorical-partitions [:high :normal])
        p2 (categorical-partitions [:sunny :overcast :rain])
        p3 (categorical-partitions [:A :B :C :D])
        ;; Parameters are named a/b so they do not shadow p1/p2 above.
        partitions-equal? (fn [a b]
                            (or (= a b)
                                (= a (reverse b))))]
    (is (empty? p0))
    (is (= (count p1) 1))
    (is (= (count p2) 3))
    (is (= (count p3) 7))
    (is (some #(partitions-equal? % [#{:A} #{:B :C :D}]) p3))
    (is (some #(partitions-equal? % [#{:A :B} #{:C :D}]) p3))
    (is (some #(partitions-equal? % [#{:A :C} #{:B :D}]) p3))
    (is (some #(partitions-equal? % [#{:B :C} #{:A :D}]) p3))
    (is (some #(partitions-equal? % [#{:B} #{:A :C :D}]) p3))
    (is (some #(partitions-equal? % [#{:C} #{:A :B :D}]) p3))
    (is (some #(partitions-equal? % [#{:D} #{:A :B :C}]) p3))))

;; Expected split points are the midpoints between consecutive sorted values,
;; independent of input order.
(deftest test-numeric-partitions
  (let [;; Element-wise comparison within 1E-6. The length check matters:
        ;; `map` stops at the shorter sequence, so without it a truncated or
        ;; empty result would compare equal to anything.
        eq? (fn [a b]
              (and (= (count a) (count b))
                   (every? #(< % 1E-6)
                           (map (fn [x y] (Math/abs (- x y))) a b))))]
    (is (empty? (numeric-partitions [42])))
    (is (empty? (numeric-partitions (range 1))))
    (is (eq? (numeric-partitions (range 4)) [0.5 1.5 2.5]))
    (is (eq? (numeric-partitions (range 5)) [0.5 1.5 2.5 3.5]))
    (is (eq? (numeric-partitions [1 0]) [0.5]))
    (is (eq? (numeric-partitions [2 1 0 3]) [0.5 1.5 2.5]))
    (is (eq? (numeric-partitions [3 4 1 2 0]) [0.5 1.5 2.5 3.5]))))

;; Number of splitters on a categorical column: none when the column has a
;; single distinct value, otherwise 1, 3, 7 for 2, 3, 4 values.
(deftest test-categorical-splitters
  (is (empty? (splitters [[:foo]] 0)))
  (is (empty? (splitters [[:foo] [:foo] [:foo]] 0)))
  (is (= (count (splitters [[:foo] [:bar]] 0)) 1))
  (is (= (count (splitters [[:foo] [:bar] [:baz]] 0)) 3))
  (is (= (count (splitters [[:foo] [:bar] [:baz] [:zap]] 0)) 7)))

;; Number of splitters on numeric columns: column 0 has 12 distinct values
;; (11 splitters), column 1 has 14 (13 splitters).
(deftest test-numeric-splitters
  (let [data [[64 177]
              [65 255]
              [85 125]
              [80 60]
              [72 56]
              [75 120]
              [75 100]
              [68 220]
              [71 90]
              [83 95]
              [69 52]
              [70 70]
              [72 85]
              [81 75]]]
    (is (empty? (splitters [[42] [42] [42]] 0)))
    (is (= (count (splitters data 0)) 11))
    (is (= (count (splitters data 1)) 13))))

;; When all feature rows are identical there is nothing to split on, so
;; `best-splitter` returns nil for both tree types.
(deftest test-best-splitter
  (let [data1 [["foo" "bar" "baz"]
               ["foo" "bar" "baz"]]
        data2 [[1.0 2.0 3.14]
               [1.0 2.0 2.71]]]
    (is (nil? (best-splitter (make-classification-tree gini-impurity 2 1 (dec (count (first data1))))
                             (map butlast data1) (map last data1))))
    (is (nil? (best-splitter (make-regression-tree mean-squared-error 2 1 (dec (count (first data2))))
                             (map butlast data2) (map last data2))))))

;; On the 14-row weather data set the best splitter must divide the rows 10/4
;; (in either order).
(deftest test-best-splitter-categorical
  (let [data [["Sunny"    "Hot"  "High"   "Weak"   "No"]
              ["Sunny"    "Hot"  "High"   "Strong" "No"]
              ["Overcast" "Hot"  "High"   "Weak"   "Yes"]
              ["Rain"     "Mild" "High"   "Weak"   "Yes"]
              ["Rain"     "Cool" "Normal" "Weak"   "Yes"]
              ["Rain"     "Cool" "Normal" "Strong" "No"]
              ["Overcast" "Cool" "Normal" "Strong" "Yes"]
              ["Sunny"    "Mild" "High"   "Weak"   "No"]
              ["Sunny"    "Cool" "Normal" "Weak"   "Yes"]
              ["Rain"     "Mild" "Normal" "Weak"   "Yes"]
              ["Sunny"    "Mild" "Normal" "Strong" "Yes"]
              ["Overcast" "Mild" "High"   "Strong" "Yes"]
              ["Overcast" "Hot"  "Normal" "Weak"   "Yes"]
              ["Rain"     "Mild" "High"   "Strong" "No"]]
        splitter (best-splitter (make-classification-tree gini-impurity 2 1 (dec (count (first data))))
                                (map butlast data) (map last data))
        [left right] (vals (group-by splitter data))]
    (is (or (and (= (count left) 10) (= (count right) 4))
            (and (= (count left) 4) (= (count right) 10))))))

;; A classification tree fitted on the four XOR rows reproduces XOR.
(deftest test-classification-tree
  (let [data [[0 0 0] [0 1 1] [1 0 1] [1 1 0]]
        model (make-classification-tree gini-impurity 2 1 (dec (count (first data))))
        fit (decision-tree-fit model data)]
    (is (= (first (decision-tree-predict fit [[0 0]])) 0))
    (is (= (first (decision-tree-predict fit [[0 1]])) 1))
    (is (= (first (decision-tree-predict fit [[1 0]])) 1))
    (is (= (first (decision-tree-predict fit [[1 1]])) 0))))

--------------------------------------------------------------------------------
/test/lambda_ml/distance_test.clj:
--------------------------------------------------------------------------------
(ns lambda-ml.distance-test
  (:require [clojure.test :refer :all]
            [lambda-ml.distance :refer :all]))

;; Each case is its own top-level `is`. Previously the second `is` sat inside
;; the first one as its message argument because of a misplaced paren.
(deftest test-cosine
  (is (< (Math/abs (- (cosine [1 2 0] [0 4 1])
                      0.132278))
         1E-6))
  (is (< (Math/abs (- (cosine [0 3 4 5] [7 6 3 1])
                      0.492167))
         1E-6)))

;; The expected values are sums of squared differences (4^2 + 3^2 = 25), i.e.
;; `euclidean` is expected to return the squared distance here.
(deftest test-euclidean
  (is (= 25 (euclidean [2 -1] [-2 2])))
  (is (= 95 (euclidean [0 3 4 5] [7 6 3 -1]))))

(deftest test-haversine
  (is (< (Math/abs (- (haversine [36.12 -86.67] [33.94 -118.40])
                      1794.0717860923137))
         1E-6)))

;; Three-argument arity of `haversine`; the meaning of the extra 0/1 argument
;; is not visible from this test.
(deftest test-haversine2
  (is (< (Math/abs (- (haversine [36.12 -86.67] [33.94 -118.40] 0)
                      150.66697884839715))
         1E-6))
  (is (< (Math/abs (- (haversine [36.12 -86.67] [33.94 -118.40] 1)
                      2192.964788467725))
         1E-6)))

;; Jaccard distances are compared as exact ratios.
(deftest test-jaccard
  (is (= (/ 3 5) (jaccard [1 1 0 1] [2 0 1 1])))
  (is (= (/ 3 7) (jaccard [1 1 1 0 1 0 0 1 1 1] [0 1 1 0 1 0 0 1 0 0])))
  (is (= (/ 5 7) (jaccard [1 1 0 1 1 0 0 0 0 1] [0 1 1 0 1 0 0 1 0 0])))
  (is (= (/ 3 7) (jaccard [0 1 1 0 1 0 0 1 0 0] [1 1 1 0 1 0 1 1 1 0])))
  (is (= (/ 6 7) (jaccard [0 0 0 1 0 1 0 0 0 0] [0 1 1 0 1 1 0 1 1 0]))))

--------------------------------------------------------------------------------
/test/lambda_ml/ensemble_test.clj:
--------------------------------------------------------------------------------
(ns lambda-ml.ensemble-test
  (:require [clojure.test :refer :all]
            [lambda-ml.ensemble :refer :all]
            [lambda-ml.decision-tree :refer :all]))

;; A bagging classifier built by applying `add-bagging-estimator` 1013 times
;; (an odd count) must reproduce XOR on the four training rows.
(deftest test-bagging-classifier
  (let [data [[0 0 0] [0 1 1] [1 0 1] [1 1 0]]
        tree (make-classification-tree gini-impurity 2 1 (dec (count (first data))))
        model (-> (iterate #(add-bagging-estimator % tree decision-tree-fit decision-tree-predict)
                           (make-bagging-classifier 1.0))
                  (nth 1013))
        fit (bagging-ensemble-fit model data)]
    (is (= (first (bagging-ensemble-predict fit [[0 0]])) 0))
    (is (= (first (bagging-ensemble-predict fit [[0 1]])) 1))
    (is (= (first (bagging-ensemble-predict fit [[1 0]])) 1))
    (is (= (first (bagging-ensemble-predict fit [[1 1]])) 0))))

;; A bagging regressor built by applying `add-bagging-estimator` 1003 times
;; must land on the correct side of 0.5 for each XOR row.
(deftest test-bagging-regressor
  (let [data [[0 0 0] [0 1 1] [1 0 1] [1 1 0]]
        tree (make-regression-tree mean-squared-error 2 1 (dec (count (first data))))
        model (-> (iterate #(add-bagging-estimator % tree decision-tree-fit decision-tree-predict)
                           (make-bagging-regressor 1.0))
                  (nth 1003))
        fit (bagging-ensemble-fit model data)]
    (is (< (first (bagging-ensemble-predict fit [[0 0]])) 0.5))
    (is (> (first (bagging-ensemble-predict fit [[0 1]])) 0.5))
    (is (> (first (bagging-ensemble-predict fit [[1 0]])) 0.5))
    (is (< (first (bagging-ensemble-predict fit [[1 1]])) 0.5))))

--------------------------------------------------------------------------------
/test/lambda_ml/factorization_test.clj:
--------------------------------------------------------------------------------
(ns lambda-ml.factorization-test
  (:require [clojure.test :refer :all]
            [clojure.core.matrix :refer :all]
            [lambda-ml.factorization :refer :all]))

;; After 200 steps of the `factorizations` sequence, the product w*h must
;; reconstruct the 2x3 input with cost below 1E-6.
(deftest test-factorization
  (let [data [[1 2 3] [4 5 6]]
        [w h] (-> (factorizations data 2)
                  (nth 200))]
    (is (< (cost data (mmul w h)) 1E-6))))

--------------------------------------------------------------------------------
/test/lambda_ml/metrics_test.clj:
--------------------------------------------------------------------------------
(ns lambda-ml.metrics-test
  (:require [clojure.test :refer :all]
            [lambda-ml.metrics :refer :all]))

;; Area under a piecewise-linear curve given as [x y] points.
(deftest test-auc
  (is (= 0.75 (auc [[0 0.5] [0.5 0.5] [0.5 1] [1 1]])))
  (is (= 147.66 (auc [[0 100] [1 50] [2 25] [3 12.5] [4 6.25] [5 3.13] [6 1.56]]))))

--------------------------------------------------------------------------------
/test/lambda_ml/naive_bayes_test.clj:
--------------------------------------------------------------------------------
(ns lambda-ml.naive-bayes-test
  (:require [clojure.test :refer :all]
            [lambda-ml.naive-bayes :refer :all]))

;; Fits on eight labelled rows (label first, then three numeric features) and
;; checks selected per-class [mean variance] pairs plus one prediction.
(deftest test-naive-bayes
  (let [data [[:male 6.00 180 12]
              [:male 5.92 190 11]
              [:male 5.58 170 12]
              [:male 5.92 165 10]
              [:female 5.00 100 6]
              [:female 5.50 150 8]
              [:female 5.42 130 7]
              [:female 5.75 150 9]]
        model (make-naive-bayes)
        fit (naive-bayes-fit model (map #(subvec % 1) data) (map first data))
        ;; Two-sided closeness check. The previous form
        ;; `(<= (- expected actual) 10E-6)` was one-sided and accepted any
        ;; overestimate. The tolerance is relative because the expected
        ;; variances are only given to five significant figures (558.33
        ;; against 1675/3 = 558.333..., assuming the fit uses the n-1
        ;; variance as the constants suggest).
        close? (fn [expected actual]
                 (<= (Math/abs (- expected actual))
                     (* 10E-6 (Math/abs expected))))]
    (let [[mean variance] (get-in (:distributions fit) [:male 0])]
      (is (close? 5.855 mean))
      (is (close? 3.5033E-2 variance)))
    (let [[mean variance] (get-in (:distributions fit) [:female 1])]
      (is (close? 132.5 mean))
      (is (close? 5.5833E+2 variance)))
    (let [[mean variance] (get-in (:distributions fit) [:male 2])]
      (is (close? 11.25 mean))
      (is (close? 9.1667E-1 variance)))
    (is (= :female (first (naive-bayes-predict fit [[6.0 130 8]]))))))

--------------------------------------------------------------------------------
/test/lambda_ml/nearest_neighbors_test.clj:
--------------------------------------------------------------------------------
(ns lambda-ml.nearest-neighbors-test
  (:require [clojure.test :refer :all]
            [clojure.set :refer :all]
            [lambda-ml.nearest-neighbors :refer :all]
            [lambda-ml.distance :as d]))

;; Queries are themselves indexed points, so the second result is the nearest
;; *other* point; asking for more neighbours than exist returns all six.
(deftest test-nearest-neighbor-search
  (let [search (make-nearest-neighbor-search d/euclidean [[2 3] [5 4] [9 6] [4 7] [8 1] [7 2]])]
    (is (= [7 2] (item-value (second (search 2 [8 1])))))
    (is (= [5 4] (item-value (second (search 2 [2 3])))))
    (is (= [8 1] (item-value (second (search 2 [7 2])))))
    (is (= [5 4] (item-value (second (search 2 [4 7])))))
    (is (= 3 (count (search 3 [2 3]))))
    (is (= 6 (count (search 6 [2 3]))))
    (is (= 6 (count (search 9 [2 3]))))))

;; Query point that is not in the index: the first result is the closest one.
(deftest test-nearest-neighbor-search2
  (let [search (make-nearest-neighbor-search d/euclidean [[1 11] [2 5] [4 8] [6 4] [5 0] [7 9] [8 2]])]
    (is (= [4 8] (item-value (first (search 5 [3 9])))))))

;; Points are map keys labelled 1..5; each query checks the labels of the four
;; nearest points in result order.
(deftest test-nearest-neighbor-search3
  (let [points {[0.0 0.0] 1
                [10.1 -10.1] 2
                [-12.2 12.2] 3
                [38.3 38.3] 4
                [79.99 179.99] 5}
        search (make-nearest-neighbor-search d/euclidean (keys points))]
    (is (= (list 1 2 3 4)
           (map (comp points second) (search 4 ((map-invert points) 1)))))
    (is (= (list 2 1 3 4)
           (map (comp points second) (search 4 ((map-invert points) 2)))))
    (is (= (list 3 1 2 4)
           (map (comp points second) (search 4 ((map-invert points) 3)))))
    (is (= (list 4 1 2 3)
           (map (comp points second) (search 4 ((map-invert points) 4)))))
    (is (= (list 5 4 3 1)
           (map (comp points second) (search 4 ((map-invert points) 5)))))))

;; Same shape as test-nearest-neighbor-search3, with ten points in the unit
;; square.
(deftest test-nearest-neighbor-search4
  (let [points {[0.436697697345292 0.492281587956396] 1
                [0.318000697283004 0.302602867518914] 2
                [0.268674100320323 0.684132163547525] 3
                [0.347190228888873 0.959920716313895] 4
                [0.539212291014011 0.187100169547265] 5
                [0.964631186098456 0.129079314315528] 6
                [0.171792010609788 0.795749621321345] 7
                [0.910157297130659 0.437962722965556] 8
                [0.847975159955406 0.169625495659256] 9
                [0.793504465072615 0.121750314432942] 10}
        search (make-nearest-neighbor-search d/euclidean (keys points))]
    (is (= (list 1 2 3 5)
           (map (comp points second) (search 4 ((map-invert points) 1)))))
    (is (= (list 2 1 5 3)
           (map (comp points second) (search 4 ((map-invert points) 2)))))
    (is (= (list 3 7 1 4)
           (map (comp points second) (search 4 ((map-invert points) 3)))))
    (is (= (list 4 7 3 1)
           (map (comp points second) (search 4 ((map-invert points) 4)))))
    (is (= (list 5 2 10 9)
           (map (comp points second) (search 4 ((map-invert points) 5)))))
    (is (= (list 6 9 10 8)
           (map (comp points second) (search 4 ((map-invert points) 6)))))
    (is (= (list 7 3 4 1)
           (map (comp points second) (search 4 ((map-invert points) 7)))))
    (is (= (list 8 9 6 10)
           (map (comp points second) (search 4 ((map-invert points) 8)))))
    (is (= (list 9 10 6 8)
           (map (comp points second) (search 4 ((map-invert points) 9)))))
    (is (= (list 10 9 6 5)
           (map (comp points second) (search 4 ((map-invert points) 10)))))))

;; Nearest other city for three named coordinate pairs.
(deftest test-nearest-neighbor-search5
  (let [points {[36.971838 -122.019653] :SantaCruz,
                [37.864012 -122.277832] :Berkeley,
                [37.330857 -121.887817] :SanJose,
                [37.444335 -122.156982] :PaloAlto,
                [37.387617 -122.060852] :MountainView,
                [37.759859 -122.437134] :SanFrancisco}
        search (make-nearest-neighbor-search d/euclidean (keys points))]
    (is (= :SanJose
           (-> (search 2 ((map-invert points) :SantaCruz)) second item-value points)))
    (is (= :SanFrancisco
           (-> (search 2 ((map-invert points) :Berkeley)) second item-value points)))
    (is (= :MountainView
           (-> (search 2 ((map-invert points) :PaloAlto)) second item-value points)))))

;; Three-argument constructor: `rest` is passed between the distance function
;; and the points, and each point carries a leading keyword tag that must come
;; back in the results.
(deftest test-nearest-neighbor-search-metadata
  (let [points [[:a 2 3]
                [:b 5 4]
                [:c 9 6]
                [:d 4 7]
                [:e 8 1]
                [:f 7 2]]
        search (make-nearest-neighbor-search d/euclidean rest points)]
    (is (= :f (first (item-value (second (search 2 [:e 8 1]))))))
    (is (= :b (first (item-value (second (search 2 [:a 2 3]))))))
    (is (= :e (first (item-value (second (search 2 [:f 7 2]))))))
    (is (= :b (first (item-value (second (search 2 [:d 4 7]))))))))

;; 1-NN and 3-NN classification of one query against eleven labelled rows.
(deftest test-nearest-neighbors-classifier
  (let [data [[25 40000 :no]
              [35 60000 :no]
              [45 80000 :no]
              [20 20000 :no]
              [35 120000 :no]
              [52 18000 :no]
              [23 95000 :yes]
              [40 62000 :yes]
              [60 100000 :yes]
              [48 220000 :yes]
              [33 150000 :yes]]
        fit1 (-> (make-nearest-neighbors-classifier 1 d/euclidean)
                 (nearest-neighbors-fit data))
        fit3 (-> (make-nearest-neighbors-classifier 3 d/euclidean)
                 (nearest-neighbors-fit data))]
    (is (= (first (nearest-neighbors-predict fit1 [[48 142000]])) :yes))
    (is (= (first (nearest-neighbors-predict fit3 [[48 142000]])) :yes))))

;; 1-NN and 3-NN regression; the 3-NN expectation is the exact ratio 542/3.
(deftest test-nearest-neighbors-regressor
  (let [data [[25 40000 135]
              [35 60000 256]
              [45 80000 231]
              [20 20000 267]
              [35 120000 139]
              [52 18000 150]
              [23 95000 127]
              [40 62000 216]
              [60 100000 139]
              [48 220000 250]
              [33 150000 264]]
        fit1 (-> (make-nearest-neighbors-regressor 1 d/euclidean)
                 (nearest-neighbors-fit data))
        fit3 (-> (make-nearest-neighbors-regressor 3 d/euclidean)
                 (nearest-neighbors-fit data))]
    (is (= (first (nearest-neighbors-predict fit1 [[48 142000]])) 264))
    (is (= (first (nearest-neighbors-predict fit3 [[48 142000]])) (/ 542 3)))))

--------------------------------------------------------------------------------
/test/lambda_ml/neural_network_test.clj:
--------------------------------------------------------------------------------
(ns lambda-ml.neural-network-test
  (:require [clojure.test :refer :all]
            [clojure.core.matrix :as m]
            [lambda-ml.core :refer :all]
            [lambda-ml.neural-network :refer :all]))

;; Forward pass through a 2-2-2 sigmoid network with fixed weights; checks the
;; hidden and output activations.
(deftest test-feed-forward
  (let [weights [[[0.35 0.15 0.20]
                  [0.35 0.25 0.30]]
                 [[0.60 0.40 0.45]
                  [0.60 0.50 0.55]]]
        fs [sigmoid sigmoid]
        x [0.05 0.1]
        [hidden output] (feed-forward x weights fs)]
    (is (< (Math/abs (- 0.593269920 (first hidden))) 1E-6))
    (is (< (Math/abs (- 0.596884378 (second hidden))) 1E-6))
    (is (< (Math/abs (- 0.751365070 (first output))) 1E-6))
    (is (< (Math/abs (- 0.772928465 (second output))) 1E-6))))

;; Forward pass through a 2-3-2 sigmoid network.
(deftest test-feed-forward2
  (let [weights [[[ 0.1  0.1 -0.2]
                  [ 0.2  0    0.2]
                  [ 0.5  0.3 -0.4]]
                 [[-0.1 -0.4  0.1  0.6]
                  [ 0.6  0.2 -0.1 -0.2]]]
        fs [sigmoid sigmoid]
        x [0.6 0.1]
        [hidden output] (feed-forward x weights fs)]
    (is (< (Math/abs (- 0.53494294 (nth hidden 0))) 1E-6))
    (is (< (Math/abs (- 0.55477923 (nth hidden 1))) 1E-6))
    (is (< (Math/abs (- 0.65475346 (nth hidden 2))) 1E-6))
    (is (< (Math/abs (- 0.53353777 (nth output 0))) 1E-6))
    (is (< (Math/abs (- 0.62727869 (nth output 1))) 1E-6))))

;; Per-layer error terms from `back-propagate`, using the activations expected
;; by test-feed-forward and the quadratic output error.
(deftest test-back-propagate
  (let [weights [[[0.35 0.15 0.20]
                  [0.35 0.25 0.30]]
                 [[0.60 0.40 0.45]
                  [0.60 0.50 0.55]]]
        fs' [sigmoid' sigmoid']
        y [0.01 0.99]
        activations [[0.593269920 0.596884378] [0.751365070 0.772928465]]
        [errors1 errors2] (back-propagate y weights fs' activations quadratic-output-error)]
    (is (< (Math/abs (- 0.00877136 (first errors1))) 1E-6))
    (is (< (Math/abs (- 0.00995425 (second errors1))) 1E-6))
    (is (< (Math/abs (- 0.13849856 (first errors2))) 1E-6))
    (is (< (Math/abs (- -0.03809824 (second errors2))) 1E-6))))

;; Gradients per layer; in each row, column 0 equals the layer's error term
;; for that unit.
(deftest test-compute-gradients
  (let [x [0.05 0.1]
        activations [[0.593269920 0.596884378] [0.751365070 0.772928465]]
        errors [[0.00877136 0.00995425] [0.13849856 -0.03809824]]
        [g0 g1] (compute-gradients x activations errors)]
    (is (< (Math/abs (- 0.00877136 (nth (nth g0 0) 0))) 1E-6))
    (is (< (Math/abs (- 0.00043857 (nth (nth g0 0) 1))) 1E-6))
    (is (< (Math/abs (- 0.00087713 (nth (nth g0 0) 2))) 1E-6))
    (is (< (Math/abs (- 0.00995425 (nth (nth g0 1) 0))) 1E-6))
    (is (< (Math/abs (- 0.00049771 (nth (nth g0 1) 1))) 1E-6))
    (is (< (Math/abs (- 0.00099543 (nth (nth g0 1) 2))) 1E-6))
    (is (< (Math/abs (- 0.13849856 (nth (nth g1 0) 0))) 1E-6))
    (is (< (Math/abs (- 0.08216703 (nth (nth g1 0) 1))) 1E-6))
    (is (< (Math/abs (- 0.08266763 (nth (nth g1 0) 2))) 1E-6))
    (is (< (Math/abs (- -0.03809824 (nth (nth g1 1) 0))) 1E-6))
    (is (< (Math/abs (- -0.02260254 (nth (nth g1 1) 1))) 1E-6))
    (is (< (Math/abs (- -0.02274024 (nth (nth g1 1) 2))) 1E-6))))

;; Regularization terms: column 0 of every row is expected to be 0.0 and the
;; other entries equal alpha * lambda * weight (0.5 * 0.1 * w).
(deftest test-regularize
  (let [weights [[[0.35 0.15 0.20]
                  [0.35 0.25 0.30]]
                 [[0.60 0.40 0.45]
                  [0.60 0.50 0.55]]]
        alpha 0.5
        lambda 0.1
        [r0 r1] (regularize weights alpha lambda)]
    (is (< (Math/abs (- 0.0 (nth (nth r0 0) 0))) 1E-6))
    (is (< (Math/abs (- 0.0075 (nth (nth r0 0) 1))) 1E-6))
    (is (< (Math/abs (- 0.01 (nth (nth r0 0) 2))) 1E-6))
    (is (< (Math/abs (- 0.0 (nth (nth r0 1) 0))) 1E-6))
    (is (< (Math/abs (- 0.0125 (nth (nth r0 1) 1))) 1E-6))
    (is (< (Math/abs (- 0.015 (nth (nth r0 1) 2))) 1E-6))
    (is (< (Math/abs (- 0.0 (nth (nth r1 0) 0))) 1E-6))
    (is (< (Math/abs (- 0.02 (nth (nth r1 0) 1))) 1E-6))
    (is (< (Math/abs (- 0.0225 (nth (nth r1 0) 2))) 1E-6))
    (is (< (Math/abs (- 0.0 (nth (nth r1 1) 0))) 1E-6))
    (is (< (Math/abs (- 0.025 (nth (nth r1 1) 1))) 1E-6))
    (is (< (Math/abs (- 0.0275 (nth (nth r1 1) 2))) 1E-6))))

;; One gradient-descent step on a single example; only the non-first-column
;; weights are checked. The activation functions come from the model's layers,
;; so no separate function vector is needed here.
(deftest test-gradient-descent-step
  (let [weights [[[0.35 0.15 0.20]
                  [0.35 0.25 0.30]]
                 [[0.60 0.40 0.45]
                  [0.60 0.50 0.55]]]
        model (-> (make-neural-network 0.5 0 quadratic-cost)
                  (add-neural-network-layer 2 sigmoid)
                  (add-neural-network-layer 2 sigmoid)
                  (add-neural-network-layer 2 sigmoid))
        x [0.05 0.1]
        y [0.01 0.99]
        [w0 w1] (gradient-descent-step model x y weights)]
    (is (< (Math/abs (- 0.149780716 (nth (nth w0 0) 1))) 1E-6))
    (is (< (Math/abs (- 0.19956143 (nth (nth w0 0) 2))) 1E-6))
    (is (< (Math/abs (- 0.24975114 (nth (nth w0 1) 1))) 1E-6))
    (is (< (Math/abs (- 0.29950229 (nth (nth w0 1) 2))) 1E-6))
    (is (< (Math/abs (- 0.35891648 (nth (nth w1 0) 1))) 1E-6))
    (is (< (Math/abs (- 0.408666186 (nth (nth w1 0) 2))) 1E-6))
    (is (< (Math/abs (- 0.51130127 (nth (nth w1 1) 1))) 1E-6))
    (is (< (Math/abs (- 0.561370121 (nth (nth w1 1) 2))) 1E-6))))

;; With a fixed :seed, the initial weights for layers [2 3 1] have shapes
;; [3 3] and [1 4] and reproducible values.
(deftest test-init-parameters
  (let [model {:layers [2 3 1] :seed 12345}
        [w0 w1] (init-parameters model)]
    (is (= [3 3] (m/shape w0)))
    (is (= [1 4] (m/shape w1)))
    (is (< (Math/abs (- -0.18780898 (m/mget w0 0 0))) 1E-6))
    (is (< (Math/abs (- 0.58843630 (m/mget w0 0 1))) 1E-6))
    (is (< (Math/abs (- 0.94880478 (m/mget w0 0 2))) 1E-6))
    (is (< (Math/abs (- -0.49428072 (m/mget w0 1 0))) 1E-6))
    (is (< (Math/abs (- -1.22341193 (m/mget w0 1 1))) 1E-6))
    (is (< (Math/abs (- -0.69796098 (m/mget w0 1 2))) 1E-6))
    (is (< (Math/abs (- -0.77722490 (m/mget w0 2 0))) 1E-6))
    (is (< (Math/abs (- 2.06800870 (m/mget w0 2 1))) 1E-6))
    (is (< (Math/abs (- -0.58734674 (m/mget w0 2 2))) 1E-6))
    (is (< (Math/abs (- 0.46214534 (m/mget w1 0 0))) 1E-6))
    (is (< (Math/abs (- 1.37458180 (m/mget w1 0 1))) 1E-6))
    (is (< (Math/abs (- -0.09785321 (m/mget w1 0 2))) 1E-6))
    (is (< (Math/abs (- -1.07643638 (m/mget w1 0 3))) 1E-6))))

;; End-to-end: a seeded 2-3-1 sigmoid network fitted 5000 times on XOR must
;; predict below 0.1 / above 0.9 as appropriate.
(deftest test-neural-network
  (let [data [[0 0 [0]]
              [0 1 [1]]
              [1 0 [1]]
              [1 1 [0]]]
        model (-> (make-neural-network 0.5 0.0 cross-entropy-cost 54321)
                  (add-neural-network-layer 2 sigmoid)
                  (add-neural-network-layer 3 sigmoid)
                  (add-neural-network-layer 1 sigmoid))
        fit (nth (iterate #(neural-network-fit % data) model) 5000)
        predictions (map first (neural-network-predict fit (map butlast data)))]
    (is (> 0.1 (nth predictions 0)))
    (is (< 0.9 (nth predictions 1)))
    (is (< 0.9 (nth predictions 2)))
    (is (> 0.1 (nth predictions 3)))))

--------------------------------------------------------------------------------
/test/lambda_ml/random_forest_test.clj:
--------------------------------------------------------------------------------
(ns lambda-ml.random-forest-test
  (:require [clojure.test :refer :all]
            [lambda-ml.random-forest :refer :all]))

;; A random-forest classifier fitted on the four XOR rows reproduces XOR.
(deftest test-random-forest-classifier
  (let [data [[0 0 0] [0 1 1] [1 0 1] [1 1 0]]
        model (make-random-forest-classifier 1001 2 1 2)
        fit (random-forest-fit model data)]
    (is (= (first (random-forest-predict fit [[0 0]])) 0))
    (is (= (first (random-forest-predict fit [[0 1]])) 1))
    (is (= (first (random-forest-predict fit [[1 0]])) 1))
    (is (= (first (random-forest-predict fit [[1 1]])) 0))))

;; Regression variant: predictions must fall on the correct side of 0.5.
(deftest test-random-forest-regressor
  (let [data [[0 0 0] [0 1 1] [1 0 1] [1 1 0]]
        model (make-random-forest-regressor 1001 2 1 2)
        fit (random-forest-fit model data)]
    (is (< (first (random-forest-predict fit [[0 0]])) 0.5))
    (is (> (first (random-forest-predict fit [[0 1]])) 0.5))
    (is (> (first (random-forest-predict fit [[1 0]])) 0.5))
    (is (< (first (random-forest-predict fit [[1 1]])) 0.5))))

--------------------------------------------------------------------------------
/test/lambda_ml/regression_test.clj:
--------------------------------------------------------------------------------
(ns lambda-ml.regression-test
  (:require [clojure.test :refer :all]
            [lambda-ml.core :refer :all]
            [lambda-ml.regression :refer :all]))

;; Fitted parameters for three [x y] points must match 5/19 and 23/38.
(deftest test-linear-regression
  (let [data [[-2 -1]
              [1 1]
              [3 2]]
        model (make-linear-regression 0.01 0.0 5000)
        {coeff :parameters} (regression-fit model data)]
    (is (< (Math/abs (- (/ 5 19) (first coeff))) 1E-6))
    (is (< (Math/abs (- (/ 23 38) (second coeff))) 1E-6))))

;; Fits degree-5 polynomial features with the third constructor argument set
;; to 0, 1 and 10; the parameter L2 norm must shrink as that value grows.
(deftest test-linear-regression-regularization
  (let [data (map (fn [[x y]] [x (* x x) (* x x x) (* x x x x) (* x x x x x) y])
                  [[-0.99768 2.0885]
                   [-0.69574 1.1646]
                   [-0.40373 0.3287]
                   [-0.10236 0.46013]
                   [0.22024 0.44808]
                   [0.47742 0.10013]
                   [0.82229 -0.32952]])
        fit-lambda0 (regression-fit (make-linear-regression 0.1 0 10000) data)
        fit-lambda1 (regression-fit (make-linear-regression 0.1 1 10000) data)
        fit-lambda10 (regression-fit (make-linear-regression 0.1 10 10000) data)]
    (is (> (l2-norm (:parameters fit-lambda0))
           (l2-norm (:parameters fit-lambda1))))
    (is (> (l2-norm (:parameters fit-lambda0))
           (l2-norm (:parameters fit-lambda10))))
    (is (> (l2-norm (:parameters fit-lambda1))
           (l2-norm (:parameters fit-lambda10))))))

(deftest test-linear-regression2
  (let [data [[-1 0]
              [0 2]
              [1 4]
              [2 5]]
        model (make-linear-regression 0.01 0.0 5000)
        {coeff :parameters} (regression-fit model data)]
    (is (< (Math/abs (- 1.9 (first coeff))) 1E-6))
    (is (< (Math/abs (- 1.7 (second coeff))) 1E-6))))

(deftest test-linear-regression3
  (let [data [[4 390]
              [9 580]
              [10 650]
              [14 730]
              [4 410]
              [7 530]
              [12 600]
              [22 790]
              [1 350]
              [3 400]
              [8 590]
              [11 640]
              [5 450]
              [6 520]
              [10 690]
              [11 690]
              [16 770]
              [13 700]
              [13 730]
              [10 640]]
        model (make-linear-regression 0.01 0.0 10000)
        {coeff :parameters} (regression-fit model data)]
    (is (< (Math/abs (- 353.16487949889 (first coeff))) 1E-6))
    (is (< (Math/abs (- 25.326467777896 (second coeff))) 1E-6))))

;; Logistic regression on twenty [x label] rows with 0/1 labels.
(deftest test-logistic-regression
  (let [data [[0.50 0]
              [0.75 0]
              [1.00 0]
              [1.25 0]
              [1.50 0]
              [1.75 0]
              [1.75 1]
              [2.00 0]
              [2.25 1]
              [2.50 0]
              [2.75 1]
              [3.00 0]
              [3.25 1]
              [3.50 0]
              [4.00 1]
              [4.25 1]
              [4.50 1]
              [4.75 1]
              [5.00 1]
              [5.50 1]]
        model (make-logistic-regression 0.1 0 10000)
        {coeff :parameters} (regression-fit model data)]
    (is (< (Math/abs (- -4.077713 (first coeff))) 1E-6))
    (is (< (Math/abs (- 1.504645 (second coeff))) 1E-6))))

--------------------------------------------------------------------------------